/*
 * ARM translation: M-profile MVE instructions
 *
 * Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

static inline int vidup_imm(DisasContext *s, int x)
{
    return 1 << x;
}

/* Include the generated decoder */
#include "decode-mve.c.inc"

typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStSGFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}

static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}

static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     */
    return qmask < 8;
}

bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
     */
    s->eci_handled = true;
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved value: INVSTATE UsageFault */
        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
                           default_exception_el(s));
        return false;
    }
}

void mve_update_eci(DisasContext *s)
{
    /*
     * The helper function will always update the CPUState field,
     * so we only need to update the DisasContext field.
     */
    if (s->eci) {
        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
    }
}

void mve_update_and_store_eci(DisasContext *s)
{
    /*
     * For insns which don't call a helper function that will call
     * mve_advance_vpt(), this version updates s->eci and also stores
     * it out to the CPUState field.
     */
    if (s->eci) {
        mve_update_eci(s);
        store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
    }
}

static bool mve_skip_first_beat(DisasContext *s)
{
    /* Return true if PSR.ECI says we must skip the first beat of this insn */
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        g_assert_not_reached();
    }
}
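
/*
 * Illustrative note (added commentary, not from the original file): the
 * ECI values name the beats that had already executed when a beatwise
 * insn pair was interrupted. For example, s->eci == ECI_A0A1 means beats
 * 0 and 1 of the current insn ("A") completed before the exception, so
 * on resumption only beats 2 and 3 should take effect. ECI_A0A1A2B0
 * additionally means beat 0 of the *following* insn ("B") completed,
 * which is why mve_update_eci() advances A0A1A2B0 to A0 and everything
 * else to NONE, and why mve_update_and_store_eci() writes (eci << 4)
 * into condexec_bits.
 */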

static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
                    unsigned msize)
{
    TCGv_i32 addr;
    uint32_t offset;
    TCGv_ptr qreg;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }

    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
    if (a->rn == 15 || (a->rn == 13 && a->w)) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }
    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    qreg = mve_qreg_ptr(a->qd);
    fn(cpu_env, qreg, addr);
    tcg_temp_free_ptr(qreg);

    /*
     * Writeback always happens after the last beat of the insn,
     * regardless of predication
     */
    if (a->w) {
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    mve_update_eci(s);
    return true;
}

static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
{
    static MVEGenLdStFn * const ldstfns[4][2] = {
        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
        { NULL, NULL }
    };
    return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
}

#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
    {                                                           \
        static MVEGenLdStFn * const ldstfns[2][2] = {           \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
            { NULL, gen_helper_mve_##ULD },                     \
        };                                                      \
        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
    }

DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)
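
/*
 * Worked example (added commentary): the immediate is scaled by the
 * *memory* element size: offset is a->imm << msize. So a VLDRW
 * (msize MO_32) scales the immediate by 4, while a widening VLDRB.S16
 * (handled by vldrb_sh above, msize MO_8) treats the immediate as a
 * plain byte offset even though it writes halfword elements to Qd.
 */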

static bool do_ldst_sg(DisasContext *s, arg_vldst_sg *a, MVEGenLdStSGFn fn)
{
    TCGv_i32 addr;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn || a->rn == 15) {
        /* Rn case is UNPREDICTABLE */
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    addr = load_reg(s, a->rn);

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, addr);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_i32(addr);
    mve_update_eci(s);
    return true;
}

/*
 * The naming scheme here is "vldrb_sg_sh == in-memory byte loads
 * sign-extended to halfword elements in register". _os_ indicates that
 * the offsets in Qm should be scaled by the element size.
 */
/* This macro is just to make the arrays more compact in these functions */
#define F(N) gen_helper_mve_##N

/* VLDRB/VSTRB (ie msize 1) with OS=1 is UNPREDICTABLE; we UNDEF */
static bool trans_VLDR_S_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { NULL, F(vldrb_sg_sh), F(vldrb_sg_sw), NULL },
            { NULL, NULL,           F(vldrh_sg_sw), NULL },
            { NULL, NULL,           NULL,           NULL },
            { NULL, NULL,           NULL,           NULL }
        }, {
            { NULL, NULL,              NULL,              NULL },
            { NULL, NULL,              F(vldrh_sg_os_sw), NULL },
            { NULL, NULL,              NULL,              NULL },
            { NULL, NULL,              NULL,              NULL }
        }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VLDR_U_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { F(vldrb_sg_ub), F(vldrb_sg_uh), F(vldrb_sg_uw), NULL },
            { NULL,           F(vldrh_sg_uh), F(vldrh_sg_uw), NULL },
            { NULL,           NULL,           F(vldrw_sg_uw), NULL },
            { NULL,           NULL,           NULL,           F(vldrd_sg_ud) }
        }, {
            { NULL, NULL,              NULL,              NULL },
            { NULL, F(vldrh_sg_os_uh), F(vldrh_sg_os_uw), NULL },
            { NULL, NULL,              F(vldrw_sg_os_uw), NULL },
            { NULL, NULL,              NULL,              F(vldrd_sg_os_ud) }
        }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VSTR_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { F(vstrb_sg_ub), F(vstrb_sg_uh), F(vstrb_sg_uw), NULL },
            { NULL,           F(vstrh_sg_uh), F(vstrh_sg_uw), NULL },
            { NULL,           NULL,           F(vstrw_sg_uw), NULL },
            { NULL,           NULL,           NULL,           F(vstrd_sg_ud) }
        }, {
            { NULL, NULL,              NULL,              NULL },
            { NULL, F(vstrh_sg_os_uh), F(vstrh_sg_os_uw), NULL },
            { NULL, NULL,              F(vstrw_sg_os_uw), NULL },
            { NULL, NULL,              NULL,              F(vstrd_sg_os_ud) }
        }
    };
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

#undef F
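
/*
 * Worked example (added commentary): the tables above are indexed as
 * fns[os][msize][size]. A VLDRH.U32 gather (16-bit memory elements
 * zero-extended to 32-bit register elements) with unscaled offsets is
 * fns[0][MO_16][MO_32] == vldrh_sg_uw; with OS=1 it is vldrh_sg_os_uw,
 * where each offset element in Qm is additionally multiplied by 2.
 */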

static bool do_ldst_sg_imm(DisasContext *s, arg_vldst_sg_imm *a,
                           MVEGenLdStSGFn *fn, unsigned msize)
{
    uint32_t offset;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(offset));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

static bool trans_VLDRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vldrw_sg_uw,
        gen_helper_mve_vldrw_sg_wb_uw,
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
}

static bool trans_VLDRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vldrd_sg_ud,
        gen_helper_mve_vldrd_sg_wb_ud,
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
}

static bool trans_VSTRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vstrw_sg_uw,
        gen_helper_mve_vstrw_sg_wb_uw,
    };
    return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
}

static bool trans_VSTRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vstrd_sg_ud,
        gen_helper_mve_vstrd_sg_wb_ud,
    };
    return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
}

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    TCGv_ptr qd;
    TCGv_i32 rt;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rt == 13 || a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rt = load_reg(s, a->rt);
    tcg_gen_dup_i32(a->size, rt, rt);
    gen_helper_mve_vdup(cpu_env, qd, rt);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rt);
    mve_update_eci(s);
    return true;
}
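
/*
 * Illustrative note (added commentary): for VDUP the duplication across
 * the 32-bit word is done at translate time by tcg_gen_dup_i32(), so a
 * single helper can serve all element sizes. For size MO_8, an Rt value
 * of 0x000000ab becomes 0xabababab before gen_helper_mve_vdup() is
 * expected to write it, under predication, across the whole of Qd.
 */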

static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_1OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_1OP(VCLZ, vclz)
DO_1OP(VCLS, vcls)
DO_1OP(VABS, vabs)
DO_1OP(VNEG, vneg)
DO_1OP(VQABS, vqabs)
DO_1OP(VQNEG, vqneg)
DO_1OP(VMAXA, vmaxa)
DO_1OP(VMINA, vmina)

/* Narrowing moves: only size 0 and 1 are valid */
#define DO_VMOVN(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            NULL,                                               \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_VMOVN(VMOVNB, vmovnb)
DO_VMOVN(VMOVNT, vmovnt)
DO_VMOVN(VQMOVUNB, vqmovunb)
DO_VMOVN(VQMOVUNT, vqmovunt)
DO_VMOVN(VQMOVN_BS, vqmovnbs)
DO_VMOVN(VQMOVN_TS, vqmovnts)
DO_VMOVN(VQMOVN_BU, vqmovnbu)
DO_VMOVN(VQMOVN_TU, vqmovntu)

static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev16b,
        NULL,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV32(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev32b,
        gen_helper_mve_vrev32h,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV64(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev64b,
        gen_helper_mve_vrev64h,
        gen_helper_mve_vrev64w,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VMVN(DisasContext *s, arg_1op *a)
{
    return do_1op(s, a, gen_helper_mve_vmvn);
}

static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfabsh,
        gen_helper_mve_vfabss,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfnegh,
        gen_helper_mve_vfnegs,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn)
{
    TCGv_ptr qd, qn, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qn, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_LOGIC(INSN, HELPER)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        return do_2op(s, a, HELPER);                            \
    }

DO_LOGIC(VAND, gen_helper_mve_vand)
DO_LOGIC(VBIC, gen_helper_mve_vbic)
DO_LOGIC(VORR, gen_helper_mve_vorr)
DO_LOGIC(VORN, gen_helper_mve_vorn)
DO_LOGIC(VEOR, gen_helper_mve_veor)

DO_LOGIC(VPSEL, gen_helper_mve_vpsel)

#define DO_2OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP(VADD, vadd)
DO_2OP(VSUB, vsub)
DO_2OP(VMUL, vmul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
DO_2OP(VMAX_S, vmaxs)
DO_2OP(VMAX_U, vmaxu)
DO_2OP(VMIN_S, vmins)
DO_2OP(VMIN_U, vminu)
DO_2OP(VABD_S, vabds)
DO_2OP(VABD_U, vabdu)
DO_2OP(VHADD_S, vhadds)
DO_2OP(VHADD_U, vhaddu)
DO_2OP(VHSUB_S, vhsubs)
DO_2OP(VHSUB_U, vhsubu)
DO_2OP(VMULL_BS, vmullbs)
DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
DO_2OP(VQDMULH, vqdmulh)
DO_2OP(VQRDMULH, vqrdmulh)
DO_2OP(VQADD_S, vqadds)
DO_2OP(VQADD_U, vqaddu)
DO_2OP(VQSUB_S, vqsubs)
DO_2OP(VQSUB_U, vqsubu)
DO_2OP(VSHL_S, vshls)
DO_2OP(VSHL_U, vshlu)
DO_2OP(VRSHL_S, vrshls)
DO_2OP(VRSHL_U, vrshlu)
DO_2OP(VQSHL_S, vqshls)
DO_2OP(VQSHL_U, vqshlu)
DO_2OP(VQRSHL_S, vqrshls)
DO_2OP(VQRSHL_U, vqrshlu)
DO_2OP(VQDMLADH, vqdmladh)
DO_2OP(VQDMLADHX, vqdmladhx)
DO_2OP(VQRDMLADH, vqrdmladh)
DO_2OP(VQRDMLADHX, vqrdmladhx)
DO_2OP(VQDMLSDH, vqdmlsdh)
DO_2OP(VQDMLSDHX, vqdmlsdhx)
DO_2OP(VQRDMLSDH, vqrdmlsdh)
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
DO_2OP(VRHADD_S, vrhadds)
DO_2OP(VRHADD_U, vrhaddu)
/*
 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
 * so we can reuse the DO_2OP macro. (Our implementation calculates the
 * "expected" results in this case.) Similarly for VHCADD.
 */
DO_2OP(VCADD90, vcadd90)
DO_2OP(VCADD270, vcadd270)
DO_2OP(VHCADD90, vhcadd90)
DO_2OP(VHCADD270, vhcadd270)

static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullbh,
        gen_helper_mve_vqdmullbw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullth,
        gen_helper_mve_vqdmulltw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
{
    /*
     * Note that a->size indicates the output size, ie VMULL.P8
     * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
     * is the 16x16->32 operation and a->size is MO_32.
     */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpbh,
        gen_helper_mve_vmullpbw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
{
    /* a->size is as for trans_VMULLP_B */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpth,
        gen_helper_mve_vmullptw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

/*
 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
 * of the 32-bit elements in each lane of the input vectors, where the
 * carry-out of each add is the carry-in of the next. The initial carry
 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
 * These insns are subject to beat-wise execution. Partial execution
 * of an I=1 (initial carry input fixed) insn which does not
 * execute the first beat must start with the current FPSCR.NZCV
 * value, not the fixed constant input.
 */
static bool trans_VADC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vadc);
}

static bool trans_VADCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VADC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vadci);
}

static bool trans_VSBC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vsbc);
}

static bool trans_VSBCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VSBC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vsbci);
}
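
/*
 * Worked example (added commentary): for VADC with FPSCR.C == 1 the
 * result lanes are
 *   d[0] = n[0] + m[0] + 1        (carry-out c0)
 *   d[1] = n[1] + m[1] + c0       (carry-out c1)
 *   d[2] = n[2] + m[2] + c1       (carry-out c2)
 *   d[3] = n[3] + m[3] + c2
 * and c3 is written back to FPSCR.C. VADCI is the same except that the
 * initial carry-in is the fixed value 0 (1 for VSBCI).
 */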

static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
                          MVEGenTwoOpScalarFn fn)
{
    TCGv_ptr qd, qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn) ||
        !fn) {
        return false;
    }
    if (a->rm == 13 || a->rm == 15) {
        /* UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qd, qn, rm);
    tcg_temp_free_i32(rm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_2OP_SCALAR(INSN, FN)                                 \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)
DO_2OP_SCALAR(VMLA, vmla)
DO_2OP_SCALAR(VMLAS, vmlas)
DO_2OP_SCALAR(VQDMLAH, vqdmlah)
DO_2OP_SCALAR(VQRDMLAH, vqrdmlah)
DO_2OP_SCALAR(VQDMLASH, vqdmlash)
DO_2OP_SCALAR(VQRDMLASH, vqrdmlash)
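
/*
 * Illustrative note (added commentary): for these "vector by scalar"
 * forms the helper broadcasts the general-purpose register across the
 * lanes, so e.g. VADD.I16 Qd, Qn, Rm adds the low 16 bits of Rm to
 * every 16-bit element of Qn.
 */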

static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullb_scalarh,
        gen_helper_mve_vqdmullb_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullt_scalarh,
        gen_helper_mve_vqdmullt_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
                             MVEGenLongDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn | a->qm) ||
        !fn) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        rda = tcg_const_i64(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
        { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavuh, NULL },
        { gen_helper_mve_vmlaldavuw, NULL },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
        { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhuw, NULL,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = load_reg(s, a->rda);
    } else {
        rda = tcg_const_i32(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

#define DO_DUAL_ACC(INSN, FN)                                    \
    static bool trans_##INSN(DisasContext *s, arg_vmladav *a)    \
    {                                                            \
        static MVEGenDualAccOpFn * const fns[4][2] = {           \
            { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb }, \
            { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh }, \
            { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw }, \
            { NULL, NULL },                                      \
        };                                                       \
        return do_dual_acc(s, a, fns[a->size][a->x]);            \
    }

DO_DUAL_ACC(VMLADAV_S, vmladavs)
DO_DUAL_ACC(VMLSDAV, vmlsdav)

static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { gen_helper_mve_vmladavub, NULL },
        { gen_helper_mve_vmladavuh, NULL },
        { gen_helper_mve_vmladavuw, NULL },
        { NULL, NULL },
    };
    return do_dual_acc(s, a, fns[a->size][a->x]);
}

static void gen_vpst(DisasContext *s, uint32_t mask)
{
    /*
     * Set the VPR mask fields. We take advantage of MASK01 and MASK23
     * being adjacent fields in the register.
     *
     * Updating the masks is not predicated, but it is subject to beat-wise
     * execution, and the mask is updated on the odd-numbered beats.
     * So if PSR.ECI says we should skip beat 1, we mustn't update the
     * 01 mask field.
     */
    TCGv_i32 vpr = load_cpu_field(v7m.vpr);
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
        /* Update both 01 and 23 fields */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask | (mask << 4)),
                            R_V7M_VPR_MASK01_SHIFT,
                            R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
        break;
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        /* Update only the 23 mask field */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask),
                            R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
        break;
    default:
        g_assert_not_reached();
    }
    store_cpu_field(vpr, v7m.vpr);
}

static bool trans_VPST(DisasContext *s, arg_VPST *a)
{
    /* mask == 0 is a "related encoding" */
    if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }
    gen_vpst(s, a->mask);
    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VPNOT(DisasContext *s, arg_VPNOT *a)
{
    /*
     * Invert the predicate in VPR.P0. We have to call out to
     * a helper because this insn itself is beatwise and can
     * be predicated.
     */
    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    gen_helper_mve_vpnot(cpu_env);
    mve_update_eci(s);
    return true;
}

static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
{
    /* VADDV: vector add across vector */
    static MVEGenVADDVFn * const fns[4][2] = {
        { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
        { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
        { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
        { NULL, NULL }
    };
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        a->size == 3) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of Rda, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from Rda */
        rda = load_reg(s, a->rda);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i32(0);
    }

    qm = mve_qreg_ptr(a->qm);
    fns[a->size][a->u](rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
{
    /*
     * Vector Add Long Across Vector: accumulate the 32-bit
     * elements of the vector into a 64-bit result stored in
     * a pair of general-purpose registers.
     * No need to check Qm's bank: it is only 3 bits in decode.
     */
    TCGv_ptr qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of RdaHi:RdaLo, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from RdaHi:RdaLo */
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i64(0);
    }

    qm = mve_qreg_ptr(a->qm);
    if (a->u) {
        gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda);
    } else {
        gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda);
    }
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}
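
/*
 * Illustrative note (added commentary): for the long accumulators the
 * 64-bit value is assembled as RdaHi:RdaLo, i.e.
 * rda = ((uint64_t)RdaHi << 32) | RdaLo, which is what
 * tcg_gen_concat_i32_i64(rda, rdalo, rdahi) produces; it is split back
 * the same way with tcg_gen_extrl/extrh before the two halves are
 * written to the registers.
 */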

static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn)
{
    TCGv_ptr qd;
    uint64_t imm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    qd = mve_qreg_ptr(a->qd);
    fn(cpu_env, qd, tcg_constant_i64(imm));
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    MVEGenOneOpImmFn *fn;

    if ((a->cmode & 1) && a->cmode < 12) {
        if (a->op) {
            /*
             * For op=1, the immediate will be inverted by asimd_imm_const(),
             * so the VBIC becomes a logical AND operation.
             */
            fn = gen_helper_mve_vandi;
        } else {
            fn = gen_helper_mve_vorri;
        }
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
        fn = gen_helper_mve_vmovi;
    }
    return do_1imm(s, a, fn);
}

static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                      bool negateshift)
{
    TCGv_ptr qd, qm;
    int shift = a->shift;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * When we handle a right shift insn using a left-shift helper
     * which permits a negative shift count to indicate a right-shift,
     * we must negate the shift count.
     */
    if (negateshift) {
        shift = -shift;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(shift));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT(INSN, FN, NEGATESHIFT)                        \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2shift(s, a, fns[a->size], NEGATESHIFT);      \
    }

DO_2SHIFT(VSHLI, vshli_u, false)
DO_2SHIFT(VQSHLI_S, vqshli_s, false)
DO_2SHIFT(VQSHLI_U, vqshli_u, false)
DO_2SHIFT(VQSHLUI, vqshlui_s, false)
/* These right shifts use a left-shift helper with negated shift count */
DO_2SHIFT(VSHRI_S, vshli_s, true)
DO_2SHIFT(VSHRI_U, vshli_u, true)
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
DO_2SHIFT(VRSHRI_U, vrshli_u, true)

DO_2SHIFT(VSRI, vsri, false)
DO_2SHIFT(VSLI, vsli, false)
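
/*
 * Worked example (added commentary): the right-shift forms above reuse
 * the left-shift helpers with negateshift set, so a VSHR.S16 #3 is
 * emitted as a call to the halfword vshli_s helper with a shift count
 * of -3, which the helper interprets as a right shift by 3.
 */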

static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
                             MVEGenTwoOpShiftFn *fn)
{
    TCGv_ptr qda;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qda) ||
        a->rm == 13 || a->rm == 15 || !fn) {
        /* Rm cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qda = mve_qreg_ptr(a->qda);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qda, qda, rm);
    tcg_temp_free_ptr(qda);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT_SCALAR(INSN, FN)                                  \
    static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a)    \
    {                                                               \
        static MVEGenTwoOpShiftFn * const fns[] = {                 \
            gen_helper_mve_##FN##b,                                 \
            gen_helper_mve_##FN##h,                                 \
            gen_helper_mve_##FN##w,                                 \
            NULL,                                                   \
        };                                                          \
        return do_2shift_scalar(s, a, fns[a->size]);                \
    }

DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)

#define DO_VSHLL(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_VSHLL(VSHLL_BS, vshllbs)
DO_VSHLL(VSHLL_BU, vshllbu)
DO_VSHLL(VSHLL_TS, vshllts)
DO_VSHLL(VSHLL_TU, vshlltu)

#define DO_2SHIFT_N(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_2SHIFT_N(VSHRNB, vshrnb)
DO_2SHIFT_N(VSHRNT, vshrnt)
DO_2SHIFT_N(VRSHRNB, vrshrnb)
DO_2SHIFT_N(VRSHRNT, vrshrnt)
DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
DO_2SHIFT_N(VQSHRUNB, vqshrunb)
DO_2SHIFT_N(VQSHRUNT, vqshrunt)
DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)

static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
{
    /*
     * Whole Vector Left Shift with Carry. The carry is taken
     * from a general purpose register and written back there.
     * An imm of 0 means "shift by 32".
     */
    TCGv_ptr qd;
    TCGv_i32 rdm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rdm == 13 || a->rdm == 15) {
        /* CONSTRAINED UNPREDICTABLE: we UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rdm = load_reg(s, a->rdm);
    gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
    store_reg(s, a->rdm, rdm);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn;

    /*
     * Vector increment/decrement with wrap and duplicate (VIDUP, VDDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->size == MO_64) {
        /* size 0b11 is another encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn, rm;

    /*
     * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP)
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn. Rm specifies a point where
     * the count wraps back around to 0. The updated offset is written back
     * to Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (!fn || a->rm == 13 || a->rm == 15) {
        /*
         * size 0b11 is another encoding; Rm == 13 and Rm == 15
         * are UNPREDICTABLE: we choose to UNDEF
         */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    rm = load_reg(s, a->rm);
    fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    /* VDDUP is just like VIDUP but with a negative immediate */
    a->imm = -a->imm;
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_viwdupb,
        gen_helper_mve_viwduph,
        gen_helper_mve_viwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_vdwdupb,
        gen_helper_mve_vdwduph,
        gen_helper_mve_vdwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}
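
/*
 * Worked example (added commentary): the immediate for these insns is
 * decoded by vidup_imm() above and so is always 1, 2, 4 or 8. A
 * VIDUP.U32 Qd, Rn, #4 starting with Rn == 0 is expected to write
 * { 0, 4, 8, 12 } to the four 32-bit elements and 16 back to Rn;
 * VDDUP counts downwards, and VIWDUP/VDWDUP additionally wrap the
 * count at the value held in Rm.
 */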

static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
{
    TCGv_ptr qn, qm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qn, qm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
                           MVEGenScalarCmpFn *fn)
{
    TCGv_ptr qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    if (a->rm == 15) {
        /* Encoding Rm=0b1111 means "constant zero" */
        rm = tcg_constant_i32(0);
    } else {
        rm = load_reg(s, a->rm);
    }
    fn(cpu_env, qn, rm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_i32(rm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

#define DO_VCMP(INSN, FN)                                       \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            gen_helper_mve_##FN##_scalarb,                      \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalarw,                      \
            NULL,                                               \
        };                                                      \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

DO_VCMP(VCMPEQ, vcmpeq)
DO_VCMP(VCMPNE, vcmpne)
DO_VCMP(VCMPCS, vcmpcs)
DO_VCMP(VCMPHI, vcmphi)
DO_VCMP(VCMPGE, vcmpge)
DO_VCMP(VCMPLT, vcmplt)
DO_VCMP(VCMPGT, vcmpgt)
DO_VCMP(VCMPLE, vcmple)

static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
{
    /*
     * MIN/MAX operations across a vector: compute the min or
     * max of the initial value in a general purpose register
     * and all the elements in the vector, and store it back
     * into the general purpose register.
     */
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_VMAXV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV(VMAXV_S, vmaxvs)
DO_VMAXV(VMAXV_U, vmaxvu)
DO_VMAXV(VMAXAV, vmaxav)
DO_VMAXV(VMINV_S, vminvs)
DO_VMAXV(VMINV_U, vminvu)
DO_VMAXV(VMINAV, vminav)
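
/*
 * Illustrative note (added commentary): e.g. VMAXV.S8 Rda, Qm should
 * leave in Rda the maximum of the original Rda value and all sixteen
 * signed byte elements of Qm; the VMAXAV/VMINAV forms compare the
 * absolute values of the elements instead.
 */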

static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
{
    /* Absolute difference accumulated across vector */
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qm | a->qn) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    qn = mve_qreg_ptr(a->qn);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_VABAV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vabav *a)     \
    {                                                           \
        static MVEGenVABAVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vabav(s, a, fns[a->size]);                    \
    }

DO_VABAV(VABAV_S, vabavs)
DO_VABAV(VABAV_U, vabavu)

static bool trans_VMOV_to_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
{
    /*
     * VMOV two 32-bit vector lanes to two general-purpose registers.
     * This insn is not predicated but it is subject to beat-wise
     * execution if it is not in an IT block. For us this means
     * only that if PSR.ECI says we should not be executing the beat
     * corresponding to the lane of the vector register being accessed
     * then we should skip performing the move, and that we need to do
     * the usual check for bad ECI state and advance of ECI state.
     * (If PSR.ECI is non-zero then we cannot be in an IT block.)
     */
    TCGv_i32 tmp;
    int vd;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
        a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15 ||
        a->rt == a->rt2) {
        /* Rt/Rt2 cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /* Convert Qreg index to Dreg for read_neon_element32() etc */
    vd = a->qd * 2;

    if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, vd, a->idx, MO_32);
        store_reg(s, a->rt, tmp);
    }
    if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, vd + 1, a->idx, MO_32);
        store_reg(s, a->rt2, tmp);
    }

    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VMOV_from_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
{
    /*
     * VMOV two general-purpose registers to two 32-bit vector lanes.
     * This insn is not predicated but it is subject to beat-wise
     * execution if it is not in an IT block. For us this means
     * only that if PSR.ECI says we should not be executing the beat
     * corresponding to the lane of the vector register being accessed
     * then we should skip performing the move, and that we need to do
     * the usual check for bad ECI state and advance of ECI state.
     * (If PSR.ECI is non-zero then we cannot be in an IT block.)
     */
    TCGv_i32 tmp;
    int vd;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
        a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15) {
        /* Rt/Rt2 cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /* Convert Qreg idx to Dreg for read_neon_element32() etc */
    vd = a->qd * 2;

    if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
        tmp = load_reg(s, a->rt);
        write_neon_element32(tmp, vd, a->idx, MO_32);
        tcg_temp_free_i32(tmp);
    }
    if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
        tmp = load_reg(s, a->rt2);
        write_neon_element32(tmp, vd + 1, a->idx, MO_32);
        tcg_temp_free_i32(tmp);
    }

    mve_update_and_store_eci(s);
    return true;
}