/*
 * ARM translation: M-profile MVE instructions
 *
 * Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

static inline int vidup_imm(DisasContext *s, int x)
{
    return 1 << x;
}

/* Include the generated decoder */
#include "decode-mve.c.inc"

typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStSGFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStIlFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}

static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}

static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     */
    return qmask < 8;
}
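/*
 * Note on the ECI helpers that follow: an MVE insn executes as four
 * "beats", and PSR.ECI records which beats of a partially executed insn
 * have already completed, so that after an exception the insn can be
 * resumed without re-executing them. mve_eci_check() rejects reserved
 * ECI values, and mve_update_eci()/mve_update_and_store_eci() advance
 * the recorded state once this insn's remaining beats have been emitted.
 */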
bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
     */
    s->eci_handled = true;
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved value: INVSTATE UsageFault */
        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
                           default_exception_el(s));
        return false;
    }
}

void mve_update_eci(DisasContext *s)
{
    /*
     * The helper function will always update the CPUState field,
     * so we only need to update the DisasContext field.
     */
    if (s->eci) {
        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
    }
}

void mve_update_and_store_eci(DisasContext *s)
{
    /*
     * For insns which don't call a helper function that will call
     * mve_advance_vpt(), this version updates s->eci and also stores
     * it out to the CPUState field.
     */
    if (s->eci) {
        mve_update_eci(s);
        store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
    }
}

static bool mve_skip_first_beat(DisasContext *s)
{
    /* Return true if PSR.ECI says we must skip the first beat of this insn */
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        g_assert_not_reached();
    }
}

static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
                    unsigned msize)
{
    TCGv_i32 addr;
    uint32_t offset;
    TCGv_ptr qreg;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }

    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
    if (a->rn == 15 || (a->rn == 13 && a->w)) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }
    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    qreg = mve_qreg_ptr(a->qd);
    fn(cpu_env, qreg, addr);
    tcg_temp_free_ptr(qreg);

    /*
     * Writeback always happens after the last beat of the insn,
     * regardless of predication
     */
    if (a->w) {
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    mve_update_eci(s);
    return true;
}

static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
{
    static MVEGenLdStFn * const ldstfns[4][2] = {
        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
        { NULL, NULL }
    };
    return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
}

#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
    {                                                           \
        static MVEGenLdStFn * const ldstfns[2][2] = {           \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
            { NULL, gen_helper_mve_##ULD },                     \
        };                                                      \
        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
    }

DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)
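/*
 * For illustration (not generated code), the first invocation above,
 * DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8),
 * expands to roughly:
 *
 *   static bool trans_VLDSTB_H(DisasContext *s, arg_VLDR_VSTR *a)
 *   {
 *       static MVEGenLdStFn * const ldstfns[2][2] = {
 *           { gen_helper_mve_vstrb_h, gen_helper_mve_vldrb_sh },
 *           { NULL, gen_helper_mve_vldrb_uh },
 *       };
 *       return do_ldst(s, a, ldstfns[a->u][a->l], MO_8);
 *   }
 *
 * i.e. a->l selects load vs store and a->u selects the sign- or
 * zero-extending load; the NULL entry (U=1 with a store) makes
 * do_ldst() return false and so UNDEF.
 */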
static bool do_ldst_sg(DisasContext *s, arg_vldst_sg *a, MVEGenLdStSGFn fn)
{
    TCGv_i32 addr;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn || a->rn == 15) {
        /* Rn case is UNPREDICTABLE */
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    addr = load_reg(s, a->rn);

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, addr);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_i32(addr);
    mve_update_eci(s);
    return true;
}

/*
 * The naming scheme here is "vldrb_sg_sh == in-memory byte loads
 * sign-extended to halfword elements in register". _os_ indicates that
 * the offsets in Qm should be scaled by the element size.
 */
/* This macro is just to make the arrays more compact in these functions */
#define F(N) gen_helper_mve_##N

/* VLDRB/VSTRB (ie msize 1) with OS=1 is UNPREDICTABLE; we UNDEF */
static bool trans_VLDR_S_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { NULL, F(vldrb_sg_sh), F(vldrb_sg_sw), NULL },
            { NULL, NULL, F(vldrh_sg_sw), NULL },
            { NULL, NULL, NULL, NULL },
            { NULL, NULL, NULL, NULL }
        }, {
            { NULL, NULL, NULL, NULL },
            { NULL, NULL, F(vldrh_sg_os_sw), NULL },
            { NULL, NULL, NULL, NULL },
            { NULL, NULL, NULL, NULL }
        }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VLDR_U_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { F(vldrb_sg_ub), F(vldrb_sg_uh), F(vldrb_sg_uw), NULL },
            { NULL, F(vldrh_sg_uh), F(vldrh_sg_uw), NULL },
            { NULL, NULL, F(vldrw_sg_uw), NULL },
            { NULL, NULL, NULL, F(vldrd_sg_ud) }
        }, {
            { NULL, NULL, NULL, NULL },
            { NULL, F(vldrh_sg_os_uh), F(vldrh_sg_os_uw), NULL },
            { NULL, NULL, F(vldrw_sg_os_uw), NULL },
            { NULL, NULL, NULL, F(vldrd_sg_os_ud) }
        }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VSTR_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { F(vstrb_sg_ub), F(vstrb_sg_uh), F(vstrb_sg_uw), NULL },
            { NULL, F(vstrh_sg_uh), F(vstrh_sg_uw), NULL },
            { NULL, NULL, F(vstrw_sg_uw), NULL },
            { NULL, NULL, NULL, F(vstrd_sg_ud) }
        }, {
            { NULL, NULL, NULL, NULL },
            { NULL, F(vstrh_sg_os_uh), F(vstrh_sg_os_uw), NULL },
            { NULL, NULL, F(vstrw_sg_os_uw), NULL },
            { NULL, NULL, NULL, F(vstrd_sg_os_ud) }
        }
    };
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

#undef F

static bool do_ldst_sg_imm(DisasContext *s, arg_vldst_sg_imm *a,
                           MVEGenLdStSGFn *fn, unsigned msize)
{
    uint32_t offset;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(offset));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}
static bool trans_VLDRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vldrw_sg_uw,
        gen_helper_mve_vldrw_sg_wb_uw,
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
}

static bool trans_VLDRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vldrd_sg_ud,
        gen_helper_mve_vldrd_sg_wb_ud,
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
}

static bool trans_VSTRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vstrw_sg_uw,
        gen_helper_mve_vstrw_sg_wb_uw,
    };
    return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
}

static bool trans_VSTRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vstrd_sg_ud,
        gen_helper_mve_vstrd_sg_wb_ud,
    };
    return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
}

static bool do_vldst_il(DisasContext *s, arg_vldst_il *a, MVEGenLdStIlFn *fn,
                        int addrinc)
{
    TCGv_i32 rn;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn || (a->rn == 13 && a->w) || a->rn == 15) {
        /* Variously UNPREDICTABLE or UNDEF or related-encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    rn = load_reg(s, a->rn);
    /*
     * We pass the index of Qd, not a pointer, because the helper must
     * access multiple Q registers starting at Qd and working up.
     */
    fn(cpu_env, tcg_constant_i32(a->qd), rn);

    if (a->w) {
        tcg_gen_addi_i32(rn, rn, addrinc);
        store_reg(s, a->rn, rn);
    } else {
        tcg_temp_free_i32(rn);
    }
    mve_update_and_store_eci(s);
    return true;
}

/* This macro is just to make the arrays more compact in these functions */
#define F(N) gen_helper_mve_##N

static bool trans_VLD2(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vld20b), F(vld20h), F(vld20w), NULL, },
        { F(vld21b), F(vld21h), F(vld21w), NULL, },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL },
    };
    if (a->qd > 6) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 32);
}

static bool trans_VLD4(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vld40b), F(vld40h), F(vld40w), NULL, },
        { F(vld41b), F(vld41h), F(vld41w), NULL, },
        { F(vld42b), F(vld42h), F(vld42w), NULL, },
        { F(vld43b), F(vld43h), F(vld43w), NULL, },
    };
    if (a->qd > 4) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 64);
}

static bool trans_VST2(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vst20b), F(vst20h), F(vst20w), NULL, },
        { F(vst21b), F(vst21h), F(vst21w), NULL, },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL },
    };
    if (a->qd > 6) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 32);
}
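/*
 * Note on the Qd range checks in the interleaving handlers above and
 * below: VLD2/VST2 access the register pair Qd..Qd+1 and VLD4/VST4
 * access Qd..Qd+3, so with only Q0..Q7 available the highest valid Qd
 * is 6 and 4 respectively; anything larger is made to UNDEF here.
 */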
static bool trans_VST4(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vst40b), F(vst40h), F(vst40w), NULL, },
        { F(vst41b), F(vst41h), F(vst41w), NULL, },
        { F(vst42b), F(vst42h), F(vst42w), NULL, },
        { F(vst43b), F(vst43h), F(vst43w), NULL, },
    };
    if (a->qd > 4) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 64);
}

#undef F

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    TCGv_ptr qd;
    TCGv_i32 rt;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rt == 13 || a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rt = load_reg(s, a->rt);
    tcg_gen_dup_i32(a->size, rt, rt);
    gen_helper_mve_vdup(cpu_env, qd, rt);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rt);
    mve_update_eci(s);
    return true;
}

static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_1OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_1OP(VCLZ, vclz)
DO_1OP(VCLS, vcls)
DO_1OP(VABS, vabs)
DO_1OP(VNEG, vneg)
DO_1OP(VQABS, vqabs)
DO_1OP(VQNEG, vqneg)
DO_1OP(VMAXA, vmaxa)
DO_1OP(VMINA, vmina)

/*
 * For simple float/int conversions we use the fixed-point
 * conversion helpers with a zero shift count
 */
#define DO_VCVT(INSN, HFN, SFN)                                         \
    static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_##HFN(env, qd, qm, tcg_constant_i32(0));         \
    }                                                                   \
    static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_##SFN(env, qd, qm, tcg_constant_i32(0));         \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)               \
    {                                                                   \
        static MVEGenOneOpFn * const fns[] = {                          \
            NULL,                                                       \
            gen_##INSN##h,                                              \
            gen_##INSN##s,                                              \
            NULL,                                                       \
        };                                                              \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                         \
            return false;                                               \
        }                                                               \
        return do_1op(s, a, fns[a->size]);                              \
    }

DO_VCVT(VCVT_SF, vcvt_sh, vcvt_sf)
DO_VCVT(VCVT_UF, vcvt_uh, vcvt_uf)
DO_VCVT(VCVT_FS, vcvt_hs, vcvt_fs)
DO_VCVT(VCVT_FU, vcvt_hu, vcvt_fu)

/* Narrowing moves: only size 0 and 1 are valid */
#define DO_VMOVN(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            NULL,                                               \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_VMOVN(VMOVNB, vmovnb)
DO_VMOVN(VMOVNT, vmovnt)
DO_VMOVN(VQMOVUNB, vqmovunb)
DO_VMOVN(VQMOVUNT, vqmovunt)
DO_VMOVN(VQMOVN_BS, vqmovnbs)
DO_VMOVN(VQMOVN_TS, vqmovnts)
DO_VMOVN(VQMOVN_BU, vqmovnbu)
DO_VMOVN(VQMOVN_TU, vqmovntu)

static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev16b,
        NULL,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}
static bool trans_VREV32(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev32b,
        gen_helper_mve_vrev32h,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV64(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev64b,
        gen_helper_mve_vrev64h,
        gen_helper_mve_vrev64w,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VMVN(DisasContext *s, arg_1op *a)
{
    return do_1op(s, a, gen_helper_mve_vmvn);
}

static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfabsh,
        gen_helper_mve_vfabss,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfnegh,
        gen_helper_mve_vfnegs,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn)
{
    TCGv_ptr qd, qn, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qn, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_LOGIC(INSN, HELPER)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        return do_2op(s, a, HELPER);                            \
    }

DO_LOGIC(VAND, gen_helper_mve_vand)
DO_LOGIC(VBIC, gen_helper_mve_vbic)
DO_LOGIC(VORR, gen_helper_mve_vorr)
DO_LOGIC(VORN, gen_helper_mve_vorn)
DO_LOGIC(VEOR, gen_helper_mve_veor)

DO_LOGIC(VPSEL, gen_helper_mve_vpsel)

#define DO_2OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP(VADD, vadd)
DO_2OP(VSUB, vsub)
DO_2OP(VMUL, vmul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
DO_2OP(VMAX_S, vmaxs)
DO_2OP(VMAX_U, vmaxu)
DO_2OP(VMIN_S, vmins)
DO_2OP(VMIN_U, vminu)
DO_2OP(VABD_S, vabds)
DO_2OP(VABD_U, vabdu)
DO_2OP(VHADD_S, vhadds)
DO_2OP(VHADD_U, vhaddu)
DO_2OP(VHSUB_S, vhsubs)
DO_2OP(VHSUB_U, vhsubu)
DO_2OP(VMULL_BS, vmullbs)
DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
DO_2OP(VQDMULH, vqdmulh)
DO_2OP(VQRDMULH, vqrdmulh)
DO_2OP(VQADD_S, vqadds)
DO_2OP(VQADD_U, vqaddu)
DO_2OP(VQSUB_S, vqsubs)
DO_2OP(VQSUB_U, vqsubu)
DO_2OP(VSHL_S, vshls)
DO_2OP(VSHL_U, vshlu)
DO_2OP(VRSHL_S, vrshls)
DO_2OP(VRSHL_U, vrshlu)
DO_2OP(VQSHL_S, vqshls)
DO_2OP(VQSHL_U, vqshlu)
DO_2OP(VQRSHL_S, vqrshls)
DO_2OP(VQRSHL_U, vqrshlu)
DO_2OP(VQDMLADH, vqdmladh)
DO_2OP(VQDMLADHX, vqdmladhx)
DO_2OP(VQRDMLADH, vqrdmladh)
DO_2OP(VQRDMLADHX, vqrdmladhx)
DO_2OP(VQDMLSDH, vqdmlsdh)
DO_2OP(VQDMLSDHX, vqdmlsdhx)
DO_2OP(VQRDMLSDH, vqrdmlsdh)
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
DO_2OP(VRHADD_S, vrhadds)
DO_2OP(VRHADD_U, vrhaddu)
/*
 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
 * so we can reuse the DO_2OP macro. (Our implementation calculates the
 * "expected" results in this case.) Similarly for VHCADD.
 */
DO_2OP(VCADD90, vcadd90)
DO_2OP(VCADD270, vcadd270)
DO_2OP(VHCADD90, vhcadd90)
DO_2OP(VHCADD270, vhcadd270)

static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullbh,
        gen_helper_mve_vqdmullbw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullth,
        gen_helper_mve_vqdmulltw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
{
    /*
     * Note that a->size indicates the output size, ie VMULL.P8
     * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
     * is the 16x16->32 operation and a->size is MO_32.
     */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpbh,
        gen_helper_mve_vmullpbw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
{
    /* a->size is as for trans_VMULLP_B */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpth,
        gen_helper_mve_vmullptw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}
/*
 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
 * of the 32-bit elements in each lane of the input vectors, where the
 * carry-out of each add is the carry-in of the next. The initial carry
 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
 * These insns are subject to beat-wise execution. Partial execution
 * of an I=1 (initial carry input fixed) insn which does not
 * execute the first beat must start with the current FPSCR.NZCV
 * value, not the fixed constant input.
 */
static bool trans_VADC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vadc);
}

static bool trans_VADCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VADC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vadci);
}

static bool trans_VSBC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vsbc);
}

static bool trans_VSBCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VSBC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vsbci);
}

#define DO_2OP_FP(INSN, FN)                                     \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP_FP(VADD_fp, vfadd)
DO_2OP_FP(VSUB_fp, vfsub)
DO_2OP_FP(VMUL_fp, vfmul)
DO_2OP_FP(VABD_fp, vfabd)
DO_2OP_FP(VMAXNM, vmaxnm)
DO_2OP_FP(VMINNM, vminnm)
DO_2OP_FP(VCADD90_fp, vfcadd90)
DO_2OP_FP(VCADD270_fp, vfcadd270)
DO_2OP_FP(VFMA, vfma)
DO_2OP_FP(VFMS, vfms)
DO_2OP_FP(VCMUL0, vcmul0)
DO_2OP_FP(VCMUL90, vcmul90)
DO_2OP_FP(VCMUL180, vcmul180)
DO_2OP_FP(VCMUL270, vcmul270)
DO_2OP_FP(VCMLA0, vcmla0)
DO_2OP_FP(VCMLA90, vcmla90)
DO_2OP_FP(VCMLA180, vcmla180)
DO_2OP_FP(VCMLA270, vcmla270)
DO_2OP_FP(VMAXNMA, vmaxnma)
DO_2OP_FP(VMINNMA, vminnma)

static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
                          MVEGenTwoOpScalarFn fn)
{
    TCGv_ptr qd, qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn) ||
        !fn) {
        return false;
    }
    if (a->rm == 13 || a->rm == 15) {
        /* UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qd, qn, rm);
    tcg_temp_free_i32(rm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_2OP_SCALAR(INSN, FN)                                 \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)
DO_2OP_SCALAR(VMLA, vmla)
DO_2OP_SCALAR(VMLAS, vmlas)
DO_2OP_SCALAR(VQDMLAH, vqdmlah)
DO_2OP_SCALAR(VQRDMLAH, vqrdmlah)
DO_2OP_SCALAR(VQDMLASH, vqdmlash)
DO_2OP_SCALAR(VQRDMLASH, vqrdmlash)
static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullb_scalarh,
        gen_helper_mve_vqdmullb_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullt_scalarh,
        gen_helper_mve_vqdmullt_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

#define DO_2OP_FP_SCALAR(INSN, FN)                              \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_FP_SCALAR(VADD_fp_scalar, vfadd_scalar)
DO_2OP_FP_SCALAR(VSUB_fp_scalar, vfsub_scalar)
DO_2OP_FP_SCALAR(VMUL_fp_scalar, vfmul_scalar)
DO_2OP_FP_SCALAR(VFMA_scalar, vfma_scalar)
DO_2OP_FP_SCALAR(VFMAS_scalar, vfmas_scalar)
static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
                             MVEGenLongDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn | a->qm) ||
        !fn) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        rda = tcg_const_i64(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
        { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavuh, NULL },
        { gen_helper_mve_vmlaldavuw, NULL },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
        { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhuw, NULL,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}
static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = load_reg(s, a->rda);
    } else {
        rda = tcg_const_i32(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

#define DO_DUAL_ACC(INSN, FN)                                           \
    static bool trans_##INSN(DisasContext *s, arg_vmladav *a)           \
    {                                                                   \
        static MVEGenDualAccOpFn * const fns[4][2] = {                  \
            { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb },        \
            { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh },        \
            { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw },        \
            { NULL, NULL },                                             \
        };                                                              \
        return do_dual_acc(s, a, fns[a->size][a->x]);                   \
    }

DO_DUAL_ACC(VMLADAV_S, vmladavs)
DO_DUAL_ACC(VMLSDAV, vmlsdav)

static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { gen_helper_mve_vmladavub, NULL },
        { gen_helper_mve_vmladavuh, NULL },
        { gen_helper_mve_vmladavuw, NULL },
        { NULL, NULL },
    };
    return do_dual_acc(s, a, fns[a->size][a->x]);
}

static void gen_vpst(DisasContext *s, uint32_t mask)
{
    /*
     * Set the VPR mask fields. We take advantage of MASK01 and MASK23
     * being adjacent fields in the register.
     *
     * Updating the masks is not predicated, but it is subject to beat-wise
     * execution, and the mask is updated on the odd-numbered beats.
     * So if PSR.ECI says we should skip beat 1, we mustn't update the
     * 01 mask field.
     */
    TCGv_i32 vpr = load_cpu_field(v7m.vpr);
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
        /* Update both 01 and 23 fields */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask | (mask << 4)),
                            R_V7M_VPR_MASK01_SHIFT,
                            R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
        break;
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        /* Update only the 23 mask field */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask),
                            R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
        break;
    default:
        g_assert_not_reached();
    }
    store_cpu_field(vpr, v7m.vpr);
}

static bool trans_VPST(DisasContext *s, arg_VPST *a)
{
    /* mask == 0 is a "related encoding" */
    if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }
    gen_vpst(s, a->mask);
    mve_update_and_store_eci(s);
    return true;
}
static bool trans_VPNOT(DisasContext *s, arg_VPNOT *a)
{
    /*
     * Invert the predicate in VPR.P0. We have to call out to
     * a helper because this insn itself is beatwise and can
     * be predicated.
     */
    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    gen_helper_mve_vpnot(cpu_env);
    mve_update_eci(s);
    return true;
}

static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
{
    /* VADDV: vector add across vector */
    static MVEGenVADDVFn * const fns[4][2] = {
        { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
        { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
        { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
        { NULL, NULL }
    };
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        a->size == 3) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of Rda, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from Rda */
        rda = load_reg(s, a->rda);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i32(0);
    }

    qm = mve_qreg_ptr(a->qm);
    fns[a->size][a->u](rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}
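/*
 * For example, for trans_VADDV() above, VADDVA.U8 Rda, Qm adds all 16
 * byte elements of Qm to the existing value of Rda, while VADDV.U8
 * (A bit clear) starts the accumulation from zero unless PSR.ECI says
 * the first beat has already been executed.
 */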
static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
{
    /*
     * Vector Add Long Across Vector: accumulate the 32-bit
     * elements of the vector into a 64-bit result stored in
     * a pair of general-purpose registers.
     * No need to check Qm's bank: it is only 3 bits in decode.
     */
    TCGv_ptr qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of RdaHi:RdaLo, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from RdaHi:RdaLo */
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i64(0);
    }

    qm = mve_qreg_ptr(a->qm);
    if (a->u) {
        gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda);
    } else {
        gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda);
    }
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn)
{
    TCGv_ptr qd;
    uint64_t imm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    qd = mve_qreg_ptr(a->qd);
    fn(cpu_env, qd, tcg_constant_i64(imm));
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    MVEGenOneOpImmFn *fn;

    if ((a->cmode & 1) && a->cmode < 12) {
        if (a->op) {
            /*
             * For op=1, the immediate will be inverted by asimd_imm_const(),
             * so the VBIC becomes a logical AND operation.
             */
            fn = gen_helper_mve_vandi;
        } else {
            fn = gen_helper_mve_vorri;
        }
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
        fn = gen_helper_mve_vmovi;
    }
    return do_1imm(s, a, fn);
}
static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                      bool negateshift)
{
    TCGv_ptr qd, qm;
    int shift = a->shift;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * When we handle a right shift insn using a left-shift helper
     * which permits a negative shift count to indicate a right-shift,
     * we must negate the shift count.
     */
    if (negateshift) {
        shift = -shift;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(shift));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT(INSN, FN, NEGATESHIFT)                        \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2shift(s, a, fns[a->size], NEGATESHIFT);      \
    }

DO_2SHIFT(VSHLI, vshli_u, false)
DO_2SHIFT(VQSHLI_S, vqshli_s, false)
DO_2SHIFT(VQSHLI_U, vqshli_u, false)
DO_2SHIFT(VQSHLUI, vqshlui_s, false)
/* These right shifts use a left-shift helper with negated shift count */
DO_2SHIFT(VSHRI_S, vshli_s, true)
DO_2SHIFT(VSHRI_U, vshli_u, true)
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
DO_2SHIFT(VRSHRI_U, vrshli_u, true)

DO_2SHIFT(VSRI, vsri, false)
DO_2SHIFT(VSLI, vsli, false)
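/*
 * Worked example of the negated-shift trick above: VSHRI_S at size 1
 * (halfword elements) with a->shift == 3 reaches do_2shift() with
 * negateshift true, so gen_helper_mve_vshli_sh is called with a shift
 * count of -3, which the left-shift helper treats as a right shift by 3.
 */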
#define DO_2SHIFT_FP(INSN, FN)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2shift(s, a, gen_helper_mve_##FN, false);     \
    }

DO_2SHIFT_FP(VCVT_SH_fixed, vcvt_sh)
DO_2SHIFT_FP(VCVT_UH_fixed, vcvt_uh)
DO_2SHIFT_FP(VCVT_HS_fixed, vcvt_hs)
DO_2SHIFT_FP(VCVT_HU_fixed, vcvt_hu)
DO_2SHIFT_FP(VCVT_SF_fixed, vcvt_sf)
DO_2SHIFT_FP(VCVT_UF_fixed, vcvt_uf)
DO_2SHIFT_FP(VCVT_FS_fixed, vcvt_fs)
DO_2SHIFT_FP(VCVT_FU_fixed, vcvt_fu)

static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
                             MVEGenTwoOpShiftFn *fn)
{
    TCGv_ptr qda;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qda) ||
        a->rm == 13 || a->rm == 15 || !fn) {
        /* Rm cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qda = mve_qreg_ptr(a->qda);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qda, qda, rm);
    tcg_temp_free_ptr(qda);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT_SCALAR(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a)        \
    {                                                                   \
        static MVEGenTwoOpShiftFn * const fns[] = {                     \
            gen_helper_mve_##FN##b,                                     \
            gen_helper_mve_##FN##h,                                     \
            gen_helper_mve_##FN##w,                                     \
            NULL,                                                       \
        };                                                              \
        return do_2shift_scalar(s, a, fns[a->size]);                    \
    }

DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)

#define DO_VSHLL(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_VSHLL(VSHLL_BS, vshllbs)
DO_VSHLL(VSHLL_BU, vshllbu)
DO_VSHLL(VSHLL_TS, vshllts)
DO_VSHLL(VSHLL_TU, vshlltu)

#define DO_2SHIFT_N(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_2SHIFT_N(VSHRNB, vshrnb)
DO_2SHIFT_N(VSHRNT, vshrnt)
DO_2SHIFT_N(VRSHRNB, vrshrnb)
DO_2SHIFT_N(VRSHRNT, vrshrnt)
DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
DO_2SHIFT_N(VQSHRUNB, vqshrunb)
DO_2SHIFT_N(VQSHRUNT, vqshrunt)
DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)

static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
{
    /*
     * Whole Vector Left Shift with Carry. The carry is taken
     * from a general purpose register and written back there.
     * An imm of 0 means "shift by 32".
     */
    TCGv_ptr qd;
    TCGv_i32 rdm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rdm == 13 || a->rdm == 15) {
        /* CONSTRAINED UNPREDICTABLE: we UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rdm = load_reg(s, a->rdm);
    gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
    store_reg(s, a->rdm, rdm);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn;

    /*
     * Vector increment/decrement with wrap and duplicate (VIDUP, VDDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->size == MO_64) {
        /* size 0b11 is another encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}
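/*
 * For example (ignoring predication), VIDUP.U32 Qd, Rn, #2 with Rn = 5
 * writes { 5, 7, 9, 11 } to Qd and writes 13 back to Rn; VDDUP counts
 * downwards from Rn instead.
 */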
static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn, rm;

    /*
     * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP)
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn. Rm specifies a point where
     * the count wraps back around to 0. The updated offset is written back
     * to Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (!fn || a->rm == 13 || a->rm == 15) {
        /*
         * size 0b11 is another encoding; Rm == 13 is UNPREDICTABLE;
         * Rm == 15 is the related VIDUP, VDDUP encoding.
         */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    rm = load_reg(s, a->rm);
    fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    /* VDDUP is just like VIDUP but with a negative immediate */
    a->imm = -a->imm;
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_viwdupb,
        gen_helper_mve_viwduph,
        gen_helper_mve_viwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_vdwdupb,
        gen_helper_mve_vdwduph,
        gen_helper_mve_vdwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
{
    TCGv_ptr qn, qm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qn, qm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
                           MVEGenScalarCmpFn *fn)
{
    TCGv_ptr qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    if (a->rm == 15) {
        /* Encoding Rm=0b1111 means "constant zero" */
        rm = tcg_constant_i32(0);
    } else {
        rm = load_reg(s, a->rm);
    }
    fn(cpu_env, qn, rm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_i32(rm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

#define DO_VCMP(INSN, FN)                                       \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            gen_helper_mve_##FN##_scalarb,                      \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalarw,                      \
            NULL,                                               \
        };                                                      \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }
DO_VCMP(VCMPEQ, vcmpeq)
DO_VCMP(VCMPNE, vcmpne)
DO_VCMP(VCMPCS, vcmpcs)
DO_VCMP(VCMPHI, vcmphi)
DO_VCMP(VCMPGE, vcmpge)
DO_VCMP(VCMPLT, vcmplt)
DO_VCMP(VCMPGT, vcmpgt)
DO_VCMP(VCMPLE, vcmple)

#define DO_VCMP_FP(INSN, FN)                                    \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            NULL,                                               \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalars,                      \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

DO_VCMP_FP(VCMPEQ_fp, vfcmpeq)
DO_VCMP_FP(VCMPNE_fp, vfcmpne)
DO_VCMP_FP(VCMPGE_fp, vfcmpge)
DO_VCMP_FP(VCMPLT_fp, vfcmplt)
DO_VCMP_FP(VCMPGT_fp, vfcmpgt)
DO_VCMP_FP(VCMPLE_fp, vfcmple)

static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
{
    /*
     * MIN/MAX operations across a vector: compute the min or
     * max of the initial value in a general purpose register
     * and all the elements in the vector, and store it back
     * into the general purpose register.
     */
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_VMAXV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV(VMAXV_S, vmaxvs)
DO_VMAXV(VMAXV_U, vmaxvu)
DO_VMAXV(VMAXAV, vmaxav)
DO_VMAXV(VMINV_S, vminvs)
DO_VMAXV(VMINV_U, vminvu)
DO_VMAXV(VMINAV, vminav)

#define DO_VMAXV_FP(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV_FP(VMAXNMV, vmaxnmv)
DO_VMAXV_FP(VMINNMV, vminnmv)
DO_VMAXV_FP(VMAXNMAV, vmaxnmav)
DO_VMAXV_FP(VMINNMAV, vminnmav)
static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
{
    /* Absolute difference accumulated across vector */
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qm | a->qn) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    qn = mve_qreg_ptr(a->qn);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_VABAV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vabav *a)     \
    {                                                           \
        static MVEGenVABAVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vabav(s, a, fns[a->size]);                    \
    }

DO_VABAV(VABAV_S, vabavs)
DO_VABAV(VABAV_U, vabavu)

static bool trans_VMOV_to_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
{
    /*
     * VMOV two 32-bit vector lanes to two general-purpose registers.
     * This insn is not predicated but it is subject to beat-wise
     * execution if it is not in an IT block. For us this means
     * only that if PSR.ECI says we should not be executing the beat
     * corresponding to the lane of the vector register being accessed
     * then we should skip performing the move, and that we need to do
     * the usual check for bad ECI state and advance of ECI state.
     * (If PSR.ECI is non-zero then we cannot be in an IT block.)
     */
    TCGv_i32 tmp;
    int vd;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
        a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15 ||
        a->rt == a->rt2) {
        /* Rt/Rt2 cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /* Convert Qreg index to Dreg for read_neon_element32() etc */
    vd = a->qd * 2;

    if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, vd, a->idx, MO_32);
        store_reg(s, a->rt, tmp);
    }
    if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, vd + 1, a->idx, MO_32);
        store_reg(s, a->rt2, tmp);
    }

    mve_update_and_store_eci(s);
    return true;
}
static bool trans_VMOV_from_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
{
    /*
     * VMOV two general-purpose registers to two 32-bit vector lanes.
     * This insn is not predicated but it is subject to beat-wise
     * execution if it is not in an IT block. For us this means
     * only that if PSR.ECI says we should not be executing the beat
     * corresponding to the lane of the vector register being accessed
     * then we should skip performing the move, and that we need to do
     * the usual check for bad ECI state and advance of ECI state.
     * (If PSR.ECI is non-zero then we cannot be in an IT block.)
     */
    TCGv_i32 tmp;
    int vd;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
        a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15) {
        /* Rt/Rt2 cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /* Convert Qreg idx to Dreg for read_neon_element32() etc */
    vd = a->qd * 2;

    if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
        tmp = load_reg(s, a->rt);
        write_neon_element32(tmp, vd, a->idx, MO_32);
        tcg_temp_free_i32(tmp);
    }
    if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
        tmp = load_reg(s, a->rt2);
        write_neon_element32(tmp, vd + 1, a->idx, MO_32);
        tcg_temp_free_i32(tmp);
    }

    mve_update_and_store_eci(s);
    return true;
}