1 /* 2 * ARM translation: M-profile MVE instructions 3 * 4 * Copyright (c) 2021 Linaro, Ltd. 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "tcg/tcg-op.h" 22 #include "tcg/tcg-op-gvec.h" 23 #include "exec/exec-all.h" 24 #include "exec/gen-icount.h" 25 #include "translate.h" 26 #include "translate-a32.h" 27 28 static inline int vidup_imm(DisasContext *s, int x) 29 { 30 return 1 << x; 31 } 32 33 /* Include the generated decoder */ 34 #include "decode-mve.c.inc" 35 36 typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32); 37 typedef void MVEGenLdStSGFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); 38 typedef void MVEGenLdStIlFn(TCGv_ptr, TCGv_i32, TCGv_i32); 39 typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr); 40 typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr); 41 typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); 42 typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); 43 typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64); 44 typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32); 45 typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64); 46 typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32); 47 typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32); 48 typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr); 49 typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32); 50 typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); 51 typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); 52 typedef void MVEGenVCVTRmodeFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); 53 54 /* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */ 55 static inline long mve_qreg_offset(unsigned reg) 56 { 57 return offsetof(CPUARMState, vfp.zregs[reg].d[0]); 58 } 59 60 static TCGv_ptr mve_qreg_ptr(unsigned reg) 61 { 62 TCGv_ptr ret = tcg_temp_new_ptr(); 63 tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg)); 64 return ret; 65 } 66 67 static bool mve_no_predication(DisasContext *s) 68 { 69 /* 70 * Return true if we are executing the entire MVE instruction 71 * with no predication or partial-execution, and so we can safely 72 * use an inline TCG vector implementation. 73 */ 74 return s->eci == 0 && s->mve_no_pred; 75 } 76 77 static bool mve_check_qreg_bank(DisasContext *s, int qmask) 78 { 79 /* 80 * Check whether Qregs are in range. For v8.1M only Q0..Q7 81 * are supported, see VFPSmallRegisterBank(). 82 */ 83 return qmask < 8; 84 } 85 86 bool mve_eci_check(DisasContext *s) 87 { 88 /* 89 * This is a beatwise insn: check that ECI is valid (not a 90 * reserved value) and note that we are handling it. 91 * Return true if OK, false if we generated an exception. 
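 * (PSR.ECI records which beats of a partially-executed beatwise insn have
 *  already completed, so only the values listed in the switch below can
 *  legitimately occur; anything else is a reserved value.)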
92 */ 93 s->eci_handled = true; 94 switch (s->eci) { 95 case ECI_NONE: 96 case ECI_A0: 97 case ECI_A0A1: 98 case ECI_A0A1A2: 99 case ECI_A0A1A2B0: 100 return true; 101 default: 102 /* Reserved value: INVSTATE UsageFault */ 103 gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(), 104 default_exception_el(s)); 105 return false; 106 } 107 } 108 109 void mve_update_eci(DisasContext *s) 110 { 111 /* 112 * The helper function will always update the CPUState field, 113 * so we only need to update the DisasContext field. 114 */ 115 if (s->eci) { 116 s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE; 117 } 118 } 119 120 void mve_update_and_store_eci(DisasContext *s) 121 { 122 /* 123 * For insns which don't call a helper function that will call 124 * mve_advance_vpt(), this version updates s->eci and also stores 125 * it out to the CPUState field. 126 */ 127 if (s->eci) { 128 mve_update_eci(s); 129 store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits); 130 } 131 } 132 133 static bool mve_skip_first_beat(DisasContext *s) 134 { 135 /* Return true if PSR.ECI says we must skip the first beat of this insn */ 136 switch (s->eci) { 137 case ECI_NONE: 138 return false; 139 case ECI_A0: 140 case ECI_A0A1: 141 case ECI_A0A1A2: 142 case ECI_A0A1A2B0: 143 return true; 144 default: 145 g_assert_not_reached(); 146 } 147 } 148 149 static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn, 150 unsigned msize) 151 { 152 TCGv_i32 addr; 153 uint32_t offset; 154 TCGv_ptr qreg; 155 156 if (!dc_isar_feature(aa32_mve, s) || 157 !mve_check_qreg_bank(s, a->qd) || 158 !fn) { 159 return false; 160 } 161 162 /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */ 163 if (a->rn == 15 || (a->rn == 13 && a->w)) { 164 return false; 165 } 166 167 if (!mve_eci_check(s) || !vfp_access_check(s)) { 168 return true; 169 } 170 171 offset = a->imm << msize; 172 if (!a->a) { 173 offset = -offset; 174 } 175 addr = load_reg(s, a->rn); 176 if (a->p) { 177 tcg_gen_addi_i32(addr, addr, offset); 178 } 179 180 qreg = mve_qreg_ptr(a->qd); 181 fn(cpu_env, qreg, addr); 182 tcg_temp_free_ptr(qreg); 183 184 /* 185 * Writeback always happens after the last beat of the insn, 186 * regardless of predication 187 */ 188 if (a->w) { 189 if (!a->p) { 190 tcg_gen_addi_i32(addr, addr, offset); 191 } 192 store_reg(s, a->rn, addr); 193 } else { 194 tcg_temp_free_i32(addr); 195 } 196 mve_update_eci(s); 197 return true; 198 } 199 200 static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a) 201 { 202 static MVEGenLdStFn * const ldstfns[4][2] = { 203 { gen_helper_mve_vstrb, gen_helper_mve_vldrb }, 204 { gen_helper_mve_vstrh, gen_helper_mve_vldrh }, 205 { gen_helper_mve_vstrw, gen_helper_mve_vldrw }, 206 { NULL, NULL } 207 }; 208 return do_ldst(s, a, ldstfns[a->size][a->l], a->size); 209 } 210 211 #define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE) \ 212 static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a) \ 213 { \ 214 static MVEGenLdStFn * const ldstfns[2][2] = { \ 215 { gen_helper_mve_##ST, gen_helper_mve_##SLD }, \ 216 { NULL, gen_helper_mve_##ULD }, \ 217 }; \ 218 return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE); \ 219 } 220 221 DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8) 222 DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8) 223 DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16) 224 225 static bool do_ldst_sg(DisasContext *s, arg_vldst_sg *a, MVEGenLdStSGFn fn) 226 { 227 TCGv_i32 addr; 228 TCGv_ptr qd, qm; 229 230 if (!dc_isar_feature(aa32_mve, s) || 231 
!mve_check_qreg_bank(s, a->qd | a->qm) || 232 !fn || a->rn == 15) { 233 /* Rn case is UNPREDICTABLE */ 234 return false; 235 } 236 237 if (!mve_eci_check(s) || !vfp_access_check(s)) { 238 return true; 239 } 240 241 addr = load_reg(s, a->rn); 242 243 qd = mve_qreg_ptr(a->qd); 244 qm = mve_qreg_ptr(a->qm); 245 fn(cpu_env, qd, qm, addr); 246 tcg_temp_free_ptr(qd); 247 tcg_temp_free_ptr(qm); 248 tcg_temp_free_i32(addr); 249 mve_update_eci(s); 250 return true; 251 } 252 253 /* 254 * The naming scheme here is "vldrb_sg_sh == in-memory byte loads 255 * signextended to halfword elements in register". _os_ indicates that 256 * the offsets in Qm should be scaled by the element size. 257 */ 258 /* This macro is just to make the arrays more compact in these functions */ 259 #define F(N) gen_helper_mve_##N 260 261 /* VLDRB/VSTRB (ie msize 1) with OS=1 is UNPREDICTABLE; we UNDEF */ 262 static bool trans_VLDR_S_sg(DisasContext *s, arg_vldst_sg *a) 263 { 264 static MVEGenLdStSGFn * const fns[2][4][4] = { { 265 { NULL, F(vldrb_sg_sh), F(vldrb_sg_sw), NULL }, 266 { NULL, NULL, F(vldrh_sg_sw), NULL }, 267 { NULL, NULL, NULL, NULL }, 268 { NULL, NULL, NULL, NULL } 269 }, { 270 { NULL, NULL, NULL, NULL }, 271 { NULL, NULL, F(vldrh_sg_os_sw), NULL }, 272 { NULL, NULL, NULL, NULL }, 273 { NULL, NULL, NULL, NULL } 274 } 275 }; 276 if (a->qd == a->qm) { 277 return false; /* UNPREDICTABLE */ 278 } 279 return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]); 280 } 281 282 static bool trans_VLDR_U_sg(DisasContext *s, arg_vldst_sg *a) 283 { 284 static MVEGenLdStSGFn * const fns[2][4][4] = { { 285 { F(vldrb_sg_ub), F(vldrb_sg_uh), F(vldrb_sg_uw), NULL }, 286 { NULL, F(vldrh_sg_uh), F(vldrh_sg_uw), NULL }, 287 { NULL, NULL, F(vldrw_sg_uw), NULL }, 288 { NULL, NULL, NULL, F(vldrd_sg_ud) } 289 }, { 290 { NULL, NULL, NULL, NULL }, 291 { NULL, F(vldrh_sg_os_uh), F(vldrh_sg_os_uw), NULL }, 292 { NULL, NULL, F(vldrw_sg_os_uw), NULL }, 293 { NULL, NULL, NULL, F(vldrd_sg_os_ud) } 294 } 295 }; 296 if (a->qd == a->qm) { 297 return false; /* UNPREDICTABLE */ 298 } 299 return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]); 300 } 301 302 static bool trans_VSTR_sg(DisasContext *s, arg_vldst_sg *a) 303 { 304 static MVEGenLdStSGFn * const fns[2][4][4] = { { 305 { F(vstrb_sg_ub), F(vstrb_sg_uh), F(vstrb_sg_uw), NULL }, 306 { NULL, F(vstrh_sg_uh), F(vstrh_sg_uw), NULL }, 307 { NULL, NULL, F(vstrw_sg_uw), NULL }, 308 { NULL, NULL, NULL, F(vstrd_sg_ud) } 309 }, { 310 { NULL, NULL, NULL, NULL }, 311 { NULL, F(vstrh_sg_os_uh), F(vstrh_sg_os_uw), NULL }, 312 { NULL, NULL, F(vstrw_sg_os_uw), NULL }, 313 { NULL, NULL, NULL, F(vstrd_sg_os_ud) } 314 } 315 }; 316 return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]); 317 } 318 319 #undef F 320 321 static bool do_ldst_sg_imm(DisasContext *s, arg_vldst_sg_imm *a, 322 MVEGenLdStSGFn *fn, unsigned msize) 323 { 324 uint32_t offset; 325 TCGv_ptr qd, qm; 326 327 if (!dc_isar_feature(aa32_mve, s) || 328 !mve_check_qreg_bank(s, a->qd | a->qm) || 329 !fn) { 330 return false; 331 } 332 333 if (!mve_eci_check(s) || !vfp_access_check(s)) { 334 return true; 335 } 336 337 offset = a->imm << msize; 338 if (!a->a) { 339 offset = -offset; 340 } 341 342 qd = mve_qreg_ptr(a->qd); 343 qm = mve_qreg_ptr(a->qm); 344 fn(cpu_env, qd, qm, tcg_constant_i32(offset)); 345 tcg_temp_free_ptr(qd); 346 tcg_temp_free_ptr(qm); 347 mve_update_eci(s); 348 return true; 349 } 350 351 static bool trans_VLDRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a) 352 { 353 static MVEGenLdStSGFn * const fns[] = { 354 gen_helper_mve_vldrw_sg_uw, 
355 gen_helper_mve_vldrw_sg_wb_uw, 356 }; 357 if (a->qd == a->qm) { 358 return false; /* UNPREDICTABLE */ 359 } 360 return do_ldst_sg_imm(s, a, fns[a->w], MO_32); 361 } 362 363 static bool trans_VLDRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a) 364 { 365 static MVEGenLdStSGFn * const fns[] = { 366 gen_helper_mve_vldrd_sg_ud, 367 gen_helper_mve_vldrd_sg_wb_ud, 368 }; 369 if (a->qd == a->qm) { 370 return false; /* UNPREDICTABLE */ 371 } 372 return do_ldst_sg_imm(s, a, fns[a->w], MO_64); 373 } 374 375 static bool trans_VSTRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a) 376 { 377 static MVEGenLdStSGFn * const fns[] = { 378 gen_helper_mve_vstrw_sg_uw, 379 gen_helper_mve_vstrw_sg_wb_uw, 380 }; 381 return do_ldst_sg_imm(s, a, fns[a->w], MO_32); 382 } 383 384 static bool trans_VSTRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a) 385 { 386 static MVEGenLdStSGFn * const fns[] = { 387 gen_helper_mve_vstrd_sg_ud, 388 gen_helper_mve_vstrd_sg_wb_ud, 389 }; 390 return do_ldst_sg_imm(s, a, fns[a->w], MO_64); 391 } 392 393 static bool do_vldst_il(DisasContext *s, arg_vldst_il *a, MVEGenLdStIlFn *fn, 394 int addrinc) 395 { 396 TCGv_i32 rn; 397 398 if (!dc_isar_feature(aa32_mve, s) || 399 !mve_check_qreg_bank(s, a->qd) || 400 !fn || (a->rn == 13 && a->w) || a->rn == 15) { 401 /* Variously UNPREDICTABLE or UNDEF or related-encoding */ 402 return false; 403 } 404 if (!mve_eci_check(s) || !vfp_access_check(s)) { 405 return true; 406 } 407 408 rn = load_reg(s, a->rn); 409 /* 410 * We pass the index of Qd, not a pointer, because the helper must 411 * access multiple Q registers starting at Qd and working up. 412 */ 413 fn(cpu_env, tcg_constant_i32(a->qd), rn); 414 415 if (a->w) { 416 tcg_gen_addi_i32(rn, rn, addrinc); 417 store_reg(s, a->rn, rn); 418 } else { 419 tcg_temp_free_i32(rn); 420 } 421 mve_update_and_store_eci(s); 422 return true; 423 } 424 425 /* This macro is just to make the arrays more compact in these functions */ 426 #define F(N) gen_helper_mve_##N 427 428 static bool trans_VLD2(DisasContext *s, arg_vldst_il *a) 429 { 430 static MVEGenLdStIlFn * const fns[4][4] = { 431 { F(vld20b), F(vld20h), F(vld20w), NULL, }, 432 { F(vld21b), F(vld21h), F(vld21w), NULL, }, 433 { NULL, NULL, NULL, NULL }, 434 { NULL, NULL, NULL, NULL }, 435 }; 436 if (a->qd > 6) { 437 return false; 438 } 439 return do_vldst_il(s, a, fns[a->pat][a->size], 32); 440 } 441 442 static bool trans_VLD4(DisasContext *s, arg_vldst_il *a) 443 { 444 static MVEGenLdStIlFn * const fns[4][4] = { 445 { F(vld40b), F(vld40h), F(vld40w), NULL, }, 446 { F(vld41b), F(vld41h), F(vld41w), NULL, }, 447 { F(vld42b), F(vld42h), F(vld42w), NULL, }, 448 { F(vld43b), F(vld43h), F(vld43w), NULL, }, 449 }; 450 if (a->qd > 4) { 451 return false; 452 } 453 return do_vldst_il(s, a, fns[a->pat][a->size], 64); 454 } 455 456 static bool trans_VST2(DisasContext *s, arg_vldst_il *a) 457 { 458 static MVEGenLdStIlFn * const fns[4][4] = { 459 { F(vst20b), F(vst20h), F(vst20w), NULL, }, 460 { F(vst21b), F(vst21h), F(vst21w), NULL, }, 461 { NULL, NULL, NULL, NULL }, 462 { NULL, NULL, NULL, NULL }, 463 }; 464 if (a->qd > 6) { 465 return false; 466 } 467 return do_vldst_il(s, a, fns[a->pat][a->size], 32); 468 } 469 470 static bool trans_VST4(DisasContext *s, arg_vldst_il *a) 471 { 472 static MVEGenLdStIlFn * const fns[4][4] = { 473 { F(vst40b), F(vst40h), F(vst40w), NULL, }, 474 { F(vst41b), F(vst41h), F(vst41w), NULL, }, 475 { F(vst42b), F(vst42h), F(vst42w), NULL, }, 476 { F(vst43b), F(vst43h), F(vst43w), NULL, }, 477 }; 478 if (a->qd > 4) { 479 return false; 480 } 481 
return do_vldst_il(s, a, fns[a->pat][a->size], 64); 482 } 483 484 #undef F 485 486 static bool trans_VDUP(DisasContext *s, arg_VDUP *a) 487 { 488 TCGv_ptr qd; 489 TCGv_i32 rt; 490 491 if (!dc_isar_feature(aa32_mve, s) || 492 !mve_check_qreg_bank(s, a->qd)) { 493 return false; 494 } 495 if (a->rt == 13 || a->rt == 15) { 496 /* UNPREDICTABLE; we choose to UNDEF */ 497 return false; 498 } 499 if (!mve_eci_check(s) || !vfp_access_check(s)) { 500 return true; 501 } 502 503 qd = mve_qreg_ptr(a->qd); 504 rt = load_reg(s, a->rt); 505 tcg_gen_dup_i32(a->size, rt, rt); 506 gen_helper_mve_vdup(cpu_env, qd, rt); 507 tcg_temp_free_ptr(qd); 508 tcg_temp_free_i32(rt); 509 mve_update_eci(s); 510 return true; 511 } 512 513 static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn) 514 { 515 TCGv_ptr qd, qm; 516 517 if (!dc_isar_feature(aa32_mve, s) || 518 !mve_check_qreg_bank(s, a->qd | a->qm) || 519 !fn) { 520 return false; 521 } 522 523 if (!mve_eci_check(s) || !vfp_access_check(s)) { 524 return true; 525 } 526 527 qd = mve_qreg_ptr(a->qd); 528 qm = mve_qreg_ptr(a->qm); 529 fn(cpu_env, qd, qm); 530 tcg_temp_free_ptr(qd); 531 tcg_temp_free_ptr(qm); 532 mve_update_eci(s); 533 return true; 534 } 535 536 #define DO_1OP(INSN, FN) \ 537 static bool trans_##INSN(DisasContext *s, arg_1op *a) \ 538 { \ 539 static MVEGenOneOpFn * const fns[] = { \ 540 gen_helper_mve_##FN##b, \ 541 gen_helper_mve_##FN##h, \ 542 gen_helper_mve_##FN##w, \ 543 NULL, \ 544 }; \ 545 return do_1op(s, a, fns[a->size]); \ 546 } 547 548 DO_1OP(VCLZ, vclz) 549 DO_1OP(VCLS, vcls) 550 DO_1OP(VABS, vabs) 551 DO_1OP(VNEG, vneg) 552 DO_1OP(VQABS, vqabs) 553 DO_1OP(VQNEG, vqneg) 554 DO_1OP(VMAXA, vmaxa) 555 DO_1OP(VMINA, vmina) 556 557 /* 558 * For simple float/int conversions we use the fixed-point 559 * conversion helpers with a zero shift count 560 */ 561 #define DO_VCVT(INSN, HFN, SFN) \ 562 static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm) \ 563 { \ 564 gen_helper_mve_##HFN(env, qd, qm, tcg_constant_i32(0)); \ 565 } \ 566 static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm) \ 567 { \ 568 gen_helper_mve_##SFN(env, qd, qm, tcg_constant_i32(0)); \ 569 } \ 570 static bool trans_##INSN(DisasContext *s, arg_1op *a) \ 571 { \ 572 static MVEGenOneOpFn * const fns[] = { \ 573 NULL, \ 574 gen_##INSN##h, \ 575 gen_##INSN##s, \ 576 NULL, \ 577 }; \ 578 if (!dc_isar_feature(aa32_mve_fp, s)) { \ 579 return false; \ 580 } \ 581 return do_1op(s, a, fns[a->size]); \ 582 } 583 584 DO_VCVT(VCVT_SF, vcvt_sh, vcvt_sf) 585 DO_VCVT(VCVT_UF, vcvt_uh, vcvt_uf) 586 DO_VCVT(VCVT_FS, vcvt_hs, vcvt_fs) 587 DO_VCVT(VCVT_FU, vcvt_hu, vcvt_fu) 588 589 static bool do_vcvt_rmode(DisasContext *s, arg_1op *a, 590 enum arm_fprounding rmode, bool u) 591 { 592 /* 593 * Handle VCVT fp to int with specified rounding mode. 594 * This is a 1op fn but we must pass the rounding mode as 595 * an immediate to the helper. 
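 * (The arm_fprounding value is translated to the softfloat rounding-mode
 *  encoding with arm_rmode_to_sf() before it is passed to the helper.)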
596 */ 597 TCGv_ptr qd, qm; 598 static MVEGenVCVTRmodeFn * const fns[4][2] = { 599 { NULL, NULL }, 600 { gen_helper_mve_vcvt_rm_sh, gen_helper_mve_vcvt_rm_uh }, 601 { gen_helper_mve_vcvt_rm_ss, gen_helper_mve_vcvt_rm_us }, 602 { NULL, NULL }, 603 }; 604 MVEGenVCVTRmodeFn *fn = fns[a->size][u]; 605 606 if (!dc_isar_feature(aa32_mve_fp, s) || 607 !mve_check_qreg_bank(s, a->qd | a->qm) || 608 !fn) { 609 return false; 610 } 611 612 if (!mve_eci_check(s) || !vfp_access_check(s)) { 613 return true; 614 } 615 616 qd = mve_qreg_ptr(a->qd); 617 qm = mve_qreg_ptr(a->qm); 618 fn(cpu_env, qd, qm, tcg_constant_i32(arm_rmode_to_sf(rmode))); 619 tcg_temp_free_ptr(qd); 620 tcg_temp_free_ptr(qm); 621 mve_update_eci(s); 622 return true; 623 } 624 625 #define DO_VCVT_RMODE(INSN, RMODE, U) \ 626 static bool trans_##INSN(DisasContext *s, arg_1op *a) \ 627 { \ 628 return do_vcvt_rmode(s, a, RMODE, U); \ 629 } \ 630 631 DO_VCVT_RMODE(VCVTAS, FPROUNDING_TIEAWAY, false) 632 DO_VCVT_RMODE(VCVTAU, FPROUNDING_TIEAWAY, true) 633 DO_VCVT_RMODE(VCVTNS, FPROUNDING_TIEEVEN, false) 634 DO_VCVT_RMODE(VCVTNU, FPROUNDING_TIEEVEN, true) 635 DO_VCVT_RMODE(VCVTPS, FPROUNDING_POSINF, false) 636 DO_VCVT_RMODE(VCVTPU, FPROUNDING_POSINF, true) 637 DO_VCVT_RMODE(VCVTMS, FPROUNDING_NEGINF, false) 638 DO_VCVT_RMODE(VCVTMU, FPROUNDING_NEGINF, true) 639 640 #define DO_VCVT_SH(INSN, FN) \ 641 static bool trans_##INSN(DisasContext *s, arg_1op *a) \ 642 { \ 643 if (!dc_isar_feature(aa32_mve_fp, s)) { \ 644 return false; \ 645 } \ 646 return do_1op(s, a, gen_helper_mve_##FN); \ 647 } \ 648 649 DO_VCVT_SH(VCVTB_SH, vcvtb_sh) 650 DO_VCVT_SH(VCVTT_SH, vcvtt_sh) 651 DO_VCVT_SH(VCVTB_HS, vcvtb_hs) 652 DO_VCVT_SH(VCVTT_HS, vcvtt_hs) 653 654 #define DO_VRINT(INSN, RMODE) \ 655 static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm) \ 656 { \ 657 gen_helper_mve_vrint_rm_h(env, qd, qm, \ 658 tcg_constant_i32(arm_rmode_to_sf(RMODE))); \ 659 } \ 660 static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm) \ 661 { \ 662 gen_helper_mve_vrint_rm_s(env, qd, qm, \ 663 tcg_constant_i32(arm_rmode_to_sf(RMODE))); \ 664 } \ 665 static bool trans_##INSN(DisasContext *s, arg_1op *a) \ 666 { \ 667 static MVEGenOneOpFn * const fns[] = { \ 668 NULL, \ 669 gen_##INSN##h, \ 670 gen_##INSN##s, \ 671 NULL, \ 672 }; \ 673 if (!dc_isar_feature(aa32_mve_fp, s)) { \ 674 return false; \ 675 } \ 676 return do_1op(s, a, fns[a->size]); \ 677 } 678 679 DO_VRINT(VRINTN, FPROUNDING_TIEEVEN) 680 DO_VRINT(VRINTA, FPROUNDING_TIEAWAY) 681 DO_VRINT(VRINTZ, FPROUNDING_ZERO) 682 DO_VRINT(VRINTM, FPROUNDING_NEGINF) 683 DO_VRINT(VRINTP, FPROUNDING_POSINF) 684 685 static bool trans_VRINTX(DisasContext *s, arg_1op *a) 686 { 687 static MVEGenOneOpFn * const fns[] = { 688 NULL, 689 gen_helper_mve_vrintx_h, 690 gen_helper_mve_vrintx_s, 691 NULL, 692 }; 693 if (!dc_isar_feature(aa32_mve_fp, s)) { 694 return false; 695 } 696 return do_1op(s, a, fns[a->size]); 697 } 698 699 /* Narrowing moves: only size 0 and 1 are valid */ 700 #define DO_VMOVN(INSN, FN) \ 701 static bool trans_##INSN(DisasContext *s, arg_1op *a) \ 702 { \ 703 static MVEGenOneOpFn * const fns[] = { \ 704 gen_helper_mve_##FN##b, \ 705 gen_helper_mve_##FN##h, \ 706 NULL, \ 707 NULL, \ 708 }; \ 709 return do_1op(s, a, fns[a->size]); \ 710 } 711 712 DO_VMOVN(VMOVNB, vmovnb) 713 DO_VMOVN(VMOVNT, vmovnt) 714 DO_VMOVN(VQMOVUNB, vqmovunb) 715 DO_VMOVN(VQMOVUNT, vqmovunt) 716 DO_VMOVN(VQMOVN_BS, vqmovnbs) 717 DO_VMOVN(VQMOVN_TS, vqmovnts) 718 DO_VMOVN(VQMOVN_BU, vqmovnbu) 719 DO_VMOVN(VQMOVN_TU, vqmovntu) 720 721 static 
bool trans_VREV16(DisasContext *s, arg_1op *a) 722 { 723 static MVEGenOneOpFn * const fns[] = { 724 gen_helper_mve_vrev16b, 725 NULL, 726 NULL, 727 NULL, 728 }; 729 return do_1op(s, a, fns[a->size]); 730 } 731 732 static bool trans_VREV32(DisasContext *s, arg_1op *a) 733 { 734 static MVEGenOneOpFn * const fns[] = { 735 gen_helper_mve_vrev32b, 736 gen_helper_mve_vrev32h, 737 NULL, 738 NULL, 739 }; 740 return do_1op(s, a, fns[a->size]); 741 } 742 743 static bool trans_VREV64(DisasContext *s, arg_1op *a) 744 { 745 static MVEGenOneOpFn * const fns[] = { 746 gen_helper_mve_vrev64b, 747 gen_helper_mve_vrev64h, 748 gen_helper_mve_vrev64w, 749 NULL, 750 }; 751 return do_1op(s, a, fns[a->size]); 752 } 753 754 static bool trans_VMVN(DisasContext *s, arg_1op *a) 755 { 756 return do_1op(s, a, gen_helper_mve_vmvn); 757 } 758 759 static bool trans_VABS_fp(DisasContext *s, arg_1op *a) 760 { 761 static MVEGenOneOpFn * const fns[] = { 762 NULL, 763 gen_helper_mve_vfabsh, 764 gen_helper_mve_vfabss, 765 NULL, 766 }; 767 if (!dc_isar_feature(aa32_mve_fp, s)) { 768 return false; 769 } 770 return do_1op(s, a, fns[a->size]); 771 } 772 773 static bool trans_VNEG_fp(DisasContext *s, arg_1op *a) 774 { 775 static MVEGenOneOpFn * const fns[] = { 776 NULL, 777 gen_helper_mve_vfnegh, 778 gen_helper_mve_vfnegs, 779 NULL, 780 }; 781 if (!dc_isar_feature(aa32_mve_fp, s)) { 782 return false; 783 } 784 return do_1op(s, a, fns[a->size]); 785 } 786 787 static bool do_2op_vec(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn, 788 GVecGen3Fn *vecfn) 789 { 790 TCGv_ptr qd, qn, qm; 791 792 if (!dc_isar_feature(aa32_mve, s) || 793 !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) || 794 !fn) { 795 return false; 796 } 797 if (!mve_eci_check(s) || !vfp_access_check(s)) { 798 return true; 799 } 800 801 if (vecfn && mve_no_predication(s)) { 802 vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qn), 803 mve_qreg_offset(a->qm), 16, 16); 804 } else { 805 qd = mve_qreg_ptr(a->qd); 806 qn = mve_qreg_ptr(a->qn); 807 qm = mve_qreg_ptr(a->qm); 808 fn(cpu_env, qd, qn, qm); 809 tcg_temp_free_ptr(qd); 810 tcg_temp_free_ptr(qn); 811 tcg_temp_free_ptr(qm); 812 } 813 mve_update_eci(s); 814 return true; 815 } 816 817 static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn *fn) 818 { 819 return do_2op_vec(s, a, fn, NULL); 820 } 821 822 #define DO_LOGIC(INSN, HELPER, VECFN) \ 823 static bool trans_##INSN(DisasContext *s, arg_2op *a) \ 824 { \ 825 return do_2op_vec(s, a, HELPER, VECFN); \ 826 } 827 828 DO_LOGIC(VAND, gen_helper_mve_vand, tcg_gen_gvec_and) 829 DO_LOGIC(VBIC, gen_helper_mve_vbic, tcg_gen_gvec_andc) 830 DO_LOGIC(VORR, gen_helper_mve_vorr, tcg_gen_gvec_or) 831 DO_LOGIC(VORN, gen_helper_mve_vorn, tcg_gen_gvec_orc) 832 DO_LOGIC(VEOR, gen_helper_mve_veor, tcg_gen_gvec_xor) 833 834 static bool trans_VPSEL(DisasContext *s, arg_2op *a) 835 { 836 /* This insn updates predication bits */ 837 s->base.is_jmp = DISAS_UPDATE_NOCHAIN; 838 return do_2op(s, a, gen_helper_mve_vpsel); 839 } 840 841 #define DO_2OP(INSN, FN) \ 842 static bool trans_##INSN(DisasContext *s, arg_2op *a) \ 843 { \ 844 static MVEGenTwoOpFn * const fns[] = { \ 845 gen_helper_mve_##FN##b, \ 846 gen_helper_mve_##FN##h, \ 847 gen_helper_mve_##FN##w, \ 848 NULL, \ 849 }; \ 850 return do_2op(s, a, fns[a->size]); \ 851 } 852 853 DO_2OP(VADD, vadd) 854 DO_2OP(VSUB, vsub) 855 DO_2OP(VMUL, vmul) 856 DO_2OP(VMULH_S, vmulhs) 857 DO_2OP(VMULH_U, vmulhu) 858 DO_2OP(VRMULH_S, vrmulhs) 859 DO_2OP(VRMULH_U, vrmulhu) 860 DO_2OP(VMAX_S, vmaxs) 861 DO_2OP(VMAX_U, vmaxu) 862 DO_2OP(VMIN_S, 
vmins) 863 DO_2OP(VMIN_U, vminu) 864 DO_2OP(VABD_S, vabds) 865 DO_2OP(VABD_U, vabdu) 866 DO_2OP(VHADD_S, vhadds) 867 DO_2OP(VHADD_U, vhaddu) 868 DO_2OP(VHSUB_S, vhsubs) 869 DO_2OP(VHSUB_U, vhsubu) 870 DO_2OP(VMULL_BS, vmullbs) 871 DO_2OP(VMULL_BU, vmullbu) 872 DO_2OP(VMULL_TS, vmullts) 873 DO_2OP(VMULL_TU, vmulltu) 874 DO_2OP(VQDMULH, vqdmulh) 875 DO_2OP(VQRDMULH, vqrdmulh) 876 DO_2OP(VQADD_S, vqadds) 877 DO_2OP(VQADD_U, vqaddu) 878 DO_2OP(VQSUB_S, vqsubs) 879 DO_2OP(VQSUB_U, vqsubu) 880 DO_2OP(VSHL_S, vshls) 881 DO_2OP(VSHL_U, vshlu) 882 DO_2OP(VRSHL_S, vrshls) 883 DO_2OP(VRSHL_U, vrshlu) 884 DO_2OP(VQSHL_S, vqshls) 885 DO_2OP(VQSHL_U, vqshlu) 886 DO_2OP(VQRSHL_S, vqrshls) 887 DO_2OP(VQRSHL_U, vqrshlu) 888 DO_2OP(VQDMLADH, vqdmladh) 889 DO_2OP(VQDMLADHX, vqdmladhx) 890 DO_2OP(VQRDMLADH, vqrdmladh) 891 DO_2OP(VQRDMLADHX, vqrdmladhx) 892 DO_2OP(VQDMLSDH, vqdmlsdh) 893 DO_2OP(VQDMLSDHX, vqdmlsdhx) 894 DO_2OP(VQRDMLSDH, vqrdmlsdh) 895 DO_2OP(VQRDMLSDHX, vqrdmlsdhx) 896 DO_2OP(VRHADD_S, vrhadds) 897 DO_2OP(VRHADD_U, vrhaddu) 898 /* 899 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose 900 * so we can reuse the DO_2OP macro. (Our implementation calculates the 901 * "expected" results in this case.) Similarly for VHCADD. 902 */ 903 DO_2OP(VCADD90, vcadd90) 904 DO_2OP(VCADD270, vcadd270) 905 DO_2OP(VHCADD90, vhcadd90) 906 DO_2OP(VHCADD270, vhcadd270) 907 908 static bool trans_VQDMULLB(DisasContext *s, arg_2op *a) 909 { 910 static MVEGenTwoOpFn * const fns[] = { 911 NULL, 912 gen_helper_mve_vqdmullbh, 913 gen_helper_mve_vqdmullbw, 914 NULL, 915 }; 916 if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) { 917 /* UNPREDICTABLE; we choose to undef */ 918 return false; 919 } 920 return do_2op(s, a, fns[a->size]); 921 } 922 923 static bool trans_VQDMULLT(DisasContext *s, arg_2op *a) 924 { 925 static MVEGenTwoOpFn * const fns[] = { 926 NULL, 927 gen_helper_mve_vqdmullth, 928 gen_helper_mve_vqdmulltw, 929 NULL, 930 }; 931 if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) { 932 /* UNPREDICTABLE; we choose to undef */ 933 return false; 934 } 935 return do_2op(s, a, fns[a->size]); 936 } 937 938 static bool trans_VMULLP_B(DisasContext *s, arg_2op *a) 939 { 940 /* 941 * Note that a->size indicates the output size, ie VMULL.P8 942 * is the 8x8->16 operation and a->size is MO_16; VMULL.P16 943 * is the 16x16->32 operation and a->size is MO_32. 944 */ 945 static MVEGenTwoOpFn * const fns[] = { 946 NULL, 947 gen_helper_mve_vmullpbh, 948 gen_helper_mve_vmullpbw, 949 NULL, 950 }; 951 return do_2op(s, a, fns[a->size]); 952 } 953 954 static bool trans_VMULLP_T(DisasContext *s, arg_2op *a) 955 { 956 /* a->size is as for trans_VMULLP_B */ 957 static MVEGenTwoOpFn * const fns[] = { 958 NULL, 959 gen_helper_mve_vmullpth, 960 gen_helper_mve_vmullptw, 961 NULL, 962 }; 963 return do_2op(s, a, fns[a->size]); 964 } 965 966 /* 967 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry 968 * of the 32-bit elements in each lane of the input vectors, where the 969 * carry-out of each add is the carry-in of the next. The initial carry 970 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C 971 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C. 972 * These insns are subject to beat-wise execution. Partial execution 973 * of an I=1 (initial carry input fixed) insn which does not 974 * execute the first beat must start with the current FPSCR.NZCV 975 * value, not the fixed constant input. 
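 * (This is why trans_VADCI() and trans_VSBCI() below fall back to the
 *  plain VADC/VSBC handling when mve_skip_first_beat() is true.)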
976 */ 977 static bool trans_VADC(DisasContext *s, arg_2op *a) 978 { 979 return do_2op(s, a, gen_helper_mve_vadc); 980 } 981 982 static bool trans_VADCI(DisasContext *s, arg_2op *a) 983 { 984 if (mve_skip_first_beat(s)) { 985 return trans_VADC(s, a); 986 } 987 return do_2op(s, a, gen_helper_mve_vadci); 988 } 989 990 static bool trans_VSBC(DisasContext *s, arg_2op *a) 991 { 992 return do_2op(s, a, gen_helper_mve_vsbc); 993 } 994 995 static bool trans_VSBCI(DisasContext *s, arg_2op *a) 996 { 997 if (mve_skip_first_beat(s)) { 998 return trans_VSBC(s, a); 999 } 1000 return do_2op(s, a, gen_helper_mve_vsbci); 1001 } 1002 1003 #define DO_2OP_FP(INSN, FN) \ 1004 static bool trans_##INSN(DisasContext *s, arg_2op *a) \ 1005 { \ 1006 static MVEGenTwoOpFn * const fns[] = { \ 1007 NULL, \ 1008 gen_helper_mve_##FN##h, \ 1009 gen_helper_mve_##FN##s, \ 1010 NULL, \ 1011 }; \ 1012 if (!dc_isar_feature(aa32_mve_fp, s)) { \ 1013 return false; \ 1014 } \ 1015 return do_2op(s, a, fns[a->size]); \ 1016 } 1017 1018 DO_2OP_FP(VADD_fp, vfadd) 1019 DO_2OP_FP(VSUB_fp, vfsub) 1020 DO_2OP_FP(VMUL_fp, vfmul) 1021 DO_2OP_FP(VABD_fp, vfabd) 1022 DO_2OP_FP(VMAXNM, vmaxnm) 1023 DO_2OP_FP(VMINNM, vminnm) 1024 DO_2OP_FP(VCADD90_fp, vfcadd90) 1025 DO_2OP_FP(VCADD270_fp, vfcadd270) 1026 DO_2OP_FP(VFMA, vfma) 1027 DO_2OP_FP(VFMS, vfms) 1028 DO_2OP_FP(VCMUL0, vcmul0) 1029 DO_2OP_FP(VCMUL90, vcmul90) 1030 DO_2OP_FP(VCMUL180, vcmul180) 1031 DO_2OP_FP(VCMUL270, vcmul270) 1032 DO_2OP_FP(VCMLA0, vcmla0) 1033 DO_2OP_FP(VCMLA90, vcmla90) 1034 DO_2OP_FP(VCMLA180, vcmla180) 1035 DO_2OP_FP(VCMLA270, vcmla270) 1036 DO_2OP_FP(VMAXNMA, vmaxnma) 1037 DO_2OP_FP(VMINNMA, vminnma) 1038 1039 static bool do_2op_scalar(DisasContext *s, arg_2scalar *a, 1040 MVEGenTwoOpScalarFn fn) 1041 { 1042 TCGv_ptr qd, qn; 1043 TCGv_i32 rm; 1044 1045 if (!dc_isar_feature(aa32_mve, s) || 1046 !mve_check_qreg_bank(s, a->qd | a->qn) || 1047 !fn) { 1048 return false; 1049 } 1050 if (a->rm == 13 || a->rm == 15) { 1051 /* UNPREDICTABLE */ 1052 return false; 1053 } 1054 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1055 return true; 1056 } 1057 1058 qd = mve_qreg_ptr(a->qd); 1059 qn = mve_qreg_ptr(a->qn); 1060 rm = load_reg(s, a->rm); 1061 fn(cpu_env, qd, qn, rm); 1062 tcg_temp_free_i32(rm); 1063 tcg_temp_free_ptr(qd); 1064 tcg_temp_free_ptr(qn); 1065 mve_update_eci(s); 1066 return true; 1067 } 1068 1069 #define DO_2OP_SCALAR(INSN, FN) \ 1070 static bool trans_##INSN(DisasContext *s, arg_2scalar *a) \ 1071 { \ 1072 static MVEGenTwoOpScalarFn * const fns[] = { \ 1073 gen_helper_mve_##FN##b, \ 1074 gen_helper_mve_##FN##h, \ 1075 gen_helper_mve_##FN##w, \ 1076 NULL, \ 1077 }; \ 1078 return do_2op_scalar(s, a, fns[a->size]); \ 1079 } 1080 1081 DO_2OP_SCALAR(VADD_scalar, vadd_scalar) 1082 DO_2OP_SCALAR(VSUB_scalar, vsub_scalar) 1083 DO_2OP_SCALAR(VMUL_scalar, vmul_scalar) 1084 DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar) 1085 DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar) 1086 DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar) 1087 DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar) 1088 DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar) 1089 DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar) 1090 DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar) 1091 DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar) 1092 DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar) 1093 DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar) 1094 DO_2OP_SCALAR(VBRSR, vbrsr) 1095 DO_2OP_SCALAR(VMLA, vmla) 1096 DO_2OP_SCALAR(VMLAS, vmlas) 1097 DO_2OP_SCALAR(VQDMLAH, vqdmlah) 1098 DO_2OP_SCALAR(VQRDMLAH, vqrdmlah) 1099 DO_2OP_SCALAR(VQDMLASH, 
vqdmlash) 1100 DO_2OP_SCALAR(VQRDMLASH, vqrdmlash) 1101 1102 static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a) 1103 { 1104 static MVEGenTwoOpScalarFn * const fns[] = { 1105 NULL, 1106 gen_helper_mve_vqdmullb_scalarh, 1107 gen_helper_mve_vqdmullb_scalarw, 1108 NULL, 1109 }; 1110 if (a->qd == a->qn && a->size == MO_32) { 1111 /* UNPREDICTABLE; we choose to undef */ 1112 return false; 1113 } 1114 return do_2op_scalar(s, a, fns[a->size]); 1115 } 1116 1117 static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a) 1118 { 1119 static MVEGenTwoOpScalarFn * const fns[] = { 1120 NULL, 1121 gen_helper_mve_vqdmullt_scalarh, 1122 gen_helper_mve_vqdmullt_scalarw, 1123 NULL, 1124 }; 1125 if (a->qd == a->qn && a->size == MO_32) { 1126 /* UNPREDICTABLE; we choose to undef */ 1127 return false; 1128 } 1129 return do_2op_scalar(s, a, fns[a->size]); 1130 } 1131 1132 1133 #define DO_2OP_FP_SCALAR(INSN, FN) \ 1134 static bool trans_##INSN(DisasContext *s, arg_2scalar *a) \ 1135 { \ 1136 static MVEGenTwoOpScalarFn * const fns[] = { \ 1137 NULL, \ 1138 gen_helper_mve_##FN##h, \ 1139 gen_helper_mve_##FN##s, \ 1140 NULL, \ 1141 }; \ 1142 if (!dc_isar_feature(aa32_mve_fp, s)) { \ 1143 return false; \ 1144 } \ 1145 return do_2op_scalar(s, a, fns[a->size]); \ 1146 } 1147 1148 DO_2OP_FP_SCALAR(VADD_fp_scalar, vfadd_scalar) 1149 DO_2OP_FP_SCALAR(VSUB_fp_scalar, vfsub_scalar) 1150 DO_2OP_FP_SCALAR(VMUL_fp_scalar, vfmul_scalar) 1151 DO_2OP_FP_SCALAR(VFMA_scalar, vfma_scalar) 1152 DO_2OP_FP_SCALAR(VFMAS_scalar, vfmas_scalar) 1153 1154 static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a, 1155 MVEGenLongDualAccOpFn *fn) 1156 { 1157 TCGv_ptr qn, qm; 1158 TCGv_i64 rda; 1159 TCGv_i32 rdalo, rdahi; 1160 1161 if (!dc_isar_feature(aa32_mve, s) || 1162 !mve_check_qreg_bank(s, a->qn | a->qm) || 1163 !fn) { 1164 return false; 1165 } 1166 /* 1167 * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related 1168 * encoding; rdalo always has bit 0 clear so cannot be 13 or 15. 1169 */ 1170 if (a->rdahi == 13 || a->rdahi == 15) { 1171 return false; 1172 } 1173 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1174 return true; 1175 } 1176 1177 qn = mve_qreg_ptr(a->qn); 1178 qm = mve_qreg_ptr(a->qm); 1179 1180 /* 1181 * This insn is subject to beat-wise execution. Partial execution 1182 * of an A=0 (no-accumulate) insn which does not execute the first 1183 * beat must start with the current rda value, not 0. 
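 * (Hence the A=0 case below still loads RdaHi:RdaLo when
 *  mve_skip_first_beat() is true, and only starts from a zero accumulator
 *  when the first beat is really executed.)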
1184 */ 1185 if (a->a || mve_skip_first_beat(s)) { 1186 rda = tcg_temp_new_i64(); 1187 rdalo = load_reg(s, a->rdalo); 1188 rdahi = load_reg(s, a->rdahi); 1189 tcg_gen_concat_i32_i64(rda, rdalo, rdahi); 1190 tcg_temp_free_i32(rdalo); 1191 tcg_temp_free_i32(rdahi); 1192 } else { 1193 rda = tcg_const_i64(0); 1194 } 1195 1196 fn(rda, cpu_env, qn, qm, rda); 1197 tcg_temp_free_ptr(qn); 1198 tcg_temp_free_ptr(qm); 1199 1200 rdalo = tcg_temp_new_i32(); 1201 rdahi = tcg_temp_new_i32(); 1202 tcg_gen_extrl_i64_i32(rdalo, rda); 1203 tcg_gen_extrh_i64_i32(rdahi, rda); 1204 store_reg(s, a->rdalo, rdalo); 1205 store_reg(s, a->rdahi, rdahi); 1206 tcg_temp_free_i64(rda); 1207 mve_update_eci(s); 1208 return true; 1209 } 1210 1211 static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a) 1212 { 1213 static MVEGenLongDualAccOpFn * const fns[4][2] = { 1214 { NULL, NULL }, 1215 { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh }, 1216 { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw }, 1217 { NULL, NULL }, 1218 }; 1219 return do_long_dual_acc(s, a, fns[a->size][a->x]); 1220 } 1221 1222 static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a) 1223 { 1224 static MVEGenLongDualAccOpFn * const fns[4][2] = { 1225 { NULL, NULL }, 1226 { gen_helper_mve_vmlaldavuh, NULL }, 1227 { gen_helper_mve_vmlaldavuw, NULL }, 1228 { NULL, NULL }, 1229 }; 1230 return do_long_dual_acc(s, a, fns[a->size][a->x]); 1231 } 1232 1233 static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a) 1234 { 1235 static MVEGenLongDualAccOpFn * const fns[4][2] = { 1236 { NULL, NULL }, 1237 { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh }, 1238 { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw }, 1239 { NULL, NULL }, 1240 }; 1241 return do_long_dual_acc(s, a, fns[a->size][a->x]); 1242 } 1243 1244 static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a) 1245 { 1246 static MVEGenLongDualAccOpFn * const fns[] = { 1247 gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw, 1248 }; 1249 return do_long_dual_acc(s, a, fns[a->x]); 1250 } 1251 1252 static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a) 1253 { 1254 static MVEGenLongDualAccOpFn * const fns[] = { 1255 gen_helper_mve_vrmlaldavhuw, NULL, 1256 }; 1257 return do_long_dual_acc(s, a, fns[a->x]); 1258 } 1259 1260 static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a) 1261 { 1262 static MVEGenLongDualAccOpFn * const fns[] = { 1263 gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw, 1264 }; 1265 return do_long_dual_acc(s, a, fns[a->x]); 1266 } 1267 1268 static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn) 1269 { 1270 TCGv_ptr qn, qm; 1271 TCGv_i32 rda; 1272 1273 if (!dc_isar_feature(aa32_mve, s) || 1274 !mve_check_qreg_bank(s, a->qn) || 1275 !fn) { 1276 return false; 1277 } 1278 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1279 return true; 1280 } 1281 1282 qn = mve_qreg_ptr(a->qn); 1283 qm = mve_qreg_ptr(a->qm); 1284 1285 /* 1286 * This insn is subject to beat-wise execution. Partial execution 1287 * of an A=0 (no-accumulate) insn which does not execute the first 1288 * beat must start with the current rda value, not 0. 
1289 */ 1290 if (a->a || mve_skip_first_beat(s)) { 1291 rda = load_reg(s, a->rda); 1292 } else { 1293 rda = tcg_const_i32(0); 1294 } 1295 1296 fn(rda, cpu_env, qn, qm, rda); 1297 store_reg(s, a->rda, rda); 1298 tcg_temp_free_ptr(qn); 1299 tcg_temp_free_ptr(qm); 1300 1301 mve_update_eci(s); 1302 return true; 1303 } 1304 1305 #define DO_DUAL_ACC(INSN, FN) \ 1306 static bool trans_##INSN(DisasContext *s, arg_vmladav *a) \ 1307 { \ 1308 static MVEGenDualAccOpFn * const fns[4][2] = { \ 1309 { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb }, \ 1310 { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh }, \ 1311 { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw }, \ 1312 { NULL, NULL }, \ 1313 }; \ 1314 return do_dual_acc(s, a, fns[a->size][a->x]); \ 1315 } 1316 1317 DO_DUAL_ACC(VMLADAV_S, vmladavs) 1318 DO_DUAL_ACC(VMLSDAV, vmlsdav) 1319 1320 static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a) 1321 { 1322 static MVEGenDualAccOpFn * const fns[4][2] = { 1323 { gen_helper_mve_vmladavub, NULL }, 1324 { gen_helper_mve_vmladavuh, NULL }, 1325 { gen_helper_mve_vmladavuw, NULL }, 1326 { NULL, NULL }, 1327 }; 1328 return do_dual_acc(s, a, fns[a->size][a->x]); 1329 } 1330 1331 static void gen_vpst(DisasContext *s, uint32_t mask) 1332 { 1333 /* 1334 * Set the VPR mask fields. We take advantage of MASK01 and MASK23 1335 * being adjacent fields in the register. 1336 * 1337 * Updating the masks is not predicated, but it is subject to beat-wise 1338 * execution, and the mask is updated on the odd-numbered beats. 1339 * So if PSR.ECI says we should skip beat 1, we mustn't update the 1340 * 01 mask field. 1341 */ 1342 TCGv_i32 vpr = load_cpu_field(v7m.vpr); 1343 switch (s->eci) { 1344 case ECI_NONE: 1345 case ECI_A0: 1346 /* Update both 01 and 23 fields */ 1347 tcg_gen_deposit_i32(vpr, vpr, 1348 tcg_constant_i32(mask | (mask << 4)), 1349 R_V7M_VPR_MASK01_SHIFT, 1350 R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH); 1351 break; 1352 case ECI_A0A1: 1353 case ECI_A0A1A2: 1354 case ECI_A0A1A2B0: 1355 /* Update only the 23 mask field */ 1356 tcg_gen_deposit_i32(vpr, vpr, 1357 tcg_constant_i32(mask), 1358 R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH); 1359 break; 1360 default: 1361 g_assert_not_reached(); 1362 } 1363 store_cpu_field(vpr, v7m.vpr); 1364 } 1365 1366 static bool trans_VPST(DisasContext *s, arg_VPST *a) 1367 { 1368 /* mask == 0 is a "related encoding" */ 1369 if (!dc_isar_feature(aa32_mve, s) || !a->mask) { 1370 return false; 1371 } 1372 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1373 return true; 1374 } 1375 gen_vpst(s, a->mask); 1376 mve_update_and_store_eci(s); 1377 return true; 1378 } 1379 1380 static bool trans_VPNOT(DisasContext *s, arg_VPNOT *a) 1381 { 1382 /* 1383 * Invert the predicate in VPR.P0. We have call out to 1384 * a helper because this insn itself is beatwise and can 1385 * be predicated. 
1386 */ 1387 if (!dc_isar_feature(aa32_mve, s)) { 1388 return false; 1389 } 1390 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1391 return true; 1392 } 1393 1394 gen_helper_mve_vpnot(cpu_env); 1395 /* This insn updates predication bits */ 1396 s->base.is_jmp = DISAS_UPDATE_NOCHAIN; 1397 mve_update_eci(s); 1398 return true; 1399 } 1400 1401 static bool trans_VADDV(DisasContext *s, arg_VADDV *a) 1402 { 1403 /* VADDV: vector add across vector */ 1404 static MVEGenVADDVFn * const fns[4][2] = { 1405 { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub }, 1406 { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh }, 1407 { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw }, 1408 { NULL, NULL } 1409 }; 1410 TCGv_ptr qm; 1411 TCGv_i32 rda; 1412 1413 if (!dc_isar_feature(aa32_mve, s) || 1414 a->size == 3) { 1415 return false; 1416 } 1417 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1418 return true; 1419 } 1420 1421 /* 1422 * This insn is subject to beat-wise execution. Partial execution 1423 * of an A=0 (no-accumulate) insn which does not execute the first 1424 * beat must start with the current value of Rda, not zero. 1425 */ 1426 if (a->a || mve_skip_first_beat(s)) { 1427 /* Accumulate input from Rda */ 1428 rda = load_reg(s, a->rda); 1429 } else { 1430 /* Accumulate starting at zero */ 1431 rda = tcg_const_i32(0); 1432 } 1433 1434 qm = mve_qreg_ptr(a->qm); 1435 fns[a->size][a->u](rda, cpu_env, qm, rda); 1436 store_reg(s, a->rda, rda); 1437 tcg_temp_free_ptr(qm); 1438 1439 mve_update_eci(s); 1440 return true; 1441 } 1442 1443 static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a) 1444 { 1445 /* 1446 * Vector Add Long Across Vector: accumulate the 32-bit 1447 * elements of the vector into a 64-bit result stored in 1448 * a pair of general-purpose registers. 1449 * No need to check Qm's bank: it is only 3 bits in decode. 1450 */ 1451 TCGv_ptr qm; 1452 TCGv_i64 rda; 1453 TCGv_i32 rdalo, rdahi; 1454 1455 if (!dc_isar_feature(aa32_mve, s)) { 1456 return false; 1457 } 1458 /* 1459 * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related 1460 * encoding; rdalo always has bit 0 clear so cannot be 13 or 15. 1461 */ 1462 if (a->rdahi == 13 || a->rdahi == 15) { 1463 return false; 1464 } 1465 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1466 return true; 1467 } 1468 1469 /* 1470 * This insn is subject to beat-wise execution. Partial execution 1471 * of an A=0 (no-accumulate) insn which does not execute the first 1472 * beat must start with the current value of RdaHi:RdaLo, not zero. 
1473 */ 1474 if (a->a || mve_skip_first_beat(s)) { 1475 /* Accumulate input from RdaHi:RdaLo */ 1476 rda = tcg_temp_new_i64(); 1477 rdalo = load_reg(s, a->rdalo); 1478 rdahi = load_reg(s, a->rdahi); 1479 tcg_gen_concat_i32_i64(rda, rdalo, rdahi); 1480 tcg_temp_free_i32(rdalo); 1481 tcg_temp_free_i32(rdahi); 1482 } else { 1483 /* Accumulate starting at zero */ 1484 rda = tcg_const_i64(0); 1485 } 1486 1487 qm = mve_qreg_ptr(a->qm); 1488 if (a->u) { 1489 gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda); 1490 } else { 1491 gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda); 1492 } 1493 tcg_temp_free_ptr(qm); 1494 1495 rdalo = tcg_temp_new_i32(); 1496 rdahi = tcg_temp_new_i32(); 1497 tcg_gen_extrl_i64_i32(rdalo, rda); 1498 tcg_gen_extrh_i64_i32(rdahi, rda); 1499 store_reg(s, a->rdalo, rdalo); 1500 store_reg(s, a->rdahi, rdahi); 1501 tcg_temp_free_i64(rda); 1502 mve_update_eci(s); 1503 return true; 1504 } 1505 1506 static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn) 1507 { 1508 TCGv_ptr qd; 1509 uint64_t imm; 1510 1511 if (!dc_isar_feature(aa32_mve, s) || 1512 !mve_check_qreg_bank(s, a->qd) || 1513 !fn) { 1514 return false; 1515 } 1516 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1517 return true; 1518 } 1519 1520 imm = asimd_imm_const(a->imm, a->cmode, a->op); 1521 1522 qd = mve_qreg_ptr(a->qd); 1523 fn(cpu_env, qd, tcg_constant_i64(imm)); 1524 tcg_temp_free_ptr(qd); 1525 mve_update_eci(s); 1526 return true; 1527 } 1528 1529 static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a) 1530 { 1531 /* Handle decode of cmode/op here between VORR/VBIC/VMOV */ 1532 MVEGenOneOpImmFn *fn; 1533 1534 if ((a->cmode & 1) && a->cmode < 12) { 1535 if (a->op) { 1536 /* 1537 * For op=1, the immediate will be inverted by asimd_imm_const(), 1538 * so the VBIC becomes a logical AND operation. 1539 */ 1540 fn = gen_helper_mve_vandi; 1541 } else { 1542 fn = gen_helper_mve_vorri; 1543 } 1544 } else { 1545 /* There is one unallocated cmode/op combination in this space */ 1546 if (a->cmode == 15 && a->op == 1) { 1547 return false; 1548 } 1549 /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */ 1550 fn = gen_helper_mve_vmovi; 1551 } 1552 return do_1imm(s, a, fn); 1553 } 1554 1555 static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn, 1556 bool negateshift) 1557 { 1558 TCGv_ptr qd, qm; 1559 int shift = a->shift; 1560 1561 if (!dc_isar_feature(aa32_mve, s) || 1562 !mve_check_qreg_bank(s, a->qd | a->qm) || 1563 !fn) { 1564 return false; 1565 } 1566 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1567 return true; 1568 } 1569 1570 /* 1571 * When we handle a right shift insn using a left-shift helper 1572 * which permits a negative shift count to indicate a right-shift, 1573 * we must negate the shift count. 
1574 */ 1575 if (negateshift) { 1576 shift = -shift; 1577 } 1578 1579 qd = mve_qreg_ptr(a->qd); 1580 qm = mve_qreg_ptr(a->qm); 1581 fn(cpu_env, qd, qm, tcg_constant_i32(shift)); 1582 tcg_temp_free_ptr(qd); 1583 tcg_temp_free_ptr(qm); 1584 mve_update_eci(s); 1585 return true; 1586 } 1587 1588 #define DO_2SHIFT(INSN, FN, NEGATESHIFT) \ 1589 static bool trans_##INSN(DisasContext *s, arg_2shift *a) \ 1590 { \ 1591 static MVEGenTwoOpShiftFn * const fns[] = { \ 1592 gen_helper_mve_##FN##b, \ 1593 gen_helper_mve_##FN##h, \ 1594 gen_helper_mve_##FN##w, \ 1595 NULL, \ 1596 }; \ 1597 return do_2shift(s, a, fns[a->size], NEGATESHIFT); \ 1598 } 1599 1600 DO_2SHIFT(VSHLI, vshli_u, false) 1601 DO_2SHIFT(VQSHLI_S, vqshli_s, false) 1602 DO_2SHIFT(VQSHLI_U, vqshli_u, false) 1603 DO_2SHIFT(VQSHLUI, vqshlui_s, false) 1604 /* These right shifts use a left-shift helper with negated shift count */ 1605 DO_2SHIFT(VSHRI_S, vshli_s, true) 1606 DO_2SHIFT(VSHRI_U, vshli_u, true) 1607 DO_2SHIFT(VRSHRI_S, vrshli_s, true) 1608 DO_2SHIFT(VRSHRI_U, vrshli_u, true) 1609 1610 DO_2SHIFT(VSRI, vsri, false) 1611 DO_2SHIFT(VSLI, vsli, false) 1612 1613 #define DO_2SHIFT_FP(INSN, FN) \ 1614 static bool trans_##INSN(DisasContext *s, arg_2shift *a) \ 1615 { \ 1616 if (!dc_isar_feature(aa32_mve_fp, s)) { \ 1617 return false; \ 1618 } \ 1619 return do_2shift(s, a, gen_helper_mve_##FN, false); \ 1620 } 1621 1622 DO_2SHIFT_FP(VCVT_SH_fixed, vcvt_sh) 1623 DO_2SHIFT_FP(VCVT_UH_fixed, vcvt_uh) 1624 DO_2SHIFT_FP(VCVT_HS_fixed, vcvt_hs) 1625 DO_2SHIFT_FP(VCVT_HU_fixed, vcvt_hu) 1626 DO_2SHIFT_FP(VCVT_SF_fixed, vcvt_sf) 1627 DO_2SHIFT_FP(VCVT_UF_fixed, vcvt_uf) 1628 DO_2SHIFT_FP(VCVT_FS_fixed, vcvt_fs) 1629 DO_2SHIFT_FP(VCVT_FU_fixed, vcvt_fu) 1630 1631 static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a, 1632 MVEGenTwoOpShiftFn *fn) 1633 { 1634 TCGv_ptr qda; 1635 TCGv_i32 rm; 1636 1637 if (!dc_isar_feature(aa32_mve, s) || 1638 !mve_check_qreg_bank(s, a->qda) || 1639 a->rm == 13 || a->rm == 15 || !fn) { 1640 /* Rm cases are UNPREDICTABLE */ 1641 return false; 1642 } 1643 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1644 return true; 1645 } 1646 1647 qda = mve_qreg_ptr(a->qda); 1648 rm = load_reg(s, a->rm); 1649 fn(cpu_env, qda, qda, rm); 1650 tcg_temp_free_ptr(qda); 1651 tcg_temp_free_i32(rm); 1652 mve_update_eci(s); 1653 return true; 1654 } 1655 1656 #define DO_2SHIFT_SCALAR(INSN, FN) \ 1657 static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a) \ 1658 { \ 1659 static MVEGenTwoOpShiftFn * const fns[] = { \ 1660 gen_helper_mve_##FN##b, \ 1661 gen_helper_mve_##FN##h, \ 1662 gen_helper_mve_##FN##w, \ 1663 NULL, \ 1664 }; \ 1665 return do_2shift_scalar(s, a, fns[a->size]); \ 1666 } 1667 1668 DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s) 1669 DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u) 1670 DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s) 1671 DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u) 1672 DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s) 1673 DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u) 1674 DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s) 1675 DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u) 1676 1677 #define DO_VSHLL(INSN, FN) \ 1678 static bool trans_##INSN(DisasContext *s, arg_2shift *a) \ 1679 { \ 1680 static MVEGenTwoOpShiftFn * const fns[] = { \ 1681 gen_helper_mve_##FN##b, \ 1682 gen_helper_mve_##FN##h, \ 1683 }; \ 1684 return do_2shift(s, a, fns[a->size], false); \ 1685 } 1686 1687 DO_VSHLL(VSHLL_BS, vshllbs) 1688 DO_VSHLL(VSHLL_BU, vshllbu) 1689 DO_VSHLL(VSHLL_TS, vshllts) 1690 DO_VSHLL(VSHLL_TU, vshlltu) 1691 1692 #define DO_2SHIFT_N(INSN, 
FN) \ 1693 static bool trans_##INSN(DisasContext *s, arg_2shift *a) \ 1694 { \ 1695 static MVEGenTwoOpShiftFn * const fns[] = { \ 1696 gen_helper_mve_##FN##b, \ 1697 gen_helper_mve_##FN##h, \ 1698 }; \ 1699 return do_2shift(s, a, fns[a->size], false); \ 1700 } 1701 1702 DO_2SHIFT_N(VSHRNB, vshrnb) 1703 DO_2SHIFT_N(VSHRNT, vshrnt) 1704 DO_2SHIFT_N(VRSHRNB, vrshrnb) 1705 DO_2SHIFT_N(VRSHRNT, vrshrnt) 1706 DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s) 1707 DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s) 1708 DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u) 1709 DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u) 1710 DO_2SHIFT_N(VQSHRUNB, vqshrunb) 1711 DO_2SHIFT_N(VQSHRUNT, vqshrunt) 1712 DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s) 1713 DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s) 1714 DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u) 1715 DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u) 1716 DO_2SHIFT_N(VQRSHRUNB, vqrshrunb) 1717 DO_2SHIFT_N(VQRSHRUNT, vqrshrunt) 1718 1719 static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a) 1720 { 1721 /* 1722 * Whole Vector Left Shift with Carry. The carry is taken 1723 * from a general purpose register and written back there. 1724 * An imm of 0 means "shift by 32". 1725 */ 1726 TCGv_ptr qd; 1727 TCGv_i32 rdm; 1728 1729 if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) { 1730 return false; 1731 } 1732 if (a->rdm == 13 || a->rdm == 15) { 1733 /* CONSTRAINED UNPREDICTABLE: we UNDEF */ 1734 return false; 1735 } 1736 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1737 return true; 1738 } 1739 1740 qd = mve_qreg_ptr(a->qd); 1741 rdm = load_reg(s, a->rdm); 1742 gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm)); 1743 store_reg(s, a->rdm, rdm); 1744 tcg_temp_free_ptr(qd); 1745 mve_update_eci(s); 1746 return true; 1747 } 1748 1749 static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn) 1750 { 1751 TCGv_ptr qd; 1752 TCGv_i32 rn; 1753 1754 /* 1755 * Vector increment/decrement and duplicate (VIDUP, VDDUP). 1756 * This fills the vector with elements of successively increasing 1757 * or decreasing values, starting from Rn. 1758 */ 1759 if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) { 1760 return false; 1761 } 1762 if (a->size == MO_64) { 1763 /* size 0b11 is another encoding */ 1764 return false; 1765 } 1766 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1767 return true; 1768 } 1769 1770 qd = mve_qreg_ptr(a->qd); 1771 rn = load_reg(s, a->rn); 1772 fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm)); 1773 store_reg(s, a->rn, rn); 1774 tcg_temp_free_ptr(qd); 1775 mve_update_eci(s); 1776 return true; 1777 } 1778 1779 static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn) 1780 { 1781 TCGv_ptr qd; 1782 TCGv_i32 rn, rm; 1783 1784 /* 1785 * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP). 1786 * This fills the vector with elements of successively increasing 1787 * or decreasing values, starting from Rn. Rm specifies a point where 1788 * the count wraps back around to 0. The updated offset is written back 1789 * to Rn. 1790 */ 1791 if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) { 1792 return false; 1793 } 1794 if (!fn || a->rm == 13 || a->rm == 15) { 1795 /* 1796 * size 0b11 is another encoding; Rm == 13 is UNPREDICTABLE; 1797 * Rm == 15 is VIDUP, VDDUP.
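 * (An all-ones Rm field is not a real register operand here: it is how the
 *  non-wrapping VIDUP/VDDUP encodings are distinguished from VIWDUP/VDWDUP.)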
1798 */ 1799 return false; 1800 } 1801 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1802 return true; 1803 } 1804 1805 qd = mve_qreg_ptr(a->qd); 1806 rn = load_reg(s, a->rn); 1807 rm = load_reg(s, a->rm); 1808 fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm)); 1809 store_reg(s, a->rn, rn); 1810 tcg_temp_free_ptr(qd); 1811 tcg_temp_free_i32(rm); 1812 mve_update_eci(s); 1813 return true; 1814 } 1815 1816 static bool trans_VIDUP(DisasContext *s, arg_vidup *a) 1817 { 1818 static MVEGenVIDUPFn * const fns[] = { 1819 gen_helper_mve_vidupb, 1820 gen_helper_mve_viduph, 1821 gen_helper_mve_vidupw, 1822 NULL, 1823 }; 1824 return do_vidup(s, a, fns[a->size]); 1825 } 1826 1827 static bool trans_VDDUP(DisasContext *s, arg_vidup *a) 1828 { 1829 static MVEGenVIDUPFn * const fns[] = { 1830 gen_helper_mve_vidupb, 1831 gen_helper_mve_viduph, 1832 gen_helper_mve_vidupw, 1833 NULL, 1834 }; 1835 /* VDDUP is just like VIDUP but with a negative immediate */ 1836 a->imm = -a->imm; 1837 return do_vidup(s, a, fns[a->size]); 1838 } 1839 1840 static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a) 1841 { 1842 static MVEGenVIWDUPFn * const fns[] = { 1843 gen_helper_mve_viwdupb, 1844 gen_helper_mve_viwduph, 1845 gen_helper_mve_viwdupw, 1846 NULL, 1847 }; 1848 return do_viwdup(s, a, fns[a->size]); 1849 } 1850 1851 static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a) 1852 { 1853 static MVEGenVIWDUPFn * const fns[] = { 1854 gen_helper_mve_vdwdupb, 1855 gen_helper_mve_vdwduph, 1856 gen_helper_mve_vdwdupw, 1857 NULL, 1858 }; 1859 return do_viwdup(s, a, fns[a->size]); 1860 } 1861 1862 static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn) 1863 { 1864 TCGv_ptr qn, qm; 1865 1866 if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) || 1867 !fn) { 1868 return false; 1869 } 1870 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1871 return true; 1872 } 1873 1874 qn = mve_qreg_ptr(a->qn); 1875 qm = mve_qreg_ptr(a->qm); 1876 fn(cpu_env, qn, qm); 1877 tcg_temp_free_ptr(qn); 1878 tcg_temp_free_ptr(qm); 1879 if (a->mask) { 1880 /* VPT */ 1881 gen_vpst(s, a->mask); 1882 } 1883 /* This insn updates predication bits */ 1884 s->base.is_jmp = DISAS_UPDATE_NOCHAIN; 1885 mve_update_eci(s); 1886 return true; 1887 } 1888 1889 static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a, 1890 MVEGenScalarCmpFn *fn) 1891 { 1892 TCGv_ptr qn; 1893 TCGv_i32 rm; 1894 1895 if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) { 1896 return false; 1897 } 1898 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1899 return true; 1900 } 1901 1902 qn = mve_qreg_ptr(a->qn); 1903 if (a->rm == 15) { 1904 /* Encoding Rm=0b1111 means "constant zero" */ 1905 rm = tcg_constant_i32(0); 1906 } else { 1907 rm = load_reg(s, a->rm); 1908 } 1909 fn(cpu_env, qn, rm); 1910 tcg_temp_free_ptr(qn); 1911 tcg_temp_free_i32(rm); 1912 if (a->mask) { 1913 /* VPT */ 1914 gen_vpst(s, a->mask); 1915 } 1916 /* This insn updates predication bits */ 1917 s->base.is_jmp = DISAS_UPDATE_NOCHAIN; 1918 mve_update_eci(s); 1919 return true; 1920 } 1921 1922 #define DO_VCMP(INSN, FN) \ 1923 static bool trans_##INSN(DisasContext *s, arg_vcmp *a) \ 1924 { \ 1925 static MVEGenCmpFn * const fns[] = { \ 1926 gen_helper_mve_##FN##b, \ 1927 gen_helper_mve_##FN##h, \ 1928 gen_helper_mve_##FN##w, \ 1929 NULL, \ 1930 }; \ 1931 return do_vcmp(s, a, fns[a->size]); \ 1932 } \ 1933 static bool trans_##INSN##_scalar(DisasContext *s, \ 1934 arg_vcmp_scalar *a) \ 1935 { \ 1936 static MVEGenScalarCmpFn * const fns[] = { \ 1937 gen_helper_mve_##FN##_scalarb, \ 1938 
gen_helper_mve_##FN##_scalarh, \ 1939 gen_helper_mve_##FN##_scalarw, \ 1940 NULL, \ 1941 }; \ 1942 return do_vcmp_scalar(s, a, fns[a->size]); \ 1943 } 1944 1945 DO_VCMP(VCMPEQ, vcmpeq) 1946 DO_VCMP(VCMPNE, vcmpne) 1947 DO_VCMP(VCMPCS, vcmpcs) 1948 DO_VCMP(VCMPHI, vcmphi) 1949 DO_VCMP(VCMPGE, vcmpge) 1950 DO_VCMP(VCMPLT, vcmplt) 1951 DO_VCMP(VCMPGT, vcmpgt) 1952 DO_VCMP(VCMPLE, vcmple) 1953 1954 #define DO_VCMP_FP(INSN, FN) \ 1955 static bool trans_##INSN(DisasContext *s, arg_vcmp *a) \ 1956 { \ 1957 static MVEGenCmpFn * const fns[] = { \ 1958 NULL, \ 1959 gen_helper_mve_##FN##h, \ 1960 gen_helper_mve_##FN##s, \ 1961 NULL, \ 1962 }; \ 1963 if (!dc_isar_feature(aa32_mve_fp, s)) { \ 1964 return false; \ 1965 } \ 1966 return do_vcmp(s, a, fns[a->size]); \ 1967 } \ 1968 static bool trans_##INSN##_scalar(DisasContext *s, \ 1969 arg_vcmp_scalar *a) \ 1970 { \ 1971 static MVEGenScalarCmpFn * const fns[] = { \ 1972 NULL, \ 1973 gen_helper_mve_##FN##_scalarh, \ 1974 gen_helper_mve_##FN##_scalars, \ 1975 NULL, \ 1976 }; \ 1977 if (!dc_isar_feature(aa32_mve_fp, s)) { \ 1978 return false; \ 1979 } \ 1980 return do_vcmp_scalar(s, a, fns[a->size]); \ 1981 } 1982 1983 DO_VCMP_FP(VCMPEQ_fp, vfcmpeq) 1984 DO_VCMP_FP(VCMPNE_fp, vfcmpne) 1985 DO_VCMP_FP(VCMPGE_fp, vfcmpge) 1986 DO_VCMP_FP(VCMPLT_fp, vfcmplt) 1987 DO_VCMP_FP(VCMPGT_fp, vfcmpgt) 1988 DO_VCMP_FP(VCMPLE_fp, vfcmple) 1989 1990 static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn) 1991 { 1992 /* 1993 * MIN/MAX operations across a vector: compute the min or 1994 * max of the initial value in a general purpose register 1995 * and all the elements in the vector, and store it back 1996 * into the general purpose register. 1997 */ 1998 TCGv_ptr qm; 1999 TCGv_i32 rda; 2000 2001 if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) || 2002 !fn || a->rda == 13 || a->rda == 15) { 2003 /* Rda cases are UNPREDICTABLE */ 2004 return false; 2005 } 2006 if (!mve_eci_check(s) || !vfp_access_check(s)) { 2007 return true; 2008 } 2009 2010 qm = mve_qreg_ptr(a->qm); 2011 rda = load_reg(s, a->rda); 2012 fn(rda, cpu_env, qm, rda); 2013 store_reg(s, a->rda, rda); 2014 tcg_temp_free_ptr(qm); 2015 mve_update_eci(s); 2016 return true; 2017 } 2018 2019 #define DO_VMAXV(INSN, FN) \ 2020 static bool trans_##INSN(DisasContext *s, arg_vmaxv *a) \ 2021 { \ 2022 static MVEGenVADDVFn * const fns[] = { \ 2023 gen_helper_mve_##FN##b, \ 2024 gen_helper_mve_##FN##h, \ 2025 gen_helper_mve_##FN##w, \ 2026 NULL, \ 2027 }; \ 2028 return do_vmaxv(s, a, fns[a->size]); \ 2029 } 2030 2031 DO_VMAXV(VMAXV_S, vmaxvs) 2032 DO_VMAXV(VMAXV_U, vmaxvu) 2033 DO_VMAXV(VMAXAV, vmaxav) 2034 DO_VMAXV(VMINV_S, vminvs) 2035 DO_VMAXV(VMINV_U, vminvu) 2036 DO_VMAXV(VMINAV, vminav) 2037 2038 #define DO_VMAXV_FP(INSN, FN) \ 2039 static bool trans_##INSN(DisasContext *s, arg_vmaxv *a) \ 2040 { \ 2041 static MVEGenVADDVFn * const fns[] = { \ 2042 NULL, \ 2043 gen_helper_mve_##FN##h, \ 2044 gen_helper_mve_##FN##s, \ 2045 NULL, \ 2046 }; \ 2047 if (!dc_isar_feature(aa32_mve_fp, s)) { \ 2048 return false; \ 2049 } \ 2050 return do_vmaxv(s, a, fns[a->size]); \ 2051 } 2052 2053 DO_VMAXV_FP(VMAXNMV, vmaxnmv) 2054 DO_VMAXV_FP(VMINNMV, vminnmv) 2055 DO_VMAXV_FP(VMAXNMAV, vmaxnmav) 2056 DO_VMAXV_FP(VMINNMAV, vminnmav) 2057 2058 static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn) 2059 { 2060 /* Absolute difference accumulated across vector */ 2061 TCGv_ptr qn, qm; 2062 TCGv_i32 rda; 2063 2064 if (!dc_isar_feature(aa32_mve, s) || 2065 !mve_check_qreg_bank(s, a->qm | a->qn) || 
2066 !fn || a->rda == 13 || a->rda == 15) { 2067 /* Rda cases are UNPREDICTABLE */ 2068 return false; 2069 } 2070 if (!mve_eci_check(s) || !vfp_access_check(s)) { 2071 return true; 2072 } 2073 2074 qm = mve_qreg_ptr(a->qm); 2075 qn = mve_qreg_ptr(a->qn); 2076 rda = load_reg(s, a->rda); 2077 fn(rda, cpu_env, qn, qm, rda); 2078 store_reg(s, a->rda, rda); 2079 tcg_temp_free_ptr(qm); 2080 tcg_temp_free_ptr(qn); 2081 mve_update_eci(s); 2082 return true; 2083 } 2084 2085 #define DO_VABAV(INSN, FN) \ 2086 static bool trans_##INSN(DisasContext *s, arg_vabav *a) \ 2087 { \ 2088 static MVEGenVABAVFn * const fns[] = { \ 2089 gen_helper_mve_##FN##b, \ 2090 gen_helper_mve_##FN##h, \ 2091 gen_helper_mve_##FN##w, \ 2092 NULL, \ 2093 }; \ 2094 return do_vabav(s, a, fns[a->size]); \ 2095 } 2096 2097 DO_VABAV(VABAV_S, vabavs) 2098 DO_VABAV(VABAV_U, vabavu) 2099 2100 static bool trans_VMOV_to_2gp(DisasContext *s, arg_VMOV_to_2gp *a) 2101 { 2102 /* 2103 * VMOV two 32-bit vector lanes to two general-purpose registers. 2104 * This insn is not predicated but it is subject to beat-wise 2105 * execution if it is not in an IT block. For us this means 2106 * only that if PSR.ECI says we should not be executing the beat 2107 * corresponding to the lane of the vector register being accessed 2108 * then we should skip performing the move, and that we need to do 2109 * the usual check for bad ECI state and advance of ECI state. 2110 * (If PSR.ECI is non-zero then we cannot be in an IT block.) 2111 */ 2112 TCGv_i32 tmp; 2113 int vd; 2114 2115 if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) || 2116 a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15 || 2117 a->rt == a->rt2) { 2118 /* Rt/Rt2 cases are UNPREDICTABLE */ 2119 return false; 2120 } 2121 if (!mve_eci_check(s) || !vfp_access_check(s)) { 2122 return true; 2123 } 2124 2125 /* Convert Qreg index to Dreg for read_neon_element32() etc */ 2126 vd = a->qd * 2; 2127 2128 if (!mve_skip_vmov(s, vd, a->idx, MO_32)) { 2129 tmp = tcg_temp_new_i32(); 2130 read_neon_element32(tmp, vd, a->idx, MO_32); 2131 store_reg(s, a->rt, tmp); 2132 } 2133 if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) { 2134 tmp = tcg_temp_new_i32(); 2135 read_neon_element32(tmp, vd + 1, a->idx, MO_32); 2136 store_reg(s, a->rt2, tmp); 2137 } 2138 2139 mve_update_and_store_eci(s); 2140 return true; 2141 } 2142 2143 static bool trans_VMOV_from_2gp(DisasContext *s, arg_VMOV_to_2gp *a) 2144 { 2145 /* 2146 * VMOV two general-purpose registers to two 32-bit vector lanes. 2147 * This insn is not predicated but it is subject to beat-wise 2148 * execution if it is not in an IT block. For us this means 2149 * only that if PSR.ECI says we should not be executing the beat 2150 * corresponding to the lane of the vector register being accessed 2151 * then we should skip performing the move, and that we need to do 2152 * the usual check for bad ECI state and advance of ECI state. 2153 * (If PSR.ECI is non-zero then we cannot be in an IT block.)
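 * (The per-beat decision to skip is made by mve_skip_vmov() for each of
 *  the two lanes accessed below.)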
2154 */ 2155 TCGv_i32 tmp; 2156 int vd; 2157 2158 if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) || 2159 a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15) { 2160 /* Rt/Rt2 cases are UNPREDICTABLE */ 2161 return false; 2162 } 2163 if (!mve_eci_check(s) || !vfp_access_check(s)) { 2164 return true; 2165 } 2166 2167 /* Convert Qreg idx to Dreg for read_neon_element32() etc */ 2168 vd = a->qd * 2; 2169 2170 if (!mve_skip_vmov(s, vd, a->idx, MO_32)) { 2171 tmp = load_reg(s, a->rt); 2172 write_neon_element32(tmp, vd, a->idx, MO_32); 2173 tcg_temp_free_i32(tmp); 2174 } 2175 if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) { 2176 tmp = load_reg(s, a->rt2); 2177 write_neon_element32(tmp, vd + 1, a->idx, MO_32); 2178 tcg_temp_free_i32(tmp); 2179 } 2180 2181 mve_update_and_store_eci(s); 2182 return true; 2183 } 2184