/*
 * ARM translation: M-profile MVE instructions
 *
 * Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

static inline int vidup_imm(DisasContext *s, int x)
{
    return 1 << x;
}

/* Include the generated decoder */
#include "decode-mve.c.inc"

typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStSGFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStIlFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVCVTRmodeFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}

static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}

static bool mve_no_predication(DisasContext *s)
{
    /*
     * Return true if we are executing the entire MVE instruction
     * with no predication or partial-execution, and so we can safely
     * use an inline TCG vector implementation.
     */
    return s->eci == 0 && s->mve_no_pred;
}

static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     */
    return qmask < 8;
}

bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
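     * (The ECI field in PSR records which beats of a partially-executed
     * beatwise insn have already been completed.)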
     */
    s->eci_handled = true;
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved value: INVSTATE UsageFault */
        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
                           default_exception_el(s));
        return false;
    }
}

void mve_update_eci(DisasContext *s)
{
    /*
     * The helper function will always update the CPUState field,
     * so we only need to update the DisasContext field.
     */
    if (s->eci) {
        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
    }
}

void mve_update_and_store_eci(DisasContext *s)
{
    /*
     * For insns which don't call a helper function that will call
     * mve_advance_vpt(), this version updates s->eci and also stores
     * it out to the CPUState field.
     */
    if (s->eci) {
        mve_update_eci(s);
        store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
    }
}

static bool mve_skip_first_beat(DisasContext *s)
{
    /* Return true if PSR.ECI says we must skip the first beat of this insn */
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        g_assert_not_reached();
    }
}

static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
                    unsigned msize)
{
    TCGv_i32 addr;
    uint32_t offset;
    TCGv_ptr qreg;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }

    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
    if (a->rn == 15 || (a->rn == 13 && a->w)) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }
    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    qreg = mve_qreg_ptr(a->qd);
    fn(cpu_env, qreg, addr);
    tcg_temp_free_ptr(qreg);

    /*
     * Writeback always happens after the last beat of the insn,
     * regardless of predication
     */
    if (a->w) {
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    mve_update_eci(s);
    return true;
}

static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
{
    static MVEGenLdStFn * const ldstfns[4][2] = {
        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
        { NULL, NULL }
    };
    return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
}

#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
    {                                                           \
        static MVEGenLdStFn * const ldstfns[2][2] = {           \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
            { NULL, gen_helper_mve_##ULD },                     \
        };                                                      \
        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
    }

DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)

static bool do_ldst_sg(DisasContext *s, arg_vldst_sg *a, MVEGenLdStSGFn fn)
{
    TCGv_i32 addr;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn || a->rn == 15) {
        /* Rn case is UNPREDICTABLE */
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    addr = load_reg(s, a->rn);

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, addr);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_i32(addr);
    mve_update_eci(s);
    return true;
}

/*
 * The naming scheme here is "vldrb_sg_sh == in-memory byte loads
 * sign-extended to halfword elements in register". _os_ indicates that
 * the offsets in Qm should be scaled by the element size.
 */
/* This macro is just to make the arrays more compact in these functions */
#define F(N) gen_helper_mve_##N

/* VLDRB/VSTRB (ie msize 1) with OS=1 is UNPREDICTABLE; we UNDEF */
static bool trans_VLDR_S_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
        { NULL, F(vldrb_sg_sh), F(vldrb_sg_sw), NULL },
        { NULL, NULL, F(vldrh_sg_sw), NULL },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL }
    }, {
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, F(vldrh_sg_os_sw), NULL },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL }
    }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VLDR_U_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
        { F(vldrb_sg_ub), F(vldrb_sg_uh), F(vldrb_sg_uw), NULL },
        { NULL, F(vldrh_sg_uh), F(vldrh_sg_uw), NULL },
        { NULL, NULL, F(vldrw_sg_uw), NULL },
        { NULL, NULL, NULL, F(vldrd_sg_ud) }
    }, {
        { NULL, NULL, NULL, NULL },
        { NULL, F(vldrh_sg_os_uh), F(vldrh_sg_os_uw), NULL },
        { NULL, NULL, F(vldrw_sg_os_uw), NULL },
        { NULL, NULL, NULL, F(vldrd_sg_os_ud) }
    }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VSTR_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
        { F(vstrb_sg_ub), F(vstrb_sg_uh), F(vstrb_sg_uw), NULL },
        { NULL, F(vstrh_sg_uh), F(vstrh_sg_uw), NULL },
        { NULL, NULL, F(vstrw_sg_uw), NULL },
        { NULL, NULL, NULL, F(vstrd_sg_ud) }
    }, {
        { NULL, NULL, NULL, NULL },
        { NULL, F(vstrh_sg_os_uh), F(vstrh_sg_os_uw), NULL },
        { NULL, NULL, F(vstrw_sg_os_uw), NULL },
        { NULL, NULL, NULL, F(vstrd_sg_os_ud) }
    }
    };
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

#undef F

static bool do_ldst_sg_imm(DisasContext *s, arg_vldst_sg_imm *a,
                           MVEGenLdStSGFn *fn, unsigned msize)
{
    uint32_t offset;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(offset));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

static bool trans_VLDRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vldrw_sg_uw,
        gen_helper_mve_vldrw_sg_wb_uw,
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
}

static bool trans_VLDRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vldrd_sg_ud,
        gen_helper_mve_vldrd_sg_wb_ud,
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
}

static bool trans_VSTRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vstrw_sg_uw,
        gen_helper_mve_vstrw_sg_wb_uw,
    };
    return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
}

static bool trans_VSTRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vstrd_sg_ud,
        gen_helper_mve_vstrd_sg_wb_ud,
    };
    return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
}

static bool do_vldst_il(DisasContext *s, arg_vldst_il *a, MVEGenLdStIlFn *fn,
                        int addrinc)
{
    TCGv_i32 rn;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn || (a->rn == 13 && a->w) || a->rn == 15) {
        /* Variously UNPREDICTABLE or UNDEF or related-encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    rn = load_reg(s, a->rn);
    /*
     * We pass the index of Qd, not a pointer, because the helper must
     * access multiple Q registers starting at Qd and working up.
     */
    fn(cpu_env, tcg_constant_i32(a->qd), rn);

    if (a->w) {
        tcg_gen_addi_i32(rn, rn, addrinc);
        store_reg(s, a->rn, rn);
    } else {
        tcg_temp_free_i32(rn);
    }
    mve_update_and_store_eci(s);
    return true;
}

/* This macro is just to make the arrays more compact in these functions */
#define F(N) gen_helper_mve_##N

static bool trans_VLD2(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vld20b), F(vld20h), F(vld20w), NULL, },
        { F(vld21b), F(vld21h), F(vld21w), NULL, },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL },
    };
    /* VLD2 accesses Qd and Qd+1, which must stay within Q0..Q7 */
    if (a->qd > 6) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 32);
}

static bool trans_VLD4(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vld40b), F(vld40h), F(vld40w), NULL, },
        { F(vld41b), F(vld41h), F(vld41w), NULL, },
        { F(vld42b), F(vld42h), F(vld42w), NULL, },
        { F(vld43b), F(vld43h), F(vld43w), NULL, },
    };
    /* VLD4 accesses Qd..Qd+3, which must stay within Q0..Q7 */
    if (a->qd > 4) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 64);
}

static bool trans_VST2(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vst20b), F(vst20h), F(vst20w), NULL, },
        { F(vst21b), F(vst21h), F(vst21w), NULL, },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL },
    };
    if (a->qd > 6) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 32);
}

static bool trans_VST4(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vst40b), F(vst40h), F(vst40w), NULL, },
        { F(vst41b), F(vst41h), F(vst41w), NULL, },
        { F(vst42b), F(vst42h), F(vst42w), NULL, },
        { F(vst43b), F(vst43h), F(vst43w), NULL, },
    };
    if (a->qd > 4) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 64);
}

#undef F

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    TCGv_ptr qd;
    TCGv_i32 rt;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rt == 13 || a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rt = load_reg(s, a->rt);
    tcg_gen_dup_i32(a->size, rt, rt);
    gen_helper_mve_vdup(cpu_env, qd, rt);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rt);
    mve_update_eci(s);
    return true;
}

static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_1OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_1OP(VCLZ, vclz)
DO_1OP(VCLS, vcls)
DO_1OP(VABS, vabs)
DO_1OP(VNEG, vneg)
DO_1OP(VQABS, vqabs)
DO_1OP(VQNEG, vqneg)
DO_1OP(VMAXA, vmaxa)
DO_1OP(VMINA, vmina)

/*
 * For simple float/int conversions we use the fixed-point
 * conversion helpers with a zero shift count
 */
#define DO_VCVT(INSN, HFN, SFN)                                         \
    static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_##HFN(env, qd, qm, tcg_constant_i32(0));         \
    }                                                                   \
    static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_##SFN(env, qd, qm, tcg_constant_i32(0));         \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)               \
    {                                                                   \
        static MVEGenOneOpFn * const fns[] = {                          \
            NULL,                                                       \
            gen_##INSN##h,                                              \
            gen_##INSN##s,                                              \
            NULL,                                                       \
        };                                                              \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                         \
            return false;                                               \
        }                                                               \
        return do_1op(s, a, fns[a->size]);                              \
    }

DO_VCVT(VCVT_SF, vcvt_sh, vcvt_sf)
DO_VCVT(VCVT_UF, vcvt_uh, vcvt_uf)
DO_VCVT(VCVT_FS, vcvt_hs, vcvt_fs)
DO_VCVT(VCVT_FU, vcvt_hu, vcvt_fu)

static bool do_vcvt_rmode(DisasContext *s, arg_1op *a,
                          enum arm_fprounding rmode, bool u)
{
    /*
     * Handle VCVT fp to int with specified rounding mode.
     * This is a 1op fn but we must pass the rounding mode as
     * an immediate to the helper.
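     * (arm_rmode_to_sf() converts the FPROUNDING_* constant into the
     * softfloat rounding-mode value that the helper expects.)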
     */
    TCGv_ptr qd, qm;
    static MVEGenVCVTRmodeFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vcvt_rm_sh, gen_helper_mve_vcvt_rm_uh },
        { gen_helper_mve_vcvt_rm_ss, gen_helper_mve_vcvt_rm_us },
        { NULL, NULL },
    };
    MVEGenVCVTRmodeFn *fn = fns[a->size][u];

    if (!dc_isar_feature(aa32_mve_fp, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(arm_rmode_to_sf(rmode)));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_VCVT_RMODE(INSN, RMODE, U)                           \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        return do_vcvt_rmode(s, a, RMODE, U);                   \
    }                                                           \

DO_VCVT_RMODE(VCVTAS, FPROUNDING_TIEAWAY, false)
DO_VCVT_RMODE(VCVTAU, FPROUNDING_TIEAWAY, true)
DO_VCVT_RMODE(VCVTNS, FPROUNDING_TIEEVEN, false)
DO_VCVT_RMODE(VCVTNU, FPROUNDING_TIEEVEN, true)
DO_VCVT_RMODE(VCVTPS, FPROUNDING_POSINF, false)
DO_VCVT_RMODE(VCVTPU, FPROUNDING_POSINF, true)
DO_VCVT_RMODE(VCVTMS, FPROUNDING_NEGINF, false)
DO_VCVT_RMODE(VCVTMU, FPROUNDING_NEGINF, true)

#define DO_VCVT_SH(INSN, FN)                                    \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_1op(s, a, gen_helper_mve_##FN);               \
    }                                                           \

DO_VCVT_SH(VCVTB_SH, vcvtb_sh)
DO_VCVT_SH(VCVTT_SH, vcvtt_sh)
DO_VCVT_SH(VCVTB_HS, vcvtb_hs)
DO_VCVT_SH(VCVTT_HS, vcvtt_hs)

#define DO_VRINT(INSN, RMODE)                                           \
    static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_vrint_rm_h(env, qd, qm,                          \
                                  tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
    }                                                                   \
    static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_vrint_rm_s(env, qd, qm,                          \
                                  tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)               \
    {                                                                   \
        static MVEGenOneOpFn * const fns[] = {                          \
            NULL,                                                       \
            gen_##INSN##h,                                              \
            gen_##INSN##s,                                              \
            NULL,                                                       \
        };                                                              \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                         \
            return false;                                               \
        }                                                               \
        return do_1op(s, a, fns[a->size]);                              \
    }

DO_VRINT(VRINTN, FPROUNDING_TIEEVEN)
DO_VRINT(VRINTA, FPROUNDING_TIEAWAY)
DO_VRINT(VRINTZ, FPROUNDING_ZERO)
DO_VRINT(VRINTM, FPROUNDING_NEGINF)
DO_VRINT(VRINTP, FPROUNDING_POSINF)

static bool trans_VRINTX(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vrintx_h,
        gen_helper_mve_vrintx_s,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

/* Narrowing moves: only size 0 and 1 are valid */
#define DO_VMOVN(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            NULL,                                               \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_VMOVN(VMOVNB, vmovnb)
DO_VMOVN(VMOVNT, vmovnt)
DO_VMOVN(VQMOVUNB, vqmovunb)
DO_VMOVN(VQMOVUNT, vqmovunt)
DO_VMOVN(VQMOVN_BS, vqmovnbs)
DO_VMOVN(VQMOVN_TS, vqmovnts)
DO_VMOVN(VQMOVN_BU, vqmovnbu)
DO_VMOVN(VQMOVN_TU, vqmovntu)

static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev16b,
        NULL,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV32(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev32b,
        gen_helper_mve_vrev32h,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV64(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev64b,
        gen_helper_mve_vrev64h,
        gen_helper_mve_vrev64w,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VMVN(DisasContext *s, arg_1op *a)
{
    return do_1op(s, a, gen_helper_mve_vmvn);
}

static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfabsh,
        gen_helper_mve_vfabss,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfnegh,
        gen_helper_mve_vfnegs,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool do_2op_vec(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn,
                       GVecGen3Fn *vecfn)
{
    TCGv_ptr qd, qn, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    if (vecfn && mve_no_predication(s)) {
        /* MVE vectors are 16 bytes, so pass oprsz = maxsz = 16 to gvec */
        vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qn),
              mve_qreg_offset(a->qm), 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        qn = mve_qreg_ptr(a->qn);
        qm = mve_qreg_ptr(a->qm);
        fn(cpu_env, qd, qn, qm);
        tcg_temp_free_ptr(qd);
        tcg_temp_free_ptr(qn);
        tcg_temp_free_ptr(qm);
    }
    mve_update_eci(s);
    return true;
}

static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn *fn)
{
    return do_2op_vec(s, a, fn, NULL);
}

#define DO_LOGIC(INSN, HELPER, VECFN)                           \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        return do_2op_vec(s, a, HELPER, VECFN);                 \
    }

DO_LOGIC(VAND, gen_helper_mve_vand, tcg_gen_gvec_and)
DO_LOGIC(VBIC, gen_helper_mve_vbic, tcg_gen_gvec_andc)
DO_LOGIC(VORR, gen_helper_mve_vorr, tcg_gen_gvec_or)
DO_LOGIC(VORN, gen_helper_mve_vorn, tcg_gen_gvec_orc)
DO_LOGIC(VEOR, gen_helper_mve_veor, tcg_gen_gvec_xor)

static bool trans_VPSEL(DisasContext *s, arg_2op *a)
{
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    return do_2op(s, a, gen_helper_mve_vpsel);
}

#define DO_2OP_VEC(INSN, FN, VECFN)                             \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_vec(s, a, fns[a->size], VECFN);           \
    }

#define DO_2OP(INSN, FN) DO_2OP_VEC(INSN, FN, NULL)

DO_2OP_VEC(VADD, vadd, tcg_gen_gvec_add)
DO_2OP_VEC(VSUB, vsub, tcg_gen_gvec_sub)
DO_2OP_VEC(VMUL, vmul, tcg_gen_gvec_mul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
DO_2OP_VEC(VMAX_S, vmaxs, tcg_gen_gvec_smax)
DO_2OP_VEC(VMAX_U, vmaxu, tcg_gen_gvec_umax)
DO_2OP_VEC(VMIN_S, vmins, tcg_gen_gvec_smin)
DO_2OP_VEC(VMIN_U, vminu, tcg_gen_gvec_umin)
DO_2OP(VABD_S, vabds)
DO_2OP(VABD_U, vabdu)
DO_2OP(VHADD_S, vhadds)
DO_2OP(VHADD_U, vhaddu)
DO_2OP(VHSUB_S, vhsubs)
DO_2OP(VHSUB_U, vhsubu)
DO_2OP(VMULL_BS, vmullbs)
DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
DO_2OP(VQDMULH, vqdmulh)
DO_2OP(VQRDMULH, vqrdmulh)
DO_2OP(VQADD_S, vqadds)
DO_2OP(VQADD_U, vqaddu)
DO_2OP(VQSUB_S, vqsubs)
DO_2OP(VQSUB_U, vqsubu)
DO_2OP(VSHL_S, vshls)
DO_2OP(VSHL_U, vshlu)
DO_2OP(VRSHL_S, vrshls)
DO_2OP(VRSHL_U, vrshlu)
DO_2OP(VQSHL_S, vqshls)
DO_2OP(VQSHL_U, vqshlu)
DO_2OP(VQRSHL_S, vqrshls)
DO_2OP(VQRSHL_U, vqrshlu)
DO_2OP(VQDMLADH, vqdmladh)
DO_2OP(VQDMLADHX, vqdmladhx)
DO_2OP(VQRDMLADH, vqrdmladh)
DO_2OP(VQRDMLADHX, vqrdmladhx)
DO_2OP(VQDMLSDH, vqdmlsdh)
DO_2OP(VQDMLSDHX, vqdmlsdhx)
DO_2OP(VQRDMLSDH, vqrdmlsdh)
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
DO_2OP(VRHADD_S, vrhadds)
DO_2OP(VRHADD_U, vrhaddu)
/*
 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
 * so we can reuse the DO_2OP macro. (Our implementation calculates the
 * "expected" results in this case.) Similarly for VHCADD.
 */
DO_2OP(VCADD90, vcadd90)
DO_2OP(VCADD270, vcadd270)
DO_2OP(VHCADD90, vhcadd90)
DO_2OP(VHCADD270, vhcadd270)

static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullbh,
        gen_helper_mve_vqdmullbw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullth,
        gen_helper_mve_vqdmulltw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
{
    /*
     * Note that a->size indicates the output size, ie VMULL.P8
     * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
     * is the 16x16->32 operation and a->size is MO_32.
     */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpbh,
        gen_helper_mve_vmullpbw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
{
    /* a->size is as for trans_VMULLP_B */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpth,
        gen_helper_mve_vmullptw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

/*
 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
 * of the 32-bit elements in each lane of the input vectors, where the
 * carry-out of each add is the carry-in of the next. The initial carry
 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
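 * For example, VADC.I32 Qd, Qn, Qm computes Qd[i] = Qn[i] + Qm[i] + carry
 * for each of the four 32-bit lanes, with the carry rippling upwards from
 * element 0 and the final carry-out landing in FPSCR.C.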
 * These insns are subject to beat-wise execution. Partial execution
 * of an I=1 (initial carry input fixed) insn which does not
 * execute the first beat must start with the current FPSCR.NZCV
 * value, not the fixed constant input.
 */
static bool trans_VADC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vadc);
}

static bool trans_VADCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VADC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vadci);
}

static bool trans_VSBC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vsbc);
}

static bool trans_VSBCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VSBC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vsbci);
}

#define DO_2OP_FP(INSN, FN)                                     \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP_FP(VADD_fp, vfadd)
DO_2OP_FP(VSUB_fp, vfsub)
DO_2OP_FP(VMUL_fp, vfmul)
DO_2OP_FP(VABD_fp, vfabd)
DO_2OP_FP(VMAXNM, vmaxnm)
DO_2OP_FP(VMINNM, vminnm)
DO_2OP_FP(VCADD90_fp, vfcadd90)
DO_2OP_FP(VCADD270_fp, vfcadd270)
DO_2OP_FP(VFMA, vfma)
DO_2OP_FP(VFMS, vfms)
DO_2OP_FP(VCMUL0, vcmul0)
DO_2OP_FP(VCMUL90, vcmul90)
DO_2OP_FP(VCMUL180, vcmul180)
DO_2OP_FP(VCMUL270, vcmul270)
DO_2OP_FP(VCMLA0, vcmla0)
DO_2OP_FP(VCMLA90, vcmla90)
DO_2OP_FP(VCMLA180, vcmla180)
DO_2OP_FP(VCMLA270, vcmla270)
DO_2OP_FP(VMAXNMA, vmaxnma)
DO_2OP_FP(VMINNMA, vminnma)

static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
                          MVEGenTwoOpScalarFn fn)
{
    TCGv_ptr qd, qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn) ||
        !fn) {
        return false;
    }
    if (a->rm == 13 || a->rm == 15) {
        /* UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qd, qn, rm);
    tcg_temp_free_i32(rm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_2OP_SCALAR(INSN, FN)                                 \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)
DO_2OP_SCALAR(VMLA, vmla)
DO_2OP_SCALAR(VMLAS, vmlas)
DO_2OP_SCALAR(VQDMLAH, vqdmlah)
DO_2OP_SCALAR(VQRDMLAH, vqrdmlah)
DO_2OP_SCALAR(VQDMLASH, vqdmlash)
DO_2OP_SCALAR(VQRDMLASH, vqrdmlash)

static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullb_scalarh,
        gen_helper_mve_vqdmullb_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullt_scalarh,
        gen_helper_mve_vqdmullt_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}


#define DO_2OP_FP_SCALAR(INSN, FN)                              \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_FP_SCALAR(VADD_fp_scalar, vfadd_scalar)
DO_2OP_FP_SCALAR(VSUB_fp_scalar, vfsub_scalar)
DO_2OP_FP_SCALAR(VMUL_fp_scalar, vfmul_scalar)
DO_2OP_FP_SCALAR(VFMA_scalar, vfma_scalar)
DO_2OP_FP_SCALAR(VFMAS_scalar, vfmas_scalar)

static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
                             MVEGenLongDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn | a->qm) ||
        !fn) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
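     * (This is why the existing RdaHi:RdaLo is loaded below even when A=0,
     * if PSR.ECI indicates that earlier beats have already executed.)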
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        rda = tcg_const_i64(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
        { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavuh, NULL },
        { gen_helper_mve_vmlaldavuw, NULL },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
        { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhuw, NULL,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = load_reg(s, a->rda);
    } else {
        rda = tcg_const_i32(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

#define DO_DUAL_ACC(INSN, FN)                                           \
    static bool trans_##INSN(DisasContext *s, arg_vmladav *a)           \
    {                                                                   \
        static MVEGenDualAccOpFn * const fns[4][2] = {                  \
            { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb },        \
            { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh },        \
            { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw },        \
            { NULL, NULL },                                             \
        };                                                              \
        return do_dual_acc(s, a, fns[a->size][a->x]);                   \
    }

DO_DUAL_ACC(VMLADAV_S, vmladavs)
DO_DUAL_ACC(VMLSDAV, vmlsdav)

static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { gen_helper_mve_vmladavub, NULL },
        { gen_helper_mve_vmladavuh, NULL },
        { gen_helper_mve_vmladavuw, NULL },
        { NULL, NULL },
    };
    return do_dual_acc(s, a, fns[a->size][a->x]);
}

static void gen_vpst(DisasContext *s, uint32_t mask)
{
    /*
     * Set the VPR mask fields. We take advantage of MASK01 and MASK23
     * being adjacent fields in the register.
     *
     * Updating the masks is not predicated, but it is subject to beat-wise
     * execution, and the mask is updated on the odd-numbered beats.
     * So if PSR.ECI says we should skip beat 1, we mustn't update the
     * 01 mask field.
     */
    TCGv_i32 vpr = load_cpu_field(v7m.vpr);
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
        /* Update both 01 and 23 fields */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask | (mask << 4)),
                            R_V7M_VPR_MASK01_SHIFT,
                            R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
        break;
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        /* Update only the 23 mask field */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask),
                            R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
        break;
    default:
        g_assert_not_reached();
    }
    store_cpu_field(vpr, v7m.vpr);
}

static bool trans_VPST(DisasContext *s, arg_VPST *a)
{
    /* mask == 0 is a "related encoding" */
    if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }
    gen_vpst(s, a->mask);
    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VPNOT(DisasContext *s, arg_VPNOT *a)
{
    /*
     * Invert the predicate in VPR.P0. We have to call out to
     * a helper because this insn itself is beatwise and can
     * be predicated.
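     * (As with the other beatwise helpers, the helper is expected to call
     * mve_advance_vpt() itself, which is why plain mve_update_eci() is
     * sufficient below.)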
     */
    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    gen_helper_mve_vpnot(cpu_env);
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    mve_update_eci(s);
    return true;
}

static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
{
    /* VADDV: vector add across vector */
    static MVEGenVADDVFn * const fns[4][2] = {
        { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
        { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
        { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
        { NULL, NULL }
    };
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        a->size == 3) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of Rda, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from Rda */
        rda = load_reg(s, a->rda);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i32(0);
    }

    qm = mve_qreg_ptr(a->qm);
    fns[a->size][a->u](rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
{
    /*
     * Vector Add Long Across Vector: accumulate the 32-bit
     * elements of the vector into a 64-bit result stored in
     * a pair of general-purpose registers.
     * No need to check Qm's bank: it is only 3 bits in decode.
     */
    TCGv_ptr qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of RdaHi:RdaLo, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from RdaHi:RdaLo */
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i64(0);
    }

    qm = mve_qreg_ptr(a->qm);
    if (a->u) {
        gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda);
    } else {
        gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda);
    }
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn)
{
    TCGv_ptr qd;
    uint64_t imm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    qd = mve_qreg_ptr(a->qd);
    fn(cpu_env, qd, tcg_constant_i64(imm));
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    MVEGenOneOpImmFn *fn;

    if ((a->cmode & 1) && a->cmode < 12) {
        if (a->op) {
            /*
             * For op=1, the immediate will be inverted by asimd_imm_const(),
             * so the VBIC becomes a logical AND operation.
             */
            fn = gen_helper_mve_vandi;
        } else {
            fn = gen_helper_mve_vorri;
        }
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
        fn = gen_helper_mve_vmovi;
    }
    return do_1imm(s, a, fn);
}

static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                      bool negateshift)
{
    TCGv_ptr qd, qm;
    int shift = a->shift;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * When we handle a right shift insn using a left-shift helper
     * which permits a negative shift count to indicate a right-shift,
     * we must negate the shift count.
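     * (For example, the VSHRI_S case below uses the vshli_s helpers, so a
     * right shift by 3 is passed to the helper as a shift count of -3.)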
     */
    if (negateshift) {
        shift = -shift;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(shift));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT(INSN, FN, NEGATESHIFT)                        \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2shift(s, a, fns[a->size], NEGATESHIFT);      \
    }

DO_2SHIFT(VSHLI, vshli_u, false)
DO_2SHIFT(VQSHLI_S, vqshli_s, false)
DO_2SHIFT(VQSHLI_U, vqshli_u, false)
DO_2SHIFT(VQSHLUI, vqshlui_s, false)
/* These right shifts use a left-shift helper with negated shift count */
DO_2SHIFT(VSHRI_S, vshli_s, true)
DO_2SHIFT(VSHRI_U, vshli_u, true)
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
DO_2SHIFT(VRSHRI_U, vrshli_u, true)

DO_2SHIFT(VSRI, vsri, false)
DO_2SHIFT(VSLI, vsli, false)

#define DO_2SHIFT_FP(INSN, FN)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2shift(s, a, gen_helper_mve_##FN, false);     \
    }

DO_2SHIFT_FP(VCVT_SH_fixed, vcvt_sh)
DO_2SHIFT_FP(VCVT_UH_fixed, vcvt_uh)
DO_2SHIFT_FP(VCVT_HS_fixed, vcvt_hs)
DO_2SHIFT_FP(VCVT_HU_fixed, vcvt_hu)
DO_2SHIFT_FP(VCVT_SF_fixed, vcvt_sf)
DO_2SHIFT_FP(VCVT_UF_fixed, vcvt_uf)
DO_2SHIFT_FP(VCVT_FS_fixed, vcvt_fs)
DO_2SHIFT_FP(VCVT_FU_fixed, vcvt_fu)

static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
                             MVEGenTwoOpShiftFn *fn)
{
    TCGv_ptr qda;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qda) ||
        a->rm == 13 || a->rm == 15 || !fn) {
        /* Rm cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qda = mve_qreg_ptr(a->qda);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qda, qda, rm);
    tcg_temp_free_ptr(qda);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT_SCALAR(INSN, FN)                              \
    static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a) \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2shift_scalar(s, a, fns[a->size]);            \
    }

DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)

#define DO_VSHLL(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_VSHLL(VSHLL_BS, vshllbs)
DO_VSHLL(VSHLL_BU, vshllbu)
DO_VSHLL(VSHLL_TS, vshllts)
DO_VSHLL(VSHLL_TU, vshlltu)

#define DO_2SHIFT_N(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_2SHIFT_N(VSHRNB, vshrnb)
DO_2SHIFT_N(VSHRNT, vshrnt)
DO_2SHIFT_N(VRSHRNB, vrshrnb)
DO_2SHIFT_N(VRSHRNT, vrshrnt)
DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
DO_2SHIFT_N(VQSHRUNB, vqshrunb)
DO_2SHIFT_N(VQSHRUNT, vqshrunt)
DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)

static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
{
    /*
     * Whole Vector Left Shift with Carry. The carry is taken
     * from a general purpose register and written back there.
     * An imm of 0 means "shift by 32".
     */
    TCGv_ptr qd;
    TCGv_i32 rdm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rdm == 13 || a->rdm == 15) {
        /* CONSTRAINED UNPREDICTABLE: we UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rdm = load_reg(s, a->rdm);
    gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
    store_reg(s, a->rdm, rdm);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn;

    /*
     * Vector increment/decrement with wrap and duplicate (VIDUP, VDDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->size == MO_64) {
        /* size 0b11 is another encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn, rm;

    /*
     * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn. Rm specifies a point where
     * the count wraps back around to 0. The updated offset is written back
     * to Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (!fn || a->rm == 13 || a->rm == 15) {
        /*
         * size 0b11 is another encoding; Rm == 13 is UNPREDICTABLE;
         * Rm == 15 is a related encoding
         */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    rm = load_reg(s, a->rm);
    fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    /* VDDUP is just like VIDUP but with a negative immediate */
    a->imm = -a->imm;
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_viwdupb,
        gen_helper_mve_viwduph,
        gen_helper_mve_viwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_vdwdupb,
        gen_helper_mve_vdwduph,
        gen_helper_mve_vdwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
{
    TCGv_ptr qn, qm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qn, qm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    mve_update_eci(s);
    return true;
}

static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
                           MVEGenScalarCmpFn *fn)
{
    TCGv_ptr qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    if (a->rm == 15) {
        /* Encoding Rm=0b1111 means "constant zero" */
        rm = tcg_constant_i32(0);
    } else {
        rm = load_reg(s, a->rm);
    }
    fn(cpu_env, qn, rm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_i32(rm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    mve_update_eci(s);
    return true;
}

#define DO_VCMP(INSN, FN)                                       \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            gen_helper_mve_##FN##_scalarb,                      \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalarw,                      \
            NULL,                                               \
        };                                                      \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

DO_VCMP(VCMPEQ, vcmpeq)
DO_VCMP(VCMPNE, vcmpne)
DO_VCMP(VCMPCS, vcmpcs)
DO_VCMP(VCMPHI, vcmphi)
DO_VCMP(VCMPGE, vcmpge)
DO_VCMP(VCMPLT, vcmplt)
DO_VCMP(VCMPGT, vcmpgt)
DO_VCMP(VCMPLE, vcmple)

#define DO_VCMP_FP(INSN, FN)                                    \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            NULL,                                               \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalars,                      \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

DO_VCMP_FP(VCMPEQ_fp, vfcmpeq)
DO_VCMP_FP(VCMPNE_fp, vfcmpne)
DO_VCMP_FP(VCMPGE_fp, vfcmpge)
DO_VCMP_FP(VCMPLT_fp, vfcmplt)
DO_VCMP_FP(VCMPGT_fp, vfcmpgt)
DO_VCMP_FP(VCMPLE_fp, vfcmple)

static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
{
    /*
     * MIN/MAX operations across a vector: compute the min or
     * max of the initial value in a general purpose register
     * and all the elements in the vector, and store it back
     * into the general purpose register.
     */
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_VMAXV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV(VMAXV_S, vmaxvs)
DO_VMAXV(VMAXV_U, vmaxvu)
DO_VMAXV(VMAXAV, vmaxav)
DO_VMAXV(VMINV_S, vminvs)
DO_VMAXV(VMINV_U, vminvu)
DO_VMAXV(VMINAV, vminav)

#define DO_VMAXV_FP(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV_FP(VMAXNMV, vmaxnmv)
DO_VMAXV_FP(VMINNMV, vminnmv)
DO_VMAXV_FP(VMAXNMAV, vmaxnmav)
DO_VMAXV_FP(VMINNMAV, vminnmav)

static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
{
    /* Absolute difference accumulated across vector */
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qm | a->qn) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    qn = mve_qreg_ptr(a->qn);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_VABAV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vabav *a)     \
    {                                                           \
        static MVEGenVABAVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vabav(s, a, fns[a->size]);                    \
    }

DO_VABAV(VABAV_S, vabavs)
DO_VABAV(VABAV_U, vabavu)

static bool trans_VMOV_to_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
{
    /*
     * VMOV two 32-bit vector lanes to two general-purpose registers.
     * This insn is not predicated but it is subject to beat-wise
     * execution if it is not in an IT block. For us this means
     * only that if PSR.ECI says we should not be executing the beat
     * corresponding to the lane of the vector register being accessed
     * then we should skip performing the move, and that we need to do
     * the usual check for bad ECI state and advance of ECI state.
     * (If PSR.ECI is non-zero then we cannot be in an IT block.)
     */
    TCGv_i32 tmp;
    int vd;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
        a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15 ||
        a->rt == a->rt2) {
        /* Rt/Rt2 cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /* Convert Qreg index to Dreg for read_neon_element32() etc */
    vd = a->qd * 2;

    if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, vd, a->idx, MO_32);
        store_reg(s, a->rt, tmp);
    }
    if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, vd + 1, a->idx, MO_32);
        store_reg(s, a->rt2, tmp);
    }

    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VMOV_from_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
{
    /*
     * VMOV two general-purpose registers to two 32-bit vector lanes.
     * This insn is not predicated but it is subject to beat-wise
     * execution if it is not in an IT block. For us this means
     * only that if PSR.ECI says we should not be executing the beat
     * corresponding to the lane of the vector register being accessed
     * then we should skip performing the move, and that we need to do
     * the usual check for bad ECI state and advance of ECI state.
     * (If PSR.ECI is non-zero then we cannot be in an IT block.)
     */
    TCGv_i32 tmp;
    int vd;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
        a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15) {
        /* Rt/Rt2 cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /* Convert Qreg idx to Dreg for read_neon_element32() etc */
    vd = a->qd * 2;

    if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
        tmp = load_reg(s, a->rt);
        write_neon_element32(tmp, vd, a->idx, MO_32);
        tcg_temp_free_i32(tmp);
    }
    if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
        tmp = load_reg(s, a->rt2);
        write_neon_element32(tmp, vd + 1, a->idx, MO_32);
        tcg_temp_free_i32(tmp);
    }

    mve_update_and_store_eci(s);
    return true;
}