/*
 * ARM translation: M-profile MVE instructions
 *
 * Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

static inline int vidup_imm(DisasContext *s, int x)
{
    return 1 << x;
}

/* Include the generated decoder */
#include "decode-mve.c.inc"

typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStSGFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStIlFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}

static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}

static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     */
    return qmask < 8;
}

bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
     */
    s->eci_handled = true;
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved value: INVSTATE UsageFault */
        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
                           default_exception_el(s));
        return false;
    }
}

void mve_update_eci(DisasContext *s)
{
    /*
     * The helper function will always update the CPUState field,
     * so we only need to update the DisasContext field.
     */
    if (s->eci) {
        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
    }
}

void mve_update_and_store_eci(DisasContext *s)
{
    /*
     * For insns which don't call a helper function that will call
     * mve_advance_vpt(), this version updates s->eci and also stores
     * it out to the CPUState field.
     */
    if (s->eci) {
        mve_update_eci(s);
        store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
    }
}

static bool mve_skip_first_beat(DisasContext *s)
{
    /* Return true if PSR.ECI says we must skip the first beat of this insn */
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        g_assert_not_reached();
    }
}

static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
                    unsigned msize)
{
    TCGv_i32 addr;
    uint32_t offset;
    TCGv_ptr qreg;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }

    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
    if (a->rn == 15 || (a->rn == 13 && a->w)) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }
    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    qreg = mve_qreg_ptr(a->qd);
    fn(cpu_env, qreg, addr);
    tcg_temp_free_ptr(qreg);

    /*
     * Writeback always happens after the last beat of the insn,
     * regardless of predication
     */
    if (a->w) {
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    mve_update_eci(s);
    return true;
}

static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
{
    static MVEGenLdStFn * const ldstfns[4][2] = {
        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
        { NULL, NULL }
    };
    return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
}

#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
    {                                                           \
        static MVEGenLdStFn * const ldstfns[2][2] = {           \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
            { NULL, gen_helper_mve_##ULD },                     \
        };                                                      \
        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
    }

DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)

static bool do_ldst_sg(DisasContext *s, arg_vldst_sg *a, MVEGenLdStSGFn fn)
{
    TCGv_i32 addr;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn || a->rn == 15) {
        /* Rn case is UNPREDICTABLE */
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    addr = load_reg(s, a->rn);

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, addr);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_i32(addr);
    mve_update_eci(s);
    return true;
}

/*
 * The naming scheme here is "vldrb_sg_sh == in-memory byte loads
 * signextended to halfword elements in register". _os_ indicates that
 * the offsets in Qm should be scaled by the element size.
 */
/* This macro is just to make the arrays more compact in these functions */
#define F(N) gen_helper_mve_##N

/* VLDRB/VSTRB (ie msize 1) with OS=1 is UNPREDICTABLE; we UNDEF */
static bool trans_VLDR_S_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { NULL, F(vldrb_sg_sh), F(vldrb_sg_sw), NULL },
            { NULL, NULL, F(vldrh_sg_sw), NULL },
            { NULL, NULL, NULL, NULL },
            { NULL, NULL, NULL, NULL }
        }, {
            { NULL, NULL, NULL, NULL },
            { NULL, NULL, F(vldrh_sg_os_sw), NULL },
            { NULL, NULL, NULL, NULL },
            { NULL, NULL, NULL, NULL }
        }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VLDR_U_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { F(vldrb_sg_ub), F(vldrb_sg_uh), F(vldrb_sg_uw), NULL },
            { NULL, F(vldrh_sg_uh), F(vldrh_sg_uw), NULL },
            { NULL, NULL, F(vldrw_sg_uw), NULL },
            { NULL, NULL, NULL, F(vldrd_sg_ud) }
        }, {
            { NULL, NULL, NULL, NULL },
            { NULL, F(vldrh_sg_os_uh), F(vldrh_sg_os_uw), NULL },
            { NULL, NULL, F(vldrw_sg_os_uw), NULL },
            { NULL, NULL, NULL, F(vldrd_sg_os_ud) }
        }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VSTR_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { F(vstrb_sg_ub), F(vstrb_sg_uh), F(vstrb_sg_uw), NULL },
            { NULL, F(vstrh_sg_uh), F(vstrh_sg_uw), NULL },
            { NULL, NULL, F(vstrw_sg_uw), NULL },
            { NULL, NULL, NULL, F(vstrd_sg_ud) }
        }, {
            { NULL, NULL, NULL, NULL },
            { NULL, F(vstrh_sg_os_uh), F(vstrh_sg_os_uw), NULL },
            { NULL, NULL, F(vstrw_sg_os_uw), NULL },
            { NULL, NULL, NULL, F(vstrd_sg_os_ud) }
        }
    };
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

#undef F

static bool do_ldst_sg_imm(DisasContext *s, arg_vldst_sg_imm *a,
                           MVEGenLdStSGFn *fn, unsigned msize)
{
    uint32_t offset;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(offset));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

static bool trans_VLDRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vldrw_sg_uw,
        gen_helper_mve_vldrw_sg_wb_uw,
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
}

static bool trans_VLDRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vldrd_sg_ud,
        gen_helper_mve_vldrd_sg_wb_ud,
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
}

static bool trans_VSTRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vstrw_sg_uw,
        gen_helper_mve_vstrw_sg_wb_uw,
    };
    return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
}

static bool trans_VSTRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vstrd_sg_ud,
        gen_helper_mve_vstrd_sg_wb_ud,
    };
    return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
}

static bool do_vldst_il(DisasContext *s, arg_vldst_il *a, MVEGenLdStIlFn *fn,
                        int addrinc)
{
    TCGv_i32 rn;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn || (a->rn == 13 && a->w) || a->rn == 15) {
        /* Variously UNPREDICTABLE or UNDEF or related-encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    rn = load_reg(s, a->rn);
    /*
     * We pass the index of Qd, not a pointer, because the helper must
     * access multiple Q registers starting at Qd and working up.
     */
    fn(cpu_env, tcg_constant_i32(a->qd), rn);

    if (a->w) {
        tcg_gen_addi_i32(rn, rn, addrinc);
        store_reg(s, a->rn, rn);
    } else {
        tcg_temp_free_i32(rn);
    }
    mve_update_and_store_eci(s);
    return true;
}

/* This macro is just to make the arrays more compact in these functions */
#define F(N) gen_helper_mve_##N

static bool trans_VLD2(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vld20b), F(vld20h), F(vld20w), NULL, },
        { F(vld21b), F(vld21h), F(vld21w), NULL, },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL },
    };
    if (a->qd > 6) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 32);
}

static bool trans_VLD4(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vld40b), F(vld40h), F(vld40w), NULL, },
        { F(vld41b), F(vld41h), F(vld41w), NULL, },
        { F(vld42b), F(vld42h), F(vld42w), NULL, },
        { F(vld43b), F(vld43h), F(vld43w), NULL, },
    };
    if (a->qd > 4) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 64);
}

static bool trans_VST2(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vst20b), F(vst20h), F(vst20w), NULL, },
        { F(vst21b), F(vst21h), F(vst21w), NULL, },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL },
    };
    if (a->qd > 6) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 32);
}

static bool trans_VST4(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vst40b), F(vst40h), F(vst40w), NULL, },
        { F(vst41b), F(vst41h), F(vst41w), NULL, },
        { F(vst42b), F(vst42h), F(vst42w), NULL, },
        { F(vst43b), F(vst43h), F(vst43w), NULL, },
    };
    if (a->qd > 4) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 64);
}

#undef F

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    TCGv_ptr qd;
    TCGv_i32 rt;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rt == 13 || a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rt = load_reg(s, a->rt);
    tcg_gen_dup_i32(a->size, rt, rt);
    gen_helper_mve_vdup(cpu_env, qd, rt);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rt);
    mve_update_eci(s);
    return true;
}

static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_1OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_1OP(VCLZ, vclz)
DO_1OP(VCLS, vcls)
DO_1OP(VABS, vabs)
DO_1OP(VNEG, vneg)
DO_1OP(VQABS, vqabs)
DO_1OP(VQNEG, vqneg)
DO_1OP(VMAXA, vmaxa)
DO_1OP(VMINA, vmina)

/* Narrowing moves: only size 0 and 1 are valid */
#define DO_VMOVN(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            NULL,                                               \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_VMOVN(VMOVNB, vmovnb)
DO_VMOVN(VMOVNT, vmovnt)
DO_VMOVN(VQMOVUNB, vqmovunb)
DO_VMOVN(VQMOVUNT, vqmovunt)
DO_VMOVN(VQMOVN_BS, vqmovnbs)
DO_VMOVN(VQMOVN_TS, vqmovnts)
DO_VMOVN(VQMOVN_BU, vqmovnbu)
DO_VMOVN(VQMOVN_TU, vqmovntu)

static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev16b,
        NULL,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV32(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev32b,
        gen_helper_mve_vrev32h,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV64(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev64b,
        gen_helper_mve_vrev64h,
        gen_helper_mve_vrev64w,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VMVN(DisasContext *s, arg_1op *a)
{
    return do_1op(s, a, gen_helper_mve_vmvn);
}

static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfabsh,
        gen_helper_mve_vfabss,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfnegh,
        gen_helper_mve_vfnegs,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn)
{
    TCGv_ptr qd, qn, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qn, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_LOGIC(INSN, HELPER)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        return do_2op(s, a, HELPER);                            \
    }

DO_LOGIC(VAND, gen_helper_mve_vand)
DO_LOGIC(VBIC, gen_helper_mve_vbic)
DO_LOGIC(VORR, gen_helper_mve_vorr)
DO_LOGIC(VORN, gen_helper_mve_vorn)
DO_LOGIC(VEOR, gen_helper_mve_veor)

DO_LOGIC(VPSEL, gen_helper_mve_vpsel)

#define DO_2OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP(VADD, vadd)
DO_2OP(VSUB, vsub)
DO_2OP(VMUL, vmul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
DO_2OP(VMAX_S, vmaxs)
DO_2OP(VMAX_U, vmaxu)
DO_2OP(VMIN_S, vmins)
DO_2OP(VMIN_U, vminu)
DO_2OP(VABD_S, vabds)
DO_2OP(VABD_U, vabdu)
DO_2OP(VHADD_S, vhadds)
DO_2OP(VHADD_U, vhaddu)
DO_2OP(VHSUB_S, vhsubs)
DO_2OP(VHSUB_U, vhsubu)
DO_2OP(VMULL_BS, vmullbs)
DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
DO_2OP(VQDMULH, vqdmulh)
DO_2OP(VQRDMULH, vqrdmulh)
DO_2OP(VQADD_S, vqadds)
DO_2OP(VQADD_U, vqaddu)
DO_2OP(VQSUB_S, vqsubs)
DO_2OP(VQSUB_U, vqsubu)
DO_2OP(VSHL_S, vshls)
DO_2OP(VSHL_U, vshlu)
DO_2OP(VRSHL_S, vrshls)
DO_2OP(VRSHL_U, vrshlu)
DO_2OP(VQSHL_S, vqshls)
DO_2OP(VQSHL_U, vqshlu)
DO_2OP(VQRSHL_S, vqrshls)
DO_2OP(VQRSHL_U, vqrshlu)
DO_2OP(VQDMLADH, vqdmladh)
DO_2OP(VQDMLADHX, vqdmladhx)
DO_2OP(VQRDMLADH, vqrdmladh)
DO_2OP(VQRDMLADHX, vqrdmladhx)
DO_2OP(VQDMLSDH, vqdmlsdh)
DO_2OP(VQDMLSDHX, vqdmlsdhx)
DO_2OP(VQRDMLSDH, vqrdmlsdh)
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
DO_2OP(VRHADD_S, vrhadds)
DO_2OP(VRHADD_U, vrhaddu)
/*
 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
 * so we can reuse the DO_2OP macro. (Our implementation calculates the
 * "expected" results in this case.) Similarly for VHCADD.
 */
DO_2OP(VCADD90, vcadd90)
DO_2OP(VCADD270, vcadd270)
DO_2OP(VHCADD90, vhcadd90)
DO_2OP(VHCADD270, vhcadd270)

static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullbh,
        gen_helper_mve_vqdmullbw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullth,
        gen_helper_mve_vqdmulltw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
{
    /*
     * Note that a->size indicates the output size, ie VMULL.P8
     * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
     * is the 16x16->32 operation and a->size is MO_32.
     */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpbh,
        gen_helper_mve_vmullpbw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
{
    /* a->size is as for trans_VMULLP_B */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpth,
        gen_helper_mve_vmullptw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

/*
 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
 * of the 32-bit elements in each lane of the input vectors, where the
 * carry-out of each add is the carry-in of the next. The initial carry
 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
 * These insns are subject to beat-wise execution. Partial execution
 * of an I=1 (initial carry input fixed) insn which does not
 * execute the first beat must start with the current FPSCR.NZCV
 * value, not the fixed constant input.
 */
static bool trans_VADC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vadc);
}

static bool trans_VADCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VADC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vadci);
}

static bool trans_VSBC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vsbc);
}

static bool trans_VSBCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VSBC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vsbci);
}

#define DO_2OP_FP(INSN, FN)                                     \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP_FP(VADD_fp, vfadd)
DO_2OP_FP(VSUB_fp, vfsub)
DO_2OP_FP(VMUL_fp, vfmul)
DO_2OP_FP(VABD_fp, vfabd)
DO_2OP_FP(VMAXNM, vmaxnm)
DO_2OP_FP(VMINNM, vminnm)
DO_2OP_FP(VCADD90_fp, vfcadd90)
DO_2OP_FP(VCADD270_fp, vfcadd270)
DO_2OP_FP(VFMA, vfma)
DO_2OP_FP(VFMS, vfms)
DO_2OP_FP(VCMUL0, vcmul0)
DO_2OP_FP(VCMUL90, vcmul90)
DO_2OP_FP(VCMUL180, vcmul180)
DO_2OP_FP(VCMUL270, vcmul270)
DO_2OP_FP(VCMLA0, vcmla0)
DO_2OP_FP(VCMLA90, vcmla90)
DO_2OP_FP(VCMLA180, vcmla180)
DO_2OP_FP(VCMLA270, vcmla270)
DO_2OP_FP(VMAXNMA, vmaxnma)
DO_2OP_FP(VMINNMA, vminnma)

static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
                          MVEGenTwoOpScalarFn fn)
{
    TCGv_ptr qd, qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn) ||
        !fn) {
        return false;
    }
    if (a->rm == 13 || a->rm == 15) {
        /* UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qd, qn, rm);
    tcg_temp_free_i32(rm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_2OP_SCALAR(INSN, FN)                                 \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)
DO_2OP_SCALAR(VMLA, vmla)
DO_2OP_SCALAR(VMLAS, vmlas)
DO_2OP_SCALAR(VQDMLAH, vqdmlah)
DO_2OP_SCALAR(VQRDMLAH, vqrdmlah)
DO_2OP_SCALAR(VQDMLASH, vqdmlash)
DO_2OP_SCALAR(VQRDMLASH, vqrdmlash)

static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullb_scalarh,
        gen_helper_mve_vqdmullb_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullt_scalarh,
        gen_helper_mve_vqdmullt_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}


#define DO_2OP_FP_SCALAR(INSN, FN)                              \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_FP_SCALAR(VADD_fp_scalar, vfadd_scalar)
DO_2OP_FP_SCALAR(VSUB_fp_scalar, vfsub_scalar)
DO_2OP_FP_SCALAR(VMUL_fp_scalar, vfmul_scalar)
DO_2OP_FP_SCALAR(VFMA_scalar, vfma_scalar)
DO_2OP_FP_SCALAR(VFMAS_scalar, vfmas_scalar)

static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
                             MVEGenLongDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn | a->qm) ||
        !fn) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        rda = tcg_const_i64(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
        { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavuh, NULL },
        { gen_helper_mve_vmlaldavuw, NULL },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
        { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhuw, NULL,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = load_reg(s, a->rda);
    } else {
        rda = tcg_const_i32(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

#define DO_DUAL_ACC(INSN, FN)                                    \
    static bool trans_##INSN(DisasContext *s, arg_vmladav *a)    \
    {                                                            \
        static MVEGenDualAccOpFn * const fns[4][2] = {           \
            { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb }, \
            { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh }, \
            { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw }, \
            { NULL, NULL },                                      \
        };                                                       \
        return do_dual_acc(s, a, fns[a->size][a->x]);            \
    }

DO_DUAL_ACC(VMLADAV_S, vmladavs)
DO_DUAL_ACC(VMLSDAV, vmlsdav)

static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { gen_helper_mve_vmladavub, NULL },
        { gen_helper_mve_vmladavuh, NULL },
        { gen_helper_mve_vmladavuw, NULL },
        { NULL, NULL },
    };
    return do_dual_acc(s, a, fns[a->size][a->x]);
}

static void gen_vpst(DisasContext *s, uint32_t mask)
{
    /*
     * Set the VPR mask fields. We take advantage of MASK01 and MASK23
     * being adjacent fields in the register.
     *
     * Updating the masks is not predicated, but it is subject to beat-wise
     * execution, and the mask is updated on the odd-numbered beats.
     * So if PSR.ECI says we should skip beat 1, we mustn't update the
     * 01 mask field.
     */
    TCGv_i32 vpr = load_cpu_field(v7m.vpr);
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
        /* Update both 01 and 23 fields */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask | (mask << 4)),
                            R_V7M_VPR_MASK01_SHIFT,
                            R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
        break;
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        /* Update only the 23 mask field */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask),
                            R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
        break;
    default:
        g_assert_not_reached();
    }
    store_cpu_field(vpr, v7m.vpr);
}

static bool trans_VPST(DisasContext *s, arg_VPST *a)
{
    /* mask == 0 is a "related encoding" */
    if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }
    gen_vpst(s, a->mask);
    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VPNOT(DisasContext *s, arg_VPNOT *a)
{
    /*
     * Invert the predicate in VPR.P0. We have to call out to
     * a helper because this insn itself is beatwise and can
     * be predicated.
     */
    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    gen_helper_mve_vpnot(cpu_env);
    mve_update_eci(s);
    return true;
}

static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
{
    /* VADDV: vector add across vector */
    static MVEGenVADDVFn * const fns[4][2] = {
        { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
        { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
        { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
        { NULL, NULL }
    };
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        a->size == 3) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of Rda, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from Rda */
        rda = load_reg(s, a->rda);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i32(0);
    }

    qm = mve_qreg_ptr(a->qm);
    fns[a->size][a->u](rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
{
    /*
     * Vector Add Long Across Vector: accumulate the 32-bit
     * elements of the vector into a 64-bit result stored in
     * a pair of general-purpose registers.
     * No need to check Qm's bank: it is only 3 bits in decode.
     */
    TCGv_ptr qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of RdaHi:RdaLo, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from RdaHi:RdaLo */
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i64(0);
    }

    qm = mve_qreg_ptr(a->qm);
    if (a->u) {
        gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda);
    } else {
        gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda);
    }
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn)
{
    TCGv_ptr qd;
    uint64_t imm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    qd = mve_qreg_ptr(a->qd);
    fn(cpu_env, qd, tcg_constant_i64(imm));
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    MVEGenOneOpImmFn *fn;

    if ((a->cmode & 1) && a->cmode < 12) {
        if (a->op) {
            /*
             * For op=1, the immediate will be inverted by asimd_imm_const(),
             * so the VBIC becomes a logical AND operation.
             */
            fn = gen_helper_mve_vandi;
        } else {
            fn = gen_helper_mve_vorri;
        }
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
        fn = gen_helper_mve_vmovi;
    }
    return do_1imm(s, a, fn);
}

static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                      bool negateshift)
{
    TCGv_ptr qd, qm;
    int shift = a->shift;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * When we handle a right shift insn using a left-shift helper
     * which permits a negative shift count to indicate a right-shift,
     * we must negate the shift count.
     */
    if (negateshift) {
        shift = -shift;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(shift));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT(INSN, FN, NEGATESHIFT)                        \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2shift(s, a, fns[a->size], NEGATESHIFT);      \
    }

DO_2SHIFT(VSHLI, vshli_u, false)
DO_2SHIFT(VQSHLI_S, vqshli_s, false)
DO_2SHIFT(VQSHLI_U, vqshli_u, false)
DO_2SHIFT(VQSHLUI, vqshlui_s, false)
/* These right shifts use a left-shift helper with negated shift count */
DO_2SHIFT(VSHRI_S, vshli_s, true)
DO_2SHIFT(VSHRI_U, vshli_u, true)
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
DO_2SHIFT(VRSHRI_U, vrshli_u, true)

DO_2SHIFT(VSRI, vsri, false)
DO_2SHIFT(VSLI, vsli, false)

#define DO_2SHIFT_FP(INSN, FN)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2shift(s, a, gen_helper_mve_##FN, false);     \
    }

DO_2SHIFT_FP(VCVT_SH_fixed, vcvt_sh)
DO_2SHIFT_FP(VCVT_UH_fixed, vcvt_uh)
DO_2SHIFT_FP(VCVT_HS_fixed, vcvt_hs)
DO_2SHIFT_FP(VCVT_HU_fixed, vcvt_hu)
DO_2SHIFT_FP(VCVT_SF_fixed, vcvt_sf)
DO_2SHIFT_FP(VCVT_UF_fixed, vcvt_uf)
DO_2SHIFT_FP(VCVT_FS_fixed, vcvt_fs)
DO_2SHIFT_FP(VCVT_FU_fixed, vcvt_fu)

static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
                             MVEGenTwoOpShiftFn *fn)
{
    TCGv_ptr qda;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qda) ||
        a->rm == 13 || a->rm == 15 || !fn) {
        /* Rm cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qda = mve_qreg_ptr(a->qda);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qda, qda, rm);
    tcg_temp_free_ptr(qda);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT_SCALAR(INSN, FN)                              \
    static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a) \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2shift_scalar(s, a, fns[a->size]);            \
    }

DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)

#define DO_VSHLL(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_VSHLL(VSHLL_BS, vshllbs)
DO_VSHLL(VSHLL_BU, vshllbu)
DO_VSHLL(VSHLL_TS, vshllts)
DO_VSHLL(VSHLL_TU, vshlltu)

#define DO_2SHIFT_N(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_2SHIFT_N(VSHRNB, vshrnb)
DO_2SHIFT_N(VSHRNT, vshrnt)
DO_2SHIFT_N(VRSHRNB, vrshrnb)
DO_2SHIFT_N(VRSHRNT, vrshrnt)
DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
DO_2SHIFT_N(VQSHRUNB, vqshrunb)
DO_2SHIFT_N(VQSHRUNT, vqshrunt)
DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)

static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
{
    /*
     * Whole Vector Left Shift with Carry. The carry is taken
     * from a general purpose register and written back there.
     * An imm of 0 means "shift by 32".
     */
    TCGv_ptr qd;
    TCGv_i32 rdm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rdm == 13 || a->rdm == 15) {
        /* CONSTRAINED UNPREDICTABLE: we UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rdm = load_reg(s, a->rdm);
    gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
    store_reg(s, a->rdm, rdm);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn;

    /*
     * Vector increment/decrement with wrap and duplicate (VIDUP, VDDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->size == MO_64) {
        /* size 0b11 is another encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn, rm;

    /*
     * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP)
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn. Rm specifies a point where
     * the count wraps back around to 0. The updated offset is written back
     * to Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (!fn || a->rm == 13 || a->rm == 15) {
        /*
         * size 0b11 is another encoding; Rm == 13 is UNPREDICTABLE;
         * Rm == 15 is the related VIDUP, VDDUP encoding.
         */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    rm = load_reg(s, a->rm);
    fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    /* VDDUP is just like VIDUP but with a negative immediate */
    a->imm = -a->imm;
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_viwdupb,
        gen_helper_mve_viwduph,
        gen_helper_mve_viwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_vdwdupb,
        gen_helper_mve_vdwduph,
        gen_helper_mve_vdwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
{
    TCGv_ptr qn, qm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qn, qm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
                           MVEGenScalarCmpFn *fn)
{
    TCGv_ptr qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    if (a->rm == 15) {
        /* Encoding Rm=0b1111 means "constant zero" */
        rm = tcg_constant_i32(0);
    } else {
        rm = load_reg(s, a->rm);
    }
    fn(cpu_env, qn, rm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_i32(rm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

#define DO_VCMP(INSN, FN)                                       \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            gen_helper_mve_##FN##_scalarb,                      \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalarw,                      \
            NULL,                                               \
        };                                                      \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

DO_VCMP(VCMPEQ, vcmpeq)
DO_VCMP(VCMPNE, vcmpne)
DO_VCMP(VCMPCS, vcmpcs)
DO_VCMP(VCMPHI, vcmphi)
DO_VCMP(VCMPGE, vcmpge)
DO_VCMP(VCMPLT, vcmplt)
DO_VCMP(VCMPGT, vcmpgt)
DO_VCMP(VCMPLE, vcmple)

#define DO_VCMP_FP(INSN, FN)                                    \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            NULL,                                               \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalars,                      \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

DO_VCMP_FP(VCMPEQ_fp, vfcmpeq)
DO_VCMP_FP(VCMPNE_fp, vfcmpne)
DO_VCMP_FP(VCMPGE_fp, vfcmpge)
DO_VCMP_FP(VCMPLT_fp, vfcmplt)
DO_VCMP_FP(VCMPGT_fp, vfcmpgt)
DO_VCMP_FP(VCMPLE_fp, vfcmple)

static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
{
    /*
     * MIN/MAX operations across a vector: compute the min or
     * max of the initial value in a general purpose register
     * and all the elements in the vector, and store it back
     * into the general purpose register.
     */
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_VMAXV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV(VMAXV_S, vmaxvs)
DO_VMAXV(VMAXV_U, vmaxvu)
DO_VMAXV(VMAXAV, vmaxav)
DO_VMAXV(VMINV_S, vminvs)
DO_VMAXV(VMINV_U, vminvu)
DO_VMAXV(VMINAV, vminav)

#define DO_VMAXV_FP(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV_FP(VMAXNMV, vmaxnmv)
DO_VMAXV_FP(VMINNMV, vminnmv)
DO_VMAXV_FP(VMAXNMAV, vmaxnmav)
DO_VMAXV_FP(VMINNMAV, vminnmav)

static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
{
    /* Absolute difference accumulated across vector */
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qm | a->qn) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    qn = mve_qreg_ptr(a->qn);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_VABAV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vabav *a)     \
    {                                                           \
        static MVEGenVABAVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vabav(s, a, fns[a->size]);                    \
    }

DO_VABAV(VABAV_S, vabavs)
DO_VABAV(VABAV_U, vabavu)

static bool trans_VMOV_to_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
{
    /*
     * VMOV two 32-bit vector lanes to two general-purpose registers.
     * This insn is not predicated but it is subject to beat-wise
     * execution if it is not in an IT block. For us this means
     * only that if PSR.ECI says we should not be executing the beat
     * corresponding to the lane of the vector register being accessed
     * then we should skip performing the move, and that we need to do
     * the usual check for bad ECI state and advance of ECI state.
     * (If PSR.ECI is non-zero then we cannot be in an IT block.)
     */
    TCGv_i32 tmp;
    int vd;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
        a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15 ||
        a->rt == a->rt2) {
        /* Rt/Rt2 cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /* Convert Qreg index to Dreg for read_neon_element32() etc */
    vd = a->qd * 2;

    if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, vd, a->idx, MO_32);
        store_reg(s, a->rt, tmp);
    }
    if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, vd + 1, a->idx, MO_32);
        store_reg(s, a->rt2, tmp);
    }

    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VMOV_from_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
{
    /*
     * VMOV two general-purpose registers to two 32-bit vector lanes.
     * This insn is not predicated but it is subject to beat-wise
     * execution if it is not in an IT block. For us this means
     * only that if PSR.ECI says we should not be executing the beat
     * corresponding to the lane of the vector register being accessed
     * then we should skip performing the move, and that we need to do
     * the usual check for bad ECI state and advance of ECI state.
     * (If PSR.ECI is non-zero then we cannot be in an IT block.)
     */
    TCGv_i32 tmp;
    int vd;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
        a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15) {
        /* Rt/Rt2 cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /* Convert Qreg idx to Dreg for read_neon_element32() etc */
    vd = a->qd * 2;

    if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
        tmp = load_reg(s, a->rt);
        write_neon_element32(tmp, vd, a->idx, MO_32);
        tcg_temp_free_i32(tmp);
    }
    if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
        tmp = load_reg(s, a->rt2);
        write_neon_element32(tmp, vd + 1, a->idx, MO_32);
        tcg_temp_free_i32(tmp);
    }

    mve_update_and_store_eci(s);
    return true;
}