/*
 * ARM translation: M-profile MVE instructions
 *
 * Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

/* Include the generated decoder */
#include "decode-mve.c.inc"

/*
 * Function signatures of the generated MVE helpers; every helper takes
 * cpu_env first, then pointers into env for the Q registers involved.
 */
typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);

/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}

/* Return a freshly allocated TCG pointer to Qn's storage within env */
static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}

static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     * (Callers OR together all the Q register numbers an insn uses,
     * so one comparison checks them all.)
     */
    return qmask < 8;
}

static bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
     */
    s->eci_handled = true;
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved value: INVSTATE UsageFault */
        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
                           default_exception_el(s));
        return false;
    }
}

/* Advance the translation-time view of ECI after a beatwise insn */
static void mve_update_eci(DisasContext *s)
{
    /*
     * The helper function will always update the CPUState field,
     * so we only need to update the DisasContext field.
     */
    if (s->eci) {
        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
    }
}

static bool mve_skip_first_beat(DisasContext *s)
{
    /* Return true if PSR.ECI says we must skip the first beat of this insn */
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        g_assert_not_reached();
    }
}

/*
 * Common translation for VLDR/VSTR: decode the addressing mode,
 * compute the address, call the ldst helper, and handle writeback.
 */
static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn)
{
    TCGv_i32 addr;
    uint32_t offset;
    TCGv_ptr qreg;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }

    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
    if (a->rn == 15 || (a->rn == 13 && a->w)) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /* Immediate is scaled by the element size; 'a' selects add vs subtract */
    offset = a->imm << a->size;
    if (!a->a) {
        offset = -offset;
    }
    addr = load_reg(s, a->rn);
    if (a->p) {
        /* Pre-indexed: apply the offset before the access */
        tcg_gen_addi_i32(addr, addr, offset);
    }

    qreg = mve_qreg_ptr(a->qd);
    fn(cpu_env, qreg, addr);
    tcg_temp_free_ptr(qreg);

    /*
     * Writeback always happens after the last beat of the insn,
     * regardless of predication
     */
    if (a->w) {
        if (!a->p) {
            /* Post-indexed: apply the offset after the access */
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    mve_update_eci(s);
    return true;
}

static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
{
    /* Helpers indexed by [size][load-not-store]; size 0b11 is invalid */
    static MVEGenLdStFn * const ldstfns[4][2] = {
        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
        { NULL, NULL }
    };
    return do_ldst(s, a, ldstfns[a->size][a->l]);
}

/*
 * Widening loads / narrowing stores: loads sign- or zero-extend each
 * element (SLD/ULD); the store (ST) truncates. Indexed by [u][l];
 * an unsigned narrowing store is not a valid encoding (NULL).
 */
#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST)                  \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
    {                                                           \
        static MVEGenLdStFn * const ldstfns[2][2] = {           \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
            { NULL, gen_helper_mve_##ULD },                     \
        };                                                      \
        return do_ldst(s, a, ldstfns[a->u][a->l]);              \
    }

DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h)
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w)
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w)

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    TCGv_ptr qd;
    TCGv_i32 rt;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rt == 13 || a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rt = load_reg(s, a->rt);
    /* Replicate the element across the 32-bit value before the helper */
    tcg_gen_dup_i32(a->size, rt, rt);
    gen_helper_mve_vdup(cpu_env, qd, rt);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rt);
    mve_update_eci(s);
    return true;
}

/* Common translation for one-input vector ops: Qd = op(Qm) */
static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

/* One-input ops with byte/half/word helpers, indexed by a->size (0b11 invalid) */
#define DO_1OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_1OP(VCLZ, vclz)
DO_1OP(VCLS, vcls)
DO_1OP(VABS, vabs)
DO_1OP(VNEG, vneg)

static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
    /* VREV16 only exists for byte elements */
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev16b,
        NULL,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV32(DisasContext *s, arg_1op *a)
{
    /* VREV32 only exists for byte and halfword elements */
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev32b,
        gen_helper_mve_vrev32h,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV64(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev64b,
        gen_helper_mve_vrev64h,
        gen_helper_mve_vrev64w,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VMVN(DisasContext *s, arg_1op *a)
{
    /* Bitwise NOT is size-independent, so there is a single helper */
    return do_1op(s, a, gen_helper_mve_vmvn);
}

static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
{
    /* FP ops exist only for half and single precision */
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfabsh,
        gen_helper_mve_vfabss,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
{
    /* FP ops exist only for half and single precision */
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfnegh,
        gen_helper_mve_vfnegs,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

/* Common translation for two-input vector ops: Qd = op(Qn, Qm) */
static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn)
{
    TCGv_ptr qd, qn, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qn, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

/* Logical ops operate on the full vector, so one helper regardless of size */
#define DO_LOGIC(INSN, HELPER)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        return do_2op(s, a, HELPER);                            \
    }

DO_LOGIC(VAND, gen_helper_mve_vand)
DO_LOGIC(VBIC, gen_helper_mve_vbic)
DO_LOGIC(VORR, gen_helper_mve_vorr)
DO_LOGIC(VORN, gen_helper_mve_vorn)
DO_LOGIC(VEOR, gen_helper_mve_veor)

/* Two-input ops with byte/half/word helpers, indexed by a->size (0b11 invalid) */
#define DO_2OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP(VADD, vadd)
DO_2OP(VSUB, vsub)
DO_2OP(VMUL, vmul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
DO_2OP(VMAX_S, vmaxs)
DO_2OP(VMAX_U, vmaxu)
DO_2OP(VMIN_S, vmins)
DO_2OP(VMIN_U, vminu)
DO_2OP(VABD_S, vabds)
DO_2OP(VABD_U, vabdu)
DO_2OP(VHADD_S, vhadds)
DO_2OP(VHADD_U, vhaddu)
DO_2OP(VHSUB_S, vhsubs)
DO_2OP(VHSUB_U, vhsubu)
DO_2OP(VMULL_BS, vmullbs)
DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
/*
 * Common translation for long dual-accumulate insns (VMLALDAV etc):
 * the 64-bit accumulator lives in the RdaHi:RdaLo general-register pair.
 */
static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
                             MVEGenDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn | a->qm) ||
        !fn) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Seed the accumulator from the current RdaHi:RdaLo pair */
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        rda = tcg_const_i64(0);
    }

    /* rda is both input accumulator and 64-bit result */
    fn(rda, cpu_env, qn, qm, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    /* Split the 64-bit result back into the RdaHi:RdaLo pair */
    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
{
    /* Helpers indexed by [size][x]; only half/word element sizes exist */
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
        { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
{
    /* No unsigned "exchange" (x=1) variants exist, hence the NULLs */
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavuh, NULL },
        { gen_helper_mve_vmlaldavuw, NULL },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}