1 /* 2 * AArch64 SVE translation 3 * 4 * Copyright (c) 2018 Linaro, Ltd 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "translate.h" 22 #include "translate-a64.h" 23 #include "fpu/softfloat.h" 24 25 26 typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t, 27 TCGv_i64, uint32_t, uint32_t); 28 29 typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr, 30 TCGv_ptr, TCGv_i32); 31 typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr, 32 TCGv_ptr, TCGv_ptr, TCGv_i32); 33 34 typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32); 35 typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr, 36 TCGv_ptr, TCGv_i64, TCGv_i32); 37 38 /* 39 * Helpers for extracting complex instruction fields. 40 */ 41 42 /* See e.g. ASR (immediate, predicated). 43 * Returns -1 for unallocated encoding; diagnose later. 44 */ 45 static int tszimm_esz(DisasContext *s, int x) 46 { 47 x >>= 3; /* discard imm3 */ 48 return 31 - clz32(x); 49 } 50 51 static int tszimm_shr(DisasContext *s, int x) 52 { 53 /* 54 * We won't use the tszimm_shr() value if tszimm_esz() returns -1 (the 55 * trans function will check for esz < 0), so we can return any 56 * value we like from here in that case as long as we avoid UB. 57 */ 58 int esz = tszimm_esz(s, x); 59 if (esz < 0) { 60 return esz; 61 } 62 return (16 << esz) - x; 63 } 64 65 /* See e.g. LSL (immediate, predicated). */ 66 static int tszimm_shl(DisasContext *s, int x) 67 { 68 /* As with tszimm_shr(), value will be unused if esz < 0 */ 69 int esz = tszimm_esz(s, x); 70 if (esz < 0) { 71 return esz; 72 } 73 return x - (8 << esz); 74 } 75 76 /* The SH bit is in bit 8. Extract the low 8 and shift. */ 77 static inline int expand_imm_sh8s(DisasContext *s, int x) 78 { 79 return (int8_t)x << (x & 0x100 ? 8 : 0); 80 } 81 82 static inline int expand_imm_sh8u(DisasContext *s, int x) 83 { 84 return (uint8_t)x << (x & 0x100 ? 8 : 0); 85 } 86 87 /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype) 88 * with unsigned data. C.f. SVE Memory Contiguous Load Group. 89 */ 90 static inline int msz_dtype(DisasContext *s, int msz) 91 { 92 static const uint8_t dtype[5] = { 0, 5, 10, 15, 18 }; 93 return dtype[msz]; 94 } 95 96 /* 97 * Include the generated decoder. 98 */ 99 100 #include "decode-sve.c.inc" 101 102 /* 103 * Implement all of the translator functions referenced by the decoder. 104 */ 105 106 /* Invoke an out-of-line helper on 2 Zregs. */ 107 static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn, 108 int rd, int rn, int data) 109 { 110 if (fn == NULL) { 111 return false; 112 } 113 if (sve_access_check(s)) { 114 unsigned vsz = vec_full_reg_size(s); 115 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd), 116 vec_full_reg_offset(s, rn), 117 vsz, vsz, data, fn); 118 } 119 return true; 120 } 121 122 static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, 123 int rd, int rn, int data, 124 ARMFPStatusFlavour flavour) 125 { 126 if (fn == NULL) { 127 return false; 128 } 129 if (sve_access_check(s)) { 130 unsigned vsz = vec_full_reg_size(s); 131 TCGv_ptr status = fpstatus_ptr(flavour); 132 133 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd), 134 vec_full_reg_offset(s, rn), 135 status, vsz, vsz, data, fn); 136 } 137 return true; 138 } 139 140 static bool gen_gvec_fpst_ah_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, 141 arg_rr_esz *a, int data) 142 { 143 return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data, 144 select_ah_fpst(s, a->esz)); 145 } 146 147 /* Invoke an out-of-line helper on 3 Zregs. */ 148 static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn, 149 int rd, int rn, int rm, int data) 150 { 151 if (fn == NULL) { 152 return false; 153 } 154 if (sve_access_check(s)) { 155 unsigned vsz = vec_full_reg_size(s); 156 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), 157 vec_full_reg_offset(s, rn), 158 vec_full_reg_offset(s, rm), 159 vsz, vsz, data, fn); 160 } 161 return true; 162 } 163 164 static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn, 165 arg_rrr_esz *a, int data) 166 { 167 return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data); 168 } 169 170 /* Invoke an out-of-line helper on 3 Zregs, plus float_status. */ 171 static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, 172 int rd, int rn, int rm, 173 int data, ARMFPStatusFlavour flavour) 174 { 175 if (fn == NULL) { 176 return false; 177 } 178 if (sve_access_check(s)) { 179 unsigned vsz = vec_full_reg_size(s); 180 TCGv_ptr status = fpstatus_ptr(flavour); 181 182 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 183 vec_full_reg_offset(s, rn), 184 vec_full_reg_offset(s, rm), 185 status, vsz, vsz, data, fn); 186 } 187 return true; 188 } 189 190 static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, 191 arg_rrr_esz *a, int data) 192 { 193 return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data, 194 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); 195 } 196 197 static bool gen_gvec_fpst_ah_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, 198 arg_rrr_esz *a, int data) 199 { 200 return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data, 201 select_ah_fpst(s, a->esz)); 202 } 203 204 /* Invoke an out-of-line helper on 4 Zregs. */ 205 static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn, 206 int rd, int rn, int rm, int ra, int data) 207 { 208 if (fn == NULL) { 209 return false; 210 } 211 if (sve_access_check(s)) { 212 unsigned vsz = vec_full_reg_size(s); 213 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 214 vec_full_reg_offset(s, rn), 215 vec_full_reg_offset(s, rm), 216 vec_full_reg_offset(s, ra), 217 vsz, vsz, data, fn); 218 } 219 return true; 220 } 221 222 static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn, 223 arg_rrrr_esz *a, int data) 224 { 225 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data); 226 } 227 228 static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn, 229 arg_rrxr_esz *a) 230 { 231 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index); 232 } 233 234 /* Invoke an out-of-line helper on 4 Zregs, plus a pointer. */ 235 static bool gen_gvec_ptr_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, 236 int rd, int rn, int rm, int ra, 237 int data, TCGv_ptr ptr) 238 { 239 if (fn == NULL) { 240 return false; 241 } 242 if (sve_access_check(s)) { 243 unsigned vsz = vec_full_reg_size(s); 244 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 245 vec_full_reg_offset(s, rn), 246 vec_full_reg_offset(s, rm), 247 vec_full_reg_offset(s, ra), 248 ptr, vsz, vsz, data, fn); 249 } 250 return true; 251 } 252 253 static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, 254 int rd, int rn, int rm, int ra, 255 int data, ARMFPStatusFlavour flavour) 256 { 257 TCGv_ptr status = fpstatus_ptr(flavour); 258 bool ret = gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, status); 259 return ret; 260 } 261 262 static bool gen_gvec_env_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, 263 int rd, int rn, int rm, int ra, 264 int data) 265 { 266 return gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, tcg_env); 267 } 268 269 static bool gen_gvec_env_arg_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, 270 arg_rrrr_esz *a, int data) 271 { 272 return gen_gvec_env_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data); 273 } 274 275 static bool gen_gvec_env_arg_zzxz(DisasContext *s, gen_helper_gvec_4_ptr *fn, 276 arg_rrxr_esz *a) 277 { 278 return gen_gvec_env_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index); 279 } 280 281 /* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. */ 282 static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn, 283 int rd, int rn, int rm, int ra, int pg, 284 int data, ARMFPStatusFlavour flavour) 285 { 286 if (fn == NULL) { 287 return false; 288 } 289 if (sve_access_check(s)) { 290 unsigned vsz = vec_full_reg_size(s); 291 TCGv_ptr status = fpstatus_ptr(flavour); 292 293 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, rd), 294 vec_full_reg_offset(s, rn), 295 vec_full_reg_offset(s, rm), 296 vec_full_reg_offset(s, ra), 297 pred_full_reg_offset(s, pg), 298 status, vsz, vsz, data, fn); 299 } 300 return true; 301 } 302 303 /* Invoke an out-of-line helper on 2 Zregs and a predicate. */ 304 static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn, 305 int rd, int rn, int pg, int data) 306 { 307 if (fn == NULL) { 308 return false; 309 } 310 if (sve_access_check(s)) { 311 unsigned vsz = vec_full_reg_size(s); 312 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), 313 vec_full_reg_offset(s, rn), 314 pred_full_reg_offset(s, pg), 315 vsz, vsz, data, fn); 316 } 317 return true; 318 } 319 320 static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn, 321 arg_rpr_esz *a, int data) 322 { 323 return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data); 324 } 325 326 static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn, 327 arg_rpri_esz *a) 328 { 329 return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm); 330 } 331 332 static bool gen_gvec_fpst_zzp(DisasContext *s, gen_helper_gvec_3_ptr *fn, 333 int rd, int rn, int pg, int data, 334 ARMFPStatusFlavour flavour) 335 { 336 if (fn == NULL) { 337 return false; 338 } 339 if (sve_access_check(s)) { 340 unsigned vsz = vec_full_reg_size(s); 341 TCGv_ptr status = fpstatus_ptr(flavour); 342 343 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 344 vec_full_reg_offset(s, rn), 345 pred_full_reg_offset(s, pg), 346 status, vsz, vsz, data, fn); 347 } 348 return true; 349 } 350 351 static bool gen_gvec_fpst_arg_zpz(DisasContext *s, gen_helper_gvec_3_ptr *fn, 352 arg_rpr_esz *a, int data, 353 ARMFPStatusFlavour flavour) 354 { 355 return gen_gvec_fpst_zzp(s, fn, a->rd, a->rn, a->pg, data, flavour); 356 } 357 358 /* Invoke an out-of-line helper on 3 Zregs and a predicate. */ 359 static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn, 360 int rd, int rn, int rm, int pg, int data) 361 { 362 if (fn == NULL) { 363 return false; 364 } 365 if (sve_access_check(s)) { 366 unsigned vsz = vec_full_reg_size(s); 367 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 368 vec_full_reg_offset(s, rn), 369 vec_full_reg_offset(s, rm), 370 pred_full_reg_offset(s, pg), 371 vsz, vsz, data, fn); 372 } 373 return true; 374 } 375 376 static bool gen_gvec_ool_arg_zpzz(DisasContext *s, gen_helper_gvec_4 *fn, 377 arg_rprr_esz *a, int data) 378 { 379 return gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, data); 380 } 381 382 /* Invoke an out-of-line helper on 3 Zregs and a predicate. */ 383 static bool gen_gvec_fpst_zzzp(DisasContext *s, gen_helper_gvec_4_ptr *fn, 384 int rd, int rn, int rm, int pg, int data, 385 ARMFPStatusFlavour flavour) 386 { 387 if (fn == NULL) { 388 return false; 389 } 390 if (sve_access_check(s)) { 391 unsigned vsz = vec_full_reg_size(s); 392 TCGv_ptr status = fpstatus_ptr(flavour); 393 394 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 395 vec_full_reg_offset(s, rn), 396 vec_full_reg_offset(s, rm), 397 pred_full_reg_offset(s, pg), 398 status, vsz, vsz, data, fn); 399 } 400 return true; 401 } 402 403 static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, 404 arg_rprr_esz *a) 405 { 406 return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0, 407 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); 408 } 409 410 /* Invoke a vector expander on two Zregs and an immediate. */ 411 static bool gen_gvec_fn_zzi(DisasContext *s, GVecGen2iFn *gvec_fn, 412 int esz, int rd, int rn, uint64_t imm) 413 { 414 if (gvec_fn == NULL) { 415 return false; 416 } 417 if (sve_access_check(s)) { 418 unsigned vsz = vec_full_reg_size(s); 419 gvec_fn(esz, vec_full_reg_offset(s, rd), 420 vec_full_reg_offset(s, rn), imm, vsz, vsz); 421 } 422 return true; 423 } 424 425 static bool gen_gvec_fn_arg_zzi(DisasContext *s, GVecGen2iFn *gvec_fn, 426 arg_rri_esz *a) 427 { 428 if (a->esz < 0) { 429 /* Invalid tsz encoding -- see tszimm_esz. */ 430 return false; 431 } 432 return gen_gvec_fn_zzi(s, gvec_fn, a->esz, a->rd, a->rn, a->imm); 433 } 434 435 /* Invoke a vector expander on three Zregs. */ 436 static bool gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn, 437 int esz, int rd, int rn, int rm) 438 { 439 if (gvec_fn == NULL) { 440 return false; 441 } 442 if (sve_access_check(s)) { 443 unsigned vsz = vec_full_reg_size(s); 444 gvec_fn(esz, vec_full_reg_offset(s, rd), 445 vec_full_reg_offset(s, rn), 446 vec_full_reg_offset(s, rm), vsz, vsz); 447 } 448 return true; 449 } 450 451 static bool gen_gvec_fn_arg_zzz(DisasContext *s, GVecGen3Fn *fn, 452 arg_rrr_esz *a) 453 { 454 return gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm); 455 } 456 457 /* Invoke a vector expander on four Zregs. */ 458 static bool gen_gvec_fn_arg_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn, 459 arg_rrrr_esz *a) 460 { 461 if (gvec_fn == NULL) { 462 return false; 463 } 464 if (sve_access_check(s)) { 465 unsigned vsz = vec_full_reg_size(s); 466 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd), 467 vec_full_reg_offset(s, a->rn), 468 vec_full_reg_offset(s, a->rm), 469 vec_full_reg_offset(s, a->ra), vsz, vsz); 470 } 471 return true; 472 } 473 474 /* Invoke a vector move on two Zregs. */ 475 static bool do_mov_z(DisasContext *s, int rd, int rn) 476 { 477 if (sve_access_check(s)) { 478 unsigned vsz = vec_full_reg_size(s); 479 tcg_gen_gvec_mov(MO_8, vec_full_reg_offset(s, rd), 480 vec_full_reg_offset(s, rn), vsz, vsz); 481 } 482 return true; 483 } 484 485 /* Initialize a Zreg with replications of a 64-bit immediate. */ 486 static void do_dupi_z(DisasContext *s, int rd, uint64_t word) 487 { 488 unsigned vsz = vec_full_reg_size(s); 489 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word); 490 } 491 492 /* Invoke a vector expander on three Pregs. */ 493 static bool gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn, 494 int rd, int rn, int rm) 495 { 496 if (sve_access_check(s)) { 497 unsigned psz = pred_gvec_reg_size(s); 498 gvec_fn(MO_64, pred_full_reg_offset(s, rd), 499 pred_full_reg_offset(s, rn), 500 pred_full_reg_offset(s, rm), psz, psz); 501 } 502 return true; 503 } 504 505 /* Invoke a vector move on two Pregs. */ 506 static bool do_mov_p(DisasContext *s, int rd, int rn) 507 { 508 if (sve_access_check(s)) { 509 unsigned psz = pred_gvec_reg_size(s); 510 tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd), 511 pred_full_reg_offset(s, rn), psz, psz); 512 } 513 return true; 514 } 515 516 /* Set the cpu flags as per a return from an SVE helper. */ 517 static void do_pred_flags(TCGv_i32 t) 518 { 519 tcg_gen_mov_i32(cpu_NF, t); 520 tcg_gen_andi_i32(cpu_ZF, t, 2); 521 tcg_gen_andi_i32(cpu_CF, t, 1); 522 tcg_gen_movi_i32(cpu_VF, 0); 523 } 524 525 /* Subroutines computing the ARM PredTest psuedofunction. */ 526 static void do_predtest1(TCGv_i64 d, TCGv_i64 g) 527 { 528 TCGv_i32 t = tcg_temp_new_i32(); 529 530 gen_helper_sve_predtest1(t, d, g); 531 do_pred_flags(t); 532 } 533 534 static void do_predtest(DisasContext *s, int dofs, int gofs, int words) 535 { 536 TCGv_ptr dptr = tcg_temp_new_ptr(); 537 TCGv_ptr gptr = tcg_temp_new_ptr(); 538 TCGv_i32 t = tcg_temp_new_i32(); 539 540 tcg_gen_addi_ptr(dptr, tcg_env, dofs); 541 tcg_gen_addi_ptr(gptr, tcg_env, gofs); 542 543 gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words)); 544 545 do_pred_flags(t); 546 } 547 548 /* For each element size, the bits within a predicate word that are active. */ 549 const uint64_t pred_esz_masks[5] = { 550 0xffffffffffffffffull, 0x5555555555555555ull, 551 0x1111111111111111ull, 0x0101010101010101ull, 552 0x0001000100010001ull, 553 }; 554 555 static bool trans_INVALID(DisasContext *s, arg_INVALID *a) 556 { 557 unallocated_encoding(s); 558 return true; 559 } 560 561 /* 562 *** SVE Logical - Unpredicated Group 563 */ 564 565 TRANS_FEAT(AND_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_and, a) 566 TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a) 567 TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a) 568 TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a) 569 570 static bool trans_XAR(DisasContext *s, arg_rrri_esz *a) 571 { 572 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) { 573 return false; 574 } 575 if (sve_access_check(s)) { 576 unsigned vsz = vec_full_reg_size(s); 577 gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd), 578 vec_full_reg_offset(s, a->rn), 579 vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz); 580 } 581 return true; 582 } 583 584 TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_gvec_eor3, a) 585 TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_gvec_bcax, a) 586 587 static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 588 uint32_t a, uint32_t oprsz, uint32_t maxsz) 589 { 590 /* BSL differs from the generic bitsel in argument ordering. */ 591 tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz); 592 } 593 594 TRANS_FEAT(BSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl, a) 595 596 static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) 597 { 598 tcg_gen_andc_i64(n, k, n); 599 tcg_gen_andc_i64(m, m, k); 600 tcg_gen_or_i64(d, n, m); 601 } 602 603 static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 604 TCGv_vec m, TCGv_vec k) 605 { 606 tcg_gen_not_vec(vece, n, n); 607 tcg_gen_bitsel_vec(vece, d, k, n, m); 608 } 609 610 static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 611 uint32_t a, uint32_t oprsz, uint32_t maxsz) 612 { 613 static const GVecGen4 op = { 614 .fni8 = gen_bsl1n_i64, 615 .fniv = gen_bsl1n_vec, 616 .fno = gen_helper_sve2_bsl1n, 617 .vece = MO_64, 618 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 619 }; 620 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); 621 } 622 623 TRANS_FEAT(BSL1N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl1n, a) 624 625 static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) 626 { 627 /* 628 * Z[dn] = (n & k) | (~m & ~k) 629 * = | ~(m | k) 630 */ 631 tcg_gen_and_i64(n, n, k); 632 if (tcg_op_supported(INDEX_op_orc, TCG_TYPE_I64, 0)) { 633 tcg_gen_or_i64(m, m, k); 634 tcg_gen_orc_i64(d, n, m); 635 } else { 636 tcg_gen_nor_i64(m, m, k); 637 tcg_gen_or_i64(d, n, m); 638 } 639 } 640 641 static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 642 TCGv_vec m, TCGv_vec k) 643 { 644 tcg_gen_not_vec(vece, m, m); 645 tcg_gen_bitsel_vec(vece, d, k, n, m); 646 } 647 648 static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 649 uint32_t a, uint32_t oprsz, uint32_t maxsz) 650 { 651 static const GVecGen4 op = { 652 .fni8 = gen_bsl2n_i64, 653 .fniv = gen_bsl2n_vec, 654 .fno = gen_helper_sve2_bsl2n, 655 .vece = MO_64, 656 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 657 }; 658 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); 659 } 660 661 TRANS_FEAT(BSL2N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl2n, a) 662 663 static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) 664 { 665 tcg_gen_and_i64(n, n, k); 666 tcg_gen_andc_i64(m, m, k); 667 tcg_gen_nor_i64(d, n, m); 668 } 669 670 static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 671 TCGv_vec m, TCGv_vec k) 672 { 673 tcg_gen_bitsel_vec(vece, d, k, n, m); 674 tcg_gen_not_vec(vece, d, d); 675 } 676 677 static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 678 uint32_t a, uint32_t oprsz, uint32_t maxsz) 679 { 680 static const GVecGen4 op = { 681 .fni8 = gen_nbsl_i64, 682 .fniv = gen_nbsl_vec, 683 .fno = gen_helper_sve2_nbsl, 684 .vece = MO_64, 685 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 686 }; 687 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); 688 } 689 690 TRANS_FEAT(NBSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_nbsl, a) 691 692 /* 693 *** SVE Integer Arithmetic - Unpredicated Group 694 */ 695 696 TRANS_FEAT(ADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_add, a) 697 TRANS_FEAT(SUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sub, a) 698 TRANS_FEAT(SQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ssadd, a) 699 TRANS_FEAT(SQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sssub, a) 700 TRANS_FEAT(UQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_usadd, a) 701 TRANS_FEAT(UQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ussub, a) 702 703 /* 704 *** SVE Integer Arithmetic - Binary Predicated Group 705 */ 706 707 /* Select active elememnts from Zn and inactive elements from Zm, 708 * storing the result in Zd. 709 */ 710 static bool do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz) 711 { 712 static gen_helper_gvec_4 * const fns[4] = { 713 gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h, 714 gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d 715 }; 716 return gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0); 717 } 718 719 #define DO_ZPZZ(NAME, FEAT, name) \ 720 static gen_helper_gvec_4 * const name##_zpzz_fns[4] = { \ 721 gen_helper_##name##_zpzz_b, gen_helper_##name##_zpzz_h, \ 722 gen_helper_##name##_zpzz_s, gen_helper_##name##_zpzz_d, \ 723 }; \ 724 TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpzz, \ 725 name##_zpzz_fns[a->esz], a, 0) 726 727 DO_ZPZZ(AND_zpzz, aa64_sve, sve_and) 728 DO_ZPZZ(EOR_zpzz, aa64_sve, sve_eor) 729 DO_ZPZZ(ORR_zpzz, aa64_sve, sve_orr) 730 DO_ZPZZ(BIC_zpzz, aa64_sve, sve_bic) 731 732 DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add) 733 DO_ZPZZ(SUB_zpzz, aa64_sve, sve_sub) 734 735 DO_ZPZZ(SMAX_zpzz, aa64_sve, sve_smax) 736 DO_ZPZZ(UMAX_zpzz, aa64_sve, sve_umax) 737 DO_ZPZZ(SMIN_zpzz, aa64_sve, sve_smin) 738 DO_ZPZZ(UMIN_zpzz, aa64_sve, sve_umin) 739 DO_ZPZZ(SABD_zpzz, aa64_sve, sve_sabd) 740 DO_ZPZZ(UABD_zpzz, aa64_sve, sve_uabd) 741 742 DO_ZPZZ(MUL_zpzz, aa64_sve, sve_mul) 743 DO_ZPZZ(SMULH_zpzz, aa64_sve, sve_smulh) 744 DO_ZPZZ(UMULH_zpzz, aa64_sve, sve_umulh) 745 746 DO_ZPZZ(ASR_zpzz, aa64_sve, sve_asr) 747 DO_ZPZZ(LSR_zpzz, aa64_sve, sve_lsr) 748 DO_ZPZZ(LSL_zpzz, aa64_sve, sve_lsl) 749 750 static gen_helper_gvec_4 * const sdiv_fns[4] = { 751 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d 752 }; 753 TRANS_FEAT(SDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, sdiv_fns[a->esz], a, 0) 754 755 static gen_helper_gvec_4 * const udiv_fns[4] = { 756 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d 757 }; 758 TRANS_FEAT(UDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, udiv_fns[a->esz], a, 0) 759 760 TRANS_FEAT(SEL_zpzz, aa64_sve, do_sel_z, a->rd, a->rn, a->rm, a->pg, a->esz) 761 762 /* 763 *** SVE Integer Arithmetic - Unary Predicated Group 764 */ 765 766 #define DO_ZPZ(NAME, FEAT, name) \ 767 static gen_helper_gvec_3 * const name##_fns[4] = { \ 768 gen_helper_##name##_b, gen_helper_##name##_h, \ 769 gen_helper_##name##_s, gen_helper_##name##_d, \ 770 }; \ 771 TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0) 772 773 DO_ZPZ(CLS, aa64_sve, sve_cls) 774 DO_ZPZ(CLZ, aa64_sve, sve_clz) 775 DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz) 776 DO_ZPZ(CNOT, aa64_sve, sve_cnot) 777 DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz) 778 DO_ZPZ(ABS, aa64_sve, sve_abs) 779 DO_ZPZ(NEG, aa64_sve, sve_neg) 780 DO_ZPZ(RBIT, aa64_sve, sve_rbit) 781 DO_ZPZ(ORQV, aa64_sme2p1_or_sve2p1, sve2p1_orqv) 782 DO_ZPZ(EORQV, aa64_sme2p1_or_sve2p1, sve2p1_eorqv) 783 DO_ZPZ(ANDQV, aa64_sme2p1_or_sve2p1, sve2p1_andqv) 784 785 static gen_helper_gvec_3 * const fabs_fns[4] = { 786 NULL, gen_helper_sve_fabs_h, 787 gen_helper_sve_fabs_s, gen_helper_sve_fabs_d, 788 }; 789 static gen_helper_gvec_3 * const fabs_ah_fns[4] = { 790 NULL, gen_helper_sve_ah_fabs_h, 791 gen_helper_sve_ah_fabs_s, gen_helper_sve_ah_fabs_d, 792 }; 793 TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, 794 s->fpcr_ah ? fabs_ah_fns[a->esz] : fabs_fns[a->esz], a, 0) 795 796 static gen_helper_gvec_3 * const fneg_fns[4] = { 797 NULL, gen_helper_sve_fneg_h, 798 gen_helper_sve_fneg_s, gen_helper_sve_fneg_d, 799 }; 800 static gen_helper_gvec_3 * const fneg_ah_fns[4] = { 801 NULL, gen_helper_sve_ah_fneg_h, 802 gen_helper_sve_ah_fneg_s, gen_helper_sve_ah_fneg_d, 803 }; 804 TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, 805 s->fpcr_ah ? fneg_ah_fns[a->esz] : fneg_fns[a->esz], a, 0) 806 807 static gen_helper_gvec_3 * const sxtb_fns[4] = { 808 NULL, gen_helper_sve_sxtb_h, 809 gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d, 810 }; 811 TRANS_FEAT(SXTB, aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0) 812 813 static gen_helper_gvec_3 * const uxtb_fns[4] = { 814 NULL, gen_helper_sve_uxtb_h, 815 gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d, 816 }; 817 TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0) 818 819 static gen_helper_gvec_3 * const sxth_fns[4] = { 820 NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d 821 }; 822 TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0) 823 824 static gen_helper_gvec_3 * const uxth_fns[4] = { 825 NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d 826 }; 827 TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0) 828 829 TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz, 830 a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0) 831 TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz, 832 a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0) 833 834 static gen_helper_gvec_3 * const addqv_fns[4] = { 835 gen_helper_sve2p1_addqv_b, gen_helper_sve2p1_addqv_h, 836 gen_helper_sve2p1_addqv_s, gen_helper_sve2p1_addqv_d, 837 }; 838 TRANS_FEAT(ADDQV, aa64_sme2p1_or_sve2p1, 839 gen_gvec_ool_arg_zpz, addqv_fns[a->esz], a, 0) 840 841 static gen_helper_gvec_3 * const smaxqv_fns[4] = { 842 gen_helper_sve2p1_smaxqv_b, gen_helper_sve2p1_smaxqv_h, 843 gen_helper_sve2p1_smaxqv_s, gen_helper_sve2p1_smaxqv_d, 844 }; 845 TRANS_FEAT(SMAXQV, aa64_sme2p1_or_sve2p1, 846 gen_gvec_ool_arg_zpz, smaxqv_fns[a->esz], a, 0) 847 848 static gen_helper_gvec_3 * const sminqv_fns[4] = { 849 gen_helper_sve2p1_sminqv_b, gen_helper_sve2p1_sminqv_h, 850 gen_helper_sve2p1_sminqv_s, gen_helper_sve2p1_sminqv_d, 851 }; 852 TRANS_FEAT(SMINQV, aa64_sme2p1_or_sve2p1, 853 gen_gvec_ool_arg_zpz, sminqv_fns[a->esz], a, 0) 854 855 static gen_helper_gvec_3 * const umaxqv_fns[4] = { 856 gen_helper_sve2p1_umaxqv_b, gen_helper_sve2p1_umaxqv_h, 857 gen_helper_sve2p1_umaxqv_s, gen_helper_sve2p1_umaxqv_d, 858 }; 859 TRANS_FEAT(UMAXQV, aa64_sme2p1_or_sve2p1, 860 gen_gvec_ool_arg_zpz, umaxqv_fns[a->esz], a, 0) 861 862 static gen_helper_gvec_3 * const uminqv_fns[4] = { 863 gen_helper_sve2p1_uminqv_b, gen_helper_sve2p1_uminqv_h, 864 gen_helper_sve2p1_uminqv_s, gen_helper_sve2p1_uminqv_d, 865 }; 866 TRANS_FEAT(UMINQV, aa64_sme2p1_or_sve2p1, 867 gen_gvec_ool_arg_zpz, uminqv_fns[a->esz], a, 0) 868 869 /* 870 *** SVE Integer Reduction Group 871 */ 872 873 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32); 874 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a, 875 gen_helper_gvec_reduc *fn) 876 { 877 unsigned vsz = vec_full_reg_size(s); 878 TCGv_ptr t_zn, t_pg; 879 TCGv_i32 desc; 880 TCGv_i64 temp; 881 882 if (fn == NULL) { 883 return false; 884 } 885 if (!sve_access_check(s)) { 886 return true; 887 } 888 889 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 890 temp = tcg_temp_new_i64(); 891 t_zn = tcg_temp_new_ptr(); 892 t_pg = tcg_temp_new_ptr(); 893 894 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn)); 895 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg)); 896 fn(temp, t_zn, t_pg, desc); 897 898 write_fp_dreg(s, a->rd, temp); 899 return true; 900 } 901 902 #define DO_VPZ(NAME, name) \ 903 static gen_helper_gvec_reduc * const name##_fns[4] = { \ 904 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \ 905 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ 906 }; \ 907 TRANS_FEAT(NAME, aa64_sve, do_vpz_ool, a, name##_fns[a->esz]) 908 909 DO_VPZ(ORV, orv) 910 DO_VPZ(ANDV, andv) 911 DO_VPZ(EORV, eorv) 912 913 DO_VPZ(UADDV, uaddv) 914 DO_VPZ(SMAXV, smaxv) 915 DO_VPZ(UMAXV, umaxv) 916 DO_VPZ(SMINV, sminv) 917 DO_VPZ(UMINV, uminv) 918 919 static gen_helper_gvec_reduc * const saddv_fns[4] = { 920 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h, 921 gen_helper_sve_saddv_s, NULL 922 }; 923 TRANS_FEAT(SADDV, aa64_sve, do_vpz_ool, a, saddv_fns[a->esz]) 924 925 #undef DO_VPZ 926 927 /* 928 *** SVE Shift by Immediate - Predicated Group 929 */ 930 931 /* 932 * Copy Zn into Zd, storing zeros into inactive elements. 933 * If invert, store zeros into the active elements. 934 */ 935 static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg, 936 int esz, bool invert) 937 { 938 static gen_helper_gvec_3 * const fns[4] = { 939 gen_helper_sve_movz_b, gen_helper_sve_movz_h, 940 gen_helper_sve_movz_s, gen_helper_sve_movz_d, 941 }; 942 return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert); 943 } 944 945 static bool do_shift_zpzi(DisasContext *s, arg_rpri_esz *a, bool asr, 946 gen_helper_gvec_3 * const fns[4]) 947 { 948 int max; 949 950 if (a->esz < 0) { 951 /* Invalid tsz encoding -- see tszimm_esz. */ 952 return false; 953 } 954 955 /* 956 * Shift by element size is architecturally valid. 957 * For arithmetic right-shift, it's the same as by one less. 958 * For logical shifts and ASRD, it is a zeroing operation. 959 */ 960 max = 8 << a->esz; 961 if (a->imm >= max) { 962 if (asr) { 963 a->imm = max - 1; 964 } else { 965 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true); 966 } 967 } 968 return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a); 969 } 970 971 static gen_helper_gvec_3 * const asr_zpzi_fns[4] = { 972 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h, 973 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d, 974 }; 975 TRANS_FEAT(ASR_zpzi, aa64_sve, do_shift_zpzi, a, true, asr_zpzi_fns) 976 977 static gen_helper_gvec_3 * const lsr_zpzi_fns[4] = { 978 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h, 979 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d, 980 }; 981 TRANS_FEAT(LSR_zpzi, aa64_sve, do_shift_zpzi, a, false, lsr_zpzi_fns) 982 983 static gen_helper_gvec_3 * const lsl_zpzi_fns[4] = { 984 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h, 985 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d, 986 }; 987 TRANS_FEAT(LSL_zpzi, aa64_sve, do_shift_zpzi, a, false, lsl_zpzi_fns) 988 989 static gen_helper_gvec_3 * const asrd_fns[4] = { 990 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h, 991 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d, 992 }; 993 TRANS_FEAT(ASRD, aa64_sve, do_shift_zpzi, a, false, asrd_fns) 994 995 static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = { 996 gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h, 997 gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d, 998 }; 999 TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi, 1000 a->esz < 0 ? NULL : sqshl_zpzi_fns[a->esz], a) 1001 1002 static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = { 1003 gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h, 1004 gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d, 1005 }; 1006 TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi, 1007 a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a) 1008 1009 static gen_helper_gvec_3 * const srshr_fns[4] = { 1010 gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h, 1011 gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d, 1012 }; 1013 TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi, 1014 a->esz < 0 ? NULL : srshr_fns[a->esz], a) 1015 1016 static gen_helper_gvec_3 * const urshr_fns[4] = { 1017 gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h, 1018 gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d, 1019 }; 1020 TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi, 1021 a->esz < 0 ? NULL : urshr_fns[a->esz], a) 1022 1023 static gen_helper_gvec_3 * const sqshlu_fns[4] = { 1024 gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h, 1025 gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d, 1026 }; 1027 TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi, 1028 a->esz < 0 ? NULL : sqshlu_fns[a->esz], a) 1029 1030 /* 1031 *** SVE Bitwise Shift - Predicated Group 1032 */ 1033 1034 #define DO_ZPZW(NAME, name) \ 1035 static gen_helper_gvec_4 * const name##_zpzw_fns[4] = { \ 1036 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \ 1037 gen_helper_sve_##name##_zpzw_s, NULL \ 1038 }; \ 1039 TRANS_FEAT(NAME##_zpzw, aa64_sve, gen_gvec_ool_arg_zpzz, \ 1040 a->esz < 0 ? NULL : name##_zpzw_fns[a->esz], a, 0) 1041 1042 DO_ZPZW(ASR, asr) 1043 DO_ZPZW(LSR, lsr) 1044 DO_ZPZW(LSL, lsl) 1045 1046 #undef DO_ZPZW 1047 1048 /* 1049 *** SVE Bitwise Shift - Unpredicated Group 1050 */ 1051 1052 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr, 1053 void (*gvec_fn)(unsigned, uint32_t, uint32_t, 1054 int64_t, uint32_t, uint32_t)) 1055 { 1056 if (a->esz < 0) { 1057 /* Invalid tsz encoding -- see tszimm_esz. */ 1058 return false; 1059 } 1060 if (sve_access_check(s)) { 1061 unsigned vsz = vec_full_reg_size(s); 1062 /* Shift by element size is architecturally valid. For 1063 arithmetic right-shift, it's the same as by one less. 1064 Otherwise it is a zeroing operation. */ 1065 if (a->imm >= 8 << a->esz) { 1066 if (asr) { 1067 a->imm = (8 << a->esz) - 1; 1068 } else { 1069 do_dupi_z(s, a->rd, 0); 1070 return true; 1071 } 1072 } 1073 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd), 1074 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz); 1075 } 1076 return true; 1077 } 1078 1079 TRANS_FEAT(ASR_zzi, aa64_sve, do_shift_imm, a, true, tcg_gen_gvec_sari) 1080 TRANS_FEAT(LSR_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shri) 1081 TRANS_FEAT(LSL_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shli) 1082 1083 #define DO_ZZW(NAME, name) \ 1084 static gen_helper_gvec_3 * const name##_zzw_fns[4] = { \ 1085 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \ 1086 gen_helper_sve_##name##_zzw_s, NULL \ 1087 }; \ 1088 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz, \ 1089 name##_zzw_fns[a->esz], a, 0) 1090 1091 DO_ZZW(ASR_zzw, asr) 1092 DO_ZZW(LSR_zzw, lsr) 1093 DO_ZZW(LSL_zzw, lsl) 1094 1095 #undef DO_ZZW 1096 1097 /* 1098 *** SVE Integer Multiply-Add Group 1099 */ 1100 1101 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a, 1102 gen_helper_gvec_5 *fn) 1103 { 1104 if (sve_access_check(s)) { 1105 unsigned vsz = vec_full_reg_size(s); 1106 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd), 1107 vec_full_reg_offset(s, a->ra), 1108 vec_full_reg_offset(s, a->rn), 1109 vec_full_reg_offset(s, a->rm), 1110 pred_full_reg_offset(s, a->pg), 1111 vsz, vsz, 0, fn); 1112 } 1113 return true; 1114 } 1115 1116 static gen_helper_gvec_5 * const mla_fns[4] = { 1117 gen_helper_sve_mla_b, gen_helper_sve_mla_h, 1118 gen_helper_sve_mla_s, gen_helper_sve_mla_d, 1119 }; 1120 TRANS_FEAT(MLA, aa64_sve, do_zpzzz_ool, a, mla_fns[a->esz]) 1121 1122 static gen_helper_gvec_5 * const mls_fns[4] = { 1123 gen_helper_sve_mls_b, gen_helper_sve_mls_h, 1124 gen_helper_sve_mls_s, gen_helper_sve_mls_d, 1125 }; 1126 TRANS_FEAT(MLS, aa64_sve, do_zpzzz_ool, a, mls_fns[a->esz]) 1127 1128 /* 1129 *** SVE Index Generation Group 1130 */ 1131 1132 static bool do_index(DisasContext *s, int esz, int rd, 1133 TCGv_i64 start, TCGv_i64 incr) 1134 { 1135 unsigned vsz; 1136 TCGv_i32 desc; 1137 TCGv_ptr t_zd; 1138 1139 if (!sve_access_check(s)) { 1140 return true; 1141 } 1142 1143 vsz = vec_full_reg_size(s); 1144 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 1145 t_zd = tcg_temp_new_ptr(); 1146 1147 tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, rd)); 1148 if (esz == 3) { 1149 gen_helper_sve_index_d(t_zd, start, incr, desc); 1150 } else { 1151 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32); 1152 static index_fn * const fns[3] = { 1153 gen_helper_sve_index_b, 1154 gen_helper_sve_index_h, 1155 gen_helper_sve_index_s, 1156 }; 1157 TCGv_i32 s32 = tcg_temp_new_i32(); 1158 TCGv_i32 i32 = tcg_temp_new_i32(); 1159 1160 tcg_gen_extrl_i64_i32(s32, start); 1161 tcg_gen_extrl_i64_i32(i32, incr); 1162 fns[esz](t_zd, s32, i32, desc); 1163 } 1164 return true; 1165 } 1166 1167 TRANS_FEAT(INDEX_ii, aa64_sve, do_index, a->esz, a->rd, 1168 tcg_constant_i64(a->imm1), tcg_constant_i64(a->imm2)) 1169 TRANS_FEAT(INDEX_ir, aa64_sve, do_index, a->esz, a->rd, 1170 tcg_constant_i64(a->imm), cpu_reg(s, a->rm)) 1171 TRANS_FEAT(INDEX_ri, aa64_sve, do_index, a->esz, a->rd, 1172 cpu_reg(s, a->rn), tcg_constant_i64(a->imm)) 1173 TRANS_FEAT(INDEX_rr, aa64_sve, do_index, a->esz, a->rd, 1174 cpu_reg(s, a->rn), cpu_reg(s, a->rm)) 1175 1176 /* 1177 *** SVE Stack Allocation Group 1178 */ 1179 1180 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a) 1181 { 1182 if (!dc_isar_feature(aa64_sve, s)) { 1183 return false; 1184 } 1185 if (sve_access_check(s)) { 1186 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 1187 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1188 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s)); 1189 } 1190 return true; 1191 } 1192 1193 static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a) 1194 { 1195 if (!dc_isar_feature(aa64_sme, s)) { 1196 return false; 1197 } 1198 if (sme_enabled_check(s)) { 1199 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 1200 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1201 tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s)); 1202 } 1203 return true; 1204 } 1205 1206 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a) 1207 { 1208 if (!dc_isar_feature(aa64_sve, s)) { 1209 return false; 1210 } 1211 if (sve_access_check(s)) { 1212 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 1213 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1214 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s)); 1215 } 1216 return true; 1217 } 1218 1219 static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a) 1220 { 1221 if (!dc_isar_feature(aa64_sme, s)) { 1222 return false; 1223 } 1224 if (sme_enabled_check(s)) { 1225 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 1226 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1227 tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s)); 1228 } 1229 return true; 1230 } 1231 1232 static bool trans_RDVL(DisasContext *s, arg_RDVL *a) 1233 { 1234 if (!dc_isar_feature(aa64_sve, s)) { 1235 return false; 1236 } 1237 if (sve_access_check(s)) { 1238 TCGv_i64 reg = cpu_reg(s, a->rd); 1239 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s)); 1240 } 1241 return true; 1242 } 1243 1244 static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a) 1245 { 1246 if (!dc_isar_feature(aa64_sme, s)) { 1247 return false; 1248 } 1249 if (sme_enabled_check(s)) { 1250 TCGv_i64 reg = cpu_reg(s, a->rd); 1251 tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s)); 1252 } 1253 return true; 1254 } 1255 1256 /* 1257 *** SVE Compute Vector Address Group 1258 */ 1259 1260 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn) 1261 { 1262 return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm); 1263 } 1264 1265 TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32) 1266 TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64) 1267 TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32) 1268 TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32) 1269 1270 /* 1271 *** SVE Integer Misc - Unpredicated Group 1272 */ 1273 1274 static gen_helper_gvec_2 * const fexpa_fns[4] = { 1275 NULL, gen_helper_sve_fexpa_h, 1276 gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d, 1277 }; 1278 TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz, 1279 fexpa_fns[a->esz], a->rd, a->rn, s->fpcr_ah) 1280 1281 static gen_helper_gvec_3 * const ftssel_fns[4] = { 1282 NULL, gen_helper_sve_ftssel_h, 1283 gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d, 1284 }; 1285 TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, 1286 ftssel_fns[a->esz], a, s->fpcr_ah) 1287 1288 /* 1289 *** SVE Predicate Logical Operations Group 1290 */ 1291 1292 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a, 1293 const GVecGen4 *gvec_op) 1294 { 1295 if (!sve_access_check(s)) { 1296 return true; 1297 } 1298 1299 unsigned psz = pred_gvec_reg_size(s); 1300 int dofs = pred_full_reg_offset(s, a->rd); 1301 int nofs = pred_full_reg_offset(s, a->rn); 1302 int mofs = pred_full_reg_offset(s, a->rm); 1303 int gofs = pred_full_reg_offset(s, a->pg); 1304 1305 if (!a->s) { 1306 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op); 1307 return true; 1308 } 1309 1310 if (psz == 8) { 1311 /* Do the operation and the flags generation in temps. */ 1312 TCGv_i64 pd = tcg_temp_new_i64(); 1313 TCGv_i64 pn = tcg_temp_new_i64(); 1314 TCGv_i64 pm = tcg_temp_new_i64(); 1315 TCGv_i64 pg = tcg_temp_new_i64(); 1316 1317 tcg_gen_ld_i64(pn, tcg_env, nofs); 1318 tcg_gen_ld_i64(pm, tcg_env, mofs); 1319 tcg_gen_ld_i64(pg, tcg_env, gofs); 1320 1321 gvec_op->fni8(pd, pn, pm, pg); 1322 tcg_gen_st_i64(pd, tcg_env, dofs); 1323 1324 do_predtest1(pd, pg); 1325 } else { 1326 /* The operation and flags generation is large. The computation 1327 * of the flags depends on the original contents of the guarding 1328 * predicate. If the destination overwrites the guarding predicate, 1329 * then the easiest way to get this right is to save a copy. 1330 */ 1331 int tofs = gofs; 1332 if (a->rd == a->pg) { 1333 tofs = offsetof(CPUARMState, vfp.preg_tmp); 1334 tcg_gen_gvec_mov(0, tofs, gofs, psz, psz); 1335 } 1336 1337 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op); 1338 do_predtest(s, dofs, tofs, psz / 8); 1339 } 1340 return true; 1341 } 1342 1343 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1344 { 1345 tcg_gen_and_i64(pd, pn, pm); 1346 tcg_gen_and_i64(pd, pd, pg); 1347 } 1348 1349 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1350 TCGv_vec pm, TCGv_vec pg) 1351 { 1352 tcg_gen_and_vec(vece, pd, pn, pm); 1353 tcg_gen_and_vec(vece, pd, pd, pg); 1354 } 1355 1356 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a) 1357 { 1358 static const GVecGen4 op = { 1359 .fni8 = gen_and_pg_i64, 1360 .fniv = gen_and_pg_vec, 1361 .fno = gen_helper_sve_and_pppp, 1362 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1363 }; 1364 1365 if (!dc_isar_feature(aa64_sve, s)) { 1366 return false; 1367 } 1368 if (!a->s) { 1369 if (a->rn == a->rm) { 1370 if (a->pg == a->rn) { 1371 return do_mov_p(s, a->rd, a->rn); 1372 } 1373 return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg); 1374 } else if (a->pg == a->rn || a->pg == a->rm) { 1375 return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm); 1376 } 1377 } 1378 return do_pppp_flags(s, a, &op); 1379 } 1380 1381 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1382 { 1383 tcg_gen_andc_i64(pd, pn, pm); 1384 tcg_gen_and_i64(pd, pd, pg); 1385 } 1386 1387 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1388 TCGv_vec pm, TCGv_vec pg) 1389 { 1390 tcg_gen_andc_vec(vece, pd, pn, pm); 1391 tcg_gen_and_vec(vece, pd, pd, pg); 1392 } 1393 1394 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a) 1395 { 1396 static const GVecGen4 op = { 1397 .fni8 = gen_bic_pg_i64, 1398 .fniv = gen_bic_pg_vec, 1399 .fno = gen_helper_sve_bic_pppp, 1400 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1401 }; 1402 1403 if (!dc_isar_feature(aa64_sve, s)) { 1404 return false; 1405 } 1406 if (!a->s && a->pg == a->rn) { 1407 return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm); 1408 } 1409 return do_pppp_flags(s, a, &op); 1410 } 1411 1412 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1413 { 1414 tcg_gen_xor_i64(pd, pn, pm); 1415 tcg_gen_and_i64(pd, pd, pg); 1416 } 1417 1418 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1419 TCGv_vec pm, TCGv_vec pg) 1420 { 1421 tcg_gen_xor_vec(vece, pd, pn, pm); 1422 tcg_gen_and_vec(vece, pd, pd, pg); 1423 } 1424 1425 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a) 1426 { 1427 static const GVecGen4 op = { 1428 .fni8 = gen_eor_pg_i64, 1429 .fniv = gen_eor_pg_vec, 1430 .fno = gen_helper_sve_eor_pppp, 1431 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1432 }; 1433 1434 if (!dc_isar_feature(aa64_sve, s)) { 1435 return false; 1436 } 1437 /* Alias NOT (predicate) is EOR Pd.B, Pg/Z, Pn.B, Pg.B */ 1438 if (!a->s && a->pg == a->rm) { 1439 return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->pg, a->rn); 1440 } 1441 return do_pppp_flags(s, a, &op); 1442 } 1443 1444 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a) 1445 { 1446 if (a->s || !dc_isar_feature(aa64_sve, s)) { 1447 return false; 1448 } 1449 if (sve_access_check(s)) { 1450 unsigned psz = pred_gvec_reg_size(s); 1451 tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd), 1452 pred_full_reg_offset(s, a->pg), 1453 pred_full_reg_offset(s, a->rn), 1454 pred_full_reg_offset(s, a->rm), psz, psz); 1455 } 1456 return true; 1457 } 1458 1459 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1460 { 1461 tcg_gen_or_i64(pd, pn, pm); 1462 tcg_gen_and_i64(pd, pd, pg); 1463 } 1464 1465 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1466 TCGv_vec pm, TCGv_vec pg) 1467 { 1468 tcg_gen_or_vec(vece, pd, pn, pm); 1469 tcg_gen_and_vec(vece, pd, pd, pg); 1470 } 1471 1472 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a) 1473 { 1474 static const GVecGen4 op = { 1475 .fni8 = gen_orr_pg_i64, 1476 .fniv = gen_orr_pg_vec, 1477 .fno = gen_helper_sve_orr_pppp, 1478 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1479 }; 1480 1481 if (!dc_isar_feature(aa64_sve, s)) { 1482 return false; 1483 } 1484 if (!a->s && a->pg == a->rn && a->rn == a->rm) { 1485 return do_mov_p(s, a->rd, a->rn); 1486 } 1487 return do_pppp_flags(s, a, &op); 1488 } 1489 1490 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1491 { 1492 tcg_gen_orc_i64(pd, pn, pm); 1493 tcg_gen_and_i64(pd, pd, pg); 1494 } 1495 1496 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1497 TCGv_vec pm, TCGv_vec pg) 1498 { 1499 tcg_gen_orc_vec(vece, pd, pn, pm); 1500 tcg_gen_and_vec(vece, pd, pd, pg); 1501 } 1502 1503 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a) 1504 { 1505 static const GVecGen4 op = { 1506 .fni8 = gen_orn_pg_i64, 1507 .fniv = gen_orn_pg_vec, 1508 .fno = gen_helper_sve_orn_pppp, 1509 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1510 }; 1511 1512 if (!dc_isar_feature(aa64_sve, s)) { 1513 return false; 1514 } 1515 return do_pppp_flags(s, a, &op); 1516 } 1517 1518 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1519 { 1520 tcg_gen_or_i64(pd, pn, pm); 1521 tcg_gen_andc_i64(pd, pg, pd); 1522 } 1523 1524 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1525 TCGv_vec pm, TCGv_vec pg) 1526 { 1527 tcg_gen_or_vec(vece, pd, pn, pm); 1528 tcg_gen_andc_vec(vece, pd, pg, pd); 1529 } 1530 1531 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a) 1532 { 1533 static const GVecGen4 op = { 1534 .fni8 = gen_nor_pg_i64, 1535 .fniv = gen_nor_pg_vec, 1536 .fno = gen_helper_sve_nor_pppp, 1537 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1538 }; 1539 1540 if (!dc_isar_feature(aa64_sve, s)) { 1541 return false; 1542 } 1543 return do_pppp_flags(s, a, &op); 1544 } 1545 1546 static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1547 { 1548 tcg_gen_and_i64(pd, pn, pm); 1549 tcg_gen_andc_i64(pd, pg, pd); 1550 } 1551 1552 static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1553 TCGv_vec pm, TCGv_vec pg) 1554 { 1555 tcg_gen_and_vec(vece, pd, pn, pm); 1556 tcg_gen_andc_vec(vece, pd, pg, pd); 1557 } 1558 1559 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a) 1560 { 1561 static const GVecGen4 op = { 1562 .fni8 = gen_nand_pg_i64, 1563 .fniv = gen_nand_pg_vec, 1564 .fno = gen_helper_sve_nand_pppp, 1565 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1566 }; 1567 1568 if (!dc_isar_feature(aa64_sve, s)) { 1569 return false; 1570 } 1571 return do_pppp_flags(s, a, &op); 1572 } 1573 1574 /* 1575 *** SVE Predicate Misc Group 1576 */ 1577 1578 static bool trans_PTEST(DisasContext *s, arg_PTEST *a) 1579 { 1580 if (!dc_isar_feature(aa64_sve, s)) { 1581 return false; 1582 } 1583 if (sve_access_check(s)) { 1584 int nofs = pred_full_reg_offset(s, a->rn); 1585 int gofs = pred_full_reg_offset(s, a->pg); 1586 int words = DIV_ROUND_UP(pred_full_reg_size(s), 8); 1587 1588 if (words == 1) { 1589 TCGv_i64 pn = tcg_temp_new_i64(); 1590 TCGv_i64 pg = tcg_temp_new_i64(); 1591 1592 tcg_gen_ld_i64(pn, tcg_env, nofs); 1593 tcg_gen_ld_i64(pg, tcg_env, gofs); 1594 do_predtest1(pn, pg); 1595 } else { 1596 do_predtest(s, nofs, gofs, words); 1597 } 1598 } 1599 return true; 1600 } 1601 1602 /* See the ARM pseudocode DecodePredCount. */ 1603 static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz) 1604 { 1605 unsigned elements = fullsz >> esz; 1606 unsigned bound; 1607 1608 switch (pattern) { 1609 case 0x0: /* POW2 */ 1610 return pow2floor(elements); 1611 case 0x1: /* VL1 */ 1612 case 0x2: /* VL2 */ 1613 case 0x3: /* VL3 */ 1614 case 0x4: /* VL4 */ 1615 case 0x5: /* VL5 */ 1616 case 0x6: /* VL6 */ 1617 case 0x7: /* VL7 */ 1618 case 0x8: /* VL8 */ 1619 bound = pattern; 1620 break; 1621 case 0x9: /* VL16 */ 1622 case 0xa: /* VL32 */ 1623 case 0xb: /* VL64 */ 1624 case 0xc: /* VL128 */ 1625 case 0xd: /* VL256 */ 1626 bound = 16 << (pattern - 9); 1627 break; 1628 case 0x1d: /* MUL4 */ 1629 return elements - elements % 4; 1630 case 0x1e: /* MUL3 */ 1631 return elements - elements % 3; 1632 case 0x1f: /* ALL */ 1633 return elements; 1634 default: /* #uimm5 */ 1635 return 0; 1636 } 1637 return elements >= bound ? bound : 0; 1638 } 1639 1640 /* This handles all of the predicate initialization instructions, 1641 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32 1642 * so that decode_pred_count returns 0. For SETFFR, we will have 1643 * set RD == 16 == FFR. 1644 */ 1645 static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag) 1646 { 1647 if (!sve_access_check(s)) { 1648 return true; 1649 } 1650 1651 unsigned fullsz = vec_full_reg_size(s); 1652 unsigned ofs = pred_full_reg_offset(s, rd); 1653 unsigned numelem, setsz, i; 1654 uint64_t word, lastword; 1655 TCGv_i64 t; 1656 1657 numelem = decode_pred_count(fullsz, pat, esz); 1658 1659 /* Determine what we must store into each bit, and how many. */ 1660 if (numelem == 0) { 1661 lastword = word = 0; 1662 setsz = fullsz; 1663 } else { 1664 setsz = numelem << esz; 1665 lastword = word = pred_esz_masks[esz]; 1666 if (setsz % 64) { 1667 lastword &= MAKE_64BIT_MASK(0, setsz % 64); 1668 } 1669 } 1670 1671 t = tcg_temp_new_i64(); 1672 if (fullsz <= 64) { 1673 tcg_gen_movi_i64(t, lastword); 1674 tcg_gen_st_i64(t, tcg_env, ofs); 1675 goto done; 1676 } 1677 1678 if (word == lastword) { 1679 unsigned maxsz = size_for_gvec(fullsz / 8); 1680 unsigned oprsz = size_for_gvec(setsz / 8); 1681 1682 if (oprsz * 8 == setsz) { 1683 tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word); 1684 goto done; 1685 } 1686 } 1687 1688 setsz /= 8; 1689 fullsz /= 8; 1690 1691 tcg_gen_movi_i64(t, word); 1692 for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) { 1693 tcg_gen_st_i64(t, tcg_env, ofs + i); 1694 } 1695 if (lastword != word) { 1696 tcg_gen_movi_i64(t, lastword); 1697 tcg_gen_st_i64(t, tcg_env, ofs + i); 1698 i += 8; 1699 } 1700 if (i < fullsz) { 1701 tcg_gen_movi_i64(t, 0); 1702 for (; i < fullsz; i += 8) { 1703 tcg_gen_st_i64(t, tcg_env, ofs + i); 1704 } 1705 } 1706 1707 done: 1708 /* PTRUES */ 1709 if (setflag) { 1710 tcg_gen_movi_i32(cpu_NF, -(word != 0)); 1711 tcg_gen_movi_i32(cpu_CF, word == 0); 1712 tcg_gen_movi_i32(cpu_VF, 0); 1713 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 1714 } 1715 return true; 1716 } 1717 1718 TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s) 1719 1720 static bool trans_PTRUE_cnt(DisasContext *s, arg_PTRUE_cnt *a) 1721 { 1722 if (!dc_isar_feature(aa64_sme2_or_sve2p1, s)) { 1723 return false; 1724 } 1725 if (sve_access_check(s)) { 1726 /* Canonical TRUE is 0 count, invert bit, plus element size. */ 1727 int val = (1 << 15) | (1 << a->esz); 1728 1729 /* Write val to the first uint64_t; clear all of the rest. */ 1730 tcg_gen_gvec_dup_imm(MO_64, pred_full_reg_offset(s, a->rd), 1731 8, size_for_gvec(pred_full_reg_size(s)), val); 1732 } 1733 return true; 1734 } 1735 1736 /* Note pat == 31 is #all, to set all elements. */ 1737 TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve, 1738 do_predset, 0, FFR_PRED_NUM, 31, false) 1739 1740 /* Note pat == 32 is #unimp, to set no elements. */ 1741 TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false) 1742 1743 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a) 1744 { 1745 /* The path through do_pppp_flags is complicated enough to want to avoid 1746 * duplication. Frob the arguments into the form of a predicated AND. 1747 */ 1748 arg_rprr_s alt_a = { 1749 .rd = a->rd, .pg = a->pg, .s = a->s, 1750 .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM, 1751 }; 1752 1753 s->is_nonstreaming = true; 1754 return trans_AND_pppp(s, &alt_a); 1755 } 1756 1757 TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM) 1758 TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn) 1759 1760 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a, 1761 void (*gen_fn)(TCGv_i32, TCGv_ptr, 1762 TCGv_ptr, TCGv_i32)) 1763 { 1764 if (!sve_access_check(s)) { 1765 return true; 1766 } 1767 1768 TCGv_ptr t_pd = tcg_temp_new_ptr(); 1769 TCGv_ptr t_pg = tcg_temp_new_ptr(); 1770 TCGv_i32 t; 1771 unsigned desc = 0; 1772 1773 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s)); 1774 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 1775 1776 tcg_gen_addi_ptr(t_pd, tcg_env, pred_full_reg_offset(s, a->rd)); 1777 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->rn)); 1778 t = tcg_temp_new_i32(); 1779 1780 gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc)); 1781 1782 do_pred_flags(t); 1783 return true; 1784 } 1785 1786 TRANS_FEAT(PFIRST, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pfirst) 1787 TRANS_FEAT(PNEXT, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pnext) 1788 1789 /* 1790 *** SVE Element Count Group 1791 */ 1792 1793 /* Perform an inline saturating addition of a 32-bit value within 1794 * a 64-bit register. The second operand is known to be positive, 1795 * which halves the comparisons we must perform to bound the result. 1796 */ 1797 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d) 1798 { 1799 int64_t ibound; 1800 1801 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */ 1802 if (u) { 1803 tcg_gen_ext32u_i64(reg, reg); 1804 } else { 1805 tcg_gen_ext32s_i64(reg, reg); 1806 } 1807 if (d) { 1808 tcg_gen_sub_i64(reg, reg, val); 1809 ibound = (u ? 0 : INT32_MIN); 1810 tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound)); 1811 } else { 1812 tcg_gen_add_i64(reg, reg, val); 1813 ibound = (u ? UINT32_MAX : INT32_MAX); 1814 tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound)); 1815 } 1816 } 1817 1818 /* Similarly with 64-bit values. */ 1819 static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d) 1820 { 1821 TCGv_i64 t0 = tcg_temp_new_i64(); 1822 TCGv_i64 t2; 1823 1824 if (u) { 1825 if (d) { 1826 tcg_gen_sub_i64(t0, reg, val); 1827 t2 = tcg_constant_i64(0); 1828 tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0); 1829 } else { 1830 tcg_gen_add_i64(t0, reg, val); 1831 t2 = tcg_constant_i64(-1); 1832 tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0); 1833 } 1834 } else { 1835 TCGv_i64 t1 = tcg_temp_new_i64(); 1836 if (d) { 1837 /* Detect signed overflow for subtraction. */ 1838 tcg_gen_xor_i64(t0, reg, val); 1839 tcg_gen_sub_i64(t1, reg, val); 1840 tcg_gen_xor_i64(reg, reg, t1); 1841 tcg_gen_and_i64(t0, t0, reg); 1842 1843 /* Bound the result. */ 1844 tcg_gen_movi_i64(reg, INT64_MIN); 1845 t2 = tcg_constant_i64(0); 1846 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1); 1847 } else { 1848 /* Detect signed overflow for addition. */ 1849 tcg_gen_xor_i64(t0, reg, val); 1850 tcg_gen_add_i64(reg, reg, val); 1851 tcg_gen_xor_i64(t1, reg, val); 1852 tcg_gen_andc_i64(t0, t1, t0); 1853 1854 /* Bound the result. */ 1855 tcg_gen_movi_i64(t1, INT64_MAX); 1856 t2 = tcg_constant_i64(0); 1857 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg); 1858 } 1859 } 1860 } 1861 1862 /* Similarly with a vector and a scalar operand. */ 1863 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn, 1864 TCGv_i64 val, bool u, bool d) 1865 { 1866 unsigned vsz = vec_full_reg_size(s); 1867 TCGv_ptr dptr, nptr; 1868 TCGv_i32 t32, desc; 1869 TCGv_i64 t64; 1870 1871 dptr = tcg_temp_new_ptr(); 1872 nptr = tcg_temp_new_ptr(); 1873 tcg_gen_addi_ptr(dptr, tcg_env, vec_full_reg_offset(s, rd)); 1874 tcg_gen_addi_ptr(nptr, tcg_env, vec_full_reg_offset(s, rn)); 1875 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 1876 1877 switch (esz) { 1878 case MO_8: 1879 t32 = tcg_temp_new_i32(); 1880 tcg_gen_extrl_i64_i32(t32, val); 1881 if (d) { 1882 tcg_gen_neg_i32(t32, t32); 1883 } 1884 if (u) { 1885 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc); 1886 } else { 1887 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc); 1888 } 1889 break; 1890 1891 case MO_16: 1892 t32 = tcg_temp_new_i32(); 1893 tcg_gen_extrl_i64_i32(t32, val); 1894 if (d) { 1895 tcg_gen_neg_i32(t32, t32); 1896 } 1897 if (u) { 1898 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc); 1899 } else { 1900 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc); 1901 } 1902 break; 1903 1904 case MO_32: 1905 t64 = tcg_temp_new_i64(); 1906 if (d) { 1907 tcg_gen_neg_i64(t64, val); 1908 } else { 1909 tcg_gen_mov_i64(t64, val); 1910 } 1911 if (u) { 1912 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc); 1913 } else { 1914 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc); 1915 } 1916 break; 1917 1918 case MO_64: 1919 if (u) { 1920 if (d) { 1921 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc); 1922 } else { 1923 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc); 1924 } 1925 } else if (d) { 1926 t64 = tcg_temp_new_i64(); 1927 tcg_gen_neg_i64(t64, val); 1928 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc); 1929 } else { 1930 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc); 1931 } 1932 break; 1933 1934 default: 1935 g_assert_not_reached(); 1936 } 1937 } 1938 1939 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a) 1940 { 1941 if (!dc_isar_feature(aa64_sve, s)) { 1942 return false; 1943 } 1944 if (sve_access_check(s)) { 1945 unsigned fullsz = vec_full_reg_size(s); 1946 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 1947 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm); 1948 } 1949 return true; 1950 } 1951 1952 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a) 1953 { 1954 if (!dc_isar_feature(aa64_sve, s)) { 1955 return false; 1956 } 1957 if (sve_access_check(s)) { 1958 unsigned fullsz = vec_full_reg_size(s); 1959 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 1960 int inc = numelem * a->imm * (a->d ? -1 : 1); 1961 TCGv_i64 reg = cpu_reg(s, a->rd); 1962 1963 tcg_gen_addi_i64(reg, reg, inc); 1964 } 1965 return true; 1966 } 1967 1968 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a) 1969 { 1970 if (!dc_isar_feature(aa64_sve, s)) { 1971 return false; 1972 } 1973 if (!sve_access_check(s)) { 1974 return true; 1975 } 1976 1977 unsigned fullsz = vec_full_reg_size(s); 1978 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 1979 int inc = numelem * a->imm; 1980 TCGv_i64 reg = cpu_reg(s, a->rd); 1981 1982 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */ 1983 if (inc == 0) { 1984 if (a->u) { 1985 tcg_gen_ext32u_i64(reg, reg); 1986 } else { 1987 tcg_gen_ext32s_i64(reg, reg); 1988 } 1989 } else { 1990 do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d); 1991 } 1992 return true; 1993 } 1994 1995 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a) 1996 { 1997 if (!dc_isar_feature(aa64_sve, s)) { 1998 return false; 1999 } 2000 if (!sve_access_check(s)) { 2001 return true; 2002 } 2003 2004 unsigned fullsz = vec_full_reg_size(s); 2005 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2006 int inc = numelem * a->imm; 2007 TCGv_i64 reg = cpu_reg(s, a->rd); 2008 2009 if (inc != 0) { 2010 do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d); 2011 } 2012 return true; 2013 } 2014 2015 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a) 2016 { 2017 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2018 return false; 2019 } 2020 2021 unsigned fullsz = vec_full_reg_size(s); 2022 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2023 int inc = numelem * a->imm; 2024 2025 if (inc != 0) { 2026 if (sve_access_check(s)) { 2027 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd), 2028 vec_full_reg_offset(s, a->rn), 2029 tcg_constant_i64(a->d ? -inc : inc), 2030 fullsz, fullsz); 2031 } 2032 } else { 2033 do_mov_z(s, a->rd, a->rn); 2034 } 2035 return true; 2036 } 2037 2038 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a) 2039 { 2040 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2041 return false; 2042 } 2043 2044 unsigned fullsz = vec_full_reg_size(s); 2045 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2046 int inc = numelem * a->imm; 2047 2048 if (inc != 0) { 2049 if (sve_access_check(s)) { 2050 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, 2051 tcg_constant_i64(inc), a->u, a->d); 2052 } 2053 } else { 2054 do_mov_z(s, a->rd, a->rn); 2055 } 2056 return true; 2057 } 2058 2059 /* 2060 *** SVE Bitwise Immediate Group 2061 */ 2062 2063 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn) 2064 { 2065 uint64_t imm; 2066 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 2067 extract32(a->dbm, 0, 6), 2068 extract32(a->dbm, 6, 6))) { 2069 return false; 2070 } 2071 return gen_gvec_fn_zzi(s, gvec_fn, MO_64, a->rd, a->rn, imm); 2072 } 2073 2074 TRANS_FEAT(AND_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_andi) 2075 TRANS_FEAT(ORR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_ori) 2076 TRANS_FEAT(EOR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_xori) 2077 2078 static bool trans_DUPM(DisasContext *s, arg_DUPM *a) 2079 { 2080 uint64_t imm; 2081 2082 if (!dc_isar_feature(aa64_sve, s)) { 2083 return false; 2084 } 2085 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 2086 extract32(a->dbm, 0, 6), 2087 extract32(a->dbm, 6, 6))) { 2088 return false; 2089 } 2090 if (sve_access_check(s)) { 2091 do_dupi_z(s, a->rd, imm); 2092 } 2093 return true; 2094 } 2095 2096 /* 2097 *** SVE Integer Wide Immediate - Predicated Group 2098 */ 2099 2100 /* Implement all merging copies. This is used for CPY (immediate), 2101 * FCPY, CPY (scalar), CPY (SIMD&FP scalar). 2102 */ 2103 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg, 2104 TCGv_i64 val) 2105 { 2106 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32); 2107 static gen_cpy * const fns[4] = { 2108 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h, 2109 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d, 2110 }; 2111 unsigned vsz = vec_full_reg_size(s); 2112 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 2113 TCGv_ptr t_zd = tcg_temp_new_ptr(); 2114 TCGv_ptr t_zn = tcg_temp_new_ptr(); 2115 TCGv_ptr t_pg = tcg_temp_new_ptr(); 2116 2117 tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, rd)); 2118 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, rn)); 2119 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 2120 2121 fns[esz](t_zd, t_zn, t_pg, val, desc); 2122 } 2123 2124 static bool trans_FCPY(DisasContext *s, arg_FCPY *a) 2125 { 2126 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2127 return false; 2128 } 2129 if (sve_access_check(s)) { 2130 /* Decode the VFP immediate. */ 2131 uint64_t imm = vfp_expand_imm(a->esz, a->imm); 2132 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm)); 2133 } 2134 return true; 2135 } 2136 2137 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a) 2138 { 2139 if (!dc_isar_feature(aa64_sve, s)) { 2140 return false; 2141 } 2142 if (sve_access_check(s)) { 2143 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm)); 2144 } 2145 return true; 2146 } 2147 2148 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a) 2149 { 2150 static gen_helper_gvec_2i * const fns[4] = { 2151 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h, 2152 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d, 2153 }; 2154 2155 if (!dc_isar_feature(aa64_sve, s)) { 2156 return false; 2157 } 2158 if (sve_access_check(s)) { 2159 unsigned vsz = vec_full_reg_size(s); 2160 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd), 2161 pred_full_reg_offset(s, a->pg), 2162 tcg_constant_i64(a->imm), 2163 vsz, vsz, 0, fns[a->esz]); 2164 } 2165 return true; 2166 } 2167 2168 /* 2169 *** SVE Permute Extract Group 2170 */ 2171 2172 static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm) 2173 { 2174 if (!sve_access_check(s)) { 2175 return true; 2176 } 2177 2178 unsigned vsz = vec_full_reg_size(s); 2179 unsigned n_ofs = imm >= vsz ? 0 : imm; 2180 unsigned n_siz = vsz - n_ofs; 2181 unsigned d = vec_full_reg_offset(s, rd); 2182 unsigned n = vec_full_reg_offset(s, rn); 2183 unsigned m = vec_full_reg_offset(s, rm); 2184 2185 /* Use host vector move insns if we have appropriate sizes 2186 * and no unfortunate overlap. 2187 */ 2188 if (m != d 2189 && n_ofs == size_for_gvec(n_ofs) 2190 && n_siz == size_for_gvec(n_siz) 2191 && (d != n || n_siz <= n_ofs)) { 2192 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz); 2193 if (n_ofs != 0) { 2194 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs); 2195 } 2196 } else { 2197 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext); 2198 } 2199 return true; 2200 } 2201 2202 TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm) 2203 TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm) 2204 2205 static bool trans_EXTQ(DisasContext *s, arg_EXTQ *a) 2206 { 2207 unsigned vl, dofs, sofs0, sofs1, sofs2, imm; 2208 2209 if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { 2210 return false; 2211 } 2212 if (!sve_access_check(s)) { 2213 return true; 2214 } 2215 2216 imm = a->imm; 2217 if (imm == 0) { 2218 /* So far we never optimize Zdn with MOVPRFX, so zd = zn is a nop. */ 2219 return true; 2220 } 2221 2222 vl = vec_full_reg_size(s); 2223 dofs = vec_full_reg_offset(s, a->rd); 2224 sofs2 = vec_full_reg_offset(s, a->rn); 2225 2226 if (imm & 8) { 2227 sofs0 = dofs + 8; 2228 sofs1 = sofs2; 2229 sofs2 += 8; 2230 } else { 2231 sofs0 = dofs; 2232 sofs1 = dofs + 8; 2233 } 2234 imm = (imm & 7) << 3; 2235 2236 for (unsigned i = 0; i < vl; i += 16) { 2237 TCGv_i64 s0 = tcg_temp_new_i64(); 2238 TCGv_i64 s1 = tcg_temp_new_i64(); 2239 TCGv_i64 s2 = tcg_temp_new_i64(); 2240 2241 tcg_gen_ld_i64(s0, tcg_env, sofs0 + i); 2242 tcg_gen_ld_i64(s1, tcg_env, sofs1 + i); 2243 tcg_gen_ld_i64(s2, tcg_env, sofs2 + i); 2244 2245 tcg_gen_extract2_i64(s0, s0, s1, imm); 2246 tcg_gen_extract2_i64(s1, s1, s2, imm); 2247 2248 tcg_gen_st_i64(s0, tcg_env, dofs + i); 2249 tcg_gen_st_i64(s1, tcg_env, dofs + i + 8); 2250 } 2251 return true; 2252 } 2253 2254 /* 2255 *** SVE Permute - Unpredicated Group 2256 */ 2257 2258 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a) 2259 { 2260 if (!dc_isar_feature(aa64_sve, s)) { 2261 return false; 2262 } 2263 if (sve_access_check(s)) { 2264 unsigned vsz = vec_full_reg_size(s); 2265 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd), 2266 vsz, vsz, cpu_reg_sp(s, a->rn)); 2267 } 2268 return true; 2269 } 2270 2271 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a) 2272 { 2273 if (!dc_isar_feature(aa64_sve, s)) { 2274 return false; 2275 } 2276 if ((a->imm & 0x1f) == 0) { 2277 return false; 2278 } 2279 if (sve_access_check(s)) { 2280 unsigned vsz = vec_full_reg_size(s); 2281 unsigned dofs = vec_full_reg_offset(s, a->rd); 2282 unsigned esz, index; 2283 2284 esz = ctz32(a->imm); 2285 index = a->imm >> (esz + 1); 2286 2287 if ((index << esz) < vsz) { 2288 unsigned nofs = vec_reg_offset(s, a->rn, index, esz); 2289 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz); 2290 } else { 2291 /* 2292 * While dup_mem handles 128-bit elements, dup_imm does not. 2293 * Thankfully element size doesn't matter for splatting zero. 2294 */ 2295 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0); 2296 } 2297 } 2298 return true; 2299 } 2300 2301 static bool trans_DUPQ(DisasContext *s, arg_DUPQ *a) 2302 { 2303 unsigned vl, dofs, nofs; 2304 2305 if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { 2306 return false; 2307 } 2308 if (!sve_access_check(s)) { 2309 return true; 2310 } 2311 2312 vl = vec_full_reg_size(s); 2313 dofs = vec_full_reg_offset(s, a->rd); 2314 nofs = vec_reg_offset(s, a->rn, a->imm, a->esz); 2315 2316 for (unsigned i = 0; i < vl; i += 16) { 2317 tcg_gen_gvec_dup_mem(a->esz, dofs + i, nofs + i, 16, 16); 2318 } 2319 return true; 2320 } 2321 2322 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val) 2323 { 2324 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32); 2325 static gen_insr * const fns[4] = { 2326 gen_helper_sve_insr_b, gen_helper_sve_insr_h, 2327 gen_helper_sve_insr_s, gen_helper_sve_insr_d, 2328 }; 2329 unsigned vsz = vec_full_reg_size(s); 2330 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 2331 TCGv_ptr t_zd = tcg_temp_new_ptr(); 2332 TCGv_ptr t_zn = tcg_temp_new_ptr(); 2333 2334 tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, a->rd)); 2335 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn)); 2336 2337 fns[a->esz](t_zd, t_zn, val, desc); 2338 } 2339 2340 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a) 2341 { 2342 if (!dc_isar_feature(aa64_sve, s)) { 2343 return false; 2344 } 2345 if (sve_access_check(s)) { 2346 TCGv_i64 t = tcg_temp_new_i64(); 2347 tcg_gen_ld_i64(t, tcg_env, vec_reg_offset(s, a->rm, 0, MO_64)); 2348 do_insr_i64(s, a, t); 2349 } 2350 return true; 2351 } 2352 2353 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a) 2354 { 2355 if (!dc_isar_feature(aa64_sve, s)) { 2356 return false; 2357 } 2358 if (sve_access_check(s)) { 2359 do_insr_i64(s, a, cpu_reg(s, a->rm)); 2360 } 2361 return true; 2362 } 2363 2364 static gen_helper_gvec_2 * const rev_fns[4] = { 2365 gen_helper_sve_rev_b, gen_helper_sve_rev_h, 2366 gen_helper_sve_rev_s, gen_helper_sve_rev_d 2367 }; 2368 TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0) 2369 2370 static gen_helper_gvec_3 * const sve_tbl_fns[4] = { 2371 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h, 2372 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d 2373 }; 2374 TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 0) 2375 2376 static gen_helper_gvec_4 * const sve2_tbl_fns[4] = { 2377 gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h, 2378 gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d 2379 }; 2380 TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz], 2381 a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0) 2382 2383 static gen_helper_gvec_3 * const tblq_fns[4] = { 2384 gen_helper_sve2p1_tblq_b, gen_helper_sve2p1_tblq_h, 2385 gen_helper_sve2p1_tblq_s, gen_helper_sve2p1_tblq_d 2386 }; 2387 TRANS_FEAT(TBLQ, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz, 2388 tblq_fns[a->esz], a, 0) 2389 2390 static gen_helper_gvec_3 * const tbx_fns[4] = { 2391 gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h, 2392 gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d 2393 }; 2394 TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0) 2395 2396 static gen_helper_gvec_3 * const tbxq_fns[4] = { 2397 gen_helper_sve2p1_tbxq_b, gen_helper_sve2p1_tbxq_h, 2398 gen_helper_sve2p1_tbxq_s, gen_helper_sve2p1_tbxq_d 2399 }; 2400 TRANS_FEAT(TBXQ, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz, 2401 tbxq_fns[a->esz], a, 0) 2402 2403 static bool trans_PMOV_pv(DisasContext *s, arg_PMOV_pv *a) 2404 { 2405 static gen_helper_gvec_2 * const fns[4] = { 2406 NULL, gen_helper_pmov_pv_h, 2407 gen_helper_pmov_pv_s, gen_helper_pmov_pv_d 2408 }; 2409 unsigned vl, pl, vofs, pofs; 2410 TCGv_i64 tmp; 2411 2412 if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { 2413 return false; 2414 } 2415 if (!sve_access_check(s)) { 2416 return true; 2417 } 2418 2419 vl = vec_full_reg_size(s); 2420 if (a->esz != MO_8) { 2421 tcg_gen_gvec_2_ool(pred_full_reg_offset(s, a->rd), 2422 vec_full_reg_offset(s, a->rn), 2423 vl, vl, a->imm, fns[a->esz]); 2424 return true; 2425 } 2426 2427 /* 2428 * Copy the low PL bytes from vector Zn, zero-extending to a 2429 * multiple of 8 bytes, so that Pd is properly cleared. 2430 */ 2431 2432 pl = vl / 8; 2433 pofs = pred_full_reg_offset(s, a->rd); 2434 vofs = vec_full_reg_offset(s, a->rn); 2435 2436 QEMU_BUILD_BUG_ON(sizeof(ARMPredicateReg) != 32); 2437 for (unsigned i = 32; i >= 8; i >>= 1) { 2438 if (pl & i) { 2439 tcg_gen_gvec_mov(MO_64, pofs, vofs, i, i); 2440 pofs += i; 2441 vofs += i; 2442 } 2443 } 2444 switch (pl & 7) { 2445 case 0: 2446 return true; 2447 case 2: 2448 tmp = tcg_temp_new_i64(); 2449 tcg_gen_ld16u_i64(tmp, tcg_env, vofs + (HOST_BIG_ENDIAN ? 6 : 0)); 2450 break; 2451 case 4: 2452 tmp = tcg_temp_new_i64(); 2453 tcg_gen_ld32u_i64(tmp, tcg_env, vofs + (HOST_BIG_ENDIAN ? 4 : 0)); 2454 break; 2455 case 6: 2456 tmp = tcg_temp_new_i64(); 2457 tcg_gen_ld_i64(tmp, tcg_env, vofs); 2458 tcg_gen_extract_i64(tmp, tmp, 0, 48); 2459 break; 2460 default: 2461 g_assert_not_reached(); 2462 } 2463 tcg_gen_st_i64(tmp, tcg_env, pofs); 2464 return true; 2465 } 2466 2467 static bool trans_PMOV_vp(DisasContext *s, arg_PMOV_pv *a) 2468 { 2469 static gen_helper_gvec_2 * const fns[4] = { 2470 NULL, gen_helper_pmov_vp_h, 2471 gen_helper_pmov_vp_s, gen_helper_pmov_vp_d 2472 }; 2473 unsigned vl; 2474 2475 if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { 2476 return false; 2477 } 2478 if (!sve_access_check(s)) { 2479 return true; 2480 } 2481 2482 vl = vec_full_reg_size(s); 2483 2484 if (a->esz == MO_8) { 2485 /* 2486 * The low PL bytes are copied from Pn to Zd unchanged. 2487 * We know that the unused portion of Pn is zero, and 2488 * that imm == 0, so the balance of Zd must be zeroed. 2489 */ 2490 tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, a->rd), 2491 pred_full_reg_offset(s, a->rn), 2492 size_for_gvec(vl / 8), vl); 2493 } else { 2494 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd), 2495 pred_full_reg_offset(s, a->rn), 2496 vl, vl, a->imm, fns[a->esz]); 2497 } 2498 return true; 2499 } 2500 2501 static bool trans_UNPK(DisasContext *s, arg_UNPK *a) 2502 { 2503 static gen_helper_gvec_2 * const fns[4][2] = { 2504 { NULL, NULL }, 2505 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h }, 2506 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s }, 2507 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d }, 2508 }; 2509 2510 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2511 return false; 2512 } 2513 if (sve_access_check(s)) { 2514 unsigned vsz = vec_full_reg_size(s); 2515 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd), 2516 vec_full_reg_offset(s, a->rn) 2517 + (a->h ? vsz / 2 : 0), 2518 vsz, vsz, 0, fns[a->esz][a->u]); 2519 } 2520 return true; 2521 } 2522 2523 /* 2524 *** SVE Permute - Predicates Group 2525 */ 2526 2527 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd, 2528 gen_helper_gvec_3 *fn) 2529 { 2530 if (!sve_access_check(s)) { 2531 return true; 2532 } 2533 2534 unsigned vsz = pred_full_reg_size(s); 2535 2536 TCGv_ptr t_d = tcg_temp_new_ptr(); 2537 TCGv_ptr t_n = tcg_temp_new_ptr(); 2538 TCGv_ptr t_m = tcg_temp_new_ptr(); 2539 uint32_t desc = 0; 2540 2541 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz); 2542 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 2543 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd); 2544 2545 tcg_gen_addi_ptr(t_d, tcg_env, pred_full_reg_offset(s, a->rd)); 2546 tcg_gen_addi_ptr(t_n, tcg_env, pred_full_reg_offset(s, a->rn)); 2547 tcg_gen_addi_ptr(t_m, tcg_env, pred_full_reg_offset(s, a->rm)); 2548 2549 fn(t_d, t_n, t_m, tcg_constant_i32(desc)); 2550 return true; 2551 } 2552 2553 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd, 2554 gen_helper_gvec_2 *fn) 2555 { 2556 if (!sve_access_check(s)) { 2557 return true; 2558 } 2559 2560 unsigned vsz = pred_full_reg_size(s); 2561 TCGv_ptr t_d = tcg_temp_new_ptr(); 2562 TCGv_ptr t_n = tcg_temp_new_ptr(); 2563 uint32_t desc = 0; 2564 2565 tcg_gen_addi_ptr(t_d, tcg_env, pred_full_reg_offset(s, a->rd)); 2566 tcg_gen_addi_ptr(t_n, tcg_env, pred_full_reg_offset(s, a->rn)); 2567 2568 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz); 2569 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 2570 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd); 2571 2572 fn(t_d, t_n, tcg_constant_i32(desc)); 2573 return true; 2574 } 2575 2576 TRANS_FEAT(ZIP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_zip_p) 2577 TRANS_FEAT(ZIP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_zip_p) 2578 TRANS_FEAT(UZP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_uzp_p) 2579 TRANS_FEAT(UZP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_uzp_p) 2580 TRANS_FEAT(TRN1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_trn_p) 2581 TRANS_FEAT(TRN2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_trn_p) 2582 2583 TRANS_FEAT(REV_p, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_rev_p) 2584 TRANS_FEAT(PUNPKLO, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_punpk_p) 2585 TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p) 2586 2587 /* 2588 *** SVE Permute - Interleaving Group 2589 */ 2590 2591 static bool do_interleave_q(DisasContext *s, gen_helper_gvec_3 *fn, 2592 arg_rrr_esz *a, int data) 2593 { 2594 if (sve_access_check(s)) { 2595 unsigned vsz = vec_full_reg_size(s); 2596 if (vsz < 32) { 2597 unallocated_encoding(s); 2598 } else { 2599 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), 2600 vec_full_reg_offset(s, a->rn), 2601 vec_full_reg_offset(s, a->rm), 2602 vsz, vsz, data, fn); 2603 } 2604 } 2605 return true; 2606 } 2607 2608 static gen_helper_gvec_3 * const zip_fns[4] = { 2609 gen_helper_sve_zip_b, gen_helper_sve_zip_h, 2610 gen_helper_sve_zip_s, gen_helper_sve_zip_d, 2611 }; 2612 TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2613 zip_fns[a->esz], a, 0) 2614 TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2615 zip_fns[a->esz], a, vec_full_reg_size(s) / 2) 2616 2617 TRANS_FEAT_NONSTREAMING(ZIP1_q, aa64_sve_f64mm, do_interleave_q, 2618 gen_helper_sve2_zip_q, a, 0) 2619 TRANS_FEAT_NONSTREAMING(ZIP2_q, aa64_sve_f64mm, do_interleave_q, 2620 gen_helper_sve2_zip_q, a, 2621 QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2) 2622 2623 static gen_helper_gvec_3 * const zipq_fns[4] = { 2624 gen_helper_sve2p1_zipq_b, gen_helper_sve2p1_zipq_h, 2625 gen_helper_sve2p1_zipq_s, gen_helper_sve2p1_zipq_d, 2626 }; 2627 TRANS_FEAT(ZIPQ1, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz, 2628 zipq_fns[a->esz], a, 0) 2629 TRANS_FEAT(ZIPQ2, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz, 2630 zipq_fns[a->esz], a, 16 / 2) 2631 2632 static gen_helper_gvec_3 * const uzp_fns[4] = { 2633 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h, 2634 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d, 2635 }; 2636 TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2637 uzp_fns[a->esz], a, 0) 2638 TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2639 uzp_fns[a->esz], a, 1 << a->esz) 2640 2641 TRANS_FEAT_NONSTREAMING(UZP1_q, aa64_sve_f64mm, do_interleave_q, 2642 gen_helper_sve2_uzp_q, a, 0) 2643 TRANS_FEAT_NONSTREAMING(UZP2_q, aa64_sve_f64mm, do_interleave_q, 2644 gen_helper_sve2_uzp_q, a, 16) 2645 2646 static gen_helper_gvec_3 * const uzpq_fns[4] = { 2647 gen_helper_sve2p1_uzpq_b, gen_helper_sve2p1_uzpq_h, 2648 gen_helper_sve2p1_uzpq_s, gen_helper_sve2p1_uzpq_d, 2649 }; 2650 TRANS_FEAT(UZPQ1, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz, 2651 uzpq_fns[a->esz], a, 0) 2652 TRANS_FEAT(UZPQ2, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz, 2653 uzpq_fns[a->esz], a, 1 << a->esz) 2654 2655 static gen_helper_gvec_3 * const trn_fns[4] = { 2656 gen_helper_sve_trn_b, gen_helper_sve_trn_h, 2657 gen_helper_sve_trn_s, gen_helper_sve_trn_d, 2658 }; 2659 2660 TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2661 trn_fns[a->esz], a, 0) 2662 TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2663 trn_fns[a->esz], a, 1 << a->esz) 2664 2665 TRANS_FEAT_NONSTREAMING(TRN1_q, aa64_sve_f64mm, do_interleave_q, 2666 gen_helper_sve2_trn_q, a, 0) 2667 TRANS_FEAT_NONSTREAMING(TRN2_q, aa64_sve_f64mm, do_interleave_q, 2668 gen_helper_sve2_trn_q, a, 16) 2669 2670 /* 2671 *** SVE Permute Vector - Predicated Group 2672 */ 2673 2674 static gen_helper_gvec_3 * const compact_fns[4] = { 2675 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d 2676 }; 2677 TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, 2678 compact_fns[a->esz], a, 0) 2679 2680 /* Call the helper that computes the ARM LastActiveElement pseudocode 2681 * function, scaled by the element size. This includes the not found 2682 * indication; e.g. not found for esz=3 is -8. 2683 */ 2684 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg) 2685 { 2686 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot 2687 * round up, as we do elsewhere, because we need the exact size. 2688 */ 2689 TCGv_ptr t_p = tcg_temp_new_ptr(); 2690 unsigned desc = 0; 2691 2692 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s)); 2693 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz); 2694 2695 tcg_gen_addi_ptr(t_p, tcg_env, pred_full_reg_offset(s, pg)); 2696 2697 gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc)); 2698 } 2699 2700 /* Increment LAST to the offset of the next element in the vector, 2701 * wrapping around to 0. 2702 */ 2703 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz) 2704 { 2705 unsigned vsz = vec_full_reg_size(s); 2706 2707 tcg_gen_addi_i32(last, last, 1 << esz); 2708 if (is_power_of_2(vsz)) { 2709 tcg_gen_andi_i32(last, last, vsz - 1); 2710 } else { 2711 TCGv_i32 max = tcg_constant_i32(vsz); 2712 TCGv_i32 zero = tcg_constant_i32(0); 2713 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last); 2714 } 2715 } 2716 2717 /* If LAST < 0, set LAST to the offset of the last element in the vector. */ 2718 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz) 2719 { 2720 unsigned vsz = vec_full_reg_size(s); 2721 2722 if (is_power_of_2(vsz)) { 2723 tcg_gen_andi_i32(last, last, vsz - 1); 2724 } else { 2725 TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz)); 2726 TCGv_i32 zero = tcg_constant_i32(0); 2727 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last); 2728 } 2729 } 2730 2731 /* Load an unsigned element of ESZ from BASE+OFS. */ 2732 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz) 2733 { 2734 TCGv_i64 r = tcg_temp_new_i64(); 2735 2736 switch (esz) { 2737 case 0: 2738 tcg_gen_ld8u_i64(r, base, ofs); 2739 break; 2740 case 1: 2741 tcg_gen_ld16u_i64(r, base, ofs); 2742 break; 2743 case 2: 2744 tcg_gen_ld32u_i64(r, base, ofs); 2745 break; 2746 case 3: 2747 tcg_gen_ld_i64(r, base, ofs); 2748 break; 2749 default: 2750 g_assert_not_reached(); 2751 } 2752 return r; 2753 } 2754 2755 /* Load an unsigned element of ESZ from RM[LAST]. */ 2756 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last, 2757 int rm, int esz) 2758 { 2759 TCGv_ptr p = tcg_temp_new_ptr(); 2760 2761 /* Convert offset into vector into offset into ENV. 2762 * The final adjustment for the vector register base 2763 * is added via constant offset to the load. 2764 */ 2765 #if HOST_BIG_ENDIAN 2766 /* Adjust for element ordering. See vec_reg_offset. */ 2767 if (esz < 3) { 2768 tcg_gen_xori_i32(last, last, 8 - (1 << esz)); 2769 } 2770 #endif 2771 tcg_gen_ext_i32_ptr(p, last); 2772 tcg_gen_add_ptr(p, p, tcg_env); 2773 2774 return load_esz(p, vec_full_reg_offset(s, rm), esz); 2775 } 2776 2777 /* Compute CLAST for a Zreg. */ 2778 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before) 2779 { 2780 TCGv_i32 last; 2781 TCGLabel *over; 2782 TCGv_i64 ele; 2783 unsigned vsz, esz = a->esz; 2784 2785 if (!sve_access_check(s)) { 2786 return true; 2787 } 2788 2789 last = tcg_temp_new_i32(); 2790 over = gen_new_label(); 2791 2792 find_last_active(s, last, esz, a->pg); 2793 2794 /* There is of course no movcond for a 2048-bit vector, 2795 * so we must branch over the actual store. 2796 */ 2797 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over); 2798 2799 if (!before) { 2800 incr_last_active(s, last, esz); 2801 } 2802 2803 ele = load_last_active(s, last, a->rm, esz); 2804 2805 vsz = vec_full_reg_size(s); 2806 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele); 2807 2808 /* If this insn used MOVPRFX, we may need a second move. */ 2809 if (a->rd != a->rn) { 2810 TCGLabel *done = gen_new_label(); 2811 tcg_gen_br(done); 2812 2813 gen_set_label(over); 2814 do_mov_z(s, a->rd, a->rn); 2815 2816 gen_set_label(done); 2817 } else { 2818 gen_set_label(over); 2819 } 2820 return true; 2821 } 2822 2823 TRANS_FEAT(CLASTA_z, aa64_sve, do_clast_vector, a, false) 2824 TRANS_FEAT(CLASTB_z, aa64_sve, do_clast_vector, a, true) 2825 2826 /* Compute CLAST for a scalar. */ 2827 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm, 2828 bool before, TCGv_i64 reg_val) 2829 { 2830 TCGv_i32 last = tcg_temp_new_i32(); 2831 TCGv_i64 ele, cmp; 2832 2833 find_last_active(s, last, esz, pg); 2834 2835 /* Extend the original value of last prior to incrementing. */ 2836 cmp = tcg_temp_new_i64(); 2837 tcg_gen_ext_i32_i64(cmp, last); 2838 2839 if (!before) { 2840 incr_last_active(s, last, esz); 2841 } 2842 2843 /* The conceit here is that while last < 0 indicates not found, after 2844 * adjusting for tcg_env->vfp.zregs[rm], it is still a valid address 2845 * from which we can load garbage. We then discard the garbage with 2846 * a conditional move. 2847 */ 2848 ele = load_last_active(s, last, rm, esz); 2849 2850 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0), 2851 ele, reg_val); 2852 } 2853 2854 /* Compute CLAST for a Vreg. */ 2855 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before) 2856 { 2857 if (sve_access_check(s)) { 2858 int esz = a->esz; 2859 int ofs = vec_reg_offset(s, a->rd, 0, esz); 2860 TCGv_i64 reg = load_esz(tcg_env, ofs, esz); 2861 2862 do_clast_scalar(s, esz, a->pg, a->rn, before, reg); 2863 write_fp_dreg(s, a->rd, reg); 2864 } 2865 return true; 2866 } 2867 2868 TRANS_FEAT(CLASTA_v, aa64_sve, do_clast_fp, a, false) 2869 TRANS_FEAT(CLASTB_v, aa64_sve, do_clast_fp, a, true) 2870 2871 /* Compute CLAST for a Xreg. */ 2872 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before) 2873 { 2874 TCGv_i64 reg; 2875 2876 if (!sve_access_check(s)) { 2877 return true; 2878 } 2879 2880 reg = cpu_reg(s, a->rd); 2881 switch (a->esz) { 2882 case 0: 2883 tcg_gen_ext8u_i64(reg, reg); 2884 break; 2885 case 1: 2886 tcg_gen_ext16u_i64(reg, reg); 2887 break; 2888 case 2: 2889 tcg_gen_ext32u_i64(reg, reg); 2890 break; 2891 case 3: 2892 break; 2893 default: 2894 g_assert_not_reached(); 2895 } 2896 2897 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg); 2898 return true; 2899 } 2900 2901 TRANS_FEAT(CLASTA_r, aa64_sve, do_clast_general, a, false) 2902 TRANS_FEAT(CLASTB_r, aa64_sve, do_clast_general, a, true) 2903 2904 /* Compute LAST for a scalar. */ 2905 static TCGv_i64 do_last_scalar(DisasContext *s, int esz, 2906 int pg, int rm, bool before) 2907 { 2908 TCGv_i32 last = tcg_temp_new_i32(); 2909 2910 find_last_active(s, last, esz, pg); 2911 if (before) { 2912 wrap_last_active(s, last, esz); 2913 } else { 2914 incr_last_active(s, last, esz); 2915 } 2916 2917 return load_last_active(s, last, rm, esz); 2918 } 2919 2920 /* Compute LAST for a Vreg. */ 2921 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before) 2922 { 2923 if (sve_access_check(s)) { 2924 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before); 2925 write_fp_dreg(s, a->rd, val); 2926 } 2927 return true; 2928 } 2929 2930 TRANS_FEAT(LASTA_v, aa64_sve, do_last_fp, a, false) 2931 TRANS_FEAT(LASTB_v, aa64_sve, do_last_fp, a, true) 2932 2933 /* Compute LAST for a Xreg. */ 2934 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before) 2935 { 2936 if (sve_access_check(s)) { 2937 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before); 2938 tcg_gen_mov_i64(cpu_reg(s, a->rd), val); 2939 } 2940 return true; 2941 } 2942 2943 TRANS_FEAT(LASTA_r, aa64_sve, do_last_general, a, false) 2944 TRANS_FEAT(LASTB_r, aa64_sve, do_last_general, a, true) 2945 2946 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a) 2947 { 2948 if (!dc_isar_feature(aa64_sve, s)) { 2949 return false; 2950 } 2951 if (sve_access_check(s)) { 2952 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn)); 2953 } 2954 return true; 2955 } 2956 2957 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a) 2958 { 2959 if (!dc_isar_feature(aa64_sve, s)) { 2960 return false; 2961 } 2962 if (sve_access_check(s)) { 2963 int ofs = vec_reg_offset(s, a->rn, 0, a->esz); 2964 TCGv_i64 t = load_esz(tcg_env, ofs, a->esz); 2965 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t); 2966 } 2967 return true; 2968 } 2969 2970 static gen_helper_gvec_3 * const revb_fns[4] = { 2971 NULL, gen_helper_sve_revb_h, 2972 gen_helper_sve_revb_s, gen_helper_sve_revb_d, 2973 }; 2974 TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0) 2975 2976 static gen_helper_gvec_3 * const revh_fns[4] = { 2977 NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d, 2978 }; 2979 TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0) 2980 2981 TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz, 2982 a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0) 2983 2984 TRANS_FEAT(REVD, aa64_sme, gen_gvec_ool_arg_zpz, gen_helper_sme_revd_q, a, 0) 2985 2986 TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz, 2987 gen_helper_sve_splice, a, a->esz) 2988 2989 TRANS_FEAT(SPLICE_sve2, aa64_sve2, gen_gvec_ool_zzzp, gen_helper_sve_splice, 2990 a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz) 2991 2992 /* 2993 *** SVE Integer Compare - Vectors Group 2994 */ 2995 2996 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a, 2997 gen_helper_gvec_flags_4 *gen_fn) 2998 { 2999 TCGv_ptr pd, zn, zm, pg; 3000 unsigned vsz; 3001 TCGv_i32 t; 3002 3003 if (gen_fn == NULL) { 3004 return false; 3005 } 3006 if (!sve_access_check(s)) { 3007 return true; 3008 } 3009 3010 vsz = vec_full_reg_size(s); 3011 t = tcg_temp_new_i32(); 3012 pd = tcg_temp_new_ptr(); 3013 zn = tcg_temp_new_ptr(); 3014 zm = tcg_temp_new_ptr(); 3015 pg = tcg_temp_new_ptr(); 3016 3017 tcg_gen_addi_ptr(pd, tcg_env, pred_full_reg_offset(s, a->rd)); 3018 tcg_gen_addi_ptr(zn, tcg_env, vec_full_reg_offset(s, a->rn)); 3019 tcg_gen_addi_ptr(zm, tcg_env, vec_full_reg_offset(s, a->rm)); 3020 tcg_gen_addi_ptr(pg, tcg_env, pred_full_reg_offset(s, a->pg)); 3021 3022 gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0))); 3023 3024 do_pred_flags(t); 3025 return true; 3026 } 3027 3028 #define DO_PPZZ(NAME, name) \ 3029 static gen_helper_gvec_flags_4 * const name##_ppzz_fns[4] = { \ 3030 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \ 3031 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \ 3032 }; \ 3033 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_ppzz_flags, \ 3034 a, name##_ppzz_fns[a->esz]) 3035 3036 DO_PPZZ(CMPEQ, cmpeq) 3037 DO_PPZZ(CMPNE, cmpne) 3038 DO_PPZZ(CMPGT, cmpgt) 3039 DO_PPZZ(CMPGE, cmpge) 3040 DO_PPZZ(CMPHI, cmphi) 3041 DO_PPZZ(CMPHS, cmphs) 3042 3043 #undef DO_PPZZ 3044 3045 #define DO_PPZW(NAME, name) \ 3046 static gen_helper_gvec_flags_4 * const name##_ppzw_fns[4] = { \ 3047 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \ 3048 gen_helper_sve_##name##_ppzw_s, NULL \ 3049 }; \ 3050 TRANS_FEAT(NAME##_ppzw, aa64_sve, do_ppzz_flags, \ 3051 a, name##_ppzw_fns[a->esz]) 3052 3053 DO_PPZW(CMPEQ, cmpeq) 3054 DO_PPZW(CMPNE, cmpne) 3055 DO_PPZW(CMPGT, cmpgt) 3056 DO_PPZW(CMPGE, cmpge) 3057 DO_PPZW(CMPHI, cmphi) 3058 DO_PPZW(CMPHS, cmphs) 3059 DO_PPZW(CMPLT, cmplt) 3060 DO_PPZW(CMPLE, cmple) 3061 DO_PPZW(CMPLO, cmplo) 3062 DO_PPZW(CMPLS, cmpls) 3063 3064 #undef DO_PPZW 3065 3066 /* 3067 *** SVE Integer Compare - Immediate Groups 3068 */ 3069 3070 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a, 3071 gen_helper_gvec_flags_3 *gen_fn) 3072 { 3073 TCGv_ptr pd, zn, pg; 3074 unsigned vsz; 3075 TCGv_i32 t; 3076 3077 if (gen_fn == NULL) { 3078 return false; 3079 } 3080 if (!sve_access_check(s)) { 3081 return true; 3082 } 3083 3084 vsz = vec_full_reg_size(s); 3085 t = tcg_temp_new_i32(); 3086 pd = tcg_temp_new_ptr(); 3087 zn = tcg_temp_new_ptr(); 3088 pg = tcg_temp_new_ptr(); 3089 3090 tcg_gen_addi_ptr(pd, tcg_env, pred_full_reg_offset(s, a->rd)); 3091 tcg_gen_addi_ptr(zn, tcg_env, vec_full_reg_offset(s, a->rn)); 3092 tcg_gen_addi_ptr(pg, tcg_env, pred_full_reg_offset(s, a->pg)); 3093 3094 gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm))); 3095 3096 do_pred_flags(t); 3097 return true; 3098 } 3099 3100 #define DO_PPZI(NAME, name) \ 3101 static gen_helper_gvec_flags_3 * const name##_ppzi_fns[4] = { \ 3102 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \ 3103 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \ 3104 }; \ 3105 TRANS_FEAT(NAME##_ppzi, aa64_sve, do_ppzi_flags, a, \ 3106 name##_ppzi_fns[a->esz]) 3107 3108 DO_PPZI(CMPEQ, cmpeq) 3109 DO_PPZI(CMPNE, cmpne) 3110 DO_PPZI(CMPGT, cmpgt) 3111 DO_PPZI(CMPGE, cmpge) 3112 DO_PPZI(CMPHI, cmphi) 3113 DO_PPZI(CMPHS, cmphs) 3114 DO_PPZI(CMPLT, cmplt) 3115 DO_PPZI(CMPLE, cmple) 3116 DO_PPZI(CMPLO, cmplo) 3117 DO_PPZI(CMPLS, cmpls) 3118 3119 #undef DO_PPZI 3120 3121 /* 3122 *** SVE Partition Break Group 3123 */ 3124 3125 static bool do_brk3(DisasContext *s, arg_rprr_s *a, 3126 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s) 3127 { 3128 if (!sve_access_check(s)) { 3129 return true; 3130 } 3131 3132 unsigned vsz = pred_full_reg_size(s); 3133 3134 /* Predicate sizes may be smaller and cannot use simd_desc. */ 3135 TCGv_ptr d = tcg_temp_new_ptr(); 3136 TCGv_ptr n = tcg_temp_new_ptr(); 3137 TCGv_ptr m = tcg_temp_new_ptr(); 3138 TCGv_ptr g = tcg_temp_new_ptr(); 3139 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz)); 3140 3141 tcg_gen_addi_ptr(d, tcg_env, pred_full_reg_offset(s, a->rd)); 3142 tcg_gen_addi_ptr(n, tcg_env, pred_full_reg_offset(s, a->rn)); 3143 tcg_gen_addi_ptr(m, tcg_env, pred_full_reg_offset(s, a->rm)); 3144 tcg_gen_addi_ptr(g, tcg_env, pred_full_reg_offset(s, a->pg)); 3145 3146 if (a->s) { 3147 TCGv_i32 t = tcg_temp_new_i32(); 3148 fn_s(t, d, n, m, g, desc); 3149 do_pred_flags(t); 3150 } else { 3151 fn(d, n, m, g, desc); 3152 } 3153 return true; 3154 } 3155 3156 static bool do_brk2(DisasContext *s, arg_rpr_s *a, 3157 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s) 3158 { 3159 if (!sve_access_check(s)) { 3160 return true; 3161 } 3162 3163 unsigned vsz = pred_full_reg_size(s); 3164 3165 /* Predicate sizes may be smaller and cannot use simd_desc. */ 3166 TCGv_ptr d = tcg_temp_new_ptr(); 3167 TCGv_ptr n = tcg_temp_new_ptr(); 3168 TCGv_ptr g = tcg_temp_new_ptr(); 3169 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz)); 3170 3171 tcg_gen_addi_ptr(d, tcg_env, pred_full_reg_offset(s, a->rd)); 3172 tcg_gen_addi_ptr(n, tcg_env, pred_full_reg_offset(s, a->rn)); 3173 tcg_gen_addi_ptr(g, tcg_env, pred_full_reg_offset(s, a->pg)); 3174 3175 if (a->s) { 3176 TCGv_i32 t = tcg_temp_new_i32(); 3177 fn_s(t, d, n, g, desc); 3178 do_pred_flags(t); 3179 } else { 3180 fn(d, n, g, desc); 3181 } 3182 return true; 3183 } 3184 3185 TRANS_FEAT(BRKPA, aa64_sve, do_brk3, a, 3186 gen_helper_sve_brkpa, gen_helper_sve_brkpas) 3187 TRANS_FEAT(BRKPB, aa64_sve, do_brk3, a, 3188 gen_helper_sve_brkpb, gen_helper_sve_brkpbs) 3189 3190 TRANS_FEAT(BRKA_m, aa64_sve, do_brk2, a, 3191 gen_helper_sve_brka_m, gen_helper_sve_brkas_m) 3192 TRANS_FEAT(BRKB_m, aa64_sve, do_brk2, a, 3193 gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m) 3194 3195 TRANS_FEAT(BRKA_z, aa64_sve, do_brk2, a, 3196 gen_helper_sve_brka_z, gen_helper_sve_brkas_z) 3197 TRANS_FEAT(BRKB_z, aa64_sve, do_brk2, a, 3198 gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z) 3199 3200 TRANS_FEAT(BRKN, aa64_sve, do_brk2, a, 3201 gen_helper_sve_brkn, gen_helper_sve_brkns) 3202 3203 /* 3204 *** SVE Predicate Count Group 3205 */ 3206 3207 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg) 3208 { 3209 unsigned psz = pred_full_reg_size(s); 3210 3211 if (psz <= 8) { 3212 uint64_t psz_mask; 3213 3214 tcg_gen_ld_i64(val, tcg_env, pred_full_reg_offset(s, pn)); 3215 if (pn != pg) { 3216 TCGv_i64 g = tcg_temp_new_i64(); 3217 tcg_gen_ld_i64(g, tcg_env, pred_full_reg_offset(s, pg)); 3218 tcg_gen_and_i64(val, val, g); 3219 } 3220 3221 /* Reduce the pred_esz_masks value simply to reduce the 3222 * size of the code generated here. 3223 */ 3224 psz_mask = MAKE_64BIT_MASK(0, psz * 8); 3225 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask); 3226 3227 tcg_gen_ctpop_i64(val, val); 3228 } else { 3229 TCGv_ptr t_pn = tcg_temp_new_ptr(); 3230 TCGv_ptr t_pg = tcg_temp_new_ptr(); 3231 unsigned desc = 0; 3232 3233 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz); 3234 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz); 3235 3236 tcg_gen_addi_ptr(t_pn, tcg_env, pred_full_reg_offset(s, pn)); 3237 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 3238 3239 gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc)); 3240 } 3241 } 3242 3243 static bool trans_CNTP(DisasContext *s, arg_CNTP *a) 3244 { 3245 if (!dc_isar_feature(aa64_sve, s)) { 3246 return false; 3247 } 3248 if (sve_access_check(s)) { 3249 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg); 3250 } 3251 return true; 3252 } 3253 3254 static bool trans_CNTP_c(DisasContext *s, arg_CNTP_c *a) 3255 { 3256 TCGv_i32 t_png; 3257 uint32_t desc = 0; 3258 3259 if (dc_isar_feature(aa64_sve2p1, s)) { 3260 if (!sve_access_check(s)) { 3261 return true; 3262 } 3263 } else if (dc_isar_feature(aa64_sme2, s)) { 3264 if (!sme_sm_enabled_check(s)) { 3265 return true; 3266 } 3267 } else { 3268 return false; 3269 } 3270 3271 t_png = tcg_temp_new_i32(); 3272 tcg_gen_ld16u_i32(t_png, tcg_env, 3273 pred_full_reg_offset(s, a->rn) ^ 3274 (HOST_BIG_ENDIAN ? 6 : 0)); 3275 3276 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s)); 3277 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 3278 desc = FIELD_DP32(desc, PREDDESC, DATA, a->vl); 3279 3280 gen_helper_sve2p1_cntp_c(cpu_reg(s, a->rd), t_png, tcg_constant_i32(desc)); 3281 return true; 3282 } 3283 3284 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a) 3285 { 3286 if (!dc_isar_feature(aa64_sve, s)) { 3287 return false; 3288 } 3289 if (sve_access_check(s)) { 3290 TCGv_i64 reg = cpu_reg(s, a->rd); 3291 TCGv_i64 val = tcg_temp_new_i64(); 3292 3293 do_cntp(s, val, a->esz, a->pg, a->pg); 3294 if (a->d) { 3295 tcg_gen_sub_i64(reg, reg, val); 3296 } else { 3297 tcg_gen_add_i64(reg, reg, val); 3298 } 3299 } 3300 return true; 3301 } 3302 3303 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a) 3304 { 3305 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3306 return false; 3307 } 3308 if (sve_access_check(s)) { 3309 unsigned vsz = vec_full_reg_size(s); 3310 TCGv_i64 val = tcg_temp_new_i64(); 3311 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds; 3312 3313 do_cntp(s, val, a->esz, a->pg, a->pg); 3314 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd), 3315 vec_full_reg_offset(s, a->rn), val, vsz, vsz); 3316 } 3317 return true; 3318 } 3319 3320 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a) 3321 { 3322 if (!dc_isar_feature(aa64_sve, s)) { 3323 return false; 3324 } 3325 if (sve_access_check(s)) { 3326 TCGv_i64 reg = cpu_reg(s, a->rd); 3327 TCGv_i64 val = tcg_temp_new_i64(); 3328 3329 do_cntp(s, val, a->esz, a->pg, a->pg); 3330 do_sat_addsub_32(reg, val, a->u, a->d); 3331 } 3332 return true; 3333 } 3334 3335 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a) 3336 { 3337 if (!dc_isar_feature(aa64_sve, s)) { 3338 return false; 3339 } 3340 if (sve_access_check(s)) { 3341 TCGv_i64 reg = cpu_reg(s, a->rd); 3342 TCGv_i64 val = tcg_temp_new_i64(); 3343 3344 do_cntp(s, val, a->esz, a->pg, a->pg); 3345 do_sat_addsub_64(reg, val, a->u, a->d); 3346 } 3347 return true; 3348 } 3349 3350 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a) 3351 { 3352 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3353 return false; 3354 } 3355 if (sve_access_check(s)) { 3356 TCGv_i64 val = tcg_temp_new_i64(); 3357 do_cntp(s, val, a->esz, a->pg, a->pg); 3358 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d); 3359 } 3360 return true; 3361 } 3362 3363 /* 3364 *** SVE Integer Compare Scalars Group 3365 */ 3366 3367 static bool trans_CTERM(DisasContext *s, arg_CTERM *a) 3368 { 3369 if (!dc_isar_feature(aa64_sve, s)) { 3370 return false; 3371 } 3372 if (!sve_access_check(s)) { 3373 return true; 3374 } 3375 3376 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ); 3377 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf); 3378 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf); 3379 TCGv_i64 cmp = tcg_temp_new_i64(); 3380 3381 tcg_gen_setcond_i64(cond, cmp, rn, rm); 3382 tcg_gen_extrl_i64_i32(cpu_NF, cmp); 3383 3384 /* VF = !NF & !CF. */ 3385 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1); 3386 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF); 3387 3388 /* Both NF and VF actually look at bit 31. */ 3389 tcg_gen_neg_i32(cpu_NF, cpu_NF); 3390 tcg_gen_neg_i32(cpu_VF, cpu_VF); 3391 return true; 3392 } 3393 3394 typedef void gen_while_fn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32); 3395 static bool do_WHILE(DisasContext *s, arg_while *a, 3396 bool lt, int scale, int data, gen_while_fn *fn) 3397 { 3398 TCGv_i64 op0, op1, t0, t1, tmax; 3399 TCGv_i32 t2; 3400 TCGv_ptr ptr; 3401 unsigned vsz = vec_full_reg_size(s); 3402 unsigned desc = 0; 3403 TCGCond cond; 3404 uint64_t maxval; 3405 /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */ 3406 bool eq = a->eq == lt; 3407 3408 if (!sve_access_check(s)) { 3409 return true; 3410 } 3411 3412 op0 = read_cpu_reg(s, a->rn, 1); 3413 op1 = read_cpu_reg(s, a->rm, 1); 3414 3415 if (!a->sf) { 3416 if (a->u) { 3417 tcg_gen_ext32u_i64(op0, op0); 3418 tcg_gen_ext32u_i64(op1, op1); 3419 } else { 3420 tcg_gen_ext32s_i64(op0, op0); 3421 tcg_gen_ext32s_i64(op1, op1); 3422 } 3423 } 3424 3425 /* For the helper, compress the different conditions into a computation 3426 * of how many iterations for which the condition is true. 3427 */ 3428 t0 = tcg_temp_new_i64(); 3429 t1 = tcg_temp_new_i64(); 3430 3431 if (lt) { 3432 tcg_gen_sub_i64(t0, op1, op0); 3433 if (a->u) { 3434 maxval = a->sf ? UINT64_MAX : UINT32_MAX; 3435 cond = eq ? TCG_COND_LEU : TCG_COND_LTU; 3436 } else { 3437 maxval = a->sf ? INT64_MAX : INT32_MAX; 3438 cond = eq ? TCG_COND_LE : TCG_COND_LT; 3439 } 3440 } else { 3441 tcg_gen_sub_i64(t0, op0, op1); 3442 if (a->u) { 3443 maxval = 0; 3444 cond = eq ? TCG_COND_GEU : TCG_COND_GTU; 3445 } else { 3446 maxval = a->sf ? INT64_MIN : INT32_MIN; 3447 cond = eq ? TCG_COND_GE : TCG_COND_GT; 3448 } 3449 } 3450 3451 tmax = tcg_constant_i64((vsz << scale) >> a->esz); 3452 if (eq) { 3453 /* Equality means one more iteration. */ 3454 tcg_gen_addi_i64(t0, t0, 1); 3455 3456 /* 3457 * For the less-than while, if op1 is maxval (and the only time 3458 * the addition above could overflow), then we produce an all-true 3459 * predicate by setting the count to the vector length. This is 3460 * because the pseudocode is described as an increment + compare 3461 * loop, and the maximum integer would always compare true. 3462 * Similarly, the greater-than while has the same issue with the 3463 * minimum integer due to the decrement + compare loop. 3464 */ 3465 tcg_gen_movi_i64(t1, maxval); 3466 tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0); 3467 } 3468 3469 /* Bound to the maximum. */ 3470 tcg_gen_umin_i64(t0, t0, tmax); 3471 3472 /* Set the count to zero if the condition is false. */ 3473 tcg_gen_movi_i64(t1, 0); 3474 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1); 3475 3476 /* Since we're bounded, pass as a 32-bit type. */ 3477 t2 = tcg_temp_new_i32(); 3478 tcg_gen_extrl_i64_i32(t2, t0); 3479 3480 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8); 3481 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 3482 desc = FIELD_DP32(desc, PREDDESC, DATA, data); 3483 3484 ptr = tcg_temp_new_ptr(); 3485 tcg_gen_addi_ptr(ptr, tcg_env, pred_full_reg_offset(s, a->rd)); 3486 3487 fn(t2, ptr, t2, tcg_constant_i32(desc)); 3488 3489 do_pred_flags(t2); 3490 return true; 3491 } 3492 3493 TRANS_FEAT(WHILE_lt, aa64_sve, do_WHILE, 3494 a, true, 0, 0, gen_helper_sve_whilel) 3495 TRANS_FEAT(WHILE_gt, aa64_sve2, do_WHILE, 3496 a, false, 0, 0, gen_helper_sve_whileg) 3497 3498 TRANS_FEAT(WHILE_lt_pair, aa64_sme2_or_sve2p1, do_WHILE, 3499 a, true, 1, 0, gen_helper_sve_while2l) 3500 TRANS_FEAT(WHILE_gt_pair, aa64_sme2_or_sve2p1, do_WHILE, 3501 a, false, 1, 0, gen_helper_sve_while2g) 3502 3503 TRANS_FEAT(WHILE_lt_cnt2, aa64_sme2_or_sve2p1, do_WHILE, 3504 a, true, 1, 1, gen_helper_sve_whilecl) 3505 TRANS_FEAT(WHILE_lt_cnt4, aa64_sme2_or_sve2p1, do_WHILE, 3506 a, true, 2, 2, gen_helper_sve_whilecl) 3507 TRANS_FEAT(WHILE_gt_cnt2, aa64_sme2_or_sve2p1, do_WHILE, 3508 a, false, 1, 1, gen_helper_sve_whilecg) 3509 TRANS_FEAT(WHILE_gt_cnt4, aa64_sme2_or_sve2p1, do_WHILE, 3510 a, false, 2, 2, gen_helper_sve_whilecg) 3511 3512 static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a) 3513 { 3514 TCGv_i64 op0, op1, diff, t1, tmax; 3515 TCGv_i32 t2; 3516 TCGv_ptr ptr; 3517 unsigned vsz = vec_full_reg_size(s); 3518 unsigned desc = 0; 3519 3520 if (!dc_isar_feature(aa64_sve2, s)) { 3521 return false; 3522 } 3523 if (!sve_access_check(s)) { 3524 return true; 3525 } 3526 3527 op0 = read_cpu_reg(s, a->rn, 1); 3528 op1 = read_cpu_reg(s, a->rm, 1); 3529 3530 tmax = tcg_constant_i64(vsz >> a->esz); 3531 diff = tcg_temp_new_i64(); 3532 3533 if (a->rw) { 3534 /* WHILERW */ 3535 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */ 3536 t1 = tcg_temp_new_i64(); 3537 tcg_gen_sub_i64(diff, op0, op1); 3538 tcg_gen_sub_i64(t1, op1, op0); 3539 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1); 3540 /* Divide, rounding down, by ESIZE. */ 3541 tcg_gen_shri_i64(diff, diff, a->esz); 3542 /* If op1 == op0, diff == 0, and the condition is always true. */ 3543 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff); 3544 } else { 3545 /* WHILEWR */ 3546 tcg_gen_sub_i64(diff, op1, op0); 3547 /* Divide, rounding down, by ESIZE. */ 3548 tcg_gen_shri_i64(diff, diff, a->esz); 3549 /* If op0 >= op1, diff <= 0, the condition is always true. */ 3550 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff); 3551 } 3552 3553 /* Bound to the maximum. */ 3554 tcg_gen_umin_i64(diff, diff, tmax); 3555 3556 /* Since we're bounded, pass as a 32-bit type. */ 3557 t2 = tcg_temp_new_i32(); 3558 tcg_gen_extrl_i64_i32(t2, diff); 3559 3560 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8); 3561 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 3562 3563 ptr = tcg_temp_new_ptr(); 3564 tcg_gen_addi_ptr(ptr, tcg_env, pred_full_reg_offset(s, a->rd)); 3565 3566 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc)); 3567 do_pred_flags(t2); 3568 return true; 3569 } 3570 3571 static bool do_pext(DisasContext *s, arg_pext *a, int n) 3572 { 3573 TCGv_i32 t_png; 3574 TCGv_ptr t_pd; 3575 int pl; 3576 3577 if (!sve_access_check(s)) { 3578 return true; 3579 } 3580 3581 t_png = tcg_temp_new_i32(); 3582 tcg_gen_ld16u_i32(t_png, tcg_env, 3583 pred_full_reg_offset(s, a->rn) ^ 3584 (HOST_BIG_ENDIAN ? 6 : 0)); 3585 3586 t_pd = tcg_temp_new_ptr(); 3587 pl = pred_full_reg_size(s); 3588 3589 for (int i = 0; i < n; ++i) { 3590 int rd = (a->rd + i) % 16; 3591 int part = a->imm * n + i; 3592 unsigned desc = 0; 3593 3594 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pl); 3595 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 3596 desc = FIELD_DP32(desc, PREDDESC, DATA, part); 3597 3598 tcg_gen_addi_ptr(t_pd, tcg_env, pred_full_reg_offset(s, rd)); 3599 gen_helper_pext(t_pd, t_png, tcg_constant_i32(desc)); 3600 } 3601 return true; 3602 } 3603 3604 TRANS_FEAT(PEXT_1, aa64_sme2_or_sve2p1, do_pext, a, 1) 3605 TRANS_FEAT(PEXT_2, aa64_sme2_or_sve2p1, do_pext, a, 2) 3606 3607 /* 3608 *** SVE Integer Wide Immediate - Unpredicated Group 3609 */ 3610 3611 static bool trans_FDUP(DisasContext *s, arg_FDUP *a) 3612 { 3613 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3614 return false; 3615 } 3616 if (sve_access_check(s)) { 3617 unsigned vsz = vec_full_reg_size(s); 3618 int dofs = vec_full_reg_offset(s, a->rd); 3619 uint64_t imm; 3620 3621 /* Decode the VFP immediate. */ 3622 imm = vfp_expand_imm(a->esz, a->imm); 3623 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm); 3624 } 3625 return true; 3626 } 3627 3628 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a) 3629 { 3630 if (!dc_isar_feature(aa64_sve, s)) { 3631 return false; 3632 } 3633 if (sve_access_check(s)) { 3634 unsigned vsz = vec_full_reg_size(s); 3635 int dofs = vec_full_reg_offset(s, a->rd); 3636 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm); 3637 } 3638 return true; 3639 } 3640 3641 TRANS_FEAT(ADD_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_addi, a) 3642 3643 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a) 3644 { 3645 a->imm = -a->imm; 3646 return trans_ADD_zzi(s, a); 3647 } 3648 3649 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a) 3650 { 3651 static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 }; 3652 static const GVecGen2s op[4] = { 3653 { .fni8 = tcg_gen_vec_sub8_i64, 3654 .fniv = tcg_gen_sub_vec, 3655 .fno = gen_helper_sve_subri_b, 3656 .opt_opc = vecop_list, 3657 .vece = MO_8, 3658 .scalar_first = true }, 3659 { .fni8 = tcg_gen_vec_sub16_i64, 3660 .fniv = tcg_gen_sub_vec, 3661 .fno = gen_helper_sve_subri_h, 3662 .opt_opc = vecop_list, 3663 .vece = MO_16, 3664 .scalar_first = true }, 3665 { .fni4 = tcg_gen_sub_i32, 3666 .fniv = tcg_gen_sub_vec, 3667 .fno = gen_helper_sve_subri_s, 3668 .opt_opc = vecop_list, 3669 .vece = MO_32, 3670 .scalar_first = true }, 3671 { .fni8 = tcg_gen_sub_i64, 3672 .fniv = tcg_gen_sub_vec, 3673 .fno = gen_helper_sve_subri_d, 3674 .opt_opc = vecop_list, 3675 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 3676 .vece = MO_64, 3677 .scalar_first = true } 3678 }; 3679 3680 if (!dc_isar_feature(aa64_sve, s)) { 3681 return false; 3682 } 3683 if (sve_access_check(s)) { 3684 unsigned vsz = vec_full_reg_size(s); 3685 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd), 3686 vec_full_reg_offset(s, a->rn), 3687 vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]); 3688 } 3689 return true; 3690 } 3691 3692 TRANS_FEAT(MUL_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_muli, a) 3693 3694 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d) 3695 { 3696 if (sve_access_check(s)) { 3697 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, 3698 tcg_constant_i64(a->imm), u, d); 3699 } 3700 return true; 3701 } 3702 3703 TRANS_FEAT(SQADD_zzi, aa64_sve, do_zzi_sat, a, false, false) 3704 TRANS_FEAT(UQADD_zzi, aa64_sve, do_zzi_sat, a, true, false) 3705 TRANS_FEAT(SQSUB_zzi, aa64_sve, do_zzi_sat, a, false, true) 3706 TRANS_FEAT(UQSUB_zzi, aa64_sve, do_zzi_sat, a, true, true) 3707 3708 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn) 3709 { 3710 if (sve_access_check(s)) { 3711 unsigned vsz = vec_full_reg_size(s); 3712 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd), 3713 vec_full_reg_offset(s, a->rn), 3714 tcg_constant_i64(a->imm), vsz, vsz, 0, fn); 3715 } 3716 return true; 3717 } 3718 3719 #define DO_ZZI(NAME, name) \ 3720 static gen_helper_gvec_2i * const name##i_fns[4] = { \ 3721 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \ 3722 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \ 3723 }; \ 3724 TRANS_FEAT(NAME##_zzi, aa64_sve, do_zzi_ool, a, name##i_fns[a->esz]) 3725 3726 DO_ZZI(SMAX, smax) 3727 DO_ZZI(UMAX, umax) 3728 DO_ZZI(SMIN, smin) 3729 DO_ZZI(UMIN, umin) 3730 3731 #undef DO_ZZI 3732 3733 static gen_helper_gvec_4 * const dot_fns[2][2] = { 3734 { gen_helper_gvec_sdot_4b, gen_helper_gvec_sdot_4h }, 3735 { gen_helper_gvec_udot_4b, gen_helper_gvec_udot_4h } 3736 }; 3737 TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz, 3738 dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0) 3739 3740 /* 3741 * SVE Multiply - Indexed 3742 */ 3743 3744 TRANS_FEAT(SDOT_zzxw_4s, aa64_sve, gen_gvec_ool_arg_zzxz, 3745 gen_helper_gvec_sdot_idx_4b, a) 3746 TRANS_FEAT(SDOT_zzxw_4d, aa64_sve, gen_gvec_ool_arg_zzxz, 3747 gen_helper_gvec_sdot_idx_4h, a) 3748 TRANS_FEAT(UDOT_zzxw_4s, aa64_sve, gen_gvec_ool_arg_zzxz, 3749 gen_helper_gvec_udot_idx_4b, a) 3750 TRANS_FEAT(UDOT_zzxw_4d, aa64_sve, gen_gvec_ool_arg_zzxz, 3751 gen_helper_gvec_udot_idx_4h, a) 3752 3753 TRANS_FEAT(SUDOT_zzxw_4s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz, 3754 gen_helper_gvec_sudot_idx_4b, a) 3755 TRANS_FEAT(USDOT_zzxw_4s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz, 3756 gen_helper_gvec_usdot_idx_4b, a) 3757 3758 TRANS_FEAT(SDOT_zzxw_2s, aa64_sme2_or_sve2p1, gen_gvec_ool_arg_zzxz, 3759 gen_helper_gvec_sdot_idx_2h, a) 3760 TRANS_FEAT(UDOT_zzxw_2s, aa64_sme2_or_sve2p1, gen_gvec_ool_arg_zzxz, 3761 gen_helper_gvec_udot_idx_2h, a) 3762 3763 #define DO_SVE2_RRX(NAME, FUNC) \ 3764 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \ 3765 a->rd, a->rn, a->rm, a->index) 3766 3767 DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h) 3768 DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s) 3769 DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d) 3770 3771 DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h) 3772 DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s) 3773 DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d) 3774 3775 DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h) 3776 DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s) 3777 DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d) 3778 3779 #undef DO_SVE2_RRX 3780 3781 #define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \ 3782 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \ 3783 a->rd, a->rn, a->rm, (a->index << 1) | TOP) 3784 3785 DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false) 3786 DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false) 3787 DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true) 3788 DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true) 3789 3790 DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false) 3791 DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false) 3792 DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true) 3793 DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true) 3794 3795 DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false) 3796 DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false) 3797 DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true) 3798 DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true) 3799 3800 #undef DO_SVE2_RRX_TB 3801 3802 #define DO_SVE2_RRXR(NAME, FUNC) \ 3803 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a) 3804 3805 DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h) 3806 DO_SVE2_RRXR(MLA_zzxz_s, gen_helper_gvec_mla_idx_s) 3807 DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d) 3808 3809 DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h) 3810 DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s) 3811 DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d) 3812 3813 DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h) 3814 DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s) 3815 DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d) 3816 3817 DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h) 3818 DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s) 3819 DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d) 3820 3821 #undef DO_SVE2_RRXR 3822 3823 #define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \ 3824 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \ 3825 a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP) 3826 3827 DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false) 3828 DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false) 3829 DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true) 3830 DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true) 3831 3832 DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false) 3833 DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false) 3834 DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true) 3835 DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true) 3836 3837 DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false) 3838 DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false) 3839 DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true) 3840 DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true) 3841 3842 DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false) 3843 DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false) 3844 DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true) 3845 DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true) 3846 3847 DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false) 3848 DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false) 3849 DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true) 3850 DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true) 3851 3852 DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false) 3853 DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false) 3854 DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true) 3855 DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true) 3856 3857 #undef DO_SVE2_RRXR_TB 3858 3859 #define DO_SVE2_RRXR_ROT(NAME, FUNC) \ 3860 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \ 3861 a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot) 3862 3863 DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h) 3864 DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s) 3865 3866 DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h) 3867 DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s) 3868 3869 DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s) 3870 DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d) 3871 3872 #undef DO_SVE2_RRXR_ROT 3873 3874 /* 3875 *** SVE Floating Point Multiply-Add Indexed Group 3876 */ 3877 3878 static gen_helper_gvec_4_ptr * const fmla_idx_fns[4] = { 3879 NULL, gen_helper_gvec_fmla_idx_h, 3880 gen_helper_gvec_fmla_idx_s, gen_helper_gvec_fmla_idx_d 3881 }; 3882 TRANS_FEAT(FMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, 3883 fmla_idx_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->index, 3884 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) 3885 3886 static gen_helper_gvec_4_ptr * const fmls_idx_fns[4][2] = { 3887 { NULL, NULL }, 3888 { gen_helper_gvec_fmls_idx_h, gen_helper_gvec_ah_fmls_idx_h }, 3889 { gen_helper_gvec_fmls_idx_s, gen_helper_gvec_ah_fmls_idx_s }, 3890 { gen_helper_gvec_fmls_idx_d, gen_helper_gvec_ah_fmls_idx_d }, 3891 }; 3892 TRANS_FEAT(FMLS_zzxz, aa64_sve, gen_gvec_fpst_zzzz, 3893 fmls_idx_fns[a->esz][s->fpcr_ah], 3894 a->rd, a->rn, a->rm, a->ra, a->index, 3895 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) 3896 3897 /* 3898 *** SVE Floating Point Multiply Indexed Group 3899 */ 3900 3901 static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = { 3902 NULL, gen_helper_gvec_fmul_idx_h, 3903 gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d, 3904 }; 3905 TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz, 3906 fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index, 3907 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) 3908 3909 /* 3910 *** SVE Floating Point Fast Reduction Group 3911 */ 3912 3913 typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr, 3914 TCGv_ptr, TCGv_i32); 3915 3916 static bool do_reduce(DisasContext *s, arg_rpr_esz *a, 3917 gen_helper_fp_reduce *fn) 3918 { 3919 unsigned vsz, p2vsz; 3920 TCGv_i32 t_desc; 3921 TCGv_ptr t_zn, t_pg, status; 3922 TCGv_i64 temp; 3923 3924 if (fn == NULL) { 3925 return false; 3926 } 3927 if (!sve_access_check(s)) { 3928 return true; 3929 } 3930 3931 vsz = vec_full_reg_size(s); 3932 p2vsz = pow2ceil(vsz); 3933 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz)); 3934 temp = tcg_temp_new_i64(); 3935 t_zn = tcg_temp_new_ptr(); 3936 t_pg = tcg_temp_new_ptr(); 3937 3938 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn)); 3939 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg)); 3940 status = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); 3941 3942 fn(temp, t_zn, t_pg, status, t_desc); 3943 3944 write_fp_dreg(s, a->rd, temp); 3945 return true; 3946 } 3947 3948 #define DO_VPZ(NAME, name) \ 3949 static gen_helper_fp_reduce * const name##_fns[4] = { \ 3950 NULL, gen_helper_sve_##name##_h, \ 3951 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ 3952 }; \ 3953 TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz]) 3954 3955 #define DO_VPZ_AH(NAME, name) \ 3956 static gen_helper_fp_reduce * const name##_fns[4] = { \ 3957 NULL, gen_helper_sve_##name##_h, \ 3958 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ 3959 }; \ 3960 static gen_helper_fp_reduce * const name##_ah_fns[4] = { \ 3961 NULL, gen_helper_sve_ah_##name##_h, \ 3962 gen_helper_sve_ah_##name##_s, gen_helper_sve_ah_##name##_d, \ 3963 }; \ 3964 TRANS_FEAT(NAME, aa64_sve, do_reduce, a, \ 3965 s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz]) 3966 3967 DO_VPZ(FADDV, faddv) 3968 DO_VPZ(FMINNMV, fminnmv) 3969 DO_VPZ(FMAXNMV, fmaxnmv) 3970 DO_VPZ_AH(FMINV, fminv) 3971 DO_VPZ_AH(FMAXV, fmaxv) 3972 3973 #undef DO_VPZ 3974 3975 static gen_helper_gvec_3_ptr * const faddqv_fns[4] = { 3976 NULL, gen_helper_sve2p1_faddqv_h, 3977 gen_helper_sve2p1_faddqv_s, gen_helper_sve2p1_faddqv_d, 3978 }; 3979 TRANS_FEAT(FADDQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz, 3980 faddqv_fns[a->esz], a, 0, 3981 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) 3982 3983 static gen_helper_gvec_3_ptr * const fmaxnmqv_fns[4] = { 3984 NULL, gen_helper_sve2p1_fmaxnmqv_h, 3985 gen_helper_sve2p1_fmaxnmqv_s, gen_helper_sve2p1_fmaxnmqv_d, 3986 }; 3987 TRANS_FEAT(FMAXNMQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz, 3988 fmaxnmqv_fns[a->esz], a, 0, 3989 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) 3990 3991 static gen_helper_gvec_3_ptr * const fminnmqv_fns[4] = { 3992 NULL, gen_helper_sve2p1_fminnmqv_h, 3993 gen_helper_sve2p1_fminnmqv_s, gen_helper_sve2p1_fminnmqv_d, 3994 }; 3995 TRANS_FEAT(FMINNMQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz, 3996 fminnmqv_fns[a->esz], a, 0, 3997 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) 3998 3999 static gen_helper_gvec_3_ptr * const fmaxqv_fns[4] = { 4000 NULL, gen_helper_sve2p1_fmaxqv_h, 4001 gen_helper_sve2p1_fmaxqv_s, gen_helper_sve2p1_fmaxqv_d, 4002 }; 4003 static gen_helper_gvec_3_ptr * const fmaxqv_ah_fns[4] = { 4004 NULL, gen_helper_sve2p1_ah_fmaxqv_h, 4005 gen_helper_sve2p1_ah_fmaxqv_s, gen_helper_sve2p1_ah_fmaxqv_d, 4006 }; 4007 TRANS_FEAT(FMAXQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz, 4008 (s->fpcr_ah ? fmaxqv_fns : fmaxqv_ah_fns)[a->esz], a, 0, 4009 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) 4010 4011 static gen_helper_gvec_3_ptr * const fminqv_fns[4] = { 4012 NULL, gen_helper_sve2p1_fminqv_h, 4013 gen_helper_sve2p1_fminqv_s, gen_helper_sve2p1_fminqv_d, 4014 }; 4015 static gen_helper_gvec_3_ptr * const fminqv_ah_fns[4] = { 4016 NULL, gen_helper_sve2p1_ah_fminqv_h, 4017 gen_helper_sve2p1_ah_fminqv_s, gen_helper_sve2p1_ah_fminqv_d, 4018 }; 4019 TRANS_FEAT(FMINQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz, 4020 (s->fpcr_ah ? fminqv_fns : fminqv_ah_fns)[a->esz], a, 0, 4021 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) 4022 4023 /* 4024 *** SVE Floating Point Unary Operations - Unpredicated Group 4025 */ 4026 4027 static gen_helper_gvec_2_ptr * const frecpe_fns[] = { 4028 NULL, gen_helper_gvec_frecpe_h, 4029 gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d, 4030 }; 4031 static gen_helper_gvec_2_ptr * const frecpe_rpres_fns[] = { 4032 NULL, gen_helper_gvec_frecpe_h, 4033 gen_helper_gvec_frecpe_rpres_s, gen_helper_gvec_frecpe_d, 4034 }; 4035 TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_ah_arg_zz, 4036 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? 4037 frecpe_rpres_fns[a->esz] : frecpe_fns[a->esz], a, 0) 4038 4039 static gen_helper_gvec_2_ptr * const frsqrte_fns[] = { 4040 NULL, gen_helper_gvec_frsqrte_h, 4041 gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d, 4042 }; 4043 static gen_helper_gvec_2_ptr * const frsqrte_rpres_fns[] = { 4044 NULL, gen_helper_gvec_frsqrte_h, 4045 gen_helper_gvec_frsqrte_rpres_s, gen_helper_gvec_frsqrte_d, 4046 }; 4047 TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_ah_arg_zz, 4048 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? 4049 frsqrte_rpres_fns[a->esz] : frsqrte_fns[a->esz], a, 0) 4050 4051 /* 4052 *** SVE Floating Point Compare with Zero Group 4053 */ 4054 4055 static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a, 4056 gen_helper_gvec_3_ptr *fn) 4057 { 4058 if (fn == NULL) { 4059 return false; 4060 } 4061 if (sve_access_check(s)) { 4062 unsigned vsz = vec_full_reg_size(s); 4063 TCGv_ptr status = 4064 fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); 4065 4066 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd), 4067 vec_full_reg_offset(s, a->rn), 4068 pred_full_reg_offset(s, a->pg), 4069 status, vsz, vsz, 0, fn); 4070 } 4071 return true; 4072 } 4073 4074 #define DO_PPZ(NAME, name) \ 4075 static gen_helper_gvec_3_ptr * const name##_fns[] = { \ 4076 NULL, gen_helper_sve_##name##_h, \ 4077 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ 4078 }; \ 4079 TRANS_FEAT(NAME, aa64_sve, do_ppz_fp, a, name##_fns[a->esz]) 4080 4081 DO_PPZ(FCMGE_ppz0, fcmge0) 4082 DO_PPZ(FCMGT_ppz0, fcmgt0) 4083 DO_PPZ(FCMLE_ppz0, fcmle0) 4084 DO_PPZ(FCMLT_ppz0, fcmlt0) 4085 DO_PPZ(FCMEQ_ppz0, fcmeq0) 4086 DO_PPZ(FCMNE_ppz0, fcmne0) 4087 4088 #undef DO_PPZ 4089 4090 /* 4091 *** SVE floating-point trig multiply-add coefficient 4092 */ 4093 4094 static gen_helper_gvec_3_ptr * const ftmad_fns[4] = { 4095 NULL, gen_helper_sve_ftmad_h, 4096 gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d, 4097 }; 4098 TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz, 4099 ftmad_fns[a->esz], a->rd, a->rn, a->rm, 4100 a->imm | (s->fpcr_ah << 3), 4101 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) 4102 4103 /* 4104 *** SVE Floating Point Accumulating Reduction Group 4105 */ 4106 4107 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) 4108 { 4109 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr, 4110 TCGv_ptr, TCGv_ptr, TCGv_i32); 4111 static fadda_fn * const fns[3] = { 4112 gen_helper_sve_fadda_h, 4113 gen_helper_sve_fadda_s, 4114 gen_helper_sve_fadda_d, 4115 }; 4116 unsigned vsz = vec_full_reg_size(s); 4117 TCGv_ptr t_rm, t_pg, t_fpst; 4118 TCGv_i64 t_val; 4119 TCGv_i32 t_desc; 4120 4121 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 4122 return false; 4123 } 4124 s->is_nonstreaming = true; 4125 if (!sve_access_check(s)) { 4126 return true; 4127 } 4128 4129 t_val = load_esz(tcg_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz); 4130 t_rm = tcg_temp_new_ptr(); 4131 t_pg = tcg_temp_new_ptr(); 4132 tcg_gen_addi_ptr(t_rm, tcg_env, vec_full_reg_offset(s, a->rm)); 4133 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg)); 4134 t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); 4135 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 4136 4137 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc); 4138 4139 write_fp_dreg(s, a->rd, t_val); 4140 return true; 4141 } 4142 4143 /* 4144 *** SVE Floating Point Arithmetic - Unpredicated Group 4145 */ 4146 4147 #define DO_FP3(NAME, name) \ 4148 static gen_helper_gvec_3_ptr * const name##_fns[4] = { \ 4149 NULL, gen_helper_gvec_##name##_h, \ 4150 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \ 4151 }; \ 4152 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0) 4153 4154 #define DO_FP3_AH(NAME, name) \ 4155 static gen_helper_gvec_3_ptr * const name##_fns[4] = { \ 4156 NULL, gen_helper_gvec_##name##_h, \ 4157 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \ 4158 }; \ 4159 static gen_helper_gvec_3_ptr * const name##_ah_fns[4] = { \ 4160 NULL, gen_helper_gvec_ah_##name##_h, \ 4161 gen_helper_gvec_ah_##name##_s, gen_helper_gvec_ah_##name##_d \ 4162 }; \ 4163 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_ah_arg_zzz, \ 4164 s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], a, 0) 4165 4166 DO_FP3(FADD_zzz, fadd) 4167 DO_FP3(FSUB_zzz, fsub) 4168 DO_FP3(FMUL_zzz, fmul) 4169 DO_FP3_AH(FRECPS, recps) 4170 DO_FP3_AH(FRSQRTS, rsqrts) 4171 4172 #undef DO_FP3 4173 4174 static gen_helper_gvec_3_ptr * const ftsmul_fns[4] = { 4175 NULL, gen_helper_gvec_ftsmul_h, 4176 gen_helper_gvec_ftsmul_s, gen_helper_gvec_ftsmul_d 4177 }; 4178 TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz, 4179 ftsmul_fns[a->esz], a, 0) 4180 4181 /* 4182 *** SVE Floating Point Arithmetic - Predicated Group 4183 */ 4184 4185 #define DO_ZPZZ_FP(NAME, FEAT, name) \ 4186 static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \ 4187 NULL, gen_helper_##name##_h, \ 4188 gen_helper_##name##_s, gen_helper_##name##_d \ 4189 }; \ 4190 TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a) 4191 4192 #define DO_ZPZZ_AH_FP(NAME, FEAT, name, ah_name) \ 4193 static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \ 4194 NULL, gen_helper_##name##_h, \ 4195 gen_helper_##name##_s, gen_helper_##name##_d \ 4196 }; \ 4197 static gen_helper_gvec_4_ptr * const name##_ah_zpzz_fns[4] = { \ 4198 NULL, gen_helper_##ah_name##_h, \ 4199 gen_helper_##ah_name##_s, gen_helper_##ah_name##_d \ 4200 }; \ 4201 TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, \ 4202 s->fpcr_ah ? name##_ah_zpzz_fns[a->esz] : \ 4203 name##_zpzz_fns[a->esz], a) 4204 4205 DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd) 4206 DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub) 4207 DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul) 4208 DO_ZPZZ_AH_FP(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin) 4209 DO_ZPZZ_AH_FP(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax) 4210 DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum) 4211 DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum) 4212 DO_ZPZZ_AH_FP(FABD, aa64_sve, sve_fabd, sve_ah_fabd) 4213 DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn) 4214 DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv) 4215 DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx) 4216 4217 typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr, 4218 TCGv_i64, TCGv_ptr, TCGv_i32); 4219 4220 static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16, 4221 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn) 4222 { 4223 unsigned vsz = vec_full_reg_size(s); 4224 TCGv_ptr t_zd, t_zn, t_pg, status; 4225 TCGv_i32 desc; 4226 4227 t_zd = tcg_temp_new_ptr(); 4228 t_zn = tcg_temp_new_ptr(); 4229 t_pg = tcg_temp_new_ptr(); 4230 tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, zd)); 4231 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, zn)); 4232 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 4233 4234 status = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64); 4235 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 4236 fn(t_zd, t_zn, t_pg, scalar, status, desc); 4237 } 4238 4239 static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm, 4240 gen_helper_sve_fp2scalar *fn) 4241 { 4242 if (fn == NULL) { 4243 return false; 4244 } 4245 if (sve_access_check(s)) { 4246 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, 4247 tcg_constant_i64(imm), fn); 4248 } 4249 return true; 4250 } 4251 4252 #define DO_FP_IMM(NAME, name, const0, const1) \ 4253 static gen_helper_sve_fp2scalar * const name##_fns[4] = { \ 4254 NULL, gen_helper_sve_##name##_h, \ 4255 gen_helper_sve_##name##_s, \ 4256 gen_helper_sve_##name##_d \ 4257 }; \ 4258 static uint64_t const name##_const[4][2] = { \ 4259 { -1, -1 }, \ 4260 { float16_##const0, float16_##const1 }, \ 4261 { float32_##const0, float32_##const1 }, \ 4262 { float64_##const0, float64_##const1 }, \ 4263 }; \ 4264 TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \ 4265 name##_const[a->esz][a->imm], name##_fns[a->esz]) 4266 4267 #define DO_FP_AH_IMM(NAME, name, const0, const1) \ 4268 static gen_helper_sve_fp2scalar * const name##_fns[4] = { \ 4269 NULL, gen_helper_sve_##name##_h, \ 4270 gen_helper_sve_##name##_s, \ 4271 gen_helper_sve_##name##_d \ 4272 }; \ 4273 static gen_helper_sve_fp2scalar * const name##_ah_fns[4] = { \ 4274 NULL, gen_helper_sve_ah_##name##_h, \ 4275 gen_helper_sve_ah_##name##_s, \ 4276 gen_helper_sve_ah_##name##_d \ 4277 }; \ 4278 static uint64_t const name##_const[4][2] = { \ 4279 { -1, -1 }, \ 4280 { float16_##const0, float16_##const1 }, \ 4281 { float32_##const0, float32_##const1 }, \ 4282 { float64_##const0, float64_##const1 }, \ 4283 }; \ 4284 TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \ 4285 name##_const[a->esz][a->imm], \ 4286 s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz]) 4287 4288 DO_FP_IMM(FADD, fadds, half, one) 4289 DO_FP_IMM(FSUB, fsubs, half, one) 4290 DO_FP_IMM(FMUL, fmuls, half, two) 4291 DO_FP_IMM(FSUBR, fsubrs, half, one) 4292 DO_FP_IMM(FMAXNM, fmaxnms, zero, one) 4293 DO_FP_IMM(FMINNM, fminnms, zero, one) 4294 DO_FP_AH_IMM(FMAX, fmaxs, zero, one) 4295 DO_FP_AH_IMM(FMIN, fmins, zero, one) 4296 4297 #undef DO_FP_IMM 4298 4299 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a, 4300 gen_helper_gvec_4_ptr *fn) 4301 { 4302 if (fn == NULL) { 4303 return false; 4304 } 4305 if (sve_access_check(s)) { 4306 unsigned vsz = vec_full_reg_size(s); 4307 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); 4308 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd), 4309 vec_full_reg_offset(s, a->rn), 4310 vec_full_reg_offset(s, a->rm), 4311 pred_full_reg_offset(s, a->pg), 4312 status, vsz, vsz, 0, fn); 4313 } 4314 return true; 4315 } 4316 4317 #define DO_FPCMP(NAME, name) \ 4318 static gen_helper_gvec_4_ptr * const name##_fns[4] = { \ 4319 NULL, gen_helper_sve_##name##_h, \ 4320 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ 4321 }; \ 4322 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_fp_cmp, a, name##_fns[a->esz]) 4323 4324 DO_FPCMP(FCMGE, fcmge) 4325 DO_FPCMP(FCMGT, fcmgt) 4326 DO_FPCMP(FCMEQ, fcmeq) 4327 DO_FPCMP(FCMNE, fcmne) 4328 DO_FPCMP(FCMUO, fcmuo) 4329 DO_FPCMP(FACGE, facge) 4330 DO_FPCMP(FACGT, facgt) 4331 4332 #undef DO_FPCMP 4333 4334 static gen_helper_gvec_4_ptr * const fcadd_fns[] = { 4335 NULL, gen_helper_sve_fcadd_h, 4336 gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d, 4337 }; 4338 TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], 4339 a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1), 4340 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) 4341 4342 #define DO_FMLA(NAME, name, ah_name) \ 4343 static gen_helper_gvec_5_ptr * const name##_fns[4] = { \ 4344 NULL, gen_helper_sve_##name##_h, \ 4345 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ 4346 }; \ 4347 static gen_helper_gvec_5_ptr * const name##_ah_fns[4] = { \ 4348 NULL, gen_helper_sve_##ah_name##_h, \ 4349 gen_helper_sve_##ah_name##_s, gen_helper_sve_##ah_name##_d \ 4350 }; \ 4351 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, \ 4352 s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], \ 4353 a->rd, a->rn, a->rm, a->ra, a->pg, 0, \ 4354 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) 4355 4356 /* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything */ 4357 DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz) 4358 DO_FMLA(FMLS_zpzzz, fmls_zpzzz, ah_fmls_zpzzz) 4359 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz, ah_fnmla_zpzzz) 4360 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz, ah_fnmls_zpzzz) 4361 4362 #undef DO_FMLA 4363 4364 static gen_helper_gvec_5_ptr * const fcmla_fns[4] = { 4365 NULL, gen_helper_sve_fcmla_zpzzz_h, 4366 gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d, 4367 }; 4368 TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz], 4369 a->rd, a->rn, a->rm, a->ra, a->pg, a->rot | (s->fpcr_ah << 2), 4370 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) 4371 4372 static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = { 4373 NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL 4374 }; 4375 TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz], 4376 a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot, 4377 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) 4378 4379 /* 4380 *** SVE Floating Point Unary Operations Predicated Group 4381 */ 4382 4383 TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 4384 gen_helper_sve_fcvt_sh, a, 0, FPST_A64) 4385 TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz, 4386 gen_helper_sve_fcvt_hs, a, 0, FPST_A64_F16) 4387 4388 TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, 4389 gen_helper_sve_bfcvt, a, 0, 4390 s->fpcr_ah ? FPST_AH : FPST_A64) 4391 4392 TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 4393 gen_helper_sve_fcvt_dh, a, 0, FPST_A64) 4394 TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 4395 gen_helper_sve_fcvt_hd, a, 0, FPST_A64_F16) 4396 TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4397 gen_helper_sve_fcvt_ds, a, 0, FPST_A64) 4398 TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4399 gen_helper_sve_fcvt_sd, a, 0, FPST_A64) 4400 4401 TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4402 gen_helper_sve_fcvtzs_hh, a, 0, FPST_A64_F16) 4403 TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4404 gen_helper_sve_fcvtzu_hh, a, 0, FPST_A64_F16) 4405 TRANS_FEAT(FCVTZS_hs, aa64_sve, gen_gvec_fpst_arg_zpz, 4406 gen_helper_sve_fcvtzs_hs, a, 0, FPST_A64_F16) 4407 TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz, 4408 gen_helper_sve_fcvtzu_hs, a, 0, FPST_A64_F16) 4409 TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 4410 gen_helper_sve_fcvtzs_hd, a, 0, FPST_A64_F16) 4411 TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 4412 gen_helper_sve_fcvtzu_hd, a, 0, FPST_A64_F16) 4413 4414 TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4415 gen_helper_sve_fcvtzs_ss, a, 0, FPST_A64) 4416 TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4417 gen_helper_sve_fcvtzu_ss, a, 0, FPST_A64) 4418 TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4419 gen_helper_sve_fcvtzs_sd, a, 0, FPST_A64) 4420 TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4421 gen_helper_sve_fcvtzu_sd, a, 0, FPST_A64) 4422 TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4423 gen_helper_sve_fcvtzs_ds, a, 0, FPST_A64) 4424 TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4425 gen_helper_sve_fcvtzu_ds, a, 0, FPST_A64) 4426 4427 TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4428 gen_helper_sve_fcvtzs_dd, a, 0, FPST_A64) 4429 TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4430 gen_helper_sve_fcvtzu_dd, a, 0, FPST_A64) 4431 4432 static gen_helper_gvec_3_ptr * const frint_fns[] = { 4433 NULL, 4434 gen_helper_sve_frint_h, 4435 gen_helper_sve_frint_s, 4436 gen_helper_sve_frint_d 4437 }; 4438 TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz], 4439 a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) 4440 4441 static gen_helper_gvec_3_ptr * const frintx_fns[] = { 4442 NULL, 4443 gen_helper_sve_frintx_h, 4444 gen_helper_sve_frintx_s, 4445 gen_helper_sve_frintx_d 4446 }; 4447 TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz], 4448 a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); 4449 4450 static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, 4451 ARMFPRounding mode, gen_helper_gvec_3_ptr *fn) 4452 { 4453 unsigned vsz; 4454 TCGv_i32 tmode; 4455 TCGv_ptr status; 4456 4457 if (fn == NULL) { 4458 return false; 4459 } 4460 if (!sve_access_check(s)) { 4461 return true; 4462 } 4463 4464 vsz = vec_full_reg_size(s); 4465 status = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); 4466 tmode = gen_set_rmode(mode, status); 4467 4468 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 4469 vec_full_reg_offset(s, a->rn), 4470 pred_full_reg_offset(s, a->pg), 4471 status, vsz, vsz, 0, fn); 4472 4473 gen_restore_rmode(tmode, status); 4474 return true; 4475 } 4476 4477 TRANS_FEAT(FRINTN, aa64_sve, do_frint_mode, a, 4478 FPROUNDING_TIEEVEN, frint_fns[a->esz]) 4479 TRANS_FEAT(FRINTP, aa64_sve, do_frint_mode, a, 4480 FPROUNDING_POSINF, frint_fns[a->esz]) 4481 TRANS_FEAT(FRINTM, aa64_sve, do_frint_mode, a, 4482 FPROUNDING_NEGINF, frint_fns[a->esz]) 4483 TRANS_FEAT(FRINTZ, aa64_sve, do_frint_mode, a, 4484 FPROUNDING_ZERO, frint_fns[a->esz]) 4485 TRANS_FEAT(FRINTA, aa64_sve, do_frint_mode, a, 4486 FPROUNDING_TIEAWAY, frint_fns[a->esz]) 4487 4488 static gen_helper_gvec_3_ptr * const frecpx_fns[] = { 4489 NULL, gen_helper_sve_frecpx_h, 4490 gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d, 4491 }; 4492 TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz], 4493 a, 0, select_ah_fpst(s, a->esz)) 4494 4495 static gen_helper_gvec_3_ptr * const fsqrt_fns[] = { 4496 NULL, gen_helper_sve_fsqrt_h, 4497 gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d, 4498 }; 4499 TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz], 4500 a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) 4501 4502 TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4503 gen_helper_sve_scvt_hh, a, 0, FPST_A64_F16) 4504 TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 4505 gen_helper_sve_scvt_sh, a, 0, FPST_A64_F16) 4506 TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 4507 gen_helper_sve_scvt_dh, a, 0, FPST_A64_F16) 4508 4509 TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4510 gen_helper_sve_scvt_ss, a, 0, FPST_A64) 4511 TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4512 gen_helper_sve_scvt_ds, a, 0, FPST_A64) 4513 4514 TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4515 gen_helper_sve_scvt_sd, a, 0, FPST_A64) 4516 TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4517 gen_helper_sve_scvt_dd, a, 0, FPST_A64) 4518 4519 TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4520 gen_helper_sve_ucvt_hh, a, 0, FPST_A64_F16) 4521 TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 4522 gen_helper_sve_ucvt_sh, a, 0, FPST_A64_F16) 4523 TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 4524 gen_helper_sve_ucvt_dh, a, 0, FPST_A64_F16) 4525 4526 TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4527 gen_helper_sve_ucvt_ss, a, 0, FPST_A64) 4528 TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4529 gen_helper_sve_ucvt_ds, a, 0, FPST_A64) 4530 TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4531 gen_helper_sve_ucvt_sd, a, 0, FPST_A64) 4532 4533 TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4534 gen_helper_sve_ucvt_dd, a, 0, FPST_A64) 4535 4536 /* 4537 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group 4538 */ 4539 4540 /* Subroutine loading a vector register at VOFS of LEN bytes. 4541 * The load should begin at the address Rn + IMM. 4542 */ 4543 4544 void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, 4545 int len, int rn, int imm, MemOp align) 4546 { 4547 int len_align = QEMU_ALIGN_DOWN(len, 16); 4548 int len_remain = len % 16; 4549 int nparts = len / 16 + ctpop8(len_remain); 4550 int midx = get_mem_index(s); 4551 TCGv_i64 dirty_addr, clean_addr, t0, t1; 4552 TCGv_i128 t16; 4553 4554 dirty_addr = tcg_temp_new_i64(); 4555 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); 4556 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); 4557 4558 /* 4559 * Note that unpredicated load/store of vector/predicate registers 4560 * are defined as a stream of bytes, which equates to little-endian 4561 * operations on larger quantities. 4562 * Attempt to keep code expansion to a minimum by limiting the 4563 * amount of unrolling done. 4564 */ 4565 if (nparts <= 4) { 4566 int i; 4567 4568 t0 = tcg_temp_new_i64(); 4569 t1 = tcg_temp_new_i64(); 4570 t16 = tcg_temp_new_i128(); 4571 4572 for (i = 0; i < len_align; i += 16) { 4573 tcg_gen_qemu_ld_i128(t16, clean_addr, midx, 4574 MO_LE | MO_128 | MO_ATOM_NONE | align); 4575 tcg_gen_extr_i128_i64(t0, t1, t16); 4576 tcg_gen_st_i64(t0, base, vofs + i); 4577 tcg_gen_st_i64(t1, base, vofs + i + 8); 4578 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4579 } 4580 if (len_align) { 4581 align = MO_UNALN; 4582 } 4583 } else { 4584 TCGLabel *loop = gen_new_label(); 4585 TCGv_ptr tp, i = tcg_temp_new_ptr(); 4586 4587 tcg_gen_movi_ptr(i, 0); 4588 gen_set_label(loop); 4589 4590 t16 = tcg_temp_new_i128(); 4591 tcg_gen_qemu_ld_i128(t16, clean_addr, midx, 4592 MO_LE | MO_128 | MO_ATOM_NONE | align); 4593 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4594 4595 tp = tcg_temp_new_ptr(); 4596 tcg_gen_add_ptr(tp, base, i); 4597 tcg_gen_addi_ptr(i, i, 16); 4598 4599 t0 = tcg_temp_new_i64(); 4600 t1 = tcg_temp_new_i64(); 4601 tcg_gen_extr_i128_i64(t0, t1, t16); 4602 4603 tcg_gen_st_i64(t0, tp, vofs); 4604 tcg_gen_st_i64(t1, tp, vofs + 8); 4605 4606 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); 4607 align = MO_UNALN; 4608 } 4609 4610 /* 4611 * Predicate register loads can be any multiple of 2. 4612 * Note that we still store the entire 64-bit unit into tcg_env. 4613 */ 4614 if (len_remain >= 8) { 4615 t0 = tcg_temp_new_i64(); 4616 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, 4617 MO_LEUQ | MO_ATOM_NONE | align); 4618 align = MO_UNALN; 4619 tcg_gen_st_i64(t0, base, vofs + len_align); 4620 len_remain -= 8; 4621 len_align += 8; 4622 if (len_remain) { 4623 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4624 } 4625 } 4626 if (len_remain) { 4627 t0 = tcg_temp_new_i64(); 4628 switch (len_remain) { 4629 case 2: 4630 case 4: 4631 case 8: 4632 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, 4633 MO_LE | ctz32(len_remain) 4634 | MO_ATOM_NONE | align); 4635 break; 4636 4637 case 6: 4638 t1 = tcg_temp_new_i64(); 4639 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, 4640 MO_LEUL | MO_ATOM_NONE | align); 4641 tcg_gen_addi_i64(clean_addr, clean_addr, 4); 4642 tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW | MO_ATOM_NONE); 4643 tcg_gen_deposit_i64(t0, t0, t1, 32, 32); 4644 break; 4645 4646 default: 4647 g_assert_not_reached(); 4648 } 4649 tcg_gen_st_i64(t0, base, vofs + len_align); 4650 } 4651 } 4652 4653 /* Similarly for stores. */ 4654 void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs, 4655 int len, int rn, int imm, MemOp align) 4656 { 4657 int len_align = QEMU_ALIGN_DOWN(len, 16); 4658 int len_remain = len % 16; 4659 int nparts = len / 16 + ctpop8(len_remain); 4660 int midx = get_mem_index(s); 4661 TCGv_i64 dirty_addr, clean_addr, t0, t1; 4662 TCGv_i128 t16; 4663 4664 dirty_addr = tcg_temp_new_i64(); 4665 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); 4666 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); 4667 4668 /* Note that unpredicated load/store of vector/predicate registers 4669 * are defined as a stream of bytes, which equates to little-endian 4670 * operations on larger quantities. There is no nice way to force 4671 * a little-endian store for aarch64_be-linux-user out of line. 4672 * 4673 * Attempt to keep code expansion to a minimum by limiting the 4674 * amount of unrolling done. 4675 */ 4676 if (nparts <= 4) { 4677 int i; 4678 4679 t0 = tcg_temp_new_i64(); 4680 t1 = tcg_temp_new_i64(); 4681 t16 = tcg_temp_new_i128(); 4682 for (i = 0; i < len_align; i += 16) { 4683 tcg_gen_ld_i64(t0, base, vofs + i); 4684 tcg_gen_ld_i64(t1, base, vofs + i + 8); 4685 tcg_gen_concat_i64_i128(t16, t0, t1); 4686 tcg_gen_qemu_st_i128(t16, clean_addr, midx, 4687 MO_LE | MO_128 | MO_ATOM_NONE | align); 4688 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4689 } 4690 if (len_align) { 4691 align = MO_UNALN; 4692 } 4693 } else { 4694 TCGLabel *loop = gen_new_label(); 4695 TCGv_ptr tp, i = tcg_temp_new_ptr(); 4696 4697 tcg_gen_movi_ptr(i, 0); 4698 gen_set_label(loop); 4699 4700 t0 = tcg_temp_new_i64(); 4701 t1 = tcg_temp_new_i64(); 4702 tp = tcg_temp_new_ptr(); 4703 tcg_gen_add_ptr(tp, base, i); 4704 tcg_gen_ld_i64(t0, tp, vofs); 4705 tcg_gen_ld_i64(t1, tp, vofs + 8); 4706 tcg_gen_addi_ptr(i, i, 16); 4707 4708 t16 = tcg_temp_new_i128(); 4709 tcg_gen_concat_i64_i128(t16, t0, t1); 4710 4711 tcg_gen_qemu_st_i128(t16, clean_addr, midx, 4712 MO_LE | MO_128 | MO_ATOM_NONE); 4713 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4714 4715 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); 4716 align = MO_UNALN; 4717 } 4718 4719 /* Predicate register stores can be any multiple of 2. */ 4720 if (len_remain >= 8) { 4721 t0 = tcg_temp_new_i64(); 4722 tcg_gen_ld_i64(t0, base, vofs + len_align); 4723 tcg_gen_qemu_st_i64(t0, clean_addr, midx, 4724 MO_LEUQ | MO_ATOM_NONE | align); 4725 align = MO_UNALN; 4726 len_remain -= 8; 4727 len_align += 8; 4728 if (len_remain) { 4729 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4730 } 4731 } 4732 if (len_remain) { 4733 t0 = tcg_temp_new_i64(); 4734 tcg_gen_ld_i64(t0, base, vofs + len_align); 4735 4736 switch (len_remain) { 4737 case 2: 4738 case 4: 4739 case 8: 4740 tcg_gen_qemu_st_i64(t0, clean_addr, midx, 4741 MO_LE | ctz32(len_remain) 4742 | MO_ATOM_NONE | align); 4743 break; 4744 4745 case 6: 4746 tcg_gen_qemu_st_i64(t0, clean_addr, midx, 4747 MO_LEUL | MO_ATOM_NONE | align); 4748 tcg_gen_addi_i64(clean_addr, clean_addr, 4); 4749 tcg_gen_shri_i64(t0, t0, 32); 4750 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW | MO_ATOM_NONE); 4751 break; 4752 4753 default: 4754 g_assert_not_reached(); 4755 } 4756 } 4757 } 4758 4759 static bool trans_LDR_zri(DisasContext *s, arg_rri *a) 4760 { 4761 if (!dc_isar_feature(aa64_sve, s)) { 4762 return false; 4763 } 4764 if (sve_access_check(s)) { 4765 int size = vec_full_reg_size(s); 4766 int off = vec_full_reg_offset(s, a->rd); 4767 gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size, 4768 s->align_mem ? MO_ALIGN_16 : MO_UNALN); 4769 } 4770 return true; 4771 } 4772 4773 static bool trans_LDR_pri(DisasContext *s, arg_rri *a) 4774 { 4775 if (!dc_isar_feature(aa64_sve, s)) { 4776 return false; 4777 } 4778 if (sve_access_check(s)) { 4779 int size = pred_full_reg_size(s); 4780 int off = pred_full_reg_offset(s, a->rd); 4781 gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size, 4782 s->align_mem ? MO_ALIGN_2 : MO_UNALN); 4783 } 4784 return true; 4785 } 4786 4787 static bool trans_STR_zri(DisasContext *s, arg_rri *a) 4788 { 4789 if (!dc_isar_feature(aa64_sve, s)) { 4790 return false; 4791 } 4792 if (sve_access_check(s)) { 4793 int size = vec_full_reg_size(s); 4794 int off = vec_full_reg_offset(s, a->rd); 4795 gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size, 4796 s->align_mem ? MO_ALIGN_16 : MO_UNALN); 4797 } 4798 return true; 4799 } 4800 4801 static bool trans_STR_pri(DisasContext *s, arg_rri *a) 4802 { 4803 if (!dc_isar_feature(aa64_sve, s)) { 4804 return false; 4805 } 4806 if (sve_access_check(s)) { 4807 int size = pred_full_reg_size(s); 4808 int off = pred_full_reg_offset(s, a->rd); 4809 gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size, 4810 s->align_mem ? MO_ALIGN_2 : MO_UNALN); 4811 } 4812 return true; 4813 } 4814 4815 /* 4816 *** SVE Memory - Contiguous Load Group 4817 */ 4818 4819 /* The memory mode of the dtype. */ 4820 static const MemOp dtype_mop[19] = { 4821 MO_UB, MO_UB, MO_UB, MO_UB, 4822 MO_SL, MO_UW, MO_UW, MO_UW, 4823 MO_SW, MO_SW, MO_UL, MO_UL, 4824 MO_SB, MO_SB, MO_SB, MO_UQ, 4825 /* Artificial values used by decode */ 4826 MO_UL, MO_UQ, MO_128, 4827 }; 4828 4829 #define dtype_msz(x) (dtype_mop[x] & MO_SIZE) 4830 4831 /* The vector element size of dtype. */ 4832 static const uint8_t dtype_esz[19] = { 4833 0, 1, 2, 3, 4834 3, 1, 2, 3, 4835 3, 2, 2, 3, 4836 3, 2, 1, 3, 4837 /* Artificial values used by decode */ 4838 4, 4, 4, 4839 }; 4840 4841 uint32_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs, 4842 uint32_t msz, bool is_write, uint32_t data) 4843 { 4844 uint32_t sizem1; 4845 uint32_t desc = 0; 4846 4847 /* Assert all of the data fits, with or without MTE enabled. */ 4848 assert(nregs >= 1 && nregs <= 4); 4849 sizem1 = (nregs << msz) - 1; 4850 assert(sizem1 <= R_MTEDESC_SIZEM1_MASK >> R_MTEDESC_SIZEM1_SHIFT); 4851 assert(data < 1u << SVE_MTEDESC_SHIFT); 4852 4853 if (s->mte_active[0]) { 4854 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 4855 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 4856 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 4857 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 4858 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, sizem1); 4859 desc <<= SVE_MTEDESC_SHIFT; 4860 } 4861 return simd_desc(vsz, vsz, desc | data); 4862 } 4863 4864 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, 4865 int dtype, uint32_t nregs, bool is_write, 4866 gen_helper_gvec_mem *fn) 4867 { 4868 TCGv_ptr t_pg; 4869 uint32_t desc; 4870 4871 if (!s->mte_active[0]) { 4872 addr = clean_data_tbi(s, addr); 4873 } 4874 4875 /* 4876 * For e.g. LD4, there are not enough arguments to pass all 4 4877 * registers as pointers, so encode the regno into the data field. 4878 * For consistency, do this even for LD1. 4879 */ 4880 desc = make_svemte_desc(s, vec_full_reg_size(s), nregs, 4881 dtype_msz(dtype), is_write, zt); 4882 t_pg = tcg_temp_new_ptr(); 4883 4884 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 4885 fn(tcg_env, t_pg, addr, tcg_constant_i32(desc)); 4886 } 4887 4888 /* Indexed by [mte][be][dtype][nreg] */ 4889 static gen_helper_gvec_mem * const ldr_fns[2][2][19][4] = { 4890 { /* mte inactive, little-endian */ 4891 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, 4892 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, 4893 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, 4894 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, 4895 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, 4896 4897 { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL }, 4898 { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r, 4899 gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r }, 4900 { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL }, 4901 { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL }, 4902 4903 { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL }, 4904 { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL }, 4905 { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r, 4906 gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r }, 4907 { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL }, 4908 4909 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, 4910 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, 4911 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, 4912 { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r, 4913 gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r }, 4914 4915 { gen_helper_sve_ld1squ_le_r, NULL, NULL, NULL }, 4916 { gen_helper_sve_ld1dqu_le_r, NULL, NULL, NULL }, 4917 { NULL, gen_helper_sve_ld2qq_le_r, 4918 gen_helper_sve_ld3qq_le_r, gen_helper_sve_ld4qq_le_r }, 4919 }, 4920 4921 /* mte inactive, big-endian */ 4922 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, 4923 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, 4924 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, 4925 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, 4926 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, 4927 4928 { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL }, 4929 { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r, 4930 gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r }, 4931 { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL }, 4932 { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL }, 4933 4934 { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL }, 4935 { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL }, 4936 { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r, 4937 gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r }, 4938 { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL }, 4939 4940 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, 4941 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, 4942 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, 4943 { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r, 4944 gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r }, 4945 4946 { gen_helper_sve_ld1squ_be_r, NULL, NULL, NULL }, 4947 { gen_helper_sve_ld1dqu_be_r, NULL, NULL, NULL }, 4948 { NULL, gen_helper_sve_ld2qq_be_r, 4949 gen_helper_sve_ld3qq_be_r, gen_helper_sve_ld4qq_be_r }, 4950 }, 4951 }, 4952 4953 { /* mte active, little-endian */ 4954 { { gen_helper_sve_ld1bb_r_mte, 4955 gen_helper_sve_ld2bb_r_mte, 4956 gen_helper_sve_ld3bb_r_mte, 4957 gen_helper_sve_ld4bb_r_mte }, 4958 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, 4959 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, 4960 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, 4961 4962 { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL }, 4963 { gen_helper_sve_ld1hh_le_r_mte, 4964 gen_helper_sve_ld2hh_le_r_mte, 4965 gen_helper_sve_ld3hh_le_r_mte, 4966 gen_helper_sve_ld4hh_le_r_mte }, 4967 { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL }, 4968 { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL }, 4969 4970 { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL }, 4971 { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL }, 4972 { gen_helper_sve_ld1ss_le_r_mte, 4973 gen_helper_sve_ld2ss_le_r_mte, 4974 gen_helper_sve_ld3ss_le_r_mte, 4975 gen_helper_sve_ld4ss_le_r_mte }, 4976 { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL }, 4977 4978 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, 4979 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, 4980 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, 4981 { gen_helper_sve_ld1dd_le_r_mte, 4982 gen_helper_sve_ld2dd_le_r_mte, 4983 gen_helper_sve_ld3dd_le_r_mte, 4984 gen_helper_sve_ld4dd_le_r_mte }, 4985 4986 { gen_helper_sve_ld1squ_le_r_mte, NULL, NULL, NULL }, 4987 { gen_helper_sve_ld1dqu_le_r_mte, NULL, NULL, NULL }, 4988 { NULL, 4989 gen_helper_sve_ld2qq_le_r_mte, 4990 gen_helper_sve_ld3qq_le_r_mte, 4991 gen_helper_sve_ld4qq_le_r_mte }, 4992 }, 4993 4994 /* mte active, big-endian */ 4995 { { gen_helper_sve_ld1bb_r_mte, 4996 gen_helper_sve_ld2bb_r_mte, 4997 gen_helper_sve_ld3bb_r_mte, 4998 gen_helper_sve_ld4bb_r_mte }, 4999 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, 5000 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, 5001 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, 5002 5003 { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL }, 5004 { gen_helper_sve_ld1hh_be_r_mte, 5005 gen_helper_sve_ld2hh_be_r_mte, 5006 gen_helper_sve_ld3hh_be_r_mte, 5007 gen_helper_sve_ld4hh_be_r_mte }, 5008 { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL }, 5009 { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL }, 5010 5011 { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL }, 5012 { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL }, 5013 { gen_helper_sve_ld1ss_be_r_mte, 5014 gen_helper_sve_ld2ss_be_r_mte, 5015 gen_helper_sve_ld3ss_be_r_mte, 5016 gen_helper_sve_ld4ss_be_r_mte }, 5017 { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL }, 5018 5019 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, 5020 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, 5021 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, 5022 { gen_helper_sve_ld1dd_be_r_mte, 5023 gen_helper_sve_ld2dd_be_r_mte, 5024 gen_helper_sve_ld3dd_be_r_mte, 5025 gen_helper_sve_ld4dd_be_r_mte }, 5026 5027 { gen_helper_sve_ld1squ_be_r_mte, NULL, NULL, NULL }, 5028 { gen_helper_sve_ld1dqu_be_r_mte, NULL, NULL, NULL }, 5029 { NULL, 5030 gen_helper_sve_ld2qq_be_r_mte, 5031 gen_helper_sve_ld3qq_be_r_mte, 5032 gen_helper_sve_ld4qq_be_r_mte }, 5033 }, 5034 }, 5035 }; 5036 5037 static void do_ld_zpa(DisasContext *s, int zt, int pg, 5038 TCGv_i64 addr, int dtype, int nreg) 5039 { 5040 gen_helper_gvec_mem *fn 5041 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg]; 5042 5043 /* 5044 * While there are holes in the table, they are not 5045 * accessible via the instruction encoding. 5046 */ 5047 assert(fn != NULL); 5048 do_mem_zpa(s, zt, pg, addr, dtype, nreg + 1, false, fn); 5049 } 5050 5051 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a) 5052 { 5053 if (a->rm == 31) { 5054 return false; 5055 } 5056 5057 /* dtypes 16-18 are artificial, representing 128-bit element */ 5058 switch (a->dtype) { 5059 case 0 ... 15: 5060 if (!dc_isar_feature(aa64_sve, s)) { 5061 return false; 5062 } 5063 break; 5064 case 16: case 17: 5065 if (!dc_isar_feature(aa64_sve2p1, s)) { 5066 return false; 5067 } 5068 s->is_nonstreaming = true; 5069 break; 5070 case 18: 5071 if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { 5072 return false; 5073 } 5074 break; 5075 default: 5076 g_assert_not_reached(); 5077 } 5078 5079 if (sve_access_check(s)) { 5080 TCGv_i64 addr = tcg_temp_new_i64(); 5081 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 5082 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 5083 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg); 5084 } 5085 return true; 5086 } 5087 5088 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a) 5089 { 5090 /* dtypes 16-18 are artificial, representing 128-bit element */ 5091 switch (a->dtype) { 5092 case 0 ... 15: 5093 if (!dc_isar_feature(aa64_sve, s)) { 5094 return false; 5095 } 5096 break; 5097 case 16: case 17: 5098 if (!dc_isar_feature(aa64_sve2p1, s)) { 5099 return false; 5100 } 5101 s->is_nonstreaming = true; 5102 break; 5103 case 18: 5104 if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { 5105 return false; 5106 } 5107 break; 5108 default: 5109 g_assert_not_reached(); 5110 } 5111 5112 if (sve_access_check(s)) { 5113 int vsz = vec_full_reg_size(s); 5114 int elements = vsz >> dtype_esz[a->dtype]; 5115 TCGv_i64 addr = tcg_temp_new_i64(); 5116 5117 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), 5118 (a->imm * elements * (a->nreg + 1)) 5119 << dtype_msz(a->dtype)); 5120 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg); 5121 } 5122 return true; 5123 } 5124 5125 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a) 5126 { 5127 static gen_helper_gvec_mem * const fns[2][2][16] = { 5128 { /* mte inactive, little-endian */ 5129 { gen_helper_sve_ldff1bb_r, 5130 gen_helper_sve_ldff1bhu_r, 5131 gen_helper_sve_ldff1bsu_r, 5132 gen_helper_sve_ldff1bdu_r, 5133 5134 gen_helper_sve_ldff1sds_le_r, 5135 gen_helper_sve_ldff1hh_le_r, 5136 gen_helper_sve_ldff1hsu_le_r, 5137 gen_helper_sve_ldff1hdu_le_r, 5138 5139 gen_helper_sve_ldff1hds_le_r, 5140 gen_helper_sve_ldff1hss_le_r, 5141 gen_helper_sve_ldff1ss_le_r, 5142 gen_helper_sve_ldff1sdu_le_r, 5143 5144 gen_helper_sve_ldff1bds_r, 5145 gen_helper_sve_ldff1bss_r, 5146 gen_helper_sve_ldff1bhs_r, 5147 gen_helper_sve_ldff1dd_le_r }, 5148 5149 /* mte inactive, big-endian */ 5150 { gen_helper_sve_ldff1bb_r, 5151 gen_helper_sve_ldff1bhu_r, 5152 gen_helper_sve_ldff1bsu_r, 5153 gen_helper_sve_ldff1bdu_r, 5154 5155 gen_helper_sve_ldff1sds_be_r, 5156 gen_helper_sve_ldff1hh_be_r, 5157 gen_helper_sve_ldff1hsu_be_r, 5158 gen_helper_sve_ldff1hdu_be_r, 5159 5160 gen_helper_sve_ldff1hds_be_r, 5161 gen_helper_sve_ldff1hss_be_r, 5162 gen_helper_sve_ldff1ss_be_r, 5163 gen_helper_sve_ldff1sdu_be_r, 5164 5165 gen_helper_sve_ldff1bds_r, 5166 gen_helper_sve_ldff1bss_r, 5167 gen_helper_sve_ldff1bhs_r, 5168 gen_helper_sve_ldff1dd_be_r } }, 5169 5170 { /* mte active, little-endian */ 5171 { gen_helper_sve_ldff1bb_r_mte, 5172 gen_helper_sve_ldff1bhu_r_mte, 5173 gen_helper_sve_ldff1bsu_r_mte, 5174 gen_helper_sve_ldff1bdu_r_mte, 5175 5176 gen_helper_sve_ldff1sds_le_r_mte, 5177 gen_helper_sve_ldff1hh_le_r_mte, 5178 gen_helper_sve_ldff1hsu_le_r_mte, 5179 gen_helper_sve_ldff1hdu_le_r_mte, 5180 5181 gen_helper_sve_ldff1hds_le_r_mte, 5182 gen_helper_sve_ldff1hss_le_r_mte, 5183 gen_helper_sve_ldff1ss_le_r_mte, 5184 gen_helper_sve_ldff1sdu_le_r_mte, 5185 5186 gen_helper_sve_ldff1bds_r_mte, 5187 gen_helper_sve_ldff1bss_r_mte, 5188 gen_helper_sve_ldff1bhs_r_mte, 5189 gen_helper_sve_ldff1dd_le_r_mte }, 5190 5191 /* mte active, big-endian */ 5192 { gen_helper_sve_ldff1bb_r_mte, 5193 gen_helper_sve_ldff1bhu_r_mte, 5194 gen_helper_sve_ldff1bsu_r_mte, 5195 gen_helper_sve_ldff1bdu_r_mte, 5196 5197 gen_helper_sve_ldff1sds_be_r_mte, 5198 gen_helper_sve_ldff1hh_be_r_mte, 5199 gen_helper_sve_ldff1hsu_be_r_mte, 5200 gen_helper_sve_ldff1hdu_be_r_mte, 5201 5202 gen_helper_sve_ldff1hds_be_r_mte, 5203 gen_helper_sve_ldff1hss_be_r_mte, 5204 gen_helper_sve_ldff1ss_be_r_mte, 5205 gen_helper_sve_ldff1sdu_be_r_mte, 5206 5207 gen_helper_sve_ldff1bds_r_mte, 5208 gen_helper_sve_ldff1bss_r_mte, 5209 gen_helper_sve_ldff1bhs_r_mte, 5210 gen_helper_sve_ldff1dd_be_r_mte } }, 5211 }; 5212 5213 if (!dc_isar_feature(aa64_sve, s)) { 5214 return false; 5215 } 5216 s->is_nonstreaming = true; 5217 if (sve_access_check(s)) { 5218 TCGv_i64 addr = tcg_temp_new_i64(); 5219 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 5220 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 5221 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, 5222 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]); 5223 } 5224 return true; 5225 } 5226 5227 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a) 5228 { 5229 static gen_helper_gvec_mem * const fns[2][2][16] = { 5230 { /* mte inactive, little-endian */ 5231 { gen_helper_sve_ldnf1bb_r, 5232 gen_helper_sve_ldnf1bhu_r, 5233 gen_helper_sve_ldnf1bsu_r, 5234 gen_helper_sve_ldnf1bdu_r, 5235 5236 gen_helper_sve_ldnf1sds_le_r, 5237 gen_helper_sve_ldnf1hh_le_r, 5238 gen_helper_sve_ldnf1hsu_le_r, 5239 gen_helper_sve_ldnf1hdu_le_r, 5240 5241 gen_helper_sve_ldnf1hds_le_r, 5242 gen_helper_sve_ldnf1hss_le_r, 5243 gen_helper_sve_ldnf1ss_le_r, 5244 gen_helper_sve_ldnf1sdu_le_r, 5245 5246 gen_helper_sve_ldnf1bds_r, 5247 gen_helper_sve_ldnf1bss_r, 5248 gen_helper_sve_ldnf1bhs_r, 5249 gen_helper_sve_ldnf1dd_le_r }, 5250 5251 /* mte inactive, big-endian */ 5252 { gen_helper_sve_ldnf1bb_r, 5253 gen_helper_sve_ldnf1bhu_r, 5254 gen_helper_sve_ldnf1bsu_r, 5255 gen_helper_sve_ldnf1bdu_r, 5256 5257 gen_helper_sve_ldnf1sds_be_r, 5258 gen_helper_sve_ldnf1hh_be_r, 5259 gen_helper_sve_ldnf1hsu_be_r, 5260 gen_helper_sve_ldnf1hdu_be_r, 5261 5262 gen_helper_sve_ldnf1hds_be_r, 5263 gen_helper_sve_ldnf1hss_be_r, 5264 gen_helper_sve_ldnf1ss_be_r, 5265 gen_helper_sve_ldnf1sdu_be_r, 5266 5267 gen_helper_sve_ldnf1bds_r, 5268 gen_helper_sve_ldnf1bss_r, 5269 gen_helper_sve_ldnf1bhs_r, 5270 gen_helper_sve_ldnf1dd_be_r } }, 5271 5272 { /* mte inactive, little-endian */ 5273 { gen_helper_sve_ldnf1bb_r_mte, 5274 gen_helper_sve_ldnf1bhu_r_mte, 5275 gen_helper_sve_ldnf1bsu_r_mte, 5276 gen_helper_sve_ldnf1bdu_r_mte, 5277 5278 gen_helper_sve_ldnf1sds_le_r_mte, 5279 gen_helper_sve_ldnf1hh_le_r_mte, 5280 gen_helper_sve_ldnf1hsu_le_r_mte, 5281 gen_helper_sve_ldnf1hdu_le_r_mte, 5282 5283 gen_helper_sve_ldnf1hds_le_r_mte, 5284 gen_helper_sve_ldnf1hss_le_r_mte, 5285 gen_helper_sve_ldnf1ss_le_r_mte, 5286 gen_helper_sve_ldnf1sdu_le_r_mte, 5287 5288 gen_helper_sve_ldnf1bds_r_mte, 5289 gen_helper_sve_ldnf1bss_r_mte, 5290 gen_helper_sve_ldnf1bhs_r_mte, 5291 gen_helper_sve_ldnf1dd_le_r_mte }, 5292 5293 /* mte inactive, big-endian */ 5294 { gen_helper_sve_ldnf1bb_r_mte, 5295 gen_helper_sve_ldnf1bhu_r_mte, 5296 gen_helper_sve_ldnf1bsu_r_mte, 5297 gen_helper_sve_ldnf1bdu_r_mte, 5298 5299 gen_helper_sve_ldnf1sds_be_r_mte, 5300 gen_helper_sve_ldnf1hh_be_r_mte, 5301 gen_helper_sve_ldnf1hsu_be_r_mte, 5302 gen_helper_sve_ldnf1hdu_be_r_mte, 5303 5304 gen_helper_sve_ldnf1hds_be_r_mte, 5305 gen_helper_sve_ldnf1hss_be_r_mte, 5306 gen_helper_sve_ldnf1ss_be_r_mte, 5307 gen_helper_sve_ldnf1sdu_be_r_mte, 5308 5309 gen_helper_sve_ldnf1bds_r_mte, 5310 gen_helper_sve_ldnf1bss_r_mte, 5311 gen_helper_sve_ldnf1bhs_r_mte, 5312 gen_helper_sve_ldnf1dd_be_r_mte } }, 5313 }; 5314 5315 if (!dc_isar_feature(aa64_sve, s)) { 5316 return false; 5317 } 5318 s->is_nonstreaming = true; 5319 if (sve_access_check(s)) { 5320 int vsz = vec_full_reg_size(s); 5321 int elements = vsz >> dtype_esz[a->dtype]; 5322 int off = (a->imm * elements) << dtype_msz(a->dtype); 5323 TCGv_i64 addr = tcg_temp_new_i64(); 5324 5325 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off); 5326 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, 5327 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]); 5328 } 5329 return true; 5330 } 5331 5332 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) 5333 { 5334 unsigned vsz = vec_full_reg_size(s); 5335 TCGv_ptr t_pg; 5336 int poff; 5337 uint32_t desc; 5338 5339 /* Load the first quadword using the normal predicated load helpers. */ 5340 if (!s->mte_active[0]) { 5341 addr = clean_data_tbi(s, addr); 5342 } 5343 5344 poff = pred_full_reg_offset(s, pg); 5345 if (vsz > 16) { 5346 /* 5347 * Zero-extend the first 16 bits of the predicate into a temporary. 5348 * This avoids triggering an assert making sure we don't have bits 5349 * set within a predicate beyond VQ, but we have lowered VQ to 1 5350 * for this load operation. 5351 */ 5352 TCGv_i64 tmp = tcg_temp_new_i64(); 5353 #if HOST_BIG_ENDIAN 5354 poff += 6; 5355 #endif 5356 tcg_gen_ld16u_i64(tmp, tcg_env, poff); 5357 5358 poff = offsetof(CPUARMState, vfp.preg_tmp); 5359 tcg_gen_st_i64(tmp, tcg_env, poff); 5360 } 5361 5362 t_pg = tcg_temp_new_ptr(); 5363 tcg_gen_addi_ptr(t_pg, tcg_env, poff); 5364 5365 gen_helper_gvec_mem *fn 5366 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0]; 5367 desc = make_svemte_desc(s, 16, 1, dtype_msz(dtype), false, zt); 5368 fn(tcg_env, t_pg, addr, tcg_constant_i32(desc)); 5369 5370 /* Replicate that first quadword. */ 5371 if (vsz > 16) { 5372 int doff = vec_full_reg_offset(s, zt); 5373 tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16); 5374 } 5375 } 5376 5377 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a) 5378 { 5379 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { 5380 return false; 5381 } 5382 if (sve_access_check(s)) { 5383 int msz = dtype_msz(a->dtype); 5384 TCGv_i64 addr = tcg_temp_new_i64(); 5385 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz); 5386 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 5387 do_ldrq(s, a->rd, a->pg, addr, a->dtype); 5388 } 5389 return true; 5390 } 5391 5392 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a) 5393 { 5394 if (!dc_isar_feature(aa64_sve, s)) { 5395 return false; 5396 } 5397 if (sve_access_check(s)) { 5398 TCGv_i64 addr = tcg_temp_new_i64(); 5399 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16); 5400 do_ldrq(s, a->rd, a->pg, addr, a->dtype); 5401 } 5402 return true; 5403 } 5404 5405 static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) 5406 { 5407 unsigned vsz = vec_full_reg_size(s); 5408 unsigned vsz_r32; 5409 TCGv_ptr t_pg; 5410 int poff, doff; 5411 uint32_t desc; 5412 5413 if (vsz < 32) { 5414 /* 5415 * Note that this UNDEFINED check comes after CheckSVEEnabled() 5416 * in the ARM pseudocode, which is the sve_access_check() done 5417 * in our caller. We should not now return false from the caller. 5418 */ 5419 unallocated_encoding(s); 5420 return; 5421 } 5422 5423 /* Load the first octaword using the normal predicated load helpers. */ 5424 if (!s->mte_active[0]) { 5425 addr = clean_data_tbi(s, addr); 5426 } 5427 5428 poff = pred_full_reg_offset(s, pg); 5429 if (vsz > 32) { 5430 /* 5431 * Zero-extend the first 32 bits of the predicate into a temporary. 5432 * This avoids triggering an assert making sure we don't have bits 5433 * set within a predicate beyond VQ, but we have lowered VQ to 2 5434 * for this load operation. 5435 */ 5436 TCGv_i64 tmp = tcg_temp_new_i64(); 5437 #if HOST_BIG_ENDIAN 5438 poff += 4; 5439 #endif 5440 tcg_gen_ld32u_i64(tmp, tcg_env, poff); 5441 5442 poff = offsetof(CPUARMState, vfp.preg_tmp); 5443 tcg_gen_st_i64(tmp, tcg_env, poff); 5444 } 5445 5446 t_pg = tcg_temp_new_ptr(); 5447 tcg_gen_addi_ptr(t_pg, tcg_env, poff); 5448 5449 gen_helper_gvec_mem *fn 5450 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0]; 5451 desc = make_svemte_desc(s, 32, 1, dtype_msz(dtype), false, zt); 5452 fn(tcg_env, t_pg, addr, tcg_constant_i32(desc)); 5453 5454 /* 5455 * Replicate that first octaword. 5456 * The replication happens in units of 32; if the full vector size 5457 * is not a multiple of 32, the final bits are zeroed. 5458 */ 5459 doff = vec_full_reg_offset(s, zt); 5460 vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32); 5461 if (vsz >= 64) { 5462 tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32); 5463 } 5464 vsz -= vsz_r32; 5465 if (vsz) { 5466 tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0); 5467 } 5468 } 5469 5470 static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a) 5471 { 5472 if (!dc_isar_feature(aa64_sve_f64mm, s)) { 5473 return false; 5474 } 5475 if (a->rm == 31) { 5476 return false; 5477 } 5478 s->is_nonstreaming = true; 5479 if (sve_access_check(s)) { 5480 TCGv_i64 addr = tcg_temp_new_i64(); 5481 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 5482 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 5483 do_ldro(s, a->rd, a->pg, addr, a->dtype); 5484 } 5485 return true; 5486 } 5487 5488 static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a) 5489 { 5490 if (!dc_isar_feature(aa64_sve_f64mm, s)) { 5491 return false; 5492 } 5493 s->is_nonstreaming = true; 5494 if (sve_access_check(s)) { 5495 TCGv_i64 addr = tcg_temp_new_i64(); 5496 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32); 5497 do_ldro(s, a->rd, a->pg, addr, a->dtype); 5498 } 5499 return true; 5500 } 5501 5502 /* Load and broadcast element. */ 5503 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) 5504 { 5505 unsigned vsz = vec_full_reg_size(s); 5506 unsigned psz = pred_full_reg_size(s); 5507 unsigned esz = dtype_esz[a->dtype]; 5508 unsigned msz = dtype_msz(a->dtype); 5509 TCGLabel *over; 5510 TCGv_i64 temp, clean_addr; 5511 MemOp memop; 5512 5513 if (!dc_isar_feature(aa64_sve, s)) { 5514 return false; 5515 } 5516 if (!sve_access_check(s)) { 5517 return true; 5518 } 5519 5520 over = gen_new_label(); 5521 5522 /* If the guarding predicate has no bits set, no load occurs. */ 5523 if (psz <= 8) { 5524 /* Reduce the pred_esz_masks value simply to reduce the 5525 * size of the code generated here. 5526 */ 5527 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8); 5528 temp = tcg_temp_new_i64(); 5529 tcg_gen_ld_i64(temp, tcg_env, pred_full_reg_offset(s, a->pg)); 5530 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask); 5531 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over); 5532 } else { 5533 TCGv_i32 t32 = tcg_temp_new_i32(); 5534 find_last_active(s, t32, esz, a->pg); 5535 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over); 5536 } 5537 5538 /* Load the data. */ 5539 temp = tcg_temp_new_i64(); 5540 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz); 5541 5542 memop = finalize_memop(s, dtype_mop[a->dtype]); 5543 clean_addr = gen_mte_check1(s, temp, false, true, memop); 5544 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s), memop); 5545 5546 /* Broadcast to *all* elements. */ 5547 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), 5548 vsz, vsz, temp); 5549 5550 /* Zero the inactive elements. */ 5551 gen_set_label(over); 5552 return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false); 5553 } 5554 5555 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, 5556 int msz, int esz, int nreg) 5557 { 5558 static gen_helper_gvec_mem * const fn_single[2][2][4][5] = { 5559 { { { gen_helper_sve_st1bb_r, 5560 gen_helper_sve_st1bh_r, 5561 gen_helper_sve_st1bs_r, 5562 gen_helper_sve_st1bd_r }, 5563 { NULL, 5564 gen_helper_sve_st1hh_le_r, 5565 gen_helper_sve_st1hs_le_r, 5566 gen_helper_sve_st1hd_le_r }, 5567 { NULL, NULL, 5568 gen_helper_sve_st1ss_le_r, 5569 gen_helper_sve_st1sd_le_r, 5570 gen_helper_sve_st1sq_le_r, }, 5571 { NULL, NULL, NULL, 5572 gen_helper_sve_st1dd_le_r, 5573 gen_helper_sve_st1dq_le_r, } }, 5574 { { gen_helper_sve_st1bb_r, 5575 gen_helper_sve_st1bh_r, 5576 gen_helper_sve_st1bs_r, 5577 gen_helper_sve_st1bd_r }, 5578 { NULL, 5579 gen_helper_sve_st1hh_be_r, 5580 gen_helper_sve_st1hs_be_r, 5581 gen_helper_sve_st1hd_be_r }, 5582 { NULL, NULL, 5583 gen_helper_sve_st1ss_be_r, 5584 gen_helper_sve_st1sd_be_r, 5585 gen_helper_sve_st1sq_be_r }, 5586 { NULL, NULL, NULL, 5587 gen_helper_sve_st1dd_be_r, 5588 gen_helper_sve_st1dq_be_r } } }, 5589 5590 { { { gen_helper_sve_st1bb_r_mte, 5591 gen_helper_sve_st1bh_r_mte, 5592 gen_helper_sve_st1bs_r_mte, 5593 gen_helper_sve_st1bd_r_mte }, 5594 { NULL, 5595 gen_helper_sve_st1hh_le_r_mte, 5596 gen_helper_sve_st1hs_le_r_mte, 5597 gen_helper_sve_st1hd_le_r_mte }, 5598 { NULL, NULL, 5599 gen_helper_sve_st1ss_le_r_mte, 5600 gen_helper_sve_st1sd_le_r_mte, 5601 gen_helper_sve_st1sq_le_r_mte }, 5602 { NULL, NULL, NULL, 5603 gen_helper_sve_st1dd_le_r_mte, 5604 gen_helper_sve_st1dq_le_r_mte } }, 5605 { { gen_helper_sve_st1bb_r_mte, 5606 gen_helper_sve_st1bh_r_mte, 5607 gen_helper_sve_st1bs_r_mte, 5608 gen_helper_sve_st1bd_r_mte }, 5609 { NULL, 5610 gen_helper_sve_st1hh_be_r_mte, 5611 gen_helper_sve_st1hs_be_r_mte, 5612 gen_helper_sve_st1hd_be_r_mte }, 5613 { NULL, NULL, 5614 gen_helper_sve_st1ss_be_r_mte, 5615 gen_helper_sve_st1sd_be_r_mte, 5616 gen_helper_sve_st1sq_be_r_mte }, 5617 { NULL, NULL, NULL, 5618 gen_helper_sve_st1dd_be_r_mte, 5619 gen_helper_sve_st1dq_be_r_mte } } }, 5620 }; 5621 static gen_helper_gvec_mem * const fn_multiple[2][2][3][5] = { 5622 { { { gen_helper_sve_st2bb_r, 5623 gen_helper_sve_st2hh_le_r, 5624 gen_helper_sve_st2ss_le_r, 5625 gen_helper_sve_st2dd_le_r, 5626 gen_helper_sve_st2qq_le_r }, 5627 { gen_helper_sve_st3bb_r, 5628 gen_helper_sve_st3hh_le_r, 5629 gen_helper_sve_st3ss_le_r, 5630 gen_helper_sve_st3dd_le_r, 5631 gen_helper_sve_st3qq_le_r }, 5632 { gen_helper_sve_st4bb_r, 5633 gen_helper_sve_st4hh_le_r, 5634 gen_helper_sve_st4ss_le_r, 5635 gen_helper_sve_st4dd_le_r, 5636 gen_helper_sve_st4qq_le_r } }, 5637 { { gen_helper_sve_st2bb_r, 5638 gen_helper_sve_st2hh_be_r, 5639 gen_helper_sve_st2ss_be_r, 5640 gen_helper_sve_st2dd_be_r, 5641 gen_helper_sve_st2qq_be_r }, 5642 { gen_helper_sve_st3bb_r, 5643 gen_helper_sve_st3hh_be_r, 5644 gen_helper_sve_st3ss_be_r, 5645 gen_helper_sve_st3dd_be_r, 5646 gen_helper_sve_st3qq_be_r }, 5647 { gen_helper_sve_st4bb_r, 5648 gen_helper_sve_st4hh_be_r, 5649 gen_helper_sve_st4ss_be_r, 5650 gen_helper_sve_st4dd_be_r, 5651 gen_helper_sve_st4qq_be_r } } }, 5652 { { { gen_helper_sve_st2bb_r_mte, 5653 gen_helper_sve_st2hh_le_r_mte, 5654 gen_helper_sve_st2ss_le_r_mte, 5655 gen_helper_sve_st2dd_le_r_mte, 5656 gen_helper_sve_st2qq_le_r_mte }, 5657 { gen_helper_sve_st3bb_r_mte, 5658 gen_helper_sve_st3hh_le_r_mte, 5659 gen_helper_sve_st3ss_le_r_mte, 5660 gen_helper_sve_st3dd_le_r_mte, 5661 gen_helper_sve_st3qq_le_r_mte }, 5662 { gen_helper_sve_st4bb_r_mte, 5663 gen_helper_sve_st4hh_le_r_mte, 5664 gen_helper_sve_st4ss_le_r_mte, 5665 gen_helper_sve_st4dd_le_r_mte, 5666 gen_helper_sve_st4qq_le_r_mte } }, 5667 { { gen_helper_sve_st2bb_r_mte, 5668 gen_helper_sve_st2hh_be_r_mte, 5669 gen_helper_sve_st2ss_be_r_mte, 5670 gen_helper_sve_st2dd_be_r_mte, 5671 gen_helper_sve_st2qq_be_r_mte }, 5672 { gen_helper_sve_st3bb_r_mte, 5673 gen_helper_sve_st3hh_be_r_mte, 5674 gen_helper_sve_st3ss_be_r_mte, 5675 gen_helper_sve_st3dd_be_r_mte, 5676 gen_helper_sve_st3qq_be_r_mte }, 5677 { gen_helper_sve_st4bb_r_mte, 5678 gen_helper_sve_st4hh_be_r_mte, 5679 gen_helper_sve_st4ss_be_r_mte, 5680 gen_helper_sve_st4dd_be_r_mte, 5681 gen_helper_sve_st4qq_be_r_mte } } }, 5682 }; 5683 gen_helper_gvec_mem *fn; 5684 int be = s->be_data == MO_BE; 5685 5686 if (nreg == 0) { 5687 /* ST1 */ 5688 fn = fn_single[s->mte_active[0]][be][msz][esz]; 5689 } else { 5690 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */ 5691 assert(msz == esz); 5692 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz]; 5693 } 5694 assert(fn != NULL); 5695 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg + 1, true, fn); 5696 } 5697 5698 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a) 5699 { 5700 if (a->rm == 31 || a->msz > a->esz) { 5701 return false; 5702 } 5703 switch (a->esz) { 5704 case MO_8 ... MO_64: 5705 if (!dc_isar_feature(aa64_sve, s)) { 5706 return false; 5707 } 5708 break; 5709 case MO_128: 5710 if (a->nreg == 0) { 5711 assert(a->msz < a->esz); 5712 if (!dc_isar_feature(aa64_sve2p1, s)) { 5713 return false; 5714 } 5715 s->is_nonstreaming = true; 5716 } else { 5717 if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { 5718 return false; 5719 } 5720 } 5721 break; 5722 default: 5723 g_assert_not_reached(); 5724 } 5725 5726 if (sve_access_check(s)) { 5727 TCGv_i64 addr = tcg_temp_new_i64(); 5728 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz); 5729 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 5730 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg); 5731 } 5732 return true; 5733 } 5734 5735 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a) 5736 { 5737 if (a->msz > a->esz) { 5738 return false; 5739 } 5740 switch (a->esz) { 5741 case MO_8 ... MO_64: 5742 if (!dc_isar_feature(aa64_sve, s)) { 5743 return false; 5744 } 5745 break; 5746 case MO_128: 5747 if (a->nreg == 0) { 5748 assert(a->msz < a->esz); 5749 if (!dc_isar_feature(aa64_sve2p1, s)) { 5750 return false; 5751 } 5752 s->is_nonstreaming = true; 5753 } else { 5754 if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { 5755 return false; 5756 } 5757 } 5758 break; 5759 default: 5760 g_assert_not_reached(); 5761 } 5762 5763 if (sve_access_check(s)) { 5764 int vsz = vec_full_reg_size(s); 5765 int elements = vsz >> a->esz; 5766 TCGv_i64 addr = tcg_temp_new_i64(); 5767 5768 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), 5769 (a->imm * elements * (a->nreg + 1)) << a->msz); 5770 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg); 5771 } 5772 return true; 5773 } 5774 5775 /* 5776 *** SVE gather loads / scatter stores 5777 */ 5778 5779 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, 5780 int scale, TCGv_i64 scalar, int msz, bool is_write, 5781 gen_helper_gvec_mem_scatter *fn) 5782 { 5783 TCGv_ptr t_zm = tcg_temp_new_ptr(); 5784 TCGv_ptr t_pg = tcg_temp_new_ptr(); 5785 TCGv_ptr t_zt = tcg_temp_new_ptr(); 5786 uint32_t desc; 5787 5788 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 5789 tcg_gen_addi_ptr(t_zm, tcg_env, vec_full_reg_offset(s, zm)); 5790 tcg_gen_addi_ptr(t_zt, tcg_env, vec_full_reg_offset(s, zt)); 5791 5792 desc = make_svemte_desc(s, vec_full_reg_size(s), 1, msz, is_write, scale); 5793 fn(tcg_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc)); 5794 } 5795 5796 /* Indexed by [mte][be][ff][xs][u][msz]. */ 5797 static gen_helper_gvec_mem_scatter * const 5798 gather_load_fn32[2][2][2][2][2][3] = { 5799 { /* MTE Inactive */ 5800 { /* Little-endian */ 5801 { { { gen_helper_sve_ldbss_zsu, 5802 gen_helper_sve_ldhss_le_zsu, 5803 NULL, }, 5804 { gen_helper_sve_ldbsu_zsu, 5805 gen_helper_sve_ldhsu_le_zsu, 5806 gen_helper_sve_ldss_le_zsu, } }, 5807 { { gen_helper_sve_ldbss_zss, 5808 gen_helper_sve_ldhss_le_zss, 5809 NULL, }, 5810 { gen_helper_sve_ldbsu_zss, 5811 gen_helper_sve_ldhsu_le_zss, 5812 gen_helper_sve_ldss_le_zss, } } }, 5813 5814 /* First-fault */ 5815 { { { gen_helper_sve_ldffbss_zsu, 5816 gen_helper_sve_ldffhss_le_zsu, 5817 NULL, }, 5818 { gen_helper_sve_ldffbsu_zsu, 5819 gen_helper_sve_ldffhsu_le_zsu, 5820 gen_helper_sve_ldffss_le_zsu, } }, 5821 { { gen_helper_sve_ldffbss_zss, 5822 gen_helper_sve_ldffhss_le_zss, 5823 NULL, }, 5824 { gen_helper_sve_ldffbsu_zss, 5825 gen_helper_sve_ldffhsu_le_zss, 5826 gen_helper_sve_ldffss_le_zss, } } } }, 5827 5828 { /* Big-endian */ 5829 { { { gen_helper_sve_ldbss_zsu, 5830 gen_helper_sve_ldhss_be_zsu, 5831 NULL, }, 5832 { gen_helper_sve_ldbsu_zsu, 5833 gen_helper_sve_ldhsu_be_zsu, 5834 gen_helper_sve_ldss_be_zsu, } }, 5835 { { gen_helper_sve_ldbss_zss, 5836 gen_helper_sve_ldhss_be_zss, 5837 NULL, }, 5838 { gen_helper_sve_ldbsu_zss, 5839 gen_helper_sve_ldhsu_be_zss, 5840 gen_helper_sve_ldss_be_zss, } } }, 5841 5842 /* First-fault */ 5843 { { { gen_helper_sve_ldffbss_zsu, 5844 gen_helper_sve_ldffhss_be_zsu, 5845 NULL, }, 5846 { gen_helper_sve_ldffbsu_zsu, 5847 gen_helper_sve_ldffhsu_be_zsu, 5848 gen_helper_sve_ldffss_be_zsu, } }, 5849 { { gen_helper_sve_ldffbss_zss, 5850 gen_helper_sve_ldffhss_be_zss, 5851 NULL, }, 5852 { gen_helper_sve_ldffbsu_zss, 5853 gen_helper_sve_ldffhsu_be_zss, 5854 gen_helper_sve_ldffss_be_zss, } } } } }, 5855 { /* MTE Active */ 5856 { /* Little-endian */ 5857 { { { gen_helper_sve_ldbss_zsu_mte, 5858 gen_helper_sve_ldhss_le_zsu_mte, 5859 NULL, }, 5860 { gen_helper_sve_ldbsu_zsu_mte, 5861 gen_helper_sve_ldhsu_le_zsu_mte, 5862 gen_helper_sve_ldss_le_zsu_mte, } }, 5863 { { gen_helper_sve_ldbss_zss_mte, 5864 gen_helper_sve_ldhss_le_zss_mte, 5865 NULL, }, 5866 { gen_helper_sve_ldbsu_zss_mte, 5867 gen_helper_sve_ldhsu_le_zss_mte, 5868 gen_helper_sve_ldss_le_zss_mte, } } }, 5869 5870 /* First-fault */ 5871 { { { gen_helper_sve_ldffbss_zsu_mte, 5872 gen_helper_sve_ldffhss_le_zsu_mte, 5873 NULL, }, 5874 { gen_helper_sve_ldffbsu_zsu_mte, 5875 gen_helper_sve_ldffhsu_le_zsu_mte, 5876 gen_helper_sve_ldffss_le_zsu_mte, } }, 5877 { { gen_helper_sve_ldffbss_zss_mte, 5878 gen_helper_sve_ldffhss_le_zss_mte, 5879 NULL, }, 5880 { gen_helper_sve_ldffbsu_zss_mte, 5881 gen_helper_sve_ldffhsu_le_zss_mte, 5882 gen_helper_sve_ldffss_le_zss_mte, } } } }, 5883 5884 { /* Big-endian */ 5885 { { { gen_helper_sve_ldbss_zsu_mte, 5886 gen_helper_sve_ldhss_be_zsu_mte, 5887 NULL, }, 5888 { gen_helper_sve_ldbsu_zsu_mte, 5889 gen_helper_sve_ldhsu_be_zsu_mte, 5890 gen_helper_sve_ldss_be_zsu_mte, } }, 5891 { { gen_helper_sve_ldbss_zss_mte, 5892 gen_helper_sve_ldhss_be_zss_mte, 5893 NULL, }, 5894 { gen_helper_sve_ldbsu_zss_mte, 5895 gen_helper_sve_ldhsu_be_zss_mte, 5896 gen_helper_sve_ldss_be_zss_mte, } } }, 5897 5898 /* First-fault */ 5899 { { { gen_helper_sve_ldffbss_zsu_mte, 5900 gen_helper_sve_ldffhss_be_zsu_mte, 5901 NULL, }, 5902 { gen_helper_sve_ldffbsu_zsu_mte, 5903 gen_helper_sve_ldffhsu_be_zsu_mte, 5904 gen_helper_sve_ldffss_be_zsu_mte, } }, 5905 { { gen_helper_sve_ldffbss_zss_mte, 5906 gen_helper_sve_ldffhss_be_zss_mte, 5907 NULL, }, 5908 { gen_helper_sve_ldffbsu_zss_mte, 5909 gen_helper_sve_ldffhsu_be_zss_mte, 5910 gen_helper_sve_ldffss_be_zss_mte, } } } } }, 5911 }; 5912 5913 /* Note that we overload xs=2 to indicate 64-bit offset. */ 5914 static gen_helper_gvec_mem_scatter * const 5915 gather_load_fn64[2][2][2][3][2][4] = { 5916 { /* MTE Inactive */ 5917 { /* Little-endian */ 5918 { { { gen_helper_sve_ldbds_zsu, 5919 gen_helper_sve_ldhds_le_zsu, 5920 gen_helper_sve_ldsds_le_zsu, 5921 NULL, }, 5922 { gen_helper_sve_ldbdu_zsu, 5923 gen_helper_sve_ldhdu_le_zsu, 5924 gen_helper_sve_ldsdu_le_zsu, 5925 gen_helper_sve_lddd_le_zsu, } }, 5926 { { gen_helper_sve_ldbds_zss, 5927 gen_helper_sve_ldhds_le_zss, 5928 gen_helper_sve_ldsds_le_zss, 5929 NULL, }, 5930 { gen_helper_sve_ldbdu_zss, 5931 gen_helper_sve_ldhdu_le_zss, 5932 gen_helper_sve_ldsdu_le_zss, 5933 gen_helper_sve_lddd_le_zss, } }, 5934 { { gen_helper_sve_ldbds_zd, 5935 gen_helper_sve_ldhds_le_zd, 5936 gen_helper_sve_ldsds_le_zd, 5937 NULL, }, 5938 { gen_helper_sve_ldbdu_zd, 5939 gen_helper_sve_ldhdu_le_zd, 5940 gen_helper_sve_ldsdu_le_zd, 5941 gen_helper_sve_lddd_le_zd, } } }, 5942 5943 /* First-fault */ 5944 { { { gen_helper_sve_ldffbds_zsu, 5945 gen_helper_sve_ldffhds_le_zsu, 5946 gen_helper_sve_ldffsds_le_zsu, 5947 NULL, }, 5948 { gen_helper_sve_ldffbdu_zsu, 5949 gen_helper_sve_ldffhdu_le_zsu, 5950 gen_helper_sve_ldffsdu_le_zsu, 5951 gen_helper_sve_ldffdd_le_zsu, } }, 5952 { { gen_helper_sve_ldffbds_zss, 5953 gen_helper_sve_ldffhds_le_zss, 5954 gen_helper_sve_ldffsds_le_zss, 5955 NULL, }, 5956 { gen_helper_sve_ldffbdu_zss, 5957 gen_helper_sve_ldffhdu_le_zss, 5958 gen_helper_sve_ldffsdu_le_zss, 5959 gen_helper_sve_ldffdd_le_zss, } }, 5960 { { gen_helper_sve_ldffbds_zd, 5961 gen_helper_sve_ldffhds_le_zd, 5962 gen_helper_sve_ldffsds_le_zd, 5963 NULL, }, 5964 { gen_helper_sve_ldffbdu_zd, 5965 gen_helper_sve_ldffhdu_le_zd, 5966 gen_helper_sve_ldffsdu_le_zd, 5967 gen_helper_sve_ldffdd_le_zd, } } } }, 5968 { /* Big-endian */ 5969 { { { gen_helper_sve_ldbds_zsu, 5970 gen_helper_sve_ldhds_be_zsu, 5971 gen_helper_sve_ldsds_be_zsu, 5972 NULL, }, 5973 { gen_helper_sve_ldbdu_zsu, 5974 gen_helper_sve_ldhdu_be_zsu, 5975 gen_helper_sve_ldsdu_be_zsu, 5976 gen_helper_sve_lddd_be_zsu, } }, 5977 { { gen_helper_sve_ldbds_zss, 5978 gen_helper_sve_ldhds_be_zss, 5979 gen_helper_sve_ldsds_be_zss, 5980 NULL, }, 5981 { gen_helper_sve_ldbdu_zss, 5982 gen_helper_sve_ldhdu_be_zss, 5983 gen_helper_sve_ldsdu_be_zss, 5984 gen_helper_sve_lddd_be_zss, } }, 5985 { { gen_helper_sve_ldbds_zd, 5986 gen_helper_sve_ldhds_be_zd, 5987 gen_helper_sve_ldsds_be_zd, 5988 NULL, }, 5989 { gen_helper_sve_ldbdu_zd, 5990 gen_helper_sve_ldhdu_be_zd, 5991 gen_helper_sve_ldsdu_be_zd, 5992 gen_helper_sve_lddd_be_zd, } } }, 5993 5994 /* First-fault */ 5995 { { { gen_helper_sve_ldffbds_zsu, 5996 gen_helper_sve_ldffhds_be_zsu, 5997 gen_helper_sve_ldffsds_be_zsu, 5998 NULL, }, 5999 { gen_helper_sve_ldffbdu_zsu, 6000 gen_helper_sve_ldffhdu_be_zsu, 6001 gen_helper_sve_ldffsdu_be_zsu, 6002 gen_helper_sve_ldffdd_be_zsu, } }, 6003 { { gen_helper_sve_ldffbds_zss, 6004 gen_helper_sve_ldffhds_be_zss, 6005 gen_helper_sve_ldffsds_be_zss, 6006 NULL, }, 6007 { gen_helper_sve_ldffbdu_zss, 6008 gen_helper_sve_ldffhdu_be_zss, 6009 gen_helper_sve_ldffsdu_be_zss, 6010 gen_helper_sve_ldffdd_be_zss, } }, 6011 { { gen_helper_sve_ldffbds_zd, 6012 gen_helper_sve_ldffhds_be_zd, 6013 gen_helper_sve_ldffsds_be_zd, 6014 NULL, }, 6015 { gen_helper_sve_ldffbdu_zd, 6016 gen_helper_sve_ldffhdu_be_zd, 6017 gen_helper_sve_ldffsdu_be_zd, 6018 gen_helper_sve_ldffdd_be_zd, } } } } }, 6019 { /* MTE Active */ 6020 { /* Little-endian */ 6021 { { { gen_helper_sve_ldbds_zsu_mte, 6022 gen_helper_sve_ldhds_le_zsu_mte, 6023 gen_helper_sve_ldsds_le_zsu_mte, 6024 NULL, }, 6025 { gen_helper_sve_ldbdu_zsu_mte, 6026 gen_helper_sve_ldhdu_le_zsu_mte, 6027 gen_helper_sve_ldsdu_le_zsu_mte, 6028 gen_helper_sve_lddd_le_zsu_mte, } }, 6029 { { gen_helper_sve_ldbds_zss_mte, 6030 gen_helper_sve_ldhds_le_zss_mte, 6031 gen_helper_sve_ldsds_le_zss_mte, 6032 NULL, }, 6033 { gen_helper_sve_ldbdu_zss_mte, 6034 gen_helper_sve_ldhdu_le_zss_mte, 6035 gen_helper_sve_ldsdu_le_zss_mte, 6036 gen_helper_sve_lddd_le_zss_mte, } }, 6037 { { gen_helper_sve_ldbds_zd_mte, 6038 gen_helper_sve_ldhds_le_zd_mte, 6039 gen_helper_sve_ldsds_le_zd_mte, 6040 NULL, }, 6041 { gen_helper_sve_ldbdu_zd_mte, 6042 gen_helper_sve_ldhdu_le_zd_mte, 6043 gen_helper_sve_ldsdu_le_zd_mte, 6044 gen_helper_sve_lddd_le_zd_mte, } } }, 6045 6046 /* First-fault */ 6047 { { { gen_helper_sve_ldffbds_zsu_mte, 6048 gen_helper_sve_ldffhds_le_zsu_mte, 6049 gen_helper_sve_ldffsds_le_zsu_mte, 6050 NULL, }, 6051 { gen_helper_sve_ldffbdu_zsu_mte, 6052 gen_helper_sve_ldffhdu_le_zsu_mte, 6053 gen_helper_sve_ldffsdu_le_zsu_mte, 6054 gen_helper_sve_ldffdd_le_zsu_mte, } }, 6055 { { gen_helper_sve_ldffbds_zss_mte, 6056 gen_helper_sve_ldffhds_le_zss_mte, 6057 gen_helper_sve_ldffsds_le_zss_mte, 6058 NULL, }, 6059 { gen_helper_sve_ldffbdu_zss_mte, 6060 gen_helper_sve_ldffhdu_le_zss_mte, 6061 gen_helper_sve_ldffsdu_le_zss_mte, 6062 gen_helper_sve_ldffdd_le_zss_mte, } }, 6063 { { gen_helper_sve_ldffbds_zd_mte, 6064 gen_helper_sve_ldffhds_le_zd_mte, 6065 gen_helper_sve_ldffsds_le_zd_mte, 6066 NULL, }, 6067 { gen_helper_sve_ldffbdu_zd_mte, 6068 gen_helper_sve_ldffhdu_le_zd_mte, 6069 gen_helper_sve_ldffsdu_le_zd_mte, 6070 gen_helper_sve_ldffdd_le_zd_mte, } } } }, 6071 { /* Big-endian */ 6072 { { { gen_helper_sve_ldbds_zsu_mte, 6073 gen_helper_sve_ldhds_be_zsu_mte, 6074 gen_helper_sve_ldsds_be_zsu_mte, 6075 NULL, }, 6076 { gen_helper_sve_ldbdu_zsu_mte, 6077 gen_helper_sve_ldhdu_be_zsu_mte, 6078 gen_helper_sve_ldsdu_be_zsu_mte, 6079 gen_helper_sve_lddd_be_zsu_mte, } }, 6080 { { gen_helper_sve_ldbds_zss_mte, 6081 gen_helper_sve_ldhds_be_zss_mte, 6082 gen_helper_sve_ldsds_be_zss_mte, 6083 NULL, }, 6084 { gen_helper_sve_ldbdu_zss_mte, 6085 gen_helper_sve_ldhdu_be_zss_mte, 6086 gen_helper_sve_ldsdu_be_zss_mte, 6087 gen_helper_sve_lddd_be_zss_mte, } }, 6088 { { gen_helper_sve_ldbds_zd_mte, 6089 gen_helper_sve_ldhds_be_zd_mte, 6090 gen_helper_sve_ldsds_be_zd_mte, 6091 NULL, }, 6092 { gen_helper_sve_ldbdu_zd_mte, 6093 gen_helper_sve_ldhdu_be_zd_mte, 6094 gen_helper_sve_ldsdu_be_zd_mte, 6095 gen_helper_sve_lddd_be_zd_mte, } } }, 6096 6097 /* First-fault */ 6098 { { { gen_helper_sve_ldffbds_zsu_mte, 6099 gen_helper_sve_ldffhds_be_zsu_mte, 6100 gen_helper_sve_ldffsds_be_zsu_mte, 6101 NULL, }, 6102 { gen_helper_sve_ldffbdu_zsu_mte, 6103 gen_helper_sve_ldffhdu_be_zsu_mte, 6104 gen_helper_sve_ldffsdu_be_zsu_mte, 6105 gen_helper_sve_ldffdd_be_zsu_mte, } }, 6106 { { gen_helper_sve_ldffbds_zss_mte, 6107 gen_helper_sve_ldffhds_be_zss_mte, 6108 gen_helper_sve_ldffsds_be_zss_mte, 6109 NULL, }, 6110 { gen_helper_sve_ldffbdu_zss_mte, 6111 gen_helper_sve_ldffhdu_be_zss_mte, 6112 gen_helper_sve_ldffsdu_be_zss_mte, 6113 gen_helper_sve_ldffdd_be_zss_mte, } }, 6114 { { gen_helper_sve_ldffbds_zd_mte, 6115 gen_helper_sve_ldffhds_be_zd_mte, 6116 gen_helper_sve_ldffsds_be_zd_mte, 6117 NULL, }, 6118 { gen_helper_sve_ldffbdu_zd_mte, 6119 gen_helper_sve_ldffhdu_be_zd_mte, 6120 gen_helper_sve_ldffsdu_be_zd_mte, 6121 gen_helper_sve_ldffdd_be_zd_mte, } } } } }, 6122 }; 6123 6124 static gen_helper_gvec_mem_scatter * const 6125 gather_load_fn128[2][2] = { 6126 { gen_helper_sve_ldqq_le_zd, 6127 gen_helper_sve_ldqq_be_zd }, 6128 { gen_helper_sve_ldqq_le_zd_mte, 6129 gen_helper_sve_ldqq_be_zd_mte } 6130 }; 6131 6132 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a) 6133 { 6134 gen_helper_gvec_mem_scatter *fn = NULL; 6135 bool be = s->be_data == MO_BE; 6136 bool mte = s->mte_active[0]; 6137 6138 if (a->esz < MO_128 6139 ? !dc_isar_feature(aa64_sve, s) 6140 : !dc_isar_feature(aa64_sve2p1, s)) { 6141 return false; 6142 } 6143 s->is_nonstreaming = true; 6144 if (!sve_access_check(s)) { 6145 return true; 6146 } 6147 6148 switch (a->esz) { 6149 case MO_32: 6150 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz]; 6151 break; 6152 case MO_64: 6153 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz]; 6154 break; 6155 case MO_128: 6156 assert(!a->ff && a->u && a->xs == 2 && a->msz == MO_128); 6157 fn = gather_load_fn128[mte][be]; 6158 break; 6159 default: 6160 g_assert_not_reached(); 6161 } 6162 assert(fn != NULL); 6163 6164 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, 6165 cpu_reg_sp(s, a->rn), a->msz, false, fn); 6166 return true; 6167 } 6168 6169 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a) 6170 { 6171 gen_helper_gvec_mem_scatter *fn = NULL; 6172 bool be = s->be_data == MO_BE; 6173 bool mte = s->mte_active[0]; 6174 6175 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) { 6176 return false; 6177 } 6178 if (!dc_isar_feature(aa64_sve, s)) { 6179 return false; 6180 } 6181 s->is_nonstreaming = true; 6182 if (!sve_access_check(s)) { 6183 return true; 6184 } 6185 6186 switch (a->esz) { 6187 case MO_32: 6188 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz]; 6189 break; 6190 case MO_64: 6191 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz]; 6192 break; 6193 } 6194 assert(fn != NULL); 6195 6196 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x]) 6197 * by loading the immediate into the scalar parameter. 6198 */ 6199 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 6200 tcg_constant_i64(a->imm << a->msz), a->msz, false, fn); 6201 return true; 6202 } 6203 6204 static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a) 6205 { 6206 gen_helper_gvec_mem_scatter *fn = NULL; 6207 bool be = s->be_data == MO_BE; 6208 bool mte = s->mte_active[0]; 6209 6210 if (a->esz < a->msz + !a->u) { 6211 return false; 6212 } 6213 if (!dc_isar_feature(aa64_sve2, s)) { 6214 return false; 6215 } 6216 s->is_nonstreaming = true; 6217 if (!sve_access_check(s)) { 6218 return true; 6219 } 6220 6221 switch (a->esz) { 6222 case MO_32: 6223 fn = gather_load_fn32[mte][be][0][0][a->u][a->msz]; 6224 break; 6225 case MO_64: 6226 fn = gather_load_fn64[mte][be][0][2][a->u][a->msz]; 6227 break; 6228 } 6229 assert(fn != NULL); 6230 6231 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 6232 cpu_reg(s, a->rm), a->msz, false, fn); 6233 return true; 6234 } 6235 6236 /* Indexed by [mte][be][xs][msz]. */ 6237 static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = { 6238 { /* MTE Inactive */ 6239 { /* Little-endian */ 6240 { gen_helper_sve_stbs_zsu, 6241 gen_helper_sve_sths_le_zsu, 6242 gen_helper_sve_stss_le_zsu, }, 6243 { gen_helper_sve_stbs_zss, 6244 gen_helper_sve_sths_le_zss, 6245 gen_helper_sve_stss_le_zss, } }, 6246 { /* Big-endian */ 6247 { gen_helper_sve_stbs_zsu, 6248 gen_helper_sve_sths_be_zsu, 6249 gen_helper_sve_stss_be_zsu, }, 6250 { gen_helper_sve_stbs_zss, 6251 gen_helper_sve_sths_be_zss, 6252 gen_helper_sve_stss_be_zss, } } }, 6253 { /* MTE Active */ 6254 { /* Little-endian */ 6255 { gen_helper_sve_stbs_zsu_mte, 6256 gen_helper_sve_sths_le_zsu_mte, 6257 gen_helper_sve_stss_le_zsu_mte, }, 6258 { gen_helper_sve_stbs_zss_mte, 6259 gen_helper_sve_sths_le_zss_mte, 6260 gen_helper_sve_stss_le_zss_mte, } }, 6261 { /* Big-endian */ 6262 { gen_helper_sve_stbs_zsu_mte, 6263 gen_helper_sve_sths_be_zsu_mte, 6264 gen_helper_sve_stss_be_zsu_mte, }, 6265 { gen_helper_sve_stbs_zss_mte, 6266 gen_helper_sve_sths_be_zss_mte, 6267 gen_helper_sve_stss_be_zss_mte, } } }, 6268 }; 6269 6270 /* Note that we overload xs=2 to indicate 64-bit offset. */ 6271 static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = { 6272 { /* MTE Inactive */ 6273 { /* Little-endian */ 6274 { gen_helper_sve_stbd_zsu, 6275 gen_helper_sve_sthd_le_zsu, 6276 gen_helper_sve_stsd_le_zsu, 6277 gen_helper_sve_stdd_le_zsu, }, 6278 { gen_helper_sve_stbd_zss, 6279 gen_helper_sve_sthd_le_zss, 6280 gen_helper_sve_stsd_le_zss, 6281 gen_helper_sve_stdd_le_zss, }, 6282 { gen_helper_sve_stbd_zd, 6283 gen_helper_sve_sthd_le_zd, 6284 gen_helper_sve_stsd_le_zd, 6285 gen_helper_sve_stdd_le_zd, } }, 6286 { /* Big-endian */ 6287 { gen_helper_sve_stbd_zsu, 6288 gen_helper_sve_sthd_be_zsu, 6289 gen_helper_sve_stsd_be_zsu, 6290 gen_helper_sve_stdd_be_zsu, }, 6291 { gen_helper_sve_stbd_zss, 6292 gen_helper_sve_sthd_be_zss, 6293 gen_helper_sve_stsd_be_zss, 6294 gen_helper_sve_stdd_be_zss, }, 6295 { gen_helper_sve_stbd_zd, 6296 gen_helper_sve_sthd_be_zd, 6297 gen_helper_sve_stsd_be_zd, 6298 gen_helper_sve_stdd_be_zd, } } }, 6299 { /* MTE Inactive */ 6300 { /* Little-endian */ 6301 { gen_helper_sve_stbd_zsu_mte, 6302 gen_helper_sve_sthd_le_zsu_mte, 6303 gen_helper_sve_stsd_le_zsu_mte, 6304 gen_helper_sve_stdd_le_zsu_mte, }, 6305 { gen_helper_sve_stbd_zss_mte, 6306 gen_helper_sve_sthd_le_zss_mte, 6307 gen_helper_sve_stsd_le_zss_mte, 6308 gen_helper_sve_stdd_le_zss_mte, }, 6309 { gen_helper_sve_stbd_zd_mte, 6310 gen_helper_sve_sthd_le_zd_mte, 6311 gen_helper_sve_stsd_le_zd_mte, 6312 gen_helper_sve_stdd_le_zd_mte, } }, 6313 { /* Big-endian */ 6314 { gen_helper_sve_stbd_zsu_mte, 6315 gen_helper_sve_sthd_be_zsu_mte, 6316 gen_helper_sve_stsd_be_zsu_mte, 6317 gen_helper_sve_stdd_be_zsu_mte, }, 6318 { gen_helper_sve_stbd_zss_mte, 6319 gen_helper_sve_sthd_be_zss_mte, 6320 gen_helper_sve_stsd_be_zss_mte, 6321 gen_helper_sve_stdd_be_zss_mte, }, 6322 { gen_helper_sve_stbd_zd_mte, 6323 gen_helper_sve_sthd_be_zd_mte, 6324 gen_helper_sve_stsd_be_zd_mte, 6325 gen_helper_sve_stdd_be_zd_mte, } } }, 6326 }; 6327 6328 static gen_helper_gvec_mem_scatter * const 6329 scatter_store_fn128[2][2] = { 6330 { gen_helper_sve_stqq_le_zd, 6331 gen_helper_sve_stqq_be_zd }, 6332 { gen_helper_sve_stqq_le_zd_mte, 6333 gen_helper_sve_stqq_be_zd_mte } 6334 }; 6335 6336 static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a) 6337 { 6338 gen_helper_gvec_mem_scatter *fn; 6339 bool be = s->be_data == MO_BE; 6340 bool mte = s->mte_active[0]; 6341 6342 if (a->esz < a->msz || (a->msz == 0 && a->scale)) { 6343 return false; 6344 } 6345 if (a->esz < MO_128 6346 ? !dc_isar_feature(aa64_sve, s) 6347 : !dc_isar_feature(aa64_sve2p1, s)) { 6348 return false; 6349 } 6350 s->is_nonstreaming = true; 6351 if (!sve_access_check(s)) { 6352 return true; 6353 } 6354 switch (a->esz) { 6355 case MO_32: 6356 fn = scatter_store_fn32[mte][be][a->xs][a->msz]; 6357 break; 6358 case MO_64: 6359 fn = scatter_store_fn64[mte][be][a->xs][a->msz]; 6360 break; 6361 case MO_128: 6362 assert(a->xs == 2 && a->msz == MO_128); 6363 fn = scatter_store_fn128[mte][be]; 6364 break; 6365 default: 6366 g_assert_not_reached(); 6367 } 6368 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, 6369 cpu_reg_sp(s, a->rn), a->msz, true, fn); 6370 return true; 6371 } 6372 6373 static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a) 6374 { 6375 gen_helper_gvec_mem_scatter *fn = NULL; 6376 bool be = s->be_data == MO_BE; 6377 bool mte = s->mte_active[0]; 6378 6379 if (a->esz < a->msz) { 6380 return false; 6381 } 6382 if (!dc_isar_feature(aa64_sve, s)) { 6383 return false; 6384 } 6385 s->is_nonstreaming = true; 6386 if (!sve_access_check(s)) { 6387 return true; 6388 } 6389 6390 switch (a->esz) { 6391 case MO_32: 6392 fn = scatter_store_fn32[mte][be][0][a->msz]; 6393 break; 6394 case MO_64: 6395 fn = scatter_store_fn64[mte][be][2][a->msz]; 6396 break; 6397 } 6398 assert(fn != NULL); 6399 6400 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x]) 6401 * by loading the immediate into the scalar parameter. 6402 */ 6403 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 6404 tcg_constant_i64(a->imm << a->msz), a->msz, true, fn); 6405 return true; 6406 } 6407 6408 static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a) 6409 { 6410 gen_helper_gvec_mem_scatter *fn; 6411 bool be = s->be_data == MO_BE; 6412 bool mte = s->mte_active[0]; 6413 6414 if (a->esz < a->msz) { 6415 return false; 6416 } 6417 if (!dc_isar_feature(aa64_sve2, s)) { 6418 return false; 6419 } 6420 s->is_nonstreaming = true; 6421 if (!sve_access_check(s)) { 6422 return true; 6423 } 6424 6425 switch (a->esz) { 6426 case MO_32: 6427 fn = scatter_store_fn32[mte][be][0][a->msz]; 6428 break; 6429 case MO_64: 6430 fn = scatter_store_fn64[mte][be][2][a->msz]; 6431 break; 6432 default: 6433 g_assert_not_reached(); 6434 } 6435 6436 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 6437 cpu_reg(s, a->rm), a->msz, true, fn); 6438 return true; 6439 } 6440 6441 /* 6442 * Prefetches 6443 */ 6444 6445 static bool trans_PRF(DisasContext *s, arg_PRF *a) 6446 { 6447 if (!dc_isar_feature(aa64_sve, s)) { 6448 return false; 6449 } 6450 /* Prefetch is a nop within QEMU. */ 6451 (void)sve_access_check(s); 6452 return true; 6453 } 6454 6455 static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a) 6456 { 6457 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { 6458 return false; 6459 } 6460 /* Prefetch is a nop within QEMU. */ 6461 (void)sve_access_check(s); 6462 return true; 6463 } 6464 6465 static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a) 6466 { 6467 if (!dc_isar_feature(aa64_sve, s)) { 6468 return false; 6469 } 6470 /* Prefetch is a nop within QEMU. */ 6471 s->is_nonstreaming = true; 6472 (void)sve_access_check(s); 6473 return true; 6474 } 6475 6476 /* 6477 * Move Prefix 6478 * 6479 * TODO: The implementation so far could handle predicated merging movprfx. 6480 * The helper functions as written take an extra source register to 6481 * use in the operation, but the result is only written when predication 6482 * succeeds. For unpredicated movprfx, we need to rearrange the helpers 6483 * to allow the final write back to the destination to be unconditional. 6484 * For predicated zeroing movprfx, we need to rearrange the helpers to 6485 * allow the final write back to zero inactives. 6486 * 6487 * In the meantime, just emit the moves. 6488 */ 6489 6490 TRANS_FEAT(MOVPRFX, aa64_sve, do_mov_z, a->rd, a->rn) 6491 TRANS_FEAT(MOVPRFX_m, aa64_sve, do_sel_z, a->rd, a->rn, a->rd, a->pg, a->esz) 6492 TRANS_FEAT(MOVPRFX_z, aa64_sve, do_movz_zpz, a->rd, a->rn, a->pg, a->esz, false) 6493 6494 /* 6495 * SVE2 Integer Multiply - Unpredicated 6496 */ 6497 6498 TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a) 6499 TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_sve2_sqdmulh, a) 6500 6501 static gen_helper_gvec_3 * const smulh_zzz_fns[4] = { 6502 gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h, 6503 gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d, 6504 }; 6505 TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6506 smulh_zzz_fns[a->esz], a, 0) 6507 6508 static gen_helper_gvec_3 * const umulh_zzz_fns[4] = { 6509 gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h, 6510 gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d, 6511 }; 6512 TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6513 umulh_zzz_fns[a->esz], a, 0) 6514 6515 TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6516 gen_helper_gvec_pmul_b, a, 0) 6517 6518 static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = { 6519 gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h, 6520 gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d, 6521 }; 6522 TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6523 sqrdmulh_zzz_fns[a->esz], a, 0) 6524 6525 /* 6526 * SVE2 Integer - Predicated 6527 */ 6528 6529 static gen_helper_gvec_4 * const sadlp_fns[4] = { 6530 NULL, gen_helper_sve2_sadalp_zpzz_h, 6531 gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d, 6532 }; 6533 TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz, 6534 sadlp_fns[a->esz], a, 0) 6535 6536 static gen_helper_gvec_4 * const uadlp_fns[4] = { 6537 NULL, gen_helper_sve2_uadalp_zpzz_h, 6538 gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d, 6539 }; 6540 TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz, 6541 uadlp_fns[a->esz], a, 0) 6542 6543 /* 6544 * SVE2 integer unary operations (predicated) 6545 */ 6546 6547 TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz, 6548 a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0) 6549 6550 TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz, 6551 a->esz == 2 ? gen_helper_sve2_ursqrte_s : NULL, a, 0) 6552 6553 static gen_helper_gvec_3 * const sqabs_fns[4] = { 6554 gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h, 6555 gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d, 6556 }; 6557 TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0) 6558 6559 static gen_helper_gvec_3 * const sqneg_fns[4] = { 6560 gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h, 6561 gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d, 6562 }; 6563 TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0) 6564 6565 DO_ZPZZ(SQSHL, aa64_sve2, sve2_sqshl) 6566 DO_ZPZZ(SQRSHL, aa64_sve2, sve2_sqrshl) 6567 DO_ZPZZ(SRSHL, aa64_sve2, sve2_srshl) 6568 6569 DO_ZPZZ(UQSHL, aa64_sve2, sve2_uqshl) 6570 DO_ZPZZ(UQRSHL, aa64_sve2, sve2_uqrshl) 6571 DO_ZPZZ(URSHL, aa64_sve2, sve2_urshl) 6572 6573 DO_ZPZZ(SHADD, aa64_sve2, sve2_shadd) 6574 DO_ZPZZ(SRHADD, aa64_sve2, sve2_srhadd) 6575 DO_ZPZZ(SHSUB, aa64_sve2, sve2_shsub) 6576 6577 DO_ZPZZ(UHADD, aa64_sve2, sve2_uhadd) 6578 DO_ZPZZ(URHADD, aa64_sve2, sve2_urhadd) 6579 DO_ZPZZ(UHSUB, aa64_sve2, sve2_uhsub) 6580 6581 DO_ZPZZ(ADDP, aa64_sve2, sve2_addp) 6582 DO_ZPZZ(SMAXP, aa64_sve2, sve2_smaxp) 6583 DO_ZPZZ(UMAXP, aa64_sve2, sve2_umaxp) 6584 DO_ZPZZ(SMINP, aa64_sve2, sve2_sminp) 6585 DO_ZPZZ(UMINP, aa64_sve2, sve2_uminp) 6586 6587 DO_ZPZZ(SQADD_zpzz, aa64_sve2, sve2_sqadd) 6588 DO_ZPZZ(UQADD_zpzz, aa64_sve2, sve2_uqadd) 6589 DO_ZPZZ(SQSUB_zpzz, aa64_sve2, sve2_sqsub) 6590 DO_ZPZZ(UQSUB_zpzz, aa64_sve2, sve2_uqsub) 6591 DO_ZPZZ(SUQADD, aa64_sve2, sve2_suqadd) 6592 DO_ZPZZ(USQADD, aa64_sve2, sve2_usqadd) 6593 6594 /* 6595 * SVE2 Widening Integer Arithmetic 6596 */ 6597 6598 static gen_helper_gvec_3 * const saddl_fns[4] = { 6599 NULL, gen_helper_sve2_saddl_h, 6600 gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d, 6601 }; 6602 TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6603 saddl_fns[a->esz], a, 0) 6604 TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 6605 saddl_fns[a->esz], a, 3) 6606 TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz, 6607 saddl_fns[a->esz], a, 2) 6608 6609 static gen_helper_gvec_3 * const ssubl_fns[4] = { 6610 NULL, gen_helper_sve2_ssubl_h, 6611 gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d, 6612 }; 6613 TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6614 ssubl_fns[a->esz], a, 0) 6615 TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz, 6616 ssubl_fns[a->esz], a, 3) 6617 TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz, 6618 ssubl_fns[a->esz], a, 2) 6619 TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz, 6620 ssubl_fns[a->esz], a, 1) 6621 6622 static gen_helper_gvec_3 * const sabdl_fns[4] = { 6623 NULL, gen_helper_sve2_sabdl_h, 6624 gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d, 6625 }; 6626 TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6627 sabdl_fns[a->esz], a, 0) 6628 TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 6629 sabdl_fns[a->esz], a, 3) 6630 6631 static gen_helper_gvec_3 * const uaddl_fns[4] = { 6632 NULL, gen_helper_sve2_uaddl_h, 6633 gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d, 6634 }; 6635 TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6636 uaddl_fns[a->esz], a, 0) 6637 TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 6638 uaddl_fns[a->esz], a, 3) 6639 6640 static gen_helper_gvec_3 * const usubl_fns[4] = { 6641 NULL, gen_helper_sve2_usubl_h, 6642 gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d, 6643 }; 6644 TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6645 usubl_fns[a->esz], a, 0) 6646 TRANS_FEAT(USUBLT, aa64_sve2, gen_gvec_ool_arg_zzz, 6647 usubl_fns[a->esz], a, 3) 6648 6649 static gen_helper_gvec_3 * const uabdl_fns[4] = { 6650 NULL, gen_helper_sve2_uabdl_h, 6651 gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d, 6652 }; 6653 TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6654 uabdl_fns[a->esz], a, 0) 6655 TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 6656 uabdl_fns[a->esz], a, 3) 6657 6658 static gen_helper_gvec_3 * const sqdmull_fns[4] = { 6659 NULL, gen_helper_sve2_sqdmull_zzz_h, 6660 gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d, 6661 }; 6662 TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6663 sqdmull_fns[a->esz], a, 0) 6664 TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6665 sqdmull_fns[a->esz], a, 3) 6666 6667 static gen_helper_gvec_3 * const smull_fns[4] = { 6668 NULL, gen_helper_sve2_smull_zzz_h, 6669 gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d, 6670 }; 6671 TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6672 smull_fns[a->esz], a, 0) 6673 TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6674 smull_fns[a->esz], a, 3) 6675 6676 static gen_helper_gvec_3 * const umull_fns[4] = { 6677 NULL, gen_helper_sve2_umull_zzz_h, 6678 gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d, 6679 }; 6680 TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6681 umull_fns[a->esz], a, 0) 6682 TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6683 umull_fns[a->esz], a, 3) 6684 6685 static gen_helper_gvec_3 * const eoril_fns[4] = { 6686 gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h, 6687 gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d, 6688 }; 6689 TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2) 6690 TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1) 6691 6692 static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel) 6693 { 6694 static gen_helper_gvec_3 * const fns[4] = { 6695 gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h, 6696 NULL, gen_helper_sve2_pmull_d, 6697 }; 6698 6699 if (a->esz == 0) { 6700 if (!dc_isar_feature(aa64_sve2_pmull128, s)) { 6701 return false; 6702 } 6703 s->is_nonstreaming = true; 6704 } else if (!dc_isar_feature(aa64_sve, s)) { 6705 return false; 6706 } 6707 return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel); 6708 } 6709 6710 TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false) 6711 TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true) 6712 6713 static gen_helper_gvec_3 * const saddw_fns[4] = { 6714 NULL, gen_helper_sve2_saddw_h, 6715 gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d, 6716 }; 6717 TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0) 6718 TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1) 6719 6720 static gen_helper_gvec_3 * const ssubw_fns[4] = { 6721 NULL, gen_helper_sve2_ssubw_h, 6722 gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d, 6723 }; 6724 TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0) 6725 TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1) 6726 6727 static gen_helper_gvec_3 * const uaddw_fns[4] = { 6728 NULL, gen_helper_sve2_uaddw_h, 6729 gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d, 6730 }; 6731 TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0) 6732 TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1) 6733 6734 static gen_helper_gvec_3 * const usubw_fns[4] = { 6735 NULL, gen_helper_sve2_usubw_h, 6736 gen_helper_sve2_usubw_s, gen_helper_sve2_usubw_d, 6737 }; 6738 TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 0) 6739 TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1) 6740 6741 static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm) 6742 { 6743 int top = imm & 1; 6744 int shl = imm >> 1; 6745 int halfbits = 4 << vece; 6746 6747 if (top) { 6748 if (shl == halfbits) { 6749 tcg_gen_and_vec(vece, d, n, 6750 tcg_constant_vec_matching(d, vece, 6751 MAKE_64BIT_MASK(halfbits, halfbits))); 6752 } else { 6753 tcg_gen_sari_vec(vece, d, n, halfbits); 6754 tcg_gen_shli_vec(vece, d, d, shl); 6755 } 6756 } else { 6757 tcg_gen_shli_vec(vece, d, n, halfbits); 6758 tcg_gen_sari_vec(vece, d, d, halfbits - shl); 6759 } 6760 } 6761 6762 static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm) 6763 { 6764 int halfbits = 4 << vece; 6765 int top = imm & 1; 6766 int shl = (imm >> 1); 6767 int shift; 6768 uint64_t mask; 6769 6770 mask = MAKE_64BIT_MASK(0, halfbits); 6771 mask <<= shl; 6772 mask = dup_const(vece, mask); 6773 6774 shift = shl - top * halfbits; 6775 if (shift < 0) { 6776 tcg_gen_shri_i64(d, n, -shift); 6777 } else { 6778 tcg_gen_shli_i64(d, n, shift); 6779 } 6780 tcg_gen_andi_i64(d, d, mask); 6781 } 6782 6783 static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm) 6784 { 6785 gen_ushll_i64(MO_16, d, n, imm); 6786 } 6787 6788 static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm) 6789 { 6790 gen_ushll_i64(MO_32, d, n, imm); 6791 } 6792 6793 static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm) 6794 { 6795 gen_ushll_i64(MO_64, d, n, imm); 6796 } 6797 6798 static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm) 6799 { 6800 int halfbits = 4 << vece; 6801 int top = imm & 1; 6802 int shl = imm >> 1; 6803 6804 if (top) { 6805 if (shl == halfbits) { 6806 tcg_gen_and_vec(vece, d, n, 6807 tcg_constant_vec_matching(d, vece, 6808 MAKE_64BIT_MASK(halfbits, halfbits))); 6809 } else { 6810 tcg_gen_shri_vec(vece, d, n, halfbits); 6811 tcg_gen_shli_vec(vece, d, d, shl); 6812 } 6813 } else { 6814 if (shl == 0) { 6815 tcg_gen_and_vec(vece, d, n, 6816 tcg_constant_vec_matching(d, vece, 6817 MAKE_64BIT_MASK(0, halfbits))); 6818 } else { 6819 tcg_gen_shli_vec(vece, d, n, halfbits); 6820 tcg_gen_shri_vec(vece, d, d, halfbits - shl); 6821 } 6822 } 6823 } 6824 6825 static bool do_shll_tb(DisasContext *s, arg_rri_esz *a, 6826 const GVecGen2i ops[3], bool sel) 6827 { 6828 6829 if (a->esz < 0 || a->esz > 2) { 6830 return false; 6831 } 6832 if (sve_access_check(s)) { 6833 unsigned vsz = vec_full_reg_size(s); 6834 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd), 6835 vec_full_reg_offset(s, a->rn), 6836 vsz, vsz, (a->imm << 1) | sel, 6837 &ops[a->esz]); 6838 } 6839 return true; 6840 } 6841 6842 static const TCGOpcode sshll_list[] = { 6843 INDEX_op_shli_vec, INDEX_op_sari_vec, 0 6844 }; 6845 static const GVecGen2i sshll_ops[3] = { 6846 { .fniv = gen_sshll_vec, 6847 .opt_opc = sshll_list, 6848 .fno = gen_helper_sve2_sshll_h, 6849 .vece = MO_16 }, 6850 { .fniv = gen_sshll_vec, 6851 .opt_opc = sshll_list, 6852 .fno = gen_helper_sve2_sshll_s, 6853 .vece = MO_32 }, 6854 { .fniv = gen_sshll_vec, 6855 .opt_opc = sshll_list, 6856 .fno = gen_helper_sve2_sshll_d, 6857 .vece = MO_64 } 6858 }; 6859 TRANS_FEAT(SSHLLB, aa64_sve2, do_shll_tb, a, sshll_ops, false) 6860 TRANS_FEAT(SSHLLT, aa64_sve2, do_shll_tb, a, sshll_ops, true) 6861 6862 static const TCGOpcode ushll_list[] = { 6863 INDEX_op_shli_vec, INDEX_op_shri_vec, 0 6864 }; 6865 static const GVecGen2i ushll_ops[3] = { 6866 { .fni8 = gen_ushll16_i64, 6867 .fniv = gen_ushll_vec, 6868 .opt_opc = ushll_list, 6869 .fno = gen_helper_sve2_ushll_h, 6870 .vece = MO_16 }, 6871 { .fni8 = gen_ushll32_i64, 6872 .fniv = gen_ushll_vec, 6873 .opt_opc = ushll_list, 6874 .fno = gen_helper_sve2_ushll_s, 6875 .vece = MO_32 }, 6876 { .fni8 = gen_ushll64_i64, 6877 .fniv = gen_ushll_vec, 6878 .opt_opc = ushll_list, 6879 .fno = gen_helper_sve2_ushll_d, 6880 .vece = MO_64 }, 6881 }; 6882 TRANS_FEAT(USHLLB, aa64_sve2, do_shll_tb, a, ushll_ops, false) 6883 TRANS_FEAT(USHLLT, aa64_sve2, do_shll_tb, a, ushll_ops, true) 6884 6885 static gen_helper_gvec_3 * const bext_fns[4] = { 6886 gen_helper_sve2_bext_b, gen_helper_sve2_bext_h, 6887 gen_helper_sve2_bext_s, gen_helper_sve2_bext_d, 6888 }; 6889 TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz, 6890 bext_fns[a->esz], a, 0) 6891 6892 static gen_helper_gvec_3 * const bdep_fns[4] = { 6893 gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h, 6894 gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d, 6895 }; 6896 TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz, 6897 bdep_fns[a->esz], a, 0) 6898 6899 static gen_helper_gvec_3 * const bgrp_fns[4] = { 6900 gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h, 6901 gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d, 6902 }; 6903 TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz, 6904 bgrp_fns[a->esz], a, 0) 6905 6906 static gen_helper_gvec_3 * const cadd_fns[4] = { 6907 gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h, 6908 gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d, 6909 }; 6910 TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz, 6911 cadd_fns[a->esz], a, 0) 6912 TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz, 6913 cadd_fns[a->esz], a, 1) 6914 6915 static gen_helper_gvec_3 * const sqcadd_fns[4] = { 6916 gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h, 6917 gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d, 6918 }; 6919 TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz, 6920 sqcadd_fns[a->esz], a, 0) 6921 TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz, 6922 sqcadd_fns[a->esz], a, 1) 6923 6924 static gen_helper_gvec_4 * const sabal_fns[4] = { 6925 NULL, gen_helper_sve2_sabal_h, 6926 gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d, 6927 }; 6928 TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0) 6929 TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1) 6930 6931 static gen_helper_gvec_4 * const uabal_fns[4] = { 6932 NULL, gen_helper_sve2_uabal_h, 6933 gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d, 6934 }; 6935 TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0) 6936 TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1) 6937 6938 static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel) 6939 { 6940 static gen_helper_gvec_4 * const fns[2] = { 6941 gen_helper_sve2_adcl_s, 6942 gen_helper_sve2_adcl_d, 6943 }; 6944 /* 6945 * Note that in this case the ESZ field encodes both size and sign. 6946 * Split out 'subtract' into bit 1 of the data field for the helper. 6947 */ 6948 return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel); 6949 } 6950 6951 TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false) 6952 TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true) 6953 6954 TRANS_FEAT(SSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ssra, a) 6955 TRANS_FEAT(USRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_usra, a) 6956 TRANS_FEAT(SRSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_srsra, a) 6957 TRANS_FEAT(URSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ursra, a) 6958 TRANS_FEAT(SRI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sri, a) 6959 TRANS_FEAT(SLI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sli, a) 6960 6961 TRANS_FEAT(SABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_saba, a) 6962 TRANS_FEAT(UABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_uaba, a) 6963 6964 static bool do_narrow_extract(DisasContext *s, arg_rri_esz *a, 6965 const GVecGen2 ops[3]) 6966 { 6967 if (a->esz < 0 || a->esz > MO_32 || a->imm != 0) { 6968 return false; 6969 } 6970 if (sve_access_check(s)) { 6971 unsigned vsz = vec_full_reg_size(s); 6972 tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd), 6973 vec_full_reg_offset(s, a->rn), 6974 vsz, vsz, &ops[a->esz]); 6975 } 6976 return true; 6977 } 6978 6979 static const TCGOpcode sqxtn_list[] = { 6980 INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0 6981 }; 6982 6983 static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6984 { 6985 int halfbits = 4 << vece; 6986 int64_t mask = (1ull << halfbits) - 1; 6987 int64_t min = -1ull << (halfbits - 1); 6988 int64_t max = -min - 1; 6989 6990 tcg_gen_smax_vec(vece, d, n, tcg_constant_vec_matching(d, vece, min)); 6991 tcg_gen_smin_vec(vece, d, d, tcg_constant_vec_matching(d, vece, max)); 6992 tcg_gen_and_vec(vece, d, d, tcg_constant_vec_matching(d, vece, mask)); 6993 } 6994 6995 static const GVecGen2 sqxtnb_ops[3] = { 6996 { .fniv = gen_sqxtnb_vec, 6997 .opt_opc = sqxtn_list, 6998 .fno = gen_helper_sve2_sqxtnb_h, 6999 .vece = MO_16 }, 7000 { .fniv = gen_sqxtnb_vec, 7001 .opt_opc = sqxtn_list, 7002 .fno = gen_helper_sve2_sqxtnb_s, 7003 .vece = MO_32 }, 7004 { .fniv = gen_sqxtnb_vec, 7005 .opt_opc = sqxtn_list, 7006 .fno = gen_helper_sve2_sqxtnb_d, 7007 .vece = MO_64 }, 7008 }; 7009 TRANS_FEAT(SQXTNB, aa64_sve2, do_narrow_extract, a, sqxtnb_ops) 7010 7011 static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 7012 { 7013 int halfbits = 4 << vece; 7014 int64_t mask = (1ull << halfbits) - 1; 7015 int64_t min = -1ull << (halfbits - 1); 7016 int64_t max = -min - 1; 7017 7018 tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min)); 7019 tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max)); 7020 tcg_gen_shli_vec(vece, n, n, halfbits); 7021 tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n); 7022 } 7023 7024 static const GVecGen2 sqxtnt_ops[3] = { 7025 { .fniv = gen_sqxtnt_vec, 7026 .opt_opc = sqxtn_list, 7027 .load_dest = true, 7028 .fno = gen_helper_sve2_sqxtnt_h, 7029 .vece = MO_16 }, 7030 { .fniv = gen_sqxtnt_vec, 7031 .opt_opc = sqxtn_list, 7032 .load_dest = true, 7033 .fno = gen_helper_sve2_sqxtnt_s, 7034 .vece = MO_32 }, 7035 { .fniv = gen_sqxtnt_vec, 7036 .opt_opc = sqxtn_list, 7037 .load_dest = true, 7038 .fno = gen_helper_sve2_sqxtnt_d, 7039 .vece = MO_64 }, 7040 }; 7041 TRANS_FEAT(SQXTNT, aa64_sve2, do_narrow_extract, a, sqxtnt_ops) 7042 7043 static const TCGOpcode uqxtn_list[] = { 7044 INDEX_op_shli_vec, INDEX_op_umin_vec, 0 7045 }; 7046 7047 static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 7048 { 7049 int halfbits = 4 << vece; 7050 int64_t max = (1ull << halfbits) - 1; 7051 7052 tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max)); 7053 } 7054 7055 static const GVecGen2 uqxtnb_ops[3] = { 7056 { .fniv = gen_uqxtnb_vec, 7057 .opt_opc = uqxtn_list, 7058 .fno = gen_helper_sve2_uqxtnb_h, 7059 .vece = MO_16 }, 7060 { .fniv = gen_uqxtnb_vec, 7061 .opt_opc = uqxtn_list, 7062 .fno = gen_helper_sve2_uqxtnb_s, 7063 .vece = MO_32 }, 7064 { .fniv = gen_uqxtnb_vec, 7065 .opt_opc = uqxtn_list, 7066 .fno = gen_helper_sve2_uqxtnb_d, 7067 .vece = MO_64 }, 7068 }; 7069 TRANS_FEAT(UQXTNB, aa64_sve2, do_narrow_extract, a, uqxtnb_ops) 7070 7071 static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 7072 { 7073 int halfbits = 4 << vece; 7074 int64_t max = (1ull << halfbits) - 1; 7075 TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max); 7076 7077 tcg_gen_umin_vec(vece, n, n, maxv); 7078 tcg_gen_shli_vec(vece, n, n, halfbits); 7079 tcg_gen_bitsel_vec(vece, d, maxv, d, n); 7080 } 7081 7082 static const GVecGen2 uqxtnt_ops[3] = { 7083 { .fniv = gen_uqxtnt_vec, 7084 .opt_opc = uqxtn_list, 7085 .load_dest = true, 7086 .fno = gen_helper_sve2_uqxtnt_h, 7087 .vece = MO_16 }, 7088 { .fniv = gen_uqxtnt_vec, 7089 .opt_opc = uqxtn_list, 7090 .load_dest = true, 7091 .fno = gen_helper_sve2_uqxtnt_s, 7092 .vece = MO_32 }, 7093 { .fniv = gen_uqxtnt_vec, 7094 .opt_opc = uqxtn_list, 7095 .load_dest = true, 7096 .fno = gen_helper_sve2_uqxtnt_d, 7097 .vece = MO_64 }, 7098 }; 7099 TRANS_FEAT(UQXTNT, aa64_sve2, do_narrow_extract, a, uqxtnt_ops) 7100 7101 static const TCGOpcode sqxtun_list[] = { 7102 INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0 7103 }; 7104 7105 static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 7106 { 7107 int halfbits = 4 << vece; 7108 int64_t max = (1ull << halfbits) - 1; 7109 7110 tcg_gen_smax_vec(vece, d, n, tcg_constant_vec_matching(d, vece, 0)); 7111 tcg_gen_umin_vec(vece, d, d, tcg_constant_vec_matching(d, vece, max)); 7112 } 7113 7114 static const GVecGen2 sqxtunb_ops[3] = { 7115 { .fniv = gen_sqxtunb_vec, 7116 .opt_opc = sqxtun_list, 7117 .fno = gen_helper_sve2_sqxtunb_h, 7118 .vece = MO_16 }, 7119 { .fniv = gen_sqxtunb_vec, 7120 .opt_opc = sqxtun_list, 7121 .fno = gen_helper_sve2_sqxtunb_s, 7122 .vece = MO_32 }, 7123 { .fniv = gen_sqxtunb_vec, 7124 .opt_opc = sqxtun_list, 7125 .fno = gen_helper_sve2_sqxtunb_d, 7126 .vece = MO_64 }, 7127 }; 7128 TRANS_FEAT(SQXTUNB, aa64_sve2, do_narrow_extract, a, sqxtunb_ops) 7129 7130 static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 7131 { 7132 int halfbits = 4 << vece; 7133 int64_t max = (1ull << halfbits) - 1; 7134 TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max); 7135 7136 tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0)); 7137 tcg_gen_umin_vec(vece, n, n, maxv); 7138 tcg_gen_shli_vec(vece, n, n, halfbits); 7139 tcg_gen_bitsel_vec(vece, d, maxv, d, n); 7140 } 7141 7142 static const GVecGen2 sqxtunt_ops[3] = { 7143 { .fniv = gen_sqxtunt_vec, 7144 .opt_opc = sqxtun_list, 7145 .load_dest = true, 7146 .fno = gen_helper_sve2_sqxtunt_h, 7147 .vece = MO_16 }, 7148 { .fniv = gen_sqxtunt_vec, 7149 .opt_opc = sqxtun_list, 7150 .load_dest = true, 7151 .fno = gen_helper_sve2_sqxtunt_s, 7152 .vece = MO_32 }, 7153 { .fniv = gen_sqxtunt_vec, 7154 .opt_opc = sqxtun_list, 7155 .load_dest = true, 7156 .fno = gen_helper_sve2_sqxtunt_d, 7157 .vece = MO_64 }, 7158 }; 7159 TRANS_FEAT(SQXTUNT, aa64_sve2, do_narrow_extract, a, sqxtunt_ops) 7160 7161 static bool do_shr_narrow(DisasContext *s, arg_rri_esz *a, 7162 const GVecGen2i ops[3]) 7163 { 7164 if (a->esz < 0 || a->esz > MO_32) { 7165 return false; 7166 } 7167 assert(a->imm > 0 && a->imm <= (8 << a->esz)); 7168 if (sve_access_check(s)) { 7169 unsigned vsz = vec_full_reg_size(s); 7170 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd), 7171 vec_full_reg_offset(s, a->rn), 7172 vsz, vsz, a->imm, &ops[a->esz]); 7173 } 7174 return true; 7175 } 7176 7177 static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr) 7178 { 7179 int halfbits = 4 << vece; 7180 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits)); 7181 7182 tcg_gen_shri_i64(d, n, shr); 7183 tcg_gen_andi_i64(d, d, mask); 7184 } 7185 7186 static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 7187 { 7188 gen_shrnb_i64(MO_16, d, n, shr); 7189 } 7190 7191 static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 7192 { 7193 gen_shrnb_i64(MO_32, d, n, shr); 7194 } 7195 7196 static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 7197 { 7198 gen_shrnb_i64(MO_64, d, n, shr); 7199 } 7200 7201 static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr) 7202 { 7203 int halfbits = 4 << vece; 7204 uint64_t mask = MAKE_64BIT_MASK(0, halfbits); 7205 7206 tcg_gen_shri_vec(vece, n, n, shr); 7207 tcg_gen_and_vec(vece, d, n, tcg_constant_vec_matching(d, vece, mask)); 7208 } 7209 7210 static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 }; 7211 static const GVecGen2i shrnb_ops[3] = { 7212 { .fni8 = gen_shrnb16_i64, 7213 .fniv = gen_shrnb_vec, 7214 .opt_opc = shrnb_vec_list, 7215 .fno = gen_helper_sve2_shrnb_h, 7216 .vece = MO_16 }, 7217 { .fni8 = gen_shrnb32_i64, 7218 .fniv = gen_shrnb_vec, 7219 .opt_opc = shrnb_vec_list, 7220 .fno = gen_helper_sve2_shrnb_s, 7221 .vece = MO_32 }, 7222 { .fni8 = gen_shrnb64_i64, 7223 .fniv = gen_shrnb_vec, 7224 .opt_opc = shrnb_vec_list, 7225 .fno = gen_helper_sve2_shrnb_d, 7226 .vece = MO_64 }, 7227 }; 7228 TRANS_FEAT(SHRNB, aa64_sve2, do_shr_narrow, a, shrnb_ops) 7229 7230 static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr) 7231 { 7232 int halfbits = 4 << vece; 7233 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits)); 7234 7235 tcg_gen_shli_i64(n, n, halfbits - shr); 7236 tcg_gen_andi_i64(n, n, ~mask); 7237 tcg_gen_andi_i64(d, d, mask); 7238 tcg_gen_or_i64(d, d, n); 7239 } 7240 7241 static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 7242 { 7243 gen_shrnt_i64(MO_16, d, n, shr); 7244 } 7245 7246 static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 7247 { 7248 gen_shrnt_i64(MO_32, d, n, shr); 7249 } 7250 7251 static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 7252 { 7253 tcg_gen_shri_i64(n, n, shr); 7254 tcg_gen_deposit_i64(d, d, n, 32, 32); 7255 } 7256 7257 static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr) 7258 { 7259 int halfbits = 4 << vece; 7260 uint64_t mask = MAKE_64BIT_MASK(0, halfbits); 7261 7262 tcg_gen_shli_vec(vece, n, n, halfbits - shr); 7263 tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n); 7264 } 7265 7266 static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 }; 7267 static const GVecGen2i shrnt_ops[3] = { 7268 { .fni8 = gen_shrnt16_i64, 7269 .fniv = gen_shrnt_vec, 7270 .opt_opc = shrnt_vec_list, 7271 .load_dest = true, 7272 .fno = gen_helper_sve2_shrnt_h, 7273 .vece = MO_16 }, 7274 { .fni8 = gen_shrnt32_i64, 7275 .fniv = gen_shrnt_vec, 7276 .opt_opc = shrnt_vec_list, 7277 .load_dest = true, 7278 .fno = gen_helper_sve2_shrnt_s, 7279 .vece = MO_32 }, 7280 { .fni8 = gen_shrnt64_i64, 7281 .fniv = gen_shrnt_vec, 7282 .opt_opc = shrnt_vec_list, 7283 .load_dest = true, 7284 .fno = gen_helper_sve2_shrnt_d, 7285 .vece = MO_64 }, 7286 }; 7287 TRANS_FEAT(SHRNT, aa64_sve2, do_shr_narrow, a, shrnt_ops) 7288 7289 static const GVecGen2i rshrnb_ops[3] = { 7290 { .fno = gen_helper_sve2_rshrnb_h }, 7291 { .fno = gen_helper_sve2_rshrnb_s }, 7292 { .fno = gen_helper_sve2_rshrnb_d }, 7293 }; 7294 TRANS_FEAT(RSHRNB, aa64_sve2, do_shr_narrow, a, rshrnb_ops) 7295 7296 static const GVecGen2i rshrnt_ops[3] = { 7297 { .fno = gen_helper_sve2_rshrnt_h }, 7298 { .fno = gen_helper_sve2_rshrnt_s }, 7299 { .fno = gen_helper_sve2_rshrnt_d }, 7300 }; 7301 TRANS_FEAT(RSHRNT, aa64_sve2, do_shr_narrow, a, rshrnt_ops) 7302 7303 static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d, 7304 TCGv_vec n, int64_t shr) 7305 { 7306 int halfbits = 4 << vece; 7307 uint64_t max = MAKE_64BIT_MASK(0, halfbits); 7308 7309 tcg_gen_sari_vec(vece, n, n, shr); 7310 tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0)); 7311 tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max)); 7312 } 7313 7314 static const TCGOpcode sqshrunb_vec_list[] = { 7315 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0 7316 }; 7317 static const GVecGen2i sqshrunb_ops[3] = { 7318 { .fniv = gen_sqshrunb_vec, 7319 .opt_opc = sqshrunb_vec_list, 7320 .fno = gen_helper_sve2_sqshrunb_h, 7321 .vece = MO_16 }, 7322 { .fniv = gen_sqshrunb_vec, 7323 .opt_opc = sqshrunb_vec_list, 7324 .fno = gen_helper_sve2_sqshrunb_s, 7325 .vece = MO_32 }, 7326 { .fniv = gen_sqshrunb_vec, 7327 .opt_opc = sqshrunb_vec_list, 7328 .fno = gen_helper_sve2_sqshrunb_d, 7329 .vece = MO_64 }, 7330 }; 7331 TRANS_FEAT(SQSHRUNB, aa64_sve2, do_shr_narrow, a, sqshrunb_ops) 7332 7333 static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d, 7334 TCGv_vec n, int64_t shr) 7335 { 7336 int halfbits = 4 << vece; 7337 uint64_t max = MAKE_64BIT_MASK(0, halfbits); 7338 TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max); 7339 7340 tcg_gen_sari_vec(vece, n, n, shr); 7341 tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0)); 7342 tcg_gen_umin_vec(vece, n, n, maxv); 7343 tcg_gen_shli_vec(vece, n, n, halfbits); 7344 tcg_gen_bitsel_vec(vece, d, maxv, d, n); 7345 } 7346 7347 static const TCGOpcode sqshrunt_vec_list[] = { 7348 INDEX_op_shli_vec, INDEX_op_sari_vec, 7349 INDEX_op_smax_vec, INDEX_op_umin_vec, 0 7350 }; 7351 static const GVecGen2i sqshrunt_ops[3] = { 7352 { .fniv = gen_sqshrunt_vec, 7353 .opt_opc = sqshrunt_vec_list, 7354 .load_dest = true, 7355 .fno = gen_helper_sve2_sqshrunt_h, 7356 .vece = MO_16 }, 7357 { .fniv = gen_sqshrunt_vec, 7358 .opt_opc = sqshrunt_vec_list, 7359 .load_dest = true, 7360 .fno = gen_helper_sve2_sqshrunt_s, 7361 .vece = MO_32 }, 7362 { .fniv = gen_sqshrunt_vec, 7363 .opt_opc = sqshrunt_vec_list, 7364 .load_dest = true, 7365 .fno = gen_helper_sve2_sqshrunt_d, 7366 .vece = MO_64 }, 7367 }; 7368 TRANS_FEAT(SQSHRUNT, aa64_sve2, do_shr_narrow, a, sqshrunt_ops) 7369 7370 static const GVecGen2i sqrshrunb_ops[3] = { 7371 { .fno = gen_helper_sve2_sqrshrunb_h }, 7372 { .fno = gen_helper_sve2_sqrshrunb_s }, 7373 { .fno = gen_helper_sve2_sqrshrunb_d }, 7374 }; 7375 TRANS_FEAT(SQRSHRUNB, aa64_sve2, do_shr_narrow, a, sqrshrunb_ops) 7376 7377 static const GVecGen2i sqrshrunt_ops[3] = { 7378 { .fno = gen_helper_sve2_sqrshrunt_h }, 7379 { .fno = gen_helper_sve2_sqrshrunt_s }, 7380 { .fno = gen_helper_sve2_sqrshrunt_d }, 7381 }; 7382 TRANS_FEAT(SQRSHRUNT, aa64_sve2, do_shr_narrow, a, sqrshrunt_ops) 7383 7384 static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d, 7385 TCGv_vec n, int64_t shr) 7386 { 7387 int halfbits = 4 << vece; 7388 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1); 7389 int64_t min = -max - 1; 7390 int64_t mask = MAKE_64BIT_MASK(0, halfbits); 7391 7392 tcg_gen_sari_vec(vece, n, n, shr); 7393 tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min)); 7394 tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max)); 7395 tcg_gen_and_vec(vece, d, n, tcg_constant_vec_matching(d, vece, mask)); 7396 } 7397 7398 static const TCGOpcode sqshrnb_vec_list[] = { 7399 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0 7400 }; 7401 static const GVecGen2i sqshrnb_ops[3] = { 7402 { .fniv = gen_sqshrnb_vec, 7403 .opt_opc = sqshrnb_vec_list, 7404 .fno = gen_helper_sve2_sqshrnb_h, 7405 .vece = MO_16 }, 7406 { .fniv = gen_sqshrnb_vec, 7407 .opt_opc = sqshrnb_vec_list, 7408 .fno = gen_helper_sve2_sqshrnb_s, 7409 .vece = MO_32 }, 7410 { .fniv = gen_sqshrnb_vec, 7411 .opt_opc = sqshrnb_vec_list, 7412 .fno = gen_helper_sve2_sqshrnb_d, 7413 .vece = MO_64 }, 7414 }; 7415 TRANS_FEAT(SQSHRNB, aa64_sve2, do_shr_narrow, a, sqshrnb_ops) 7416 7417 static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d, 7418 TCGv_vec n, int64_t shr) 7419 { 7420 int halfbits = 4 << vece; 7421 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1); 7422 int64_t min = -max - 1; 7423 int64_t mask = MAKE_64BIT_MASK(0, halfbits); 7424 7425 tcg_gen_sari_vec(vece, n, n, shr); 7426 tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min)); 7427 tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max)); 7428 tcg_gen_shli_vec(vece, n, n, halfbits); 7429 tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n); 7430 } 7431 7432 static const TCGOpcode sqshrnt_vec_list[] = { 7433 INDEX_op_shli_vec, INDEX_op_sari_vec, 7434 INDEX_op_smax_vec, INDEX_op_smin_vec, 0 7435 }; 7436 static const GVecGen2i sqshrnt_ops[3] = { 7437 { .fniv = gen_sqshrnt_vec, 7438 .opt_opc = sqshrnt_vec_list, 7439 .load_dest = true, 7440 .fno = gen_helper_sve2_sqshrnt_h, 7441 .vece = MO_16 }, 7442 { .fniv = gen_sqshrnt_vec, 7443 .opt_opc = sqshrnt_vec_list, 7444 .load_dest = true, 7445 .fno = gen_helper_sve2_sqshrnt_s, 7446 .vece = MO_32 }, 7447 { .fniv = gen_sqshrnt_vec, 7448 .opt_opc = sqshrnt_vec_list, 7449 .load_dest = true, 7450 .fno = gen_helper_sve2_sqshrnt_d, 7451 .vece = MO_64 }, 7452 }; 7453 TRANS_FEAT(SQSHRNT, aa64_sve2, do_shr_narrow, a, sqshrnt_ops) 7454 7455 static const GVecGen2i sqrshrnb_ops[3] = { 7456 { .fno = gen_helper_sve2_sqrshrnb_h }, 7457 { .fno = gen_helper_sve2_sqrshrnb_s }, 7458 { .fno = gen_helper_sve2_sqrshrnb_d }, 7459 }; 7460 TRANS_FEAT(SQRSHRNB, aa64_sve2, do_shr_narrow, a, sqrshrnb_ops) 7461 7462 static const GVecGen2i sqrshrnt_ops[3] = { 7463 { .fno = gen_helper_sve2_sqrshrnt_h }, 7464 { .fno = gen_helper_sve2_sqrshrnt_s }, 7465 { .fno = gen_helper_sve2_sqrshrnt_d }, 7466 }; 7467 TRANS_FEAT(SQRSHRNT, aa64_sve2, do_shr_narrow, a, sqrshrnt_ops) 7468 7469 static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d, 7470 TCGv_vec n, int64_t shr) 7471 { 7472 int halfbits = 4 << vece; 7473 int64_t max = MAKE_64BIT_MASK(0, halfbits); 7474 7475 tcg_gen_shri_vec(vece, n, n, shr); 7476 tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max)); 7477 } 7478 7479 static const TCGOpcode uqshrnb_vec_list[] = { 7480 INDEX_op_shri_vec, INDEX_op_umin_vec, 0 7481 }; 7482 static const GVecGen2i uqshrnb_ops[3] = { 7483 { .fniv = gen_uqshrnb_vec, 7484 .opt_opc = uqshrnb_vec_list, 7485 .fno = gen_helper_sve2_uqshrnb_h, 7486 .vece = MO_16 }, 7487 { .fniv = gen_uqshrnb_vec, 7488 .opt_opc = uqshrnb_vec_list, 7489 .fno = gen_helper_sve2_uqshrnb_s, 7490 .vece = MO_32 }, 7491 { .fniv = gen_uqshrnb_vec, 7492 .opt_opc = uqshrnb_vec_list, 7493 .fno = gen_helper_sve2_uqshrnb_d, 7494 .vece = MO_64 }, 7495 }; 7496 TRANS_FEAT(UQSHRNB, aa64_sve2, do_shr_narrow, a, uqshrnb_ops) 7497 7498 static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d, 7499 TCGv_vec n, int64_t shr) 7500 { 7501 int halfbits = 4 << vece; 7502 int64_t max = MAKE_64BIT_MASK(0, halfbits); 7503 TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max); 7504 7505 tcg_gen_shri_vec(vece, n, n, shr); 7506 tcg_gen_umin_vec(vece, n, n, maxv); 7507 tcg_gen_shli_vec(vece, n, n, halfbits); 7508 tcg_gen_bitsel_vec(vece, d, maxv, d, n); 7509 } 7510 7511 static const TCGOpcode uqshrnt_vec_list[] = { 7512 INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0 7513 }; 7514 static const GVecGen2i uqshrnt_ops[3] = { 7515 { .fniv = gen_uqshrnt_vec, 7516 .opt_opc = uqshrnt_vec_list, 7517 .load_dest = true, 7518 .fno = gen_helper_sve2_uqshrnt_h, 7519 .vece = MO_16 }, 7520 { .fniv = gen_uqshrnt_vec, 7521 .opt_opc = uqshrnt_vec_list, 7522 .load_dest = true, 7523 .fno = gen_helper_sve2_uqshrnt_s, 7524 .vece = MO_32 }, 7525 { .fniv = gen_uqshrnt_vec, 7526 .opt_opc = uqshrnt_vec_list, 7527 .load_dest = true, 7528 .fno = gen_helper_sve2_uqshrnt_d, 7529 .vece = MO_64 }, 7530 }; 7531 TRANS_FEAT(UQSHRNT, aa64_sve2, do_shr_narrow, a, uqshrnt_ops) 7532 7533 static const GVecGen2i uqrshrnb_ops[3] = { 7534 { .fno = gen_helper_sve2_uqrshrnb_h }, 7535 { .fno = gen_helper_sve2_uqrshrnb_s }, 7536 { .fno = gen_helper_sve2_uqrshrnb_d }, 7537 }; 7538 TRANS_FEAT(UQRSHRNB, aa64_sve2, do_shr_narrow, a, uqrshrnb_ops) 7539 7540 static const GVecGen2i uqrshrnt_ops[3] = { 7541 { .fno = gen_helper_sve2_uqrshrnt_h }, 7542 { .fno = gen_helper_sve2_uqrshrnt_s }, 7543 { .fno = gen_helper_sve2_uqrshrnt_d }, 7544 }; 7545 TRANS_FEAT(UQRSHRNT, aa64_sve2, do_shr_narrow, a, uqrshrnt_ops) 7546 7547 #define DO_SVE2_ZZZ_NARROW(NAME, name) \ 7548 static gen_helper_gvec_3 * const name##_fns[4] = { \ 7549 NULL, gen_helper_sve2_##name##_h, \ 7550 gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \ 7551 }; \ 7552 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz, \ 7553 name##_fns[a->esz], a, 0) 7554 7555 DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb) 7556 DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt) 7557 DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb) 7558 DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt) 7559 7560 DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb) 7561 DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt) 7562 DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb) 7563 DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt) 7564 7565 static gen_helper_gvec_flags_4 * const match_fns[4] = { 7566 gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL 7567 }; 7568 TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz]) 7569 7570 static gen_helper_gvec_flags_4 * const nmatch_fns[4] = { 7571 gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL 7572 }; 7573 TRANS_FEAT_NONSTREAMING(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz]) 7574 7575 static gen_helper_gvec_4 * const histcnt_fns[4] = { 7576 NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d 7577 }; 7578 TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz, 7579 histcnt_fns[a->esz], a, 0) 7580 7581 TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz, 7582 a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0) 7583 7584 DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz) 7585 DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz) 7586 DO_ZPZZ_FP(FMINNMP, aa64_sve2, sve2_fminnmp_zpzz) 7587 DO_ZPZZ_FP(FMAXP, aa64_sve2, sve2_fmaxp_zpzz) 7588 DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz) 7589 7590 static bool do_fmmla(DisasContext *s, arg_rrrr_esz *a, 7591 gen_helper_gvec_4_ptr *fn) 7592 { 7593 if (sve_access_check(s)) { 7594 if (vec_full_reg_size(s) < 4 * memop_size(a->esz)) { 7595 unallocated_encoding(s); 7596 } else { 7597 gen_gvec_fpst_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, 0, FPST_A64); 7598 } 7599 } 7600 return true; 7601 } 7602 7603 TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, do_fmmla, a, gen_helper_fmmla_s) 7604 TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, do_fmmla, a, gen_helper_fmmla_d) 7605 7606 /* 7607 * SVE Integer Multiply-Add (unpredicated) 7608 */ 7609 7610 static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = { 7611 NULL, gen_helper_sve2_sqdmlal_zzzw_h, 7612 gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d, 7613 }; 7614 TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7615 sqdmlal_zzzw_fns[a->esz], a, 0) 7616 TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7617 sqdmlal_zzzw_fns[a->esz], a, 3) 7618 TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz, 7619 sqdmlal_zzzw_fns[a->esz], a, 2) 7620 7621 static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = { 7622 NULL, gen_helper_sve2_sqdmlsl_zzzw_h, 7623 gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d, 7624 }; 7625 TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7626 sqdmlsl_zzzw_fns[a->esz], a, 0) 7627 TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7628 sqdmlsl_zzzw_fns[a->esz], a, 3) 7629 TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz, 7630 sqdmlsl_zzzw_fns[a->esz], a, 2) 7631 7632 static gen_helper_gvec_4 * const sqrdmlah_fns[] = { 7633 gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h, 7634 gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d, 7635 }; 7636 TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz, 7637 sqrdmlah_fns[a->esz], a, 0) 7638 7639 static gen_helper_gvec_4 * const sqrdmlsh_fns[] = { 7640 gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h, 7641 gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d, 7642 }; 7643 TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz, 7644 sqrdmlsh_fns[a->esz], a, 0) 7645 7646 static gen_helper_gvec_4 * const smlal_zzzw_fns[] = { 7647 NULL, gen_helper_sve2_smlal_zzzw_h, 7648 gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d, 7649 }; 7650 TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7651 smlal_zzzw_fns[a->esz], a, 0) 7652 TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7653 smlal_zzzw_fns[a->esz], a, 1) 7654 7655 static gen_helper_gvec_4 * const umlal_zzzw_fns[] = { 7656 NULL, gen_helper_sve2_umlal_zzzw_h, 7657 gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d, 7658 }; 7659 TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7660 umlal_zzzw_fns[a->esz], a, 0) 7661 TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7662 umlal_zzzw_fns[a->esz], a, 1) 7663 7664 static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = { 7665 NULL, gen_helper_sve2_smlsl_zzzw_h, 7666 gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d, 7667 }; 7668 TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7669 smlsl_zzzw_fns[a->esz], a, 0) 7670 TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7671 smlsl_zzzw_fns[a->esz], a, 1) 7672 7673 static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = { 7674 NULL, gen_helper_sve2_umlsl_zzzw_h, 7675 gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d, 7676 }; 7677 TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7678 umlsl_zzzw_fns[a->esz], a, 0) 7679 TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7680 umlsl_zzzw_fns[a->esz], a, 1) 7681 7682 static gen_helper_gvec_4 * const cmla_fns[] = { 7683 gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h, 7684 gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d, 7685 }; 7686 TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz, 7687 cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot) 7688 7689 static gen_helper_gvec_4 * const cdot_fns[] = { 7690 NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d 7691 }; 7692 TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz, 7693 cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot) 7694 7695 static gen_helper_gvec_4 * const sqrdcmlah_fns[] = { 7696 gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h, 7697 gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d, 7698 }; 7699 TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz, 7700 sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot) 7701 7702 TRANS_FEAT(USDOT_zzzz_4s, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, 7703 gen_helper_gvec_usdot_4b, a, 0) 7704 7705 TRANS_FEAT(SDOT_zzzz_2s, aa64_sme2_or_sve2p1, gen_gvec_ool_arg_zzzz, 7706 gen_helper_gvec_sdot_2h, a, 0) 7707 TRANS_FEAT(UDOT_zzzz_2s, aa64_sme2_or_sve2p1, gen_gvec_ool_arg_zzzz, 7708 gen_helper_gvec_udot_2h, a, 0) 7709 7710 TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz, 7711 gen_helper_crypto_aesmc, a->rd, a->rd, 0) 7712 TRANS_FEAT_NONSTREAMING(AESIMC, aa64_sve2_aes, gen_gvec_ool_zz, 7713 gen_helper_crypto_aesimc, a->rd, a->rd, 0) 7714 7715 TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz, 7716 gen_helper_crypto_aese, a, 0) 7717 TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz, 7718 gen_helper_crypto_aesd, a, 0) 7719 7720 TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz, 7721 gen_helper_crypto_sm4e, a, 0) 7722 TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz, 7723 gen_helper_crypto_sm4ekey, a, 0) 7724 7725 TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz, 7726 gen_gvec_rax1, a) 7727 7728 TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz, 7729 gen_helper_sve2_fcvtnt_sh, a, 0, FPST_A64) 7730 TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz, 7731 gen_helper_sve2_fcvtnt_ds, a, 0, FPST_A64) 7732 7733 TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, 7734 gen_helper_sve_bfcvtnt, a, 0, 7735 s->fpcr_ah ? FPST_AH : FPST_A64) 7736 7737 TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz, 7738 gen_helper_sve2_fcvtlt_hs, a, 0, FPST_A64) 7739 TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz, 7740 gen_helper_sve2_fcvtlt_sd, a, 0, FPST_A64) 7741 7742 TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a, 7743 FPROUNDING_ODD, gen_helper_sve_fcvt_ds) 7744 TRANS_FEAT(FCVTXNT_ds, aa64_sve2, do_frint_mode, a, 7745 FPROUNDING_ODD, gen_helper_sve2_fcvtnt_ds) 7746 7747 static gen_helper_gvec_3_ptr * const flogb_fns[] = { 7748 NULL, gen_helper_flogb_h, 7749 gen_helper_flogb_s, gen_helper_flogb_d 7750 }; 7751 TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz], 7752 a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) 7753 7754 static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel) 7755 { 7756 return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzzw_s, 7757 a->rd, a->rn, a->rm, a->ra, 7758 (sel << 1) | sub, tcg_env); 7759 } 7760 7761 TRANS_FEAT(FMLALB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, false) 7762 TRANS_FEAT(FMLALT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, true) 7763 TRANS_FEAT(FMLSLB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, false) 7764 TRANS_FEAT(FMLSLT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, true) 7765 7766 static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel) 7767 { 7768 return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s, 7769 a->rd, a->rn, a->rm, a->ra, 7770 (a->index << 3) | (sel << 1) | sub, tcg_env); 7771 } 7772 7773 TRANS_FEAT(FMLALB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, false) 7774 TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true) 7775 TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false) 7776 TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true) 7777 7778 TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, 7779 gen_helper_gvec_smmla_b, a, 0) 7780 TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, 7781 gen_helper_gvec_usmmla_b, a, 0) 7782 TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, 7783 gen_helper_gvec_ummla_b, a, 0) 7784 7785 TRANS_FEAT(FDOT_zzzz, aa64_sme2_or_sve2p1, gen_gvec_env_arg_zzzz, 7786 gen_helper_sme2_fdot_h, a, 0) 7787 TRANS_FEAT(FDOT_zzxz, aa64_sme2_or_sve2p1, gen_gvec_env_arg_zzxz, 7788 gen_helper_sme2_fdot_idx_h, a) 7789 7790 TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_env_arg_zzzz, 7791 gen_helper_gvec_bfdot, a, 0) 7792 TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_env_arg_zzxz, 7793 gen_helper_gvec_bfdot_idx, a) 7794 7795 TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_env_arg_zzzz, 7796 gen_helper_gvec_bfmmla, a, 0) 7797 7798 static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel) 7799 { 7800 return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal, 7801 a->rd, a->rn, a->rm, a->ra, sel, 7802 s->fpcr_ah ? FPST_AH : FPST_A64); 7803 } 7804 7805 TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false) 7806 TRANS_FEAT(BFMLALT_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, true) 7807 7808 static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel) 7809 { 7810 return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx, 7811 a->rd, a->rn, a->rm, a->ra, 7812 (a->index << 1) | sel, 7813 s->fpcr_ah ? FPST_AH : FPST_A64); 7814 } 7815 7816 TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false) 7817 TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true) 7818 7819 static bool do_BFMLSL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel) 7820 { 7821 if (s->fpcr_ah) { 7822 return gen_gvec_fpst_zzzz(s, gen_helper_gvec_ah_bfmlsl, 7823 a->rd, a->rn, a->rm, a->ra, sel, FPST_AH); 7824 } else { 7825 return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlsl, 7826 a->rd, a->rn, a->rm, a->ra, sel, FPST_A64); 7827 } 7828 } 7829 7830 TRANS_FEAT(BFMLSLB_zzzw, aa64_sme2_or_sve2p1, do_BFMLSL_zzzw, a, false) 7831 TRANS_FEAT(BFMLSLT_zzzw, aa64_sme2_or_sve2p1, do_BFMLSL_zzzw, a, true) 7832 7833 static bool do_BFMLSL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel) 7834 { 7835 if (s->fpcr_ah) { 7836 return gen_gvec_fpst_zzzz(s, gen_helper_gvec_ah_bfmlsl_idx, 7837 a->rd, a->rn, a->rm, a->ra, 7838 (a->index << 1) | sel, FPST_AH); 7839 } else { 7840 return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlsl_idx, 7841 a->rd, a->rn, a->rm, a->ra, 7842 (a->index << 1) | sel, FPST_A64); 7843 } 7844 } 7845 7846 TRANS_FEAT(BFMLSLB_zzxw, aa64_sme2_or_sve2p1, do_BFMLSL_zzxw, a, false) 7847 TRANS_FEAT(BFMLSLT_zzxw, aa64_sme2_or_sve2p1, do_BFMLSL_zzxw, a, true) 7848 7849 static bool trans_PSEL(DisasContext *s, arg_psel *a) 7850 { 7851 int vl = vec_full_reg_size(s); 7852 int pl = pred_gvec_reg_size(s); 7853 int elements = vl >> a->esz; 7854 TCGv_i64 tmp, didx, dbit; 7855 TCGv_ptr ptr; 7856 7857 if (!dc_isar_feature(aa64_sme_or_sve2p1, s)) { 7858 return false; 7859 } 7860 if (!sve_access_check(s)) { 7861 return true; 7862 } 7863 7864 tmp = tcg_temp_new_i64(); 7865 dbit = tcg_temp_new_i64(); 7866 didx = tcg_temp_new_i64(); 7867 ptr = tcg_temp_new_ptr(); 7868 7869 /* Compute the predicate element. */ 7870 tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm); 7871 if (is_power_of_2(elements)) { 7872 tcg_gen_andi_i64(tmp, tmp, elements - 1); 7873 } else { 7874 tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements)); 7875 } 7876 7877 /* Extract the predicate byte and bit indices. */ 7878 tcg_gen_shli_i64(tmp, tmp, a->esz); 7879 tcg_gen_andi_i64(dbit, tmp, 7); 7880 tcg_gen_shri_i64(didx, tmp, 3); 7881 if (HOST_BIG_ENDIAN) { 7882 tcg_gen_xori_i64(didx, didx, 7); 7883 } 7884 7885 /* Load the predicate word. */ 7886 tcg_gen_trunc_i64_ptr(ptr, didx); 7887 tcg_gen_add_ptr(ptr, ptr, tcg_env); 7888 tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm)); 7889 7890 /* Extract the predicate bit and replicate to MO_64. */ 7891 tcg_gen_shr_i64(tmp, tmp, dbit); 7892 tcg_gen_andi_i64(tmp, tmp, 1); 7893 tcg_gen_neg_i64(tmp, tmp); 7894 7895 /* Apply to either copy the source, or write zeros. */ 7896 pl = size_for_gvec(pl); 7897 tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd), 7898 pred_full_reg_offset(s, a->pn), tmp, pl, pl); 7899 return true; 7900 } 7901 7902 static void gen_sclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a) 7903 { 7904 tcg_gen_smax_i32(d, a, n); 7905 tcg_gen_smin_i32(d, d, m); 7906 } 7907 7908 static void gen_sclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a) 7909 { 7910 tcg_gen_smax_i64(d, a, n); 7911 tcg_gen_smin_i64(d, d, m); 7912 } 7913 7914 static void gen_sclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 7915 TCGv_vec m, TCGv_vec a) 7916 { 7917 tcg_gen_smax_vec(vece, d, a, n); 7918 tcg_gen_smin_vec(vece, d, d, m); 7919 } 7920 7921 static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 7922 uint32_t a, uint32_t oprsz, uint32_t maxsz) 7923 { 7924 static const TCGOpcode vecop[] = { 7925 INDEX_op_smin_vec, INDEX_op_smax_vec, 0 7926 }; 7927 static const GVecGen4 ops[4] = { 7928 { .fniv = gen_sclamp_vec, 7929 .fno = gen_helper_gvec_sclamp_b, 7930 .opt_opc = vecop, 7931 .vece = MO_8 }, 7932 { .fniv = gen_sclamp_vec, 7933 .fno = gen_helper_gvec_sclamp_h, 7934 .opt_opc = vecop, 7935 .vece = MO_16 }, 7936 { .fni4 = gen_sclamp_i32, 7937 .fniv = gen_sclamp_vec, 7938 .fno = gen_helper_gvec_sclamp_s, 7939 .opt_opc = vecop, 7940 .vece = MO_32 }, 7941 { .fni8 = gen_sclamp_i64, 7942 .fniv = gen_sclamp_vec, 7943 .fno = gen_helper_gvec_sclamp_d, 7944 .opt_opc = vecop, 7945 .vece = MO_64, 7946 .prefer_i64 = TCG_TARGET_REG_BITS == 64 } 7947 }; 7948 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]); 7949 } 7950 7951 TRANS_FEAT(SCLAMP, aa64_sme_or_sve2p1, gen_gvec_fn_arg_zzzz, gen_sclamp, a) 7952 7953 static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a) 7954 { 7955 tcg_gen_umax_i32(d, a, n); 7956 tcg_gen_umin_i32(d, d, m); 7957 } 7958 7959 static void gen_uclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a) 7960 { 7961 tcg_gen_umax_i64(d, a, n); 7962 tcg_gen_umin_i64(d, d, m); 7963 } 7964 7965 static void gen_uclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 7966 TCGv_vec m, TCGv_vec a) 7967 { 7968 tcg_gen_umax_vec(vece, d, a, n); 7969 tcg_gen_umin_vec(vece, d, d, m); 7970 } 7971 7972 static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 7973 uint32_t a, uint32_t oprsz, uint32_t maxsz) 7974 { 7975 static const TCGOpcode vecop[] = { 7976 INDEX_op_umin_vec, INDEX_op_umax_vec, 0 7977 }; 7978 static const GVecGen4 ops[4] = { 7979 { .fniv = gen_uclamp_vec, 7980 .fno = gen_helper_gvec_uclamp_b, 7981 .opt_opc = vecop, 7982 .vece = MO_8 }, 7983 { .fniv = gen_uclamp_vec, 7984 .fno = gen_helper_gvec_uclamp_h, 7985 .opt_opc = vecop, 7986 .vece = MO_16 }, 7987 { .fni4 = gen_uclamp_i32, 7988 .fniv = gen_uclamp_vec, 7989 .fno = gen_helper_gvec_uclamp_s, 7990 .opt_opc = vecop, 7991 .vece = MO_32 }, 7992 { .fni8 = gen_uclamp_i64, 7993 .fniv = gen_uclamp_vec, 7994 .fno = gen_helper_gvec_uclamp_d, 7995 .opt_opc = vecop, 7996 .vece = MO_64, 7997 .prefer_i64 = TCG_TARGET_REG_BITS == 64 } 7998 }; 7999 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]); 8000 } 8001 8002 TRANS_FEAT(UCLAMP, aa64_sme_or_sve2p1, gen_gvec_fn_arg_zzzz, gen_uclamp, a) 8003 8004 static bool trans_FCLAMP(DisasContext *s, arg_FCLAMP *a) 8005 { 8006 static gen_helper_gvec_3_ptr * const fn[] = { 8007 gen_helper_sme2_bfclamp, 8008 gen_helper_sme2_fclamp_h, 8009 gen_helper_sme2_fclamp_s, 8010 gen_helper_sme2_fclamp_d, 8011 }; 8012 8013 /* This insn uses MO_8 to encode BFloat16. */ 8014 if (a->esz == MO_8 8015 ? !dc_isar_feature(aa64_sve_b16b16, s) 8016 : !dc_isar_feature(aa64_sme2_or_sve2p1, s)) { 8017 return false; 8018 } 8019 8020 /* So far we never optimize rda with MOVPRFX */ 8021 assert(a->rd == a->ra); 8022 return gen_gvec_fpst_zzz(s, fn[a->esz], a->rd, a->rn, a->rm, 1, 8023 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); 8024 } 8025 8026 TRANS_FEAT(SQCVTN_sh, aa64_sme2_or_sve2p1, gen_gvec_ool_zz, 8027 gen_helper_sme2_sqcvtn_sh, a->rd, a->rn, 0) 8028 TRANS_FEAT(UQCVTN_sh, aa64_sme2_or_sve2p1, gen_gvec_ool_zz, 8029 gen_helper_sme2_uqcvtn_sh, a->rd, a->rn, 0) 8030 TRANS_FEAT(SQCVTUN_sh, aa64_sme2_or_sve2p1, gen_gvec_ool_zz, 8031 gen_helper_sme2_sqcvtun_sh, a->rd, a->rn, 0) 8032 8033 static bool gen_ldst_c(DisasContext *s, TCGv_i64 addr, int zd, int png, 8034 MemOp esz, bool is_write, int n, bool strided) 8035 { 8036 typedef void ldst_c_fn(TCGv_env, TCGv_ptr, TCGv_i64, 8037 TCGv_i32, TCGv_i32); 8038 static ldst_c_fn * const f_ldst[2][2][4] = { 8039 { { gen_helper_sve2p1_ld1bb_c, 8040 gen_helper_sve2p1_ld1hh_le_c, 8041 gen_helper_sve2p1_ld1ss_le_c, 8042 gen_helper_sve2p1_ld1dd_le_c, }, 8043 { gen_helper_sve2p1_ld1bb_c, 8044 gen_helper_sve2p1_ld1hh_be_c, 8045 gen_helper_sve2p1_ld1ss_be_c, 8046 gen_helper_sve2p1_ld1dd_be_c, } }, 8047 8048 { { gen_helper_sve2p1_st1bb_c, 8049 gen_helper_sve2p1_st1hh_le_c, 8050 gen_helper_sve2p1_st1ss_le_c, 8051 gen_helper_sve2p1_st1dd_le_c, }, 8052 { gen_helper_sve2p1_st1bb_c, 8053 gen_helper_sve2p1_st1hh_be_c, 8054 gen_helper_sve2p1_st1ss_be_c, 8055 gen_helper_sve2p1_st1dd_be_c, } } 8056 }; 8057 8058 TCGv_i32 t_png, t_desc; 8059 TCGv_ptr t_zd; 8060 uint32_t desc, lg2_rstride = 0; 8061 bool be = s->be_data == MO_BE; 8062 8063 assert(n == 2 || n == 4); 8064 if (strided) { 8065 lg2_rstride = 3; 8066 if (n == 4) { 8067 /* Validate ZD alignment. */ 8068 if (zd & 4) { 8069 return false; 8070 } 8071 lg2_rstride = 2; 8072 } 8073 /* Ignore non-temporal bit */ 8074 zd &= ~8; 8075 } 8076 8077 if (strided || !dc_isar_feature(aa64_sve2p1, s) 8078 ? !sme_sm_enabled_check(s) 8079 : !sve_access_check(s)) { 8080 return true; 8081 } 8082 8083 if (!s->mte_active[0]) { 8084 addr = clean_data_tbi(s, addr); 8085 } 8086 8087 desc = n == 2 ? 0 : 1; 8088 desc = desc | (lg2_rstride << 1); 8089 desc = make_svemte_desc(s, vec_full_reg_size(s), 1, esz, is_write, desc); 8090 t_desc = tcg_constant_i32(desc); 8091 8092 t_png = tcg_temp_new_i32(); 8093 tcg_gen_ld16u_i32(t_png, tcg_env, 8094 pred_full_reg_offset(s, png) ^ 8095 (HOST_BIG_ENDIAN ? 6 : 0)); 8096 8097 t_zd = tcg_temp_new_ptr(); 8098 tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, zd)); 8099 8100 f_ldst[is_write][be][esz](tcg_env, t_zd, addr, t_png, t_desc); 8101 return true; 8102 } 8103 8104 static bool gen_ldst_zcrr_c(DisasContext *s, arg_zcrr_ldst *a, 8105 bool is_write, bool strided) 8106 { 8107 TCGv_i64 addr = tcg_temp_new_i64(); 8108 8109 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->esz); 8110 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 8111 return gen_ldst_c(s, addr, a->rd, a->png, a->esz, is_write, 8112 a->nreg, strided); 8113 } 8114 8115 static bool gen_ldst_zcri_c(DisasContext *s, arg_zcri_ldst *a, 8116 bool is_write, bool strided) 8117 { 8118 TCGv_i64 addr = tcg_temp_new_i64(); 8119 8120 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), 8121 a->imm * a->nreg * vec_full_reg_size(s)); 8122 return gen_ldst_c(s, addr, a->rd, a->png, a->esz, is_write, 8123 a->nreg, strided); 8124 } 8125 8126 TRANS_FEAT(LD1_zcrr, aa64_sme2_or_sve2p1, gen_ldst_zcrr_c, a, false, false) 8127 TRANS_FEAT(LD1_zcri, aa64_sme2_or_sve2p1, gen_ldst_zcri_c, a, false, false) 8128 TRANS_FEAT(ST1_zcrr, aa64_sme2_or_sve2p1, gen_ldst_zcrr_c, a, true, false) 8129 TRANS_FEAT(ST1_zcri, aa64_sme2_or_sve2p1, gen_ldst_zcri_c, a, true, false) 8130 8131 TRANS_FEAT(LD1_zcrr_stride, aa64_sme2, gen_ldst_zcrr_c, a, false, true) 8132 TRANS_FEAT(LD1_zcri_stride, aa64_sme2, gen_ldst_zcri_c, a, false, true) 8133 TRANS_FEAT(ST1_zcrr_stride, aa64_sme2, gen_ldst_zcrr_c, a, true, true) 8134 TRANS_FEAT(ST1_zcri_stride, aa64_sme2, gen_ldst_zcri_c, a, true, true) 8135