/*
 * AArch64 specific helpers
 *
 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/units.h"
#include "cpu.h"
#include "gdbstub/helpers.h"
#include "exec/helper-proto.h"
#include "qemu/host-utils.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/bitops.h"
#include "internals.h"
#include "qemu/crc32c.h"
#include "exec/cpu-common.h"
#include "accel/tcg/cpu-ldst.h"
#include "accel/tcg/helper-retaddr.h"
#include "accel/tcg/probe.h"
#include "exec/target_page.h"
#include "exec/tlb-flags.h"
#include "qemu/int128.h"
#include "qemu/atomic128.h"
#include "fpu/softfloat.h"
#include <zlib.h> /* for crc32 */
#ifdef CONFIG_USER_ONLY
#include "user/page-protection.h"
#endif
#include "vec_internal.h"

/* C2.4.7 Multiply and divide */
/* special cases for 0 and LLONG_MIN are mandated by the standard */
uint64_t HELPER(udiv64)(uint64_t num, uint64_t den)
{
    if (den == 0) {
        return 0;
    }
    return num / den;
}

int64_t HELPER(sdiv64)(int64_t num, int64_t den)
{
    if (den == 0) {
        return 0;
    }
    if (num == LLONG_MIN && den == -1) {
        return LLONG_MIN;
    }
    return num / den;
}

uint64_t HELPER(rbit64)(uint64_t x)
{
    return revbit64(x);
}

void HELPER(msr_i_spsel)(CPUARMState *env, uint32_t imm)
{
    update_spsel(env, imm);
}

void HELPER(msr_set_allint_el1)(CPUARMState *env)
{
    /* ALLINT update to PSTATE. */
    if (arm_hcrx_el2_eff(env) & HCRX_TALLINT) {
        raise_exception_ra(env, EXCP_UDEF,
                           syn_aa64_sysregtrap(0, 1, 0, 4, 1, 0x1f, 0), 2,
                           GETPC());
    }

    env->pstate |= PSTATE_ALLINT;
}

static void daif_check(CPUARMState *env, uint32_t op,
                       uint32_t imm, uintptr_t ra)
{
    /* DAIF update to PSTATE. This is OK from EL0 only if UMA is set. */
    if (arm_current_el(env) == 0 && !(arm_sctlr(env, 0) & SCTLR_UMA)) {
        raise_exception_ra(env, EXCP_UDEF,
                           syn_aa64_sysregtrap(0, extract32(op, 0, 3),
                                               extract32(op, 3, 3), 4,
                                               imm, 0x1f, 0),
                           exception_target_el(env), ra);
    }
}

void HELPER(msr_i_daifset)(CPUARMState *env, uint32_t imm)
{
    daif_check(env, 0x1e, imm, GETPC());
    env->daif |= (imm << 6) & PSTATE_DAIF;
    arm_rebuild_hflags(env);
}

void HELPER(msr_i_daifclear)(CPUARMState *env, uint32_t imm)
{
    daif_check(env, 0x1f, imm, GETPC());
    env->daif &= ~((imm << 6) & PSTATE_DAIF);
    arm_rebuild_hflags(env);
}

/* Convert a softfloat float_relation_ (as returned by
 * the float*_compare functions) to the correct ARM
 * NZCV flag state.
 */
static inline uint32_t float_rel_to_flags(int res)
{
    uint64_t flags;
    switch (res) {
    case float_relation_equal:
        flags = PSTATE_Z | PSTATE_C;
        break;
    case float_relation_less:
        flags = PSTATE_N;
        break;
    case float_relation_greater:
        flags = PSTATE_C;
        break;
    case float_relation_unordered:
    default:
        flags = PSTATE_C | PSTATE_V;
        break;
    }
    return flags;
}

uint64_t HELPER(vfp_cmph_a64)(uint32_t x, uint32_t y, float_status *fp_status)
{
    return float_rel_to_flags(float16_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpeh_a64)(uint32_t x, uint32_t y, float_status *fp_status)
{
    return float_rel_to_flags(float16_compare(x, y, fp_status));
}

uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, float_status *fp_status)
{
    return float_rel_to_flags(float32_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpes_a64)(float32 x, float32 y, float_status *fp_status)
{
    return float_rel_to_flags(float32_compare(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpd_a64)(float64 x, float64 y, float_status *fp_status)
{
    return float_rel_to_flags(float64_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, float_status *fp_status)
{
    return float_rel_to_flags(float64_compare(x, y, fp_status));
}

float32 HELPER(vfp_mulxs)(float32 a, float32 b, float_status *fpst)
{
    a = float32_squash_input_denormal(a, fpst);
    b = float32_squash_input_denormal(b, fpst);

    if ((float32_is_zero(a) && float32_is_infinity(b)) ||
        (float32_is_infinity(a) && float32_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float32((1U << 30) |
                            ((float32_val(a) ^ float32_val(b)) & (1U << 31)));
    }
    return float32_mul(a, b, fpst);
}

float64 HELPER(vfp_mulxd)(float64 a, float64 b, float_status *fpst)
{
    a = float64_squash_input_denormal(a, fpst);
    b = float64_squash_input_denormal(b, fpst);

    if ((float64_is_zero(a) && float64_is_infinity(b)) ||
        (float64_is_infinity(a) && float64_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float64((1ULL << 62) |
                            ((float64_val(a) ^ float64_val(b)) & (1ULL << 63)));
    }
    return float64_mul(a, b, fpst);
}

/* 64bit/double versions of the neon float compare functions */
uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, float_status *fpst)
{
    return -float64_eq_quiet(a, b, fpst);
}

uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, float_status *fpst)
{
    return -float64_le(b, a, fpst);
}

uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, float_status *fpst)
{
    return -float64_lt(b, a, fpst);
}

/*
 * Reciprocal step and sqrt step. Note that unlike the A32/T32
 * versions, these do a fully fused multiply-add or
 * multiply-add-and-halve.
 * The FPCR.AH == 1 versions need to avoid flipping the sign of NaN.
 */
#define DO_RECPS(NAME, CTYPE, FLOATTYPE, CHSFN)                           \
    CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst)              \
    {                                                                     \
        a = FLOATTYPE ## _squash_input_denormal(a, fpst);                 \
        b = FLOATTYPE ## _squash_input_denormal(b, fpst);                 \
        a = FLOATTYPE ## _ ## CHSFN(a);                                   \
        if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \
            (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \
            return FLOATTYPE ## _two;                                     \
        }                                                                 \
        return FLOATTYPE ## _muladd(a, b, FLOATTYPE ## _two, 0, fpst);    \
    }

DO_RECPS(recpsf_f16, uint32_t, float16, chs)
DO_RECPS(recpsf_f32, float32, float32, chs)
DO_RECPS(recpsf_f64, float64, float64, chs)
DO_RECPS(recpsf_ah_f16, uint32_t, float16, ah_chs)
DO_RECPS(recpsf_ah_f32, float32, float32, ah_chs)
DO_RECPS(recpsf_ah_f64, float64, float64, ah_chs)

#define DO_RSQRTSF(NAME, CTYPE, FLOATTYPE, CHSFN)                         \
    CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst)              \
    {                                                                     \
        a = FLOATTYPE ## _squash_input_denormal(a, fpst);                 \
        b = FLOATTYPE ## _squash_input_denormal(b, fpst);                 \
        a = FLOATTYPE ## _ ## CHSFN(a);                                   \
        if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \
            (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \
            return FLOATTYPE ## _one_point_five;                          \
        }                                                                 \
        return FLOATTYPE ## _muladd_scalbn(a, b, FLOATTYPE ## _three,     \
                                           -1, 0, fpst);                  \
    }

DO_RSQRTSF(rsqrtsf_f16, uint32_t, float16, chs)
DO_RSQRTSF(rsqrtsf_f32, float32, float32, chs)
DO_RSQRTSF(rsqrtsf_f64, float64, float64, chs)
DO_RSQRTSF(rsqrtsf_ah_f16, uint32_t, float16, ah_chs)
DO_RSQRTSF(rsqrtsf_ah_f32, float32, float32, ah_chs)
DO_RSQRTSF(rsqrtsf_ah_f64, float64, float64, ah_chs)

/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
uint32_t HELPER(frecpx_f16)(uint32_t a, float_status *fpst)
{
    uint16_t val16, sbit;
    int16_t exp;

    if (float16_is_any_nan(a)) {
        float16 nan = a;
        if (float16_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            if (!fpst->default_nan_mode) {
                nan = float16_silence_nan(a, fpst);
            }
        }
        if (fpst->default_nan_mode) {
            nan = float16_default_nan(fpst);
        }
        return nan;
    }

    a = float16_squash_input_denormal(a, fpst);

    val16 = float16_val(a);
    sbit = 0x8000 & val16;
    exp = extract32(val16, 10, 5);

    if (exp == 0) {
        return make_float16(deposit32(sbit, 10, 5, 0x1e));
    } else {
        return make_float16(deposit32(sbit, 10, 5, ~exp));
    }
}

float32 HELPER(frecpx_f32)(float32 a, float_status *fpst)
{
    uint32_t val32, sbit;
    int32_t exp;

    if (float32_is_any_nan(a)) {
        float32 nan = a;
        if (float32_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            if (!fpst->default_nan_mode) {
                nan = float32_silence_nan(a, fpst);
            }
        }
        if (fpst->default_nan_mode) {
            nan = float32_default_nan(fpst);
        }
        return nan;
    }

    a = float32_squash_input_denormal(a, fpst);

    val32 = float32_val(a);
    sbit = 0x80000000ULL & val32;
    exp = extract32(val32, 23, 8);

    if (exp == 0) {
        return make_float32(sbit | (0xfe << 23));
    } else {
        return make_float32(sbit | (~exp & 0xff) << 23);
    }
}

float64 HELPER(frecpx_f64)(float64 a, float_status *fpst)
{
    uint64_t val64, sbit;
    int64_t exp;

    if (float64_is_any_nan(a)) {
        float64 nan = a;
        if (float64_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            if (!fpst->default_nan_mode) {
                nan = float64_silence_nan(a, fpst);
            }
        }
        if (fpst->default_nan_mode) {
            nan = float64_default_nan(fpst);
        }
        return nan;
    }

    a = float64_squash_input_denormal(a, fpst);

    val64 = float64_val(a);
    sbit = 0x8000000000000000ULL & val64;
    exp = extract64(float64_val(a), 52, 11);

    if (exp == 0) {
        return make_float64(sbit | (0x7feULL << 52));
    } else {
        return make_float64(sbit | (~exp & 0x7ffULL) << 52);
    }
}

float32 HELPER(fcvtx_f64_to_f32)(float64 a, float_status *fpst)
{
    float32 r;
    int old = get_float_rounding_mode(fpst);

    set_float_rounding_mode(float_round_to_odd, fpst);
    r = float64_to_float32(a, fpst);
    set_float_rounding_mode(old, fpst);
    return r;
}

/*
 * AH=1 min/max have some odd special cases:
 * comparing two zeroes (regardless of sign), (NaN, anything),
 * or (anything, NaN) should return the second argument (possibly
 * squashed to zero).
 * Also, denormal outputs are not squashed to zero regardless of FZ or FZ16.
 */
#define AH_MINMAX_HELPER(NAME, CTYPE, FLOATTYPE, MINMAX)            \
    CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst)        \
    {                                                               \
        bool save;                                                  \
        CTYPE r;                                                    \
        a = FLOATTYPE ## _squash_input_denormal(a, fpst);           \
        b = FLOATTYPE ## _squash_input_denormal(b, fpst);           \
        if (FLOATTYPE ## _is_zero(a) && FLOATTYPE ## _is_zero(b)) { \
            return b;                                               \
        }                                                           \
        if (FLOATTYPE ## _is_any_nan(a) ||                          \
            FLOATTYPE ## _is_any_nan(b)) {                          \
            float_raise(float_flag_invalid, fpst);                  \
            return b;                                               \
        }                                                           \
        save = get_flush_to_zero(fpst);                             \
        set_flush_to_zero(false, fpst);                             \
        r = FLOATTYPE ## _ ## MINMAX(a, b, fpst);                   \
        set_flush_to_zero(save, fpst);                              \
        return r;                                                   \
    }

AH_MINMAX_HELPER(vfp_ah_minh, dh_ctype_f16, float16, min)
AH_MINMAX_HELPER(vfp_ah_mins, float32, float32, min)
AH_MINMAX_HELPER(vfp_ah_mind, float64, float64, min)
AH_MINMAX_HELPER(vfp_ah_maxh, dh_ctype_f16, float16, max)
AH_MINMAX_HELPER(vfp_ah_maxs, float32, float32, max)
AH_MINMAX_HELPER(vfp_ah_maxd, float64, float64, max)

/* 64-bit versions of the CRC helpers. Note that although the operation
 * (and the prototypes of crc32c() and crc32()) means that only the bottom
 * 32 bits of the accumulator and result are used, we pass and return
 * uint64_t for convenience of the generated code. Unlike the 32-bit
 * instruction set versions, val may genuinely have 64 bits of data in it.
 * The upper bytes of val (above the number specified by 'bytes') must have
 * been zeroed out by the caller.
 */
uint64_t HELPER(crc32_64)(uint64_t acc, uint64_t val, uint32_t bytes)
{
    uint8_t buf[8];

    stq_le_p(buf, val);

    /* zlib crc32 converts the accumulator and output to one's complement. */
    return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
}

uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
{
    uint8_t buf[8];

    stq_le_p(buf, val);

    /* Linux crc32c converts the output to one's complement. */
    return crc32c(acc, buf, bytes) ^ 0xffffffff;
}
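
/*
 * Usage sketch (an assumption for illustration, not an additional contract):
 * the A64 CRC32{,C}{B,H,W,X} instructions pass 'bytes' of 1, 2, 4 or 8
 * respectively, so e.g. CRC32CX Wd, Wn, Xm would correspond to
 * crc32c_64(Wn, Xm, 8), with the narrower forms having already zeroed
 * the upper bytes of val as described above.
 */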

/*
 * AdvSIMD half-precision
 */

#define ADVSIMD_HELPER(name, suffix) HELPER(glue(glue(advsimd_, name), suffix))

#define ADVSIMD_HALFOP(name) \
uint32_t ADVSIMD_HELPER(name, h)(uint32_t a, uint32_t b, float_status *fpst) \
{ \
    return float16_ ## name(a, b, fpst); \
}

#define ADVSIMD_TWOHALFOP(name)                                       \
uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b,     \
                                  float_status *fpst)                 \
{                                                                     \
    float16 a1, a2, b1, b2;                                           \
    uint32_t r1, r2;                                                  \
    a1 = extract32(two_a, 0, 16);                                     \
    a2 = extract32(two_a, 16, 16);                                    \
    b1 = extract32(two_b, 0, 16);                                     \
    b2 = extract32(two_b, 16, 16);                                    \
    r1 = float16_ ## name(a1, b1, fpst);                              \
    r2 = float16_ ## name(a2, b2, fpst);                              \
    return deposit32(r1, 16, 16, r2);                                 \
}

ADVSIMD_TWOHALFOP(add)
ADVSIMD_TWOHALFOP(sub)
ADVSIMD_TWOHALFOP(mul)
ADVSIMD_TWOHALFOP(div)
ADVSIMD_TWOHALFOP(min)
ADVSIMD_TWOHALFOP(max)
ADVSIMD_TWOHALFOP(minnum)
ADVSIMD_TWOHALFOP(maxnum)

/* Data processing - scalar floating-point and advanced SIMD */
static float16 float16_mulx(float16 a, float16 b, float_status *fpst)
{
    a = float16_squash_input_denormal(a, fpst);
    b = float16_squash_input_denormal(b, fpst);

    if ((float16_is_zero(a) && float16_is_infinity(b)) ||
        (float16_is_infinity(a) && float16_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float16((1U << 14) |
                            ((float16_val(a) ^ float16_val(b)) & (1U << 15)));
    }
    return float16_mul(a, b, fpst);
}

ADVSIMD_HALFOP(mulx)
ADVSIMD_TWOHALFOP(mulx)

/* fused multiply-accumulate */
uint32_t HELPER(advsimd_muladdh)(uint32_t a, uint32_t b, uint32_t c,
                                 float_status *fpst)
{
    return float16_muladd(a, b, c, 0, fpst);
}

uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b,
                                  uint32_t two_c, float_status *fpst)
{
    float16 a1, a2, b1, b2, c1, c2;
    uint32_t r1, r2;
    a1 = extract32(two_a, 0, 16);
    a2 = extract32(two_a, 16, 16);
    b1 = extract32(two_b, 0, 16);
    b2 = extract32(two_b, 16, 16);
    c1 = extract32(two_c, 0, 16);
    c2 = extract32(two_c, 16, 16);
    r1 = float16_muladd(a1, b1, c1, 0, fpst);
    r2 = float16_muladd(a2, b2, c2, 0, fpst);
    return deposit32(r1, 16, 16, r2);
}

/*
 * Floating point comparisons produce an integer result. Softfloat
 * routines return float_relation types which we convert to the 0/-1
 * Neon requires.
 */

#define ADVSIMD_CMPRES(test) (test) ? 0xffff : 0

uint32_t HELPER(advsimd_ceq_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    int compare = float16_compare_quiet(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_equal);
}

uint32_t HELPER(advsimd_cge_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    int compare = float16_compare(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater ||
                          compare == float_relation_equal);
}

uint32_t HELPER(advsimd_cgt_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    int compare = float16_compare(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater);
}

uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    float16 f0 = float16_abs(a);
    float16 f1 = float16_abs(b);
    int compare = float16_compare(f0, f1, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater ||
                          compare == float_relation_equal);
}

uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    float16 f0 = float16_abs(a);
    float16 f1 = float16_abs(b);
    int compare = float16_compare(f0, f1, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater);
}

/* round to integral */
uint32_t HELPER(advsimd_rinth_exact)(uint32_t x, float_status *fp_status)
{
    return float16_round_to_int(x, fp_status);
}

uint32_t HELPER(advsimd_rinth)(uint32_t x, float_status *fp_status)
{
    int old_flags = get_float_exception_flags(fp_status), new_flags;
    float16 ret;

    ret = float16_round_to_int(x, fp_status);

    /* Suppress any inexact exceptions the conversion produced */
    if (!(old_flags & float_flag_inexact)) {
        new_flags = get_float_exception_flags(fp_status);
        set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
    }

    return ret;
}

static int el_from_spsr(uint32_t spsr)
{
    /* Return the exception level that this SPSR is requesting a return to,
     * or -1 if it is invalid (an illegal return)
     */
    if (spsr & PSTATE_nRW) {
        switch (spsr & CPSR_M) {
        case ARM_CPU_MODE_USR:
            return 0;
        case ARM_CPU_MODE_HYP:
            return 2;
        case ARM_CPU_MODE_FIQ:
        case ARM_CPU_MODE_IRQ:
        case ARM_CPU_MODE_SVC:
        case ARM_CPU_MODE_ABT:
        case ARM_CPU_MODE_UND:
        case ARM_CPU_MODE_SYS:
            return 1;
        case ARM_CPU_MODE_MON:
            /* Returning to Mon from AArch64 is never possible,
             * so this is an illegal return.
             */
        default:
            return -1;
        }
    } else {
        if (extract32(spsr, 1, 1)) {
            /* Return with reserved M[1] bit set */
            return -1;
        }
        if (extract32(spsr, 0, 4) == 1) {
            /* return to EL0 with M[0] bit set */
            return -1;
        }
        return extract32(spsr, 2, 2);
    }
}
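
/*
 * Illustrative examples (hypothetical SPSR values, not taken from the code
 * above): an AArch64-style SPSR with M[3:0] == 0b0101 ("EL1h", EL1 using
 * SP_EL1) yields 1 via M[3:2], while M[3:0] == 0b0001 is rejected because
 * a return to EL0 may not select the EL1 stack pointer.
 */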

static void cpsr_write_from_spsr_elx(CPUARMState *env,
                                     uint32_t val)
{
    uint32_t mask;

    /* Save SPSR_ELx.SS into PSTATE. */
    env->pstate = (env->pstate & ~PSTATE_SS) | (val & PSTATE_SS);
    val &= ~PSTATE_SS;

    /* Move DIT to the correct location for CPSR */
    if (val & PSTATE_DIT) {
        val &= ~PSTATE_DIT;
        val |= CPSR_DIT;
    }

    mask = aarch32_cpsr_valid_mask(env->features,
                                   &env_archcpu(env)->isar);
    cpsr_write(env, val, mask, CPSRWriteRaw);
}

void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
{
    ARMCPU *cpu = env_archcpu(env);
    int cur_el = arm_current_el(env);
    unsigned int spsr_idx = aarch64_banked_spsr_index(cur_el);
    uint32_t spsr = env->banked_spsr[spsr_idx];
    int new_el;
    bool return_to_aa64 = (spsr & PSTATE_nRW) == 0;

    aarch64_save_sp(env, cur_el);

    arm_clear_exclusive(env);

    /* We must squash the PSTATE.SS bit to zero unless both of the
     * following hold:
     *  1. debug exceptions are currently disabled
     *  2. singlestep will be active in the EL we return to
     * We check 1 here and 2 after we've done the pstate/cpsr write() to
     * transition to the EL we're going to.
     */
    if (arm_generate_debug_exceptions(env)) {
        spsr &= ~PSTATE_SS;
    }

    /*
     * FEAT_RME forbids return from EL3 with an invalid security state.
     * We don't need an explicit check for FEAT_RME here because we enforce
     * in scr_write() that you can't set the NSE bit without it.
     */
    if (cur_el == 3 && (env->cp15.scr_el3 & (SCR_NS | SCR_NSE)) == SCR_NSE) {
        goto illegal_return;
    }

    new_el = el_from_spsr(spsr);
    if (new_el == -1) {
        goto illegal_return;
    }
    if (new_el > cur_el || (new_el == 2 && !arm_is_el2_enabled(env))) {
        /* Disallow return to an EL which is unimplemented or higher
         * than the current one.
         */
        goto illegal_return;
    }

    if (new_el != 0 && arm_el_is_aa64(env, new_el) != return_to_aa64) {
        /* Return to an EL which is configured for a different register width */
        goto illegal_return;
    }

    if (!return_to_aa64 && !cpu_isar_feature(aa64_aa32, cpu)) {
        /* Return to AArch32 when CPU is AArch64-only */
        goto illegal_return;
    }

    if (new_el == 1 && (arm_hcr_el2_eff(env) & HCR_TGE)) {
        goto illegal_return;
    }

    bql_lock();
    arm_call_pre_el_change_hook(cpu);
    bql_unlock();

    if (!return_to_aa64) {
        env->aarch64 = false;
        /* We do a raw CPSR write because aarch64_sync_64_to_32()
         * will sort the register banks out for us, and we've already
         * caught all the bad-mode cases in el_from_spsr().
         */
        cpsr_write_from_spsr_elx(env, spsr);
        if (!arm_singlestep_active(env)) {
            env->pstate &= ~PSTATE_SS;
        }
        aarch64_sync_64_to_32(env);

        if (spsr & CPSR_T) {
            env->regs[15] = new_pc & ~0x1;
        } else {
            env->regs[15] = new_pc & ~0x3;
        }
        helper_rebuild_hflags_a32(env, new_el);
        qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
                      "AArch32 EL%d PC 0x%" PRIx32 "\n",
                      cur_el, new_el, env->regs[15]);
    } else {
        int tbii;

        env->aarch64 = true;
        spsr &= aarch64_pstate_valid_mask(&cpu->isar);
        pstate_write(env, spsr);
        if (!arm_singlestep_active(env)) {
            env->pstate &= ~PSTATE_SS;
        }
        aarch64_restore_sp(env, new_el);
        helper_rebuild_hflags_a64(env, new_el);

        /*
         * Apply TBI to the exception return address. We had to delay this
         * until after we selected the new EL, so that we could select the
         * correct TBI+TBID bits. This is made easier by waiting until after
         * the hflags rebuild, since we can pull the composite TBII field
         * from there.
         */
        tbii = EX_TBFLAG_A64(env->hflags, TBII);
        if ((tbii >> extract64(new_pc, 55, 1)) & 1) {
            /* TBI is enabled. */
            int core_mmu_idx = arm_env_mmu_index(env);
            if (regime_has_2_ranges(core_to_aa64_mmu_idx(core_mmu_idx))) {
                new_pc = sextract64(new_pc, 0, 56);
            } else {
                new_pc = extract64(new_pc, 0, 56);
            }
        }
        env->pc = new_pc;

        qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
                      "AArch64 EL%d PC 0x%" PRIx64 "\n",
                      cur_el, new_el, env->pc);
    }

    /*
     * Note that cur_el can never be 0. If new_el is 0, then
     * el0_a64 is return_to_aa64, else el0_a64 is ignored.
     */
    aarch64_sve_change_el(env, cur_el, new_el, return_to_aa64);

    bql_lock();
    arm_call_el_change_hook(cpu);
    bql_unlock();

    return;

illegal_return:
    /* Illegal return events of various kinds have architecturally
     * mandated behaviour:
     *  restore NZCV and DAIF from SPSR_ELx
     *  set PSTATE.IL
     *  restore PC from ELR_ELx
     *  no change to exception level, execution state or stack pointer
     */
    env->pstate |= PSTATE_IL;
    env->pc = new_pc;
    spsr &= PSTATE_NZCV | PSTATE_DAIF | PSTATE_ALLINT;
    spsr |= pstate_read(env) & ~(PSTATE_NZCV | PSTATE_DAIF | PSTATE_ALLINT);
    pstate_write(env, spsr);
    if (!arm_singlestep_active(env)) {
        env->pstate &= ~PSTATE_SS;
    }
    helper_rebuild_hflags_a64(env, cur_el);
    qemu_log_mask(LOG_GUEST_ERROR, "Illegal exception return at EL%d: "
                  "resuming execution at 0x%" PRIx64 "\n", cur_el, env->pc);
}

void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
{
    uintptr_t ra = GETPC();

    /*
     * Implement DC ZVA, which zeroes a fixed-length block of memory.
     * Note that we do not implement the (architecturally mandated)
     * alignment fault for attempts to use this on Device memory
     * (which matches the usual QEMU behaviour of not implementing either
     * alignment faults or any memory attribute handling).
     */
    int blocklen = 4 << env_archcpu(env)->dcz_blocksize;
    uint64_t vaddr = vaddr_in & ~(blocklen - 1);
    int mmu_idx = arm_env_mmu_index(env);
    void *mem;

    /*
     * Trapless lookup. In addition to actual invalid page, may
     * return NULL for I/O, watchpoints, clean pages, etc.
     */
    mem = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);

#ifndef CONFIG_USER_ONLY
    if (unlikely(!mem)) {
        /*
         * Trap if accessing an invalid page. DC_ZVA requires that we supply
         * the original pointer for an invalid page. But watchpoints require
         * that we probe the actual space. So do both.
         */
        (void) probe_write(env, vaddr_in, 1, mmu_idx, ra);
        mem = probe_write(env, vaddr, blocklen, mmu_idx, ra);

        if (unlikely(!mem)) {
            /*
             * The only remaining reason for mem == NULL is I/O.
             * Just do a series of byte writes as the architecture demands.
             */
            for (int i = 0; i < blocklen; i++) {
                cpu_stb_mmuidx_ra(env, vaddr + i, 0, mmu_idx, ra);
            }
            return;
        }
    }
#endif

    set_helper_retaddr(ra);
    memset(mem, 0, blocklen);
    clear_helper_retaddr();
}
823 */ 824 for (int i = 0; i < blocklen; i++) { 825 cpu_stb_mmuidx_ra(env, vaddr + i, 0, mmu_idx, ra); 826 } 827 return; 828 } 829 } 830 #endif 831 832 set_helper_retaddr(ra); 833 memset(mem, 0, blocklen); 834 clear_helper_retaddr(); 835 } 836 837 void HELPER(unaligned_access)(CPUARMState *env, uint64_t addr, 838 uint32_t access_type, uint32_t mmu_idx) 839 { 840 arm_cpu_do_unaligned_access(env_cpu(env), addr, access_type, 841 mmu_idx, GETPC()); 842 } 843 844 /* Memory operations (memset, memmove, memcpy) */ 845 846 /* 847 * Return true if the CPY* and SET* insns can execute; compare 848 * pseudocode CheckMOPSEnabled(), though we refactor it a little. 849 */ 850 static bool mops_enabled(CPUARMState *env) 851 { 852 int el = arm_current_el(env); 853 854 if (el < 2 && 855 (arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE) && 856 !(arm_hcrx_el2_eff(env) & HCRX_MSCEN)) { 857 return false; 858 } 859 860 if (el == 0) { 861 if (!el_is_in_host(env, 0)) { 862 return env->cp15.sctlr_el[1] & SCTLR_MSCEN; 863 } else { 864 return env->cp15.sctlr_el[2] & SCTLR_MSCEN; 865 } 866 } 867 return true; 868 } 869 870 static void check_mops_enabled(CPUARMState *env, uintptr_t ra) 871 { 872 if (!mops_enabled(env)) { 873 raise_exception_ra(env, EXCP_UDEF, syn_uncategorized(), 874 exception_target_el(env), ra); 875 } 876 } 877 878 /* 879 * Return the target exception level for an exception due 880 * to mismatched arguments in a FEAT_MOPS copy or set. 881 * Compare pseudocode MismatchedCpySetTargetEL() 882 */ 883 static int mops_mismatch_exception_target_el(CPUARMState *env) 884 { 885 int el = arm_current_el(env); 886 887 if (el > 1) { 888 return el; 889 } 890 if (el == 0 && (arm_hcr_el2_eff(env) & HCR_TGE)) { 891 return 2; 892 } 893 if (el == 1 && (arm_hcrx_el2_eff(env) & HCRX_MCE2)) { 894 return 2; 895 } 896 return 1; 897 } 898 899 /* 900 * Check whether an M or E instruction was executed with a CF value 901 * indicating the wrong option for this implementation. 902 * Assumes we are always Option A. 903 */ 904 static void check_mops_wrong_option(CPUARMState *env, uint32_t syndrome, 905 uintptr_t ra) 906 { 907 if (env->CF != 0) { 908 syndrome |= 1 << 17; /* Set the wrong-option bit */ 909 raise_exception_ra(env, EXCP_UDEF, syndrome, 910 mops_mismatch_exception_target_el(env), ra); 911 } 912 } 913 914 /* 915 * Return the maximum number of bytes we can transfer starting at addr 916 * without crossing a page boundary. 917 */ 918 static uint64_t page_limit(uint64_t addr) 919 { 920 return TARGET_PAGE_ALIGN(addr + 1) - addr; 921 } 922 923 /* 924 * Return the number of bytes we can copy starting from addr and working 925 * backwards without crossing a page boundary. 926 */ 927 static uint64_t page_limit_rev(uint64_t addr) 928 { 929 return (addr & ~TARGET_PAGE_MASK) + 1; 930 } 931 932 /* 933 * Perform part of a memory set on an area of guest memory starting at 934 * toaddr (a dirty address) and extending for setsize bytes. 935 * 936 * Returns the number of bytes actually set, which might be less than 937 * setsize; the caller should loop until the whole set has been done. 938 * The caller should ensure that the guest registers are correct 939 * for the possibility that the first byte of the set encounters 940 * an exception or watchpoint. We guarantee not to take any faults 941 * for bytes other than the first. 
942 */ 943 static uint64_t set_step(CPUARMState *env, uint64_t toaddr, 944 uint64_t setsize, uint32_t data, int memidx, 945 uint32_t *mtedesc, uintptr_t ra) 946 { 947 void *mem; 948 949 setsize = MIN(setsize, page_limit(toaddr)); 950 if (*mtedesc) { 951 uint64_t mtesize = mte_mops_probe(env, toaddr, setsize, *mtedesc); 952 if (mtesize == 0) { 953 /* Trap, or not. All CPU state is up to date */ 954 mte_check_fail(env, *mtedesc, toaddr, ra); 955 /* Continue, with no further MTE checks required */ 956 *mtedesc = 0; 957 } else { 958 /* Advance to the end, or to the tag mismatch */ 959 setsize = MIN(setsize, mtesize); 960 } 961 } 962 963 toaddr = useronly_clean_ptr(toaddr); 964 /* 965 * Trapless lookup: returns NULL for invalid page, I/O, 966 * watchpoints, clean pages, etc. 967 */ 968 mem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, memidx); 969 970 #ifndef CONFIG_USER_ONLY 971 if (unlikely(!mem)) { 972 /* 973 * Slow-path: just do one byte write. This will handle the 974 * watchpoint, invalid page, etc handling correctly. 975 * For clean code pages, the next iteration will see 976 * the page dirty and will use the fast path. 977 */ 978 cpu_stb_mmuidx_ra(env, toaddr, data, memidx, ra); 979 return 1; 980 } 981 #endif 982 /* Easy case: just memset the host memory */ 983 set_helper_retaddr(ra); 984 memset(mem, data, setsize); 985 clear_helper_retaddr(); 986 return setsize; 987 } 988 989 /* 990 * Similar, but setting tags. The architecture requires us to do this 991 * in 16-byte chunks. SETP accesses are not tag checked; they set 992 * the tags. 993 */ 994 static uint64_t set_step_tags(CPUARMState *env, uint64_t toaddr, 995 uint64_t setsize, uint32_t data, int memidx, 996 uint32_t *mtedesc, uintptr_t ra) 997 { 998 void *mem; 999 uint64_t cleanaddr; 1000 1001 setsize = MIN(setsize, page_limit(toaddr)); 1002 1003 cleanaddr = useronly_clean_ptr(toaddr); 1004 /* 1005 * Trapless lookup: returns NULL for invalid page, I/O, 1006 * watchpoints, clean pages, etc. 1007 */ 1008 mem = tlb_vaddr_to_host(env, cleanaddr, MMU_DATA_STORE, memidx); 1009 1010 #ifndef CONFIG_USER_ONLY 1011 if (unlikely(!mem)) { 1012 /* 1013 * Slow-path: just do one write. This will handle the 1014 * watchpoint, invalid page, etc handling correctly. 1015 * The architecture requires that we do 16 bytes at a time, 1016 * and we know both ptr and size are 16 byte aligned. 1017 * For clean code pages, the next iteration will see 1018 * the page dirty and will use the fast path. 
1019 */ 1020 uint64_t repldata = data * 0x0101010101010101ULL; 1021 MemOpIdx oi16 = make_memop_idx(MO_TE | MO_128, memidx); 1022 cpu_st16_mmu(env, toaddr, int128_make128(repldata, repldata), oi16, ra); 1023 mte_mops_set_tags(env, toaddr, 16, *mtedesc); 1024 return 16; 1025 } 1026 #endif 1027 /* Easy case: just memset the host memory */ 1028 set_helper_retaddr(ra); 1029 memset(mem, data, setsize); 1030 clear_helper_retaddr(); 1031 mte_mops_set_tags(env, toaddr, setsize, *mtedesc); 1032 return setsize; 1033 } 1034 1035 typedef uint64_t StepFn(CPUARMState *env, uint64_t toaddr, 1036 uint64_t setsize, uint32_t data, 1037 int memidx, uint32_t *mtedesc, uintptr_t ra); 1038 1039 /* Extract register numbers from a MOPS exception syndrome value */ 1040 static int mops_destreg(uint32_t syndrome) 1041 { 1042 return extract32(syndrome, 10, 5); 1043 } 1044 1045 static int mops_srcreg(uint32_t syndrome) 1046 { 1047 return extract32(syndrome, 5, 5); 1048 } 1049 1050 static int mops_sizereg(uint32_t syndrome) 1051 { 1052 return extract32(syndrome, 0, 5); 1053 } 1054 1055 /* 1056 * Return true if TCMA and TBI bits mean we need to do MTE checks. 1057 * We only need to do this once per MOPS insn, not for every page. 1058 */ 1059 static bool mte_checks_needed(uint64_t ptr, uint32_t desc) 1060 { 1061 int bit55 = extract64(ptr, 55, 1); 1062 1063 /* 1064 * Note that tbi_check() returns true for "access checked" but 1065 * tcma_check() returns true for "access unchecked". 1066 */ 1067 if (!tbi_check(desc, bit55)) { 1068 return false; 1069 } 1070 return !tcma_check(desc, bit55, allocation_tag_from_addr(ptr)); 1071 } 1072 1073 /* Take an exception if the SETG addr/size are not granule aligned */ 1074 static void check_setg_alignment(CPUARMState *env, uint64_t ptr, uint64_t size, 1075 uint32_t memidx, uintptr_t ra) 1076 { 1077 if ((size != 0 && !QEMU_IS_ALIGNED(ptr, TAG_GRANULE)) || 1078 !QEMU_IS_ALIGNED(size, TAG_GRANULE)) { 1079 arm_cpu_do_unaligned_access(env_cpu(env), ptr, MMU_DATA_STORE, 1080 memidx, ra); 1081 1082 } 1083 } 1084 1085 static uint64_t arm_reg_or_xzr(CPUARMState *env, int reg) 1086 { 1087 /* 1088 * Runtime equivalent of cpu_reg() -- return the CPU register value, 1089 * for contexts when index 31 means XZR (not SP). 1090 */ 1091 return reg == 31 ? 0 : env->xregs[reg]; 1092 } 1093 1094 /* 1095 * For the Memory Set operation, our implementation chooses 1096 * always to use "option A", where we update Xd to the final 1097 * address in the SETP insn, and set Xn to be -(bytes remaining). 1098 * On SETM and SETE insns we only need update Xn. 
1099 * 1100 * @env: CPU 1101 * @syndrome: syndrome value for mismatch exceptions 1102 * (also contains the register numbers we need to use) 1103 * @mtedesc: MTE descriptor word 1104 * @stepfn: function which does a single part of the set operation 1105 * @is_setg: true if this is the tag-setting SETG variant 1106 */ 1107 static void do_setp(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc, 1108 StepFn *stepfn, bool is_setg, uintptr_t ra) 1109 { 1110 /* Prologue: we choose to do up to the next page boundary */ 1111 int rd = mops_destreg(syndrome); 1112 int rs = mops_srcreg(syndrome); 1113 int rn = mops_sizereg(syndrome); 1114 uint8_t data = arm_reg_or_xzr(env, rs); 1115 uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX); 1116 uint64_t toaddr = env->xregs[rd]; 1117 uint64_t setsize = env->xregs[rn]; 1118 uint64_t stagesetsize, step; 1119 1120 check_mops_enabled(env, ra); 1121 1122 if (setsize > INT64_MAX) { 1123 setsize = INT64_MAX; 1124 if (is_setg) { 1125 setsize &= ~0xf; 1126 } 1127 } 1128 1129 if (unlikely(is_setg)) { 1130 check_setg_alignment(env, toaddr, setsize, memidx, ra); 1131 } else if (!mte_checks_needed(toaddr, mtedesc)) { 1132 mtedesc = 0; 1133 } 1134 1135 stagesetsize = MIN(setsize, page_limit(toaddr)); 1136 while (stagesetsize) { 1137 env->xregs[rd] = toaddr; 1138 env->xregs[rn] = setsize; 1139 step = stepfn(env, toaddr, stagesetsize, data, memidx, &mtedesc, ra); 1140 toaddr += step; 1141 setsize -= step; 1142 stagesetsize -= step; 1143 } 1144 /* Insn completed, so update registers to the Option A format */ 1145 env->xregs[rd] = toaddr + setsize; 1146 env->xregs[rn] = -setsize; 1147 1148 /* Set NZCV = 0000 to indicate we are an Option A implementation */ 1149 env->NF = 0; 1150 env->ZF = 1; /* our env->ZF encoding is inverted */ 1151 env->CF = 0; 1152 env->VF = 0; 1153 } 1154 1155 void HELPER(setp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) 1156 { 1157 do_setp(env, syndrome, mtedesc, set_step, false, GETPC()); 1158 } 1159 1160 void HELPER(setgp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) 1161 { 1162 do_setp(env, syndrome, mtedesc, set_step_tags, true, GETPC()); 1163 } 1164 1165 static void do_setm(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc, 1166 StepFn *stepfn, bool is_setg, uintptr_t ra) 1167 { 1168 /* Main: we choose to do all the full-page chunks */ 1169 CPUState *cs = env_cpu(env); 1170 int rd = mops_destreg(syndrome); 1171 int rs = mops_srcreg(syndrome); 1172 int rn = mops_sizereg(syndrome); 1173 uint8_t data = arm_reg_or_xzr(env, rs); 1174 uint64_t toaddr = env->xregs[rd] + env->xregs[rn]; 1175 uint64_t setsize = -env->xregs[rn]; 1176 uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX); 1177 uint64_t step, stagesetsize; 1178 1179 check_mops_enabled(env, ra); 1180 1181 /* 1182 * We're allowed to NOP out "no data to copy" before the consistency 1183 * checks; we choose to do so. 1184 */ 1185 if (env->xregs[rn] == 0) { 1186 return; 1187 } 1188 1189 check_mops_wrong_option(env, syndrome, ra); 1190 1191 /* 1192 * Our implementation will work fine even if we have an unaligned 1193 * destination address, and because we update Xn every time around 1194 * the loop below and the return value from stepfn() may be less 1195 * than requested, we might find toaddr is unaligned. So we don't 1196 * have an IMPDEF check for alignment here. 
1197 */ 1198 1199 if (unlikely(is_setg)) { 1200 check_setg_alignment(env, toaddr, setsize, memidx, ra); 1201 } else if (!mte_checks_needed(toaddr, mtedesc)) { 1202 mtedesc = 0; 1203 } 1204 1205 /* Do the actual memset: we leave the last partial page to SETE */ 1206 stagesetsize = setsize & TARGET_PAGE_MASK; 1207 while (stagesetsize > 0) { 1208 step = stepfn(env, toaddr, stagesetsize, data, memidx, &mtedesc, ra); 1209 toaddr += step; 1210 setsize -= step; 1211 stagesetsize -= step; 1212 env->xregs[rn] = -setsize; 1213 if (stagesetsize > 0 && unlikely(cpu_loop_exit_requested(cs))) { 1214 cpu_loop_exit_restore(cs, ra); 1215 } 1216 } 1217 } 1218 1219 void HELPER(setm)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) 1220 { 1221 do_setm(env, syndrome, mtedesc, set_step, false, GETPC()); 1222 } 1223 1224 void HELPER(setgm)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) 1225 { 1226 do_setm(env, syndrome, mtedesc, set_step_tags, true, GETPC()); 1227 } 1228 1229 static void do_sete(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc, 1230 StepFn *stepfn, bool is_setg, uintptr_t ra) 1231 { 1232 /* Epilogue: do the last partial page */ 1233 int rd = mops_destreg(syndrome); 1234 int rs = mops_srcreg(syndrome); 1235 int rn = mops_sizereg(syndrome); 1236 uint8_t data = arm_reg_or_xzr(env, rs); 1237 uint64_t toaddr = env->xregs[rd] + env->xregs[rn]; 1238 uint64_t setsize = -env->xregs[rn]; 1239 uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX); 1240 uint64_t step; 1241 1242 check_mops_enabled(env, ra); 1243 1244 /* 1245 * We're allowed to NOP out "no data to copy" before the consistency 1246 * checks; we choose to do so. 1247 */ 1248 if (setsize == 0) { 1249 return; 1250 } 1251 1252 check_mops_wrong_option(env, syndrome, ra); 1253 1254 /* 1255 * Our implementation has no address alignment requirements, but 1256 * we do want to enforce the "less than a page" size requirement, 1257 * so we don't need to have the "check for interrupts" here. 1258 */ 1259 if (setsize >= TARGET_PAGE_SIZE) { 1260 raise_exception_ra(env, EXCP_UDEF, syndrome, 1261 mops_mismatch_exception_target_el(env), ra); 1262 } 1263 1264 if (unlikely(is_setg)) { 1265 check_setg_alignment(env, toaddr, setsize, memidx, ra); 1266 } else if (!mte_checks_needed(toaddr, mtedesc)) { 1267 mtedesc = 0; 1268 } 1269 1270 /* Do the actual memset */ 1271 while (setsize > 0) { 1272 step = stepfn(env, toaddr, setsize, data, memidx, &mtedesc, ra); 1273 toaddr += step; 1274 setsize -= step; 1275 env->xregs[rn] = -setsize; 1276 } 1277 } 1278 1279 void HELPER(sete)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) 1280 { 1281 do_sete(env, syndrome, mtedesc, set_step, false, GETPC()); 1282 } 1283 1284 void HELPER(setge)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) 1285 { 1286 do_sete(env, syndrome, mtedesc, set_step_tags, true, GETPC()); 1287 } 1288 1289 /* 1290 * Perform part of a memory copy from the guest memory at fromaddr 1291 * and extending for copysize bytes, to the guest memory at 1292 * toaddr. Both addresses are dirty. 1293 * 1294 * Returns the number of bytes actually set, which might be less than 1295 * copysize; the caller should loop until the whole copy has been done. 1296 * The caller should ensure that the guest registers are correct 1297 * for the possibility that the first byte of the copy encounters 1298 * an exception or watchpoint. We guarantee not to take any faults 1299 * for bytes other than the first. 
1300 */ 1301 static uint64_t copy_step(CPUARMState *env, uint64_t toaddr, uint64_t fromaddr, 1302 uint64_t copysize, int wmemidx, int rmemidx, 1303 uint32_t *wdesc, uint32_t *rdesc, uintptr_t ra) 1304 { 1305 void *rmem; 1306 void *wmem; 1307 1308 /* Don't cross a page boundary on either source or destination */ 1309 copysize = MIN(copysize, page_limit(toaddr)); 1310 copysize = MIN(copysize, page_limit(fromaddr)); 1311 /* 1312 * Handle MTE tag checks: either handle the tag mismatch for byte 0, 1313 * or else copy up to but not including the byte with the mismatch. 1314 */ 1315 if (*rdesc) { 1316 uint64_t mtesize = mte_mops_probe(env, fromaddr, copysize, *rdesc); 1317 if (mtesize == 0) { 1318 mte_check_fail(env, *rdesc, fromaddr, ra); 1319 *rdesc = 0; 1320 } else { 1321 copysize = MIN(copysize, mtesize); 1322 } 1323 } 1324 if (*wdesc) { 1325 uint64_t mtesize = mte_mops_probe(env, toaddr, copysize, *wdesc); 1326 if (mtesize == 0) { 1327 mte_check_fail(env, *wdesc, toaddr, ra); 1328 *wdesc = 0; 1329 } else { 1330 copysize = MIN(copysize, mtesize); 1331 } 1332 } 1333 1334 toaddr = useronly_clean_ptr(toaddr); 1335 fromaddr = useronly_clean_ptr(fromaddr); 1336 /* Trapless lookup of whether we can get a host memory pointer */ 1337 wmem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, wmemidx); 1338 rmem = tlb_vaddr_to_host(env, fromaddr, MMU_DATA_LOAD, rmemidx); 1339 1340 #ifndef CONFIG_USER_ONLY 1341 /* 1342 * If we don't have host memory for both source and dest then just 1343 * do a single byte copy. This will handle watchpoints, invalid pages, 1344 * etc correctly. For clean code pages, the next iteration will see 1345 * the page dirty and will use the fast path. 1346 */ 1347 if (unlikely(!rmem || !wmem)) { 1348 uint8_t byte; 1349 if (rmem) { 1350 byte = *(uint8_t *)rmem; 1351 } else { 1352 byte = cpu_ldub_mmuidx_ra(env, fromaddr, rmemidx, ra); 1353 } 1354 if (wmem) { 1355 *(uint8_t *)wmem = byte; 1356 } else { 1357 cpu_stb_mmuidx_ra(env, toaddr, byte, wmemidx, ra); 1358 } 1359 return 1; 1360 } 1361 #endif 1362 /* Easy case: just memmove the host memory */ 1363 set_helper_retaddr(ra); 1364 memmove(wmem, rmem, copysize); 1365 clear_helper_retaddr(); 1366 return copysize; 1367 } 1368 1369 /* 1370 * Do part of a backwards memory copy. Here toaddr and fromaddr point 1371 * to the *last* byte to be copied. 1372 */ 1373 static uint64_t copy_step_rev(CPUARMState *env, uint64_t toaddr, 1374 uint64_t fromaddr, 1375 uint64_t copysize, int wmemidx, int rmemidx, 1376 uint32_t *wdesc, uint32_t *rdesc, uintptr_t ra) 1377 { 1378 void *rmem; 1379 void *wmem; 1380 1381 /* Don't cross a page boundary on either source or destination */ 1382 copysize = MIN(copysize, page_limit_rev(toaddr)); 1383 copysize = MIN(copysize, page_limit_rev(fromaddr)); 1384 1385 /* 1386 * Handle MTE tag checks: either handle the tag mismatch for byte 0, 1387 * or else copy up to but not including the byte with the mismatch. 
1388 */ 1389 if (*rdesc) { 1390 uint64_t mtesize = mte_mops_probe_rev(env, fromaddr, copysize, *rdesc); 1391 if (mtesize == 0) { 1392 mte_check_fail(env, *rdesc, fromaddr, ra); 1393 *rdesc = 0; 1394 } else { 1395 copysize = MIN(copysize, mtesize); 1396 } 1397 } 1398 if (*wdesc) { 1399 uint64_t mtesize = mte_mops_probe_rev(env, toaddr, copysize, *wdesc); 1400 if (mtesize == 0) { 1401 mte_check_fail(env, *wdesc, toaddr, ra); 1402 *wdesc = 0; 1403 } else { 1404 copysize = MIN(copysize, mtesize); 1405 } 1406 } 1407 1408 toaddr = useronly_clean_ptr(toaddr); 1409 fromaddr = useronly_clean_ptr(fromaddr); 1410 /* Trapless lookup of whether we can get a host memory pointer */ 1411 wmem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, wmemidx); 1412 rmem = tlb_vaddr_to_host(env, fromaddr, MMU_DATA_LOAD, rmemidx); 1413 1414 #ifndef CONFIG_USER_ONLY 1415 /* 1416 * If we don't have host memory for both source and dest then just 1417 * do a single byte copy. This will handle watchpoints, invalid pages, 1418 * etc correctly. For clean code pages, the next iteration will see 1419 * the page dirty and will use the fast path. 1420 */ 1421 if (unlikely(!rmem || !wmem)) { 1422 uint8_t byte; 1423 if (rmem) { 1424 byte = *(uint8_t *)rmem; 1425 } else { 1426 byte = cpu_ldub_mmuidx_ra(env, fromaddr, rmemidx, ra); 1427 } 1428 if (wmem) { 1429 *(uint8_t *)wmem = byte; 1430 } else { 1431 cpu_stb_mmuidx_ra(env, toaddr, byte, wmemidx, ra); 1432 } 1433 return 1; 1434 } 1435 #endif 1436 /* 1437 * Easy case: just memmove the host memory. Note that wmem and 1438 * rmem here point to the *last* byte to copy. 1439 */ 1440 set_helper_retaddr(ra); 1441 memmove(wmem - (copysize - 1), rmem - (copysize - 1), copysize); 1442 clear_helper_retaddr(); 1443 return copysize; 1444 } 1445 1446 /* 1447 * for the Memory Copy operation, our implementation chooses always 1448 * to use "option A", where we update Xd and Xs to the final addresses 1449 * in the CPYP insn, and then in CPYM and CPYE only need to update Xn. 1450 * 1451 * @env: CPU 1452 * @syndrome: syndrome value for mismatch exceptions 1453 * (also contains the register numbers we need to use) 1454 * @wdesc: MTE descriptor for the writes (destination) 1455 * @rdesc: MTE descriptor for the reads (source) 1456 * @move: true if this is CPY (memmove), false for CPYF (memcpy forwards) 1457 */ 1458 static void do_cpyp(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, 1459 uint32_t rdesc, uint32_t move, uintptr_t ra) 1460 { 1461 int rd = mops_destreg(syndrome); 1462 int rs = mops_srcreg(syndrome); 1463 int rn = mops_sizereg(syndrome); 1464 uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX); 1465 uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX); 1466 bool forwards = true; 1467 uint64_t toaddr = env->xregs[rd]; 1468 uint64_t fromaddr = env->xregs[rs]; 1469 uint64_t copysize = env->xregs[rn]; 1470 uint64_t stagecopysize, step; 1471 1472 check_mops_enabled(env, ra); 1473 1474 1475 if (move) { 1476 /* 1477 * Copy backwards if necessary. The direction for a non-overlapping 1478 * copy is IMPDEF; we choose forwards. 
1479 */ 1480 if (copysize > 0x007FFFFFFFFFFFFFULL) { 1481 copysize = 0x007FFFFFFFFFFFFFULL; 1482 } 1483 uint64_t fs = extract64(fromaddr, 0, 56); 1484 uint64_t ts = extract64(toaddr, 0, 56); 1485 uint64_t fe = extract64(fromaddr + copysize, 0, 56); 1486 1487 if (fs < ts && fe > ts) { 1488 forwards = false; 1489 } 1490 } else { 1491 if (copysize > INT64_MAX) { 1492 copysize = INT64_MAX; 1493 } 1494 } 1495 1496 if (!mte_checks_needed(fromaddr, rdesc)) { 1497 rdesc = 0; 1498 } 1499 if (!mte_checks_needed(toaddr, wdesc)) { 1500 wdesc = 0; 1501 } 1502 1503 if (forwards) { 1504 stagecopysize = MIN(copysize, page_limit(toaddr)); 1505 stagecopysize = MIN(stagecopysize, page_limit(fromaddr)); 1506 while (stagecopysize) { 1507 env->xregs[rd] = toaddr; 1508 env->xregs[rs] = fromaddr; 1509 env->xregs[rn] = copysize; 1510 step = copy_step(env, toaddr, fromaddr, stagecopysize, 1511 wmemidx, rmemidx, &wdesc, &rdesc, ra); 1512 toaddr += step; 1513 fromaddr += step; 1514 copysize -= step; 1515 stagecopysize -= step; 1516 } 1517 /* Insn completed, so update registers to the Option A format */ 1518 env->xregs[rd] = toaddr + copysize; 1519 env->xregs[rs] = fromaddr + copysize; 1520 env->xregs[rn] = -copysize; 1521 } else { 1522 /* 1523 * In a reverse copy the to and from addrs in Xs and Xd are the start 1524 * of the range, but it's more convenient for us to work with pointers 1525 * to the last byte being copied. 1526 */ 1527 toaddr += copysize - 1; 1528 fromaddr += copysize - 1; 1529 stagecopysize = MIN(copysize, page_limit_rev(toaddr)); 1530 stagecopysize = MIN(stagecopysize, page_limit_rev(fromaddr)); 1531 while (stagecopysize) { 1532 env->xregs[rn] = copysize; 1533 step = copy_step_rev(env, toaddr, fromaddr, stagecopysize, 1534 wmemidx, rmemidx, &wdesc, &rdesc, ra); 1535 copysize -= step; 1536 stagecopysize -= step; 1537 toaddr -= step; 1538 fromaddr -= step; 1539 } 1540 /* 1541 * Insn completed, so update registers to the Option A format. 1542 * For a reverse copy this is no different to the CPYP input format. 
1543 */ 1544 env->xregs[rn] = copysize; 1545 } 1546 1547 /* Set NZCV = 0000 to indicate we are an Option A implementation */ 1548 env->NF = 0; 1549 env->ZF = 1; /* our env->ZF encoding is inverted */ 1550 env->CF = 0; 1551 env->VF = 0; 1552 } 1553 1554 void HELPER(cpyp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, 1555 uint32_t rdesc) 1556 { 1557 do_cpyp(env, syndrome, wdesc, rdesc, true, GETPC()); 1558 } 1559 1560 void HELPER(cpyfp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, 1561 uint32_t rdesc) 1562 { 1563 do_cpyp(env, syndrome, wdesc, rdesc, false, GETPC()); 1564 } 1565 1566 static void do_cpym(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, 1567 uint32_t rdesc, uint32_t move, uintptr_t ra) 1568 { 1569 /* Main: we choose to copy until less than a page remaining */ 1570 CPUState *cs = env_cpu(env); 1571 int rd = mops_destreg(syndrome); 1572 int rs = mops_srcreg(syndrome); 1573 int rn = mops_sizereg(syndrome); 1574 uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX); 1575 uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX); 1576 bool forwards = true; 1577 uint64_t toaddr, fromaddr, copysize, step; 1578 1579 check_mops_enabled(env, ra); 1580 1581 /* We choose to NOP out "no data to copy" before consistency checks */ 1582 if (env->xregs[rn] == 0) { 1583 return; 1584 } 1585 1586 check_mops_wrong_option(env, syndrome, ra); 1587 1588 if (move) { 1589 forwards = (int64_t)env->xregs[rn] < 0; 1590 } 1591 1592 if (forwards) { 1593 toaddr = env->xregs[rd] + env->xregs[rn]; 1594 fromaddr = env->xregs[rs] + env->xregs[rn]; 1595 copysize = -env->xregs[rn]; 1596 } else { 1597 copysize = env->xregs[rn]; 1598 /* This toaddr and fromaddr point to the *last* byte to copy */ 1599 toaddr = env->xregs[rd] + copysize - 1; 1600 fromaddr = env->xregs[rs] + copysize - 1; 1601 } 1602 1603 if (!mte_checks_needed(fromaddr, rdesc)) { 1604 rdesc = 0; 1605 } 1606 if (!mte_checks_needed(toaddr, wdesc)) { 1607 wdesc = 0; 1608 } 1609 1610 /* Our implementation has no particular parameter requirements for CPYM */ 1611 1612 /* Do the actual memmove */ 1613 if (forwards) { 1614 while (copysize >= TARGET_PAGE_SIZE) { 1615 step = copy_step(env, toaddr, fromaddr, copysize, 1616 wmemidx, rmemidx, &wdesc, &rdesc, ra); 1617 toaddr += step; 1618 fromaddr += step; 1619 copysize -= step; 1620 env->xregs[rn] = -copysize; 1621 if (copysize >= TARGET_PAGE_SIZE && 1622 unlikely(cpu_loop_exit_requested(cs))) { 1623 cpu_loop_exit_restore(cs, ra); 1624 } 1625 } 1626 } else { 1627 while (copysize >= TARGET_PAGE_SIZE) { 1628 step = copy_step_rev(env, toaddr, fromaddr, copysize, 1629 wmemidx, rmemidx, &wdesc, &rdesc, ra); 1630 toaddr -= step; 1631 fromaddr -= step; 1632 copysize -= step; 1633 env->xregs[rn] = copysize; 1634 if (copysize >= TARGET_PAGE_SIZE && 1635 unlikely(cpu_loop_exit_requested(cs))) { 1636 cpu_loop_exit_restore(cs, ra); 1637 } 1638 } 1639 } 1640 } 1641 1642 void HELPER(cpym)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, 1643 uint32_t rdesc) 1644 { 1645 do_cpym(env, syndrome, wdesc, rdesc, true, GETPC()); 1646 } 1647 1648 void HELPER(cpyfm)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, 1649 uint32_t rdesc) 1650 { 1651 do_cpym(env, syndrome, wdesc, rdesc, false, GETPC()); 1652 } 1653 1654 static void do_cpye(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, 1655 uint32_t rdesc, uint32_t move, uintptr_t ra) 1656 { 1657 /* Epilogue: do the last partial page */ 1658 int rd = mops_destreg(syndrome); 1659 int rs = mops_srcreg(syndrome); 1660 int rn = mops_sizereg(syndrome); 1661 uint32_t 

static void do_cpye(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                    uint32_t rdesc, uint32_t move, uintptr_t ra)
{
    /* Epilogue: do the last partial page */
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX);
    uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX);
    bool forwards = true;
    uint64_t toaddr, fromaddr, copysize, step;

    check_mops_enabled(env, ra);

    /* We choose to NOP out "no data to copy" before consistency checks */
    if (env->xregs[rn] == 0) {
        return;
    }

    check_mops_wrong_option(env, syndrome, ra);

    if (move) {
        forwards = (int64_t)env->xregs[rn] < 0;
    }

    if (forwards) {
        toaddr = env->xregs[rd] + env->xregs[rn];
        fromaddr = env->xregs[rs] + env->xregs[rn];
        copysize = -env->xregs[rn];
    } else {
        copysize = env->xregs[rn];
        /* This toaddr and fromaddr point to the *last* byte to copy */
        toaddr = env->xregs[rd] + copysize - 1;
        fromaddr = env->xregs[rs] + copysize - 1;
    }

    if (!mte_checks_needed(fromaddr, rdesc)) {
        rdesc = 0;
    }
    if (!mte_checks_needed(toaddr, wdesc)) {
        wdesc = 0;
    }

    /* Check the size; we don't want to have to do a check-for-interrupts */
    if (copysize >= TARGET_PAGE_SIZE) {
        raise_exception_ra(env, EXCP_UDEF, syndrome,
                           mops_mismatch_exception_target_el(env), ra);
    }

    /* Do the actual memmove */
    if (forwards) {
        while (copysize > 0) {
            step = copy_step(env, toaddr, fromaddr, copysize,
                             wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr += step;
            fromaddr += step;
            copysize -= step;
            env->xregs[rn] = -copysize;
        }
    } else {
        while (copysize > 0) {
            step = copy_step_rev(env, toaddr, fromaddr, copysize,
                                 wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr -= step;
            fromaddr -= step;
            copysize -= step;
            env->xregs[rn] = copysize;
        }
    }
}

void HELPER(cpye)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                  uint32_t rdesc)
{
    do_cpye(env, syndrome, wdesc, rdesc, true, GETPC());
}

void HELPER(cpyfe)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                   uint32_t rdesc)
{
    do_cpye(env, syndrome, wdesc, rdesc, false, GETPC());
}

static bool is_guarded_page(CPUARMState *env, target_ulong addr, uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    return page_get_flags(addr) & PAGE_BTI;
#else
    CPUTLBEntryFull *full;
    void *host;
    int mmu_idx = cpu_mmu_index(env_cpu(env), true);
    int flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx,
                                  false, &host, &full, ra);

    assert(!(flags & TLB_INVALID_MASK));
    return full->extra.arm.guarded;
#endif
}

void HELPER(guarded_page_check)(CPUARMState *env)
{
    /*
     * We have already verified that bti is enabled, and that the
     * instruction at PC is not ok for BTYPE. This is always at
     * the beginning of a block, so PC is always up-to-date and
     * no unwind is required.
     */
    if (is_guarded_page(env, env->pc, 0)) {
        raise_exception(env, EXCP_UDEF, syn_btitrap(env->btype),
                        exception_target_el(env));
    }
}

void HELPER(guarded_page_br)(CPUARMState *env, target_ulong pc)
{
    /*
     * We have already checked for branch via x16 and x17.
     * What remains for choosing BTYPE is checking for a guarded page.
     */
    env->btype = is_guarded_page(env, pc, GETPC()) ? 3 : 1;
}