/*
 * AArch64 specific helpers
 *
 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/units.h"
#include "cpu.h"
#include "gdbstub/helpers.h"
#include "exec/helper-proto.h"
#include "qemu/host-utils.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/bitops.h"
#include "internals.h"
#include "qemu/crc32c.h"
#include "exec/cpu-common.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "exec/tlb-flags.h"
#include "qemu/int128.h"
#include "qemu/atomic128.h"
#include "fpu/softfloat.h"
#include <zlib.h> /* for crc32 */
#ifdef CONFIG_USER_ONLY
#include "user/page-protection.h"
#endif
#include "vec_internal.h"

/* C2.4.7 Multiply and divide */
/* special cases for 0 and LLONG_MIN are mandated by the standard */
uint64_t HELPER(udiv64)(uint64_t num, uint64_t den)
{
    /* UDIV: division by zero yields zero, no exception is raised */
    if (den == 0) {
        return 0;
    }
    return num / den;
}

int64_t HELPER(sdiv64)(int64_t num, int64_t den)
{
    /* SDIV: division by zero yields zero, no exception is raised */
    if (den == 0) {
        return 0;
    }
    /* Handle LLONG_MIN / -1 explicitly: the host operation would overflow */
    if (num == LLONG_MIN && den == -1) {
        return LLONG_MIN;
    }
    return num / den;
}

/* RBIT: reverse the bit order of a 64-bit value */
uint64_t HELPER(rbit64)(uint64_t x)
{
    return revbit64(x);
}

/* MSR SPSel, #imm: select between SP_EL0 and SP_ELx */
void HELPER(msr_i_spsel)(CPUARMState *env, uint32_t imm)
{
    update_spsel(env, imm);
}

void HELPER(msr_set_allint_el1)(CPUARMState *env)
{
    /* ALLINT update to PSTATE. */
    if (arm_hcrx_el2_eff(env) & HCRX_TALLINT) {
        /* HCRX_EL2.TALLINT traps the EL1 write to EL2 */
        raise_exception_ra(env, EXCP_UDEF,
                           syn_aa64_sysregtrap(0, 1, 0, 4, 1, 0x1f, 0), 2,
                           GETPC());
    }

    env->pstate |= PSTATE_ALLINT;
}

/*
 * Common trap check for the MSR DAIFSet/DAIFClear immediate forms;
 * raises UNDEF (with the supplied sysreg trap syndrome) when executed
 * at EL0 without SCTLR.UMA.
 */
static void daif_check(CPUARMState *env, uint32_t op,
                       uint32_t imm, uintptr_t ra)
{
    /* DAIF update to PSTATE. This is OK from EL0 only if UMA is set. */
    if (arm_current_el(env) == 0 && !(arm_sctlr(env, 0) & SCTLR_UMA)) {
        raise_exception_ra(env, EXCP_UDEF,
                           syn_aa64_sysregtrap(0, extract32(op, 0, 3),
                                               extract32(op, 3, 3), 4,
                                               imm, 0x1f, 0),
                           exception_target_el(env), ra);
    }
}

void HELPER(msr_i_daifset)(CPUARMState *env, uint32_t imm)
{
    daif_check(env, 0x1e, imm, GETPC());
    /* Shift the 4-bit immediate into the PSTATE.DAIF bit positions */
    env->daif |= (imm << 6) & PSTATE_DAIF;
    arm_rebuild_hflags(env);
}

void HELPER(msr_i_daifclear)(CPUARMState *env, uint32_t imm)
{
    daif_check(env, 0x1f, imm, GETPC());
    env->daif &= ~((imm << 6) & PSTATE_DAIF);
    arm_rebuild_hflags(env);
}

/* Convert a softfloat float_relation_ (as returned by
 * the float*_compare functions) to the correct ARM
 * NZCV flag state.
117 */ 118 static inline uint32_t float_rel_to_flags(int res) 119 { 120 uint64_t flags; 121 switch (res) { 122 case float_relation_equal: 123 flags = PSTATE_Z | PSTATE_C; 124 break; 125 case float_relation_less: 126 flags = PSTATE_N; 127 break; 128 case float_relation_greater: 129 flags = PSTATE_C; 130 break; 131 case float_relation_unordered: 132 default: 133 flags = PSTATE_C | PSTATE_V; 134 break; 135 } 136 return flags; 137 } 138 139 uint64_t HELPER(vfp_cmph_a64)(uint32_t x, uint32_t y, float_status *fp_status) 140 { 141 return float_rel_to_flags(float16_compare_quiet(x, y, fp_status)); 142 } 143 144 uint64_t HELPER(vfp_cmpeh_a64)(uint32_t x, uint32_t y, float_status *fp_status) 145 { 146 return float_rel_to_flags(float16_compare(x, y, fp_status)); 147 } 148 149 uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, float_status *fp_status) 150 { 151 return float_rel_to_flags(float32_compare_quiet(x, y, fp_status)); 152 } 153 154 uint64_t HELPER(vfp_cmpes_a64)(float32 x, float32 y, float_status *fp_status) 155 { 156 return float_rel_to_flags(float32_compare(x, y, fp_status)); 157 } 158 159 uint64_t HELPER(vfp_cmpd_a64)(float64 x, float64 y, float_status *fp_status) 160 { 161 return float_rel_to_flags(float64_compare_quiet(x, y, fp_status)); 162 } 163 164 uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, float_status *fp_status) 165 { 166 return float_rel_to_flags(float64_compare(x, y, fp_status)); 167 } 168 169 float32 HELPER(vfp_mulxs)(float32 a, float32 b, float_status *fpst) 170 { 171 a = float32_squash_input_denormal(a, fpst); 172 b = float32_squash_input_denormal(b, fpst); 173 174 if ((float32_is_zero(a) && float32_is_infinity(b)) || 175 (float32_is_infinity(a) && float32_is_zero(b))) { 176 /* 2.0 with the sign bit set to sign(A) XOR sign(B) */ 177 return make_float32((1U << 30) | 178 ((float32_val(a) ^ float32_val(b)) & (1U << 31))); 179 } 180 return float32_mul(a, b, fpst); 181 } 182 183 float64 HELPER(vfp_mulxd)(float64 a, float64 b, float_status *fpst) 
184 { 185 a = float64_squash_input_denormal(a, fpst); 186 b = float64_squash_input_denormal(b, fpst); 187 188 if ((float64_is_zero(a) && float64_is_infinity(b)) || 189 (float64_is_infinity(a) && float64_is_zero(b))) { 190 /* 2.0 with the sign bit set to sign(A) XOR sign(B) */ 191 return make_float64((1ULL << 62) | 192 ((float64_val(a) ^ float64_val(b)) & (1ULL << 63))); 193 } 194 return float64_mul(a, b, fpst); 195 } 196 197 /* 64bit/double versions of the neon float compare functions */ 198 uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, float_status *fpst) 199 { 200 return -float64_eq_quiet(a, b, fpst); 201 } 202 203 uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, float_status *fpst) 204 { 205 return -float64_le(b, a, fpst); 206 } 207 208 uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, float_status *fpst) 209 { 210 return -float64_lt(b, a, fpst); 211 } 212 213 /* 214 * Reciprocal step and sqrt step. Note that unlike the A32/T32 215 * versions, these do a fully fused multiply-add or 216 * multiply-add-and-halve. 217 * The FPCR.AH == 1 versions need to avoid flipping the sign of NaN. 
218 */ 219 #define DO_RECPS(NAME, CTYPE, FLOATTYPE, CHSFN) \ 220 CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \ 221 { \ 222 a = FLOATTYPE ## _squash_input_denormal(a, fpst); \ 223 b = FLOATTYPE ## _squash_input_denormal(b, fpst); \ 224 a = FLOATTYPE ## _ ## CHSFN(a); \ 225 if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \ 226 (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \ 227 return FLOATTYPE ## _two; \ 228 } \ 229 return FLOATTYPE ## _muladd(a, b, FLOATTYPE ## _two, 0, fpst); \ 230 } 231 232 DO_RECPS(recpsf_f16, uint32_t, float16, chs) 233 DO_RECPS(recpsf_f32, float32, float32, chs) 234 DO_RECPS(recpsf_f64, float64, float64, chs) 235 DO_RECPS(recpsf_ah_f16, uint32_t, float16, ah_chs) 236 DO_RECPS(recpsf_ah_f32, float32, float32, ah_chs) 237 DO_RECPS(recpsf_ah_f64, float64, float64, ah_chs) 238 239 #define DO_RSQRTSF(NAME, CTYPE, FLOATTYPE, CHSFN) \ 240 CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \ 241 { \ 242 a = FLOATTYPE ## _squash_input_denormal(a, fpst); \ 243 b = FLOATTYPE ## _squash_input_denormal(b, fpst); \ 244 a = FLOATTYPE ## _ ## CHSFN(a); \ 245 if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \ 246 (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \ 247 return FLOATTYPE ## _one_point_five; \ 248 } \ 249 return FLOATTYPE ## _muladd_scalbn(a, b, FLOATTYPE ## _three, \ 250 -1, 0, fpst); \ 251 } \ 252 253 DO_RSQRTSF(rsqrtsf_f16, uint32_t, float16, chs) 254 DO_RSQRTSF(rsqrtsf_f32, float32, float32, chs) 255 DO_RSQRTSF(rsqrtsf_f64, float64, float64, chs) 256 DO_RSQRTSF(rsqrtsf_ah_f16, uint32_t, float16, ah_chs) 257 DO_RSQRTSF(rsqrtsf_ah_f32, float32, float32, ah_chs) 258 DO_RSQRTSF(rsqrtsf_ah_f64, float64, float64, ah_chs) 259 260 /* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */ 261 uint32_t HELPER(frecpx_f16)(uint32_t a, float_status *fpst) 262 { 263 uint16_t val16, sbit; 264 int16_t exp; 265 266 if (float16_is_any_nan(a)) { 267 float16 nan = a; 268 if 
(float16_is_signaling_nan(a, fpst)) { 269 float_raise(float_flag_invalid, fpst); 270 if (!fpst->default_nan_mode) { 271 nan = float16_silence_nan(a, fpst); 272 } 273 } 274 if (fpst->default_nan_mode) { 275 nan = float16_default_nan(fpst); 276 } 277 return nan; 278 } 279 280 a = float16_squash_input_denormal(a, fpst); 281 282 val16 = float16_val(a); 283 sbit = 0x8000 & val16; 284 exp = extract32(val16, 10, 5); 285 286 if (exp == 0) { 287 return make_float16(deposit32(sbit, 10, 5, 0x1e)); 288 } else { 289 return make_float16(deposit32(sbit, 10, 5, ~exp)); 290 } 291 } 292 293 float32 HELPER(frecpx_f32)(float32 a, float_status *fpst) 294 { 295 uint32_t val32, sbit; 296 int32_t exp; 297 298 if (float32_is_any_nan(a)) { 299 float32 nan = a; 300 if (float32_is_signaling_nan(a, fpst)) { 301 float_raise(float_flag_invalid, fpst); 302 if (!fpst->default_nan_mode) { 303 nan = float32_silence_nan(a, fpst); 304 } 305 } 306 if (fpst->default_nan_mode) { 307 nan = float32_default_nan(fpst); 308 } 309 return nan; 310 } 311 312 a = float32_squash_input_denormal(a, fpst); 313 314 val32 = float32_val(a); 315 sbit = 0x80000000ULL & val32; 316 exp = extract32(val32, 23, 8); 317 318 if (exp == 0) { 319 return make_float32(sbit | (0xfe << 23)); 320 } else { 321 return make_float32(sbit | (~exp & 0xff) << 23); 322 } 323 } 324 325 float64 HELPER(frecpx_f64)(float64 a, float_status *fpst) 326 { 327 uint64_t val64, sbit; 328 int64_t exp; 329 330 if (float64_is_any_nan(a)) { 331 float64 nan = a; 332 if (float64_is_signaling_nan(a, fpst)) { 333 float_raise(float_flag_invalid, fpst); 334 if (!fpst->default_nan_mode) { 335 nan = float64_silence_nan(a, fpst); 336 } 337 } 338 if (fpst->default_nan_mode) { 339 nan = float64_default_nan(fpst); 340 } 341 return nan; 342 } 343 344 a = float64_squash_input_denormal(a, fpst); 345 346 val64 = float64_val(a); 347 sbit = 0x8000000000000000ULL & val64; 348 exp = extract64(float64_val(a), 52, 11); 349 350 if (exp == 0) { 351 return make_float64(sbit | 
(0x7feULL << 52));
    } else {
        return make_float64(sbit | (~exp & 0x7ffULL) << 52);
    }
}

/*
 * FCVTXN helper: narrow float64 to float32 using the round-to-odd
 * rounding mode, restoring the caller's rounding mode afterwards.
 */
float32 HELPER(fcvtx_f64_to_f32)(float64 a, float_status *fpst)
{
    float32 r;
    int old = get_float_rounding_mode(fpst);

    set_float_rounding_mode(float_round_to_odd, fpst);
    r = float64_to_float32(a, fpst);
    set_float_rounding_mode(old, fpst);
    return r;
}

/*
 * AH=1 min/max have some odd special cases:
 * comparing two zeroes (regardless of sign), (NaN, anything),
 * or (anything, NaN) should return the second argument (possibly
 * squashed to zero).
 * Also, denormal outputs are not squashed to zero regardless of FZ or FZ16.
 */
#define AH_MINMAX_HELPER(NAME, CTYPE, FLOATTYPE, MINMAX)                \
    CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst)            \
    {                                                                   \
        bool save;                                                      \
        CTYPE r;                                                        \
        a = FLOATTYPE ## _squash_input_denormal(a, fpst);               \
        b = FLOATTYPE ## _squash_input_denormal(b, fpst);               \
        if (FLOATTYPE ## _is_zero(a) && FLOATTYPE ## _is_zero(b)) {     \
            return b;                                                   \
        }                                                               \
        if (FLOATTYPE ## _is_any_nan(a) ||                              \
            FLOATTYPE ## _is_any_nan(b)) {                              \
            float_raise(float_flag_invalid, fpst);                      \
            return b;                                                   \
        }                                                               \
        /* Temporarily disable FZ so denormal results are preserved */  \
        save = get_flush_to_zero(fpst);                                 \
        set_flush_to_zero(false, fpst);                                 \
        r = FLOATTYPE ## _ ## MINMAX(a, b, fpst);                       \
        set_flush_to_zero(save, fpst);                                  \
        return r;                                                       \
    }

AH_MINMAX_HELPER(vfp_ah_minh, dh_ctype_f16, float16, min)
AH_MINMAX_HELPER(vfp_ah_mins, float32, float32, min)
AH_MINMAX_HELPER(vfp_ah_mind, float64, float64, min)
AH_MINMAX_HELPER(vfp_ah_maxh, dh_ctype_f16, float16, max)
AH_MINMAX_HELPER(vfp_ah_maxs, float32, float32, max)
AH_MINMAX_HELPER(vfp_ah_maxd, float64, float64, max)

/* 64-bit versions of the CRC helpers.
Note that although the operation
 * (and the prototypes of crc32c() and crc32()) means that only the bottom
 * 32 bits of the accumulator and result are used, we pass and return
 * uint64_t for convenience of the generated code. Unlike the 32-bit
 * instruction set versions, val may genuinely have 64 bits of data in it.
 * The upper bytes of val (above the number specified by 'bytes') must have
 * been zeroed out by the caller.
 */
uint64_t HELPER(crc32_64)(uint64_t acc, uint64_t val, uint32_t bytes)
{
    uint8_t buf[8];

    stq_le_p(buf, val);

    /* zlib crc32 converts the accumulator and output to one's complement.  */
    return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
}

uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
{
    uint8_t buf[8];

    stq_le_p(buf, val);

    /* Linux crc32c converts the output to one's complement.  */
    return crc32c(acc, buf, bytes) ^ 0xffffffff;
}

/*
 * AdvSIMD half-precision
 */

#define ADVSIMD_HELPER(name, suffix) HELPER(glue(glue(advsimd_, name), suffix))

/* Single-lane fp16 binary op wrapper */
#define ADVSIMD_HALFOP(name) \
uint32_t ADVSIMD_HELPER(name, h)(uint32_t a, uint32_t b, float_status *fpst) \
{ \
    return float16_ ## name(a, b, fpst); \
}

/* Two fp16 lanes packed in one 32-bit word, processed independently */
#define ADVSIMD_TWOHALFOP(name)                                         \
uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b,       \
                                  float_status *fpst)                   \
{                                                                       \
    float16 a1, a2, b1, b2;                                             \
    uint32_t r1, r2;                                                    \
    a1 = extract32(two_a, 0, 16);                                       \
    a2 = extract32(two_a, 16, 16);                                      \
    b1 = extract32(two_b, 0, 16);                                       \
    b2 = extract32(two_b, 16, 16);                                      \
    r1 = float16_ ## name(a1, b1, fpst);                                \
    r2 = float16_ ## name(a2, b2, fpst);                                \
    return deposit32(r1, 16, 16, r2);                                   \
}

ADVSIMD_TWOHALFOP(add)
ADVSIMD_TWOHALFOP(sub)
ADVSIMD_TWOHALFOP(mul)
ADVSIMD_TWOHALFOP(div)
ADVSIMD_TWOHALFOP(min)
ADVSIMD_TWOHALFOP(max)
ADVSIMD_TWOHALFOP(minnum)

ADVSIMD_TWOHALFOP(maxnum)

/* Data processing - scalar floating-point and advanced SIMD */
/* FMULX (fp16): as FMUL but 0 * inf returns +/-2.0 instead of NaN */
static float16 float16_mulx(float16 a, float16 b, float_status *fpst)
{
    a = float16_squash_input_denormal(a, fpst);
    b = float16_squash_input_denormal(b, fpst);

    if ((float16_is_zero(a) && float16_is_infinity(b)) ||
        (float16_is_infinity(a) && float16_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float16((1U << 14) |
                            ((float16_val(a) ^ float16_val(b)) & (1U << 15)));
    }
    return float16_mul(a, b, fpst);
}

ADVSIMD_HALFOP(mulx)
ADVSIMD_TWOHALFOP(mulx)

/* fused multiply-accumulate */
uint32_t HELPER(advsimd_muladdh)(uint32_t a, uint32_t b, uint32_t c,
                                 float_status *fpst)
{
    return float16_muladd(a, b, c, 0, fpst);
}

/* As above, but on two independent fp16 lanes packed in 32-bit words */
uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b,
                                  uint32_t two_c, float_status *fpst)
{
    float16 a1, a2, b1, b2, c1, c2;
    uint32_t r1, r2;
    a1 = extract32(two_a, 0, 16);
    a2 = extract32(two_a, 16, 16);
    b1 = extract32(two_b, 0, 16);
    b2 = extract32(two_b, 16, 16);
    c1 = extract32(two_c, 0, 16);
    c2 = extract32(two_c, 16, 16);
    r1 = float16_muladd(a1, b1, c1, 0, fpst);
    r2 = float16_muladd(a2, b2, c2, 0, fpst);
    return deposit32(r1, 16, 16, r2);
}

/*
 * Floating point comparisons produce an integer result. Softfloat
 * routines return float_relation types which we convert to the 0/-1
 * Neon requires.
 */

#define ADVSIMD_CMPRES(test) (test) ? 0xffff : 0

uint32_t HELPER(advsimd_ceq_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    int compare = float16_compare_quiet(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_equal);
}

uint32_t HELPER(advsimd_cge_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    int compare = float16_compare(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater ||
                          compare == float_relation_equal);
}

uint32_t HELPER(advsimd_cgt_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    int compare = float16_compare(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater);
}

/* FACGE: absolute compare, |a| >= |b|, using the signaling compare */
uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    float16 f0 = float16_abs(a);
    float16 f1 = float16_abs(b);
    int compare = float16_compare(f0, f1, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater ||
                          compare == float_relation_equal);
}

/* FACGT: absolute compare, |a| > |b|, using the signaling compare */
uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    float16 f0 = float16_abs(a);
    float16 f1 = float16_abs(b);
    int compare = float16_compare(f0, f1, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater);
}

/* round to integral */
uint32_t HELPER(advsimd_rinth_exact)(uint32_t x, float_status *fp_status)
{
    return float16_round_to_int(x, fp_status);
}

uint32_t HELPER(advsimd_rinth)(uint32_t x, float_status *fp_status)
{
    int old_flags = get_float_exception_flags(fp_status), new_flags;
    float16 ret;

    ret = float16_round_to_int(x, fp_status);

    /* Suppress any inexact exceptions the conversion produced */
    if (!(old_flags & float_flag_inexact)) {
        new_flags = get_float_exception_flags(fp_status);
        set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
    }

    return ret;
}

static int el_from_spsr(uint32_t spsr)
{
    /* Return the
exception level that this SPSR is requesting a return to, 578 * or -1 if it is invalid (an illegal return) 579 */ 580 if (spsr & PSTATE_nRW) { 581 switch (spsr & CPSR_M) { 582 case ARM_CPU_MODE_USR: 583 return 0; 584 case ARM_CPU_MODE_HYP: 585 return 2; 586 case ARM_CPU_MODE_FIQ: 587 case ARM_CPU_MODE_IRQ: 588 case ARM_CPU_MODE_SVC: 589 case ARM_CPU_MODE_ABT: 590 case ARM_CPU_MODE_UND: 591 case ARM_CPU_MODE_SYS: 592 return 1; 593 case ARM_CPU_MODE_MON: 594 /* Returning to Mon from AArch64 is never possible, 595 * so this is an illegal return. 596 */ 597 default: 598 return -1; 599 } 600 } else { 601 if (extract32(spsr, 1, 1)) { 602 /* Return with reserved M[1] bit set */ 603 return -1; 604 } 605 if (extract32(spsr, 0, 4) == 1) { 606 /* return to EL0 with M[0] bit set */ 607 return -1; 608 } 609 return extract32(spsr, 2, 2); 610 } 611 } 612 613 static void cpsr_write_from_spsr_elx(CPUARMState *env, 614 uint32_t val) 615 { 616 uint32_t mask; 617 618 /* Save SPSR_ELx.SS into PSTATE. */ 619 env->pstate = (env->pstate & ~PSTATE_SS) | (val & PSTATE_SS); 620 val &= ~PSTATE_SS; 621 622 /* Move DIT to the correct location for CPSR */ 623 if (val & PSTATE_DIT) { 624 val &= ~PSTATE_DIT; 625 val |= CPSR_DIT; 626 } 627 628 mask = aarch32_cpsr_valid_mask(env->features, \ 629 &env_archcpu(env)->isar); 630 cpsr_write(env, val, mask, CPSRWriteRaw); 631 } 632 633 void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc) 634 { 635 ARMCPU *cpu = env_archcpu(env); 636 int cur_el = arm_current_el(env); 637 unsigned int spsr_idx = aarch64_banked_spsr_index(cur_el); 638 uint32_t spsr = env->banked_spsr[spsr_idx]; 639 int new_el; 640 bool return_to_aa64 = (spsr & PSTATE_nRW) == 0; 641 642 aarch64_save_sp(env, cur_el); 643 644 arm_clear_exclusive(env); 645 646 /* We must squash the PSTATE.SS bit to zero unless both of the 647 * following hold: 648 * 1. debug exceptions are currently disabled 649 * 2. 
singlestep will be active in the EL we return to
     * We check 1 here and 2 after we've done the pstate/cpsr write() to
     * transition to the EL we're going to.
     */
    if (arm_generate_debug_exceptions(env)) {
        spsr &= ~PSTATE_SS;
    }

    /*
     * FEAT_RME forbids return from EL3 with an invalid security state.
     * We don't need an explicit check for FEAT_RME here because we enforce
     * in scr_write() that you can't set the NSE bit without it.
     */
    if (cur_el == 3 && (env->cp15.scr_el3 & (SCR_NS | SCR_NSE)) == SCR_NSE) {
        goto illegal_return;
    }

    new_el = el_from_spsr(spsr);
    if (new_el == -1) {
        goto illegal_return;
    }
    if (new_el > cur_el || (new_el == 2 && !arm_is_el2_enabled(env))) {
        /* Disallow return to an EL which is unimplemented or higher
         * than the current one.
         */
        goto illegal_return;
    }

    if (new_el != 0 && arm_el_is_aa64(env, new_el) != return_to_aa64) {
        /* Return to an EL which is configured for a different register width */
        goto illegal_return;
    }

    if (!return_to_aa64 && !cpu_isar_feature(aa64_aa32, cpu)) {
        /* Return to AArch32 when CPU is AArch64-only */
        goto illegal_return;
    }

    if (new_el == 1 && (arm_hcr_el2_eff(env) & HCR_TGE)) {
        goto illegal_return;
    }

    bql_lock();
    arm_call_pre_el_change_hook(cpu);
    bql_unlock();

    if (!return_to_aa64) {
        env->aarch64 = false;
        /* We do a raw CPSR write because aarch64_sync_64_to_32()
         * will sort the register banks out for us, and we've already
         * caught all the bad-mode cases in el_from_spsr().
         */
        cpsr_write_from_spsr_elx(env, spsr);
        if (!arm_singlestep_active(env)) {
            env->pstate &= ~PSTATE_SS;
        }
        aarch64_sync_64_to_32(env);

        /* Thumb vs ARM state determines the PC alignment on return */
        if (spsr & CPSR_T) {
            env->regs[15] = new_pc & ~0x1;
        } else {
            env->regs[15] = new_pc & ~0x3;
        }
        helper_rebuild_hflags_a32(env, new_el);
        qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
                      "AArch32 EL%d PC 0x%" PRIx32 "\n",
                      cur_el, new_el, env->regs[15]);
    } else {
        int tbii;

        env->aarch64 = true;
        spsr &= aarch64_pstate_valid_mask(&cpu->isar);
        pstate_write(env, spsr);
        if (!arm_singlestep_active(env)) {
            env->pstate &= ~PSTATE_SS;
        }
        aarch64_restore_sp(env, new_el);
        helper_rebuild_hflags_a64(env, new_el);

        /*
         * Apply TBI to the exception return address. We had to delay this
         * until after we selected the new EL, so that we could select the
         * correct TBI+TBID bits. This is made easier by waiting until after
         * the hflags rebuild, since we can pull the composite TBII field
         * from there.
         */
        tbii = EX_TBFLAG_A64(env->hflags, TBII);
        if ((tbii >> extract64(new_pc, 55, 1)) & 1) {
            /* TBI is enabled. */
            int core_mmu_idx = arm_env_mmu_index(env);
            if (regime_has_2_ranges(core_to_aa64_mmu_idx(core_mmu_idx))) {
                new_pc = sextract64(new_pc, 0, 56);
            } else {
                new_pc = extract64(new_pc, 0, 56);
            }
        }
        env->pc = new_pc;

        qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
                      "AArch64 EL%d PC 0x%" PRIx64 "\n",
                      cur_el, new_el, env->pc);
    }

    /*
     * Note that cur_el can never be 0. If new_el is 0, then
     * el0_a64 is return_to_aa64, else el0_a64 is ignored.
     */
    aarch64_sve_change_el(env, cur_el, new_el, return_to_aa64);

    bql_lock();
    arm_call_el_change_hook(cpu);
    bql_unlock();

    return;

illegal_return:
    /* Illegal return events of various kinds have architecturally
     * mandated behaviour:
     * restore NZCV and DAIF from SPSR_ELx
     * set PSTATE.IL
     * restore PC from ELR_ELx
     * no change to exception level, execution state or stack pointer
     */
    env->pstate |= PSTATE_IL;
    env->pc = new_pc;
    spsr &= PSTATE_NZCV | PSTATE_DAIF | PSTATE_ALLINT;
    spsr |= pstate_read(env) & ~(PSTATE_NZCV | PSTATE_DAIF | PSTATE_ALLINT);
    pstate_write(env, spsr);
    if (!arm_singlestep_active(env)) {
        env->pstate &= ~PSTATE_SS;
    }
    helper_rebuild_hflags_a64(env, cur_el);
    qemu_log_mask(LOG_GUEST_ERROR, "Illegal exception return at EL%d: "
                  "resuming execution at 0x%" PRIx64 "\n", cur_el, env->pc);
}

void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
{
    uintptr_t ra = GETPC();

    /*
     * Implement DC ZVA, which zeroes a fixed-length block of memory.
     * Note that we do not implement the (architecturally mandated)
     * alignment fault for attempts to use this on Device memory
     * (which matches the usual QEMU behaviour of not implementing either
     * alignment faults or any memory attribute handling).
     */
    int blocklen = 4 << env_archcpu(env)->dcz_blocksize;
    uint64_t vaddr = vaddr_in & ~(blocklen - 1);
    int mmu_idx = arm_env_mmu_index(env);
    void *mem;

    /*
     * Trapless lookup. In addition to actual invalid page, may
     * return NULL for I/O, watchpoints, clean pages, etc.
     */
    mem = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);

#ifndef CONFIG_USER_ONLY
    if (unlikely(!mem)) {
        /*
         * Trap if accessing an invalid page. DC_ZVA requires that we supply
         * the original pointer for an invalid page. But watchpoints require
         * that we probe the actual space. So do both.
         */
        (void) probe_write(env, vaddr_in, 1, mmu_idx, ra);
        mem = probe_write(env, vaddr, blocklen, mmu_idx, ra);

        if (unlikely(!mem)) {
            /*
             * The only remaining reason for mem == NULL is I/O.
             * Just do a series of byte writes as the architecture demands.
             */
            for (int i = 0; i < blocklen; i++) {
                cpu_stb_mmuidx_ra(env, vaddr + i, 0, mmu_idx, ra);
            }
            return;
        }
    }
#endif

    set_helper_retaddr(ra);
    memset(mem, 0, blocklen);
    clear_helper_retaddr();
}

/* Raise the architectural unaligned-access fault for addr */
void HELPER(unaligned_access)(CPUARMState *env, uint64_t addr,
                              uint32_t access_type, uint32_t mmu_idx)
{
    arm_cpu_do_unaligned_access(env_cpu(env), addr, access_type,
                                mmu_idx, GETPC());
}

/* Memory operations (memset, memmove, memcpy) */

/*
 * Return true if the CPY* and SET* insns can execute; compare
 * pseudocode CheckMOPSEnabled(), though we refactor it a little.
 */
static bool mops_enabled(CPUARMState *env)
{
    int el = arm_current_el(env);

    /* Below EL2, MOPS must be enabled via HCRX_EL2.MSCEN (unless E2H+TGE) */
    if (el < 2 &&
        (arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE) &&
        !(arm_hcrx_el2_eff(env) & HCRX_MSCEN)) {
        return false;
    }

    if (el == 0) {
        /* EL0 additionally requires SCTLR.MSCEN in the controlling EL */
        if (!el_is_in_host(env, 0)) {
            return env->cp15.sctlr_el[1] & SCTLR_MSCEN;
        } else {
            return env->cp15.sctlr_el[2] & SCTLR_MSCEN;
        }
    }
    return true;
}

/* Raise UNDEF if the MOPS instructions are not enabled for this EL */
static void check_mops_enabled(CPUARMState *env, uintptr_t ra)
{
    if (!mops_enabled(env)) {
        raise_exception_ra(env, EXCP_UDEF, syn_uncategorized(),
                           exception_target_el(env), ra);
    }
}

/*
 * Return the target exception level for an exception due
 * to mismatched arguments in a FEAT_MOPS copy or set.
 * Compare pseudocode MismatchedCpySetTargetEL()
 */
static int mops_mismatch_exception_target_el(CPUARMState *env)
{
    int el = arm_current_el(env);

    if (el > 1) {
        return el;
    }
    if (el == 0 && (arm_hcr_el2_eff(env) & HCR_TGE)) {
        return 2;
    }
    if (el == 1 && (arm_hcrx_el2_eff(env) & HCRX_MCE2)) {
        return 2;
    }
    return 1;
}

/*
 * Check whether an M or E instruction was executed with a CF value
 * indicating the wrong option for this implementation.
 * Assumes we are always Option A.
 */
static void check_mops_wrong_option(CPUARMState *env, uint32_t syndrome,
                                    uintptr_t ra)
{
    if (env->CF != 0) {
        syndrome |= 1 << 17; /* Set the wrong-option bit */
        raise_exception_ra(env, EXCP_UDEF, syndrome,
                           mops_mismatch_exception_target_el(env), ra);
    }
}

/*
 * Return the maximum number of bytes we can transfer starting at addr
 * without crossing a page boundary.
 */
static uint64_t page_limit(uint64_t addr)
{
    return TARGET_PAGE_ALIGN(addr + 1) - addr;
}

/*
 * Return the number of bytes we can copy starting from addr and working
 * backwards without crossing a page boundary.
 */
static uint64_t page_limit_rev(uint64_t addr)
{
    return (addr & ~TARGET_PAGE_MASK) + 1;
}

/*
 * Perform part of a memory set on an area of guest memory starting at
 * toaddr (a dirty address) and extending for setsize bytes.
 *
 * Returns the number of bytes actually set, which might be less than
 * setsize; the caller should loop until the whole set has been done.
 * The caller should ensure that the guest registers are correct
 * for the possibility that the first byte of the set encounters
 * an exception or watchpoint. We guarantee not to take any faults
 * for bytes other than the first.
 */
static uint64_t set_step(CPUARMState *env, uint64_t toaddr,
                         uint64_t setsize, uint32_t data, int memidx,
                         uint32_t *mtedesc, uintptr_t ra)
{
    void *mem;

    setsize = MIN(setsize, page_limit(toaddr));
    if (*mtedesc) {
        uint64_t mtesize = mte_mops_probe(env, toaddr, setsize, *mtedesc);
        if (mtesize == 0) {
            /* Trap, or not. All CPU state is up to date */
            mte_check_fail(env, *mtedesc, toaddr, ra);
            /* Continue, with no further MTE checks required */
            *mtedesc = 0;
        } else {
            /* Advance to the end, or to the tag mismatch */
            setsize = MIN(setsize, mtesize);
        }
    }

    toaddr = useronly_clean_ptr(toaddr);
    /*
     * Trapless lookup: returns NULL for invalid page, I/O,
     * watchpoints, clean pages, etc.
     */
    mem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, memidx);

#ifndef CONFIG_USER_ONLY
    if (unlikely(!mem)) {
        /*
         * Slow-path: just do one byte write. This will handle the
         * watchpoint, invalid page, etc handling correctly.
         * For clean code pages, the next iteration will see
         * the page dirty and will use the fast path.
         */
        cpu_stb_mmuidx_ra(env, toaddr, data, memidx, ra);
        return 1;
    }
#endif
    /* Easy case: just memset the host memory */
    set_helper_retaddr(ra);
    memset(mem, data, setsize);
    clear_helper_retaddr();
    return setsize;
}

/*
 * Similar, but setting tags. The architecture requires us to do this
 * in 16-byte chunks. SETP accesses are not tag checked; they set
 * the tags.
 */
static uint64_t set_step_tags(CPUARMState *env, uint64_t toaddr,
                              uint64_t setsize, uint32_t data, int memidx,
                              uint32_t *mtedesc, uintptr_t ra)
{
    void *mem;
    uint64_t cleanaddr;

    setsize = MIN(setsize, page_limit(toaddr));

    cleanaddr = useronly_clean_ptr(toaddr);
    /*
     * Trapless lookup: returns NULL for invalid page, I/O,
     * watchpoints, clean pages, etc.
     */
    mem = tlb_vaddr_to_host(env, cleanaddr, MMU_DATA_STORE, memidx);

#ifndef CONFIG_USER_ONLY
    if (unlikely(!mem)) {
        /*
         * Slow-path: just do one write. This will handle the
         * watchpoint, invalid page, etc handling correctly.
         * The architecture requires that we do 16 bytes at a time,
         * and we know both ptr and size are 16 byte aligned.
         * For clean code pages, the next iteration will see
         * the page dirty and will use the fast path.
         */
        uint64_t repldata = data * 0x0101010101010101ULL;
        MemOpIdx oi16 = make_memop_idx(MO_TE | MO_128, memidx);
        cpu_st16_mmu(env, toaddr, int128_make128(repldata, repldata), oi16, ra);
        mte_mops_set_tags(env, toaddr, 16, *mtedesc);
        return 16;
    }
#endif
    /* Easy case: just memset the host memory */
    set_helper_retaddr(ra);
    memset(mem, data, setsize);
    clear_helper_retaddr();
    mte_mops_set_tags(env, toaddr, setsize, *mtedesc);
    return setsize;
}

/* Signature of the per-stage step functions (set_step, set_step_tags) */
typedef uint64_t StepFn(CPUARMState *env, uint64_t toaddr,
                        uint64_t setsize, uint32_t data,
                        int memidx, uint32_t *mtedesc, uintptr_t ra);

/* Extract register numbers from a MOPS exception syndrome value */
static int mops_destreg(uint32_t syndrome)
{
    return extract32(syndrome, 10, 5);
}

static int mops_srcreg(uint32_t syndrome)
{
    return extract32(syndrome, 5, 5);
}

static int mops_sizereg(uint32_t syndrome)
{
    return extract32(syndrome, 0, 5);
}

/*
 * Return true if TCMA and TBI bits mean we need to do MTE checks.
 * We only need to do this once per MOPS insn, not for every page.
 */
static bool mte_checks_needed(uint64_t ptr, uint32_t desc)
{
    int bit55 = extract64(ptr, 55, 1);

    /*
     * Note that tbi_check() returns true for "access checked" but
     * tcma_check() returns true for "access unchecked".
1064 */ 1065 if (!tbi_check(desc, bit55)) { 1066 return false; 1067 } 1068 return !tcma_check(desc, bit55, allocation_tag_from_addr(ptr)); 1069 } 1070 1071 /* Take an exception if the SETG addr/size are not granule aligned */ 1072 static void check_setg_alignment(CPUARMState *env, uint64_t ptr, uint64_t size, 1073 uint32_t memidx, uintptr_t ra) 1074 { 1075 if ((size != 0 && !QEMU_IS_ALIGNED(ptr, TAG_GRANULE)) || 1076 !QEMU_IS_ALIGNED(size, TAG_GRANULE)) { 1077 arm_cpu_do_unaligned_access(env_cpu(env), ptr, MMU_DATA_STORE, 1078 memidx, ra); 1079 1080 } 1081 } 1082 1083 static uint64_t arm_reg_or_xzr(CPUARMState *env, int reg) 1084 { 1085 /* 1086 * Runtime equivalent of cpu_reg() -- return the CPU register value, 1087 * for contexts when index 31 means XZR (not SP). 1088 */ 1089 return reg == 31 ? 0 : env->xregs[reg]; 1090 } 1091 1092 /* 1093 * For the Memory Set operation, our implementation chooses 1094 * always to use "option A", where we update Xd to the final 1095 * address in the SETP insn, and set Xn to be -(bytes remaining). 1096 * On SETM and SETE insns we only need update Xn. 
 *
 * @env: CPU
 * @syndrome: syndrome value for mismatch exceptions
 * (also contains the register numbers we need to use)
 * @mtedesc: MTE descriptor word
 * @stepfn: function which does a single part of the set operation
 * @is_setg: true if this is the tag-setting SETG variant
 */
static void do_setp(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
                    StepFn *stepfn, bool is_setg, uintptr_t ra)
{
    /* Prologue: we choose to do up to the next page boundary */
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint8_t data = arm_reg_or_xzr(env, rs);
    uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX);
    uint64_t toaddr = env->xregs[rd];
    uint64_t setsize = env->xregs[rn];
    uint64_t stagesetsize, step;

    check_mops_enabled(env, ra);

    /* Saturate the size; for SETG keep it a multiple of the tag granule */
    if (setsize > INT64_MAX) {
        setsize = INT64_MAX;
        if (is_setg) {
            setsize &= ~0xf;
        }
    }

    if (unlikely(is_setg)) {
        check_setg_alignment(env, toaddr, setsize, memidx, ra);
    } else if (!mte_checks_needed(toaddr, mtedesc)) {
        mtedesc = 0;
    }

    stagesetsize = MIN(setsize, page_limit(toaddr));
    while (stagesetsize) {
        /*
         * Update the registers before each step, so that if stepfn()
         * takes an exception the register state correctly describes
         * the progress made so far.
         */
        env->xregs[rd] = toaddr;
        env->xregs[rn] = setsize;
        step = stepfn(env, toaddr, stagesetsize, data, memidx, &mtedesc, ra);
        toaddr += step;
        setsize -= step;
        stagesetsize -= step;
    }
    /* Insn completed, so update registers to the Option A format */
    env->xregs[rd] = toaddr + setsize;
    env->xregs[rn] = -setsize;

    /* Set NZCV = 0000 to indicate we are an Option A implementation */
    env->NF = 0;
    env->ZF = 1; /* our env->ZF encoding is inverted */
    env->CF = 0;
    env->VF = 0;
    return;
}

void HELPER(setp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_setp(env, syndrome, mtedesc, set_step, false, GETPC());
}

void HELPER(setgp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_setp(env, syndrome, mtedesc, set_step_tags, true, GETPC());
}

static void do_setm(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
                    StepFn *stepfn, bool is_setg, uintptr_t ra)
{
    /* Main: we choose to do all the full-page chunks */
    CPUState *cs = env_cpu(env);
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint8_t data = arm_reg_or_xzr(env, rs);
    /* Option A encoding: Xd is the final address, Xn is -(bytes left) */
    uint64_t toaddr = env->xregs[rd] + env->xregs[rn];
    uint64_t setsize = -env->xregs[rn];
    uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX);
    uint64_t step, stagesetsize;

    check_mops_enabled(env, ra);

    /*
     * We're allowed to NOP out "no data to copy" before the consistency
     * checks; we choose to do so.
     */
    if (env->xregs[rn] == 0) {
        return;
    }

    check_mops_wrong_option(env, syndrome, ra);

    /*
     * Our implementation will work fine even if we have an unaligned
     * destination address, and because we update Xn every time around
     * the loop below and the return value from stepfn() may be less
     * than requested, we might find toaddr is unaligned. So we don't
     * have an IMPDEF check for alignment here.
     */

    if (unlikely(is_setg)) {
        check_setg_alignment(env, toaddr, setsize, memidx, ra);
    } else if (!mte_checks_needed(toaddr, mtedesc)) {
        mtedesc = 0;
    }

    /* Do the actual memset: we leave the last partial page to SETE */
    stagesetsize = setsize & TARGET_PAGE_MASK;
    while (stagesetsize > 0) {
        step = stepfn(env, toaddr, stagesetsize, data, memidx, &mtedesc, ra);
        toaddr += step;
        setsize -= step;
        stagesetsize -= step;
        env->xregs[rn] = -setsize;
        /* Allow interrupts etc to be taken between pages */
        if (stagesetsize > 0 && unlikely(cpu_loop_exit_requested(cs))) {
            cpu_loop_exit_restore(cs, ra);
        }
    }
}

void HELPER(setm)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_setm(env, syndrome, mtedesc, set_step, false, GETPC());
}

void HELPER(setgm)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_setm(env, syndrome, mtedesc, set_step_tags, true, GETPC());
}

static void do_sete(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
                    StepFn *stepfn, bool is_setg, uintptr_t ra)
{
    /* Epilogue: do the last partial page */
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint8_t data = arm_reg_or_xzr(env, rs);
    /* Option A encoding: Xd is the final address, Xn is -(bytes left) */
    uint64_t toaddr = env->xregs[rd] + env->xregs[rn];
    uint64_t setsize = -env->xregs[rn];
    uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX);
    uint64_t step;

    check_mops_enabled(env, ra);

    /*
     * We're allowed to NOP out "no data to copy" before the consistency
     * checks; we choose to do so.
     */
    if (setsize == 0) {
        return;
    }

    check_mops_wrong_option(env, syndrome, ra);

    /*
     * Our implementation has no address alignment requirements, but
     * we do want to enforce the "less than a page" size requirement,
     * so we don't need to have the "check for interrupts" here.
     */
    if (setsize >= TARGET_PAGE_SIZE) {
        raise_exception_ra(env, EXCP_UDEF, syndrome,
                           mops_mismatch_exception_target_el(env), ra);
    }

    if (unlikely(is_setg)) {
        check_setg_alignment(env, toaddr, setsize, memidx, ra);
    } else if (!mte_checks_needed(toaddr, mtedesc)) {
        mtedesc = 0;
    }

    /* Do the actual memset */
    while (setsize > 0) {
        step = stepfn(env, toaddr, setsize, data, memidx, &mtedesc, ra);
        toaddr += step;
        setsize -= step;
        env->xregs[rn] = -setsize;
    }
}

void HELPER(sete)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_sete(env, syndrome, mtedesc, set_step, false, GETPC());
}

void HELPER(setge)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_sete(env, syndrome, mtedesc, set_step_tags, true, GETPC());
}

/*
 * Perform part of a memory copy from the guest memory at fromaddr
 * and extending for copysize bytes, to the guest memory at
 * toaddr. Both addresses are dirty.
 *
 * Returns the number of bytes actually set, which might be less than
 * copysize; the caller should loop until the whole copy has been done.
 * The caller should ensure that the guest registers are correct
 * for the possibility that the first byte of the copy encounters
 * an exception or watchpoint. We guarantee not to take any faults
 * for bytes other than the first.
 */
static uint64_t copy_step(CPUARMState *env, uint64_t toaddr, uint64_t fromaddr,
                          uint64_t copysize, int wmemidx, int rmemidx,
                          uint32_t *wdesc, uint32_t *rdesc, uintptr_t ra)
{
    void *rmem;
    void *wmem;

    /* Don't cross a page boundary on either source or destination */
    copysize = MIN(copysize, page_limit(toaddr));
    copysize = MIN(copysize, page_limit(fromaddr));
    /*
     * Handle MTE tag checks: either handle the tag mismatch for byte 0,
     * or else copy up to but not including the byte with the mismatch.
     */
    if (*rdesc) {
        uint64_t mtesize = mte_mops_probe(env, fromaddr, copysize, *rdesc);
        if (mtesize == 0) {
            /* Trap, or not; either way no more read-side MTE checks */
            mte_check_fail(env, *rdesc, fromaddr, ra);
            *rdesc = 0;
        } else {
            copysize = MIN(copysize, mtesize);
        }
    }
    if (*wdesc) {
        uint64_t mtesize = mte_mops_probe(env, toaddr, copysize, *wdesc);
        if (mtesize == 0) {
            /* Trap, or not; either way no more write-side MTE checks */
            mte_check_fail(env, *wdesc, toaddr, ra);
            *wdesc = 0;
        } else {
            copysize = MIN(copysize, mtesize);
        }
    }

    toaddr = useronly_clean_ptr(toaddr);
    fromaddr = useronly_clean_ptr(fromaddr);
    /* Trapless lookup of whether we can get a host memory pointer */
    wmem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, wmemidx);
    rmem = tlb_vaddr_to_host(env, fromaddr, MMU_DATA_LOAD, rmemidx);

#ifndef CONFIG_USER_ONLY
    /*
     * If we don't have host memory for both source and dest then just
     * do a single byte copy. This will handle watchpoints, invalid pages,
     * etc correctly. For clean code pages, the next iteration will see
     * the page dirty and will use the fast path.
     */
    if (unlikely(!rmem || !wmem)) {
        uint8_t byte;
        if (rmem) {
            byte = *(uint8_t *)rmem;
        } else {
            byte = cpu_ldub_mmuidx_ra(env, fromaddr, rmemidx, ra);
        }
        if (wmem) {
            *(uint8_t *)wmem = byte;
        } else {
            cpu_stb_mmuidx_ra(env, toaddr, byte, wmemidx, ra);
        }
        return 1;
    }
#endif
    /* Easy case: just memmove the host memory */
    set_helper_retaddr(ra);
    memmove(wmem, rmem, copysize);
    clear_helper_retaddr();
    return copysize;
}

/*
 * Do part of a backwards memory copy. Here toaddr and fromaddr point
 * to the *last* byte to be copied.
 */
static uint64_t copy_step_rev(CPUARMState *env, uint64_t toaddr,
                              uint64_t fromaddr,
                              uint64_t copysize, int wmemidx, int rmemidx,
                              uint32_t *wdesc, uint32_t *rdesc, uintptr_t ra)
{
    void *rmem;
    void *wmem;

    /* Don't cross a page boundary on either source or destination */
    copysize = MIN(copysize, page_limit_rev(toaddr));
    copysize = MIN(copysize, page_limit_rev(fromaddr));

    /*
     * Handle MTE tag checks: either handle the tag mismatch for byte 0,
     * or else copy up to but not including the byte with the mismatch.
     */
    if (*rdesc) {
        uint64_t mtesize = mte_mops_probe_rev(env, fromaddr, copysize, *rdesc);
        if (mtesize == 0) {
            /* Trap, or not; either way no more read-side MTE checks */
            mte_check_fail(env, *rdesc, fromaddr, ra);
            *rdesc = 0;
        } else {
            copysize = MIN(copysize, mtesize);
        }
    }
    if (*wdesc) {
        uint64_t mtesize = mte_mops_probe_rev(env, toaddr, copysize, *wdesc);
        if (mtesize == 0) {
            /* Trap, or not; either way no more write-side MTE checks */
            mte_check_fail(env, *wdesc, toaddr, ra);
            *wdesc = 0;
        } else {
            copysize = MIN(copysize, mtesize);
        }
    }

    toaddr = useronly_clean_ptr(toaddr);
    fromaddr = useronly_clean_ptr(fromaddr);
    /* Trapless lookup of whether we can get a host memory pointer */
    wmem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, wmemidx);
    rmem = tlb_vaddr_to_host(env, fromaddr, MMU_DATA_LOAD, rmemidx);

#ifndef CONFIG_USER_ONLY
    /*
     * If we don't have host memory for both source and dest then just
     * do a single byte copy. This will handle watchpoints, invalid pages,
     * etc correctly. For clean code pages, the next iteration will see
     * the page dirty and will use the fast path.
     */
    if (unlikely(!rmem || !wmem)) {
        uint8_t byte;
        if (rmem) {
            byte = *(uint8_t *)rmem;
        } else {
            byte = cpu_ldub_mmuidx_ra(env, fromaddr, rmemidx, ra);
        }
        if (wmem) {
            *(uint8_t *)wmem = byte;
        } else {
            cpu_stb_mmuidx_ra(env, toaddr, byte, wmemidx, ra);
        }
        return 1;
    }
#endif
    /*
     * Easy case: just memmove the host memory. Note that wmem and
     * rmem here point to the *last* byte to copy.
     */
    set_helper_retaddr(ra);
    memmove(wmem - (copysize - 1), rmem - (copysize - 1), copysize);
    clear_helper_retaddr();
    return copysize;
}

/*
 * For the Memory Copy operation, our implementation chooses always
 * to use "option A", where we update Xd and Xs to the final addresses
 * in the CPYP insn, and then in CPYM and CPYE only need to update Xn.
 *
 * @env: CPU
 * @syndrome: syndrome value for mismatch exceptions
 * (also contains the register numbers we need to use)
 * @wdesc: MTE descriptor for the writes (destination)
 * @rdesc: MTE descriptor for the reads (source)
 * @move: true if this is CPY (memmove), false for CPYF (memcpy forwards)
 */
static void do_cpyp(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                    uint32_t rdesc, uint32_t move, uintptr_t ra)
{
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX);
    uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX);
    bool forwards = true;
    uint64_t toaddr = env->xregs[rd];
    uint64_t fromaddr = env->xregs[rs];
    uint64_t copysize = env->xregs[rn];
    uint64_t stagecopysize, step;

    check_mops_enabled(env, ra);

    if (move) {
        /*
         * Copy backwards if necessary. The direction for a non-overlapping
         * copy is IMPDEF; we choose forwards.
         */
        if (copysize > 0x007FFFFFFFFFFFFFULL) {
            copysize = 0x007FFFFFFFFFFFFFULL;
        }
        /* Compare only the low 56 bits (the address-size of the copy) */
        uint64_t fs = extract64(fromaddr, 0, 56);
        uint64_t ts = extract64(toaddr, 0, 56);
        uint64_t fe = extract64(fromaddr + copysize, 0, 56);

        if (fs < ts && fe > ts) {
            forwards = false;
        }
    } else {
        if (copysize > INT64_MAX) {
            copysize = INT64_MAX;
        }
    }

    if (!mte_checks_needed(fromaddr, rdesc)) {
        rdesc = 0;
    }
    if (!mte_checks_needed(toaddr, wdesc)) {
        wdesc = 0;
    }

    if (forwards) {
        stagecopysize = MIN(copysize, page_limit(toaddr));
        stagecopysize = MIN(stagecopysize, page_limit(fromaddr));
        while (stagecopysize) {
            /*
             * Update the registers before each step, so that if the
             * step takes an exception the register state correctly
             * describes the progress made so far.
             */
            env->xregs[rd] = toaddr;
            env->xregs[rs] = fromaddr;
            env->xregs[rn] = copysize;
            step = copy_step(env, toaddr, fromaddr, stagecopysize,
                             wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr += step;
            fromaddr += step;
            copysize -= step;
            stagecopysize -= step;
        }
        /* Insn completed, so update registers to the Option A format */
        env->xregs[rd] = toaddr + copysize;
        env->xregs[rs] = fromaddr + copysize;
        env->xregs[rn] = -copysize;
    } else {
        /*
         * In a reverse copy the to and from addrs in Xs and Xd are the start
         * of the range, but it's more convenient for us to work with pointers
         * to the last byte being copied.
         */
        toaddr += copysize - 1;
        fromaddr += copysize - 1;
        stagecopysize = MIN(copysize, page_limit_rev(toaddr));
        stagecopysize = MIN(stagecopysize, page_limit_rev(fromaddr));
        while (stagecopysize) {
            env->xregs[rn] = copysize;
            step = copy_step_rev(env, toaddr, fromaddr, stagecopysize,
                                 wmemidx, rmemidx, &wdesc, &rdesc, ra);
            copysize -= step;
            stagecopysize -= step;
            toaddr -= step;
            fromaddr -= step;
        }
        /*
         * Insn completed, so update registers to the Option A format.
         * For a reverse copy this is no different to the CPYP input format.
         */
        env->xregs[rn] = copysize;
    }

    /* Set NZCV = 0000 to indicate we are an Option A implementation */
    env->NF = 0;
    env->ZF = 1; /* our env->ZF encoding is inverted */
    env->CF = 0;
    env->VF = 0;
    return;
}

void HELPER(cpyp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                  uint32_t rdesc)
{
    do_cpyp(env, syndrome, wdesc, rdesc, true, GETPC());
}

void HELPER(cpyfp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                   uint32_t rdesc)
{
    do_cpyp(env, syndrome, wdesc, rdesc, false, GETPC());
}

static void do_cpym(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                    uint32_t rdesc, uint32_t move, uintptr_t ra)
{
    /* Main: we choose to copy until less than a page remaining */
    CPUState *cs = env_cpu(env);
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX);
    uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX);
    bool forwards = true;
    uint64_t toaddr, fromaddr, copysize, step;

    check_mops_enabled(env, ra);

    /* We choose to NOP out "no data to copy" before consistency checks */
    if (env->xregs[rn] == 0) {
        return;
    }

    check_mops_wrong_option(env, syndrome, ra);

    if (move) {
        /* Option A: negative Xn means a forwards copy was chosen by CPYP */
        forwards = (int64_t)env->xregs[rn] < 0;
    }

    if (forwards) {
        toaddr = env->xregs[rd] + env->xregs[rn];
        fromaddr = env->xregs[rs] + env->xregs[rn];
        copysize = -env->xregs[rn];
    } else {
        copysize = env->xregs[rn];
        /* This toaddr and fromaddr point to the *last* byte to copy */
        toaddr = env->xregs[rd] + copysize - 1;
        fromaddr = env->xregs[rs] + copysize - 1;
    }

    if (!mte_checks_needed(fromaddr, rdesc)) {
        rdesc = 0;
    }
    if (!mte_checks_needed(toaddr, wdesc)) {
        wdesc = 0;
    }

    /* Our implementation has no particular parameter requirements for CPYM */

    /* Do the actual memmove */
    if (forwards) {
        while (copysize >= TARGET_PAGE_SIZE) {
            step = copy_step(env, toaddr, fromaddr, copysize,
                             wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr += step;
            fromaddr += step;
            copysize -= step;
            env->xregs[rn] = -copysize;
            /* Allow interrupts etc to be taken between pages */
            if (copysize >= TARGET_PAGE_SIZE &&
                unlikely(cpu_loop_exit_requested(cs))) {
                cpu_loop_exit_restore(cs, ra);
            }
        }
    } else {
        while (copysize >= TARGET_PAGE_SIZE) {
            step = copy_step_rev(env, toaddr, fromaddr, copysize,
                                 wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr -= step;
            fromaddr -= step;
            copysize -= step;
            env->xregs[rn] = copysize;
            /* Allow interrupts etc to be taken between pages */
            if (copysize >= TARGET_PAGE_SIZE &&
                unlikely(cpu_loop_exit_requested(cs))) {
                cpu_loop_exit_restore(cs, ra);
            }
        }
    }
}

void HELPER(cpym)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                  uint32_t rdesc)
{
    do_cpym(env, syndrome, wdesc, rdesc, true, GETPC());
}

void HELPER(cpyfm)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                   uint32_t rdesc)
{
    do_cpym(env, syndrome, wdesc, rdesc, false, GETPC());
}

static void do_cpye(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                    uint32_t rdesc, uint32_t move, uintptr_t ra)
{
    /* Epilogue: do the last partial page */
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX);
    uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX);
    bool forwards = true;
    uint64_t toaddr, fromaddr, copysize, step;

    check_mops_enabled(env, ra);

    /* We choose to NOP out "no data to copy" before consistency checks */
    if (env->xregs[rn] == 0) {
        return;
    }

    check_mops_wrong_option(env, syndrome, ra);

    if (move) {
        /* Option A: negative Xn means a forwards copy was chosen by CPYP */
        forwards = (int64_t)env->xregs[rn] < 0;
    }

    if (forwards) {
        toaddr = env->xregs[rd] + env->xregs[rn];
        fromaddr = env->xregs[rs] + env->xregs[rn];
        copysize = -env->xregs[rn];
    } else {
        copysize = env->xregs[rn];
        /* This toaddr and fromaddr point to the *last* byte to copy */
        toaddr = env->xregs[rd] + copysize - 1;
        fromaddr = env->xregs[rs] + copysize - 1;
    }

    if (!mte_checks_needed(fromaddr, rdesc)) {
        rdesc = 0;
    }
    if (!mte_checks_needed(toaddr, wdesc)) {
        wdesc = 0;
    }

    /* Check the size; we don't want to have to do a check-for-interrupts */
    if (copysize >= TARGET_PAGE_SIZE) {
        raise_exception_ra(env, EXCP_UDEF, syndrome,
                           mops_mismatch_exception_target_el(env), ra);
    }

    /* Do the actual memmove */
    if (forwards) {
        while (copysize > 0) {
            step = copy_step(env, toaddr, fromaddr, copysize,
                             wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr += step;
            fromaddr += step;
            copysize -= step;
            env->xregs[rn] = -copysize;
        }
    } else {
        while (copysize > 0) {
            step = copy_step_rev(env, toaddr, fromaddr, copysize,
                                 wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr -= step;
            fromaddr -= step;
            copysize -= step;
            env->xregs[rn] = copysize;
        }
    }
}

void HELPER(cpye)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                  uint32_t rdesc)
{
    do_cpye(env, syndrome, wdesc, rdesc, true, GETPC());
}

void HELPER(cpyfe)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                   uint32_t rdesc)
{
    do_cpye(env, syndrome, wdesc, rdesc, false, GETPC());
}

/* Return true if the page containing addr is marked as BTI-guarded */
static bool is_guarded_page(CPUARMState *env, target_ulong addr, uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    return page_get_flags(addr) & PAGE_BTI;
#else
    CPUTLBEntryFull *full;
    void *host;
    int mmu_idx = cpu_mmu_index(env_cpu(env), true);
    int flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx,
                                  false, &host, &full, ra);

    assert(!(flags & TLB_INVALID_MASK));
    return full->extra.arm.guarded;
#endif
}

void HELPER(guarded_page_check)(CPUARMState *env)
{
    /*
     * We have already verified that bti is enabled, and that the
     * instruction at PC is not ok for BTYPE. This is always at
     * the beginning of a block, so PC is always up-to-date and
     * no unwind is required.
     */
    if (is_guarded_page(env, env->pc, 0)) {
        raise_exception(env, EXCP_UDEF, syn_btitrap(env->btype),
                        exception_target_el(env));
    }
}

void HELPER(guarded_page_br)(CPUARMState *env, target_ulong pc)
{
    /*
     * We have already checked for branch via x16 and x17.
     * What remains for choosing BTYPE is checking for a guarded page.
     */
    env->btype = is_guarded_page(env, pc, GETPC()) ? 3 : 1;
}