/*
 * AArch64 specific helpers
 *
 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/units.h"
#include "cpu.h"
#include "gdbstub/helpers.h"
#include "exec/helper-proto.h"
#include "qemu/host-utils.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/bitops.h"
#include "internals.h"
#include "qemu/crc32c.h"
#include "exec/cpu-common.h"
#include "exec/exec-all.h"
#include "accel/tcg/cpu-ldst.h"
#include "exec/target_page.h"
#include "exec/tlb-flags.h"
#include "qemu/int128.h"
#include "qemu/atomic128.h"
#include "fpu/softfloat.h"
#include <zlib.h> /* for crc32 */
#ifdef CONFIG_USER_ONLY
#include "user/page-protection.h"
#endif
#include "vec_internal.h"

/* C2.4.7 Multiply and divide */
/* special cases for 0 and LLONG_MIN are mandated by the standard */
uint64_t HELPER(udiv64)(uint64_t num, uint64_t den)
{
    if (den == 0) {
        return 0;
    }
    return num / den;
}

int64_t HELPER(sdiv64)(int64_t num, int64_t den)
{
    if (den == 0) {
        return 0;
    }
    if (num == LLONG_MIN && den == -1) {
        return LLONG_MIN;
    }
    return num / den;
}

uint64_t HELPER(rbit64)(uint64_t x)
{
    return revbit64(x);
}

void HELPER(msr_i_spsel)(CPUARMState *env, uint32_t imm)
{
    update_spsel(env, imm);
}

void HELPER(msr_set_allint_el1)(CPUARMState *env)
{
    /* ALLINT update to PSTATE. */
    if (arm_hcrx_el2_eff(env) & HCRX_TALLINT) {
        raise_exception_ra(env, EXCP_UDEF,
                           syn_aa64_sysregtrap(0, 1, 0, 4, 1, 0x1f, 0), 2,
                           GETPC());
    }

    env->pstate |= PSTATE_ALLINT;
}

static void daif_check(CPUARMState *env, uint32_t op,
                       uint32_t imm, uintptr_t ra)
{
    /* DAIF update to PSTATE. This is OK from EL0 only if UMA is set. */
    if (arm_current_el(env) == 0 && !(arm_sctlr(env, 0) & SCTLR_UMA)) {
        raise_exception_ra(env, EXCP_UDEF,
                           syn_aa64_sysregtrap(0, extract32(op, 0, 3),
                                               extract32(op, 3, 3), 4,
                                               imm, 0x1f, 0),
                           exception_target_el(env), ra);
    }
}

void HELPER(msr_i_daifset)(CPUARMState *env, uint32_t imm)
{
    daif_check(env, 0x1e, imm, GETPC());
    env->daif |= (imm << 6) & PSTATE_DAIF;
    arm_rebuild_hflags(env);
}

void HELPER(msr_i_daifclear)(CPUARMState *env, uint32_t imm)
{
    daif_check(env, 0x1f, imm, GETPC());
    env->daif &= ~((imm << 6) & PSTATE_DAIF);
    arm_rebuild_hflags(env);
}

/* Convert a softfloat float_relation_ (as returned by
 * the float*_compare functions) to the correct ARM
 * NZCV flag state.
 */
static inline uint32_t float_rel_to_flags(int res)
{
    uint64_t flags;
    switch (res) {
    case float_relation_equal:
        flags = PSTATE_Z | PSTATE_C;
        break;
    case float_relation_less:
        flags = PSTATE_N;
        break;
    case float_relation_greater:
        flags = PSTATE_C;
        break;
    case float_relation_unordered:
    default:
        flags = PSTATE_C | PSTATE_V;
        break;
    }
    return flags;
}
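
/*
 * Worked example (for illustration): FCMP of 1.0 with a NaN makes the
 * softfloat compare return float_relation_unordered, which the mapping
 * above turns into PSTATE_C | PSTATE_V, i.e. NZCV = 0011 -- the
 * architected result for an unordered floating-point comparison.
 * Similarly float_relation_less becomes N=1 (NZCV = 1000) and
 * float_relation_equal becomes Z=1, C=1 (NZCV = 0110).
 */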

uint64_t HELPER(vfp_cmph_a64)(uint32_t x, uint32_t y, float_status *fp_status)
{
    return float_rel_to_flags(float16_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpeh_a64)(uint32_t x, uint32_t y, float_status *fp_status)
{
    return float_rel_to_flags(float16_compare(x, y, fp_status));
}

uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, float_status *fp_status)
{
    return float_rel_to_flags(float32_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpes_a64)(float32 x, float32 y, float_status *fp_status)
{
    return float_rel_to_flags(float32_compare(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpd_a64)(float64 x, float64 y, float_status *fp_status)
{
    return float_rel_to_flags(float64_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, float_status *fp_status)
{
    return float_rel_to_flags(float64_compare(x, y, fp_status));
}

float32 HELPER(vfp_mulxs)(float32 a, float32 b, float_status *fpst)
{
    a = float32_squash_input_denormal(a, fpst);
    b = float32_squash_input_denormal(b, fpst);

    if ((float32_is_zero(a) && float32_is_infinity(b)) ||
        (float32_is_infinity(a) && float32_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float32((1U << 30) |
                            ((float32_val(a) ^ float32_val(b)) & (1U << 31)));
    }
    return float32_mul(a, b, fpst);
}

float64 HELPER(vfp_mulxd)(float64 a, float64 b, float_status *fpst)
{
    a = float64_squash_input_denormal(a, fpst);
    b = float64_squash_input_denormal(b, fpst);

    if ((float64_is_zero(a) && float64_is_infinity(b)) ||
        (float64_is_infinity(a) && float64_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float64((1ULL << 62) |
                            ((float64_val(a) ^ float64_val(b)) & (1ULL << 63)));
    }
    return float64_mul(a, b, fpst);
}

/* 64bit/double versions of the neon float compare functions */
uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, float_status *fpst)
{
    return -float64_eq_quiet(a, b, fpst);
}

uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, float_status *fpst)
{
    return -float64_le(b, a, fpst);
}

uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, float_status *fpst)
{
    return -float64_lt(b, a, fpst);
}

/*
 * Reciprocal step and sqrt step. Note that unlike the A32/T32
 * versions, these do a fully fused multiply-add or
 * multiply-add-and-halve.
 * The FPCR.AH == 1 versions need to avoid flipping the sign of NaN.
 */
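
/*
 * Illustrative note: the DO_RECPS expansion below computes the FRECPS
 * step 2.0 - (a * b) as a single fused muladd of (-a), b and 2.0 (the
 * CHSFN negation of 'a' happens up front), while DO_RSQRTSF computes
 * the FRSQRTS step (3.0 - a * b) / 2.0 as a fused muladd of (-a), b
 * and 3.0 scaled by 2^-1 via the muladd_scalbn -1 argument.
 */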
#define DO_RECPS(NAME, CTYPE, FLOATTYPE, CHSFN)                         \
CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst)                \
{                                                                       \
    a = FLOATTYPE ## _squash_input_denormal(a, fpst);                   \
    b = FLOATTYPE ## _squash_input_denormal(b, fpst);                   \
    a = FLOATTYPE ## _ ## CHSFN(a);                                     \
    if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) ||   \
        (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) {   \
        return FLOATTYPE ## _two;                                       \
    }                                                                   \
    return FLOATTYPE ## _muladd(a, b, FLOATTYPE ## _two, 0, fpst);      \
}

DO_RECPS(recpsf_f16, uint32_t, float16, chs)
DO_RECPS(recpsf_f32, float32, float32, chs)
DO_RECPS(recpsf_f64, float64, float64, chs)
DO_RECPS(recpsf_ah_f16, uint32_t, float16, ah_chs)
DO_RECPS(recpsf_ah_f32, float32, float32, ah_chs)
DO_RECPS(recpsf_ah_f64, float64, float64, ah_chs)

#define DO_RSQRTSF(NAME, CTYPE, FLOATTYPE, CHSFN)                       \
CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst)                \
{                                                                       \
    a = FLOATTYPE ## _squash_input_denormal(a, fpst);                   \
    b = FLOATTYPE ## _squash_input_denormal(b, fpst);                   \
    a = FLOATTYPE ## _ ## CHSFN(a);                                     \
    if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) ||   \
        (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) {   \
        return FLOATTYPE ## _one_point_five;                            \
    }                                                                   \
    return FLOATTYPE ## _muladd_scalbn(a, b, FLOATTYPE ## _three,       \
                                       -1, 0, fpst);                    \
}                                                                       \

DO_RSQRTSF(rsqrtsf_f16, uint32_t, float16, chs)
DO_RSQRTSF(rsqrtsf_f32, float32, float32, chs)
DO_RSQRTSF(rsqrtsf_f64, float64, float64, chs)
DO_RSQRTSF(rsqrtsf_ah_f16, uint32_t, float16, ah_chs)
DO_RSQRTSF(rsqrtsf_ah_f32, float32, float32, ah_chs)
DO_RSQRTSF(rsqrtsf_ah_f64, float64, float64, ah_chs)

/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
uint32_t HELPER(frecpx_f16)(uint32_t a, float_status *fpst)
{
    uint16_t val16, sbit;
    int16_t exp;

    if (float16_is_any_nan(a)) {
        float16 nan = a;
        if (float16_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            if (!fpst->default_nan_mode) {
                nan = float16_silence_nan(a, fpst);
            }
        }
        if (fpst->default_nan_mode) {
            nan = float16_default_nan(fpst);
        }
        return nan;
    }

    a = float16_squash_input_denormal(a, fpst);

    val16 = float16_val(a);
    sbit = 0x8000 & val16;
    exp = extract32(val16, 10, 5);

    if (exp == 0) {
        return make_float16(deposit32(sbit, 10, 5, 0x1e));
    } else {
        return make_float16(deposit32(sbit, 10, 5, ~exp));
    }
}

float32 HELPER(frecpx_f32)(float32 a, float_status *fpst)
{
    uint32_t val32, sbit;
    int32_t exp;

    if (float32_is_any_nan(a)) {
        float32 nan = a;
        if (float32_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            if (!fpst->default_nan_mode) {
                nan = float32_silence_nan(a, fpst);
            }
        }
        if (fpst->default_nan_mode) {
            nan = float32_default_nan(fpst);
        }
        return nan;
    }

    a = float32_squash_input_denormal(a, fpst);

    val32 = float32_val(a);
    sbit = 0x80000000ULL & val32;
    exp = extract32(val32, 23, 8);

    if (exp == 0) {
        return make_float32(sbit | (0xfe << 23));
    } else {
        return make_float32(sbit | (~exp & 0xff) << 23);
    }
}

float64 HELPER(frecpx_f64)(float64 a, float_status *fpst)
{
    uint64_t val64, sbit;
    int64_t exp;

    if (float64_is_any_nan(a)) {
        float64 nan = a;
        if (float64_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            if (!fpst->default_nan_mode) {
                nan = float64_silence_nan(a, fpst);
            }
        }
        if (fpst->default_nan_mode) {
            nan = float64_default_nan(fpst);
        }
        return nan;
    }

    a = float64_squash_input_denormal(a, fpst);

    val64 = float64_val(a);
    sbit = 0x8000000000000000ULL & val64;
    exp = extract64(float64_val(a), 52, 11);

    if (exp == 0) {
        return make_float64(sbit | (0x7feULL << 52));
    } else {
        return make_float64(sbit | (~exp & 0x7ffULL) << 52);
    }
}

float32 HELPER(fcvtx_f64_to_f32)(float64 a, float_status *fpst)
{
    float32 r;
    int old = get_float_rounding_mode(fpst);

    set_float_rounding_mode(float_round_to_odd, fpst);
    r = float64_to_float32(a, fpst);
    set_float_rounding_mode(old, fpst);
    return r;
}

/*
 * AH=1 min/max have some odd special cases:
 * comparing two zeroes (regardless of sign), (NaN, anything),
 * or (anything, NaN) should return the second argument (possibly
 * squashed to zero).
 * Also, denormal outputs are not squashed to zero regardless of FZ or FZ16.
 */
#define AH_MINMAX_HELPER(NAME, CTYPE, FLOATTYPE, MINMAX)                \
    CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst)            \
    {                                                                   \
        bool save;                                                      \
        CTYPE r;                                                        \
        a = FLOATTYPE ## _squash_input_denormal(a, fpst);               \
        b = FLOATTYPE ## _squash_input_denormal(b, fpst);               \
        if (FLOATTYPE ## _is_zero(a) && FLOATTYPE ## _is_zero(b)) {     \
            return b;                                                   \
        }                                                               \
        if (FLOATTYPE ## _is_any_nan(a) ||                              \
            FLOATTYPE ## _is_any_nan(b)) {                              \
            float_raise(float_flag_invalid, fpst);                      \
            return b;                                                   \
        }                                                               \
        save = get_flush_to_zero(fpst);                                 \
        set_flush_to_zero(false, fpst);                                 \
        r = FLOATTYPE ## _ ## MINMAX(a, b, fpst);                       \
        set_flush_to_zero(save, fpst);                                  \
        return r;                                                       \
    }

AH_MINMAX_HELPER(vfp_ah_minh, dh_ctype_f16, float16, min)
AH_MINMAX_HELPER(vfp_ah_mins, float32, float32, min)
AH_MINMAX_HELPER(vfp_ah_mind, float64, float64, min)
AH_MINMAX_HELPER(vfp_ah_maxh, dh_ctype_f16, float16, max)
AH_MINMAX_HELPER(vfp_ah_maxs, float32, float32, max)
AH_MINMAX_HELPER(vfp_ah_maxd, float64, float64, max)

/* 64-bit versions of the CRC helpers. Note that although the operation
 * (and the prototypes of crc32c() and crc32()) means that only the
 * bottom 32 bits of the accumulator and result are used, we pass and
 * return uint64_t for convenience of the generated code. Unlike the
 * 32-bit instruction set versions, val may genuinely have 64 bits of
 * data in it. The upper bytes of val (above the number specified by
 * 'bytes') must have been zeroed out by the caller.
 */
uint64_t HELPER(crc32_64)(uint64_t acc, uint64_t val, uint32_t bytes)
{
    uint8_t buf[8];

    stq_le_p(buf, val);

    /* zlib crc32 converts the accumulator and output to one's complement. */
    return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
}

uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
{
    uint8_t buf[8];

    stq_le_p(buf, val);

    /* Linux crc32c converts the output to one's complement. */
    return crc32c(acc, buf, bytes) ^ 0xffffffff;
}
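
/*
 * Note for readers (illustrative): zlib's crc32() applies the usual
 * CRC-32 pre- and post-inversion of the accumulator, while the AArch64
 * CRC32* instructions are defined as a raw CRC update with no such
 * conditioning; hence the ^ 0xffffffff on both entry and exit above.
 * The Castagnoli crc32c() routine only applies the final inversion,
 * so only the result needs to be XORed back.
 */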

/*
 * AdvSIMD half-precision
 */

#define ADVSIMD_HELPER(name, suffix) HELPER(glue(glue(advsimd_, name), suffix))

#define ADVSIMD_HALFOP(name) \
uint32_t ADVSIMD_HELPER(name, h)(uint32_t a, uint32_t b, float_status *fpst) \
{ \
    return float16_ ## name(a, b, fpst); \
}

#define ADVSIMD_TWOHALFOP(name)                                         \
uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b,       \
                                  float_status *fpst)                   \
{                                                                       \
    float16 a1, a2, b1, b2;                                             \
    uint32_t r1, r2;                                                    \
    a1 = extract32(two_a, 0, 16);                                       \
    a2 = extract32(two_a, 16, 16);                                      \
    b1 = extract32(two_b, 0, 16);                                       \
    b2 = extract32(two_b, 16, 16);                                      \
    r1 = float16_ ## name(a1, b1, fpst);                                \
    r2 = float16_ ## name(a2, b2, fpst);                                \
    return deposit32(r1, 16, 16, r2);                                   \
}

ADVSIMD_TWOHALFOP(add)
ADVSIMD_TWOHALFOP(sub)
ADVSIMD_TWOHALFOP(mul)
ADVSIMD_TWOHALFOP(div)
ADVSIMD_TWOHALFOP(min)
ADVSIMD_TWOHALFOP(max)
ADVSIMD_TWOHALFOP(minnum)
ADVSIMD_TWOHALFOP(maxnum)

/* Data processing - scalar floating-point and advanced SIMD */
static float16 float16_mulx(float16 a, float16 b, float_status *fpst)
{
    a = float16_squash_input_denormal(a, fpst);
    b = float16_squash_input_denormal(b, fpst);

    if ((float16_is_zero(a) && float16_is_infinity(b)) ||
        (float16_is_infinity(a) && float16_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float16((1U << 14) |
                            ((float16_val(a) ^ float16_val(b)) & (1U << 15)));
    }
    return float16_mul(a, b, fpst);
}

ADVSIMD_HALFOP(mulx)
ADVSIMD_TWOHALFOP(mulx)

/* fused multiply-accumulate */
uint32_t HELPER(advsimd_muladdh)(uint32_t a, uint32_t b, uint32_t c,
                                 float_status *fpst)
{
    return float16_muladd(a, b, c, 0, fpst);
}

uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b,
                                  uint32_t two_c, float_status *fpst)
{
    float16 a1, a2, b1, b2, c1, c2;
    uint32_t r1, r2;
    a1 = extract32(two_a, 0, 16);
    a2 = extract32(two_a, 16, 16);
    b1 = extract32(two_b, 0, 16);
    b2 = extract32(two_b, 16, 16);
    c1 = extract32(two_c, 0, 16);
    c2 = extract32(two_c, 16, 16);
    r1 = float16_muladd(a1, b1, c1, 0, fpst);
    r2 = float16_muladd(a2, b2, c2, 0, fpst);
    return deposit32(r1, 16, 16, r2);
}

/*
 * Floating point comparisons produce an integer result. Softfloat
 * routines return float_relation types which we convert to the 0/-1
 * Neon requires.
 */

#define ADVSIMD_CMPRES(test) (test) ? 0xffff : 0
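
/*
 * Worked example (illustrative): advsimd_cgt_f16 comparing 2.0 with 1.0
 * gets float_relation_greater from float16_compare(), so the predicate
 * is true and the lane result is 0xffff (all ones); if either input is
 * a NaN the comparison is unordered, the predicate is false and the
 * lane result is 0.
 */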

uint32_t HELPER(advsimd_ceq_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    int compare = float16_compare_quiet(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_equal);
}

uint32_t HELPER(advsimd_cge_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    int compare = float16_compare(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater ||
                          compare == float_relation_equal);
}

uint32_t HELPER(advsimd_cgt_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    int compare = float16_compare(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater);
}

uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    float16 f0 = float16_abs(a);
    float16 f1 = float16_abs(b);
    int compare = float16_compare(f0, f1, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater ||
                          compare == float_relation_equal);
}

uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    float16 f0 = float16_abs(a);
    float16 f1 = float16_abs(b);
    int compare = float16_compare(f0, f1, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater);
}

/* round to integral */
uint32_t HELPER(advsimd_rinth_exact)(uint32_t x, float_status *fp_status)
{
    return float16_round_to_int(x, fp_status);
}

uint32_t HELPER(advsimd_rinth)(uint32_t x, float_status *fp_status)
{
    int old_flags = get_float_exception_flags(fp_status), new_flags;
    float16 ret;

    ret = float16_round_to_int(x, fp_status);

    /* Suppress any inexact exceptions the conversion produced */
    if (!(old_flags & float_flag_inexact)) {
        new_flags = get_float_exception_flags(fp_status);
        set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
    }

    return ret;
}

static int el_from_spsr(uint32_t spsr)
{
    /* Return the exception level that this SPSR is requesting a return to,
     * or -1 if it is invalid (an illegal return)
     */
    if (spsr & PSTATE_nRW) {
        switch (spsr & CPSR_M) {
        case ARM_CPU_MODE_USR:
            return 0;
        case ARM_CPU_MODE_HYP:
            return 2;
        case ARM_CPU_MODE_FIQ:
        case ARM_CPU_MODE_IRQ:
        case ARM_CPU_MODE_SVC:
        case ARM_CPU_MODE_ABT:
        case ARM_CPU_MODE_UND:
        case ARM_CPU_MODE_SYS:
            return 1;
        case ARM_CPU_MODE_MON:
            /* Returning to Mon from AArch64 is never possible,
             * so this is an illegal return.
             */
        default:
            return -1;
        }
    } else {
        if (extract32(spsr, 1, 1)) {
            /* Return with reserved M[1] bit set */
            return -1;
        }
        if (extract32(spsr, 0, 4) == 1) {
            /* return to EL0 with M[0] bit set */
            return -1;
        }
        return extract32(spsr, 2, 2);
    }
}

static void cpsr_write_from_spsr_elx(CPUARMState *env,
                                     uint32_t val)
{
    uint32_t mask;

    /* Save SPSR_ELx.SS into PSTATE. */
    env->pstate = (env->pstate & ~PSTATE_SS) | (val & PSTATE_SS);
    val &= ~PSTATE_SS;

    /* Move DIT to the correct location for CPSR */
    if (val & PSTATE_DIT) {
        val &= ~PSTATE_DIT;
        val |= CPSR_DIT;
    }

    mask = aarch32_cpsr_valid_mask(env->features,
                                   &env_archcpu(env)->isar);
    cpsr_write(env, val, mask, CPSRWriteRaw);
}

void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
{
    ARMCPU *cpu = env_archcpu(env);
    int cur_el = arm_current_el(env);
    unsigned int spsr_idx = aarch64_banked_spsr_index(cur_el);
    uint32_t spsr = env->banked_spsr[spsr_idx];
    int new_el;
    bool return_to_aa64 = (spsr & PSTATE_nRW) == 0;

    aarch64_save_sp(env, cur_el);

    arm_clear_exclusive(env);

    /* We must squash the PSTATE.SS bit to zero unless both of the
     * following hold:
     *  1. debug exceptions are currently disabled
     *  2. singlestep will be active in the EL we return to
     * We check 1 here and 2 after we've done the pstate/cpsr write() to
     * transition to the EL we're going to.
     */
    if (arm_generate_debug_exceptions(env)) {
        spsr &= ~PSTATE_SS;
    }

    /*
     * FEAT_RME forbids return from EL3 with an invalid security state.
     * We don't need an explicit check for FEAT_RME here because we enforce
     * in scr_write() that you can't set the NSE bit without it.
     */
    if (cur_el == 3 && (env->cp15.scr_el3 & (SCR_NS | SCR_NSE)) == SCR_NSE) {
        goto illegal_return;
    }

    new_el = el_from_spsr(spsr);
    if (new_el == -1) {
        goto illegal_return;
    }
    if (new_el > cur_el || (new_el == 2 && !arm_is_el2_enabled(env))) {
        /* Disallow return to an EL which is unimplemented or higher
         * than the current one.
         */
        goto illegal_return;
    }

    if (new_el != 0 && arm_el_is_aa64(env, new_el) != return_to_aa64) {
        /* Return to an EL which is configured for a different register width */
        goto illegal_return;
    }

    if (!return_to_aa64 && !cpu_isar_feature(aa64_aa32, cpu)) {
        /* Return to AArch32 when CPU is AArch64-only */
        goto illegal_return;
    }

    if (new_el == 1 && (arm_hcr_el2_eff(env) & HCR_TGE)) {
        goto illegal_return;
    }

    bql_lock();
    arm_call_pre_el_change_hook(cpu);
    bql_unlock();

    if (!return_to_aa64) {
        env->aarch64 = false;
        /* We do a raw CPSR write because aarch64_sync_64_to_32()
         * will sort the register banks out for us, and we've already
         * caught all the bad-mode cases in el_from_spsr().
         */
        cpsr_write_from_spsr_elx(env, spsr);
        if (!arm_singlestep_active(env)) {
            env->pstate &= ~PSTATE_SS;
        }
        aarch64_sync_64_to_32(env);

        if (spsr & CPSR_T) {
            env->regs[15] = new_pc & ~0x1;
        } else {
            env->regs[15] = new_pc & ~0x3;
        }
        helper_rebuild_hflags_a32(env, new_el);
        qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
                      "AArch32 EL%d PC 0x%" PRIx32 "\n",
                      cur_el, new_el, env->regs[15]);
    } else {
        int tbii;

        env->aarch64 = true;
        spsr &= aarch64_pstate_valid_mask(&cpu->isar);
        pstate_write(env, spsr);
        if (!arm_singlestep_active(env)) {
            env->pstate &= ~PSTATE_SS;
        }
        aarch64_restore_sp(env, new_el);
        helper_rebuild_hflags_a64(env, new_el);

        /*
         * Apply TBI to the exception return address. We had to delay this
         * until after we selected the new EL, so that we could select the
         * correct TBI+TBID bits.  This is made easier by waiting until after
         * the hflags rebuild, since we can pull the composite TBII field
         * from there.
         */
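        /*
         * Worked example (illustrative): with TBI0 enabled for a
         * two-range (EL1&0 style) regime, a return address of
         * 0x1234567812345678 has bit 55 clear, so sign-extension from
         * bit 55 strips the tag byte and the PC becomes
         * 0x0034567812345678; a bit-55-set address would instead keep
         * 0xff in the top byte.  Single-range regimes always zero the
         * top byte.
         */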
        tbii = EX_TBFLAG_A64(env->hflags, TBII);
        if ((tbii >> extract64(new_pc, 55, 1)) & 1) {
            /* TBI is enabled. */
            int core_mmu_idx = arm_env_mmu_index(env);
            if (regime_has_2_ranges(core_to_aa64_mmu_idx(core_mmu_idx))) {
                new_pc = sextract64(new_pc, 0, 56);
            } else {
                new_pc = extract64(new_pc, 0, 56);
            }
        }
        env->pc = new_pc;

        qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
                      "AArch64 EL%d PC 0x%" PRIx64 "\n",
                      cur_el, new_el, env->pc);
    }

    /*
     * Note that cur_el can never be 0.  If new_el is 0, then
     * el0_a64 is return_to_aa64, else el0_a64 is ignored.
     */
    aarch64_sve_change_el(env, cur_el, new_el, return_to_aa64);

    bql_lock();
    arm_call_el_change_hook(cpu);
    bql_unlock();

    return;

illegal_return:
    /* Illegal return events of various kinds have architecturally
     * mandated behaviour:
     *  restore NZCV and DAIF from SPSR_ELx
     *  set PSTATE.IL
     *  restore PC from ELR_ELx
     *  no change to exception level, execution state or stack pointer
     */
    env->pstate |= PSTATE_IL;
    env->pc = new_pc;
    spsr &= PSTATE_NZCV | PSTATE_DAIF | PSTATE_ALLINT;
    spsr |= pstate_read(env) & ~(PSTATE_NZCV | PSTATE_DAIF | PSTATE_ALLINT);
    pstate_write(env, spsr);
    if (!arm_singlestep_active(env)) {
        env->pstate &= ~PSTATE_SS;
    }
    helper_rebuild_hflags_a64(env, cur_el);
    qemu_log_mask(LOG_GUEST_ERROR, "Illegal exception return at EL%d: "
                  "resuming execution at 0x%" PRIx64 "\n", cur_el, env->pc);
}

void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
{
    uintptr_t ra = GETPC();

    /*
     * Implement DC ZVA, which zeroes a fixed-length block of memory.
     * Note that we do not implement the (architecturally mandated)
     * alignment fault for attempts to use this on Device memory
     * (which matches the usual QEMU behaviour of not implementing either
     * alignment faults or any memory attribute handling).
     */
    int blocklen = 4 << env_archcpu(env)->dcz_blocksize;
    uint64_t vaddr = vaddr_in & ~(blocklen - 1);
    int mmu_idx = arm_env_mmu_index(env);
    void *mem;

    /*
     * Trapless lookup.  In addition to actual invalid page, may
     * return NULL for I/O, watchpoints, clean pages, etc.
     */
    mem = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);

#ifndef CONFIG_USER_ONLY
    if (unlikely(!mem)) {
        /*
         * Trap if accessing an invalid page.  DC_ZVA requires that we supply
         * the original pointer for an invalid page.  But watchpoints require
         * that we probe the actual space.  So do both.
         */
        (void) probe_write(env, vaddr_in, 1, mmu_idx, ra);
        mem = probe_write(env, vaddr, blocklen, mmu_idx, ra);

        if (unlikely(!mem)) {
            /*
             * The only remaining reason for mem == NULL is I/O.
             * Just do a series of byte writes as the architecture demands.
             */
            for (int i = 0; i < blocklen; i++) {
                cpu_stb_mmuidx_ra(env, vaddr + i, 0, mmu_idx, ra);
            }
            return;
        }
    }
#endif

    set_helper_retaddr(ra);
    memset(mem, 0, blocklen);
    clear_helper_retaddr();
}
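
/*
 * Worked example (illustrative): with the common dcz_blocksize of 4 the
 * block length is 4 << 4 = 64 bytes, so "DC ZVA, Xt" with Xt = 0x...1234
 * zeroes the 64-byte block starting at 0x...1200.
 */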

void HELPER(unaligned_access)(CPUARMState *env, uint64_t addr,
                              uint32_t access_type, uint32_t mmu_idx)
{
    arm_cpu_do_unaligned_access(env_cpu(env), addr, access_type,
                                mmu_idx, GETPC());
}

/* Memory operations (memset, memmove, memcpy) */

/*
 * Return true if the CPY* and SET* insns can execute; compare
 * pseudocode CheckMOPSEnabled(), though we refactor it a little.
 */
static bool mops_enabled(CPUARMState *env)
{
    int el = arm_current_el(env);

    if (el < 2 &&
        (arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE) &&
        !(arm_hcrx_el2_eff(env) & HCRX_MSCEN)) {
        return false;
    }

    if (el == 0) {
        if (!el_is_in_host(env, 0)) {
            return env->cp15.sctlr_el[1] & SCTLR_MSCEN;
        } else {
            return env->cp15.sctlr_el[2] & SCTLR_MSCEN;
        }
    }
    return true;
}

static void check_mops_enabled(CPUARMState *env, uintptr_t ra)
{
    if (!mops_enabled(env)) {
        raise_exception_ra(env, EXCP_UDEF, syn_uncategorized(),
                           exception_target_el(env), ra);
    }
}

/*
 * Return the target exception level for an exception due
 * to mismatched arguments in a FEAT_MOPS copy or set.
 * Compare pseudocode MismatchedCpySetTargetEL()
 */
static int mops_mismatch_exception_target_el(CPUARMState *env)
{
    int el = arm_current_el(env);

    if (el > 1) {
        return el;
    }
    if (el == 0 && (arm_hcr_el2_eff(env) & HCR_TGE)) {
        return 2;
    }
    if (el == 1 && (arm_hcrx_el2_eff(env) & HCRX_MCE2)) {
        return 2;
    }
    return 1;
}

/*
 * Check whether an M or E instruction was executed with a CF value
 * indicating the wrong option for this implementation.
 * Assumes we are always Option A.
 */
static void check_mops_wrong_option(CPUARMState *env, uint32_t syndrome,
                                    uintptr_t ra)
{
    if (env->CF != 0) {
        syndrome |= 1 << 17; /* Set the wrong-option bit */
        raise_exception_ra(env, EXCP_UDEF, syndrome,
                           mops_mismatch_exception_target_el(env), ra);
    }
}

/*
 * Return the maximum number of bytes we can transfer starting at addr
 * without crossing a page boundary.
 */
static uint64_t page_limit(uint64_t addr)
{
    return TARGET_PAGE_ALIGN(addr + 1) - addr;
}

/*
 * Return the number of bytes we can copy starting from addr and working
 * backwards without crossing a page boundary.
 */
static uint64_t page_limit_rev(uint64_t addr)
{
    return (addr & ~TARGET_PAGE_MASK) + 1;
}

/*
 * Perform part of a memory set on an area of guest memory starting at
 * toaddr (a dirty address) and extending for setsize bytes.
 *
 * Returns the number of bytes actually set, which might be less than
 * setsize; the caller should loop until the whole set has been done.
 * The caller should ensure that the guest registers are correct
 * for the possibility that the first byte of the set encounters
 * an exception or watchpoint.  We guarantee not to take any faults
 * for bytes other than the first.
 */
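/*
 * Worked example for the stepping logic below (illustrative): with 4KiB
 * target pages, page_limit(0x10000ff0) is 0x10, so a set step starting
 * 16 bytes before a page boundary writes at most those 16 bytes and
 * lets the caller come back for the rest on the next page.
 */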
static uint64_t set_step(CPUARMState *env, uint64_t toaddr,
                         uint64_t setsize, uint32_t data, int memidx,
                         uint32_t *mtedesc, uintptr_t ra)
{
    void *mem;

    setsize = MIN(setsize, page_limit(toaddr));
    if (*mtedesc) {
        uint64_t mtesize = mte_mops_probe(env, toaddr, setsize, *mtedesc);
        if (mtesize == 0) {
            /* Trap, or not. All CPU state is up to date */
            mte_check_fail(env, *mtedesc, toaddr, ra);
            /* Continue, with no further MTE checks required */
            *mtedesc = 0;
        } else {
            /* Advance to the end, or to the tag mismatch */
            setsize = MIN(setsize, mtesize);
        }
    }

    toaddr = useronly_clean_ptr(toaddr);
    /*
     * Trapless lookup: returns NULL for invalid page, I/O,
     * watchpoints, clean pages, etc.
     */
    mem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, memidx);

#ifndef CONFIG_USER_ONLY
    if (unlikely(!mem)) {
        /*
         * Slow-path: just do one byte write. This will handle the
         * watchpoint, invalid page, etc handling correctly.
         * For clean code pages, the next iteration will see
         * the page dirty and will use the fast path.
         */
        cpu_stb_mmuidx_ra(env, toaddr, data, memidx, ra);
        return 1;
    }
#endif
    /* Easy case: just memset the host memory */
    set_helper_retaddr(ra);
    memset(mem, data, setsize);
    clear_helper_retaddr();
    return setsize;
}

/*
 * Similar, but setting tags. The architecture requires us to do this
 * in 16-byte chunks.  SETG accesses are not tag checked; they set
 * the tags.
 */
static uint64_t set_step_tags(CPUARMState *env, uint64_t toaddr,
                              uint64_t setsize, uint32_t data, int memidx,
                              uint32_t *mtedesc, uintptr_t ra)
{
    void *mem;
    uint64_t cleanaddr;

    setsize = MIN(setsize, page_limit(toaddr));

    cleanaddr = useronly_clean_ptr(toaddr);
    /*
     * Trapless lookup: returns NULL for invalid page, I/O,
     * watchpoints, clean pages, etc.
     */
    mem = tlb_vaddr_to_host(env, cleanaddr, MMU_DATA_STORE, memidx);

#ifndef CONFIG_USER_ONLY
    if (unlikely(!mem)) {
        /*
         * Slow-path: just do one write. This will handle the
         * watchpoint, invalid page, etc handling correctly.
         * The architecture requires that we do 16 bytes at a time,
         * and we know both ptr and size are 16 byte aligned.
         * For clean code pages, the next iteration will see
         * the page dirty and will use the fast path.
         */
        uint64_t repldata = data * 0x0101010101010101ULL;
        MemOpIdx oi16 = make_memop_idx(MO_TE | MO_128, memidx);
        cpu_st16_mmu(env, toaddr, int128_make128(repldata, repldata), oi16, ra);
        mte_mops_set_tags(env, toaddr, 16, *mtedesc);
        return 16;
    }
#endif
    /* Easy case: just memset the host memory */
    set_helper_retaddr(ra);
    memset(mem, data, setsize);
    clear_helper_retaddr();
    mte_mops_set_tags(env, toaddr, setsize, *mtedesc);
    return setsize;
}

typedef uint64_t StepFn(CPUARMState *env, uint64_t toaddr,
                        uint64_t setsize, uint32_t data,
                        int memidx, uint32_t *mtedesc, uintptr_t ra);

/* Extract register numbers from a MOPS exception syndrome value */
static int mops_destreg(uint32_t syndrome)
{
    return extract32(syndrome, 10, 5);
}

static int mops_srcreg(uint32_t syndrome)
{
    return extract32(syndrome, 5, 5);
}

static int mops_sizereg(uint32_t syndrome)
{
    return extract32(syndrome, 0, 5);
}

/*
 * Return true if TCMA and TBI bits mean we need to do MTE checks.
 * We only need to do this once per MOPS insn, not for every page.
 */
static bool mte_checks_needed(uint64_t ptr, uint32_t desc)
{
    int bit55 = extract64(ptr, 55, 1);

    /*
     * Note that tbi_check() returns true for "access checked" but
     * tcma_check() returns true for "access unchecked".
     */
    if (!tbi_check(desc, bit55)) {
        return false;
    }
    return !tcma_check(desc, bit55, allocation_tag_from_addr(ptr));
}

/* Take an exception if the SETG addr/size are not granule aligned */
static void check_setg_alignment(CPUARMState *env, uint64_t ptr, uint64_t size,
                                 uint32_t memidx, uintptr_t ra)
{
    if ((size != 0 && !QEMU_IS_ALIGNED(ptr, TAG_GRANULE)) ||
        !QEMU_IS_ALIGNED(size, TAG_GRANULE)) {
        arm_cpu_do_unaligned_access(env_cpu(env), ptr, MMU_DATA_STORE,
                                    memidx, ra);
    }
}

static uint64_t arm_reg_or_xzr(CPUARMState *env, int reg)
{
    /*
     * Runtime equivalent of cpu_reg() -- return the CPU register value,
     * for contexts when index 31 means XZR (not SP).
     */
    return reg == 31 ? 0 : env->xregs[reg];
}

/*
 * For the Memory Set operation, our implementation chooses
 * always to use "option A", where we update Xd to the final
 * address in the SETP insn, and set Xn to be -(bytes remaining).
 * On SETM and SETE insns we only need update Xn.
 *
 * @env: CPU
 * @syndrome: syndrome value for mismatch exceptions
 *            (also contains the register numbers we need to use)
 * @mtedesc: MTE descriptor word
 * @stepfn: function which does a single part of the set operation
 * @is_setg: true if this is the tag-setting SETG variant
 */
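/*
 * Worked example (illustrative): SETP with Xd = 0x8000 and Xn = 0x300
 * leaves Xd = 0x8300 (the final address) and Xn negative, holding
 * -(bytes remaining); SETM and SETE then compute the next destination
 * as Xd + Xn and step Xn back up towards zero, so the registers always
 * describe the bytes still to be set.
 */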
static void do_setp(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
                    StepFn *stepfn, bool is_setg, uintptr_t ra)
{
    /* Prologue: we choose to do up to the next page boundary */
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint8_t data = arm_reg_or_xzr(env, rs);
    uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX);
    uint64_t toaddr = env->xregs[rd];
    uint64_t setsize = env->xregs[rn];
    uint64_t stagesetsize, step;

    check_mops_enabled(env, ra);

    if (setsize > INT64_MAX) {
        setsize = INT64_MAX;
        if (is_setg) {
            setsize &= ~0xf;
        }
    }

    if (unlikely(is_setg)) {
        check_setg_alignment(env, toaddr, setsize, memidx, ra);
    } else if (!mte_checks_needed(toaddr, mtedesc)) {
        mtedesc = 0;
    }

    stagesetsize = MIN(setsize, page_limit(toaddr));
    while (stagesetsize) {
        env->xregs[rd] = toaddr;
        env->xregs[rn] = setsize;
        step = stepfn(env, toaddr, stagesetsize, data, memidx, &mtedesc, ra);
        toaddr += step;
        setsize -= step;
        stagesetsize -= step;
    }
    /* Insn completed, so update registers to the Option A format */
    env->xregs[rd] = toaddr + setsize;
    env->xregs[rn] = -setsize;

    /* Set NZCV = 0000 to indicate we are an Option A implementation */
    env->NF = 0;
    env->ZF = 1; /* our env->ZF encoding is inverted */
    env->CF = 0;
    env->VF = 0;
}

void HELPER(setp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_setp(env, syndrome, mtedesc, set_step, false, GETPC());
}

void HELPER(setgp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_setp(env, syndrome, mtedesc, set_step_tags, true, GETPC());
}

static void do_setm(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
                    StepFn *stepfn, bool is_setg, uintptr_t ra)
{
    /* Main: we choose to do all the full-page chunks */
    CPUState *cs = env_cpu(env);
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint8_t data = arm_reg_or_xzr(env, rs);
    uint64_t toaddr = env->xregs[rd] + env->xregs[rn];
    uint64_t setsize = -env->xregs[rn];
    uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX);
    uint64_t step, stagesetsize;

    check_mops_enabled(env, ra);

    /*
     * We're allowed to NOP out "no data to copy" before the consistency
     * checks; we choose to do so.
     */
    if (env->xregs[rn] == 0) {
        return;
    }

    check_mops_wrong_option(env, syndrome, ra);

    /*
     * Our implementation will work fine even if we have an unaligned
     * destination address, and because we update Xn every time around
     * the loop below and the return value from stepfn() may be less
     * than requested, we might find toaddr is unaligned. So we don't
     * have an IMPDEF check for alignment here.
     */

    if (unlikely(is_setg)) {
        check_setg_alignment(env, toaddr, setsize, memidx, ra);
    } else if (!mte_checks_needed(toaddr, mtedesc)) {
        mtedesc = 0;
    }

    /* Do the actual memset: we leave the last partial page to SETE */
    stagesetsize = setsize & TARGET_PAGE_MASK;
    while (stagesetsize > 0) {
        step = stepfn(env, toaddr, stagesetsize, data, memidx, &mtedesc, ra);
        toaddr += step;
        setsize -= step;
        stagesetsize -= step;
        env->xregs[rn] = -setsize;
        if (stagesetsize > 0 && unlikely(cpu_loop_exit_requested(cs))) {
            cpu_loop_exit_restore(cs, ra);
        }
    }
}

void HELPER(setm)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_setm(env, syndrome, mtedesc, set_step, false, GETPC());
}

void HELPER(setgm)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_setm(env, syndrome, mtedesc, set_step_tags, true, GETPC());
}

static void do_sete(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
                    StepFn *stepfn, bool is_setg, uintptr_t ra)
{
    /* Epilogue: do the last partial page */
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint8_t data = arm_reg_or_xzr(env, rs);
    uint64_t toaddr = env->xregs[rd] + env->xregs[rn];
    uint64_t setsize = -env->xregs[rn];
    uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX);
    uint64_t step;

    check_mops_enabled(env, ra);

    /*
     * We're allowed to NOP out "no data to copy" before the consistency
     * checks; we choose to do so.
     */
    if (setsize == 0) {
        return;
    }

    check_mops_wrong_option(env, syndrome, ra);

    /*
     * Our implementation has no address alignment requirements, but
     * we do want to enforce the "less than a page" size requirement,
     * so we don't need to have the "check for interrupts" here.
     */
    if (setsize >= TARGET_PAGE_SIZE) {
        raise_exception_ra(env, EXCP_UDEF, syndrome,
                           mops_mismatch_exception_target_el(env), ra);
    }

    if (unlikely(is_setg)) {
        check_setg_alignment(env, toaddr, setsize, memidx, ra);
    } else if (!mte_checks_needed(toaddr, mtedesc)) {
        mtedesc = 0;
    }

    /* Do the actual memset */
    while (setsize > 0) {
        step = stepfn(env, toaddr, setsize, data, memidx, &mtedesc, ra);
        toaddr += step;
        setsize -= step;
        env->xregs[rn] = -setsize;
    }
}

void HELPER(sete)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_sete(env, syndrome, mtedesc, set_step, false, GETPC());
}

void HELPER(setge)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_sete(env, syndrome, mtedesc, set_step_tags, true, GETPC());
}

/*
 * Perform part of a memory copy from the guest memory at fromaddr
 * and extending for copysize bytes, to the guest memory at
 * toaddr.  Both addresses are dirty.
 *
 * Returns the number of bytes actually set, which might be less than
 * copysize; the caller should loop until the whole copy has been done.
 * The caller should ensure that the guest registers are correct
 * for the possibility that the first byte of the copy encounters
 * an exception or watchpoint.  We guarantee not to take any faults
 * for bytes other than the first.
 */
static uint64_t copy_step(CPUARMState *env, uint64_t toaddr, uint64_t fromaddr,
                          uint64_t copysize, int wmemidx, int rmemidx,
                          uint32_t *wdesc, uint32_t *rdesc, uintptr_t ra)
{
    void *rmem;
    void *wmem;

    /* Don't cross a page boundary on either source or destination */
    copysize = MIN(copysize, page_limit(toaddr));
    copysize = MIN(copysize, page_limit(fromaddr));
    /*
     * Handle MTE tag checks: either handle the tag mismatch for byte 0,
     * or else copy up to but not including the byte with the mismatch.
     */
    if (*rdesc) {
        uint64_t mtesize = mte_mops_probe(env, fromaddr, copysize, *rdesc);
        if (mtesize == 0) {
            mte_check_fail(env, *rdesc, fromaddr, ra);
            *rdesc = 0;
        } else {
            copysize = MIN(copysize, mtesize);
        }
    }
    if (*wdesc) {
        uint64_t mtesize = mte_mops_probe(env, toaddr, copysize, *wdesc);
        if (mtesize == 0) {
            mte_check_fail(env, *wdesc, toaddr, ra);
            *wdesc = 0;
        } else {
            copysize = MIN(copysize, mtesize);
        }
    }

    toaddr = useronly_clean_ptr(toaddr);
    fromaddr = useronly_clean_ptr(fromaddr);
    /* Trapless lookup of whether we can get a host memory pointer */
    wmem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, wmemidx);
    rmem = tlb_vaddr_to_host(env, fromaddr, MMU_DATA_LOAD, rmemidx);

#ifndef CONFIG_USER_ONLY
    /*
     * If we don't have host memory for both source and dest then just
     * do a single byte copy. This will handle watchpoints, invalid pages,
     * etc correctly. For clean code pages, the next iteration will see
     * the page dirty and will use the fast path.
     */
    if (unlikely(!rmem || !wmem)) {
        uint8_t byte;
        if (rmem) {
            byte = *(uint8_t *)rmem;
        } else {
            byte = cpu_ldub_mmuidx_ra(env, fromaddr, rmemidx, ra);
        }
        if (wmem) {
            *(uint8_t *)wmem = byte;
        } else {
            cpu_stb_mmuidx_ra(env, toaddr, byte, wmemidx, ra);
        }
        return 1;
    }
#endif
    /* Easy case: just memmove the host memory */
    set_helper_retaddr(ra);
    memmove(wmem, rmem, copysize);
    clear_helper_retaddr();
    return copysize;
}

/*
 * Do part of a backwards memory copy. Here toaddr and fromaddr point
 * to the *last* byte to be copied.
 */
static uint64_t copy_step_rev(CPUARMState *env, uint64_t toaddr,
                              uint64_t fromaddr,
                              uint64_t copysize, int wmemidx, int rmemidx,
                              uint32_t *wdesc, uint32_t *rdesc, uintptr_t ra)
{
    void *rmem;
    void *wmem;

    /* Don't cross a page boundary on either source or destination */
    copysize = MIN(copysize, page_limit_rev(toaddr));
    copysize = MIN(copysize, page_limit_rev(fromaddr));

    /*
     * Handle MTE tag checks: either handle the tag mismatch for byte 0,
     * or else copy up to but not including the byte with the mismatch.
     */
    if (*rdesc) {
        uint64_t mtesize = mte_mops_probe_rev(env, fromaddr, copysize, *rdesc);
        if (mtesize == 0) {
            mte_check_fail(env, *rdesc, fromaddr, ra);
            *rdesc = 0;
        } else {
            copysize = MIN(copysize, mtesize);
        }
    }
    if (*wdesc) {
        uint64_t mtesize = mte_mops_probe_rev(env, toaddr, copysize, *wdesc);
        if (mtesize == 0) {
            mte_check_fail(env, *wdesc, toaddr, ra);
            *wdesc = 0;
        } else {
            copysize = MIN(copysize, mtesize);
        }
    }

    toaddr = useronly_clean_ptr(toaddr);
    fromaddr = useronly_clean_ptr(fromaddr);
    /* Trapless lookup of whether we can get a host memory pointer */
    wmem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, wmemidx);
    rmem = tlb_vaddr_to_host(env, fromaddr, MMU_DATA_LOAD, rmemidx);

#ifndef CONFIG_USER_ONLY
    /*
     * If we don't have host memory for both source and dest then just
     * do a single byte copy. This will handle watchpoints, invalid pages,
     * etc correctly. For clean code pages, the next iteration will see
     * the page dirty and will use the fast path.
     */
    if (unlikely(!rmem || !wmem)) {
        uint8_t byte;
        if (rmem) {
            byte = *(uint8_t *)rmem;
        } else {
            byte = cpu_ldub_mmuidx_ra(env, fromaddr, rmemidx, ra);
        }
        if (wmem) {
            *(uint8_t *)wmem = byte;
        } else {
            cpu_stb_mmuidx_ra(env, toaddr, byte, wmemidx, ra);
        }
        return 1;
    }
#endif
    /*
     * Easy case: just memmove the host memory. Note that wmem and
     * rmem here point to the *last* byte to copy.
     */
    set_helper_retaddr(ra);
    memmove(wmem - (copysize - 1), rmem - (copysize - 1), copysize);
    clear_helper_retaddr();
    return copysize;
}

/*
 * For the Memory Copy operation, our implementation chooses always
 * to use "option A", where we update Xd and Xs to the final addresses
 * in the CPYP insn, and then in CPYM and CPYE only need to update Xn.
 *
 * @env: CPU
 * @syndrome: syndrome value for mismatch exceptions
 *            (also contains the register numbers we need to use)
 * @wdesc: MTE descriptor for the writes (destination)
 * @rdesc: MTE descriptor for the reads (source)
 * @move: true if this is CPY (memmove), false for CPYF (memcpy forwards)
 */
static void do_cpyp(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                    uint32_t rdesc, uint32_t move, uintptr_t ra)
{
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX);
    uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX);
    bool forwards = true;
    uint64_t toaddr = env->xregs[rd];
    uint64_t fromaddr = env->xregs[rs];
    uint64_t copysize = env->xregs[rn];
    uint64_t stagecopysize, step;

    check_mops_enabled(env, ra);

    if (move) {
        /*
         * Copy backwards if necessary. The direction for a non-overlapping
         * copy is IMPDEF; we choose forwards.
         */
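        /*
         * Worked example (illustrative): fromaddr = 0x1000,
         * toaddr = 0x1004, copysize = 0x10 gives fs = 0x1000,
         * ts = 0x1004, fe = 0x1010; fs < ts and fe > ts, so the
         * destination overlaps the top of the source and the copy
         * must be done backwards to be memmove-safe.
         */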
        if (copysize > 0x007FFFFFFFFFFFFFULL) {
            copysize = 0x007FFFFFFFFFFFFFULL;
        }
        uint64_t fs = extract64(fromaddr, 0, 56);
        uint64_t ts = extract64(toaddr, 0, 56);
        uint64_t fe = extract64(fromaddr + copysize, 0, 56);

        if (fs < ts && fe > ts) {
            forwards = false;
        }
    } else {
        if (copysize > INT64_MAX) {
            copysize = INT64_MAX;
        }
    }

    if (!mte_checks_needed(fromaddr, rdesc)) {
        rdesc = 0;
    }
    if (!mte_checks_needed(toaddr, wdesc)) {
        wdesc = 0;
    }

    if (forwards) {
        stagecopysize = MIN(copysize, page_limit(toaddr));
        stagecopysize = MIN(stagecopysize, page_limit(fromaddr));
        while (stagecopysize) {
            env->xregs[rd] = toaddr;
            env->xregs[rs] = fromaddr;
            env->xregs[rn] = copysize;
            step = copy_step(env, toaddr, fromaddr, stagecopysize,
                             wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr += step;
            fromaddr += step;
            copysize -= step;
            stagecopysize -= step;
        }
        /* Insn completed, so update registers to the Option A format */
        env->xregs[rd] = toaddr + copysize;
        env->xregs[rs] = fromaddr + copysize;
        env->xregs[rn] = -copysize;
    } else {
        /*
         * In a reverse copy the to and from addrs in Xs and Xd are the start
         * of the range, but it's more convenient for us to work with pointers
         * to the last byte being copied.
         */
        toaddr += copysize - 1;
        fromaddr += copysize - 1;
        stagecopysize = MIN(copysize, page_limit_rev(toaddr));
        stagecopysize = MIN(stagecopysize, page_limit_rev(fromaddr));
        while (stagecopysize) {
            env->xregs[rn] = copysize;
            step = copy_step_rev(env, toaddr, fromaddr, stagecopysize,
                                 wmemidx, rmemidx, &wdesc, &rdesc, ra);
            copysize -= step;
            stagecopysize -= step;
            toaddr -= step;
            fromaddr -= step;
        }
        /*
         * Insn completed, so update registers to the Option A format.
         * For a reverse copy this is no different to the CPYP input format.
         */
        env->xregs[rn] = copysize;
    }

    /* Set NZCV = 0000 to indicate we are an Option A implementation */
    env->NF = 0;
    env->ZF = 1; /* our env->ZF encoding is inverted */
    env->CF = 0;
    env->VF = 0;
}

void HELPER(cpyp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                  uint32_t rdesc)
{
    do_cpyp(env, syndrome, wdesc, rdesc, true, GETPC());
}

void HELPER(cpyfp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                   uint32_t rdesc)
{
    do_cpyp(env, syndrome, wdesc, rdesc, false, GETPC());
}

static void do_cpym(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                    uint32_t rdesc, uint32_t move, uintptr_t ra)
{
    /* Main: we choose to copy until less than a page remaining */
    CPUState *cs = env_cpu(env);
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX);
    uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX);
    bool forwards = true;
    uint64_t toaddr, fromaddr, copysize, step;

    check_mops_enabled(env, ra);

    /* We choose to NOP out "no data to copy" before consistency checks */
    if (env->xregs[rn] == 0) {
        return;
    }

    check_mops_wrong_option(env, syndrome, ra);

    if (move) {
        forwards = (int64_t)env->xregs[rn] < 0;
    }

    if (forwards) {
        toaddr = env->xregs[rd] + env->xregs[rn];
        fromaddr = env->xregs[rs] + env->xregs[rn];
        copysize = -env->xregs[rn];
    } else {
        copysize = env->xregs[rn];
        /* This toaddr and fromaddr point to the *last* byte to copy */
        toaddr = env->xregs[rd] + copysize - 1;
        fromaddr = env->xregs[rs] + copysize - 1;
    }

    if (!mte_checks_needed(fromaddr, rdesc)) {
        rdesc = 0;
    }
    if (!mte_checks_needed(toaddr, wdesc)) {
        wdesc = 0;
    }

    /* Our implementation has no particular parameter requirements for CPYM */

    /* Do the actual memmove */
    if (forwards) {
        while (copysize >= TARGET_PAGE_SIZE) {
            step = copy_step(env, toaddr, fromaddr, copysize,
                             wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr += step;
            fromaddr += step;
            copysize -= step;
            env->xregs[rn] = -copysize;
            if (copysize >= TARGET_PAGE_SIZE &&
                unlikely(cpu_loop_exit_requested(cs))) {
                cpu_loop_exit_restore(cs, ra);
            }
        }
    } else {
        while (copysize >= TARGET_PAGE_SIZE) {
            step = copy_step_rev(env, toaddr, fromaddr, copysize,
                                 wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr -= step;
            fromaddr -= step;
            copysize -= step;
            env->xregs[rn] = copysize;
            if (copysize >= TARGET_PAGE_SIZE &&
                unlikely(cpu_loop_exit_requested(cs))) {
                cpu_loop_exit_restore(cs, ra);
            }
        }
    }
}

void HELPER(cpym)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                  uint32_t rdesc)
{
    do_cpym(env, syndrome, wdesc, rdesc, true, GETPC());
}

void HELPER(cpyfm)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                   uint32_t rdesc)
{
    do_cpym(env, syndrome, wdesc, rdesc, false, GETPC());
}

static void do_cpye(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                    uint32_t rdesc, uint32_t move, uintptr_t ra)
{
    /* Epilogue: do the last partial page */
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX);
    uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX);
    bool forwards = true;
    uint64_t toaddr, fromaddr, copysize, step;

    check_mops_enabled(env, ra);

    /* We choose to NOP out "no data to copy" before consistency checks */
    if (env->xregs[rn] == 0) {
        return;
    }

    check_mops_wrong_option(env, syndrome, ra);

    if (move) {
        forwards = (int64_t)env->xregs[rn] < 0;
    }

    if (forwards) {
        toaddr = env->xregs[rd] + env->xregs[rn];
        fromaddr = env->xregs[rs] + env->xregs[rn];
        copysize = -env->xregs[rn];
    } else {
        copysize = env->xregs[rn];
        /* This toaddr and fromaddr point to the *last* byte to copy */
        toaddr = env->xregs[rd] + copysize - 1;
        fromaddr = env->xregs[rs] + copysize - 1;
    }

    if (!mte_checks_needed(fromaddr, rdesc)) {
        rdesc = 0;
    }
    if (!mte_checks_needed(toaddr, wdesc)) {
        wdesc = 0;
    }

    /* Check the size; we don't want to have to do a check-for-interrupts */
    if (copysize >= TARGET_PAGE_SIZE) {
        raise_exception_ra(env, EXCP_UDEF, syndrome,
                           mops_mismatch_exception_target_el(env), ra);
    }

    /* Do the actual memmove */
    if (forwards) {
        while (copysize > 0) {
            step = copy_step(env, toaddr, fromaddr, copysize,
                             wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr += step;
            fromaddr += step;
            copysize -= step;
            env->xregs[rn] = -copysize;
        }
    } else {
        while (copysize > 0) {
            step = copy_step_rev(env, toaddr, fromaddr, copysize,
                                 wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr -= step;
            fromaddr -= step;
            copysize -= step;
            env->xregs[rn] = copysize;
        }
    }
}

void HELPER(cpye)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                  uint32_t rdesc)
{
    do_cpye(env, syndrome, wdesc, rdesc, true, GETPC());
}

void HELPER(cpyfe)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                   uint32_t rdesc)
{
    do_cpye(env, syndrome, wdesc, rdesc, false, GETPC());
}

static bool is_guarded_page(CPUARMState *env, target_ulong addr, uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    return page_get_flags(addr) & PAGE_BTI;
#else
    CPUTLBEntryFull *full;
    void *host;
    int mmu_idx = cpu_mmu_index(env_cpu(env), true);
    int flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx,
                                  false, &host, &full, ra);

    assert(!(flags & TLB_INVALID_MASK));
    return full->extra.arm.guarded;
#endif
}

void HELPER(guarded_page_check)(CPUARMState *env)
{
    /*
     * We have already verified that bti is enabled, and that the
     * instruction at PC is not ok for BTYPE.  This is always at
     * the beginning of a block, so PC is always up-to-date and
     * no unwind is required.
     */
    if (is_guarded_page(env, env->pc, 0)) {
        raise_exception(env, EXCP_UDEF, syn_btitrap(env->btype),
                        exception_target_el(env));
    }
}

void HELPER(guarded_page_br)(CPUARMState *env, target_ulong pc)
{
    /*
     * We have already checked for branch via x16 and x17.
     * What remains for choosing BTYPE is checking for a guarded page.
     */
    env->btype = is_guarded_page(env, pc, GETPC()) ? 3 : 1;
}