/*
 * AArch64 specific helpers
 *
 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/units.h"
#include "cpu.h"
#include "gdbstub/helpers.h"
#include "exec/helper-proto.h"
#include "qemu/host-utils.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/bitops.h"
#include "internals.h"
#include "qemu/crc32c.h"
#include "exec/cpu-common.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "qemu/int128.h"
#include "qemu/atomic128.h"
#include "fpu/softfloat.h"
#include <zlib.h> /* for crc32 */
#ifdef CONFIG_USER_ONLY
#include "user/page-protection.h"
#endif
#include "vec_internal.h"

/* C2.4.7 Multiply and divide */
/* special cases for 0 and LLONG_MIN are mandated by the standard */
uint64_t HELPER(udiv64)(uint64_t num, uint64_t den)
{
    if (den == 0) {
        return 0;
    }
    return num / den;
}

int64_t HELPER(sdiv64)(int64_t num, int64_t den)
{
    if (den == 0) {
        return 0;
    }
    if (num == LLONG_MIN && den == -1) {
        return LLONG_MIN;
    }
    return num / den;
}

uint64_t HELPER(rbit64)(uint64_t x)
{
    return revbit64(x);
}

void HELPER(msr_i_spsel)(CPUARMState *env, uint32_t imm)
{
    update_spsel(env, imm);
}

void HELPER(msr_set_allint_el1)(CPUARMState *env)
{
    /* ALLINT update to PSTATE. */
    if (arm_hcrx_el2_eff(env) & HCRX_TALLINT) {
        raise_exception_ra(env, EXCP_UDEF,
                           syn_aa64_sysregtrap(0, 1, 0, 4, 1, 0x1f, 0), 2,
                           GETPC());
    }

    env->pstate |= PSTATE_ALLINT;
}

static void daif_check(CPUARMState *env, uint32_t op,
                       uint32_t imm, uintptr_t ra)
{
    /* DAIF update to PSTATE. This is OK from EL0 only if UMA is set. */
    if (arm_current_el(env) == 0 && !(arm_sctlr(env, 0) & SCTLR_UMA)) {
        raise_exception_ra(env, EXCP_UDEF,
                           syn_aa64_sysregtrap(0, extract32(op, 0, 3),
                                               extract32(op, 3, 3), 4,
                                               imm, 0x1f, 0),
                           exception_target_el(env), ra);
    }
}

void HELPER(msr_i_daifset)(CPUARMState *env, uint32_t imm)
{
    daif_check(env, 0x1e, imm, GETPC());
    env->daif |= (imm << 6) & PSTATE_DAIF;
    arm_rebuild_hflags(env);
}

void HELPER(msr_i_daifclear)(CPUARMState *env, uint32_t imm)
{
    daif_check(env, 0x1f, imm, GETPC());
    env->daif &= ~((imm << 6) & PSTATE_DAIF);
    arm_rebuild_hflags(env);
}
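
/*
 * Illustrative note on the two helpers above: the CRm immediate of the
 * MSR DAIFSet/DAIFClr forms carries the D, A, I, F bits in imm[3:0],
 * while PSTATE keeps the same flags in bits [9:6], so "imm << 6" lines
 * the two fields up. For example imm == 0b0011 (mask I and F) becomes
 * PSTATE_I | PSTATE_F.
 */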

/* Convert a softfloat float_relation_ (as returned by
 * the float*_compare functions) to the correct ARM
 * NZCV flag state.
 */
static inline uint32_t float_rel_to_flags(int res)
{
    uint64_t flags;
    switch (res) {
    case float_relation_equal:
        flags = PSTATE_Z | PSTATE_C;
        break;
    case float_relation_less:
        flags = PSTATE_N;
        break;
    case float_relation_greater:
        flags = PSTATE_C;
        break;
    case float_relation_unordered:
    default:
        flags = PSTATE_C | PSTATE_V;
        break;
    }
    return flags;
}

uint64_t HELPER(vfp_cmph_a64)(uint32_t x, uint32_t y, float_status *fp_status)
{
    return float_rel_to_flags(float16_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpeh_a64)(uint32_t x, uint32_t y, float_status *fp_status)
{
    return float_rel_to_flags(float16_compare(x, y, fp_status));
}

uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, float_status *fp_status)
{
    return float_rel_to_flags(float32_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpes_a64)(float32 x, float32 y, float_status *fp_status)
{
    return float_rel_to_flags(float32_compare(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpd_a64)(float64 x, float64 y, float_status *fp_status)
{
    return float_rel_to_flags(float64_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, float_status *fp_status)
{
    return float_rel_to_flags(float64_compare(x, y, fp_status));
}

float32 HELPER(vfp_mulxs)(float32 a, float32 b, float_status *fpst)
{
    a = float32_squash_input_denormal(a, fpst);
    b = float32_squash_input_denormal(b, fpst);

    if ((float32_is_zero(a) && float32_is_infinity(b)) ||
        (float32_is_infinity(a) && float32_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float32((1U << 30) |
                            ((float32_val(a) ^ float32_val(b)) & (1U << 31)));
    }
    return float32_mul(a, b, fpst);
}

float64 HELPER(vfp_mulxd)(float64 a, float64 b, float_status *fpst)
{
    a = float64_squash_input_denormal(a, fpst);
    b = float64_squash_input_denormal(b, fpst);

    if ((float64_is_zero(a) && float64_is_infinity(b)) ||
        (float64_is_infinity(a) && float64_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float64((1ULL << 62) |
                            ((float64_val(a) ^ float64_val(b)) & (1ULL << 63)));
    }
    return float64_mul(a, b, fpst);
}

/* 64bit/double versions of the neon float compare functions */
uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, float_status *fpst)
{
    return -float64_eq_quiet(a, b, fpst);
}

uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, float_status *fpst)
{
    return -float64_le(b, a, fpst);
}

uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, float_status *fpst)
{
    return -float64_lt(b, a, fpst);
}
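
/*
 * Illustrative note: in the FMULX helpers above, (1U << 30) and
 * (1ULL << 62) are simply the IEEE-754 encodings of 2.0 in single and
 * double precision, so OR-ing in the XOR of the two sign bits produces
 * the architecturally required +/-2.0 result for 0 * infinity. In the
 * Neon compare helpers, negating the 0/1 boolean result yields the
 * all-zeroes or all-ones mask the instructions return.
 */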

/*
 * Reciprocal step and sqrt step. Note that unlike the A32/T32
 * versions, these do a fully fused multiply-add or
 * multiply-add-and-halve.
 * The FPCR.AH == 1 versions need to avoid flipping the sign of NaN.
 */
#define DO_RECPS(NAME, CTYPE, FLOATTYPE, CHSFN)                           \
    CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst)              \
    {                                                                     \
        a = FLOATTYPE ## _squash_input_denormal(a, fpst);                 \
        b = FLOATTYPE ## _squash_input_denormal(b, fpst);                 \
        a = FLOATTYPE ## _ ## CHSFN(a);                                   \
        if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \
            (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \
            return FLOATTYPE ## _two;                                     \
        }                                                                 \
        return FLOATTYPE ## _muladd(a, b, FLOATTYPE ## _two, 0, fpst);    \
    }

DO_RECPS(recpsf_f16, uint32_t, float16, chs)
DO_RECPS(recpsf_f32, float32, float32, chs)
DO_RECPS(recpsf_f64, float64, float64, chs)
DO_RECPS(recpsf_ah_f16, uint32_t, float16, ah_chs)
DO_RECPS(recpsf_ah_f32, float32, float32, ah_chs)
DO_RECPS(recpsf_ah_f64, float64, float64, ah_chs)

#define DO_RSQRTSF(NAME, CTYPE, FLOATTYPE, CHSFN)                         \
    CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst)              \
    {                                                                     \
        a = FLOATTYPE ## _squash_input_denormal(a, fpst);                 \
        b = FLOATTYPE ## _squash_input_denormal(b, fpst);                 \
        a = FLOATTYPE ## _ ## CHSFN(a);                                   \
        if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \
            (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \
            return FLOATTYPE ## _one_point_five;                          \
        }                                                                 \
        return FLOATTYPE ## _muladd_scalbn(a, b, FLOATTYPE ## _three,     \
                                           -1, 0, fpst);                  \
    }                                                                     \

DO_RSQRTSF(rsqrtsf_f16, uint32_t, float16, chs)
DO_RSQRTSF(rsqrtsf_f32, float32, float32, chs)
DO_RSQRTSF(rsqrtsf_f64, float64, float64, chs)
DO_RSQRTSF(rsqrtsf_ah_f16, uint32_t, float16, ah_chs)
DO_RSQRTSF(rsqrtsf_ah_f32, float32, float32, ah_chs)
DO_RSQRTSF(rsqrtsf_ah_f64, float64, float64, ah_chs)
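
/*
 * Illustrative note on the two macros above: after the first operand has
 * been negated via CHSFN, FLOATTYPE##_muladd(a, b, two, 0, fpst) computes
 * 2 - x * y as FRECPS requires, and the _scalbn variant with a scale of
 * -1 computes (3 - x * y) / 2 for FRSQRTS, each in a single fused step.
 */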

/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
uint32_t HELPER(frecpx_f16)(uint32_t a, float_status *fpst)
{
    uint16_t val16, sbit;
    int16_t exp;

    if (float16_is_any_nan(a)) {
        float16 nan = a;
        if (float16_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            if (!fpst->default_nan_mode) {
                nan = float16_silence_nan(a, fpst);
            }
        }
        if (fpst->default_nan_mode) {
            nan = float16_default_nan(fpst);
        }
        return nan;
    }

    a = float16_squash_input_denormal(a, fpst);

    val16 = float16_val(a);
    sbit = 0x8000 & val16;
    exp = extract32(val16, 10, 5);

    if (exp == 0) {
        return make_float16(deposit32(sbit, 10, 5, 0x1e));
    } else {
        return make_float16(deposit32(sbit, 10, 5, ~exp));
    }
}

float32 HELPER(frecpx_f32)(float32 a, float_status *fpst)
{
    uint32_t val32, sbit;
    int32_t exp;

    if (float32_is_any_nan(a)) {
        float32 nan = a;
        if (float32_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            if (!fpst->default_nan_mode) {
                nan = float32_silence_nan(a, fpst);
            }
        }
        if (fpst->default_nan_mode) {
            nan = float32_default_nan(fpst);
        }
        return nan;
    }

    a = float32_squash_input_denormal(a, fpst);

    val32 = float32_val(a);
    sbit = 0x80000000ULL & val32;
    exp = extract32(val32, 23, 8);

    if (exp == 0) {
        return make_float32(sbit | (0xfe << 23));
    } else {
        return make_float32(sbit | (~exp & 0xff) << 23);
    }
}

float64 HELPER(frecpx_f64)(float64 a, float_status *fpst)
{
    uint64_t val64, sbit;
    int64_t exp;

    if (float64_is_any_nan(a)) {
        float64 nan = a;
        if (float64_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            if (!fpst->default_nan_mode) {
                nan = float64_silence_nan(a, fpst);
            }
        }
        if (fpst->default_nan_mode) {
            nan = float64_default_nan(fpst);
        }
        return nan;
    }

    a = float64_squash_input_denormal(a, fpst);

    val64 = float64_val(a);
    sbit = 0x8000000000000000ULL & val64;
    exp = extract64(float64_val(a), 52, 11);

    if (exp == 0) {
        return make_float64(sbit | (0x7feULL << 52));
    } else {
        return make_float64(sbit | (~exp & 0x7ffULL) << 52);
    }
}

float32 HELPER(fcvtx_f64_to_f32)(float64 a, float_status *fpst)
{
    float32 r;
    int old = get_float_rounding_mode(fpst);

    set_float_rounding_mode(float_round_to_odd, fpst);
    r = float64_to_float32(a, fpst);
    set_float_rounding_mode(old, fpst);
    return r;
}

/*
 * AH=1 min/max have some odd special cases:
 * comparing two zeroes (regardless of sign), (NaN, anything),
 * or (anything, NaN) should return the second argument (possibly
 * squashed to zero).
 * Also, denormal outputs are not squashed to zero regardless of FZ or FZ16.
 */
#define AH_MINMAX_HELPER(NAME, CTYPE, FLOATTYPE, MINMAX)                \
    CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst)            \
    {                                                                   \
        bool save;                                                      \
        CTYPE r;                                                        \
        a = FLOATTYPE ## _squash_input_denormal(a, fpst);               \
        b = FLOATTYPE ## _squash_input_denormal(b, fpst);               \
        if (FLOATTYPE ## _is_zero(a) && FLOATTYPE ## _is_zero(b)) {     \
            return b;                                                   \
        }                                                               \
        if (FLOATTYPE ## _is_any_nan(a) ||                              \
            FLOATTYPE ## _is_any_nan(b)) {                              \
            float_raise(float_flag_invalid, fpst);                      \
            return b;                                                   \
        }                                                               \
        save = get_flush_to_zero(fpst);                                 \
        set_flush_to_zero(false, fpst);                                 \
        r = FLOATTYPE ## _ ## MINMAX(a, b, fpst);                       \
        set_flush_to_zero(save, fpst);                                  \
        return r;                                                       \
    }

AH_MINMAX_HELPER(vfp_ah_minh, dh_ctype_f16, float16, min)
AH_MINMAX_HELPER(vfp_ah_mins, float32, float32, min)
AH_MINMAX_HELPER(vfp_ah_mind, float64, float64, min)
AH_MINMAX_HELPER(vfp_ah_maxh, dh_ctype_f16, float16, max)
AH_MINMAX_HELPER(vfp_ah_maxs, float32, float32, max)
AH_MINMAX_HELPER(vfp_ah_maxd, float64, float64, max)
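
/*
 * Illustrative example of the AH=1 behaviour above: FMIN(NaN, 5.0)
 * returns 5.0 (the second operand) rather than a NaN, FMIN(5.0, NaN)
 * returns the NaN, and FMIN(+0.0, -0.0) returns -0.0 simply because it
 * is the second operand; Invalid Operation is still raised when either
 * input is a NaN.
 */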

/* 64-bit versions of the CRC helpers. Note that although the operation
 * (and the prototypes of crc32c() and crc32()) mean that only the bottom
 * 32 bits of the accumulator and result are used, we pass and return
 * uint64_t for convenience of the generated code. Unlike the 32-bit
 * instruction set versions, val may genuinely have 64 bits of data in it.
 * The upper bytes of val (above the number specified by 'bytes') must have
 * been zeroed out by the caller.
 */
uint64_t HELPER(crc32_64)(uint64_t acc, uint64_t val, uint32_t bytes)
{
    uint8_t buf[8];

    stq_le_p(buf, val);

    /* zlib crc32 converts the accumulator and output to one's complement. */
    return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
}

uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
{
    uint8_t buf[8];

    stq_le_p(buf, val);

    /* Linux crc32c converts the output to one's complement. */
    return crc32c(acc, buf, bytes) ^ 0xffffffff;
}

/*
 * AdvSIMD half-precision
 */

#define ADVSIMD_HELPER(name, suffix) HELPER(glue(glue(advsimd_, name), suffix))

#define ADVSIMD_HALFOP(name) \
uint32_t ADVSIMD_HELPER(name, h)(uint32_t a, uint32_t b, float_status *fpst) \
{ \
    return float16_ ## name(a, b, fpst); \
}

#define ADVSIMD_TWOHALFOP(name)                                         \
uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b,       \
                                  float_status *fpst)                   \
{                                                                       \
    float16 a1, a2, b1, b2;                                             \
    uint32_t r1, r2;                                                    \
    a1 = extract32(two_a, 0, 16);                                       \
    a2 = extract32(two_a, 16, 16);                                      \
    b1 = extract32(two_b, 0, 16);                                       \
    b2 = extract32(two_b, 16, 16);                                      \
    r1 = float16_ ## name(a1, b1, fpst);                                \
    r2 = float16_ ## name(a2, b2, fpst);                                \
    return deposit32(r1, 16, 16, r2);                                   \
}

ADVSIMD_TWOHALFOP(add)
ADVSIMD_TWOHALFOP(sub)
ADVSIMD_TWOHALFOP(mul)
ADVSIMD_TWOHALFOP(div)
ADVSIMD_TWOHALFOP(min)
ADVSIMD_TWOHALFOP(max)
ADVSIMD_TWOHALFOP(minnum)
ADVSIMD_TWOHALFOP(maxnum)

/* Data processing - scalar floating-point and advanced SIMD */
static float16 float16_mulx(float16 a, float16 b, float_status *fpst)
{
    a = float16_squash_input_denormal(a, fpst);
    b = float16_squash_input_denormal(b, fpst);

    if ((float16_is_zero(a) && float16_is_infinity(b)) ||
        (float16_is_infinity(a) && float16_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float16((1U << 14) |
                            ((float16_val(a) ^ float16_val(b)) & (1U << 15)));
    }
    return float16_mul(a, b, fpst);
}

ADVSIMD_HALFOP(mulx)
ADVSIMD_TWOHALFOP(mulx)

/* fused multiply-accumulate */
uint32_t HELPER(advsimd_muladdh)(uint32_t a, uint32_t b, uint32_t c,
                                 float_status *fpst)
{
    return float16_muladd(a, b, c, 0, fpst);
}

uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b,
                                  uint32_t two_c, float_status *fpst)
{
    float16 a1, a2, b1, b2, c1, c2;
    uint32_t r1, r2;
    a1 = extract32(two_a, 0, 16);
    a2 = extract32(two_a, 16, 16);
    b1 = extract32(two_b, 0, 16);
    b2 = extract32(two_b, 16, 16);
    c1 = extract32(two_c, 0, 16);
    c2 = extract32(two_c, 16, 16);
    r1 = float16_muladd(a1, b1, c1, 0, fpst);
    r2 = float16_muladd(a2, b2, c2, 0, fpst);
    return deposit32(r1, 16, 16, r2);
}
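
/*
 * Illustrative note: the "2h" helpers above operate on a pair of
 * half-precision values packed into one uint32_t, with lane 0 in bits
 * [15:0] and lane 1 in bits [31:16]; each lane is processed
 * independently and the results are repacked with deposit32().
 */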

/*
 * Floating point comparisons produce an integer result. Softfloat
 * routines return float_relation types which we convert to the 0/-1
 * Neon requires.
 */

#define ADVSIMD_CMPRES(test) (test) ? 0xffff : 0

uint32_t HELPER(advsimd_ceq_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    int compare = float16_compare_quiet(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_equal);
}

uint32_t HELPER(advsimd_cge_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    int compare = float16_compare(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater ||
                          compare == float_relation_equal);
}

uint32_t HELPER(advsimd_cgt_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    int compare = float16_compare(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater);
}

uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    float16 f0 = float16_abs(a);
    float16 f1 = float16_abs(b);
    int compare = float16_compare(f0, f1, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater ||
                          compare == float_relation_equal);
}

uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    float16 f0 = float16_abs(a);
    float16 f1 = float16_abs(b);
    int compare = float16_compare(f0, f1, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater);
}

/* round to integral */
uint32_t HELPER(advsimd_rinth_exact)(uint32_t x, float_status *fp_status)
{
    return float16_round_to_int(x, fp_status);
}

uint32_t HELPER(advsimd_rinth)(uint32_t x, float_status *fp_status)
{
    int old_flags = get_float_exception_flags(fp_status), new_flags;
    float16 ret;

    ret = float16_round_to_int(x, fp_status);

    /* Suppress any inexact exceptions the conversion produced */
    if (!(old_flags & float_flag_inexact)) {
        new_flags = get_float_exception_flags(fp_status);
        set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
    }

    return ret;
}

static int el_from_spsr(uint32_t spsr)
{
    /* Return the exception level that this SPSR is requesting a return to,
     * or -1 if it is invalid (an illegal return)
     */
    if (spsr & PSTATE_nRW) {
        switch (spsr & CPSR_M) {
        case ARM_CPU_MODE_USR:
            return 0;
        case ARM_CPU_MODE_HYP:
            return 2;
        case ARM_CPU_MODE_FIQ:
        case ARM_CPU_MODE_IRQ:
        case ARM_CPU_MODE_SVC:
        case ARM_CPU_MODE_ABT:
        case ARM_CPU_MODE_UND:
        case ARM_CPU_MODE_SYS:
            return 1;
        case ARM_CPU_MODE_MON:
            /* Returning to Mon from AArch64 is never possible,
             * so this is an illegal return.
             */
        default:
            return -1;
        }
    } else {
        if (extract32(spsr, 1, 1)) {
            /* Return with reserved M[1] bit set */
            return -1;
        }
        if (extract32(spsr, 0, 4) == 1) {
            /* return to EL0 with M[0] bit set */
            return -1;
        }
        return extract32(spsr, 2, 2);
    }
}
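
/*
 * Illustrative example for el_from_spsr(): for an AArch64 SPSR
 * (PSTATE.nRW clear), M[3:2] selects the target EL and M[0] the stack
 * pointer, so M == 0b0100 (EL1t) and M == 0b0101 (EL1h) both return 1,
 * while M == 0b0001 (EL0 with SP_EL1 selected) is rejected as an
 * illegal return.
 */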

static void cpsr_write_from_spsr_elx(CPUARMState *env,
                                     uint32_t val)
{
    uint32_t mask;

    /* Save SPSR_ELx.SS into PSTATE. */
    env->pstate = (env->pstate & ~PSTATE_SS) | (val & PSTATE_SS);
    val &= ~PSTATE_SS;

    /* Move DIT to the correct location for CPSR */
    if (val & PSTATE_DIT) {
        val &= ~PSTATE_DIT;
        val |= CPSR_DIT;
    }

    mask = aarch32_cpsr_valid_mask(env->features, \
                                   &env_archcpu(env)->isar);
    cpsr_write(env, val, mask, CPSRWriteRaw);
}

void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
{
    ARMCPU *cpu = env_archcpu(env);
    int cur_el = arm_current_el(env);
    unsigned int spsr_idx = aarch64_banked_spsr_index(cur_el);
    uint32_t spsr = env->banked_spsr[spsr_idx];
    int new_el;
    bool return_to_aa64 = (spsr & PSTATE_nRW) == 0;

    aarch64_save_sp(env, cur_el);

    arm_clear_exclusive(env);

    /* We must squash the PSTATE.SS bit to zero unless both of the
     * following hold:
     *  1. debug exceptions are currently disabled
     *  2. singlestep will be active in the EL we return to
     * We check 1 here and 2 after we've done the pstate/cpsr write() to
     * transition to the EL we're going to.
     */
    if (arm_generate_debug_exceptions(env)) {
        spsr &= ~PSTATE_SS;
    }

    /*
     * FEAT_RME forbids return from EL3 with an invalid security state.
     * We don't need an explicit check for FEAT_RME here because we enforce
     * in scr_write() that you can't set the NSE bit without it.
     */
    if (cur_el == 3 && (env->cp15.scr_el3 & (SCR_NS | SCR_NSE)) == SCR_NSE) {
        goto illegal_return;
    }

    new_el = el_from_spsr(spsr);
    if (new_el == -1) {
        goto illegal_return;
    }
    if (new_el > cur_el || (new_el == 2 && !arm_is_el2_enabled(env))) {
        /* Disallow return to an EL which is unimplemented or higher
         * than the current one.
         */
        goto illegal_return;
    }

    if (new_el != 0 && arm_el_is_aa64(env, new_el) != return_to_aa64) {
        /* Return to an EL which is configured for a different register width */
        goto illegal_return;
    }

    if (!return_to_aa64 && !cpu_isar_feature(aa64_aa32, cpu)) {
        /* Return to AArch32 when CPU is AArch64-only */
        goto illegal_return;
    }

    if (new_el == 1 && (arm_hcr_el2_eff(env) & HCR_TGE)) {
        goto illegal_return;
    }

    bql_lock();
    arm_call_pre_el_change_hook(cpu);
    bql_unlock();

    if (!return_to_aa64) {
        env->aarch64 = false;
        /* We do a raw CPSR write because aarch64_sync_64_to_32()
         * will sort the register banks out for us, and we've already
         * caught all the bad-mode cases in el_from_spsr().
         */
        cpsr_write_from_spsr_elx(env, spsr);
        if (!arm_singlestep_active(env)) {
            env->pstate &= ~PSTATE_SS;
        }
        aarch64_sync_64_to_32(env);

        if (spsr & CPSR_T) {
            env->regs[15] = new_pc & ~0x1;
        } else {
            env->regs[15] = new_pc & ~0x3;
        }
        helper_rebuild_hflags_a32(env, new_el);
        qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
                      "AArch32 EL%d PC 0x%" PRIx32 "\n",
                      cur_el, new_el, env->regs[15]);
    } else {
        int tbii;

        env->aarch64 = true;
        spsr &= aarch64_pstate_valid_mask(&cpu->isar);
        pstate_write(env, spsr);
        if (!arm_singlestep_active(env)) {
            env->pstate &= ~PSTATE_SS;
        }
        aarch64_restore_sp(env, new_el);
        helper_rebuild_hflags_a64(env, new_el);

        /*
         * Apply TBI to the exception return address. We had to delay this
         * until after we selected the new EL, so that we could select the
         * correct TBI+TBID bits. This is made easier by waiting until after
         * the hflags rebuild, since we can pull the composite TBII field
         * from there.
         */
        tbii = EX_TBFLAG_A64(env->hflags, TBII);
        if ((tbii >> extract64(new_pc, 55, 1)) & 1) {
            /* TBI is enabled. */
            int core_mmu_idx = arm_env_mmu_index(env);
            if (regime_has_2_ranges(core_to_aa64_mmu_idx(core_mmu_idx))) {
                new_pc = sextract64(new_pc, 0, 56);
            } else {
                new_pc = extract64(new_pc, 0, 56);
            }
        }
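
        /*
         * Illustrative example of the TBI handling above: with TBI enabled
         * in a two-range regime, bits [63:56] of the return address are
         * replaced by a sign-extension of bit 55, so an ELR value of
         * 0xf0de_adbe_ef00_1234 (bit 55 set) becomes
         * 0xffde_adbe_ef00_1234; in a single-range regime the top byte is
         * simply zeroed instead.
         */
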
        env->pc = new_pc;

        qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
                      "AArch64 EL%d PC 0x%" PRIx64 "\n",
                      cur_el, new_el, env->pc);
    }

    /*
     * Note that cur_el can never be 0. If new_el is 0, then
     * el0_a64 is return_to_aa64, else el0_a64 is ignored.
     */
    aarch64_sve_change_el(env, cur_el, new_el, return_to_aa64);

    bql_lock();
    arm_call_el_change_hook(cpu);
    bql_unlock();

    return;

illegal_return:
    /* Illegal return events of various kinds have architecturally
     * mandated behaviour:
     *  restore NZCV and DAIF from SPSR_ELx
     *  set PSTATE.IL
     *  restore PC from ELR_ELx
     *  no change to exception level, execution state or stack pointer
     */
    env->pstate |= PSTATE_IL;
    env->pc = new_pc;
    spsr &= PSTATE_NZCV | PSTATE_DAIF | PSTATE_ALLINT;
    spsr |= pstate_read(env) & ~(PSTATE_NZCV | PSTATE_DAIF | PSTATE_ALLINT);
    pstate_write(env, spsr);
    if (!arm_singlestep_active(env)) {
        env->pstate &= ~PSTATE_SS;
    }
    helper_rebuild_hflags_a64(env, cur_el);
    qemu_log_mask(LOG_GUEST_ERROR, "Illegal exception return at EL%d: "
                  "resuming execution at 0x%" PRIx64 "\n", cur_el, env->pc);
}

void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
{
    uintptr_t ra = GETPC();

    /*
     * Implement DC ZVA, which zeroes a fixed-length block of memory.
     * Note that we do not implement the (architecturally mandated)
     * alignment fault for attempts to use this on Device memory
     * (which matches the usual QEMU behaviour of not implementing either
     * alignment faults or any memory attribute handling).
     */
    int blocklen = 4 << env_archcpu(env)->dcz_blocksize;
    uint64_t vaddr = vaddr_in & ~(blocklen - 1);
    int mmu_idx = arm_env_mmu_index(env);
    void *mem;

    /*
     * Trapless lookup. In addition to actual invalid page, may
     * return NULL for I/O, watchpoints, clean pages, etc.
     */
    mem = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);

#ifndef CONFIG_USER_ONLY
    if (unlikely(!mem)) {
        /*
         * Trap if accessing an invalid page. DC_ZVA requires that we supply
         * the original pointer for an invalid page. But watchpoints require
         * that we probe the actual space. So do both.
         */
        (void) probe_write(env, vaddr_in, 1, mmu_idx, ra);
        mem = probe_write(env, vaddr, blocklen, mmu_idx, ra);

        if (unlikely(!mem)) {
            /*
             * The only remaining reason for mem == NULL is I/O.
             * Just do a series of byte writes as the architecture demands.
             */
            for (int i = 0; i < blocklen; i++) {
                cpu_stb_mmuidx_ra(env, vaddr + i, 0, mmu_idx, ra);
            }
            return;
        }
    }
#endif

    set_helper_retaddr(ra);
    memset(mem, 0, blocklen);
    clear_helper_retaddr();
}
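
/*
 * Illustrative note: the block length above comes from the CPU's
 * dcz_blocksize property, which (like DCZID_EL0.BS) is a log2 count of
 * 4-byte words; the common value of 4 therefore gives a 64-byte block,
 * and vaddr_in is rounded down to that block boundary before zeroing.
 */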

void HELPER(unaligned_access)(CPUARMState *env, uint64_t addr,
                              uint32_t access_type, uint32_t mmu_idx)
{
    arm_cpu_do_unaligned_access(env_cpu(env), addr, access_type,
                                mmu_idx, GETPC());
}

/* Memory operations (memset, memmove, memcpy) */

/*
 * Return true if the CPY* and SET* insns can execute; compare
 * pseudocode CheckMOPSEnabled(), though we refactor it a little.
 */
static bool mops_enabled(CPUARMState *env)
{
    int el = arm_current_el(env);

    if (el < 2 &&
        (arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE) &&
        !(arm_hcrx_el2_eff(env) & HCRX_MSCEN)) {
        return false;
    }

    if (el == 0) {
        if (!el_is_in_host(env, 0)) {
            return env->cp15.sctlr_el[1] & SCTLR_MSCEN;
        } else {
            return env->cp15.sctlr_el[2] & SCTLR_MSCEN;
        }
    }
    return true;
}

static void check_mops_enabled(CPUARMState *env, uintptr_t ra)
{
    if (!mops_enabled(env)) {
        raise_exception_ra(env, EXCP_UDEF, syn_uncategorized(),
                           exception_target_el(env), ra);
    }
}

/*
 * Return the target exception level for an exception due
 * to mismatched arguments in a FEAT_MOPS copy or set.
 * Compare pseudocode MismatchedCpySetTargetEL()
 */
static int mops_mismatch_exception_target_el(CPUARMState *env)
{
    int el = arm_current_el(env);

    if (el > 1) {
        return el;
    }
    if (el == 0 && (arm_hcr_el2_eff(env) & HCR_TGE)) {
        return 2;
    }
    if (el == 1 && (arm_hcrx_el2_eff(env) & HCRX_MCE2)) {
        return 2;
    }
    return 1;
}

/*
 * Check whether an M or E instruction was executed with a CF value
 * indicating the wrong option for this implementation.
 * Assumes we are always Option A.
 */
static void check_mops_wrong_option(CPUARMState *env, uint32_t syndrome,
                                    uintptr_t ra)
{
    if (env->CF != 0) {
        syndrome |= 1 << 17; /* Set the wrong-option bit */
        raise_exception_ra(env, EXCP_UDEF, syndrome,
                           mops_mismatch_exception_target_el(env), ra);
    }
}

/*
 * Return the maximum number of bytes we can transfer starting at addr
 * without crossing a page boundary.
 */
static uint64_t page_limit(uint64_t addr)
{
    return TARGET_PAGE_ALIGN(addr + 1) - addr;
}

/*
 * Return the number of bytes we can copy starting from addr and working
 * backwards without crossing a page boundary.
 */
static uint64_t page_limit_rev(uint64_t addr)
{
    return (addr & ~TARGET_PAGE_MASK) + 1;
}

/*
 * Perform part of a memory set on an area of guest memory starting at
 * toaddr (a dirty address) and extending for setsize bytes.
 *
 * Returns the number of bytes actually set, which might be less than
 * setsize; the caller should loop until the whole set has been done.
 * The caller should ensure that the guest registers are correct
 * for the possibility that the first byte of the set encounters
 * an exception or watchpoint. We guarantee not to take any faults
 * for bytes other than the first.
 */
static uint64_t set_step(CPUARMState *env, uint64_t toaddr,
                         uint64_t setsize, uint32_t data, int memidx,
                         uint32_t *mtedesc, uintptr_t ra)
{
    void *mem;

    setsize = MIN(setsize, page_limit(toaddr));
    if (*mtedesc) {
        uint64_t mtesize = mte_mops_probe(env, toaddr, setsize, *mtedesc);
        if (mtesize == 0) {
            /* Trap, or not. All CPU state is up to date */
            mte_check_fail(env, *mtedesc, toaddr, ra);
            /* Continue, with no further MTE checks required */
            *mtedesc = 0;
        } else {
            /* Advance to the end, or to the tag mismatch */
            setsize = MIN(setsize, mtesize);
        }
    }

    toaddr = useronly_clean_ptr(toaddr);
    /*
     * Trapless lookup: returns NULL for invalid page, I/O,
     * watchpoints, clean pages, etc.
     */
    mem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, memidx);

#ifndef CONFIG_USER_ONLY
    if (unlikely(!mem)) {
        /*
         * Slow-path: just do one byte write. This will handle the
         * watchpoint, invalid page, etc handling correctly.
         * For clean code pages, the next iteration will see
         * the page dirty and will use the fast path.
         */
        cpu_stb_mmuidx_ra(env, toaddr, data, memidx, ra);
        return 1;
    }
#endif
    /* Easy case: just memset the host memory */
    set_helper_retaddr(ra);
    memset(mem, data, setsize);
    clear_helper_retaddr();
    return setsize;
}
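
/*
 * Illustrative example of the stepping scheme above: page_limit(0x1ffc)
 * with 4 KiB pages is 4 and page_limit_rev(0x1003) is also 4, so a set
 * that spans several pages is carried out as a sequence of set_step()
 * calls, each confined to one page; only the first byte of each step
 * can fault, and the callers keep Xn up to date between steps so the
 * operation can restart cleanly.
 */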

/*
 * Similar, but setting tags. The architecture requires us to do this
 * in 16-byte chunks. SETP accesses are not tag checked; they set
 * the tags.
 */
static uint64_t set_step_tags(CPUARMState *env, uint64_t toaddr,
                              uint64_t setsize, uint32_t data, int memidx,
                              uint32_t *mtedesc, uintptr_t ra)
{
    void *mem;
    uint64_t cleanaddr;

    setsize = MIN(setsize, page_limit(toaddr));

    cleanaddr = useronly_clean_ptr(toaddr);
    /*
     * Trapless lookup: returns NULL for invalid page, I/O,
     * watchpoints, clean pages, etc.
     */
    mem = tlb_vaddr_to_host(env, cleanaddr, MMU_DATA_STORE, memidx);

#ifndef CONFIG_USER_ONLY
    if (unlikely(!mem)) {
        /*
         * Slow-path: just do one write. This will handle the
         * watchpoint, invalid page, etc handling correctly.
         * The architecture requires that we do 16 bytes at a time,
         * and we know both ptr and size are 16 byte aligned.
         * For clean code pages, the next iteration will see
         * the page dirty and will use the fast path.
         */
        uint64_t repldata = data * 0x0101010101010101ULL;
        MemOpIdx oi16 = make_memop_idx(MO_TE | MO_128, memidx);
        cpu_st16_mmu(env, toaddr, int128_make128(repldata, repldata), oi16, ra);
        mte_mops_set_tags(env, toaddr, 16, *mtedesc);
        return 16;
    }
#endif
    /* Easy case: just memset the host memory */
    set_helper_retaddr(ra);
    memset(mem, data, setsize);
    clear_helper_retaddr();
    mte_mops_set_tags(env, toaddr, setsize, *mtedesc);
    return setsize;
}

typedef uint64_t StepFn(CPUARMState *env, uint64_t toaddr,
                        uint64_t setsize, uint32_t data,
                        int memidx, uint32_t *mtedesc, uintptr_t ra);

/* Extract register numbers from a MOPS exception syndrome value */
static int mops_destreg(uint32_t syndrome)
{
    return extract32(syndrome, 10, 5);
}

static int mops_srcreg(uint32_t syndrome)
{
    return extract32(syndrome, 5, 5);
}

static int mops_sizereg(uint32_t syndrome)
{
    return extract32(syndrome, 0, 5);
}

/*
 * Return true if TCMA and TBI bits mean we need to do MTE checks.
 * We only need to do this once per MOPS insn, not for every page.
 */
static bool mte_checks_needed(uint64_t ptr, uint32_t desc)
{
    int bit55 = extract64(ptr, 55, 1);

    /*
     * Note that tbi_check() returns true for "access checked" but
     * tcma_check() returns true for "access unchecked".
     */
    if (!tbi_check(desc, bit55)) {
        return false;
    }
    return !tcma_check(desc, bit55, allocation_tag_from_addr(ptr));
}

/* Take an exception if the SETG addr/size are not granule aligned */
static void check_setg_alignment(CPUARMState *env, uint64_t ptr, uint64_t size,
                                 uint32_t memidx, uintptr_t ra)
{
    if ((size != 0 && !QEMU_IS_ALIGNED(ptr, TAG_GRANULE)) ||
        !QEMU_IS_ALIGNED(size, TAG_GRANULE)) {
        arm_cpu_do_unaligned_access(env_cpu(env), ptr, MMU_DATA_STORE,
                                    memidx, ra);
    }
}

static uint64_t arm_reg_or_xzr(CPUARMState *env, int reg)
{
    /*
     * Runtime equivalent of cpu_reg() -- return the CPU register value,
     * for contexts when index 31 means XZR (not SP).
     */
    return reg == 31 ? 0 : env->xregs[reg];
}

/*
 * For the Memory Set operation, our implementation chooses
 * always to use "option A", where we update Xd to the final
 * address in the SETP insn, and set Xn to be -(bytes remaining).
 * On SETM and SETE insns we only need update Xn.
 *
 * @env: CPU
 * @syndrome: syndrome value for mismatch exceptions
 * (also contains the register numbers we need to use)
 * @mtedesc: MTE descriptor word
 * @stepfn: function which does a single part of the set operation
 * @is_setg: true if this is the tag-setting SETG variant
 */
static void do_setp(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
                    StepFn *stepfn, bool is_setg, uintptr_t ra)
{
    /* Prologue: we choose to do up to the next page boundary */
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint8_t data = arm_reg_or_xzr(env, rs);
    uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX);
    uint64_t toaddr = env->xregs[rd];
    uint64_t setsize = env->xregs[rn];
    uint64_t stagesetsize, step;

    check_mops_enabled(env, ra);

    if (setsize > INT64_MAX) {
        setsize = INT64_MAX;
        if (is_setg) {
            setsize &= ~0xf;
        }
    }

    if (unlikely(is_setg)) {
        check_setg_alignment(env, toaddr, setsize, memidx, ra);
    } else if (!mte_checks_needed(toaddr, mtedesc)) {
        mtedesc = 0;
    }

    stagesetsize = MIN(setsize, page_limit(toaddr));
    while (stagesetsize) {
        env->xregs[rd] = toaddr;
        env->xregs[rn] = setsize;
        step = stepfn(env, toaddr, stagesetsize, data, memidx, &mtedesc, ra);
        toaddr += step;
        setsize -= step;
        stagesetsize -= step;
    }
    /* Insn completed, so update registers to the Option A format */
    env->xregs[rd] = toaddr + setsize;
    env->xregs[rn] = -setsize;

    /* Set NZCV = 0000 to indicate we are an Option A implementation */
    env->NF = 0;
    env->ZF = 1; /* our env->ZF encoding is inverted */
    env->CF = 0;
    env->VF = 0;
    return;
}

void HELPER(setp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_setp(env, syndrome, mtedesc, set_step, false, GETPC());
}

void HELPER(setgp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_setp(env, syndrome, mtedesc, set_step_tags, true, GETPC());
}
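
/*
 * Illustrative example of the Option A register convention used by
 * do_setp() above: a SETP with Xd = 0x8000 and Xn = 0x100 fits inside
 * one 4 KiB page, so the prologue performs the whole set and leaves
 * Xd = 0x8100, Xn = 0 and NZCV = 0000; the following SETM and SETE
 * then find nothing left to do.
 */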

static void do_setm(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
                    StepFn *stepfn, bool is_setg, uintptr_t ra)
{
    /* Main: we choose to do all the full-page chunks */
    CPUState *cs = env_cpu(env);
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint8_t data = arm_reg_or_xzr(env, rs);
    uint64_t toaddr = env->xregs[rd] + env->xregs[rn];
    uint64_t setsize = -env->xregs[rn];
    uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX);
    uint64_t step, stagesetsize;

    check_mops_enabled(env, ra);

    /*
     * We're allowed to NOP out "no data to copy" before the consistency
     * checks; we choose to do so.
     */
    if (env->xregs[rn] == 0) {
        return;
    }

    check_mops_wrong_option(env, syndrome, ra);

    /*
     * Our implementation will work fine even if we have an unaligned
     * destination address, and because we update Xn every time around
     * the loop below and the return value from stepfn() may be less
     * than requested, we might find toaddr is unaligned. So we don't
     * have an IMPDEF check for alignment here.
     */

    if (unlikely(is_setg)) {
        check_setg_alignment(env, toaddr, setsize, memidx, ra);
    } else if (!mte_checks_needed(toaddr, mtedesc)) {
        mtedesc = 0;
    }

    /* Do the actual memset: we leave the last partial page to SETE */
    stagesetsize = setsize & TARGET_PAGE_MASK;
    while (stagesetsize > 0) {
        step = stepfn(env, toaddr, stagesetsize, data, memidx, &mtedesc, ra);
        toaddr += step;
        setsize -= step;
        stagesetsize -= step;
        env->xregs[rn] = -setsize;
        if (stagesetsize > 0 && unlikely(cpu_loop_exit_requested(cs))) {
            cpu_loop_exit_restore(cs, ra);
        }
    }
}

void HELPER(setm)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_setm(env, syndrome, mtedesc, set_step, false, GETPC());
}

void HELPER(setgm)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_setm(env, syndrome, mtedesc, set_step_tags, true, GETPC());
}

static void do_sete(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
                    StepFn *stepfn, bool is_setg, uintptr_t ra)
{
    /* Epilogue: do the last partial page */
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint8_t data = arm_reg_or_xzr(env, rs);
    uint64_t toaddr = env->xregs[rd] + env->xregs[rn];
    uint64_t setsize = -env->xregs[rn];
    uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX);
    uint64_t step;

    check_mops_enabled(env, ra);

    /*
     * We're allowed to NOP out "no data to copy" before the consistency
     * checks; we choose to do so.
     */
    if (setsize == 0) {
        return;
    }

    check_mops_wrong_option(env, syndrome, ra);

    /*
     * Our implementation has no address alignment requirements, but
     * we do want to enforce the "less than a page" size requirement,
     * so we don't need to have the "check for interrupts" here.
     */
    if (setsize >= TARGET_PAGE_SIZE) {
        raise_exception_ra(env, EXCP_UDEF, syndrome,
                           mops_mismatch_exception_target_el(env), ra);
    }

    if (unlikely(is_setg)) {
        check_setg_alignment(env, toaddr, setsize, memidx, ra);
    } else if (!mte_checks_needed(toaddr, mtedesc)) {
        mtedesc = 0;
    }

    /* Do the actual memset */
    while (setsize > 0) {
        step = stepfn(env, toaddr, setsize, data, memidx, &mtedesc, ra);
        toaddr += step;
        setsize -= step;
        env->xregs[rn] = -setsize;
    }
}

void HELPER(sete)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_sete(env, syndrome, mtedesc, set_step, false, GETPC());
}

void HELPER(setge)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_sete(env, syndrome, mtedesc, set_step_tags, true, GETPC());
}

/*
 * Perform part of a memory copy from the guest memory at fromaddr
 * and extending for copysize bytes, to the guest memory at
 * toaddr. Both addresses are dirty.
 *
 * Returns the number of bytes actually set, which might be less than
 * copysize; the caller should loop until the whole copy has been done.
 * The caller should ensure that the guest registers are correct
 * for the possibility that the first byte of the copy encounters
 * an exception or watchpoint. We guarantee not to take any faults
 * for bytes other than the first.
 */
static uint64_t copy_step(CPUARMState *env, uint64_t toaddr, uint64_t fromaddr,
                          uint64_t copysize, int wmemidx, int rmemidx,
                          uint32_t *wdesc, uint32_t *rdesc, uintptr_t ra)
{
    void *rmem;
    void *wmem;

    /* Don't cross a page boundary on either source or destination */
    copysize = MIN(copysize, page_limit(toaddr));
    copysize = MIN(copysize, page_limit(fromaddr));
    /*
     * Handle MTE tag checks: either handle the tag mismatch for byte 0,
     * or else copy up to but not including the byte with the mismatch.
     */
    if (*rdesc) {
        uint64_t mtesize = mte_mops_probe(env, fromaddr, copysize, *rdesc);
        if (mtesize == 0) {
            mte_check_fail(env, *rdesc, fromaddr, ra);
            *rdesc = 0;
        } else {
            copysize = MIN(copysize, mtesize);
        }
    }
    if (*wdesc) {
        uint64_t mtesize = mte_mops_probe(env, toaddr, copysize, *wdesc);
        if (mtesize == 0) {
            mte_check_fail(env, *wdesc, toaddr, ra);
            *wdesc = 0;
        } else {
            copysize = MIN(copysize, mtesize);
        }
    }

    toaddr = useronly_clean_ptr(toaddr);
    fromaddr = useronly_clean_ptr(fromaddr);
    /* Trapless lookup of whether we can get a host memory pointer */
    wmem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, wmemidx);
    rmem = tlb_vaddr_to_host(env, fromaddr, MMU_DATA_LOAD, rmemidx);

#ifndef CONFIG_USER_ONLY
    /*
     * If we don't have host memory for both source and dest then just
     * do a single byte copy. This will handle watchpoints, invalid pages,
     * etc correctly. For clean code pages, the next iteration will see
     * the page dirty and will use the fast path.
     */
    if (unlikely(!rmem || !wmem)) {
        uint8_t byte;
        if (rmem) {
            byte = *(uint8_t *)rmem;
        } else {
            byte = cpu_ldub_mmuidx_ra(env, fromaddr, rmemidx, ra);
        }
        if (wmem) {
            *(uint8_t *)wmem = byte;
        } else {
            cpu_stb_mmuidx_ra(env, toaddr, byte, wmemidx, ra);
        }
        return 1;
    }
#endif
    /* Easy case: just memmove the host memory */
    set_helper_retaddr(ra);
    memmove(wmem, rmem, copysize);
    clear_helper_retaddr();
    return copysize;
}
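
/*
 * Illustrative example of the chunking above: copying 0x1000 bytes from
 * 0x20ff0 to 0x30000 with 4 KiB pages is limited to 16 bytes on the
 * first call, because page_limit(0x20ff0) is 0x10; the caller simply
 * calls copy_step() again for the remainder.
 */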

/*
 * Do part of a backwards memory copy. Here toaddr and fromaddr point
 * to the *last* byte to be copied.
 */
static uint64_t copy_step_rev(CPUARMState *env, uint64_t toaddr,
                              uint64_t fromaddr,
                              uint64_t copysize, int wmemidx, int rmemidx,
                              uint32_t *wdesc, uint32_t *rdesc, uintptr_t ra)
{
    void *rmem;
    void *wmem;

    /* Don't cross a page boundary on either source or destination */
    copysize = MIN(copysize, page_limit_rev(toaddr));
    copysize = MIN(copysize, page_limit_rev(fromaddr));

    /*
     * Handle MTE tag checks: either handle the tag mismatch for byte 0,
     * or else copy up to but not including the byte with the mismatch.
     */
    if (*rdesc) {
        uint64_t mtesize = mte_mops_probe_rev(env, fromaddr, copysize, *rdesc);
        if (mtesize == 0) {
            mte_check_fail(env, *rdesc, fromaddr, ra);
            *rdesc = 0;
        } else {
            copysize = MIN(copysize, mtesize);
        }
    }
    if (*wdesc) {
        uint64_t mtesize = mte_mops_probe_rev(env, toaddr, copysize, *wdesc);
        if (mtesize == 0) {
            mte_check_fail(env, *wdesc, toaddr, ra);
            *wdesc = 0;
        } else {
            copysize = MIN(copysize, mtesize);
        }
    }

    toaddr = useronly_clean_ptr(toaddr);
    fromaddr = useronly_clean_ptr(fromaddr);
    /* Trapless lookup of whether we can get a host memory pointer */
    wmem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, wmemidx);
    rmem = tlb_vaddr_to_host(env, fromaddr, MMU_DATA_LOAD, rmemidx);

#ifndef CONFIG_USER_ONLY
    /*
     * If we don't have host memory for both source and dest then just
     * do a single byte copy. This will handle watchpoints, invalid pages,
     * etc correctly. For clean code pages, the next iteration will see
     * the page dirty and will use the fast path.
     */
    if (unlikely(!rmem || !wmem)) {
        uint8_t byte;
        if (rmem) {
            byte = *(uint8_t *)rmem;
        } else {
            byte = cpu_ldub_mmuidx_ra(env, fromaddr, rmemidx, ra);
        }
        if (wmem) {
            *(uint8_t *)wmem = byte;
        } else {
            cpu_stb_mmuidx_ra(env, toaddr, byte, wmemidx, ra);
        }
        return 1;
    }
#endif
    /*
     * Easy case: just memmove the host memory. Note that wmem and
     * rmem here point to the *last* byte to copy.
     */
    set_helper_retaddr(ra);
    memmove(wmem - (copysize - 1), rmem - (copysize - 1), copysize);
    clear_helper_retaddr();
    return copysize;
}

/*
 * for the Memory Copy operation, our implementation chooses always
 * to use "option A", where we update Xd and Xs to the final addresses
 * in the CPYP insn, and then in CPYM and CPYE only need to update Xn.
 *
 * @env: CPU
 * @syndrome: syndrome value for mismatch exceptions
 * (also contains the register numbers we need to use)
 * @wdesc: MTE descriptor for the writes (destination)
 * @rdesc: MTE descriptor for the reads (source)
 * @move: true if this is CPY (memmove), false for CPYF (memcpy forwards)
 */
static void do_cpyp(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                    uint32_t rdesc, uint32_t move, uintptr_t ra)
{
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX);
    uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX);
    bool forwards = true;
    uint64_t toaddr = env->xregs[rd];
    uint64_t fromaddr = env->xregs[rs];
    uint64_t copysize = env->xregs[rn];
    uint64_t stagecopysize, step;

    check_mops_enabled(env, ra);


    if (move) {
        /*
         * Copy backwards if necessary. The direction for a non-overlapping
         * copy is IMPDEF; we choose forwards.
         */
        if (copysize > 0x007FFFFFFFFFFFFFULL) {
            copysize = 0x007FFFFFFFFFFFFFULL;
        }
        uint64_t fs = extract64(fromaddr, 0, 56);
        uint64_t ts = extract64(toaddr, 0, 56);
        uint64_t fe = extract64(fromaddr + copysize, 0, 56);

        if (fs < ts && fe > ts) {
            forwards = false;
        }
    } else {
        if (copysize > INT64_MAX) {
            copysize = INT64_MAX;
        }
    }

    if (!mte_checks_needed(fromaddr, rdesc)) {
        rdesc = 0;
    }
    if (!mte_checks_needed(toaddr, wdesc)) {
        wdesc = 0;
    }

    if (forwards) {
        stagecopysize = MIN(copysize, page_limit(toaddr));
        stagecopysize = MIN(stagecopysize, page_limit(fromaddr));
        while (stagecopysize) {
            env->xregs[rd] = toaddr;
            env->xregs[rs] = fromaddr;
            env->xregs[rn] = copysize;
            step = copy_step(env, toaddr, fromaddr, stagecopysize,
                             wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr += step;
            fromaddr += step;
            copysize -= step;
            stagecopysize -= step;
        }
        /* Insn completed, so update registers to the Option A format */
        env->xregs[rd] = toaddr + copysize;
        env->xregs[rs] = fromaddr + copysize;
        env->xregs[rn] = -copysize;
    } else {
        /*
         * In a reverse copy the to and from addrs in Xs and Xd are the start
         * of the range, but it's more convenient for us to work with pointers
         * to the last byte being copied.
         */
        toaddr += copysize - 1;
        fromaddr += copysize - 1;
        stagecopysize = MIN(copysize, page_limit_rev(toaddr));
        stagecopysize = MIN(stagecopysize, page_limit_rev(fromaddr));
        while (stagecopysize) {
            env->xregs[rn] = copysize;
            step = copy_step_rev(env, toaddr, fromaddr, stagecopysize,
                                 wmemidx, rmemidx, &wdesc, &rdesc, ra);
            copysize -= step;
            stagecopysize -= step;
            toaddr -= step;
            fromaddr -= step;
        }
        /*
         * Insn completed, so update registers to the Option A format.
         * For a reverse copy this is no different to the CPYP input format.
         */
        env->xregs[rn] = copysize;
    }

    /* Set NZCV = 0000 to indicate we are an Option A implementation */
    env->NF = 0;
    env->ZF = 1; /* our env->ZF encoding is inverted */
    env->CF = 0;
    env->VF = 0;
    return;
}

void HELPER(cpyp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                  uint32_t rdesc)
{
    do_cpyp(env, syndrome, wdesc, rdesc, true, GETPC());
}

void HELPER(cpyfp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                   uint32_t rdesc)
{
    do_cpyp(env, syndrome, wdesc, rdesc, false, GETPC());
}

static void do_cpym(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                    uint32_t rdesc, uint32_t move, uintptr_t ra)
{
    /* Main: we choose to copy until less than a page remaining */
    CPUState *cs = env_cpu(env);
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX);
    uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX);
    bool forwards = true;
    uint64_t toaddr, fromaddr, copysize, step;

    check_mops_enabled(env, ra);

    /* We choose to NOP out "no data to copy" before consistency checks */
    if (env->xregs[rn] == 0) {
        return;
    }

    check_mops_wrong_option(env, syndrome, ra);

    if (move) {
        forwards = (int64_t)env->xregs[rn] < 0;
    }

    if (forwards) {
        toaddr = env->xregs[rd] + env->xregs[rn];
        fromaddr = env->xregs[rs] + env->xregs[rn];
        copysize = -env->xregs[rn];
    } else {
        copysize = env->xregs[rn];
        /* This toaddr and fromaddr point to the *last* byte to copy */
        toaddr = env->xregs[rd] + copysize - 1;
        fromaddr = env->xregs[rs] + copysize - 1;
    }

    if (!mte_checks_needed(fromaddr, rdesc)) {
        rdesc = 0;
    }
    if (!mte_checks_needed(toaddr, wdesc)) {
        wdesc = 0;
    }

    /* Our implementation has no particular parameter requirements for CPYM */

    /* Do the actual memmove */
    if (forwards) {
        while (copysize >= TARGET_PAGE_SIZE) {
            step = copy_step(env, toaddr, fromaddr, copysize,
                             wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr += step;
            fromaddr += step;
            copysize -= step;
            env->xregs[rn] = -copysize;
            if (copysize >= TARGET_PAGE_SIZE &&
                unlikely(cpu_loop_exit_requested(cs))) {
                cpu_loop_exit_restore(cs, ra);
            }
        }
    } else {
        while (copysize >= TARGET_PAGE_SIZE) {
            step = copy_step_rev(env, toaddr, fromaddr, copysize,
                                 wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr -= step;
            fromaddr -= step;
            copysize -= step;
            env->xregs[rn] = copysize;
            if (copysize >= TARGET_PAGE_SIZE &&
                unlikely(cpu_loop_exit_requested(cs))) {
                cpu_loop_exit_restore(cs, ra);
            }
        }
    }
}

void HELPER(cpym)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                  uint32_t rdesc)
{
    do_cpym(env, syndrome, wdesc, rdesc, true, GETPC());
}

void HELPER(cpyfm)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                   uint32_t rdesc)
{
    do_cpym(env, syndrome, wdesc, rdesc, false, GETPC());
}
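
/*
 * Illustrative example of the direction handling above: for CPY with
 * Xs = 0x1000, Xd = 0x1004 and Xn = 0x10 the source and destination
 * ranges overlap (fs < ts < fe), so do_cpyp() performs the copy
 * backwards. A forwards CPYP leaves Xn negative and a backwards one
 * leaves it non-negative, which is how do_cpym() and do_cpye() recover
 * the direction.
 */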

static void do_cpye(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                    uint32_t rdesc, uint32_t move, uintptr_t ra)
{
    /* Epilogue: do the last partial page */
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX);
    uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX);
    bool forwards = true;
    uint64_t toaddr, fromaddr, copysize, step;

    check_mops_enabled(env, ra);

    /* We choose to NOP out "no data to copy" before consistency checks */
    if (env->xregs[rn] == 0) {
        return;
    }

    check_mops_wrong_option(env, syndrome, ra);

    if (move) {
        forwards = (int64_t)env->xregs[rn] < 0;
    }

    if (forwards) {
        toaddr = env->xregs[rd] + env->xregs[rn];
        fromaddr = env->xregs[rs] + env->xregs[rn];
        copysize = -env->xregs[rn];
    } else {
        copysize = env->xregs[rn];
        /* This toaddr and fromaddr point to the *last* byte to copy */
        toaddr = env->xregs[rd] + copysize - 1;
        fromaddr = env->xregs[rs] + copysize - 1;
    }

    if (!mte_checks_needed(fromaddr, rdesc)) {
        rdesc = 0;
    }
    if (!mte_checks_needed(toaddr, wdesc)) {
        wdesc = 0;
    }

    /* Check the size; we don't want to have to do a check-for-interrupts */
    if (copysize >= TARGET_PAGE_SIZE) {
        raise_exception_ra(env, EXCP_UDEF, syndrome,
                           mops_mismatch_exception_target_el(env), ra);
    }

    /* Do the actual memmove */
    if (forwards) {
        while (copysize > 0) {
            step = copy_step(env, toaddr, fromaddr, copysize,
                             wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr += step;
            fromaddr += step;
            copysize -= step;
            env->xregs[rn] = -copysize;
        }
    } else {
        while (copysize > 0) {
            step = copy_step_rev(env, toaddr, fromaddr, copysize,
                                 wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr -= step;
            fromaddr -= step;
            copysize -= step;
            env->xregs[rn] = copysize;
        }
    }
}

void HELPER(cpye)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                  uint32_t rdesc)
{
    do_cpye(env, syndrome, wdesc, rdesc, true, GETPC());
}

void HELPER(cpyfe)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                   uint32_t rdesc)
{
    do_cpye(env, syndrome, wdesc, rdesc, false, GETPC());
}

static bool is_guarded_page(CPUARMState *env, target_ulong addr, uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    return page_get_flags(addr) & PAGE_BTI;
#else
    CPUTLBEntryFull *full;
    void *host;
    int mmu_idx = cpu_mmu_index(env_cpu(env), true);
    int flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx,
                                  false, &host, &full, ra);

    assert(!(flags & TLB_INVALID_MASK));
    return full->extra.arm.guarded;
#endif
}

void HELPER(guarded_page_check)(CPUARMState *env)
{
    /*
     * We have already verified that bti is enabled, and that the
     * instruction at PC is not ok for BTYPE. This is always at
     * the beginning of a block, so PC is always up-to-date and
     * no unwind is required.
     */
    if (is_guarded_page(env, env->pc, 0)) {
        raise_exception(env, EXCP_UDEF, syn_btitrap(env->btype),
                        exception_target_el(env));
    }
}

void HELPER(guarded_page_br)(CPUARMState *env, target_ulong pc)
{
    /*
     * We have already checked for branch via x16 and x17.
     * What remains for choosing BTYPE is checking for a guarded page.
     */
    env->btype = is_guarded_page(env, pc, GETPC()) ? 3 : 1;
}