/*
 * AArch64 specific helpers
 *
 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/units.h"
#include "cpu.h"
#include "gdbstub/helpers.h"
#include "exec/helper-proto.h"
#include "qemu/host-utils.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/bitops.h"
#include "internals.h"
#include "qemu/crc32c.h"
#include "exec/cpu-common.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "qemu/int128.h"
#include "qemu/atomic128.h"
#include "fpu/softfloat.h"
#include <zlib.h> /* for crc32 */
#ifdef CONFIG_USER_ONLY
#include "user/page-protection.h"
#endif

/* C2.4.7 Multiply and divide */
/* special cases for 0 and LLONG_MIN are mandated by the standard */
uint64_t HELPER(udiv64)(uint64_t num, uint64_t den)
{
    if (den == 0) {
        return 0;
    }
    return num / den;
}

int64_t HELPER(sdiv64)(int64_t num, int64_t den)
{
    if (den == 0) {
        return 0;
    }
    if (num == LLONG_MIN && den == -1) {
        return LLONG_MIN;
    }
    return num / den;
}

uint64_t HELPER(rbit64)(uint64_t x)
{
    return revbit64(x);
}

void HELPER(msr_i_spsel)(CPUARMState *env, uint32_t imm)
{
    update_spsel(env, imm);
}

void HELPER(msr_set_allint_el1)(CPUARMState *env)
{
    /* ALLINT update to PSTATE. */
    if (arm_hcrx_el2_eff(env) & HCRX_TALLINT) {
        raise_exception_ra(env, EXCP_UDEF,
                           syn_aa64_sysregtrap(0, 1, 0, 4, 1, 0x1f, 0), 2,
                           GETPC());
    }

    env->pstate |= PSTATE_ALLINT;
}

static void daif_check(CPUARMState *env, uint32_t op,
                       uint32_t imm, uintptr_t ra)
{
    /* DAIF update to PSTATE. This is OK from EL0 only if UMA is set. */
    if (arm_current_el(env) == 0 && !(arm_sctlr(env, 0) & SCTLR_UMA)) {
        raise_exception_ra(env, EXCP_UDEF,
                           syn_aa64_sysregtrap(0, extract32(op, 0, 3),
                                               extract32(op, 3, 3), 4,
                                               imm, 0x1f, 0),
                           exception_target_el(env), ra);
    }
}

void HELPER(msr_i_daifset)(CPUARMState *env, uint32_t imm)
{
    daif_check(env, 0x1e, imm, GETPC());
    env->daif |= (imm << 6) & PSTATE_DAIF;
    arm_rebuild_hflags(env);
}

void HELPER(msr_i_daifclear)(CPUARMState *env, uint32_t imm)
{
    daif_check(env, 0x1f, imm, GETPC());
    env->daif &= ~((imm << 6) & PSTATE_DAIF);
    arm_rebuild_hflags(env);
}

/* Convert a softfloat float_relation_ (as returned by
 * the float*_compare functions) to the correct ARM
 * NZCV flag state.
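 * (equal -> Z,C set; less-than -> N set; greater-than -> C set;
 * unordered -> C,V set, matching the architected FCMP result values.)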
 */
static inline uint32_t float_rel_to_flags(int res)
{
    uint64_t flags;
    switch (res) {
    case float_relation_equal:
        flags = PSTATE_Z | PSTATE_C;
        break;
    case float_relation_less:
        flags = PSTATE_N;
        break;
    case float_relation_greater:
        flags = PSTATE_C;
        break;
    case float_relation_unordered:
    default:
        flags = PSTATE_C | PSTATE_V;
        break;
    }
    return flags;
}

uint64_t HELPER(vfp_cmph_a64)(uint32_t x, uint32_t y, float_status *fp_status)
{
    return float_rel_to_flags(float16_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpeh_a64)(uint32_t x, uint32_t y, float_status *fp_status)
{
    return float_rel_to_flags(float16_compare(x, y, fp_status));
}

uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, float_status *fp_status)
{
    return float_rel_to_flags(float32_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpes_a64)(float32 x, float32 y, float_status *fp_status)
{
    return float_rel_to_flags(float32_compare(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpd_a64)(float64 x, float64 y, float_status *fp_status)
{
    return float_rel_to_flags(float64_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, float_status *fp_status)
{
    return float_rel_to_flags(float64_compare(x, y, fp_status));
}

float32 HELPER(vfp_mulxs)(float32 a, float32 b, float_status *fpst)
{
    a = float32_squash_input_denormal(a, fpst);
    b = float32_squash_input_denormal(b, fpst);

    if ((float32_is_zero(a) && float32_is_infinity(b)) ||
        (float32_is_infinity(a) && float32_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float32((1U << 30) |
                            ((float32_val(a) ^ float32_val(b)) & (1U << 31)));
    }
    return float32_mul(a, b, fpst);
}

float64 HELPER(vfp_mulxd)(float64 a, float64 b, float_status *fpst)
{
    a = float64_squash_input_denormal(a, fpst);
    b = float64_squash_input_denormal(b, fpst);

    if ((float64_is_zero(a) && float64_is_infinity(b)) ||
        (float64_is_infinity(a) && float64_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float64((1ULL << 62) |
                            ((float64_val(a) ^ float64_val(b)) & (1ULL << 63)));
    }
    return float64_mul(a, b, fpst);
}

/* 64bit/double versions of the neon float compare functions */
uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, float_status *fpst)
{
    return -float64_eq_quiet(a, b, fpst);
}

uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, float_status *fpst)
{
    return -float64_le(b, a, fpst);
}

uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, float_status *fpst)
{
    return -float64_lt(b, a, fpst);
}

/* Reciprocal step and sqrt step. Note that unlike the A32/T32
 * versions, these do a fully fused multiply-add or
 * multiply-add-and-halve.
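 * That is, FRECPS returns 2.0 - (a * b) and FRSQRTS returns
 * (3.0 - (a * b)) / 2.0, each computed as a single fused operation.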
 */

uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    a = float16_squash_input_denormal(a, fpst);
    b = float16_squash_input_denormal(b, fpst);

    a = float16_chs(a);
    if ((float16_is_infinity(a) && float16_is_zero(b)) ||
        (float16_is_infinity(b) && float16_is_zero(a))) {
        return float16_two;
    }
    return float16_muladd(a, b, float16_two, 0, fpst);
}

float32 HELPER(recpsf_f32)(float32 a, float32 b, float_status *fpst)
{
    a = float32_squash_input_denormal(a, fpst);
    b = float32_squash_input_denormal(b, fpst);

    a = float32_chs(a);
    if ((float32_is_infinity(a) && float32_is_zero(b)) ||
        (float32_is_infinity(b) && float32_is_zero(a))) {
        return float32_two;
    }
    return float32_muladd(a, b, float32_two, 0, fpst);
}

float64 HELPER(recpsf_f64)(float64 a, float64 b, float_status *fpst)
{
    a = float64_squash_input_denormal(a, fpst);
    b = float64_squash_input_denormal(b, fpst);

    a = float64_chs(a);
    if ((float64_is_infinity(a) && float64_is_zero(b)) ||
        (float64_is_infinity(b) && float64_is_zero(a))) {
        return float64_two;
    }
    return float64_muladd(a, b, float64_two, 0, fpst);
}

uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    a = float16_squash_input_denormal(a, fpst);
    b = float16_squash_input_denormal(b, fpst);

    a = float16_chs(a);
    if ((float16_is_infinity(a) && float16_is_zero(b)) ||
        (float16_is_infinity(b) && float16_is_zero(a))) {
        return float16_one_point_five;
    }
    return float16_muladd(a, b, float16_three, float_muladd_halve_result, fpst);
}

float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, float_status *fpst)
{
    a = float32_squash_input_denormal(a, fpst);
    b = float32_squash_input_denormal(b, fpst);

    a = float32_chs(a);
    if ((float32_is_infinity(a) && float32_is_zero(b)) ||
        (float32_is_infinity(b) && float32_is_zero(a))) {
        return float32_one_point_five;
    }
    return float32_muladd(a, b, float32_three, float_muladd_halve_result, fpst);
}

float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, float_status *fpst)
{
    a = float64_squash_input_denormal(a, fpst);
    b = float64_squash_input_denormal(b, fpst);

    a = float64_chs(a);
    if ((float64_is_infinity(a) && float64_is_zero(b)) ||
        (float64_is_infinity(b) && float64_is_zero(a))) {
        return float64_one_point_five;
    }
    return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
}

/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
uint32_t HELPER(frecpx_f16)(uint32_t a, float_status *fpst)
{
    uint16_t val16, sbit;
    int16_t exp;

    if (float16_is_any_nan(a)) {
        float16 nan = a;
        if (float16_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            if (!fpst->default_nan_mode) {
                nan = float16_silence_nan(a, fpst);
            }
        }
        if (fpst->default_nan_mode) {
            nan = float16_default_nan(fpst);
        }
        return nan;
    }

    a = float16_squash_input_denormal(a, fpst);

    val16 = float16_val(a);
    sbit = 0x8000 & val16;
    exp = extract32(val16, 10, 5);

    if (exp == 0) {
        return make_float16(deposit32(sbit, 10, 5, 0x1e));
    } else {
        return make_float16(deposit32(sbit, 10, 5, ~exp));
    }
}

float32 HELPER(frecpx_f32)(float32 a, float_status *fpst)
{
    uint32_t val32, sbit;
    int32_t exp;

    if (float32_is_any_nan(a)) {
        float32 nan = a;
        if (float32_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            if (!fpst->default_nan_mode) {
                nan = float32_silence_nan(a, fpst);
            }
        }
        if (fpst->default_nan_mode) {
            nan = float32_default_nan(fpst);
        }
        return nan;
    }

    a = float32_squash_input_denormal(a, fpst);

    val32 = float32_val(a);
    sbit = 0x80000000ULL & val32;
    exp = extract32(val32, 23, 8);

    if (exp == 0) {
        return make_float32(sbit | (0xfe << 23));
    } else {
        return make_float32(sbit | (~exp & 0xff) << 23);
    }
}

float64 HELPER(frecpx_f64)(float64 a, float_status *fpst)
{
    uint64_t val64, sbit;
    int64_t exp;

    if (float64_is_any_nan(a)) {
        float64 nan = a;
        if (float64_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            if (!fpst->default_nan_mode) {
                nan = float64_silence_nan(a, fpst);
            }
        }
        if (fpst->default_nan_mode) {
            nan = float64_default_nan(fpst);
        }
        return nan;
    }

    a = float64_squash_input_denormal(a, fpst);

    val64 = float64_val(a);
    sbit = 0x8000000000000000ULL & val64;
    exp = extract64(float64_val(a), 52, 11);

    if (exp == 0) {
        return make_float64(sbit | (0x7feULL << 52));
    } else {
        return make_float64(sbit | (~exp & 0x7ffULL) << 52);
    }
}

float32 HELPER(fcvtx_f64_to_f32)(float64 a, float_status *fpst)
{
    float32 r;
    int old = get_float_rounding_mode(fpst);

    set_float_rounding_mode(float_round_to_odd, fpst);
    r = float64_to_float32(a, fpst);
    set_float_rounding_mode(old, fpst);
    return r;
}

/* 64-bit versions of the CRC helpers. Note that although the operation
 * (and the prototypes of crc32c() and crc32()) mean that only the bottom
 * 32 bits of the accumulator and result are used, we pass and return
 * uint64_t for convenience of the generated code. Unlike the 32-bit
 * instruction set versions, val may genuinely have 64 bits of data in it.
 * The upper bytes of val (above the number specified by 'bytes') must have
 * been zeroed out by the caller.
 */
uint64_t HELPER(crc32_64)(uint64_t acc, uint64_t val, uint32_t bytes)
{
    uint8_t buf[8];

    stq_le_p(buf, val);

    /* zlib crc32 converts the accumulator and output to one's complement. */
    return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
}

uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
{
    uint8_t buf[8];

    stq_le_p(buf, val);

    /* Linux crc32c converts the output to one's complement. */
    return crc32c(acc, buf, bytes) ^ 0xffffffff;
}

/*
 * AdvSIMD half-precision
 */

#define ADVSIMD_HELPER(name, suffix) HELPER(glue(glue(advsimd_, name), suffix))

#define ADVSIMD_HALFOP(name) \
uint32_t ADVSIMD_HELPER(name, h)(uint32_t a, uint32_t b, float_status *fpst) \
{ \
    return float16_ ## name(a, b, fpst); \
}

ADVSIMD_HALFOP(add)
ADVSIMD_HALFOP(sub)
ADVSIMD_HALFOP(mul)
ADVSIMD_HALFOP(div)
ADVSIMD_HALFOP(min)
ADVSIMD_HALFOP(max)
ADVSIMD_HALFOP(minnum)
ADVSIMD_HALFOP(maxnum)

#define ADVSIMD_TWOHALFOP(name) \
uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b, \
                                  float_status *fpst) \
{ \
    float16 a1, a2, b1, b2; \
    uint32_t r1, r2; \
    a1 = extract32(two_a, 0, 16); \
    a2 = extract32(two_a, 16, 16); \
    b1 = extract32(two_b, 0, 16); \
    b2 = extract32(two_b, 16, 16); \
    r1 = float16_ ## name(a1, b1, fpst); \
    r2 = float16_ ## name(a2, b2, fpst); \
    return deposit32(r1, 16, 16, r2); \
}

ADVSIMD_TWOHALFOP(add)
ADVSIMD_TWOHALFOP(sub)
ADVSIMD_TWOHALFOP(mul)
ADVSIMD_TWOHALFOP(div)
ADVSIMD_TWOHALFOP(min)
ADVSIMD_TWOHALFOP(max)
ADVSIMD_TWOHALFOP(minnum)
ADVSIMD_TWOHALFOP(maxnum)

/* Data processing - scalar floating-point and advanced SIMD */
static float16 float16_mulx(float16 a, float16 b, float_status *fpst)
{
    a = float16_squash_input_denormal(a, fpst);
    b = float16_squash_input_denormal(b, fpst);

    if ((float16_is_zero(a) && float16_is_infinity(b)) ||
        (float16_is_infinity(a) && float16_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float16((1U << 14) |
                            ((float16_val(a) ^ float16_val(b)) & (1U << 15)));
    }
    return float16_mul(a, b, fpst);
}

ADVSIMD_HALFOP(mulx)
ADVSIMD_TWOHALFOP(mulx)

/* fused multiply-accumulate */
uint32_t HELPER(advsimd_muladdh)(uint32_t a, uint32_t b, uint32_t c,
                                 float_status *fpst)
{
    return float16_muladd(a, b, c, 0, fpst);
}

uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b,
                                  uint32_t two_c, float_status *fpst)
{
    float16 a1, a2, b1, b2, c1, c2;
    uint32_t r1, r2;
    a1 = extract32(two_a, 0, 16);
    a2 = extract32(two_a, 16, 16);
    b1 = extract32(two_b, 0, 16);
    b2 = extract32(two_b, 16, 16);
    c1 = extract32(two_c, 0, 16);
    c2 = extract32(two_c, 16, 16);
    r1 = float16_muladd(a1, b1, c1, 0, fpst);
    r2 = float16_muladd(a2, b2, c2, 0, fpst);
    return deposit32(r1, 16, 16, r2);
}

/*
 * Floating point comparisons produce an integer result. Softfloat
 * routines return float_relation types which we convert to the 0/-1
 * Neon requires.
 */

#define ADVSIMD_CMPRES(test) (test) ? 0xffff : 0

uint32_t HELPER(advsimd_ceq_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    int compare = float16_compare_quiet(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_equal);
}

uint32_t HELPER(advsimd_cge_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    int compare = float16_compare(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater ||
                          compare == float_relation_equal);
}

uint32_t HELPER(advsimd_cgt_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    int compare = float16_compare(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater);
}

uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    float16 f0 = float16_abs(a);
    float16 f1 = float16_abs(b);
    int compare = float16_compare(f0, f1, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater ||
                          compare == float_relation_equal);
}

uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
    float16 f0 = float16_abs(a);
    float16 f1 = float16_abs(b);
    int compare = float16_compare(f0, f1, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater);
}

/* round to integral */
uint32_t HELPER(advsimd_rinth_exact)(uint32_t x, float_status *fp_status)
{
    return float16_round_to_int(x, fp_status);
}

uint32_t HELPER(advsimd_rinth)(uint32_t x, float_status *fp_status)
{
    int old_flags = get_float_exception_flags(fp_status), new_flags;
    float16 ret;

    ret = float16_round_to_int(x, fp_status);

    /* Suppress any inexact exceptions the conversion produced */
    if (!(old_flags & float_flag_inexact)) {
        new_flags = get_float_exception_flags(fp_status);
        set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
    }

    return ret;
}

static int el_from_spsr(uint32_t spsr)
{
    /* Return the exception level that this SPSR is requesting a return to,
     * or -1 if it is invalid (an illegal return)
     */
    if (spsr & PSTATE_nRW) {
        switch (spsr & CPSR_M) {
        case ARM_CPU_MODE_USR:
            return 0;
        case ARM_CPU_MODE_HYP:
            return 2;
        case ARM_CPU_MODE_FIQ:
        case ARM_CPU_MODE_IRQ:
        case ARM_CPU_MODE_SVC:
        case ARM_CPU_MODE_ABT:
        case ARM_CPU_MODE_UND:
        case ARM_CPU_MODE_SYS:
            return 1;
        case ARM_CPU_MODE_MON:
            /* Returning to Mon from AArch64 is never possible,
             * so this is an illegal return.
             */
        default:
            return -1;
        }
    } else {
        if (extract32(spsr, 1, 1)) {
            /* Return with reserved M[1] bit set */
            return -1;
        }
        if (extract32(spsr, 0, 4) == 1) {
            /* return to EL0 with M[0] bit set */
            return -1;
        }
        return extract32(spsr, 2, 2);
    }
}

static void cpsr_write_from_spsr_elx(CPUARMState *env,
                                     uint32_t val)
{
    uint32_t mask;

    /* Save SPSR_ELx.SS into PSTATE. */
    env->pstate = (env->pstate & ~PSTATE_SS) | (val & PSTATE_SS);
    val &= ~PSTATE_SS;

    /* Move DIT to the correct location for CPSR */
    if (val & PSTATE_DIT) {
        val &= ~PSTATE_DIT;
        val |= CPSR_DIT;
    }

    mask = aarch32_cpsr_valid_mask(env->features,
                                   &env_archcpu(env)->isar);
    cpsr_write(env, val, mask, CPSRWriteRaw);
}

void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
{
    int cur_el = arm_current_el(env);
    unsigned int spsr_idx = aarch64_banked_spsr_index(cur_el);
    uint32_t spsr = env->banked_spsr[spsr_idx];
    int new_el;
    bool return_to_aa64 = (spsr & PSTATE_nRW) == 0;

    aarch64_save_sp(env, cur_el);

    arm_clear_exclusive(env);

    /* We must squash the PSTATE.SS bit to zero unless both of the
     * following hold:
     *  1. debug exceptions are currently disabled
     *  2. singlestep will be active in the EL we return to
     * We check 1 here and 2 after we've done the pstate/cpsr write() to
     * transition to the EL we're going to.
     */
    if (arm_generate_debug_exceptions(env)) {
        spsr &= ~PSTATE_SS;
    }

    /*
     * FEAT_RME forbids return from EL3 with an invalid security state.
     * We don't need an explicit check for FEAT_RME here because we enforce
     * in scr_write() that you can't set the NSE bit without it.
     */
    if (cur_el == 3 && (env->cp15.scr_el3 & (SCR_NS | SCR_NSE)) == SCR_NSE) {
        goto illegal_return;
    }

    new_el = el_from_spsr(spsr);
    if (new_el == -1) {
        goto illegal_return;
    }
    if (new_el > cur_el || (new_el == 2 && !arm_is_el2_enabled(env))) {
        /* Disallow return to an EL which is unimplemented or higher
         * than the current one.
         */
        goto illegal_return;
    }

    if (new_el != 0 && arm_el_is_aa64(env, new_el) != return_to_aa64) {
        /* Return to an EL which is configured for a different register width */
        goto illegal_return;
    }

    if (new_el == 1 && (arm_hcr_el2_eff(env) & HCR_TGE)) {
        goto illegal_return;
    }

    bql_lock();
    arm_call_pre_el_change_hook(env_archcpu(env));
    bql_unlock();

    if (!return_to_aa64) {
        env->aarch64 = false;
        /* We do a raw CPSR write because aarch64_sync_64_to_32()
         * will sort the register banks out for us, and we've already
         * caught all the bad-mode cases in el_from_spsr().
         */
        cpsr_write_from_spsr_elx(env, spsr);
        if (!arm_singlestep_active(env)) {
            env->pstate &= ~PSTATE_SS;
        }
        aarch64_sync_64_to_32(env);

        if (spsr & CPSR_T) {
            env->regs[15] = new_pc & ~0x1;
        } else {
            env->regs[15] = new_pc & ~0x3;
        }
        helper_rebuild_hflags_a32(env, new_el);
        qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
                      "AArch32 EL%d PC 0x%" PRIx32 "\n",
                      cur_el, new_el, env->regs[15]);
    } else {
        int tbii;

        env->aarch64 = true;
        spsr &= aarch64_pstate_valid_mask(&env_archcpu(env)->isar);
        pstate_write(env, spsr);
        if (!arm_singlestep_active(env)) {
            env->pstate &= ~PSTATE_SS;
        }
        aarch64_restore_sp(env, new_el);
        helper_rebuild_hflags_a64(env, new_el);

        /*
         * Apply TBI to the exception return address. We had to delay this
         * until after we selected the new EL, so that we could select the
         * correct TBI+TBID bits. This is made easier by waiting until after
         * the hflags rebuild, since we can pull the composite TBII field
         * from there.
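         * TBII is a two-bit field (one bit per half of the address space);
         * bit 55 of the return address selects which bit applies.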
         */
        tbii = EX_TBFLAG_A64(env->hflags, TBII);
        if ((tbii >> extract64(new_pc, 55, 1)) & 1) {
            /* TBI is enabled. */
            int core_mmu_idx = arm_env_mmu_index(env);
            if (regime_has_2_ranges(core_to_aa64_mmu_idx(core_mmu_idx))) {
                new_pc = sextract64(new_pc, 0, 56);
            } else {
                new_pc = extract64(new_pc, 0, 56);
            }
        }
        env->pc = new_pc;

        qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
                      "AArch64 EL%d PC 0x%" PRIx64 "\n",
                      cur_el, new_el, env->pc);
    }

    /*
     * Note that cur_el can never be 0.  If new_el is 0, then
     * el0_a64 is return_to_aa64, else el0_a64 is ignored.
     */
    aarch64_sve_change_el(env, cur_el, new_el, return_to_aa64);

    bql_lock();
    arm_call_el_change_hook(env_archcpu(env));
    bql_unlock();

    return;

illegal_return:
    /* Illegal return events of various kinds have architecturally
     * mandated behaviour:
     *  restore NZCV and DAIF from SPSR_ELx
     *  set PSTATE.IL
     *  restore PC from ELR_ELx
     *  no change to exception level, execution state or stack pointer
     */
    env->pstate |= PSTATE_IL;
    env->pc = new_pc;
    spsr &= PSTATE_NZCV | PSTATE_DAIF | PSTATE_ALLINT;
    spsr |= pstate_read(env) & ~(PSTATE_NZCV | PSTATE_DAIF | PSTATE_ALLINT);
    pstate_write(env, spsr);
    if (!arm_singlestep_active(env)) {
        env->pstate &= ~PSTATE_SS;
    }
    helper_rebuild_hflags_a64(env, cur_el);
    qemu_log_mask(LOG_GUEST_ERROR, "Illegal exception return at EL%d: "
                  "resuming execution at 0x%" PRIx64 "\n", cur_el, env->pc);
}

void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
{
    uintptr_t ra = GETPC();

    /*
     * Implement DC ZVA, which zeroes a fixed-length block of memory.
     * Note that we do not implement the (architecturally mandated)
     * alignment fault for attempts to use this on Device memory
     * (which matches the usual QEMU behaviour of not implementing either
     * alignment faults or any memory attribute handling).
     */
    int blocklen = 4 << env_archcpu(env)->dcz_blocksize;
    uint64_t vaddr = vaddr_in & ~(blocklen - 1);
    int mmu_idx = arm_env_mmu_index(env);
    void *mem;

    /*
     * Trapless lookup.  In addition to actual invalid page, may
     * return NULL for I/O, watchpoints, clean pages, etc.
     */
    mem = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);

#ifndef CONFIG_USER_ONLY
    if (unlikely(!mem)) {
        /*
         * Trap if accessing an invalid page.  DC_ZVA requires that we supply
         * the original pointer for an invalid page.  But watchpoints require
         * that we probe the actual space.  So do both.
         */
        (void) probe_write(env, vaddr_in, 1, mmu_idx, ra);
        mem = probe_write(env, vaddr, blocklen, mmu_idx, ra);

        if (unlikely(!mem)) {
            /*
             * The only remaining reason for mem == NULL is I/O.
             * Just do a series of byte writes as the architecture demands.
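             * (vaddr is already aligned down to blocklen here, so this
             * writes exactly the architected block.)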
             */
            for (int i = 0; i < blocklen; i++) {
                cpu_stb_mmuidx_ra(env, vaddr + i, 0, mmu_idx, ra);
            }
            return;
        }
    }
#endif

    set_helper_retaddr(ra);
    memset(mem, 0, blocklen);
    clear_helper_retaddr();
}

void HELPER(unaligned_access)(CPUARMState *env, uint64_t addr,
                              uint32_t access_type, uint32_t mmu_idx)
{
    arm_cpu_do_unaligned_access(env_cpu(env), addr, access_type,
                                mmu_idx, GETPC());
}

/* Memory operations (memset, memmove, memcpy) */

/*
 * Return true if the CPY* and SET* insns can execute; compare
 * pseudocode CheckMOPSEnabled(), though we refactor it a little.
 */
static bool mops_enabled(CPUARMState *env)
{
    int el = arm_current_el(env);

    if (el < 2 &&
        (arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE) &&
        !(arm_hcrx_el2_eff(env) & HCRX_MSCEN)) {
        return false;
    }

    if (el == 0) {
        if (!el_is_in_host(env, 0)) {
            return env->cp15.sctlr_el[1] & SCTLR_MSCEN;
        } else {
            return env->cp15.sctlr_el[2] & SCTLR_MSCEN;
        }
    }
    return true;
}

static void check_mops_enabled(CPUARMState *env, uintptr_t ra)
{
    if (!mops_enabled(env)) {
        raise_exception_ra(env, EXCP_UDEF, syn_uncategorized(),
                           exception_target_el(env), ra);
    }
}

/*
 * Return the target exception level for an exception due
 * to mismatched arguments in a FEAT_MOPS copy or set.
 * Compare pseudocode MismatchedCpySetTargetEL()
 */
static int mops_mismatch_exception_target_el(CPUARMState *env)
{
    int el = arm_current_el(env);

    if (el > 1) {
        return el;
    }
    if (el == 0 && (arm_hcr_el2_eff(env) & HCR_TGE)) {
        return 2;
    }
    if (el == 1 && (arm_hcrx_el2_eff(env) & HCRX_MCE2)) {
        return 2;
    }
    return 1;
}

/*
 * Check whether an M or E instruction was executed with a CF value
 * indicating the wrong option for this implementation.
 * Assumes we are always Option A.
 */
static void check_mops_wrong_option(CPUARMState *env, uint32_t syndrome,
                                    uintptr_t ra)
{
    if (env->CF != 0) {
        syndrome |= 1 << 17; /* Set the wrong-option bit */
        raise_exception_ra(env, EXCP_UDEF, syndrome,
                           mops_mismatch_exception_target_el(env), ra);
    }
}

/*
 * Return the maximum number of bytes we can transfer starting at addr
 * without crossing a page boundary.
 */
static uint64_t page_limit(uint64_t addr)
{
    return TARGET_PAGE_ALIGN(addr + 1) - addr;
}

/*
 * Return the number of bytes we can copy starting from addr and working
 * backwards without crossing a page boundary.
 */
static uint64_t page_limit_rev(uint64_t addr)
{
    return (addr & ~TARGET_PAGE_MASK) + 1;
}

/*
 * Perform part of a memory set on an area of guest memory starting at
 * toaddr (a dirty address) and extending for setsize bytes.
 *
 * Returns the number of bytes actually set, which might be less than
 * setsize; the caller should loop until the whole set has been done.
 * The caller should ensure that the guest registers are correct
 * for the possibility that the first byte of the set encounters
 * an exception or watchpoint. We guarantee not to take any faults
 * for bytes other than the first.
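 *
 * If an MTE tag-check failure is reported, *mtedesc is cleared so that
 * later steps of the same instruction skip further MTE checks.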
 */
static uint64_t set_step(CPUARMState *env, uint64_t toaddr,
                         uint64_t setsize, uint32_t data, int memidx,
                         uint32_t *mtedesc, uintptr_t ra)
{
    void *mem;

    setsize = MIN(setsize, page_limit(toaddr));
    if (*mtedesc) {
        uint64_t mtesize = mte_mops_probe(env, toaddr, setsize, *mtedesc);
        if (mtesize == 0) {
            /* Trap, or not. All CPU state is up to date */
            mte_check_fail(env, *mtedesc, toaddr, ra);
            /* Continue, with no further MTE checks required */
            *mtedesc = 0;
        } else {
            /* Advance to the end, or to the tag mismatch */
            setsize = MIN(setsize, mtesize);
        }
    }

    toaddr = useronly_clean_ptr(toaddr);
    /*
     * Trapless lookup: returns NULL for invalid page, I/O,
     * watchpoints, clean pages, etc.
     */
    mem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, memidx);

#ifndef CONFIG_USER_ONLY
    if (unlikely(!mem)) {
        /*
         * Slow-path: just do one byte write. This will handle the
         * watchpoint, invalid page, etc handling correctly.
         * For clean code pages, the next iteration will see
         * the page dirty and will use the fast path.
         */
        cpu_stb_mmuidx_ra(env, toaddr, data, memidx, ra);
        return 1;
    }
#endif
    /* Easy case: just memset the host memory */
    set_helper_retaddr(ra);
    memset(mem, data, setsize);
    clear_helper_retaddr();
    return setsize;
}

/*
 * Similar, but setting tags. The architecture requires us to do this
 * in 16-byte chunks. SETP accesses are not tag checked; they set
 * the tags.
 */
static uint64_t set_step_tags(CPUARMState *env, uint64_t toaddr,
                              uint64_t setsize, uint32_t data, int memidx,
                              uint32_t *mtedesc, uintptr_t ra)
{
    void *mem;
    uint64_t cleanaddr;

    setsize = MIN(setsize, page_limit(toaddr));

    cleanaddr = useronly_clean_ptr(toaddr);
    /*
     * Trapless lookup: returns NULL for invalid page, I/O,
     * watchpoints, clean pages, etc.
     */
    mem = tlb_vaddr_to_host(env, cleanaddr, MMU_DATA_STORE, memidx);

#ifndef CONFIG_USER_ONLY
    if (unlikely(!mem)) {
        /*
         * Slow-path: just do one write. This will handle the
         * watchpoint, invalid page, etc handling correctly.
         * The architecture requires that we do 16 bytes at a time,
         * and we know both ptr and size are 16 byte aligned.
         * For clean code pages, the next iteration will see
         * the page dirty and will use the fast path.
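         * (We store one replicated 16-byte chunk and then write the
         * allocation tags for that single granule before returning.)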
         */
        uint64_t repldata = data * 0x0101010101010101ULL;
        MemOpIdx oi16 = make_memop_idx(MO_TE | MO_128, memidx);
        cpu_st16_mmu(env, toaddr, int128_make128(repldata, repldata), oi16, ra);
        mte_mops_set_tags(env, toaddr, 16, *mtedesc);
        return 16;
    }
#endif
    /* Easy case: just memset the host memory */
    set_helper_retaddr(ra);
    memset(mem, data, setsize);
    clear_helper_retaddr();
    mte_mops_set_tags(env, toaddr, setsize, *mtedesc);
    return setsize;
}

typedef uint64_t StepFn(CPUARMState *env, uint64_t toaddr,
                        uint64_t setsize, uint32_t data,
                        int memidx, uint32_t *mtedesc, uintptr_t ra);

/* Extract register numbers from a MOPS exception syndrome value */
static int mops_destreg(uint32_t syndrome)
{
    return extract32(syndrome, 10, 5);
}

static int mops_srcreg(uint32_t syndrome)
{
    return extract32(syndrome, 5, 5);
}

static int mops_sizereg(uint32_t syndrome)
{
    return extract32(syndrome, 0, 5);
}

/*
 * Return true if TCMA and TBI bits mean we need to do MTE checks.
 * We only need to do this once per MOPS insn, not for every page.
 */
static bool mte_checks_needed(uint64_t ptr, uint32_t desc)
{
    int bit55 = extract64(ptr, 55, 1);

    /*
     * Note that tbi_check() returns true for "access checked" but
     * tcma_check() returns true for "access unchecked".
     */
    if (!tbi_check(desc, bit55)) {
        return false;
    }
    return !tcma_check(desc, bit55, allocation_tag_from_addr(ptr));
}

/* Take an exception if the SETG addr/size are not granule aligned */
static void check_setg_alignment(CPUARMState *env, uint64_t ptr, uint64_t size,
                                 uint32_t memidx, uintptr_t ra)
{
    if ((size != 0 && !QEMU_IS_ALIGNED(ptr, TAG_GRANULE)) ||
        !QEMU_IS_ALIGNED(size, TAG_GRANULE)) {
        arm_cpu_do_unaligned_access(env_cpu(env), ptr, MMU_DATA_STORE,
                                    memidx, ra);
    }
}

static uint64_t arm_reg_or_xzr(CPUARMState *env, int reg)
{
    /*
     * Runtime equivalent of cpu_reg() -- return the CPU register value,
     * for contexts when index 31 means XZR (not SP).
     */
    return reg == 31 ? 0 : env->xregs[reg];
}

/*
 * For the Memory Set operation, our implementation chooses
 * always to use "option A", where we update Xd to the final
 * address in the SETP insn, and set Xn to be -(bytes remaining).
 * On SETM and SETE insns we only need to update Xn.
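 * We also leave NZCV as 0000 at the end of SETP, which is how an
 * Option A implementation advertises itself to the guest.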
 *
 * @env: CPU
 * @syndrome: syndrome value for mismatch exceptions
 * (also contains the register numbers we need to use)
 * @mtedesc: MTE descriptor word
 * @stepfn: function which does a single part of the set operation
 * @is_setg: true if this is the tag-setting SETG variant
 */
static void do_setp(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
                    StepFn *stepfn, bool is_setg, uintptr_t ra)
{
    /* Prologue: we choose to do up to the next page boundary */
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint8_t data = arm_reg_or_xzr(env, rs);
    uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX);
    uint64_t toaddr = env->xregs[rd];
    uint64_t setsize = env->xregs[rn];
    uint64_t stagesetsize, step;

    check_mops_enabled(env, ra);

    if (setsize > INT64_MAX) {
        setsize = INT64_MAX;
        if (is_setg) {
            setsize &= ~0xf;
        }
    }

    if (unlikely(is_setg)) {
        check_setg_alignment(env, toaddr, setsize, memidx, ra);
    } else if (!mte_checks_needed(toaddr, mtedesc)) {
        mtedesc = 0;
    }

    stagesetsize = MIN(setsize, page_limit(toaddr));
    while (stagesetsize) {
        env->xregs[rd] = toaddr;
        env->xregs[rn] = setsize;
        step = stepfn(env, toaddr, stagesetsize, data, memidx, &mtedesc, ra);
        toaddr += step;
        setsize -= step;
        stagesetsize -= step;
    }
    /* Insn completed, so update registers to the Option A format */
    env->xregs[rd] = toaddr + setsize;
    env->xregs[rn] = -setsize;

    /* Set NZCV = 0000 to indicate we are an Option A implementation */
    env->NF = 0;
    env->ZF = 1; /* our env->ZF encoding is inverted */
    env->CF = 0;
    env->VF = 0;
    return;
}

void HELPER(setp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_setp(env, syndrome, mtedesc, set_step, false, GETPC());
}

void HELPER(setgp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_setp(env, syndrome, mtedesc, set_step_tags, true, GETPC());
}

static void do_setm(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
                    StepFn *stepfn, bool is_setg, uintptr_t ra)
{
    /* Main: we choose to do all the full-page chunks */
    CPUState *cs = env_cpu(env);
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint8_t data = arm_reg_or_xzr(env, rs);
    uint64_t toaddr = env->xregs[rd] + env->xregs[rn];
    uint64_t setsize = -env->xregs[rn];
    uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX);
    uint64_t step, stagesetsize;

    check_mops_enabled(env, ra);

    /*
     * We're allowed to NOP out "no data to copy" before the consistency
     * checks; we choose to do so.
     */
    if (env->xregs[rn] == 0) {
        return;
    }

    check_mops_wrong_option(env, syndrome, ra);

    /*
     * Our implementation will work fine even if we have an unaligned
     * destination address, and because we update Xn every time around
     * the loop below and the return value from stepfn() may be less
     * than requested, we might find toaddr is unaligned. So we don't
     * have an IMPDEF check for alignment here.
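     * (The architecture permits such a check but does not require it.)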
     */

    if (unlikely(is_setg)) {
        check_setg_alignment(env, toaddr, setsize, memidx, ra);
    } else if (!mte_checks_needed(toaddr, mtedesc)) {
        mtedesc = 0;
    }

    /* Do the actual memset: we leave the last partial page to SETE */
    stagesetsize = setsize & TARGET_PAGE_MASK;
    while (stagesetsize > 0) {
        step = stepfn(env, toaddr, stagesetsize, data, memidx, &mtedesc, ra);
        toaddr += step;
        setsize -= step;
        stagesetsize -= step;
        env->xregs[rn] = -setsize;
        if (stagesetsize > 0 && unlikely(cpu_loop_exit_requested(cs))) {
            cpu_loop_exit_restore(cs, ra);
        }
    }
}

void HELPER(setm)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_setm(env, syndrome, mtedesc, set_step, false, GETPC());
}

void HELPER(setgm)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_setm(env, syndrome, mtedesc, set_step_tags, true, GETPC());
}

static void do_sete(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
                    StepFn *stepfn, bool is_setg, uintptr_t ra)
{
    /* Epilogue: do the last partial page */
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint8_t data = arm_reg_or_xzr(env, rs);
    uint64_t toaddr = env->xregs[rd] + env->xregs[rn];
    uint64_t setsize = -env->xregs[rn];
    uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX);
    uint64_t step;

    check_mops_enabled(env, ra);

    /*
     * We're allowed to NOP out "no data to copy" before the consistency
     * checks; we choose to do so.
     */
    if (setsize == 0) {
        return;
    }

    check_mops_wrong_option(env, syndrome, ra);

    /*
     * Our implementation has no address alignment requirements, but
     * we do want to enforce the "less than a page" size requirement,
     * so we don't need to have the "check for interrupts" here.
     */
    if (setsize >= TARGET_PAGE_SIZE) {
        raise_exception_ra(env, EXCP_UDEF, syndrome,
                           mops_mismatch_exception_target_el(env), ra);
    }

    if (unlikely(is_setg)) {
        check_setg_alignment(env, toaddr, setsize, memidx, ra);
    } else if (!mte_checks_needed(toaddr, mtedesc)) {
        mtedesc = 0;
    }

    /* Do the actual memset */
    while (setsize > 0) {
        step = stepfn(env, toaddr, setsize, data, memidx, &mtedesc, ra);
        toaddr += step;
        setsize -= step;
        env->xregs[rn] = -setsize;
    }
}

void HELPER(sete)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_sete(env, syndrome, mtedesc, set_step, false, GETPC());
}

void HELPER(setge)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
{
    do_sete(env, syndrome, mtedesc, set_step_tags, true, GETPC());
}

/*
 * Perform part of a memory copy from the guest memory at fromaddr
 * and extending for copysize bytes, to the guest memory at
 * toaddr. Both addresses are dirty.
 *
 * Returns the number of bytes actually copied, which might be less than
 * copysize; the caller should loop until the whole copy has been done.
 * The caller should ensure that the guest registers are correct
 * for the possibility that the first byte of the copy encounters
 * an exception or watchpoint. We guarantee not to take any faults
 * for bytes other than the first.
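 *
 * As with set_step(), *rdesc and *wdesc are cleared once a tag-check
 * failure has been reported, so later steps skip those MTE checks.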
 */
static uint64_t copy_step(CPUARMState *env, uint64_t toaddr, uint64_t fromaddr,
                          uint64_t copysize, int wmemidx, int rmemidx,
                          uint32_t *wdesc, uint32_t *rdesc, uintptr_t ra)
{
    void *rmem;
    void *wmem;

    /* Don't cross a page boundary on either source or destination */
    copysize = MIN(copysize, page_limit(toaddr));
    copysize = MIN(copysize, page_limit(fromaddr));
    /*
     * Handle MTE tag checks: either handle the tag mismatch for byte 0,
     * or else copy up to but not including the byte with the mismatch.
     */
    if (*rdesc) {
        uint64_t mtesize = mte_mops_probe(env, fromaddr, copysize, *rdesc);
        if (mtesize == 0) {
            mte_check_fail(env, *rdesc, fromaddr, ra);
            *rdesc = 0;
        } else {
            copysize = MIN(copysize, mtesize);
        }
    }
    if (*wdesc) {
        uint64_t mtesize = mte_mops_probe(env, toaddr, copysize, *wdesc);
        if (mtesize == 0) {
            mte_check_fail(env, *wdesc, toaddr, ra);
            *wdesc = 0;
        } else {
            copysize = MIN(copysize, mtesize);
        }
    }

    toaddr = useronly_clean_ptr(toaddr);
    fromaddr = useronly_clean_ptr(fromaddr);
    /* Trapless lookup of whether we can get a host memory pointer */
    wmem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, wmemidx);
    rmem = tlb_vaddr_to_host(env, fromaddr, MMU_DATA_LOAD, rmemidx);

#ifndef CONFIG_USER_ONLY
    /*
     * If we don't have host memory for both source and dest then just
     * do a single byte copy. This will handle watchpoints, invalid pages,
     * etc correctly. For clean code pages, the next iteration will see
     * the page dirty and will use the fast path.
     */
    if (unlikely(!rmem || !wmem)) {
        uint8_t byte;
        if (rmem) {
            byte = *(uint8_t *)rmem;
        } else {
            byte = cpu_ldub_mmuidx_ra(env, fromaddr, rmemidx, ra);
        }
        if (wmem) {
            *(uint8_t *)wmem = byte;
        } else {
            cpu_stb_mmuidx_ra(env, toaddr, byte, wmemidx, ra);
        }
        return 1;
    }
#endif
    /* Easy case: just memmove the host memory */
    set_helper_retaddr(ra);
    memmove(wmem, rmem, copysize);
    clear_helper_retaddr();
    return copysize;
}

/*
 * Do part of a backwards memory copy. Here toaddr and fromaddr point
 * to the *last* byte to be copied.
 */
static uint64_t copy_step_rev(CPUARMState *env, uint64_t toaddr,
                              uint64_t fromaddr,
                              uint64_t copysize, int wmemidx, int rmemidx,
                              uint32_t *wdesc, uint32_t *rdesc, uintptr_t ra)
{
    void *rmem;
    void *wmem;

    /* Don't cross a page boundary on either source or destination */
    copysize = MIN(copysize, page_limit_rev(toaddr));
    copysize = MIN(copysize, page_limit_rev(fromaddr));

    /*
     * Handle MTE tag checks: either handle the tag mismatch for byte 0,
     * or else copy up to but not including the byte with the mismatch.
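     * (For this backwards copy, "byte 0" is the byte at fromaddr/toaddr,
     * i.e. the last byte of the region being copied.)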
     */
    if (*rdesc) {
        uint64_t mtesize = mte_mops_probe_rev(env, fromaddr, copysize, *rdesc);
        if (mtesize == 0) {
            mte_check_fail(env, *rdesc, fromaddr, ra);
            *rdesc = 0;
        } else {
            copysize = MIN(copysize, mtesize);
        }
    }
    if (*wdesc) {
        uint64_t mtesize = mte_mops_probe_rev(env, toaddr, copysize, *wdesc);
        if (mtesize == 0) {
            mte_check_fail(env, *wdesc, toaddr, ra);
            *wdesc = 0;
        } else {
            copysize = MIN(copysize, mtesize);
        }
    }

    toaddr = useronly_clean_ptr(toaddr);
    fromaddr = useronly_clean_ptr(fromaddr);
    /* Trapless lookup of whether we can get a host memory pointer */
    wmem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, wmemidx);
    rmem = tlb_vaddr_to_host(env, fromaddr, MMU_DATA_LOAD, rmemidx);

#ifndef CONFIG_USER_ONLY
    /*
     * If we don't have host memory for both source and dest then just
     * do a single byte copy. This will handle watchpoints, invalid pages,
     * etc correctly. For clean code pages, the next iteration will see
     * the page dirty and will use the fast path.
     */
    if (unlikely(!rmem || !wmem)) {
        uint8_t byte;
        if (rmem) {
            byte = *(uint8_t *)rmem;
        } else {
            byte = cpu_ldub_mmuidx_ra(env, fromaddr, rmemidx, ra);
        }
        if (wmem) {
            *(uint8_t *)wmem = byte;
        } else {
            cpu_stb_mmuidx_ra(env, toaddr, byte, wmemidx, ra);
        }
        return 1;
    }
#endif
    /*
     * Easy case: just memmove the host memory. Note that wmem and
     * rmem here point to the *last* byte to copy.
     */
    set_helper_retaddr(ra);
    memmove(wmem - (copysize - 1), rmem - (copysize - 1), copysize);
    clear_helper_retaddr();
    return copysize;
}

/*
 * For the Memory Copy operation, our implementation chooses always
 * to use "option A", where we update Xd and Xs to the final addresses
 * in the CPYP insn, and then in CPYM and CPYE only need to update Xn.
 *
 * @env: CPU
 * @syndrome: syndrome value for mismatch exceptions
 * (also contains the register numbers we need to use)
 * @wdesc: MTE descriptor for the writes (destination)
 * @rdesc: MTE descriptor for the reads (source)
 * @move: true if this is CPY (memmove), false for CPYF (memcpy forwards)
 */
static void do_cpyp(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                    uint32_t rdesc, uint32_t move, uintptr_t ra)
{
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX);
    uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX);
    bool forwards = true;
    uint64_t toaddr = env->xregs[rd];
    uint64_t fromaddr = env->xregs[rs];
    uint64_t copysize = env->xregs[rn];
    uint64_t stagecopysize, step;

    check_mops_enabled(env, ra);

    if (move) {
        /*
         * Copy backwards if necessary. The direction for a non-overlapping
         * copy is IMPDEF; we choose forwards.
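         * The overlap test below compares only the low 56 bits of the
         * addresses, so any tag bytes are ignored.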
         */
        if (copysize > 0x007FFFFFFFFFFFFFULL) {
            copysize = 0x007FFFFFFFFFFFFFULL;
        }
        uint64_t fs = extract64(fromaddr, 0, 56);
        uint64_t ts = extract64(toaddr, 0, 56);
        uint64_t fe = extract64(fromaddr + copysize, 0, 56);

        if (fs < ts && fe > ts) {
            forwards = false;
        }
    } else {
        if (copysize > INT64_MAX) {
            copysize = INT64_MAX;
        }
    }

    if (!mte_checks_needed(fromaddr, rdesc)) {
        rdesc = 0;
    }
    if (!mte_checks_needed(toaddr, wdesc)) {
        wdesc = 0;
    }

    if (forwards) {
        stagecopysize = MIN(copysize, page_limit(toaddr));
        stagecopysize = MIN(stagecopysize, page_limit(fromaddr));
        while (stagecopysize) {
            env->xregs[rd] = toaddr;
            env->xregs[rs] = fromaddr;
            env->xregs[rn] = copysize;
            step = copy_step(env, toaddr, fromaddr, stagecopysize,
                             wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr += step;
            fromaddr += step;
            copysize -= step;
            stagecopysize -= step;
        }
        /* Insn completed, so update registers to the Option A format */
        env->xregs[rd] = toaddr + copysize;
        env->xregs[rs] = fromaddr + copysize;
        env->xregs[rn] = -copysize;
    } else {
        /*
         * In a reverse copy the to and from addrs in Xs and Xd are the start
         * of the range, but it's more convenient for us to work with pointers
         * to the last byte being copied.
         */
        toaddr += copysize - 1;
        fromaddr += copysize - 1;
        stagecopysize = MIN(copysize, page_limit_rev(toaddr));
        stagecopysize = MIN(stagecopysize, page_limit_rev(fromaddr));
        while (stagecopysize) {
            env->xregs[rn] = copysize;
            step = copy_step_rev(env, toaddr, fromaddr, stagecopysize,
                                 wmemidx, rmemidx, &wdesc, &rdesc, ra);
            copysize -= step;
            stagecopysize -= step;
            toaddr -= step;
            fromaddr -= step;
        }
        /*
         * Insn completed, so update registers to the Option A format.
         * For a reverse copy this is no different to the CPYP input format.
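         * (Xd and Xs were never modified in the loop above, so they still
         * hold the original start addresses; only Xn needs writing back.)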
         */
        env->xregs[rn] = copysize;
    }

    /* Set NZCV = 0000 to indicate we are an Option A implementation */
    env->NF = 0;
    env->ZF = 1; /* our env->ZF encoding is inverted */
    env->CF = 0;
    env->VF = 0;
    return;
}

void HELPER(cpyp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                  uint32_t rdesc)
{
    do_cpyp(env, syndrome, wdesc, rdesc, true, GETPC());
}

void HELPER(cpyfp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                   uint32_t rdesc)
{
    do_cpyp(env, syndrome, wdesc, rdesc, false, GETPC());
}

static void do_cpym(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                    uint32_t rdesc, uint32_t move, uintptr_t ra)
{
    /* Main: we choose to copy until less than a page remaining */
    CPUState *cs = env_cpu(env);
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX);
    uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX);
    bool forwards = true;
    uint64_t toaddr, fromaddr, copysize, step;

    check_mops_enabled(env, ra);

    /* We choose to NOP out "no data to copy" before consistency checks */
    if (env->xregs[rn] == 0) {
        return;
    }

    check_mops_wrong_option(env, syndrome, ra);

    if (move) {
        forwards = (int64_t)env->xregs[rn] < 0;
    }

    if (forwards) {
        toaddr = env->xregs[rd] + env->xregs[rn];
        fromaddr = env->xregs[rs] + env->xregs[rn];
        copysize = -env->xregs[rn];
    } else {
        copysize = env->xregs[rn];
        /* This toaddr and fromaddr point to the *last* byte to copy */
        toaddr = env->xregs[rd] + copysize - 1;
        fromaddr = env->xregs[rs] + copysize - 1;
    }

    if (!mte_checks_needed(fromaddr, rdesc)) {
        rdesc = 0;
    }
    if (!mte_checks_needed(toaddr, wdesc)) {
        wdesc = 0;
    }

    /* Our implementation has no particular parameter requirements for CPYM */

    /* Do the actual memmove */
    if (forwards) {
        while (copysize >= TARGET_PAGE_SIZE) {
            step = copy_step(env, toaddr, fromaddr, copysize,
                             wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr += step;
            fromaddr += step;
            copysize -= step;
            env->xregs[rn] = -copysize;
            if (copysize >= TARGET_PAGE_SIZE &&
                unlikely(cpu_loop_exit_requested(cs))) {
                cpu_loop_exit_restore(cs, ra);
            }
        }
    } else {
        while (copysize >= TARGET_PAGE_SIZE) {
            step = copy_step_rev(env, toaddr, fromaddr, copysize,
                                 wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr -= step;
            fromaddr -= step;
            copysize -= step;
            env->xregs[rn] = copysize;
            if (copysize >= TARGET_PAGE_SIZE &&
                unlikely(cpu_loop_exit_requested(cs))) {
                cpu_loop_exit_restore(cs, ra);
            }
        }
    }
}

void HELPER(cpym)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                  uint32_t rdesc)
{
    do_cpym(env, syndrome, wdesc, rdesc, true, GETPC());
}

void HELPER(cpyfm)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                   uint32_t rdesc)
{
    do_cpym(env, syndrome, wdesc, rdesc, false, GETPC());
}

static void do_cpye(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                    uint32_t rdesc, uint32_t move, uintptr_t ra)
{
    /* Epilogue: do the last partial page */
    int rd = mops_destreg(syndrome);
    int rs = mops_srcreg(syndrome);
    int rn = mops_sizereg(syndrome);
    uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX);
    uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX);
    bool forwards = true;
    uint64_t toaddr, fromaddr, copysize, step;

    check_mops_enabled(env, ra);

    /* We choose to NOP out "no data to copy" before consistency checks */
    if (env->xregs[rn] == 0) {
        return;
    }

    check_mops_wrong_option(env, syndrome, ra);

    if (move) {
        forwards = (int64_t)env->xregs[rn] < 0;
    }

    if (forwards) {
        toaddr = env->xregs[rd] + env->xregs[rn];
        fromaddr = env->xregs[rs] + env->xregs[rn];
        copysize = -env->xregs[rn];
    } else {
        copysize = env->xregs[rn];
        /* This toaddr and fromaddr point to the *last* byte to copy */
        toaddr = env->xregs[rd] + copysize - 1;
        fromaddr = env->xregs[rs] + copysize - 1;
    }

    if (!mte_checks_needed(fromaddr, rdesc)) {
        rdesc = 0;
    }
    if (!mte_checks_needed(toaddr, wdesc)) {
        wdesc = 0;
    }

    /* Check the size; we don't want to have to do a check-for-interrupts */
    if (copysize >= TARGET_PAGE_SIZE) {
        raise_exception_ra(env, EXCP_UDEF, syndrome,
                           mops_mismatch_exception_target_el(env), ra);
    }

    /* Do the actual memmove */
    if (forwards) {
        while (copysize > 0) {
            step = copy_step(env, toaddr, fromaddr, copysize,
                             wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr += step;
            fromaddr += step;
            copysize -= step;
            env->xregs[rn] = -copysize;
        }
    } else {
        while (copysize > 0) {
            step = copy_step_rev(env, toaddr, fromaddr, copysize,
                                 wmemidx, rmemidx, &wdesc, &rdesc, ra);
            toaddr -= step;
            fromaddr -= step;
            copysize -= step;
            env->xregs[rn] = copysize;
        }
    }
}

void HELPER(cpye)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                  uint32_t rdesc)
{
    do_cpye(env, syndrome, wdesc, rdesc, true, GETPC());
}

void HELPER(cpyfe)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
                   uint32_t rdesc)
{
    do_cpye(env, syndrome, wdesc, rdesc, false, GETPC());
}

static bool is_guarded_page(CPUARMState *env, target_ulong addr, uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    return page_get_flags(addr) & PAGE_BTI;
#else
    CPUTLBEntryFull *full;
    void *host;
    int mmu_idx = cpu_mmu_index(env_cpu(env), true);
    int flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx,
                                  false, &host, &full, ra);

    assert(!(flags & TLB_INVALID_MASK));
    return full->extra.arm.guarded;
#endif
}

void HELPER(guarded_page_check)(CPUARMState *env)
{
    /*
     * We have already verified that bti is enabled, and that the
     * instruction at PC is not ok for BTYPE. This is always at
     * the beginning of a block, so PC is always up-to-date and
     * no unwind is required.
     */
    if (is_guarded_page(env, env->pc, 0)) {
        raise_exception(env, EXCP_UDEF, syn_btitrap(env->btype),
                        exception_target_el(env));
    }
}

void HELPER(guarded_page_br)(CPUARMState *env, target_ulong pc)
{
    /*
     * We have already checked for branch via x16 and x17.
     * What remains for choosing BTYPE is checking for a guarded page.
     */
    env->btype = is_guarded_page(env, pc, GETPC()) ? 3 : 1;
}