1 /* 2 * AArch64 specific helpers 3 * 4 * Copyright (c) 2013 Alexander Graf <agraf@suse.de> 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "qemu/units.h" 22 #include "cpu.h" 23 #include "gdbstub/helpers.h" 24 #include "exec/helper-proto.h" 25 #include "qemu/host-utils.h" 26 #include "qemu/log.h" 27 #include "qemu/main-loop.h" 28 #include "qemu/bitops.h" 29 #include "internals.h" 30 #include "qemu/crc32c.h" 31 #include "exec/cpu-common.h" 32 #include "exec/exec-all.h" 33 #include "exec/cpu_ldst.h" 34 #include "qemu/int128.h" 35 #include "qemu/atomic128.h" 36 #include "fpu/softfloat.h" 37 #include <zlib.h> /* for crc32 */ 38 #ifdef CONFIG_USER_ONLY 39 #include "user/page-protection.h" 40 #endif 41 42 /* C2.4.7 Multiply and divide */ 43 /* special cases for 0 and LLONG_MIN are mandated by the standard */ 44 uint64_t HELPER(udiv64)(uint64_t num, uint64_t den) 45 { 46 if (den == 0) { 47 return 0; 48 } 49 return num / den; 50 } 51 52 int64_t HELPER(sdiv64)(int64_t num, int64_t den) 53 { 54 if (den == 0) { 55 return 0; 56 } 57 if (num == LLONG_MIN && den == -1) { 58 return LLONG_MIN; 59 } 60 return num / den; 61 } 62 63 uint64_t HELPER(rbit64)(uint64_t x) 64 { 65 return revbit64(x); 66 } 67 68 void HELPER(msr_i_spsel)(CPUARMState *env, uint32_t imm) 69 { 70 update_spsel(env, imm); 71 } 72 73 void HELPER(msr_set_allint_el1)(CPUARMState *env) 74 { 75 /* ALLINT update to PSTATE. */ 76 if (arm_hcrx_el2_eff(env) & HCRX_TALLINT) { 77 raise_exception_ra(env, EXCP_UDEF, 78 syn_aa64_sysregtrap(0, 1, 0, 4, 1, 0x1f, 0), 2, 79 GETPC()); 80 } 81 82 env->pstate |= PSTATE_ALLINT; 83 } 84 85 static void daif_check(CPUARMState *env, uint32_t op, 86 uint32_t imm, uintptr_t ra) 87 { 88 /* DAIF update to PSTATE. This is OK from EL0 only if UMA is set. */ 89 if (arm_current_el(env) == 0 && !(arm_sctlr(env, 0) & SCTLR_UMA)) { 90 raise_exception_ra(env, EXCP_UDEF, 91 syn_aa64_sysregtrap(0, extract32(op, 0, 3), 92 extract32(op, 3, 3), 4, 93 imm, 0x1f, 0), 94 exception_target_el(env), ra); 95 } 96 } 97 98 void HELPER(msr_i_daifset)(CPUARMState *env, uint32_t imm) 99 { 100 daif_check(env, 0x1e, imm, GETPC()); 101 env->daif |= (imm << 6) & PSTATE_DAIF; 102 arm_rebuild_hflags(env); 103 } 104 105 void HELPER(msr_i_daifclear)(CPUARMState *env, uint32_t imm) 106 { 107 daif_check(env, 0x1f, imm, GETPC()); 108 env->daif &= ~((imm << 6) & PSTATE_DAIF); 109 arm_rebuild_hflags(env); 110 } 111 112 /* Convert a softfloat float_relation_ (as returned by 113 * the float*_compare functions) to the correct ARM 114 * NZCV flag state. 115 */ 116 static inline uint32_t float_rel_to_flags(int res) 117 { 118 uint64_t flags; 119 switch (res) { 120 case float_relation_equal: 121 flags = PSTATE_Z | PSTATE_C; 122 break; 123 case float_relation_less: 124 flags = PSTATE_N; 125 break; 126 case float_relation_greater: 127 flags = PSTATE_C; 128 break; 129 case float_relation_unordered: 130 default: 131 flags = PSTATE_C | PSTATE_V; 132 break; 133 } 134 return flags; 135 } 136 137 uint64_t HELPER(vfp_cmph_a64)(uint32_t x, uint32_t y, float_status *fp_status) 138 { 139 return float_rel_to_flags(float16_compare_quiet(x, y, fp_status)); 140 } 141 142 uint64_t HELPER(vfp_cmpeh_a64)(uint32_t x, uint32_t y, float_status *fp_status) 143 { 144 return float_rel_to_flags(float16_compare(x, y, fp_status)); 145 } 146 147 uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, float_status *fp_status) 148 { 149 return float_rel_to_flags(float32_compare_quiet(x, y, fp_status)); 150 } 151 152 uint64_t HELPER(vfp_cmpes_a64)(float32 x, float32 y, float_status *fp_status) 153 { 154 return float_rel_to_flags(float32_compare(x, y, fp_status)); 155 } 156 157 uint64_t HELPER(vfp_cmpd_a64)(float64 x, float64 y, float_status *fp_status) 158 { 159 return float_rel_to_flags(float64_compare_quiet(x, y, fp_status)); 160 } 161 162 uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, float_status *fp_status) 163 { 164 return float_rel_to_flags(float64_compare(x, y, fp_status)); 165 } 166 167 float32 HELPER(vfp_mulxs)(float32 a, float32 b, float_status *fpst) 168 { 169 a = float32_squash_input_denormal(a, fpst); 170 b = float32_squash_input_denormal(b, fpst); 171 172 if ((float32_is_zero(a) && float32_is_infinity(b)) || 173 (float32_is_infinity(a) && float32_is_zero(b))) { 174 /* 2.0 with the sign bit set to sign(A) XOR sign(B) */ 175 return make_float32((1U << 30) | 176 ((float32_val(a) ^ float32_val(b)) & (1U << 31))); 177 } 178 return float32_mul(a, b, fpst); 179 } 180 181 float64 HELPER(vfp_mulxd)(float64 a, float64 b, float_status *fpst) 182 { 183 a = float64_squash_input_denormal(a, fpst); 184 b = float64_squash_input_denormal(b, fpst); 185 186 if ((float64_is_zero(a) && float64_is_infinity(b)) || 187 (float64_is_infinity(a) && float64_is_zero(b))) { 188 /* 2.0 with the sign bit set to sign(A) XOR sign(B) */ 189 return make_float64((1ULL << 62) | 190 ((float64_val(a) ^ float64_val(b)) & (1ULL << 63))); 191 } 192 return float64_mul(a, b, fpst); 193 } 194 195 /* 64bit/double versions of the neon float compare functions */ 196 uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, float_status *fpst) 197 { 198 return -float64_eq_quiet(a, b, fpst); 199 } 200 201 uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, float_status *fpst) 202 { 203 return -float64_le(b, a, fpst); 204 } 205 206 uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, float_status *fpst) 207 { 208 return -float64_lt(b, a, fpst); 209 } 210 211 /* Reciprocal step and sqrt step. Note that unlike the A32/T32 212 * versions, these do a fully fused multiply-add or 213 * multiply-add-and-halve. 214 */ 215 216 uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, float_status *fpst) 217 { 218 a = float16_squash_input_denormal(a, fpst); 219 b = float16_squash_input_denormal(b, fpst); 220 221 a = float16_chs(a); 222 if ((float16_is_infinity(a) && float16_is_zero(b)) || 223 (float16_is_infinity(b) && float16_is_zero(a))) { 224 return float16_two; 225 } 226 return float16_muladd(a, b, float16_two, 0, fpst); 227 } 228 229 float32 HELPER(recpsf_f32)(float32 a, float32 b, float_status *fpst) 230 { 231 a = float32_squash_input_denormal(a, fpst); 232 b = float32_squash_input_denormal(b, fpst); 233 234 a = float32_chs(a); 235 if ((float32_is_infinity(a) && float32_is_zero(b)) || 236 (float32_is_infinity(b) && float32_is_zero(a))) { 237 return float32_two; 238 } 239 return float32_muladd(a, b, float32_two, 0, fpst); 240 } 241 242 float64 HELPER(recpsf_f64)(float64 a, float64 b, float_status *fpst) 243 { 244 a = float64_squash_input_denormal(a, fpst); 245 b = float64_squash_input_denormal(b, fpst); 246 247 a = float64_chs(a); 248 if ((float64_is_infinity(a) && float64_is_zero(b)) || 249 (float64_is_infinity(b) && float64_is_zero(a))) { 250 return float64_two; 251 } 252 return float64_muladd(a, b, float64_two, 0, fpst); 253 } 254 255 uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, float_status *fpst) 256 { 257 a = float16_squash_input_denormal(a, fpst); 258 b = float16_squash_input_denormal(b, fpst); 259 260 a = float16_chs(a); 261 if ((float16_is_infinity(a) && float16_is_zero(b)) || 262 (float16_is_infinity(b) && float16_is_zero(a))) { 263 return float16_one_point_five; 264 } 265 return float16_muladd_scalbn(a, b, float16_three, -1, 0, fpst); 266 } 267 268 float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, float_status *fpst) 269 { 270 a = float32_squash_input_denormal(a, fpst); 271 b = float32_squash_input_denormal(b, fpst); 272 273 a = float32_chs(a); 274 if ((float32_is_infinity(a) && float32_is_zero(b)) || 275 (float32_is_infinity(b) && float32_is_zero(a))) { 276 return float32_one_point_five; 277 } 278 return float32_muladd_scalbn(a, b, float32_three, -1, 0, fpst); 279 } 280 281 float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, float_status *fpst) 282 { 283 a = float64_squash_input_denormal(a, fpst); 284 b = float64_squash_input_denormal(b, fpst); 285 286 a = float64_chs(a); 287 if ((float64_is_infinity(a) && float64_is_zero(b)) || 288 (float64_is_infinity(b) && float64_is_zero(a))) { 289 return float64_one_point_five; 290 } 291 return float64_muladd_scalbn(a, b, float64_three, -1, 0, fpst); 292 } 293 294 /* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */ 295 uint32_t HELPER(frecpx_f16)(uint32_t a, float_status *fpst) 296 { 297 uint16_t val16, sbit; 298 int16_t exp; 299 300 if (float16_is_any_nan(a)) { 301 float16 nan = a; 302 if (float16_is_signaling_nan(a, fpst)) { 303 float_raise(float_flag_invalid, fpst); 304 if (!fpst->default_nan_mode) { 305 nan = float16_silence_nan(a, fpst); 306 } 307 } 308 if (fpst->default_nan_mode) { 309 nan = float16_default_nan(fpst); 310 } 311 return nan; 312 } 313 314 a = float16_squash_input_denormal(a, fpst); 315 316 val16 = float16_val(a); 317 sbit = 0x8000 & val16; 318 exp = extract32(val16, 10, 5); 319 320 if (exp == 0) { 321 return make_float16(deposit32(sbit, 10, 5, 0x1e)); 322 } else { 323 return make_float16(deposit32(sbit, 10, 5, ~exp)); 324 } 325 } 326 327 float32 HELPER(frecpx_f32)(float32 a, float_status *fpst) 328 { 329 uint32_t val32, sbit; 330 int32_t exp; 331 332 if (float32_is_any_nan(a)) { 333 float32 nan = a; 334 if (float32_is_signaling_nan(a, fpst)) { 335 float_raise(float_flag_invalid, fpst); 336 if (!fpst->default_nan_mode) { 337 nan = float32_silence_nan(a, fpst); 338 } 339 } 340 if (fpst->default_nan_mode) { 341 nan = float32_default_nan(fpst); 342 } 343 return nan; 344 } 345 346 a = float32_squash_input_denormal(a, fpst); 347 348 val32 = float32_val(a); 349 sbit = 0x80000000ULL & val32; 350 exp = extract32(val32, 23, 8); 351 352 if (exp == 0) { 353 return make_float32(sbit | (0xfe << 23)); 354 } else { 355 return make_float32(sbit | (~exp & 0xff) << 23); 356 } 357 } 358 359 float64 HELPER(frecpx_f64)(float64 a, float_status *fpst) 360 { 361 uint64_t val64, sbit; 362 int64_t exp; 363 364 if (float64_is_any_nan(a)) { 365 float64 nan = a; 366 if (float64_is_signaling_nan(a, fpst)) { 367 float_raise(float_flag_invalid, fpst); 368 if (!fpst->default_nan_mode) { 369 nan = float64_silence_nan(a, fpst); 370 } 371 } 372 if (fpst->default_nan_mode) { 373 nan = float64_default_nan(fpst); 374 } 375 return nan; 376 } 377 378 a = float64_squash_input_denormal(a, fpst); 379 380 val64 = float64_val(a); 381 sbit = 0x8000000000000000ULL & val64; 382 exp = extract64(float64_val(a), 52, 11); 383 384 if (exp == 0) { 385 return make_float64(sbit | (0x7feULL << 52)); 386 } else { 387 return make_float64(sbit | (~exp & 0x7ffULL) << 52); 388 } 389 } 390 391 float32 HELPER(fcvtx_f64_to_f32)(float64 a, float_status *fpst) 392 { 393 float32 r; 394 int old = get_float_rounding_mode(fpst); 395 396 set_float_rounding_mode(float_round_to_odd, fpst); 397 r = float64_to_float32(a, fpst); 398 set_float_rounding_mode(old, fpst); 399 return r; 400 } 401 402 /* 64-bit versions of the CRC helpers. Note that although the operation 403 * (and the prototypes of crc32c() and crc32() mean that only the bottom 404 * 32 bits of the accumulator and result are used, we pass and return 405 * uint64_t for convenience of the generated code. Unlike the 32-bit 406 * instruction set versions, val may genuinely have 64 bits of data in it. 407 * The upper bytes of val (above the number specified by 'bytes') must have 408 * been zeroed out by the caller. 409 */ 410 uint64_t HELPER(crc32_64)(uint64_t acc, uint64_t val, uint32_t bytes) 411 { 412 uint8_t buf[8]; 413 414 stq_le_p(buf, val); 415 416 /* zlib crc32 converts the accumulator and output to one's complement. */ 417 return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff; 418 } 419 420 uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes) 421 { 422 uint8_t buf[8]; 423 424 stq_le_p(buf, val); 425 426 /* Linux crc32c converts the output to one's complement. */ 427 return crc32c(acc, buf, bytes) ^ 0xffffffff; 428 } 429 430 /* 431 * AdvSIMD half-precision 432 */ 433 434 #define ADVSIMD_HELPER(name, suffix) HELPER(glue(glue(advsimd_, name), suffix)) 435 436 #define ADVSIMD_HALFOP(name) \ 437 uint32_t ADVSIMD_HELPER(name, h)(uint32_t a, uint32_t b, float_status *fpst) \ 438 { \ 439 return float16_ ## name(a, b, fpst); \ 440 } 441 442 #define ADVSIMD_TWOHALFOP(name) \ 443 uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b, \ 444 float_status *fpst) \ 445 { \ 446 float16 a1, a2, b1, b2; \ 447 uint32_t r1, r2; \ 448 a1 = extract32(two_a, 0, 16); \ 449 a2 = extract32(two_a, 16, 16); \ 450 b1 = extract32(two_b, 0, 16); \ 451 b2 = extract32(two_b, 16, 16); \ 452 r1 = float16_ ## name(a1, b1, fpst); \ 453 r2 = float16_ ## name(a2, b2, fpst); \ 454 return deposit32(r1, 16, 16, r2); \ 455 } 456 457 ADVSIMD_TWOHALFOP(add) 458 ADVSIMD_TWOHALFOP(sub) 459 ADVSIMD_TWOHALFOP(mul) 460 ADVSIMD_TWOHALFOP(div) 461 ADVSIMD_TWOHALFOP(min) 462 ADVSIMD_TWOHALFOP(max) 463 ADVSIMD_TWOHALFOP(minnum) 464 ADVSIMD_TWOHALFOP(maxnum) 465 466 /* Data processing - scalar floating-point and advanced SIMD */ 467 static float16 float16_mulx(float16 a, float16 b, float_status *fpst) 468 { 469 a = float16_squash_input_denormal(a, fpst); 470 b = float16_squash_input_denormal(b, fpst); 471 472 if ((float16_is_zero(a) && float16_is_infinity(b)) || 473 (float16_is_infinity(a) && float16_is_zero(b))) { 474 /* 2.0 with the sign bit set to sign(A) XOR sign(B) */ 475 return make_float16((1U << 14) | 476 ((float16_val(a) ^ float16_val(b)) & (1U << 15))); 477 } 478 return float16_mul(a, b, fpst); 479 } 480 481 ADVSIMD_HALFOP(mulx) 482 ADVSIMD_TWOHALFOP(mulx) 483 484 /* fused multiply-accumulate */ 485 uint32_t HELPER(advsimd_muladdh)(uint32_t a, uint32_t b, uint32_t c, 486 float_status *fpst) 487 { 488 return float16_muladd(a, b, c, 0, fpst); 489 } 490 491 uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b, 492 uint32_t two_c, float_status *fpst) 493 { 494 float16 a1, a2, b1, b2, c1, c2; 495 uint32_t r1, r2; 496 a1 = extract32(two_a, 0, 16); 497 a2 = extract32(two_a, 16, 16); 498 b1 = extract32(two_b, 0, 16); 499 b2 = extract32(two_b, 16, 16); 500 c1 = extract32(two_c, 0, 16); 501 c2 = extract32(two_c, 16, 16); 502 r1 = float16_muladd(a1, b1, c1, 0, fpst); 503 r2 = float16_muladd(a2, b2, c2, 0, fpst); 504 return deposit32(r1, 16, 16, r2); 505 } 506 507 /* 508 * Floating point comparisons produce an integer result. Softfloat 509 * routines return float_relation types which we convert to the 0/-1 510 * Neon requires. 511 */ 512 513 #define ADVSIMD_CMPRES(test) (test) ? 0xffff : 0 514 515 uint32_t HELPER(advsimd_ceq_f16)(uint32_t a, uint32_t b, float_status *fpst) 516 { 517 int compare = float16_compare_quiet(a, b, fpst); 518 return ADVSIMD_CMPRES(compare == float_relation_equal); 519 } 520 521 uint32_t HELPER(advsimd_cge_f16)(uint32_t a, uint32_t b, float_status *fpst) 522 { 523 int compare = float16_compare(a, b, fpst); 524 return ADVSIMD_CMPRES(compare == float_relation_greater || 525 compare == float_relation_equal); 526 } 527 528 uint32_t HELPER(advsimd_cgt_f16)(uint32_t a, uint32_t b, float_status *fpst) 529 { 530 int compare = float16_compare(a, b, fpst); 531 return ADVSIMD_CMPRES(compare == float_relation_greater); 532 } 533 534 uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, float_status *fpst) 535 { 536 float16 f0 = float16_abs(a); 537 float16 f1 = float16_abs(b); 538 int compare = float16_compare(f0, f1, fpst); 539 return ADVSIMD_CMPRES(compare == float_relation_greater || 540 compare == float_relation_equal); 541 } 542 543 uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, float_status *fpst) 544 { 545 float16 f0 = float16_abs(a); 546 float16 f1 = float16_abs(b); 547 int compare = float16_compare(f0, f1, fpst); 548 return ADVSIMD_CMPRES(compare == float_relation_greater); 549 } 550 551 /* round to integral */ 552 uint32_t HELPER(advsimd_rinth_exact)(uint32_t x, float_status *fp_status) 553 { 554 return float16_round_to_int(x, fp_status); 555 } 556 557 uint32_t HELPER(advsimd_rinth)(uint32_t x, float_status *fp_status) 558 { 559 int old_flags = get_float_exception_flags(fp_status), new_flags; 560 float16 ret; 561 562 ret = float16_round_to_int(x, fp_status); 563 564 /* Suppress any inexact exceptions the conversion produced */ 565 if (!(old_flags & float_flag_inexact)) { 566 new_flags = get_float_exception_flags(fp_status); 567 set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status); 568 } 569 570 return ret; 571 } 572 573 static int el_from_spsr(uint32_t spsr) 574 { 575 /* Return the exception level that this SPSR is requesting a return to, 576 * or -1 if it is invalid (an illegal return) 577 */ 578 if (spsr & PSTATE_nRW) { 579 switch (spsr & CPSR_M) { 580 case ARM_CPU_MODE_USR: 581 return 0; 582 case ARM_CPU_MODE_HYP: 583 return 2; 584 case ARM_CPU_MODE_FIQ: 585 case ARM_CPU_MODE_IRQ: 586 case ARM_CPU_MODE_SVC: 587 case ARM_CPU_MODE_ABT: 588 case ARM_CPU_MODE_UND: 589 case ARM_CPU_MODE_SYS: 590 return 1; 591 case ARM_CPU_MODE_MON: 592 /* Returning to Mon from AArch64 is never possible, 593 * so this is an illegal return. 594 */ 595 default: 596 return -1; 597 } 598 } else { 599 if (extract32(spsr, 1, 1)) { 600 /* Return with reserved M[1] bit set */ 601 return -1; 602 } 603 if (extract32(spsr, 0, 4) == 1) { 604 /* return to EL0 with M[0] bit set */ 605 return -1; 606 } 607 return extract32(spsr, 2, 2); 608 } 609 } 610 611 static void cpsr_write_from_spsr_elx(CPUARMState *env, 612 uint32_t val) 613 { 614 uint32_t mask; 615 616 /* Save SPSR_ELx.SS into PSTATE. */ 617 env->pstate = (env->pstate & ~PSTATE_SS) | (val & PSTATE_SS); 618 val &= ~PSTATE_SS; 619 620 /* Move DIT to the correct location for CPSR */ 621 if (val & PSTATE_DIT) { 622 val &= ~PSTATE_DIT; 623 val |= CPSR_DIT; 624 } 625 626 mask = aarch32_cpsr_valid_mask(env->features, \ 627 &env_archcpu(env)->isar); 628 cpsr_write(env, val, mask, CPSRWriteRaw); 629 } 630 631 void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc) 632 { 633 int cur_el = arm_current_el(env); 634 unsigned int spsr_idx = aarch64_banked_spsr_index(cur_el); 635 uint32_t spsr = env->banked_spsr[spsr_idx]; 636 int new_el; 637 bool return_to_aa64 = (spsr & PSTATE_nRW) == 0; 638 639 aarch64_save_sp(env, cur_el); 640 641 arm_clear_exclusive(env); 642 643 /* We must squash the PSTATE.SS bit to zero unless both of the 644 * following hold: 645 * 1. debug exceptions are currently disabled 646 * 2. singlestep will be active in the EL we return to 647 * We check 1 here and 2 after we've done the pstate/cpsr write() to 648 * transition to the EL we're going to. 649 */ 650 if (arm_generate_debug_exceptions(env)) { 651 spsr &= ~PSTATE_SS; 652 } 653 654 /* 655 * FEAT_RME forbids return from EL3 with an invalid security state. 656 * We don't need an explicit check for FEAT_RME here because we enforce 657 * in scr_write() that you can't set the NSE bit without it. 658 */ 659 if (cur_el == 3 && (env->cp15.scr_el3 & (SCR_NS | SCR_NSE)) == SCR_NSE) { 660 goto illegal_return; 661 } 662 663 new_el = el_from_spsr(spsr); 664 if (new_el == -1) { 665 goto illegal_return; 666 } 667 if (new_el > cur_el || (new_el == 2 && !arm_is_el2_enabled(env))) { 668 /* Disallow return to an EL which is unimplemented or higher 669 * than the current one. 670 */ 671 goto illegal_return; 672 } 673 674 if (new_el != 0 && arm_el_is_aa64(env, new_el) != return_to_aa64) { 675 /* Return to an EL which is configured for a different register width */ 676 goto illegal_return; 677 } 678 679 if (new_el == 1 && (arm_hcr_el2_eff(env) & HCR_TGE)) { 680 goto illegal_return; 681 } 682 683 bql_lock(); 684 arm_call_pre_el_change_hook(env_archcpu(env)); 685 bql_unlock(); 686 687 if (!return_to_aa64) { 688 env->aarch64 = false; 689 /* We do a raw CPSR write because aarch64_sync_64_to_32() 690 * will sort the register banks out for us, and we've already 691 * caught all the bad-mode cases in el_from_spsr(). 692 */ 693 cpsr_write_from_spsr_elx(env, spsr); 694 if (!arm_singlestep_active(env)) { 695 env->pstate &= ~PSTATE_SS; 696 } 697 aarch64_sync_64_to_32(env); 698 699 if (spsr & CPSR_T) { 700 env->regs[15] = new_pc & ~0x1; 701 } else { 702 env->regs[15] = new_pc & ~0x3; 703 } 704 helper_rebuild_hflags_a32(env, new_el); 705 qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to " 706 "AArch32 EL%d PC 0x%" PRIx32 "\n", 707 cur_el, new_el, env->regs[15]); 708 } else { 709 int tbii; 710 711 env->aarch64 = true; 712 spsr &= aarch64_pstate_valid_mask(&env_archcpu(env)->isar); 713 pstate_write(env, spsr); 714 if (!arm_singlestep_active(env)) { 715 env->pstate &= ~PSTATE_SS; 716 } 717 aarch64_restore_sp(env, new_el); 718 helper_rebuild_hflags_a64(env, new_el); 719 720 /* 721 * Apply TBI to the exception return address. We had to delay this 722 * until after we selected the new EL, so that we could select the 723 * correct TBI+TBID bits. This is made easier by waiting until after 724 * the hflags rebuild, since we can pull the composite TBII field 725 * from there. 726 */ 727 tbii = EX_TBFLAG_A64(env->hflags, TBII); 728 if ((tbii >> extract64(new_pc, 55, 1)) & 1) { 729 /* TBI is enabled. */ 730 int core_mmu_idx = arm_env_mmu_index(env); 731 if (regime_has_2_ranges(core_to_aa64_mmu_idx(core_mmu_idx))) { 732 new_pc = sextract64(new_pc, 0, 56); 733 } else { 734 new_pc = extract64(new_pc, 0, 56); 735 } 736 } 737 env->pc = new_pc; 738 739 qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to " 740 "AArch64 EL%d PC 0x%" PRIx64 "\n", 741 cur_el, new_el, env->pc); 742 } 743 744 /* 745 * Note that cur_el can never be 0. If new_el is 0, then 746 * el0_a64 is return_to_aa64, else el0_a64 is ignored. 747 */ 748 aarch64_sve_change_el(env, cur_el, new_el, return_to_aa64); 749 750 bql_lock(); 751 arm_call_el_change_hook(env_archcpu(env)); 752 bql_unlock(); 753 754 return; 755 756 illegal_return: 757 /* Illegal return events of various kinds have architecturally 758 * mandated behaviour: 759 * restore NZCV and DAIF from SPSR_ELx 760 * set PSTATE.IL 761 * restore PC from ELR_ELx 762 * no change to exception level, execution state or stack pointer 763 */ 764 env->pstate |= PSTATE_IL; 765 env->pc = new_pc; 766 spsr &= PSTATE_NZCV | PSTATE_DAIF | PSTATE_ALLINT; 767 spsr |= pstate_read(env) & ~(PSTATE_NZCV | PSTATE_DAIF | PSTATE_ALLINT); 768 pstate_write(env, spsr); 769 if (!arm_singlestep_active(env)) { 770 env->pstate &= ~PSTATE_SS; 771 } 772 helper_rebuild_hflags_a64(env, cur_el); 773 qemu_log_mask(LOG_GUEST_ERROR, "Illegal exception return at EL%d: " 774 "resuming execution at 0x%" PRIx64 "\n", cur_el, env->pc); 775 } 776 777 void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in) 778 { 779 uintptr_t ra = GETPC(); 780 781 /* 782 * Implement DC ZVA, which zeroes a fixed-length block of memory. 783 * Note that we do not implement the (architecturally mandated) 784 * alignment fault for attempts to use this on Device memory 785 * (which matches the usual QEMU behaviour of not implementing either 786 * alignment faults or any memory attribute handling). 787 */ 788 int blocklen = 4 << env_archcpu(env)->dcz_blocksize; 789 uint64_t vaddr = vaddr_in & ~(blocklen - 1); 790 int mmu_idx = arm_env_mmu_index(env); 791 void *mem; 792 793 /* 794 * Trapless lookup. In addition to actual invalid page, may 795 * return NULL for I/O, watchpoints, clean pages, etc. 796 */ 797 mem = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx); 798 799 #ifndef CONFIG_USER_ONLY 800 if (unlikely(!mem)) { 801 /* 802 * Trap if accessing an invalid page. DC_ZVA requires that we supply 803 * the original pointer for an invalid page. But watchpoints require 804 * that we probe the actual space. So do both. 805 */ 806 (void) probe_write(env, vaddr_in, 1, mmu_idx, ra); 807 mem = probe_write(env, vaddr, blocklen, mmu_idx, ra); 808 809 if (unlikely(!mem)) { 810 /* 811 * The only remaining reason for mem == NULL is I/O. 812 * Just do a series of byte writes as the architecture demands. 813 */ 814 for (int i = 0; i < blocklen; i++) { 815 cpu_stb_mmuidx_ra(env, vaddr + i, 0, mmu_idx, ra); 816 } 817 return; 818 } 819 } 820 #endif 821 822 set_helper_retaddr(ra); 823 memset(mem, 0, blocklen); 824 clear_helper_retaddr(); 825 } 826 827 void HELPER(unaligned_access)(CPUARMState *env, uint64_t addr, 828 uint32_t access_type, uint32_t mmu_idx) 829 { 830 arm_cpu_do_unaligned_access(env_cpu(env), addr, access_type, 831 mmu_idx, GETPC()); 832 } 833 834 /* Memory operations (memset, memmove, memcpy) */ 835 836 /* 837 * Return true if the CPY* and SET* insns can execute; compare 838 * pseudocode CheckMOPSEnabled(), though we refactor it a little. 839 */ 840 static bool mops_enabled(CPUARMState *env) 841 { 842 int el = arm_current_el(env); 843 844 if (el < 2 && 845 (arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE) && 846 !(arm_hcrx_el2_eff(env) & HCRX_MSCEN)) { 847 return false; 848 } 849 850 if (el == 0) { 851 if (!el_is_in_host(env, 0)) { 852 return env->cp15.sctlr_el[1] & SCTLR_MSCEN; 853 } else { 854 return env->cp15.sctlr_el[2] & SCTLR_MSCEN; 855 } 856 } 857 return true; 858 } 859 860 static void check_mops_enabled(CPUARMState *env, uintptr_t ra) 861 { 862 if (!mops_enabled(env)) { 863 raise_exception_ra(env, EXCP_UDEF, syn_uncategorized(), 864 exception_target_el(env), ra); 865 } 866 } 867 868 /* 869 * Return the target exception level for an exception due 870 * to mismatched arguments in a FEAT_MOPS copy or set. 871 * Compare pseudocode MismatchedCpySetTargetEL() 872 */ 873 static int mops_mismatch_exception_target_el(CPUARMState *env) 874 { 875 int el = arm_current_el(env); 876 877 if (el > 1) { 878 return el; 879 } 880 if (el == 0 && (arm_hcr_el2_eff(env) & HCR_TGE)) { 881 return 2; 882 } 883 if (el == 1 && (arm_hcrx_el2_eff(env) & HCRX_MCE2)) { 884 return 2; 885 } 886 return 1; 887 } 888 889 /* 890 * Check whether an M or E instruction was executed with a CF value 891 * indicating the wrong option for this implementation. 892 * Assumes we are always Option A. 893 */ 894 static void check_mops_wrong_option(CPUARMState *env, uint32_t syndrome, 895 uintptr_t ra) 896 { 897 if (env->CF != 0) { 898 syndrome |= 1 << 17; /* Set the wrong-option bit */ 899 raise_exception_ra(env, EXCP_UDEF, syndrome, 900 mops_mismatch_exception_target_el(env), ra); 901 } 902 } 903 904 /* 905 * Return the maximum number of bytes we can transfer starting at addr 906 * without crossing a page boundary. 907 */ 908 static uint64_t page_limit(uint64_t addr) 909 { 910 return TARGET_PAGE_ALIGN(addr + 1) - addr; 911 } 912 913 /* 914 * Return the number of bytes we can copy starting from addr and working 915 * backwards without crossing a page boundary. 916 */ 917 static uint64_t page_limit_rev(uint64_t addr) 918 { 919 return (addr & ~TARGET_PAGE_MASK) + 1; 920 } 921 922 /* 923 * Perform part of a memory set on an area of guest memory starting at 924 * toaddr (a dirty address) and extending for setsize bytes. 925 * 926 * Returns the number of bytes actually set, which might be less than 927 * setsize; the caller should loop until the whole set has been done. 928 * The caller should ensure that the guest registers are correct 929 * for the possibility that the first byte of the set encounters 930 * an exception or watchpoint. We guarantee not to take any faults 931 * for bytes other than the first. 932 */ 933 static uint64_t set_step(CPUARMState *env, uint64_t toaddr, 934 uint64_t setsize, uint32_t data, int memidx, 935 uint32_t *mtedesc, uintptr_t ra) 936 { 937 void *mem; 938 939 setsize = MIN(setsize, page_limit(toaddr)); 940 if (*mtedesc) { 941 uint64_t mtesize = mte_mops_probe(env, toaddr, setsize, *mtedesc); 942 if (mtesize == 0) { 943 /* Trap, or not. All CPU state is up to date */ 944 mte_check_fail(env, *mtedesc, toaddr, ra); 945 /* Continue, with no further MTE checks required */ 946 *mtedesc = 0; 947 } else { 948 /* Advance to the end, or to the tag mismatch */ 949 setsize = MIN(setsize, mtesize); 950 } 951 } 952 953 toaddr = useronly_clean_ptr(toaddr); 954 /* 955 * Trapless lookup: returns NULL for invalid page, I/O, 956 * watchpoints, clean pages, etc. 957 */ 958 mem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, memidx); 959 960 #ifndef CONFIG_USER_ONLY 961 if (unlikely(!mem)) { 962 /* 963 * Slow-path: just do one byte write. This will handle the 964 * watchpoint, invalid page, etc handling correctly. 965 * For clean code pages, the next iteration will see 966 * the page dirty and will use the fast path. 967 */ 968 cpu_stb_mmuidx_ra(env, toaddr, data, memidx, ra); 969 return 1; 970 } 971 #endif 972 /* Easy case: just memset the host memory */ 973 set_helper_retaddr(ra); 974 memset(mem, data, setsize); 975 clear_helper_retaddr(); 976 return setsize; 977 } 978 979 /* 980 * Similar, but setting tags. The architecture requires us to do this 981 * in 16-byte chunks. SETP accesses are not tag checked; they set 982 * the tags. 983 */ 984 static uint64_t set_step_tags(CPUARMState *env, uint64_t toaddr, 985 uint64_t setsize, uint32_t data, int memidx, 986 uint32_t *mtedesc, uintptr_t ra) 987 { 988 void *mem; 989 uint64_t cleanaddr; 990 991 setsize = MIN(setsize, page_limit(toaddr)); 992 993 cleanaddr = useronly_clean_ptr(toaddr); 994 /* 995 * Trapless lookup: returns NULL for invalid page, I/O, 996 * watchpoints, clean pages, etc. 997 */ 998 mem = tlb_vaddr_to_host(env, cleanaddr, MMU_DATA_STORE, memidx); 999 1000 #ifndef CONFIG_USER_ONLY 1001 if (unlikely(!mem)) { 1002 /* 1003 * Slow-path: just do one write. This will handle the 1004 * watchpoint, invalid page, etc handling correctly. 1005 * The architecture requires that we do 16 bytes at a time, 1006 * and we know both ptr and size are 16 byte aligned. 1007 * For clean code pages, the next iteration will see 1008 * the page dirty and will use the fast path. 1009 */ 1010 uint64_t repldata = data * 0x0101010101010101ULL; 1011 MemOpIdx oi16 = make_memop_idx(MO_TE | MO_128, memidx); 1012 cpu_st16_mmu(env, toaddr, int128_make128(repldata, repldata), oi16, ra); 1013 mte_mops_set_tags(env, toaddr, 16, *mtedesc); 1014 return 16; 1015 } 1016 #endif 1017 /* Easy case: just memset the host memory */ 1018 set_helper_retaddr(ra); 1019 memset(mem, data, setsize); 1020 clear_helper_retaddr(); 1021 mte_mops_set_tags(env, toaddr, setsize, *mtedesc); 1022 return setsize; 1023 } 1024 1025 typedef uint64_t StepFn(CPUARMState *env, uint64_t toaddr, 1026 uint64_t setsize, uint32_t data, 1027 int memidx, uint32_t *mtedesc, uintptr_t ra); 1028 1029 /* Extract register numbers from a MOPS exception syndrome value */ 1030 static int mops_destreg(uint32_t syndrome) 1031 { 1032 return extract32(syndrome, 10, 5); 1033 } 1034 1035 static int mops_srcreg(uint32_t syndrome) 1036 { 1037 return extract32(syndrome, 5, 5); 1038 } 1039 1040 static int mops_sizereg(uint32_t syndrome) 1041 { 1042 return extract32(syndrome, 0, 5); 1043 } 1044 1045 /* 1046 * Return true if TCMA and TBI bits mean we need to do MTE checks. 1047 * We only need to do this once per MOPS insn, not for every page. 1048 */ 1049 static bool mte_checks_needed(uint64_t ptr, uint32_t desc) 1050 { 1051 int bit55 = extract64(ptr, 55, 1); 1052 1053 /* 1054 * Note that tbi_check() returns true for "access checked" but 1055 * tcma_check() returns true for "access unchecked". 1056 */ 1057 if (!tbi_check(desc, bit55)) { 1058 return false; 1059 } 1060 return !tcma_check(desc, bit55, allocation_tag_from_addr(ptr)); 1061 } 1062 1063 /* Take an exception if the SETG addr/size are not granule aligned */ 1064 static void check_setg_alignment(CPUARMState *env, uint64_t ptr, uint64_t size, 1065 uint32_t memidx, uintptr_t ra) 1066 { 1067 if ((size != 0 && !QEMU_IS_ALIGNED(ptr, TAG_GRANULE)) || 1068 !QEMU_IS_ALIGNED(size, TAG_GRANULE)) { 1069 arm_cpu_do_unaligned_access(env_cpu(env), ptr, MMU_DATA_STORE, 1070 memidx, ra); 1071 1072 } 1073 } 1074 1075 static uint64_t arm_reg_or_xzr(CPUARMState *env, int reg) 1076 { 1077 /* 1078 * Runtime equivalent of cpu_reg() -- return the CPU register value, 1079 * for contexts when index 31 means XZR (not SP). 1080 */ 1081 return reg == 31 ? 0 : env->xregs[reg]; 1082 } 1083 1084 /* 1085 * For the Memory Set operation, our implementation chooses 1086 * always to use "option A", where we update Xd to the final 1087 * address in the SETP insn, and set Xn to be -(bytes remaining). 1088 * On SETM and SETE insns we only need update Xn. 1089 * 1090 * @env: CPU 1091 * @syndrome: syndrome value for mismatch exceptions 1092 * (also contains the register numbers we need to use) 1093 * @mtedesc: MTE descriptor word 1094 * @stepfn: function which does a single part of the set operation 1095 * @is_setg: true if this is the tag-setting SETG variant 1096 */ 1097 static void do_setp(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc, 1098 StepFn *stepfn, bool is_setg, uintptr_t ra) 1099 { 1100 /* Prologue: we choose to do up to the next page boundary */ 1101 int rd = mops_destreg(syndrome); 1102 int rs = mops_srcreg(syndrome); 1103 int rn = mops_sizereg(syndrome); 1104 uint8_t data = arm_reg_or_xzr(env, rs); 1105 uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX); 1106 uint64_t toaddr = env->xregs[rd]; 1107 uint64_t setsize = env->xregs[rn]; 1108 uint64_t stagesetsize, step; 1109 1110 check_mops_enabled(env, ra); 1111 1112 if (setsize > INT64_MAX) { 1113 setsize = INT64_MAX; 1114 if (is_setg) { 1115 setsize &= ~0xf; 1116 } 1117 } 1118 1119 if (unlikely(is_setg)) { 1120 check_setg_alignment(env, toaddr, setsize, memidx, ra); 1121 } else if (!mte_checks_needed(toaddr, mtedesc)) { 1122 mtedesc = 0; 1123 } 1124 1125 stagesetsize = MIN(setsize, page_limit(toaddr)); 1126 while (stagesetsize) { 1127 env->xregs[rd] = toaddr; 1128 env->xregs[rn] = setsize; 1129 step = stepfn(env, toaddr, stagesetsize, data, memidx, &mtedesc, ra); 1130 toaddr += step; 1131 setsize -= step; 1132 stagesetsize -= step; 1133 } 1134 /* Insn completed, so update registers to the Option A format */ 1135 env->xregs[rd] = toaddr + setsize; 1136 env->xregs[rn] = -setsize; 1137 1138 /* Set NZCV = 0000 to indicate we are an Option A implementation */ 1139 env->NF = 0; 1140 env->ZF = 1; /* our env->ZF encoding is inverted */ 1141 env->CF = 0; 1142 env->VF = 0; 1143 return; 1144 } 1145 1146 void HELPER(setp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) 1147 { 1148 do_setp(env, syndrome, mtedesc, set_step, false, GETPC()); 1149 } 1150 1151 void HELPER(setgp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) 1152 { 1153 do_setp(env, syndrome, mtedesc, set_step_tags, true, GETPC()); 1154 } 1155 1156 static void do_setm(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc, 1157 StepFn *stepfn, bool is_setg, uintptr_t ra) 1158 { 1159 /* Main: we choose to do all the full-page chunks */ 1160 CPUState *cs = env_cpu(env); 1161 int rd = mops_destreg(syndrome); 1162 int rs = mops_srcreg(syndrome); 1163 int rn = mops_sizereg(syndrome); 1164 uint8_t data = arm_reg_or_xzr(env, rs); 1165 uint64_t toaddr = env->xregs[rd] + env->xregs[rn]; 1166 uint64_t setsize = -env->xregs[rn]; 1167 uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX); 1168 uint64_t step, stagesetsize; 1169 1170 check_mops_enabled(env, ra); 1171 1172 /* 1173 * We're allowed to NOP out "no data to copy" before the consistency 1174 * checks; we choose to do so. 1175 */ 1176 if (env->xregs[rn] == 0) { 1177 return; 1178 } 1179 1180 check_mops_wrong_option(env, syndrome, ra); 1181 1182 /* 1183 * Our implementation will work fine even if we have an unaligned 1184 * destination address, and because we update Xn every time around 1185 * the loop below and the return value from stepfn() may be less 1186 * than requested, we might find toaddr is unaligned. So we don't 1187 * have an IMPDEF check for alignment here. 1188 */ 1189 1190 if (unlikely(is_setg)) { 1191 check_setg_alignment(env, toaddr, setsize, memidx, ra); 1192 } else if (!mte_checks_needed(toaddr, mtedesc)) { 1193 mtedesc = 0; 1194 } 1195 1196 /* Do the actual memset: we leave the last partial page to SETE */ 1197 stagesetsize = setsize & TARGET_PAGE_MASK; 1198 while (stagesetsize > 0) { 1199 step = stepfn(env, toaddr, stagesetsize, data, memidx, &mtedesc, ra); 1200 toaddr += step; 1201 setsize -= step; 1202 stagesetsize -= step; 1203 env->xregs[rn] = -setsize; 1204 if (stagesetsize > 0 && unlikely(cpu_loop_exit_requested(cs))) { 1205 cpu_loop_exit_restore(cs, ra); 1206 } 1207 } 1208 } 1209 1210 void HELPER(setm)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) 1211 { 1212 do_setm(env, syndrome, mtedesc, set_step, false, GETPC()); 1213 } 1214 1215 void HELPER(setgm)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) 1216 { 1217 do_setm(env, syndrome, mtedesc, set_step_tags, true, GETPC()); 1218 } 1219 1220 static void do_sete(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc, 1221 StepFn *stepfn, bool is_setg, uintptr_t ra) 1222 { 1223 /* Epilogue: do the last partial page */ 1224 int rd = mops_destreg(syndrome); 1225 int rs = mops_srcreg(syndrome); 1226 int rn = mops_sizereg(syndrome); 1227 uint8_t data = arm_reg_or_xzr(env, rs); 1228 uint64_t toaddr = env->xregs[rd] + env->xregs[rn]; 1229 uint64_t setsize = -env->xregs[rn]; 1230 uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX); 1231 uint64_t step; 1232 1233 check_mops_enabled(env, ra); 1234 1235 /* 1236 * We're allowed to NOP out "no data to copy" before the consistency 1237 * checks; we choose to do so. 1238 */ 1239 if (setsize == 0) { 1240 return; 1241 } 1242 1243 check_mops_wrong_option(env, syndrome, ra); 1244 1245 /* 1246 * Our implementation has no address alignment requirements, but 1247 * we do want to enforce the "less than a page" size requirement, 1248 * so we don't need to have the "check for interrupts" here. 1249 */ 1250 if (setsize >= TARGET_PAGE_SIZE) { 1251 raise_exception_ra(env, EXCP_UDEF, syndrome, 1252 mops_mismatch_exception_target_el(env), ra); 1253 } 1254 1255 if (unlikely(is_setg)) { 1256 check_setg_alignment(env, toaddr, setsize, memidx, ra); 1257 } else if (!mte_checks_needed(toaddr, mtedesc)) { 1258 mtedesc = 0; 1259 } 1260 1261 /* Do the actual memset */ 1262 while (setsize > 0) { 1263 step = stepfn(env, toaddr, setsize, data, memidx, &mtedesc, ra); 1264 toaddr += step; 1265 setsize -= step; 1266 env->xregs[rn] = -setsize; 1267 } 1268 } 1269 1270 void HELPER(sete)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) 1271 { 1272 do_sete(env, syndrome, mtedesc, set_step, false, GETPC()); 1273 } 1274 1275 void HELPER(setge)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) 1276 { 1277 do_sete(env, syndrome, mtedesc, set_step_tags, true, GETPC()); 1278 } 1279 1280 /* 1281 * Perform part of a memory copy from the guest memory at fromaddr 1282 * and extending for copysize bytes, to the guest memory at 1283 * toaddr. Both addresses are dirty. 1284 * 1285 * Returns the number of bytes actually set, which might be less than 1286 * copysize; the caller should loop until the whole copy has been done. 1287 * The caller should ensure that the guest registers are correct 1288 * for the possibility that the first byte of the copy encounters 1289 * an exception or watchpoint. We guarantee not to take any faults 1290 * for bytes other than the first. 1291 */ 1292 static uint64_t copy_step(CPUARMState *env, uint64_t toaddr, uint64_t fromaddr, 1293 uint64_t copysize, int wmemidx, int rmemidx, 1294 uint32_t *wdesc, uint32_t *rdesc, uintptr_t ra) 1295 { 1296 void *rmem; 1297 void *wmem; 1298 1299 /* Don't cross a page boundary on either source or destination */ 1300 copysize = MIN(copysize, page_limit(toaddr)); 1301 copysize = MIN(copysize, page_limit(fromaddr)); 1302 /* 1303 * Handle MTE tag checks: either handle the tag mismatch for byte 0, 1304 * or else copy up to but not including the byte with the mismatch. 1305 */ 1306 if (*rdesc) { 1307 uint64_t mtesize = mte_mops_probe(env, fromaddr, copysize, *rdesc); 1308 if (mtesize == 0) { 1309 mte_check_fail(env, *rdesc, fromaddr, ra); 1310 *rdesc = 0; 1311 } else { 1312 copysize = MIN(copysize, mtesize); 1313 } 1314 } 1315 if (*wdesc) { 1316 uint64_t mtesize = mte_mops_probe(env, toaddr, copysize, *wdesc); 1317 if (mtesize == 0) { 1318 mte_check_fail(env, *wdesc, toaddr, ra); 1319 *wdesc = 0; 1320 } else { 1321 copysize = MIN(copysize, mtesize); 1322 } 1323 } 1324 1325 toaddr = useronly_clean_ptr(toaddr); 1326 fromaddr = useronly_clean_ptr(fromaddr); 1327 /* Trapless lookup of whether we can get a host memory pointer */ 1328 wmem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, wmemidx); 1329 rmem = tlb_vaddr_to_host(env, fromaddr, MMU_DATA_LOAD, rmemidx); 1330 1331 #ifndef CONFIG_USER_ONLY 1332 /* 1333 * If we don't have host memory for both source and dest then just 1334 * do a single byte copy. This will handle watchpoints, invalid pages, 1335 * etc correctly. For clean code pages, the next iteration will see 1336 * the page dirty and will use the fast path. 1337 */ 1338 if (unlikely(!rmem || !wmem)) { 1339 uint8_t byte; 1340 if (rmem) { 1341 byte = *(uint8_t *)rmem; 1342 } else { 1343 byte = cpu_ldub_mmuidx_ra(env, fromaddr, rmemidx, ra); 1344 } 1345 if (wmem) { 1346 *(uint8_t *)wmem = byte; 1347 } else { 1348 cpu_stb_mmuidx_ra(env, toaddr, byte, wmemidx, ra); 1349 } 1350 return 1; 1351 } 1352 #endif 1353 /* Easy case: just memmove the host memory */ 1354 set_helper_retaddr(ra); 1355 memmove(wmem, rmem, copysize); 1356 clear_helper_retaddr(); 1357 return copysize; 1358 } 1359 1360 /* 1361 * Do part of a backwards memory copy. Here toaddr and fromaddr point 1362 * to the *last* byte to be copied. 1363 */ 1364 static uint64_t copy_step_rev(CPUARMState *env, uint64_t toaddr, 1365 uint64_t fromaddr, 1366 uint64_t copysize, int wmemidx, int rmemidx, 1367 uint32_t *wdesc, uint32_t *rdesc, uintptr_t ra) 1368 { 1369 void *rmem; 1370 void *wmem; 1371 1372 /* Don't cross a page boundary on either source or destination */ 1373 copysize = MIN(copysize, page_limit_rev(toaddr)); 1374 copysize = MIN(copysize, page_limit_rev(fromaddr)); 1375 1376 /* 1377 * Handle MTE tag checks: either handle the tag mismatch for byte 0, 1378 * or else copy up to but not including the byte with the mismatch. 1379 */ 1380 if (*rdesc) { 1381 uint64_t mtesize = mte_mops_probe_rev(env, fromaddr, copysize, *rdesc); 1382 if (mtesize == 0) { 1383 mte_check_fail(env, *rdesc, fromaddr, ra); 1384 *rdesc = 0; 1385 } else { 1386 copysize = MIN(copysize, mtesize); 1387 } 1388 } 1389 if (*wdesc) { 1390 uint64_t mtesize = mte_mops_probe_rev(env, toaddr, copysize, *wdesc); 1391 if (mtesize == 0) { 1392 mte_check_fail(env, *wdesc, toaddr, ra); 1393 *wdesc = 0; 1394 } else { 1395 copysize = MIN(copysize, mtesize); 1396 } 1397 } 1398 1399 toaddr = useronly_clean_ptr(toaddr); 1400 fromaddr = useronly_clean_ptr(fromaddr); 1401 /* Trapless lookup of whether we can get a host memory pointer */ 1402 wmem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, wmemidx); 1403 rmem = tlb_vaddr_to_host(env, fromaddr, MMU_DATA_LOAD, rmemidx); 1404 1405 #ifndef CONFIG_USER_ONLY 1406 /* 1407 * If we don't have host memory for both source and dest then just 1408 * do a single byte copy. This will handle watchpoints, invalid pages, 1409 * etc correctly. For clean code pages, the next iteration will see 1410 * the page dirty and will use the fast path. 1411 */ 1412 if (unlikely(!rmem || !wmem)) { 1413 uint8_t byte; 1414 if (rmem) { 1415 byte = *(uint8_t *)rmem; 1416 } else { 1417 byte = cpu_ldub_mmuidx_ra(env, fromaddr, rmemidx, ra); 1418 } 1419 if (wmem) { 1420 *(uint8_t *)wmem = byte; 1421 } else { 1422 cpu_stb_mmuidx_ra(env, toaddr, byte, wmemidx, ra); 1423 } 1424 return 1; 1425 } 1426 #endif 1427 /* 1428 * Easy case: just memmove the host memory. Note that wmem and 1429 * rmem here point to the *last* byte to copy. 1430 */ 1431 set_helper_retaddr(ra); 1432 memmove(wmem - (copysize - 1), rmem - (copysize - 1), copysize); 1433 clear_helper_retaddr(); 1434 return copysize; 1435 } 1436 1437 /* 1438 * for the Memory Copy operation, our implementation chooses always 1439 * to use "option A", where we update Xd and Xs to the final addresses 1440 * in the CPYP insn, and then in CPYM and CPYE only need to update Xn. 1441 * 1442 * @env: CPU 1443 * @syndrome: syndrome value for mismatch exceptions 1444 * (also contains the register numbers we need to use) 1445 * @wdesc: MTE descriptor for the writes (destination) 1446 * @rdesc: MTE descriptor for the reads (source) 1447 * @move: true if this is CPY (memmove), false for CPYF (memcpy forwards) 1448 */ 1449 static void do_cpyp(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, 1450 uint32_t rdesc, uint32_t move, uintptr_t ra) 1451 { 1452 int rd = mops_destreg(syndrome); 1453 int rs = mops_srcreg(syndrome); 1454 int rn = mops_sizereg(syndrome); 1455 uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX); 1456 uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX); 1457 bool forwards = true; 1458 uint64_t toaddr = env->xregs[rd]; 1459 uint64_t fromaddr = env->xregs[rs]; 1460 uint64_t copysize = env->xregs[rn]; 1461 uint64_t stagecopysize, step; 1462 1463 check_mops_enabled(env, ra); 1464 1465 1466 if (move) { 1467 /* 1468 * Copy backwards if necessary. The direction for a non-overlapping 1469 * copy is IMPDEF; we choose forwards. 1470 */ 1471 if (copysize > 0x007FFFFFFFFFFFFFULL) { 1472 copysize = 0x007FFFFFFFFFFFFFULL; 1473 } 1474 uint64_t fs = extract64(fromaddr, 0, 56); 1475 uint64_t ts = extract64(toaddr, 0, 56); 1476 uint64_t fe = extract64(fromaddr + copysize, 0, 56); 1477 1478 if (fs < ts && fe > ts) { 1479 forwards = false; 1480 } 1481 } else { 1482 if (copysize > INT64_MAX) { 1483 copysize = INT64_MAX; 1484 } 1485 } 1486 1487 if (!mte_checks_needed(fromaddr, rdesc)) { 1488 rdesc = 0; 1489 } 1490 if (!mte_checks_needed(toaddr, wdesc)) { 1491 wdesc = 0; 1492 } 1493 1494 if (forwards) { 1495 stagecopysize = MIN(copysize, page_limit(toaddr)); 1496 stagecopysize = MIN(stagecopysize, page_limit(fromaddr)); 1497 while (stagecopysize) { 1498 env->xregs[rd] = toaddr; 1499 env->xregs[rs] = fromaddr; 1500 env->xregs[rn] = copysize; 1501 step = copy_step(env, toaddr, fromaddr, stagecopysize, 1502 wmemidx, rmemidx, &wdesc, &rdesc, ra); 1503 toaddr += step; 1504 fromaddr += step; 1505 copysize -= step; 1506 stagecopysize -= step; 1507 } 1508 /* Insn completed, so update registers to the Option A format */ 1509 env->xregs[rd] = toaddr + copysize; 1510 env->xregs[rs] = fromaddr + copysize; 1511 env->xregs[rn] = -copysize; 1512 } else { 1513 /* 1514 * In a reverse copy the to and from addrs in Xs and Xd are the start 1515 * of the range, but it's more convenient for us to work with pointers 1516 * to the last byte being copied. 1517 */ 1518 toaddr += copysize - 1; 1519 fromaddr += copysize - 1; 1520 stagecopysize = MIN(copysize, page_limit_rev(toaddr)); 1521 stagecopysize = MIN(stagecopysize, page_limit_rev(fromaddr)); 1522 while (stagecopysize) { 1523 env->xregs[rn] = copysize; 1524 step = copy_step_rev(env, toaddr, fromaddr, stagecopysize, 1525 wmemidx, rmemidx, &wdesc, &rdesc, ra); 1526 copysize -= step; 1527 stagecopysize -= step; 1528 toaddr -= step; 1529 fromaddr -= step; 1530 } 1531 /* 1532 * Insn completed, so update registers to the Option A format. 1533 * For a reverse copy this is no different to the CPYP input format. 1534 */ 1535 env->xregs[rn] = copysize; 1536 } 1537 1538 /* Set NZCV = 0000 to indicate we are an Option A implementation */ 1539 env->NF = 0; 1540 env->ZF = 1; /* our env->ZF encoding is inverted */ 1541 env->CF = 0; 1542 env->VF = 0; 1543 return; 1544 } 1545 1546 void HELPER(cpyp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, 1547 uint32_t rdesc) 1548 { 1549 do_cpyp(env, syndrome, wdesc, rdesc, true, GETPC()); 1550 } 1551 1552 void HELPER(cpyfp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, 1553 uint32_t rdesc) 1554 { 1555 do_cpyp(env, syndrome, wdesc, rdesc, false, GETPC()); 1556 } 1557 1558 static void do_cpym(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, 1559 uint32_t rdesc, uint32_t move, uintptr_t ra) 1560 { 1561 /* Main: we choose to copy until less than a page remaining */ 1562 CPUState *cs = env_cpu(env); 1563 int rd = mops_destreg(syndrome); 1564 int rs = mops_srcreg(syndrome); 1565 int rn = mops_sizereg(syndrome); 1566 uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX); 1567 uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX); 1568 bool forwards = true; 1569 uint64_t toaddr, fromaddr, copysize, step; 1570 1571 check_mops_enabled(env, ra); 1572 1573 /* We choose to NOP out "no data to copy" before consistency checks */ 1574 if (env->xregs[rn] == 0) { 1575 return; 1576 } 1577 1578 check_mops_wrong_option(env, syndrome, ra); 1579 1580 if (move) { 1581 forwards = (int64_t)env->xregs[rn] < 0; 1582 } 1583 1584 if (forwards) { 1585 toaddr = env->xregs[rd] + env->xregs[rn]; 1586 fromaddr = env->xregs[rs] + env->xregs[rn]; 1587 copysize = -env->xregs[rn]; 1588 } else { 1589 copysize = env->xregs[rn]; 1590 /* This toaddr and fromaddr point to the *last* byte to copy */ 1591 toaddr = env->xregs[rd] + copysize - 1; 1592 fromaddr = env->xregs[rs] + copysize - 1; 1593 } 1594 1595 if (!mte_checks_needed(fromaddr, rdesc)) { 1596 rdesc = 0; 1597 } 1598 if (!mte_checks_needed(toaddr, wdesc)) { 1599 wdesc = 0; 1600 } 1601 1602 /* Our implementation has no particular parameter requirements for CPYM */ 1603 1604 /* Do the actual memmove */ 1605 if (forwards) { 1606 while (copysize >= TARGET_PAGE_SIZE) { 1607 step = copy_step(env, toaddr, fromaddr, copysize, 1608 wmemidx, rmemidx, &wdesc, &rdesc, ra); 1609 toaddr += step; 1610 fromaddr += step; 1611 copysize -= step; 1612 env->xregs[rn] = -copysize; 1613 if (copysize >= TARGET_PAGE_SIZE && 1614 unlikely(cpu_loop_exit_requested(cs))) { 1615 cpu_loop_exit_restore(cs, ra); 1616 } 1617 } 1618 } else { 1619 while (copysize >= TARGET_PAGE_SIZE) { 1620 step = copy_step_rev(env, toaddr, fromaddr, copysize, 1621 wmemidx, rmemidx, &wdesc, &rdesc, ra); 1622 toaddr -= step; 1623 fromaddr -= step; 1624 copysize -= step; 1625 env->xregs[rn] = copysize; 1626 if (copysize >= TARGET_PAGE_SIZE && 1627 unlikely(cpu_loop_exit_requested(cs))) { 1628 cpu_loop_exit_restore(cs, ra); 1629 } 1630 } 1631 } 1632 } 1633 1634 void HELPER(cpym)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, 1635 uint32_t rdesc) 1636 { 1637 do_cpym(env, syndrome, wdesc, rdesc, true, GETPC()); 1638 } 1639 1640 void HELPER(cpyfm)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, 1641 uint32_t rdesc) 1642 { 1643 do_cpym(env, syndrome, wdesc, rdesc, false, GETPC()); 1644 } 1645 1646 static void do_cpye(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, 1647 uint32_t rdesc, uint32_t move, uintptr_t ra) 1648 { 1649 /* Epilogue: do the last partial page */ 1650 int rd = mops_destreg(syndrome); 1651 int rs = mops_srcreg(syndrome); 1652 int rn = mops_sizereg(syndrome); 1653 uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX); 1654 uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX); 1655 bool forwards = true; 1656 uint64_t toaddr, fromaddr, copysize, step; 1657 1658 check_mops_enabled(env, ra); 1659 1660 /* We choose to NOP out "no data to copy" before consistency checks */ 1661 if (env->xregs[rn] == 0) { 1662 return; 1663 } 1664 1665 check_mops_wrong_option(env, syndrome, ra); 1666 1667 if (move) { 1668 forwards = (int64_t)env->xregs[rn] < 0; 1669 } 1670 1671 if (forwards) { 1672 toaddr = env->xregs[rd] + env->xregs[rn]; 1673 fromaddr = env->xregs[rs] + env->xregs[rn]; 1674 copysize = -env->xregs[rn]; 1675 } else { 1676 copysize = env->xregs[rn]; 1677 /* This toaddr and fromaddr point to the *last* byte to copy */ 1678 toaddr = env->xregs[rd] + copysize - 1; 1679 fromaddr = env->xregs[rs] + copysize - 1; 1680 } 1681 1682 if (!mte_checks_needed(fromaddr, rdesc)) { 1683 rdesc = 0; 1684 } 1685 if (!mte_checks_needed(toaddr, wdesc)) { 1686 wdesc = 0; 1687 } 1688 1689 /* Check the size; we don't want to have do a check-for-interrupts */ 1690 if (copysize >= TARGET_PAGE_SIZE) { 1691 raise_exception_ra(env, EXCP_UDEF, syndrome, 1692 mops_mismatch_exception_target_el(env), ra); 1693 } 1694 1695 /* Do the actual memmove */ 1696 if (forwards) { 1697 while (copysize > 0) { 1698 step = copy_step(env, toaddr, fromaddr, copysize, 1699 wmemidx, rmemidx, &wdesc, &rdesc, ra); 1700 toaddr += step; 1701 fromaddr += step; 1702 copysize -= step; 1703 env->xregs[rn] = -copysize; 1704 } 1705 } else { 1706 while (copysize > 0) { 1707 step = copy_step_rev(env, toaddr, fromaddr, copysize, 1708 wmemidx, rmemidx, &wdesc, &rdesc, ra); 1709 toaddr -= step; 1710 fromaddr -= step; 1711 copysize -= step; 1712 env->xregs[rn] = copysize; 1713 } 1714 } 1715 } 1716 1717 void HELPER(cpye)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, 1718 uint32_t rdesc) 1719 { 1720 do_cpye(env, syndrome, wdesc, rdesc, true, GETPC()); 1721 } 1722 1723 void HELPER(cpyfe)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, 1724 uint32_t rdesc) 1725 { 1726 do_cpye(env, syndrome, wdesc, rdesc, false, GETPC()); 1727 } 1728 1729 static bool is_guarded_page(CPUARMState *env, target_ulong addr, uintptr_t ra) 1730 { 1731 #ifdef CONFIG_USER_ONLY 1732 return page_get_flags(addr) & PAGE_BTI; 1733 #else 1734 CPUTLBEntryFull *full; 1735 void *host; 1736 int mmu_idx = cpu_mmu_index(env_cpu(env), true); 1737 int flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx, 1738 false, &host, &full, ra); 1739 1740 assert(!(flags & TLB_INVALID_MASK)); 1741 return full->extra.arm.guarded; 1742 #endif 1743 } 1744 1745 void HELPER(guarded_page_check)(CPUARMState *env) 1746 { 1747 /* 1748 * We have already verified that bti is enabled, and that the 1749 * instruction at PC is not ok for BTYPE. This is always at 1750 * the beginning of a block, so PC is always up-to-date and 1751 * no unwind is required. 1752 */ 1753 if (is_guarded_page(env, env->pc, 0)) { 1754 raise_exception(env, EXCP_UDEF, syn_btitrap(env->btype), 1755 exception_target_el(env)); 1756 } 1757 } 1758 1759 void HELPER(guarded_page_br)(CPUARMState *env, target_ulong pc) 1760 { 1761 /* 1762 * We have already checked for branch via x16 and x17. 1763 * What remains for choosing BTYPE is checking for a guarded page. 1764 */ 1765 env->btype = is_guarded_page(env, pc, GETPC()) ? 3 : 1; 1766 } 1767