1 /* 2 * AArch64 specific helpers 3 * 4 * Copyright (c) 2013 Alexander Graf <agraf@suse.de> 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "qemu/units.h" 22 #include "cpu.h" 23 #include "gdbstub/helpers.h" 24 #include "exec/helper-proto.h" 25 #include "qemu/host-utils.h" 26 #include "qemu/log.h" 27 #include "qemu/main-loop.h" 28 #include "qemu/bitops.h" 29 #include "internals.h" 30 #include "qemu/crc32c.h" 31 #include "exec/exec-all.h" 32 #include "exec/cpu_ldst.h" 33 #include "qemu/int128.h" 34 #include "qemu/atomic128.h" 35 #include "fpu/softfloat.h" 36 #include <zlib.h> /* for crc32 */ 37 38 /* C2.4.7 Multiply and divide */ 39 /* special cases for 0 and LLONG_MIN are mandated by the standard */ 40 uint64_t HELPER(udiv64)(uint64_t num, uint64_t den) 41 { 42 if (den == 0) { 43 return 0; 44 } 45 return num / den; 46 } 47 48 int64_t HELPER(sdiv64)(int64_t num, int64_t den) 49 { 50 if (den == 0) { 51 return 0; 52 } 53 if (num == LLONG_MIN && den == -1) { 54 return LLONG_MIN; 55 } 56 return num / den; 57 } 58 59 uint64_t HELPER(rbit64)(uint64_t x) 60 { 61 return revbit64(x); 62 } 63 64 void HELPER(msr_i_spsel)(CPUARMState *env, uint32_t imm) 65 { 66 update_spsel(env, imm); 67 } 68 69 void HELPER(msr_set_allint_el1)(CPUARMState *env) 70 { 71 /* ALLINT update to PSTATE. */ 72 if (arm_hcrx_el2_eff(env) & HCRX_TALLINT) { 73 raise_exception_ra(env, EXCP_UDEF, 74 syn_aa64_sysregtrap(0, 1, 0, 4, 1, 0x1f, 0), 2, 75 GETPC()); 76 } 77 78 env->pstate |= PSTATE_ALLINT; 79 } 80 81 static void daif_check(CPUARMState *env, uint32_t op, 82 uint32_t imm, uintptr_t ra) 83 { 84 /* DAIF update to PSTATE. This is OK from EL0 only if UMA is set. */ 85 if (arm_current_el(env) == 0 && !(arm_sctlr(env, 0) & SCTLR_UMA)) { 86 raise_exception_ra(env, EXCP_UDEF, 87 syn_aa64_sysregtrap(0, extract32(op, 0, 3), 88 extract32(op, 3, 3), 4, 89 imm, 0x1f, 0), 90 exception_target_el(env), ra); 91 } 92 } 93 94 void HELPER(msr_i_daifset)(CPUARMState *env, uint32_t imm) 95 { 96 daif_check(env, 0x1e, imm, GETPC()); 97 env->daif |= (imm << 6) & PSTATE_DAIF; 98 arm_rebuild_hflags(env); 99 } 100 101 void HELPER(msr_i_daifclear)(CPUARMState *env, uint32_t imm) 102 { 103 daif_check(env, 0x1f, imm, GETPC()); 104 env->daif &= ~((imm << 6) & PSTATE_DAIF); 105 arm_rebuild_hflags(env); 106 } 107 108 /* Convert a softfloat float_relation_ (as returned by 109 * the float*_compare functions) to the correct ARM 110 * NZCV flag state. 111 */ 112 static inline uint32_t float_rel_to_flags(int res) 113 { 114 uint64_t flags; 115 switch (res) { 116 case float_relation_equal: 117 flags = PSTATE_Z | PSTATE_C; 118 break; 119 case float_relation_less: 120 flags = PSTATE_N; 121 break; 122 case float_relation_greater: 123 flags = PSTATE_C; 124 break; 125 case float_relation_unordered: 126 default: 127 flags = PSTATE_C | PSTATE_V; 128 break; 129 } 130 return flags; 131 } 132 133 uint64_t HELPER(vfp_cmph_a64)(uint32_t x, uint32_t y, float_status *fp_status) 134 { 135 return float_rel_to_flags(float16_compare_quiet(x, y, fp_status)); 136 } 137 138 uint64_t HELPER(vfp_cmpeh_a64)(uint32_t x, uint32_t y, float_status *fp_status) 139 { 140 return float_rel_to_flags(float16_compare(x, y, fp_status)); 141 } 142 143 uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, float_status *fp_status) 144 { 145 return float_rel_to_flags(float32_compare_quiet(x, y, fp_status)); 146 } 147 148 uint64_t HELPER(vfp_cmpes_a64)(float32 x, float32 y, float_status *fp_status) 149 { 150 return float_rel_to_flags(float32_compare(x, y, fp_status)); 151 } 152 153 uint64_t HELPER(vfp_cmpd_a64)(float64 x, float64 y, float_status *fp_status) 154 { 155 return float_rel_to_flags(float64_compare_quiet(x, y, fp_status)); 156 } 157 158 uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, float_status *fp_status) 159 { 160 return float_rel_to_flags(float64_compare(x, y, fp_status)); 161 } 162 163 float32 HELPER(vfp_mulxs)(float32 a, float32 b, float_status *fpst) 164 { 165 a = float32_squash_input_denormal(a, fpst); 166 b = float32_squash_input_denormal(b, fpst); 167 168 if ((float32_is_zero(a) && float32_is_infinity(b)) || 169 (float32_is_infinity(a) && float32_is_zero(b))) { 170 /* 2.0 with the sign bit set to sign(A) XOR sign(B) */ 171 return make_float32((1U << 30) | 172 ((float32_val(a) ^ float32_val(b)) & (1U << 31))); 173 } 174 return float32_mul(a, b, fpst); 175 } 176 177 float64 HELPER(vfp_mulxd)(float64 a, float64 b, float_status *fpst) 178 { 179 a = float64_squash_input_denormal(a, fpst); 180 b = float64_squash_input_denormal(b, fpst); 181 182 if ((float64_is_zero(a) && float64_is_infinity(b)) || 183 (float64_is_infinity(a) && float64_is_zero(b))) { 184 /* 2.0 with the sign bit set to sign(A) XOR sign(B) */ 185 return make_float64((1ULL << 62) | 186 ((float64_val(a) ^ float64_val(b)) & (1ULL << 63))); 187 } 188 return float64_mul(a, b, fpst); 189 } 190 191 /* 64bit/double versions of the neon float compare functions */ 192 uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, float_status *fpst) 193 { 194 return -float64_eq_quiet(a, b, fpst); 195 } 196 197 uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, float_status *fpst) 198 { 199 return -float64_le(b, a, fpst); 200 } 201 202 uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, float_status *fpst) 203 { 204 return -float64_lt(b, a, fpst); 205 } 206 207 /* Reciprocal step and sqrt step. Note that unlike the A32/T32 208 * versions, these do a fully fused multiply-add or 209 * multiply-add-and-halve. 210 */ 211 212 uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, float_status *fpst) 213 { 214 a = float16_squash_input_denormal(a, fpst); 215 b = float16_squash_input_denormal(b, fpst); 216 217 a = float16_chs(a); 218 if ((float16_is_infinity(a) && float16_is_zero(b)) || 219 (float16_is_infinity(b) && float16_is_zero(a))) { 220 return float16_two; 221 } 222 return float16_muladd(a, b, float16_two, 0, fpst); 223 } 224 225 float32 HELPER(recpsf_f32)(float32 a, float32 b, float_status *fpst) 226 { 227 a = float32_squash_input_denormal(a, fpst); 228 b = float32_squash_input_denormal(b, fpst); 229 230 a = float32_chs(a); 231 if ((float32_is_infinity(a) && float32_is_zero(b)) || 232 (float32_is_infinity(b) && float32_is_zero(a))) { 233 return float32_two; 234 } 235 return float32_muladd(a, b, float32_two, 0, fpst); 236 } 237 238 float64 HELPER(recpsf_f64)(float64 a, float64 b, float_status *fpst) 239 { 240 a = float64_squash_input_denormal(a, fpst); 241 b = float64_squash_input_denormal(b, fpst); 242 243 a = float64_chs(a); 244 if ((float64_is_infinity(a) && float64_is_zero(b)) || 245 (float64_is_infinity(b) && float64_is_zero(a))) { 246 return float64_two; 247 } 248 return float64_muladd(a, b, float64_two, 0, fpst); 249 } 250 251 uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, float_status *fpst) 252 { 253 a = float16_squash_input_denormal(a, fpst); 254 b = float16_squash_input_denormal(b, fpst); 255 256 a = float16_chs(a); 257 if ((float16_is_infinity(a) && float16_is_zero(b)) || 258 (float16_is_infinity(b) && float16_is_zero(a))) { 259 return float16_one_point_five; 260 } 261 return float16_muladd(a, b, float16_three, float_muladd_halve_result, fpst); 262 } 263 264 float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, float_status *fpst) 265 { 266 a = float32_squash_input_denormal(a, fpst); 267 b = float32_squash_input_denormal(b, fpst); 268 269 a = float32_chs(a); 270 if ((float32_is_infinity(a) && float32_is_zero(b)) || 271 (float32_is_infinity(b) && float32_is_zero(a))) { 272 return float32_one_point_five; 273 } 274 return float32_muladd(a, b, float32_three, float_muladd_halve_result, fpst); 275 } 276 277 float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, float_status *fpst) 278 { 279 a = float64_squash_input_denormal(a, fpst); 280 b = float64_squash_input_denormal(b, fpst); 281 282 a = float64_chs(a); 283 if ((float64_is_infinity(a) && float64_is_zero(b)) || 284 (float64_is_infinity(b) && float64_is_zero(a))) { 285 return float64_one_point_five; 286 } 287 return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst); 288 } 289 290 /* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */ 291 uint32_t HELPER(frecpx_f16)(uint32_t a, float_status *fpst) 292 { 293 uint16_t val16, sbit; 294 int16_t exp; 295 296 if (float16_is_any_nan(a)) { 297 float16 nan = a; 298 if (float16_is_signaling_nan(a, fpst)) { 299 float_raise(float_flag_invalid, fpst); 300 if (!fpst->default_nan_mode) { 301 nan = float16_silence_nan(a, fpst); 302 } 303 } 304 if (fpst->default_nan_mode) { 305 nan = float16_default_nan(fpst); 306 } 307 return nan; 308 } 309 310 a = float16_squash_input_denormal(a, fpst); 311 312 val16 = float16_val(a); 313 sbit = 0x8000 & val16; 314 exp = extract32(val16, 10, 5); 315 316 if (exp == 0) { 317 return make_float16(deposit32(sbit, 10, 5, 0x1e)); 318 } else { 319 return make_float16(deposit32(sbit, 10, 5, ~exp)); 320 } 321 } 322 323 float32 HELPER(frecpx_f32)(float32 a, float_status *fpst) 324 { 325 uint32_t val32, sbit; 326 int32_t exp; 327 328 if (float32_is_any_nan(a)) { 329 float32 nan = a; 330 if (float32_is_signaling_nan(a, fpst)) { 331 float_raise(float_flag_invalid, fpst); 332 if (!fpst->default_nan_mode) { 333 nan = float32_silence_nan(a, fpst); 334 } 335 } 336 if (fpst->default_nan_mode) { 337 nan = float32_default_nan(fpst); 338 } 339 return nan; 340 } 341 342 a = float32_squash_input_denormal(a, fpst); 343 344 val32 = float32_val(a); 345 sbit = 0x80000000ULL & val32; 346 exp = extract32(val32, 23, 8); 347 348 if (exp == 0) { 349 return make_float32(sbit | (0xfe << 23)); 350 } else { 351 return make_float32(sbit | (~exp & 0xff) << 23); 352 } 353 } 354 355 float64 HELPER(frecpx_f64)(float64 a, float_status *fpst) 356 { 357 uint64_t val64, sbit; 358 int64_t exp; 359 360 if (float64_is_any_nan(a)) { 361 float64 nan = a; 362 if (float64_is_signaling_nan(a, fpst)) { 363 float_raise(float_flag_invalid, fpst); 364 if (!fpst->default_nan_mode) { 365 nan = float64_silence_nan(a, fpst); 366 } 367 } 368 if (fpst->default_nan_mode) { 369 nan = float64_default_nan(fpst); 370 } 371 return nan; 372 } 373 374 a = float64_squash_input_denormal(a, fpst); 375 376 val64 = float64_val(a); 377 sbit = 0x8000000000000000ULL & val64; 378 exp = extract64(float64_val(a), 52, 11); 379 380 if (exp == 0) { 381 return make_float64(sbit | (0x7feULL << 52)); 382 } else { 383 return make_float64(sbit | (~exp & 0x7ffULL) << 52); 384 } 385 } 386 387 float32 HELPER(fcvtx_f64_to_f32)(float64 a, float_status *fpst) 388 { 389 float32 r; 390 int old = get_float_rounding_mode(fpst); 391 392 set_float_rounding_mode(float_round_to_odd, fpst); 393 r = float64_to_float32(a, fpst); 394 set_float_rounding_mode(old, fpst); 395 return r; 396 } 397 398 /* 64-bit versions of the CRC helpers. Note that although the operation 399 * (and the prototypes of crc32c() and crc32() mean that only the bottom 400 * 32 bits of the accumulator and result are used, we pass and return 401 * uint64_t for convenience of the generated code. Unlike the 32-bit 402 * instruction set versions, val may genuinely have 64 bits of data in it. 403 * The upper bytes of val (above the number specified by 'bytes') must have 404 * been zeroed out by the caller. 405 */ 406 uint64_t HELPER(crc32_64)(uint64_t acc, uint64_t val, uint32_t bytes) 407 { 408 uint8_t buf[8]; 409 410 stq_le_p(buf, val); 411 412 /* zlib crc32 converts the accumulator and output to one's complement. */ 413 return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff; 414 } 415 416 uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes) 417 { 418 uint8_t buf[8]; 419 420 stq_le_p(buf, val); 421 422 /* Linux crc32c converts the output to one's complement. */ 423 return crc32c(acc, buf, bytes) ^ 0xffffffff; 424 } 425 426 /* 427 * AdvSIMD half-precision 428 */ 429 430 #define ADVSIMD_HELPER(name, suffix) HELPER(glue(glue(advsimd_, name), suffix)) 431 432 #define ADVSIMD_HALFOP(name) \ 433 uint32_t ADVSIMD_HELPER(name, h)(uint32_t a, uint32_t b, float_status *fpst) \ 434 { \ 435 return float16_ ## name(a, b, fpst); \ 436 } 437 438 ADVSIMD_HALFOP(add) 439 ADVSIMD_HALFOP(sub) 440 ADVSIMD_HALFOP(mul) 441 ADVSIMD_HALFOP(div) 442 ADVSIMD_HALFOP(min) 443 ADVSIMD_HALFOP(max) 444 ADVSIMD_HALFOP(minnum) 445 ADVSIMD_HALFOP(maxnum) 446 447 #define ADVSIMD_TWOHALFOP(name) \ 448 uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b, \ 449 float_status *fpst) \ 450 { \ 451 float16 a1, a2, b1, b2; \ 452 uint32_t r1, r2; \ 453 a1 = extract32(two_a, 0, 16); \ 454 a2 = extract32(two_a, 16, 16); \ 455 b1 = extract32(two_b, 0, 16); \ 456 b2 = extract32(two_b, 16, 16); \ 457 r1 = float16_ ## name(a1, b1, fpst); \ 458 r2 = float16_ ## name(a2, b2, fpst); \ 459 return deposit32(r1, 16, 16, r2); \ 460 } 461 462 ADVSIMD_TWOHALFOP(add) 463 ADVSIMD_TWOHALFOP(sub) 464 ADVSIMD_TWOHALFOP(mul) 465 ADVSIMD_TWOHALFOP(div) 466 ADVSIMD_TWOHALFOP(min) 467 ADVSIMD_TWOHALFOP(max) 468 ADVSIMD_TWOHALFOP(minnum) 469 ADVSIMD_TWOHALFOP(maxnum) 470 471 /* Data processing - scalar floating-point and advanced SIMD */ 472 static float16 float16_mulx(float16 a, float16 b, float_status *fpst) 473 { 474 a = float16_squash_input_denormal(a, fpst); 475 b = float16_squash_input_denormal(b, fpst); 476 477 if ((float16_is_zero(a) && float16_is_infinity(b)) || 478 (float16_is_infinity(a) && float16_is_zero(b))) { 479 /* 2.0 with the sign bit set to sign(A) XOR sign(B) */ 480 return make_float16((1U << 14) | 481 ((float16_val(a) ^ float16_val(b)) & (1U << 15))); 482 } 483 return float16_mul(a, b, fpst); 484 } 485 486 ADVSIMD_HALFOP(mulx) 487 ADVSIMD_TWOHALFOP(mulx) 488 489 /* fused multiply-accumulate */ 490 uint32_t HELPER(advsimd_muladdh)(uint32_t a, uint32_t b, uint32_t c, 491 float_status *fpst) 492 { 493 return float16_muladd(a, b, c, 0, fpst); 494 } 495 496 uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b, 497 uint32_t two_c, float_status *fpst) 498 { 499 float16 a1, a2, b1, b2, c1, c2; 500 uint32_t r1, r2; 501 a1 = extract32(two_a, 0, 16); 502 a2 = extract32(two_a, 16, 16); 503 b1 = extract32(two_b, 0, 16); 504 b2 = extract32(two_b, 16, 16); 505 c1 = extract32(two_c, 0, 16); 506 c2 = extract32(two_c, 16, 16); 507 r1 = float16_muladd(a1, b1, c1, 0, fpst); 508 r2 = float16_muladd(a2, b2, c2, 0, fpst); 509 return deposit32(r1, 16, 16, r2); 510 } 511 512 /* 513 * Floating point comparisons produce an integer result. Softfloat 514 * routines return float_relation types which we convert to the 0/-1 515 * Neon requires. 516 */ 517 518 #define ADVSIMD_CMPRES(test) (test) ? 0xffff : 0 519 520 uint32_t HELPER(advsimd_ceq_f16)(uint32_t a, uint32_t b, float_status *fpst) 521 { 522 int compare = float16_compare_quiet(a, b, fpst); 523 return ADVSIMD_CMPRES(compare == float_relation_equal); 524 } 525 526 uint32_t HELPER(advsimd_cge_f16)(uint32_t a, uint32_t b, float_status *fpst) 527 { 528 int compare = float16_compare(a, b, fpst); 529 return ADVSIMD_CMPRES(compare == float_relation_greater || 530 compare == float_relation_equal); 531 } 532 533 uint32_t HELPER(advsimd_cgt_f16)(uint32_t a, uint32_t b, float_status *fpst) 534 { 535 int compare = float16_compare(a, b, fpst); 536 return ADVSIMD_CMPRES(compare == float_relation_greater); 537 } 538 539 uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, float_status *fpst) 540 { 541 float16 f0 = float16_abs(a); 542 float16 f1 = float16_abs(b); 543 int compare = float16_compare(f0, f1, fpst); 544 return ADVSIMD_CMPRES(compare == float_relation_greater || 545 compare == float_relation_equal); 546 } 547 548 uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, float_status *fpst) 549 { 550 float16 f0 = float16_abs(a); 551 float16 f1 = float16_abs(b); 552 int compare = float16_compare(f0, f1, fpst); 553 return ADVSIMD_CMPRES(compare == float_relation_greater); 554 } 555 556 /* round to integral */ 557 uint32_t HELPER(advsimd_rinth_exact)(uint32_t x, float_status *fp_status) 558 { 559 return float16_round_to_int(x, fp_status); 560 } 561 562 uint32_t HELPER(advsimd_rinth)(uint32_t x, float_status *fp_status) 563 { 564 int old_flags = get_float_exception_flags(fp_status), new_flags; 565 float16 ret; 566 567 ret = float16_round_to_int(x, fp_status); 568 569 /* Suppress any inexact exceptions the conversion produced */ 570 if (!(old_flags & float_flag_inexact)) { 571 new_flags = get_float_exception_flags(fp_status); 572 set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status); 573 } 574 575 return ret; 576 } 577 578 static int el_from_spsr(uint32_t spsr) 579 { 580 /* Return the exception level that this SPSR is requesting a return to, 581 * or -1 if it is invalid (an illegal return) 582 */ 583 if (spsr & PSTATE_nRW) { 584 switch (spsr & CPSR_M) { 585 case ARM_CPU_MODE_USR: 586 return 0; 587 case ARM_CPU_MODE_HYP: 588 return 2; 589 case ARM_CPU_MODE_FIQ: 590 case ARM_CPU_MODE_IRQ: 591 case ARM_CPU_MODE_SVC: 592 case ARM_CPU_MODE_ABT: 593 case ARM_CPU_MODE_UND: 594 case ARM_CPU_MODE_SYS: 595 return 1; 596 case ARM_CPU_MODE_MON: 597 /* Returning to Mon from AArch64 is never possible, 598 * so this is an illegal return. 599 */ 600 default: 601 return -1; 602 } 603 } else { 604 if (extract32(spsr, 1, 1)) { 605 /* Return with reserved M[1] bit set */ 606 return -1; 607 } 608 if (extract32(spsr, 0, 4) == 1) { 609 /* return to EL0 with M[0] bit set */ 610 return -1; 611 } 612 return extract32(spsr, 2, 2); 613 } 614 } 615 616 static void cpsr_write_from_spsr_elx(CPUARMState *env, 617 uint32_t val) 618 { 619 uint32_t mask; 620 621 /* Save SPSR_ELx.SS into PSTATE. */ 622 env->pstate = (env->pstate & ~PSTATE_SS) | (val & PSTATE_SS); 623 val &= ~PSTATE_SS; 624 625 /* Move DIT to the correct location for CPSR */ 626 if (val & PSTATE_DIT) { 627 val &= ~PSTATE_DIT; 628 val |= CPSR_DIT; 629 } 630 631 mask = aarch32_cpsr_valid_mask(env->features, \ 632 &env_archcpu(env)->isar); 633 cpsr_write(env, val, mask, CPSRWriteRaw); 634 } 635 636 void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc) 637 { 638 int cur_el = arm_current_el(env); 639 unsigned int spsr_idx = aarch64_banked_spsr_index(cur_el); 640 uint32_t spsr = env->banked_spsr[spsr_idx]; 641 int new_el; 642 bool return_to_aa64 = (spsr & PSTATE_nRW) == 0; 643 644 aarch64_save_sp(env, cur_el); 645 646 arm_clear_exclusive(env); 647 648 /* We must squash the PSTATE.SS bit to zero unless both of the 649 * following hold: 650 * 1. debug exceptions are currently disabled 651 * 2. singlestep will be active in the EL we return to 652 * We check 1 here and 2 after we've done the pstate/cpsr write() to 653 * transition to the EL we're going to. 654 */ 655 if (arm_generate_debug_exceptions(env)) { 656 spsr &= ~PSTATE_SS; 657 } 658 659 /* 660 * FEAT_RME forbids return from EL3 with an invalid security state. 661 * We don't need an explicit check for FEAT_RME here because we enforce 662 * in scr_write() that you can't set the NSE bit without it. 663 */ 664 if (cur_el == 3 && (env->cp15.scr_el3 & (SCR_NS | SCR_NSE)) == SCR_NSE) { 665 goto illegal_return; 666 } 667 668 new_el = el_from_spsr(spsr); 669 if (new_el == -1) { 670 goto illegal_return; 671 } 672 if (new_el > cur_el || (new_el == 2 && !arm_is_el2_enabled(env))) { 673 /* Disallow return to an EL which is unimplemented or higher 674 * than the current one. 675 */ 676 goto illegal_return; 677 } 678 679 if (new_el != 0 && arm_el_is_aa64(env, new_el) != return_to_aa64) { 680 /* Return to an EL which is configured for a different register width */ 681 goto illegal_return; 682 } 683 684 if (new_el == 1 && (arm_hcr_el2_eff(env) & HCR_TGE)) { 685 goto illegal_return; 686 } 687 688 bql_lock(); 689 arm_call_pre_el_change_hook(env_archcpu(env)); 690 bql_unlock(); 691 692 if (!return_to_aa64) { 693 env->aarch64 = false; 694 /* We do a raw CPSR write because aarch64_sync_64_to_32() 695 * will sort the register banks out for us, and we've already 696 * caught all the bad-mode cases in el_from_spsr(). 697 */ 698 cpsr_write_from_spsr_elx(env, spsr); 699 if (!arm_singlestep_active(env)) { 700 env->pstate &= ~PSTATE_SS; 701 } 702 aarch64_sync_64_to_32(env); 703 704 if (spsr & CPSR_T) { 705 env->regs[15] = new_pc & ~0x1; 706 } else { 707 env->regs[15] = new_pc & ~0x3; 708 } 709 helper_rebuild_hflags_a32(env, new_el); 710 qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to " 711 "AArch32 EL%d PC 0x%" PRIx32 "\n", 712 cur_el, new_el, env->regs[15]); 713 } else { 714 int tbii; 715 716 env->aarch64 = true; 717 spsr &= aarch64_pstate_valid_mask(&env_archcpu(env)->isar); 718 pstate_write(env, spsr); 719 if (!arm_singlestep_active(env)) { 720 env->pstate &= ~PSTATE_SS; 721 } 722 aarch64_restore_sp(env, new_el); 723 helper_rebuild_hflags_a64(env, new_el); 724 725 /* 726 * Apply TBI to the exception return address. We had to delay this 727 * until after we selected the new EL, so that we could select the 728 * correct TBI+TBID bits. This is made easier by waiting until after 729 * the hflags rebuild, since we can pull the composite TBII field 730 * from there. 731 */ 732 tbii = EX_TBFLAG_A64(env->hflags, TBII); 733 if ((tbii >> extract64(new_pc, 55, 1)) & 1) { 734 /* TBI is enabled. */ 735 int core_mmu_idx = arm_env_mmu_index(env); 736 if (regime_has_2_ranges(core_to_aa64_mmu_idx(core_mmu_idx))) { 737 new_pc = sextract64(new_pc, 0, 56); 738 } else { 739 new_pc = extract64(new_pc, 0, 56); 740 } 741 } 742 env->pc = new_pc; 743 744 qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to " 745 "AArch64 EL%d PC 0x%" PRIx64 "\n", 746 cur_el, new_el, env->pc); 747 } 748 749 /* 750 * Note that cur_el can never be 0. If new_el is 0, then 751 * el0_a64 is return_to_aa64, else el0_a64 is ignored. 752 */ 753 aarch64_sve_change_el(env, cur_el, new_el, return_to_aa64); 754 755 bql_lock(); 756 arm_call_el_change_hook(env_archcpu(env)); 757 bql_unlock(); 758 759 return; 760 761 illegal_return: 762 /* Illegal return events of various kinds have architecturally 763 * mandated behaviour: 764 * restore NZCV and DAIF from SPSR_ELx 765 * set PSTATE.IL 766 * restore PC from ELR_ELx 767 * no change to exception level, execution state or stack pointer 768 */ 769 env->pstate |= PSTATE_IL; 770 env->pc = new_pc; 771 spsr &= PSTATE_NZCV | PSTATE_DAIF | PSTATE_ALLINT; 772 spsr |= pstate_read(env) & ~(PSTATE_NZCV | PSTATE_DAIF | PSTATE_ALLINT); 773 pstate_write(env, spsr); 774 if (!arm_singlestep_active(env)) { 775 env->pstate &= ~PSTATE_SS; 776 } 777 helper_rebuild_hflags_a64(env, cur_el); 778 qemu_log_mask(LOG_GUEST_ERROR, "Illegal exception return at EL%d: " 779 "resuming execution at 0x%" PRIx64 "\n", cur_el, env->pc); 780 } 781 782 void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in) 783 { 784 uintptr_t ra = GETPC(); 785 786 /* 787 * Implement DC ZVA, which zeroes a fixed-length block of memory. 788 * Note that we do not implement the (architecturally mandated) 789 * alignment fault for attempts to use this on Device memory 790 * (which matches the usual QEMU behaviour of not implementing either 791 * alignment faults or any memory attribute handling). 792 */ 793 int blocklen = 4 << env_archcpu(env)->dcz_blocksize; 794 uint64_t vaddr = vaddr_in & ~(blocklen - 1); 795 int mmu_idx = arm_env_mmu_index(env); 796 void *mem; 797 798 /* 799 * Trapless lookup. In addition to actual invalid page, may 800 * return NULL for I/O, watchpoints, clean pages, etc. 801 */ 802 mem = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx); 803 804 #ifndef CONFIG_USER_ONLY 805 if (unlikely(!mem)) { 806 /* 807 * Trap if accessing an invalid page. DC_ZVA requires that we supply 808 * the original pointer for an invalid page. But watchpoints require 809 * that we probe the actual space. So do both. 810 */ 811 (void) probe_write(env, vaddr_in, 1, mmu_idx, ra); 812 mem = probe_write(env, vaddr, blocklen, mmu_idx, ra); 813 814 if (unlikely(!mem)) { 815 /* 816 * The only remaining reason for mem == NULL is I/O. 817 * Just do a series of byte writes as the architecture demands. 818 */ 819 for (int i = 0; i < blocklen; i++) { 820 cpu_stb_mmuidx_ra(env, vaddr + i, 0, mmu_idx, ra); 821 } 822 return; 823 } 824 } 825 #endif 826 827 set_helper_retaddr(ra); 828 memset(mem, 0, blocklen); 829 clear_helper_retaddr(); 830 } 831 832 void HELPER(unaligned_access)(CPUARMState *env, uint64_t addr, 833 uint32_t access_type, uint32_t mmu_idx) 834 { 835 arm_cpu_do_unaligned_access(env_cpu(env), addr, access_type, 836 mmu_idx, GETPC()); 837 } 838 839 /* Memory operations (memset, memmove, memcpy) */ 840 841 /* 842 * Return true if the CPY* and SET* insns can execute; compare 843 * pseudocode CheckMOPSEnabled(), though we refactor it a little. 844 */ 845 static bool mops_enabled(CPUARMState *env) 846 { 847 int el = arm_current_el(env); 848 849 if (el < 2 && 850 (arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE) && 851 !(arm_hcrx_el2_eff(env) & HCRX_MSCEN)) { 852 return false; 853 } 854 855 if (el == 0) { 856 if (!el_is_in_host(env, 0)) { 857 return env->cp15.sctlr_el[1] & SCTLR_MSCEN; 858 } else { 859 return env->cp15.sctlr_el[2] & SCTLR_MSCEN; 860 } 861 } 862 return true; 863 } 864 865 static void check_mops_enabled(CPUARMState *env, uintptr_t ra) 866 { 867 if (!mops_enabled(env)) { 868 raise_exception_ra(env, EXCP_UDEF, syn_uncategorized(), 869 exception_target_el(env), ra); 870 } 871 } 872 873 /* 874 * Return the target exception level for an exception due 875 * to mismatched arguments in a FEAT_MOPS copy or set. 876 * Compare pseudocode MismatchedCpySetTargetEL() 877 */ 878 static int mops_mismatch_exception_target_el(CPUARMState *env) 879 { 880 int el = arm_current_el(env); 881 882 if (el > 1) { 883 return el; 884 } 885 if (el == 0 && (arm_hcr_el2_eff(env) & HCR_TGE)) { 886 return 2; 887 } 888 if (el == 1 && (arm_hcrx_el2_eff(env) & HCRX_MCE2)) { 889 return 2; 890 } 891 return 1; 892 } 893 894 /* 895 * Check whether an M or E instruction was executed with a CF value 896 * indicating the wrong option for this implementation. 897 * Assumes we are always Option A. 898 */ 899 static void check_mops_wrong_option(CPUARMState *env, uint32_t syndrome, 900 uintptr_t ra) 901 { 902 if (env->CF != 0) { 903 syndrome |= 1 << 17; /* Set the wrong-option bit */ 904 raise_exception_ra(env, EXCP_UDEF, syndrome, 905 mops_mismatch_exception_target_el(env), ra); 906 } 907 } 908 909 /* 910 * Return the maximum number of bytes we can transfer starting at addr 911 * without crossing a page boundary. 912 */ 913 static uint64_t page_limit(uint64_t addr) 914 { 915 return TARGET_PAGE_ALIGN(addr + 1) - addr; 916 } 917 918 /* 919 * Return the number of bytes we can copy starting from addr and working 920 * backwards without crossing a page boundary. 921 */ 922 static uint64_t page_limit_rev(uint64_t addr) 923 { 924 return (addr & ~TARGET_PAGE_MASK) + 1; 925 } 926 927 /* 928 * Perform part of a memory set on an area of guest memory starting at 929 * toaddr (a dirty address) and extending for setsize bytes. 930 * 931 * Returns the number of bytes actually set, which might be less than 932 * setsize; the caller should loop until the whole set has been done. 933 * The caller should ensure that the guest registers are correct 934 * for the possibility that the first byte of the set encounters 935 * an exception or watchpoint. We guarantee not to take any faults 936 * for bytes other than the first. 937 */ 938 static uint64_t set_step(CPUARMState *env, uint64_t toaddr, 939 uint64_t setsize, uint32_t data, int memidx, 940 uint32_t *mtedesc, uintptr_t ra) 941 { 942 void *mem; 943 944 setsize = MIN(setsize, page_limit(toaddr)); 945 if (*mtedesc) { 946 uint64_t mtesize = mte_mops_probe(env, toaddr, setsize, *mtedesc); 947 if (mtesize == 0) { 948 /* Trap, or not. All CPU state is up to date */ 949 mte_check_fail(env, *mtedesc, toaddr, ra); 950 /* Continue, with no further MTE checks required */ 951 *mtedesc = 0; 952 } else { 953 /* Advance to the end, or to the tag mismatch */ 954 setsize = MIN(setsize, mtesize); 955 } 956 } 957 958 toaddr = useronly_clean_ptr(toaddr); 959 /* 960 * Trapless lookup: returns NULL for invalid page, I/O, 961 * watchpoints, clean pages, etc. 962 */ 963 mem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, memidx); 964 965 #ifndef CONFIG_USER_ONLY 966 if (unlikely(!mem)) { 967 /* 968 * Slow-path: just do one byte write. This will handle the 969 * watchpoint, invalid page, etc handling correctly. 970 * For clean code pages, the next iteration will see 971 * the page dirty and will use the fast path. 972 */ 973 cpu_stb_mmuidx_ra(env, toaddr, data, memidx, ra); 974 return 1; 975 } 976 #endif 977 /* Easy case: just memset the host memory */ 978 set_helper_retaddr(ra); 979 memset(mem, data, setsize); 980 clear_helper_retaddr(); 981 return setsize; 982 } 983 984 /* 985 * Similar, but setting tags. The architecture requires us to do this 986 * in 16-byte chunks. SETP accesses are not tag checked; they set 987 * the tags. 988 */ 989 static uint64_t set_step_tags(CPUARMState *env, uint64_t toaddr, 990 uint64_t setsize, uint32_t data, int memidx, 991 uint32_t *mtedesc, uintptr_t ra) 992 { 993 void *mem; 994 uint64_t cleanaddr; 995 996 setsize = MIN(setsize, page_limit(toaddr)); 997 998 cleanaddr = useronly_clean_ptr(toaddr); 999 /* 1000 * Trapless lookup: returns NULL for invalid page, I/O, 1001 * watchpoints, clean pages, etc. 1002 */ 1003 mem = tlb_vaddr_to_host(env, cleanaddr, MMU_DATA_STORE, memidx); 1004 1005 #ifndef CONFIG_USER_ONLY 1006 if (unlikely(!mem)) { 1007 /* 1008 * Slow-path: just do one write. This will handle the 1009 * watchpoint, invalid page, etc handling correctly. 1010 * The architecture requires that we do 16 bytes at a time, 1011 * and we know both ptr and size are 16 byte aligned. 1012 * For clean code pages, the next iteration will see 1013 * the page dirty and will use the fast path. 1014 */ 1015 uint64_t repldata = data * 0x0101010101010101ULL; 1016 MemOpIdx oi16 = make_memop_idx(MO_TE | MO_128, memidx); 1017 cpu_st16_mmu(env, toaddr, int128_make128(repldata, repldata), oi16, ra); 1018 mte_mops_set_tags(env, toaddr, 16, *mtedesc); 1019 return 16; 1020 } 1021 #endif 1022 /* Easy case: just memset the host memory */ 1023 set_helper_retaddr(ra); 1024 memset(mem, data, setsize); 1025 clear_helper_retaddr(); 1026 mte_mops_set_tags(env, toaddr, setsize, *mtedesc); 1027 return setsize; 1028 } 1029 1030 typedef uint64_t StepFn(CPUARMState *env, uint64_t toaddr, 1031 uint64_t setsize, uint32_t data, 1032 int memidx, uint32_t *mtedesc, uintptr_t ra); 1033 1034 /* Extract register numbers from a MOPS exception syndrome value */ 1035 static int mops_destreg(uint32_t syndrome) 1036 { 1037 return extract32(syndrome, 10, 5); 1038 } 1039 1040 static int mops_srcreg(uint32_t syndrome) 1041 { 1042 return extract32(syndrome, 5, 5); 1043 } 1044 1045 static int mops_sizereg(uint32_t syndrome) 1046 { 1047 return extract32(syndrome, 0, 5); 1048 } 1049 1050 /* 1051 * Return true if TCMA and TBI bits mean we need to do MTE checks. 1052 * We only need to do this once per MOPS insn, not for every page. 1053 */ 1054 static bool mte_checks_needed(uint64_t ptr, uint32_t desc) 1055 { 1056 int bit55 = extract64(ptr, 55, 1); 1057 1058 /* 1059 * Note that tbi_check() returns true for "access checked" but 1060 * tcma_check() returns true for "access unchecked". 1061 */ 1062 if (!tbi_check(desc, bit55)) { 1063 return false; 1064 } 1065 return !tcma_check(desc, bit55, allocation_tag_from_addr(ptr)); 1066 } 1067 1068 /* Take an exception if the SETG addr/size are not granule aligned */ 1069 static void check_setg_alignment(CPUARMState *env, uint64_t ptr, uint64_t size, 1070 uint32_t memidx, uintptr_t ra) 1071 { 1072 if ((size != 0 && !QEMU_IS_ALIGNED(ptr, TAG_GRANULE)) || 1073 !QEMU_IS_ALIGNED(size, TAG_GRANULE)) { 1074 arm_cpu_do_unaligned_access(env_cpu(env), ptr, MMU_DATA_STORE, 1075 memidx, ra); 1076 1077 } 1078 } 1079 1080 static uint64_t arm_reg_or_xzr(CPUARMState *env, int reg) 1081 { 1082 /* 1083 * Runtime equivalent of cpu_reg() -- return the CPU register value, 1084 * for contexts when index 31 means XZR (not SP). 1085 */ 1086 return reg == 31 ? 0 : env->xregs[reg]; 1087 } 1088 1089 /* 1090 * For the Memory Set operation, our implementation chooses 1091 * always to use "option A", where we update Xd to the final 1092 * address in the SETP insn, and set Xn to be -(bytes remaining). 1093 * On SETM and SETE insns we only need update Xn. 1094 * 1095 * @env: CPU 1096 * @syndrome: syndrome value for mismatch exceptions 1097 * (also contains the register numbers we need to use) 1098 * @mtedesc: MTE descriptor word 1099 * @stepfn: function which does a single part of the set operation 1100 * @is_setg: true if this is the tag-setting SETG variant 1101 */ 1102 static void do_setp(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc, 1103 StepFn *stepfn, bool is_setg, uintptr_t ra) 1104 { 1105 /* Prologue: we choose to do up to the next page boundary */ 1106 int rd = mops_destreg(syndrome); 1107 int rs = mops_srcreg(syndrome); 1108 int rn = mops_sizereg(syndrome); 1109 uint8_t data = arm_reg_or_xzr(env, rs); 1110 uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX); 1111 uint64_t toaddr = env->xregs[rd]; 1112 uint64_t setsize = env->xregs[rn]; 1113 uint64_t stagesetsize, step; 1114 1115 check_mops_enabled(env, ra); 1116 1117 if (setsize > INT64_MAX) { 1118 setsize = INT64_MAX; 1119 if (is_setg) { 1120 setsize &= ~0xf; 1121 } 1122 } 1123 1124 if (unlikely(is_setg)) { 1125 check_setg_alignment(env, toaddr, setsize, memidx, ra); 1126 } else if (!mte_checks_needed(toaddr, mtedesc)) { 1127 mtedesc = 0; 1128 } 1129 1130 stagesetsize = MIN(setsize, page_limit(toaddr)); 1131 while (stagesetsize) { 1132 env->xregs[rd] = toaddr; 1133 env->xregs[rn] = setsize; 1134 step = stepfn(env, toaddr, stagesetsize, data, memidx, &mtedesc, ra); 1135 toaddr += step; 1136 setsize -= step; 1137 stagesetsize -= step; 1138 } 1139 /* Insn completed, so update registers to the Option A format */ 1140 env->xregs[rd] = toaddr + setsize; 1141 env->xregs[rn] = -setsize; 1142 1143 /* Set NZCV = 0000 to indicate we are an Option A implementation */ 1144 env->NF = 0; 1145 env->ZF = 1; /* our env->ZF encoding is inverted */ 1146 env->CF = 0; 1147 env->VF = 0; 1148 return; 1149 } 1150 1151 void HELPER(setp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) 1152 { 1153 do_setp(env, syndrome, mtedesc, set_step, false, GETPC()); 1154 } 1155 1156 void HELPER(setgp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) 1157 { 1158 do_setp(env, syndrome, mtedesc, set_step_tags, true, GETPC()); 1159 } 1160 1161 static void do_setm(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc, 1162 StepFn *stepfn, bool is_setg, uintptr_t ra) 1163 { 1164 /* Main: we choose to do all the full-page chunks */ 1165 CPUState *cs = env_cpu(env); 1166 int rd = mops_destreg(syndrome); 1167 int rs = mops_srcreg(syndrome); 1168 int rn = mops_sizereg(syndrome); 1169 uint8_t data = arm_reg_or_xzr(env, rs); 1170 uint64_t toaddr = env->xregs[rd] + env->xregs[rn]; 1171 uint64_t setsize = -env->xregs[rn]; 1172 uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX); 1173 uint64_t step, stagesetsize; 1174 1175 check_mops_enabled(env, ra); 1176 1177 /* 1178 * We're allowed to NOP out "no data to copy" before the consistency 1179 * checks; we choose to do so. 1180 */ 1181 if (env->xregs[rn] == 0) { 1182 return; 1183 } 1184 1185 check_mops_wrong_option(env, syndrome, ra); 1186 1187 /* 1188 * Our implementation will work fine even if we have an unaligned 1189 * destination address, and because we update Xn every time around 1190 * the loop below and the return value from stepfn() may be less 1191 * than requested, we might find toaddr is unaligned. So we don't 1192 * have an IMPDEF check for alignment here. 1193 */ 1194 1195 if (unlikely(is_setg)) { 1196 check_setg_alignment(env, toaddr, setsize, memidx, ra); 1197 } else if (!mte_checks_needed(toaddr, mtedesc)) { 1198 mtedesc = 0; 1199 } 1200 1201 /* Do the actual memset: we leave the last partial page to SETE */ 1202 stagesetsize = setsize & TARGET_PAGE_MASK; 1203 while (stagesetsize > 0) { 1204 step = stepfn(env, toaddr, stagesetsize, data, memidx, &mtedesc, ra); 1205 toaddr += step; 1206 setsize -= step; 1207 stagesetsize -= step; 1208 env->xregs[rn] = -setsize; 1209 if (stagesetsize > 0 && unlikely(cpu_loop_exit_requested(cs))) { 1210 cpu_loop_exit_restore(cs, ra); 1211 } 1212 } 1213 } 1214 1215 void HELPER(setm)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) 1216 { 1217 do_setm(env, syndrome, mtedesc, set_step, false, GETPC()); 1218 } 1219 1220 void HELPER(setgm)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) 1221 { 1222 do_setm(env, syndrome, mtedesc, set_step_tags, true, GETPC()); 1223 } 1224 1225 static void do_sete(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc, 1226 StepFn *stepfn, bool is_setg, uintptr_t ra) 1227 { 1228 /* Epilogue: do the last partial page */ 1229 int rd = mops_destreg(syndrome); 1230 int rs = mops_srcreg(syndrome); 1231 int rn = mops_sizereg(syndrome); 1232 uint8_t data = arm_reg_or_xzr(env, rs); 1233 uint64_t toaddr = env->xregs[rd] + env->xregs[rn]; 1234 uint64_t setsize = -env->xregs[rn]; 1235 uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX); 1236 uint64_t step; 1237 1238 check_mops_enabled(env, ra); 1239 1240 /* 1241 * We're allowed to NOP out "no data to copy" before the consistency 1242 * checks; we choose to do so. 1243 */ 1244 if (setsize == 0) { 1245 return; 1246 } 1247 1248 check_mops_wrong_option(env, syndrome, ra); 1249 1250 /* 1251 * Our implementation has no address alignment requirements, but 1252 * we do want to enforce the "less than a page" size requirement, 1253 * so we don't need to have the "check for interrupts" here. 1254 */ 1255 if (setsize >= TARGET_PAGE_SIZE) { 1256 raise_exception_ra(env, EXCP_UDEF, syndrome, 1257 mops_mismatch_exception_target_el(env), ra); 1258 } 1259 1260 if (unlikely(is_setg)) { 1261 check_setg_alignment(env, toaddr, setsize, memidx, ra); 1262 } else if (!mte_checks_needed(toaddr, mtedesc)) { 1263 mtedesc = 0; 1264 } 1265 1266 /* Do the actual memset */ 1267 while (setsize > 0) { 1268 step = stepfn(env, toaddr, setsize, data, memidx, &mtedesc, ra); 1269 toaddr += step; 1270 setsize -= step; 1271 env->xregs[rn] = -setsize; 1272 } 1273 } 1274 1275 void HELPER(sete)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) 1276 { 1277 do_sete(env, syndrome, mtedesc, set_step, false, GETPC()); 1278 } 1279 1280 void HELPER(setge)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) 1281 { 1282 do_sete(env, syndrome, mtedesc, set_step_tags, true, GETPC()); 1283 } 1284 1285 /* 1286 * Perform part of a memory copy from the guest memory at fromaddr 1287 * and extending for copysize bytes, to the guest memory at 1288 * toaddr. Both addresses are dirty. 1289 * 1290 * Returns the number of bytes actually set, which might be less than 1291 * copysize; the caller should loop until the whole copy has been done. 1292 * The caller should ensure that the guest registers are correct 1293 * for the possibility that the first byte of the copy encounters 1294 * an exception or watchpoint. We guarantee not to take any faults 1295 * for bytes other than the first. 1296 */ 1297 static uint64_t copy_step(CPUARMState *env, uint64_t toaddr, uint64_t fromaddr, 1298 uint64_t copysize, int wmemidx, int rmemidx, 1299 uint32_t *wdesc, uint32_t *rdesc, uintptr_t ra) 1300 { 1301 void *rmem; 1302 void *wmem; 1303 1304 /* Don't cross a page boundary on either source or destination */ 1305 copysize = MIN(copysize, page_limit(toaddr)); 1306 copysize = MIN(copysize, page_limit(fromaddr)); 1307 /* 1308 * Handle MTE tag checks: either handle the tag mismatch for byte 0, 1309 * or else copy up to but not including the byte with the mismatch. 1310 */ 1311 if (*rdesc) { 1312 uint64_t mtesize = mte_mops_probe(env, fromaddr, copysize, *rdesc); 1313 if (mtesize == 0) { 1314 mte_check_fail(env, *rdesc, fromaddr, ra); 1315 *rdesc = 0; 1316 } else { 1317 copysize = MIN(copysize, mtesize); 1318 } 1319 } 1320 if (*wdesc) { 1321 uint64_t mtesize = mte_mops_probe(env, toaddr, copysize, *wdesc); 1322 if (mtesize == 0) { 1323 mte_check_fail(env, *wdesc, toaddr, ra); 1324 *wdesc = 0; 1325 } else { 1326 copysize = MIN(copysize, mtesize); 1327 } 1328 } 1329 1330 toaddr = useronly_clean_ptr(toaddr); 1331 fromaddr = useronly_clean_ptr(fromaddr); 1332 /* Trapless lookup of whether we can get a host memory pointer */ 1333 wmem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, wmemidx); 1334 rmem = tlb_vaddr_to_host(env, fromaddr, MMU_DATA_LOAD, rmemidx); 1335 1336 #ifndef CONFIG_USER_ONLY 1337 /* 1338 * If we don't have host memory for both source and dest then just 1339 * do a single byte copy. This will handle watchpoints, invalid pages, 1340 * etc correctly. For clean code pages, the next iteration will see 1341 * the page dirty and will use the fast path. 1342 */ 1343 if (unlikely(!rmem || !wmem)) { 1344 uint8_t byte; 1345 if (rmem) { 1346 byte = *(uint8_t *)rmem; 1347 } else { 1348 byte = cpu_ldub_mmuidx_ra(env, fromaddr, rmemidx, ra); 1349 } 1350 if (wmem) { 1351 *(uint8_t *)wmem = byte; 1352 } else { 1353 cpu_stb_mmuidx_ra(env, toaddr, byte, wmemidx, ra); 1354 } 1355 return 1; 1356 } 1357 #endif 1358 /* Easy case: just memmove the host memory */ 1359 set_helper_retaddr(ra); 1360 memmove(wmem, rmem, copysize); 1361 clear_helper_retaddr(); 1362 return copysize; 1363 } 1364 1365 /* 1366 * Do part of a backwards memory copy. Here toaddr and fromaddr point 1367 * to the *last* byte to be copied. 1368 */ 1369 static uint64_t copy_step_rev(CPUARMState *env, uint64_t toaddr, 1370 uint64_t fromaddr, 1371 uint64_t copysize, int wmemidx, int rmemidx, 1372 uint32_t *wdesc, uint32_t *rdesc, uintptr_t ra) 1373 { 1374 void *rmem; 1375 void *wmem; 1376 1377 /* Don't cross a page boundary on either source or destination */ 1378 copysize = MIN(copysize, page_limit_rev(toaddr)); 1379 copysize = MIN(copysize, page_limit_rev(fromaddr)); 1380 1381 /* 1382 * Handle MTE tag checks: either handle the tag mismatch for byte 0, 1383 * or else copy up to but not including the byte with the mismatch. 1384 */ 1385 if (*rdesc) { 1386 uint64_t mtesize = mte_mops_probe_rev(env, fromaddr, copysize, *rdesc); 1387 if (mtesize == 0) { 1388 mte_check_fail(env, *rdesc, fromaddr, ra); 1389 *rdesc = 0; 1390 } else { 1391 copysize = MIN(copysize, mtesize); 1392 } 1393 } 1394 if (*wdesc) { 1395 uint64_t mtesize = mte_mops_probe_rev(env, toaddr, copysize, *wdesc); 1396 if (mtesize == 0) { 1397 mte_check_fail(env, *wdesc, toaddr, ra); 1398 *wdesc = 0; 1399 } else { 1400 copysize = MIN(copysize, mtesize); 1401 } 1402 } 1403 1404 toaddr = useronly_clean_ptr(toaddr); 1405 fromaddr = useronly_clean_ptr(fromaddr); 1406 /* Trapless lookup of whether we can get a host memory pointer */ 1407 wmem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, wmemidx); 1408 rmem = tlb_vaddr_to_host(env, fromaddr, MMU_DATA_LOAD, rmemidx); 1409 1410 #ifndef CONFIG_USER_ONLY 1411 /* 1412 * If we don't have host memory for both source and dest then just 1413 * do a single byte copy. This will handle watchpoints, invalid pages, 1414 * etc correctly. For clean code pages, the next iteration will see 1415 * the page dirty and will use the fast path. 1416 */ 1417 if (unlikely(!rmem || !wmem)) { 1418 uint8_t byte; 1419 if (rmem) { 1420 byte = *(uint8_t *)rmem; 1421 } else { 1422 byte = cpu_ldub_mmuidx_ra(env, fromaddr, rmemidx, ra); 1423 } 1424 if (wmem) { 1425 *(uint8_t *)wmem = byte; 1426 } else { 1427 cpu_stb_mmuidx_ra(env, toaddr, byte, wmemidx, ra); 1428 } 1429 return 1; 1430 } 1431 #endif 1432 /* 1433 * Easy case: just memmove the host memory. Note that wmem and 1434 * rmem here point to the *last* byte to copy. 1435 */ 1436 set_helper_retaddr(ra); 1437 memmove(wmem - (copysize - 1), rmem - (copysize - 1), copysize); 1438 clear_helper_retaddr(); 1439 return copysize; 1440 } 1441 1442 /* 1443 * for the Memory Copy operation, our implementation chooses always 1444 * to use "option A", where we update Xd and Xs to the final addresses 1445 * in the CPYP insn, and then in CPYM and CPYE only need to update Xn. 1446 * 1447 * @env: CPU 1448 * @syndrome: syndrome value for mismatch exceptions 1449 * (also contains the register numbers we need to use) 1450 * @wdesc: MTE descriptor for the writes (destination) 1451 * @rdesc: MTE descriptor for the reads (source) 1452 * @move: true if this is CPY (memmove), false for CPYF (memcpy forwards) 1453 */ 1454 static void do_cpyp(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, 1455 uint32_t rdesc, uint32_t move, uintptr_t ra) 1456 { 1457 int rd = mops_destreg(syndrome); 1458 int rs = mops_srcreg(syndrome); 1459 int rn = mops_sizereg(syndrome); 1460 uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX); 1461 uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX); 1462 bool forwards = true; 1463 uint64_t toaddr = env->xregs[rd]; 1464 uint64_t fromaddr = env->xregs[rs]; 1465 uint64_t copysize = env->xregs[rn]; 1466 uint64_t stagecopysize, step; 1467 1468 check_mops_enabled(env, ra); 1469 1470 1471 if (move) { 1472 /* 1473 * Copy backwards if necessary. The direction for a non-overlapping 1474 * copy is IMPDEF; we choose forwards. 1475 */ 1476 if (copysize > 0x007FFFFFFFFFFFFFULL) { 1477 copysize = 0x007FFFFFFFFFFFFFULL; 1478 } 1479 uint64_t fs = extract64(fromaddr, 0, 56); 1480 uint64_t ts = extract64(toaddr, 0, 56); 1481 uint64_t fe = extract64(fromaddr + copysize, 0, 56); 1482 1483 if (fs < ts && fe > ts) { 1484 forwards = false; 1485 } 1486 } else { 1487 if (copysize > INT64_MAX) { 1488 copysize = INT64_MAX; 1489 } 1490 } 1491 1492 if (!mte_checks_needed(fromaddr, rdesc)) { 1493 rdesc = 0; 1494 } 1495 if (!mte_checks_needed(toaddr, wdesc)) { 1496 wdesc = 0; 1497 } 1498 1499 if (forwards) { 1500 stagecopysize = MIN(copysize, page_limit(toaddr)); 1501 stagecopysize = MIN(stagecopysize, page_limit(fromaddr)); 1502 while (stagecopysize) { 1503 env->xregs[rd] = toaddr; 1504 env->xregs[rs] = fromaddr; 1505 env->xregs[rn] = copysize; 1506 step = copy_step(env, toaddr, fromaddr, stagecopysize, 1507 wmemidx, rmemidx, &wdesc, &rdesc, ra); 1508 toaddr += step; 1509 fromaddr += step; 1510 copysize -= step; 1511 stagecopysize -= step; 1512 } 1513 /* Insn completed, so update registers to the Option A format */ 1514 env->xregs[rd] = toaddr + copysize; 1515 env->xregs[rs] = fromaddr + copysize; 1516 env->xregs[rn] = -copysize; 1517 } else { 1518 /* 1519 * In a reverse copy the to and from addrs in Xs and Xd are the start 1520 * of the range, but it's more convenient for us to work with pointers 1521 * to the last byte being copied. 1522 */ 1523 toaddr += copysize - 1; 1524 fromaddr += copysize - 1; 1525 stagecopysize = MIN(copysize, page_limit_rev(toaddr)); 1526 stagecopysize = MIN(stagecopysize, page_limit_rev(fromaddr)); 1527 while (stagecopysize) { 1528 env->xregs[rn] = copysize; 1529 step = copy_step_rev(env, toaddr, fromaddr, stagecopysize, 1530 wmemidx, rmemidx, &wdesc, &rdesc, ra); 1531 copysize -= step; 1532 stagecopysize -= step; 1533 toaddr -= step; 1534 fromaddr -= step; 1535 } 1536 /* 1537 * Insn completed, so update registers to the Option A format. 1538 * For a reverse copy this is no different to the CPYP input format. 1539 */ 1540 env->xregs[rn] = copysize; 1541 } 1542 1543 /* Set NZCV = 0000 to indicate we are an Option A implementation */ 1544 env->NF = 0; 1545 env->ZF = 1; /* our env->ZF encoding is inverted */ 1546 env->CF = 0; 1547 env->VF = 0; 1548 return; 1549 } 1550 1551 void HELPER(cpyp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, 1552 uint32_t rdesc) 1553 { 1554 do_cpyp(env, syndrome, wdesc, rdesc, true, GETPC()); 1555 } 1556 1557 void HELPER(cpyfp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, 1558 uint32_t rdesc) 1559 { 1560 do_cpyp(env, syndrome, wdesc, rdesc, false, GETPC()); 1561 } 1562 1563 static void do_cpym(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, 1564 uint32_t rdesc, uint32_t move, uintptr_t ra) 1565 { 1566 /* Main: we choose to copy until less than a page remaining */ 1567 CPUState *cs = env_cpu(env); 1568 int rd = mops_destreg(syndrome); 1569 int rs = mops_srcreg(syndrome); 1570 int rn = mops_sizereg(syndrome); 1571 uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX); 1572 uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX); 1573 bool forwards = true; 1574 uint64_t toaddr, fromaddr, copysize, step; 1575 1576 check_mops_enabled(env, ra); 1577 1578 /* We choose to NOP out "no data to copy" before consistency checks */ 1579 if (env->xregs[rn] == 0) { 1580 return; 1581 } 1582 1583 check_mops_wrong_option(env, syndrome, ra); 1584 1585 if (move) { 1586 forwards = (int64_t)env->xregs[rn] < 0; 1587 } 1588 1589 if (forwards) { 1590 toaddr = env->xregs[rd] + env->xregs[rn]; 1591 fromaddr = env->xregs[rs] + env->xregs[rn]; 1592 copysize = -env->xregs[rn]; 1593 } else { 1594 copysize = env->xregs[rn]; 1595 /* This toaddr and fromaddr point to the *last* byte to copy */ 1596 toaddr = env->xregs[rd] + copysize - 1; 1597 fromaddr = env->xregs[rs] + copysize - 1; 1598 } 1599 1600 if (!mte_checks_needed(fromaddr, rdesc)) { 1601 rdesc = 0; 1602 } 1603 if (!mte_checks_needed(toaddr, wdesc)) { 1604 wdesc = 0; 1605 } 1606 1607 /* Our implementation has no particular parameter requirements for CPYM */ 1608 1609 /* Do the actual memmove */ 1610 if (forwards) { 1611 while (copysize >= TARGET_PAGE_SIZE) { 1612 step = copy_step(env, toaddr, fromaddr, copysize, 1613 wmemidx, rmemidx, &wdesc, &rdesc, ra); 1614 toaddr += step; 1615 fromaddr += step; 1616 copysize -= step; 1617 env->xregs[rn] = -copysize; 1618 if (copysize >= TARGET_PAGE_SIZE && 1619 unlikely(cpu_loop_exit_requested(cs))) { 1620 cpu_loop_exit_restore(cs, ra); 1621 } 1622 } 1623 } else { 1624 while (copysize >= TARGET_PAGE_SIZE) { 1625 step = copy_step_rev(env, toaddr, fromaddr, copysize, 1626 wmemidx, rmemidx, &wdesc, &rdesc, ra); 1627 toaddr -= step; 1628 fromaddr -= step; 1629 copysize -= step; 1630 env->xregs[rn] = copysize; 1631 if (copysize >= TARGET_PAGE_SIZE && 1632 unlikely(cpu_loop_exit_requested(cs))) { 1633 cpu_loop_exit_restore(cs, ra); 1634 } 1635 } 1636 } 1637 } 1638 1639 void HELPER(cpym)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, 1640 uint32_t rdesc) 1641 { 1642 do_cpym(env, syndrome, wdesc, rdesc, true, GETPC()); 1643 } 1644 1645 void HELPER(cpyfm)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, 1646 uint32_t rdesc) 1647 { 1648 do_cpym(env, syndrome, wdesc, rdesc, false, GETPC()); 1649 } 1650 1651 static void do_cpye(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, 1652 uint32_t rdesc, uint32_t move, uintptr_t ra) 1653 { 1654 /* Epilogue: do the last partial page */ 1655 int rd = mops_destreg(syndrome); 1656 int rs = mops_srcreg(syndrome); 1657 int rn = mops_sizereg(syndrome); 1658 uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX); 1659 uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX); 1660 bool forwards = true; 1661 uint64_t toaddr, fromaddr, copysize, step; 1662 1663 check_mops_enabled(env, ra); 1664 1665 /* We choose to NOP out "no data to copy" before consistency checks */ 1666 if (env->xregs[rn] == 0) { 1667 return; 1668 } 1669 1670 check_mops_wrong_option(env, syndrome, ra); 1671 1672 if (move) { 1673 forwards = (int64_t)env->xregs[rn] < 0; 1674 } 1675 1676 if (forwards) { 1677 toaddr = env->xregs[rd] + env->xregs[rn]; 1678 fromaddr = env->xregs[rs] + env->xregs[rn]; 1679 copysize = -env->xregs[rn]; 1680 } else { 1681 copysize = env->xregs[rn]; 1682 /* This toaddr and fromaddr point to the *last* byte to copy */ 1683 toaddr = env->xregs[rd] + copysize - 1; 1684 fromaddr = env->xregs[rs] + copysize - 1; 1685 } 1686 1687 if (!mte_checks_needed(fromaddr, rdesc)) { 1688 rdesc = 0; 1689 } 1690 if (!mte_checks_needed(toaddr, wdesc)) { 1691 wdesc = 0; 1692 } 1693 1694 /* Check the size; we don't want to have do a check-for-interrupts */ 1695 if (copysize >= TARGET_PAGE_SIZE) { 1696 raise_exception_ra(env, EXCP_UDEF, syndrome, 1697 mops_mismatch_exception_target_el(env), ra); 1698 } 1699 1700 /* Do the actual memmove */ 1701 if (forwards) { 1702 while (copysize > 0) { 1703 step = copy_step(env, toaddr, fromaddr, copysize, 1704 wmemidx, rmemidx, &wdesc, &rdesc, ra); 1705 toaddr += step; 1706 fromaddr += step; 1707 copysize -= step; 1708 env->xregs[rn] = -copysize; 1709 } 1710 } else { 1711 while (copysize > 0) { 1712 step = copy_step_rev(env, toaddr, fromaddr, copysize, 1713 wmemidx, rmemidx, &wdesc, &rdesc, ra); 1714 toaddr -= step; 1715 fromaddr -= step; 1716 copysize -= step; 1717 env->xregs[rn] = copysize; 1718 } 1719 } 1720 } 1721 1722 void HELPER(cpye)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, 1723 uint32_t rdesc) 1724 { 1725 do_cpye(env, syndrome, wdesc, rdesc, true, GETPC()); 1726 } 1727 1728 void HELPER(cpyfe)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, 1729 uint32_t rdesc) 1730 { 1731 do_cpye(env, syndrome, wdesc, rdesc, false, GETPC()); 1732 } 1733 1734 static bool is_guarded_page(CPUARMState *env, target_ulong addr, uintptr_t ra) 1735 { 1736 #ifdef CONFIG_USER_ONLY 1737 return page_get_flags(addr) & PAGE_BTI; 1738 #else 1739 CPUTLBEntryFull *full; 1740 void *host; 1741 int mmu_idx = cpu_mmu_index(env_cpu(env), true); 1742 int flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx, 1743 false, &host, &full, ra); 1744 1745 assert(!(flags & TLB_INVALID_MASK)); 1746 return full->extra.arm.guarded; 1747 #endif 1748 } 1749 1750 void HELPER(guarded_page_check)(CPUARMState *env) 1751 { 1752 /* 1753 * We have already verified that bti is enabled, and that the 1754 * instruction at PC is not ok for BTYPE. This is always at 1755 * the beginning of a block, so PC is always up-to-date and 1756 * no unwind is required. 1757 */ 1758 if (is_guarded_page(env, env->pc, 0)) { 1759 raise_exception(env, EXCP_UDEF, syn_btitrap(env->btype), 1760 exception_target_el(env)); 1761 } 1762 } 1763 1764 void HELPER(guarded_page_br)(CPUARMState *env, target_ulong pc) 1765 { 1766 /* 1767 * We have already checked for branch via x16 and x17. 1768 * What remains for choosing BTYPE is checking for a guarded page. 1769 */ 1770 env->btype = is_guarded_page(env, pc, GETPC()) ? 3 : 1; 1771 } 1772