/*
 * ARM v8.5-MemTag Operations
 *
 * Copyright (c) 2020 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/log.h"
#include "cpu.h"
#include "internals.h"
#include "exec/exec-all.h"
#include "exec/page-protection.h"
#ifdef CONFIG_USER_ONLY
#include "user/cpu_loop.h"
#include "user/page-protection.h"
#else
#include "exec/ram_addr.h"
#endif
#include "exec/cpu_ldst.h"
#include "exec/helper-proto.h"
#include "hw/core/tcg-cpu-ops.h"
#include "qapi/error.h"
#include "qemu/guest-random.h"
#include "mte_helper.h"


static int choose_nonexcluded_tag(int tag, int offset, uint16_t exclude)
{
    if (exclude == 0xffff) {
        return 0;
    }
    if (offset == 0) {
        while (exclude & (1 << tag)) {
            tag = (tag + 1) & 15;
        }
    } else {
        do {
            do {
                tag = (tag + 1) & 15;
            } while (exclude & (1 << tag));
        } while (--offset > 0);
    }
    return tag;
}
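
/*
 * Illustrative walk-through of choose_nonexcluded_tag (values chosen for
 * this comment, not taken from the architecture):
 *
 *   tag = 3, offset = 2, exclude = 0x000c   (tags 2 and 3 excluded)
 *
 * offset != 0, so the outer loop advances twice, each time stepping to
 * the next tag that is not excluded:
 *   step 1: 3 -> 4   (4 not excluded, offset becomes 1)
 *   step 2: 4 -> 5   (5 not excluded, offset becomes 0)
 * result: 5.
 *
 * With offset = 0 the starting tag is kept unless it is excluded; here
 * tag 3 is excluded, so the inner while loop would return 4 instead.
 * If all sixteen tags are excluded (exclude == 0xffff) the result is 0.
 */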

uint8_t *allocation_tag_mem_probe(CPUARMState *env, int ptr_mmu_idx,
                                  uint64_t ptr, MMUAccessType ptr_access,
                                  int ptr_size, MMUAccessType tag_access,
                                  bool probe, uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    uint64_t clean_ptr = useronly_clean_ptr(ptr);
    int flags = page_get_flags(clean_ptr);
    uint8_t *tags;
    uintptr_t index;

    assert(!(probe && ra));

    if (!(flags & (ptr_access == MMU_DATA_STORE ? PAGE_WRITE_ORG : PAGE_READ))) {
        if (probe) {
            return NULL;
        }
        cpu_loop_exit_sigsegv(env_cpu(env), ptr, ptr_access,
                              !(flags & PAGE_VALID), ra);
    }

    /* Require both MAP_ANON and PROT_MTE for the page. */
    if (!(flags & PAGE_ANON) || !(flags & PAGE_MTE)) {
        return NULL;
    }

    tags = page_get_target_data(clean_ptr);

    index = extract32(ptr, LOG2_TAG_GRANULE + 1,
                      TARGET_PAGE_BITS - LOG2_TAG_GRANULE - 1);
    return tags + index;
#else
    CPUTLBEntryFull *full;
    MemTxAttrs attrs;
    int in_page, flags;
    hwaddr ptr_paddr, tag_paddr, xlat;
    MemoryRegion *mr;
    ARMASIdx tag_asi;
    AddressSpace *tag_as;
    void *host;

    /*
     * Probe the first byte of the virtual address.  This raises an
     * exception for inaccessible pages, and resolves the virtual address
     * into the softmmu tlb.
     *
     * When RA == 0, this is either a pure probe or a no-fault-expected probe.
     * Indicate to probe_access_flags no-fault, then either return NULL
     * for the pure probe, or assert that we received a valid page for the
     * no-fault-expected probe.
     */
    flags = probe_access_full(env, ptr, 0, ptr_access, ptr_mmu_idx,
                              ra == 0, &host, &full, ra);
    if (probe && (flags & TLB_INVALID_MASK)) {
        return NULL;
    }
    assert(!(flags & TLB_INVALID_MASK));

    /* If the virtual page MemAttr != Tagged, access unchecked. */
    if (full->extra.arm.pte_attrs != 0xf0) {
        return NULL;
    }

    /*
     * If not backed by host ram, there is no tag storage: access unchecked.
     * This is probably a guest os bug though, so log it.
     */
    if (unlikely(flags & TLB_MMIO)) {
        qemu_log_mask(LOG_GUEST_ERROR,
                      "Page @ 0x%" PRIx64 " indicates Tagged Normal memory "
                      "but is not backed by host ram\n", ptr);
        return NULL;
    }

    /*
     * Remember these values across the second lookup below,
     * which may invalidate this pointer via tlb resize.
     */
    ptr_paddr = full->phys_addr | (ptr & ~TARGET_PAGE_MASK);
    attrs = full->attrs;
    full = NULL;

    /*
     * The Normal memory access can extend to the next page.  E.g. a single
     * 8-byte access to the last byte of a page will check only the last
     * tag on the first page.
     * Any page access exception has priority over tag check exception.
     */
    in_page = -(ptr | TARGET_PAGE_MASK);
    if (unlikely(ptr_size > in_page)) {
        flags |= probe_access_full(env, ptr + in_page, 0, ptr_access,
                                   ptr_mmu_idx, ra == 0, &host, &full, ra);
        assert(!(flags & TLB_INVALID_MASK));
    }

    /* Any debug exception has priority over a tag check exception. */
    if (!probe && unlikely(flags & TLB_WATCHPOINT)) {
        int wp = ptr_access == MMU_DATA_LOAD ? BP_MEM_READ : BP_MEM_WRITE;
        assert(ra != 0);
        cpu_check_watchpoint(env_cpu(env), ptr, ptr_size, attrs, wp, ra);
    }

    /* Convert to the physical address in tag space. */
    tag_paddr = ptr_paddr >> (LOG2_TAG_GRANULE + 1);

    /* Look up the address in tag space. */
    tag_asi = attrs.secure ? ARMASIdx_TagS : ARMASIdx_TagNS;
    tag_as = cpu_get_address_space(env_cpu(env), tag_asi);
    mr = address_space_translate(tag_as, tag_paddr, &xlat, NULL,
                                 tag_access == MMU_DATA_STORE, attrs);

    /*
     * Note that @mr will never be NULL.  If there is nothing in the address
     * space at @tag_paddr, the translation will return the unallocated memory
     * region.  For our purposes, the result must be ram.
     */
    if (unlikely(!memory_region_is_ram(mr))) {
        /* ??? Failure is a board configuration error. */
        qemu_log_mask(LOG_UNIMP,
                      "Tag Memory @ 0x%" HWADDR_PRIx " not found for "
                      "Normal Memory @ 0x%" HWADDR_PRIx "\n",
                      tag_paddr, ptr_paddr);
        return NULL;
    }

    /*
     * Ensure the tag memory is dirty on write, for migration.
     * Tag memory can never contain code or display memory (vga).
     */
    if (tag_access == MMU_DATA_STORE) {
        ram_addr_t tag_ra = memory_region_get_ram_addr(mr) + xlat;
        cpu_physical_memory_set_dirty_flag(tag_ra, DIRTY_MEMORY_MIGRATION);
    }

    return memory_region_get_ram_ptr(mr) + xlat;
#endif
}

static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
                                   uint64_t ptr, MMUAccessType ptr_access,
                                   int ptr_size, MMUAccessType tag_access,
                                   uintptr_t ra)
{
    return allocation_tag_mem_probe(env, ptr_mmu_idx, ptr, ptr_access,
                                    ptr_size, tag_access, false, ra);
}
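
/*
 * Illustrative example of the tag addressing above (TAG_GRANULE is 16
 * bytes, so LOG2_TAG_GRANULE + 1 == 5 and one tag byte covers 32 bytes
 * of normal memory):
 *
 *   ptr_paddr = 0x40000060
 *   tag_paddr = 0x40000060 >> 5 = 0x02000003
 *
 * The tag byte at offset 0x02000003 in the tag address space holds the
 * tags of the two granules at 0x40000060 (low nibble, address bit 4
 * clear) and 0x40000070 (high nibble, address bit 4 set).  The user-only
 * path computes the same "one byte per 32 bytes" index, just relative to
 * the start of the page via page_get_target_data().
 */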

uint64_t HELPER(irg)(CPUARMState *env, uint64_t rn, uint64_t rm)
{
    uint16_t exclude = extract32(rm | env->cp15.gcr_el1, 0, 16);
    int rrnd = extract32(env->cp15.gcr_el1, 16, 1);
    int start = extract32(env->cp15.rgsr_el1, 0, 4);
    int seed = extract32(env->cp15.rgsr_el1, 8, 16);
    int offset, i, rtag;

    /*
     * Our IMPDEF choice for GCR_EL1.RRND==1 is to continue to use the
     * deterministic algorithm.  Except that with RRND==1 the kernel is
     * not required to have set RGSR_EL1.SEED != 0, which is required for
     * the deterministic algorithm to function.  So we force a non-zero
     * SEED for that case.
     */
    if (unlikely(seed == 0) && rrnd) {
        do {
            Error *err = NULL;
            uint16_t two;

            if (qemu_guest_getrandom(&two, sizeof(two), &err) < 0) {
                /*
                 * Failed, for unknown reasons in the crypto subsystem.
                 * Best we can do is log the reason and use a constant seed.
                 */
                qemu_log_mask(LOG_UNIMP, "IRG: Crypto failure: %s\n",
                              error_get_pretty(err));
                error_free(err);
                two = 1;
            }
            seed = two;
        } while (seed == 0);
    }

    /* RandomTag */
    for (i = offset = 0; i < 4; ++i) {
        /* NextRandomTagBit */
        int top = (extract32(seed, 5, 1) ^ extract32(seed, 3, 1) ^
                   extract32(seed, 2, 1) ^ extract32(seed, 0, 1));
        seed = (top << 15) | (seed >> 1);
        offset |= top << i;
    }
    rtag = choose_nonexcluded_tag(start, offset, exclude);
    env->cp15.rgsr_el1 = rtag | (seed << 8);

    return address_with_allocation_tag(rn, rtag);
}
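
/*
 * Worked example of the RandomTag LFSR above, for one arbitrary seed
 * value (illustrative only):
 *
 *   seed = 0x0001:
 *     bit 0 ^ bit 2 ^ bit 3 ^ bit 5 = 1  ->  seed = 0x8000, offset bit 0 = 1
 *   seed = 0x8000, 0x4000, 0x2000:
 *     feedback taps are all zero         ->  offset bits 1..3 = 0
 *
 * so offset == 1 and the SEED field written back to RGSR_EL1 is 0x1000.
 * choose_nonexcluded_tag() then advances one non-excluded tag past
 * RGSR_EL1.TAG, and that tag is inserted into bits [59:56] of the result.
 */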

uint64_t HELPER(addsubg)(CPUARMState *env, uint64_t ptr,
                         int32_t offset, uint32_t tag_offset)
{
    int start_tag = allocation_tag_from_addr(ptr);
    uint16_t exclude = extract32(env->cp15.gcr_el1, 0, 16);
    int rtag = choose_nonexcluded_tag(start_tag, tag_offset, exclude);

    return address_with_allocation_tag(ptr + offset, rtag);
}

int load_tag1(uint64_t ptr, uint8_t *mem)
{
    int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
    return extract32(*mem, ofs, 4);
}

uint64_t HELPER(ldg)(CPUARMState *env, uint64_t ptr, uint64_t xt)
{
    int mmu_idx = arm_env_mmu_index(env);
    uint8_t *mem;
    int rtag = 0;

    /* Trap if accessing an invalid page. */
    mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_LOAD, 1,
                             MMU_DATA_LOAD, GETPC());

    /* Load if page supports tags. */
    if (mem) {
        rtag = load_tag1(ptr, mem);
    }

    return address_with_allocation_tag(xt, rtag);
}

static void check_tag_aligned(CPUARMState *env, uint64_t ptr, uintptr_t ra)
{
    if (unlikely(!QEMU_IS_ALIGNED(ptr, TAG_GRANULE))) {
        arm_cpu_do_unaligned_access(env_cpu(env), ptr, MMU_DATA_STORE,
                                    arm_env_mmu_index(env), ra);
        g_assert_not_reached();
    }
}

/* For use in a non-parallel context, store to the given nibble. */
void store_tag1(uint64_t ptr, uint8_t *mem, int tag)
{
    int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
    *mem = deposit32(*mem, ofs, 4, tag);
}

/* For use in a parallel context, atomically store to the given nibble. */
static void store_tag1_parallel(uint64_t ptr, uint8_t *mem, int tag)
{
    int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
    uint8_t old = qatomic_read(mem);

    while (1) {
        uint8_t new = deposit32(old, ofs, 4, tag);
        uint8_t cmp = qatomic_cmpxchg(mem, old, new);
        if (likely(cmp == old)) {
            return;
        }
        old = cmp;
    }
}
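
/*
 * Illustrative example of the nibble packing used by load_tag1 and
 * store_tag1 (addresses invented for this comment): the granules at
 * 0x1000 and 0x1010 share one tag byte.
 *
 *   ptr = 0x1000  ->  address bit 4 clear  ->  ofs = 0  ->  low nibble
 *   ptr = 0x1010  ->  address bit 4 set    ->  ofs = 4  ->  high nibble
 *
 * After storing tag 0xa for granule 0x1000 and tag 0x5 for granule
 * 0x1010, the shared tag byte reads back as 0x5a.  store_tag1_parallel
 * performs the same deposit via a cmpxchg loop, so two vCPUs updating
 * the two halves of the byte concurrently cannot lose an update.
 */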

typedef void stg_store1(uint64_t, uint8_t *, int);

static inline void do_stg(CPUARMState *env, uint64_t ptr, uint64_t xt,
                          uintptr_t ra, stg_store1 store1)
{
    int mmu_idx = arm_env_mmu_index(env);
    uint8_t *mem;

    check_tag_aligned(env, ptr, ra);

    /* Trap if accessing an invalid page. */
    mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, TAG_GRANULE,
                             MMU_DATA_STORE, ra);

    /* Store if page supports tags. */
    if (mem) {
        store1(ptr, mem, allocation_tag_from_addr(xt));
    }
}

void HELPER(stg)(CPUARMState *env, uint64_t ptr, uint64_t xt)
{
    do_stg(env, ptr, xt, GETPC(), store_tag1);
}

void HELPER(stg_parallel)(CPUARMState *env, uint64_t ptr, uint64_t xt)
{
    do_stg(env, ptr, xt, GETPC(), store_tag1_parallel);
}

void HELPER(stg_stub)(CPUARMState *env, uint64_t ptr)
{
    int mmu_idx = arm_env_mmu_index(env);
    uintptr_t ra = GETPC();

    check_tag_aligned(env, ptr, ra);
    probe_write(env, ptr, TAG_GRANULE, mmu_idx, ra);
}

static inline void do_st2g(CPUARMState *env, uint64_t ptr, uint64_t xt,
                           uintptr_t ra, stg_store1 store1)
{
    int mmu_idx = arm_env_mmu_index(env);
    int tag = allocation_tag_from_addr(xt);
    uint8_t *mem1, *mem2;

    check_tag_aligned(env, ptr, ra);

    /*
     * Trap if accessing invalid page(s).
     * This takes priority over !allocation_tag_access_enabled.
     */
    if (ptr & TAG_GRANULE) {
        /* Two stores unaligned mod TAG_GRANULE*2 -- modify two bytes. */
        mem1 = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE,
                                  TAG_GRANULE, MMU_DATA_STORE, ra);
        mem2 = allocation_tag_mem(env, mmu_idx, ptr + TAG_GRANULE,
                                  MMU_DATA_STORE, TAG_GRANULE,
                                  MMU_DATA_STORE, ra);

        /* Store if page(s) support tags. */
        if (mem1) {
            store1(TAG_GRANULE, mem1, tag);
        }
        if (mem2) {
            store1(0, mem2, tag);
        }
    } else {
        /* Two stores aligned mod TAG_GRANULE*2 -- modify one byte. */
        mem1 = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE,
                                  2 * TAG_GRANULE, MMU_DATA_STORE, ra);
        if (mem1) {
            tag |= tag << 4;
            qatomic_set(mem1, tag);
        }
    }
}

void HELPER(st2g)(CPUARMState *env, uint64_t ptr, uint64_t xt)
{
    do_st2g(env, ptr, xt, GETPC(), store_tag1);
}

void HELPER(st2g_parallel)(CPUARMState *env, uint64_t ptr, uint64_t xt)
{
    do_st2g(env, ptr, xt, GETPC(), store_tag1_parallel);
}

void HELPER(st2g_stub)(CPUARMState *env, uint64_t ptr)
{
    int mmu_idx = arm_env_mmu_index(env);
    uintptr_t ra = GETPC();
    int in_page = -(ptr | TARGET_PAGE_MASK);

    check_tag_aligned(env, ptr, ra);

    if (likely(in_page >= 2 * TAG_GRANULE)) {
        probe_write(env, ptr, 2 * TAG_GRANULE, mmu_idx, ra);
    } else {
        probe_write(env, ptr, TAG_GRANULE, mmu_idx, ra);
        probe_write(env, ptr + TAG_GRANULE, TAG_GRANULE, mmu_idx, ra);
    }
}

uint64_t HELPER(ldgm)(CPUARMState *env, uint64_t ptr)
{
    int mmu_idx = arm_env_mmu_index(env);
    uintptr_t ra = GETPC();
    int gm_bs = env_archcpu(env)->gm_blocksize;
    int gm_bs_bytes = 4 << gm_bs;
    void *tag_mem;
    uint64_t ret;
    int shift;

    ptr = QEMU_ALIGN_DOWN(ptr, gm_bs_bytes);

    /* Trap if accessing an invalid page. */
    tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_LOAD,
                                 gm_bs_bytes, MMU_DATA_LOAD, ra);

    /* The tag is squashed to zero if the page does not support tags. */
    if (!tag_mem) {
        return 0;
    }

    /*
     * The ordering of elements within the word corresponds to
     * a little-endian operation.  Computation of shift comes from
     *
     *     index = address<LOG2_TAG_GRANULE+3:LOG2_TAG_GRANULE>
     *     data<index*4+3:index*4> = tag
     *
     * Because of the alignment of ptr above, BS=6 has shift=0.
     * All memory operations are aligned.  Defer support for BS=2,
     * requiring insertion or extraction of a nibble, until we
     * support a cpu that requires it.
     */
    switch (gm_bs) {
    case 3:
        /* 32 bytes -> 2 tags -> 8 result bits */
        ret = *(uint8_t *)tag_mem;
        break;
    case 4:
        /* 64 bytes -> 4 tags -> 16 result bits */
        ret = cpu_to_le16(*(uint16_t *)tag_mem);
        break;
    case 5:
        /* 128 bytes -> 8 tags -> 32 result bits */
        ret = cpu_to_le32(*(uint32_t *)tag_mem);
        break;
    case 6:
        /* 256 bytes -> 16 tags -> 64 result bits */
        return cpu_to_le64(*(uint64_t *)tag_mem);
    default:
        /*
         * CPU configured with unsupported/invalid gm blocksize.
         * This is detected early in arm_cpu_realizefn.
         */
        g_assert_not_reached();
    }
    shift = extract64(ptr, LOG2_TAG_GRANULE, 4) * 4;
    return ret << shift;
}
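
/*
 * Worked example of the LDGM shift computation above, assuming
 * gm_blocksize (GMID_EL1.BS) == 4, i.e. 64-byte blocks (values invented
 * for this comment):
 *
 *   ptr = 0x10c7  ->  aligned down to 0x10c0
 *   shift = extract64(0x10c0, 4, 4) * 4 = 0xc * 4 = 48
 *
 * The 16-bit tag-memory read covers the four granules 0x10c0..0x10f0,
 * with the tag of the lowest granule in the least significant nibble.
 * Shifting left by 48 places those four tags in result bits [63:48],
 * i.e. at the nibble positions named by address bits [7:4], matching
 * the little-endian layout described in the comment above.
 */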

void HELPER(stgm)(CPUARMState *env, uint64_t ptr, uint64_t val)
{
    int mmu_idx = arm_env_mmu_index(env);
    uintptr_t ra = GETPC();
    int gm_bs = env_archcpu(env)->gm_blocksize;
    int gm_bs_bytes = 4 << gm_bs;
    void *tag_mem;
    int shift;

    ptr = QEMU_ALIGN_DOWN(ptr, gm_bs_bytes);

    /* Trap if accessing an invalid page. */
    tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE,
                                 gm_bs_bytes, MMU_DATA_LOAD, ra);

    /*
     * Tag store only happens if the page supports tags,
     * and if the OS has enabled access to the tags.
     */
    if (!tag_mem) {
        return;
    }

    /* See LDGM for comments on BS and on shift. */
    shift = extract64(ptr, LOG2_TAG_GRANULE, 4) * 4;
    val >>= shift;
    switch (gm_bs) {
    case 3:
        /* 32 bytes -> 2 tags -> 8 result bits */
        *(uint8_t *)tag_mem = val;
        break;
    case 4:
        /* 64 bytes -> 4 tags -> 16 result bits */
        *(uint16_t *)tag_mem = cpu_to_le16(val);
        break;
    case 5:
        /* 128 bytes -> 8 tags -> 32 result bits */
        *(uint32_t *)tag_mem = cpu_to_le32(val);
        break;
    case 6:
        /* 256 bytes -> 16 tags -> 64 result bits */
        *(uint64_t *)tag_mem = cpu_to_le64(val);
        break;
    default:
        /* CPU configured with unsupported gm blocksize. */
        g_assert_not_reached();
    }
}

void HELPER(stzgm_tags)(CPUARMState *env, uint64_t ptr, uint64_t val)
{
    uintptr_t ra = GETPC();
    int mmu_idx = arm_env_mmu_index(env);
    int log2_dcz_bytes, log2_tag_bytes;
    intptr_t dcz_bytes, tag_bytes;
    uint8_t *mem;

    /*
     * In arm_cpu_realizefn, we assert that dcz > LOG2_TAG_GRANULE+1,
     * i.e. 32 bytes, which is an unreasonably small dcz anyway,
     * to make sure that we can access one complete tag byte here.
     */
    log2_dcz_bytes = env_archcpu(env)->dcz_blocksize + 2;
    log2_tag_bytes = log2_dcz_bytes - (LOG2_TAG_GRANULE + 1);
    dcz_bytes = (intptr_t)1 << log2_dcz_bytes;
    tag_bytes = (intptr_t)1 << log2_tag_bytes;
    ptr &= -dcz_bytes;

    mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, dcz_bytes,
                             MMU_DATA_STORE, ra);
    if (mem) {
        int tag_pair = (val & 0xf) * 0x11;
        memset(mem, tag_pair, tag_bytes);
    }
}
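
/*
 * Worked example of the DC GZVA sizing above (dcz_blocksize is
 * log2(words), so the +2 converts it to log2(bytes); values invented
 * for this comment):
 *
 *   dcz_blocksize = 4   ->  log2_dcz_bytes = 6  ->  64-byte ZVA block
 *   log2_tag_bytes = 6 - 5 = 1  ->  tag_bytes = 2
 *
 * A 64-byte block spans four 16-byte granules, whose tags occupy two
 * whole tag bytes; the memset() of tag_pair (the tag replicated into
 * both nibbles) therefore sets all four tags in one go.
 */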

static void mte_sync_check_fail(CPUARMState *env, uint32_t desc,
                                uint64_t dirty_ptr, uintptr_t ra)
{
    int is_write, syn;

    env->exception.vaddress = dirty_ptr;

    is_write = FIELD_EX32(desc, MTEDESC, WRITE);
    syn = syn_data_abort_no_iss(arm_current_el(env) != 0, 0, 0, 0, 0, is_write,
                                0x11);
    raise_exception_ra(env, EXCP_DATA_ABORT, syn, exception_target_el(env), ra);
    g_assert_not_reached();
}

static void mte_async_check_fail(CPUARMState *env, uint64_t dirty_ptr,
                                 uintptr_t ra, ARMMMUIdx arm_mmu_idx, int el)
{
    int select;

    if (regime_has_2_ranges(arm_mmu_idx)) {
        select = extract64(dirty_ptr, 55, 1);
    } else {
        select = 0;
    }
    env->cp15.tfsr_el[el] |= 1 << select;
#ifdef CONFIG_USER_ONLY
    /*
     * Stand in for a timer irq, setting _TIF_MTE_ASYNC_FAULT,
     * which then sends a SIGSEGV when the thread is next scheduled.
     * This cpu will return to the main loop at the end of the TB,
     * which is rather sooner than "normal".  But the alternative
     * is waiting until the next syscall.
     */
    qemu_cpu_kick(env_cpu(env));
#endif
}

/* Record a tag check failure. */
void mte_check_fail(CPUARMState *env, uint32_t desc,
                    uint64_t dirty_ptr, uintptr_t ra)
{
    int mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
    ARMMMUIdx arm_mmu_idx = core_to_aa64_mmu_idx(mmu_idx);
    int el, reg_el, tcf;
    uint64_t sctlr;

    reg_el = regime_el(env, arm_mmu_idx);
    sctlr = env->cp15.sctlr_el[reg_el];

    switch (arm_mmu_idx) {
    case ARMMMUIdx_E10_0:
    case ARMMMUIdx_E20_0:
        el = 0;
        tcf = extract64(sctlr, 38, 2);
        break;
    default:
        el = reg_el;
        tcf = extract64(sctlr, 40, 2);
    }

    switch (tcf) {
    case 1:
        /* Tag check fail causes a synchronous exception. */
        mte_sync_check_fail(env, desc, dirty_ptr, ra);
        break;

    case 0:
        /*
         * Tag check fail does not affect the PE.
         * We eliminate this case by not setting MTE_ACTIVE
         * in tb_flags, so that we never make this runtime call.
         */
        g_assert_not_reached();

    case 2:
        /* Tag check fail causes asynchronous flag set. */
        mte_async_check_fail(env, dirty_ptr, ra, arm_mmu_idx, el);
        break;

    case 3:
        /*
         * Tag check fail causes asynchronous flag set for stores, or
         * a synchronous exception for loads.
         */
        if (FIELD_EX32(desc, MTEDESC, WRITE)) {
            mte_async_check_fail(env, dirty_ptr, ra, arm_mmu_idx, el);
        } else {
            mte_sync_check_fail(env, desc, dirty_ptr, ra);
        }
        break;
    }
}

/**
 * checkN:
 * @mem: tag memory to test
 * @odd: true to begin testing at tags at odd nibble
 * @cmp: the tag to compare against
 * @count: number of tags to test
 *
 * Return the number of successful tests.
 * Thus a return value < @count indicates a failure.
 *
 * A note about sizes: count is expected to be small.
 *
 * The most common use will be LDP/STP of two integer registers,
 * which means 16 bytes of memory touching at most 2 tags, but
 * often the access is aligned and thus just 1 tag.
 *
 * Using AdvSIMD LD/ST (multiple), one can access 64 bytes of memory,
 * touching at most 5 tags.  SVE LDR/STR (vector) with the default
 * vector length is also 64 bytes; the maximum architectural length
 * is 256 bytes, touching at most 17 tags.
 *
 * The loop below uses 7 logical operations and 1 memory operation
 * per tag pair.  An implementation that loads an aligned word and
 * uses masking to ignore adjacent tags requires 18 logical operations
 * and thus does not begin to pay off until 6 tags.
 * Which, according to the survey above, is unlikely to be common.
 */
static int checkN(uint8_t *mem, int odd, int cmp, int count)
{
    int n = 0, diff;

    /* Replicate the test tag and compare. */
    cmp *= 0x11;
    diff = *mem++ ^ cmp;

    if (odd) {
        goto start_odd;
    }

    while (1) {
        /* Test even tag. */
        if (unlikely((diff) & 0x0f)) {
            break;
        }
        if (++n == count) {
            break;
        }

 start_odd:
        /* Test odd tag. */
        if (unlikely((diff) & 0xf0)) {
            break;
        }
        if (++n == count) {
            break;
        }

        diff = *mem++ ^ cmp;
    }
    return n;
}
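
/*
 * Illustrative trace of checkN for a 16-byte LDP at 0x1008 with pointer
 * tag 0x3 (addresses invented for this comment):
 *
 *   granules covered: 0x1000 and 0x1010  ->  count = 2, odd = false
 *   cmp = 0x3 * 0x11 = 0x33; diff = *mem ^ 0x33
 *
 * If the tag byte holds 0x53 (granule 0x1000 tagged 3, granule 0x1010
 * tagged 5), diff = 0x60: the even (low) nibble matches, n becomes 1,
 * the odd (high) nibble differs, and checkN returns 1 < count, which
 * the caller turns into a tag check fault on granule 0x1010.
 */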

/**
 * checkNrev:
 * @mem: tag memory to test
 * @odd: true to begin testing at tags at odd nibble
 * @cmp: the tag to compare against
 * @count: number of tags to test
 *
 * Return the number of successful tests.
 * Thus a return value < @count indicates a failure.
 *
 * This is like checkN, but it runs backwards, checking the
 * tags starting with the one at @mem and then the tags preceding it.
 * This is needed by the backwards-memory-copying operations.
 */
static int checkNrev(uint8_t *mem, int odd, int cmp, int count)
{
    int n = 0, diff;

    /* Replicate the test tag and compare. */
    cmp *= 0x11;
    diff = *mem-- ^ cmp;

    if (!odd) {
        goto start_even;
    }

    while (1) {
        /* Test odd tag. */
        if (unlikely((diff) & 0xf0)) {
            break;
        }
        if (++n == count) {
            break;
        }

 start_even:
        /* Test even tag. */
        if (unlikely((diff) & 0x0f)) {
            break;
        }
        if (++n == count) {
            break;
        }

        diff = *mem-- ^ cmp;
    }
    return n;
}

/**
 * mte_probe_int() - helper for mte_probe and mte_check
 * @env: CPU environment
 * @desc: MTEDESC descriptor
 * @ptr: virtual address of the base of the access
 * @ra: return address for unwinding, or 0 for a non-faulting probe
 * @fault: return virtual address of the first check failure
 *
 * Internal routine for both mte_probe and mte_check.
 * Return zero on failure, filling in *fault.
 * Return negative on trivial success for tbi disabled.
 * Return positive on success with tbi enabled.
 */
static int mte_probe_int(CPUARMState *env, uint32_t desc, uint64_t ptr,
                         uintptr_t ra, uint64_t *fault)
{
    int mmu_idx, ptr_tag, bit55;
    uint64_t ptr_last, prev_page, next_page;
    uint64_t tag_first, tag_last;
    uint32_t sizem1, tag_count, n, c;
    uint8_t *mem1, *mem2;
    MMUAccessType type;

    bit55 = extract64(ptr, 55, 1);
    *fault = ptr;

    /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */
    if (unlikely(!tbi_check(desc, bit55))) {
        return -1;
    }

    ptr_tag = allocation_tag_from_addr(ptr);

    if (tcma_check(desc, bit55, ptr_tag)) {
        return 1;
    }

    mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
    type = FIELD_EX32(desc, MTEDESC, WRITE) ? MMU_DATA_STORE : MMU_DATA_LOAD;
    sizem1 = FIELD_EX32(desc, MTEDESC, SIZEM1);

    /* Find the addr of the end of the access */
    ptr_last = ptr + sizem1;

    /* Round the bounds to the tag granule, and compute the number of tags. */
    tag_first = QEMU_ALIGN_DOWN(ptr, TAG_GRANULE);
    tag_last = QEMU_ALIGN_DOWN(ptr_last, TAG_GRANULE);
    tag_count = ((tag_last - tag_first) / TAG_GRANULE) + 1;

    /* Locate the page boundaries. */
    prev_page = ptr & TARGET_PAGE_MASK;
    next_page = prev_page + TARGET_PAGE_SIZE;

    if (likely(tag_last - prev_page < TARGET_PAGE_SIZE)) {
        /* Memory access stays on one page. */
        mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, sizem1 + 1,
                                  MMU_DATA_LOAD, ra);
        if (!mem1) {
            return 1;
        }
        /* Perform all of the comparisons. */
        n = checkN(mem1, ptr & TAG_GRANULE, ptr_tag, tag_count);
    } else {
        /* Memory access crosses to next page. */
        mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, next_page - ptr,
                                  MMU_DATA_LOAD, ra);

        mem2 = allocation_tag_mem(env, mmu_idx, next_page, type,
                                  ptr_last - next_page + 1,
                                  MMU_DATA_LOAD, ra);

        /*
         * Perform all of the comparisons.
         * Note the possible but unlikely case of the operation spanning
         * two pages that do not both have tagging enabled.
         */
        n = c = (next_page - tag_first) / TAG_GRANULE;
        if (mem1) {
            n = checkN(mem1, ptr & TAG_GRANULE, ptr_tag, c);
        }
        if (n == c) {
            if (!mem2) {
                return 1;
            }
            n += checkN(mem2, 0, ptr_tag, tag_count - c);
        }
    }

    if (likely(n == tag_count)) {
        return 1;
    }

    /*
     * If we failed, we know which granule.  For the first granule, the
     * failure address is @ptr, the first byte accessed.  Otherwise the
     * failure address is the first byte of the nth granule.
     */
    if (n > 0) {
        *fault = tag_first + n * TAG_GRANULE;
    }
    return 0;
}
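
/*
 * Worked example of the granule arithmetic in mte_probe_int (numbers
 * invented for this comment): a 16-byte access at ptr = 0x1017 gives
 *
 *   sizem1    = 15          ->  ptr_last  = 0x1026
 *   tag_first = 0x1010          tag_last  = 0x1020
 *   tag_count = (0x1020 - 0x1010) / 16 + 1 = 2
 *
 * and (ptr & TAG_GRANULE) != 0, so checkN starts at the odd (high)
 * nibble of the first tag byte.  On failure with n == 1, *fault is set
 * to tag_first + 1 * TAG_GRANULE = 0x1020, the first byte of the
 * granule whose tag mismatched.
 */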

uint64_t mte_check(CPUARMState *env, uint32_t desc, uint64_t ptr, uintptr_t ra)
{
    uint64_t fault;
    int ret = mte_probe_int(env, desc, ptr, ra, &fault);

    if (unlikely(ret == 0)) {
        mte_check_fail(env, desc, fault, ra);
    } else if (ret < 0) {
        return ptr;
    }
    return useronly_clean_ptr(ptr);
}

uint64_t HELPER(mte_check)(CPUARMState *env, uint32_t desc, uint64_t ptr)
{
    /*
     * R_XCHFJ: Alignment check not caused by memory type is priority 1,
     * higher than any translation fault.  When MTE is disabled, tcg
     * performs the alignment check during the code generated for the
     * memory access.  With MTE enabled, we must check this here before
     * raising any translation fault in allocation_tag_mem.
     */
    unsigned align = FIELD_EX32(desc, MTEDESC, ALIGN);
    if (unlikely(align)) {
        align = (1u << align) - 1;
        if (unlikely(ptr & align)) {
            int idx = FIELD_EX32(desc, MTEDESC, MIDX);
            bool w = FIELD_EX32(desc, MTEDESC, WRITE);
            MMUAccessType type = w ? MMU_DATA_STORE : MMU_DATA_LOAD;
            arm_cpu_do_unaligned_access(env_cpu(env), ptr, type, idx, GETPC());
        }
    }

    return mte_check(env, desc, ptr, GETPC());
}
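
/*
 * Reading the ALIGN handling above (values invented for this comment):
 * a descriptor with ALIGN == 3 yields
 *
 *   align = (1u << 3) - 1 = 7
 *
 * so any ptr with one of its low three bits set takes the unaligned
 * access path before any MTE or translation processing, i.e. the field
 * carries log2 of the required alignment.
 */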

/*
 * No-fault version of mte_check, to be used by SVE for MemSingleNF.
 * Returns false if the access is Checked and the check failed.  This
 * is only intended to probe the tag -- the validity of the page must
 * be checked beforehand.
 */
bool mte_probe(CPUARMState *env, uint32_t desc, uint64_t ptr)
{
    uint64_t fault;
    int ret = mte_probe_int(env, desc, ptr, 0, &fault);

    return ret != 0;
}

/*
 * Perform an MTE checked access for DC_ZVA.
 */
uint64_t HELPER(mte_check_zva)(CPUARMState *env, uint32_t desc, uint64_t ptr)
{
    uintptr_t ra = GETPC();
    int log2_dcz_bytes, log2_tag_bytes;
    int mmu_idx, bit55;
    intptr_t dcz_bytes, tag_bytes, i;
    void *mem;
    uint64_t ptr_tag, mem_tag, align_ptr;

    bit55 = extract64(ptr, 55, 1);

    /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */
    if (unlikely(!tbi_check(desc, bit55))) {
        return ptr;
    }

    ptr_tag = allocation_tag_from_addr(ptr);

    if (tcma_check(desc, bit55, ptr_tag)) {
        goto done;
    }

    /*
     * In arm_cpu_realizefn, we asserted that dcz > LOG2_TAG_GRANULE+1,
     * i.e. 32 bytes, which is an unreasonably small dcz anyway, to make
     * sure that we can access one complete tag byte here.
     */
    log2_dcz_bytes = env_archcpu(env)->dcz_blocksize + 2;
    log2_tag_bytes = log2_dcz_bytes - (LOG2_TAG_GRANULE + 1);
    dcz_bytes = (intptr_t)1 << log2_dcz_bytes;
    tag_bytes = (intptr_t)1 << log2_tag_bytes;
    align_ptr = ptr & -dcz_bytes;

    /*
     * Trap if accessing an invalid page.  DC_ZVA requires that we supply
     * the original pointer for an invalid page.  But watchpoints require
     * that we probe the actual space.  So do both.
     */
    mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
    (void) probe_write(env, ptr, 1, mmu_idx, ra);
    mem = allocation_tag_mem(env, mmu_idx, align_ptr, MMU_DATA_STORE,
                             dcz_bytes, MMU_DATA_LOAD, ra);
    if (!mem) {
        goto done;
    }

    /*
     * Unlike the reasoning for checkN, DC_ZVA is always aligned, and thus
     * it is quite easy to perform all of the comparisons at once without
     * any extra masking.
     *
     * The most common zva block size is 64; some of the thunderx cpus use
     * a block size of 128.  For user-only, aarch64_max_initfn will set the
     * block size to 512.  Fill out the other cases for future-proofing.
     *
     * In order to be able to find the first miscompare later, we want the
     * tag bytes to be in little-endian order.
     */
    switch (log2_tag_bytes) {
    case 0: /* zva_blocksize 32 */
        mem_tag = *(uint8_t *)mem;
        ptr_tag *= 0x11u;
        break;
    case 1: /* zva_blocksize 64 */
        mem_tag = cpu_to_le16(*(uint16_t *)mem);
        ptr_tag *= 0x1111u;
        break;
    case 2: /* zva_blocksize 128 */
        mem_tag = cpu_to_le32(*(uint32_t *)mem);
        ptr_tag *= 0x11111111u;
        break;
    case 3: /* zva_blocksize 256 */
        mem_tag = cpu_to_le64(*(uint64_t *)mem);
        ptr_tag *= 0x1111111111111111ull;
        break;

    default: /* zva_blocksize 512, 1024, 2048 */
        ptr_tag *= 0x1111111111111111ull;
        i = 0;
        do {
            mem_tag = cpu_to_le64(*(uint64_t *)(mem + i));
            if (unlikely(mem_tag != ptr_tag)) {
                goto fail;
            }
            i += 8;
            align_ptr += 16 * TAG_GRANULE;
        } while (i < tag_bytes);
        goto done;
    }

    if (likely(mem_tag == ptr_tag)) {
        goto done;
    }

 fail:
    /* Locate the first nibble that differs. */
    i = ctz64(mem_tag ^ ptr_tag) >> 4;
    mte_check_fail(env, desc, align_ptr + i * TAG_GRANULE, ra);

 done:
    return useronly_clean_ptr(ptr);
}
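
/*
 * Illustrative example of the whole-block compare above, assuming a
 * 64-byte ZVA block (log2_tag_bytes == 1; numbers invented for this
 * comment):
 *
 *   pointer tag 0x7  ->  ptr_tag * 0x1111 = 0x7777
 *   mem_tag          =   16-bit little-endian read of the two tag bytes,
 *                        i.e. one nibble per granule of the block
 *
 * A single 16-bit comparison therefore checks all four granules of the
 * block at once; only on a miscompare does the code fall through to
 * locate the offending granule and report a tag check fault.
 */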

uint64_t mte_mops_probe(CPUARMState *env, uint64_t ptr, uint64_t size,
                        uint32_t desc)
{
    int mmu_idx, tag_count;
    uint64_t ptr_tag, tag_first, tag_last;
    void *mem;
    bool w = FIELD_EX32(desc, MTEDESC, WRITE);
    uint32_t n;

    mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
    /* True probe; this will never fault */
    mem = allocation_tag_mem_probe(env, mmu_idx, ptr,
                                   w ? MMU_DATA_STORE : MMU_DATA_LOAD,
                                   size, MMU_DATA_LOAD, true, 0);
    if (!mem) {
        return size;
    }

    /*
     * TODO: checkN() is not designed for checks of the size we expect
     * for FEAT_MOPS operations, so we should implement this differently.
     * Maybe we should do something like
     *   if (region start and size are aligned nicely) {
     *       do direct loads of 64 tag bits at a time;
     *   } else {
     *       call checkN()
     *   }
     */
    /* Round the bounds to the tag granule, and compute the number of tags. */
    ptr_tag = allocation_tag_from_addr(ptr);
    tag_first = QEMU_ALIGN_DOWN(ptr, TAG_GRANULE);
    tag_last = QEMU_ALIGN_DOWN(ptr + size - 1, TAG_GRANULE);
    tag_count = ((tag_last - tag_first) / TAG_GRANULE) + 1;
    n = checkN(mem, ptr & TAG_GRANULE, ptr_tag, tag_count);
    if (likely(n == tag_count)) {
        return size;
    }

    /*
     * Failure; for the first granule, it's at @ptr.  Otherwise
     * it's at the first byte of the nth granule.  Calculate how
     * many bytes we can access without hitting that failure.
     */
    if (n == 0) {
        return 0;
    } else {
        return n * TAG_GRANULE - (ptr - tag_first);
    }
}

uint64_t mte_mops_probe_rev(CPUARMState *env, uint64_t ptr, uint64_t size,
                            uint32_t desc)
{
    int mmu_idx, tag_count;
    uint64_t ptr_tag, tag_first, tag_last;
    void *mem;
    bool w = FIELD_EX32(desc, MTEDESC, WRITE);
    uint32_t n;

    mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
    /*
     * True probe; this will never fault.  Note that our caller passes
     * us a pointer to the end of the region, but allocation_tag_mem_probe()
     * wants a pointer to the start.  Because we know we don't span a page
     * boundary and that allocation_tag_mem_probe() doesn't otherwise care
     * about the size, pass in a size of 1 byte.  This is simpler than
     * adjusting the ptr to point to the start of the region and then having
     * to adjust the returned 'mem' to get the end of the tag memory.
     */
    mem = allocation_tag_mem_probe(env, mmu_idx, ptr,
                                   w ? MMU_DATA_STORE : MMU_DATA_LOAD,
                                   1, MMU_DATA_LOAD, true, 0);
    if (!mem) {
        return size;
    }

    /*
     * TODO: checkNrev() is not designed for checks of the size we expect
     * for FEAT_MOPS operations, so we should implement this differently.
     * Maybe we should do something like
     *   if (region start and size are aligned nicely) {
     *       do direct loads of 64 tag bits at a time;
     *   } else {
     *       call checkNrev()
     *   }
     */
    /* Round the bounds to the tag granule, and compute the number of tags. */
    ptr_tag = allocation_tag_from_addr(ptr);
    tag_first = QEMU_ALIGN_DOWN(ptr - (size - 1), TAG_GRANULE);
    tag_last = QEMU_ALIGN_DOWN(ptr, TAG_GRANULE);
    tag_count = ((tag_last - tag_first) / TAG_GRANULE) + 1;
    n = checkNrev(mem, ptr & TAG_GRANULE, ptr_tag, tag_count);
    if (likely(n == tag_count)) {
        return size;
    }

    /*
     * Failure; for the first granule, it's at @ptr.  Otherwise
     * it's at the last byte of the nth granule.  Calculate how
     * many bytes we can access without hitting that failure.
     */
    if (n == 0) {
        return 0;
    } else {
        return (n - 1) * TAG_GRANULE + ((ptr + 1) - tag_last);
    }
}

void mte_mops_set_tags(CPUARMState *env, uint64_t ptr, uint64_t size,
                       uint32_t desc)
{
    int mmu_idx, tag_count;
    uint64_t ptr_tag;
    void *mem;

    if (!desc) {
        /* Tags not actually enabled */
        return;
    }

    mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
    /* True probe: this will never fault */
    mem = allocation_tag_mem_probe(env, mmu_idx, ptr, MMU_DATA_STORE, size,
                                   MMU_DATA_STORE, true, 0);
    if (!mem) {
        return;
    }

    /*
     * We know that ptr and size are both TAG_GRANULE aligned; store
     * the tag from the pointer value into the tag memory.
     */
    ptr_tag = allocation_tag_from_addr(ptr);
    tag_count = size / TAG_GRANULE;
    if (ptr & TAG_GRANULE) {
        /* Not 2*TAG_GRANULE-aligned: store tag to first nibble */
        store_tag1_parallel(TAG_GRANULE, mem, ptr_tag);
        mem++;
        tag_count--;
    }
    memset(mem, ptr_tag | (ptr_tag << 4), tag_count / 2);
    if (tag_count & 1) {
        /* Final trailing unaligned nibble */
        mem += tag_count / 2;
        store_tag1_parallel(0, mem, ptr_tag);
    }
}