xref: /qemu/target/arm/tcg/mte_helper.c (revision 41bfb6704eed12015ddead4e507b97b39b1ff5f6)
1 /*
2  * ARM v8.5-MemTag Operations
3  *
4  * Copyright (c) 2020 Linaro, Ltd.
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu/log.h"
22 #include "cpu.h"
23 #include "internals.h"
24 #include "exec/exec-all.h"
25 #include "exec/page-protection.h"
26 #include "exec/ram_addr.h"
27 #include "exec/cpu_ldst.h"
28 #include "exec/helper-proto.h"
29 #include "hw/core/tcg-cpu-ops.h"
30 #include "qapi/error.h"
31 #include "qemu/guest-random.h"
32 
33 
34 static int choose_nonexcluded_tag(int tag, int offset, uint16_t exclude)
35 {
36     if (exclude == 0xffff) {
37         return 0;
38     }
39     if (offset == 0) {
40         while (exclude & (1 << tag)) {
41             tag = (tag + 1) & 15;
42         }
43     } else {
44         do {
45             do {
46                 tag = (tag + 1) & 15;
47             } while (exclude & (1 << tag));
48         } while (--offset > 0);
49     }
50     return tag;
51 }
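/*
 * Illustrative worked example (not part of the original file): with
 * exclude == 0x0003 (tags 0 and 1 excluded), a start tag of 0 and
 * offset 0 skips forward to tag 2, while offset 2 advances to the second
 * non-excluded tag after 0, i.e. tag 3; exclude == 0xffff always yields
 * tag 0.  A hypothetical self-check, using only the function above
 * (the function name below is invented for this note):
 */
static inline void example_choose_nonexcluded_tag_usage(void)
{
    g_assert(choose_nonexcluded_tag(0, 0, 0x0003) == 2);
    g_assert(choose_nonexcluded_tag(0, 2, 0x0003) == 3);
    g_assert(choose_nonexcluded_tag(5, 0, 0xffff) == 0);
}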
52 
53 /**
54  * allocation_tag_mem_probe:
55  * @env: the cpu environment
56  * @ptr_mmu_idx: the addressing regime to use for the virtual address
57  * @ptr: the virtual address for which to look up tag memory
58  * @ptr_access: the access to use for the virtual address
59  * @ptr_size: the number of bytes in the normal memory access
60  * @tag_access: the access to use for the tag memory
61  * @probe: true to merely probe, never taking an exception
62  * @ra: the return address for exception handling
63  *
64  * Our tag memory is formatted as a sequence of little-endian nibbles.
65  * That is, the byte at (addr >> (LOG2_TAG_GRANULE + 1)) contains two
66  * tags, with the tag at [3:0] for the lower addr and the tag at [7:4]
67  * for the higher addr.
68  *
69  * Here, resolve the physical address from the virtual address, and return
70  * a pointer to the corresponding tag byte.
71  *
72  * If there is no tag storage corresponding to @ptr, return NULL.
73  *
74  * If the page is inaccessible for @ptr_access, or has a watchpoint, there are
75  * three options:
76  * (1) probe = true, ra = 0 : pure probe -- we return NULL if the page is not
77  *     accessible, and do not take watchpoint traps. The calling code must
78  *     handle those cases in the right priority compared to MTE traps.
79  * (2) probe = false, ra = 0 : probe, no fault expected -- the caller guarantees
80  *     that the page is going to be accessible. We will take watchpoint traps.
81  * (3) probe = false, ra != 0 : non-probe -- we will take both memory access
82  *     traps and watchpoint traps.
83  * (probe = true, ra != 0 is invalid and will assert.)
84  */
85 static uint8_t *allocation_tag_mem_probe(CPUARMState *env, int ptr_mmu_idx,
86                                          uint64_t ptr, MMUAccessType ptr_access,
87                                          int ptr_size, MMUAccessType tag_access,
88                                          bool probe, uintptr_t ra)
89 {
90 #ifdef CONFIG_USER_ONLY
91     uint64_t clean_ptr = useronly_clean_ptr(ptr);
92     int flags = page_get_flags(clean_ptr);
93     uint8_t *tags;
94     uintptr_t index;
95 
96     assert(!(probe && ra));
97 
98     if (!(flags & (ptr_access == MMU_DATA_STORE ? PAGE_WRITE_ORG : PAGE_READ))) {
99         if (probe) {
100             return NULL;
101         }
102         cpu_loop_exit_sigsegv(env_cpu(env), ptr, ptr_access,
103                               !(flags & PAGE_VALID), ra);
104     }
105 
106     /* Require both MAP_ANON and PROT_MTE for the page. */
107     if (!(flags & PAGE_ANON) || !(flags & PAGE_MTE)) {
108         return NULL;
109     }
110 
111     tags = page_get_target_data(clean_ptr);
112 
113     index = extract32(ptr, LOG2_TAG_GRANULE + 1,
114                       TARGET_PAGE_BITS - LOG2_TAG_GRANULE - 1);
115     return tags + index;
116 #else
117     CPUTLBEntryFull *full;
118     MemTxAttrs attrs;
119     int in_page, flags;
120     hwaddr ptr_paddr, tag_paddr, xlat;
121     MemoryRegion *mr;
122     ARMASIdx tag_asi;
123     AddressSpace *tag_as;
124     void *host;
125 
126     /*
127      * Probe the first byte of the virtual address.  This raises an
128      * exception for inaccessible pages, and resolves the virtual address
129      * into the softmmu tlb.
130      *
131      * When RA == 0, this is either a pure probe or a no-fault-expected probe.
132      * Indicate to probe_access_flags no-fault, then either return NULL
133      * for the pure probe, or assert that we received a valid page for the
134      * no-fault-expected probe.
135      */
136     flags = probe_access_full(env, ptr, 0, ptr_access, ptr_mmu_idx,
137                               ra == 0, &host, &full, ra);
138     if (probe && (flags & TLB_INVALID_MASK)) {
139         return NULL;
140     }
141     assert(!(flags & TLB_INVALID_MASK));
142 
143     /* If the virtual page MemAttr != Tagged, access unchecked. */
144     if (full->extra.arm.pte_attrs != 0xf0) {
145         return NULL;
146     }
147 
148     /*
149      * If not backed by host ram, there is no tag storage: access unchecked.
150      * This is probably a guest os bug though, so log it.
151      * This is probably a guest OS bug though, so log it.
152     if (unlikely(flags & TLB_MMIO)) {
153         qemu_log_mask(LOG_GUEST_ERROR,
154                       "Page @ 0x%" PRIx64 " indicates Tagged Normal memory "
155                       "but is not backed by host ram\n", ptr);
156         return NULL;
157     }
158 
159     /*
160      * Remember these values across the second lookup below,
161      * which may invalidate this pointer via tlb resize.
162      */
163     ptr_paddr = full->phys_addr | (ptr & ~TARGET_PAGE_MASK);
164     attrs = full->attrs;
165     full = NULL;
166 
167     /*
168      * The Normal memory access can extend to the next page.  E.g. a single
169      * 8-byte access to the last byte of a page will check only the last
170      * tag on the first page.
171      * Any page access exception has priority over tag check exception.
172      */
173     in_page = -(ptr | TARGET_PAGE_MASK);
174     if (unlikely(ptr_size > in_page)) {
175         flags |= probe_access_full(env, ptr + in_page, 0, ptr_access,
176                                    ptr_mmu_idx, ra == 0, &host, &full, ra);
177         assert(!(flags & TLB_INVALID_MASK));
178     }
179 
180     /* Any debug exception has priority over a tag check exception. */
181     if (!probe && unlikely(flags & TLB_WATCHPOINT)) {
182         int wp = ptr_access == MMU_DATA_LOAD ? BP_MEM_READ : BP_MEM_WRITE;
183         assert(ra != 0);
184         cpu_check_watchpoint(env_cpu(env), ptr, ptr_size, attrs, wp, ra);
185     }
186 
187     /* Convert to the physical address in tag space.  */
188     tag_paddr = ptr_paddr >> (LOG2_TAG_GRANULE + 1);
189 
190     /* Look up the address in tag space. */
191     tag_asi = attrs.secure ? ARMASIdx_TagS : ARMASIdx_TagNS;
192     tag_as = cpu_get_address_space(env_cpu(env), tag_asi);
193     mr = address_space_translate(tag_as, tag_paddr, &xlat, NULL,
194                                  tag_access == MMU_DATA_STORE, attrs);
195 
196     /*
197      * Note that @mr will never be NULL.  If there is nothing in the address
198      * space at @tag_paddr, the translation will return the unallocated memory
199      * region.  For our purposes, the result must be ram.
200      */
201     if (unlikely(!memory_region_is_ram(mr))) {
202         /* ??? Failure is a board configuration error. */
203         qemu_log_mask(LOG_UNIMP,
204                       "Tag Memory @ 0x%" HWADDR_PRIx " not found for "
205                       "Normal Memory @ 0x%" HWADDR_PRIx "\n",
206                       tag_paddr, ptr_paddr);
207         return NULL;
208     }
209 
210     /*
211      * Ensure the tag memory is dirty on write, for migration.
212      * Tag memory can never contain code or display memory (vga).
213      */
214     if (tag_access == MMU_DATA_STORE) {
215         ram_addr_t tag_ra = memory_region_get_ram_addr(mr) + xlat;
216         cpu_physical_memory_set_dirty_flag(tag_ra, DIRTY_MEMORY_MIGRATION);
217     }
218 
219     return memory_region_get_ram_ptr(mr) + xlat;
220 #endif
221 }
222 
223 static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
224                                    uint64_t ptr, MMUAccessType ptr_access,
225                                    int ptr_size, MMUAccessType tag_access,
226                                    uintptr_t ra)
227 {
228     return allocation_tag_mem_probe(env, ptr_mmu_idx, ptr, ptr_access,
229                                     ptr_size, tag_access, false, ra);
230 }
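/*
 * Illustrative sketch (not upstream code): how the little-endian nibble
 * packing documented for allocation_tag_mem_probe() maps an address to
 * its allocation tag, given a flat view of the tag bytes for one page.
 * The function name and the page_tags parameter are invented for this
 * note; in the user-only path above, page_tags corresponds to the
 * pointer returned by page_get_target_data().
 */
static inline int example_tag_from_page(const uint8_t *page_tags, uint64_t ptr)
{
    /* One tag byte covers 2 * TAG_GRANULE bytes of normal memory. */
    uint8_t byte = page_tags[(ptr & ~TARGET_PAGE_MASK)
                             >> (LOG2_TAG_GRANULE + 1)];
    /* Bit LOG2_TAG_GRANULE selects the low ([3:0]) or high ([7:4]) nibble. */
    return (ptr & TAG_GRANULE) ? byte >> 4 : byte & 0xf;
}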
231 
232 uint64_t HELPER(irg)(CPUARMState *env, uint64_t rn, uint64_t rm)
233 {
234     uint16_t exclude = extract32(rm | env->cp15.gcr_el1, 0, 16);
235     int rrnd = extract32(env->cp15.gcr_el1, 16, 1);
236     int start = extract32(env->cp15.rgsr_el1, 0, 4);
237     int seed = extract32(env->cp15.rgsr_el1, 8, 16);
238     int offset, i, rtag;
239 
240     /*
241      * Our IMPDEF choice for GCR_EL1.RRND==1 is to continue to use the
242      * deterministic algorithm.  Except that with RRND==1 the kernel is
243      * not required to have set RGSR_EL1.SEED != 0, which is required for
244      * the deterministic algorithm to function.  So we force a non-zero
245      * SEED for that case.
246      */
247     if (unlikely(seed == 0) && rrnd) {
248         do {
249             Error *err = NULL;
250             uint16_t two;
251 
252             if (qemu_guest_getrandom(&two, sizeof(two), &err) < 0) {
253                 /*
254                  * Failed, for unknown reasons in the crypto subsystem.
255                  * Best we can do is log the reason and use a constant seed.
256                  */
257                 qemu_log_mask(LOG_UNIMP, "IRG: Crypto failure: %s\n",
258                               error_get_pretty(err));
259                 error_free(err);
260                 two = 1;
261             }
262             seed = two;
263         } while (seed == 0);
264     }
265 
266     /* RandomTag */
267     for (i = offset = 0; i < 4; ++i) {
268         /* NextRandomTagBit */
269         int top = (extract32(seed, 5, 1) ^ extract32(seed, 3, 1) ^
270                    extract32(seed, 2, 1) ^ extract32(seed, 0, 1));
271         seed = (top << 15) | (seed >> 1);
272         offset |= top << i;
273     }
274     rtag = choose_nonexcluded_tag(start, offset, exclude);
275     env->cp15.rgsr_el1 = rtag | (seed << 8);
276 
277     return address_with_allocation_tag(rn, rtag);
278 }
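/*
 * Illustrative worked example for the RandomTag loop above: starting from
 * SEED == 0x0001, the four LFSR steps (taps at bits 5, 3, 2 and 0) shift
 * in top bits 1, 0, 0, 0, so offset == 1 and the seed written back to
 * RGSR_EL1.SEED is 0x1000.  choose_nonexcluded_tag() then advances that
 * many non-excluded tag values beyond RGSR_EL1.TAG to pick the new tag.
 */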
279 
280 uint64_t HELPER(addsubg)(CPUARMState *env, uint64_t ptr,
281                          int32_t offset, uint32_t tag_offset)
282 {
283     int start_tag = allocation_tag_from_addr(ptr);
284     uint16_t exclude = extract32(env->cp15.gcr_el1, 0, 16);
285     int rtag = choose_nonexcluded_tag(start_tag, tag_offset, exclude);
286 
287     return address_with_allocation_tag(ptr + offset, rtag);
288 }
289 
290 static int load_tag1(uint64_t ptr, uint8_t *mem)
291 {
292     int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
293     return extract32(*mem, ofs, 4);
294 }
295 
296 uint64_t HELPER(ldg)(CPUARMState *env, uint64_t ptr, uint64_t xt)
297 {
298     int mmu_idx = arm_env_mmu_index(env);
299     uint8_t *mem;
300     int rtag = 0;
301 
302     /* Trap if accessing an invalid page.  */
303     mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_LOAD, 1,
304                              MMU_DATA_LOAD, GETPC());
305 
306     /* Load if page supports tags. */
307     if (mem) {
308         rtag = load_tag1(ptr, mem);
309     }
310 
311     return address_with_allocation_tag(xt, rtag);
312 }
313 
314 static void check_tag_aligned(CPUARMState *env, uint64_t ptr, uintptr_t ra)
315 {
316     if (unlikely(!QEMU_IS_ALIGNED(ptr, TAG_GRANULE))) {
317         arm_cpu_do_unaligned_access(env_cpu(env), ptr, MMU_DATA_STORE,
318                                     arm_env_mmu_index(env), ra);
319         g_assert_not_reached();
320     }
321 }
322 
323 /* For use in a non-parallel context, store to the given nibble.  */
324 static void store_tag1(uint64_t ptr, uint8_t *mem, int tag)
325 {
326     int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
327     *mem = deposit32(*mem, ofs, 4, tag);
328 }
329 
330 /* For use in a parallel context, atomically store to the given nibble.  */
331 static void store_tag1_parallel(uint64_t ptr, uint8_t *mem, int tag)
332 {
333     int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
334     uint8_t old = qatomic_read(mem);
335 
336     while (1) {
337         uint8_t new = deposit32(old, ofs, 4, tag);
338         uint8_t cmp = qatomic_cmpxchg(mem, old, new);
339         if (likely(cmp == old)) {
340             return;
341         }
342         old = cmp;
343     }
344 }
345 
346 typedef void stg_store1(uint64_t, uint8_t *, int);
347 
348 static inline void do_stg(CPUARMState *env, uint64_t ptr, uint64_t xt,
349                           uintptr_t ra, stg_store1 store1)
350 {
351     int mmu_idx = arm_env_mmu_index(env);
352     uint8_t *mem;
353 
354     check_tag_aligned(env, ptr, ra);
355 
356     /* Trap if accessing an invalid page.  */
357     mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, TAG_GRANULE,
358                              MMU_DATA_STORE, ra);
359 
360     /* Store if page supports tags. */
361     if (mem) {
362         store1(ptr, mem, allocation_tag_from_addr(xt));
363     }
364 }
365 
366 void HELPER(stg)(CPUARMState *env, uint64_t ptr, uint64_t xt)
367 {
368     do_stg(env, ptr, xt, GETPC(), store_tag1);
369 }
370 
371 void HELPER(stg_parallel)(CPUARMState *env, uint64_t ptr, uint64_t xt)
372 {
373     do_stg(env, ptr, xt, GETPC(), store_tag1_parallel);
374 }
375 
376 void HELPER(stg_stub)(CPUARMState *env, uint64_t ptr)
377 {
378     int mmu_idx = arm_env_mmu_index(env);
379     uintptr_t ra = GETPC();
380 
381     check_tag_aligned(env, ptr, ra);
382     probe_write(env, ptr, TAG_GRANULE, mmu_idx, ra);
383 }
384 
385 static inline void do_st2g(CPUARMState *env, uint64_t ptr, uint64_t xt,
386                            uintptr_t ra, stg_store1 store1)
387 {
388     int mmu_idx = arm_env_mmu_index(env);
389     int tag = allocation_tag_from_addr(xt);
390     uint8_t *mem1, *mem2;
391 
392     check_tag_aligned(env, ptr, ra);
393 
394     /*
395      * Trap if accessing invalid page(s).
396      * This takes priority over !allocation_tag_access_enabled.
397      */
398     if (ptr & TAG_GRANULE) {
399         /* Two stores unaligned mod TAG_GRANULE*2 -- modify two bytes. */
400         mem1 = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE,
401                                   TAG_GRANULE, MMU_DATA_STORE, ra);
402         mem2 = allocation_tag_mem(env, mmu_idx, ptr + TAG_GRANULE,
403                                   MMU_DATA_STORE, TAG_GRANULE,
404                                   MMU_DATA_STORE, ra);
405 
406         /* Store if page(s) support tags. */
407         if (mem1) {
408             store1(TAG_GRANULE, mem1, tag);
409         }
410         if (mem2) {
411             store1(0, mem2, tag);
412         }
413     } else {
414         /* Two stores aligned mod TAG_GRANULE*2 -- modify one byte. */
415         mem1 = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE,
416                                   2 * TAG_GRANULE, MMU_DATA_STORE, ra);
417         if (mem1) {
418             tag |= tag << 4;
419             qatomic_set(mem1, tag);
420         }
421     }
422 }
423 
424 void HELPER(st2g)(CPUARMState *env, uint64_t ptr, uint64_t xt)
425 {
426     do_st2g(env, ptr, xt, GETPC(), store_tag1);
427 }
428 
429 void HELPER(st2g_parallel)(CPUARMState *env, uint64_t ptr, uint64_t xt)
430 {
431     do_st2g(env, ptr, xt, GETPC(), store_tag1_parallel);
432 }
433 
434 void HELPER(st2g_stub)(CPUARMState *env, uint64_t ptr)
435 {
436     int mmu_idx = arm_env_mmu_index(env);
437     uintptr_t ra = GETPC();
438     int in_page = -(ptr | TARGET_PAGE_MASK);
439 
440     check_tag_aligned(env, ptr, ra);
441 
442     if (likely(in_page >= 2 * TAG_GRANULE)) {
443         probe_write(env, ptr, 2 * TAG_GRANULE, mmu_idx, ra);
444     } else {
445         probe_write(env, ptr, TAG_GRANULE, mmu_idx, ra);
446         probe_write(env, ptr + TAG_GRANULE, TAG_GRANULE, mmu_idx, ra);
447     }
448 }
449 
450 uint64_t HELPER(ldgm)(CPUARMState *env, uint64_t ptr)
451 {
452     int mmu_idx = arm_env_mmu_index(env);
453     uintptr_t ra = GETPC();
454     int gm_bs = env_archcpu(env)->gm_blocksize;
455     int gm_bs_bytes = 4 << gm_bs;
456     void *tag_mem;
457     uint64_t ret;
458     int shift;
459 
460     ptr = QEMU_ALIGN_DOWN(ptr, gm_bs_bytes);
461 
462     /* Trap if accessing an invalid page.  */
463     tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_LOAD,
464                                  gm_bs_bytes, MMU_DATA_LOAD, ra);
465 
466     /* The tag is squashed to zero if the page does not support tags.  */
467     if (!tag_mem) {
468         return 0;
469     }
470 
471     /*
472      * The ordering of elements within the word corresponds to
473      * a little-endian operation.  Computation of shift comes from
474      *
475      *     index = address<LOG2_TAG_GRANULE+3:LOG2_TAG_GRANULE>
476      *     data<index*4+3:index*4> = tag
477      *
478      * Because of the alignment of ptr above, BS=6 has shift=0.
479      * All memory operations are aligned.  Defer support for BS=2,
480      * requiring insertion or extraction of a nibble, until we
481      * support a cpu that requires it.
482      */
483     switch (gm_bs) {
484     case 3:
485         /* 32 bytes -> 2 tags -> 8 result bits */
486         ret = *(uint8_t *)tag_mem;
487         break;
488     case 4:
489         /* 64 bytes -> 4 tags -> 16 result bits */
490         ret = cpu_to_le16(*(uint16_t *)tag_mem);
491         break;
492     case 5:
493         /* 128 bytes -> 8 tags -> 32 result bits */
494         ret = cpu_to_le32(*(uint32_t *)tag_mem);
495         break;
496     case 6:
497         /* 256 bytes -> 16 tags -> 64 result bits */
498         return cpu_to_le64(*(uint64_t *)tag_mem);
499     default:
500         /*
501          * CPU configured with unsupported/invalid gm blocksize.
502          * This is detected early in arm_cpu_realizefn.
503          */
504         g_assert_not_reached();
505     }
506     shift = extract64(ptr, LOG2_TAG_GRANULE, 4) * 4;
507     return ret << shift;
508 }
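/*
 * Illustrative worked example for the BS/shift computation above, assuming
 * gm_blocksize == 4 (64-byte blocks, 4 tags, 16 result bits): after ptr is
 * aligned down to 64 bytes, address bits [5:4] are zero, so
 * shift == ptr<7:4> * 4 is one of {0, 16, 32, 48}.  For a ptr whose low
 * byte is 0x40 the four tags are returned in result bits [31:16], i.e. at
 * the nibble positions of granules 4..7 of the naturally aligned 256-byte
 * region, matching data<index*4+3:index*4> = tag.
 */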
509 
510 void HELPER(stgm)(CPUARMState *env, uint64_t ptr, uint64_t val)
511 {
512     int mmu_idx = arm_env_mmu_index(env);
513     uintptr_t ra = GETPC();
514     int gm_bs = env_archcpu(env)->gm_blocksize;
515     int gm_bs_bytes = 4 << gm_bs;
516     void *tag_mem;
517     int shift;
518 
519     ptr = QEMU_ALIGN_DOWN(ptr, gm_bs_bytes);
520 
521     /* Trap if accessing an invalid page.  */
522     tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE,
523                                  gm_bs_bytes, MMU_DATA_LOAD, ra);
524 
525     /*
526      * Tag store only happens if the page supports tags,
527      * and if the OS has enabled access to the tags.
528      */
529     if (!tag_mem) {
530         return;
531     }
532 
533     /* See LDGM for comments on BS and on shift.  */
534     shift = extract64(ptr, LOG2_TAG_GRANULE, 4) * 4;
535     val >>= shift;
536     switch (gm_bs) {
537     case 3:
538         /* 32 bytes -> 2 tags -> 8 result bits */
539         *(uint8_t *)tag_mem = val;
540         break;
541     case 4:
542         /* 64 bytes -> 4 tags -> 16 result bits */
543         *(uint16_t *)tag_mem = cpu_to_le16(val);
544         break;
545     case 5:
546         /* 128 bytes -> 8 tags -> 32 result bits */
547         *(uint32_t *)tag_mem = cpu_to_le32(val);
548         break;
549     case 6:
550         /* 256 bytes -> 16 tags -> 64 result bits */
551         *(uint64_t *)tag_mem = cpu_to_le64(val);
552         break;
553     default:
554         /* cpu configured with unsupported gm blocksize. */
555         g_assert_not_reached();
556     }
557 }
558 
559 void HELPER(stzgm_tags)(CPUARMState *env, uint64_t ptr, uint64_t val)
560 {
561     uintptr_t ra = GETPC();
562     int mmu_idx = arm_env_mmu_index(env);
563     int log2_dcz_bytes, log2_tag_bytes;
564     intptr_t dcz_bytes, tag_bytes;
565     uint8_t *mem;
566 
567     /*
568      * In arm_cpu_realizefn, we assert that dcz > LOG2_TAG_GRANULE+1,
569      * i.e. 32 bytes, which is an unreasonably small dcz anyway,
570      * to make sure that we can access one complete tag byte here.
571      */
572     log2_dcz_bytes = env_archcpu(env)->dcz_blocksize + 2;
573     log2_tag_bytes = log2_dcz_bytes - (LOG2_TAG_GRANULE + 1);
574     dcz_bytes = (intptr_t)1 << log2_dcz_bytes;
575     tag_bytes = (intptr_t)1 << log2_tag_bytes;
576     ptr &= -dcz_bytes;
577 
578     mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, dcz_bytes,
579                              MMU_DATA_STORE, ra);
580     if (mem) {
581         int tag_pair = (val & 0xf) * 0x11;
582         memset(mem, tag_pair, tag_bytes);
583     }
584 }
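/*
 * Illustrative worked example, assuming dcz_blocksize == 4 (DC ZVA clears
 * 64 bytes): log2_dcz_bytes == 6 and log2_tag_bytes == 1, so the block
 * spans four tag granules whose tags occupy tag_bytes == 2 tag bytes.
 * Multiplying the 4-bit tag by 0x11 replicates it into both nibbles of a
 * byte, and the memset() stores that pair pattern over both tag bytes.
 */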
585 
586 static void mte_sync_check_fail(CPUARMState *env, uint32_t desc,
587                                 uint64_t dirty_ptr, uintptr_t ra)
588 {
589     int is_write, syn;
590 
591     env->exception.vaddress = dirty_ptr;
592 
593     is_write = FIELD_EX32(desc, MTEDESC, WRITE);
594     syn = syn_data_abort_no_iss(arm_current_el(env) != 0, 0, 0, 0, 0, is_write,
595                                 0x11);
596     raise_exception_ra(env, EXCP_DATA_ABORT, syn, exception_target_el(env), ra);
597     g_assert_not_reached();
598 }
599 
600 static void mte_async_check_fail(CPUARMState *env, uint64_t dirty_ptr,
601                                  uintptr_t ra, ARMMMUIdx arm_mmu_idx, int el)
602 {
603     int select;
604 
605     if (regime_has_2_ranges(arm_mmu_idx)) {
606         select = extract64(dirty_ptr, 55, 1);
607     } else {
608         select = 0;
609     }
610     env->cp15.tfsr_el[el] |= 1 << select;
611 #ifdef CONFIG_USER_ONLY
612     /*
613      * Stand in for a timer irq, setting _TIF_MTE_ASYNC_FAULT,
614      * which then sends a SIGSEGV when the thread is next scheduled.
615      * This cpu will return to the main loop at the end of the TB,
616      * which is rather sooner than "normal".  But the alternative
617      * is waiting until the next syscall.
618      */
619     qemu_cpu_kick(env_cpu(env));
620 #endif
621 }
622 
623 /* Record a tag check failure.  */
624 void mte_check_fail(CPUARMState *env, uint32_t desc,
625                     uint64_t dirty_ptr, uintptr_t ra)
626 {
627     int mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
628     ARMMMUIdx arm_mmu_idx = core_to_aa64_mmu_idx(mmu_idx);
629     int el, reg_el, tcf;
630     uint64_t sctlr;
631 
632     reg_el = regime_el(env, arm_mmu_idx);
633     sctlr = env->cp15.sctlr_el[reg_el];
634 
635     switch (arm_mmu_idx) {
636     case ARMMMUIdx_E10_0:
637     case ARMMMUIdx_E20_0:
638         el = 0;
639         tcf = extract64(sctlr, 38, 2);
640         break;
641     default:
642         el = reg_el;
643         tcf = extract64(sctlr, 40, 2);
644     }
645 
646     switch (tcf) {
647     case 1:
648         /* Tag check fail causes a synchronous exception. */
649         mte_sync_check_fail(env, desc, dirty_ptr, ra);
650         break;
651 
652     case 0:
653         /*
654          * Tag check fail does not affect the PE.
655          * We eliminate this case by not setting MTE_ACTIVE
656          * in tb_flags, so that we never make this runtime call.
657          */
658         g_assert_not_reached();
659 
660     case 2:
661         /* Tag check fail causes asynchronous flag set.  */
662         mte_async_check_fail(env, dirty_ptr, ra, arm_mmu_idx, el);
663         break;
664 
665     case 3:
666         /*
667          * Tag check fail causes asynchronous flag set for stores, or
668          * a synchronous exception for loads.
669          */
670         if (FIELD_EX32(desc, MTEDESC, WRITE)) {
671             mte_async_check_fail(env, dirty_ptr, ra, arm_mmu_idx, el);
672         } else {
673             mte_sync_check_fail(env, desc, dirty_ptr, ra);
674         }
675         break;
676     }
677 }
678 
679 /**
680  * checkN:
681  * @mem: tag memory to test
682  * @odd: true to begin testing at the odd nibble of the first tag byte
683  * @cmp: the tag to compare against
684  * @count: number of tags to test
685  *
686  * Return the number of successful tests.
687  * Thus a return value < @count indicates a failure.
688  *
689  * A note about sizes: count is expected to be small.
690  *
691  * The most common use will be LDP/STP of two integer registers,
692  * which means 16 bytes of memory touching at most 2 tags, but
693  * often the access is aligned and thus just 1 tag.
694  *
695  * Using AdvSIMD LD/ST (multiple), one can access 64 bytes of memory,
696  * touching at most 5 tags.  SVE LDR/STR (vector) with the default
697  * vector length is also 64 bytes; the maximum architectural length
698  * is 256 bytes touching at most 9 tags.
699  *
700  * The loop below uses 7 logical operations and 1 memory operation
701  * per tag pair.  An implementation that loads an aligned word and
702  * uses masking to ignore adjacent tags requires 18 logical operations
703  * and thus does not begin to pay off until 6 tags.
704  * Which, according to the survey above, is unlikely to be common.
705  */
706 static int checkN(uint8_t *mem, int odd, int cmp, int count)
707 {
708     int n = 0, diff;
709 
710     /* Replicate the test tag and compare.  */
711     cmp *= 0x11;
712     diff = *mem++ ^ cmp;
713 
714     if (odd) {
715         goto start_odd;
716     }
717 
718     while (1) {
719         /* Test even tag. */
720         if (unlikely((diff) & 0x0f)) {
721             break;
722         }
723         if (++n == count) {
724             break;
725         }
726 
727     start_odd:
728         /* Test odd tag. */
729         if (unlikely((diff) & 0xf0)) {
730             break;
731         }
732         if (++n == count) {
733             break;
734         }
735 
736         diff = *mem++ ^ cmp;
737     }
738     return n;
739 }
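/*
 * Illustrative worked example: suppose *mem == 0x32 (tag 2 in the even
 * granule, tag 3 in the odd one) and checkN(mem, 0, 2, 2) is called.
 * cmp becomes 0x22 and diff == 0x10, so the even test passes (n == 1)
 * but the odd test sees diff & 0xf0 != 0 and stops.  The return value 1
 * is less than count == 2, flagging a mismatch in the second granule.
 */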
740 
741 /**
742  * checkNrev:
743  * @mem: tag memory to test
744  * @odd: true to begin testing at the odd nibble of the first tag byte
745  * @cmp: the tag to compare against
746  * @count: number of tags to test
747  *
748  * Return the number of successful tests.
749  * Thus a return value < @count indicates a failure.
750  *
751  * This is like checkN, but it runs backwards, checking the
752  * tags starting at @mem and then the tags preceding it.
753  * This is needed by the backwards-memory-copying operations.
754  */
755 static int checkNrev(uint8_t *mem, int odd, int cmp, int count)
756 {
757     int n = 0, diff;
758 
759     /* Replicate the test tag and compare.  */
760     cmp *= 0x11;
761     diff = *mem-- ^ cmp;
762 
763     if (!odd) {
764         goto start_even;
765     }
766 
767     while (1) {
768         /* Test odd tag. */
769         if (unlikely((diff) & 0xf0)) {
770             break;
771         }
772         if (++n == count) {
773             break;
774         }
775 
776     start_even:
777         /* Test even tag. */
778         if (unlikely((diff) & 0x0f)) {
779             break;
780         }
781         if (++n == count) {
782             break;
783         }
784 
785         diff = *mem-- ^ cmp;
786     }
787     return n;
788 }
789 
790 /**
791  * mte_probe_int() - helper for mte_probe and mte_check
792  * @env: CPU environment
793  * @desc: MTEDESC descriptor
794  * @ptr: virtual address of the base of the access
795  * @fault: return virtual address of the first check failure
796  *
797  * Internal routine for both mte_probe and mte_check.
798  * Return zero on failure, filling in *fault.
799  * Return negative on trivial success for tbi disabled.
800  * Return positive on success with tbi enabled.
801  */
802 static int mte_probe_int(CPUARMState *env, uint32_t desc, uint64_t ptr,
803                          uintptr_t ra, uint64_t *fault)
804 {
805     int mmu_idx, ptr_tag, bit55;
806     uint64_t ptr_last, prev_page, next_page;
807     uint64_t tag_first, tag_last;
808     uint32_t sizem1, tag_count, n, c;
809     uint8_t *mem1, *mem2;
810     MMUAccessType type;
811 
812     bit55 = extract64(ptr, 55, 1);
813     *fault = ptr;
814 
815     /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */
816     if (unlikely(!tbi_check(desc, bit55))) {
817         return -1;
818     }
819 
820     ptr_tag = allocation_tag_from_addr(ptr);
821 
822     if (tcma_check(desc, bit55, ptr_tag)) {
823         return 1;
824     }
825 
826     mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
827     type = FIELD_EX32(desc, MTEDESC, WRITE) ? MMU_DATA_STORE : MMU_DATA_LOAD;
828     sizem1 = FIELD_EX32(desc, MTEDESC, SIZEM1);
829 
830     /* Find the addr of the end of the access */
831     ptr_last = ptr + sizem1;
832 
833     /* Round the bounds to the tag granule, and compute the number of tags. */
834     tag_first = QEMU_ALIGN_DOWN(ptr, TAG_GRANULE);
835     tag_last = QEMU_ALIGN_DOWN(ptr_last, TAG_GRANULE);
836     tag_count = ((tag_last - tag_first) / TAG_GRANULE) + 1;
837 
838     /* Locate the page boundaries. */
839     prev_page = ptr & TARGET_PAGE_MASK;
840     next_page = prev_page + TARGET_PAGE_SIZE;
841 
842     if (likely(tag_last - prev_page < TARGET_PAGE_SIZE)) {
843         /* Memory access stays on one page. */
844         mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, sizem1 + 1,
845                                   MMU_DATA_LOAD, ra);
846         if (!mem1) {
847             return 1;
848         }
849         /* Perform all of the comparisons. */
850         n = checkN(mem1, ptr & TAG_GRANULE, ptr_tag, tag_count);
851     } else {
852         /* Memory access crosses to next page. */
853         mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, next_page - ptr,
854                                   MMU_DATA_LOAD, ra);
855 
856         mem2 = allocation_tag_mem(env, mmu_idx, next_page, type,
857                                   ptr_last - next_page + 1,
858                                   MMU_DATA_LOAD, ra);
859 
860         /*
861          * Perform all of the comparisons.
862          * Note the possible but unlikely case of the operation spanning
863          * two pages that do not both have tagging enabled.
864          */
865         n = c = (next_page - tag_first) / TAG_GRANULE;
866         if (mem1) {
867             n = checkN(mem1, ptr & TAG_GRANULE, ptr_tag, c);
868         }
869         if (n == c) {
870             if (!mem2) {
871                 return 1;
872             }
873             n += checkN(mem2, 0, ptr_tag, tag_count - c);
874         }
875     }
876 
877     if (likely(n == tag_count)) {
878         return 1;
879     }
880 
881     /*
882      * If we failed, we know which granule.  For the first granule, the
883      * failure address is @ptr, the first byte accessed.  Otherwise the
884      * failure address is the first byte of the nth granule.
885      */
886     if (n > 0) {
887         *fault = tag_first + n * TAG_GRANULE;
888     }
889     return 0;
890 }
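/*
 * Illustrative worked example for the granule arithmetic above: a 16-byte
 * access at ptr == 0x1007 has sizem1 == 15, ptr_last == 0x1016,
 * tag_first == 0x1000, tag_last == 0x1010 and thus tag_count == 2.  Both
 * granules lie on one page, so a single tag-memory lookup feeds checkN();
 * if only the first granule matches, n == 1 and the reported fault
 * address is tag_first + TAG_GRANULE == 0x1010.
 */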
891 
892 uint64_t mte_check(CPUARMState *env, uint32_t desc, uint64_t ptr, uintptr_t ra)
893 {
894     uint64_t fault;
895     int ret = mte_probe_int(env, desc, ptr, ra, &fault);
896 
897     if (unlikely(ret == 0)) {
898         mte_check_fail(env, desc, fault, ra);
899     } else if (ret < 0) {
900         return ptr;
901     }
902     return useronly_clean_ptr(ptr);
903 }
904 
905 uint64_t HELPER(mte_check)(CPUARMState *env, uint32_t desc, uint64_t ptr)
906 {
907     /*
908      * R_XCHFJ: Alignment check not caused by memory type is priority 1,
909      * higher than any translation fault.  When MTE is disabled, tcg
910      * performs the alignment check during the code generated for the
911      * memory access.  With MTE enabled, we must check this here before
912      * raising any translation fault in allocation_tag_mem.
913      */
914     unsigned align = FIELD_EX32(desc, MTEDESC, ALIGN);
915     if (unlikely(align)) {
916         align = (1u << align) - 1;
917         if (unlikely(ptr & align)) {
918             int idx = FIELD_EX32(desc, MTEDESC, MIDX);
919             bool w = FIELD_EX32(desc, MTEDESC, WRITE);
920             MMUAccessType type = w ? MMU_DATA_STORE : MMU_DATA_LOAD;
921             arm_cpu_do_unaligned_access(env_cpu(env), ptr, type, idx, GETPC());
922         }
923     }
924 
925     return mte_check(env, desc, ptr, GETPC());
926 }
927 
928 /*
929  * No-fault version of mte_check, to be used by SVE for MemSingleNF.
930  * Returns false if the access is Checked and the check failed.  This
931  * is only intended to probe the tag -- the validity of the page must
932  * be checked beforehand.
933  */
934 bool mte_probe(CPUARMState *env, uint32_t desc, uint64_t ptr)
935 {
936     uint64_t fault;
937     int ret = mte_probe_int(env, desc, ptr, 0, &fault);
938 
939     return ret != 0;
940 }
941 
942 /*
943  * Perform an MTE checked access for DC_ZVA.
944  */
945 uint64_t HELPER(mte_check_zva)(CPUARMState *env, uint32_t desc, uint64_t ptr)
946 {
947     uintptr_t ra = GETPC();
948     int log2_dcz_bytes, log2_tag_bytes;
949     int mmu_idx, bit55;
950     intptr_t dcz_bytes, tag_bytes, i;
951     void *mem;
952     uint64_t ptr_tag, mem_tag, align_ptr;
953 
954     bit55 = extract64(ptr, 55, 1);
955 
956     /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */
957     if (unlikely(!tbi_check(desc, bit55))) {
958         return ptr;
959     }
960 
961     ptr_tag = allocation_tag_from_addr(ptr);
962 
963     if (tcma_check(desc, bit55, ptr_tag)) {
964         goto done;
965     }
966 
967     /*
968      * In arm_cpu_realizefn, we asserted that dcz > LOG2_TAG_GRANULE+1,
969      * i.e. 32 bytes, which is an unreasonably small dcz anyway, to make
970      * sure that we can access one complete tag byte here.
971      */
972     log2_dcz_bytes = env_archcpu(env)->dcz_blocksize + 2;
973     log2_tag_bytes = log2_dcz_bytes - (LOG2_TAG_GRANULE + 1);
974     dcz_bytes = (intptr_t)1 << log2_dcz_bytes;
975     tag_bytes = (intptr_t)1 << log2_tag_bytes;
976     align_ptr = ptr & -dcz_bytes;
977 
978     /*
979      * Trap if accessing an invalid page.  DC_ZVA requires that we supply
980      * the original pointer for an invalid page.  But watchpoints require
981      * that we probe the actual space.  So do both.
982      */
983     mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
984     (void) probe_write(env, ptr, 1, mmu_idx, ra);
985     mem = allocation_tag_mem(env, mmu_idx, align_ptr, MMU_DATA_STORE,
986                              dcz_bytes, MMU_DATA_LOAD, ra);
987     if (!mem) {
988         goto done;
989     }
990 
991     /*
992      * Unlike the reasoning for checkN, DC_ZVA is always aligned, and thus
993      * it is quite easy to perform all of the comparisons at once without
994      * any extra masking.
995      *
996      * The most common zva block size is 64; some of the thunderx cpus use
997      * a block size of 128.  For user-only, aarch64_max_initfn will set the
998      * block size to 512.  Fill out the other cases for future-proofing.
999      *
1000      * In order to be able to find the first miscompare later, we want the
1001      * tag bytes to be in little-endian order.
1002      */
1003     switch (log2_tag_bytes) {
1004     case 0: /* zva_blocksize 32 */
1005         mem_tag = *(uint8_t *)mem;
1006         ptr_tag *= 0x11u;
1007         break;
1008     case 1: /* zva_blocksize 64 */
1009         mem_tag = cpu_to_le16(*(uint16_t *)mem);
1010         ptr_tag *= 0x1111u;
1011         break;
1012     case 2: /* zva_blocksize 128 */
1013         mem_tag = cpu_to_le32(*(uint32_t *)mem);
1014         ptr_tag *= 0x11111111u;
1015         break;
1016     case 3: /* zva_blocksize 256 */
1017         mem_tag = cpu_to_le64(*(uint64_t *)mem);
1018         ptr_tag *= 0x1111111111111111ull;
1019         break;
1020 
1021     default: /* zva_blocksize 512, 1024, 2048 */
1022         ptr_tag *= 0x1111111111111111ull;
1023         i = 0;
1024         do {
1025             mem_tag = cpu_to_le64(*(uint64_t *)(mem + i));
1026             if (unlikely(mem_tag != ptr_tag)) {
1027                 goto fail;
1028             }
1029             i += 8;
1030             align_ptr += 16 * TAG_GRANULE;
1031         } while (i < tag_bytes);
1032         goto done;
1033     }
1034 
1035     if (likely(mem_tag == ptr_tag)) {
1036         goto done;
1037     }
1038 
1039  fail:
1040     /* Locate the first nibble that differs. */
1041     i = ctz64(mem_tag ^ ptr_tag) >> 4;
1042     mte_check_fail(env, desc, align_ptr + i * TAG_GRANULE, ra);
1043 
1044  done:
1045     return useronly_clean_ptr(ptr);
1046 }
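/*
 * Illustrative worked example for the tag replication above, assuming a
 * 64-byte ZVA block (log2_tag_bytes == 1): a pointer tag of 0x3 is
 * replicated to 0x3333 and compared against the two tag bytes covering
 * the block, read as a little-endian 16-bit value with one nibble per
 * granule.  Any granule carrying a different tag makes the XOR non-zero,
 * and its position is then recovered from the lowest set bit.
 */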
1047 
1048 uint64_t mte_mops_probe(CPUARMState *env, uint64_t ptr, uint64_t size,
1049                         uint32_t desc)
1050 {
1051     int mmu_idx, tag_count;
1052     uint64_t ptr_tag, tag_first, tag_last;
1053     void *mem;
1054     bool w = FIELD_EX32(desc, MTEDESC, WRITE);
1055     uint32_t n;
1056 
1057     mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
1058     /* True probe; this will never fault */
1059     mem = allocation_tag_mem_probe(env, mmu_idx, ptr,
1060                                    w ? MMU_DATA_STORE : MMU_DATA_LOAD,
1061                                    size, MMU_DATA_LOAD, true, 0);
1062     if (!mem) {
1063         return size;
1064     }
1065 
1066     /*
1067      * TODO: checkN() is not designed for checks of the size we expect
1068      * for FEAT_MOPS operations, so we should implement this differently.
1069      * Maybe we should do something like
1070      *   if (region start and size are aligned nicely) {
1071      *      do direct loads of 64 tag bits at a time;
1072      *   } else {
1073      *      call checkN()
1074      *   }
1075      */
1076     /* Round the bounds to the tag granule, and compute the number of tags. */
1077     ptr_tag = allocation_tag_from_addr(ptr);
1078     tag_first = QEMU_ALIGN_DOWN(ptr, TAG_GRANULE);
1079     tag_last = QEMU_ALIGN_DOWN(ptr + size - 1, TAG_GRANULE);
1080     tag_count = ((tag_last - tag_first) / TAG_GRANULE) + 1;
1081     n = checkN(mem, ptr & TAG_GRANULE, ptr_tag, tag_count);
1082     if (likely(n == tag_count)) {
1083         return size;
1084     }
1085 
1086     /*
1087      * Failure; for the first granule, it's at @ptr. Otherwise
1088      * it's at the first byte of the nth granule. Calculate how
1089      * many bytes we can access without hitting that failure.
1090      */
1091     if (n == 0) {
1092         return 0;
1093     } else {
1094         return n * TAG_GRANULE - (ptr - tag_first);
1095     }
1096 }
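/*
 * Illustrative worked example for the return-value computation above: for
 * ptr == 0x1008 and size == 0x40, tag_first == 0x1000, tag_last == 0x1040
 * and tag_count == 5.  If checkN() reports n == 2 (the granules at 0x1000
 * and 0x1010 match but 0x1020 does not), the helper returns
 * 2 * TAG_GRANULE - 8 == 24: bytes 0x1008..0x101f may be accessed before
 * the failing granule is reached.
 */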
1097 
1098 uint64_t mte_mops_probe_rev(CPUARMState *env, uint64_t ptr, uint64_t size,
1099                             uint32_t desc)
1100 {
1101     int mmu_idx, tag_count;
1102     uint64_t ptr_tag, tag_first, tag_last;
1103     void *mem;
1104     bool w = FIELD_EX32(desc, MTEDESC, WRITE);
1105     uint32_t n;
1106 
1107     mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
1108     /*
1109      * True probe; this will never fault. Note that our caller passes
1110      * us a pointer to the end of the region, but allocation_tag_mem_probe()
1111      * wants a pointer to the start. Because we know we don't span a page
1112      * boundary and that allocation_tag_mem_probe() doesn't otherwise care
1113      * about the size, pass in a size of 1 byte. This is simpler than
1114      * adjusting the ptr to point to the start of the region and then having
1115      * to adjust the returned 'mem' to get the end of the tag memory.
1116      */
1117     mem = allocation_tag_mem_probe(env, mmu_idx, ptr,
1118                                    w ? MMU_DATA_STORE : MMU_DATA_LOAD,
1119                                    1, MMU_DATA_LOAD, true, 0);
1120     if (!mem) {
1121         return size;
1122     }
1123 
1124     /*
1125      * TODO: checkNrev() is not designed for checks of the size we expect
1126      * for FEAT_MOPS operations, so we should implement this differently.
1127      * Maybe we should do something like
1128      *   if (region start and size are aligned nicely) {
1129      *      do direct loads of 64 tag bits at a time;
1130      *   } else {
1131      *      call checkNrev()
1132      *   }
1133      */
1134     /* Round the bounds to the tag granule, and compute the number of tags. */
1135     ptr_tag = allocation_tag_from_addr(ptr);
1136     tag_first = QEMU_ALIGN_DOWN(ptr - (size - 1), TAG_GRANULE);
1137     tag_last = QEMU_ALIGN_DOWN(ptr, TAG_GRANULE);
1138     tag_count = ((tag_last - tag_first) / TAG_GRANULE) + 1;
1139     n = checkNrev(mem, ptr & TAG_GRANULE, ptr_tag, tag_count);
1140     if (likely(n == tag_count)) {
1141         return size;
1142     }
1143 
1144     /*
1145      * Failure; for the first granule, it's at @ptr. Otherwise
1146      * it's at the last byte of the nth granule. Calculate how
1147      * many bytes we can access without hitting that failure.
1148      */
1149     if (n == 0) {
1150         return 0;
1151     } else {
1152         return (n - 1) * TAG_GRANULE + ((ptr + 1) - tag_last);
1153     }
1154 }
1155 
1156 void mte_mops_set_tags(CPUARMState *env, uint64_t ptr, uint64_t size,
1157                        uint32_t desc)
1158 {
1159     int mmu_idx, tag_count;
1160     uint64_t ptr_tag;
1161     void *mem;
1162 
1163     if (!desc) {
1164         /* Tags not actually enabled */
1165         return;
1166     }
1167 
1168     mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
1169     /* True probe: this will never fault */
1170     mem = allocation_tag_mem_probe(env, mmu_idx, ptr, MMU_DATA_STORE, size,
1171                                    MMU_DATA_STORE, true, 0);
1172     if (!mem) {
1173         return;
1174     }
1175 
1176     /*
1177      * We know that ptr and size are both TAG_GRANULE aligned; store
1178      * the tag from the pointer value into the tag memory.
1179      */
1180     ptr_tag = allocation_tag_from_addr(ptr);
1181     tag_count = size / TAG_GRANULE;
1182     if (ptr & TAG_GRANULE) {
1183         /* Not 2*TAG_GRANULE-aligned: store tag to first nibble */
1184         store_tag1_parallel(TAG_GRANULE, mem, ptr_tag);
1185         mem++;
1186         tag_count--;
1187     }
1188     memset(mem, ptr_tag | (ptr_tag << 4), tag_count / 2);
1189     if (tag_count & 1) {
1190         /* Final trailing unaligned nibble */
1191         mem += tag_count / 2;
1192         store_tag1_parallel(0, mem, ptr_tag);
1193     }
1194 }
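/*
 * Illustrative worked example for the nibble handling above: with ptr
 * ending in 0x10 (TAG_GRANULE-aligned but not 2*TAG_GRANULE-aligned) and
 * size == 0x40, tag_count starts at 4.  The leading tag goes into the odd
 * nibble of the first tag byte via store_tag1_parallel(), the next two
 * tags are written as one whole byte by memset(), and the remaining odd
 * count leaves a final even nibble, again stored with
 * store_tag1_parallel().
 */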
1195