/* xref: /qemu/target/i386/tcg/system/excp_helper.c (revision b103cc6e74ac92f070a0e004bd84334e845c20b5) */
/*
 *  x86 exception helpers - system code
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/cpu_ldst.h"
#include "exec/cputlb.h"
#include "exec/page-protection.h"
#include "exec/tlb-flags.h"
#include "tcg/helper-tcg.h"

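/*
 * Parameters of one guest page-table walk: the virtual address to
 * translate, the CR3 and PG_MODE_* flags to walk with, the kind of
 * access, the MMU index of that access, and the MMU index used to read
 * the page-table entries themselves (MMU_PHYS_IDX, or MMU_NESTED_IDX
 * when nested paging is active).
 */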
typedef struct TranslateParams {
    target_ulong addr;
    target_ulong cr3;
    int pg_mode;
    int mmu_idx;
    int ptw_idx;
    MMUAccessType access_type;
} TranslateParams;

typedef struct TranslateResult {
    hwaddr paddr;
    int prot;
    int page_size;
} TranslateResult;

typedef enum TranslateFaultStage2 {
    S2_NONE,
    S2_GPA,
    S2_GPT,
} TranslateFaultStage2;

typedef struct TranslateFault {
    int exception_index;
    int error_code;
    target_ulong cr2;
    TranslateFaultStage2 stage2;
} TranslateFault;

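/*
 * State for accessing one page-table entry.  ptw_translate() fills in
 * haddr when the entry is in directly addressable host memory; otherwise
 * haddr is NULL and the entry is accessed the slow way through gaddr and
 * ptw_idx.
 */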
typedef struct PTETranslate {
    CPUX86State *env;
    TranslateFault *err;
    int ptw_idx;
    void *haddr;
    hwaddr gaddr;
} PTETranslate;

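/*
 * Probe the address of a page-table entry for writing (the walk may need
 * to set Accessed/Dirty bits there) and remember its host address when
 * available.  The probe can only fail for the nested (NPT) translation
 * of the page-table address itself; such a failure is recorded as an
 * S2_GPT fault and aborts the walk.
 */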
static bool ptw_translate(PTETranslate *inout, hwaddr addr)
{
    int flags;

    inout->gaddr = addr;
    flags = probe_access_full_mmu(inout->env, addr, 0, MMU_DATA_STORE,
                                  inout->ptw_idx, &inout->haddr, NULL);

    if (unlikely(flags & TLB_INVALID_MASK)) {
        TranslateFault *err = inout->err;

        assert(inout->ptw_idx == MMU_NESTED_IDX);
        *err = (TranslateFault){
            .error_code = inout->env->error_code,
            .cr2 = addr,
            .stage2 = S2_GPT,
        };
        return false;
    }
    return true;
}

static inline uint32_t ptw_ldl(const PTETranslate *in, uint64_t ra)
{
    if (likely(in->haddr)) {
        return ldl_p(in->haddr);
    }
    return cpu_ldl_mmuidx_ra(in->env, in->gaddr, in->ptw_idx, ra);
}

static inline uint64_t ptw_ldq(const PTETranslate *in, uint64_t ra)
{
    if (likely(in->haddr)) {
        return ldq_p(in->haddr);
    }
    return cpu_ldq_mmuidx_ra(in->env, in->gaddr, in->ptw_idx, ra);
}

/*
 * Note that we can use a 32-bit cmpxchg for all page table entries,
 * even 64-bit ones, because PG_PRESENT_MASK, PG_ACCESSED_MASK and
 * PG_DIRTY_MASK are all in the low 32 bits.
 */
static bool ptw_setl_slow(const PTETranslate *in, uint32_t old, uint32_t new)
{
    uint32_t cmp;

    CPUState *cpu = env_cpu(in->env);
    /* We are in cpu_exec, and start_exclusive can't be called directly. */
    g_assert(cpu->running);
    cpu_exec_end(cpu);
    /* Does x86 really perform an RMW cycle on MMIO for a page-table walk? */
    start_exclusive();
    cmp = cpu_ldl_mmuidx_ra(in->env, in->gaddr, in->ptw_idx, 0);
    if (cmp == old) {
        cpu_stl_mmuidx_ra(in->env, in->gaddr, new, in->ptw_idx, 0);
    }
    end_exclusive();
    cpu_exec_start(cpu);
    return cmp == old;
}

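/*
 * Atomically OR the bits in @set into the low 32 bits of the PTE that was
 * last read as @old.  Returns false if the entry changed underneath us,
 * in which case the caller must re-read it and retry.  The slow path
 * (no host address, e.g. page tables in MMIO) performs the update under
 * the exclusive lock instead.
 */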
static inline bool ptw_setl(const PTETranslate *in, uint32_t old, uint32_t set)
{
    if (set & ~old) {
        uint32_t new = old | set;
        if (likely(in->haddr)) {
            old = cpu_to_le32(old);
            new = cpu_to_le32(new);
            return qatomic_cmpxchg((uint32_t *)in->haddr, old, new) == old;
        }
        return ptw_setl_slow(in, old, new);
    }
    return true;
}

static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
                          TranslateResult *out, TranslateFault *err,
                          uint64_t ra)
{
    const target_ulong addr = in->addr;
    const int pg_mode = in->pg_mode;
    const bool is_user = is_mmu_index_user(in->mmu_idx);
    const MMUAccessType access_type = in->access_type;
    uint64_t ptep, pte, rsvd_mask;
    PTETranslate pte_trans = {
        .env = env,
        .err = err,
        .ptw_idx = in->ptw_idx,
    };
    hwaddr pte_addr, paddr;
    uint32_t pkr;
    int page_size;
    int error_code;
    int prot;

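    /*
     * The walk below accumulates the access rights of all levels in ptep:
     * the User and R/W bits are ANDed together across levels, and NX is
     * tracked with inverted polarity (set in ptep means "may execute") so
     * that it can be ANDed as well; it is flipped back at do_check_protect.
     * rsvd_mask collects the PTE bits that must be zero: physical-address
     * bits above the CPU's MAXPHYADDR, plus NX while EFER.NXE is clear.
     * If setting an Accessed bit loses a cmpxchg race, the walk restarts
     * at the matching restart_* label; if the final Accessed/Dirty update
     * fails, it restarts from restart_all.
     */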
 restart_all:
    rsvd_mask = ~MAKE_64BIT_MASK(0, env_archcpu(env)->phys_bits);
    rsvd_mask &= PG_ADDRESS_MASK;
    if (!(pg_mode & PG_MODE_NXE)) {
        rsvd_mask |= PG_NX_MASK;
    }

    if (pg_mode & PG_MODE_PAE) {
#ifdef TARGET_X86_64
        if (pg_mode & PG_MODE_LMA) {
            if (pg_mode & PG_MODE_LA57) {
                /*
                 * Page table level 5
                 */
                pte_addr = (in->cr3 & ~0xfff) + (((addr >> 48) & 0x1ff) << 3);
                if (!ptw_translate(&pte_trans, pte_addr)) {
                    return false;
                }
            restart_5:
                pte = ptw_ldq(&pte_trans, ra);
                if (!(pte & PG_PRESENT_MASK)) {
                    goto do_fault;
                }
                if (pte & (rsvd_mask | PG_PSE_MASK)) {
                    goto do_fault_rsvd;
                }
                if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
                    goto restart_5;
                }
                ptep = pte ^ PG_NX_MASK;
            } else {
                pte = in->cr3;
                ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
            }

            /*
             * Page table level 4
             */
            pte_addr = (pte & PG_ADDRESS_MASK) + (((addr >> 39) & 0x1ff) << 3);
            if (!ptw_translate(&pte_trans, pte_addr)) {
                return false;
            }
        restart_4:
            pte = ptw_ldq(&pte_trans, ra);
            if (!(pte & PG_PRESENT_MASK)) {
                goto do_fault;
            }
            if (pte & (rsvd_mask | PG_PSE_MASK)) {
                goto do_fault_rsvd;
            }
            if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
                goto restart_4;
            }
            ptep &= pte ^ PG_NX_MASK;

            /*
             * Page table level 3
             */
            pte_addr = (pte & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) << 3);
            if (!ptw_translate(&pte_trans, pte_addr)) {
                return false;
            }
        restart_3_lma:
            pte = ptw_ldq(&pte_trans, ra);
            if (!(pte & PG_PRESENT_MASK)) {
                goto do_fault;
            }
            if (pte & rsvd_mask) {
                goto do_fault_rsvd;
            }
            if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
                goto restart_3_lma;
            }
            ptep &= pte ^ PG_NX_MASK;
            if (pte & PG_PSE_MASK) {
                /* 1 GB page */
                page_size = 1024 * 1024 * 1024;
                goto do_check_protect;
            }
        } else
#endif
        {
            /*
             * Page table level 3
             */
            pte_addr = (in->cr3 & 0xffffffe0ULL) + ((addr >> 27) & 0x18);
            if (!ptw_translate(&pte_trans, pte_addr)) {
                return false;
            }
            rsvd_mask |= PG_HI_USER_MASK;
        restart_3_nolma:
            pte = ptw_ldq(&pte_trans, ra);
            if (!(pte & PG_PRESENT_MASK)) {
                goto do_fault;
            }
            if (pte & (rsvd_mask | PG_NX_MASK)) {
                goto do_fault_rsvd;
            }
            if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
                goto restart_3_nolma;
            }
            ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
        }

        /*
         * Page table level 2
         */
        pte_addr = (pte & PG_ADDRESS_MASK) + (((addr >> 21) & 0x1ff) << 3);
        if (!ptw_translate(&pte_trans, pte_addr)) {
            return false;
        }
    restart_2_pae:
        pte = ptw_ldq(&pte_trans, ra);
        if (!(pte & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        if (pte & rsvd_mask) {
            goto do_fault_rsvd;
        }
        if (pte & PG_PSE_MASK) {
            /* 2 MB page */
            page_size = 2048 * 1024;
            ptep &= pte ^ PG_NX_MASK;
            goto do_check_protect;
        }
        if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
            goto restart_2_pae;
        }
        ptep &= pte ^ PG_NX_MASK;

        /*
         * Page table level 1
         */
        pte_addr = (pte & PG_ADDRESS_MASK) + (((addr >> 12) & 0x1ff) << 3);
        if (!ptw_translate(&pte_trans, pte_addr)) {
            return false;
        }
        pte = ptw_ldq(&pte_trans, ra);
        if (!(pte & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        if (pte & rsvd_mask) {
            goto do_fault_rsvd;
        }
        /* combine pde and pte nx, user and rw protections */
        ptep &= pte ^ PG_NX_MASK;
        page_size = 4096;
    } else if (pg_mode & PG_MODE_PG) {
        /*
         * Page table level 2
         */
        pte_addr = (in->cr3 & 0xfffff000ULL) + ((addr >> 20) & 0xffc);
        if (!ptw_translate(&pte_trans, pte_addr)) {
            return false;
        }
    restart_2_nopae:
        pte = ptw_ldl(&pte_trans, ra);
        if (!(pte & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        ptep = pte | PG_NX_MASK;

        /* if PSE bit is set, then we use a 4MB page */
        if ((pte & PG_PSE_MASK) && (pg_mode & PG_MODE_PSE)) {
            page_size = 4096 * 1024;
            /*
             * Bits 20-13 provide bits 39-32 of the address; bit 21 is reserved.
             * Leave bits 20-13 in place for setting accessed/dirty bits below.
             */
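            /*
             * In other words, with PSE-36 the 4 MB frame base is
             *   ((pde >> 13) & 0xff) << 32 | (pde & 0xffc00000)
             * and the shuffle below lets the common paddr computation at
             * the end of the walk extract exactly that.
             */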
            pte = (uint32_t)pte | ((pte & 0x1fe000LL) << (32 - 13));
            rsvd_mask = 0x200000;
            goto do_check_protect_pse36;
        }
        if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
            goto restart_2_nopae;
        }

        /*
         * Page table level 1
         */
        pte_addr = (pte & ~0xfffu) + ((addr >> 10) & 0xffc);
        if (!ptw_translate(&pte_trans, pte_addr)) {
            return false;
        }
        pte = ptw_ldl(&pte_trans, ra);
        if (!(pte & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        /* combine pde and pte user and rw protections */
        ptep &= pte | PG_NX_MASK;
        page_size = 4096;
        rsvd_mask = 0;
    } else {
        /*
         * No paging (real mode): tentatively resolve the address as 1:1
         * here, but possibly still perform an NPT walk on it later.
         */
        page_size = 0x40000000;
        paddr = in->addr;
        prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
        goto stage2;
    }

do_check_protect:
    rsvd_mask |= (page_size - 1) & PG_ADDRESS_MASK & ~PG_PSE_PAT_MASK;
do_check_protect_pse36:
    if (pte & rsvd_mask) {
        goto do_fault_rsvd;
    }
    ptep ^= PG_NX_MASK;

    /* can the page be put in the TLB?  prot will tell us */
    if (is_user && !(ptep & PG_USER_MASK)) {
        goto do_fault_protect;
    }

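    /*
     * Compute the page protection from the accumulated ptep bits:
     * SMAP-restricted supervisor accesses get no read/write access to user
     * pages; writes additionally need the R/W bit unless this is a
     * supervisor access with CR0.WP clear; execution needs NX clear and,
     * for supervisor accesses with SMEP, a non-user page.
     */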
    prot = 0;
    if (!is_mmu_index_smap(in->mmu_idx) || !(ptep & PG_USER_MASK)) {
        prot |= PAGE_READ;
        if ((ptep & PG_RW_MASK) || !(is_user || (pg_mode & PG_MODE_WP))) {
            prot |= PAGE_WRITE;
        }
    }
    if (!(ptep & PG_NX_MASK) &&
        (is_user ||
         !((pg_mode & PG_MODE_SMEP) && (ptep & PG_USER_MASK)))) {
        prot |= PAGE_EXEC;
    }

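    /*
     * Protection keys: user pages are checked against PKRU, supervisor
     * pages against PKRS.  The 4-bit key from the PTE selects two bits in
     * that register: AD (access disable) and WD (write disable); WD only
     * restricts writes from user mode or when CR0.WP is set.
     */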
    if (ptep & PG_USER_MASK) {
        pkr = pg_mode & PG_MODE_PKE ? env->pkru : 0;
    } else {
        pkr = pg_mode & PG_MODE_PKS ? env->pkrs : 0;
    }
    if (pkr) {
        uint32_t pk = (pte & PG_PKRU_MASK) >> PG_PKRU_BIT;
        uint32_t pkr_ad = (pkr >> pk * 2) & 1;
        uint32_t pkr_wd = (pkr >> pk * 2) & 2;
        uint32_t pkr_prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;

        if (pkr_ad) {
            pkr_prot &= ~(PAGE_READ | PAGE_WRITE);
        } else if (pkr_wd && (is_user || (pg_mode & PG_MODE_WP))) {
            pkr_prot &= ~PAGE_WRITE;
        }
        if ((pkr_prot & (1 << access_type)) == 0) {
            goto do_fault_pk_protect;
        }
        prot &= pkr_prot;
    }

    if ((prot & (1 << access_type)) == 0) {
        goto do_fault_protect;
    }

    /* yes, it can! */
    {
        uint32_t set = PG_ACCESSED_MASK;
        if (access_type == MMU_DATA_STORE) {
            set |= PG_DIRTY_MASK;
        } else if (!(pte & PG_DIRTY_MASK)) {
            /*
             * Only set write access if already dirty...
             * otherwise wait for dirty access.
             */
            prot &= ~PAGE_WRITE;
        }
        if (!ptw_setl(&pte_trans, pte, set)) {
            /*
             * We can arrive here from any of 3 levels and 2 formats.
             * The only safe thing is to restart the entire lookup.
             */
            goto restart_all;
        }
    }

    /* merge offset within page */
    paddr = (pte & PG_ADDRESS_MASK & ~(page_size - 1)) | (addr & (page_size - 1));
 stage2:

    /*
     * Note that NPT is walked (for both paging structures and final guest
     * addresses) using the address with the A20 bit set.
     */
    if (in->ptw_idx == MMU_NESTED_IDX) {
        CPUTLBEntryFull *full;
        int flags, nested_page_size;

        flags = probe_access_full_mmu(env, paddr, 0, access_type,
                                      MMU_NESTED_IDX, &pte_trans.haddr, &full);
        if (unlikely(flags & TLB_INVALID_MASK)) {
            *err = (TranslateFault){
                .error_code = env->error_code,
                .cr2 = paddr,
                .stage2 = S2_GPA,
            };
            return false;
        }

        /* Merge stage1 & stage2 protection bits. */
        prot &= full->prot;

        /* Re-verify resulting protection. */
        if ((prot & (1 << access_type)) == 0) {
            goto do_fault_protect;
        }

        /* Merge stage1 & stage2 addresses to final physical address. */
        nested_page_size = 1 << full->lg_page_size;
        paddr = (full->phys_addr & ~(nested_page_size - 1))
              | (paddr & (nested_page_size - 1));

        /*
         * Use the larger of stage1 & stage2 page sizes, so that
         * invalidation works.
         */
        if (nested_page_size > page_size) {
            page_size = nested_page_size;
        }
    }

    out->paddr = paddr & x86_get_a20_mask(env);
    out->prot = prot;
    out->page_size = page_size;
    return true;

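    /*
     * Fault exits: build a #PF error code with RSVD for reserved-bit
     * violations, P for protection faults (plus PK for protection-key
     * faults), then add U/S, W/R and I/D according to the access; the I/D
     * bit is only reported when NX or SMEP is enabled.
     */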
 do_fault_rsvd:
    error_code = PG_ERROR_RSVD_MASK;
    goto do_fault_cont;
 do_fault_protect:
    error_code = PG_ERROR_P_MASK;
    goto do_fault_cont;
 do_fault_pk_protect:
    assert(access_type != MMU_INST_FETCH);
    error_code = PG_ERROR_PK_MASK | PG_ERROR_P_MASK;
    goto do_fault_cont;
 do_fault:
    error_code = 0;
 do_fault_cont:
    if (is_user) {
        error_code |= PG_ERROR_U_MASK;
    }
    switch (access_type) {
    case MMU_DATA_LOAD:
        break;
    case MMU_DATA_STORE:
        error_code |= PG_ERROR_W_MASK;
        break;
    case MMU_INST_FETCH:
        if (pg_mode & (PG_MODE_NXE | PG_MODE_SMEP)) {
            error_code |= PG_ERROR_I_D_MASK;
        }
        break;
    }
    *err = (TranslateFault){
        .exception_index = EXCP0E_PAGE,
        .error_code = error_code,
        .cr2 = addr,
    };
    return false;
}

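/*
 * Deliver an SVM nested page fault (#NPF) vmexit for a fault taken during
 * stage-2 translation: exit_info_1 carries the page-fault error code plus
 * a flag saying whether the fault hit a guest page-table access (S2_GPT)
 * or the final guest-physical access (S2_GPA); exit_info_2 carries the
 * faulting guest-physical address.
 */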
static G_NORETURN void raise_stage2(CPUX86State *env, TranslateFault *err,
                                    uintptr_t retaddr)
{
    uint64_t exit_info_1 = err->error_code;

    switch (err->stage2) {
    case S2_GPT:
        exit_info_1 |= SVM_NPTEXIT_GPT;
        break;
    case S2_GPA:
        exit_info_1 |= SVM_NPTEXIT_GPA;
        break;
    default:
        g_assert_not_reached();
    }

    x86_stq_phys(env_cpu(env),
                 env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2),
                 err->cr2);
    cpu_vmexit(env, SVM_EXIT_NPF, exit_info_1, retaddr);
}

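/*
 * Translate a virtual address for the given MMU index.  MMU_PHYS_IDX
 * bypasses translation entirely; MMU_NESTED_IDX translates a guest
 * physical address through the nested (NPT) page tables; any other index
 * is a regular guest access, walked through the guest page tables (and
 * through NPT as well when it is enabled) after the long-mode canonical
 * address check, or mapped 1:1 when paging is disabled.
 */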
static bool get_physical_address(CPUX86State *env, vaddr addr,
                                 MMUAccessType access_type, int mmu_idx,
                                 TranslateResult *out, TranslateFault *err,
                                 uint64_t ra)
{
    TranslateParams in;
    bool use_stage2 = env->hflags2 & HF2_NPT_MASK;

    in.addr = addr;
    in.access_type = access_type;

    switch (mmu_idx) {
    case MMU_PHYS_IDX:
        break;

    case MMU_NESTED_IDX:
        if (likely(use_stage2)) {
            in.cr3 = env->nested_cr3;
            in.pg_mode = env->nested_pg_mode;
            in.mmu_idx =
                env->nested_pg_mode & PG_MODE_LMA ? MMU_USER64_IDX : MMU_USER32_IDX;
            in.ptw_idx = MMU_PHYS_IDX;

            if (!mmu_translate(env, &in, out, err, ra)) {
                err->stage2 = S2_GPA;
                return false;
            }
            return true;
        }
        break;

    default:
        if (is_mmu_index_32(mmu_idx)) {
            addr = (uint32_t)addr;
        }

        if (likely(env->cr[0] & CR0_PG_MASK || use_stage2)) {
            in.cr3 = env->cr[3];
            in.mmu_idx = mmu_idx;
            in.ptw_idx = use_stage2 ? MMU_NESTED_IDX : MMU_PHYS_IDX;
            in.pg_mode = get_pg_mode(env);

            if (in.pg_mode & PG_MODE_LMA) {
                /* test virtual address sign extension */
                int shift = in.pg_mode & PG_MODE_LA57 ? 56 : 47;
                int64_t sext = (int64_t)addr >> shift;
                if (sext != 0 && sext != -1) {
                    *err = (TranslateFault){
                        .exception_index = EXCP0D_GPF,
                        .cr2 = addr,
                    };
                    return false;
                }
            }
            return mmu_translate(env, &in, out, err, ra);
        }
        break;
    }

    /* No translation needed. */
    out->paddr = addr & x86_get_a20_mask(env);
    out->prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
    out->page_size = TARGET_PAGE_SIZE;
    return true;
}

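/*
 * TLB fill hook.  On success, install one TLB entry for the page
 * containing @addr.  On failure, either record the error code and return
 * false (probe), or deliver the fault: stage-2 faults become an SVM NPF
 * vmexit, everything else raises the recorded exception (#PF, or #GP for
 * a non-canonical address), updating CR2 or the VMCB exit_info_2
 * depending on exception intercepts.
 */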
bool x86_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
                      MMUAccessType access_type, int mmu_idx,
                      bool probe, uintptr_t retaddr)
{
    CPUX86State *env = cpu_env(cs);
    TranslateResult out;
    TranslateFault err;

    if (get_physical_address(env, addr, access_type, mmu_idx, &out, &err,
                             retaddr)) {
        /*
         * Even if the guest uses 4MB pages, we map only one 4KB page in
         * the TLB to avoid filling it too quickly.
         */
        assert(out.prot & (1 << access_type));
        tlb_set_page_with_attrs(cs, addr & TARGET_PAGE_MASK,
                                out.paddr & TARGET_PAGE_MASK,
                                cpu_get_mem_attrs(env),
                                out.prot, mmu_idx, out.page_size);
        return true;
    }

    if (probe) {
        /* This will be used if recursing for stage2 translation. */
        env->error_code = err.error_code;
        return false;
    }

    if (err.stage2 != S2_NONE) {
        raise_stage2(env, &err, retaddr);
    }

    if (env->intercept_exceptions & (1 << err.exception_index)) {
        /* cr2 is not modified in case of exceptions */
        x86_stq_phys(cs, env->vm_vmcb +
                     offsetof(struct vmcb, control.exit_info_2),
                     err.cr2);
    } else {
        env->cr[2] = err.cr2;
    }
    raise_exception_err_ra(env, err.exception_index, err.error_code, retaddr);
}

G_NORETURN void x86_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
                                            MMUAccessType access_type,
                                            int mmu_idx, uintptr_t retaddr)
{
    X86CPU *cpu = X86_CPU(cs);
    handle_unaligned_access(&cpu->env, vaddr, access_type, retaddr);
}