/*
 *  x86 exception helpers - system code
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/cpu_ldst.h"
#include "exec/cputlb.h"
#include "exec/page-protection.h"
#include "exec/tlb-flags.h"
#include "exec/tswap.h"
#include "tcg/helper-tcg.h"

typedef struct TranslateParams {
    target_ulong addr;
    target_ulong cr3;
    int pg_mode;
    int mmu_idx;
    int ptw_idx;
    MMUAccessType access_type;
} TranslateParams;

typedef struct TranslateResult {
    hwaddr paddr;
    int prot;
    int page_size;
} TranslateResult;

typedef enum TranslateFaultStage2 {
    S2_NONE,
    S2_GPA,
    S2_GPT,
} TranslateFaultStage2;

typedef struct TranslateFault {
    int exception_index;
    int error_code;
    target_ulong cr2;
    TranslateFaultStage2 stage2;
} TranslateFault;

typedef struct PTETranslate {
    CPUX86State *env;
    TranslateFault *err;
    int ptw_idx;
    void *haddr;
    hwaddr gaddr;
} PTETranslate;

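/*
 * Probe the page-table-walk MMU index for the address of a page-table
 * entry, recording the guest-physical address and, when the page is
 * directly accessible, a host pointer to it.  The probe can only fail
 * for the nested (NPT) index, in which case a stage-2 fault is recorded
 * in inout->err.
 */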
static bool ptw_translate(PTETranslate *inout, hwaddr addr)
{
    int flags;

    inout->gaddr = addr;
    flags = probe_access_full_mmu(inout->env, addr, 0, MMU_DATA_STORE,
                                  inout->ptw_idx, &inout->haddr, NULL);

    if (unlikely(flags & TLB_INVALID_MASK)) {
        TranslateFault *err = inout->err;

        assert(inout->ptw_idx == MMU_NESTED_IDX);
        *err = (TranslateFault){
            .error_code = inout->env->error_code,
            .cr2 = addr,
            .stage2 = S2_GPT,
        };
        return false;
    }
    return true;
}

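/*
 * Load a page-table entry, using the host pointer from the preceding
 * ptw_translate() when one is available, and falling back to a slow
 * cpu_ld*_mmuidx_ra() access (e.g. for MMIO-backed page tables) otherwise.
 */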
static inline uint32_t ptw_ldl(const PTETranslate *in, uint64_t ra)
{
    if (likely(in->haddr)) {
        return ldl_p(in->haddr);
    }
    return cpu_ldl_mmuidx_ra(in->env, in->gaddr, in->ptw_idx, ra);
}

static inline uint64_t ptw_ldq(const PTETranslate *in, uint64_t ra)
{
    if (likely(in->haddr)) {
        return ldq_p(in->haddr);
    }
    return cpu_ldq_mmuidx_ra(in->env, in->gaddr, in->ptw_idx, ra);
}

/*
 * Note that we can use a 32-bit cmpxchg for all page table entries,
 * even 64-bit ones, because PG_PRESENT_MASK, PG_ACCESSED_MASK and
 * PG_DIRTY_MASK are all in the low 32 bits.
 */
static bool ptw_setl_slow(const PTETranslate *in, uint32_t old, uint32_t new)
{
    uint32_t cmp;

    CPUState *cpu = env_cpu(in->env);
    /* We are inside cpu_exec, so start_exclusive() cannot be called directly. */
    g_assert(cpu->running);
    cpu_exec_end(cpu);
    /* Does x86 really perform an RMW cycle on MMIO for a page-table walk? */
    start_exclusive();
    cmp = cpu_ldl_mmuidx_ra(in->env, in->gaddr, in->ptw_idx, 0);
    if (cmp == old) {
        cpu_stl_mmuidx_ra(in->env, in->gaddr, new, in->ptw_idx, 0);
    }
    end_exclusive();
    cpu_exec_start(cpu);
    return cmp == old;
}

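/*
 * Set bits (accessed/dirty) in a page-table entry with a compare-and-swap,
 * returning false if the entry changed under our feet so that the caller
 * can restart the walk.  Bits that are already set require no update.
 */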
static inline bool ptw_setl(const PTETranslate *in, uint32_t old, uint32_t set)
{
    if (set & ~old) {
        uint32_t new = old | set;
        if (likely(in->haddr)) {
            old = cpu_to_le32(old);
            new = cpu_to_le32(new);
            return qatomic_cmpxchg((uint32_t *)in->haddr, old, new) == old;
        }
        return ptw_setl_slow(in, old, new);
    }
    return true;
}

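/*
 * Walk the guest page tables selected by in->cr3 and in->pg_mode to
 * translate in->addr.  On success, fill *out with the physical address,
 * effective protection and page size; on failure, fill *err and return
 * false.
 */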
static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
                          TranslateResult *out, TranslateFault *err,
                          uint64_t ra)
{
    const target_ulong addr = in->addr;
    const int pg_mode = in->pg_mode;
    const bool is_user = is_mmu_index_user(in->mmu_idx);
    const MMUAccessType access_type = in->access_type;
    uint64_t ptep, pte, rsvd_mask;
    PTETranslate pte_trans = {
        .env = env,
        .err = err,
        .ptw_idx = in->ptw_idx,
    };
    hwaddr pte_addr, paddr;
    uint32_t pkr;
    int page_size;
    int error_code;
    int prot;

 restart_all:
    rsvd_mask = ~MAKE_64BIT_MASK(0, env_archcpu(env)->phys_bits);
    rsvd_mask &= PG_ADDRESS_MASK;
    if (!(pg_mode & PG_MODE_NXE)) {
        rsvd_mask |= PG_NX_MASK;
    }

    if (pg_mode & PG_MODE_PAE) {
#ifdef TARGET_X86_64
        if (pg_mode & PG_MODE_LMA) {
            if (pg_mode & PG_MODE_LA57) {
                /*
                 * Page table level 5
                 */
                pte_addr = (in->cr3 & ~0xfff) + (((addr >> 48) & 0x1ff) << 3);
                if (!ptw_translate(&pte_trans, pte_addr)) {
                    return false;
                }
            restart_5:
                pte = ptw_ldq(&pte_trans, ra);
                if (!(pte & PG_PRESENT_MASK)) {
                    goto do_fault;
                }
                if (pte & (rsvd_mask | PG_PSE_MASK)) {
                    goto do_fault_rsvd;
                }
                if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
                    goto restart_5;
                }
                ptep = pte ^ PG_NX_MASK;
            } else {
                pte = in->cr3;
                ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
            }

            /*
             * Page table level 4
             */
            pte_addr = (pte & PG_ADDRESS_MASK) + (((addr >> 39) & 0x1ff) << 3);
            if (!ptw_translate(&pte_trans, pte_addr)) {
                return false;
            }
        restart_4:
            pte = ptw_ldq(&pte_trans, ra);
            if (!(pte & PG_PRESENT_MASK)) {
                goto do_fault;
            }
            if (pte & (rsvd_mask | PG_PSE_MASK)) {
                goto do_fault_rsvd;
            }
            if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
                goto restart_4;
            }
            ptep &= pte ^ PG_NX_MASK;

            /*
             * Page table level 3
             */
            pte_addr = (pte & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) << 3);
            if (!ptw_translate(&pte_trans, pte_addr)) {
                return false;
            }
        restart_3_lma:
            pte = ptw_ldq(&pte_trans, ra);
            if (!(pte & PG_PRESENT_MASK)) {
                goto do_fault;
            }
            if (pte & rsvd_mask) {
                goto do_fault_rsvd;
            }
            if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
                goto restart_3_lma;
            }
            ptep &= pte ^ PG_NX_MASK;
            if (pte & PG_PSE_MASK) {
                /* 1 GB page */
                page_size = 1024 * 1024 * 1024;
                goto do_check_protect;
            }
        } else
#endif
        {
            /*
             * Page table level 3
             */
            pte_addr = (in->cr3 & 0xffffffe0ULL) + ((addr >> 27) & 0x18);
            if (!ptw_translate(&pte_trans, pte_addr)) {
                return false;
            }
            rsvd_mask |= PG_HI_USER_MASK;
        restart_3_nolma:
            pte = ptw_ldq(&pte_trans, ra);
            if (!(pte & PG_PRESENT_MASK)) {
                goto do_fault;
            }
            if (pte & (rsvd_mask | PG_NX_MASK)) {
                goto do_fault_rsvd;
            }
            if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
                goto restart_3_nolma;
            }
            ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
        }

        /*
         * Page table level 2
         */
        pte_addr = (pte & PG_ADDRESS_MASK) + (((addr >> 21) & 0x1ff) << 3);
        if (!ptw_translate(&pte_trans, pte_addr)) {
            return false;
        }
    restart_2_pae:
        pte = ptw_ldq(&pte_trans, ra);
        if (!(pte & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        if (pte & rsvd_mask) {
            goto do_fault_rsvd;
        }
        if (pte & PG_PSE_MASK) {
            /* 2 MB page */
            page_size = 2048 * 1024;
            ptep &= pte ^ PG_NX_MASK;
            goto do_check_protect;
        }
        if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
            goto restart_2_pae;
        }
        ptep &= pte ^ PG_NX_MASK;

        /*
         * Page table level 1
         */
        pte_addr = (pte & PG_ADDRESS_MASK) + (((addr >> 12) & 0x1ff) << 3);
        if (!ptw_translate(&pte_trans, pte_addr)) {
            return false;
        }
        pte = ptw_ldq(&pte_trans, ra);
        if (!(pte & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        if (pte & rsvd_mask) {
            goto do_fault_rsvd;
        }
        /* combine pde and pte nx, user and rw protections */
        ptep &= pte ^ PG_NX_MASK;
        page_size = 4096;
    } else if (pg_mode & PG_MODE_PG) {
        /*
         * Page table level 2
         */
        pte_addr = (in->cr3 & 0xfffff000ULL) + ((addr >> 20) & 0xffc);
        if (!ptw_translate(&pte_trans, pte_addr)) {
            return false;
        }
    restart_2_nopae:
        pte = ptw_ldl(&pte_trans, ra);
        if (!(pte & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        ptep = pte | PG_NX_MASK;

        /* if PSE bit is set, then we use a 4MB page */
        if ((pte & PG_PSE_MASK) && (pg_mode & PG_MODE_PSE)) {
            page_size = 4096 * 1024;
            /*
             * Bits 20-13 provide bits 39-32 of the address, bit 21 is reserved.
             * Leave bits 20-13 in place for setting accessed/dirty bits below.
             */
            pte = (uint32_t)pte | ((pte & 0x1fe000LL) << (32 - 13));
            rsvd_mask = 0x200000;
            goto do_check_protect_pse36;
        }
        if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
            goto restart_2_nopae;
        }

        /*
         * Page table level 1
         */
        pte_addr = (pte & ~0xfffu) + ((addr >> 10) & 0xffc);
        if (!ptw_translate(&pte_trans, pte_addr)) {
            return false;
        }
        pte = ptw_ldl(&pte_trans, ra);
        if (!(pte & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        /* combine pde and pte user and rw protections */
        ptep &= pte | PG_NX_MASK;
        page_size = 4096;
        rsvd_mask = 0;
    } else {
        /*
         * No paging (real mode), let's tentatively resolve the address as 1:1
         * here, but conditionally still perform an NPT walk on it later.
         */
        page_size = 0x40000000;
        paddr = in->addr;
        prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
        goto stage2;
    }

do_check_protect:
    rsvd_mask |= (page_size - 1) & PG_ADDRESS_MASK & ~PG_PSE_PAT_MASK;
do_check_protect_pse36:
    if (pte & rsvd_mask) {
        goto do_fault_rsvd;
    }
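    /*
     * ptep tracks the NX bit inverted so that permissions could be
     * combined with AND across levels; flip it back before computing
     * the protection bits below.
     */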
    ptep ^= PG_NX_MASK;

    /* Can the page be put in the TLB?  prot will tell us. */
    if (is_user && !(ptep & PG_USER_MASK)) {
        goto do_fault_protect;
    }

    prot = 0;
    if (!is_mmu_index_smap(in->mmu_idx) || !(ptep & PG_USER_MASK)) {
        prot |= PAGE_READ;
        if ((ptep & PG_RW_MASK) || !(is_user || (pg_mode & PG_MODE_WP))) {
            prot |= PAGE_WRITE;
        }
    }
    if (!(ptep & PG_NX_MASK) &&
        (is_user ||
         !((pg_mode & PG_MODE_SMEP) && (ptep & PG_USER_MASK)))) {
        prot |= PAGE_EXEC;
    }

    if (ptep & PG_USER_MASK) {
        pkr = pg_mode & PG_MODE_PKE ? env->pkru : 0;
    } else {
        pkr = pg_mode & PG_MODE_PKS ? env->pkrs : 0;
    }
    if (pkr) {
        uint32_t pk = (pte & PG_PKRU_MASK) >> PG_PKRU_BIT;
        uint32_t pkr_ad = (pkr >> pk * 2) & 1;
        uint32_t pkr_wd = (pkr >> pk * 2) & 2;
        uint32_t pkr_prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;

        if (pkr_ad) {
            pkr_prot &= ~(PAGE_READ | PAGE_WRITE);
        } else if (pkr_wd && (is_user || (pg_mode & PG_MODE_WP))) {
            pkr_prot &= ~PAGE_WRITE;
        }
        if ((pkr_prot & (1 << access_type)) == 0) {
            goto do_fault_pk_protect;
        }
        prot &= pkr_prot;
    }

    if ((prot & (1 << access_type)) == 0) {
        goto do_fault_protect;
    }

    /* yes, it can! */
    {
        uint32_t set = PG_ACCESSED_MASK;
        if (access_type == MMU_DATA_STORE) {
            set |= PG_DIRTY_MASK;
        } else if (!(pte & PG_DIRTY_MASK)) {
            /*
             * Only set write access if already dirty...
             * otherwise wait for dirty access.
             */
            prot &= ~PAGE_WRITE;
        }
        if (!ptw_setl(&pte_trans, pte, set)) {
            /*
             * We can arrive here from any of 3 levels and 2 formats.
             * The only safe thing is to restart the entire lookup.
             */
            goto restart_all;
        }
    }

    /* merge offset within page */
    paddr = (pte & PG_ADDRESS_MASK & ~(page_size - 1)) | (addr & (page_size - 1));
 stage2:

    /*
     * Note that NPT is walked (for both paging structures and final guest
     * addresses) using the address with the A20 bit set.
     */
    if (in->ptw_idx == MMU_NESTED_IDX) {
        CPUTLBEntryFull *full;
        int flags, nested_page_size;

        flags = probe_access_full_mmu(env, paddr, 0, access_type,
                                      MMU_NESTED_IDX, &pte_trans.haddr, &full);
        if (unlikely(flags & TLB_INVALID_MASK)) {
            *err = (TranslateFault){
                .error_code = env->error_code,
                .cr2 = paddr,
                .stage2 = S2_GPA,
            };
            return false;
        }

        /* Merge stage1 & stage2 protection bits. */
        prot &= full->prot;

        /* Re-verify resulting protection. */
        if ((prot & (1 << access_type)) == 0) {
            goto do_fault_protect;
        }

        /* Merge stage1 & stage2 addresses to final physical address. */
        nested_page_size = 1 << full->lg_page_size;
        paddr = (full->phys_addr & ~(nested_page_size - 1))
              | (paddr & (nested_page_size - 1));

        /*
         * Use the larger of stage1 & stage2 page sizes, so that
         * invalidation works.
         */
        if (nested_page_size > page_size) {
            page_size = nested_page_size;
        }
    }

    out->paddr = paddr & x86_get_a20_mask(env);
    out->prot = prot;
    out->page_size = page_size;
    return true;

 do_fault_rsvd:
    error_code = PG_ERROR_RSVD_MASK;
    goto do_fault_cont;
 do_fault_protect:
    error_code = PG_ERROR_P_MASK;
    goto do_fault_cont;
 do_fault_pk_protect:
    assert(access_type != MMU_INST_FETCH);
    error_code = PG_ERROR_PK_MASK | PG_ERROR_P_MASK;
    goto do_fault_cont;
 do_fault:
    error_code = 0;
 do_fault_cont:
    if (is_user) {
        error_code |= PG_ERROR_U_MASK;
    }
    switch (access_type) {
    case MMU_DATA_LOAD:
        break;
    case MMU_DATA_STORE:
        error_code |= PG_ERROR_W_MASK;
        break;
    case MMU_INST_FETCH:
        if (pg_mode & (PG_MODE_NXE | PG_MODE_SMEP)) {
            error_code |= PG_ERROR_I_D_MASK;
        }
        break;
    }
    *err = (TranslateFault){
        .exception_index = EXCP0E_PAGE,
        .error_code = error_code,
        .cr2 = addr,
    };
    return false;
}

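/*
 * A stage-2 (NPT) fault was recorded while translating a guest access:
 * deliver it to the L1 hypervisor as an SVM nested page fault vmexit,
 * with the faulting guest-physical address in EXITINFO2.
 */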
static G_NORETURN void raise_stage2(CPUX86State *env, TranslateFault *err,
                                    uintptr_t retaddr)
{
    uint64_t exit_info_1 = err->error_code;

    switch (err->stage2) {
    case S2_GPT:
        exit_info_1 |= SVM_NPTEXIT_GPT;
        break;
    case S2_GPA:
        exit_info_1 |= SVM_NPTEXIT_GPA;
        break;
    default:
        g_assert_not_reached();
    }

    x86_stq_phys(env_cpu(env),
                 env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2),
                 err->cr2);
    cpu_vmexit(env, SVM_EXIT_NPF, exit_info_1, retaddr);
}

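/*
 * Top-level translation dispatch: depending on mmu_idx this is either a
 * direct physical access, a walk of the nested (NPT) page tables, or a
 * normal guest walk, optionally using the nested index for the page-table
 * accesses.  With paging disabled, addresses resolve 1:1, subject only
 * to the A20 mask.
 */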
static bool get_physical_address(CPUX86State *env, vaddr addr,
                                 MMUAccessType access_type, int mmu_idx,
                                 TranslateResult *out, TranslateFault *err,
                                 uint64_t ra)
{
    TranslateParams in;
    bool use_stage2 = env->hflags2 & HF2_NPT_MASK;

    in.addr = addr;
    in.access_type = access_type;

    switch (mmu_idx) {
    case MMU_PHYS_IDX:
        break;

    case MMU_NESTED_IDX:
        if (likely(use_stage2)) {
            in.cr3 = env->nested_cr3;
            in.pg_mode = env->nested_pg_mode;
            in.mmu_idx =
                env->nested_pg_mode & PG_MODE_LMA ? MMU_USER64_IDX : MMU_USER32_IDX;
            in.ptw_idx = MMU_PHYS_IDX;

            if (!mmu_translate(env, &in, out, err, ra)) {
                err->stage2 = S2_GPA;
                return false;
            }
            return true;
        }
        break;

    default:
        if (is_mmu_index_32(mmu_idx)) {
            addr = (uint32_t)addr;
        }

        if (likely(env->cr[0] & CR0_PG_MASK || use_stage2)) {
            in.cr3 = env->cr[3];
            in.mmu_idx = mmu_idx;
            in.ptw_idx = use_stage2 ? MMU_NESTED_IDX : MMU_PHYS_IDX;
            in.pg_mode = get_pg_mode(env);

            if (in.pg_mode & PG_MODE_LMA) {
                /* test virtual address sign extension */
                int shift = in.pg_mode & PG_MODE_LA57 ? 56 : 47;
                int64_t sext = (int64_t)addr >> shift;
                if (sext != 0 && sext != -1) {
                    *err = (TranslateFault){
                        .exception_index = EXCP0D_GPF,
                        .cr2 = addr,
                    };
                    return false;
                }
            }
            return mmu_translate(env, &in, out, err, ra);
        }
        break;
    }

    /* No translation needed. */
    out->paddr = addr & x86_get_a20_mask(env);
    out->prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
    out->page_size = TARGET_PAGE_SIZE;
    return true;
}

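/*
 * TCG tlb_fill hook: translate the access and install the mapping in the
 * softmmu TLB, or raise #PF (or a stage-2 vmexit) on failure.  When
 * probe is true no exception is raised; the error code is only saved
 * for a possible outer stage-2 walk.
 */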
bool x86_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
                      MMUAccessType access_type, int mmu_idx,
                      bool probe, uintptr_t retaddr)
{
    CPUX86State *env = cpu_env(cs);
    TranslateResult out;
    TranslateFault err;

    if (get_physical_address(env, addr, access_type, mmu_idx, &out, &err,
                             retaddr)) {
        /*
         * Even with 4MB pages, we map only one 4KB page in the TLB to
         * avoid filling it too quickly.
         */
        assert(out.prot & (1 << access_type));
        tlb_set_page_with_attrs(cs, addr & TARGET_PAGE_MASK,
                                out.paddr & TARGET_PAGE_MASK,
                                cpu_get_mem_attrs(env),
                                out.prot, mmu_idx, out.page_size);
        return true;
    }

    if (probe) {
        /* This will be used if recursing for stage2 translation. */
        env->error_code = err.error_code;
        return false;
    }

    if (err.stage2 != S2_NONE) {
        raise_stage2(env, &err, retaddr);
    }

    if (env->intercept_exceptions & (1 << err.exception_index)) {
        /* CR2 is not modified when the exception is intercepted. */
        x86_stq_phys(cs, env->vm_vmcb +
                     offsetof(struct vmcb, control.exit_info_2),
                     err.cr2);
    } else {
        env->cr[2] = err.cr2;
    }
    raise_exception_err_ra(env, err.exception_index, err.error_code, retaddr);
}

G_NORETURN void x86_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
                                            MMUAccessType access_type,
                                            int mmu_idx, uintptr_t retaddr)
{
    X86CPU *cpu = X86_CPU(cs);
    handle_unaligned_access(&cpu->env, vaddr, access_type, retaddr);
}