xref: /qemu/target/i386/tcg/system/excp_helper.c (revision 019b4e84eda27a006f94ed0faa024babd0a97e97)
/*
 *  x86 exception helpers - system code
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/cpu_ldst.h"
#include "exec/cputlb.h"
#include "exec/page-protection.h"
#include "tcg/helper-tcg.h"

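/*
 * Helper types for the software page-table walk: TranslateParams carries
 * the inputs (virtual address, CR3, paging mode, and the MMU indexes used
 * for the access and for the walk itself), TranslateResult the successful
 * outcome, and TranslateFault the exception to raise on failure.
 */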
typedef struct TranslateParams {
    target_ulong addr;
    target_ulong cr3;
    int pg_mode;
    int mmu_idx;
    int ptw_idx;
    MMUAccessType access_type;
} TranslateParams;

typedef struct TranslateResult {
    hwaddr paddr;
    int prot;
    int page_size;
} TranslateResult;

typedef enum TranslateFaultStage2 {
    S2_NONE,
    S2_GPA,
    S2_GPT,
} TranslateFaultStage2;

typedef struct TranslateFault {
    int exception_index;
    int error_code;
    target_ulong cr2;
    TranslateFaultStage2 stage2;
} TranslateFault;

typedef struct PTETranslate {
    CPUX86State *env;
    TranslateFault *err;
    int ptw_idx;
    void *haddr;
    hwaddr gaddr;
} PTETranslate;

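/*
 * Translate the guest-physical address of a page-table entry through the
 * page-walk MMU index.  The probe is for a store, since the walker may
 * need to update accessed/dirty bits in place.  On success a host pointer
 * (or NULL, e.g. for an entry in MMIO) is cached in inout->haddr; failure
 * can only happen during a nested walk and records a stage-2 fault in
 * *inout->err.
 */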
static bool ptw_translate(PTETranslate *inout, hwaddr addr)
{
    int flags;

    inout->gaddr = addr;
    flags = probe_access_full_mmu(inout->env, addr, 0, MMU_DATA_STORE,
                                  inout->ptw_idx, &inout->haddr, NULL);

    if (unlikely(flags & TLB_INVALID_MASK)) {
        TranslateFault *err = inout->err;

        assert(inout->ptw_idx == MMU_NESTED_IDX);
        *err = (TranslateFault){
            .error_code = inout->env->error_code,
            .cr2 = addr,
            .stage2 = S2_GPT,
        };
        return false;
    }
    return true;
}

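/*
 * Load a 32-bit or 64-bit page-table entry: through the cached host
 * pointer when one is available, otherwise via a slow-path load with the
 * page-walk MMU index.
 */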
static inline uint32_t ptw_ldl(const PTETranslate *in, uint64_t ra)
{
    if (likely(in->haddr)) {
        return ldl_p(in->haddr);
    }
    return cpu_ldl_mmuidx_ra(in->env, in->gaddr, in->ptw_idx, ra);
}

static inline uint64_t ptw_ldq(const PTETranslate *in, uint64_t ra)
{
    if (likely(in->haddr)) {
        return ldq_p(in->haddr);
    }
    return cpu_ldq_mmuidx_ra(in->env, in->gaddr, in->ptw_idx, ra);
}

/*
 * Note that we can use a 32-bit cmpxchg for all page table entries,
 * even 64-bit ones, because PG_PRESENT_MASK, PG_ACCESSED_MASK and
 * PG_DIRTY_MASK are all in the low 32 bits.
 */
static bool ptw_setl_slow(const PTETranslate *in, uint32_t old, uint32_t new)
{
    uint32_t cmp;

    CPUState *cpu = env_cpu(in->env);
    /* We are in cpu_exec, and start_exclusive can't be called directly. */
    g_assert(cpu->running);
    cpu_exec_end(cpu);
    /* Does x86 really perform a rmw cycle on mmio for ptw? */
    start_exclusive();
    cmp = cpu_ldl_mmuidx_ra(in->env, in->gaddr, in->ptw_idx, 0);
    if (cmp == old) {
        cpu_stl_mmuidx_ra(in->env, in->gaddr, new, in->ptw_idx, 0);
    }
    end_exclusive();
    cpu_exec_start(cpu);
    return cmp == old;
}

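/*
 * Set the given accessed/dirty bits in a PTE, atomically with respect to
 * other CPUs.  Returns true if the bits were already set or were updated
 * successfully; false means the PTE changed under us and the caller must
 * re-read it and retry that level of the walk.
 */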
static inline bool ptw_setl(const PTETranslate *in, uint32_t old, uint32_t set)
{
    if (set & ~old) {
        uint32_t new = old | set;
        if (likely(in->haddr)) {
            old = cpu_to_le32(old);
            new = cpu_to_le32(new);
            return qatomic_cmpxchg((uint32_t *)in->haddr, old, new) == old;
        }
        return ptw_setl_slow(in, old, new);
    }
    return true;
}

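/*
 * Walk the guest page tables described by *in and translate in->addr.
 * On success fill *out (physical address, protection, page size) and
 * return true; on failure fill *err and return false.  The page-table
 * entries themselves are read through in->ptw_idx, so a nested (NPT)
 * translation is applied to the walk itself when required.
 */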
static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
                          TranslateResult *out, TranslateFault *err,
                          uint64_t ra)
{
    const target_ulong addr = in->addr;
    const int pg_mode = in->pg_mode;
    const bool is_user = is_mmu_index_user(in->mmu_idx);
    const MMUAccessType access_type = in->access_type;
    uint64_t ptep, pte, rsvd_mask;
    PTETranslate pte_trans = {
        .env = env,
        .err = err,
        .ptw_idx = in->ptw_idx,
    };
    hwaddr pte_addr, paddr;
    uint32_t pkr;
    int page_size;
    int error_code;
    int prot;

 restart_all:
    rsvd_mask = ~MAKE_64BIT_MASK(0, env_archcpu(env)->phys_bits);
    rsvd_mask &= PG_ADDRESS_MASK;
    if (!(pg_mode & PG_MODE_NXE)) {
        rsvd_mask |= PG_NX_MASK;
    }

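    /*
     * In the 64-bit format each of levels 5..1 selects a 9-bit index
     * from bits 56:48, 47:39, 38:30, 29:21 and 20:12 of the linear
     * address, scaled by 8 to a byte offset into the 4KiB table, hence
     * the ((addr >> N) & 0x1ff) << 3 computations below; PAE without
     * LMA uses only bits 31:30 to index its 4-entry PDPT.
     */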
    if (pg_mode & PG_MODE_PAE) {
#ifdef TARGET_X86_64
        if (pg_mode & PG_MODE_LMA) {
            if (pg_mode & PG_MODE_LA57) {
                /*
                 * Page table level 5
                 */
                pte_addr = (in->cr3 & ~0xfff) + (((addr >> 48) & 0x1ff) << 3);
                if (!ptw_translate(&pte_trans, pte_addr)) {
                    return false;
                }
            restart_5:
                pte = ptw_ldq(&pte_trans, ra);
                if (!(pte & PG_PRESENT_MASK)) {
                    goto do_fault;
                }
                if (pte & (rsvd_mask | PG_PSE_MASK)) {
                    goto do_fault_rsvd;
                }
                if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
                    goto restart_5;
                }
                ptep = pte ^ PG_NX_MASK;
            } else {
                pte = in->cr3;
                ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
            }

            /*
             * Page table level 4
             */
            pte_addr = (pte & PG_ADDRESS_MASK) + (((addr >> 39) & 0x1ff) << 3);
            if (!ptw_translate(&pte_trans, pte_addr)) {
                return false;
            }
        restart_4:
            pte = ptw_ldq(&pte_trans, ra);
            if (!(pte & PG_PRESENT_MASK)) {
                goto do_fault;
            }
            if (pte & (rsvd_mask | PG_PSE_MASK)) {
                goto do_fault_rsvd;
            }
            if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
                goto restart_4;
            }
            ptep &= pte ^ PG_NX_MASK;

            /*
             * Page table level 3
             */
            pte_addr = (pte & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) << 3);
            if (!ptw_translate(&pte_trans, pte_addr)) {
                return false;
            }
        restart_3_lma:
            pte = ptw_ldq(&pte_trans, ra);
            if (!(pte & PG_PRESENT_MASK)) {
                goto do_fault;
            }
            if (pte & rsvd_mask) {
                goto do_fault_rsvd;
            }
            if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
                goto restart_3_lma;
            }
            ptep &= pte ^ PG_NX_MASK;
            if (pte & PG_PSE_MASK) {
                /* 1 GB page */
                page_size = 1024 * 1024 * 1024;
                goto do_check_protect;
            }
        } else
#endif
        {
            /*
             * Page table level 3
             */
            pte_addr = (in->cr3 & 0xffffffe0ULL) + ((addr >> 27) & 0x18);
            if (!ptw_translate(&pte_trans, pte_addr)) {
                return false;
            }
            rsvd_mask |= PG_HI_USER_MASK;
        restart_3_nolma:
            pte = ptw_ldq(&pte_trans, ra);
            if (!(pte & PG_PRESENT_MASK)) {
                goto do_fault;
            }
            if (pte & (rsvd_mask | PG_NX_MASK)) {
                goto do_fault_rsvd;
            }
            if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
                goto restart_3_nolma;
            }
            ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
        }

        /*
         * Page table level 2
         */
        pte_addr = (pte & PG_ADDRESS_MASK) + (((addr >> 21) & 0x1ff) << 3);
        if (!ptw_translate(&pte_trans, pte_addr)) {
            return false;
        }
    restart_2_pae:
        pte = ptw_ldq(&pte_trans, ra);
        if (!(pte & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        if (pte & rsvd_mask) {
            goto do_fault_rsvd;
        }
        if (pte & PG_PSE_MASK) {
            /* 2 MB page */
            page_size = 2048 * 1024;
            ptep &= pte ^ PG_NX_MASK;
            goto do_check_protect;
        }
        if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
            goto restart_2_pae;
        }
        ptep &= pte ^ PG_NX_MASK;

        /*
         * Page table level 1
         */
        pte_addr = (pte & PG_ADDRESS_MASK) + (((addr >> 12) & 0x1ff) << 3);
        if (!ptw_translate(&pte_trans, pte_addr)) {
            return false;
        }
        pte = ptw_ldq(&pte_trans, ra);
        if (!(pte & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        if (pte & rsvd_mask) {
            goto do_fault_rsvd;
        }
        /* combine pde and pte nx, user and rw protections */
        ptep &= pte ^ PG_NX_MASK;
        page_size = 4096;
    } else if (pg_mode & PG_MODE_PG) {
        /*
         * Page table level 2
         */
        pte_addr = (in->cr3 & 0xfffff000ULL) + ((addr >> 20) & 0xffc);
        if (!ptw_translate(&pte_trans, pte_addr)) {
            return false;
        }
    restart_2_nopae:
        pte = ptw_ldl(&pte_trans, ra);
        if (!(pte & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        ptep = pte | PG_NX_MASK;

        /* if PSE bit is set, then we use a 4MB page */
        if ((pte & PG_PSE_MASK) && (pg_mode & PG_MODE_PSE)) {
            page_size = 4096 * 1024;
            /*
             * Bits 20-13 provide bits 39-32 of the address, bit 21 is reserved.
             * Leave bits 20-13 in place for setting accessed/dirty bits below.
             */
            pte = (uint32_t)pte | ((pte & 0x1fe000LL) << (32 - 13));
            rsvd_mask = 0x200000;
            goto do_check_protect_pse36;
        }
        if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
            goto restart_2_nopae;
        }

        /*
         * Page table level 1
         */
        pte_addr = (pte & ~0xfffu) + ((addr >> 10) & 0xffc);
        if (!ptw_translate(&pte_trans, pte_addr)) {
            return false;
        }
        pte = ptw_ldl(&pte_trans, ra);
        if (!(pte & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        /* combine pde and pte user and rw protections */
        ptep &= pte | PG_NX_MASK;
        page_size = 4096;
        rsvd_mask = 0;
    } else {
        /*
         * No paging (real mode): tentatively resolve the address as 1:1
         * here, but conditionally still perform an NPT walk on it later.
         */
        page_size = 0x40000000;
        paddr = in->addr;
        prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
        goto stage2;
    }

do_check_protect:
    rsvd_mask |= (page_size - 1) & PG_ADDRESS_MASK & ~PG_PSE_PAT_MASK;
do_check_protect_pse36:
    if (pte & rsvd_mask) {
        goto do_fault_rsvd;
    }
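    /*
     * ptep now holds, ANDed across all levels, the USER and RW bits plus
     * the inverted NX bit (64-bit entries were XORed with PG_NX_MASK;
     * 32-bit entries, which have no NX bit, simply ORed it in).  Flip NX
     * back so that a set bit once again means "not executable".
     */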
    ptep ^= PG_NX_MASK;

    /* can the page be put in the TLB?  prot will tell us */
    if (is_user && !(ptep & PG_USER_MASK)) {
        goto do_fault_protect;
    }

    prot = 0;
    if (!is_mmu_index_smap(in->mmu_idx) || !(ptep & PG_USER_MASK)) {
        prot |= PAGE_READ;
        if ((ptep & PG_RW_MASK) || !(is_user || (pg_mode & PG_MODE_WP))) {
            prot |= PAGE_WRITE;
        }
    }
    if (!(ptep & PG_NX_MASK) &&
        (is_user ||
         !((pg_mode & PG_MODE_SMEP) && (ptep & PG_USER_MASK)))) {
        prot |= PAGE_EXEC;
    }

    if (ptep & PG_USER_MASK) {
        pkr = pg_mode & PG_MODE_PKE ? env->pkru : 0;
    } else {
        pkr = pg_mode & PG_MODE_PKS ? env->pkrs : 0;
    }
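    /*
     * Protection keys: the 4-bit key in the leaf PTE selects a 2-bit
     * field in PKRU (user pages) or PKRS (supervisor pages); bit 0 of
     * the field is Access Disable and bit 1 is Write Disable, which is
     * what pkr_ad and pkr_wd extract below.
     */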
    if (pkr) {
        uint32_t pk = (pte & PG_PKRU_MASK) >> PG_PKRU_BIT;
        uint32_t pkr_ad = (pkr >> pk * 2) & 1;
        uint32_t pkr_wd = (pkr >> pk * 2) & 2;
        uint32_t pkr_prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;

        if (pkr_ad) {
            pkr_prot &= ~(PAGE_READ | PAGE_WRITE);
        } else if (pkr_wd && (is_user || (pg_mode & PG_MODE_WP))) {
            pkr_prot &= ~PAGE_WRITE;
        }
        if ((pkr_prot & (1 << access_type)) == 0) {
            goto do_fault_pk_protect;
        }
        prot &= pkr_prot;
    }

    if ((prot & (1 << access_type)) == 0) {
        goto do_fault_protect;
    }

    /* yes, it can! */
    {
        uint32_t set = PG_ACCESSED_MASK;
        if (access_type == MMU_DATA_STORE) {
            set |= PG_DIRTY_MASK;
        } else if (!(pte & PG_DIRTY_MASK)) {
            /*
             * Only set write access if already dirty...
             * otherwise wait for dirty access.
             */
            prot &= ~PAGE_WRITE;
        }
        if (!ptw_setl(&pte_trans, pte, set)) {
            /*
             * We can arrive here from any of 3 levels and 2 formats.
             * The only safe thing is to restart the entire lookup.
             */
            goto restart_all;
        }
    }
    /* merge offset within page */
    paddr = (pte & PG_ADDRESS_MASK & ~(page_size - 1)) | (addr & (page_size - 1));
 stage2:

    /*
     * Note that NPT is walked (for both paging structures and final guest
     * addresses) using the address with the A20 bit set.
     */
    if (in->ptw_idx == MMU_NESTED_IDX) {
        CPUTLBEntryFull *full;
        int flags, nested_page_size;

        flags = probe_access_full_mmu(env, paddr, 0, access_type,
                                      MMU_NESTED_IDX, &pte_trans.haddr, &full);
        if (unlikely(flags & TLB_INVALID_MASK)) {
            *err = (TranslateFault){
                .error_code = env->error_code,
                .cr2 = paddr,
                .stage2 = S2_GPA,
            };
            return false;
        }

        /* Merge stage1 & stage2 protection bits. */
        prot &= full->prot;

        /* Re-verify resulting protection. */
        if ((prot & (1 << access_type)) == 0) {
            goto do_fault_protect;
        }

        /* Merge stage1 & stage2 addresses to final physical address. */
        nested_page_size = 1 << full->lg_page_size;
        paddr = (full->phys_addr & ~(nested_page_size - 1))
              | (paddr & (nested_page_size - 1));

        /*
         * Use the larger of stage1 & stage2 page sizes, so that
         * invalidation works.
         */
        if (nested_page_size > page_size) {
            page_size = nested_page_size;
        }
    }

    out->paddr = paddr & x86_get_a20_mask(env);
    out->prot = prot;
    out->page_size = page_size;
    return true;

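    /*
     * Fault exits.  Build a #PF error code in the architectural format:
     * P (bit 0) is set for protection faults, W/R (bit 1) for writes,
     * U/S (bit 2) for user-mode accesses, RSVD (bit 3) for reserved-bit
     * violations, I/D (bit 4) for instruction fetches when NX or SMEP is
     * enabled, and PK (bit 5) for protection-key denials.
     */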
 do_fault_rsvd:
    error_code = PG_ERROR_RSVD_MASK;
    goto do_fault_cont;
 do_fault_protect:
    error_code = PG_ERROR_P_MASK;
    goto do_fault_cont;
 do_fault_pk_protect:
    assert(access_type != MMU_INST_FETCH);
    error_code = PG_ERROR_PK_MASK | PG_ERROR_P_MASK;
    goto do_fault_cont;
 do_fault:
    error_code = 0;
 do_fault_cont:
    if (is_user) {
        error_code |= PG_ERROR_U_MASK;
    }
    switch (access_type) {
    case MMU_DATA_LOAD:
        break;
    case MMU_DATA_STORE:
        error_code |= PG_ERROR_W_MASK;
        break;
    case MMU_INST_FETCH:
        if (pg_mode & (PG_MODE_NXE | PG_MODE_SMEP)) {
            error_code |= PG_ERROR_I_D_MASK;
        }
        break;
    }
    *err = (TranslateFault){
        .exception_index = EXCP0E_PAGE,
        .error_code = error_code,
        .cr2 = addr,
    };
    return false;
}

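/*
 * A stage-2 (NPT) fault becomes an SVM nested-page-fault vmexit:
 * exit_info_1 carries the page-fault error code plus a qualifier saying
 * whether the guest page-table walk (GPT) or the final guest-physical
 * access (GPA) failed, and exit_info_2 carries the faulting address.
 */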
static G_NORETURN void raise_stage2(CPUX86State *env, TranslateFault *err,
                                    uintptr_t retaddr)
{
    uint64_t exit_info_1 = err->error_code;

    switch (err->stage2) {
    case S2_GPT:
        exit_info_1 |= SVM_NPTEXIT_GPT;
        break;
    case S2_GPA:
        exit_info_1 |= SVM_NPTEXIT_GPA;
        break;
    default:
        g_assert_not_reached();
    }

    x86_stq_phys(env_cpu(env),
                 env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2),
                 err->cr2);
    cpu_vmexit(env, SVM_EXIT_NPF, exit_info_1, retaddr);
}

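/*
 * Dispatch on mmu_idx: MMU_PHYS_IDX accesses bypass translation,
 * MMU_NESTED_IDX walks the nested page tables rooted at nested_cr3,
 * and everything else performs a normal CR3-based walk (itself nested
 * when NPT is active).  The common exit below returns an identity
 * mapping clipped by the A20 mask when no translation is needed.
 */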
static bool get_physical_address(CPUX86State *env, vaddr addr,
                                 MMUAccessType access_type, int mmu_idx,
                                 TranslateResult *out, TranslateFault *err,
                                 uint64_t ra)
{
    TranslateParams in;
    bool use_stage2 = env->hflags2 & HF2_NPT_MASK;

    in.addr = addr;
    in.access_type = access_type;

    switch (mmu_idx) {
    case MMU_PHYS_IDX:
        break;

    case MMU_NESTED_IDX:
        if (likely(use_stage2)) {
            in.cr3 = env->nested_cr3;
            in.pg_mode = env->nested_pg_mode;
            in.mmu_idx =
                env->nested_pg_mode & PG_MODE_LMA ? MMU_USER64_IDX : MMU_USER32_IDX;
            in.ptw_idx = MMU_PHYS_IDX;

            if (!mmu_translate(env, &in, out, err, ra)) {
                err->stage2 = S2_GPA;
                return false;
            }
            return true;
        }
        break;

    default:
        if (is_mmu_index_32(mmu_idx)) {
            addr = (uint32_t)addr;
        }

        if (likely(env->cr[0] & CR0_PG_MASK || use_stage2)) {
            in.cr3 = env->cr[3];
            in.mmu_idx = mmu_idx;
            in.ptw_idx = use_stage2 ? MMU_NESTED_IDX : MMU_PHYS_IDX;
            in.pg_mode = get_pg_mode(env);

            if (in.pg_mode & PG_MODE_LMA) {
                /* test virtual address sign extension */
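                /*
                 * A canonical address has bits 63..shift all equal;
                 * e.g. with 4-level paging (shift == 47) both
                 * 0x00007fffffffffff and 0xffff800000000000 pass,
                 * while 0x0000800000000000 raises #GP.
                 */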
                int shift = in.pg_mode & PG_MODE_LA57 ? 56 : 47;
                int64_t sext = (int64_t)addr >> shift;
                if (sext != 0 && sext != -1) {
                    *err = (TranslateFault){
                        .exception_index = EXCP0D_GPF,
                        .cr2 = addr,
                    };
                    return false;
                }
            }
            return mmu_translate(env, &in, out, err, ra);
        }
        break;
    }

    /* No translation needed. */
    out->paddr = addr & x86_get_a20_mask(env);
    out->prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
    out->page_size = TARGET_PAGE_SIZE;
    return true;
}

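/*
 * TCG tlb_fill hook.  On a successful translation install the page in
 * the TLB and return true.  On failure: if only probing, record the
 * error code (used when this walk is itself a recursive stage-2 step)
 * and return false; otherwise deliver a nested-page-fault vmexit for
 * stage-2 faults, or raise #PF, updating CR2 unless the exception is
 * intercepted by the hypervisor.
 */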
bool x86_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
                      MMUAccessType access_type, int mmu_idx,
                      bool probe, uintptr_t retaddr)
{
    CPUX86State *env = cpu_env(cs);
    TranslateResult out;
    TranslateFault err;

    if (get_physical_address(env, addr, access_type, mmu_idx, &out, &err,
                             retaddr)) {
        /*
         * Even for 4MB pages, we map only one 4KB page in the TLB to
         * avoid filling it too quickly.
         */
        assert(out.prot & (1 << access_type));
        tlb_set_page_with_attrs(cs, addr & TARGET_PAGE_MASK,
                                out.paddr & TARGET_PAGE_MASK,
                                cpu_get_mem_attrs(env),
                                out.prot, mmu_idx, out.page_size);
        return true;
    }

    if (probe) {
        /* This will be used if recursing for stage2 translation. */
        env->error_code = err.error_code;
        return false;
    }

    if (err.stage2 != S2_NONE) {
        raise_stage2(env, &err, retaddr);
    }

    if (env->intercept_exceptions & (1 << err.exception_index)) {
        /* cr2 is not modified in case of exceptions */
        x86_stq_phys(cs, env->vm_vmcb +
                     offsetof(struct vmcb, control.exit_info_2),
                     err.cr2);
    } else {
        env->cr[2] = err.cr2;
    }
    raise_exception_err_ra(env, err.exception_index, err.error_code, retaddr);
}

G_NORETURN void x86_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
                                            MMUAccessType access_type,
                                            int mmu_idx, uintptr_t retaddr)
{
    X86CPU *cpu = X86_CPU(cs);
    handle_unaligned_access(&cpu->env, vaddr, access_type, retaddr);
}