1 #include "vm.h" 2 #include "libcflat.h" 3 #include "vmalloc.h" 4 #include "alloc_page.h" 5 #include "smp.h" 6 7 static pteval_t pte_opt_mask; 8 9 pteval_t *install_pte(pgd_t *cr3, 10 int pte_level, 11 void *virt, 12 pteval_t pte, 13 pteval_t *pt_page) 14 { 15 int level; 16 pteval_t *pt = cr3; 17 unsigned offset; 18 19 for (level = PAGE_LEVEL; level > pte_level; --level) { 20 offset = PGDIR_OFFSET((uintptr_t)virt, level); 21 if (!(pt[offset] & PT_PRESENT_MASK)) { 22 pteval_t *new_pt = pt_page; 23 if (!new_pt) 24 new_pt = alloc_page(); 25 else 26 pt_page = 0; 27 memset(new_pt, 0, PAGE_SIZE); 28 pt[offset] = virt_to_phys(new_pt) | PT_PRESENT_MASK | PT_WRITABLE_MASK | pte_opt_mask; 29 #ifdef CONFIG_EFI 30 pt[offset] |= get_amd_sev_c_bit_mask(); 31 #endif /* CONFIG_EFI */ 32 } 33 pt = phys_to_virt(pt[offset] & PT_ADDR_MASK); 34 } 35 offset = PGDIR_OFFSET((uintptr_t)virt, level); 36 pt[offset] = pte; 37 return &pt[offset]; 38 } 39 40 /* 41 * Finds last PTE in the mapping of @virt that's at or above @lowest_level. The 42 * returned PTE isn't necessarily present, but its parent is. 43 */ 44 struct pte_search find_pte_level(pgd_t *cr3, void *virt, 45 int lowest_level) 46 { 47 pteval_t *pt = cr3, pte; 48 unsigned offset; 49 unsigned shift; 50 struct pte_search r; 51 52 assert(lowest_level >= 1 && lowest_level <= PAGE_LEVEL); 53 54 for (r.level = PAGE_LEVEL;; --r.level) { 55 shift = (r.level - 1) * PGDIR_WIDTH + 12; 56 offset = ((uintptr_t)virt >> shift) & PGDIR_MASK; 57 r.pte = &pt[offset]; 58 pte = *r.pte; 59 60 if (!(pte & PT_PRESENT_MASK)) 61 return r; 62 63 if ((r.level == 2 || r.level == 3) && (pte & PT_PAGE_SIZE_MASK)) 64 return r; 65 66 if (r.level == lowest_level) 67 return r; 68 69 pt = phys_to_virt(pte & PT_ADDR_MASK); 70 } 71 } 72 73 /* 74 * Returns the leaf PTE in the mapping of @virt (i.e., 4K PTE or a present huge 75 * PTE). Returns NULL if no leaf PTE exists. 76 */ 77 pteval_t *get_pte(pgd_t *cr3, void *virt) 78 { 79 struct pte_search search; 80 81 search = find_pte_level(cr3, virt, 1); 82 return found_leaf_pte(search) ? search.pte : NULL; 83 } 84 85 /* 86 * Returns the PTE in the mapping of @virt at the given level @pte_level. 87 * Returns NULL if the PT at @pte_level isn't present (i.e., the mapping at 88 * @pte_level - 1 isn't present). 89 */ 90 pteval_t *get_pte_level(pgd_t *cr3, void *virt, int pte_level) 91 { 92 struct pte_search search; 93 94 search = find_pte_level(cr3, virt, pte_level); 95 return search.level == pte_level ? search.pte : NULL; 96 } 97 98 pteval_t *install_large_page(pgd_t *cr3, phys_addr_t phys, void *virt) 99 { 100 phys_addr_t flags = PT_PRESENT_MASK | PT_WRITABLE_MASK | pte_opt_mask | PT_PAGE_SIZE_MASK; 101 #ifdef CONFIG_EFI 102 flags |= get_amd_sev_c_bit_mask(); 103 #endif /* CONFIG_EFI */ 104 return install_pte(cr3, 2, virt, phys | flags, 0); 105 } 106 107 pteval_t *install_page(pgd_t *cr3, phys_addr_t phys, void *virt) 108 { 109 phys_addr_t flags = PT_PRESENT_MASK | PT_WRITABLE_MASK | pte_opt_mask; 110 #ifdef CONFIG_EFI 111 flags |= get_amd_sev_c_bit_mask(); 112 #endif /* CONFIG_EFI */ 113 return install_pte(cr3, 1, virt, phys | flags, 0); 114 } 115 116 void install_pages(pgd_t *cr3, phys_addr_t phys, size_t len, void *virt) 117 { 118 phys_addr_t max = (u64)len + (u64)phys; 119 assert(phys % PAGE_SIZE == 0); 120 assert((uintptr_t) virt % PAGE_SIZE == 0); 121 assert(len % PAGE_SIZE == 0); 122 123 while (phys + PAGE_SIZE <= max) { 124 install_page(cr3, phys, virt); 125 phys += PAGE_SIZE; 126 virt = (char *) virt + PAGE_SIZE; 127 } 128 } 129 130 bool any_present_pages(pgd_t *cr3, void *virt, size_t len) 131 { 132 uintptr_t max = (uintptr_t) virt + len; 133 uintptr_t curr; 134 135 for (curr = (uintptr_t) virt; curr < max; curr += PAGE_SIZE) { 136 pteval_t *ptep = get_pte(cr3, (void *) curr); 137 if (ptep && (*ptep & PT_PRESENT_MASK)) 138 return true; 139 } 140 return false; 141 } 142 143 static void setup_mmu_range(pgd_t *cr3, phys_addr_t start, size_t len) 144 { 145 u64 max = (u64)len + (u64)start; 146 u64 phys = start; 147 148 while (phys + LARGE_PAGE_SIZE <= max) { 149 install_large_page(cr3, phys, (void *)(ulong)phys); 150 phys += LARGE_PAGE_SIZE; 151 } 152 install_pages(cr3, phys, max - phys, (void *)(ulong)phys); 153 } 154 155 static void set_additional_vcpu_vmregs(struct vm_vcpu_info *info) 156 { 157 write_cr3(info->cr3); 158 write_cr4(info->cr4); 159 write_cr0(info->cr0); 160 } 161 162 void *setup_mmu(phys_addr_t end_of_memory, void *opt_mask) 163 { 164 pgd_t *cr3 = alloc_page(); 165 struct vm_vcpu_info info; 166 int i; 167 168 if (opt_mask) 169 pte_opt_mask = *(pteval_t *)opt_mask; 170 else 171 pte_opt_mask = PT_USER_MASK; 172 173 memset(cr3, 0, PAGE_SIZE); 174 175 #ifdef __x86_64__ 176 if (end_of_memory < (1ul << 32)) 177 end_of_memory = (1ul << 32); /* map mmio 1:1 */ 178 179 setup_mmu_range(cr3, 0, end_of_memory); 180 #else 181 setup_mmu_range(cr3, 0, (2ul << 30)); 182 setup_mmu_range(cr3, 3ul << 30, (1ul << 30)); 183 init_alloc_vpage((void*)(3ul << 30)); 184 #endif 185 186 write_cr3(virt_to_phys(cr3)); 187 #ifndef __x86_64__ 188 write_cr4(X86_CR4_PSE); 189 #endif 190 write_cr0(X86_CR0_PG |X86_CR0_PE | X86_CR0_WP); 191 192 printf("paging enabled\n"); 193 printf("cr0 = %lx\n", read_cr0()); 194 printf("cr3 = %lx\n", read_cr3()); 195 printf("cr4 = %lx\n", read_cr4()); 196 197 info.cr3 = read_cr3(); 198 info.cr4 = read_cr4(); 199 info.cr0 = read_cr0(); 200 201 for (i = 1; i < cpu_count(); i++) 202 on_cpu(i, (void *)set_additional_vcpu_vmregs, &info); 203 204 return cr3; 205 } 206 207 phys_addr_t virt_to_pte_phys(pgd_t *cr3, void *mem) 208 { 209 return (*get_pte(cr3, mem) & PT_ADDR_MASK) + ((ulong)mem & (PAGE_SIZE - 1)); 210 } 211 212 /* 213 * split_large_page: Split a 2M/1G large page into 512 smaller PTEs. 214 * @ptep : large page table entry to split 215 * @level : level of ptep (2 or 3) 216 */ 217 void split_large_page(unsigned long *ptep, int level) 218 { 219 unsigned long *new_pt; 220 unsigned long pa; 221 unsigned long pte; 222 unsigned long prototype; 223 int i; 224 225 pte = *ptep; 226 assert(pte & PT_PRESENT_MASK); 227 assert(pte & PT_PAGE_SIZE_MASK); 228 assert(level == 2 || level == 3); 229 230 new_pt = alloc_page(); 231 assert(new_pt); 232 233 prototype = pte & ~PT_ADDR_MASK; 234 if (level == 2) 235 prototype &= ~PT_PAGE_SIZE_MASK; 236 237 pa = pte & PT_ADDR_MASK; 238 for (i = 0; i < (1 << PGDIR_WIDTH); i++) { 239 new_pt[i] = prototype | pa; 240 pa += 1ul << PGDIR_BITS(level - 1); 241 } 242 243 pte &= ~PT_PAGE_SIZE_MASK; 244 pte &= ~PT_ADDR_MASK; 245 pte |= virt_to_phys(new_pt); 246 247 /* Modify the relevant paging-structure entry */ 248 *ptep = pte; 249 250 /* 251 * Flush the TLB to eradicate stale mappings. 252 * 253 * Note: Removing specific TLB mappings is tricky because 254 * split_large_page() can be called to split the active code page 255 * backing the next set of instructions to be fetched and executed. 256 * Furthermore, Intel SDM volume 3 recommends to clear the present bit 257 * for the page being split, before invalidating any mappings. 258 * 259 * But clearing the mapping from the page table and removing it from the 260 * TLB (where it's not actually guaranteed to reside anyway) makes it 261 * impossible to continue fetching instructions! 262 */ 263 flush_tlb(); 264 } 265 266 /* 267 * force_4k_page: Ensures that addr translate to a 4k page. 268 * 269 * This function uses split_large_page(), as needed, to ensure that target 270 * address, addr, translates to a 4k page. 271 * 272 * @addr: target address that should be mapped to a 4k page 273 */ 274 void force_4k_page(void *addr) 275 { 276 unsigned long *ptep; 277 unsigned long pte; 278 unsigned long *cr3 = current_page_table(); 279 280 ptep = get_pte_level(cr3, addr, 3); 281 assert(ptep); 282 pte = *ptep; 283 assert(pte & PT_PRESENT_MASK); 284 if (pte & PT_PAGE_SIZE_MASK) 285 split_large_page(ptep, 3); 286 287 ptep = get_pte_level(cr3, addr, 2); 288 assert(ptep); 289 pte = *ptep; 290 assert(pte & PT_PRESENT_MASK); 291 if (pte & PT_PAGE_SIZE_MASK) 292 split_large_page(ptep, 2); 293 } 294 295 /* 296 * Call the callback on each page from virt to virt + len. 297 */ 298 void walk_pte(void *virt, size_t len, pte_callback_t callback) 299 { 300 pgd_t *cr3 = current_page_table(); 301 uintptr_t start = (uintptr_t)virt; 302 uintptr_t end = (uintptr_t)virt + len; 303 struct pte_search search; 304 size_t page_size; 305 uintptr_t curr; 306 307 for (curr = start; curr < end; curr = ALIGN_DOWN(curr + page_size, page_size)) { 308 search = find_pte_level(cr3, (void *)curr, 1); 309 assert(found_leaf_pte(search)); 310 page_size = 1ul << PGDIR_BITS(search.level); 311 312 callback(search, (void *)curr); 313 } 314 } 315