#include "vm.h"
#include "libcflat.h"
#include "vmalloc.h"
#include "alloc_page.h"

/*
 * Installs @pte at @pte_level in the mapping of @virt, allocating any missing
 * intermediate page tables along the way (using @pt_page for the first
 * missing table if provided, otherwise alloc_page()). Returns a pointer to
 * the installed PTE.
 */
pteval_t *install_pte(pgd_t *cr3,
		      int pte_level,
		      void *virt,
		      pteval_t pte,
		      pteval_t *pt_page)
{
	int level;
	pteval_t *pt = cr3;
	unsigned offset;

	for (level = PAGE_LEVEL; level > pte_level; --level) {
		offset = PGDIR_OFFSET((uintptr_t)virt, level);
		if (!(pt[offset] & PT_PRESENT_MASK)) {
			pteval_t *new_pt = pt_page;
			if (!new_pt)
				new_pt = alloc_page();
			else
				pt_page = 0;
			memset(new_pt, 0, PAGE_SIZE);
			pt[offset] = virt_to_phys(new_pt) | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
		}
		pt = phys_to_virt(pt[offset] & PT_ADDR_MASK);
	}
	offset = PGDIR_OFFSET((uintptr_t)virt, level);
	pt[offset] = pte;
	return &pt[offset];
}

/*
 * Finds the last PTE in the mapping of @virt that's at or above
 * @lowest_level. The returned PTE isn't necessarily present, but its parent
 * is.
 */
struct pte_search find_pte_level(pgd_t *cr3, void *virt,
				 int lowest_level)
{
	pteval_t *pt = cr3, pte;
	unsigned offset;
	unsigned shift;
	struct pte_search r;

	assert(lowest_level >= 1 && lowest_level <= PAGE_LEVEL);

	for (r.level = PAGE_LEVEL;; --r.level) {
		shift = (r.level - 1) * PGDIR_WIDTH + 12;
		offset = ((uintptr_t)virt >> shift) & PGDIR_MASK;
		r.pte = &pt[offset];
		pte = *r.pte;

		if (!(pte & PT_PRESENT_MASK))
			return r;

		if ((r.level == 2 || r.level == 3) && (pte & PT_PAGE_SIZE_MASK))
			return r;

		if (r.level == lowest_level)
			return r;

		pt = phys_to_virt(pte & 0xffffffffff000ull);
	}
}

/*
 * Returns the leaf PTE in the mapping of @virt (i.e., the 4k PTE or a present
 * huge PTE). Returns NULL if no leaf PTE exists.
 */
pteval_t *get_pte(pgd_t *cr3, void *virt)
{
	struct pte_search search;

	search = find_pte_level(cr3, virt, 1);
	return found_leaf_pte(search) ? search.pte : NULL;
}

/*
 * Returns the PTE in the mapping of @virt at the given level @pte_level.
 * Returns NULL if the walk can't reach @pte_level, i.e., a higher-level entry
 * is either not present or maps a huge page.
 */
pteval_t *get_pte_level(pgd_t *cr3, void *virt, int pte_level)
{
	struct pte_search search;

	search = find_pte_level(cr3, virt, pte_level);
	return search.level == pte_level ? search.pte : NULL;
}

pteval_t *install_large_page(pgd_t *cr3, phys_addr_t phys, void *virt)
{
	return install_pte(cr3, 2, virt,
			   phys | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK | PT_PAGE_SIZE_MASK, 0);
}

pteval_t *install_page(pgd_t *cr3, phys_addr_t phys, void *virt)
{
	return install_pte(cr3, 1, virt, phys | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK, 0);
}

void install_pages(pgd_t *cr3, phys_addr_t phys, size_t len, void *virt)
{
	phys_addr_t max = (u64)len + (u64)phys;

	assert(phys % PAGE_SIZE == 0);
	assert((uintptr_t) virt % PAGE_SIZE == 0);
	assert(len % PAGE_SIZE == 0);

	while (phys + PAGE_SIZE <= max) {
		install_page(cr3, phys, virt);
		phys += PAGE_SIZE;
		virt = (char *) virt + PAGE_SIZE;
	}
}

bool any_present_pages(pgd_t *cr3, void *virt, size_t len)
{
	uintptr_t max = (uintptr_t) virt + len;
	uintptr_t curr;

	for (curr = (uintptr_t) virt; curr < max; curr += PAGE_SIZE) {
		pteval_t *ptep = get_pte(cr3, (void *) curr);

		if (ptep && (*ptep & PT_PRESENT_MASK))
			return true;
	}
	return false;
}

static void setup_mmu_range(pgd_t *cr3, phys_addr_t start, size_t len)
{
	u64 max = (u64)len + (u64)start;
	u64 phys = start;

	/*
	 * Identity map as much of the range as possible with large pages,
	 * then cover the tail with 4k pages.
	 */
	while (phys + LARGE_PAGE_SIZE <= max) {
		install_large_page(cr3, phys, (void *)(ulong)phys);
		phys += LARGE_PAGE_SIZE;
	}
	install_pages(cr3, phys, max - phys, (void *)(ulong)phys);
}

void *setup_mmu(phys_addr_t end_of_memory)
{
	pgd_t *cr3 = alloc_page();

	memset(cr3, 0, PAGE_SIZE);

#ifdef __x86_64__
	if (end_of_memory < (1ul << 32))
		end_of_memory = (1ul << 32);  /* map mmio 1:1 */

	setup_mmu_range(cr3, 0, end_of_memory);
#else
	setup_mmu_range(cr3, 0, (2ul << 30));
	setup_mmu_range(cr3, 3ul << 30, (1ul << 30));
	init_alloc_vpage((void*)(3ul << 30));
#endif

	write_cr3(virt_to_phys(cr3));
#ifndef __x86_64__
	write_cr4(X86_CR4_PSE);
#endif
	write_cr0(X86_CR0_PG | X86_CR0_PE | X86_CR0_WP);

	printf("paging enabled\n");
	printf("cr0 = %lx\n", read_cr0());
	printf("cr3 = %lx\n", read_cr3());
	printf("cr4 = %lx\n", read_cr4());
	return cr3;
}

phys_addr_t virt_to_pte_phys(pgd_t *cr3, void *mem)
{
	return (*get_pte(cr3, mem) & PT_ADDR_MASK) + ((ulong)mem & (PAGE_SIZE - 1));
}

/*
 * split_large_page: Split a 2M/1G large page into 512 smaller PTEs.
 * @ptep : large page table entry to split
 * @level : level of ptep (2 or 3)
 */
void split_large_page(unsigned long *ptep, int level)
{
	unsigned long *new_pt;
	unsigned long pa;
	unsigned long pte;
	unsigned long prototype;
	int i;

	pte = *ptep;
	assert(pte & PT_PRESENT_MASK);
	assert(pte & PT_PAGE_SIZE_MASK);
	assert(level == 2 || level == 3);

	new_pt = alloc_page();
	assert(new_pt);

	prototype = pte & ~PT_ADDR_MASK;
	if (level == 2)
		prototype &= ~PT_PAGE_SIZE_MASK;

	pa = pte & PT_ADDR_MASK;
	for (i = 0; i < (1 << PGDIR_WIDTH); i++) {
		new_pt[i] = prototype | pa;
		pa += 1ul << PGDIR_BITS(level - 1);
	}

	pte &= ~PT_PAGE_SIZE_MASK;
	pte &= ~PT_ADDR_MASK;
	pte |= virt_to_phys(new_pt);

	/* Modify the relevant paging-structure entry */
	*ptep = pte;

	/*
	 * Flush the TLB to eradicate stale mappings.
	 *
	 * Note: Removing specific TLB mappings is tricky because
	 * split_large_page() can be called to split the active code page
	 * backing the next set of instructions to be fetched and executed.
	 * Furthermore, Intel SDM volume 3 recommends clearing the present bit
	 * for the page being split before invalidating any mappings.
	 *
	 * But clearing the mapping from the page table and removing it from
	 * the TLB (where it's not actually guaranteed to reside anyway) makes
	 * it impossible to continue fetching instructions!
	 */
	flush_tlb();
}

/*
 * force_4k_page: Ensures that addr translates to a 4k page.
 *
 * This function uses split_large_page(), as needed, to ensure that the target
 * address, addr, translates to a 4k page.
 *
 * @addr: target address that should be mapped to a 4k page
 */
void force_4k_page(void *addr)
{
	unsigned long *ptep;
	unsigned long pte;
	unsigned long *cr3 = current_page_table();

	ptep = get_pte_level(cr3, addr, 3);
	assert(ptep);
	pte = *ptep;
	assert(pte & PT_PRESENT_MASK);
	if (pte & PT_PAGE_SIZE_MASK)
		split_large_page(ptep, 3);

	ptep = get_pte_level(cr3, addr, 2);
	assert(ptep);
	pte = *ptep;
	assert(pte & PT_PRESENT_MASK);
	if (pte & PT_PAGE_SIZE_MASK)
		split_large_page(ptep, 2);
}
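/*
 * Usage sketch (illustrative only, not part of this library): a hypothetical
 * x86_64 test-side function showing how the helpers above fit together once
 * paging has been enabled via setup_mmu(). The name example_map_and_split()
 * is made up for the sketch, and alloc_vpage() is assumed to hand back an
 * unmapped, page-aligned virtual address from the vmalloc area; everything
 * else comes from the headers included at the top of this file.
 *
 *	static void example_map_and_split(void)
 *	{
 *		pgd_t *cr3 = current_page_table();
 *		void *page = alloc_page();
 *		void *virt = alloc_vpage();
 *		pteval_t *ptep;
 *
 *		// Map @virt to @page with a writable, user-accessible 4k PTE.
 *		install_page(cr3, virt_to_phys(page), virt);
 *
 *		// The leaf PTE now exists, is present, and points at @page.
 *		ptep = get_pte(cr3, virt);
 *		assert(ptep && (*ptep & PT_PRESENT_MASK));
 *		assert(virt_to_pte_phys(cr3, virt) == virt_to_phys(page));
 *
 *		// The identity map built by setup_mmu() uses large pages, so
 *		// split them until @page itself is covered by a 4k PTE.
 *		force_4k_page(page);
 *		assert(get_pte_level(cr3, page, 1));
 *	}
 */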