1002d1830SGleb Natapov #include "fwcfg.h" 27d36db35SAvi Kivity #include "vm.h" 37d36db35SAvi Kivity #include "libcflat.h" 4efd8e5aaSPaolo Bonzini #include "vmalloc.h" 5*5aca024eSPaolo Bonzini #include "alloc_page.h" 67d36db35SAvi Kivity 77d36db35SAvi Kivity extern char edata; 87d36db35SAvi Kivity static unsigned long end_of_memory; 97d36db35SAvi Kivity 1004262816SPaolo Bonzini unsigned long *install_pte(unsigned long *cr3, 117d36db35SAvi Kivity int pte_level, 127d36db35SAvi Kivity void *virt, 137d36db35SAvi Kivity unsigned long pte, 147d36db35SAvi Kivity unsigned long *pt_page) 157d36db35SAvi Kivity { 167d36db35SAvi Kivity int level; 177d36db35SAvi Kivity unsigned long *pt = cr3; 187d36db35SAvi Kivity unsigned offset; 197d36db35SAvi Kivity 207d36db35SAvi Kivity for (level = PAGE_LEVEL; level > pte_level; --level) { 219d7e08c0SPeter Xu offset = PGDIR_OFFSET((unsigned long)virt, level); 22d10d16e1SAlexander Gordeev if (!(pt[offset] & PT_PRESENT_MASK)) { 237d36db35SAvi Kivity unsigned long *new_pt = pt_page; 247d36db35SAvi Kivity if (!new_pt) 257d36db35SAvi Kivity new_pt = alloc_page(); 267d36db35SAvi Kivity else 277d36db35SAvi Kivity pt_page = 0; 287d36db35SAvi Kivity memset(new_pt, 0, PAGE_SIZE); 29d10d16e1SAlexander Gordeev pt[offset] = virt_to_phys(new_pt) | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK; 307d36db35SAvi Kivity } 31d10d16e1SAlexander Gordeev pt = phys_to_virt(pt[offset] & PT_ADDR_MASK); 327d36db35SAvi Kivity } 339d7e08c0SPeter Xu offset = PGDIR_OFFSET((unsigned long)virt, level); 347d36db35SAvi Kivity pt[offset] = pte; 3504262816SPaolo Bonzini return &pt[offset]; 367d36db35SAvi Kivity } 377d36db35SAvi Kivity 381df80b57SPeter Feiner /* 391df80b57SPeter Feiner * Finds last PTE in the mapping of @virt that's at or above @lowest_level. The 401df80b57SPeter Feiner * returned PTE isn't necessarily present, but its parent is. 411df80b57SPeter Feiner */ 421df80b57SPeter Feiner struct pte_search find_pte_level(unsigned long *cr3, void *virt, 431df80b57SPeter Feiner int lowest_level) 447d36db35SAvi Kivity { 457d36db35SAvi Kivity unsigned long *pt = cr3, pte; 467d36db35SAvi Kivity unsigned offset; 471df80b57SPeter Feiner unsigned long shift; 481df80b57SPeter Feiner struct pte_search r; 497d36db35SAvi Kivity 501df80b57SPeter Feiner assert(lowest_level >= 1 && lowest_level <= PAGE_LEVEL); 511df80b57SPeter Feiner 521df80b57SPeter Feiner for (r.level = PAGE_LEVEL;; --r.level) { 531df80b57SPeter Feiner shift = (r.level - 1) * PGDIR_WIDTH + 12; 541df80b57SPeter Feiner offset = ((unsigned long)virt >> shift) & PGDIR_MASK; 551df80b57SPeter Feiner r.pte = &pt[offset]; 561df80b57SPeter Feiner pte = *r.pte; 571df80b57SPeter Feiner 58d10d16e1SAlexander Gordeev if (!(pte & PT_PRESENT_MASK)) 591df80b57SPeter Feiner return r; 601df80b57SPeter Feiner 611df80b57SPeter Feiner if ((r.level == 2 || r.level == 3) && (pte & PT_PAGE_SIZE_MASK)) 621df80b57SPeter Feiner return r; 631df80b57SPeter Feiner 641df80b57SPeter Feiner if (r.level == lowest_level) 651df80b57SPeter Feiner return r; 661df80b57SPeter Feiner 671df80b57SPeter Feiner pt = phys_to_virt(pte & 0xffffffffff000ull); 687d36db35SAvi Kivity } 691df80b57SPeter Feiner } 701df80b57SPeter Feiner 711df80b57SPeter Feiner /* 721df80b57SPeter Feiner * Returns the leaf PTE in the mapping of @virt (i.e., 4K PTE or a present huge 731df80b57SPeter Feiner * PTE). Returns NULL if no leaf PTE exists. 741df80b57SPeter Feiner */ 751df80b57SPeter Feiner unsigned long *get_pte(unsigned long *cr3, void *virt) 761df80b57SPeter Feiner { 771df80b57SPeter Feiner struct pte_search search; 781df80b57SPeter Feiner 791df80b57SPeter Feiner search = find_pte_level(cr3, virt, 1); 801df80b57SPeter Feiner return found_leaf_pte(search) ? search.pte : NULL; 811df80b57SPeter Feiner } 821df80b57SPeter Feiner 831df80b57SPeter Feiner /* 841df80b57SPeter Feiner * Returns the PTE in the mapping of @virt at the given level @pte_level. 851df80b57SPeter Feiner * Returns NULL if the PT at @pte_level isn't present (i.e., the mapping at 861df80b57SPeter Feiner * @pte_level - 1 isn't present). 871df80b57SPeter Feiner */ 881df80b57SPeter Feiner unsigned long *get_pte_level(unsigned long *cr3, void *virt, int pte_level) 891df80b57SPeter Feiner { 901df80b57SPeter Feiner struct pte_search search; 911df80b57SPeter Feiner 921df80b57SPeter Feiner search = find_pte_level(cr3, virt, pte_level); 931df80b57SPeter Feiner return search.level == pte_level ? search.pte : NULL; 947d36db35SAvi Kivity } 957d36db35SAvi Kivity 9604262816SPaolo Bonzini unsigned long *install_large_page(unsigned long *cr3, 977d36db35SAvi Kivity unsigned long phys, 987d36db35SAvi Kivity void *virt) 997d36db35SAvi Kivity { 10004262816SPaolo Bonzini return install_pte(cr3, 2, virt, 101d10d16e1SAlexander Gordeev phys | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK | PT_PAGE_SIZE_MASK, 0); 1027d36db35SAvi Kivity } 1037d36db35SAvi Kivity 10404262816SPaolo Bonzini unsigned long *install_page(unsigned long *cr3, 1057d36db35SAvi Kivity unsigned long phys, 1067d36db35SAvi Kivity void *virt) 1077d36db35SAvi Kivity { 108d10d16e1SAlexander Gordeev return install_pte(cr3, 1, virt, phys | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK, 0); 1097d36db35SAvi Kivity } 1107d36db35SAvi Kivity 1111df80b57SPeter Feiner void install_pages(unsigned long *cr3, unsigned long phys, unsigned long len, 1121df80b57SPeter Feiner void *virt) 1131df80b57SPeter Feiner { 1141df80b57SPeter Feiner unsigned long max = (u64)len + (u64)phys; 1151df80b57SPeter Feiner assert(phys % PAGE_SIZE == 0); 1161df80b57SPeter Feiner assert((unsigned long) virt % PAGE_SIZE == 0); 1171df80b57SPeter Feiner assert(len % PAGE_SIZE == 0); 1181df80b57SPeter Feiner 1191df80b57SPeter Feiner while (phys + PAGE_SIZE <= max) { 1201df80b57SPeter Feiner install_page(cr3, phys, virt); 1211df80b57SPeter Feiner phys += PAGE_SIZE; 1221df80b57SPeter Feiner virt = (char *) virt + PAGE_SIZE; 1231df80b57SPeter Feiner } 1241df80b57SPeter Feiner } 1251df80b57SPeter Feiner 1261df80b57SPeter Feiner bool any_present_pages(unsigned long *cr3, void *virt, unsigned long len) 1271df80b57SPeter Feiner { 1281df80b57SPeter Feiner unsigned long max = (unsigned long) virt + len; 1291df80b57SPeter Feiner unsigned long curr; 1301df80b57SPeter Feiner 1311df80b57SPeter Feiner for (curr = (unsigned long) virt; curr < max; curr += PAGE_SIZE) { 1321df80b57SPeter Feiner unsigned long *ptep = get_pte(cr3, (void *) curr); 1331df80b57SPeter Feiner if (ptep && (*ptep & PT_PRESENT_MASK)) 1341df80b57SPeter Feiner return true; 1351df80b57SPeter Feiner } 1361df80b57SPeter Feiner return false; 1371df80b57SPeter Feiner } 1387d36db35SAvi Kivity 13963254428SGleb Natapov static void setup_mmu_range(unsigned long *cr3, unsigned long start, 14063254428SGleb Natapov unsigned long len) 14163254428SGleb Natapov { 14263254428SGleb Natapov u64 max = (u64)len + (u64)start; 14363254428SGleb Natapov u64 phys = start; 14463254428SGleb Natapov 14563254428SGleb Natapov while (phys + LARGE_PAGE_SIZE <= max) { 14663254428SGleb Natapov install_large_page(cr3, phys, (void *)(ulong)phys); 14763254428SGleb Natapov phys += LARGE_PAGE_SIZE; 14863254428SGleb Natapov } 1491df80b57SPeter Feiner install_pages(cr3, phys, max - phys, (void *)(ulong)phys); 15063254428SGleb Natapov } 15163254428SGleb Natapov 1527d36db35SAvi Kivity static void setup_mmu(unsigned long len) 1537d36db35SAvi Kivity { 1547d36db35SAvi Kivity unsigned long *cr3 = alloc_page(); 1557d36db35SAvi Kivity 1567d36db35SAvi Kivity memset(cr3, 0, PAGE_SIZE); 15763254428SGleb Natapov 15863254428SGleb Natapov #ifdef __x86_64__ 15963254428SGleb Natapov if (len < (1ul << 32)) 16063254428SGleb Natapov len = (1ul << 32); /* map mmio 1:1 */ 16163254428SGleb Natapov 16263254428SGleb Natapov setup_mmu_range(cr3, 0, len); 16363254428SGleb Natapov #else 16463254428SGleb Natapov if (len > (1ul << 31)) 16563254428SGleb Natapov len = (1ul << 31); 16663254428SGleb Natapov 16763254428SGleb Natapov /* 0 - 2G memory, 2G-3G valloc area, 3G-4G mmio */ 16863254428SGleb Natapov setup_mmu_range(cr3, 0, len); 16963254428SGleb Natapov setup_mmu_range(cr3, 3ul << 30, (1ul << 30)); 170efd8e5aaSPaolo Bonzini init_alloc_vpage((void*)(3ul << 30)); 17163254428SGleb Natapov #endif 17263254428SGleb Natapov 1737d36db35SAvi Kivity write_cr3(virt_to_phys(cr3)); 1747d36db35SAvi Kivity #ifndef __x86_64__ 1757d36db35SAvi Kivity write_cr4(X86_CR4_PSE); 1767d36db35SAvi Kivity #endif 17797011120SGleb Natapov write_cr0(X86_CR0_PG |X86_CR0_PE | X86_CR0_WP); 1787d36db35SAvi Kivity 1797d36db35SAvi Kivity printf("paging enabled\n"); 180b006d7ebSAndrew Jones printf("cr0 = %lx\n", read_cr0()); 181b006d7ebSAndrew Jones printf("cr3 = %lx\n", read_cr3()); 182b006d7ebSAndrew Jones printf("cr4 = %lx\n", read_cr4()); 1837d36db35SAvi Kivity } 1847d36db35SAvi Kivity 1857d36db35SAvi Kivity void setup_vm() 1867d36db35SAvi Kivity { 187f7b87da6SPeter Xu assert(!end_of_memory); 188002d1830SGleb Natapov end_of_memory = fwcfg_get_u64(FW_CFG_RAM_SIZE); 189*5aca024eSPaolo Bonzini free_pages(&edata, end_of_memory - (unsigned long)&edata); 1907d36db35SAvi Kivity setup_mmu(end_of_memory); 1917d36db35SAvi Kivity } 1927d36db35SAvi Kivity 1937d36db35SAvi Kivity void *vmalloc(unsigned long size) 1947d36db35SAvi Kivity { 1957d36db35SAvi Kivity void *mem, *p; 1967d36db35SAvi Kivity unsigned pages; 1977d36db35SAvi Kivity 1987d36db35SAvi Kivity size += sizeof(unsigned long); 1997d36db35SAvi Kivity 2007d36db35SAvi Kivity size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); 2017d36db35SAvi Kivity pages = size / PAGE_SIZE; 202efd8e5aaSPaolo Bonzini mem = p = alloc_vpages(pages); 2037d36db35SAvi Kivity while (pages--) { 2047d36db35SAvi Kivity install_page(phys_to_virt(read_cr3()), virt_to_phys(alloc_page()), p); 2057d36db35SAvi Kivity p += PAGE_SIZE; 2067d36db35SAvi Kivity } 2077d36db35SAvi Kivity *(unsigned long *)mem = size; 2087d36db35SAvi Kivity mem += sizeof(unsigned long); 2097d36db35SAvi Kivity return mem; 2107d36db35SAvi Kivity } 2117d36db35SAvi Kivity 212334cd2bfSGleb Natapov uint64_t virt_to_phys_cr3(void *mem) 213334cd2bfSGleb Natapov { 214d10d16e1SAlexander Gordeev return (*get_pte(phys_to_virt(read_cr3()), mem) & PT_ADDR_MASK) + ((ulong)mem & (PAGE_SIZE - 1)); 215334cd2bfSGleb Natapov } 216334cd2bfSGleb Natapov 2177d36db35SAvi Kivity void vfree(void *mem) 2187d36db35SAvi Kivity { 2197d36db35SAvi Kivity unsigned long size = ((unsigned long *)mem)[-1]; 2207d36db35SAvi Kivity 2217d36db35SAvi Kivity while (size) { 222d10d16e1SAlexander Gordeev free_page(phys_to_virt(*get_pte(phys_to_virt(read_cr3()), mem) & PT_ADDR_MASK)); 2237d36db35SAvi Kivity mem += PAGE_SIZE; 2247d36db35SAvi Kivity size -= PAGE_SIZE; 2257d36db35SAvi Kivity } 2267d36db35SAvi Kivity } 2277d36db35SAvi Kivity 2287d36db35SAvi Kivity void *vmap(unsigned long long phys, unsigned long size) 2297d36db35SAvi Kivity { 2307d36db35SAvi Kivity void *mem, *p; 2317d36db35SAvi Kivity unsigned pages; 2327d36db35SAvi Kivity 2337d36db35SAvi Kivity size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); 2347d36db35SAvi Kivity pages = size / PAGE_SIZE; 235efd8e5aaSPaolo Bonzini mem = p = alloc_vpages(pages); 236efd8e5aaSPaolo Bonzini 237efd8e5aaSPaolo Bonzini phys &= ~(unsigned long long)(PAGE_SIZE - 1); 2387d36db35SAvi Kivity while (pages--) { 2397d36db35SAvi Kivity install_page(phys_to_virt(read_cr3()), phys, p); 2407d36db35SAvi Kivity phys += PAGE_SIZE; 2417d36db35SAvi Kivity p += PAGE_SIZE; 2427d36db35SAvi Kivity } 2437d36db35SAvi Kivity return mem; 2447d36db35SAvi Kivity } 245