/*
 * Virtual Memory Map support
 *
 * (C) 2007 sgi. Christoph Lameter.
 *
 * Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn,
 * virt_to_page, page_address() to be implemented as a base offset
 * calculation without memory access.
 *
 * However, virtual mappings need a page table and TLBs. Many Linux
 * architectures already map their physical space using 1-1 mappings
 * via TLBs. For those arches the virtual memory map is essentially
 * for free if we use the same page size as the 1-1 mappings. In that
 * case the overhead consists of a few additional pages that are
 * allocated to create a view of memory for vmemmap.
 *
 * The architecture is expected to provide a vmemmap_populate() function
 * to instantiate the mapping.
 */
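/*
 * Illustrative sketch (not part of the original file): with a virtual
 * memory map, the pfn <-> page translations reduce to pointer
 * arithmetic against a fixed vmemmap base, roughly as in the
 * CONFIG_SPARSEMEM_VMEMMAP variant of include/asm-generic/memory_model.h:
 *
 *	#define __pfn_to_page(pfn)	(vmemmap + (pfn))
 *	#define __page_to_pfn(page)	(unsigned long)((page) - vmemmap)
 */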
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/bootmem.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <asm/dma.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>

/*
 * Allocate a block of memory to be used to back the virtual memory map
 * or to back the page tables that are used to create the mapping.
 * Uses the main allocators if they are available, else bootmem.
 */

static void * __init_refok __earlyonly_bootmem_alloc(int node,
				unsigned long size,
				unsigned long align,
				unsigned long goal)
{
	return memblock_virt_alloc_try_nid(size, align, goal,
					   BOOTMEM_ALLOC_ACCESSIBLE, node);
}

static void *vmemmap_buf;
static void *vmemmap_buf_end;

void * __meminit vmemmap_alloc_block(unsigned long size, int node)
{
	/* If the main allocator is up use that, fallback to bootmem. */
	if (slab_is_available()) {
		struct page *page;

		if (node_state(node, N_HIGH_MEMORY))
			page = alloc_pages_node(
				node, GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT,
				get_order(size));
		else
			page = alloc_pages(
				GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT,
				get_order(size));
		if (page)
			return page_address(page);
		return NULL;
	} else
		return __earlyonly_bootmem_alloc(node, size, size,
				__pa(MAX_DMA_ADDRESS));
}

/*
 * Carve an allocation out of the per-node buffer set up by
 * sparse_mem_maps_populate_node(). All early-stage callers must use
 * the same size, so that the ALIGN() below never skips past a
 * previously carved block.
 */
void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node)
{
	void *ptr;

	if (!vmemmap_buf)
		return vmemmap_alloc_block(size, node);

	/* take it from the buffer */
	ptr = (void *)ALIGN((unsigned long)vmemmap_buf, size);
	if (ptr + size > vmemmap_buf_end)
		return vmemmap_alloc_block(size, node);

	vmemmap_buf = ptr + size;

	return ptr;
}
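/*
 * Worked example (illustrative): with 4 KiB pages, 128 MiB sections
 * (PAGES_PER_SECTION == 32768) and a 64-byte struct page, each
 * section's memory map is 32768 * 64 bytes = 2 MiB, exactly one PMD
 * on x86-64. Carving the maps from a single PMD_SIZE-aligned buffer
 * thus allows the architecture to back the vmemmap with huge pages.
 */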
/* Warn if a section's page structs ended up on a distant node. */
void __meminit vmemmap_verify(pte_t *pte, int node,
			unsigned long start, unsigned long end)
{
	unsigned long pfn = pte_pfn(*pte);
	int actual_node = early_pfn_to_nid(pfn);

	if (node_distance(actual_node, node) > LOCAL_DISTANCE)
		printk(KERN_WARNING "[%lx-%lx] potential offnode "
			"page_structs\n", start, end - 1);
}

pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node)
{
	pte_t *pte = pte_offset_kernel(pmd, addr);
	if (pte_none(*pte)) {
		pte_t entry;
		void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
		if (!p)
			return NULL;
		entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
		set_pte_at(&init_mm, addr, pte, entry);
	}
	return pte;
}

pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
{
	pmd_t *pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd)) {
		void *p = vmemmap_alloc_block(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pmd_populate_kernel(&init_mm, pmd, p);
	}
	return pmd;
}

pud_t * __meminit vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node)
{
	pud_t *pud = pud_offset(pgd, addr);
	if (pud_none(*pud)) {
		void *p = vmemmap_alloc_block(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pud_populate(&init_mm, pud, p);
	}
	return pud;
}

pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
{
	pgd_t *pgd = pgd_offset_k(addr);
	if (pgd_none(*pgd)) {
		void *p = vmemmap_alloc_block(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pgd_populate(&init_mm, pgd, p);
	}
	return pgd;
}

/*
 * Map the range [start, end) with base pages, allocating any missing
 * page-table levels on the way down.
 */
int __meminit vmemmap_populate_basepages(unsigned long start,
					 unsigned long end, int node)
{
	unsigned long addr = start;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	for (; addr < end; addr += PAGE_SIZE) {
		pgd = vmemmap_pgd_populate(addr, node);
		if (!pgd)
			return -ENOMEM;
		pud = vmemmap_pud_populate(pgd, addr, node);
		if (!pud)
			return -ENOMEM;
		pmd = vmemmap_pmd_populate(pud, addr, node);
		if (!pmd)
			return -ENOMEM;
		pte = vmemmap_pte_populate(pmd, addr, node);
		if (!pte)
			return -ENOMEM;
		vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
	}

	return 0;
}
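/*
 * Illustrative sketch (assumed, not part of this file): an architecture
 * that does not map the vmemmap with huge pages can implement its
 * vmemmap_populate() hook as a thin wrapper around the helper above:
 *
 *	int __meminit vmemmap_populate(unsigned long start,
 *				       unsigned long end, int node)
 *	{
 *		return vmemmap_populate_basepages(start, end, node);
 *	}
 */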
struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid)
{
	unsigned long start;
	unsigned long end;
	struct page *map;

	map = pfn_to_page(pnum * PAGES_PER_SECTION);
	start = (unsigned long)map;
	end = (unsigned long)(map + PAGES_PER_SECTION);

	if (vmemmap_populate(start, end, nid))
		return NULL;

	return map;
}

void __init sparse_mem_maps_populate_node(struct page **map_map,
					  unsigned long pnum_begin,
					  unsigned long pnum_end,
					  unsigned long map_count, int nodeid)
{
	unsigned long pnum;
	unsigned long size = sizeof(struct page) * PAGES_PER_SECTION;
	void *vmemmap_buf_start;

	size = ALIGN(size, PMD_SIZE);
	vmemmap_buf_start = __earlyonly_bootmem_alloc(nodeid, size * map_count,
			 PMD_SIZE, __pa(MAX_DMA_ADDRESS));

	if (vmemmap_buf_start) {
		vmemmap_buf = vmemmap_buf_start;
		vmemmap_buf_end = vmemmap_buf_start + size * map_count;
	}

	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
		struct mem_section *ms;

		if (!present_section_nr(pnum))
			continue;

		map_map[pnum] = sparse_mem_map_populate(pnum, nodeid);
		if (map_map[pnum])
			continue;
		ms = __nr_to_section(pnum);
		printk(KERN_ERR "%s: sparsemem memory map backing failed, "
			"some memory will not be available.\n", __func__);
		ms->section_mem_map = 0;
	}

	if (vmemmap_buf_start) {
		/* free the unused tail of the buffer */
		memblock_free_early(__pa(vmemmap_buf),
				    vmemmap_buf_end - vmemmap_buf);
		vmemmap_buf = NULL;
		vmemmap_buf_end = NULL;
	}
}
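/*
 * Usage sketch (illustrative, not part of this file): during early
 * boot, sparse_init() can batch all present sections of a node into a
 * single call so that their memory maps are carved from one contiguous
 * PMD-aligned block:
 *
 *	sparse_mem_maps_populate_node(map_map, pnum_begin, pnum_end,
 *				      map_count, nodeid);
 *
 * Sections whose backing failed keep a NULL map_map entry and have
 * their section_mem_map cleared by the loop above.
 */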