11802d0beSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only 2084bd298SSteve Capper /* 3084bd298SSteve Capper * arch/arm64/mm/hugetlbpage.c 4084bd298SSteve Capper * 5084bd298SSteve Capper * Copyright (C) 2013 Linaro Ltd. 6084bd298SSteve Capper * 7084bd298SSteve Capper * Based on arch/x86/mm/hugetlbpage.c. 8084bd298SSteve Capper */ 9084bd298SSteve Capper 10084bd298SSteve Capper #include <linux/init.h> 11084bd298SSteve Capper #include <linux/fs.h> 12084bd298SSteve Capper #include <linux/mm.h> 13084bd298SSteve Capper #include <linux/hugetlb.h> 14084bd298SSteve Capper #include <linux/pagemap.h> 15084bd298SSteve Capper #include <linux/err.h> 16084bd298SSteve Capper #include <linux/sysctl.h> 17084bd298SSteve Capper #include <asm/mman.h> 18084bd298SSteve Capper #include <asm/tlb.h> 19084bd298SSteve Capper #include <asm/tlbflush.h> 20084bd298SSteve Capper 21abb7962aSAnshuman Khandual /* 22abb7962aSAnshuman Khandual * HugeTLB Support Matrix 23abb7962aSAnshuman Khandual * 24abb7962aSAnshuman Khandual * --------------------------------------------------- 25abb7962aSAnshuman Khandual * | Page Size | CONT PTE | PMD | CONT PMD | PUD | 26abb7962aSAnshuman Khandual * --------------------------------------------------- 27abb7962aSAnshuman Khandual * | 4K | 64K | 2M | 32M | 1G | 28abb7962aSAnshuman Khandual * | 16K | 2M | 32M | 1G | | 29abb7962aSAnshuman Khandual * | 64K | 2M | 512M | 16G | | 30abb7962aSAnshuman Khandual * --------------------------------------------------- 31abb7962aSAnshuman Khandual */ 32abb7962aSAnshuman Khandual 33abb7962aSAnshuman Khandual /* 34abb7962aSAnshuman Khandual * Reserve CMA areas for the largest supported gigantic 35abb7962aSAnshuman Khandual * huge page when requested. Any other smaller gigantic 36abb7962aSAnshuman Khandual * huge pages could still be served from those areas. 37abb7962aSAnshuman Khandual */ 38abb7962aSAnshuman Khandual #ifdef CONFIG_CMA 39abb7962aSAnshuman Khandual void __init arm64_hugetlb_cma_reserve(void) 40abb7962aSAnshuman Khandual { 41abb7962aSAnshuman Khandual int order; 42abb7962aSAnshuman Khandual 43f8b46c4bSAnshuman Khandual if (pud_sect_supported()) 44abb7962aSAnshuman Khandual order = PUD_SHIFT - PAGE_SHIFT; 45f8b46c4bSAnshuman Khandual else 462e5809a4SMike Kravetz order = CONT_PMD_SHIFT - PAGE_SHIFT; 47e6359798SWill Deacon 48abb7962aSAnshuman Khandual /* 49abb7962aSAnshuman Khandual * HugeTLB CMA reservation is required for gigantic 50abb7962aSAnshuman Khandual * huge pages which could not be allocated via the 51abb7962aSAnshuman Khandual * page allocator. Just warn if there is any change 52abb7962aSAnshuman Khandual * breaking this assumption. 53abb7962aSAnshuman Khandual */ 54abb7962aSAnshuman Khandual WARN_ON(order <= MAX_ORDER); 55abb7962aSAnshuman Khandual hugetlb_cma_reserve(order); 56abb7962aSAnshuman Khandual } 57abb7962aSAnshuman Khandual #endif /* CONFIG_CMA */ 58abb7962aSAnshuman Khandual 59a8a733b2SAnshuman Khandual static bool __hugetlb_valid_size(unsigned long size) 60a8a733b2SAnshuman Khandual { 61a8a733b2SAnshuman Khandual switch (size) { 62a8a733b2SAnshuman Khandual #ifndef __PAGETABLE_PMD_FOLDED 63a8a733b2SAnshuman Khandual case PUD_SIZE: 64a8a733b2SAnshuman Khandual return pud_sect_supported(); 65a8a733b2SAnshuman Khandual #endif 66a8a733b2SAnshuman Khandual case CONT_PMD_SIZE: 67a8a733b2SAnshuman Khandual case PMD_SIZE: 68a8a733b2SAnshuman Khandual case CONT_PTE_SIZE: 69a8a733b2SAnshuman Khandual return true; 70a8a733b2SAnshuman Khandual } 71a8a733b2SAnshuman Khandual 72a8a733b2SAnshuman Khandual return false; 73a8a733b2SAnshuman Khandual } 74a8a733b2SAnshuman Khandual 755480280dSAnshuman Khandual #ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION 765480280dSAnshuman Khandual bool arch_hugetlb_migration_supported(struct hstate *h) 775480280dSAnshuman Khandual { 785480280dSAnshuman Khandual size_t pagesize = huge_page_size(h); 795480280dSAnshuman Khandual 80a8a733b2SAnshuman Khandual if (!__hugetlb_valid_size(pagesize)) { 815480280dSAnshuman Khandual pr_warn("%s: unrecognized huge page size 0x%lx\n", 825480280dSAnshuman Khandual __func__, pagesize); 835480280dSAnshuman Khandual return false; 845480280dSAnshuman Khandual } 85a8a733b2SAnshuman Khandual return true; 86a8a733b2SAnshuman Khandual } 875480280dSAnshuman Khandual #endif 885480280dSAnshuman Khandual 89084bd298SSteve Capper int pmd_huge(pmd_t pmd) 90084bd298SSteve Capper { 91fd28f5d4SChristoffer Dall return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); 92084bd298SSteve Capper } 93084bd298SSteve Capper 94084bd298SSteve Capper int pud_huge(pud_t pud) 95084bd298SSteve Capper { 964797ec2dSMark Salter #ifndef __PAGETABLE_PMD_FOLDED 97fd28f5d4SChristoffer Dall return pud_val(pud) && !(pud_val(pud) & PUD_TABLE_BIT); 984797ec2dSMark Salter #else 994797ec2dSMark Salter return 0; 1004797ec2dSMark Salter #endif 101084bd298SSteve Capper } 102084bd298SSteve Capper 103b5b0be86SSteve Capper /* 104b5b0be86SSteve Capper * Select all bits except the pfn 105b5b0be86SSteve Capper */ 106b5b0be86SSteve Capper static inline pgprot_t pte_pgprot(pte_t pte) 107b5b0be86SSteve Capper { 108b5b0be86SSteve Capper unsigned long pfn = pte_pfn(pte); 109b5b0be86SSteve Capper 110b5b0be86SSteve Capper return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte)); 111b5b0be86SSteve Capper } 112b5b0be86SSteve Capper 11366b3923aSDavid Woods static int find_num_contig(struct mm_struct *mm, unsigned long addr, 114bb9dd3dfSSteve Capper pte_t *ptep, size_t *pgsize) 11566b3923aSDavid Woods { 11620a004e7SWill Deacon pgd_t *pgdp = pgd_offset(mm, addr); 117e9f63768SMike Rapoport p4d_t *p4dp; 11820a004e7SWill Deacon pud_t *pudp; 11920a004e7SWill Deacon pmd_t *pmdp; 12066b3923aSDavid Woods 12166b3923aSDavid Woods *pgsize = PAGE_SIZE; 122e9f63768SMike Rapoport p4dp = p4d_offset(pgdp, addr); 123e9f63768SMike Rapoport pudp = pud_offset(p4dp, addr); 12420a004e7SWill Deacon pmdp = pmd_offset(pudp, addr); 12520a004e7SWill Deacon if ((pte_t *)pmdp == ptep) { 12666b3923aSDavid Woods *pgsize = PMD_SIZE; 12766b3923aSDavid Woods return CONT_PMDS; 12866b3923aSDavid Woods } 12966b3923aSDavid Woods return CONT_PTES; 13066b3923aSDavid Woods } 13166b3923aSDavid Woods 132c3e4ed5cSPunit Agrawal static inline int num_contig_ptes(unsigned long size, size_t *pgsize) 133c3e4ed5cSPunit Agrawal { 134c3e4ed5cSPunit Agrawal int contig_ptes = 0; 135c3e4ed5cSPunit Agrawal 136c3e4ed5cSPunit Agrawal *pgsize = size; 137c3e4ed5cSPunit Agrawal 138c3e4ed5cSPunit Agrawal switch (size) { 139f8b46c4bSAnshuman Khandual #ifndef __PAGETABLE_PMD_FOLDED 140c3e4ed5cSPunit Agrawal case PUD_SIZE: 141f8b46c4bSAnshuman Khandual if (pud_sect_supported()) 142f8b46c4bSAnshuman Khandual contig_ptes = 1; 143f8b46c4bSAnshuman Khandual break; 144c3e4ed5cSPunit Agrawal #endif 145c3e4ed5cSPunit Agrawal case PMD_SIZE: 146c3e4ed5cSPunit Agrawal contig_ptes = 1; 147c3e4ed5cSPunit Agrawal break; 148c3e4ed5cSPunit Agrawal case CONT_PMD_SIZE: 149c3e4ed5cSPunit Agrawal *pgsize = PMD_SIZE; 150c3e4ed5cSPunit Agrawal contig_ptes = CONT_PMDS; 151c3e4ed5cSPunit Agrawal break; 152c3e4ed5cSPunit Agrawal case CONT_PTE_SIZE: 153c3e4ed5cSPunit Agrawal *pgsize = PAGE_SIZE; 154c3e4ed5cSPunit Agrawal contig_ptes = CONT_PTES; 155c3e4ed5cSPunit Agrawal break; 156c3e4ed5cSPunit Agrawal } 157c3e4ed5cSPunit Agrawal 158c3e4ed5cSPunit Agrawal return contig_ptes; 159c3e4ed5cSPunit Agrawal } 160c3e4ed5cSPunit Agrawal 161d8bdcff2SSteve Capper /* 162d8bdcff2SSteve Capper * Changing some bits of contiguous entries requires us to follow a 163d8bdcff2SSteve Capper * Break-Before-Make approach, breaking the whole contiguous set 164d8bdcff2SSteve Capper * before we can change any entries. See ARM DDI 0487A.k_iss10775, 165d8bdcff2SSteve Capper * "Misprogramming of the Contiguous bit", page D4-1762. 166d8bdcff2SSteve Capper * 167d8bdcff2SSteve Capper * This helper performs the break step. 168d8bdcff2SSteve Capper */ 169d8bdcff2SSteve Capper static pte_t get_clear_flush(struct mm_struct *mm, 170d8bdcff2SSteve Capper unsigned long addr, 171d8bdcff2SSteve Capper pte_t *ptep, 172d8bdcff2SSteve Capper unsigned long pgsize, 173d8bdcff2SSteve Capper unsigned long ncontig) 174d8bdcff2SSteve Capper { 175d8bdcff2SSteve Capper pte_t orig_pte = huge_ptep_get(ptep); 176d8bdcff2SSteve Capper bool valid = pte_valid(orig_pte); 177d8bdcff2SSteve Capper unsigned long i, saddr = addr; 178d8bdcff2SSteve Capper 179d8bdcff2SSteve Capper for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) { 180d8bdcff2SSteve Capper pte_t pte = ptep_get_and_clear(mm, addr, ptep); 181d8bdcff2SSteve Capper 182d8bdcff2SSteve Capper /* 183d8bdcff2SSteve Capper * If HW_AFDBM is enabled, then the HW could turn on 184469ed9d8SSteve Capper * the dirty or accessed bit for any page in the set, 185469ed9d8SSteve Capper * so check them all. 186d8bdcff2SSteve Capper */ 187d8bdcff2SSteve Capper if (pte_dirty(pte)) 188d8bdcff2SSteve Capper orig_pte = pte_mkdirty(orig_pte); 189469ed9d8SSteve Capper 190469ed9d8SSteve Capper if (pte_young(pte)) 191469ed9d8SSteve Capper orig_pte = pte_mkyoung(orig_pte); 192d8bdcff2SSteve Capper } 193d8bdcff2SSteve Capper 1948b11ec1bSLinus Torvalds if (valid) { 1958b11ec1bSLinus Torvalds struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0); 196d8bdcff2SSteve Capper flush_tlb_range(&vma, saddr, addr); 1978b11ec1bSLinus Torvalds } 198d8bdcff2SSteve Capper return orig_pte; 199d8bdcff2SSteve Capper } 200d8bdcff2SSteve Capper 201d8bdcff2SSteve Capper /* 202d8bdcff2SSteve Capper * Changing some bits of contiguous entries requires us to follow a 203d8bdcff2SSteve Capper * Break-Before-Make approach, breaking the whole contiguous set 204d8bdcff2SSteve Capper * before we can change any entries. See ARM DDI 0487A.k_iss10775, 205d8bdcff2SSteve Capper * "Misprogramming of the Contiguous bit", page D4-1762. 206d8bdcff2SSteve Capper * 207d8bdcff2SSteve Capper * This helper performs the break step for use cases where the 208d8bdcff2SSteve Capper * original pte is not needed. 209d8bdcff2SSteve Capper */ 210d8bdcff2SSteve Capper static void clear_flush(struct mm_struct *mm, 211d8bdcff2SSteve Capper unsigned long addr, 212d8bdcff2SSteve Capper pte_t *ptep, 213d8bdcff2SSteve Capper unsigned long pgsize, 214d8bdcff2SSteve Capper unsigned long ncontig) 215d8bdcff2SSteve Capper { 2168b11ec1bSLinus Torvalds struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0); 217d8bdcff2SSteve Capper unsigned long i, saddr = addr; 218d8bdcff2SSteve Capper 219d8bdcff2SSteve Capper for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) 220d8bdcff2SSteve Capper pte_clear(mm, addr, ptep); 221d8bdcff2SSteve Capper 222d8bdcff2SSteve Capper flush_tlb_range(&vma, saddr, addr); 223d8bdcff2SSteve Capper } 224d8bdcff2SSteve Capper 22566b3923aSDavid Woods void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, 22666b3923aSDavid Woods pte_t *ptep, pte_t pte) 22766b3923aSDavid Woods { 22866b3923aSDavid Woods size_t pgsize; 22966b3923aSDavid Woods int i; 230bb9dd3dfSSteve Capper int ncontig; 23129a7287dSSteve Capper unsigned long pfn, dpfn; 23266b3923aSDavid Woods pgprot_t hugeprot; 23366b3923aSDavid Woods 234d3ea7952SSteve Capper /* 235d3ea7952SSteve Capper * Code needs to be expanded to handle huge swap and migration 236d3ea7952SSteve Capper * entries. Needed for HUGETLB and MEMORY_FAILURE. 237d3ea7952SSteve Capper */ 238d3ea7952SSteve Capper WARN_ON(!pte_present(pte)); 239d3ea7952SSteve Capper 240bb9dd3dfSSteve Capper if (!pte_cont(pte)) { 24166b3923aSDavid Woods set_pte_at(mm, addr, ptep, pte); 24266b3923aSDavid Woods return; 24366b3923aSDavid Woods } 24466b3923aSDavid Woods 245bb9dd3dfSSteve Capper ncontig = find_num_contig(mm, addr, ptep, &pgsize); 24666b3923aSDavid Woods pfn = pte_pfn(pte); 24729a7287dSSteve Capper dpfn = pgsize >> PAGE_SHIFT; 248b5b0be86SSteve Capper hugeprot = pte_pgprot(pte); 24929a7287dSSteve Capper 250d8bdcff2SSteve Capper clear_flush(mm, addr, ptep, pgsize, ncontig); 251d8bdcff2SSteve Capper 25220a004e7SWill Deacon for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) 25366b3923aSDavid Woods set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot)); 25466b3923aSDavid Woods } 25566b3923aSDavid Woods 256a8d623eeSPunit Agrawal void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr, 257a8d623eeSPunit Agrawal pte_t *ptep, pte_t pte, unsigned long sz) 258a8d623eeSPunit Agrawal { 259a8d623eeSPunit Agrawal int i, ncontig; 260a8d623eeSPunit Agrawal size_t pgsize; 261a8d623eeSPunit Agrawal 262a8d623eeSPunit Agrawal ncontig = num_contig_ptes(sz, &pgsize); 263a8d623eeSPunit Agrawal 264a8d623eeSPunit Agrawal for (i = 0; i < ncontig; i++, ptep++) 265a8d623eeSPunit Agrawal set_pte(ptep, pte); 266a8d623eeSPunit Agrawal } 267a8d623eeSPunit Agrawal 268aec44e0fSPeter Xu pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, 26966b3923aSDavid Woods unsigned long addr, unsigned long sz) 27066b3923aSDavid Woods { 27120a004e7SWill Deacon pgd_t *pgdp; 272e9f63768SMike Rapoport p4d_t *p4dp; 27320a004e7SWill Deacon pud_t *pudp; 27420a004e7SWill Deacon pmd_t *pmdp; 27520a004e7SWill Deacon pte_t *ptep = NULL; 27666b3923aSDavid Woods 27720a004e7SWill Deacon pgdp = pgd_offset(mm, addr); 278e9f63768SMike Rapoport p4dp = p4d_offset(pgdp, addr); 279e9f63768SMike Rapoport pudp = pud_alloc(mm, p4dp, addr); 28020a004e7SWill Deacon if (!pudp) 28166b3923aSDavid Woods return NULL; 28266b3923aSDavid Woods 28366b3923aSDavid Woods if (sz == PUD_SIZE) { 28420a004e7SWill Deacon ptep = (pte_t *)pudp; 285441a6278SAnshuman Khandual } else if (sz == (CONT_PTE_SIZE)) { 28620a004e7SWill Deacon pmdp = pmd_alloc(mm, pudp, addr); 287027d0c71SMark Rutland if (!pmdp) 288027d0c71SMark Rutland return NULL; 28966b3923aSDavid Woods 29066b3923aSDavid Woods WARN_ON(addr & (sz - 1)); 29166b3923aSDavid Woods /* 29266b3923aSDavid Woods * Note that if this code were ever ported to the 29366b3923aSDavid Woods * 32-bit arm platform then it will cause trouble in 29466b3923aSDavid Woods * the case where CONFIG_HIGHPTE is set, since there 29566b3923aSDavid Woods * will be no pte_unmap() to correspond with this 29666b3923aSDavid Woods * pte_alloc_map(). 29766b3923aSDavid Woods */ 29820a004e7SWill Deacon ptep = pte_alloc_map(mm, pmdp, addr); 29966b3923aSDavid Woods } else if (sz == PMD_SIZE) { 300c1991e07SPeter Xu if (want_pmd_share(vma, addr) && pud_none(READ_ONCE(*pudp))) 301aec44e0fSPeter Xu ptep = huge_pmd_share(mm, vma, addr, pudp); 30266b3923aSDavid Woods else 30320a004e7SWill Deacon ptep = (pte_t *)pmd_alloc(mm, pudp, addr); 304441a6278SAnshuman Khandual } else if (sz == (CONT_PMD_SIZE)) { 30520a004e7SWill Deacon pmdp = pmd_alloc(mm, pudp, addr); 30666b3923aSDavid Woods WARN_ON(addr & (sz - 1)); 30720a004e7SWill Deacon return (pte_t *)pmdp; 30866b3923aSDavid Woods } 30966b3923aSDavid Woods 31020a004e7SWill Deacon return ptep; 31166b3923aSDavid Woods } 31266b3923aSDavid Woods 3137868a208SPunit Agrawal pte_t *huge_pte_offset(struct mm_struct *mm, 3147868a208SPunit Agrawal unsigned long addr, unsigned long sz) 31566b3923aSDavid Woods { 31620a004e7SWill Deacon pgd_t *pgdp; 317e9f63768SMike Rapoport p4d_t *p4dp; 31820a004e7SWill Deacon pud_t *pudp, pud; 31920a004e7SWill Deacon pmd_t *pmdp, pmd; 32066b3923aSDavid Woods 32120a004e7SWill Deacon pgdp = pgd_offset(mm, addr); 32220a004e7SWill Deacon if (!pgd_present(READ_ONCE(*pgdp))) 32366b3923aSDavid Woods return NULL; 324f02ab08aSPunit Agrawal 325e9f63768SMike Rapoport p4dp = p4d_offset(pgdp, addr); 326e9f63768SMike Rapoport if (!p4d_present(READ_ONCE(*p4dp))) 327e9f63768SMike Rapoport return NULL; 328e9f63768SMike Rapoport 329e9f63768SMike Rapoport pudp = pud_offset(p4dp, addr); 33020a004e7SWill Deacon pud = READ_ONCE(*pudp); 33120a004e7SWill Deacon if (sz != PUD_SIZE && pud_none(pud)) 33266b3923aSDavid Woods return NULL; 33330f3ac00SPunit Agrawal /* hugepage or swap? */ 33420a004e7SWill Deacon if (pud_huge(pud) || !pud_present(pud)) 33520a004e7SWill Deacon return (pte_t *)pudp; 336f02ab08aSPunit Agrawal /* table; check the next level */ 33766b3923aSDavid Woods 33830f3ac00SPunit Agrawal if (sz == CONT_PMD_SIZE) 33930f3ac00SPunit Agrawal addr &= CONT_PMD_MASK; 34030f3ac00SPunit Agrawal 34120a004e7SWill Deacon pmdp = pmd_offset(pudp, addr); 34220a004e7SWill Deacon pmd = READ_ONCE(*pmdp); 34330f3ac00SPunit Agrawal if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) && 34420a004e7SWill Deacon pmd_none(pmd)) 345f02ab08aSPunit Agrawal return NULL; 34620a004e7SWill Deacon if (pmd_huge(pmd) || !pmd_present(pmd)) 34720a004e7SWill Deacon return (pte_t *)pmdp; 348f02ab08aSPunit Agrawal 34920a004e7SWill Deacon if (sz == CONT_PTE_SIZE) 35020a004e7SWill Deacon return pte_offset_kernel(pmdp, (addr & CONT_PTE_MASK)); 35130f3ac00SPunit Agrawal 35266b3923aSDavid Woods return NULL; 35366b3923aSDavid Woods } 35466b3923aSDavid Woods 35579c1c594SChristophe Leroy pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags) 35666b3923aSDavid Woods { 35779c1c594SChristophe Leroy size_t pagesize = 1UL << shift; 35866b3923aSDavid Woods 35916785bd7SAnshuman Khandual entry = pte_mkhuge(entry); 36066b3923aSDavid Woods if (pagesize == CONT_PTE_SIZE) { 36166b3923aSDavid Woods entry = pte_mkcont(entry); 36266b3923aSDavid Woods } else if (pagesize == CONT_PMD_SIZE) { 36366b3923aSDavid Woods entry = pmd_pte(pmd_mkcont(pte_pmd(entry))); 36466b3923aSDavid Woods } else if (pagesize != PUD_SIZE && pagesize != PMD_SIZE) { 36566b3923aSDavid Woods pr_warn("%s: unrecognized huge page size 0x%lx\n", 36666b3923aSDavid Woods __func__, pagesize); 36766b3923aSDavid Woods } 36866b3923aSDavid Woods return entry; 36966b3923aSDavid Woods } 37066b3923aSDavid Woods 371c3e4ed5cSPunit Agrawal void huge_pte_clear(struct mm_struct *mm, unsigned long addr, 372c3e4ed5cSPunit Agrawal pte_t *ptep, unsigned long sz) 373c3e4ed5cSPunit Agrawal { 374c3e4ed5cSPunit Agrawal int i, ncontig; 375c3e4ed5cSPunit Agrawal size_t pgsize; 376c3e4ed5cSPunit Agrawal 377c3e4ed5cSPunit Agrawal ncontig = num_contig_ptes(sz, &pgsize); 378c3e4ed5cSPunit Agrawal 379c3e4ed5cSPunit Agrawal for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) 380c3e4ed5cSPunit Agrawal pte_clear(mm, addr, ptep); 381c3e4ed5cSPunit Agrawal } 382c3e4ed5cSPunit Agrawal 38366b3923aSDavid Woods pte_t huge_ptep_get_and_clear(struct mm_struct *mm, 38466b3923aSDavid Woods unsigned long addr, pte_t *ptep) 38566b3923aSDavid Woods { 386d8bdcff2SSteve Capper int ncontig; 38766b3923aSDavid Woods size_t pgsize; 38829a7287dSSteve Capper pte_t orig_pte = huge_ptep_get(ptep); 38929a7287dSSteve Capper 39029a7287dSSteve Capper if (!pte_cont(orig_pte)) 39129a7287dSSteve Capper return ptep_get_and_clear(mm, addr, ptep); 39266b3923aSDavid Woods 393f0b38d65SSteve Capper ncontig = find_num_contig(mm, addr, ptep, &pgsize); 39429a7287dSSteve Capper 395d8bdcff2SSteve Capper return get_clear_flush(mm, addr, ptep, pgsize, ncontig); 39666b3923aSDavid Woods } 39766b3923aSDavid Woods 398031e6e6bSSteve Capper /* 399031e6e6bSSteve Capper * huge_ptep_set_access_flags will update access flags (dirty, accesssed) 400031e6e6bSSteve Capper * and write permission. 401031e6e6bSSteve Capper * 402031e6e6bSSteve Capper * For a contiguous huge pte range we need to check whether or not write 403031e6e6bSSteve Capper * permission has to change only on the first pte in the set. Then for 404031e6e6bSSteve Capper * all the contiguous ptes we need to check whether or not there is a 405031e6e6bSSteve Capper * discrepancy between dirty or young. 406031e6e6bSSteve Capper */ 407031e6e6bSSteve Capper static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig) 408031e6e6bSSteve Capper { 409031e6e6bSSteve Capper int i; 410031e6e6bSSteve Capper 411031e6e6bSSteve Capper if (pte_write(pte) != pte_write(huge_ptep_get(ptep))) 412031e6e6bSSteve Capper return 1; 413031e6e6bSSteve Capper 414031e6e6bSSteve Capper for (i = 0; i < ncontig; i++) { 415031e6e6bSSteve Capper pte_t orig_pte = huge_ptep_get(ptep + i); 416031e6e6bSSteve Capper 417031e6e6bSSteve Capper if (pte_dirty(pte) != pte_dirty(orig_pte)) 418031e6e6bSSteve Capper return 1; 419031e6e6bSSteve Capper 420031e6e6bSSteve Capper if (pte_young(pte) != pte_young(orig_pte)) 421031e6e6bSSteve Capper return 1; 422031e6e6bSSteve Capper } 423031e6e6bSSteve Capper 424031e6e6bSSteve Capper return 0; 425031e6e6bSSteve Capper } 426031e6e6bSSteve Capper 42766b3923aSDavid Woods int huge_ptep_set_access_flags(struct vm_area_struct *vma, 42866b3923aSDavid Woods unsigned long addr, pte_t *ptep, 42966b3923aSDavid Woods pte_t pte, int dirty) 43066b3923aSDavid Woods { 431031e6e6bSSteve Capper int ncontig, i; 43266b3923aSDavid Woods size_t pgsize = 0; 43329a7287dSSteve Capper unsigned long pfn = pte_pfn(pte), dpfn; 43429a7287dSSteve Capper pgprot_t hugeprot; 435d8bdcff2SSteve Capper pte_t orig_pte; 43666b3923aSDavid Woods 43729a7287dSSteve Capper if (!pte_cont(pte)) 43866b3923aSDavid Woods return ptep_set_access_flags(vma, addr, ptep, pte, dirty); 43929a7287dSSteve Capper 44029a7287dSSteve Capper ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize); 44129a7287dSSteve Capper dpfn = pgsize >> PAGE_SHIFT; 44229a7287dSSteve Capper 443031e6e6bSSteve Capper if (!__cont_access_flags_changed(ptep, pte, ncontig)) 444031e6e6bSSteve Capper return 0; 445031e6e6bSSteve Capper 446d8bdcff2SSteve Capper orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig); 447d8bdcff2SSteve Capper 448469ed9d8SSteve Capper /* Make sure we don't lose the dirty or young state */ 449d8bdcff2SSteve Capper if (pte_dirty(orig_pte)) 450d8bdcff2SSteve Capper pte = pte_mkdirty(pte); 451d8bdcff2SSteve Capper 452469ed9d8SSteve Capper if (pte_young(orig_pte)) 453469ed9d8SSteve Capper pte = pte_mkyoung(pte); 454469ed9d8SSteve Capper 455d8bdcff2SSteve Capper hugeprot = pte_pgprot(pte); 456d8bdcff2SSteve Capper for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) 457d8bdcff2SSteve Capper set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot)); 45829a7287dSSteve Capper 459031e6e6bSSteve Capper return 1; 46066b3923aSDavid Woods } 46166b3923aSDavid Woods 46266b3923aSDavid Woods void huge_ptep_set_wrprotect(struct mm_struct *mm, 46366b3923aSDavid Woods unsigned long addr, pte_t *ptep) 46466b3923aSDavid Woods { 465d8bdcff2SSteve Capper unsigned long pfn, dpfn; 466d8bdcff2SSteve Capper pgprot_t hugeprot; 46766b3923aSDavid Woods int ncontig, i; 46829a7287dSSteve Capper size_t pgsize; 469d8bdcff2SSteve Capper pte_t pte; 47029a7287dSSteve Capper 47120a004e7SWill Deacon if (!pte_cont(READ_ONCE(*ptep))) { 47229a7287dSSteve Capper ptep_set_wrprotect(mm, addr, ptep); 47329a7287dSSteve Capper return; 47429a7287dSSteve Capper } 47566b3923aSDavid Woods 476f0b38d65SSteve Capper ncontig = find_num_contig(mm, addr, ptep, &pgsize); 477d8bdcff2SSteve Capper dpfn = pgsize >> PAGE_SHIFT; 478d8bdcff2SSteve Capper 479d8bdcff2SSteve Capper pte = get_clear_flush(mm, addr, ptep, pgsize, ncontig); 480d8bdcff2SSteve Capper pte = pte_wrprotect(pte); 481d8bdcff2SSteve Capper 482d8bdcff2SSteve Capper hugeprot = pte_pgprot(pte); 483d8bdcff2SSteve Capper pfn = pte_pfn(pte); 484d8bdcff2SSteve Capper 485d8bdcff2SSteve Capper for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) 486d8bdcff2SSteve Capper set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot)); 48766b3923aSDavid Woods } 48866b3923aSDavid Woods 489*ae075629SBaolin Wang pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, 49066b3923aSDavid Woods unsigned long addr, pte_t *ptep) 49166b3923aSDavid Woods { 49229a7287dSSteve Capper size_t pgsize; 493d8bdcff2SSteve Capper int ncontig; 49466b3923aSDavid Woods 495*ae075629SBaolin Wang if (!pte_cont(READ_ONCE(*ptep))) 496*ae075629SBaolin Wang return ptep_clear_flush(vma, addr, ptep); 49729a7287dSSteve Capper 49829a7287dSSteve Capper ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize); 499*ae075629SBaolin Wang return get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig); 50066b3923aSDavid Woods } 50166b3923aSDavid Woods 502a21b0b78SAllen Pais static int __init hugetlbpage_init(void) 503a21b0b78SAllen Pais { 504f8b46c4bSAnshuman Khandual if (pud_sect_supported()) 50538237830SMike Kravetz hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); 506f8b46c4bSAnshuman Khandual 507a1634a54SGavin Shan hugetlb_add_hstate(CONT_PMD_SHIFT - PAGE_SHIFT); 50838237830SMike Kravetz hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); 509a1634a54SGavin Shan hugetlb_add_hstate(CONT_PTE_SHIFT - PAGE_SHIFT); 510a21b0b78SAllen Pais 511a21b0b78SAllen Pais return 0; 512a21b0b78SAllen Pais } 513a21b0b78SAllen Pais arch_initcall(hugetlbpage_init); 514a21b0b78SAllen Pais 515ae94da89SMike Kravetz bool __init arch_hugetlb_valid_size(unsigned long size) 516084bd298SSteve Capper { 517a8a733b2SAnshuman Khandual return __hugetlb_valid_size(size); 518ae94da89SMike Kravetz } 519