xref: /linux/arch/arm64/mm/hugetlbpage.c (revision ae07562909f3dfcdff40f87e51965728dab50485)
11802d0beSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
2084bd298SSteve Capper /*
3084bd298SSteve Capper  * arch/arm64/mm/hugetlbpage.c
4084bd298SSteve Capper  *
5084bd298SSteve Capper  * Copyright (C) 2013 Linaro Ltd.
6084bd298SSteve Capper  *
7084bd298SSteve Capper  * Based on arch/x86/mm/hugetlbpage.c.
8084bd298SSteve Capper  */
9084bd298SSteve Capper 
10084bd298SSteve Capper #include <linux/init.h>
11084bd298SSteve Capper #include <linux/fs.h>
12084bd298SSteve Capper #include <linux/mm.h>
13084bd298SSteve Capper #include <linux/hugetlb.h>
14084bd298SSteve Capper #include <linux/pagemap.h>
15084bd298SSteve Capper #include <linux/err.h>
16084bd298SSteve Capper #include <linux/sysctl.h>
17084bd298SSteve Capper #include <asm/mman.h>
18084bd298SSteve Capper #include <asm/tlb.h>
19084bd298SSteve Capper #include <asm/tlbflush.h>
20084bd298SSteve Capper 
21abb7962aSAnshuman Khandual /*
22abb7962aSAnshuman Khandual  * HugeTLB Support Matrix
23abb7962aSAnshuman Khandual  *
24abb7962aSAnshuman Khandual  * ---------------------------------------------------
25abb7962aSAnshuman Khandual  * | Page Size | CONT PTE |  PMD  | CONT PMD |  PUD  |
26abb7962aSAnshuman Khandual  * ---------------------------------------------------
27abb7962aSAnshuman Khandual  * |     4K    |   64K    |   2M  |    32M   |   1G  |
28abb7962aSAnshuman Khandual  * |    16K    |    2M    |  32M  |     1G   |       |
29abb7962aSAnshuman Khandual  * |    64K    |    2M    | 512M  |    16G   |       |
30abb7962aSAnshuman Khandual  * ---------------------------------------------------
31abb7962aSAnshuman Khandual  */
32abb7962aSAnshuman Khandual 
33abb7962aSAnshuman Khandual /*
34abb7962aSAnshuman Khandual  * Reserve CMA areas for the largest supported gigantic
35abb7962aSAnshuman Khandual  * huge page when requested. Any other smaller gigantic
36abb7962aSAnshuman Khandual  * huge pages could still be served from those areas.
37abb7962aSAnshuman Khandual  */
#ifdef CONFIG_CMA
/*
 * Work out the page order of the largest gigantic huge page (PUD sized
 * when PUD sections are supported, contiguous-PMD sized otherwise) and
 * pass it to hugetlb_cma_reserve().
 */
void __init arm64_hugetlb_cma_reserve(void)
{
	int order = pud_sect_supported() ? PUD_SHIFT - PAGE_SHIFT
					 : CONT_PMD_SHIFT - PAGE_SHIFT;

	/*
	 * The CMA reservation exists for gigantic huge pages, i.e. pages
	 * that the page allocator cannot provide. Only warn if some later
	 * change breaks that assumption.
	 */
	WARN_ON(order <= MAX_ORDER);
	hugetlb_cma_reserve(order);
}
#endif /* CONFIG_CMA */
58abb7962aSAnshuman Khandual 
59a8a733b2SAnshuman Khandual static bool __hugetlb_valid_size(unsigned long size)
60a8a733b2SAnshuman Khandual {
61a8a733b2SAnshuman Khandual 	switch (size) {
62a8a733b2SAnshuman Khandual #ifndef __PAGETABLE_PMD_FOLDED
63a8a733b2SAnshuman Khandual 	case PUD_SIZE:
64a8a733b2SAnshuman Khandual 		return pud_sect_supported();
65a8a733b2SAnshuman Khandual #endif
66a8a733b2SAnshuman Khandual 	case CONT_PMD_SIZE:
67a8a733b2SAnshuman Khandual 	case PMD_SIZE:
68a8a733b2SAnshuman Khandual 	case CONT_PTE_SIZE:
69a8a733b2SAnshuman Khandual 		return true;
70a8a733b2SAnshuman Khandual 	}
71a8a733b2SAnshuman Khandual 
72a8a733b2SAnshuman Khandual 	return false;
73a8a733b2SAnshuman Khandual }
74a8a733b2SAnshuman Khandual 
#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
/*
 * Report whether huge pages belonging to hstate @h may be migrated.
 *
 * Returns true when the hstate's page size is accepted by
 * __hugetlb_valid_size(); otherwise logs a warning and returns false.
 */
bool arch_hugetlb_migration_supported(struct hstate *h)
{
	size_t pagesize = huge_page_size(h);

	if (!__hugetlb_valid_size(pagesize)) {
		/* pagesize is a size_t, so print it with the 'z' modifier */
		pr_warn("%s: unrecognized huge page size 0x%zx\n",
			__func__, pagesize);
		return false;
	}
	return true;
}
#endif
885480280dSAnshuman Khandual 
89084bd298SSteve Capper int pmd_huge(pmd_t pmd)
90084bd298SSteve Capper {
91fd28f5d4SChristoffer Dall 	return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
92084bd298SSteve Capper }
93084bd298SSteve Capper 
94084bd298SSteve Capper int pud_huge(pud_t pud)
95084bd298SSteve Capper {
964797ec2dSMark Salter #ifndef __PAGETABLE_PMD_FOLDED
97fd28f5d4SChristoffer Dall 	return pud_val(pud) && !(pud_val(pud) & PUD_TABLE_BIT);
984797ec2dSMark Salter #else
994797ec2dSMark Salter 	return 0;
1004797ec2dSMark Salter #endif
101084bd298SSteve Capper }
102084bd298SSteve Capper 
103b5b0be86SSteve Capper /*
104b5b0be86SSteve Capper  * Select all bits except the pfn
105b5b0be86SSteve Capper  */
106b5b0be86SSteve Capper static inline pgprot_t pte_pgprot(pte_t pte)
107b5b0be86SSteve Capper {
108b5b0be86SSteve Capper 	unsigned long pfn = pte_pfn(pte);
109b5b0be86SSteve Capper 
110b5b0be86SSteve Capper 	return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
111b5b0be86SSteve Capper }
112b5b0be86SSteve Capper 
11366b3923aSDavid Woods static int find_num_contig(struct mm_struct *mm, unsigned long addr,
114bb9dd3dfSSteve Capper 			   pte_t *ptep, size_t *pgsize)
11566b3923aSDavid Woods {
11620a004e7SWill Deacon 	pgd_t *pgdp = pgd_offset(mm, addr);
117e9f63768SMike Rapoport 	p4d_t *p4dp;
11820a004e7SWill Deacon 	pud_t *pudp;
11920a004e7SWill Deacon 	pmd_t *pmdp;
12066b3923aSDavid Woods 
12166b3923aSDavid Woods 	*pgsize = PAGE_SIZE;
122e9f63768SMike Rapoport 	p4dp = p4d_offset(pgdp, addr);
123e9f63768SMike Rapoport 	pudp = pud_offset(p4dp, addr);
12420a004e7SWill Deacon 	pmdp = pmd_offset(pudp, addr);
12520a004e7SWill Deacon 	if ((pte_t *)pmdp == ptep) {
12666b3923aSDavid Woods 		*pgsize = PMD_SIZE;
12766b3923aSDavid Woods 		return CONT_PMDS;
12866b3923aSDavid Woods 	}
12966b3923aSDavid Woods 	return CONT_PTES;
13066b3923aSDavid Woods }
13166b3923aSDavid Woods 
132c3e4ed5cSPunit Agrawal static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
133c3e4ed5cSPunit Agrawal {
134c3e4ed5cSPunit Agrawal 	int contig_ptes = 0;
135c3e4ed5cSPunit Agrawal 
136c3e4ed5cSPunit Agrawal 	*pgsize = size;
137c3e4ed5cSPunit Agrawal 
138c3e4ed5cSPunit Agrawal 	switch (size) {
139f8b46c4bSAnshuman Khandual #ifndef __PAGETABLE_PMD_FOLDED
140c3e4ed5cSPunit Agrawal 	case PUD_SIZE:
141f8b46c4bSAnshuman Khandual 		if (pud_sect_supported())
142f8b46c4bSAnshuman Khandual 			contig_ptes = 1;
143f8b46c4bSAnshuman Khandual 		break;
144c3e4ed5cSPunit Agrawal #endif
145c3e4ed5cSPunit Agrawal 	case PMD_SIZE:
146c3e4ed5cSPunit Agrawal 		contig_ptes = 1;
147c3e4ed5cSPunit Agrawal 		break;
148c3e4ed5cSPunit Agrawal 	case CONT_PMD_SIZE:
149c3e4ed5cSPunit Agrawal 		*pgsize = PMD_SIZE;
150c3e4ed5cSPunit Agrawal 		contig_ptes = CONT_PMDS;
151c3e4ed5cSPunit Agrawal 		break;
152c3e4ed5cSPunit Agrawal 	case CONT_PTE_SIZE:
153c3e4ed5cSPunit Agrawal 		*pgsize = PAGE_SIZE;
154c3e4ed5cSPunit Agrawal 		contig_ptes = CONT_PTES;
155c3e4ed5cSPunit Agrawal 		break;
156c3e4ed5cSPunit Agrawal 	}
157c3e4ed5cSPunit Agrawal 
158c3e4ed5cSPunit Agrawal 	return contig_ptes;
159c3e4ed5cSPunit Agrawal }
160c3e4ed5cSPunit Agrawal 
161d8bdcff2SSteve Capper /*
162d8bdcff2SSteve Capper  * Changing some bits of contiguous entries requires us to follow a
163d8bdcff2SSteve Capper  * Break-Before-Make approach, breaking the whole contiguous set
164d8bdcff2SSteve Capper  * before we can change any entries. See ARM DDI 0487A.k_iss10775,
165d8bdcff2SSteve Capper  * "Misprogramming of the Contiguous bit", page D4-1762.
166d8bdcff2SSteve Capper  *
167d8bdcff2SSteve Capper  * This helper performs the break step.
168d8bdcff2SSteve Capper  */
169d8bdcff2SSteve Capper static pte_t get_clear_flush(struct mm_struct *mm,
170d8bdcff2SSteve Capper 			     unsigned long addr,
171d8bdcff2SSteve Capper 			     pte_t *ptep,
172d8bdcff2SSteve Capper 			     unsigned long pgsize,
173d8bdcff2SSteve Capper 			     unsigned long ncontig)
174d8bdcff2SSteve Capper {
175d8bdcff2SSteve Capper 	pte_t orig_pte = huge_ptep_get(ptep);
176d8bdcff2SSteve Capper 	bool valid = pte_valid(orig_pte);
177d8bdcff2SSteve Capper 	unsigned long i, saddr = addr;
178d8bdcff2SSteve Capper 
179d8bdcff2SSteve Capper 	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
180d8bdcff2SSteve Capper 		pte_t pte = ptep_get_and_clear(mm, addr, ptep);
181d8bdcff2SSteve Capper 
182d8bdcff2SSteve Capper 		/*
183d8bdcff2SSteve Capper 		 * If HW_AFDBM is enabled, then the HW could turn on
184469ed9d8SSteve Capper 		 * the dirty or accessed bit for any page in the set,
185469ed9d8SSteve Capper 		 * so check them all.
186d8bdcff2SSteve Capper 		 */
187d8bdcff2SSteve Capper 		if (pte_dirty(pte))
188d8bdcff2SSteve Capper 			orig_pte = pte_mkdirty(orig_pte);
189469ed9d8SSteve Capper 
190469ed9d8SSteve Capper 		if (pte_young(pte))
191469ed9d8SSteve Capper 			orig_pte = pte_mkyoung(orig_pte);
192d8bdcff2SSteve Capper 	}
193d8bdcff2SSteve Capper 
1948b11ec1bSLinus Torvalds 	if (valid) {
1958b11ec1bSLinus Torvalds 		struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
196d8bdcff2SSteve Capper 		flush_tlb_range(&vma, saddr, addr);
1978b11ec1bSLinus Torvalds 	}
198d8bdcff2SSteve Capper 	return orig_pte;
199d8bdcff2SSteve Capper }
200d8bdcff2SSteve Capper 
201d8bdcff2SSteve Capper /*
202d8bdcff2SSteve Capper  * Changing some bits of contiguous entries requires us to follow a
203d8bdcff2SSteve Capper  * Break-Before-Make approach, breaking the whole contiguous set
204d8bdcff2SSteve Capper  * before we can change any entries. See ARM DDI 0487A.k_iss10775,
205d8bdcff2SSteve Capper  * "Misprogramming of the Contiguous bit", page D4-1762.
206d8bdcff2SSteve Capper  *
207d8bdcff2SSteve Capper  * This helper performs the break step for use cases where the
208d8bdcff2SSteve Capper  * original pte is not needed.
209d8bdcff2SSteve Capper  */
210d8bdcff2SSteve Capper static void clear_flush(struct mm_struct *mm,
211d8bdcff2SSteve Capper 			     unsigned long addr,
212d8bdcff2SSteve Capper 			     pte_t *ptep,
213d8bdcff2SSteve Capper 			     unsigned long pgsize,
214d8bdcff2SSteve Capper 			     unsigned long ncontig)
215d8bdcff2SSteve Capper {
2168b11ec1bSLinus Torvalds 	struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
217d8bdcff2SSteve Capper 	unsigned long i, saddr = addr;
218d8bdcff2SSteve Capper 
219d8bdcff2SSteve Capper 	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
220d8bdcff2SSteve Capper 		pte_clear(mm, addr, ptep);
221d8bdcff2SSteve Capper 
222d8bdcff2SSteve Capper 	flush_tlb_range(&vma, saddr, addr);
223d8bdcff2SSteve Capper }
224d8bdcff2SSteve Capper 
22566b3923aSDavid Woods void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
22666b3923aSDavid Woods 			    pte_t *ptep, pte_t pte)
22766b3923aSDavid Woods {
22866b3923aSDavid Woods 	size_t pgsize;
22966b3923aSDavid Woods 	int i;
230bb9dd3dfSSteve Capper 	int ncontig;
23129a7287dSSteve Capper 	unsigned long pfn, dpfn;
23266b3923aSDavid Woods 	pgprot_t hugeprot;
23366b3923aSDavid Woods 
234d3ea7952SSteve Capper 	/*
235d3ea7952SSteve Capper 	 * Code needs to be expanded to handle huge swap and migration
236d3ea7952SSteve Capper 	 * entries. Needed for HUGETLB and MEMORY_FAILURE.
237d3ea7952SSteve Capper 	 */
238d3ea7952SSteve Capper 	WARN_ON(!pte_present(pte));
239d3ea7952SSteve Capper 
240bb9dd3dfSSteve Capper 	if (!pte_cont(pte)) {
24166b3923aSDavid Woods 		set_pte_at(mm, addr, ptep, pte);
24266b3923aSDavid Woods 		return;
24366b3923aSDavid Woods 	}
24466b3923aSDavid Woods 
245bb9dd3dfSSteve Capper 	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
24666b3923aSDavid Woods 	pfn = pte_pfn(pte);
24729a7287dSSteve Capper 	dpfn = pgsize >> PAGE_SHIFT;
248b5b0be86SSteve Capper 	hugeprot = pte_pgprot(pte);
24929a7287dSSteve Capper 
250d8bdcff2SSteve Capper 	clear_flush(mm, addr, ptep, pgsize, ncontig);
251d8bdcff2SSteve Capper 
25220a004e7SWill Deacon 	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
25366b3923aSDavid Woods 		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
25466b3923aSDavid Woods }
25566b3923aSDavid Woods 
256a8d623eeSPunit Agrawal void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
257a8d623eeSPunit Agrawal 			  pte_t *ptep, pte_t pte, unsigned long sz)
258a8d623eeSPunit Agrawal {
259a8d623eeSPunit Agrawal 	int i, ncontig;
260a8d623eeSPunit Agrawal 	size_t pgsize;
261a8d623eeSPunit Agrawal 
262a8d623eeSPunit Agrawal 	ncontig = num_contig_ptes(sz, &pgsize);
263a8d623eeSPunit Agrawal 
264a8d623eeSPunit Agrawal 	for (i = 0; i < ncontig; i++, ptep++)
265a8d623eeSPunit Agrawal 		set_pte(ptep, pte);
266a8d623eeSPunit Agrawal }
267a8d623eeSPunit Agrawal 
268aec44e0fSPeter Xu pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
26966b3923aSDavid Woods 		      unsigned long addr, unsigned long sz)
27066b3923aSDavid Woods {
27120a004e7SWill Deacon 	pgd_t *pgdp;
272e9f63768SMike Rapoport 	p4d_t *p4dp;
27320a004e7SWill Deacon 	pud_t *pudp;
27420a004e7SWill Deacon 	pmd_t *pmdp;
27520a004e7SWill Deacon 	pte_t *ptep = NULL;
27666b3923aSDavid Woods 
27720a004e7SWill Deacon 	pgdp = pgd_offset(mm, addr);
278e9f63768SMike Rapoport 	p4dp = p4d_offset(pgdp, addr);
279e9f63768SMike Rapoport 	pudp = pud_alloc(mm, p4dp, addr);
28020a004e7SWill Deacon 	if (!pudp)
28166b3923aSDavid Woods 		return NULL;
28266b3923aSDavid Woods 
28366b3923aSDavid Woods 	if (sz == PUD_SIZE) {
28420a004e7SWill Deacon 		ptep = (pte_t *)pudp;
285441a6278SAnshuman Khandual 	} else if (sz == (CONT_PTE_SIZE)) {
28620a004e7SWill Deacon 		pmdp = pmd_alloc(mm, pudp, addr);
287027d0c71SMark Rutland 		if (!pmdp)
288027d0c71SMark Rutland 			return NULL;
28966b3923aSDavid Woods 
29066b3923aSDavid Woods 		WARN_ON(addr & (sz - 1));
29166b3923aSDavid Woods 		/*
29266b3923aSDavid Woods 		 * Note that if this code were ever ported to the
29366b3923aSDavid Woods 		 * 32-bit arm platform then it will cause trouble in
29466b3923aSDavid Woods 		 * the case where CONFIG_HIGHPTE is set, since there
29566b3923aSDavid Woods 		 * will be no pte_unmap() to correspond with this
29666b3923aSDavid Woods 		 * pte_alloc_map().
29766b3923aSDavid Woods 		 */
29820a004e7SWill Deacon 		ptep = pte_alloc_map(mm, pmdp, addr);
29966b3923aSDavid Woods 	} else if (sz == PMD_SIZE) {
300c1991e07SPeter Xu 		if (want_pmd_share(vma, addr) && pud_none(READ_ONCE(*pudp)))
301aec44e0fSPeter Xu 			ptep = huge_pmd_share(mm, vma, addr, pudp);
30266b3923aSDavid Woods 		else
30320a004e7SWill Deacon 			ptep = (pte_t *)pmd_alloc(mm, pudp, addr);
304441a6278SAnshuman Khandual 	} else if (sz == (CONT_PMD_SIZE)) {
30520a004e7SWill Deacon 		pmdp = pmd_alloc(mm, pudp, addr);
30666b3923aSDavid Woods 		WARN_ON(addr & (sz - 1));
30720a004e7SWill Deacon 		return (pte_t *)pmdp;
30866b3923aSDavid Woods 	}
30966b3923aSDavid Woods 
31020a004e7SWill Deacon 	return ptep;
31166b3923aSDavid Woods }
31266b3923aSDavid Woods 
3137868a208SPunit Agrawal pte_t *huge_pte_offset(struct mm_struct *mm,
3147868a208SPunit Agrawal 		       unsigned long addr, unsigned long sz)
31566b3923aSDavid Woods {
31620a004e7SWill Deacon 	pgd_t *pgdp;
317e9f63768SMike Rapoport 	p4d_t *p4dp;
31820a004e7SWill Deacon 	pud_t *pudp, pud;
31920a004e7SWill Deacon 	pmd_t *pmdp, pmd;
32066b3923aSDavid Woods 
32120a004e7SWill Deacon 	pgdp = pgd_offset(mm, addr);
32220a004e7SWill Deacon 	if (!pgd_present(READ_ONCE(*pgdp)))
32366b3923aSDavid Woods 		return NULL;
324f02ab08aSPunit Agrawal 
325e9f63768SMike Rapoport 	p4dp = p4d_offset(pgdp, addr);
326e9f63768SMike Rapoport 	if (!p4d_present(READ_ONCE(*p4dp)))
327e9f63768SMike Rapoport 		return NULL;
328e9f63768SMike Rapoport 
329e9f63768SMike Rapoport 	pudp = pud_offset(p4dp, addr);
33020a004e7SWill Deacon 	pud = READ_ONCE(*pudp);
33120a004e7SWill Deacon 	if (sz != PUD_SIZE && pud_none(pud))
33266b3923aSDavid Woods 		return NULL;
33330f3ac00SPunit Agrawal 	/* hugepage or swap? */
33420a004e7SWill Deacon 	if (pud_huge(pud) || !pud_present(pud))
33520a004e7SWill Deacon 		return (pte_t *)pudp;
336f02ab08aSPunit Agrawal 	/* table; check the next level */
33766b3923aSDavid Woods 
33830f3ac00SPunit Agrawal 	if (sz == CONT_PMD_SIZE)
33930f3ac00SPunit Agrawal 		addr &= CONT_PMD_MASK;
34030f3ac00SPunit Agrawal 
34120a004e7SWill Deacon 	pmdp = pmd_offset(pudp, addr);
34220a004e7SWill Deacon 	pmd = READ_ONCE(*pmdp);
34330f3ac00SPunit Agrawal 	if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
34420a004e7SWill Deacon 	    pmd_none(pmd))
345f02ab08aSPunit Agrawal 		return NULL;
34620a004e7SWill Deacon 	if (pmd_huge(pmd) || !pmd_present(pmd))
34720a004e7SWill Deacon 		return (pte_t *)pmdp;
348f02ab08aSPunit Agrawal 
34920a004e7SWill Deacon 	if (sz == CONT_PTE_SIZE)
35020a004e7SWill Deacon 		return pte_offset_kernel(pmdp, (addr & CONT_PTE_MASK));
35130f3ac00SPunit Agrawal 
35266b3923aSDavid Woods 	return NULL;
35366b3923aSDavid Woods }
35466b3923aSDavid Woods 
35579c1c594SChristophe Leroy pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
35666b3923aSDavid Woods {
35779c1c594SChristophe Leroy 	size_t pagesize = 1UL << shift;
35866b3923aSDavid Woods 
35916785bd7SAnshuman Khandual 	entry = pte_mkhuge(entry);
36066b3923aSDavid Woods 	if (pagesize == CONT_PTE_SIZE) {
36166b3923aSDavid Woods 		entry = pte_mkcont(entry);
36266b3923aSDavid Woods 	} else if (pagesize == CONT_PMD_SIZE) {
36366b3923aSDavid Woods 		entry = pmd_pte(pmd_mkcont(pte_pmd(entry)));
36466b3923aSDavid Woods 	} else if (pagesize != PUD_SIZE && pagesize != PMD_SIZE) {
36566b3923aSDavid Woods 		pr_warn("%s: unrecognized huge page size 0x%lx\n",
36666b3923aSDavid Woods 			__func__, pagesize);
36766b3923aSDavid Woods 	}
36866b3923aSDavid Woods 	return entry;
36966b3923aSDavid Woods }
37066b3923aSDavid Woods 
371c3e4ed5cSPunit Agrawal void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
372c3e4ed5cSPunit Agrawal 		    pte_t *ptep, unsigned long sz)
373c3e4ed5cSPunit Agrawal {
374c3e4ed5cSPunit Agrawal 	int i, ncontig;
375c3e4ed5cSPunit Agrawal 	size_t pgsize;
376c3e4ed5cSPunit Agrawal 
377c3e4ed5cSPunit Agrawal 	ncontig = num_contig_ptes(sz, &pgsize);
378c3e4ed5cSPunit Agrawal 
379c3e4ed5cSPunit Agrawal 	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
380c3e4ed5cSPunit Agrawal 		pte_clear(mm, addr, ptep);
381c3e4ed5cSPunit Agrawal }
382c3e4ed5cSPunit Agrawal 
38366b3923aSDavid Woods pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
38466b3923aSDavid Woods 			      unsigned long addr, pte_t *ptep)
38566b3923aSDavid Woods {
386d8bdcff2SSteve Capper 	int ncontig;
38766b3923aSDavid Woods 	size_t pgsize;
38829a7287dSSteve Capper 	pte_t orig_pte = huge_ptep_get(ptep);
38929a7287dSSteve Capper 
39029a7287dSSteve Capper 	if (!pte_cont(orig_pte))
39129a7287dSSteve Capper 		return ptep_get_and_clear(mm, addr, ptep);
39266b3923aSDavid Woods 
393f0b38d65SSteve Capper 	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
39429a7287dSSteve Capper 
395d8bdcff2SSteve Capper 	return get_clear_flush(mm, addr, ptep, pgsize, ncontig);
39666b3923aSDavid Woods }
39766b3923aSDavid Woods 
398031e6e6bSSteve Capper /*
399031e6e6bSSteve Capper  * huge_ptep_set_access_flags will update access flags (dirty, accesssed)
400031e6e6bSSteve Capper  * and write permission.
401031e6e6bSSteve Capper  *
402031e6e6bSSteve Capper  * For a contiguous huge pte range we need to check whether or not write
403031e6e6bSSteve Capper  * permission has to change only on the first pte in the set. Then for
404031e6e6bSSteve Capper  * all the contiguous ptes we need to check whether or not there is a
405031e6e6bSSteve Capper  * discrepancy between dirty or young.
406031e6e6bSSteve Capper  */
407031e6e6bSSteve Capper static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig)
408031e6e6bSSteve Capper {
409031e6e6bSSteve Capper 	int i;
410031e6e6bSSteve Capper 
411031e6e6bSSteve Capper 	if (pte_write(pte) != pte_write(huge_ptep_get(ptep)))
412031e6e6bSSteve Capper 		return 1;
413031e6e6bSSteve Capper 
414031e6e6bSSteve Capper 	for (i = 0; i < ncontig; i++) {
415031e6e6bSSteve Capper 		pte_t orig_pte = huge_ptep_get(ptep + i);
416031e6e6bSSteve Capper 
417031e6e6bSSteve Capper 		if (pte_dirty(pte) != pte_dirty(orig_pte))
418031e6e6bSSteve Capper 			return 1;
419031e6e6bSSteve Capper 
420031e6e6bSSteve Capper 		if (pte_young(pte) != pte_young(orig_pte))
421031e6e6bSSteve Capper 			return 1;
422031e6e6bSSteve Capper 	}
423031e6e6bSSteve Capper 
424031e6e6bSSteve Capper 	return 0;
425031e6e6bSSteve Capper }
426031e6e6bSSteve Capper 
42766b3923aSDavid Woods int huge_ptep_set_access_flags(struct vm_area_struct *vma,
42866b3923aSDavid Woods 			       unsigned long addr, pte_t *ptep,
42966b3923aSDavid Woods 			       pte_t pte, int dirty)
43066b3923aSDavid Woods {
431031e6e6bSSteve Capper 	int ncontig, i;
43266b3923aSDavid Woods 	size_t pgsize = 0;
43329a7287dSSteve Capper 	unsigned long pfn = pte_pfn(pte), dpfn;
43429a7287dSSteve Capper 	pgprot_t hugeprot;
435d8bdcff2SSteve Capper 	pte_t orig_pte;
43666b3923aSDavid Woods 
43729a7287dSSteve Capper 	if (!pte_cont(pte))
43866b3923aSDavid Woods 		return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
43929a7287dSSteve Capper 
44029a7287dSSteve Capper 	ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
44129a7287dSSteve Capper 	dpfn = pgsize >> PAGE_SHIFT;
44229a7287dSSteve Capper 
443031e6e6bSSteve Capper 	if (!__cont_access_flags_changed(ptep, pte, ncontig))
444031e6e6bSSteve Capper 		return 0;
445031e6e6bSSteve Capper 
446d8bdcff2SSteve Capper 	orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
447d8bdcff2SSteve Capper 
448469ed9d8SSteve Capper 	/* Make sure we don't lose the dirty or young state */
449d8bdcff2SSteve Capper 	if (pte_dirty(orig_pte))
450d8bdcff2SSteve Capper 		pte = pte_mkdirty(pte);
451d8bdcff2SSteve Capper 
452469ed9d8SSteve Capper 	if (pte_young(orig_pte))
453469ed9d8SSteve Capper 		pte = pte_mkyoung(pte);
454469ed9d8SSteve Capper 
455d8bdcff2SSteve Capper 	hugeprot = pte_pgprot(pte);
456d8bdcff2SSteve Capper 	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
457d8bdcff2SSteve Capper 		set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot));
45829a7287dSSteve Capper 
459031e6e6bSSteve Capper 	return 1;
46066b3923aSDavid Woods }
46166b3923aSDavid Woods 
46266b3923aSDavid Woods void huge_ptep_set_wrprotect(struct mm_struct *mm,
46366b3923aSDavid Woods 			     unsigned long addr, pte_t *ptep)
46466b3923aSDavid Woods {
465d8bdcff2SSteve Capper 	unsigned long pfn, dpfn;
466d8bdcff2SSteve Capper 	pgprot_t hugeprot;
46766b3923aSDavid Woods 	int ncontig, i;
46829a7287dSSteve Capper 	size_t pgsize;
469d8bdcff2SSteve Capper 	pte_t pte;
47029a7287dSSteve Capper 
47120a004e7SWill Deacon 	if (!pte_cont(READ_ONCE(*ptep))) {
47229a7287dSSteve Capper 		ptep_set_wrprotect(mm, addr, ptep);
47329a7287dSSteve Capper 		return;
47429a7287dSSteve Capper 	}
47566b3923aSDavid Woods 
476f0b38d65SSteve Capper 	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
477d8bdcff2SSteve Capper 	dpfn = pgsize >> PAGE_SHIFT;
478d8bdcff2SSteve Capper 
479d8bdcff2SSteve Capper 	pte = get_clear_flush(mm, addr, ptep, pgsize, ncontig);
480d8bdcff2SSteve Capper 	pte = pte_wrprotect(pte);
481d8bdcff2SSteve Capper 
482d8bdcff2SSteve Capper 	hugeprot = pte_pgprot(pte);
483d8bdcff2SSteve Capper 	pfn = pte_pfn(pte);
484d8bdcff2SSteve Capper 
485d8bdcff2SSteve Capper 	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
486d8bdcff2SSteve Capper 		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
48766b3923aSDavid Woods }
48866b3923aSDavid Woods 
489*ae075629SBaolin Wang pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
49066b3923aSDavid Woods 			    unsigned long addr, pte_t *ptep)
49166b3923aSDavid Woods {
49229a7287dSSteve Capper 	size_t pgsize;
493d8bdcff2SSteve Capper 	int ncontig;
49466b3923aSDavid Woods 
495*ae075629SBaolin Wang 	if (!pte_cont(READ_ONCE(*ptep)))
496*ae075629SBaolin Wang 		return ptep_clear_flush(vma, addr, ptep);
49729a7287dSSteve Capper 
49829a7287dSSteve Capper 	ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
499*ae075629SBaolin Wang 	return get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
50066b3923aSDavid Woods }
50166b3923aSDavid Woods 
502a21b0b78SAllen Pais static int __init hugetlbpage_init(void)
503a21b0b78SAllen Pais {
504f8b46c4bSAnshuman Khandual 	if (pud_sect_supported())
50538237830SMike Kravetz 		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
506f8b46c4bSAnshuman Khandual 
507a1634a54SGavin Shan 	hugetlb_add_hstate(CONT_PMD_SHIFT - PAGE_SHIFT);
50838237830SMike Kravetz 	hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
509a1634a54SGavin Shan 	hugetlb_add_hstate(CONT_PTE_SHIFT - PAGE_SHIFT);
510a21b0b78SAllen Pais 
511a21b0b78SAllen Pais 	return 0;
512a21b0b78SAllen Pais }
513a21b0b78SAllen Pais arch_initcall(hugetlbpage_init);
514a21b0b78SAllen Pais 
515ae94da89SMike Kravetz bool __init arch_hugetlb_valid_size(unsigned long size)
516084bd298SSteve Capper {
517a8a733b2SAnshuman Khandual 	return __hugetlb_valid_size(size);
518ae94da89SMike Kravetz }
519