Lines matching +full:tlb +full:- +full:split in mm/huge_memory.c (each hit is shown as: <source line number> <matched line> in <enclosing function>(); hits on declarations carry an extra "argument" or "local" tag)
1 // SPDX-License-Identifier: GPL-2.0-only
21 #include <linux/backing-dev.h>
39 #include <linux/memory-tiers.h>
42 #include <asm/tlb.h>
93 if (!vma->vm_mm) /* vdso */ in __thp_vma_allowable_orders()
102 test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)) in __thp_vma_allowable_orders()
134 addr = vma->vm_end - (PAGE_SIZE << order); in __thp_vma_allowable_orders()
149 if (!in_pf && shmem_file(vma->vm_file)) in __thp_vma_allowable_orders()
150 return shmem_is_huge(file_inode(vma->vm_file), vma->vm_pgoff, in __thp_vma_allowable_orders()
151 !enforce_sysfs, vma->vm_mm, vm_flags) in __thp_vma_allowable_orders()
165 * Trust that ->huge_fault() handlers know what they are doing in __thp_vma_allowable_orders()
168 if (((in_pf || smaps)) && vma->vm_ops->huge_fault) in __thp_vma_allowable_orders()
186 if (!vma->anon_vma) in __thp_vma_allowable_orders()
231 if (test_bit(MMF_HUGE_ZERO_PAGE, &mm->flags)) in mm_get_huge_zero_page()
237 if (test_and_set_bit(MMF_HUGE_ZERO_PAGE, &mm->flags)) in mm_get_huge_zero_page()
245 if (test_bit(MMF_HUGE_ZERO_PAGE, &mm->flags)) in mm_put_huge_zero_page()
305 ret = -EINVAL; in enabled_store()
337 return -EINVAL; in single_hugepage_flag_store()
400 return -EINVAL; in defrag_store()
459 int order = to_thpsize(kobj)->order; in thpsize_enabled_show()
478 int order = to_thpsize(kobj)->order; in thpsize_enabled_store()
506 ret = -EINVAL; in thpsize_enabled_store()
536 return ERR_PTR(-ENOMEM); in thpsize_create()
538 ret = kobject_init_and_add(&thpsize->kobj, &thpsize_ktype, parent, in thpsize_create()
539 "hugepages-%lukB", size); in thpsize_create()
545 ret = sysfs_create_group(&thpsize->kobj, &thpsize_attr_group); in thpsize_create()
547 kobject_put(&thpsize->kobj); in thpsize_create()
551 thpsize->order = order; in thpsize_create()
568 * Default to setting PMD-sized THP to inherit the global setting and in hugepage_init_sysfs()
569 * disable all other sizes. powerpc's PMD_ORDER isn't a compile-time in hugepage_init_sysfs()
577 return -ENOMEM; in hugepage_init_sysfs()
601 list_add(&thpsize->node, &thpsize_list); in hugepage_init_sysfs()
622 list_del(&thpsize->node); in hugepage_exit_sysfs()
623 kobject_put(&thpsize->kobj); in hugepage_exit_sysfs()
643 huge_zero_page_shrinker = shrinker_alloc(0, "thp-zero"); in thp_shrinker_init()
645 return -ENOMEM; in thp_shrinker_init()
650 "thp-deferred_split"); in thp_shrinker_init()
653 return -ENOMEM; in thp_shrinker_init()
656 huge_zero_page_shrinker->count_objects = shrink_huge_zero_page_count; in thp_shrinker_init()
657 huge_zero_page_shrinker->scan_objects = shrink_huge_zero_page_scan; in thp_shrinker_init()
660 deferred_split_shrinker->count_objects = deferred_split_count; in thp_shrinker_init()
661 deferred_split_shrinker->scan_objects = deferred_split_scan; in thp_shrinker_init()
680 return -EINVAL; in hugepage_init()
688 * we use page->mapping and page->index in second tail page in hugepage_init()
707 * where the extra memory used could hurt more than TLB overhead in hugepage_init()
710 if (totalram_pages() < (512 << (20 - PAGE_SHIFT))) { in hugepage_init()
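For scale, the threshold tested above is just 512 MiB expressed in pages. A quick expansion, assuming the common 4 KiB page size (PAGE_SHIFT == 12):

    /*
     * 512 << (20 - PAGE_SHIFT)  -- number of pages in 512 MiB
     *   = 512 << 8              -- with PAGE_SHIFT == 12 (4 KiB pages)
     *   = 131072 pages          -- 131072 * 4 KiB == 512 MiB
     *
     * On machines with less memory than this, hugepage_init() clears
     * transparent_hugepage_flags, so THP stays disabled by default: the
     * extra memory used could hurt more than the saved TLB overhead helps,
     * as the comment above says.
     */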
764 if (likely(vma->vm_flags & VM_WRITE)) in maybe_pmd_mkwrite()
777 return &memcg->deferred_split_queue; in get_deferred_split_queue()
779 return &pgdat->deferred_split_queue; in get_deferred_split_queue()
787 return &pgdat->deferred_split_queue; in get_deferred_split_queue()
794 INIT_LIST_HEAD(&folio->_deferred_list); in folio_prep_large_rmappable()
803 return is_huge_zero_page(&folio->page) || in is_transparent_hugepage()
818 if (off_end <= off_align || (off_end - off_align) < size) in __thp_get_unmapped_area()
825 ret = current->mm->get_unmapped_area(filp, addr, len_pad, in __thp_get_unmapped_area()
842 off_sub = (off - ret) & (size - 1); in __thp_get_unmapped_area()
844 if (current->mm->get_unmapped_area == arch_get_unmapped_area_topdown && in __thp_get_unmapped_area()
862 return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags); in thp_get_unmapped_area()
869 struct vm_area_struct *vma = vmf->vma; in __do_huge_pmd_anonymous_page()
872 unsigned long haddr = vmf->address & HPAGE_PMD_MASK; in __do_huge_pmd_anonymous_page()
877 if (mem_cgroup_charge(folio, vma->vm_mm, gfp)) { in __do_huge_pmd_anonymous_page()
885 pgtable = pte_alloc_one(vma->vm_mm); in __do_huge_pmd_anonymous_page()
891 clear_huge_page(page, vmf->address, HPAGE_PMD_NR); in __do_huge_pmd_anonymous_page()
899 vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); in __do_huge_pmd_anonymous_page()
900 if (unlikely(!pmd_none(*vmf->pmd))) { in __do_huge_pmd_anonymous_page()
905 ret = check_stable_address_space(vma->vm_mm); in __do_huge_pmd_anonymous_page()
911 spin_unlock(vmf->ptl); in __do_huge_pmd_anonymous_page()
913 pte_free(vma->vm_mm, pgtable); in __do_huge_pmd_anonymous_page()
919 entry = mk_huge_pmd(page, vma->vm_page_prot); in __do_huge_pmd_anonymous_page()
923 pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable); in __do_huge_pmd_anonymous_page()
924 set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry); in __do_huge_pmd_anonymous_page()
925 update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); in __do_huge_pmd_anonymous_page()
926 add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR); in __do_huge_pmd_anonymous_page()
927 mm_inc_nr_ptes(vma->vm_mm); in __do_huge_pmd_anonymous_page()
928 spin_unlock(vmf->ptl); in __do_huge_pmd_anonymous_page()
930 count_memcg_event_mm(vma->vm_mm, THP_FAULT_ALLOC); in __do_huge_pmd_anonymous_page()
935 spin_unlock(vmf->ptl); in __do_huge_pmd_anonymous_page()
938 pte_free(vma->vm_mm, pgtable); in __do_huge_pmd_anonymous_page()
955 const bool vma_madvised = vma && (vma->vm_flags & VM_HUGEPAGE); in vma_thp_gfp_mask()
987 entry = mk_pmd(zero_page, vma->vm_page_prot); in set_huge_zero_page()
996 struct vm_area_struct *vma = vmf->vma; in do_huge_pmd_anonymous_page()
999 unsigned long haddr = vmf->address & HPAGE_PMD_MASK; in do_huge_pmd_anonymous_page()
1005 khugepaged_enter_vma(vma, vma->vm_flags); in do_huge_pmd_anonymous_page()
1007 if (!(vmf->flags & FAULT_FLAG_WRITE) && in do_huge_pmd_anonymous_page()
1008 !mm_forbids_zeropage(vma->vm_mm) && in do_huge_pmd_anonymous_page()
1013 pgtable = pte_alloc_one(vma->vm_mm); in do_huge_pmd_anonymous_page()
1016 zero_page = mm_get_huge_zero_page(vma->vm_mm); in do_huge_pmd_anonymous_page()
1018 pte_free(vma->vm_mm, pgtable); in do_huge_pmd_anonymous_page()
1022 vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); in do_huge_pmd_anonymous_page()
1024 if (pmd_none(*vmf->pmd)) { in do_huge_pmd_anonymous_page()
1025 ret = check_stable_address_space(vma->vm_mm); in do_huge_pmd_anonymous_page()
1027 spin_unlock(vmf->ptl); in do_huge_pmd_anonymous_page()
1028 pte_free(vma->vm_mm, pgtable); in do_huge_pmd_anonymous_page()
1030 spin_unlock(vmf->ptl); in do_huge_pmd_anonymous_page()
1031 pte_free(vma->vm_mm, pgtable); in do_huge_pmd_anonymous_page()
1035 set_huge_zero_page(pgtable, vma->vm_mm, vma, in do_huge_pmd_anonymous_page()
1036 haddr, vmf->pmd, zero_page); in do_huge_pmd_anonymous_page()
1037 update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); in do_huge_pmd_anonymous_page()
1038 spin_unlock(vmf->ptl); in do_huge_pmd_anonymous_page()
1041 spin_unlock(vmf->ptl); in do_huge_pmd_anonymous_page()
1042 pte_free(vma->vm_mm, pgtable); in do_huge_pmd_anonymous_page()
1052 return __do_huge_pmd_anonymous_page(vmf, &folio->page, gfp); in do_huge_pmd_anonymous_page()
1059 struct mm_struct *mm = vma->vm_mm; in insert_pfn_pmd()
1103 * vmf_insert_pfn_pmd - insert a pmd size pfn
1114 unsigned long addr = vmf->address & PMD_MASK; in vmf_insert_pfn_pmd()
1115 struct vm_area_struct *vma = vmf->vma; in vmf_insert_pfn_pmd()
1116 pgprot_t pgprot = vma->vm_page_prot; in vmf_insert_pfn_pmd()
1124 BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) && in vmf_insert_pfn_pmd()
1126 BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) == in vmf_insert_pfn_pmd()
1128 BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags)); in vmf_insert_pfn_pmd()
1130 if (addr < vma->vm_start || addr >= vma->vm_end) in vmf_insert_pfn_pmd()
1134 pgtable = pte_alloc_one(vma->vm_mm); in vmf_insert_pfn_pmd()
1141 insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write, pgtable); in vmf_insert_pfn_pmd()
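For context, a minimal sketch of how a driver fault path might call vmf_insert_pfn_pmd() on a VMA that was set up with VM_PFNMAP (which the BUG_ON checks above insist on). Everything prefixed my_ is hypothetical and not taken from this file; vmf_insert_pfn_pmd(), phys_to_pfn_t() and the PFN_DEV | PFN_MAP flags are the real kernel APIs. vmf_insert_pfn_pud() below follows the same pattern at PUD size.

    #include <linux/mm.h>
    #include <linux/huge_mm.h>
    #include <linux/pfn_t.h>

    /* Hypothetical: base of a physically contiguous device region. */
    static phys_addr_t my_dev_phys_base;

    static vm_fault_t my_huge_fault_pmd(struct vm_fault *vmf)
    {
            /* round the faulting page offset down to a PMD boundary */
            unsigned long pgoff = vmf->pgoff & ~((unsigned long)PTRS_PER_PMD - 1);
            pfn_t pfn = phys_to_pfn_t(my_dev_phys_base +
                                      ((phys_addr_t)pgoff << PAGE_SHIFT),
                                      PFN_DEV | PFN_MAP);

            /* vmf_insert_pfn_pmd() itself rejects addresses outside the VMA */
            return vmf_insert_pfn_pmd(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
    }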
1149 if (likely(vma->vm_flags & VM_WRITE)) in maybe_pud_mkwrite()
1157 struct mm_struct *mm = vma->vm_mm; in insert_pfn_pud()
1158 pgprot_t prot = vma->vm_page_prot; in insert_pfn_pud()
1192 * vmf_insert_pfn_pud - insert a pud size pfn
1203 unsigned long addr = vmf->address & PUD_MASK; in vmf_insert_pfn_pud()
1204 struct vm_area_struct *vma = vmf->vma; in vmf_insert_pfn_pud()
1205 pgprot_t pgprot = vma->vm_page_prot; in vmf_insert_pfn_pud()
1212 BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) && in vmf_insert_pfn_pud()
1214 BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) == in vmf_insert_pfn_pud()
1216 BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags)); in vmf_insert_pfn_pud()
1218 if (addr < vma->vm_start || addr >= vma->vm_end) in vmf_insert_pfn_pud()
1223 insert_pfn_pud(vma, addr, vmf->pud, pfn, write); in vmf_insert_pfn_pud()
1246 struct mm_struct *mm = vma->vm_mm; in follow_devmap_pmd()
1268 return ERR_PTR(-EEXIST); in follow_devmap_pmd()
1273 return ERR_PTR(-EFAULT); in follow_devmap_pmd()
1291 int ret = -ENOMEM; in copy_huge_pmd()
1293 /* Skip if can be re-filled on fault */ in copy_huge_pmd()
1305 ret = -EAGAIN; in copy_huge_pmd()
1340 * under splitting since we don't split the page itself, only pmd to in copy_huge_pmd()
1359 /* Page maybe pinned: split and retry the fault on PTEs. */ in copy_huge_pmd()
1365 return -EAGAIN; in copy_huge_pmd()
1403 struct mm_struct *mm = vma->vm_mm; in follow_devmap_pud()
1427 return ERR_PTR(-EEXIST); in follow_devmap_pud()
1432 return ERR_PTR(-EFAULT); in follow_devmap_pud()
1454 ret = -EAGAIN; in copy_huge_pud()
1461 * under splitting since we don't split the page itself, only pud to in copy_huge_pud()
1470 * folio_try_dup_anon_rmap_*() and split if duplicating fails. in copy_huge_pud()
1485 bool write = vmf->flags & FAULT_FLAG_WRITE; in huge_pud_set_accessed()
1487 vmf->ptl = pud_lock(vmf->vma->vm_mm, vmf->pud); in huge_pud_set_accessed()
1488 if (unlikely(!pud_same(*vmf->pud, orig_pud))) in huge_pud_set_accessed()
1491 touch_pud(vmf->vma, vmf->address, vmf->pud, write); in huge_pud_set_accessed()
1493 spin_unlock(vmf->ptl); in huge_pud_set_accessed()
1499 bool write = vmf->flags & FAULT_FLAG_WRITE; in huge_pmd_set_accessed()
1501 vmf->ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd); in huge_pmd_set_accessed()
1502 if (unlikely(!pmd_same(*vmf->pmd, vmf->orig_pmd))) in huge_pmd_set_accessed()
1505 touch_pmd(vmf->vma, vmf->address, vmf->pmd, write); in huge_pmd_set_accessed()
1508 spin_unlock(vmf->ptl); in huge_pmd_set_accessed()
1513 const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE; in do_huge_pmd_wp_page()
1514 struct vm_area_struct *vma = vmf->vma; in do_huge_pmd_wp_page()
1517 unsigned long haddr = vmf->address & HPAGE_PMD_MASK; in do_huge_pmd_wp_page()
1518 pmd_t orig_pmd = vmf->orig_pmd; in do_huge_pmd_wp_page()
1520 vmf->ptl = pmd_lockptr(vma->vm_mm, vmf->pmd); in do_huge_pmd_wp_page()
1521 VM_BUG_ON_VMA(!vma->anon_vma, vma); in do_huge_pmd_wp_page()
1526 spin_lock(vmf->ptl); in do_huge_pmd_wp_page()
1528 if (unlikely(!pmd_same(*vmf->pmd, orig_pmd))) { in do_huge_pmd_wp_page()
1529 spin_unlock(vmf->ptl); in do_huge_pmd_wp_page()
1543 spin_unlock(vmf->ptl); in do_huge_pmd_wp_page()
1545 spin_lock(vmf->ptl); in do_huge_pmd_wp_page()
1546 if (unlikely(!pmd_same(*vmf->pmd, orig_pmd))) { in do_huge_pmd_wp_page()
1547 spin_unlock(vmf->ptl); in do_huge_pmd_wp_page()
1579 spin_unlock(vmf->ptl); in do_huge_pmd_wp_page()
1584 if (pmdp_set_access_flags(vma, haddr, vmf->pmd, entry, 1)) in do_huge_pmd_wp_page()
1585 update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); in do_huge_pmd_wp_page()
1586 spin_unlock(vmf->ptl); in do_huge_pmd_wp_page()
1592 spin_unlock(vmf->ptl); in do_huge_pmd_wp_page()
1594 __split_huge_pmd(vma, vmf->pmd, vmf->address, false, NULL); in do_huge_pmd_wp_page()
1603 if (WARN_ON_ONCE(!(vma->vm_flags & VM_WRITE))) in can_change_pmd_writable()
1614 /* Do we need write faults for uffd-wp tracking? */ in can_change_pmd_writable()
1618 if (!(vma->vm_flags & VM_SHARED)) { in can_change_pmd_writable()
1642 if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED)) in can_follow_write_pmd()
1645 /* ... or read-only private ones */ in can_follow_write_pmd()
1646 if (!(vma->vm_flags & VM_MAYWRITE)) in can_follow_write_pmd()
1650 if (vma->vm_flags & VM_WRITE) in can_follow_write_pmd()
1660 /* ... and a write-fault isn't required for other reasons. */ in can_follow_write_pmd()
1671 struct mm_struct *mm = vma->vm_mm; in follow_trans_huge_pmd()
1686 return ERR_PTR(-EFAULT); in follow_trans_huge_pmd()
1692 return ERR_PTR(-EMLINK); in follow_trans_huge_pmd()
1713 struct vm_area_struct *vma = vmf->vma; in do_huge_pmd_numa_page()
1714 pmd_t oldpmd = vmf->orig_pmd; in do_huge_pmd_numa_page()
1717 unsigned long haddr = vmf->address & HPAGE_PMD_MASK; in do_huge_pmd_numa_page()
1719 int target_nid, last_cpupid = (-1 & LAST_CPUPID_MASK); in do_huge_pmd_numa_page()
1723 vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); in do_huge_pmd_numa_page()
1724 if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) { in do_huge_pmd_numa_page()
1725 spin_unlock(vmf->ptl); in do_huge_pmd_numa_page()
1729 pmd = pmd_modify(oldpmd, vma->vm_page_prot); in do_huge_pmd_numa_page()
1737 can_change_pmd_writable(vma, vmf->address, pmd)) in do_huge_pmd_numa_page()
1761 spin_unlock(vmf->ptl); in do_huge_pmd_numa_page()
1770 vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); in do_huge_pmd_numa_page()
1771 if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) { in do_huge_pmd_numa_page()
1772 spin_unlock(vmf->ptl); in do_huge_pmd_numa_page()
1786 pmd = pmd_modify(oldpmd, vma->vm_page_prot); in do_huge_pmd_numa_page()
1790 set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd); in do_huge_pmd_numa_page()
1791 update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); in do_huge_pmd_numa_page()
1792 spin_unlock(vmf->ptl); in do_huge_pmd_numa_page()
1800 bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, in madvise_free_huge_pmd() argument
1806 struct mm_struct *mm = tlb->mm; in madvise_free_huge_pmd()
1809 tlb_change_page_size(tlb, HPAGE_PMD_SIZE); in madvise_free_huge_pmd()
1837 * If user wants to discard part-pages of THP, split it so MADV_FREE in madvise_free_huge_pmd()
1840 if (next - addr != HPAGE_PMD_SIZE) { in madvise_free_huge_pmd()
1859 tlb_remove_pmd_tlb_entry(tlb, pmd, addr); in madvise_free_huge_pmd()
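The comment above describes what happens when MADV_FREE covers only part of a huge PMD. A hedged userspace illustration of that case; the 2 MiB PMD size and the manual alignment are assumptions for the example, and error handling is omitted:

    #include <stdint.h>
    #include <string.h>
    #include <sys/mman.h>

    int main(void)
    {
            size_t pmd_size = 2UL << 20;    /* assume a 2 MiB PMD, as on x86-64 */
            char *raw, *p;

            /* over-allocate so a PMD-aligned 2 MiB window can be carved out */
            raw = mmap(NULL, 2 * pmd_size, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            p = (char *)(((uintptr_t)raw + pmd_size - 1) & ~(pmd_size - 1));

            madvise(p, pmd_size, MADV_HUGEPAGE);    /* ask for THP backing */
            memset(p, 1, pmd_size);                 /* fault the range in */

            /*
             * Half the PMD range: next - addr != HPAGE_PMD_SIZE in
             * madvise_free_huge_pmd(), so the kernel first splits the huge
             * page and MADV_FREE then applies only to the covered subpages.
             */
            madvise(p, pmd_size / 2, MADV_FREE);
            return 0;
    }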
1879 int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, in zap_huge_pmd() argument
1885 tlb_change_page_size(tlb, HPAGE_PMD_SIZE); in zap_huge_pmd()
1897 tlb->fullmm); in zap_huge_pmd()
1899 tlb_remove_pmd_tlb_entry(tlb, pmd, addr); in zap_huge_pmd()
1902 zap_deposited_table(tlb->mm, pmd); in zap_huge_pmd()
1905 zap_deposited_table(tlb->mm, pmd); in zap_huge_pmd()
1927 zap_deposited_table(tlb->mm, pmd); in zap_huge_pmd()
1928 add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR); in zap_huge_pmd()
1931 zap_deposited_table(tlb->mm, pmd); in zap_huge_pmd()
1932 add_mm_counter(tlb->mm, mm_counter_file(page), -HPAGE_PMD_NR); in zap_huge_pmd()
1937 tlb_remove_page_size(tlb, page, HPAGE_PMD_SIZE); in zap_huge_pmd()
1948 * With split pmd lock we also need to move preallocated in pmd_move_must_withdraw()
1973 struct mm_struct *mm = vma->vm_mm; in move_huge_pmd()
2019 * - 0 if PMD could not be locked
2020 * - 1 if PMD was locked but protections unchanged and TLB flush unnecessary
2022 * - HPAGE_PMD_NR if protections changed and TLB flush necessary
2024 int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, in change_huge_pmd() argument
2028 struct mm_struct *mm = vma->vm_mm; in change_huge_pmd()
2036 tlb_change_page_size(tlb, HPAGE_PMD_SIZE); in change_huge_pmd()
2082 * Avoid trapping faults against the zero page. The read-only in change_huge_pmd()
2083 * data is likely to be read-cached on the local CPU and in change_huge_pmd()
2120 * // pmd is re-established in change_huge_pmd()
2150 tlb_flush_pmd_range(tlb, addr, HPAGE_PMD_SIZE); in change_huge_pmd()
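A hedged sketch of how a caller can consume the three return values documented above change_huge_pmd(); it is loosely modeled on the mprotect path, and my_apply_huge_prot() with its return convention is invented for the example:

    #include <linux/huge_mm.h>
    #include <linux/mm.h>

    /*
     * Illustration only: apply newprot to one huge pmd and report how many
     * pages changed, or -EAGAIN if the pmd must be handled at pte level.
     */
    static long my_apply_huge_prot(struct mmu_gather *tlb,
                                   struct vm_area_struct *vma, pmd_t *pmd,
                                   unsigned long addr, pgprot_t newprot,
                                   unsigned long cp_flags)
    {
            int ret = change_huge_pmd(tlb, vma, pmd, addr, newprot, cp_flags);

            if (ret == 0)                   /* not (or no longer) a huge pmd */
                    return -EAGAIN;         /* caller falls back to the pte path */
            if (ret == HPAGE_PMD_NR)        /* protections changed */
                    return HPAGE_PMD_NR;    /* TLB flush already queued on @tlb */
            return 0;                       /* ret == 1: locked, nothing to change or flush */
    }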
2161 * Return zero if succeeded in moving the page, -EAGAIN if it needs to be
2187 return -EINVAL; in move_pages_huge_pmd()
2194 return -EAGAIN; in move_pages_huge_pmd()
2196 return -ENOENT; in move_pages_huge_pmd()
2202 return -EBUSY; in move_pages_huge_pmd()
2223 err = -EAGAIN; in move_pages_huge_pmd()
2232 err = -EAGAIN; in move_pages_huge_pmd()
2236 !PageAnonExclusive(&src_folio->page)) { in move_pages_huge_pmd()
2237 err = -EBUSY; in move_pages_huge_pmd()
2243 err = -EBUSY; in move_pages_huge_pmd()
2248 WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr)); in move_pages_huge_pmd()
2254 err = -EBUSY; in move_pages_huge_pmd()
2258 _dst_pmd = mk_huge_pmd(&src_folio->page, dst_vma->vm_page_prot); in move_pages_huge_pmd()
2287 ptl = pmd_lock(vma->vm_mm, pmd); in __pmd_trans_huge_lock()
2305 ptl = pud_lock(vma->vm_mm, pud); in __pud_trans_huge_lock()
2313 int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, in zap_huge_pud() argument
2322 pudp_huge_get_and_clear_full(vma, addr, pud, tlb->fullmm); in zap_huge_pud()
2323 tlb_remove_pud_tlb_entry(tlb, pud, addr); in zap_huge_pud()
2338 VM_BUG_ON_VMA(vma->vm_start > haddr, vma); in __split_huge_pud_locked()
2339 VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PUD_SIZE, vma); in __split_huge_pud_locked()
2353 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm, in __split_huge_pud()
2357 ptl = pud_lock(vma->vm_mm, pud); in __split_huge_pud()
2371 struct mm_struct *mm = vma->vm_mm; in __split_huge_zero_page_pmd()
2396 entry = pfn_pte(my_zero_pfn(addr), vma->vm_page_prot); in __split_huge_zero_page_pmd()
2404 pte_unmap(pte - 1); in __split_huge_zero_page_pmd()
2412 struct mm_struct *mm = vma->vm_mm; in __split_huge_pmd_locked()
2424 VM_BUG_ON_VMA(vma->vm_start > haddr, vma); in __split_huge_pmd_locked()
2425 VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PMD_SIZE, vma); in __split_huge_pmd_locked()
2456 add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR); in __split_huge_pmd_locked()
2475 * whole access to the hugepage during the split (which happens in in __split_huge_pmd_locked()
2476 * place). If we overwrite the pmd with the not-huge version pointing in __split_huge_pmd_locked()
2478 * free), userland could trigger a small page size TLB miss on the in __split_huge_pmd_locked()
2479 * small sized TLB while the hugepage TLB entry is still established in in __split_huge_pmd_locked()
2480 * the huge TLB. Some CPUs don't like that. in __split_huge_pmd_locked()
2484 * loaded in the two TLB is identical (which should be the case here). in __split_huge_pmd_locked()
2485 * But it is generally safer to never allow small and huge TLB entries in __split_huge_pmd_locked()
2489 * must remain set at all times on the pmd until the split is complete in __split_huge_pmd_locked()
2490 * for this pmd), then we flush the SMP TLB and finally we write the in __split_huge_pmd_locked()
2491 * non-huge version of the pmd entry with pmd_populate. in __split_huge_pmd_locked()
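Condensed, the ordering this comment requires looks like the sketch below. The two helpers named are the ones __split_huge_pmd_locked() actually uses; everything else (rmap accounting, dirty/soft-dirty propagation, uffd-wp, and so on) is elided, so this is a sketch of the sequence only, not compilable code:

    /* 1. Atomically make the huge pmd non-present and flush the huge TLB
     *    entry, so no CPU still holds the huge translation.
     */
    old_pmd = pmdp_invalidate(vma, haddr, pmd);

    /* 2. Fill the pre-deposited page table with HPAGE_PMD_NR small ptes
     *    derived from old_pmd.
     */

    /* 3. Only now publish the pte table, so small and huge TLB entries for
     *    the same virtual address are never live simultaneously.
     */
    pmd_populate(mm, pmd, pgtable);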
2524 * Without "freeze", we'll simply split the PMD, propagating the in __split_huge_pmd_locked()
2526 * each subpage -- no need to (temporarily) clear. in __split_huge_pmd_locked()
2530 * managed to clear PageAnonExclusive() -- see in __split_huge_pmd_locked()
2533 * In case we cannot clear PageAnonExclusive(), split the PMD in __split_huge_pmd_locked()
2545 folio_ref_add(folio, HPAGE_PMD_NR - 1); in __split_huge_pmd_locked()
2590 entry = mk_pte(page + i, READ_ONCE(vma->vm_page_prot)); in __split_huge_pmd_locked()
2595 /* NOTE: this may set soft-dirty too on some archs */ in __split_huge_pmd_locked()
2607 pte_unmap(pte - 1); in __split_huge_pmd_locked()
2624 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm, in __split_huge_pmd()
2628 ptl = pmd_lock(vma->vm_mm, pmd); in __split_huge_pmd()
2656 pmd_t *pmd = mm_find_pmd(vma->vm_mm, address); in split_huge_pmd_address()
2668 * contain a hugepage: check if we need to split a huge pmd. in split_huge_pmd_if_needed()
2681 /* Check if we need to split start first. */ in vma_adjust_trans_huge()
2684 /* Check if we need to split end next. */ in vma_adjust_trans_huge()
2689 * check if we need to split it. in vma_adjust_trans_huge()
2692 struct vm_area_struct *next = find_vma(vma->vm_mm, vma->vm_end); in vma_adjust_trans_huge()
2693 unsigned long nstart = next->vm_start; in vma_adjust_trans_huge()
2741 lockdep_assert_held(&lruvec->lru_lock); in lru_add_page_tail()
2747 list_add_tail(&tail->lru, list); in lru_add_page_tail()
2752 tail->mlock_count = 0; in lru_add_page_tail()
2754 list_add_tail(&tail->lru, &head->lru); in lru_add_page_tail()
2762 struct page *head = &folio->page; in __split_huge_page_tail()
2770 VM_BUG_ON_PAGE(atomic_read(&page_tail->_mapcount) != -1, page_tail); in __split_huge_page_tail()
2778 * Note that for mapped sub-pages of an anonymous THP, in __split_huge_page_tail()
2782 * unreferenced sub-pages of an anonymous THP: we can simply drop in __split_huge_page_tail()
2783 * PG_anon_exclusive (-> PG_mappedtodisk) for these here. in __split_huge_page_tail()
2785 page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; in __split_huge_page_tail()
2786 page_tail->flags |= (head->flags & in __split_huge_page_tail()
2803 /* ->mapping in first and second tail page is replaced by other uses */ in __split_huge_page_tail()
2804 VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING, in __split_huge_page_tail()
2806 page_tail->mapping = head->mapping; in __split_huge_page_tail()
2807 page_tail->index = head->index + tail; in __split_huge_page_tail()
2810 * page->private should not be set in tail pages. Fix up and warn once in __split_huge_page_tail()
2813 if (unlikely(page_tail->private)) { in __split_huge_page_tail()
2815 page_tail->private = 0; in __split_huge_page_tail()
2818 new_folio->swap.val = folio->swap.val + tail; in __split_huge_page_tail()
2820 /* Page flags must be visible before we make the page non-compound. */ in __split_huge_page_tail()
2844 * pages to show after the currently processed elements - e.g. in __split_huge_page_tail()
2854 struct page *head = &folio->page; in __split_huge_page()
2865 offset = swp_offset(folio->swap); in __split_huge_page()
2866 swap_cache = swap_address_space(folio->swap); in __split_huge_page()
2867 xa_lock(&swap_cache->i_pages); in __split_huge_page()
2875 for (i = nr - 1; i >= 1; i--) { in __split_huge_page()
2881 if (shmem_mapping(head->mapping)) in __split_huge_page()
2885 inode_to_wb(folio->mapping->host)); in __split_huge_page()
2889 __xa_store(&head->mapping->i_pages, head[i].index, in __split_huge_page()
2892 __xa_store(&swap_cache->i_pages, offset + i, in __split_huge_page()
2908 xa_unlock(&swap_cache->i_pages); in __split_huge_page()
2915 xa_unlock(&head->mapping->i_pages); in __split_huge_page()
2920 shmem_uncharge(head->mapping->host, nr_dropped); in __split_huge_page()
2924 split_swap_cluster(folio->swap); in __split_huge_page()
2937 * of the tail pages after the split is complete. in __split_huge_page()
2943 /* Racy check whether the huge page can be split */
2956 return folio_mapcount(folio) == folio_ref_count(folio) - extra_pins - 1; in can_split_folio()
2961 * subpage of huge page to split. Split doesn't change the position of @page.
2963 * The caller must hold the only pin on the @page, otherwise the split fails with -EBUSY.
2974 * Returns 0 if the hugepage is split successfully.
2975 * Returns -EBUSY if the page is pinned or if anon_vma disappeared from under
2982 XA_STATE(xas, &folio->mapping->i_pages, folio->index); in split_huge_page_to_list()
2992 is_hzp = is_huge_zero_page(&folio->page); in split_huge_page_to_list()
2995 return -EBUSY; in split_huge_page_to_list()
2999 return -EBUSY; in split_huge_page_to_list()
3007 * is taken to serialise against parallel split or collapse in split_huge_page_to_list()
3012 ret = -EBUSY; in split_huge_page_to_list()
3015 end = -1; in split_huge_page_to_list()
3021 mapping = folio->mapping; in split_huge_page_to_list()
3025 ret = -EBUSY; in split_huge_page_to_list()
3033 ret = -EBUSY; in split_huge_page_to_list()
3048 * but on 32-bit, i_size_read() takes an irq-unsafe seqlock, in split_huge_page_to_list()
3053 end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE); in split_huge_page_to_list()
3055 end = shmem_fallocend(mapping->host, end); in split_huge_page_to_list()
3059 * Racy check if we can split the page, before unmap_folio() will in split_huge_page_to_list()
3060 * split PMDs in split_huge_page_to_list()
3063 ret = -EAGAIN; in split_huge_page_to_list()
3082 /* Prevent deferred_split_scan() touching ->_refcount */ in split_huge_page_to_list()
3083 spin_lock(&ds_queue->split_queue_lock); in split_huge_page_to_list()
3085 if (!list_empty(&folio->_deferred_list)) { in split_huge_page_to_list()
3086 ds_queue->split_queue_len--; in split_huge_page_to_list()
3087 list_del(&folio->_deferred_list); in split_huge_page_to_list()
3089 spin_unlock(&ds_queue->split_queue_lock); in split_huge_page_to_list()
3097 NR_SHMEM_THPS, -nr); in split_huge_page_to_list()
3100 NR_FILE_THPS, -nr); in split_huge_page_to_list()
3109 spin_unlock(&ds_queue->split_queue_lock); in split_huge_page_to_list()
3115 ret = -EAGAIN; in split_huge_page_to_list()
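A hedged sketch of the calling convention documented above split_huge_page_to_list(): the caller holds its own pin, takes the folio lock, and treats -EBUSY / -EAGAIN as "try again later". my_try_split() is invented for the example; split_folio() is a real helper in huge_mm.h that ends up in split_huge_page_to_list().

    #include <linux/huge_mm.h>
    #include <linux/mm.h>

    /*
     * Illustration only: best-effort split of one large folio on which the
     * caller already holds a reference (the pin the comment above refers to).
     */
    static int my_try_split(struct folio *folio)
    {
            int ret;

            if (!folio_trylock(folio))      /* the split requires the folio lock */
                    return -EAGAIN;
            ret = split_folio(folio);       /* 0, -EBUSY (unexpected pins) or -EAGAIN */
            folio_unlock(folio);
            return ret;
    }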
3141 if (data_race(list_empty(&folio->_deferred_list))) in folio_undo_large_rmappable()
3145 spin_lock_irqsave(&ds_queue->split_queue_lock, flags); in folio_undo_large_rmappable()
3146 if (!list_empty(&folio->_deferred_list)) { in folio_undo_large_rmappable()
3147 ds_queue->split_queue_len--; in folio_undo_large_rmappable()
3148 list_del_init(&folio->_deferred_list); in folio_undo_large_rmappable()
3150 spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); in folio_undo_large_rmappable()
3165 * this may cause a race condition to corrupt deferred split queue. in deferred_split_folio()
3176 if (!list_empty(&folio->_deferred_list)) in deferred_split_folio()
3179 spin_lock_irqsave(&ds_queue->split_queue_lock, flags); in deferred_split_folio()
3180 if (list_empty(&folio->_deferred_list)) { in deferred_split_folio()
3182 list_add_tail(&folio->_deferred_list, &ds_queue->split_queue); in deferred_split_folio()
3183 ds_queue->split_queue_len++; in deferred_split_folio()
3187 deferred_split_shrinker->id); in deferred_split_folio()
3190 spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); in deferred_split_folio()
3196 struct pglist_data *pgdata = NODE_DATA(sc->nid); in deferred_split_count()
3197 struct deferred_split *ds_queue = &pgdata->deferred_split_queue; in deferred_split_count()
3200 if (sc->memcg) in deferred_split_count()
3201 ds_queue = &sc->memcg->deferred_split_queue; in deferred_split_count()
3203 return READ_ONCE(ds_queue->split_queue_len); in deferred_split_count()
3209 struct pglist_data *pgdata = NODE_DATA(sc->nid); in deferred_split_scan()
3210 struct deferred_split *ds_queue = &pgdata->deferred_split_queue; in deferred_split_scan()
3214 int split = 0; in deferred_split_scan() local
3217 if (sc->memcg) in deferred_split_scan()
3218 ds_queue = &sc->memcg->deferred_split_queue; in deferred_split_scan()
3221 spin_lock_irqsave(&ds_queue->split_queue_lock, flags); in deferred_split_scan()
3223 list_for_each_entry_safe(folio, next, &ds_queue->split_queue, in deferred_split_scan()
3226 list_move(&folio->_deferred_list, &list); in deferred_split_scan()
3229 list_del_init(&folio->_deferred_list); in deferred_split_scan()
3230 ds_queue->split_queue_len--; in deferred_split_scan()
3232 if (!--sc->nr_to_scan) in deferred_split_scan()
3235 spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); in deferred_split_scan()
3242 split++; in deferred_split_scan()
3248 spin_lock_irqsave(&ds_queue->split_queue_lock, flags); in deferred_split_scan()
3249 list_splice_tail(&list, &ds_queue->split_queue); in deferred_split_scan()
3250 spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); in deferred_split_scan()
3253 * Stop shrinker if we didn't split any page, but the queue is empty. in deferred_split_scan()
3256 if (!split && list_empty(&ds_queue->split_queue)) in deferred_split_scan()
3258 return split; in deferred_split_scan()
3268 unsigned long total = 0, split = 0; in split_huge_pages_all() local
3270 pr_debug("Split all THPs\n"); in split_huge_pages_all()
3275 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) { in split_huge_pages_all()
3300 split++; in split_huge_pages_all()
3301 pfn += nr_pages - 1; in split_huge_pages_all()
3309 pr_debug("%lu of %lu THP split\n", split, total); in split_huge_pages_all()
3314 return vma_is_special_huge(vma) || (vma->vm_flags & VM_IO) || in vma_not_suitable_for_thp_split()
3324 unsigned long total = 0, split = 0; in split_huge_pages_pid() local
3335 ret = -ESRCH; in split_huge_pages_pid()
3346 ret = -EINVAL; in split_huge_pages_pid()
3350 pr_debug("Split huge pages in pid: %d, vaddr: [0x%lx - 0x%lx]\n", in split_huge_pages_pid()
3356 * table filled with PTE-mapped THPs, each of which is distinct. in split_huge_pages_pid()
3368 addr = vma->vm_end; in split_huge_pages_pid()
3390 split++; in split_huge_pages_pid()
3400 pr_debug("%lu of %lu THP split\n", split, total); in split_huge_pages_pid()
3412 int ret = -EINVAL; in split_huge_pages_in_file()
3415 unsigned long total = 0, split = 0; in split_huge_pages_in_file() local
3425 pr_debug("split file-backed THPs in file: %s, page offset: [0x%lx - 0x%lx]\n", in split_huge_pages_in_file()
3428 mapping = candidate->f_mapping; in split_huge_pages_in_file()
3447 split++; in split_huge_pages_in_file()
3458 pr_debug("%lu of %lu file-backed THP split\n", split, total); in split_huge_pages_in_file()
3480 ret = -EFAULT; in split_huge_pages_write()
3486 input_buf[MAX_INPUT_BUF_SZ - 1] = '\0'; in split_huge_pages_write()
3499 ret = -EINVAL; in split_huge_pages_write()
3505 ret = -EINVAL; in split_huge_pages_write()
3521 ret = -EINVAL; in split_huge_pages_write()
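The pr_debug strings above reveal the debugfs interface these handlers implement: writing "<pid>,<vaddr_start>,<vaddr_end>" (or "<path>,<off_start>,<off_end>" for file-backed THPs) to /sys/kernel/debug/split_huge_pages. A small userspace illustration of the pid form; the pid and the address range are placeholders, and the write needs root plus CONFIG_DEBUG_FS:

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/sys/kernel/debug/split_huge_pages", O_WRONLY);

            if (fd < 0)
                    return 1;
            /* split THPs mapped by pid 1234 in [0x700000000000, 0x700000400000) */
            dprintf(fd, "1234,0x700000000000,0x700000400000");
            close(fd);
            return 0;
    }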
3554 struct vm_area_struct *vma = pvmw->vma; in set_pmd_migration_entry()
3555 struct mm_struct *mm = vma->vm_mm; in set_pmd_migration_entry()
3556 unsigned long address = pvmw->address; in set_pmd_migration_entry()
3562 if (!(pvmw->pmd && !pvmw->pte)) in set_pmd_migration_entry()
3566 pmdval = pmdp_invalidate(vma, address, pvmw->pmd); in set_pmd_migration_entry()
3571 set_pmd_at(mm, address, pvmw->pmd, pmdval); in set_pmd_migration_entry()
3572 return -EBUSY; in set_pmd_migration_entry()
3592 set_pmd_at(mm, address, pvmw->pmd, pmdswp); in set_pmd_migration_entry()
3603 struct vm_area_struct *vma = pvmw->vma; in remove_migration_pmd()
3604 struct mm_struct *mm = vma->vm_mm; in remove_migration_pmd()
3605 unsigned long address = pvmw->address; in remove_migration_pmd()
3610 if (!(pvmw->pmd && !pvmw->pte)) in remove_migration_pmd()
3613 entry = pmd_to_swp_entry(*pvmw->pmd); in remove_migration_pmd()
3615 pmde = mk_huge_pmd(new, READ_ONCE(vma->vm_page_prot)); in remove_migration_pmd()
3616 if (pmd_swp_soft_dirty(*pvmw->pmd)) in remove_migration_pmd()
3620 if (pmd_swp_uffd_wp(*pvmw->pmd)) in remove_migration_pmd()
3624 /* NOTE: this may contain setting soft-dirty on some archs */ in remove_migration_pmd()
3639 set_pmd_at(mm, haddr, pvmw->pmd, pmde); in remove_migration_pmd()
3641 /* No need to invalidate - it was non-present before */ in remove_migration_pmd()
3642 update_mmu_cache_pmd(vma, address, pvmw->pmd); in remove_migration_pmd()