Lines Matching +full:tlb +full:- +full:split
1 // SPDX-License-Identifier: GPL-2.0-only
20 #include <linux/backing-dev.h>
39 #include <linux/memory-tiers.h>
44 #include <asm/tlb.h>
93 if (!vma->vm_file) in file_thp_enabled()
96 inode = file_inode(vma->vm_file); in file_thp_enabled()
98 return !inode_is_open_for_write(inode) && S_ISREG(inode->i_mode); in file_thp_enabled()
123 if (!vma->vm_mm) /* vdso */ in __thp_vma_allowable_orders()
153 addr = vma->vm_end - (PAGE_SIZE << order); in __thp_vma_allowable_orders()
168 if (!in_pf && shmem_file(vma->vm_file)) in __thp_vma_allowable_orders()
169 return shmem_allowable_huge_orders(file_inode(vma->vm_file), in __thp_vma_allowable_orders()
170 vma, vma->vm_pgoff, 0, in __thp_vma_allowable_orders()
184 * Trust that ->huge_fault() handlers know what they are doing in __thp_vma_allowable_orders()
187 if (((in_pf || smaps)) && vma->vm_ops->huge_fault) in __thp_vma_allowable_orders()
205 if (!vma->anon_vma) in __thp_vma_allowable_orders()
252 if (test_bit(MMF_HUGE_ZERO_PAGE, &mm->flags)) in mm_get_huge_zero_folio()
258 if (test_and_set_bit(MMF_HUGE_ZERO_PAGE, &mm->flags)) in mm_get_huge_zero_folio()
266 if (test_bit(MMF_HUGE_ZERO_PAGE, &mm->flags)) in mm_put_huge_zero_folio()
326 ret = -EINVAL; in enabled_store()
358 return -EINVAL; in single_hugepage_flag_store()
421 return -EINVAL; in defrag_store()
494 int order = to_thpsize(kobj)->order; in anon_enabled_show()
513 int order = to_thpsize(kobj)->order; in anon_enabled_store()
541 ret = -EINVAL; in anon_enabled_store()
599 sum += this->stats[order][item]; in sum_mthp_stat()
609 int order = to_thpsize(kobj)->order; \
629 DEFINE_MTHP_STAT_ATTR(split, MTHP_STAT_SPLIT);
694 int ret = -ENOENT; in sysfs_add_group()
701 if (grp->name) in sysfs_add_group()
713 int ret = -ENOMEM; in thpsize_create()
719 thpsize->order = order; in thpsize_create()
721 ret = kobject_init_and_add(&thpsize->kobj, &thpsize_ktype, parent, in thpsize_create()
722 "hugepages-%lukB", size); in thpsize_create()
729 ret = sysfs_add_group(&thpsize->kobj, &any_ctrl_attr_grp); in thpsize_create()
733 ret = sysfs_add_group(&thpsize->kobj, &any_stats_attr_grp); in thpsize_create()
738 ret = sysfs_add_group(&thpsize->kobj, &anon_ctrl_attr_grp); in thpsize_create()
742 ret = sysfs_add_group(&thpsize->kobj, &anon_stats_attr_grp); in thpsize_create()
748 ret = sysfs_add_group(&thpsize->kobj, &file_ctrl_attr_grp); in thpsize_create()
752 ret = sysfs_add_group(&thpsize->kobj, &file_stats_attr_grp); in thpsize_create()
759 kobject_put(&thpsize->kobj); in thpsize_create()
777 * Default to setting PMD-sized THP to inherit the global setting and in hugepage_init_sysfs()
778 * disable all other sizes. powerpc's PMD_ORDER isn't a compile-time in hugepage_init_sysfs()
787 return -ENOMEM; in hugepage_init_sysfs()
811 list_add(&thpsize->node, &thpsize_list); in hugepage_init_sysfs()
832 list_del(&thpsize->node); in hugepage_exit_sysfs()
833 kobject_put(&thpsize->kobj); in hugepage_exit_sysfs()
853 huge_zero_page_shrinker = shrinker_alloc(0, "thp-zero"); in thp_shrinker_init()
855 return -ENOMEM; in thp_shrinker_init()
860 "thp-deferred_split"); in thp_shrinker_init()
863 return -ENOMEM; in thp_shrinker_init()
866 huge_zero_page_shrinker->count_objects = shrink_huge_zero_page_count; in thp_shrinker_init()
867 huge_zero_page_shrinker->scan_objects = shrink_huge_zero_page_scan; in thp_shrinker_init()
870 deferred_split_shrinker->count_objects = deferred_split_count; in thp_shrinker_init()
871 deferred_split_shrinker->scan_objects = deferred_split_scan; in thp_shrinker_init()
890 return -EINVAL; in hugepage_init()
912 * where the extra memory used could hurt more than TLB overhead in hugepage_init()
915 if (totalram_pages() < (512 << (20 - PAGE_SHIFT))) { in hugepage_init()
992 if (strchr(subtoken, '-')) { in setup_thp_anon()
993 start_size = strsep(&subtoken, "-"); in setup_thp_anon()
1004 if (start == -EINVAL) { in setup_thp_anon()
1009 if (end == -EINVAL) { in setup_thp_anon()
1017 nr = end - start + 1; in setup_thp_anon()
1055 if (likely(vma->vm_flags & VM_WRITE)) in maybe_pmd_mkwrite()
1068 return &memcg->deferred_split_queue; in get_deferred_split_queue()
1070 return &pgdat->deferred_split_queue; in get_deferred_split_queue()
1078 return &pgdat->deferred_split_queue; in get_deferred_split_queue()
1103 if (off_end <= off_align || (off_end - off_align) < size) in __thp_get_unmapped_area()
1110 ret = mm_get_unmapped_area_vmflags(current->mm, filp, addr, len_pad, in __thp_get_unmapped_area()
1127 off_sub = (off - ret) & (size - 1); in __thp_get_unmapped_area()
1129 if (test_bit(MMF_TOPDOWN, &current->mm->flags) && !off_sub) in __thp_get_unmapped_area()
1147 return mm_get_unmapped_area_vmflags(current->mm, filp, addr, len, pgoff, flags, in thp_get_unmapped_area_vmflags()
1174 if (mem_cgroup_charge(folio, vma->vm_mm, gfp)) { in vma_alloc_anon_folio_pmd()
1206 entry = mk_huge_pmd(&folio->page, vma->vm_page_prot); in map_anon_folio_pmd()
1210 set_pmd_at(vma->vm_mm, haddr, pmd, entry); in map_anon_folio_pmd()
1212 add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR); in map_anon_folio_pmd()
1215 count_memcg_event_mm(vma->vm_mm, THP_FAULT_ALLOC); in map_anon_folio_pmd()
1220 unsigned long haddr = vmf->address & HPAGE_PMD_MASK; in __do_huge_pmd_anonymous_page()
1221 struct vm_area_struct *vma = vmf->vma; in __do_huge_pmd_anonymous_page()
1226 folio = vma_alloc_anon_folio_pmd(vma, vmf->address); in __do_huge_pmd_anonymous_page()
1230 pgtable = pte_alloc_one(vma->vm_mm); in __do_huge_pmd_anonymous_page()
1236 vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); in __do_huge_pmd_anonymous_page()
1237 if (unlikely(!pmd_none(*vmf->pmd))) { in __do_huge_pmd_anonymous_page()
1240 ret = check_stable_address_space(vma->vm_mm); in __do_huge_pmd_anonymous_page()
1246 spin_unlock(vmf->ptl); in __do_huge_pmd_anonymous_page()
1248 pte_free(vma->vm_mm, pgtable); in __do_huge_pmd_anonymous_page()
1253 pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable); in __do_huge_pmd_anonymous_page()
1254 map_anon_folio_pmd(folio, vmf->pmd, vma, haddr); in __do_huge_pmd_anonymous_page()
1255 mm_inc_nr_ptes(vma->vm_mm); in __do_huge_pmd_anonymous_page()
1257 spin_unlock(vmf->ptl); in __do_huge_pmd_anonymous_page()
1262 spin_unlock(vmf->ptl); in __do_huge_pmd_anonymous_page()
1265 pte_free(vma->vm_mm, pgtable); in __do_huge_pmd_anonymous_page()
1282 const bool vma_madvised = vma && (vma->vm_flags & VM_HUGEPAGE); in vma_thp_gfp_mask()
1312 entry = mk_pmd(&zero_folio->page, vma->vm_page_prot); in set_huge_zero_folio()
1321 struct vm_area_struct *vma = vmf->vma; in do_huge_pmd_anonymous_page()
1322 unsigned long haddr = vmf->address & HPAGE_PMD_MASK; in do_huge_pmd_anonymous_page()
1330 khugepaged_enter_vma(vma, vma->vm_flags); in do_huge_pmd_anonymous_page()
1332 if (!(vmf->flags & FAULT_FLAG_WRITE) && in do_huge_pmd_anonymous_page()
1333 !mm_forbids_zeropage(vma->vm_mm) && in do_huge_pmd_anonymous_page()
1339 pgtable = pte_alloc_one(vma->vm_mm); in do_huge_pmd_anonymous_page()
1342 zero_folio = mm_get_huge_zero_folio(vma->vm_mm); in do_huge_pmd_anonymous_page()
1344 pte_free(vma->vm_mm, pgtable); in do_huge_pmd_anonymous_page()
1348 vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); in do_huge_pmd_anonymous_page()
1350 if (pmd_none(*vmf->pmd)) { in do_huge_pmd_anonymous_page()
1351 ret = check_stable_address_space(vma->vm_mm); in do_huge_pmd_anonymous_page()
1353 spin_unlock(vmf->ptl); in do_huge_pmd_anonymous_page()
1354 pte_free(vma->vm_mm, pgtable); in do_huge_pmd_anonymous_page()
1356 spin_unlock(vmf->ptl); in do_huge_pmd_anonymous_page()
1357 pte_free(vma->vm_mm, pgtable); in do_huge_pmd_anonymous_page()
1361 set_huge_zero_folio(pgtable, vma->vm_mm, vma, in do_huge_pmd_anonymous_page()
1362 haddr, vmf->pmd, zero_folio); in do_huge_pmd_anonymous_page()
1363 update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); in do_huge_pmd_anonymous_page()
1364 spin_unlock(vmf->ptl); in do_huge_pmd_anonymous_page()
1367 spin_unlock(vmf->ptl); in do_huge_pmd_anonymous_page()
1368 pte_free(vma->vm_mm, pgtable); in do_huge_pmd_anonymous_page()
1380 struct mm_struct *mm = vma->vm_mm; in insert_pfn_pmd()
1389 return -EEXIST; in insert_pfn_pmd()
1397 return -EEXIST; in insert_pfn_pmd()
1421 * vmf_insert_pfn_pmd - insert a pmd size pfn
1432 unsigned long addr = vmf->address & PMD_MASK; in vmf_insert_pfn_pmd()
1433 struct vm_area_struct *vma = vmf->vma; in vmf_insert_pfn_pmd()
1434 pgprot_t pgprot = vma->vm_page_prot; in vmf_insert_pfn_pmd()
1444 BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) && in vmf_insert_pfn_pmd()
1446 BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) == in vmf_insert_pfn_pmd()
1448 BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags)); in vmf_insert_pfn_pmd()
1450 if (addr < vma->vm_start || addr >= vma->vm_end) in vmf_insert_pfn_pmd()
1454 pgtable = pte_alloc_one(vma->vm_mm); in vmf_insert_pfn_pmd()
1460 ptl = pmd_lock(vma->vm_mm, vmf->pmd); in vmf_insert_pfn_pmd()
1461 error = insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write, in vmf_insert_pfn_pmd()
1465 pte_free(vma->vm_mm, pgtable); in vmf_insert_pfn_pmd()
1474 struct vm_area_struct *vma = vmf->vma; in vmf_insert_folio_pmd()
1475 unsigned long addr = vmf->address & PMD_MASK; in vmf_insert_folio_pmd()
1476 struct mm_struct *mm = vma->vm_mm; in vmf_insert_folio_pmd()
1481 if (addr < vma->vm_start || addr >= vma->vm_end) in vmf_insert_folio_pmd()
1488 pgtable = pte_alloc_one(vma->vm_mm); in vmf_insert_folio_pmd()
1493 ptl = pmd_lock(mm, vmf->pmd); in vmf_insert_folio_pmd()
1494 if (pmd_none(*vmf->pmd)) { in vmf_insert_folio_pmd()
1496 folio_add_file_rmap_pmd(folio, &folio->page, vma); in vmf_insert_folio_pmd()
1499 error = insert_pfn_pmd(vma, addr, vmf->pmd, in vmf_insert_folio_pmd()
1500 pfn_to_pfn_t(folio_pfn(folio)), vma->vm_page_prot, in vmf_insert_folio_pmd()
1513 if (likely(vma->vm_flags & VM_WRITE)) in maybe_pud_mkwrite()
1521 struct mm_struct *mm = vma->vm_mm; in insert_pfn_pud()
1522 pgprot_t prot = vma->vm_page_prot; in insert_pfn_pud()
1551 * vmf_insert_pfn_pud - insert a pud size pfn
1562 unsigned long addr = vmf->address & PUD_MASK; in vmf_insert_pfn_pud()
1563 struct vm_area_struct *vma = vmf->vma; in vmf_insert_pfn_pud()
1564 pgprot_t pgprot = vma->vm_page_prot; in vmf_insert_pfn_pud()
1572 BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) && in vmf_insert_pfn_pud()
1574 BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) == in vmf_insert_pfn_pud()
1576 BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags)); in vmf_insert_pfn_pud()
1578 if (addr < vma->vm_start || addr >= vma->vm_end) in vmf_insert_pfn_pud()
1583 ptl = pud_lock(vma->vm_mm, vmf->pud); in vmf_insert_pfn_pud()
1584 insert_pfn_pud(vma, addr, vmf->pud, pfn, write); in vmf_insert_pfn_pud()
1592 * vmf_insert_folio_pud - insert a pud size folio mapped by a pud entry
1602 struct vm_area_struct *vma = vmf->vma; in vmf_insert_folio_pud()
1603 unsigned long addr = vmf->address & PUD_MASK; in vmf_insert_folio_pud()
1604 pud_t *pud = vmf->pud; in vmf_insert_folio_pud()
1605 struct mm_struct *mm = vma->vm_mm; in vmf_insert_folio_pud()
1608 if (addr < vma->vm_start || addr >= vma->vm_end) in vmf_insert_folio_pud()
1622 if (pud_none(*vmf->pud)) { in vmf_insert_folio_pud()
1624 folio_add_file_rmap_pud(folio, &folio->page, vma); in vmf_insert_folio_pud()
1627 insert_pfn_pud(vma, addr, vmf->pud, pfn_to_pfn_t(folio_pfn(folio)), in vmf_insert_folio_pud()
1653 struct mm_struct *mm = vma->vm_mm; in follow_devmap_pmd()
1675 return ERR_PTR(-EEXIST); in follow_devmap_pmd()
1680 return ERR_PTR(-EFAULT); in follow_devmap_pmd()
1698 int ret = -ENOMEM; in copy_huge_pmd()
1714 VM_WARN_ON_ONCE(is_cow_mapping(src_vma->vm_flags) && pmd_write(pmd)); in copy_huge_pmd()
1718 /* Skip if can be re-fill on fault */ in copy_huge_pmd()
1730 ret = -EAGAIN; in copy_huge_pmd()
1765 * under splitting since we don't split the page itself, only pmd to in copy_huge_pmd()
1784 /* Page maybe pinned: split and retry the fault on PTEs. */ in copy_huge_pmd()
1790 return -EAGAIN; in copy_huge_pmd()
1838 ret = -EAGAIN; in copy_huge_pud()
1845 * folio_try_dup_anon_rmap_*() and split if duplicating fails. in copy_huge_pud()
1847 if (is_cow_mapping(vma->vm_flags) && pud_write(pud)) { in copy_huge_pud()
1863 bool write = vmf->flags & FAULT_FLAG_WRITE; in huge_pud_set_accessed()
1865 vmf->ptl = pud_lock(vmf->vma->vm_mm, vmf->pud); in huge_pud_set_accessed()
1866 if (unlikely(!pud_same(*vmf->pud, orig_pud))) in huge_pud_set_accessed()
1869 touch_pud(vmf->vma, vmf->address, vmf->pud, write); in huge_pud_set_accessed()
1871 spin_unlock(vmf->ptl); in huge_pud_set_accessed()
1877 bool write = vmf->flags & FAULT_FLAG_WRITE; in huge_pmd_set_accessed()
1879 vmf->ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd); in huge_pmd_set_accessed()
1880 if (unlikely(!pmd_same(*vmf->pmd, vmf->orig_pmd))) in huge_pmd_set_accessed()
1883 touch_pmd(vmf->vma, vmf->address, vmf->pmd, write); in huge_pmd_set_accessed()
1886 spin_unlock(vmf->ptl); in huge_pmd_set_accessed()
1891 unsigned long haddr = vmf->address & HPAGE_PMD_MASK; in do_huge_zero_wp_pmd()
1892 struct vm_area_struct *vma = vmf->vma; in do_huge_zero_wp_pmd()
1897 folio = vma_alloc_anon_folio_pmd(vma, vmf->address); in do_huge_zero_wp_pmd()
1901 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm, haddr, in do_huge_zero_wp_pmd()
1904 vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); in do_huge_zero_wp_pmd()
1905 if (unlikely(!pmd_same(pmdp_get(vmf->pmd), vmf->orig_pmd))) in do_huge_zero_wp_pmd()
1907 ret = check_stable_address_space(vma->vm_mm); in do_huge_zero_wp_pmd()
1910 (void)pmdp_huge_clear_flush(vma, haddr, vmf->pmd); in do_huge_zero_wp_pmd()
1911 map_anon_folio_pmd(folio, vmf->pmd, vma, haddr); in do_huge_zero_wp_pmd()
1916 spin_unlock(vmf->ptl); in do_huge_zero_wp_pmd()
1923 const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE; in do_huge_pmd_wp_page()
1924 struct vm_area_struct *vma = vmf->vma; in do_huge_pmd_wp_page()
1927 unsigned long haddr = vmf->address & HPAGE_PMD_MASK; in do_huge_pmd_wp_page()
1928 pmd_t orig_pmd = vmf->orig_pmd; in do_huge_pmd_wp_page()
1930 vmf->ptl = pmd_lockptr(vma->vm_mm, vmf->pmd); in do_huge_pmd_wp_page()
1931 VM_BUG_ON_VMA(!vma->anon_vma, vma); in do_huge_pmd_wp_page()
1943 spin_lock(vmf->ptl); in do_huge_pmd_wp_page()
1945 if (unlikely(!pmd_same(*vmf->pmd, orig_pmd))) { in do_huge_pmd_wp_page()
1946 spin_unlock(vmf->ptl); in do_huge_pmd_wp_page()
1960 spin_unlock(vmf->ptl); in do_huge_pmd_wp_page()
1962 spin_lock(vmf->ptl); in do_huge_pmd_wp_page()
1963 if (unlikely(!pmd_same(*vmf->pmd, orig_pmd))) { in do_huge_pmd_wp_page()
1964 spin_unlock(vmf->ptl); in do_huge_pmd_wp_page()
1996 spin_unlock(vmf->ptl); in do_huge_pmd_wp_page()
2001 if (pmdp_set_access_flags(vma, haddr, vmf->pmd, entry, 1)) in do_huge_pmd_wp_page()
2002 update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); in do_huge_pmd_wp_page()
2003 spin_unlock(vmf->ptl); in do_huge_pmd_wp_page()
2009 spin_unlock(vmf->ptl); in do_huge_pmd_wp_page()
2011 __split_huge_pmd(vma, vmf->pmd, vmf->address, false, NULL); in do_huge_pmd_wp_page()
2020 if (WARN_ON_ONCE(!(vma->vm_flags & VM_WRITE))) in can_change_pmd_writable()
2031 /* Do we need write faults for uffd-wp tracking? */ in can_change_pmd_writable()
2035 if (!(vma->vm_flags & VM_SHARED)) { in can_change_pmd_writable()
2048 struct vm_area_struct *vma = vmf->vma; in do_huge_pmd_numa_page()
2050 unsigned long haddr = vmf->address & HPAGE_PMD_MASK; in do_huge_pmd_numa_page()
2057 vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); in do_huge_pmd_numa_page()
2058 old_pmd = pmdp_get(vmf->pmd); in do_huge_pmd_numa_page()
2060 if (unlikely(!pmd_same(old_pmd, vmf->orig_pmd))) { in do_huge_pmd_numa_page()
2061 spin_unlock(vmf->ptl); in do_huge_pmd_numa_page()
2065 pmd = pmd_modify(old_pmd, vma->vm_page_prot); in do_huge_pmd_numa_page()
2073 can_change_pmd_writable(vma, vmf->address, pmd)) in do_huge_pmd_numa_page()
2091 spin_unlock(vmf->ptl); in do_huge_pmd_numa_page()
2102 vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); in do_huge_pmd_numa_page()
2103 if (unlikely(!pmd_same(pmdp_get(vmf->pmd), vmf->orig_pmd))) { in do_huge_pmd_numa_page()
2104 spin_unlock(vmf->ptl); in do_huge_pmd_numa_page()
2109 pmd = pmd_modify(pmdp_get(vmf->pmd), vma->vm_page_prot); in do_huge_pmd_numa_page()
2113 set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd); in do_huge_pmd_numa_page()
2114 update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); in do_huge_pmd_numa_page()
2115 spin_unlock(vmf->ptl); in do_huge_pmd_numa_page()
2126 bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, in madvise_free_huge_pmd() argument
2132 struct mm_struct *mm = tlb->mm; in madvise_free_huge_pmd()
2135 tlb_change_page_size(tlb, HPAGE_PMD_SIZE); in madvise_free_huge_pmd()
2163 * If the user wants to discard part of the pages of a THP, split it so MADV_FREE in madvise_free_huge_pmd()
2166 if (next - addr != HPAGE_PMD_SIZE) { in madvise_free_huge_pmd()
2185 tlb_remove_pmd_tlb_entry(tlb, pmd, addr); in madvise_free_huge_pmd()
2205 int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, in zap_huge_pmd() argument
2211 tlb_change_page_size(tlb, HPAGE_PMD_SIZE); in zap_huge_pmd()
2223 tlb->fullmm); in zap_huge_pmd()
2225 tlb_remove_pmd_tlb_entry(tlb, pmd, addr); in zap_huge_pmd()
2228 zap_deposited_table(tlb->mm, pmd); in zap_huge_pmd()
2232 zap_deposited_table(tlb->mm, pmd); in zap_huge_pmd()
2256 zap_deposited_table(tlb->mm, pmd); in zap_huge_pmd()
2257 add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR); in zap_huge_pmd()
2260 zap_deposited_table(tlb->mm, pmd); in zap_huge_pmd()
2261 add_mm_counter(tlb->mm, mm_counter_file(folio), in zap_huge_pmd()
2262 -HPAGE_PMD_NR); in zap_huge_pmd()
2267 tlb_remove_page_size(tlb, &folio->page, HPAGE_PMD_SIZE); in zap_huge_pmd()
2278 * With split pmd lock we also need to move preallocated in pmd_move_must_withdraw()
2313 struct mm_struct *mm = vma->vm_mm; in move_huge_pmd()
2361 * - 0 if PMD could not be locked
2362 * - 1 if PMD was locked but protections unchanged and TLB flush unnecessary
2364 * - HPAGE_PMD_NR if protections changed and TLB flush necessary
2366 int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, in change_huge_pmd() argument
2370 struct mm_struct *mm = vma->vm_mm; in change_huge_pmd()
2378 tlb_change_page_size(tlb, HPAGE_PMD_SIZE); in change_huge_pmd()
2424 * Avoid trapping faults against the zero page. The read-only in change_huge_pmd()
2425 * data is likely to be read-cached on the local CPU and in change_huge_pmd()
2461 * // pmd is re-established in change_huge_pmd()
2491 tlb_flush_pmd_range(tlb, addr, HPAGE_PMD_SIZE); in change_huge_pmd()
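A sketch of how a caller might dispatch on the three return values documented above for change_huge_pmd(); the names used here (ret, pages, and the trailing arguments pmd, addr, newprot, cp_flags) are assumptions for illustration, not taken from this listing or from mm/mprotect.c:

	int ret = change_huge_pmd(tlb, vma, pmd, addr, newprot, cp_flags);

	if (ret == 0) {
		/* 0: the PMD could not be locked (e.g. it changed under us); fall back to the PTE path. */
	} else if (ret == 1) {
		/* 1: locked, but protections were already right; no TLB flush needed. */
	} else {
		/* HPAGE_PMD_NR: protections changed; the range was queued for flushing
		 * via tlb_flush_pmd_range() above. */
		pages += HPAGE_PMD_NR;
	}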
2500 * - 0: if pud leaf changed from under us
2501 * - 1: if pud can be skipped
2502 * - HPAGE_PUD_NR: if pud was successfully processed
2505 int change_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, in change_huge_pud() argument
2509 struct mm_struct *mm = vma->vm_mm; in change_huge_pud()
2513 tlb_change_page_size(tlb, HPAGE_PUD_SIZE); in change_huge_pud()
2520 * Huge entries on userfault-wp only works with anonymous, while we in change_huge_pud()
2537 tlb_flush_pud_range(tlb, addr, HPAGE_PUD_SIZE); in change_huge_pud()
2549 * Return zero if succeeded in moving the page, -EAGAIN if it needs to be
2576 return -EINVAL; in move_pages_huge_pmd()
2583 return -EAGAIN; in move_pages_huge_pmd()
2585 return -ENOENT; in move_pages_huge_pmd()
2593 return -EBUSY; in move_pages_huge_pmd()
2618 err = -EAGAIN; in move_pages_huge_pmd()
2629 err = -EAGAIN; in move_pages_huge_pmd()
2634 !PageAnonExclusive(&src_folio->page)) { in move_pages_huge_pmd()
2635 err = -EBUSY; in move_pages_huge_pmd()
2641 err = -EBUSY; in move_pages_huge_pmd()
2649 err = -EBUSY; in move_pages_huge_pmd()
2654 src_folio->index = linear_page_index(dst_vma, dst_addr); in move_pages_huge_pmd()
2656 _dst_pmd = mk_huge_pmd(&src_folio->page, dst_vma->vm_page_prot); in move_pages_huge_pmd()
2661 _dst_pmd = mk_huge_pmd(src_page, dst_vma->vm_page_prot); in move_pages_huge_pmd()
2693 ptl = pmd_lock(vma->vm_mm, pmd); in __pmd_trans_huge_lock()
2711 ptl = pud_lock(vma->vm_mm, pud); in __pud_trans_huge_lock()
2719 int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, in zap_huge_pud() argument
2729 orig_pud = pudp_huge_get_and_clear_full(vma, addr, pud, tlb->fullmm); in zap_huge_pud()
2731 tlb_remove_pud_tlb_entry(tlb, pud, addr); in zap_huge_pud()
2746 add_mm_counter(tlb->mm, mm_counter_file(folio), -HPAGE_PUD_NR); in zap_huge_pud()
2749 tlb_remove_page_size(tlb, page, HPAGE_PUD_SIZE); in zap_huge_pud()
2762 VM_BUG_ON_VMA(vma->vm_start > haddr, vma); in __split_huge_pud_locked()
2763 VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PUD_SIZE, vma); in __split_huge_pud_locked()
2782 add_mm_counter(vma->vm_mm, mm_counter_file(folio), in __split_huge_pud_locked()
2783 -HPAGE_PUD_NR); in __split_huge_pud_locked()
2792 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm, in __split_huge_pud()
2796 ptl = pud_lock(vma->vm_mm, pud); in __split_huge_pud()
2815 struct mm_struct *mm = vma->vm_mm; in __split_huge_zero_page_pmd()
2840 entry = pfn_pte(my_zero_pfn(addr), vma->vm_page_prot); in __split_huge_zero_page_pmd()
2848 pte_unmap(pte - 1); in __split_huge_zero_page_pmd()
2856 struct mm_struct *mm = vma->vm_mm; in __split_huge_pmd_locked()
2868 VM_BUG_ON_VMA(vma->vm_start > haddr, vma); in __split_huge_pmd_locked()
2869 VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PMD_SIZE, vma); in __split_huge_pmd_locked()
2902 add_mm_counter(mm, mm_counter_file(folio), -HPAGE_PMD_NR); in __split_huge_pmd_locked()
2936 * the whole access to the hugepage during the split (which in __split_huge_pmd_locked()
2937 * happens in place). If we overwrite the pmd with the not-huge in __split_huge_pmd_locked()
2940 * size TLB miss on the small sized TLB while the hugepage TLB in __split_huge_pmd_locked()
2941 * entry is still established in the huge TLB. Some CPU doesn't in __split_huge_pmd_locked()
2946 * two entries loaded in the two TLB is identical (which should in __split_huge_pmd_locked()
2948 * small and huge TLB entries for the same virtual address to be in __split_huge_pmd_locked()
2952 * remain set at all times on the pmd until the split is in __split_huge_pmd_locked()
2953 * complete for this pmd), then we flush the SMP TLB and finally in __split_huge_pmd_locked()
2954 * we write the non-huge version of the pmd entry with in __split_huge_pmd_locked()
2973 * Without "freeze", we'll simply split the PMD, propagating the in __split_huge_pmd_locked()
2975 * each subpage -- no need to (temporarily) clear. in __split_huge_pmd_locked()
2979 * managed to clear PageAnonExclusive() -- see in __split_huge_pmd_locked()
2982 * In case we cannot clear PageAnonExclusive(), split the PMD in __split_huge_pmd_locked()
2994 folio_ref_add(folio, HPAGE_PMD_NR - 1); in __split_huge_pmd_locked()
3046 entry = mk_pte(page, READ_ONCE(vma->vm_page_prot)); in __split_huge_pmd_locked()
3051 /* NOTE: this may set soft-dirty too on some archs */ in __split_huge_pmd_locked()
3107 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm, in __split_huge_pmd()
3111 ptl = pmd_lock(vma->vm_mm, pmd); in __split_huge_pmd()
3120 pmd_t *pmd = mm_find_pmd(vma->vm_mm, address); in split_huge_pmd_address()
3132 * contain a hugepage: check if we need to split a huge pmd. in split_huge_pmd_if_needed()
3145 /* Check if we need to split start first. */ in vma_adjust_trans_huge()
3148 /* Check if we need to split end next. */ in vma_adjust_trans_huge()
3151 /* If we're incrementing next->vm_start, we might need to split it. */ in vma_adjust_trans_huge()
3183 struct mm_struct *mm = vma->vm_mm; in __discard_anon_folio_pmd_locked()
3189 if (folio_test_dirty(folio) && !(vma->vm_flags & VM_DROPPABLE)) { in __discard_anon_folio_pmd_locked()
3197 * Syncing against concurrent GUP-fast: in __discard_anon_folio_pmd_locked()
3198 * - clear PMD; barrier; read refcount in __discard_anon_folio_pmd_locked()
3199 * - inc refcount; barrier; read PMD in __discard_anon_folio_pmd_locked()
3221 if (folio_test_dirty(folio) && !(vma->vm_flags & VM_DROPPABLE)) { in __discard_anon_folio_pmd_locked()
3234 add_mm_counter(mm, MM_ANONPAGES, -HPAGE_PMD_NR); in __discard_anon_folio_pmd_locked()
3235 if (vma->vm_flags & VM_LOCKED) in __discard_anon_folio_pmd_locked()
3274 lockdep_assert_held(&lruvec->lru_lock); in lru_add_split_folio()
3280 list_add_tail(&new_folio->lru, list); in lru_add_split_folio()
3285 new_folio->mlock_count = 0; in lru_add_split_folio()
3287 list_add_tail(&new_folio->lru, &folio->lru); in lru_add_split_folio()
3292 /* Racy check whether the huge page can be split */
3305 return folio_mapcount(folio) == folio_ref_count(folio) - extra_pins - in can_split_folio()
3325 struct page *new_head = &folio->page + i; in __split_folio_to_order()
3333 VM_BUG_ON_PAGE(atomic_read(&new_folio->_mapcount) != -1, new_head); in __split_folio_to_order()
3341 * Note that for mapped sub-pages of an anonymous THP, in __split_folio_to_order()
3345 * unreferenced sub-pages of an anonymous THP: we can simply drop in __split_folio_to_order()
3346 * PG_anon_exclusive (-> PG_mappedtodisk) for these here. in __split_folio_to_order()
3348 new_folio->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; in __split_folio_to_order()
3349 new_folio->flags |= (folio->flags & in __split_folio_to_order()
3368 new_folio->mapping = folio->mapping; in __split_folio_to_order()
3369 new_folio->index = folio->index + i; in __split_folio_to_order()
3372 * page->private should not be set in tail pages. Fix up and warn once in __split_folio_to_order()
3375 if (unlikely(new_folio->private)) { in __split_folio_to_order()
3377 new_folio->private = NULL; in __split_folio_to_order()
3381 new_folio->swap.val = folio->swap.val + i; in __split_folio_to_order()
3383 /* Page flags must be visible before we make the page non-compound. */ in __split_folio_to_order()
3403 new_folio->memcg_data = folio->memcg_data; in __split_folio_to_order()
3412 ClearPageCompound(&folio->page); in __split_folio_to_order()
3417 * @folio: the to-be-split folio
3418 * @new_order: the smallest order of the after split folios (since buddy
3419 * allocator like split generates folios with orders from @folio's
3420 * order - 1 to new_order).
3421 * @split_at: in buddy allocator like split, the folio containing @split_at
3422 * will be split until its order becomes @new_order.
3424 * @list: the after split folios will be added to @list if it is not NULL,
3426 * @end: the end of the file @folio maps to. -1 if @folio is anonymous memory.
3427 * @xas: xa_state pointing to folio->mapping->i_pages and locked by caller
3428 * @mapping: @folio->mapping
3429 * @uniform_split: if the split is uniform or not (buddy allocator like split)
3432 * 1. uniform split: the given @folio into multiple @new_order small folios,
3435 * 2. buddy allocator like (non-uniform) split: the given @folio is split into
3436 * half and one of the half (containing the given page) is split into half
3441 * 1. uniform split: a single __split_folio_to_order() is called to split the
3445 * 2. non-uniform split: in general, folio_order - @new_order calls to
3446 * __split_folio_to_order() are made in a for loop to split the @folio
3449 * @page, which is split in next for loop.
3455 * 1. uniform split leaves @page (or the folio contains it) locked;
3456 * 2. buddy allocator like (non-uniform) split leaves @folio locked.
3459 * For !uniform_split, when -ENOMEM is returned, the original folio might be
3460 * split. The caller needs to check the input folio.
3476 int start_order = uniform_split ? new_order : order - 1; in __split_unmapped_folio()
3484 /* a swapcache folio can only be uniformly split to order-0 */ in __split_unmapped_folio()
3486 return -EINVAL; in __split_unmapped_folio()
3488 swap_cache = swap_address_space(folio->swap); in __split_unmapped_folio()
3489 xa_lock(&swap_cache->i_pages); in __split_unmapped_folio()
3493 mod_mthp_stat(order, MTHP_STAT_NR_ANON, -1); in __split_unmapped_folio()
3501 * split to new_order one order at a time. For uniform split, in __split_unmapped_folio()
3502 * folio is split to new_order directly. in __split_unmapped_folio()
3506 split_order--) { in __split_unmapped_folio()
3511 /* order-1 anonymous folio is not supported */ in __split_unmapped_folio()
3519 * uniform split has xas_split_alloc() called before in __split_unmapped_folio()
3521 * non-uniform split can handle ENOMEM. in __split_unmapped_folio()
3526 xas_set_order(xas, folio->index, split_order); in __split_unmapped_folio()
3537 split_page_owner(&folio->page, old_order, split_order); in __split_unmapped_folio()
3544 * Iterate through after-split folios and perform related in __split_unmapped_folio()
3545 * operations. But in buddy allocator like split, the folio in __split_unmapped_folio()
3553 * for buddy allocator like split, the folio containing in __split_unmapped_folio()
3554 * page will be split next and should not be released, in __split_unmapped_folio()
3570 * entries are updated with all the other after-split in __split_unmapped_folio()
3585 if (release->index >= end) { in __split_unmapped_folio()
3590 inode_to_wb(mapping->host)); in __split_unmapped_folio()
3594 __xa_store(&mapping->i_pages, in __split_unmapped_folio()
3595 release->index, release, 0); in __split_unmapped_folio()
3597 __xa_store(&swap_cache->i_pages, in __split_unmapped_folio()
3598 swap_cache_index(release->swap), in __split_unmapped_folio()
3616 xa_unlock(&swap_cache->i_pages); in __split_unmapped_folio()
3618 xa_unlock(&mapping->i_pages); in __split_unmapped_folio()
3624 shmem_uncharge(mapping->host, nr_dropped); in __split_unmapped_folio()
3632 * For uniform split, it is left for caller to unlock. in __split_unmapped_folio()
3633 * For buddy allocator like split, the first after-split folio is left in __split_unmapped_folio()
3647 * of the tail pages after the split is complete. in __split_unmapped_folio()
3649 free_page_and_swap_cache(&new_folio->page); in __split_unmapped_folio()
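A quick check of the call counts described in the __split_unmapped_folio() comment above: a non-uniform (buddy-allocator-like) split of an order-9 folio down to new_order = 3 runs the loop for split_order = 8, 7, 6, 5, 4, 3, i.e. order - new_order = 9 - 3 = 6 calls to __split_folio_to_order(), while a uniform split issues a single call that produces 2^(9-3) = 64 order-3 folios.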
3658 /* order-1 is not supported for anonymous THP. */ in non_uniform_split_supported()
3660 "Cannot split to order-1 folio"); in non_uniform_split_supported()
3663 !mapping_large_folio_support(folio->mapping)) { in non_uniform_split_supported()
3665 * No split if the file system does not support large folio. in non_uniform_split_supported()
3671 "Cannot split file folio to non-0 order"); in non_uniform_split_supported()
3675 /* Only swapping a whole PMD-mapped folio is supported */ in non_uniform_split_supported()
3678 "Cannot split swapcache folio to non-0 order"); in non_uniform_split_supported()
3691 "Cannot split to order-1 folio"); in uniform_split_supported()
3695 !mapping_large_folio_support(folio->mapping)) { in uniform_split_supported()
3697 "Cannot split file folio to non-0 order"); in uniform_split_supported()
3704 "Cannot split swapcache folio to non-0 order"); in uniform_split_supported()
3712 * __folio_split: split a folio at @split_at to a @new_order folio
3713 * @folio: folio to split
3717 * @list: after-split folios will be put on it if non NULL
3718 * @uniform_split: perform uniform split or not (non-uniform split)
3720 * It calls __split_unmapped_folio() to perform uniform and non-uniform split.
3721 * It is in charge of checking whether the split is supported or not and
3724 * return: 0: successful, <0 failed (if -ENOMEM is returned, @folio might be
3725 * split but not to @new_order, the caller needs to check)
3732 XA_STATE(xas, &folio->mapping->i_pages, folio->index); in __folio_split()
3745 return -EINVAL; in __folio_split()
3748 return -EINVAL; in __folio_split()
3751 return -EINVAL; in __folio_split()
3755 return -EINVAL; in __folio_split()
3760 return -EBUSY; in __folio_split()
3764 return -EBUSY; in __folio_split()
3772 * is taken to serialise against parallel split or collapse in __folio_split()
3777 ret = -EBUSY; in __folio_split()
3780 end = -1; in __folio_split()
3787 mapping = folio->mapping; in __folio_split()
3796 ret = -EBUSY; in __folio_split()
3800 min_order = mapping_min_folio_order(folio->mapping); in __folio_split()
3802 VM_WARN_ONCE(1, "Cannot split mapped folio below min-order: %u", in __folio_split()
3804 ret = -EINVAL; in __folio_split()
3812 ret = -EBUSY; in __folio_split()
3817 xas_set_order(&xas, folio->index, new_order); in __folio_split()
3830 * EOF: but on 32-bit, i_size_read() takes an irq-unsafe in __folio_split()
3835 end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE); in __folio_split()
3837 end = shmem_fallocend(mapping->host, end); in __folio_split()
3841 * Racy check if we can split the page, before unmap_folio() will in __folio_split()
3842 * split PMDs in __folio_split()
3845 ret = -EAGAIN; in __folio_split()
3864 /* Prevent deferred_split_scan() touching ->_refcount */ in __folio_split()
3865 spin_lock(&ds_queue->split_queue_lock); in __folio_split()
3868 !list_empty(&folio->_deferred_list)) { in __folio_split()
3869 ds_queue->split_queue_len--; in __folio_split()
3873 MTHP_STAT_NR_ANON_PARTIALLY_MAPPED, -1); in __folio_split()
3878 * split will see list corruption when checking the in __folio_split()
3881 list_del_init(&folio->_deferred_list); in __folio_split()
3883 spin_unlock(&ds_queue->split_queue_lock); in __folio_split()
3891 NR_SHMEM_THPS, -nr); in __folio_split()
3894 NR_FILE_THPS, -nr); in __folio_split()
3904 spin_unlock(&ds_queue->split_queue_lock); in __folio_split()
3910 ret = -EAGAIN; in __folio_split()
3930 * @page can point to any page of the large folio to split. The split operation
3941 * GUP pins, will result in the folio not getting split; instead, the caller
3942 * will receive an -EAGAIN.
3944 * 4) @new_order > 1, usually. Splitting to order-1 anonymous folios is not
3945 * supported for non-file-backed folios, because folio->_deferred_list, which
3946 * is used by partially mapped folios, is stored in subpage 2, but an order-1
3947 * folio only has subpages 0 and 1. File-backed order-1 folios are supported,
3959 * Returns 0 if the huge page was split successfully.
3961 * Returns -EAGAIN if the folio has unexpected reference (e.g., GUP) or if
3964 * Returns -EBUSY when trying to split the huge zeropage, if the folio is
3965 * under writeback, if fs-specific folio metadata cannot currently be
3970 * min-order if one is set for non-anonymous folios.
3972 * Returns -EINVAL when trying to split to an order that is incompatible
3980 return __folio_split(folio, new_order, &folio->page, page, list, true); in split_huge_page_to_list_to_order()
3984 * folio_split: split a folio at @split_at to a @new_order folio
3985 * @folio: folio to split
3989 * return: 0: successful, <0 failed (if -ENOMEM is returned, @folio might be
3990 * split but not to @new_order, the caller needs to check)
3995 * Split a folio at @split_at to a new_order folio, leave the
3997 * in the case of splitting an order-9 folio at its third order-3 subpages to
3998 * an order-3 folio, there are 2^(9-3)=64 order-3 subpages in the order-9 folio.
3999 * After the split, there will be a group of folios with different orders and
4001 * [order-4, {order-3}, order-3, order-5, order-6, order-7, order-8].
4003 * After split, folio is left locked for caller.
4008 return __folio_split(folio, new_order, split_at, &folio->page, list, in folio_split()
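Checking the folio_split() example above: the after-split orders 4, 3, 3, 5, 6, 7 and 8 account for 16 + 8 + 8 + 32 + 64 + 128 + 256 = 512 = 2^9 base pages, exactly the original order-9 folio, and the braced {order-3} entry is the folio containing @split_at.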
4017 if (!folio->mapping) { in min_order_for_split()
4020 return -EBUSY; in min_order_for_split()
4023 return mapping_min_folio_order(folio->mapping); in min_order_for_split()
4033 return split_huge_page_to_list_to_order(&folio->page, list, ret); in split_folio_to_list()
4040 * queueing THP splits, and that list is (racily observed to be) non-empty.
4043 * zero: because even when split_queue_lock is held, a non-empty _deferred_list
4044 * might be in use on deferred_split_scan()'s unlocked on-stack list.
4047 * therefore important to unqueue deferred split before changing folio memcg.
4059 spin_lock_irqsave(&ds_queue->split_queue_lock, flags); in __folio_unqueue_deferred_split()
4060 if (!list_empty(&folio->_deferred_list)) { in __folio_unqueue_deferred_split()
4061 ds_queue->split_queue_len--; in __folio_unqueue_deferred_split()
4065 MTHP_STAT_NR_ANON_PARTIALLY_MAPPED, -1); in __folio_unqueue_deferred_split()
4067 list_del_init(&folio->_deferred_list); in __folio_unqueue_deferred_split()
4070 spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); in __folio_unqueue_deferred_split()
4095 * Exclude swapcache: originally to avoid a corrupt deferred split in deferred_split_folio()
4104 spin_lock_irqsave(&ds_queue->split_queue_lock, flags); in deferred_split_folio()
4115 /* partially mapped folios cannot become non-partially mapped */ in deferred_split_folio()
4118 if (list_empty(&folio->_deferred_list)) { in deferred_split_folio()
4119 list_add_tail(&folio->_deferred_list, &ds_queue->split_queue); in deferred_split_folio()
4120 ds_queue->split_queue_len++; in deferred_split_folio()
4124 deferred_split_shrinker->id); in deferred_split_folio()
4127 spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); in deferred_split_folio()
4133 struct pglist_data *pgdata = NODE_DATA(sc->nid); in deferred_split_count()
4134 struct deferred_split *ds_queue = &pgdata->deferred_split_queue; in deferred_split_count()
4137 if (sc->memcg) in deferred_split_count()
4138 ds_queue = &sc->memcg->deferred_split_queue; in deferred_split_count()
4140 return READ_ONCE(ds_queue->split_queue_len); in deferred_split_count()
4149 if (khugepaged_max_ptes_none == HPAGE_PMD_NR - 1) in thp_underused()
4163 * of non-zero filled pages exceeds threshold. in thp_underused()
4166 if (num_filled_pages >= HPAGE_PMD_NR - khugepaged_max_ptes_none) { in thp_underused()
4179 struct pglist_data *pgdata = NODE_DATA(sc->nid); in deferred_split_scan()
4180 struct deferred_split *ds_queue = &pgdata->deferred_split_queue; in deferred_split_scan()
4184 int split = 0, removed = 0; in deferred_split_scan() local
4187 if (sc->memcg) in deferred_split_scan()
4188 ds_queue = &sc->memcg->deferred_split_queue; in deferred_split_scan()
4191 spin_lock_irqsave(&ds_queue->split_queue_lock, flags); in deferred_split_scan()
4193 list_for_each_entry_safe(folio, next, &ds_queue->split_queue, in deferred_split_scan()
4196 list_move(&folio->_deferred_list, &list); in deferred_split_scan()
4202 MTHP_STAT_NR_ANON_PARTIALLY_MAPPED, -1); in deferred_split_scan()
4204 list_del_init(&folio->_deferred_list); in deferred_split_scan()
4205 ds_queue->split_queue_len--; in deferred_split_scan()
4207 if (!--sc->nr_to_scan) in deferred_split_scan()
4210 spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); in deferred_split_scan()
4227 split++; in deferred_split_scan()
4241 list_del_init(&folio->_deferred_list); in deferred_split_scan()
4256 spin_lock_irqsave(&ds_queue->split_queue_lock, flags); in deferred_split_scan()
4257 list_splice_tail(&list, &ds_queue->split_queue); in deferred_split_scan()
4258 ds_queue->split_queue_len -= removed; in deferred_split_scan()
4259 spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); in deferred_split_scan()
4265 * Stop shrinker if we didn't split any page, but the queue is empty. in deferred_split_scan()
4268 if (!split && list_empty(&ds_queue->split_queue)) in deferred_split_scan()
4270 return split; in deferred_split_scan()
4280 unsigned long total = 0, split = 0; in split_huge_pages_all() local
4282 pr_debug("Split all THPs\n"); in split_huge_pages_all()
4287 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) { in split_huge_pages_all()
4312 split++; in split_huge_pages_all()
4313 pfn += nr_pages - 1; in split_huge_pages_all()
4321 pr_debug("%lu of %lu THP split\n", split, total); in split_huge_pages_all()
4326 return vma_is_special_huge(vma) || (vma->vm_flags & VM_IO) || in vma_not_suitable_for_thp_split()
4337 unsigned long total = 0, split = 0; in split_huge_pages_pid() local
4345 ret = -ESRCH; in split_huge_pages_pid()
4354 ret = -EINVAL; in split_huge_pages_pid()
4358 pr_debug("Split huge pages in pid: %d, vaddr: [0x%lx - 0x%lx]\n", in split_huge_pages_pid()
4364 * table filled with PTE-mapped THPs, each of which is distinct. in split_huge_pages_pid()
4378 addr = vma->vm_end; in split_huge_pages_pid()
4390 mapping = folio->mapping; in split_huge_pages_pid()
4401 * will try to drop it before split and then check if the folio in split_huge_pages_pid()
4402 * can be split or not. So skip the check here. in split_huge_pages_pid()
4413 if (!folio_test_anon(folio) && folio->mapping != mapping) in split_huge_pages_pid()
4419 split++; in split_huge_pages_pid()
4424 split++; in split_huge_pages_pid()
4441 pr_debug("%lu of %lu THP split\n", split, total); in split_huge_pages_pid()
4454 int ret = -EINVAL; in split_huge_pages_in_file()
4457 unsigned long total = 0, split = 0; in split_huge_pages_in_file() local
4469 pr_debug("split file-backed THPs in file: %s, page offset: [0x%lx - 0x%lx]\n", in split_huge_pages_in_file()
4472 mapping = candidate->f_mapping; in split_huge_pages_in_file()
4495 if (folio->mapping != mapping) in split_huge_pages_in_file()
4500 split++; in split_huge_pages_in_file()
4505 split++; in split_huge_pages_in_file()
4518 pr_debug("%lu of %lu file-backed THP split\n", split, total); in split_huge_pages_in_file()
4539 long in_folio_offset = -1; in split_huge_pages_write()
4545 ret = -EFAULT; in split_huge_pages_write()
4551 input_buf[MAX_INPUT_BUF_SZ - 1] = '\0'; in split_huge_pages_write()
4564 ret = -EINVAL; in split_huge_pages_write()
4571 ret = -EINVAL; in split_huge_pages_write()
4589 ret = -EINVAL; in split_huge_pages_write()
4622 struct vm_area_struct *vma = pvmw->vma; in set_pmd_migration_entry()
4623 struct mm_struct *mm = vma->vm_mm; in set_pmd_migration_entry()
4624 unsigned long address = pvmw->address; in set_pmd_migration_entry()
4630 if (!(pvmw->pmd && !pvmw->pte)) in set_pmd_migration_entry()
4634 pmdval = pmdp_invalidate(vma, address, pvmw->pmd); in set_pmd_migration_entry()
4639 set_pmd_at(mm, address, pvmw->pmd, pmdval); in set_pmd_migration_entry()
4640 return -EBUSY; in set_pmd_migration_entry()
4660 set_pmd_at(mm, address, pvmw->pmd, pmdswp); in set_pmd_migration_entry()
4671 struct vm_area_struct *vma = pvmw->vma; in remove_migration_pmd()
4672 struct mm_struct *mm = vma->vm_mm; in remove_migration_pmd()
4673 unsigned long address = pvmw->address; in remove_migration_pmd()
4678 if (!(pvmw->pmd && !pvmw->pte)) in remove_migration_pmd()
4681 entry = pmd_to_swp_entry(*pvmw->pmd); in remove_migration_pmd()
4683 pmde = mk_huge_pmd(new, READ_ONCE(vma->vm_page_prot)); in remove_migration_pmd()
4684 if (pmd_swp_soft_dirty(*pvmw->pmd)) in remove_migration_pmd()
4688 if (pmd_swp_uffd_wp(*pvmw->pmd)) in remove_migration_pmd()
4692 /* NOTE: this may contain setting soft-dirty on some archs */ in remove_migration_pmd()
4707 set_pmd_at(mm, haddr, pvmw->pmd, pmde); in remove_migration_pmd()
4709 /* No need to invalidate - it was non-present before */ in remove_migration_pmd()
4710 update_mmu_cache_pmd(vma, address, pvmw->pmd); in remove_migration_pmd()