Lines matching "use guard pages" (mm/madvise.c)
1 // SPDX-License-Identifier: GPL-2.0
13 #include <linux/page-isolation.h>
28 #include <linux/backing-dev.h>
41 * Maximum number of attempts we make to install guard pages before we give up
42 * and return -ERESTARTNOINTR to have userspace try again.
52 * Any behaviour which results in changes to the vma->vm_flags needs to
84 /* Add 1 for NUL terminator at the end of the anon_name->name */ in anon_vma_name_alloc()
88 kref_init(&anon_name->kref); in anon_vma_name_alloc()
89 memcpy(anon_name->name, name, count); in anon_vma_name_alloc()
104 mmap_assert_locked(vma->vm_mm); in anon_vma_name()
106 return vma->anon_name; in anon_vma_name()
109 /* mmap_lock should be write-locked */
116 vma->anon_name = NULL; in replace_anon_vma_name()
124 vma->anon_name = anon_vma_name_reuse(anon_name); in replace_anon_vma_name()
134 return -EINVAL; in replace_anon_vma_name()
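
These anon_vma_name helpers back the PR_SET_VMA_ANON_NAME prctl. A minimal userspace sketch, assuming a kernel built with CONFIG_ANON_VMA_NAME (the mapping size and label are illustrative):

#include <stdio.h>
#include <sys/mman.h>
#include <sys/prctl.h>
#include <linux/prctl.h>	/* PR_SET_VMA, PR_SET_VMA_ANON_NAME */

int main(void)
{
	size_t len = 4 * 4096;
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		return 1;

	/* Label the mapping; the name then shows up in /proc/self/maps. */
	if (prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME,
		  (unsigned long)p, len, (unsigned long)"demo-heap"))
		perror("prctl(PR_SET_VMA_ANON_NAME)");
	return 0;
}
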
150 struct mm_struct *mm = vma->vm_mm; in madvise_update_vma()
154 if (new_flags == vma->vm_flags && anon_vma_name_eq(anon_vma_name(vma), anon_name)) { in madvise_update_vma()
169 if (!vma->vm_file || vma_is_anon_shmem(vma)) { in madvise_update_vma()
182 struct vm_area_struct *vma = walk->private; in swapin_walk_pmd_entry()
194 ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); in swapin_walk_pmd_entry()
232 XA_STATE(xas, &mapping->i_pages, linear_page_index(vma, start)); in shmem_swapin_range()
233 pgoff_t end_index = linear_page_index(vma, end) - 1; in shmem_swapin_range()
249 addr = vma->vm_start + in shmem_swapin_range()
250 ((xas.xa_index - vma->vm_pgoff) << PAGE_SHIFT); in shmem_swapin_range()
273 struct mm_struct *mm = vma->vm_mm; in madvise_willneed()
274 struct file *file = vma->vm_file; in madvise_willneed()
280 walk_page_range(vma->vm_mm, start, end, &swapin_walk_ops, vma); in madvise_willneed()
281 lru_add_drain(); /* Push any new pages onto the LRU now */ in madvise_willneed()
285 if (shmem_mapping(file->f_mapping)) { in madvise_willneed()
286 shmem_swapin_range(vma, start, end, file->f_mapping); in madvise_willneed()
287 lru_add_drain(); /* Push any new pages onto the LRU now */ in madvise_willneed()
292 return -EBADF; in madvise_willneed()
308 offset = (loff_t)(start - vma->vm_start) in madvise_willneed()
309 + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); in madvise_willneed()
311 vfs_fadvise(file, offset, end - start, POSIX_FADV_WILLNEED); in madvise_willneed()
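
madvise_willneed() turns the user range into a file offset and forwards it to vfs_fadvise(), so for file-backed mappings MADV_WILLNEED is effectively an asynchronous readahead hint. A minimal caller-side sketch (the path handling and error paths are illustrative):

#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

/* Hint that a file-backed mapping will be read soon. */
int prefetch_mapping(const char *path)
{
	struct stat st;
	void *p;
	int fd = open(path, O_RDONLY);

	if (fd < 0)
		return -1;
	if (fstat(fd, &st) < 0) {
		close(fd);
		return -1;
	}
	p = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
	close(fd);
	if (p == MAP_FAILED)
		return -1;
	/* Kicks off readahead via vfs_fadvise(POSIX_FADV_WILLNEED). */
	return madvise(p, st.st_size, MADV_WILLNEED);
}
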
319 if (!vma->vm_file) in can_do_file_pageout()
322 * paging out pagecache only for non-anonymous mappings that correspond in can_do_file_pageout()
324 * otherwise we'd be including shared non-exclusive mappings, which in can_do_file_pageout()
328 file_inode(vma->vm_file)) || in can_do_file_pageout()
329 file_permission(vma->vm_file, MAY_WRITE) == 0; in can_do_file_pageout()
338 int max_nr = (end - addr) / PAGE_SIZE; in madvise_folio_pte_batch()
348 struct madvise_walk_private *private = walk->private; in madvise_cold_or_pageout_pte_range()
349 struct mmu_gather *tlb = private->tlb; in madvise_cold_or_pageout_pte_range()
350 bool pageout = private->pageout; in madvise_cold_or_pageout_pte_range()
351 struct mm_struct *mm = tlb->mm; in madvise_cold_or_pageout_pte_range()
352 struct vm_area_struct *vma = walk->vma; in madvise_cold_or_pageout_pte_range()
362 return -EINTR; in madvise_cold_or_pageout_pte_range()
396 if (next - addr != HPAGE_PMD_SIZE) { in madvise_cold_or_pageout_pte_range()
427 list_add(&folio->lru, &folio_list); in madvise_cold_or_pageout_pte_range()
442 start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); in madvise_cold_or_pageout_pte_range()
515 * non-LRU folio. If we have a large folio at this point, we in madvise_cold_or_pageout_pte_range()
517 * number of pages, it must be exclusive. in madvise_cold_or_pageout_pte_range()
535 * As a side effect, it confuses idle-page tracking in madvise_cold_or_pageout_pte_range()
547 list_add(&folio->lru, &folio_list); in madvise_cold_or_pageout_pte_range()
579 walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, &walk_private); in madvise_cold_page_range()
585 return !(vma->vm_flags & (VM_LOCKED|VM_PFNMAP|VM_HUGETLB)); in can_madv_lru_vma()
592 struct mm_struct *mm = vma->vm_mm; in madvise_cold()
597 return -EINVAL; in madvise_cold()
617 walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, &walk_private); in madvise_pageout_page_range()
625 struct mm_struct *mm = vma->vm_mm; in madvise_pageout()
630 return -EINVAL; in madvise_pageout()
634 * dirty pages which can be paged out even if this process is neither in madvise_pageout()
636 * further to pageout dirty anon pages. in madvise_pageout()
639 (vma->vm_flags & VM_MAYSHARE))) in madvise_pageout()
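
madvise_cold() only deactivates pages so reclaim prefers them later, while madvise_pageout() reclaims them immediately, gated by the can_do_file_pageout() ownership check above. A small sketch of the two hints (MADV_COLD and MADV_PAGEOUT need Linux 5.4+):

#include <sys/mman.h>

/* Tell the kernel a buffer is cold; optionally force it out now. */
static int shed_buffer(void *buf, size_t len, int aggressive)
{
	/* MADV_COLD: move to the inactive LRU, let reclaim pick it up. */
	if (!aggressive)
		return madvise(buf, len, MADV_COLD);
	/* MADV_PAGEOUT: reclaim (swap out / write back) right away. */
	return madvise(buf, len, MADV_PAGEOUT);
}
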
655 struct mmu_gather *tlb = walk->private; in madvise_free_pte_range()
656 struct mm_struct *mm = tlb->mm; in madvise_free_pte_range()
657 struct vm_area_struct *vma = walk->vma; in madvise_free_pte_range()
684 * prevent swap-in, which is more expensive than in madvise_free_pte_range()
692 max_nr = (end - addr) / PAGE_SIZE; in madvise_free_pte_range()
694 nr_swap -= nr; in madvise_free_pte_range()
696 clear_not_present_full_ptes(mm, addr, pte, nr, tlb->fullmm); in madvise_free_pte_range()
699 pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); in madvise_free_pte_range()
757 * number of pages, it must be exclusive. in madvise_free_pte_range()
800 struct mm_struct *mm = vma->vm_mm; in madvise_free_single_vma()
806 return -EINVAL; in madvise_free_single_vma()
808 range.start = max(vma->vm_start, start_addr); in madvise_free_single_vma()
809 if (range.start >= vma->vm_end) in madvise_free_single_vma()
810 return -EINVAL; in madvise_free_single_vma()
811 range.end = min(vma->vm_end, end_addr); in madvise_free_single_vma()
812 if (range.end <= vma->vm_start) in madvise_free_single_vma()
813 return -EINVAL; in madvise_free_single_vma()
823 walk_page_range(vma->vm_mm, range.start, range.end, in madvise_free_single_vma()
833 * Application no longer needs these pages. If the pages are dirty,
837 * free these pages later if no one else has touched them in the meantime,
838 * although we could add these pages to a global reuse list for
839 * shrink_active_list to pick up before reclaiming other pages.
844 * pages in anonymous maps after committing to backing store the data
848 * An interface that causes the system to free clean pages and flush
849 * dirty pages is already available as msync(MS_INVALIDATE).
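
The block comment above contrasts MADV_DONTNEED (pages are dropped now; anonymous memory reads back as zeroes) with MADV_FREE (pages become lazily reclaimable, and a later write cancels the free). A minimal sketch of both calls:

#include <sys/mman.h>

/*
 * After MADV_DONTNEED, the next read of anonymous memory returns zeroes.
 * After MADV_FREE, contents survive until reclaim unless rewritten first.
 */
static void drop_now(void *buf, size_t len)
{
	madvise(buf, len, MADV_DONTNEED);
}

static void drop_lazily(void *buf, size_t len)
{
	madvise(buf, len, MADV_FREE);	/* private anonymous mappings only */
}
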
859 zap_page_range_single(vma, start, end - start, &details); in madvise_dontneed_single_vma()
874 return !(vma->vm_flags & forbidden); in madvise_dontneed_free_valid_vma()
884 * boundaries, and may be unaware that this VMA uses huge pages. in madvise_dontneed_free_valid_vma()
886 * huge pages freed. in madvise_dontneed_free_valid_vma()
898 struct mm_struct *mm = vma->vm_mm; in madvise_dontneed_free()
902 return -EINVAL; in madvise_dontneed_free()
913 return -ENOMEM; in madvise_dontneed_free()
920 return -EINVAL; in madvise_dontneed_free()
921 if (end > vma->vm_end) { in madvise_dontneed_free()
923 * Don't fail if end > vma->vm_end. If the old in madvise_dontneed_free()
931 * end-vma->vm_end range, but the manager can in madvise_dontneed_free()
934 end = vma->vm_end; in madvise_dontneed_free()
938 * originally backed by 4kB pages and then remapped to in madvise_dontneed_free()
953 return -EINVAL; in madvise_dontneed_free()
961 long pages; in madvise_populate() local
965 pages = faultin_page_range(mm, start, end, write, &locked); in madvise_populate()
970 if (pages < 0) { in madvise_populate()
971 switch (pages) { in madvise_populate()
972 case -EINTR: in madvise_populate()
973 return -EINTR; in madvise_populate()
974 case -EINVAL: /* Incompatible mappings / permissions. */ in madvise_populate()
975 return -EINVAL; in madvise_populate()
976 case -EHWPOISON: in madvise_populate()
977 return -EHWPOISON; in madvise_populate()
978 case -EFAULT: /* VM_FAULT_SIGBUS or VM_FAULT_SIGSEGV */ in madvise_populate()
979 return -EFAULT; in madvise_populate()
982 __func__, pages); in madvise_populate()
984 case -ENOMEM: /* No VMA or out of memory. */ in madvise_populate()
985 return -ENOMEM; in madvise_populate()
988 start += pages * PAGE_SIZE; in madvise_populate()
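
madvise_populate() keeps calling faultin_page_range() until the range is fully populated, translating its error codes as shown above. A caller-side sketch (MADV_POPULATE_READ/MADV_POPULATE_WRITE need Linux 5.14+):

#include <sys/mman.h>

/* Prefault page tables so later writes take no soft faults. */
static int prefault_writable(void *buf, size_t len)
{
	/* Unlike MAP_POPULATE, this works after mmap() and on subranges. */
	return madvise(buf, len, MADV_POPULATE_WRITE);
}
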
994 * Application wants to free up the pages and associated backing store.
1004 struct mm_struct *mm = vma->vm_mm; in madvise_remove()
1008 if (vma->vm_flags & VM_LOCKED) in madvise_remove()
1009 return -EINVAL; in madvise_remove()
1011 f = vma->vm_file; in madvise_remove()
1013 if (!f || !f->f_mapping || !f->f_mapping->host) { in madvise_remove()
1014 return -EINVAL; in madvise_remove()
1018 return -EACCES; in madvise_remove()
1020 offset = (loff_t)(start - vma->vm_start) in madvise_remove()
1021 + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); in madvise_remove()
1036 offset, end - start); in madvise_remove()
1047 * A user could lock after setting a guard range but that's fine, as in is_valid_guard_vma()
1054 return !(vma->vm_flags & disallowed); in is_valid_guard_vma()
1085 unsigned long *nr_pages = (unsigned long *)walk->private; in guard_install_pte_entry()
1087 /* If there is already a guard page marker, we have nothing to do. */ in guard_install_pte_entry()
1101 unsigned long *nr_pages = (unsigned long *)walk->private; in guard_install_set_pte()
1127 return -EINVAL; in madvise_guard_install()
1130 * If we install guard markers, then the range is no longer in madvise_guard_install()
1141 * Optimistically try to install the guard marker pages first. If any in madvise_guard_install()
1142 * non-guard pages are encountered, give up and zap the range before in madvise_guard_install()
1149 * In most cases we should simply install the guard markers immediately in madvise_guard_install()
1156 err = walk_page_range_mm(vma->vm_mm, start, end, in madvise_guard_install()
1162 unsigned long nr_expected_pages = PHYS_PFN(end - start); in madvise_guard_install()
1169 * OK, some of the range has non-guard pages mapped; zap in madvise_guard_install()
1170 * them. This leaves existing guard pages in place. in madvise_guard_install()
1172 zap_page_range_single(vma, start, end - start, NULL); in madvise_guard_install()
1176 * We were unable to install the guard pages due to being raced by page in madvise_guard_install()
1188 /* If huge, cannot have guard pages present, so no-op - skip. */ in guard_remove_pud_entry()
1190 walk->action = ACTION_CONTINUE; in guard_remove_pud_entry()
1200 /* If huge, cannot have guard pages present, so no-op - skip. */ in guard_remove_pmd_entry()
1202 walk->action = ACTION_CONTINUE; in guard_remove_pmd_entry()
1214 pte_clear_not_present_full(walk->mm, addr, pte, false); in guard_remove_pte_entry()
1215 update_mmu_cache(walk->vma, addr, pte); in guard_remove_pte_entry()
1235 * non-destructive action. in madvise_guard_remove()
1238 return -EINVAL; in madvise_guard_remove()
1240 return walk_page_range(vma->vm_mm, start, end, in madvise_guard_remove()
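
madvise_guard_install() optimistically writes guard PTE markers, zapping and retrying if it races with page faults; madvise_guard_remove() clears the markers again. A hedged sketch placing guard regions around a manually allocated stack (MADV_GUARD_INSTALL needs Linux 6.13+; the layout is illustrative):

#include <sys/mman.h>
#include <unistd.h>

/* Place guard regions (fault on touch, no extra VMAs) around a stack. */
static void *alloc_guarded_stack(size_t pages)
{
	size_t pg = (size_t)sysconf(_SC_PAGESIZE);
	size_t len = (pages + 2) * pg;
	char *base = mmap(NULL, len, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (base == MAP_FAILED)
		return NULL;
	/* Unlike mprotect(PROT_NONE), guard markers split no VMAs. */
	if (madvise(base, pg, MADV_GUARD_INSTALL) ||
	    madvise(base + len - pg, pg, MADV_GUARD_INSTALL)) {
		munmap(base, len);
		return NULL;
	}
	return base + pg;	/* usable region starts past the low guard */
}
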
1256 unsigned long new_flags = vma->vm_flags; in madvise_vma_behavior()
1259 return -EPERM; in madvise_vma_behavior()
1287 if (vma->vm_flags & VM_IO) in madvise_vma_behavior()
1288 return -EINVAL; in madvise_vma_behavior()
1293 if (vma->vm_file || vma->vm_flags & VM_SHARED) in madvise_vma_behavior()
1294 return -EINVAL; in madvise_vma_behavior()
1298 if (vma->vm_flags & VM_DROPPABLE) in madvise_vma_behavior()
1299 return -EINVAL; in madvise_vma_behavior()
1307 (vma->vm_flags & VM_DROPPABLE)) in madvise_vma_behavior()
1308 return -EINVAL; in madvise_vma_behavior()
1342 if (error == -ENOMEM) in madvise_vma_behavior()
1343 error = -EAGAIN; in madvise_vma_behavior()
1357 return -EPERM; in madvise_inject_error()
1385 if (ret == -EOPNOTSUPP) in madvise_inject_error()
1484 * original range will result in this function returning -ENOMEM while still
1502 * ranges, just ignore them, but return -ENOMEM at the end. in madvise_walk_vmas()
1503 * - different from the way of handling in mlock etc. in madvise_walk_vmas()
1506 if (vma && start > vma->vm_start) in madvise_walk_vmas()
1514 return -ENOMEM; in madvise_walk_vmas()
1516 /* Here start < (end|vma->vm_end). */ in madvise_walk_vmas()
1517 if (start < vma->vm_start) { in madvise_walk_vmas()
1518 unmapped_error = -ENOMEM; in madvise_walk_vmas()
1519 start = vma->vm_start; in madvise_walk_vmas()
1524 /* Here vma->vm_start <= start < (end|vma->vm_end) */ in madvise_walk_vmas()
1525 tmp = vma->vm_end; in madvise_walk_vmas()
1529 /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */ in madvise_walk_vmas()
1534 if (prev && start < prev->vm_end) in madvise_walk_vmas()
1535 start = prev->vm_end; in madvise_walk_vmas()
1539 vma = find_vma(mm, prev->vm_end); in madvise_walk_vmas()
1556 if (vma->vm_file && !vma_is_anon_shmem(vma)) in madvise_vma_anon_name()
1557 return -EBADF; in madvise_vma_anon_name()
1559 error = madvise_update_vma(vma, prev, start, end, vma->vm_flags, in madvise_vma_anon_name()
1566 if (error == -ENOMEM) in madvise_vma_anon_name()
1567 error = -EAGAIN; in madvise_vma_anon_name()
1578 return -EINVAL; in madvise_set_anon_name()
1581 /* Check to see whether len was rounded up from small -ve to zero */ in madvise_set_anon_name()
1583 return -EINVAL; in madvise_set_anon_name()
1587 return -EINVAL; in madvise_set_anon_name()
1604 return -EINTR; in madvise_lock()
1633 /* Check to see whether len was rounded up from small -ve to zero */ in is_valid_madvise()
1644 * madvise_should_skip() - Return whether the request is invalid or a no-op.
1645 * @start: Start address of madvise-requested address range.
1646 * @len_in: Length of madvise-requested address range.
1658 *err = -EINVAL; in madvise_should_skip()
1704 * Applications can use madvise() to advise the kernel how it should
1706 * use appropriate read-ahead and caching techniques. The information
1711 * MADV_NORMAL - the default behavior is to read clusters. This
1712 * results in some read-ahead and read-behind.
1713 * MADV_RANDOM - the system should read the minimum amount of data
1714 * on any access, since it is unlikely that the application will have good locality.
1716 * MADV_SEQUENTIAL - pages in the given range will probably be accessed
1719 * MADV_WILLNEED - the application is notifying the system to read
1720 * some pages ahead.
1721 * MADV_DONTNEED - the application is finished with the given range,
1723 * MADV_FREE - the application marks pages in the given range as lazy free,
1725 * MADV_REMOVE - the application wants to free up the given range of
1726 * pages and associated backing store.
1727 * MADV_DONTFORK - omit this area from child's address space when forking:
1728 * typically, to avoid COWing pages pinned by get_user_pages().
1729 * MADV_DOFORK - cancel MADV_DONTFORK: no longer omit this area when forking.
1730 * MADV_WIPEONFORK - present the child process with zero-filled memory in this
1732 * MADV_KEEPONFORK - undo the effect of MADV_WIPEONFORK
1733 * MADV_HWPOISON - trigger memory error handler as if the given memory range
1735 * MADV_SOFT_OFFLINE - try to soft-offline the given range of memory.
1736 * MADV_MERGEABLE - the application recommends that KSM try to merge pages in
1737 * this area with pages of identical content from other such areas.
1738 * MADV_UNMERGEABLE - cancel MADV_MERGEABLE: no longer merge pages with others.
1739 * MADV_HUGEPAGE - the application wants to back the given range by transparent
1740 * huge pages in the future. Existing pages might be coalesced and
1741 * new pages might be allocated as THP.
1742 * MADV_NOHUGEPAGE - mark the given range as not worth being backed by
1743 * transparent huge pages so the existing pages will not be
1744 * coalesced into THP and new pages will not be allocated as THP.
1745 * MADV_COLLAPSE - synchronously coalesce pages into new THP.
1746 * MADV_DONTDUMP - the application wants to prevent pages in the given range
1748 * MADV_DODUMP - cancel MADV_DONTDUMP: no longer exclude from core dump.
1749 * MADV_COLD - the application is not expected to use this memory soon,
1750 * deactivate pages in this range so that they can be reclaimed
1752 * MADV_PAGEOUT - the application is not expected to use this memory soon,
1753 * page out the pages in this range immediately.
1754 * MADV_POPULATE_READ - populate (prefault) page tables readable by
1756 * MADV_POPULATE_WRITE - populate (prefault) page tables writable by
1760 * zero - success
1761 * -EINVAL - start + len < 0, start is not page-aligned,
1763 * is attempting to release locked or shared pages,
1766 * -ENOMEM - addresses in the specified range are not currently
1768 * -EIO - an I/O error occurred while paging in data.
1769 * -EBADF - map exists, but area maps something that isn't a file.
1770 * -EAGAIN - a kernel resource was temporarily unavailable.
1771 * -EPERM - memory is sealed.
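
Putting the return-value contract above into practice, a caller should pass a page-aligned start and be prepared for -ENOMEM when the range contains unmapped holes. A minimal sketch:

#include <errno.h>
#include <stdio.h>
#include <sys/mman.h>

/* Advise a range and report failures per the table above. */
static int advise_range(void *start, size_t len, int advice)
{
	if (madvise(start, len, advice) == 0)
		return 0;
	if (errno == ENOMEM)	/* part of the range was unmapped */
		fprintf(stderr, "madvise: hole in range\n");
	else
		perror("madvise");
	return -1;
}
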
1790 return do_madvise(current->mm, start, len_in, behavior); in SYSCALL_DEFINE3()
1825 if (ret == -ERESTARTNOINTR) { in vector_madvise()
1827 ret = -EINTR; in vector_madvise()
1842 ret = (total_len - iov_iter_count(iter)) ? : ret; in vector_madvise()
1859 ret = -EINVAL; in SYSCALL_DEFINE5()
1884 if (mm != current->mm && !process_madvise_remote_valid(behavior)) { in SYSCALL_DEFINE5()
1885 ret = -EINVAL; in SYSCALL_DEFINE5()
1891 * only non-destructive hints are currently supported for remote in SYSCALL_DEFINE5()
1894 if (mm != current->mm && !capable(CAP_SYS_NICE)) { in SYSCALL_DEFINE5()
1895 ret = -EPERM; in SYSCALL_DEFINE5()
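
As the checks above enforce, a remote caller of process_madvise() is restricted to non-destructive hints and needs CAP_SYS_NICE. A sketch using the raw syscall (Linux 5.10+; glibc may not provide a wrapper, and the pidfd is assumed to come from pidfd_open()):

#define _GNU_SOURCE
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/uio.h>
#include <unistd.h>

/* Ask the kernel to page out a range in another process. */
static long remote_pageout(int pidfd, void *remote_addr, size_t len)
{
	struct iovec iov = { .iov_base = remote_addr, .iov_len = len };

	/* Needs CAP_SYS_NICE; MADV_PAGEOUT is non-destructive, so allowed. */
	return syscall(SYS_process_madvise, pidfd, &iov, 1UL,
		       MADV_PAGEOUT, 0UL);
}
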