Lines Matching +full:trim +full:- +full:hs +full:- +full:current
1 // SPDX-License-Identifier: GPL-2.0-only
55 #include <linux/page-isolation.h>
138 if (spool->count) in subpool_is_free()
140 if (spool->max_hpages != -1) in subpool_is_free()
141 return spool->used_hpages == 0; in subpool_is_free()
142 if (spool->min_hpages != -1) in subpool_is_free()
143 return spool->rsv_hpages == spool->min_hpages; in subpool_is_free()
151 spin_unlock_irqrestore(&spool->lock, irq_flags); in unlock_or_release_subpool()
157 if (spool->min_hpages != -1) in unlock_or_release_subpool()
158 hugetlb_acct_memory(spool->hstate, in unlock_or_release_subpool()
159 -spool->min_hpages); in unlock_or_release_subpool()
173 spin_lock_init(&spool->lock); in hugepage_new_subpool()
174 spool->count = 1; in hugepage_new_subpool()
175 spool->max_hpages = max_hpages; in hugepage_new_subpool()
176 spool->hstate = h; in hugepage_new_subpool()
177 spool->min_hpages = min_hpages; in hugepage_new_subpool()
179 if (min_hpages != -1 && hugetlb_acct_memory(h, min_hpages)) { in hugepage_new_subpool()
183 spool->rsv_hpages = min_hpages; in hugepage_new_subpool()
192 spin_lock_irqsave(&spool->lock, flags); in hugepage_put_subpool()
193 BUG_ON(!spool->count); in hugepage_put_subpool()
194 spool->count--; in hugepage_put_subpool()
200 * Return -ENOMEM if there are not enough resources to satisfy the
214 spin_lock_irq(&spool->lock); in hugepage_subpool_get_pages()
216 if (spool->max_hpages != -1) { /* maximum size accounting */ in hugepage_subpool_get_pages()
217 if ((spool->used_hpages + delta) <= spool->max_hpages) in hugepage_subpool_get_pages()
218 spool->used_hpages += delta; in hugepage_subpool_get_pages()
220 ret = -ENOMEM; in hugepage_subpool_get_pages()
226 if (spool->min_hpages != -1 && spool->rsv_hpages) { in hugepage_subpool_get_pages()
227 if (delta > spool->rsv_hpages) { in hugepage_subpool_get_pages()
232 ret = delta - spool->rsv_hpages; in hugepage_subpool_get_pages()
233 spool->rsv_hpages = 0; in hugepage_subpool_get_pages()
236 spool->rsv_hpages -= delta; in hugepage_subpool_get_pages()
241 spin_unlock_irq(&spool->lock); in hugepage_subpool_get_pages()
260 spin_lock_irqsave(&spool->lock, flags); in hugepage_subpool_put_pages()
262 if (spool->max_hpages != -1) /* maximum size accounting */ in hugepage_subpool_put_pages()
263 spool->used_hpages -= delta; in hugepage_subpool_put_pages()
266 if (spool->min_hpages != -1 && spool->used_hpages < spool->min_hpages) { in hugepage_subpool_put_pages()
267 if (spool->rsv_hpages + delta <= spool->min_hpages) in hugepage_subpool_put_pages()
270 ret = spool->rsv_hpages + delta - spool->min_hpages; in hugepage_subpool_put_pages()
272 spool->rsv_hpages += delta; in hugepage_subpool_put_pages()
273 if (spool->rsv_hpages > spool->min_hpages) in hugepage_subpool_put_pages()
274 spool->rsv_hpages = spool->min_hpages; in hugepage_subpool_put_pages()
288 return HUGETLBFS_SB(inode->i_sb)->spool; in subpool_inode()
293 return subpool_inode(file_inode(vma->vm_file)); in subpool_vma()
302 struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; in hugetlb_vma_lock_read()
304 down_read(&vma_lock->rw_sema); in hugetlb_vma_lock_read()
308 down_read(&resv_map->rw_sema); in hugetlb_vma_lock_read()
315 struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; in hugetlb_vma_unlock_read()
317 up_read(&vma_lock->rw_sema); in hugetlb_vma_unlock_read()
321 up_read(&resv_map->rw_sema); in hugetlb_vma_unlock_read()
328 struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; in hugetlb_vma_lock_write()
330 down_write(&vma_lock->rw_sema); in hugetlb_vma_lock_write()
334 down_write(&resv_map->rw_sema); in hugetlb_vma_lock_write()
341 struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; in hugetlb_vma_unlock_write()
343 up_write(&vma_lock->rw_sema); in hugetlb_vma_unlock_write()
347 up_write(&resv_map->rw_sema); in hugetlb_vma_unlock_write()
355 struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; in hugetlb_vma_trylock_write()
357 return down_write_trylock(&vma_lock->rw_sema); in hugetlb_vma_trylock_write()
361 return down_write_trylock(&resv_map->rw_sema); in hugetlb_vma_trylock_write()
370 struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; in hugetlb_vma_assert_locked()
372 lockdep_assert_held(&vma_lock->rw_sema); in hugetlb_vma_assert_locked()
376 lockdep_assert_held(&resv_map->rw_sema); in hugetlb_vma_assert_locked()
390 struct vm_area_struct *vma = vma_lock->vma; in __hugetlb_vma_unlock_write_put()
395 * Semaphore synchronizes access to vma_lock->vma field. in __hugetlb_vma_unlock_write_put()
397 vma_lock->vma = NULL; in __hugetlb_vma_unlock_write_put()
398 vma->vm_private_data = NULL; in __hugetlb_vma_unlock_write_put()
399 up_write(&vma_lock->rw_sema); in __hugetlb_vma_unlock_write_put()
400 kref_put(&vma_lock->refs, hugetlb_vma_lock_release); in __hugetlb_vma_unlock_write_put()
406 struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; in __hugetlb_vma_unlock_write_free()
413 up_write(&resv_map->rw_sema); in __hugetlb_vma_unlock_write_free()
425 if (vma->vm_private_data) { in hugetlb_vma_lock_free()
426 struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; in hugetlb_vma_lock_free()
428 down_write(&vma_lock->rw_sema); in hugetlb_vma_lock_free()
438 if (!vma || !(vma->vm_flags & VM_MAYSHARE)) in hugetlb_vma_lock_alloc()
441 /* Should never get here with non-NULL vm_private_data */ in hugetlb_vma_lock_alloc()
442 if (vma->vm_private_data) in hugetlb_vma_lock_alloc()
461 kref_init(&vma_lock->refs); in hugetlb_vma_lock_alloc()
462 init_rwsem(&vma_lock->rw_sema); in hugetlb_vma_lock_alloc()
463 vma_lock->vma = vma; in hugetlb_vma_lock_alloc()
464 vma->vm_private_data = vma_lock; in hugetlb_vma_lock_alloc()
475 VM_BUG_ON(resv->region_cache_count <= 0); in get_file_region_entry_from_cache()
477 resv->region_cache_count--; in get_file_region_entry_from_cache()
478 nrg = list_first_entry(&resv->region_cache, struct file_region, link); in get_file_region_entry_from_cache()
479 list_del(&nrg->link); in get_file_region_entry_from_cache()
481 nrg->from = from; in get_file_region_entry_from_cache()
482 nrg->to = to; in get_file_region_entry_from_cache()
491 nrg->reservation_counter = rg->reservation_counter; in copy_hugetlb_cgroup_uncharge_info()
492 nrg->css = rg->css; in copy_hugetlb_cgroup_uncharge_info()
493 if (rg->css) in copy_hugetlb_cgroup_uncharge_info()
494 css_get(rg->css); in copy_hugetlb_cgroup_uncharge_info()
506 nrg->reservation_counter = in record_hugetlb_cgroup_uncharge_info()
507 &h_cg->rsvd_hugepage[hstate_index(h)]; in record_hugetlb_cgroup_uncharge_info()
508 nrg->css = &h_cg->css; in record_hugetlb_cgroup_uncharge_info()
510 * The caller will hold exactly one h_cg->css reference for the in record_hugetlb_cgroup_uncharge_info()
515 * exactly one h_cg->css reference, we should do css_get for in record_hugetlb_cgroup_uncharge_info()
519 css_get(&h_cg->css); in record_hugetlb_cgroup_uncharge_info()
520 if (!resv->pages_per_hpage) in record_hugetlb_cgroup_uncharge_info()
521 resv->pages_per_hpage = pages_per_huge_page(h); in record_hugetlb_cgroup_uncharge_info()
525 VM_BUG_ON(resv->pages_per_hpage != pages_per_huge_page(h)); in record_hugetlb_cgroup_uncharge_info()
527 nrg->reservation_counter = NULL; in record_hugetlb_cgroup_uncharge_info()
528 nrg->css = NULL; in record_hugetlb_cgroup_uncharge_info()
536 if (rg->css) in put_uncharge_info()
537 css_put(rg->css); in put_uncharge_info()
545 return rg->reservation_counter == org->reservation_counter && in has_same_uncharge_info()
546 rg->css == org->css; in has_same_uncharge_info()
558 if (&prg->link != &resv->regions && prg->to == rg->from && in coalesce_file_region()
560 prg->to = rg->to; in coalesce_file_region()
562 list_del(&rg->link); in coalesce_file_region()
570 if (&nrg->link != &resv->regions && nrg->from == rg->to && in coalesce_file_region()
572 nrg->from = rg->from; in coalesce_file_region()
574 list_del(&rg->link); in coalesce_file_region()
590 list_add(&nrg->link, rg); in hugetlb_resv_map_add()
595 return to - from; in hugetlb_resv_map_add()
599 * Must be called with resv->lock held.
611 struct list_head *head = &resv->regions; in add_reservation_in_range()
620 * [last_accounted_offset, iter->from), at every iteration, with some in add_reservation_in_range()
625 if (iter->from < f) { in add_reservation_in_range()
629 if (iter->to > last_accounted_offset) in add_reservation_in_range()
630 last_accounted_offset = iter->to; in add_reservation_in_range()
637 if (iter->from >= t) { in add_reservation_in_range()
638 rg = iter->link.prev; in add_reservation_in_range()
642 /* Add an entry for last_accounted_offset -> iter->from, and in add_reservation_in_range()
645 if (iter->from > last_accounted_offset) in add_reservation_in_range()
646 add += hugetlb_resv_map_add(resv, iter->link.prev, in add_reservation_in_range()
648 iter->from, h, h_cg, in add_reservation_in_range()
651 last_accounted_offset = iter->to; in add_reservation_in_range()
658 rg = head->prev; in add_reservation_in_range()
666 /* Must be called with resv->lock acquired. Will drop lock to allocate entries.
670 __must_hold(&resv->lock) in allocate_file_region_entries()
687 while (resv->region_cache_count < in allocate_file_region_entries()
688 (resv->adds_in_progress + regions_needed)) { in allocate_file_region_entries()
689 to_allocate = resv->adds_in_progress + regions_needed - in allocate_file_region_entries()
690 resv->region_cache_count; in allocate_file_region_entries()
696 VM_BUG_ON(resv->region_cache_count < resv->adds_in_progress); in allocate_file_region_entries()
698 spin_unlock(&resv->lock); in allocate_file_region_entries()
703 list_add(&trg->link, &allocated_regions); in allocate_file_region_entries()
706 spin_lock(&resv->lock); in allocate_file_region_entries()
708 list_splice(&allocated_regions, &resv->region_cache); in allocate_file_region_entries()
709 resv->region_cache_count += to_allocate; in allocate_file_region_entries()
716 list_del(&rg->link); in allocate_file_region_entries()
719 return -ENOMEM; in allocate_file_region_entries()
734 * this operation and we were not able to allocate, it returns -ENOMEM.
745 spin_lock(&resv->lock); in region_add()
762 resv->region_cache_count < in region_add()
763 resv->adds_in_progress + in region_add()
764 (actual_regions_needed - in_regions_needed)) { in region_add()
768 VM_BUG_ON(t - f <= 1); in region_add()
771 resv, actual_regions_needed - in_regions_needed)) { in region_add()
772 return -ENOMEM; in region_add()
780 resv->adds_in_progress -= in_regions_needed; in region_add()
782 spin_unlock(&resv->lock); in region_add()
798 * resv->adds_in_progress. This value needs to be provided to a follow up call
803 * zero. -ENOMEM is returned if a new file_region structure or cache entry
811 spin_lock(&resv->lock); in region_chg()
821 return -ENOMEM; in region_chg()
823 resv->adds_in_progress += *out_regions_needed; in region_chg()
825 spin_unlock(&resv->lock); in region_chg()
845 spin_lock(&resv->lock); in region_abort()
846 VM_BUG_ON(!resv->region_cache_count); in region_abort()
847 resv->adds_in_progress -= regions_needed; in region_abort()
848 spin_unlock(&resv->lock); in region_abort()
855 * and either trim, delete or split the existing regions.
860 * be allocated. If the allocation fails, -ENOMEM will be returned.
862 * a region and possibly return -ENOMEM. Callers specifying
863 * t == LONG_MAX do not need to check for -ENOMEM error.
867 struct list_head *head = &resv->regions; in region_del()
873 spin_lock(&resv->lock); in region_del()
882 if (rg->to <= f && (rg->to != rg->from || rg->to != f)) in region_del()
885 if (rg->from >= t) in region_del()
888 if (f > rg->from && t < rg->to) { /* Must split region */ in region_del()
894 resv->region_cache_count > resv->adds_in_progress) { in region_del()
895 nrg = list_first_entry(&resv->region_cache, in region_del()
898 list_del(&nrg->link); in region_del()
899 resv->region_cache_count--; in region_del()
903 spin_unlock(&resv->lock); in region_del()
906 return -ENOMEM; in region_del()
910 del += t - f; in region_del()
912 resv, rg, t - f, false); in region_del()
915 nrg->from = t; in region_del()
916 nrg->to = rg->to; in region_del()
920 INIT_LIST_HEAD(&nrg->link); in region_del()
923 rg->to = f; in region_del()
925 list_add(&nrg->link, &rg->link); in region_del()
930 if (f <= rg->from && t >= rg->to) { /* Remove entire region */ in region_del()
931 del += rg->to - rg->from; in region_del()
933 rg->to - rg->from, true); in region_del()
934 list_del(&rg->link); in region_del()
939 if (f <= rg->from) { /* Trim beginning of region */ in region_del()
941 t - rg->from, false); in region_del()
943 del += t - rg->from; in region_del()
944 rg->from = t; in region_del()
945 } else { /* Trim end of region */ in region_del()
947 rg->to - f, false); in region_del()
949 del += rg->to - f; in region_del()
950 rg->to = f; in region_del()
954 spin_unlock(&resv->lock); in region_del()
994 struct list_head *head = &resv->regions; in region_count()
998 spin_lock(&resv->lock); in region_count()
1004 if (rg->to <= f) in region_count()
1006 if (rg->from >= t) in region_count()
1009 seg_from = max(rg->from, f); in region_count()
1010 seg_to = min(rg->to, t); in region_count()
1012 chg += seg_to - seg_from; in region_count()
1014 spin_unlock(&resv->lock); in region_count()
1026 return ((address - vma->vm_start) >> huge_page_shift(h)) + in vma_hugecache_offset()
1027 (vma->vm_pgoff >> huge_page_order(h)); in vma_hugecache_offset()
1031 * vma_kernel_pagesize - Page size granularity for this VMA.
1041 if (vma->vm_ops && vma->vm_ops->pagesize) in vma_kernel_pagesize()
1042 return vma->vm_ops->pagesize(vma); in vma_kernel_pagesize()
1050 * architectures where it differs, an architecture-specific 'strong'
1088 return (unsigned long)vma->vm_private_data; in get_vma_private_data()
1094 vma->vm_private_data = (void *)value; in set_vma_private_data()
1104 resv_map->reservation_counter = NULL; in resv_map_set_hugetlb_cgroup_uncharge_info()
1105 resv_map->pages_per_hpage = 0; in resv_map_set_hugetlb_cgroup_uncharge_info()
1106 resv_map->css = NULL; in resv_map_set_hugetlb_cgroup_uncharge_info()
1108 resv_map->reservation_counter = in resv_map_set_hugetlb_cgroup_uncharge_info()
1109 &h_cg->rsvd_hugepage[hstate_index(h)]; in resv_map_set_hugetlb_cgroup_uncharge_info()
1110 resv_map->pages_per_hpage = pages_per_huge_page(h); in resv_map_set_hugetlb_cgroup_uncharge_info()
1111 resv_map->css = &h_cg->css; in resv_map_set_hugetlb_cgroup_uncharge_info()
1127 kref_init(&resv_map->refs); in resv_map_alloc()
1128 spin_lock_init(&resv_map->lock); in resv_map_alloc()
1129 INIT_LIST_HEAD(&resv_map->regions); in resv_map_alloc()
1130 init_rwsem(&resv_map->rw_sema); in resv_map_alloc()
1132 resv_map->adds_in_progress = 0; in resv_map_alloc()
1136 * re-initialized to the proper values, to indicate that hugetlb cgroup in resv_map_alloc()
1137 * reservations are to be un-charged from here. in resv_map_alloc()
1141 INIT_LIST_HEAD(&resv_map->region_cache); in resv_map_alloc()
1142 list_add(&rg->link, &resv_map->region_cache); in resv_map_alloc()
1143 resv_map->region_cache_count = 1; in resv_map_alloc()
1151 struct list_head *head = &resv_map->region_cache; in resv_map_release()
1159 list_del(&rg->link); in resv_map_release()
1163 VM_BUG_ON(resv_map->adds_in_progress); in resv_map_release()
1175 * The VERY common case is inode->mapping == &inode->i_data but, in inode_resv_map()
1178 return (struct resv_map *)(&inode->i_data)->i_private_data; in inode_resv_map()
1184 if (vma->vm_flags & VM_MAYSHARE) { in vma_resv_map()
1185 struct address_space *mapping = vma->vm_file->f_mapping; in vma_resv_map()
1186 struct inode *inode = mapping->host; in vma_resv_map()
1199 VM_BUG_ON_VMA(vma->vm_flags & VM_MAYSHARE, vma); in set_vma_resv_map()
1207 VM_BUG_ON_VMA(vma->vm_flags & VM_MAYSHARE, vma); in set_vma_resv_flags()
1221 return !(vma->vm_flags & VM_MAYSHARE) && in __vma_private_lock()
1231 * - For shared mappings this is a per-vma semaphore that may be in hugetlb_dup_vma_private()
1237 * - For MAP_PRIVATE mappings, this is the reserve map which does in hugetlb_dup_vma_private()
1239 * not guaranteed to succeed, even if read-only. in hugetlb_dup_vma_private()
1241 if (vma->vm_flags & VM_MAYSHARE) { in hugetlb_dup_vma_private()
1242 struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; in hugetlb_dup_vma_private()
1244 if (vma_lock && vma_lock->vma != vma) in hugetlb_dup_vma_private()
1245 vma->vm_private_data = NULL; in hugetlb_dup_vma_private()
1247 vma->vm_private_data = NULL; in hugetlb_dup_vma_private()
1252 * Called with mm->mmap_lock writer semaphore held.
1275 kref_put(&reservations->refs, resv_map_release); in clear_vma_resv_huge_pages()
1288 list_move(&folio->lru, &h->hugepage_freelists[nid]); in enqueue_hugetlb_folio()
1289 h->free_huge_pages++; in enqueue_hugetlb_folio()
1290 h->free_huge_pages_node[nid]++; in enqueue_hugetlb_folio()
1298 bool pin = !!(current->flags & PF_MEMALLOC_PIN); in dequeue_hugetlb_folio_node_exact()
1301 list_for_each_entry(folio, &h->hugepage_freelists[nid], lru) { in dequeue_hugetlb_folio_node_exact()
1308 if (is_migrate_isolate_page(&folio->page)) in dequeue_hugetlb_folio_node_exact()
1311 list_move(&folio->lru, &h->hugepage_activelist); in dequeue_hugetlb_folio_node_exact()
1314 h->free_huge_pages--; in dequeue_hugetlb_folio_node_exact()
1315 h->free_huge_pages_node[nid]--; in dequeue_hugetlb_folio_node_exact()
1364 return h->free_huge_pages - h->resv_huge_pages; in available_huge_pages()
1409 * nodes_allowed previously, so h->next_node_to_{alloc|free} might
1448 * helper for remove_pool_hugetlb_folio() - return the previously saved
1459 nid = get_valid_node_allowed(h->next_nid_to_free, nodes_allowed); in hstate_next_node_to_free()
1460 h->next_nid_to_free = next_node_allowed(nid, nodes_allowed); in hstate_next_node_to_free()
1469 nr_nodes--)
1471 #define for_each_node_mask_to_free(hs, nr_nodes, node, mask) \
1474 ((node = hstate_next_node_to_free(hs, mask)) || 1); \
1475 nr_nodes--)
1547 list_del(&folio->lru); in remove_hugetlb_folio()
1551 h->free_huge_pages--; in remove_hugetlb_folio()
1552 h->free_huge_pages_node[nid]--; in remove_hugetlb_folio()
1555 h->surplus_huge_pages--; in remove_hugetlb_folio()
1556 h->surplus_huge_pages_node[nid]--; in remove_hugetlb_folio()
1567 h->nr_huge_pages--; in remove_hugetlb_folio()
1568 h->nr_huge_pages_node[nid]--; in remove_hugetlb_folio()
1580 INIT_LIST_HEAD(&folio->lru); in add_hugetlb_folio()
1581 h->nr_huge_pages++; in add_hugetlb_folio()
1582 h->nr_huge_pages_node[nid]++; in add_hugetlb_folio()
1585 h->surplus_huge_pages++; in add_hugetlb_folio()
1586 h->surplus_huge_pages_node[nid]++; in add_hugetlb_folio()
1662 * freed and frees them one-by-one. As the page->mapping pointer is going
1680 node = node->next; in free_hpage_workfn()
1681 folio->mapping = NULL; in free_hpage_workfn()
1718 if (llist_add((struct llist_node *)&folio->mapping, &hpage_freelist)) in update_and_free_hugetlb_folio()
1737 list_del(&folio->lru); in bulk_vmemmap_restore_error()
1757 list_del(&folio->lru); in bulk_vmemmap_restore_error()
1762 list_del(&folio->lru); in bulk_vmemmap_restore_error()
1842 __ClearPageAnonExclusive(&folio->page); in free_huge_folio()
1843 folio->mapping = NULL; in free_huge_folio()
1872 lruvec_stat_mod_folio(folio, NR_HUGETLB, -pages_per_huge_page(h)); in free_huge_folio()
1875 h->resv_huge_pages++; in free_huge_folio()
1881 } else if (h->surplus_huge_pages_node[nid]) { in free_huge_folio()
1899 h->nr_huge_pages++; in __prep_account_new_huge_page()
1900 h->nr_huge_pages_node[nid]++; in __prep_account_new_huge_page()
1906 INIT_LIST_HEAD(&folio->lru); in init_new_hugetlb_folio()
2109 if ((!acct_surplus || h->surplus_huge_pages_node[node]) && in remove_pool_hugetlb_folio()
2110 !list_empty(&h->hugepage_freelists[node])) { in remove_pool_hugetlb_folio()
2111 folio = list_entry(h->hugepage_freelists[node].next, in remove_pool_hugetlb_folio()
2123 * does nothing for in-use hugetlb folios and non-hugetlb folios.
2126 * -ENOMEM: failed to allocate vmemmap pages to free the freed hugepages
2130 * -EBUSY: failed to dissolve free hugepages or the hugepage is in-use in dissolve_free_hugetlb_folio()
2137 int rc = -EBUSY; in dissolve_free_hugetlb_folio()
2166 * Theoretically, we should return -EBUSY when we in dissolve_free_hugetlb_folio()
2176 if (h->surplus_huge_pages_node[folio_nid(folio)]) in dissolve_free_hugetlb_folio()
2179 h->max_huge_pages--; in dissolve_free_hugetlb_folio()
2192 * non-vmemmap optimized hugetlb folios. in dissolve_free_hugetlb_folio()
2199 h->max_huge_pages++; in dissolve_free_hugetlb_folio()
2258 if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) in alloc_surplus_hugetlb_folio()
2283 if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) { in alloc_surplus_hugetlb_folio()
2290 h->surplus_huge_pages++; in alloc_surplus_hugetlb_folio()
2291 h->surplus_huge_pages_node[folio_nid(folio)]++; in alloc_surplus_hugetlb_folio()
2360 VM_BUG_ON(!h->resv_huge_pages); in alloc_hugetlb_folio_reserve()
2361 h->resv_huge_pages--; in alloc_hugetlb_folio_reserve()
2385 /* We cannot fallback to other nodes, as we could break the per-node pool. */ in alloc_hugetlb_folio_nodemask()
2395 struct mempolicy *mpol = get_task_policy(current); in policy_mbind_nodemask()
2401 if (mpol->mode == MPOL_BIND && in policy_mbind_nodemask()
2403 cpuset_nodemask_valid_mems_allowed(&mpol->nodes))) in policy_mbind_nodemask()
2404 return &mpol->nodes; in policy_mbind_nodemask()
2432 needed = (h->resv_huge_pages + delta) - h->free_huge_pages; in gather_surplus_pages()
2434 h->resv_huge_pages += delta; in gather_surplus_pages()
2440 ret = -ENOMEM; in gather_surplus_pages()
2446 /* Prioritize current node */ in gather_surplus_pages()
2465 list_add(&folio->lru, &surplus_list); in gather_surplus_pages()
2475 needed = (h->resv_huge_pages + delta) - in gather_surplus_pages()
2476 (h->free_huge_pages + allocated); in gather_surplus_pages()
2496 h->resv_huge_pages += delta; in gather_surplus_pages()
2501 if ((--needed) < 0) in gather_surplus_pages()
2536 h->resv_huge_pages -= unused_resv_pages; in return_unused_surplus_pages()
2543 * by pre-allocated pages. Only free surplus pages. in return_unused_surplus_pages()
2545 nr_pages = min(unused_resv_pages, h->surplus_huge_pages); in return_unused_surplus_pages()
2553 * on-line nodes with memory and will handle the hstate accounting. in return_unused_surplus_pages()
2555 while (nr_pages--) { in return_unused_surplus_pages()
2562 list_add(&folio->lru, &page_list); in return_unused_surplus_pages()
2641 if (vma->vm_flags & VM_MAYSHARE) { in __vma_reservation_common()
2651 if (vma->vm_flags & VM_MAYSHARE) { in __vma_reservation_common()
2664 if (vma->vm_flags & VM_MAYSHARE || mode == VMA_DEL_RESV) in __vma_reservation_common()
2673 * Subtle - The reserve map for private mappings has the in __vma_reservation_common()
2793 if (!(vma->vm_flags & VM_MAYSHARE)) in restore_reserve_on_error()
2812 * alloc_and_dissolve_hugetlb_folio - Allocate a new folio and dissolve
2839 * Fail with -EBUSY if not possible. in alloc_and_dissolve_hugetlb_folio()
2843 ret = isolated ? 0 : -EBUSY; in alloc_and_dissolve_hugetlb_folio()
2861 return -ENOMEM; in alloc_and_dissolve_hugetlb_folio()
2903 int ret = -EBUSY; in isolate_or_dissolve_huge_page()
2921 * alloc_contig_range and them. Return -ENOMEM as this has the effect in isolate_or_dissolve_huge_page()
2925 return -ENOMEM; in isolate_or_dissolve_huge_page()
2936 * replace_free_hugepage_folios - Replace free hugepage folios in a given pfn
2991 * For either 0/1: we checked the per-vma resv map, and one resv
2997 * Per-vma resv count cannot be used, hence a new resv
3010 * faults of hugetlb private mappings on top of a non-page-cache folio (in
3029 /* Whether we need a separate per-vma reservation? */ in alloc_hugetlb_folio()
3034 * consume the per-vma resv map even if it's reserved. in alloc_hugetlb_folio()
3045 return ERR_PTR(-ENOMEM); in alloc_hugetlb_folio()
3070 * If this allocation is not consuming a per-vma reservation, in alloc_hugetlb_folio()
3097 list_add(&folio->lru, &h->hugepage_activelist); in alloc_hugetlb_folio()
3103 * Either dequeued or buddy-allocated folio needs to add special in alloc_hugetlb_folio()
3108 h->resv_huge_pages--; in alloc_hugetlb_folio()
3141 hugetlb_acct_memory(h, -rsv_adjust); in alloc_hugetlb_folio()
3160 if (ret == -ENOMEM) { in alloc_hugetlb_folio()
3162 return ERR_PTR(-ENOMEM); in alloc_hugetlb_folio()
3180 hugetlb_acct_memory(h, -gbl_reserve); in alloc_hugetlb_folio()
3187 return ERR_PTR(-ENOSPC); in alloc_hugetlb_folio()
3207 * For pre-HVO to work correctly, pages need to be on in alloc_bootmem()
3218 m->flags = 0; in alloc_bootmem()
3219 m->cma = NULL; in alloc_bootmem()
3232 INIT_LIST_HEAD(&m->list); in alloc_bootmem()
3233 list_add(&m->list, &huge_boot_pages[listnode]); in alloc_bootmem()
3234 m->hstate = h; in alloc_bootmem()
3256 for_each_node_mask_to_alloc(&h->next_nid_to_alloc, nr_nodes, node, &node_states[N_ONLINE]) { in __alloc_bootmem_huge_page()
3273 huge_page_size(h) - PAGE_SIZE); in __alloc_bootmem_huge_page()
3293 prep_compound_tail((struct page *)folio, pfn - head_pfn); in hugetlb_folio_init_tail_vmemmap()
3317 return m->flags & HUGE_BOOTMEM_HVO; in hugetlb_bootmem_page_prehvo()
3322 return m->flags & HUGE_BOOTMEM_CMA; in hugetlb_bootmem_page_earlycma()
3326 * memblock-allocated pageblocks might not have the migrate type set
3332 * read-only, but that's ok - for sparse vmemmap this does not write to
3387 if (m->flags & HUGE_BOOTMEM_ZONES_VALID) { in hugetlb_bootmem_page_zones_valid()
3395 valid = cma_validate_zones(m->cma); in hugetlb_bootmem_page_zones_valid()
3402 pages_per_huge_page(m->hstate)); in hugetlb_bootmem_page_zones_valid()
3405 hstate_boot_nrinvalid[hstate_index(m->hstate)]++; in hugetlb_bootmem_page_zones_valid()
3424 while (npages--) { in hugetlb_bootmem_free_invalid_page()
3446 h = m->hstate; in gather_bootmem_prealloc_node()
3475 * If pre-HVO was done, just set the in gather_bootmem_prealloc_node()
3484 list_add(&folio->lru, &folio_list); in gather_bootmem_prealloc_node()
3489 * other side-effects, like CommitLimit going negative. in gather_bootmem_prealloc_node()
3533 for (i = 0; i < h->max_huge_pages_node[nid]; ++i) { in hugetlb_hstate_alloc_pages_onenode()
3545 list_add(&folio->lru, &folio_list); in hugetlb_hstate_alloc_pages_onenode()
3553 if (i == h->max_huge_pages_node[nid]) in hugetlb_hstate_alloc_pages_onenode()
3558 h->max_huge_pages_node[nid], buf, nid, i); in hugetlb_hstate_alloc_pages_onenode()
3559 h->max_huge_pages -= (h->max_huge_pages_node[nid] - i); in hugetlb_hstate_alloc_pages_onenode()
3560 h->max_huge_pages_node[nid] = i; in hugetlb_hstate_alloc_pages_onenode()
3569 if (h->max_huge_pages_node[i] > 0) { in hugetlb_hstate_alloc_pages_specific_nodes()
3580 if (allocated < h->max_huge_pages) { in hugetlb_hstate_alloc_pages_errcheck()
3585 h->max_huge_pages, buf, allocated); in hugetlb_hstate_alloc_pages_errcheck()
3586 h->max_huge_pages = allocated; in hugetlb_hstate_alloc_pages_errcheck()
3593 int i, num = end - start; in hugetlb_pages_alloc_boot_node()
3598 /* Bit mask controlling how hard we retry per-node allocations.*/ in hugetlb_pages_alloc_boot_node()
3607 list_move(&folio->lru, &folio_list); in hugetlb_pages_alloc_boot_node()
3618 for (i = 0; i < h->max_huge_pages; ++i) { in hugetlb_gigantic_pages_alloc_boot()
3640 job.size = h->max_huge_pages; in hugetlb_pages_alloc_boot()
3651 * +-----------------------+-------+-------+-------+-------+-------+ in hugetlb_pages_alloc_boot()
3653 * +-----------------------+-------+-------+-------+-------+-------+ in hugetlb_pages_alloc_boot()
3656 * +-----------------------+-------+-------+-------+-------+-------+ in hugetlb_pages_alloc_boot()
3664 job.min_chunk = h->max_huge_pages / hugepage_allocation_threads; in hugetlb_pages_alloc_boot()
3671 jiffies_to_msecs(jiffies_end - jiffies_start), in hugetlb_pages_alloc_boot()
3674 return h->nr_huge_pages; in hugetlb_pages_alloc_boot()
3679 * non-gigantic pages.
3680 * - For gigantic pages, this is called early in the boot process and
3684 * - For non-gigantic pages, this is called later in the boot process after
3726 * h->demote_order is initially 0. in hugetlb_init_hstates()
3727 * - We can not demote gigantic pages if runtime freeing in hugetlb_init_hstates()
3729 * - If CMA allocation is possible, we can not demote in hugetlb_init_hstates()
3734 if (hugetlb_cma_total_size() && h->order <= HUGETLB_PAGE_ORDER) in hugetlb_init_hstates()
3739 if (h2->order < h->order && in hugetlb_init_hstates()
3740 h2->order > h->demote_order) in hugetlb_init_hstates()
3741 h->demote_order = h2->order; in hugetlb_init_hstates()
3755 h->max_huge_pages -= nrinvalid; in report_hugepages()
3758 pr_info("HugeTLB: registered %s page size, pre-allocated %ld pages\n", in report_hugepages()
3759 buf, h->free_huge_pages); in report_hugepages()
3784 struct list_head *freel = &h->hugepage_freelists[i]; in try_to_free_low()
3786 if (count >= h->nr_huge_pages) in try_to_free_low()
3791 list_add(&folio->lru, &page_list); in try_to_free_low()
3808 * Increment or decrement surplus_huge_pages. Keep node-specific counters
3809 * balanced by operating on them in a round-robin fashion.
3818 VM_BUG_ON(delta != -1 && delta != 1); in adjust_pool_surplus()
3821 for_each_node_mask_to_alloc(&h->next_nid_to_alloc, nr_nodes, node, nodes_allowed) { in adjust_pool_surplus()
3822 if (h->surplus_huge_pages_node[node]) in adjust_pool_surplus()
3827 if (h->surplus_huge_pages_node[node] < in adjust_pool_surplus()
3828 h->nr_huge_pages_node[node]) in adjust_pool_surplus()
3835 h->surplus_huge_pages += delta; in adjust_pool_surplus()
3836 h->surplus_huge_pages_node[node] += delta; in adjust_pool_surplus()
3840 #define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages)
3852 * Bit mask controlling how hard we retry per-node allocations. in set_max_huge_pages()
3859 return -ENOMEM; in set_max_huge_pages()
3865 mutex_lock(&h->resize_lock); in set_max_huge_pages()
3878 count += persistent_huge_pages(h) - in set_max_huge_pages()
3879 (h->nr_huge_pages_node[nid] - in set_max_huge_pages()
3880 h->surplus_huge_pages_node[nid]); in set_max_huge_pages()
3901 mutex_unlock(&h->resize_lock); in set_max_huge_pages()
3903 return -EINVAL; in set_max_huge_pages()
3919 while (h->surplus_huge_pages && count > persistent_huge_pages(h)) { in set_max_huge_pages()
3920 if (!adjust_pool_surplus(h, nodes_allowed, -1)) in set_max_huge_pages()
3938 &h->next_nid_to_alloc); in set_max_huge_pages()
3945 list_add(&folio->lru, &page_list); in set_max_huge_pages()
3948 /* Bail for signals. Probably ctrl-c from user */ in set_max_huge_pages()
3949 if (signal_pending(current)) { in set_max_huge_pages()
3982 * resv_huge_pages + persistent_huge_pages() - free_huge_pages, in set_max_huge_pages()
3989 persistent_free_count = h->free_huge_pages; in set_max_huge_pages()
3990 if (h->free_huge_pages > persistent_huge_pages(h)) { in set_max_huge_pages()
3991 if (h->free_huge_pages > h->surplus_huge_pages) in set_max_huge_pages()
3992 persistent_free_count -= h->surplus_huge_pages; in set_max_huge_pages()
3996 min_count = h->resv_huge_pages + persistent_huge_pages(h) - persistent_free_count; in set_max_huge_pages()
4008 list_add(&folio->lru, &page_list); in set_max_huge_pages()
4021 h->max_huge_pages = persistent_huge_pages(h); in set_max_huge_pages()
4023 mutex_unlock(&h->resize_lock); in set_max_huge_pages()
4046 * Note that we already hold src->resize_lock. To prevent deadlock, in demote_free_hugetlb_folios()
4049 mutex_lock(&dst->resize_lock); in demote_free_hugetlb_folios()
4060 list_del(&folio->lru); in demote_free_hugetlb_folios()
4062 split_page_owner(&folio->page, huge_page_order(src), huge_page_order(dst)); in demote_free_hugetlb_folios()
4071 prep_compound_page(page, dst->order); in demote_free_hugetlb_folios()
4073 new_folio->mapping = NULL; in demote_free_hugetlb_folios()
4078 list_add(&new_folio->lru, &dst_list); in demote_free_hugetlb_folios()
4084 mutex_unlock(&dst->resize_lock); in demote_free_hugetlb_folios()
4101 if (!src->demote_order) { in demote_pool_huge_page()
4103 return -EINVAL; /* internal error */ in demote_pool_huge_page()
4105 dst = size_to_hstate(PAGE_SIZE << src->demote_order); in demote_pool_huge_page()
4111 list_for_each_entry_safe(folio, next, &src->hugepage_freelists[node], lru) { in demote_pool_huge_page()
4116 list_add(&folio->lru, &list); in demote_pool_huge_page()
4129 list_del(&folio->lru); in demote_pool_huge_page()
4132 nr_demoted--; in demote_pool_huge_page()
4143 src->max_huge_pages -= nr_demoted; in demote_pool_huge_page()
4144 dst->max_huge_pages += nr_demoted << (huge_page_order(src) - huge_page_order(dst)); in demote_pool_huge_page()
4153 * Return -EBUSY so that caller will not retry. in demote_pool_huge_page()
4155 return -EBUSY; in demote_pool_huge_page()
4195 nr_huge_pages = h->nr_huge_pages; in nr_hugepages_show_common()
4197 nr_huge_pages = h->nr_huge_pages_node[nid]; in nr_hugepages_show_common()
4210 return -EINVAL; in __nr_hugepages_store_common()
4268 * hstate attribute for optionally mempolicy-based constraint on persistent
4291 return sysfs_emit(buf, "%lu\n", h->nr_overcommit_huge_pages); in nr_overcommit_hugepages_show()
4302 return -EINVAL; in nr_overcommit_hugepages_store()
4309 h->nr_overcommit_huge_pages = input; in nr_overcommit_hugepages_store()
4325 free_huge_pages = h->free_huge_pages; in free_hugepages_show()
4327 free_huge_pages = h->free_huge_pages_node[nid]; in free_hugepages_show()
4337 return sysfs_emit(buf, "%lu\n", h->resv_huge_pages); in resv_hugepages_show()
4350 surplus_huge_pages = h->surplus_huge_pages; in surplus_hugepages_show()
4352 surplus_huge_pages = h->surplus_huge_pages_node[nid]; in surplus_hugepages_show()
4381 mutex_lock(&h->resize_lock); in demote_store()
4392 nr_available = h->free_huge_pages_node[nid]; in demote_store()
4394 nr_available = h->free_huge_pages; in demote_store()
4395 nr_available -= h->resv_huge_pages; in demote_store()
4405 nr_demote -= rc; in demote_store()
4409 mutex_unlock(&h->resize_lock); in demote_store()
4421 unsigned long demote_size = (PAGE_SIZE << h->demote_order) / SZ_1K; in demote_size_show()
4438 return -EINVAL; in demote_size_store()
4439 demote_order = demote_hstate->order; in demote_size_store()
4441 return -EINVAL; in demote_size_store()
4445 if (demote_order >= h->order) in demote_size_store()
4446 return -EINVAL; in demote_size_store()
4449 mutex_lock(&h->resize_lock); in demote_size_store()
4450 h->demote_order = demote_order; in demote_size_store()
4451 mutex_unlock(&h->resize_lock); in demote_size_store()
4490 hstate_kobjs[hi] = kobject_create_and_add(h->name, parent); in hugetlb_sysfs_add_hstate()
4492 return -ENOMEM; in hugetlb_sysfs_add_hstate()
4501 if (h->demote_order) { in hugetlb_sysfs_add_hstate()
4505 pr_warn("HugeTLB unable to create demote interfaces for %s\n", h->name); in hugetlb_sysfs_add_hstate()
4520 * node_hstate/s - associate per node hstate attributes, via their kobjects,
4547 * kobj_to_node_hstate - lookup global hstate for node device hstate attr kobj.
4548 * Returns node id via non-NULL nidp.
4558 if (nhs->hstate_kobjs[i] == kobj) { in kobj_to_node_hstate()
4571 * No-op if no hstate attributes attached.
4576 struct node_hstate *nhs = &node_hstates[node->dev.id]; in hugetlb_unregister_node()
4578 if (!nhs->hugepages_kobj) in hugetlb_unregister_node()
4583 struct kobject *hstate_kobj = nhs->hstate_kobjs[idx]; in hugetlb_unregister_node()
4587 if (h->demote_order) in hugetlb_unregister_node()
4591 nhs->hstate_kobjs[idx] = NULL; in hugetlb_unregister_node()
4594 kobject_put(nhs->hugepages_kobj); in hugetlb_unregister_node()
4595 nhs->hugepages_kobj = NULL; in hugetlb_unregister_node()
4601 * No-op if attributes already registered.
4606 struct node_hstate *nhs = &node_hstates[node->dev.id]; in hugetlb_register_node()
4612 if (nhs->hugepages_kobj) in hugetlb_register_node()
4615 nhs->hugepages_kobj = kobject_create_and_add("hugepages", in hugetlb_register_node()
4616 &node->dev.kobj); in hugetlb_register_node()
4617 if (!nhs->hugepages_kobj) in hugetlb_register_node()
4621 err = hugetlb_sysfs_add_hstate(h, nhs->hugepages_kobj, in hugetlb_register_node()
4622 nhs->hstate_kobjs, in hugetlb_register_node()
4626 h->name, node->dev.id); in hugetlb_register_node()
4635 * devices of nodes that have memory. All on-line nodes should have
4651 *nidp = -1; in kobj_to_node_hstate()
4672 pr_err("HugeTLB: Unable to add hstate %s\n", h->name); in hugetlb_sysfs_init()
4696 pr_warn("HugeTLB: huge pages not supported, ignoring associated command-line parameters\n"); in hugetlb_init()
4777 __mutex_init(&h->resize_lock, "resize mutex", &h->resize_key); in hugetlb_add_hstate()
4778 h->order = order; in hugetlb_add_hstate()
4779 h->mask = ~(huge_page_size(h) - 1); in hugetlb_add_hstate()
4781 INIT_LIST_HEAD(&h->hugepage_freelists[i]); in hugetlb_add_hstate()
4782 INIT_LIST_HEAD(&h->hugepage_activelist); in hugetlb_add_hstate()
4783 snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB", in hugetlb_add_hstate()
4801 parsed_hstate->max_huge_pages = 0; in hugepages_clear_pages_in_node()
4802 memset(parsed_hstate->max_huge_pages_node, 0, in hugepages_clear_pages_in_node()
4803 sizeof(parsed_hstate->max_huge_pages_node)); in hugepages_clear_pages_in_node()
4813 return -EINVAL; in hugetlb_add_param()
4817 return -EINVAL; in hugetlb_add_param()
4839 hcp->setup(hcp->val); in hugetlb_parse_params()
4864 return -EINVAL; in hugepages_setup()
4876 mhp = &parsed_hstate->max_huge_pages; in hugepages_setup()
4903 parsed_hstate->max_huge_pages_node[node] = tmp; in hugepages_setup()
4925 return -EINVAL; in hugepages_setup()
4946 return -EINVAL; in hugepagesz_setup()
4961 return -EINVAL; in hugepagesz_setup()
4974 hugetlb_add_hstate(ilog2(size) - PAGE_SHIFT); in hugepagesz_setup()
4992 return -EINVAL; in default_hugepagesz_setup()
4999 return -EINVAL; in default_hugepagesz_setup()
5002 hugetlb_add_hstate(ilog2(size) - PAGE_SHIFT); in default_hugepagesz_setup()
5053 h->next_nid_to_alloc = first_online_node; in hugetlb_bootmem_alloc()
5054 h->next_nid_to_free = first_online_node; in hugetlb_bootmem_alloc()
5090 unsigned int *array = h->free_huge_pages_node; in allowed_mems_nr()
5124 unsigned long tmp = h->max_huge_pages; in hugetlb_sysctl_handler_common()
5128 return -EOPNOTSUPP; in hugetlb_sysctl_handler_common()
5167 return -EOPNOTSUPP; in hugetlb_overcommit_handler()
5169 tmp = h->nr_overcommit_huge_pages; in hugetlb_overcommit_handler()
5172 return -EINVAL; in hugetlb_overcommit_handler()
5181 h->nr_overcommit_huge_pages = tmp; in hugetlb_overcommit_handler()
5236 unsigned long count = h->nr_huge_pages; in hugetlb_report_meminfo()
5248 h->free_huge_pages, in hugetlb_report_meminfo()
5249 h->resv_huge_pages, in hugetlb_report_meminfo()
5250 h->surplus_huge_pages, in hugetlb_report_meminfo()
5268 nid, h->nr_huge_pages_node[nid], in hugetlb_report_node_meminfo()
5269 nid, h->free_huge_pages_node[nid], in hugetlb_report_node_meminfo()
5270 nid, h->surplus_huge_pages_node[nid]); in hugetlb_report_node_meminfo()
5283 h->nr_huge_pages_node[nid], in hugetlb_show_meminfo_node()
5284 h->free_huge_pages_node[nid], in hugetlb_show_meminfo_node()
5285 h->surplus_huge_pages_node[nid], in hugetlb_show_meminfo_node()
5292 K(atomic_long_read(&mm->hugetlb_usage))); in hugetlb_report_usage()
5302 nr_total_pages += h->nr_huge_pages * pages_per_huge_page(h); in hugetlb_total_pages()
5308 int ret = -ENOMEM; in hugetlb_acct_memory()
5319 * current cpuset. Application can still potentially OOM'ed by kernel in hugetlb_acct_memory()
5327 * we fall back to check against current free page availability as in hugetlb_acct_memory()
5334 * the memory policy of the current task. Similar to the description in hugetlb_acct_memory()
5349 return_unused_surplus_pages(h, (unsigned long) -delta); in hugetlb_acct_memory()
5371 kref_get(&resv->refs); in hugetlb_vm_op_open()
5380 if (vma->vm_flags & VM_MAYSHARE) { in hugetlb_vm_op_open()
5381 struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; in hugetlb_vm_op_open()
5384 if (vma_lock->vma != vma) { in hugetlb_vm_op_open()
5385 vma->vm_private_data = NULL; in hugetlb_vm_op_open()
5408 start = vma_hugecache_offset(h, vma, vma->vm_start); in hugetlb_vm_op_close()
5409 end = vma_hugecache_offset(h, vma, vma->vm_end); in hugetlb_vm_op_close()
5411 reserve = (end - start) - region_count(resv, start, end); in hugetlb_vm_op_close()
5419 hugetlb_acct_memory(h, -gbl_reserve); in hugetlb_vm_op_close()
5422 kref_put(&resv->refs, resv_map_release); in hugetlb_vm_op_close()
5428 return -EINVAL; in hugetlb_vm_op_split()
5431 * PMD sharing is only possible for PUD_SIZE-aligned address ranges in hugetlb_vm_op_split()
5444 if (floor >= vma->vm_start && ceil <= vma->vm_end) in hugetlb_vm_op_split()
5458 * handle_mm_fault() to try to instantiate regular-sized pages in the
5489 if (try_mkwrite && (vma->vm_flags & VM_WRITE)) { in make_huge_pte()
5491 vma->vm_page_prot))); in make_huge_pte()
5494 vma->vm_page_prot)); in make_huge_pte()
5497 entry = arch_make_huge_pte(entry, shift, vma->vm_flags); in make_huge_pte()
5507 entry = huge_pte_mkwrite(huge_pte_mkdirty(huge_ptep_get(vma->vm_mm, address, ptep))); in set_huge_ptep_writable()
5515 if (vma->vm_flags & VM_WRITE) in set_huge_ptep_maybe_writable()
5549 pte_t newpte = make_huge_pte(vma, &new_folio->page, true); in hugetlb_install_folio()
5555 set_huge_pte_at(vma->vm_mm, addr, ptep, newpte, sz); in hugetlb_install_folio()
5556 hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm); in hugetlb_install_folio()
5567 bool cow = is_cow_mapping(src_vma->vm_flags); in copy_hugetlb_page_range()
5577 src_vma->vm_start, in copy_hugetlb_page_range()
5578 src_vma->vm_end); in copy_hugetlb_page_range()
5581 raw_write_seqcount_begin(&src->write_protect_seq); in copy_hugetlb_page_range()
5593 for (addr = src_vma->vm_start; addr < src_vma->vm_end; addr += sz) { in copy_hugetlb_page_range()
5602 ret = -ENOMEM; in copy_hugetlb_page_range()
5622 entry = huge_ptep_get(src_vma->vm_mm, addr, src_pte); in copy_hugetlb_page_range()
5660 entry = huge_ptep_get(src_vma->vm_mm, addr, src_pte); in copy_hugetlb_page_range()
5670 * When pre-allocating the page or copying data, we in copy_hugetlb_page_range()
5701 entry = huge_ptep_get(src_vma->vm_mm, addr, src_pte); in copy_hugetlb_page_range()
5739 raw_write_seqcount_end(&src->write_protect_seq); in copy_hugetlb_page_range()
5754 struct mm_struct *mm = vma->vm_mm; in move_huge_pte()
5793 struct address_space *mapping = vma->vm_file->f_mapping; in move_hugetlb_page_tables()
5795 struct mm_struct *mm = vma->vm_mm; in move_hugetlb_page_tables()
5843 flush_hugetlb_tlb_range(vma, old_end - len, old_end); in move_hugetlb_page_tables()
5848 return len + old_addr - old_end; in move_hugetlb_page_tables()
5855 struct mm_struct *mm = vma->vm_mm; in __unmap_hugepage_range()
5908 * If the pte was wr-protected by uffd-wp in any of the in __unmap_hugepage_range()
5910 * drop the uffd-wp bit in this zap, then replace the in __unmap_hugepage_range()
5947 /* Leave a uffd-wp pte marker if needed */ in __unmap_hugepage_range()
5962 if (!h->surplus_huge_pages && __vma_private_lock(vma) && in __unmap_hugepage_range()
5974 * resv->adds_in_progress if it succeeds. If this is not done, in __unmap_hugepage_range()
6022 if (!vma->vm_file) /* hugetlbfs_file_mmap error */ in __hugetlb_zap_begin()
6027 if (vma->vm_file) in __hugetlb_zap_begin()
6028 i_mmap_lock_write(vma->vm_file->f_mapping); in __hugetlb_zap_begin()
6034 zap_flags_t zap_flags = details ? details->zap_flags : 0; in __hugetlb_zap_end()
6036 if (!vma->vm_file) /* hugetlbfs_file_mmap error */ in __hugetlb_zap_end()
6054 if (vma->vm_file) in __hugetlb_zap_end()
6055 i_mmap_unlock_write(vma->vm_file->f_mapping); in __hugetlb_zap_end()
6065 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm, in unmap_hugepage_range()
6069 tlb_gather_mmu(&tlb, vma->vm_mm); in unmap_hugepage_range()
6096 pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + in unmap_ref_private()
6097 vma->vm_pgoff; in unmap_ref_private()
6098 mapping = vma->vm_file->f_mapping; in unmap_ref_private()
6106 vma_interval_tree_foreach(iter_vma, &mapping->i_mmap, pgoff, pgoff) { in unmap_ref_private()
6107 /* Do not unmap the current VMA */ in unmap_ref_private()
6116 if (iter_vma->vm_flags & VM_MAYSHARE) in unmap_ref_private()
6122 * areas. This is because a future no-page fault on this VMA in unmap_ref_private()
6142 struct vm_area_struct *vma = vmf->vma; in hugetlb_wp()
6143 struct mm_struct *mm = vma->vm_mm; in hugetlb_wp()
6144 const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE; in hugetlb_wp()
6145 pte_t pte = huge_ptep_get(mm, vmf->address, vmf->pte); in hugetlb_wp()
6154 * Never handle CoW for uffd-wp protected pages. It should be only in hugetlb_wp()
6155 * handled when the uffd-wp protection is removed. in hugetlb_wp()
6159 * uffd-wp bit first. in hugetlb_wp()
6165 if (vma->vm_flags & VM_MAYSHARE) { in hugetlb_wp()
6166 set_huge_ptep_writable(vma, vmf->address, vmf->pte); in hugetlb_wp()
6176 * If no-one else is actually using this page, we're the exclusive in hugetlb_wp()
6187 if (!PageAnonExclusive(&old_folio->page)) { in hugetlb_wp()
6189 SetPageAnonExclusive(&old_folio->page); in hugetlb_wp()
6192 set_huge_ptep_maybe_writable(vma, vmf->address, in hugetlb_wp()
6193 vmf->pte); in hugetlb_wp()
6199 PageAnonExclusive(&old_folio->page), &old_folio->page); in hugetlb_wp()
6220 spin_unlock(vmf->ptl); in hugetlb_wp()
6221 new_folio = alloc_hugetlb_folio(vma, vmf->address, cow_from_owner); in hugetlb_wp()
6232 struct address_space *mapping = vma->vm_file->f_mapping; in hugetlb_wp()
6246 idx = vma_hugecache_offset(h, vma, vmf->address); in hugetlb_wp()
6251 unmap_ref_private(mm, vma, &old_folio->page, in hugetlb_wp()
6252 vmf->address); in hugetlb_wp()
6256 spin_lock(vmf->ptl); in hugetlb_wp()
6257 vmf->pte = hugetlb_walk(vma, vmf->address, in hugetlb_wp()
6259 if (likely(vmf->pte && in hugetlb_wp()
6260 pte_same(huge_ptep_get(mm, vmf->address, vmf->pte), pte))) in hugetlb_wp()
6263 * race occurs while re-acquiring page table in hugetlb_wp()
6282 if (copy_user_large_folio(new_folio, old_folio, vmf->real_address, vma)) { in hugetlb_wp()
6288 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, vmf->address, in hugetlb_wp()
6289 vmf->address + huge_page_size(h)); in hugetlb_wp()
6296 spin_lock(vmf->ptl); in hugetlb_wp()
6297 vmf->pte = hugetlb_walk(vma, vmf->address, huge_page_size(h)); in hugetlb_wp()
6298 if (likely(vmf->pte && pte_same(huge_ptep_get(mm, vmf->address, vmf->pte), pte))) { in hugetlb_wp()
6299 pte_t newpte = make_huge_pte(vma, &new_folio->page, !unshare); in hugetlb_wp()
6302 huge_ptep_clear_flush(vma, vmf->address, vmf->pte); in hugetlb_wp()
6304 hugetlb_add_new_anon_rmap(new_folio, vma, vmf->address); in hugetlb_wp()
6307 set_huge_pte_at(mm, vmf->address, vmf->pte, newpte, in hugetlb_wp()
6313 spin_unlock(vmf->ptl); in hugetlb_wp()
6321 restore_reserve_on_error(h, vma, vmf->address, new_folio); in hugetlb_wp()
6326 spin_lock(vmf->ptl); /* Caller expects lock to be held */ in hugetlb_wp()
6338 struct address_space *mapping = vma->vm_file->f_mapping; in hugetlbfs_pagecache_present()
6352 struct inode *inode = mapping->host; in hugetlb_add_to_page_cache()
6368 * by non-hugetlbfs specific code paths. in hugetlb_add_to_page_cache()
6372 spin_lock(&inode->i_lock); in hugetlb_add_to_page_cache()
6373 inode->i_blocks += blocks_per_huge_page(h); in hugetlb_add_to_page_cache()
6374 spin_unlock(&inode->i_lock); in hugetlb_add_to_page_cache()
6389 hugetlb_vma_unlock_read(vmf->vma); in hugetlb_handle_userfault()
6390 hash = hugetlb_fault_mutex_hash(mapping, vmf->pgoff); in hugetlb_handle_userfault()
6415 struct vm_area_struct *vma = vmf->vma; in hugetlb_no_page()
6416 struct mm_struct *mm = vma->vm_mm; in hugetlb_no_page()
6424 u32 hash = hugetlb_fault_mutex_hash(mapping, vmf->pgoff); in hugetlb_no_page()
6434 current->pid); in hugetlb_no_page()
6443 folio = filemap_lock_hugetlb_folio(h, mapping, vmf->pgoff); in hugetlb_no_page()
6445 size = i_size_read(mapping->host) >> huge_page_shift(h); in hugetlb_no_page()
6446 if (vmf->pgoff >= size) in hugetlb_no_page()
6452 * without pgtable lock, we need to re-test under in hugetlb_no_page()
6455 * either changed or during-changing ptes and retry in hugetlb_no_page()
6467 if (!hugetlb_pte_stable(h, mm, vmf->address, vmf->pte, vmf->orig_pte)) { in hugetlb_no_page()
6476 if (!(vma->vm_flags & VM_MAYSHARE)) { in hugetlb_no_page()
6482 folio = alloc_hugetlb_folio(vma, vmf->address, false); in hugetlb_no_page()
6496 if (hugetlb_pte_stable(h, mm, vmf->address, vmf->pte, vmf->orig_pte)) in hugetlb_no_page()
6502 folio_zero_user(folio, vmf->real_address); in hugetlb_no_page()
6506 if (vma->vm_flags & VM_MAYSHARE) { in hugetlb_no_page()
6508 vmf->pgoff); in hugetlb_no_page()
6511 * err can't be -EEXIST which implies someone in hugetlb_no_page()
6517 restore_reserve_on_error(h, vma, vmf->address, in hugetlb_no_page()
6545 if (!hugetlb_pte_stable(h, mm, vmf->address, vmf->pte, vmf->orig_pte)) { in hugetlb_no_page()
6560 if ((vmf->flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) { in hugetlb_no_page()
6561 if (vma_needs_reservation(h, vma, vmf->address) < 0) { in hugetlb_no_page()
6566 vma_end_reservation(h, vma, vmf->address); in hugetlb_no_page()
6569 vmf->ptl = huge_pte_lock(h, mm, vmf->pte); in hugetlb_no_page()
6572 if (!pte_same(huge_ptep_get(mm, vmf->address, vmf->pte), vmf->orig_pte)) in hugetlb_no_page()
6576 hugetlb_add_new_anon_rmap(folio, vma, vmf->address); in hugetlb_no_page()
6579 new_pte = make_huge_pte(vma, &folio->page, vma->vm_flags & VM_SHARED); in hugetlb_no_page()
6581 * If this pte was previously wr-protected, keep it wr-protected even in hugetlb_no_page()
6584 if (unlikely(pte_marker_uffd_wp(vmf->orig_pte))) in hugetlb_no_page()
6586 set_huge_pte_at(mm, vmf->address, vmf->pte, new_pte, huge_page_size(h)); in hugetlb_no_page()
6589 if ((vmf->flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) { in hugetlb_no_page()
6594 spin_unlock(vmf->ptl); in hugetlb_no_page()
6609 * We must check to release the per-VMA lock. __vmf_anon_prepare() is in hugetlb_no_page()
6619 spin_unlock(vmf->ptl); in hugetlb_no_page()
6622 restore_reserve_on_error(h, vma, vmf->address, folio); in hugetlb_no_page()
6640 return hash & (num_fault_mutexes - 1); in hugetlb_fault_mutex_hash()
6683 mapping = vma->vm_file->f_mapping; in hugetlb_fault()
6761 !(vma->vm_flags & VM_MAYSHARE) && !huge_pte_write(vmf.orig_pte)) { in hugetlb_fault()
6781 /* Handle userfault-wp first, before trying to lock more pages */ in hugetlb_fault()
6842 * We must check to release the per-VMA lock. __vmf_anon_prepare() in in hugetlb_fault()
6852 * the page is not used after unlocked before returning from the current in hugetlb_fault()
6880 * that breaking the per-node hugetlb pool is not allowed in this case. in alloc_hugetlb_folio_vma()
6899 struct mm_struct *dst_mm = dst_vma->vm_mm; in hugetlb_mfill_atomic_pte()
6903 struct address_space *mapping = dst_vma->vm_file->f_mapping; in hugetlb_mfill_atomic_pte()
6906 int vm_shared = dst_vma->vm_flags & VM_SHARED; in hugetlb_mfill_atomic_pte()
6909 int ret = -ENOMEM; in hugetlb_mfill_atomic_pte()
6919 return -EEXIST; in hugetlb_mfill_atomic_pte()
6925 /* No need to invalidate - it was non-present before */ in hugetlb_mfill_atomic_pte()
6933 ret = -EFAULT; in hugetlb_mfill_atomic_pte()
6940 * a non-missing case. Return -EEXIST. in hugetlb_mfill_atomic_pte()
6944 ret = -EEXIST; in hugetlb_mfill_atomic_pte()
6950 ret = -ENOMEM; in hugetlb_mfill_atomic_pte()
6959 ret = -ENOENT; in hugetlb_mfill_atomic_pte()
6971 ret = -ENOMEM; in hugetlb_mfill_atomic_pte()
6985 ret = -EEXIST; in hugetlb_mfill_atomic_pte()
6993 ret = -ENOMEM; in hugetlb_mfill_atomic_pte()
7024 ret = -EFAULT; in hugetlb_mfill_atomic_pte()
7025 if (idx >= (i_size_read(mapping->host) >> huge_page_shift(h))) in hugetlb_mfill_atomic_pte()
7042 ret = -EIO; in hugetlb_mfill_atomic_pte()
7048 * registered, we firstly wr-protect a none pte which has no page cache in hugetlb_mfill_atomic_pte()
7051 ret = -EEXIST; in hugetlb_mfill_atomic_pte()
7061 * For either: (1) CONTINUE on a non-shared VMA, or (2) UFFDIO_COPY in hugetlb_mfill_atomic_pte()
7064 _dst_pte = make_huge_pte(dst_vma, &folio->page, in hugetlb_mfill_atomic_pte()
7082 /* No need to invalidate - it was non-present before */ in hugetlb_mfill_atomic_pte()
7109 struct mm_struct *mm = vma->vm_mm; in hugetlb_change_protection()
7135 i_mmap_lock_write(vma->vm_file->f_mapping); in hugetlb_change_protection()
7146 * Userfaultfd wr-protect requires pgtable in hugetlb_change_protection()
7147 * pre-allocations to install pte markers. in hugetlb_change_protection()
7151 pages = -ENOMEM; in hugetlb_change_protection()
7158 * When uffd-wp is enabled on the vma, unshare in hugetlb_change_protection()
7202 /* Safe to modify directly (non-present->none). */ in hugetlb_change_protection()
7210 pte = arch_make_huge_pte(pte, shift, vma->vm_flags); in hugetlb_change_protection()
7220 /* Safe to modify directly (none->non-present). */ in hugetlb_change_protection()
7245 i_mmap_unlock_write(vma->vm_file->f_mapping); in hugetlb_change_protection()
7249 return pages > 0 ? (pages << h->order) : pages; in hugetlb_change_protection()
7258 long chg = -1, add = -1, spool_resv, gbl_resv; in hugetlb_reserve_pages()
7288 * to reserve the full area even if read-only as mprotect() may be in hugetlb_reserve_pages()
7289 * called to make the mapping read-write. Assume !vma is a shm mapping in hugetlb_reserve_pages()
7291 if (!vma || vma->vm_flags & VM_MAYSHARE) { in hugetlb_reserve_pages()
7306 chg = to - from; in hugetlb_reserve_pages()
7319 if (vma && !(vma->vm_flags & VM_MAYSHARE) && h_cg) { in hugetlb_reserve_pages()
7347 * the reservation was consumed. Private mappings are per-VMA and in hugetlb_reserve_pages()
7353 if (!vma || vma->vm_flags & VM_MAYSHARE) { in hugetlb_reserve_pages()
7357 hugetlb_acct_memory(h, -gbl_reserve); in hugetlb_reserve_pages()
7371 * reference to h_cg->css. See comment below for detail. in hugetlb_reserve_pages()
7375 (chg - add) * pages_per_huge_page(h), h_cg); in hugetlb_reserve_pages()
7378 chg - add); in hugetlb_reserve_pages()
7379 hugetlb_acct_memory(h, -rsv_adjust); in hugetlb_reserve_pages()
7383 * h_cg->css. So we should release the reference held in hugetlb_reserve_pages()
7393 spool_resv = chg - gbl_reserve; in hugetlb_reserve_pages()
7395 /* put sub pool's reservation back, chg - gbl_reserve */ in hugetlb_reserve_pages()
7401 hugetlb_acct_memory(h, -gbl_resv); in hugetlb_reserve_pages()
7408 if (!vma || vma->vm_flags & VM_MAYSHARE) in hugetlb_reserve_pages()
7415 kref_put(&resv_map->refs, resv_map_release); in hugetlb_reserve_pages()
7445 spin_lock(&inode->i_lock); in hugetlb_unreserve_pages()
7446 inode->i_blocks -= (blocks_per_huge_page(h) * freed); in hugetlb_unreserve_pages()
7447 spin_unlock(&inode->i_lock); in hugetlb_unreserve_pages()
7453 * Note that !resv_map implies freed == 0. So (chg - freed) in hugetlb_unreserve_pages()
7456 gbl_reserve = hugepage_subpool_put_pages(spool, (chg - freed)); in hugetlb_unreserve_pages()
7457 hugetlb_acct_memory(h, -gbl_reserve); in hugetlb_unreserve_pages()
7467 unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) + in page_table_shareable()
7468 svma->vm_start; in page_table_shareable()
7473 unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED_MASK; in page_table_shareable()
7474 unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED_MASK; in page_table_shareable()
7485 !svma->vm_private_data) in page_table_shareable()
7503 if (!(vma->vm_flags & VM_MAYSHARE)) in want_pmd_share()
7505 if (!vma->vm_private_data) /* vma lock required for sharing */ in want_pmd_share()
7520 unsigned long v_start = ALIGN(vma->vm_start, PUD_SIZE), in adjust_range_if_pmd_sharing_possible()
7521 v_end = ALIGN_DOWN(vma->vm_end, PUD_SIZE); in adjust_range_if_pmd_sharing_possible()
7527 if (!(vma->vm_flags & VM_MAYSHARE) || !(v_end > v_start) || in adjust_range_if_pmd_sharing_possible()
7544 * pud has to be populated inside the same i_mmap_rwsem section - otherwise
7551 struct address_space *mapping = vma->vm_file->f_mapping; in huge_pmd_share()
7552 pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + in huge_pmd_share()
7553 vma->vm_pgoff; in huge_pmd_share()
7560 vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) { in huge_pmd_share()
7578 spin_lock(&mm->page_table_lock); in huge_pmd_share()
7586 spin_unlock(&mm->page_table_lock); in huge_pmd_share()
7609 i_mmap_assert_write_locked(vma->vm_file->f_mapping); in huge_pmd_unshare()
7683 * huge_pte_offset() - Walk the page table to resolve the hugepage
7708 /* must be pud huge, non-present or none */ in huge_pte_offset()
7715 /* must be pmd huge, non-present or none */ in huge_pte_offset()
7721 * page in a page table page mapping size. Used to skip non-present
7731 return P4D_SIZE - PUD_SIZE; in hugetlb_mask_last_page()
7733 return PUD_SIZE - PMD_SIZE; in hugetlb_mask_last_page()
7745 return PUD_SIZE - PMD_SIZE; in hugetlb_mask_last_page()
7753 * folio_isolate_hugetlb - try to isolate an allocated hugetlb folio
7758 * isolated/non-migratable, and moving it from the active list to the
7762 * it is already isolated/non-migratable.
7781 list_move_tail(&folio->lru, list); in folio_isolate_hugetlb()
7800 ret = -EBUSY; in get_hwpoison_hugetlb_folio()
7818 * folio_putback_hugetlb - unisolate a hugetlb folio
7821 * Putback/un-isolate the hugetlb folio that was previous isolated using
7822 * folio_isolate_hugetlb(): marking it non-isolated/migratable and putting it
7832 list_move_tail(&folio->lru, &(folio_hstate(folio))->hugepage_activelist); in folio_putback_hugetlb()
7842 set_page_owner_migrate_reason(&new_folio->page, reason); in move_hugetlb_state()
7850 * Also note that we have to transfer the per-node surplus state in move_hugetlb_state()
7852 * the per-node's. in move_hugetlb_state()
7863 * There is no need to transfer the per-node surplus state in move_hugetlb_state()
7869 if (h->surplus_huge_pages_node[old_nid]) { in move_hugetlb_state()
7870 h->surplus_huge_pages_node[old_nid]--; in move_hugetlb_state()
7871 h->surplus_huge_pages_node[new_nid]++; in move_hugetlb_state()
7883 list_move_tail(&new_folio->lru, &(folio_hstate(new_folio))->hugepage_activelist); in move_hugetlb_state()
7893 struct mm_struct *mm = vma->vm_mm; in hugetlb_unshare_pmds()
7899 if (!(vma->vm_flags & VM_MAYSHARE)) in hugetlb_unshare_pmds()
7914 i_mmap_lock_write(vma->vm_file->f_mapping); in hugetlb_unshare_pmds()
7924 i_mmap_unlock_write(vma->vm_file->f_mapping); in hugetlb_unshare_pmds()
7939 hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE), in hugetlb_unshare_all_pmds()
7940 ALIGN_DOWN(vma->vm_end, PUD_SIZE)); in hugetlb_unshare_all_pmds()
7944 * For hugetlb, mremap() is an odd edge case - while the VMA copying is