Lines Matching +full:lock +full:-offset (from mm/swapfile.c in the Linux kernel; only source lines containing a match are listed, each prefixed with its line number in the file)
1 // SPDX-License-Identifier: GPL-2.0-only
22 #include <linux/blk-cgroup.h>
31 #include <linux/backing-dev.h>
66 /* protected with swap_lock. reading in vm_swap_full() doesn't need lock */
68 static int least_priority = -1;
76 static const char Bad_offset[] = "Bad swap offset entry ";
77 static const char Unused_offset[] = "Unused swap offset entry ";
91 * This uses its own lock instead of swap_lock because when a
92 * swap_info_struct changes between not-full/full, it needs to
93 * add/remove itself to/from this list, but the swap_info_struct->lock
95 * before any swap_info_struct->lock.
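
The comment above (source lines 91-95) explains why the list of available swap devices has its own swap_avail_lock: the list is updated while a swap_info_struct's own lock is already held, so a lock is needed that can nest inside si->lock instead of following the swap_lock ordering. A minimal reconstruction of the wrapper around __del_from_avail_list() (whose assert_spin_locked(&p->lock) appears at line 685 below) shows that nesting; this is a readability sketch that assumes kernel context and is not compilable on its own.

static void del_from_avail_list(struct swap_info_struct *p)
{
        /* Caller already holds p->lock (see the assert at line 685). */
        spin_lock(&swap_avail_lock);
        __del_from_avail_list(p);
        spin_unlock(&swap_avail_lock);
}
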
135 unsigned long offset, unsigned long flags) in __try_to_reclaim_swap() argument
137 swp_entry_t entry = swp_entry(si->type, offset); in __try_to_reclaim_swap()
141 folio = filemap_get_folio(swap_address_space(entry), offset); in __try_to_reclaim_swap()
146 * called by vmscan.c at reclaiming folios. So we hold a folio lock in __try_to_reclaim_swap()
164 struct rb_node *rb = rb_first(&sis->swap_extent_root); in first_se()
170 struct rb_node *rb = rb_next(&se->rb_node); in next_se()
176 * to allow the swap device to optimize its wear-levelling.
187 start_block = (se->start_block + 1) << (PAGE_SHIFT - 9); in discard_swap()
188 nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9); in discard_swap()
190 err = blkdev_issue_discard(si->bdev, start_block, in discard_swap()
198 start_block = se->start_block << (PAGE_SHIFT - 9); in discard_swap()
199 nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9); in discard_swap()
201 err = blkdev_issue_discard(si->bdev, start_block, in discard_swap()
208 return err; /* That will often be -EOPNOTSUPP */ in discard_swap()
212 offset_to_swap_extent(struct swap_info_struct *sis, unsigned long offset) in offset_to_swap_extent() argument
217 rb = sis->swap_extent_root.rb_node; in offset_to_swap_extent()
220 if (offset < se->start_page) in offset_to_swap_extent()
221 rb = rb->rb_left; in offset_to_swap_extent()
222 else if (offset >= se->start_page + se->nr_pages) in offset_to_swap_extent()
223 rb = rb->rb_right; in offset_to_swap_extent()
233 struct swap_info_struct *sis = swp_swap_info(folio->swap); in swap_folio_sector()
236 pgoff_t offset; in swap_folio_sector() local
238 offset = swp_offset(folio->swap); in swap_folio_sector()
239 se = offset_to_swap_extent(sis, offset); in swap_folio_sector()
240 sector = se->start_block + (offset - se->start_page); in swap_folio_sector()
241 return sector << (PAGE_SHIFT - 9); in swap_folio_sector()
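
swap_folio_sector() above maps a swap offset, measured in pages, onto a 512-byte device sector: look up the extent that covers the offset, add the page delta to the extent's starting block, then shift by PAGE_SHIFT - 9 (with 4 KiB pages that is 2^3 = 8 sectors per page). discard_swap() and discard_swap_cluster() use the same shift. Below is a small self-contained demonstration of the arithmetic; the struct and page size are chosen here for illustration only.

#include <stdio.h>

#define PAGE_SHIFT 12                           /* assume 4 KiB pages for the demo */

struct extent {                                 /* simplified stand-in for swap_extent */
        unsigned long start_page;               /* first swap offset covered */
        unsigned long nr_pages;                 /* number of pages covered */
        unsigned long long start_block;         /* first page-sized block on the device */
};

/* Mirrors: sector = se->start_block + (offset - se->start_page);
 *          return sector << (PAGE_SHIFT - 9);                     */
static unsigned long long offset_to_sector(const struct extent *se,
                                           unsigned long offset)
{
        unsigned long long sector = se->start_block + (offset - se->start_page);

        return sector << (PAGE_SHIFT - 9);      /* page-sized blocks -> 512 B sectors */
}

int main(void)
{
        struct extent se = { .start_page = 100, .nr_pages = 50, .start_block = 2048 };

        /* offset 103 is 3 pages into the extent: block 2051, sector 2051 * 8 = 16408 */
        printf("%llu\n", offset_to_sector(&se, 103));
        return 0;
}
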
246 * to allow the swap device to optimize its wear-levelling.
254 pgoff_t offset = start_page - se->start_page; in discard_swap_cluster() local
255 sector_t start_block = se->start_block + offset; in discard_swap_cluster()
256 sector_t nr_blocks = se->nr_pages - offset; in discard_swap_cluster()
261 nr_pages -= nr_blocks; in discard_swap_cluster()
263 start_block <<= PAGE_SHIFT - 9; in discard_swap_cluster()
264 nr_blocks <<= PAGE_SHIFT - 9; in discard_swap_cluster()
265 if (blkdev_issue_discard(si->bdev, start_block, in discard_swap_cluster()
291 info->flags = flag; in cluster_set_flag()
296 return info->data; in cluster_count()
302 info->data = c; in cluster_set_count()
308 info->flags = f; in cluster_set_count_flag()
309 info->data = c; in cluster_set_count_flag()
314 return info->data; in cluster_next()
320 info->data = n; in cluster_set_next()
326 info->flags = f; in cluster_set_next_flag()
327 info->data = n; in cluster_set_next_flag()
332 return info->flags & CLUSTER_FLAG_FREE; in cluster_is_free()
337 return info->flags & CLUSTER_FLAG_NEXT_NULL; in cluster_is_null()
342 info->flags = CLUSTER_FLAG_NEXT_NULL; in cluster_set_null()
343 info->data = 0; in cluster_set_null()
349 return info->flags & CLUSTER_FLAG_HUGE; in cluster_is_huge()
355 info->flags &= ~CLUSTER_FLAG_HUGE; in cluster_clear_huge()
359 unsigned long offset) in lock_cluster() argument
363 ci = si->cluster_info; in lock_cluster()
365 ci += offset / SWAPFILE_CLUSTER; in lock_cluster()
366 spin_lock(&ci->lock); in lock_cluster()
374 spin_unlock(&ci->lock); in unlock_cluster()
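
lock_cluster() above picks the per-cluster spinlock by plain integer division: the swap offset divided by SWAPFILE_CLUSTER indexes the si->cluster_info array, and the reverse mapping (cluster index times SWAPFILE_CLUSTER) reappears later in swap_alloc_cluster(), swap_free_cluster() and put_swap_folio(). A tiny self-contained illustration follows; the cluster size of 256 pages is an assumed value for the demo, not taken from this listing.

#include <stdio.h>

#define SWAPFILE_CLUSTER 256UL          /* illustrative cluster size for this demo */

int main(void)
{
        unsigned long offset = 70000;
        unsigned long idx  = offset / SWAPFILE_CLUSTER;  /* which cluster_info[] entry / lock */
        unsigned long base = idx * SWAPFILE_CLUSTER;     /* first offset that cluster covers */

        printf("offset %lu -> cluster %lu (offsets %lu..%lu)\n",
               offset, idx, base, base + SWAPFILE_CLUSTER - 1);
        return 0;
}
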
379 * swap_cluster_info if SSD-style cluster-based locking is in place.
382 struct swap_info_struct *si, unsigned long offset) in lock_cluster_or_swap_info() argument
386 /* Try to use fine-grained SSD-style locking if available: */ in lock_cluster_or_swap_info()
387 ci = lock_cluster(si, offset); in lock_cluster_or_swap_info()
390 spin_lock(&si->lock); in lock_cluster_or_swap_info()
401 spin_unlock(&si->lock); in unlock_cluster_or_swap_info()
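
The two helpers above implement the coarse/fine locking split that the comment at line 379 describes: lock_cluster() returns a locked swap_cluster_info only when si->cluster_info exists (the SSD case), otherwise lock_cluster_or_swap_info() falls back to si->lock, and unlock_cluster_or_swap_info() releases whichever lock was taken. The fallback branch itself does not match this search, so here is a reconstruction for readability; kernel context assumed, not compilable standalone.

static struct swap_cluster_info *lock_cluster_or_swap_info(
                struct swap_info_struct *si, unsigned long offset)
{
        struct swap_cluster_info *ci;

        /* Try to use fine-grained SSD-style locking if available: */
        ci = lock_cluster(si, offset);          /* NULL when si->cluster_info is absent */
        /* Otherwise fall back to the coarse per-device lock: */
        if (!ci)
                spin_lock(&si->lock);

        return ci;
}
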
406 return cluster_is_null(&list->head); in cluster_list_empty()
411 return cluster_next(&list->head); in cluster_list_first()
416 cluster_set_null(&list->head); in cluster_list_init()
417 cluster_set_null(&list->tail); in cluster_list_init()
425 cluster_set_next_flag(&list->head, idx, 0); in cluster_list_add_tail()
426 cluster_set_next_flag(&list->tail, idx, 0); in cluster_list_add_tail()
429 unsigned int tail = cluster_next(&list->tail); in cluster_list_add_tail()
432 * Nested cluster lock, but both cluster locks are in cluster_list_add_tail()
433 * only acquired when we held swap_info_struct->lock in cluster_list_add_tail()
436 spin_lock_nested(&ci_tail->lock, SINGLE_DEPTH_NESTING); in cluster_list_add_tail()
438 spin_unlock(&ci_tail->lock); in cluster_list_add_tail()
439 cluster_set_next_flag(&list->tail, idx, 0); in cluster_list_add_tail()
448 idx = cluster_next(&list->head); in cluster_list_del_first()
449 if (cluster_next(&list->tail) == idx) { in cluster_list_del_first()
450 cluster_set_null(&list->head); in cluster_list_del_first()
451 cluster_set_null(&list->tail); in cluster_list_del_first()
453 cluster_set_next_flag(&list->head, in cluster_list_del_first()
465 * si->swap_map directly. To make sure the discarding cluster isn't in swap_cluster_schedule_discard()
469 memset(si->swap_map + idx * SWAPFILE_CLUSTER, in swap_cluster_schedule_discard()
472 cluster_list_add_tail(&si->discard_clusters, si->cluster_info, idx); in swap_cluster_schedule_discard()
474 schedule_work(&si->discard_work); in swap_cluster_schedule_discard()
479 struct swap_cluster_info *ci = si->cluster_info; in __free_cluster()
482 cluster_list_add_tail(&si->free_clusters, ci, idx); in __free_cluster()
487 * will be added to free cluster list. caller should hold si->lock.
494 info = si->cluster_info; in swap_do_scheduled_discard()
496 while (!cluster_list_empty(&si->discard_clusters)) { in swap_do_scheduled_discard()
497 idx = cluster_list_del_first(&si->discard_clusters, info); in swap_do_scheduled_discard()
498 spin_unlock(&si->lock); in swap_do_scheduled_discard()
503 spin_lock(&si->lock); in swap_do_scheduled_discard()
506 memset(si->swap_map + idx * SWAPFILE_CLUSTER, in swap_do_scheduled_discard()
518 spin_lock(&si->lock); in swap_discard_work()
520 spin_unlock(&si->lock); in swap_discard_work()
528 complete(&si->comp); in swap_users_ref_free()
533 struct swap_cluster_info *ci = si->cluster_info; in alloc_cluster()
535 VM_BUG_ON(cluster_list_first(&si->free_clusters) != idx); in alloc_cluster()
536 cluster_list_del_first(&si->free_clusters, ci); in alloc_cluster()
542 struct swap_cluster_info *ci = si->cluster_info + idx; in free_cluster()
550 if ((si->flags & (SWP_WRITEOK | SWP_PAGE_DISCARD)) == in free_cluster()
593 cluster_count(&cluster_info[idx]) - 1); in dec_cluster_info_page()
605 unsigned long offset) in scan_swap_map_ssd_cluster_conflict() argument
610 offset /= SWAPFILE_CLUSTER; in scan_swap_map_ssd_cluster_conflict()
611 conflict = !cluster_list_empty(&si->free_clusters) && in scan_swap_map_ssd_cluster_conflict()
612 offset != cluster_list_first(&si->free_clusters) && in scan_swap_map_ssd_cluster_conflict()
613 cluster_is_free(&si->cluster_info[offset]); in scan_swap_map_ssd_cluster_conflict()
618 percpu_cluster = this_cpu_ptr(si->percpu_cluster); in scan_swap_map_ssd_cluster_conflict()
619 cluster_set_null(&percpu_cluster->index); in scan_swap_map_ssd_cluster_conflict()
628 unsigned long *offset, unsigned long *scan_base) in scan_swap_map_try_ssd_cluster() argument
635 cluster = this_cpu_ptr(si->percpu_cluster); in scan_swap_map_try_ssd_cluster()
636 if (cluster_is_null(&cluster->index)) { in scan_swap_map_try_ssd_cluster()
637 if (!cluster_list_empty(&si->free_clusters)) { in scan_swap_map_try_ssd_cluster()
638 cluster->index = si->free_clusters.head; in scan_swap_map_try_ssd_cluster()
639 cluster->next = cluster_next(&cluster->index) * in scan_swap_map_try_ssd_cluster()
641 } else if (!cluster_list_empty(&si->discard_clusters)) { in scan_swap_map_try_ssd_cluster()
645 * reread cluster_next_cpu since we dropped si->lock in scan_swap_map_try_ssd_cluster()
648 *scan_base = this_cpu_read(*si->cluster_next_cpu); in scan_swap_map_try_ssd_cluster()
649 *offset = *scan_base; in scan_swap_map_try_ssd_cluster()
659 tmp = cluster->next; in scan_swap_map_try_ssd_cluster()
660 max = min_t(unsigned long, si->max, in scan_swap_map_try_ssd_cluster()
661 (cluster_next(&cluster->index) + 1) * SWAPFILE_CLUSTER); in scan_swap_map_try_ssd_cluster()
665 if (!si->swap_map[tmp]) in scan_swap_map_try_ssd_cluster()
672 cluster_set_null(&cluster->index); in scan_swap_map_try_ssd_cluster()
675 cluster->next = tmp + 1; in scan_swap_map_try_ssd_cluster()
676 *offset = tmp; in scan_swap_map_try_ssd_cluster()
685 assert_spin_locked(&p->lock); in __del_from_avail_list()
687 plist_del(&p->avail_lists[nid], &swap_avail_heads[nid]); in __del_from_avail_list()
697 static void swap_range_alloc(struct swap_info_struct *si, unsigned long offset, in swap_range_alloc() argument
700 unsigned int end = offset + nr_entries - 1; in swap_range_alloc()
702 if (offset == si->lowest_bit) in swap_range_alloc()
703 si->lowest_bit += nr_entries; in swap_range_alloc()
704 if (end == si->highest_bit) in swap_range_alloc()
705 WRITE_ONCE(si->highest_bit, si->highest_bit - nr_entries); in swap_range_alloc()
706 WRITE_ONCE(si->inuse_pages, si->inuse_pages + nr_entries); in swap_range_alloc()
707 if (si->inuse_pages == si->pages) { in swap_range_alloc()
708 si->lowest_bit = si->max; in swap_range_alloc()
709 si->highest_bit = 0; in swap_range_alloc()
720 plist_add(&p->avail_lists[nid], &swap_avail_heads[nid]); in add_to_avail_list()
724 static void swap_range_free(struct swap_info_struct *si, unsigned long offset, in swap_range_free() argument
727 unsigned long begin = offset; in swap_range_free()
728 unsigned long end = offset + nr_entries - 1; in swap_range_free()
731 if (offset < si->lowest_bit) in swap_range_free()
732 si->lowest_bit = offset; in swap_range_free()
733 if (end > si->highest_bit) { in swap_range_free()
734 bool was_full = !si->highest_bit; in swap_range_free()
736 WRITE_ONCE(si->highest_bit, end); in swap_range_free()
737 if (was_full && (si->flags & SWP_WRITEOK)) in swap_range_free()
741 WRITE_ONCE(si->inuse_pages, si->inuse_pages - nr_entries); in swap_range_free()
742 if (si->flags & SWP_BLKDEV) in swap_range_free()
744 si->bdev->bd_disk->fops->swap_slot_free_notify; in swap_range_free()
747 while (offset <= end) { in swap_range_free()
748 arch_swap_invalidate_page(si->type, offset); in swap_range_free()
749 zswap_invalidate(si->type, offset); in swap_range_free()
751 swap_slot_free_notify(si->bdev, offset); in swap_range_free()
752 offset++; in swap_range_free()
754 clear_shadow_from_swap_cache(si->type, begin, end); in swap_range_free()
761 if (!(si->flags & SWP_SOLIDSTATE)) { in set_cluster_next()
762 si->cluster_next = next; in set_cluster_next()
766 prev = this_cpu_read(*si->cluster_next_cpu); in set_cluster_next()
769 * another trunk randomly to avoid lock contention on swap in set_cluster_next()
775 if (si->highest_bit <= si->lowest_bit) in set_cluster_next()
777 next = get_random_u32_inclusive(si->lowest_bit, si->highest_bit); in set_cluster_next()
779 next = max_t(unsigned int, next, si->lowest_bit); in set_cluster_next()
781 this_cpu_write(*si->cluster_next_cpu, next); in set_cluster_next()
785 unsigned long offset) in swap_offset_available_and_locked() argument
787 if (data_race(!si->swap_map[offset])) { in swap_offset_available_and_locked()
788 spin_lock(&si->lock); in swap_offset_available_and_locked()
792 if (vm_swap_full() && READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) { in swap_offset_available_and_locked()
793 spin_lock(&si->lock); in swap_offset_available_and_locked()
805 unsigned long offset; in scan_swap_map_slots() local
815 * way, however, we resort to first-free allocation, starting in scan_swap_map_slots()
818 * overall disk seek times between swap pages. -- sct in scan_swap_map_slots()
819 * But we do now try to find an empty cluster. -Andrea in scan_swap_map_slots()
823 si->flags += SWP_SCANNING; in scan_swap_map_slots()
825 * Use percpu scan base for SSD to reduce lock contention on in scan_swap_map_slots()
829 if (si->flags & SWP_SOLIDSTATE) in scan_swap_map_slots()
830 scan_base = this_cpu_read(*si->cluster_next_cpu); in scan_swap_map_slots()
832 scan_base = si->cluster_next; in scan_swap_map_slots()
833 offset = scan_base; in scan_swap_map_slots()
836 if (si->cluster_info) { in scan_swap_map_slots()
837 if (!scan_swap_map_try_ssd_cluster(si, &offset, &scan_base)) in scan_swap_map_slots()
839 } else if (unlikely(!si->cluster_nr--)) { in scan_swap_map_slots()
840 if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) { in scan_swap_map_slots()
841 si->cluster_nr = SWAPFILE_CLUSTER - 1; in scan_swap_map_slots()
845 spin_unlock(&si->lock); in scan_swap_map_slots()
850 * If seek is cheap, that is the SWP_SOLIDSTATE si->cluster_info in scan_swap_map_slots()
853 scan_base = offset = si->lowest_bit; in scan_swap_map_slots()
854 last_in_cluster = offset + SWAPFILE_CLUSTER - 1; in scan_swap_map_slots()
857 for (; last_in_cluster <= si->highest_bit; offset++) { in scan_swap_map_slots()
858 if (si->swap_map[offset]) in scan_swap_map_slots()
859 last_in_cluster = offset + SWAPFILE_CLUSTER; in scan_swap_map_slots()
860 else if (offset == last_in_cluster) { in scan_swap_map_slots()
861 spin_lock(&si->lock); in scan_swap_map_slots()
862 offset -= SWAPFILE_CLUSTER - 1; in scan_swap_map_slots()
863 si->cluster_next = offset; in scan_swap_map_slots()
864 si->cluster_nr = SWAPFILE_CLUSTER - 1; in scan_swap_map_slots()
867 if (unlikely(--latency_ration < 0)) { in scan_swap_map_slots()
873 offset = scan_base; in scan_swap_map_slots()
874 spin_lock(&si->lock); in scan_swap_map_slots()
875 si->cluster_nr = SWAPFILE_CLUSTER - 1; in scan_swap_map_slots()
879 if (si->cluster_info) { in scan_swap_map_slots()
880 while (scan_swap_map_ssd_cluster_conflict(si, offset)) { in scan_swap_map_slots()
884 if (!scan_swap_map_try_ssd_cluster(si, &offset, in scan_swap_map_slots()
889 if (!(si->flags & SWP_WRITEOK)) in scan_swap_map_slots()
891 if (!si->highest_bit) in scan_swap_map_slots()
893 if (offset > si->highest_bit) in scan_swap_map_slots()
894 scan_base = offset = si->lowest_bit; in scan_swap_map_slots()
896 ci = lock_cluster(si, offset); in scan_swap_map_slots()
897 /* reuse swap entry of cache-only swap if not busy. */ in scan_swap_map_slots()
898 if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { in scan_swap_map_slots()
901 spin_unlock(&si->lock); in scan_swap_map_slots()
902 swap_was_freed = __try_to_reclaim_swap(si, offset, TTRS_ANYWAY); in scan_swap_map_slots()
903 spin_lock(&si->lock); in scan_swap_map_slots()
910 if (si->swap_map[offset]) { in scan_swap_map_slots()
917 WRITE_ONCE(si->swap_map[offset], usage); in scan_swap_map_slots()
918 inc_cluster_info_page(si, si->cluster_info, offset); in scan_swap_map_slots()
921 swap_range_alloc(si, offset, 1); in scan_swap_map_slots()
922 slots[n_ret++] = swp_entry(si->type, offset); in scan_swap_map_slots()
925 if ((n_ret == nr) || (offset >= si->highest_bit)) in scan_swap_map_slots()
931 if (unlikely(--latency_ration < 0)) { in scan_swap_map_slots()
934 spin_unlock(&si->lock); in scan_swap_map_slots()
936 spin_lock(&si->lock); in scan_swap_map_slots()
941 if (si->cluster_info) { in scan_swap_map_slots()
942 if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base)) in scan_swap_map_slots()
944 } else if (si->cluster_nr && !si->swap_map[++offset]) { in scan_swap_map_slots()
945 /* non-ssd case, still more slots in cluster? */ in scan_swap_map_slots()
946 --si->cluster_nr; in scan_swap_map_slots()
952 * try to scan a little more quickly with lock held unless we in scan_swap_map_slots()
958 if (offset < scan_base) in scan_swap_map_slots()
961 scan_limit = si->highest_bit; in scan_swap_map_slots()
962 for (; offset <= scan_limit && --latency_ration > 0; in scan_swap_map_slots()
963 offset++) { in scan_swap_map_slots()
964 if (!si->swap_map[offset]) in scan_swap_map_slots()
970 set_cluster_next(si, offset + 1); in scan_swap_map_slots()
971 si->flags -= SWP_SCANNING; in scan_swap_map_slots()
975 spin_unlock(&si->lock); in scan_swap_map_slots()
976 while (++offset <= READ_ONCE(si->highest_bit)) { in scan_swap_map_slots()
977 if (unlikely(--latency_ration < 0)) { in scan_swap_map_slots()
982 if (swap_offset_available_and_locked(si, offset)) in scan_swap_map_slots()
985 offset = si->lowest_bit; in scan_swap_map_slots()
986 while (offset < scan_base) { in scan_swap_map_slots()
987 if (unlikely(--latency_ration < 0)) { in scan_swap_map_slots()
992 if (swap_offset_available_and_locked(si, offset)) in scan_swap_map_slots()
994 offset++; in scan_swap_map_slots()
996 spin_lock(&si->lock); in scan_swap_map_slots()
999 si->flags -= SWP_SCANNING; in scan_swap_map_slots()
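
scan_swap_map_slots() above is the core slot allocator. On rotational devices (no cluster_info) it still tries to keep swap pages clustered: the loop shown around lines 857-864 searches for SWAPFILE_CLUSTER consecutive free swap_map entries before falling back to a simple first-free scan. Below is a self-contained toy model of that inner search; the names and sizes are illustrative, not the kernel's.

#include <stdio.h>

/*
 * Toy model of the non-SSD search in scan_swap_map_slots() (around lines
 * 857-864 above): find the start of `cluster` consecutive free slots in
 * map[lo..hi], or return -1.  A zero byte means the slot is free.
 */
static long find_free_cluster(const unsigned char *map, unsigned long lo,
                              unsigned long hi, unsigned long cluster)
{
        unsigned long offset, last_in_cluster = lo + cluster - 1;

        for (offset = lo; last_in_cluster <= hi; offset++) {
                if (map[offset])                        /* in use: run restarts after it */
                        last_in_cluster = offset + cluster;
                else if (offset == last_in_cluster)     /* run of `cluster` free slots */
                        return offset - (cluster - 1);
        }
        return -1;
}

int main(void)
{
        unsigned char map[32] = { 0 };

        map[3] = 1;             /* one used slot pushes the first free run to start at 4 */
        printf("%ld\n", find_free_cluster(map, 1, 31, 8));      /* prints 4 */
        return 0;
}
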
1007 unsigned long offset; in swap_alloc_cluster() local
1018 if (cluster_list_empty(&si->free_clusters)) in swap_alloc_cluster()
1021 idx = cluster_list_first(&si->free_clusters); in swap_alloc_cluster()
1022 offset = idx * SWAPFILE_CLUSTER; in swap_alloc_cluster()
1023 ci = lock_cluster(si, offset); in swap_alloc_cluster()
1027 memset(si->swap_map + offset, SWAP_HAS_CACHE, SWAPFILE_CLUSTER); in swap_alloc_cluster()
1029 swap_range_alloc(si, offset, SWAPFILE_CLUSTER); in swap_alloc_cluster()
1030 *slot = swp_entry(si->type, offset); in swap_alloc_cluster()
1037 unsigned long offset = idx * SWAPFILE_CLUSTER; in swap_free_cluster() local
1040 ci = lock_cluster(si, offset); in swap_free_cluster()
1041 memset(si->swap_map + offset, 0, SWAPFILE_CLUSTER); in swap_free_cluster()
1045 swap_range_free(si, offset, SWAPFILE_CLUSTER); in swap_free_cluster()
1074 /* requeue si to after same-priority siblings */ in get_swap_pages()
1075 plist_requeue(&si->avail_lists[node], &swap_avail_heads[node]); in get_swap_pages()
1077 spin_lock(&si->lock); in get_swap_pages()
1078 if (!si->highest_bit || !(si->flags & SWP_WRITEOK)) { in get_swap_pages()
1080 if (plist_node_empty(&si->avail_lists[node])) { in get_swap_pages()
1081 spin_unlock(&si->lock); in get_swap_pages()
1084 WARN(!si->highest_bit, in get_swap_pages()
1086 si->type); in get_swap_pages()
1087 WARN(!(si->flags & SWP_WRITEOK), in get_swap_pages()
1089 si->type); in get_swap_pages()
1091 spin_unlock(&si->lock); in get_swap_pages()
1095 if (si->flags & SWP_BLKDEV) in get_swap_pages()
1100 spin_unlock(&si->lock); in get_swap_pages()
1109 * and since scan_swap_map_slots() can drop the si->lock, in get_swap_pages()
1113 * si->lock. Since we dropped the swap_avail_lock, the in get_swap_pages()
1118 if (plist_node_empty(&next->avail_lists[node])) in get_swap_pages()
1126 atomic_long_add((long)(n_goal - n_ret) * size, in get_swap_pages()
1135 unsigned long offset; in _swap_info_get() local
1142 if (data_race(!(p->flags & SWP_USED))) in _swap_info_get()
1144 offset = swp_offset(entry); in _swap_info_get()
1145 if (offset >= p->max) in _swap_info_get()
1147 if (data_race(!p->swap_map[swp_offset(entry)])) in _swap_info_get()
1175 spin_unlock(&q->lock); in swap_info_get_cont()
1177 spin_lock(&p->lock); in swap_info_get_cont()
1183 unsigned long offset, in __swap_entry_free_locked() argument
1189 count = p->swap_map[offset]; in __swap_entry_free_locked()
1205 if (swap_count_continued(p, offset, count)) in __swap_entry_free_locked()
1210 count--; in __swap_entry_free_locked()
1215 WRITE_ONCE(p->swap_map[offset], usage); in __swap_entry_free_locked()
1217 WRITE_ONCE(p->swap_map[offset], SWAP_HAS_CACHE); in __swap_entry_free_locked()
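
__swap_entry_free_locked() above works on a single swap_map byte that packs a reference count together with flags: the in-place count runs up to SWAP_MAP_MAX, SWAP_HAS_CACHE records an extra swap-cache reference, and COUNT_CONTINUED means further counts live in a continuation page (see the continuation code near the end of this listing). The constant values below mirror include/linux/swap.h; treat them as assumptions and check your own tree. A small self-contained decoder:

#include <stdio.h>

#define SWAP_MAP_MAX    0x3e    /* largest in-place map count */
#define SWAP_MAP_BAD    0x3f    /* marks a bad slot */
#define SWAP_HAS_CACHE  0x40    /* entry also has a swap-cache reference */
#define COUNT_CONTINUED 0x80    /* count continues in a continuation page */
#define SWAP_MAP_SHMEM  0xbf    /* entry owned by shmem/tmpfs */

static void decode(unsigned char v)
{
        unsigned int count = v & ~(SWAP_HAS_CACHE | COUNT_CONTINUED);

        if (v == SWAP_MAP_BAD)
                printf("0x%02x: bad slot\n", v);
        else if (v == SWAP_MAP_SHMEM)
                printf("0x%02x: owned by shmem/tmpfs\n", v);
        else
                printf("0x%02x: map count %u%s%s\n", v, count,
                       (v & SWAP_HAS_CACHE) ? " +swapcache" : "",
                       (v & COUNT_CONTINUED) ? " +continued" : "");
}

int main(void)
{
        decode(0x00);                   /* free slot */
        decode(0x01);                   /* mapped by one pte */
        decode(0x01 | SWAP_HAS_CACHE);  /* mapped once and in the swap cache */
        decode(SWAP_HAS_CACHE);         /* cache only: reclaimable, see line 898 */
        return 0;
}
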
1225 * table lock is held, etc., the swap entry may become invalid because
1260 unsigned long offset; in get_swap_device() local
1267 if (!percpu_ref_tryget_live(&si->users)) in get_swap_device()
1270 * Guarantee the si->users are checked before accessing other in get_swap_device()
1277 offset = swp_offset(entry); in get_swap_device()
1278 if (offset >= si->max) in get_swap_device()
1288 percpu_ref_put(&si->users); in get_swap_device()
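
The fragment above (lines 1260-1288) is from get_swap_device(): it pins the swap device behind a swp_entry_t by taking a reference on si->users (percpu_ref_tryget_live), so the caller can touch si and its swap_map without racing against swapoff. A typical caller looks roughly like the sketch below; this is an orientation sketch only, it assumes kernel context, and put_swap_device() is the matching release helper from include/linux/swap.h.

/* Hypothetical caller, for illustration only. */
static void peek_at_entry(swp_entry_t entry)
{
        struct swap_info_struct *si;

        si = get_swap_device(entry);            /* takes a si->users reference */
        if (!si)
                return;                         /* device swapped off or entry invalid */

        /* si and si->swap_map[swp_offset(entry)] are stable here ... */

        put_swap_device(si);                    /* drops the reference */
}
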
1296 unsigned long offset = swp_offset(entry); in __swap_entry_free() local
1299 ci = lock_cluster_or_swap_info(p, offset); in __swap_entry_free()
1300 usage = __swap_entry_free_locked(p, offset, 1); in __swap_entry_free()
1311 unsigned long offset = swp_offset(entry); in swap_entry_free() local
1314 ci = lock_cluster(p, offset); in swap_entry_free()
1315 count = p->swap_map[offset]; in swap_entry_free()
1317 p->swap_map[offset] = 0; in swap_entry_free()
1318 dec_cluster_info_page(p, p->cluster_info, offset); in swap_entry_free()
1322 swap_range_free(p, offset, 1); in swap_entry_free()
1343 unsigned long offset = swp_offset(entry); in put_swap_folio() local
1344 unsigned long idx = offset / SWAPFILE_CLUSTER; in put_swap_folio()
1356 ci = lock_cluster_or_swap_info(si, offset); in put_swap_folio()
1359 map = si->swap_map + offset; in put_swap_folio()
1369 spin_lock(&si->lock); in put_swap_folio()
1372 spin_unlock(&si->lock); in put_swap_folio()
1377 if (!__swap_entry_free_locked(si, offset + i, SWAP_HAS_CACHE)) { in put_swap_folio()
1380 if (i == size - 1) in put_swap_folio()
1382 lock_cluster_or_swap_info(si, offset); in put_swap_folio()
1393 unsigned long offset = swp_offset(entry); in split_swap_cluster() local
1397 return -EBUSY; in split_swap_cluster()
1398 ci = lock_cluster(si, offset); in split_swap_cluster()
1409 return (int)swp_type(*e1) - (int)swp_type(*e2); in swp_entry_cmp()
1424 * Sort swap entries by swap device, so each lock is only taken once. in swapcache_free_entries()
1437 spin_unlock(&p->lock); in swapcache_free_entries()
1443 pgoff_t offset = swp_offset(entry); in __swap_count() local
1445 return swap_count(si->swap_map[offset]); in __swap_count()
1455 pgoff_t offset = swp_offset(entry); in swap_swapcount() local
1459 ci = lock_cluster_or_swap_info(si, offset); in swap_swapcount()
1460 count = swap_count(si->swap_map[offset]); in swap_swapcount()
1475 pgoff_t offset; in swp_swapcount() local
1482 offset = swp_offset(entry); in swp_swapcount()
1484 ci = lock_cluster_or_swap_info(p, offset); in swp_swapcount()
1486 count = swap_count(p->swap_map[offset]); in swp_swapcount()
1493 page = vmalloc_to_page(p->swap_map + offset); in swp_swapcount()
1494 offset &= ~PAGE_MASK; in swp_swapcount()
1500 tmp_count = map[offset]; in swp_swapcount()
1515 unsigned char *map = si->swap_map; in swap_page_trans_huge_swapped()
1517 unsigned long offset = round_down(roffset, SWAPFILE_CLUSTER); in swap_page_trans_huge_swapped() local
1521 ci = lock_cluster_or_swap_info(si, offset); in swap_page_trans_huge_swapped()
1528 if (swap_count(map[offset + i])) { in swap_page_trans_huge_swapped()
1540 swp_entry_t entry = folio->swap; in folio_swapped()
1553 * folio_free_swap() - Free the swap space used for this folio.
1575 * - most probably a call from __try_to_reclaim_swap() while in folio_free_swap()
1577 * but conceivably even a call from memory reclaim - will free in folio_free_swap()
1629 spin_lock(&si->lock); in get_swap_page_of_type()
1630 if ((si->flags & SWP_WRITEOK) && scan_swap_map_slots(si, 1, 1, &entry)) in get_swap_page_of_type()
1632 spin_unlock(&si->lock); in get_swap_page_of_type()
1640 * @offset - number of the PAGE_SIZE-sized block of the device, starting
1645 int swap_type_of(dev_t device, sector_t offset) in swap_type_of() argument
1650 return -1; in swap_type_of()
1656 if (!(sis->flags & SWP_WRITEOK)) in swap_type_of()
1659 if (device == sis->bdev->bd_dev) { in swap_type_of()
1662 if (se->start_block == offset) { in swap_type_of()
1669 return -ENODEV; in swap_type_of()
1680 if (!(sis->flags & SWP_WRITEOK)) in find_first_swap()
1682 *device = sis->bdev->bd_dev; in find_first_swap()
1687 return -ENODEV; in find_first_swap()
1691 * Get the (PAGE_SIZE) block corresponding to given offset on the swapdev
1694 sector_t swapdev_block(int type, pgoff_t offset) in swapdev_block() argument
1699 if (!si || !(si->flags & SWP_WRITEOK)) in swapdev_block()
1701 se = offset_to_swap_extent(si, offset); in swapdev_block()
1702 return se->start_block + (offset - se->start_page); in swapdev_block()
1719 spin_lock(&sis->lock); in count_swap_pages()
1720 if (sis->flags & SWP_WRITEOK) { in count_swap_pages()
1721 n = sis->pages; in count_swap_pages()
1723 n -= sis->inuse_pages; in count_swap_pages()
1725 spin_unlock(&sis->lock); in count_swap_pages()
1739 * just let do_wp_page work it out if a write is requested later - to
1755 return -ENOMEM; in unuse_pte()
1756 else if (unlikely(folio == ERR_PTR(-EHWPOISON))) { in unuse_pte()
1765 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); in unuse_pte()
1777 dec_mm_counter(vma->vm_mm, MM_SWAPENTS); in unuse_pte()
1795 dec_mm_counter(vma->vm_mm, MM_SWAPENTS); in unuse_pte()
1796 inc_mm_counter(vma->vm_mm, MM_ANONPAGES); in unuse_pte()
1815 new_pte = pte_mkold(mk_pte(page, vma->vm_page_prot)); in unuse_pte()
1821 set_pte_at(vma->vm_mm, addr, pte, new_pte); in unuse_pte()
1843 unsigned long offset; in unuse_pte_range() local
1864 offset = swp_offset(entry); in unuse_pte_range()
1884 swp_count = READ_ONCE(si->swap_map[offset]); in unuse_pte_range()
1887 return -ENOMEM; in unuse_pte_range()
1974 addr = vma->vm_start; in unuse_vma()
1975 end = vma->vm_end; in unuse_vma()
1977 pgd = pgd_offset(vma->vm_mm, addr); in unuse_vma()
1997 if (vma->anon_vma) { in unuse_mm()
2026 for (i = prev + 1; i < si->max; i++) { in find_next_to_unuse()
2027 count = READ_ONCE(si->swap_map[i]); in find_next_to_unuse()
2034 if (i == si->max) in find_next_to_unuse()
2051 if (!READ_ONCE(si->inuse_pages)) in try_to_unuse()
2064 while (READ_ONCE(si->inuse_pages) && in try_to_unuse()
2066 (p = p->next) != &init_mm.mmlist) { in try_to_unuse()
2092 while (READ_ONCE(si->inuse_pages) && in try_to_unuse()
2103 * swap cache just before we acquired the page lock. The folio in try_to_unuse()
2125 * and robust (though cpu-intensive) just to keep retrying. in try_to_unuse()
2127 if (READ_ONCE(si->inuse_pages)) { in try_to_unuse()
2130 return -EINTR; in try_to_unuse()
2140 * added to the mmlist just after page_duplicate - before would be racy.
2148 if (swap_info[type]->inuse_pages) in drain_mmlist()
2161 while (!RB_EMPTY_ROOT(&sis->swap_extent_root)) { in destroy_swap_extents()
2162 struct rb_node *rb = sis->swap_extent_root.rb_node; in destroy_swap_extents()
2165 rb_erase(rb, &sis->swap_extent_root); in destroy_swap_extents()
2169 if (sis->flags & SWP_ACTIVATED) { in destroy_swap_extents()
2170 struct file *swap_file = sis->swap_file; in destroy_swap_extents()
2171 struct address_space *mapping = swap_file->f_mapping; in destroy_swap_extents()
2173 sis->flags &= ~SWP_ACTIVATED; in destroy_swap_extents()
2174 if (mapping->a_ops->swap_deactivate) in destroy_swap_extents()
2175 mapping->a_ops->swap_deactivate(swap_file); in destroy_swap_extents()
2189 struct rb_node **link = &sis->swap_extent_root.rb_node, *parent = NULL; in add_swap_extent()
2199 link = &parent->rb_right; in add_swap_extent()
2204 BUG_ON(se->start_page + se->nr_pages != start_page); in add_swap_extent()
2205 if (se->start_block + se->nr_pages == start_block) { in add_swap_extent()
2207 se->nr_pages += nr_pages; in add_swap_extent()
2215 return -ENOMEM; in add_swap_extent()
2216 new_se->start_page = start_page; in add_swap_extent()
2217 new_se->nr_pages = nr_pages; in add_swap_extent()
2218 new_se->start_block = start_block; in add_swap_extent()
2220 rb_link_node(&new_se->rb_node, parent, link); in add_swap_extent()
2221 rb_insert_color(&new_se->rb_node, &sis->swap_extent_root); in add_swap_extent()
2243 * requirements, they are simply tossed out - we will never use those blocks
2250 * Typically it is in the 1-4 megabyte range. So we can have hundreds of
2251 * extents in the rbtree. - akpm.
2255 struct file *swap_file = sis->swap_file; in setup_swap_extents()
2256 struct address_space *mapping = swap_file->f_mapping; in setup_swap_extents()
2257 struct inode *inode = mapping->host; in setup_swap_extents()
2260 if (S_ISBLK(inode->i_mode)) { in setup_swap_extents()
2261 ret = add_swap_extent(sis, 0, sis->max, 0); in setup_swap_extents()
2262 *span = sis->pages; in setup_swap_extents()
2266 if (mapping->a_ops->swap_activate) { in setup_swap_extents()
2267 ret = mapping->a_ops->swap_activate(sis, swap_file, span); in setup_swap_extents()
2270 sis->flags |= SWP_ACTIVATED; in setup_swap_extents()
2271 if ((sis->flags & SWP_FS_OPS) && in setup_swap_extents()
2274 return -ENOMEM; in setup_swap_extents()
2286 if (p->bdev) in swap_node()
2287 bdev = p->bdev; in swap_node()
2289 bdev = p->swap_file->f_inode->i_sb->s_bdev; in swap_node()
2291 return bdev ? bdev->bd_disk->node_id : NUMA_NO_NODE; in swap_node()
2301 p->prio = prio; in setup_swap_info()
2303 p->prio = --least_priority; in setup_swap_info()
2306 * low-to-high, while swap ordering is high-to-low in setup_swap_info()
2308 p->list.prio = -p->prio; in setup_swap_info()
2310 if (p->prio >= 0) in setup_swap_info()
2311 p->avail_lists[i].prio = -p->prio; in setup_swap_info()
2314 p->avail_lists[i].prio = 1; in setup_swap_info()
2316 p->avail_lists[i].prio = -p->prio; in setup_swap_info()
2319 p->swap_map = swap_map; in setup_swap_info()
2320 p->cluster_info = cluster_info; in setup_swap_info()
2325 p->flags |= SWP_WRITEOK; in _enable_swap_info()
2326 atomic_long_add(p->pages, &nr_swap_pages); in _enable_swap_info()
2327 total_swap_pages += p->pages; in _enable_swap_info()
2333 * which on removal of any swap_info_struct with an auto-assigned in _enable_swap_info()
2334 * (i.e. negative) priority increments the auto-assigned priority in _enable_swap_info()
2335 * of any lower-priority swap_info_structs. in _enable_swap_info()
2340 plist_add(&p->list, &swap_active_head); in _enable_swap_info()
2343 if (p->highest_bit) in _enable_swap_info()
2351 zswap_swapon(p->type); in enable_swap_info()
2354 spin_lock(&p->lock); in enable_swap_info()
2356 spin_unlock(&p->lock); in enable_swap_info()
2361 percpu_ref_resurrect(&p->users); in enable_swap_info()
2363 spin_lock(&p->lock); in enable_swap_info()
2365 spin_unlock(&p->lock); in enable_swap_info()
2372 spin_lock(&p->lock); in reinsert_swap_info()
2373 setup_swap_info(p, p->prio, p->swap_map, p->cluster_info); in reinsert_swap_info()
2375 spin_unlock(&p->lock); in reinsert_swap_info()
2403 return -EPERM; in SYSCALL_DEFINE1()
2405 BUG_ON(!current->mm); in SYSCALL_DEFINE1()
2416 mapping = victim->f_mapping; in SYSCALL_DEFINE1()
2419 if (p->flags & SWP_WRITEOK) { in SYSCALL_DEFINE1()
2420 if (p->swap_file->f_mapping == mapping) { in SYSCALL_DEFINE1()
2427 err = -EINVAL; in SYSCALL_DEFINE1()
2431 if (!security_vm_enough_memory_mm(current->mm, p->pages)) in SYSCALL_DEFINE1()
2432 vm_unacct_memory(p->pages); in SYSCALL_DEFINE1()
2434 err = -ENOMEM; in SYSCALL_DEFINE1()
2438 spin_lock(&p->lock); in SYSCALL_DEFINE1()
2440 if (p->prio < 0) { in SYSCALL_DEFINE1()
2445 si->prio++; in SYSCALL_DEFINE1()
2446 si->list.prio--; in SYSCALL_DEFINE1()
2448 if (si->avail_lists[nid].prio != 1) in SYSCALL_DEFINE1()
2449 si->avail_lists[nid].prio--; in SYSCALL_DEFINE1()
2454 plist_del(&p->list, &swap_active_head); in SYSCALL_DEFINE1()
2455 atomic_long_sub(p->pages, &nr_swap_pages); in SYSCALL_DEFINE1()
2456 total_swap_pages -= p->pages; in SYSCALL_DEFINE1()
2457 p->flags &= ~SWP_WRITEOK; in SYSCALL_DEFINE1()
2458 spin_unlock(&p->lock); in SYSCALL_DEFINE1()
2464 err = try_to_unuse(p->type); in SYSCALL_DEFINE1()
2468 /* re-insert swap space back into swap_list */ in SYSCALL_DEFINE1()
2483 percpu_ref_kill(&p->users); in SYSCALL_DEFINE1()
2485 wait_for_completion(&p->comp); in SYSCALL_DEFINE1()
2487 flush_work(&p->discard_work); in SYSCALL_DEFINE1()
2490 if (p->flags & SWP_CONTINUED) in SYSCALL_DEFINE1()
2493 if (!p->bdev || !bdev_nonrot(p->bdev)) in SYSCALL_DEFINE1()
2498 spin_lock(&p->lock); in SYSCALL_DEFINE1()
2502 p->highest_bit = 0; /* cuts scans short */ in SYSCALL_DEFINE1()
2503 while (p->flags >= SWP_SCANNING) { in SYSCALL_DEFINE1()
2504 spin_unlock(&p->lock); in SYSCALL_DEFINE1()
2508 spin_lock(&p->lock); in SYSCALL_DEFINE1()
2511 swap_file = p->swap_file; in SYSCALL_DEFINE1()
2512 old_block_size = p->old_block_size; in SYSCALL_DEFINE1()
2513 p->swap_file = NULL; in SYSCALL_DEFINE1()
2514 p->max = 0; in SYSCALL_DEFINE1()
2515 swap_map = p->swap_map; in SYSCALL_DEFINE1()
2516 p->swap_map = NULL; in SYSCALL_DEFINE1()
2517 cluster_info = p->cluster_info; in SYSCALL_DEFINE1()
2518 p->cluster_info = NULL; in SYSCALL_DEFINE1()
2519 spin_unlock(&p->lock); in SYSCALL_DEFINE1()
2521 arch_swap_invalidate_area(p->type); in SYSCALL_DEFINE1()
2522 zswap_swapoff(p->type); in SYSCALL_DEFINE1()
2524 free_percpu(p->percpu_cluster); in SYSCALL_DEFINE1()
2525 p->percpu_cluster = NULL; in SYSCALL_DEFINE1()
2526 free_percpu(p->cluster_next_cpu); in SYSCALL_DEFINE1()
2527 p->cluster_next_cpu = NULL; in SYSCALL_DEFINE1()
2531 swap_cgroup_swapoff(p->type); in SYSCALL_DEFINE1()
2532 exit_swap_address_space(p->type); in SYSCALL_DEFINE1()
2534 inode = mapping->host; in SYSCALL_DEFINE1()
2535 if (p->bdev_handle) { in SYSCALL_DEFINE1()
2536 set_blocksize(p->bdev, old_block_size); in SYSCALL_DEFINE1()
2537 bdev_release(p->bdev_handle); in SYSCALL_DEFINE1()
2538 p->bdev_handle = NULL; in SYSCALL_DEFINE1()
2542 inode->i_flags &= ~S_SWAPFILE; in SYSCALL_DEFINE1()
2549 * not hold p->lock after we cleared its SWP_WRITEOK. in SYSCALL_DEFINE1()
2552 p->flags = 0; in SYSCALL_DEFINE1()
2569 struct seq_file *seq = file->private_data; in swaps_poll()
2573 if (seq->poll_event != atomic_read(&proc_poll_event)) { in swaps_poll()
2574 seq->poll_event = atomic_read(&proc_poll_event); in swaps_poll()
2594 if (!(si->flags & SWP_USED) || !si->swap_map) in swap_start()
2596 if (!--l) in swap_start()
2611 type = si->type + 1; in swap_next()
2615 if (!(si->flags & SWP_USED) || !si->swap_map) in swap_next()
2640 bytes = K(si->pages); in swap_show()
2641 inuse = K(READ_ONCE(si->inuse_pages)); in swap_show()
2643 file = si->swap_file; in swap_show()
2646 len < 40 ? 40 - len : 1, " ", in swap_show()
2647 S_ISBLK(file_inode(file)->i_mode) ? in swap_show()
2651 si->prio); in swap_show()
2671 seq = file->private_data; in swaps_open()
2672 seq->poll_event = atomic_read(&proc_poll_event); in swaps_open()
2711 return ERR_PTR(-ENOMEM); in alloc_swap_info()
2713 if (percpu_ref_init(&p->users, swap_users_ref_free, in alloc_swap_info()
2716 return ERR_PTR(-ENOMEM); in alloc_swap_info()
2721 if (!(swap_info[type]->flags & SWP_USED)) in alloc_swap_info()
2726 percpu_ref_exit(&p->users); in alloc_swap_info()
2728 return ERR_PTR(-EPERM); in alloc_swap_info()
2731 p->type = type; in alloc_swap_info()
2743 * would be relying on p->type to remain valid. in alloc_swap_info()
2746 p->swap_extent_root = RB_ROOT; in alloc_swap_info()
2747 plist_node_init(&p->list, 0); in alloc_swap_info()
2749 plist_node_init(&p->avail_lists[i], 0); in alloc_swap_info()
2750 p->flags = SWP_USED; in alloc_swap_info()
2753 percpu_ref_exit(&defer->users); in alloc_swap_info()
2756 spin_lock_init(&p->lock); in alloc_swap_info()
2757 spin_lock_init(&p->cont_lock); in alloc_swap_info()
2758 init_completion(&p->comp); in alloc_swap_info()
2767 if (S_ISBLK(inode->i_mode)) { in claim_swapfile()
2768 p->bdev_handle = bdev_open_by_dev(inode->i_rdev, in claim_swapfile()
2770 if (IS_ERR(p->bdev_handle)) { in claim_swapfile()
2771 error = PTR_ERR(p->bdev_handle); in claim_swapfile()
2772 p->bdev_handle = NULL; in claim_swapfile()
2775 p->bdev = p->bdev_handle->bdev; in claim_swapfile()
2776 p->old_block_size = block_size(p->bdev); in claim_swapfile()
2777 error = set_blocksize(p->bdev, PAGE_SIZE); in claim_swapfile()
2785 if (bdev_is_zoned(p->bdev)) in claim_swapfile()
2786 return -EINVAL; in claim_swapfile()
2787 p->flags |= SWP_BLKDEV; in claim_swapfile()
2788 } else if (S_ISREG(inode->i_mode)) { in claim_swapfile()
2789 p->bdev = inode->i_sb->s_bdev; in claim_swapfile()
2799 * 1) the number of bits for the swap offset in the swp_entry_t type, and
2804 * swap type 0 and swap offset ~0UL is created, encoded to a swap pte,
2805 * decoded to a swp_entry_t again, and finally the swap offset is
2833 if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) { in read_swap_header()
2834 pr_err("Unable to find swap-space signature\n"); in read_swap_header()
2839 if (swab32(swap_header->info.version) == 1) { in read_swap_header()
2840 swab32s(&swap_header->info.version); in read_swap_header()
2841 swab32s(&swap_header->info.last_page); in read_swap_header()
2842 swab32s(&swap_header->info.nr_badpages); in read_swap_header()
2843 if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) in read_swap_header()
2845 for (i = 0; i < swap_header->info.nr_badpages; i++) in read_swap_header()
2846 swab32s(&swap_header->info.badpages[i]); in read_swap_header()
2848 /* Check the swap header's sub-version */ in read_swap_header()
2849 if (swap_header->info.version != 1) { in read_swap_header()
2851 swap_header->info.version); in read_swap_header()
2855 p->lowest_bit = 1; in read_swap_header()
2856 p->cluster_next = 1; in read_swap_header()
2857 p->cluster_nr = 0; in read_swap_header()
2860 last_page = swap_header->info.last_page; in read_swap_header()
2862 pr_warn("Empty swap-file\n"); in read_swap_header()
2871 /* p->max is an unsigned int: don't overflow it */ in read_swap_header()
2875 p->highest_bit = maxpages - 1; in read_swap_header()
2884 if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode)) in read_swap_header()
2886 if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) in read_swap_header()
2910 unsigned long col = p->cluster_next / SWAPFILE_CLUSTER % SWAP_CLUSTER_COLS; in setup_swap_map_and_extents()
2913 nr_good_pages = maxpages - 1; /* omit header page */ in setup_swap_map_and_extents()
2915 cluster_list_init(&p->free_clusters); in setup_swap_map_and_extents()
2916 cluster_list_init(&p->discard_clusters); in setup_swap_map_and_extents()
2918 for (i = 0; i < swap_header->info.nr_badpages; i++) { in setup_swap_map_and_extents()
2919 unsigned int page_nr = swap_header->info.badpages[i]; in setup_swap_map_and_extents()
2920 if (page_nr == 0 || page_nr > swap_header->info.last_page) in setup_swap_map_and_extents()
2921 return -EINVAL; in setup_swap_map_and_extents()
2924 nr_good_pages--; in setup_swap_map_and_extents()
2944 p->max = maxpages; in setup_swap_map_and_extents()
2945 p->pages = nr_good_pages; in setup_swap_map_and_extents()
2949 nr_good_pages = p->pages; in setup_swap_map_and_extents()
2952 pr_warn("Empty swap-file\n"); in setup_swap_map_and_extents()
2953 return -EINVAL; in setup_swap_map_and_extents()
2973 cluster_list_add_tail(&p->free_clusters, cluster_info, in setup_swap_map_and_extents()
3000 return -EINVAL; in SYSCALL_DEFINE2()
3003 return -EPERM; in SYSCALL_DEFINE2()
3006 return -ENOMEM; in SYSCALL_DEFINE2()
3012 INIT_WORK(&p->discard_work, swap_discard_work); in SYSCALL_DEFINE2()
3027 p->swap_file = swap_file; in SYSCALL_DEFINE2()
3028 mapping = swap_file->f_mapping; in SYSCALL_DEFINE2()
3029 dentry = swap_file->f_path.dentry; in SYSCALL_DEFINE2()
3030 inode = mapping->host; in SYSCALL_DEFINE2()
3038 error = -ENOENT; in SYSCALL_DEFINE2()
3042 error = -EBUSY; in SYSCALL_DEFINE2()
3049 if (!mapping->a_ops->read_folio) { in SYSCALL_DEFINE2()
3050 error = -EINVAL; in SYSCALL_DEFINE2()
3062 error = -EINVAL; in SYSCALL_DEFINE2()
3069 error = -ENOMEM; in SYSCALL_DEFINE2()
3073 if (p->bdev && bdev_stable_writes(p->bdev)) in SYSCALL_DEFINE2()
3074 p->flags |= SWP_STABLE_WRITES; in SYSCALL_DEFINE2()
3076 if (p->bdev && bdev_synchronous(p->bdev)) in SYSCALL_DEFINE2()
3077 p->flags |= SWP_SYNCHRONOUS_IO; in SYSCALL_DEFINE2()
3079 if (p->bdev && bdev_nonrot(p->bdev)) { in SYSCALL_DEFINE2()
3083 p->flags |= SWP_SOLIDSTATE; in SYSCALL_DEFINE2()
3084 p->cluster_next_cpu = alloc_percpu(unsigned int); in SYSCALL_DEFINE2()
3085 if (!p->cluster_next_cpu) { in SYSCALL_DEFINE2()
3086 error = -ENOMEM; in SYSCALL_DEFINE2()
3094 per_cpu(*p->cluster_next_cpu, cpu) = in SYSCALL_DEFINE2()
3095 get_random_u32_inclusive(1, p->highest_bit); in SYSCALL_DEFINE2()
3102 error = -ENOMEM; in SYSCALL_DEFINE2()
3107 spin_lock_init(&((cluster_info + ci)->lock)); in SYSCALL_DEFINE2()
3109 p->percpu_cluster = alloc_percpu(struct percpu_cluster); in SYSCALL_DEFINE2()
3110 if (!p->percpu_cluster) { in SYSCALL_DEFINE2()
3111 error = -ENOMEM; in SYSCALL_DEFINE2()
3116 cluster = per_cpu_ptr(p->percpu_cluster, cpu); in SYSCALL_DEFINE2()
3117 cluster_set_null(&cluster->index); in SYSCALL_DEFINE2()
3124 error = swap_cgroup_swapon(p->type, maxpages); in SYSCALL_DEFINE2()
3136 p->bdev && bdev_max_discard_sectors(p->bdev)) { in SYSCALL_DEFINE2()
3143 p->flags |= (SWP_DISCARDABLE | SWP_AREA_DISCARD | in SYSCALL_DEFINE2()
3148 * either do single-time area discards only, or to just in SYSCALL_DEFINE2()
3149 * perform discards for released swap page-clusters. in SYSCALL_DEFINE2()
3150 * Now it's time to adjust the p->flags accordingly. in SYSCALL_DEFINE2()
3153 p->flags &= ~SWP_PAGE_DISCARD; in SYSCALL_DEFINE2()
3155 p->flags &= ~SWP_AREA_DISCARD; in SYSCALL_DEFINE2()
3157 /* issue a swapon-time discard if it's still required */ in SYSCALL_DEFINE2()
3158 if (p->flags & SWP_AREA_DISCARD) { in SYSCALL_DEFINE2()
3166 error = init_swap_address_space(p->type, maxpages); in SYSCALL_DEFINE2()
3174 inode->i_flags |= S_SWAPFILE; in SYSCALL_DEFINE2()
3177 inode->i_flags &= ~S_SWAPFILE; in SYSCALL_DEFINE2()
3182 prio = -1; in SYSCALL_DEFINE2()
3189 K(p->pages), name->name, p->prio, nr_extents, in SYSCALL_DEFINE2()
3191 (p->flags & SWP_SOLIDSTATE) ? "SS" : "", in SYSCALL_DEFINE2()
3192 (p->flags & SWP_DISCARDABLE) ? "D" : "", in SYSCALL_DEFINE2()
3193 (p->flags & SWP_AREA_DISCARD) ? "s" : "", in SYSCALL_DEFINE2()
3194 (p->flags & SWP_PAGE_DISCARD) ? "c" : ""); in SYSCALL_DEFINE2()
3203 exit_swap_address_space(p->type); in SYSCALL_DEFINE2()
3207 free_percpu(p->percpu_cluster); in SYSCALL_DEFINE2()
3208 p->percpu_cluster = NULL; in SYSCALL_DEFINE2()
3209 free_percpu(p->cluster_next_cpu); in SYSCALL_DEFINE2()
3210 p->cluster_next_cpu = NULL; in SYSCALL_DEFINE2()
3211 if (p->bdev_handle) { in SYSCALL_DEFINE2()
3212 set_blocksize(p->bdev, p->old_block_size); in SYSCALL_DEFINE2()
3213 bdev_release(p->bdev_handle); in SYSCALL_DEFINE2()
3214 p->bdev_handle = NULL; in SYSCALL_DEFINE2()
3218 swap_cgroup_swapoff(p->type); in SYSCALL_DEFINE2()
3220 p->swap_file = NULL; in SYSCALL_DEFINE2()
3221 p->flags = 0; in SYSCALL_DEFINE2()
3252 if ((si->flags & SWP_USED) && !(si->flags & SWP_WRITEOK)) in si_swapinfo()
3253 nr_to_be_unused += READ_ONCE(si->inuse_pages); in si_swapinfo()
3255 val->freeswap = atomic_long_read(&nr_swap_pages) + nr_to_be_unused; in si_swapinfo()
3256 val->totalswap = total_swap_pages + nr_to_be_unused; in si_swapinfo()
3264 * - success -> 0
3265 * - swp_entry is invalid -> EINVAL
3266 * - swp_entry is migration entry -> EINVAL
3267 * - swap-cache reference is requested but there is already one. -> EEXIST
3268 * - swap-cache reference is requested but the entry is not used. -> ENOENT
3269 * - swap-mapped reference requested but needs continued swap count. -> ENOMEM
3275 unsigned long offset; in __swap_duplicate() local
3282 offset = swp_offset(entry); in __swap_duplicate()
3283 ci = lock_cluster_or_swap_info(p, offset); in __swap_duplicate()
3285 count = p->swap_map[offset]; in __swap_duplicate()
3289 * swap entry could be SWAP_MAP_BAD. Check here with lock held. in __swap_duplicate()
3292 err = -ENOENT; in __swap_duplicate()
3306 err = -EEXIST; in __swap_duplicate()
3308 err = -ENOENT; in __swap_duplicate()
3315 err = -EINVAL; in __swap_duplicate()
3316 else if (swap_count_continued(p, offset, count)) in __swap_duplicate()
3319 err = -ENOMEM; in __swap_duplicate()
3321 err = -ENOENT; /* unused swap entry */ in __swap_duplicate()
3323 WRITE_ONCE(p->swap_map[offset], count | has_cache); in __swap_duplicate()
3341 * Returns 0 for success, or -ENOMEM if a swap_count_continuation is required
3343 * if __swap_duplicate() fails for another reason (-EINVAL or -ENOENT), which
3350 while (!err && __swap_duplicate(entry, 1) == -ENOMEM) in swap_duplicate()
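
Line 3350 above is the loop of swap_duplicate(): it retries __swap_duplicate() for as long as the call fails with -ENOMEM, which, per the comment at lines 3341-3343, means a swap_count_continuation page is needed first. The retry line that the search filtered out presumably allocates that continuation; a short reconstruction, assuming kernel context:

int swap_duplicate(swp_entry_t entry)
{
        int err = 0;

        while (!err && __swap_duplicate(entry, 1) == -ENOMEM)
                err = add_swap_count_continuation(entry, GFP_ATOMIC);
        return err;
}
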
3360 * -EEXIST means there is a swap cache.
3371 unsigned long offset = swp_offset(entry); in swapcache_clear() local
3374 ci = lock_cluster_or_swap_info(si, offset); in swapcache_clear()
3375 usage = __swap_entry_free_locked(si, offset, SWAP_HAS_CACHE); in swapcache_clear()
3387 * out-of-line methods to avoid include hell.
3391 return swp_swap_info(folio->swap)->swap_file->f_mapping; in swapcache_mapping()
3403 * add_swap_count_continuation - called when a swap count is duplicated
3424 pgoff_t offset; in add_swap_count_continuation() local
3442 spin_lock(&si->lock); in add_swap_count_continuation()
3444 offset = swp_offset(entry); in add_swap_count_continuation()
3446 ci = lock_cluster(si, offset); in add_swap_count_continuation()
3448 count = swap_count(si->swap_map[offset]); in add_swap_count_continuation()
3454 * over-provisioning. in add_swap_count_continuation()
3460 ret = -ENOMEM; in add_swap_count_continuation()
3464 head = vmalloc_to_page(si->swap_map + offset); in add_swap_count_continuation()
3465 offset &= ~PAGE_MASK; in add_swap_count_continuation()
3467 spin_lock(&si->cont_lock); in add_swap_count_continuation()
3474 INIT_LIST_HEAD(&head->lru); in add_swap_count_continuation()
3476 si->flags |= SWP_CONTINUED; in add_swap_count_continuation()
3479 list_for_each_entry(list_page, &head->lru, lru) { in add_swap_count_continuation()
3489 map = kmap_local_page(list_page) + offset; in add_swap_count_continuation()
3501 list_add_tail(&page->lru, &head->lru); in add_swap_count_continuation()
3504 spin_unlock(&si->cont_lock); in add_swap_count_continuation()
3507 spin_unlock(&si->lock); in add_swap_count_continuation()
3516 * swap_count_continued - when the original swap_map count is incremented
3522 * lock.
3525 pgoff_t offset, unsigned char count) in swap_count_continued() argument
3532 head = vmalloc_to_page(si->swap_map + offset); in swap_count_continued()
3538 spin_lock(&si->cont_lock); in swap_count_continued()
3539 offset &= ~PAGE_MASK; in swap_count_continued()
3541 map = kmap_local_page(page) + offset; in swap_count_continued()
3554 map = kmap_local_page(page) + offset; in swap_count_continued()
3563 map = kmap_local_page(page) + offset; in swap_count_continued()
3569 map = kmap_local_page(page) + offset; in swap_count_continued()
3584 map = kmap_local_page(page) + offset; in swap_count_continued()
3587 *map -= 1; in swap_count_continued()
3592 map = kmap_local_page(page) + offset; in swap_count_continued()
3600 spin_unlock(&si->cont_lock); in swap_count_continued()
3605 * free_swap_count_continuations - swapoff free all the continuation pages
3610 pgoff_t offset; in free_swap_count_continuations() local
3612 for (offset = 0; offset < si->max; offset += PAGE_SIZE) { in free_swap_count_continuations()
3614 head = vmalloc_to_page(si->swap_map + offset); in free_swap_count_continuations()
3618 list_for_each_entry_safe(page, next, &head->lru, lru) { in free_swap_count_continuations()
3619 list_del(&page->lru); in free_swap_count_continuations()
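
The continuation code above (add_swap_count_continuation(), swap_count_continued() and free_swap_count_continuations()) splits a swap offset the same way every time: vmalloc_to_page(si->swap_map + offset) picks the swap_map page the entry's byte lives in, and each such page anchors its own list of continuation pages via page->lru. offset &= ~PAGE_MASK then leaves the byte index that is reused inside every continuation page on that list, which is why free_swap_count_continuations() walks si->max in PAGE_SIZE strides (line 3612). A self-contained illustration of the decomposition, assuming 4 KiB pages for the demo:

#include <stdio.h>

#define PAGE_SIZE 4096UL                /* assumed for the demo */
#define PAGE_MASK (~(PAGE_SIZE - 1))

int main(void)
{
        unsigned long offset = 150000;  /* a swap offset, i.e. an index into swap_map[] */

        unsigned long map_page = offset / PAGE_SIZE;    /* which swap_map page: 36 */
        unsigned long in_page  = offset & ~PAGE_MASK;   /* byte index in that page: 2544 */

        printf("swap_map page %lu, byte %lu within each continuation page\n",
               map_page, in_page);
        return 0;
}
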
3640 * lock. in __folio_throttle_swaprate()
3642 if (current->throttle_disk) in __folio_throttle_swaprate()
3648 if (si->bdev) { in __folio_throttle_swaprate()
3649 blkcg_schedule_throttle(si->bdev->bd_disk, true); in __folio_throttle_swaprate()
3665 return -ENOMEM; in swapfile_init()