xref: /linux/mm/swap.h (revision 334fbe734e687404f346eba7d5d96ed2b44d35ab)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef _MM_SWAP_H
3 #define _MM_SWAP_H
4 
5 #include <linux/atomic.h> /* for atomic_long_t */
6 struct mempolicy;
7 struct swap_iocb;
8 
9 extern int page_cluster;
10 
11 #ifdef CONFIG_THP_SWAP
12 #define SWAPFILE_CLUSTER	HPAGE_PMD_NR
13 #define swap_entry_order(order)	(order)
14 #else
15 #define SWAPFILE_CLUSTER	256
16 #define swap_entry_order(order)	0
17 #endif
18 
19 extern struct swap_info_struct *swap_info[];
20 
/*
 * We use this to track usage of a cluster. A cluster is a block of swap disk
 * space that is SWAPFILE_CLUSTER pages long and naturally aligned on disk.
 * All free clusters are organized into a list. We fetch an entry from the
 * list to get a free cluster.
 *
 * The flags field determines if a cluster is free. This is
 * protected by cluster lock.
 */
struct swap_cluster_info {
	spinlock_t lock;	/*
				 * Protect swap_cluster_info fields
				 * other than list, and swap_info_struct->swap_map
				 * elements corresponding to the swap cluster.
				 */
	u16 count;		/* Usage count of the cluster, under ci->lock */
	u8 flags;		/* One of enum swap_cluster_flags */
	u8 order;		/* Allocation order — presumably the folio order
				 * this cluster serves; see swap_entry_order() */
	atomic_long_t __rcu *table;	/* Swap table entries, see mm/swap_table.h */
	unsigned int *extend_table;	/* For large swap count, protected by ci->lock */
	struct list_head list;	/* Linkage on a per-state cluster list, see
				 * enum swap_cluster_flags */
};
43 
/* All on-list clusters must have a non-zero flag. */
enum swap_cluster_flags {
	CLUSTER_FLAG_NONE = 0, /* For temporary off-list cluster */
	CLUSTER_FLAG_FREE,	/* Cluster is entirely free */
	CLUSTER_FLAG_NONFULL,	/* Cluster has free slots left */
	CLUSTER_FLAG_FRAG,	/* Fragmented cluster, still allocatable */
	/* Clusters with flags above are allocatable */
	CLUSTER_FLAG_USABLE = CLUSTER_FLAG_FRAG,
	CLUSTER_FLAG_FULL,	/* No free slot in the cluster */
	CLUSTER_FLAG_DISCARD,	/* Cluster is queued for discard */
	CLUSTER_FLAG_MAX,
};
56 
57 #ifdef CONFIG_SWAP
58 #include <linux/swapops.h> /* for swp_offset */
59 #include <linux/blk_types.h> /* for bio_end_io_t */
60 
/* Offset of @entry within its swap cluster. */
static inline unsigned int swp_cluster_offset(swp_entry_t entry)
{
	pgoff_t off = swp_offset(entry);

	return off % SWAPFILE_CLUSTER;
}
65 
66 /*
67  * Callers of all helpers below must ensure the entry, type, or offset is
68  * valid, and protect the swap device with reference count or locks.
69  */
__swap_type_to_info(int type)70 static inline struct swap_info_struct *__swap_type_to_info(int type)
71 {
72 	struct swap_info_struct *si;
73 
74 	si = READ_ONCE(swap_info[type]); /* rcu_dereference() */
75 	VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */
76 	return si;
77 }
78 
/* Map a swap entry to its device; device must be pinned by the caller. */
static inline struct swap_info_struct *__swap_entry_to_info(swp_entry_t entry)
{
	int type = swp_type(entry);

	return __swap_type_to_info(type);
}
83 
/* Map a device offset to the swap cluster that contains it. */
static inline struct swap_cluster_info *__swap_offset_to_cluster(
		struct swap_info_struct *si, pgoff_t offset)
{
	pgoff_t idx = offset / SWAPFILE_CLUSTER;

	VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */
	VM_WARN_ON_ONCE(offset >= roundup(si->max, SWAPFILE_CLUSTER));
	return &si->cluster_info[idx];
}
91 
/* Map a swap entry to the swap cluster that contains it. */
static inline struct swap_cluster_info *__swap_entry_to_cluster(swp_entry_t entry)
{
	struct swap_info_struct *si = __swap_entry_to_info(entry);

	return __swap_offset_to_cluster(si, swp_offset(entry));
}
97 
/*
 * Lock the cluster containing @offset on device @si, optionally with IRQs
 * disabled. Internal helper backing the swap_cluster_lock*() family below;
 * the caller must keep the offset valid and the device pinned.
 */
static __always_inline struct swap_cluster_info *__swap_cluster_lock(
		struct swap_info_struct *si, unsigned long offset, bool irq)
{
	struct swap_cluster_info *ci = __swap_offset_to_cluster(si, offset);

	/*
	 * Nothing modifies swap cache in an IRQ context. All access to
	 * swap cache is wrapped by swap_cache_* helpers, and swap cache
	 * writeback is handled outside of IRQs. Swapin or swapout never
	 * occurs in IRQ, and neither does in-place split or replace.
	 *
	 * Besides, modifying swap cache requires synchronization with
	 * swap_map, which was never IRQ safe.
	 */
	VM_WARN_ON_ONCE(!in_task());
	VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */
	if (irq)
		spin_lock_irq(&ci->lock);
	else
		spin_lock(&ci->lock);
	return ci;
}
120 
121 /**
122  * swap_cluster_lock - Lock and return the swap cluster of given offset.
123  * @si: swap device the cluster belongs to.
124  * @offset: the swap entry offset, pointing to a valid slot.
125  *
126  * Context: The caller must ensure the offset is in the valid range and
127  * protect the swap device with reference count or locks.
128  */
swap_cluster_lock(struct swap_info_struct * si,unsigned long offset)129 static inline struct swap_cluster_info *swap_cluster_lock(
130 		struct swap_info_struct *si, unsigned long offset)
131 {
132 	return __swap_cluster_lock(si, offset, false);
133 }
134 
/* Lock the cluster holding @folio's swap entries; internal helper. */
static inline struct swap_cluster_info *__swap_cluster_get_and_lock(
		const struct folio *folio, bool irq)
{
	swp_entry_t entry = folio->swap;

	VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
	VM_WARN_ON_ONCE_FOLIO(!folio_test_swapcache(folio), folio);
	return __swap_cluster_lock(__swap_entry_to_info(entry),
				   swp_offset(entry), irq);
}
143 
144 /*
145  * swap_cluster_get_and_lock - Locks the cluster that holds a folio's entries.
146  * @folio: The folio.
147  *
148  * This locks and returns the swap cluster that contains a folio's swap
149  * entries. The swap entries of a folio are always in one single cluster.
150  * The folio has to be locked so its swap entries won't change and the
151  * cluster won't be freed.
152  *
153  * Context: Caller must ensure the folio is locked and in the swap cache.
154  * Return: Pointer to the swap cluster.
155  */
swap_cluster_get_and_lock(const struct folio * folio)156 static inline struct swap_cluster_info *swap_cluster_get_and_lock(
157 		const struct folio *folio)
158 {
159 	return __swap_cluster_get_and_lock(folio, false);
160 }
161 
162 /*
163  * swap_cluster_get_and_lock_irq - Locks the cluster that holds a folio's entries.
164  * @folio: The folio.
165  *
166  * Same as swap_cluster_get_and_lock but also disable IRQ.
167  *
168  * Context: Caller must ensure the folio is locked and in the swap cache.
169  * Return: Pointer to the swap cluster.
170  */
swap_cluster_get_and_lock_irq(const struct folio * folio)171 static inline struct swap_cluster_info *swap_cluster_get_and_lock_irq(
172 		const struct folio *folio)
173 {
174 	return __swap_cluster_get_and_lock(folio, true);
175 }
176 
/* Unlock a cluster locked by swap_cluster_lock()/swap_cluster_get_and_lock(). */
static inline void swap_cluster_unlock(struct swap_cluster_info *ci)
{
	spin_unlock(&ci->lock);
}
181 
/* Unlock and re-enable IRQs; pairs with swap_cluster_get_and_lock_irq(). */
static inline void swap_cluster_unlock_irq(struct swap_cluster_info *ci)
{
	spin_unlock_irq(&ci->lock);
}
186 
187 extern int swap_retry_table_alloc(swp_entry_t entry, gfp_t gfp);
188 
/*
 * Below are the core routines for doing swap for a folio.
 * All helpers require the folio to be locked, and a locked folio
 * in the swap cache pins the swap entries / slots allocated to the
 * folio; swap relies heavily on the swap cache and folio lock for
 * synchronization.
 *
 * folio_alloc_swap(): the entry point for a folio to be swapped
 * out. It allocates swap slots and pins the slots with swap cache.
 * The slots start with a swap count of zero. The slots are pinned
 * by a swap cache reference which doesn't contribute to swap count.
 *
 * folio_dup_swap(): increases the swap count of a folio, usually
 * when it gets unmapped and a swap entry is installed to replace
 * it (e.g., a swap entry in a page table). A swap slot with swap
 * count == 0 can only be increased by this helper.
 *
 * folio_put_swap(): does the opposite thing of folio_dup_swap().
 */
208 int folio_alloc_swap(struct folio *folio);
209 int folio_dup_swap(struct folio *folio, struct page *subpage);
210 void folio_put_swap(struct folio *folio, struct page *subpage);
211 
212 /* For internal use */
213 extern void __swap_cluster_free_entries(struct swap_info_struct *si,
214 					struct swap_cluster_info *ci,
215 					unsigned int ci_off, unsigned int nr_pages);
216 
217 /* linux/mm/page_io.c */
218 int sio_pool_init(void);
219 struct swap_iocb;
220 void swap_read_folio(struct folio *folio, struct swap_iocb **plug);
221 void __swap_read_unplug(struct swap_iocb *plug);
/* Flush a queued swap read plug, if any was set up. */
static inline void swap_read_unplug(struct swap_iocb *plug)
{
	if (likely(!plug))
		return;
	__swap_read_unplug(plug);
}
227 void swap_write_unplug(struct swap_iocb *sio);
228 int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug);
229 void __swap_writepage(struct folio *folio, struct swap_iocb **swap_plug);
230 
231 /* linux/mm/swap_state.c */
232 extern struct address_space swap_space __read_mostly;
/* All swap cache entries share the single global swap_space mapping. */
static inline struct address_space *swap_address_space(swp_entry_t entry)
{
	return &swap_space;
}
237 
238 /*
239  * Return the swap device position of the swap entry.
240  */
swap_dev_pos(swp_entry_t entry)241 static inline loff_t swap_dev_pos(swp_entry_t entry)
242 {
243 	return ((loff_t)swp_offset(entry)) << PAGE_SHIFT;
244 }
245 
246 /**
247  * folio_matches_swap_entry - Check if a folio matches a given swap entry.
248  * @folio: The folio.
249  * @entry: The swap entry to check against.
250  *
251  * Context: The caller should have the folio locked to ensure it's stable
252  * and nothing will move it in or out of the swap cache.
253  * Return: true or false.
254  */
folio_matches_swap_entry(const struct folio * folio,swp_entry_t entry)255 static inline bool folio_matches_swap_entry(const struct folio *folio,
256 					    swp_entry_t entry)
257 {
258 	swp_entry_t folio_entry = folio->swap;
259 	long nr_pages = folio_nr_pages(folio);
260 
261 	VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
262 	if (!folio_test_swapcache(folio))
263 		return false;
264 	VM_WARN_ON_ONCE_FOLIO(!IS_ALIGNED(folio_entry.val, nr_pages), folio);
265 	return folio_entry.val == round_down(entry.val, nr_pages);
266 }
267 
268 /*
269  * All swap cache helpers below require the caller to ensure the swap entries
270  * used are valid and stabilize the device by any of the following ways:
271  * - Hold a reference by get_swap_device(): this ensures a single entry is
272  *   valid and increases the swap device's refcount.
273  * - Locking a folio in the swap cache: this ensures the folio's swap entries
274  *   are valid and pinned, also implies reference to the device.
275  * - Locking anything referencing the swap entry: e.g. PTL that protects
276  *   swap entries in the page table, similar to locking swap cache folio.
277  * - See the comment of get_swap_device() for more complex usage.
278  */
279 bool swap_cache_has_folio(swp_entry_t entry);
280 struct folio *swap_cache_get_folio(swp_entry_t entry);
281 void *swap_cache_get_shadow(swp_entry_t entry);
282 void swap_cache_del_folio(struct folio *folio);
283 struct folio *swap_cache_alloc_folio(swp_entry_t entry, gfp_t gfp_flags,
284 				     struct mempolicy *mpol, pgoff_t ilx,
285 				     bool *alloced);
286 /* Below helpers require the caller to lock and pass in the swap cluster. */
287 void __swap_cache_add_folio(struct swap_cluster_info *ci,
288 			    struct folio *folio, swp_entry_t entry);
289 void __swap_cache_del_folio(struct swap_cluster_info *ci,
290 			    struct folio *folio, swp_entry_t entry, void *shadow);
291 void __swap_cache_replace_folio(struct swap_cluster_info *ci,
292 				struct folio *old, struct folio *new);
293 
294 void show_swap_cache_info(void);
295 void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry, int nr);
296 struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
297 		struct vm_area_struct *vma, unsigned long addr,
298 		struct swap_iocb **plug);
299 struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t flag,
300 		struct mempolicy *mpol, pgoff_t ilx);
301 struct folio *swapin_readahead(swp_entry_t entry, gfp_t flag,
302 		struct vm_fault *vmf);
303 struct folio *swapin_folio(swp_entry_t entry, struct folio *folio);
304 void swap_update_readahead(struct folio *folio, struct vm_area_struct *vma,
305 			   unsigned long addr);
306 
folio_swap_flags(struct folio * folio)307 static inline unsigned int folio_swap_flags(struct folio *folio)
308 {
309 	return __swap_entry_to_info(folio->swap)->flags;
310 }
311 
312 /*
313  * Return the count of contiguous swap entries that share the same
314  * zeromap status as the starting entry. If is_zeromap is not NULL,
315  * it will return the zeromap status of the starting entry.
316  */
swap_zeromap_batch(swp_entry_t entry,int max_nr,bool * is_zeromap)317 static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr,
318 		bool *is_zeromap)
319 {
320 	struct swap_info_struct *sis = __swap_entry_to_info(entry);
321 	unsigned long start = swp_offset(entry);
322 	unsigned long end = start + max_nr;
323 	bool first_bit;
324 
325 	first_bit = test_bit(start, sis->zeromap);
326 	if (is_zeromap)
327 		*is_zeromap = first_bit;
328 
329 	if (max_nr <= 1)
330 		return max_nr;
331 	if (first_bit)
332 		return find_next_zero_bit(sis->zeromap, end, start) - start;
333 	else
334 		return find_next_bit(sis->zeromap, end, start) - start;
335 }
336 
/* Count leading consecutive entries (up to @max_nr) not in the swap cache. */
static inline int non_swapcache_batch(swp_entry_t entry, int max_nr)
{
	int nr = 0;

	/*
	 * While allocating a large folio and doing mTHP swapin, we need to
	 * ensure all entries are not cached, otherwise, the mTHP folio will
	 * be in conflict with the folio in swap cache.
	 */
	while (nr < max_nr && !swap_cache_has_folio(entry)) {
		entry.val++;
		nr++;
	}

	return nr;
}
354 
355 #else /* CONFIG_SWAP */
356 struct swap_iocb;
swap_cluster_lock(struct swap_info_struct * si,pgoff_t offset,bool irq)357 static inline struct swap_cluster_info *swap_cluster_lock(
358 	struct swap_info_struct *si, pgoff_t offset, bool irq)
359 {
360 	return NULL;
361 }
362 
swap_cluster_get_and_lock(struct folio * folio)363 static inline struct swap_cluster_info *swap_cluster_get_and_lock(
364 		struct folio *folio)
365 {
366 	return NULL;
367 }
368 
swap_cluster_get_and_lock_irq(struct folio * folio)369 static inline struct swap_cluster_info *swap_cluster_get_and_lock_irq(
370 		struct folio *folio)
371 {
372 	return NULL;
373 }
374 
/* !CONFIG_SWAP: cluster unlock is a no-op, lookups return NULL. */
static inline void swap_cluster_unlock(struct swap_cluster_info *ci)
{
}

static inline void swap_cluster_unlock_irq(struct swap_cluster_info *ci)
{
}

static inline struct swap_info_struct *__swap_entry_to_info(swp_entry_t entry)
{
	return NULL;
}
387 
/* !CONFIG_SWAP: swap allocation always fails, I/O paths are no-ops. */
static inline int folio_alloc_swap(struct folio *folio)
{
	return -EINVAL;
}

static inline int folio_dup_swap(struct folio *folio, struct page *page)
{
	return -EINVAL;
}

static inline void folio_put_swap(struct folio *folio, struct page *page)
{
}

static inline void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
{
}

static inline void swap_write_unplug(struct swap_iocb *sio)
{
}
409 
/* !CONFIG_SWAP: no swap cache, no readahead — everything is inert. */
static inline struct address_space *swap_address_space(swp_entry_t entry)
{
	return NULL;
}

static inline bool folio_matches_swap_entry(const struct folio *folio, swp_entry_t entry)
{
	return false;
}

static inline void show_swap_cache_info(void)
{
}

static inline struct folio *swap_cluster_readahead(swp_entry_t entry,
			gfp_t gfp_mask, struct mempolicy *mpol, pgoff_t ilx)
{
	return NULL;
}

static inline struct folio *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
			struct vm_fault *vmf)
{
	return NULL;
}

static inline struct folio *swapin_folio(swp_entry_t entry, struct folio *folio)
{
	return NULL;
}

static inline void swap_update_readahead(struct folio *folio,
		struct vm_area_struct *vma, unsigned long addr)
{
}
445 
/* !CONFIG_SWAP: writeout reports success trivially; cache lookups miss. */
static inline int swap_writeout(struct folio *folio,
		struct swap_iocb **swap_plug)
{
	return 0;
}

static inline int swap_retry_table_alloc(swp_entry_t entry, gfp_t gfp)
{
	return -EINVAL;
}

static inline bool swap_cache_has_folio(swp_entry_t entry)
{
	return false;
}

static inline struct folio *swap_cache_get_folio(swp_entry_t entry)
{
	return NULL;
}

static inline void *swap_cache_get_shadow(swp_entry_t entry)
{
	return NULL;
}

static inline void swap_cache_del_folio(struct folio *folio)
{
}

static inline void __swap_cache_del_folio(struct swap_cluster_info *ci,
		struct folio *folio, swp_entry_t entry, void *shadow)
{
}

static inline void __swap_cache_replace_folio(struct swap_cluster_info *ci,
		struct folio *old, struct folio *new)
{
}

static inline unsigned int folio_swap_flags(struct folio *folio)
{
	return 0;
}

static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr,
		bool *has_zeromap)
{
	return 0;
}

static inline int non_swapcache_batch(swp_entry_t entry, int max_nr)
{
	return 0;
}
501 #endif /* CONFIG_SWAP */
502 #endif /* _MM_SWAP_H */
503