1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef _MM_SWAP_H
3 #define _MM_SWAP_H
4
5 #include <linux/atomic.h> /* for atomic_long_t */
6 struct mempolicy;
7 struct swap_iocb;
8
9 extern int page_cluster;
10
#ifdef CONFIG_THP_SWAP
/* A cluster covers one PMD-sized THP worth of swap slots. */
#define SWAPFILE_CLUSTER HPAGE_PMD_NR
/* With THP swap, the mTHP allocation order is preserved. */
#define swap_entry_order(order) (order)
#else
#define SWAPFILE_CLUSTER 256
/* Without THP swap, all swap entries are treated as order-0. */
#define swap_entry_order(order) 0
#endif
18
19 extern struct swap_info_struct *swap_info[];
20
/*
 * We use this to track usage of a cluster. A cluster is a block of swap disk
 * space that is SWAPFILE_CLUSTER pages long and naturally aligned on disk.
 * All free clusters are organized into a list. We fetch an entry from the
 * list to get a free cluster.
 *
 * The flags field determines if a cluster is free. This is
 * protected by the cluster lock.
 */
struct swap_cluster_info {
	spinlock_t lock;	/*
				 * Protect swap_cluster_info fields
				 * other than list, and swap_info_struct->swap_map
				 * elements corresponding to the swap cluster.
				 */
	u16 count;	/* Usage count of the cluster's swap slots */
	u8 flags;	/* One of enum swap_cluster_flags */
	u8 order;	/* Allocation order served — see swap_entry_order() */
	atomic_long_t __rcu *table;	/* Swap table entries, see mm/swap_table.h */
	struct list_head list;	/* Linkage on a per-device cluster list */
};
42
/* All on-list clusters must have a non-zero flag. */
enum swap_cluster_flags {
	CLUSTER_FLAG_NONE = 0, /* For temporary off-list cluster */
	CLUSTER_FLAG_FREE,
	CLUSTER_FLAG_NONFULL,
	CLUSTER_FLAG_FRAG,
	/* Clusters with flags above are allocatable */
	CLUSTER_FLAG_USABLE = CLUSTER_FLAG_FRAG, /* Highest allocatable flag value */
	CLUSTER_FLAG_FULL,
	CLUSTER_FLAG_DISCARD,
	CLUSTER_FLAG_MAX, /* Sentinel: number of flag values, not a real state */
};
55
56 #ifdef CONFIG_SWAP
57 #include <linux/swapops.h> /* for swp_offset */
58 #include <linux/blk_types.h> /* for bio_end_io_t */
59
/* Position of a swap entry within its cluster. */
static inline unsigned int swp_cluster_offset(swp_entry_t entry)
{
	pgoff_t off = swp_offset(entry);

	return off % SWAPFILE_CLUSTER;
}
64
65 /*
66 * Callers of all helpers below must ensure the entry, type, or offset is
67 * valid, and protect the swap device with reference count or locks.
68 */
__swap_type_to_info(int type)69 static inline struct swap_info_struct *__swap_type_to_info(int type)
70 {
71 struct swap_info_struct *si;
72
73 si = READ_ONCE(swap_info[type]); /* rcu_dereference() */
74 VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */
75 return si;
76 }
77
/* Look up the swap device an entry belongs to. */
static inline struct swap_info_struct *__swap_entry_to_info(swp_entry_t entry)
{
	int type = swp_type(entry);

	return __swap_type_to_info(type);
}
82
/* Map a device offset to the cluster that contains it. */
static inline struct swap_cluster_info *__swap_offset_to_cluster(
		struct swap_info_struct *si, pgoff_t offset)
{
	pgoff_t idx = offset / SWAPFILE_CLUSTER;

	VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */
	VM_WARN_ON_ONCE(offset >= si->max);
	return &si->cluster_info[idx];
}
90
/* Map a swap entry to the cluster that contains it. */
static inline struct swap_cluster_info *__swap_entry_to_cluster(swp_entry_t entry)
{
	struct swap_info_struct *si = __swap_entry_to_info(entry);

	return __swap_offset_to_cluster(si, swp_offset(entry));
}
96
__swap_cluster_lock(struct swap_info_struct * si,unsigned long offset,bool irq)97 static __always_inline struct swap_cluster_info *__swap_cluster_lock(
98 struct swap_info_struct *si, unsigned long offset, bool irq)
99 {
100 struct swap_cluster_info *ci = __swap_offset_to_cluster(si, offset);
101
102 /*
103 * Nothing modifies swap cache in an IRQ context. All access to
104 * swap cache is wrapped by swap_cache_* helpers, and swap cache
105 * writeback is handled outside of IRQs. Swapin or swapout never
106 * occurs in IRQ, and neither does in-place split or replace.
107 *
108 * Besides, modifying swap cache requires synchronization with
109 * swap_map, which was never IRQ safe.
110 */
111 VM_WARN_ON_ONCE(!in_task());
112 VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */
113 if (irq)
114 spin_lock_irq(&ci->lock);
115 else
116 spin_lock(&ci->lock);
117 return ci;
118 }
119
/**
 * swap_cluster_lock - Lock and return the swap cluster of given offset.
 * @si: swap device the cluster belongs to.
 * @offset: the swap entry offset, pointing to a valid slot.
 *
 * Context: The caller must ensure the offset is in the valid range and
 * protect the swap device with reference count or locks.
 * Return: Pointer to the locked swap cluster.
 */
static inline struct swap_cluster_info *swap_cluster_lock(
		struct swap_info_struct *si, unsigned long offset)
{
	return __swap_cluster_lock(si, offset, false);
}
133
/* Common helper: lock the cluster backing a swap cache folio's entries. */
static inline struct swap_cluster_info *__swap_cluster_get_and_lock(
		const struct folio *folio, bool irq)
{
	swp_entry_t entry = folio->swap;

	VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
	VM_WARN_ON_ONCE_FOLIO(!folio_test_swapcache(folio), folio);
	return __swap_cluster_lock(__swap_entry_to_info(entry),
				   swp_offset(entry), irq);
}
142
/**
 * swap_cluster_get_and_lock - Locks the cluster that holds a folio's entries.
 * @folio: The folio.
 *
 * This locks and returns the swap cluster that contains a folio's swap
 * entries. The swap entries of a folio are always in one single cluster.
 * The folio has to be locked so its swap entries won't change and the
 * cluster won't be freed.
 *
 * Context: Caller must ensure the folio is locked and in the swap cache.
 * Return: Pointer to the swap cluster.
 */
static inline struct swap_cluster_info *swap_cluster_get_and_lock(
		const struct folio *folio)
{
	return __swap_cluster_get_and_lock(folio, false);
}
160
/**
 * swap_cluster_get_and_lock_irq - Locks the cluster that holds a folio's entries.
 * @folio: The folio.
 *
 * Same as swap_cluster_get_and_lock but also disable IRQ.
 *
 * Context: Caller must ensure the folio is locked and in the swap cache.
 * Return: Pointer to the swap cluster.
 */
static inline struct swap_cluster_info *swap_cluster_get_and_lock_irq(
		const struct folio *folio)
{
	return __swap_cluster_get_and_lock(folio, true);
}
175
/* Release a cluster locked by swap_cluster_lock()/swap_cluster_get_and_lock(). */
static inline void swap_cluster_unlock(struct swap_cluster_info *ci)
{
	spin_unlock(&ci->lock);
}
180
/* IRQ-enabling counterpart, pairs with swap_cluster_get_and_lock_irq(). */
static inline void swap_cluster_unlock_irq(struct swap_cluster_info *ci)
{
	spin_unlock_irq(&ci->lock);
}
185
/*
 * Below are the core routines for doing swap for a folio.
 * All helpers require the folio to be locked, and a locked folio
 * in the swap cache pins the swap entries / slots allocated to the
 * folio; swap relies heavily on the swap cache and folio lock for
 * synchronization.
 *
 * folio_alloc_swap(): the entry point for a folio to be swapped
 * out. It allocates swap slots and pins the slots with swap cache.
 * The slots start with a swap count of zero.
 *
 * folio_dup_swap(): increases the swap count of a folio, usually
 * when it gets unmapped and a swap entry is installed to replace
 * it (e.g., a swap entry in a page table). A swap slot with swap
 * count == 0 should only be increased by this helper.
 *
 * folio_put_swap(): does the opposite thing of folio_dup_swap().
 */
204 int folio_alloc_swap(struct folio *folio);
205 int folio_dup_swap(struct folio *folio, struct page *subpage);
206 void folio_put_swap(struct folio *folio, struct page *subpage);
207
208 /* For internal use */
209 extern void swap_entries_free(struct swap_info_struct *si,
210 struct swap_cluster_info *ci,
211 unsigned long offset, unsigned int nr_pages);
212
213 /* linux/mm/page_io.c */
214 int sio_pool_init(void);
215 struct swap_iocb;
216 void swap_read_folio(struct folio *folio, struct swap_iocb **plug);
217 void __swap_read_unplug(struct swap_iocb *plug);
/* Finish a batched swap read: hand off to __swap_read_unplug() if a plug exists. */
static inline void swap_read_unplug(struct swap_iocb *plug)
{
	if (unlikely(plug))
		__swap_read_unplug(plug);
}
223 void swap_write_unplug(struct swap_iocb *sio);
224 int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug);
225 void __swap_writepage(struct folio *folio, struct swap_iocb **swap_plug);
226
227 /* linux/mm/swap_state.c */
228 extern struct address_space swap_space __read_mostly;
/* All swap entries share the single global swap_space mapping. */
static inline struct address_space *swap_address_space(swp_entry_t entry)
{
	return &swap_space;
}
233
234 /*
235 * Return the swap device position of the swap entry.
236 */
swap_dev_pos(swp_entry_t entry)237 static inline loff_t swap_dev_pos(swp_entry_t entry)
238 {
239 return ((loff_t)swp_offset(entry)) << PAGE_SHIFT;
240 }
241
242 /**
243 * folio_matches_swap_entry - Check if a folio matches a given swap entry.
244 * @folio: The folio.
245 * @entry: The swap entry to check against.
246 *
247 * Context: The caller should have the folio locked to ensure it's stable
248 * and nothing will move it in or out of the swap cache.
249 * Return: true or false.
250 */
folio_matches_swap_entry(const struct folio * folio,swp_entry_t entry)251 static inline bool folio_matches_swap_entry(const struct folio *folio,
252 swp_entry_t entry)
253 {
254 swp_entry_t folio_entry = folio->swap;
255 long nr_pages = folio_nr_pages(folio);
256
257 VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
258 if (!folio_test_swapcache(folio))
259 return false;
260 VM_WARN_ON_ONCE_FOLIO(!IS_ALIGNED(folio_entry.val, nr_pages), folio);
261 return folio_entry.val == round_down(entry.val, nr_pages);
262 }
263
264 /*
265 * All swap cache helpers below require the caller to ensure the swap entries
266 * used are valid and stabilize the device by any of the following ways:
267 * - Hold a reference by get_swap_device(): this ensures a single entry is
268 * valid and increases the swap device's refcount.
269 * - Locking a folio in the swap cache: this ensures the folio's swap entries
270 * are valid and pinned, also implies reference to the device.
271 * - Locking anything referencing the swap entry: e.g. PTL that protects
272 * swap entries in the page table, similar to locking swap cache folio.
273 * - See the comment of get_swap_device() for more complex usage.
274 */
275 bool swap_cache_has_folio(swp_entry_t entry);
276 struct folio *swap_cache_get_folio(swp_entry_t entry);
277 void *swap_cache_get_shadow(swp_entry_t entry);
278 void swap_cache_del_folio(struct folio *folio);
279 struct folio *swap_cache_alloc_folio(swp_entry_t entry, gfp_t gfp_flags,
280 struct mempolicy *mpol, pgoff_t ilx,
281 bool *alloced);
282 /* Below helpers require the caller to lock and pass in the swap cluster. */
283 void __swap_cache_add_folio(struct swap_cluster_info *ci,
284 struct folio *folio, swp_entry_t entry);
285 void __swap_cache_del_folio(struct swap_cluster_info *ci,
286 struct folio *folio, swp_entry_t entry, void *shadow);
287 void __swap_cache_replace_folio(struct swap_cluster_info *ci,
288 struct folio *old, struct folio *new);
289 void __swap_cache_clear_shadow(swp_entry_t entry, int nr_ents);
290
291 void show_swap_cache_info(void);
292 void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry, int nr);
293 struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
294 struct vm_area_struct *vma, unsigned long addr,
295 struct swap_iocb **plug);
296 struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t flag,
297 struct mempolicy *mpol, pgoff_t ilx);
298 struct folio *swapin_readahead(swp_entry_t entry, gfp_t flag,
299 struct vm_fault *vmf);
300 struct folio *swapin_folio(swp_entry_t entry, struct folio *folio);
301 void swap_update_readahead(struct folio *folio, struct vm_area_struct *vma,
302 unsigned long addr);
303
folio_swap_flags(struct folio * folio)304 static inline unsigned int folio_swap_flags(struct folio *folio)
305 {
306 return __swap_entry_to_info(folio->swap)->flags;
307 }
308
309 /*
310 * Return the count of contiguous swap entries that share the same
311 * zeromap status as the starting entry. If is_zeromap is not NULL,
312 * it will return the zeromap status of the starting entry.
313 */
swap_zeromap_batch(swp_entry_t entry,int max_nr,bool * is_zeromap)314 static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr,
315 bool *is_zeromap)
316 {
317 struct swap_info_struct *sis = __swap_entry_to_info(entry);
318 unsigned long start = swp_offset(entry);
319 unsigned long end = start + max_nr;
320 bool first_bit;
321
322 first_bit = test_bit(start, sis->zeromap);
323 if (is_zeromap)
324 *is_zeromap = first_bit;
325
326 if (max_nr <= 1)
327 return max_nr;
328 if (first_bit)
329 return find_next_zero_bit(sis->zeromap, end, start) - start;
330 else
331 return find_next_bit(sis->zeromap, end, start) - start;
332 }
333
/* Count leading contiguous entries that are NOT in the swap cache. */
static inline int non_swapcache_batch(swp_entry_t entry, int max_nr)
{
	int nr;

	/*
	 * While allocating a large folio and doing mTHP swapin, we need to
	 * ensure all entries are not cached, otherwise, the mTHP folio will
	 * be in conflict with the folio in swap cache.
	 */
	for (nr = 0; nr < max_nr; nr++, entry.val++) {
		if (swap_cache_has_folio(entry))
			break;
	}

	return nr;
}
351
352 #else /* CONFIG_SWAP */
353 struct swap_iocb;
swap_cluster_lock(struct swap_info_struct * si,pgoff_t offset,bool irq)354 static inline struct swap_cluster_info *swap_cluster_lock(
355 struct swap_info_struct *si, pgoff_t offset, bool irq)
356 {
357 return NULL;
358 }
359
swap_cluster_get_and_lock(struct folio * folio)360 static inline struct swap_cluster_info *swap_cluster_get_and_lock(
361 struct folio *folio)
362 {
363 return NULL;
364 }
365
swap_cluster_get_and_lock_irq(struct folio * folio)366 static inline struct swap_cluster_info *swap_cluster_get_and_lock_irq(
367 struct folio *folio)
368 {
369 return NULL;
370 }
371
/* No-op: there is no cluster lock without CONFIG_SWAP. */
static inline void swap_cluster_unlock(struct swap_cluster_info *ci)
{
}

/* No-op: there is no cluster lock without CONFIG_SWAP. */
static inline void swap_cluster_unlock_irq(struct swap_cluster_info *ci)
{
}

/* No swap devices exist without CONFIG_SWAP. */
static inline struct swap_info_struct *__swap_entry_to_info(swp_entry_t entry)
{
	return NULL;
}
384
/* Swap allocation always fails when swap is compiled out. */
static inline int folio_alloc_swap(struct folio *folio)
{
	return -EINVAL;
}

/* Swap count manipulation always fails when swap is compiled out. */
static inline int folio_dup_swap(struct folio *folio, struct page *page)
{
	return -EINVAL;
}

/* No-op without CONFIG_SWAP. */
static inline void folio_put_swap(struct folio *folio, struct page *page)
{
}

/* No-op: swap I/O cannot happen without CONFIG_SWAP. */
static inline void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
{
}

/* No-op: swap I/O cannot happen without CONFIG_SWAP. */
static inline void swap_write_unplug(struct swap_iocb *sio)
{
}
406
/* There is no swap cache address space without CONFIG_SWAP. */
static inline struct address_space *swap_address_space(swp_entry_t entry)
{
	return NULL;
}

/* A folio can never be in the swap cache without CONFIG_SWAP. */
static inline bool folio_matches_swap_entry(const struct folio *folio, swp_entry_t entry)
{
	return false;
}

/* No-op without CONFIG_SWAP. */
static inline void show_swap_cache_info(void)
{
}

/* Swapin is impossible without CONFIG_SWAP. */
static inline struct folio *swap_cluster_readahead(swp_entry_t entry,
			gfp_t gfp_mask, struct mempolicy *mpol, pgoff_t ilx)
{
	return NULL;
}

/* Swapin is impossible without CONFIG_SWAP. */
static inline struct folio *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
			struct vm_fault *vmf)
{
	return NULL;
}
432
/* Swapin is impossible without CONFIG_SWAP. */
static inline struct folio *swapin_folio(swp_entry_t entry, struct folio *folio)
{
	return NULL;
}

/* No-op without CONFIG_SWAP. */
static inline void swap_update_readahead(struct folio *folio,
		struct vm_area_struct *vma, unsigned long addr)
{
}

/* Nothing to write out; report success. */
static inline int swap_writeout(struct folio *folio,
		struct swap_iocb **swap_plug)
{
	return 0;
}

/* The swap cache is always empty without CONFIG_SWAP. */
static inline bool swap_cache_has_folio(swp_entry_t entry)
{
	return false;
}
453
/* The swap cache is always empty without CONFIG_SWAP. */
static inline struct folio *swap_cache_get_folio(swp_entry_t entry)
{
	return NULL;
}

/* No shadow entries exist without CONFIG_SWAP. */
static inline void *swap_cache_get_shadow(swp_entry_t entry)
{
	return NULL;
}

/* No-op without CONFIG_SWAP. */
static inline void swap_cache_del_folio(struct folio *folio)
{
}

/* No-op without CONFIG_SWAP. */
static inline void __swap_cache_del_folio(struct swap_cluster_info *ci,
		struct folio *folio, swp_entry_t entry, void *shadow)
{
}

/* No-op without CONFIG_SWAP. */
static inline void __swap_cache_replace_folio(struct swap_cluster_info *ci,
		struct folio *old, struct folio *new)
{
}
477
/* No backing swap device, hence no flags. */
static inline unsigned int folio_swap_flags(struct folio *folio)
{
	return 0;
}

/* No zeromap without CONFIG_SWAP; report an empty batch. */
static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr,
		bool *has_zeromap)
{
	return 0;
}

/* No swap cache to conflict with; report an empty batch. */
static inline int non_swapcache_batch(swp_entry_t entry, int max_nr)
{
	return 0;
}
493 #endif /* CONFIG_SWAP */
494 #endif /* _MM_SWAP_H */
495