// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2023-2025 Christoph Hellwig.
 * Copyright (c) 2024-2025, Western Digital Corporation or its affiliates.
 */
#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_rtbitmap.h"
#include "xfs_zone_alloc.h"
#include "xfs_zone_priv.h"
#include "xfs_zones.h"

/*
 * Note: the zoned allocator does not support a rtextsize > 1, so this code
 * and the allocator itself use file system blocks interchangeably with
 * realtime extents without doing the otherwise required conversions.
 */

/*
 * Per-task space reservation.
 *
 * A task that needs to wait for GC to free up space allocates one of these
 * on the stack and adds it to the per-mount zi_reclaim_reservations list.
 * The GC thread will then wake the tasks in order when space becomes
 * available.
 */
struct xfs_zone_reservation {
	struct list_head	entry;
	struct task_struct	*task;
	xfs_filblks_t		count_fsb;
};

/*
 * Calculate the number of reserved blocks.
 *
 * XC_FREE_RTEXTENTS counts the user-available capacity, up to which the
 * file system can be filled, while XC_FREE_RTAVAILABLE counts the blocks
 * instantly available for writes without waiting for GC.
 *
 * For XC_FREE_RTAVAILABLE only the smaller reservation required for GC and
 * block zeroing is excluded from the user capacity, while XC_FREE_RTEXTENTS
 * is further restricted by at least one zone as well as the optional
 * persistently reserved blocks.  This allows the allocator to run more
 * smoothly by not always triggering GC.
 */
uint64_t
xfs_zoned_default_resblks(
	struct xfs_mount	*mp,
	enum xfs_free_counter	ctr)
{
	switch (ctr) {
	case XC_FREE_RTEXTENTS:
		return (uint64_t)XFS_RESERVED_ZONES *
			mp->m_groups[XG_TYPE_RTG].blocks +
			mp->m_sb.sb_rtreserved;
	case XC_FREE_RTAVAILABLE:
		return (uint64_t)XFS_GC_ZONES *
			mp->m_groups[XG_TYPE_RTG].blocks;
	default:
		ASSERT(0);
		return 0;
	}
}

void
xfs_zoned_resv_wake_all(
	struct xfs_mount	*mp)
{
	struct xfs_zone_info		*zi = mp->m_zone_info;
	struct xfs_zone_reservation	*reservation;

	spin_lock(&zi->zi_reservation_lock);
	list_for_each_entry(reservation, &zi->zi_reclaim_reservations, entry)
		wake_up_process(reservation->task);
	spin_unlock(&zi->zi_reservation_lock);
}

void
xfs_zoned_add_available(
	struct xfs_mount	*mp,
	xfs_filblks_t		count_fsb)
{
	struct xfs_zone_info		*zi = mp->m_zone_info;
	struct xfs_zone_reservation	*reservation;

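	/*
	 * Fast path: if nobody is waiting for space, put the blocks straight
	 * back into the available counter without taking the lock.
	 */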
	if (list_empty_careful(&zi->zi_reclaim_reservations)) {
		xfs_add_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb);
		return;
	}

	spin_lock(&zi->zi_reservation_lock);
	xfs_add_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb);
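	/*
	 * Re-read the now-updated available count under the lock and wake
	 * the waiters in FIFO order, but only while enough blocks remain to
	 * satisfy each waiter's reservation in turn.
	 */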
	count_fsb = xfs_sum_freecounter(mp, XC_FREE_RTAVAILABLE);
	list_for_each_entry(reservation, &zi->zi_reclaim_reservations, entry) {
		if (reservation->count_fsb > count_fsb)
			break;
		wake_up_process(reservation->task);
		count_fsb -= reservation->count_fsb;
	}
	spin_unlock(&zi->zi_reservation_lock);
}

static int
xfs_zoned_space_wait_error(
	struct xfs_mount	*mp)
{
	if (xfs_is_shutdown(mp))
		return -EIO;
	if (fatal_signal_pending(current))
		return -EINTR;
	return 0;
}

static int
xfs_zoned_reserve_available(
	struct xfs_mount	*mp,
	xfs_filblks_t		count_fsb,
	unsigned int		flags)
{
	struct xfs_zone_info		*zi = mp->m_zone_info;
	struct xfs_zone_reservation	reservation = {
		.task		= current,
		.count_fsb	= count_fsb,
	};
	int				error;

	/*
	 * If there are no waiters, try to directly grab the available blocks
	 * from the percpu counter.
	 *
	 * If the caller wants to dip into the reserved pool, also bypass the
	 * wait list.  This relies on the fact that we have a very generously
	 * sized reserved pool that always has enough space.  If the reserved
	 * allocations fail we're in trouble.
	 */
	if (likely(list_empty_careful(&zi->zi_reclaim_reservations) ||
	    (flags & XFS_ZR_RESERVED))) {
		error = xfs_dec_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb,
				flags & XFS_ZR_RESERVED);
		if (error != -ENOSPC)
			return error;
	}

	if (flags & XFS_ZR_NOWAIT)
		return -EAGAIN;

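	/*
	 * Otherwise queue up on the wait list and sleep killable until GC
	 * frees enough blocks or the file system shuts down.
	 */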
	spin_lock(&zi->zi_reservation_lock);
	list_add_tail(&reservation.entry, &zi->zi_reclaim_reservations);
	while ((error = xfs_zoned_space_wait_error(mp)) == 0) {
		set_current_state(TASK_KILLABLE);

		error = xfs_dec_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb,
				flags & XFS_ZR_RESERVED);
		if (error != -ENOSPC)
			break;

		/*
		 * Make sure to start GC if it is not running already.  As we
		 * check the rtavailable count when filling up zones, GC is
		 * normally already running at this point, but in some setups
		 * with very few zones we may completely run out of non-
		 * reserved blocks in between filling zones.
		 */
		if (!xfs_is_zonegc_running(mp))
			wake_up_process(zi->zi_gc_thread);

		/*
		 * If there is no reclaimable group left and we aren't still
		 * processing a pending GC request, give up, as we're fully
		 * out of space.
		 */
		if (!xfs_group_marked(mp, XG_TYPE_RTG, XFS_RTG_RECLAIMABLE) &&
		    !xfs_is_zonegc_running(mp))
			break;

		spin_unlock(&zi->zi_reservation_lock);
		schedule();
		spin_lock(&zi->zi_reservation_lock);
	}
	list_del(&reservation.entry);
	spin_unlock(&zi->zi_reservation_lock);

	__set_current_state(TASK_RUNNING);
	return error;
}

/*
 * Implement greedy space allocation for short writes by trying to grab all
 * that is left after locking out other threads from trying to do the same.
 *
 * This isn't exactly optimal and can hopefully be replaced by a proper
 * percpu_counter primitive one day.
 */
static int
xfs_zoned_reserve_extents_greedy(
	struct xfs_mount	*mp,
	xfs_filblks_t		*count_fsb,
	unsigned int		flags)
{
	struct xfs_zone_info	*zi = mp->m_zone_info;
	s64			len = *count_fsb;
	int			error = -ENOSPC;

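	/*
	 * The lock only serializes other greedy reservations; a racing
	 * regular reservation can still consume blocks between the sum and
	 * the decrement below, in which case the resulting error is simply
	 * passed back to the caller.
	 */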
	spin_lock(&zi->zi_reservation_lock);
	len = min(len, xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS));
	if (len > 0) {
		*count_fsb = len;
		error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, *count_fsb,
				flags & XFS_ZR_RESERVED);
	}
	spin_unlock(&zi->zi_reservation_lock);
	return error;
}

int
xfs_zoned_space_reserve(
	struct xfs_mount	*mp,
	xfs_filblks_t		count_fsb,
	unsigned int		flags,
	struct xfs_zone_alloc_ctx *ac)
{
	int			error;

	ASSERT(ac->reserved_blocks == 0);
	ASSERT(ac->open_zone == NULL);

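	/*
	 * Reserve in two steps: first against the user-visible capacity
	 * (XC_FREE_RTEXTENTS), then against the blocks instantly available
	 * for writes (XC_FREE_RTAVAILABLE), undoing the first step if the
	 * second one fails.
	 */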
	error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb,
			flags & XFS_ZR_RESERVED);
	if (error == -ENOSPC && (flags & XFS_ZR_GREEDY) && count_fsb > 1)
		error = xfs_zoned_reserve_extents_greedy(mp, &count_fsb, flags);
	if (error)
		return error;

	error = xfs_zoned_reserve_available(mp, count_fsb, flags);
	if (error) {
		xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb);
		return error;
	}
	ac->reserved_blocks = count_fsb;
	return 0;
}

void
xfs_zoned_space_unreserve(
	struct xfs_mount	*mp,
	struct xfs_zone_alloc_ctx *ac)
{
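	/*
	 * Return unused blocks to both counters; xfs_zoned_add_available
	 * also wakes any tasks waiting for reclaimed space.
	 */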
	if (ac->reserved_blocks > 0) {
		xfs_zoned_add_available(mp, ac->reserved_blocks);
		xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, ac->reserved_blocks);
	}
	if (ac->open_zone)
		xfs_open_zone_put(ac->open_zone);
}