/* SPDX-License-Identifier: GPL-2.0 */

#ifndef BTRFS_SPACE_INFO_H
#define BTRFS_SPACE_INFO_H

#include <trace/events/btrfs.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/kobject.h>
#include <linux/lockdep.h>
#include <linux/wait.h>
#include <linux/rwsem.h>
#include "volumes.h"

struct btrfs_fs_info;
struct btrfs_block_group;
18 /*
19 * Different levels for to flush space when doing space reservations.
20 *
21 * The higher the level, the more methods we try to reclaim space.
22 */
23 enum btrfs_reserve_flush_enum {
24 /*
25 * Used when we can't flush or don't need:
26 *
27 * 1) We are holding a transaction handle open, so we can't flush as
28 * that could deadlock.
29 *
30 * 2) For a nowait write we don't want to block when reserving delalloc.
31 *
32 * 3) Joining a transaction or attaching a transaction, we don't want
33 * to wait and we don't need to reserve anything (any needed space
34 * was reserved before in a dedicated block reserve, or we rely on
35 * the global block reserve, see btrfs_init_root_block_rsv()).
36 *
37 * 4) Starting a transaction when we don't need to reserve space, as
38 * we don't need it because we previously reserved in a dedicated
39 * block reserve or rely on the global block reserve, like the above
40 * case.
41 */
42 BTRFS_RESERVE_NO_FLUSH,
43
44 /*
45 * Flush space by:
46 * - Running delayed inode items
47 * - Allocating a new chunk
48 */
49 BTRFS_RESERVE_FLUSH_LIMIT,
50
51 /*
52 * Flush space by:
53 * - Running delayed inode items
54 * - Running delayed refs
55 * - Running delalloc and waiting for ordered extents
56 * - Allocating a new chunk
57 * - Committing transaction
58 */
59 BTRFS_RESERVE_FLUSH_EVICT,
60
61 /*
62 * Flush space by above mentioned methods and by:
63 * - Running delayed iputs
64 * - Committing transaction
65 *
66 * Can be interrupted by a fatal signal.
67 */
68 BTRFS_RESERVE_FLUSH_DATA,
69 BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE,
70 BTRFS_RESERVE_FLUSH_ALL,
71
72 /*
73 * Pretty much the same as FLUSH_ALL, but can also steal space from
74 * global rsv.
75 *
76 * Can be interrupted by a fatal signal.
77 */
78 BTRFS_RESERVE_FLUSH_ALL_STEAL,
79
80 /*
81 * This is for btrfs_use_block_rsv only. We have exhausted our block
82 * rsv and our global block rsv. This can happen for things like
83 * delalloc where we are overwriting a lot of extents with a single
84 * extent and didn't reserve enough space. Alternatively it can happen
85 * with delalloc where we reserve 1 extents worth for a large extent but
86 * fragmentation leads to multiple extents being created. This will
87 * give us the reservation in the case of
88 *
89 * if (num_bytes < (space_info->total_bytes -
90 * btrfs_space_info_used(space_info, false))
91 *
92 * Which ignores bytes_may_use. This is potentially dangerous, but our
93 * reservation system is generally pessimistic so is able to absorb this
94 * style of mistake.
95 */
96 BTRFS_RESERVE_FLUSH_EMERGENCY,
97 };
98
99 /*
100 * Please be aware that the order of enum values will be the order of the reclaim
101 * process in btrfs_async_reclaim_metadata_space().
102 */
103 enum btrfs_flush_state {
104 FLUSH_DELAYED_ITEMS_NR = 1,
105 FLUSH_DELAYED_ITEMS = 2,
106 FLUSH_DELAYED_REFS_NR = 3,
107 FLUSH_DELAYED_REFS = 4,
108 FLUSH_DELALLOC = 5,
109 FLUSH_DELALLOC_WAIT = 6,
110 FLUSH_DELALLOC_FULL = 7,
111 ALLOC_CHUNK = 8,
112 ALLOC_CHUNK_FORCE = 9,
113 RUN_DELAYED_IPUTS = 10,
114 COMMIT_TRANS = 11,
115 RESET_ZONES = 12,
116 RECLAIM_ZONES = 13,
117 };
118
119 enum btrfs_space_info_sub_group {
120 BTRFS_SUB_GROUP_PRIMARY,
121 BTRFS_SUB_GROUP_DATA_RELOC,
122 BTRFS_SUB_GROUP_TREELOG,
123 };
124
125 #define BTRFS_SPACE_INFO_SUB_GROUP_MAX 1
126 struct btrfs_space_info {
127 struct btrfs_fs_info *fs_info;
128 struct btrfs_space_info *parent;
129 struct btrfs_space_info *sub_group[BTRFS_SPACE_INFO_SUB_GROUP_MAX];
130 int subgroup_id;
131 spinlock_t lock;
132
133 u64 total_bytes; /* total bytes in the space,
134 this doesn't take mirrors into account */
135 u64 bytes_used; /* total bytes used,
136 this doesn't take mirrors into account */
137 u64 bytes_pinned; /* total bytes pinned, will be freed when the
138 transaction finishes */
139 u64 bytes_reserved; /* total bytes the allocator has reserved for
140 current allocations */
141 u64 bytes_may_use; /* number of bytes that may be used for
142 delalloc/allocations */
143 u64 bytes_readonly; /* total bytes that are read only */
144 u64 bytes_zone_unusable; /* total bytes that are unusable until
145 resetting the device zone */
146
147 u64 max_extent_size; /* This will hold the maximum extent size of
148 the space info if we had an ENOSPC in the
149 allocator. */
150 /* Chunk size in bytes */
151 u64 chunk_size;
152
153 /*
154 * Once a block group drops below this threshold (percents) we'll
155 * schedule it for reclaim.
156 */
157 int bg_reclaim_threshold;
158
159 int clamp; /* Used to scale our threshold for preemptive
160 flushing. The value is >> clamp, so turns
161 out to be a 2^clamp divisor. */
162
163 bool full; /* indicates that we cannot allocate any more
164 chunks for this space */
165 bool chunk_alloc; /* set if we are allocating a chunk */
166
167 bool flush; /* set if we are trying to make space */
168
169 unsigned int force_alloc; /* set if we need to force a chunk
170 alloc for this space */
171
172 u64 disk_used; /* total bytes used on disk */
173 u64 disk_total; /* total bytes on disk, takes mirrors into
174 account */
175
176 u64 flags;
177
178 struct list_head list;
179 /* Protected by the spinlock 'lock'. */
180 struct list_head ro_bgs;
181 struct list_head priority_tickets;
182 struct list_head tickets;
183
184 /*
185 * Size of space that needs to be reclaimed in order to satisfy pending
186 * tickets
187 */
188 u64 reclaim_size;
189
190 /*
191 * tickets_id just indicates the next ticket will be handled, so note
192 * it's not stored per ticket.
193 */
194 u64 tickets_id;
195
196 struct rw_semaphore groups_sem;
197 /* for block groups in our same type */
198 struct list_head block_groups[BTRFS_NR_RAID_TYPES];
199
200 struct kobject kobj;
201 struct kobject *block_group_kobjs[BTRFS_NR_RAID_TYPES];
202
203 /*
204 * Monotonically increasing counter of block group reclaim attempts
205 * Exposed in /sys/fs/<uuid>/allocation/<type>/reclaim_count
206 */
207 u64 reclaim_count;
208
209 /*
210 * Monotonically increasing counter of reclaimed bytes
211 * Exposed in /sys/fs/<uuid>/allocation/<type>/reclaim_bytes
212 */
213 u64 reclaim_bytes;
214
215 /*
216 * Monotonically increasing counter of reclaim errors
217 * Exposed in /sys/fs/<uuid>/allocation/<type>/reclaim_errors
218 */
219 u64 reclaim_errors;
220
221 /*
222 * If true, use the dynamic relocation threshold, instead of the
223 * fixed bg_reclaim_threshold.
224 */
225 bool dynamic_reclaim;
226
227 /*
228 * Periodically check all block groups against the reclaim
229 * threshold in the cleaner thread.
230 */
231 bool periodic_reclaim;
232
233 /*
234 * Periodic reclaim should be a no-op if a space_info hasn't
235 * freed any space since the last time we tried.
236 */
237 bool periodic_reclaim_ready;
238
239 /*
240 * Net bytes freed or allocated since the last reclaim pass.
241 */
242 s64 reclaimable_bytes;
243 };
244
btrfs_mixed_space_info(const struct btrfs_space_info * space_info)245 static inline bool btrfs_mixed_space_info(const struct btrfs_space_info *space_info)
246 {
247 return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) &&
248 (space_info->flags & BTRFS_BLOCK_GROUP_DATA));
249 }
250
251 /*
252 *
253 * Declare a helper function to detect underflow of various space info members
254 */
255 #define DECLARE_SPACE_INFO_UPDATE(name, trace_name) \
256 static inline void \
257 btrfs_space_info_update_##name(struct btrfs_space_info *sinfo, \
258 s64 bytes) \
259 { \
260 struct btrfs_fs_info *fs_info = sinfo->fs_info; \
261 const u64 abs_bytes = (bytes < 0) ? -bytes : bytes; \
262 lockdep_assert_held(&sinfo->lock); \
263 trace_update_##name(fs_info, sinfo, sinfo->name, bytes); \
264 trace_btrfs_space_reservation(fs_info, trace_name, \
265 sinfo->flags, abs_bytes, \
266 bytes > 0); \
267 if (bytes < 0 && sinfo->name < -bytes) { \
268 WARN_ON(1); \
269 sinfo->name = 0; \
270 return; \
271 } \
272 sinfo->name += bytes; \
273 }
274
275 DECLARE_SPACE_INFO_UPDATE(bytes_may_use, "space_info");
276 DECLARE_SPACE_INFO_UPDATE(bytes_pinned, "pinned");
277 DECLARE_SPACE_INFO_UPDATE(bytes_zone_unusable, "zone_unusable");
278
btrfs_space_info_used(const struct btrfs_space_info * s_info,bool may_use_included)279 static inline u64 btrfs_space_info_used(const struct btrfs_space_info *s_info,
280 bool may_use_included)
281 {
282 lockdep_assert_held(&s_info->lock);
283
284 return s_info->bytes_used + s_info->bytes_reserved +
285 s_info->bytes_pinned + s_info->bytes_readonly +
286 s_info->bytes_zone_unusable +
287 (may_use_included ? s_info->bytes_may_use : 0);
288 }
289
290 int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
291 void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info,
292 struct btrfs_block_group *block_group);
293 void btrfs_update_space_info_chunk_size(struct btrfs_space_info *space_info,
294 u64 chunk_size);
295 struct btrfs_space_info *btrfs_find_space_info(const struct btrfs_fs_info *info,
296 u64 flags);
297 void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
298 void btrfs_dump_space_info(struct btrfs_space_info *info, u64 bytes,
299 bool dump_block_groups);
300 int btrfs_reserve_metadata_bytes(struct btrfs_space_info *space_info,
301 u64 orig_bytes,
302 enum btrfs_reserve_flush_enum flush);
303 void btrfs_try_granting_tickets(struct btrfs_space_info *space_info);
304 bool btrfs_can_overcommit(const struct btrfs_space_info *space_info, u64 bytes,
305 enum btrfs_reserve_flush_enum flush);
306
btrfs_space_info_free_bytes_may_use(struct btrfs_space_info * space_info,u64 num_bytes)307 static inline void btrfs_space_info_free_bytes_may_use(
308 struct btrfs_space_info *space_info,
309 u64 num_bytes)
310 {
311 spin_lock(&space_info->lock);
312 btrfs_space_info_update_bytes_may_use(space_info, -num_bytes);
313 btrfs_try_granting_tickets(space_info);
314 spin_unlock(&space_info->lock);
315 }
316 int btrfs_reserve_data_bytes(struct btrfs_space_info *space_info, u64 bytes,
317 enum btrfs_reserve_flush_enum flush);
318 void btrfs_dump_space_info_for_trans_abort(struct btrfs_fs_info *fs_info);
319 void btrfs_init_async_reclaim_work(struct btrfs_fs_info *fs_info);
320 u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
321
322 void btrfs_space_info_update_reclaimable(struct btrfs_space_info *space_info, s64 bytes);
323 void btrfs_set_periodic_reclaim_ready(struct btrfs_space_info *space_info, bool ready);
324 int btrfs_calc_reclaim_threshold(const struct btrfs_space_info *space_info);
325 void btrfs_reclaim_sweep(const struct btrfs_fs_info *fs_info);
326 void btrfs_return_free_space(struct btrfs_space_info *space_info, u64 len);
327
btrfs_space_info_type_str(const struct btrfs_space_info * space_info)328 static inline const char *btrfs_space_info_type_str(const struct btrfs_space_info *space_info)
329 {
330 switch (space_info->flags) {
331 case BTRFS_BLOCK_GROUP_SYSTEM:
332 return "SYSTEM";
333 case BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA:
334 return "DATA+METADATA";
335 case BTRFS_BLOCK_GROUP_DATA:
336 return "DATA";
337 case BTRFS_BLOCK_GROUP_METADATA:
338 return "METADATA";
339 default:
340 return "UNKNOWN";
341 }
342 }
343
#endif /* BTRFS_SPACE_INFO_H */