// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "alloc_background.h"
#include "backpointers.h"
#include "btree_gc.h"
#include "btree_node_scan.h"
#include "disk_accounting.h"
#include "ec.h"
#include "fsck.h"
#include "inode.h"
#include "journal.h"
#include "lru.h"
#include "logged_ops.h"
#include "movinggc.h"
#include "rebalance.h"
#include "recovery.h"
#include "recovery_passes.h"
#include "snapshot.h"
#include "subvolume.h"
#include "super.h"
#include "super-io.h"

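/* Pass names, for logging; generated from the BCH_RECOVERY_PASSES() x-macro: */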
const char * const bch2_recovery_passes[] = {
#define x(_fn, ...)	#_fn,
	BCH_RECOVERY_PASSES()
#undef x
	NULL
};

/* Fake recovery pass, so that scan_for_btree_nodes isn't 0: */
static int bch2_recovery_pass_empty(struct bch_fs *c)
{
	return 0;
}

static int bch2_set_may_go_rw(struct bch_fs *c)
{
	struct journal_keys *keys = &c->journal_keys;

	/*
	 * After we go RW, the journal keys buffer can't be modified (except
	 * for setting journal_key->overwritten): it will be accessed by
	 * multiple threads.
	 */
	move_gap(keys, keys->nr);

	set_bit(BCH_FS_may_go_rw, &c->flags);

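	/*
	 * Go RW early if there are journal keys to replay, if this isn't a
	 * read-only mount, or if fsck, an unclean shutdown or explicitly
	 * requested passes mean there's repair work to do:
	 */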
	if (keys->nr || !c->opts.read_only || c->opts.fsck || !c->sb.clean || c->opts.recovery_passes)
		return bch2_fs_read_write_early(c);
	return 0;
}

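/*
 * Table of pass implementations and the PASS_* conditions under which each
 * one runs, generated from the BCH_RECOVERY_PASSES() x-macro:
 */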
struct recovery_pass_fn {
	int		(*fn)(struct bch_fs *);
	unsigned	when;
};

static struct recovery_pass_fn recovery_pass_fns[] = {
#define x(_fn, _id, _when)	{ .fn = bch2_##_fn, .when = _when },
	BCH_RECOVERY_PASSES()
#undef x
};

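/*
 * Stable pass numbers are what we store in the superblock; unlike the
 * in-memory enum, they can't be renumbered as passes are added or reordered:
 */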
static const u8 passes_to_stable_map[] = {
#define x(n, id, ...)	[BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n,
	BCH_RECOVERY_PASSES()
#undef x
};

static enum bch_recovery_pass_stable bch2_recovery_pass_to_stable(enum bch_recovery_pass pass)
{
	return passes_to_stable_map[pass];
}

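/* Convert a bitmask of in-memory pass numbers to stable pass numbers: */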
u64 bch2_recovery_passes_to_stable(u64 v)
{
	u64 ret = 0;
	for (unsigned i = 0; i < ARRAY_SIZE(passes_to_stable_map); i++)
		if (v & BIT_ULL(i))
			ret |= BIT_ULL(passes_to_stable_map[i]);
	return ret;
}

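/* The inverse: convert a stable bitmask back to in-memory pass numbers: */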
u64 bch2_recovery_passes_from_stable(u64 v)
{
	static const u8 map[] = {
#define x(n, id, ...)	[BCH_RECOVERY_PASS_STABLE_##n] = BCH_RECOVERY_PASS_##n,
	BCH_RECOVERY_PASSES()
#undef x
	};

	u64 ret = 0;
	for (unsigned i = 0; i < ARRAY_SIZE(map); i++)
		if (v & BIT_ULL(i))
			ret |= BIT_ULL(map[i]);
	return ret;
}

/*
 * For when we need to rewind recovery passes and run a pass we skipped:
 */
static int __bch2_run_explicit_recovery_pass(struct bch_fs *c,
					     enum bch_recovery_pass pass)
{
	if (c->curr_recovery_pass == ARRAY_SIZE(recovery_pass_fns))
		return -BCH_ERR_not_in_recovery;

	if (c->recovery_passes_complete & BIT_ULL(pass))
		return 0;

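	/* Only log if this pass wasn't already explicitly requested: */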
	bool print = !(c->opts.recovery_passes & BIT_ULL(pass));

	if (pass < BCH_RECOVERY_PASS_set_may_go_rw &&
	    c->curr_recovery_pass >= BCH_RECOVERY_PASS_set_may_go_rw) {
		if (print)
			bch_info(c, "need recovery pass %s (%u), but already rw",
				 bch2_recovery_passes[pass], pass);
		return -BCH_ERR_cannot_rewind_recovery;
	}

	if (print)
		bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)",
			 bch2_recovery_passes[pass], pass,
			 bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass);

	c->opts.recovery_passes |= BIT_ULL(pass);

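	/*
	 * If recovery has already gone past @pass, rewind: mark it and
	 * everything after it incomplete, and tell the caller to restart the
	 * pass loop:
	 */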
	if (c->curr_recovery_pass > pass) {
		c->next_recovery_pass = pass;
		c->recovery_passes_complete &= ~(~0ULL << pass);
		return -BCH_ERR_restart_recovery;
	} else {
		return 0;
	}
}

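/*
 * recovery_pass_lock is taken with interrupts disabled because this may be
 * called from btree node read completion:
 */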
int bch2_run_explicit_recovery_pass(struct bch_fs *c,
				    enum bch_recovery_pass pass)
{
	unsigned long flags;
	spin_lock_irqsave(&c->recovery_pass_lock, flags);
	int ret = __bch2_run_explicit_recovery_pass(c, pass);
	spin_unlock_irqrestore(&c->recovery_pass_lock, flags);
	return ret;
}

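/*
 * As above, but also marks the pass as required in the superblock; the caller
 * holds sb_lock and is responsible for writing the superblock:
 */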
int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *c,
						      enum bch_recovery_pass pass)
{
	lockdep_assert_held(&c->sb_lock);

	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
	__set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required);

	return bch2_run_explicit_recovery_pass(c, pass);
}

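/*
 * Record the pass in the superblock's recovery_passes_required bitmap and
 * write the superblock before running it, so it will be rerun on the next
 * mount if we crash before it completes:
 */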
int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c,
					       enum bch_recovery_pass pass)
{
	enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass);

	mutex_lock(&c->sb_lock);
	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);

	if (!test_bit_le64(s, ext->recovery_passes_required)) {
		__set_bit_le64(s, ext->recovery_passes_required);
		bch2_write_super(c);
	}
	mutex_unlock(&c->sb_lock);

	return bch2_run_explicit_recovery_pass(c, pass);
}

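/* Drop a pass from the superblock's required bitmap once it has run cleanly: */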
static void bch2_clear_recovery_pass_required(struct bch_fs *c,
					      enum bch_recovery_pass pass)
{
	enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass);

	mutex_lock(&c->sb_lock);
	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);

	if (test_bit_le64(s, ext->recovery_passes_required)) {
		__clear_bit_le64(s, ext->recovery_passes_required);
		bch2_write_super(c);
	}
	mutex_unlock(&c->sb_lock);
}

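/* Returns the bitmask of every pass that runs as part of fsck: */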
u64 bch2_fsck_recovery_passes(void)
{
	u64 ret = 0;

	for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++)
		if (recovery_pass_fns[i].when & PASS_FSCK)
			ret |= BIT_ULL(i);
	return ret;
}

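/*
 * Decide whether @pass should run: explicit excludes take priority, then
 * explicit requests, then the conditions in the pass's PASS_* flags:
 */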
static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
{
	struct recovery_pass_fn *p = recovery_pass_fns + pass;

	if (c->opts.recovery_passes_exclude & BIT_ULL(pass))
		return false;
	if (c->opts.recovery_passes & BIT_ULL(pass))
		return true;
	if ((p->when & PASS_FSCK) && c->opts.fsck)
		return true;
	if ((p->when & PASS_UNCLEAN) && !c->sb.clean)
		return true;
	if (p->when & PASS_ALWAYS)
		return true;
	return false;
}

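/* Run a single pass, logging start and completion unless it's PASS_SILENT: */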
static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
{
	struct recovery_pass_fn *p = recovery_pass_fns + pass;
	int ret;

	if (!(p->when & PASS_SILENT))
		bch2_print(c, KERN_INFO bch2_log_msg(c, "%s..."),
			   bch2_recovery_passes[pass]);
	ret = p->fn(c);
	if (ret)
		return ret;
	if (!(p->when & PASS_SILENT))
		bch2_print(c, KERN_CONT " done\n");

	return 0;
}

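/*
 * Run every pass flagged PASS_ONLINE against an already-mounted filesystem,
 * resuming from curr_recovery_pass if a pass requests a restart:
 */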
int bch2_run_online_recovery_passes(struct bch_fs *c)
{
	for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) {
		struct recovery_pass_fn *p = recovery_pass_fns + i;

		if (!(p->when & PASS_ONLINE))
			continue;

		int ret = bch2_run_recovery_pass(c, i);
		if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) {
			i = c->curr_recovery_pass;
			continue;
		}
		if (ret)
			return ret;
	}

	return 0;
}

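/*
 * The main recovery pass loop: curr_recovery_pass and next_recovery_pass are
 * protected by recovery_pass_lock, which is dropped while a pass runs, so a
 * concurrent bch2_run_explicit_recovery_pass() may move next_recovery_pass
 * backwards underneath us:
 */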
int bch2_run_recovery_passes(struct bch_fs *c)
{
	int ret = 0;

	/*
	 * We can't allow set_may_go_rw to be excluded; that would cause us to
	 * use the journal replay keys for updates where it's not expected.
	 */
	c->opts.recovery_passes_exclude &= ~BIT_ULL(BCH_RECOVERY_PASS_set_may_go_rw);

	spin_lock_irq(&c->recovery_pass_lock);

	while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns) && !ret) {
		unsigned prev_done = c->recovery_pass_done;
		unsigned pass = c->curr_recovery_pass;

		c->next_recovery_pass = pass + 1;

		if (c->opts.recovery_pass_last &&
		    c->curr_recovery_pass > c->opts.recovery_pass_last)
			break;

		if (should_run_recovery_pass(c, pass)) {
			spin_unlock_irq(&c->recovery_pass_lock);
			ret =   bch2_run_recovery_pass(c, pass) ?:
				bch2_journal_flush(&c->journal);

			if (!ret && !test_bit(BCH_FS_error, &c->flags))
				bch2_clear_recovery_pass_required(c, pass);
			spin_lock_irq(&c->recovery_pass_lock);

			if (c->next_recovery_pass < c->curr_recovery_pass) {
				/*
				 * bch2_run_explicit_recovery_pass() was called: we
				 * can't always catch -BCH_ERR_restart_recovery because
				 * it may have been called from another thread (btree
				 * node read completion)
				 */
				ret = 0;
				c->recovery_passes_complete &= ~(~0ULL << c->curr_recovery_pass);
			} else {
				c->recovery_passes_complete |= BIT_ULL(pass);
				c->recovery_pass_done = max(c->recovery_pass_done, pass);
			}
		}

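		/*
		 * Advance to the next pass, or rewind if an explicit pass
		 * request moved next_recovery_pass backwards:
		 */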
		c->curr_recovery_pass = c->next_recovery_pass;

		if (prev_done <= BCH_RECOVERY_PASS_check_snapshots &&
		    c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots) {
			bch2_copygc_wakeup(c);
			bch2_rebalance_wakeup(c);
		}
	}

	spin_unlock_irq(&c->recovery_pass_lock);

	return ret;
}