1 // SPDX-License-Identifier: GPL-2.0
2
3 #include "bcachefs.h"
4 #include "checksum.h"
5 #include "disk_groups.h"
6 #include "ec.h"
7 #include "error.h"
8 #include "journal.h"
9 #include "journal_sb.h"
10 #include "journal_seq_blacklist.h"
11 #include "recovery_passes.h"
12 #include "replicas.h"
13 #include "quota.h"
14 #include "sb-clean.h"
15 #include "sb-counters.h"
16 #include "sb-downgrade.h"
17 #include "sb-errors.h"
18 #include "sb-members.h"
19 #include "super-io.h"
20 #include "super.h"
21 #include "trace.h"
22 #include "vstructs.h"
23
24 #include <linux/backing-dev.h>
25 #include <linux/sort.h>
26 #include <linux/string_choices.h>
27
28 struct bch2_metadata_version {
29 u16 version;
30 const char *name;
31 };
32
33 static const struct bch2_metadata_version bch2_metadata_versions[] = {
34 #define x(n, v) { \
35 .version = v, \
36 .name = #n, \
37 },
38 BCH_METADATA_VERSIONS()
39 #undef x
40 };
41
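/*
 * Print a metadata version as "major.minor: name"; unrecognized versions
 * print as "(unknown version)". BCH_VERSION_MAJOR()/BCH_VERSION_MINOR()
 * split the 16 bit on-disk version number into its two components.
 */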
void bch2_version_to_text(struct printbuf *out, enum bcachefs_metadata_version v)
43 {
44 const char *str = "(unknown version)";
45
46 for (unsigned i = 0; i < ARRAY_SIZE(bch2_metadata_versions); i++)
47 if (bch2_metadata_versions[i].version == v) {
48 str = bch2_metadata_versions[i].name;
49 break;
50 }
51
52 prt_printf(out, "%u.%u: %s", BCH_VERSION_MAJOR(v), BCH_VERSION_MINOR(v), str);
53 }
54
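/*
 * Return the newest version we know of with the same major number as @v -
 * presumably the newest version this codebase still considers compatible
 * with @v. Versions with major number 0 are returned unchanged.
 */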
enum bcachefs_metadata_version bch2_latest_compatible_version(enum bcachefs_metadata_version v)
56 {
57 if (!BCH_VERSION_MAJOR(v))
58 return v;
59
60 for (unsigned i = 0; i < ARRAY_SIZE(bch2_metadata_versions); i++)
61 if (bch2_metadata_versions[i].version > v &&
62 BCH_VERSION_MAJOR(bch2_metadata_versions[i].version) ==
63 BCH_VERSION_MAJOR(v))
64 v = bch2_metadata_versions[i].version;
65
66 return v;
67 }
68
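/*
 * Request use of an incompatible feature introduced in @version: this only
 * succeeds (and ratchets BCH_SB_VERSION_INCOMPAT up to @version) if the
 * incompat_version_field feature is enabled and @version is within
 * version_incompat_allowed. Otherwise, log a notice (once per version)
 * suggesting version_upgrade=incompat and return an error.
 */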
int bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version version)
70 {
71 int ret = ((c->sb.features & BIT_ULL(BCH_FEATURE_incompat_version_field)) &&
72 version <= c->sb.version_incompat_allowed)
73 ? 0
74 : -BCH_ERR_may_not_use_incompat_feature;
75
76 mutex_lock(&c->sb_lock);
77 if (!ret) {
78 SET_BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb,
79 max(BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb), version));
80 bch2_write_super(c);
81 } else {
82 darray_for_each(c->incompat_versions_requested, i)
83 if (version == *i)
84 goto out;
85
86 darray_push(&c->incompat_versions_requested, version);
87 struct printbuf buf = PRINTBUF;
88 prt_str(&buf, "requested incompat feature ");
89 bch2_version_to_text(&buf, version);
90 prt_str(&buf, " currently not enabled");
91 prt_printf(&buf, "\n set version_upgrade=incompat to enable");
92
93 bch_notice(c, "%s", buf.buf);
94 printbuf_exit(&buf);
95 }
96
97 out:
98 mutex_unlock(&c->sb_lock);
99
100 return ret;
101 }
102
103 const char * const bch2_sb_fields[] = {
104 #define x(name, nr) #name,
105 BCH_SB_FIELDS()
106 #undef x
107 NULL
108 };
109
110 static int bch2_sb_field_validate(struct bch_sb *, struct bch_sb_field *,
111 enum bch_validate_flags, struct printbuf *);
112
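/*
 * Superblock fields are variable length structures stored after struct
 * bch_sb; look one up by type with a linear scan. Callers normally use the
 * typed bch2_sb_field_get() wrapper, e.g. (as later in this file):
 *
 *	struct bch_sb_field_ext *ext = bch2_sb_field_get(sb, ext);
 */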
struct bch_sb_field *bch2_sb_field_get_id(struct bch_sb *sb,
					   enum bch_sb_field_type type)
115 {
116 /* XXX: need locking around superblock to access optional fields */
117
118 vstruct_for_each(sb, f)
119 if (le32_to_cpu(f->type) == type)
120 return f;
121 return NULL;
122 }
123
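/*
 * Resize field @f to @u64s u64s, moving the remainder of the superblock up
 * or down as needed; a NULL @f with a nonzero size appends a new zeroed
 * field, and a size of 0 deletes the field. Space newly added to the field
 * is zeroed. The caller must already have made the buffer large enough -
 * hence the BUG_ON().
 */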
static struct bch_sb_field *__bch2_sb_field_resize(struct bch_sb_handle *sb,
						   struct bch_sb_field *f,
						   unsigned u64s)
127 {
128 unsigned old_u64s = f ? le32_to_cpu(f->u64s) : 0;
129 unsigned sb_u64s = le32_to_cpu(sb->sb->u64s) + u64s - old_u64s;
130
131 BUG_ON(__vstruct_bytes(struct bch_sb, sb_u64s) > sb->buffer_size);
132
133 if (!f && !u64s) {
134 /* nothing to do: */
135 } else if (!f) {
136 f = vstruct_last(sb->sb);
137 memset(f, 0, sizeof(u64) * u64s);
138 f->u64s = cpu_to_le32(u64s);
139 f->type = 0;
140 } else {
141 void *src, *dst;
142
143 src = vstruct_end(f);
144
145 if (u64s) {
146 f->u64s = cpu_to_le32(u64s);
147 dst = vstruct_end(f);
148 } else {
149 dst = f;
150 }
151
152 memmove(dst, src, vstruct_end(sb->sb) - src);
153
154 if (dst > src)
155 memset(src, 0, dst - src);
156 }
157
158 sb->sb->u64s = cpu_to_le32(sb_u64s);
159
160 return u64s ? f : NULL;
161 }
162
void bch2_sb_field_delete(struct bch_sb_handle *sb,
			  enum bch_sb_field_type type)
165 {
166 struct bch_sb_field *f = bch2_sb_field_get_id(sb->sb, type);
167
168 if (f)
169 __bch2_sb_field_resize(sb, f, 0);
170 }
171
172 /* Superblock realloc/free: */
173
void bch2_free_super(struct bch_sb_handle *sb)
175 {
176 kfree(sb->bio);
177 if (!IS_ERR_OR_NULL(sb->s_bdev_file))
178 bdev_fput(sb->s_bdev_file);
179 kfree(sb->holder);
180 kfree(sb->sb_name);
181
182 kfree(sb->sb);
183 memset(sb, 0, sizeof(*sb));
184 }
185
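/*
 * Make sure the in-memory superblock buffer can hold @u64s worth of
 * fields: the requested size is rounded up to the device's logical block
 * size and then to a power of two, and may not exceed the limit recorded
 * in the on-disk layout (sb_max_size_bits). The bio used for superblock IO
 * is reallocated to match the new buffer size.
 */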
int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s)
187 {
188 size_t new_bytes = __vstruct_bytes(struct bch_sb, u64s);
189 size_t new_buffer_size;
190 struct bch_sb *new_sb;
191 struct bio *bio;
192
193 if (sb->bdev)
194 new_bytes = max_t(size_t, new_bytes, bdev_logical_block_size(sb->bdev));
195
196 new_buffer_size = roundup_pow_of_two(new_bytes);
197
198 if (sb->sb && sb->buffer_size >= new_buffer_size)
199 return 0;
200
201 if (sb->sb && sb->have_layout) {
202 u64 max_bytes = 512 << sb->sb->layout.sb_max_size_bits;
203
204 if (new_bytes > max_bytes) {
205 struct printbuf buf = PRINTBUF;
206
207 prt_bdevname(&buf, sb->bdev);
208 prt_printf(&buf, ": superblock too big: want %zu but have %llu", new_bytes, max_bytes);
209 pr_err("%s", buf.buf);
210 printbuf_exit(&buf);
211 return -BCH_ERR_ENOSPC_sb;
212 }
213 }
214
215 if (sb->buffer_size >= new_buffer_size && sb->sb)
216 return 0;
217
218 if (dynamic_fault("bcachefs:add:super_realloc"))
219 return -BCH_ERR_ENOMEM_sb_realloc_injected;
220
221 new_sb = krealloc(sb->sb, new_buffer_size, GFP_NOFS|__GFP_ZERO);
222 if (!new_sb)
223 return -BCH_ERR_ENOMEM_sb_buf_realloc;
224
225 sb->sb = new_sb;
226
227 if (sb->have_bio) {
228 unsigned nr_bvecs = buf_pages(sb->sb, new_buffer_size);
229
230 bio = bio_kmalloc(nr_bvecs, GFP_KERNEL);
231 if (!bio)
232 return -BCH_ERR_ENOMEM_sb_bio_realloc;
233
234 bio_init(bio, NULL, bio->bi_inline_vecs, nr_bvecs, 0);
235
236 kfree(sb->bio);
237 sb->bio = bio;
238 }
239
240 sb->buffer_size = new_buffer_size;
241
242 return 0;
243 }
244
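/*
 * Resize (or create) a superblock field; when called on the filesystem
 * superblock, the superblocks of all online member devices are grown as
 * well, so that the updated contents can later be copied out to them.
 * Returns NULL if any reallocation fails.
 */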
struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *sb,
					     enum bch_sb_field_type type,
					     unsigned u64s)
248 {
249 struct bch_sb_field *f = bch2_sb_field_get_id(sb->sb, type);
250 ssize_t old_u64s = f ? le32_to_cpu(f->u64s) : 0;
251 ssize_t d = -old_u64s + u64s;
252
253 if (bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d))
254 return NULL;
255
256 if (sb->fs_sb) {
257 struct bch_fs *c = container_of(sb, struct bch_fs, disk_sb);
258
259 lockdep_assert_held(&c->sb_lock);
260
		/* XXX: we're not checking that offline devices have enough space */
262
263 for_each_online_member(c, ca) {
264 struct bch_sb_handle *dev_sb = &ca->disk_sb;
265
266 if (bch2_sb_realloc(dev_sb, le32_to_cpu(dev_sb->sb->u64s) + d)) {
267 percpu_ref_put(&ca->io_ref[READ]);
268 return NULL;
269 }
270 }
271 }
272
273 f = bch2_sb_field_get_id(sb->sb, type);
274 f = __bch2_sb_field_resize(sb, f, u64s);
275 if (f)
276 f->type = cpu_to_le32(type);
277 return f;
278 }
279
struct bch_sb_field *bch2_sb_field_get_minsize_id(struct bch_sb_handle *sb,
						  enum bch_sb_field_type type,
						  unsigned u64s)
283 {
284 struct bch_sb_field *f = bch2_sb_field_get_id(sb->sb, type);
285
286 if (!f || le32_to_cpu(f->u64s) < u64s)
287 f = bch2_sb_field_resize_id(sb, type, u64s);
288 return f;
289 }
290
291 /* Superblock validate: */
292
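/*
 * The superblock layout is a single 512 byte sector describing where every
 * copy of the superblock lives on the device: check the magic, layout
 * type, number of copies, maximum superblock size, and that consecutive
 * copies don't overlap given that maximum size.
 */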
static int validate_sb_layout(struct bch_sb_layout *layout, struct printbuf *out)
294 {
295 u64 offset, prev_offset, max_sectors;
296 unsigned i;
297
298 BUILD_BUG_ON(sizeof(struct bch_sb_layout) != 512);
299
300 if (!uuid_equal(&layout->magic, &BCACHE_MAGIC) &&
301 !uuid_equal(&layout->magic, &BCHFS_MAGIC)) {
302 prt_printf(out, "Not a bcachefs superblock layout");
303 return -BCH_ERR_invalid_sb_layout;
304 }
305
306 if (layout->layout_type != 0) {
307 prt_printf(out, "Invalid superblock layout type %u",
308 layout->layout_type);
309 return -BCH_ERR_invalid_sb_layout_type;
310 }
311
312 if (!layout->nr_superblocks) {
313 prt_printf(out, "Invalid superblock layout: no superblocks");
314 return -BCH_ERR_invalid_sb_layout_nr_superblocks;
315 }
316
317 if (layout->nr_superblocks > ARRAY_SIZE(layout->sb_offset)) {
318 prt_printf(out, "Invalid superblock layout: too many superblocks");
319 return -BCH_ERR_invalid_sb_layout_nr_superblocks;
320 }
321
322 if (layout->sb_max_size_bits > BCH_SB_LAYOUT_SIZE_BITS_MAX) {
323 prt_printf(out, "Invalid superblock layout: max_size_bits too high");
324 return -BCH_ERR_invalid_sb_layout_sb_max_size_bits;
325 }
326
327 max_sectors = 1 << layout->sb_max_size_bits;
328
329 prev_offset = le64_to_cpu(layout->sb_offset[0]);
330
331 for (i = 1; i < layout->nr_superblocks; i++) {
332 offset = le64_to_cpu(layout->sb_offset[i]);
333
334 if (offset < prev_offset + max_sectors) {
335 prt_printf(out, "Invalid superblock layout: superblocks overlap\n"
336 " (sb %u ends at %llu next starts at %llu",
337 i - 1, prev_offset + max_sectors, offset);
338 return -BCH_ERR_invalid_sb_layout_superblocks_overlap;
339 }
340 prev_offset = offset;
341 }
342
343 return 0;
344 }
345
static int bch2_sb_compatible(struct bch_sb *sb, struct printbuf *out)
347 {
348 u16 version = le16_to_cpu(sb->version);
349 u16 version_min = le16_to_cpu(sb->version_min);
350
351 if (!bch2_version_compatible(version)) {
352 prt_str(out, "Unsupported superblock version ");
353 bch2_version_to_text(out, version);
354 prt_str(out, " (min ");
355 bch2_version_to_text(out, bcachefs_metadata_version_min);
356 prt_str(out, ", max ");
357 bch2_version_to_text(out, bcachefs_metadata_version_current);
358 prt_str(out, ")");
359 return -BCH_ERR_invalid_sb_version;
360 }
361
362 if (!bch2_version_compatible(version_min)) {
363 prt_str(out, "Unsupported superblock version_min ");
364 bch2_version_to_text(out, version_min);
365 prt_str(out, " (min ");
366 bch2_version_to_text(out, bcachefs_metadata_version_min);
367 prt_str(out, ", max ");
368 bch2_version_to_text(out, bcachefs_metadata_version_current);
369 prt_str(out, ")");
370 return -BCH_ERR_invalid_sb_version;
371 }
372
373 if (version_min > version) {
374 prt_str(out, "Bad minimum version ");
375 bch2_version_to_text(out, version_min);
376 prt_str(out, ", greater than version field ");
377 bch2_version_to_text(out, version);
378 return -BCH_ERR_invalid_sb_version;
379 }
380
381 return 0;
382 }
383
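/*
 * Full superblock validation, used both after reading and before writing
 * (BCH_VALIDATE_write): version compatibility, unknown incompat feature
 * bits, UUIDs, read offset, device count and dev_idx, time precision,
 * options stored in the superblock, the layout, and every optional field -
 * members first, since validation of other fields depends on it.
 */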
int bch2_sb_validate(struct bch_sb *sb, u64 read_offset,
		     enum bch_validate_flags flags, struct printbuf *out)
386 {
387 struct bch_sb_field_members_v1 *mi;
388 enum bch_opt_id opt_id;
389 int ret;
390
391 ret = bch2_sb_compatible(sb, out);
392 if (ret)
393 return ret;
394
395 u64 incompat = le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR);
396 unsigned incompat_bit = 0;
397 if (incompat)
398 incompat_bit = __ffs64(incompat);
399 else if (sb->features[1])
400 incompat_bit = 64 + __ffs64(le64_to_cpu(sb->features[1]));
401
402 if (incompat_bit) {
403 prt_printf(out, "Filesystem has incompatible feature bit %u, highest supported %s (%u)",
404 incompat_bit,
405 bch2_sb_features[BCH_FEATURE_NR - 1],
406 BCH_FEATURE_NR - 1);
407 return -BCH_ERR_invalid_sb_features;
408 }
409
410 if (BCH_VERSION_MAJOR(le16_to_cpu(sb->version)) > BCH_VERSION_MAJOR(bcachefs_metadata_version_current) ||
411 BCH_SB_VERSION_INCOMPAT(sb) > bcachefs_metadata_version_current) {
412 prt_str(out, "Filesystem has incompatible version ");
413 bch2_version_to_text(out, le16_to_cpu(sb->version));
414 prt_str(out, ", current version ");
415 bch2_version_to_text(out, bcachefs_metadata_version_current);
416 return -BCH_ERR_invalid_sb_features;
417 }
418
419 if (bch2_is_zero(sb->user_uuid.b, sizeof(sb->user_uuid))) {
420 prt_printf(out, "Bad user UUID (got zeroes)");
421 return -BCH_ERR_invalid_sb_uuid;
422 }
423
424 if (bch2_is_zero(sb->uuid.b, sizeof(sb->uuid))) {
425 prt_printf(out, "Bad internal UUID (got zeroes)");
426 return -BCH_ERR_invalid_sb_uuid;
427 }
428
429 if (!(flags & BCH_VALIDATE_write) &&
430 le64_to_cpu(sb->offset) != read_offset) {
431 prt_printf(out, "Bad sb offset (got %llu, read from %llu)",
432 le64_to_cpu(sb->offset), read_offset);
433 return -BCH_ERR_invalid_sb_offset;
434 }
435
436 if (!sb->nr_devices ||
437 sb->nr_devices > BCH_SB_MEMBERS_MAX) {
438 prt_printf(out, "Bad number of member devices %u (max %u)",
439 sb->nr_devices, BCH_SB_MEMBERS_MAX);
440 return -BCH_ERR_invalid_sb_too_many_members;
441 }
442
443 if (sb->dev_idx >= sb->nr_devices) {
444 prt_printf(out, "Bad dev_idx (got %u, nr_devices %u)",
445 sb->dev_idx, sb->nr_devices);
446 return -BCH_ERR_invalid_sb_dev_idx;
447 }
448
449 if (!sb->time_precision ||
450 le32_to_cpu(sb->time_precision) > NSEC_PER_SEC) {
451 prt_printf(out, "Invalid time precision: %u (min 1, max %lu)",
452 le32_to_cpu(sb->time_precision), NSEC_PER_SEC);
453 return -BCH_ERR_invalid_sb_time_precision;
454 }
455
456 /* old versions didn't know to downgrade this field */
457 if (BCH_SB_VERSION_INCOMPAT_ALLOWED(sb) > le16_to_cpu(sb->version))
458 SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(sb, le16_to_cpu(sb->version));
459
460 if (BCH_SB_VERSION_INCOMPAT(sb) > BCH_SB_VERSION_INCOMPAT_ALLOWED(sb)) {
461 prt_printf(out, "Invalid version_incompat ");
462 bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT(sb));
463 prt_str(out, " > incompat_allowed ");
464 bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT_ALLOWED(sb));
465 if (flags & BCH_VALIDATE_write)
466 return -BCH_ERR_invalid_sb_version;
467 else
468 SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(sb, BCH_SB_VERSION_INCOMPAT(sb));
469 }
470
471 if (!flags) {
		/*
		 * Been seeing a bug where these are getting inexplicably
		 * zeroed, so we're now validating them, but we have to be
		 * careful not to prevent people's filesystems from mounting:
		 */
477 if (!BCH_SB_JOURNAL_FLUSH_DELAY(sb))
478 SET_BCH_SB_JOURNAL_FLUSH_DELAY(sb, 1000);
479 if (!BCH_SB_JOURNAL_RECLAIM_DELAY(sb))
480 SET_BCH_SB_JOURNAL_RECLAIM_DELAY(sb, 1000);
481
482 if (!BCH_SB_VERSION_UPGRADE_COMPLETE(sb))
483 SET_BCH_SB_VERSION_UPGRADE_COMPLETE(sb, le16_to_cpu(sb->version));
484
485 if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_disk_accounting_v2 &&
486 !BCH_SB_ALLOCATOR_STUCK_TIMEOUT(sb))
487 SET_BCH_SB_ALLOCATOR_STUCK_TIMEOUT(sb, 30);
488
489 if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_disk_accounting_v2)
490 SET_BCH_SB_PROMOTE_WHOLE_EXTENTS(sb, true);
491
492 if (!BCH_SB_WRITE_ERROR_TIMEOUT(sb))
493 SET_BCH_SB_WRITE_ERROR_TIMEOUT(sb, 30);
494
495 if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_extent_flags &&
496 !BCH_SB_CSUM_ERR_RETRY_NR(sb))
497 SET_BCH_SB_CSUM_ERR_RETRY_NR(sb, 3);
498 }
499
500 #ifdef __KERNEL__
501 if (!BCH_SB_SHARD_INUMS_NBITS(sb))
502 SET_BCH_SB_SHARD_INUMS_NBITS(sb, ilog2(roundup_pow_of_two(num_online_cpus())));
503 #endif
504
505 for (opt_id = 0; opt_id < bch2_opts_nr; opt_id++) {
506 const struct bch_option *opt = bch2_opt_table + opt_id;
507
508 if (opt->get_sb) {
509 u64 v = bch2_opt_from_sb(sb, opt_id, -1);
510
511 prt_printf(out, "Invalid option ");
512 ret = bch2_opt_validate(opt, v, out);
513 if (ret)
514 return ret;
515
516 printbuf_reset(out);
517 }
518 }
519
520 /* validate layout */
521 ret = validate_sb_layout(&sb->layout, out);
522 if (ret)
523 return ret;
524
525 vstruct_for_each(sb, f) {
526 if (!f->u64s) {
527 prt_printf(out, "Invalid superblock: optional field with size 0 (type %u)",
528 le32_to_cpu(f->type));
529 return -BCH_ERR_invalid_sb_field_size;
530 }
531
532 if (vstruct_next(f) > vstruct_last(sb)) {
533 prt_printf(out, "Invalid superblock: optional field extends past end of superblock (type %u)",
534 le32_to_cpu(f->type));
535 return -BCH_ERR_invalid_sb_field_size;
536 }
537 }
538
539 /* members must be validated first: */
540 mi = bch2_sb_field_get(sb, members_v1);
541 if (!mi) {
542 prt_printf(out, "Invalid superblock: member info area missing");
543 return -BCH_ERR_invalid_sb_members_missing;
544 }
545
546 ret = bch2_sb_field_validate(sb, &mi->field, flags, out);
547 if (ret)
548 return ret;
549
550 vstruct_for_each(sb, f) {
551 if (le32_to_cpu(f->type) == BCH_SB_FIELD_members_v1)
552 continue;
553
554 ret = bch2_sb_field_validate(sb, f, flags, out);
555 if (ret)
556 return ret;
557 }
558
559 if ((flags & BCH_VALIDATE_write) &&
560 bch2_sb_member_get(sb, sb->dev_idx).seq != sb->seq) {
561 prt_printf(out, "Invalid superblock: member seq %llu != sb seq %llu",
562 le64_to_cpu(bch2_sb_member_get(sb, sb->dev_idx).seq),
563 le64_to_cpu(sb->seq));
564 return -BCH_ERR_invalid_sb_members_missing;
565 }
566
567 return 0;
568 }
569
570 /* device open: */
571
static unsigned long le_ulong_to_cpu(unsigned long v)
573 {
574 return sizeof(unsigned long) == 8
575 ? le64_to_cpu(v)
576 : le32_to_cpu(v);
577 }
578
static void le_bitvector_to_cpu(unsigned long *dst, unsigned long *src, unsigned nr)
580 {
581 BUG_ON(nr & (BITS_PER_TYPE(long) - 1));
582
583 for (unsigned i = 0; i < BITS_TO_LONGS(nr); i++)
584 dst[i] = le_ulong_to_cpu(src[i]);
585 }
586
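/*
 * Refresh the unpacked, native-endian copy of superblock fields in c->sb
 * (and each member device's ca->mi) from the on-disk superblock; called
 * with sb_lock held after c->disk_sb has been modified.
 */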
static void bch2_sb_update(struct bch_fs *c)
588 {
589 struct bch_sb *src = c->disk_sb.sb;
590
591 lockdep_assert_held(&c->sb_lock);
592
593 c->sb.uuid = src->uuid;
594 c->sb.user_uuid = src->user_uuid;
595 c->sb.version = le16_to_cpu(src->version);
596 c->sb.version_incompat = BCH_SB_VERSION_INCOMPAT(src);
597 c->sb.version_incompat_allowed
598 = BCH_SB_VERSION_INCOMPAT_ALLOWED(src);
599 c->sb.version_min = le16_to_cpu(src->version_min);
600 c->sb.version_upgrade_complete = BCH_SB_VERSION_UPGRADE_COMPLETE(src);
601 c->sb.nr_devices = src->nr_devices;
602 c->sb.clean = BCH_SB_CLEAN(src);
603 c->sb.encryption_type = BCH_SB_ENCRYPTION_TYPE(src);
604
605 c->sb.nsec_per_time_unit = le32_to_cpu(src->time_precision);
606 c->sb.time_units_per_sec = NSEC_PER_SEC / c->sb.nsec_per_time_unit;
607
608 /* XXX this is wrong, we need a 96 or 128 bit integer type */
609 c->sb.time_base_lo = div_u64(le64_to_cpu(src->time_base_lo),
610 c->sb.nsec_per_time_unit);
611 c->sb.time_base_hi = le32_to_cpu(src->time_base_hi);
612
613 c->sb.features = le64_to_cpu(src->features[0]);
614 c->sb.compat = le64_to_cpu(src->compat[0]);
615
616 memset(c->sb.errors_silent, 0, sizeof(c->sb.errors_silent));
617
618 struct bch_sb_field_ext *ext = bch2_sb_field_get(src, ext);
619 if (ext) {
620 le_bitvector_to_cpu(c->sb.errors_silent, (void *) ext->errors_silent,
621 sizeof(c->sb.errors_silent) * 8);
622 c->sb.btrees_lost_data = le64_to_cpu(ext->btrees_lost_data);
623 }
624
625 for_each_member_device(c, ca) {
626 struct bch_member m = bch2_sb_member_get(src, ca->dev_idx);
627 ca->mi = bch2_mi_to_cpu(&m);
628 }
629 }
630
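/*
 * Copy one superblock into another handle: fixed fields are copied
 * directly, optional fields are copied after resizing the destination as
 * needed, and per-device fields (BCH_SINGLE_DEVICE_SB_FIELDS) are skipped.
 * Used both to initialize c->disk_sb from a device superblock and to
 * refresh member device superblocks before writing them out.
 */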
static int __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src)
632 {
633 struct bch_sb_field *src_f, *dst_f;
634 struct bch_sb *dst = dst_handle->sb;
635 unsigned i;
636
637 dst->version = src->version;
638 dst->version_min = src->version_min;
639 dst->seq = src->seq;
640 dst->uuid = src->uuid;
641 dst->user_uuid = src->user_uuid;
642 memcpy(dst->label, src->label, sizeof(dst->label));
643
644 dst->block_size = src->block_size;
645 dst->nr_devices = src->nr_devices;
646
647 dst->time_base_lo = src->time_base_lo;
648 dst->time_base_hi = src->time_base_hi;
649 dst->time_precision = src->time_precision;
650 dst->write_time = src->write_time;
651
652 memcpy(dst->flags, src->flags, sizeof(dst->flags));
653 memcpy(dst->features, src->features, sizeof(dst->features));
654 memcpy(dst->compat, src->compat, sizeof(dst->compat));
655
656 for (i = 0; i < BCH_SB_FIELD_NR; i++) {
657 int d;
658
659 if ((1U << i) & BCH_SINGLE_DEVICE_SB_FIELDS)
660 continue;
661
662 src_f = bch2_sb_field_get_id(src, i);
663 dst_f = bch2_sb_field_get_id(dst, i);
664
665 d = (src_f ? le32_to_cpu(src_f->u64s) : 0) -
666 (dst_f ? le32_to_cpu(dst_f->u64s) : 0);
667 if (d > 0) {
668 int ret = bch2_sb_realloc(dst_handle,
669 le32_to_cpu(dst_handle->sb->u64s) + d);
670
671 if (ret)
672 return ret;
673
674 dst = dst_handle->sb;
675 dst_f = bch2_sb_field_get_id(dst, i);
676 }
677
678 dst_f = __bch2_sb_field_resize(dst_handle, dst_f,
679 src_f ? le32_to_cpu(src_f->u64s) : 0);
680
681 if (src_f)
682 memcpy(dst_f, src_f, vstruct_bytes(src_f));
683 }
684
685 return 0;
686 }
687
int bch2_sb_to_fs(struct bch_fs *c, struct bch_sb *src)
689 {
690 int ret;
691
692 lockdep_assert_held(&c->sb_lock);
693
694 ret = bch2_sb_realloc(&c->disk_sb, 0) ?:
695 __copy_super(&c->disk_sb, src) ?:
696 bch2_sb_replicas_to_cpu_replicas(c) ?:
697 bch2_sb_disk_groups_to_cpu(c);
698 if (ret)
699 return ret;
700
701 bch2_sb_update(c);
702 return 0;
703 }
704
int bch2_sb_from_fs(struct bch_fs *c, struct bch_dev *ca)
706 {
707 return __copy_super(&ca->disk_sb, c->disk_sb.sb);
708 }
709
710 /* read superblock: */
711
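/*
 * Read and verify one superblock at @offset: check the magic and version
 * compatibility, make sure the superblock fits within the size allowed by
 * its layout (rereading with a larger buffer if it doesn't fit the current
 * one), and verify the checksum.
 */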
static int read_one_super(struct bch_sb_handle *sb, u64 offset, struct printbuf *err)
713 {
714 size_t bytes;
715 int ret;
716 reread:
717 bio_reset(sb->bio, sb->bdev, REQ_OP_READ|REQ_SYNC|REQ_META);
718 sb->bio->bi_iter.bi_sector = offset;
719 bch2_bio_map(sb->bio, sb->sb, sb->buffer_size);
720
721 ret = submit_bio_wait(sb->bio);
722 if (ret) {
723 prt_printf(err, "IO error: %i", ret);
724 return ret;
725 }
726
727 if (!uuid_equal(&sb->sb->magic, &BCACHE_MAGIC) &&
728 !uuid_equal(&sb->sb->magic, &BCHFS_MAGIC)) {
729 prt_str(err, "Not a bcachefs superblock (got magic ");
730 pr_uuid(err, sb->sb->magic.b);
731 prt_str(err, ")");
732 return -BCH_ERR_invalid_sb_magic;
733 }
734
735 ret = bch2_sb_compatible(sb->sb, err);
736 if (ret)
737 return ret;
738
739 bytes = vstruct_bytes(sb->sb);
740
741 u64 sb_size = 512ULL << min(BCH_SB_LAYOUT_SIZE_BITS_MAX, sb->sb->layout.sb_max_size_bits);
742 if (bytes > sb_size) {
743 prt_printf(err, "Invalid superblock: too big (got %zu bytes, layout max %llu)",
744 bytes, sb_size);
745 return -BCH_ERR_invalid_sb_too_big;
746 }
747
748 if (bytes > sb->buffer_size) {
749 ret = bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s));
750 if (ret)
751 return ret;
752 goto reread;
753 }
754
755 enum bch_csum_type csum_type = BCH_SB_CSUM_TYPE(sb->sb);
756 if (csum_type >= BCH_CSUM_NR ||
757 bch2_csum_type_is_encryption(csum_type)) {
758 prt_printf(err, "unknown checksum type %llu", BCH_SB_CSUM_TYPE(sb->sb));
759 return -BCH_ERR_invalid_sb_csum_type;
760 }
761
762 /* XXX: verify MACs */
763 struct bch_csum csum = csum_vstruct(NULL, csum_type, null_nonce(), sb->sb);
764 if (bch2_crc_cmp(csum, sb->sb->csum)) {
765 bch2_csum_err_msg(err, csum_type, sb->sb->csum, csum);
766 return -BCH_ERR_invalid_sb_csum;
767 }
768
769 sb->seq = le64_to_cpu(sb->sb->seq);
770
771 return 0;
772 }
773
static int __bch2_read_super(const char *path, struct bch_opts *opts,
			     struct bch_sb_handle *sb, bool ignore_notbchfs_msg)
776 {
777 u64 offset = opt_get(*opts, sb);
778 struct bch_sb_layout layout;
779 struct printbuf err = PRINTBUF;
780 struct printbuf err2 = PRINTBUF;
781 __le64 *i;
782 int ret;
783 #ifndef __KERNEL__
784 retry:
785 #endif
786 memset(sb, 0, sizeof(*sb));
787 sb->mode = BLK_OPEN_READ;
788 sb->have_bio = true;
789 sb->holder = kzalloc(sizeof(*sb->holder), GFP_KERNEL);
790 if (!sb->holder)
791 return -ENOMEM;
792
793 sb->sb_name = kstrdup(path, GFP_KERNEL);
794 if (!sb->sb_name) {
795 ret = -ENOMEM;
796 prt_printf(&err, "error allocating memory for sb_name");
797 goto err;
798 }
799
800 #ifndef __KERNEL__
801 if (opt_get(*opts, direct_io) == false)
802 sb->mode |= BLK_OPEN_BUFFERED;
803 #endif
804
805 if (!opt_get(*opts, noexcl))
806 sb->mode |= BLK_OPEN_EXCL;
807
808 if (!opt_get(*opts, nochanges))
809 sb->mode |= BLK_OPEN_WRITE;
810
811 sb->s_bdev_file = bdev_file_open_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops);
812 if (IS_ERR(sb->s_bdev_file) &&
813 PTR_ERR(sb->s_bdev_file) == -EACCES &&
814 opt_get(*opts, read_only)) {
815 sb->mode &= ~BLK_OPEN_WRITE;
816
817 sb->s_bdev_file = bdev_file_open_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops);
818 if (!IS_ERR(sb->s_bdev_file))
819 opt_set(*opts, nochanges, true);
820 }
821
822 if (IS_ERR(sb->s_bdev_file)) {
823 ret = PTR_ERR(sb->s_bdev_file);
824 prt_printf(&err, "error opening %s: %s", path, bch2_err_str(ret));
825 goto err;
826 }
827 sb->bdev = file_bdev(sb->s_bdev_file);
828
829 ret = bch2_sb_realloc(sb, 0);
830 if (ret) {
831 prt_printf(&err, "error allocating memory for superblock");
832 goto err;
833 }
834
835 if (bch2_fs_init_fault("read_super")) {
836 prt_printf(&err, "dynamic fault");
837 ret = -EFAULT;
838 goto err;
839 }
840
841 ret = read_one_super(sb, offset, &err);
842 if (!ret)
843 goto got_super;
844
845 if (opt_defined(*opts, sb))
846 goto err;
847
848 prt_printf(&err2, "bcachefs (%s): error reading default superblock: %s\n",
849 path, err.buf);
850 if (ret == -BCH_ERR_invalid_sb_magic && ignore_notbchfs_msg)
851 bch2_print_opts(opts, KERN_INFO "%s", err2.buf);
852 else
853 bch2_print_opts(opts, KERN_ERR "%s", err2.buf);
854
855 printbuf_exit(&err2);
856 printbuf_reset(&err);
857
858 /*
859 * Error reading primary superblock - read location of backup
860 * superblocks:
861 */
862 bio_reset(sb->bio, sb->bdev, REQ_OP_READ|REQ_SYNC|REQ_META);
863 sb->bio->bi_iter.bi_sector = BCH_SB_LAYOUT_SECTOR;
864 /*
865 * use sb buffer to read layout, since sb buffer is page aligned but
866 * layout won't be:
867 */
868 bch2_bio_map(sb->bio, sb->sb, sizeof(struct bch_sb_layout));
869
870 ret = submit_bio_wait(sb->bio);
871 if (ret) {
872 prt_printf(&err, "IO error: %i", ret);
873 goto err;
874 }
875
876 memcpy(&layout, sb->sb, sizeof(layout));
877 ret = validate_sb_layout(&layout, &err);
878 if (ret)
879 goto err;
880
881 for (i = layout.sb_offset;
882 i < layout.sb_offset + layout.nr_superblocks; i++) {
883 offset = le64_to_cpu(*i);
884
885 if (offset == opt_get(*opts, sb)) {
886 ret = -BCH_ERR_invalid;
887 continue;
888 }
889
890 ret = read_one_super(sb, offset, &err);
891 if (!ret)
892 goto got_super;
893 }
894
895 goto err;
896
897 got_super:
898 if (le16_to_cpu(sb->sb->block_size) << 9 <
899 bdev_logical_block_size(sb->bdev) &&
900 opt_get(*opts, direct_io)) {
901 #ifndef __KERNEL__
902 opt_set(*opts, direct_io, false);
903 bch2_free_super(sb);
904 goto retry;
905 #endif
906 prt_printf(&err, "block size (%u) smaller than device block size (%u)",
907 le16_to_cpu(sb->sb->block_size) << 9,
908 bdev_logical_block_size(sb->bdev));
909 ret = -BCH_ERR_block_size_too_small;
910 goto err;
911 }
912
913 sb->have_layout = true;
914
915 ret = bch2_sb_validate(sb->sb, offset, 0, &err);
916 if (ret) {
917 bch2_print_opts(opts, KERN_ERR "bcachefs (%s): error validating superblock: %s\n",
918 path, err.buf);
919 goto err_no_print;
920 }
921 out:
922 printbuf_exit(&err);
923 return ret;
924 err:
925 bch2_print_opts(opts, KERN_ERR "bcachefs (%s): error reading superblock: %s\n",
926 path, err.buf);
927 err_no_print:
928 bch2_free_super(sb);
929 goto out;
930 }
931
int bch2_read_super(const char *path, struct bch_opts *opts,
		    struct bch_sb_handle *sb)
934 {
935 return __bch2_read_super(path, opts, sb, false);
936 }
937
938 /* provide a silenced version for mount.bcachefs */
939
int bch2_read_super_silent(const char *path, struct bch_opts *opts,
			   struct bch_sb_handle *sb)
942 {
943 return __bch2_read_super(path, opts, sb, true);
944 }
945
946 /* write superblock: */
947
static void write_super_endio(struct bio *bio)
949 {
950 struct bch_dev *ca = bio->bi_private;
951
952 bch2_account_io_success_fail(ca, bio_data_dir(bio), !bio->bi_status);
953
954 /* XXX: return errors directly */
955
956 if (bio->bi_status) {
957 bch_err_dev_ratelimited(ca, "superblock %s error: %s",
958 str_write_read(bio_data_dir(bio)),
959 bch2_blk_status_to_str(bio->bi_status));
960 ca->sb_write_error = 1;
961 }
962
963 closure_put(&ca->fs->sb_write);
964 percpu_ref_put(&ca->io_ref[READ]);
965 }
966
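/*
 * Before writing, read the first superblock copy back from each device so
 * bch2_write_super() can compare sequence numbers and detect writes that
 * were silently dropped, or a superblock modified by another process.
 */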
static void read_back_super(struct bch_fs *c, struct bch_dev *ca)
968 {
969 struct bch_sb *sb = ca->disk_sb.sb;
970 struct bio *bio = ca->disk_sb.bio;
971
972 memset(ca->sb_read_scratch, 0, BCH_SB_READ_SCRATCH_BUF_SIZE);
973
974 bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ|REQ_SYNC|REQ_META);
975 bio->bi_iter.bi_sector = le64_to_cpu(sb->layout.sb_offset[0]);
976 bio->bi_end_io = write_super_endio;
977 bio->bi_private = ca;
978 bch2_bio_map(bio, ca->sb_read_scratch, BCH_SB_READ_SCRATCH_BUF_SIZE);
979
980 this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_sb], bio_sectors(bio));
981
982 percpu_ref_get(&ca->io_ref[READ]);
983 closure_bio_submit(bio, &c->sb_write);
984 }
985
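/*
 * Write out superblock copy @idx to the offset given by the layout,
 * checksummed with the configured metadata checksum type and padded out to
 * the device's logical block size.
 */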
static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx)
987 {
988 struct bch_sb *sb = ca->disk_sb.sb;
989 struct bio *bio = ca->disk_sb.bio;
990
991 sb->offset = sb->layout.sb_offset[idx];
992
993 SET_BCH_SB_CSUM_TYPE(sb, bch2_csum_opt_to_type(c->opts.metadata_checksum, false));
994 sb->csum = csum_vstruct(c, BCH_SB_CSUM_TYPE(sb),
995 null_nonce(), sb);
996
997 bio_reset(bio, ca->disk_sb.bdev, REQ_OP_WRITE|REQ_SYNC|REQ_META);
998 bio->bi_iter.bi_sector = le64_to_cpu(sb->offset);
999 bio->bi_end_io = write_super_endio;
1000 bio->bi_private = ca;
1001 bch2_bio_map(bio, sb,
1002 roundup((size_t) vstruct_bytes(sb),
1003 bdev_logical_block_size(ca->disk_sb.bdev)));
1004
1005 this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_sb],
1006 bio_sectors(bio));
1007
1008 percpu_ref_get(&ca->io_ref[READ]);
1009 closure_bio_submit(bio, &c->sb_write);
1010 }
1011
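/*
 * Write the superblock to every online member device: bump the sequence
 * number, sync in-memory state (members, counters, errors, downgrade
 * table) into the superblock, validate each device's copy, read back the
 * current on-disk superblocks to catch dropped writes or concurrent
 * writers, then write out every copy described by the layout. If we
 * couldn't write to enough devices to guarantee the new superblock will be
 * found at mount time, a fatal error is raised and -BCH_ERR_erofs_sb_err
 * returned.
 */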
int bch2_write_super(struct bch_fs *c)
1013 {
1014 struct closure *cl = &c->sb_write;
1015 struct printbuf err = PRINTBUF;
1016 unsigned sb = 0, nr_wrote;
1017 struct bch_devs_mask sb_written;
1018 bool wrote, can_mount_without_written, can_mount_with_written;
1019 unsigned degraded_flags = BCH_FORCE_IF_DEGRADED;
1020 DARRAY(struct bch_dev *) online_devices = {};
1021 int ret = 0;
1022
1023 trace_and_count(c, write_super, c, _RET_IP_);
1024
1025 if (c->opts.very_degraded)
1026 degraded_flags |= BCH_FORCE_IF_LOST;
1027
1028 lockdep_assert_held(&c->sb_lock);
1029
1030 closure_init_stack(cl);
1031 memset(&sb_written, 0, sizeof(sb_written));
1032
1033 /*
1034 * Note: we do writes to RO devices here, and we might want to change
1035 * that in the future.
1036 *
1037 * For now, we expect to be able to call write_super() when we're not
1038 * yet RW:
1039 */
1040 for_each_online_member(c, ca) {
1041 ret = darray_push(&online_devices, ca);
1042 if (bch2_fs_fatal_err_on(ret, c, "%s: error allocating online devices", __func__)) {
1043 percpu_ref_put(&ca->io_ref[READ]);
1044 goto out;
1045 }
1046 percpu_ref_get(&ca->io_ref[READ]);
1047 }
1048
1049 /* Make sure we're using the new magic numbers: */
1050 c->disk_sb.sb->magic = BCHFS_MAGIC;
1051 c->disk_sb.sb->layout.magic = BCHFS_MAGIC;
1052
1053 le64_add_cpu(&c->disk_sb.sb->seq, 1);
1054
1055 struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
1056 darray_for_each(online_devices, ca)
1057 __bch2_members_v2_get_mut(mi, (*ca)->dev_idx)->seq = c->disk_sb.sb->seq;
1058 c->disk_sb.sb->write_time = cpu_to_le64(ktime_get_real_seconds());
1059
1060 if (test_bit(BCH_FS_error, &c->flags))
1061 SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 1);
1062 if (test_bit(BCH_FS_topology_error, &c->flags))
1063 SET_BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb, 1);
1064
1065 SET_BCH_SB_BIG_ENDIAN(c->disk_sb.sb, CPU_BIG_ENDIAN);
1066
1067 bch2_sb_counters_from_cpu(c);
1068 bch2_sb_members_from_cpu(c);
1069 bch2_sb_members_cpy_v2_v1(&c->disk_sb);
1070 bch2_sb_errors_from_cpu(c);
1071 bch2_sb_downgrade_update(c);
1072
1073 darray_for_each(online_devices, ca)
1074 bch2_sb_from_fs(c, (*ca));
1075
1076 darray_for_each(online_devices, ca) {
1077 printbuf_reset(&err);
1078
1079 ret = bch2_sb_validate((*ca)->disk_sb.sb, 0, BCH_VALIDATE_write, &err);
1080 if (ret) {
1081 bch2_fs_inconsistent(c, "sb invalid before write: %s", err.buf);
1082 goto out;
1083 }
1084 }
1085
1086 if (c->opts.nochanges)
1087 goto out;
1088
1089 /*
1090 * Defer writing the superblock until filesystem initialization is
1091 * complete - don't write out a partly initialized superblock:
1092 */
1093 if (!BCH_SB_INITIALIZED(c->disk_sb.sb))
1094 goto out;
1095
1096 if (le16_to_cpu(c->disk_sb.sb->version) > bcachefs_metadata_version_current) {
1097 struct printbuf buf = PRINTBUF;
1098 prt_printf(&buf, "attempting to write superblock that wasn't version downgraded (");
1099 bch2_version_to_text(&buf, le16_to_cpu(c->disk_sb.sb->version));
1100 prt_str(&buf, " > ");
1101 bch2_version_to_text(&buf, bcachefs_metadata_version_current);
1102 prt_str(&buf, ")");
1103 bch2_fs_fatal_error(c, ": %s", buf.buf);
1104 printbuf_exit(&buf);
1105 ret = -BCH_ERR_sb_not_downgraded;
1106 goto out;
1107 }
1108
1109 darray_for_each(online_devices, ca) {
1110 __set_bit((*ca)->dev_idx, sb_written.d);
1111 (*ca)->sb_write_error = 0;
1112 }
1113
1114 darray_for_each(online_devices, ca)
1115 read_back_super(c, *ca);
1116 closure_sync(cl);
1117
1118 darray_for_each(online_devices, cap) {
1119 struct bch_dev *ca = *cap;
1120
1121 if (ca->sb_write_error)
1122 continue;
1123
1124 if (le64_to_cpu(ca->sb_read_scratch->seq) < ca->disk_sb.seq) {
1125 struct printbuf buf = PRINTBUF;
1126 prt_char(&buf, ' ');
1127 prt_bdevname(&buf, ca->disk_sb.bdev);
1128 prt_printf(&buf,
1129 ": Superblock write was silently dropped! (seq %llu expected %llu)",
1130 le64_to_cpu(ca->sb_read_scratch->seq),
1131 ca->disk_sb.seq);
1132
1133 if (c->opts.errors != BCH_ON_ERROR_continue &&
1134 c->opts.errors != BCH_ON_ERROR_fix_safe) {
1135 ret = -BCH_ERR_erofs_sb_err;
1136 bch2_fs_fatal_error(c, "%s", buf.buf);
1137 } else {
1138 bch_err(c, "%s", buf.buf);
1139 }
1140
1141 printbuf_exit(&buf);
1142 }
1143
1144 if (le64_to_cpu(ca->sb_read_scratch->seq) > ca->disk_sb.seq) {
1145 struct printbuf buf = PRINTBUF;
1146 prt_char(&buf, ' ');
1147 prt_bdevname(&buf, ca->disk_sb.bdev);
1148 prt_printf(&buf,
1149 ": Superblock modified by another process (seq %llu expected %llu)",
1150 le64_to_cpu(ca->sb_read_scratch->seq),
1151 ca->disk_sb.seq);
1152 bch2_fs_fatal_error(c, "%s", buf.buf);
1153 printbuf_exit(&buf);
1154 ret = -BCH_ERR_erofs_sb_err;
1155 }
1156 }
1157
1158 if (ret)
1159 goto out;
1160
1161 do {
1162 wrote = false;
1163 darray_for_each(online_devices, cap) {
1164 struct bch_dev *ca = *cap;
1165 if (!ca->sb_write_error &&
1166 sb < ca->disk_sb.sb->layout.nr_superblocks) {
1167 write_one_super(c, ca, sb);
1168 wrote = true;
1169 }
1170 }
1171 closure_sync(cl);
1172 sb++;
1173 } while (wrote);
1174
1175 darray_for_each(online_devices, cap) {
1176 struct bch_dev *ca = *cap;
1177 if (ca->sb_write_error)
1178 __clear_bit(ca->dev_idx, sb_written.d);
1179 else
1180 ca->disk_sb.seq = le64_to_cpu(ca->disk_sb.sb->seq);
1181 }
1182
1183 nr_wrote = dev_mask_nr(&sb_written);
1184
1185 can_mount_with_written =
1186 bch2_have_enough_devs(c, sb_written, degraded_flags, false);
1187
1188 for (unsigned i = 0; i < ARRAY_SIZE(sb_written.d); i++)
1189 sb_written.d[i] = ~sb_written.d[i];
1190
1191 can_mount_without_written =
1192 bch2_have_enough_devs(c, sb_written, degraded_flags, false);
1193
1194 /*
1195 * If we would be able to mount _without_ the devices we successfully
1196 * wrote superblocks to, we weren't able to write to enough devices:
1197 *
1198 * Exception: if we can mount without the successes because we haven't
1199 * written anything (new filesystem), we continue if we'd be able to
1200 * mount with the devices we did successfully write to:
1201 */
1202 if (bch2_fs_fatal_err_on(!nr_wrote ||
1203 !can_mount_with_written ||
1204 (can_mount_without_written &&
1205 !can_mount_with_written), c,
1206 ": Unable to write superblock to sufficient devices (from %ps)",
1207 (void *) _RET_IP_))
1208 ret = -BCH_ERR_erofs_sb_err;
1209 out:
1210 /* Make new options visible after they're persistent: */
1211 bch2_sb_update(c);
1212 darray_for_each(online_devices, ca)
1213 percpu_ref_put(&(*ca)->io_ref[READ]);
1214 darray_exit(&online_devices);
1215 printbuf_exit(&err);
1216 return ret;
1217 }
1218
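/*
 * Set a feature bit in the superblock and persist it if it isn't already
 * set; unlike most of the helpers in this file, this takes sb_lock itself.
 */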
void __bch2_check_set_feature(struct bch_fs *c, unsigned feat)
1220 {
1221 mutex_lock(&c->sb_lock);
1222 if (!(c->sb.features & (1ULL << feat))) {
1223 c->disk_sb.sb->features[0] |= cpu_to_le64(1ULL << feat);
1224
1225 bch2_write_super(c);
1226 }
1227 mutex_unlock(&c->sb_lock);
1228 }
1229
1230 /* Downgrade if superblock is at a higher version than currently supported: */
bool bch2_check_version_downgrade(struct bch_fs *c)
1232 {
1233 bool ret = bcachefs_metadata_version_current < c->sb.version;
1234
1235 lockdep_assert_held(&c->sb_lock);
1236
1237 /*
1238 * Downgrade, if superblock is at a higher version than currently
1239 * supported:
1240 *
1241 * c->sb will be checked before we write the superblock, so update it as
1242 * well:
1243 */
1244 if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) > bcachefs_metadata_version_current)
1245 SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current);
1246 if (BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb) > bcachefs_metadata_version_current)
1247 SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb, bcachefs_metadata_version_current);
1248 if (c->sb.version > bcachefs_metadata_version_current)
1249 c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current);
1250 if (c->sb.version_min > bcachefs_metadata_version_current)
1251 c->disk_sb.sb->version_min = cpu_to_le16(bcachefs_metadata_version_current);
1252 c->disk_sb.sb->compat[0] &= cpu_to_le64((1ULL << BCH_COMPAT_NR) - 1);
1253 return ret;
1254 }
1255
void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version, bool incompat)
1257 {
1258 lockdep_assert_held(&c->sb_lock);
1259
1260 if (BCH_VERSION_MAJOR(new_version) >
1261 BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version)))
1262 bch2_sb_field_resize(&c->disk_sb, downgrade, 0);
1263
1264 c->disk_sb.sb->version = cpu_to_le16(new_version);
1265
1266 if (incompat) {
1267 c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
1268 SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb,
1269 max(BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb), new_version));
1270 }
1271 }
1272
static int bch2_sb_ext_validate(struct bch_sb *sb, struct bch_sb_field *f,
				enum bch_validate_flags flags, struct printbuf *err)
1275 {
1276 if (vstruct_bytes(f) < 88) {
1277 prt_printf(err, "field too small (%zu < %u)", vstruct_bytes(f), 88);
1278 return -BCH_ERR_invalid_sb_ext;
1279 }
1280
1281 return 0;
1282 }
1283
static void bch2_sb_ext_to_text(struct printbuf *out, struct bch_sb *sb,
				struct bch_sb_field *f)
1286 {
1287 struct bch_sb_field_ext *e = field_to_type(f, ext);
1288
1289 prt_printf(out, "Recovery passes required:\t");
1290 prt_bitflags(out, bch2_recovery_passes,
1291 bch2_recovery_passes_from_stable(le64_to_cpu(e->recovery_passes_required[0])));
1292 prt_newline(out);
1293
1294 unsigned long *errors_silent = kmalloc(sizeof(e->errors_silent), GFP_KERNEL);
1295 if (errors_silent) {
1296 le_bitvector_to_cpu(errors_silent, (void *) e->errors_silent, sizeof(e->errors_silent) * 8);
1297
1298 prt_printf(out, "Errors to silently fix:\t");
1299 prt_bitflags_vector(out, bch2_sb_error_strs, errors_silent,
1300 min(BCH_FSCK_ERR_MAX, sizeof(e->errors_silent) * 8));
1301 prt_newline(out);
1302
1303 kfree(errors_silent);
1304 }
1305
1306 prt_printf(out, "Btrees with missing data:\t");
1307 prt_bitflags(out, __bch2_btree_ids, le64_to_cpu(e->btrees_lost_data));
1308 prt_newline(out);
1309 }
1310
1311 static const struct bch_sb_field_ops bch_sb_field_ops_ext = {
1312 .validate = bch2_sb_ext_validate,
1313 .to_text = bch2_sb_ext_to_text,
1314 };
1315
1316 static const struct bch_sb_field_ops *bch2_sb_field_ops[] = {
1317 #define x(f, nr) \
1318 [BCH_SB_FIELD_##f] = &bch_sb_field_ops_##f,
1319 BCH_SB_FIELDS()
1320 #undef x
1321 };
1322
1323 static const struct bch_sb_field_ops bch2_sb_field_null_ops;
1324
static const struct bch_sb_field_ops *bch2_sb_field_type_ops(unsigned type)
1326 {
1327 return likely(type < ARRAY_SIZE(bch2_sb_field_ops))
1328 ? bch2_sb_field_ops[type]
1329 : &bch2_sb_field_null_ops;
1330 }
1331
static int bch2_sb_field_validate(struct bch_sb *sb, struct bch_sb_field *f,
				  enum bch_validate_flags flags, struct printbuf *err)
1334 {
1335 unsigned type = le32_to_cpu(f->type);
1336 struct printbuf field_err = PRINTBUF;
1337 const struct bch_sb_field_ops *ops = bch2_sb_field_type_ops(type);
1338 int ret;
1339
1340 ret = ops->validate ? ops->validate(sb, f, flags, &field_err) : 0;
1341 if (ret) {
1342 prt_printf(err, "Invalid superblock section %s: %s",
1343 bch2_sb_fields[type], field_err.buf);
1344 prt_newline(err);
1345 bch2_sb_field_to_text(err, sb, f);
1346 }
1347
1348 printbuf_exit(&field_err);
1349 return ret;
1350 }
1351
void __bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb,
			     struct bch_sb_field *f)
1354 {
1355 unsigned type = le32_to_cpu(f->type);
1356 const struct bch_sb_field_ops *ops = bch2_sb_field_type_ops(type);
1357
1358 if (!out->nr_tabstops)
1359 printbuf_tabstop_push(out, 32);
1360
1361 if (ops->to_text)
1362 ops->to_text(out, sb, f);
1363 }
1364
void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb,
			   struct bch_sb_field *f)
1367 {
1368 unsigned type = le32_to_cpu(f->type);
1369
1370 if (type < BCH_SB_FIELD_NR)
1371 prt_printf(out, "%s", bch2_sb_fields[type]);
1372 else
1373 prt_printf(out, "(unknown field %u)", type);
1374
1375 prt_printf(out, " (size %zu):", vstruct_bytes(f));
1376 prt_newline(out);
1377
1378 __bch2_sb_field_to_text(out, sb, f);
1379 }
1380
void bch2_sb_layout_to_text(struct printbuf *out, struct bch_sb_layout *l)
1382 {
1383 unsigned i;
1384
1385 prt_printf(out, "Type: %u", l->layout_type);
1386 prt_newline(out);
1387
1388 prt_str(out, "Superblock max size: ");
1389 prt_units_u64(out, 512 << l->sb_max_size_bits);
1390 prt_newline(out);
1391
1392 prt_printf(out, "Nr superblocks: %u", l->nr_superblocks);
1393 prt_newline(out);
1394
1395 prt_str(out, "Offsets: ");
1396 for (i = 0; i < l->nr_superblocks; i++) {
1397 if (i)
1398 prt_str(out, ", ");
1399 prt_printf(out, "%llu", le64_to_cpu(l->sb_offset[i]));
1400 }
1401 prt_newline(out);
1402 }
1403
void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,
		     bool print_layout, unsigned fields)
1406 {
1407 if (!out->nr_tabstops)
1408 printbuf_tabstop_push(out, 44);
1409
1410 prt_printf(out, "External UUID:\t");
1411 pr_uuid(out, sb->user_uuid.b);
1412 prt_newline(out);
1413
1414 prt_printf(out, "Internal UUID:\t");
1415 pr_uuid(out, sb->uuid.b);
1416 prt_newline(out);
1417
1418 prt_printf(out, "Magic number:\t");
1419 pr_uuid(out, sb->magic.b);
1420 prt_newline(out);
1421
1422 prt_printf(out, "Device index:\t%u\n", sb->dev_idx);
1423
1424 prt_printf(out, "Label:\t");
1425 if (!strlen(sb->label))
1426 prt_printf(out, "(none)");
1427 else
1428 prt_printf(out, "%.*s", (int) sizeof(sb->label), sb->label);
1429 prt_newline(out);
1430
1431 prt_printf(out, "Version:\t");
1432 bch2_version_to_text(out, le16_to_cpu(sb->version));
1433 prt_newline(out);
1434
1435 prt_printf(out, "Incompatible features allowed:\t");
1436 bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT_ALLOWED(sb));
1437 prt_newline(out);
1438
1439 prt_printf(out, "Incompatible features in use:\t");
1440 bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT(sb));
1441 prt_newline(out);
1442
1443 prt_printf(out, "Version upgrade complete:\t");
1444 bch2_version_to_text(out, BCH_SB_VERSION_UPGRADE_COMPLETE(sb));
1445 prt_newline(out);
1446
1447 prt_printf(out, "Oldest version on disk:\t");
1448 bch2_version_to_text(out, le16_to_cpu(sb->version_min));
1449 prt_newline(out);
1450
1451 prt_printf(out, "Created:\t");
1452 if (sb->time_base_lo)
1453 bch2_prt_datetime(out, div_u64(le64_to_cpu(sb->time_base_lo), NSEC_PER_SEC));
1454 else
1455 prt_printf(out, "(not set)");
1456 prt_newline(out);
1457
1458 prt_printf(out, "Sequence number:\t");
1459 prt_printf(out, "%llu", le64_to_cpu(sb->seq));
1460 prt_newline(out);
1461
1462 prt_printf(out, "Time of last write:\t");
1463 bch2_prt_datetime(out, le64_to_cpu(sb->write_time));
1464 prt_newline(out);
1465
1466 prt_printf(out, "Superblock size:\t");
1467 prt_units_u64(out, vstruct_bytes(sb));
1468 prt_str(out, "/");
1469 prt_units_u64(out, 512ULL << sb->layout.sb_max_size_bits);
1470 prt_newline(out);
1471
1472 prt_printf(out, "Clean:\t%llu\n", BCH_SB_CLEAN(sb));
1473 prt_printf(out, "Devices:\t%u\n", bch2_sb_nr_devices(sb));
1474
1475 prt_printf(out, "Sections:\t");
1476 u64 fields_have = 0;
1477 vstruct_for_each(sb, f)
1478 fields_have |= 1 << le32_to_cpu(f->type);
1479 prt_bitflags(out, bch2_sb_fields, fields_have);
1480 prt_newline(out);
1481
1482 prt_printf(out, "Features:\t");
1483 prt_bitflags(out, bch2_sb_features, le64_to_cpu(sb->features[0]));
1484 prt_newline(out);
1485
1486 prt_printf(out, "Compat features:\t");
1487 prt_bitflags(out, bch2_sb_compat, le64_to_cpu(sb->compat[0]));
1488 prt_newline(out);
1489
1490 prt_newline(out);
1491 prt_printf(out, "Options:");
1492 prt_newline(out);
1493 printbuf_indent_add(out, 2);
1494 {
1495 enum bch_opt_id id;
1496
1497 for (id = 0; id < bch2_opts_nr; id++) {
1498 const struct bch_option *opt = bch2_opt_table + id;
1499
1500 if (opt->get_sb) {
1501 u64 v = bch2_opt_from_sb(sb, id, -1);
1502
1503 prt_printf(out, "%s:\t", opt->attr.name);
1504 bch2_opt_to_text(out, NULL, sb, opt, v,
1505 OPT_HUMAN_READABLE|OPT_SHOW_FULL_LIST);
1506 prt_newline(out);
1507 }
1508 }
1509 }
1510
1511 printbuf_indent_sub(out, 2);
1512
1513 if (print_layout) {
1514 prt_newline(out);
1515 prt_printf(out, "layout:");
1516 prt_newline(out);
1517 printbuf_indent_add(out, 2);
1518 bch2_sb_layout_to_text(out, &sb->layout);
1519 printbuf_indent_sub(out, 2);
1520 }
1521
1522 vstruct_for_each(sb, f)
1523 if (fields & (1 << le32_to_cpu(f->type))) {
1524 prt_newline(out);
1525 bch2_sb_field_to_text(out, sb, f);
1526 }
1527 }
1528