xref: /linux/fs/btrfs/tree-checker.c (revision c92b4d3dd59f9f71ac34b42d4603d2323a499ab0) !
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) Qu Wenruo 2017.  All rights reserved.
4  */
5 
6 /*
7  * The module is used to catch unexpected/corrupted tree block data.
8  * Such behavior can be caused either by a fuzzed image or bugs.
9  *
10  * The objective is to do leaf/node validation checks when tree block is read
11  * from disk, and check *every* possible member, so other code won't
12  * need to checking them again.
13  *
14  * Due to the potential and unwanted damage, every checker needs to be
15  * carefully reviewed otherwise so it does not prevent mount of valid images.
16  */
17 
18 #include <linux/types.h>
19 #include <linux/stddef.h>
20 #include <linux/error-injection.h>
21 #include "messages.h"
22 #include "ctree.h"
23 #include "tree-checker.h"
24 #include "compression.h"
25 #include "volumes.h"
26 #include "misc.h"
27 #include "fs.h"
28 #include "accessors.h"
29 #include "file-item.h"
30 #include "inode-item.h"
31 #include "dir-item.h"
32 #include "extent-tree.h"
33 
34 /*
35  * Error message should follow the following format:
36  * corrupt <type>: <identifier>, <reason>[, <bad_value>]
37  *
38  * @type:	leaf or node
39  * @identifier:	the necessary info to locate the leaf/node.
40  * 		It's recommended to decode key.objecitd/offset if it's
41  * 		meaningful.
42  * @reason:	describe the error
43  * @bad_value:	optional, it's recommended to output bad value and its
44  *		expected value (range).
45  *
46  * Since comma is used to separate the components, only space is allowed
47  * inside each component.
48  */
49 
50 /*
51  * Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt.
52  * Allows callers to customize the output.
53  */
54 __printf(3, 4)
55 __cold
56 static void generic_err(const struct extent_buffer *eb, int slot,
57 			const char *fmt, ...)
58 {
59 	const struct btrfs_fs_info *fs_info = eb->fs_info;
60 	struct va_format vaf;
61 	va_list args;
62 
63 	va_start(args, fmt);
64 
65 	vaf.fmt = fmt;
66 	vaf.va = &args;
67 
68 	dump_page(folio_page(eb->folios[0], 0), "eb page dump");
69 	btrfs_crit(fs_info,
70 		"corrupt %s: root=%llu block=%llu slot=%d, %pV",
71 		btrfs_header_level(eb) == 0 ? "leaf" : "node",
72 		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, &vaf);
73 	va_end(args);
74 }
75 
76 /*
77  * Customized reporter for extent data item, since its key objectid and
78  * offset has its own meaning.
79  */
80 __printf(3, 4)
81 __cold
82 static void file_extent_err(const struct extent_buffer *eb, int slot,
83 			    const char *fmt, ...)
84 {
85 	const struct btrfs_fs_info *fs_info = eb->fs_info;
86 	struct btrfs_key key;
87 	struct va_format vaf;
88 	va_list args;
89 
90 	btrfs_item_key_to_cpu(eb, &key, slot);
91 	va_start(args, fmt);
92 
93 	vaf.fmt = fmt;
94 	vaf.va = &args;
95 
96 	dump_page(folio_page(eb->folios[0], 0), "eb page dump");
97 	btrfs_crit(fs_info,
98 	"corrupt %s: root=%llu block=%llu slot=%d ino=%llu file_offset=%llu, %pV",
99 		btrfs_header_level(eb) == 0 ? "leaf" : "node",
100 		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
101 		key.objectid, key.offset, &vaf);
102 	va_end(args);
103 }
104 
105 /*
106  * Return 0 if the btrfs_file_extent_##name is aligned to @alignment
107  * Else return 1
108  */
109 #define CHECK_FE_ALIGNED(leaf, slot, fi, name, alignment)		      \
110 ({									      \
111 	if (unlikely(!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)),      \
112 				 (alignment))))				      \
113 		file_extent_err((leaf), (slot),				      \
114 	"invalid %s for file extent, have %llu, should be aligned to %u",     \
115 			(#name), btrfs_file_extent_##name((leaf), (fi)),      \
116 			(alignment));					      \
117 	(!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment)));   \
118 })
119 
120 static u64 file_extent_end(struct extent_buffer *leaf,
121 			   struct btrfs_key *key,
122 			   struct btrfs_file_extent_item *extent)
123 {
124 	u64 end;
125 	u64 len;
126 
127 	if (btrfs_file_extent_type(leaf, extent) == BTRFS_FILE_EXTENT_INLINE) {
128 		len = btrfs_file_extent_ram_bytes(leaf, extent);
129 		end = ALIGN(key->offset + len, leaf->fs_info->sectorsize);
130 	} else {
131 		len = btrfs_file_extent_num_bytes(leaf, extent);
132 		end = key->offset + len;
133 	}
134 	return end;
135 }
136 
137 /*
138  * Customized report for dir_item, the only new important information is
139  * key->objectid, which represents inode number
140  */
141 __printf(3, 4)
142 __cold
143 static void dir_item_err(const struct extent_buffer *eb, int slot,
144 			 const char *fmt, ...)
145 {
146 	const struct btrfs_fs_info *fs_info = eb->fs_info;
147 	struct btrfs_key key;
148 	struct va_format vaf;
149 	va_list args;
150 
151 	btrfs_item_key_to_cpu(eb, &key, slot);
152 	va_start(args, fmt);
153 
154 	vaf.fmt = fmt;
155 	vaf.va = &args;
156 
157 	dump_page(folio_page(eb->folios[0], 0), "eb page dump");
158 	btrfs_crit(fs_info,
159 		"corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV",
160 		btrfs_header_level(eb) == 0 ? "leaf" : "node",
161 		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
162 		key.objectid, &vaf);
163 	va_end(args);
164 }
165 
166 /*
167  * This functions checks prev_key->objectid, to ensure current key and prev_key
168  * share the same objectid as inode number.
169  *
170  * This is to detect missing INODE_ITEM in subvolume trees.
171  *
172  * Return true if everything is OK or we don't need to check.
173  * Return false if anything is wrong.
174  */
175 static bool check_prev_ino(struct extent_buffer *leaf,
176 			   struct btrfs_key *key, int slot,
177 			   struct btrfs_key *prev_key)
178 {
179 	/* No prev key, skip check */
180 	if (slot == 0)
181 		return true;
182 
183 	/* Only these key->types needs to be checked */
184 	ASSERT(key->type == BTRFS_XATTR_ITEM_KEY ||
185 	       key->type == BTRFS_INODE_REF_KEY ||
186 	       key->type == BTRFS_INODE_EXTREF_KEY ||
187 	       key->type == BTRFS_DIR_INDEX_KEY ||
188 	       key->type == BTRFS_DIR_ITEM_KEY ||
189 	       key->type == BTRFS_EXTENT_DATA_KEY, "key->type=%u", key->type);
190 
191 	/*
192 	 * Only subvolume trees along with their reloc trees need this check.
193 	 * Things like log tree doesn't follow this ino requirement.
194 	 */
195 	if (!btrfs_is_fstree(btrfs_header_owner(leaf)))
196 		return true;
197 
198 	if (key->objectid == prev_key->objectid)
199 		return true;
200 
201 	/* Error found */
202 	dir_item_err(leaf, slot,
203 		"invalid previous key objectid, have %llu expect %llu",
204 		prev_key->objectid, key->objectid);
205 	return false;
206 }
207 static int check_extent_data_item(struct extent_buffer *leaf,
208 				  struct btrfs_key *key, int slot,
209 				  struct btrfs_key *prev_key)
210 {
211 	struct btrfs_fs_info *fs_info = leaf->fs_info;
212 	struct btrfs_file_extent_item *fi;
213 	u32 sectorsize = fs_info->sectorsize;
214 	u32 item_size = btrfs_item_size(leaf, slot);
215 	u64 extent_end;
216 
217 	if (unlikely(!IS_ALIGNED(key->offset, sectorsize))) {
218 		file_extent_err(leaf, slot,
219 "unaligned file_offset for file extent, have %llu should be aligned to %u",
220 			key->offset, sectorsize);
221 		return -EUCLEAN;
222 	}
223 
224 	/*
225 	 * Previous key must have the same key->objectid (ino).
226 	 * It can be XATTR_ITEM, INODE_ITEM or just another EXTENT_DATA.
227 	 * But if objectids mismatch, it means we have a missing
228 	 * INODE_ITEM.
229 	 */
230 	if (unlikely(!check_prev_ino(leaf, key, slot, prev_key)))
231 		return -EUCLEAN;
232 
233 	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
234 
235 	/*
236 	 * Make sure the item contains at least inline header, so the file
237 	 * extent type is not some garbage.
238 	 */
239 	if (unlikely(item_size < BTRFS_FILE_EXTENT_INLINE_DATA_START)) {
240 		file_extent_err(leaf, slot,
241 				"invalid item size, have %u expect [%zu, %u)",
242 				item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START,
243 				SZ_4K);
244 		return -EUCLEAN;
245 	}
246 	if (unlikely(btrfs_file_extent_type(leaf, fi) >=
247 		     BTRFS_NR_FILE_EXTENT_TYPES)) {
248 		file_extent_err(leaf, slot,
249 		"invalid type for file extent, have %u expect range [0, %u]",
250 			btrfs_file_extent_type(leaf, fi),
251 			BTRFS_NR_FILE_EXTENT_TYPES - 1);
252 		return -EUCLEAN;
253 	}
254 
255 	/*
256 	 * Support for new compression/encryption must introduce incompat flag,
257 	 * and must be caught in open_ctree().
258 	 */
259 	if (unlikely(btrfs_file_extent_compression(leaf, fi) >=
260 		     BTRFS_NR_COMPRESS_TYPES)) {
261 		file_extent_err(leaf, slot,
262 	"invalid compression for file extent, have %u expect range [0, %u]",
263 			btrfs_file_extent_compression(leaf, fi),
264 			BTRFS_NR_COMPRESS_TYPES - 1);
265 		return -EUCLEAN;
266 	}
267 	if (unlikely(btrfs_file_extent_encryption(leaf, fi))) {
268 		file_extent_err(leaf, slot,
269 			"invalid encryption for file extent, have %u expect 0",
270 			btrfs_file_extent_encryption(leaf, fi));
271 		return -EUCLEAN;
272 	}
273 	if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
274 		/* Inline extent must have 0 as key offset */
275 		if (unlikely(key->offset)) {
276 			file_extent_err(leaf, slot,
277 		"invalid file_offset for inline file extent, have %llu expect 0",
278 				key->offset);
279 			return -EUCLEAN;
280 		}
281 
282 		/* Compressed inline extent has no on-disk size, skip it */
283 		if (btrfs_file_extent_compression(leaf, fi) !=
284 		    BTRFS_COMPRESS_NONE)
285 			return 0;
286 
287 		/* Uncompressed inline extent size must match item size */
288 		if (unlikely(item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START +
289 					  btrfs_file_extent_ram_bytes(leaf, fi))) {
290 			file_extent_err(leaf, slot,
291 	"invalid ram_bytes for uncompressed inline extent, have %u expect %llu",
292 				item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START +
293 				btrfs_file_extent_ram_bytes(leaf, fi));
294 			return -EUCLEAN;
295 		}
296 		return 0;
297 	}
298 
299 	/* Regular or preallocated extent has fixed item size */
300 	if (unlikely(item_size != sizeof(*fi))) {
301 		file_extent_err(leaf, slot,
302 	"invalid item size for reg/prealloc file extent, have %u expect %zu",
303 			item_size, sizeof(*fi));
304 		return -EUCLEAN;
305 	}
306 	if (unlikely(CHECK_FE_ALIGNED(leaf, slot, fi, ram_bytes, sectorsize) ||
307 		     CHECK_FE_ALIGNED(leaf, slot, fi, disk_bytenr, sectorsize) ||
308 		     CHECK_FE_ALIGNED(leaf, slot, fi, disk_num_bytes, sectorsize) ||
309 		     CHECK_FE_ALIGNED(leaf, slot, fi, offset, sectorsize) ||
310 		     CHECK_FE_ALIGNED(leaf, slot, fi, num_bytes, sectorsize)))
311 		return -EUCLEAN;
312 
313 	/* Catch extent end overflow */
314 	if (unlikely(check_add_overflow(btrfs_file_extent_num_bytes(leaf, fi),
315 					key->offset, &extent_end))) {
316 		file_extent_err(leaf, slot,
317 	"extent end overflow, have file offset %llu extent num bytes %llu",
318 				key->offset,
319 				btrfs_file_extent_num_bytes(leaf, fi));
320 		return -EUCLEAN;
321 	}
322 
323 	/*
324 	 * Check that no two consecutive file extent items, in the same leaf,
325 	 * present ranges that overlap each other.
326 	 */
327 	if (slot > 0 &&
328 	    prev_key->objectid == key->objectid &&
329 	    prev_key->type == BTRFS_EXTENT_DATA_KEY) {
330 		struct btrfs_file_extent_item *prev_fi;
331 		u64 prev_end;
332 
333 		prev_fi = btrfs_item_ptr(leaf, slot - 1,
334 					 struct btrfs_file_extent_item);
335 		prev_end = file_extent_end(leaf, prev_key, prev_fi);
336 		if (unlikely(prev_end > key->offset)) {
337 			file_extent_err(leaf, slot - 1,
338 "file extent end range (%llu) goes beyond start offset (%llu) of the next file extent",
339 					prev_end, key->offset);
340 			return -EUCLEAN;
341 		}
342 	}
343 
344 	/*
345 	 * For non-compressed data extents, ram_bytes should match its
346 	 * disk_num_bytes.
347 	 * However we do not really utilize ram_bytes in this case, so this check
348 	 * is only optional for DEBUG builds for developers to catch the
349 	 * unexpected behaviors.
350 	 */
351 	if (IS_ENABLED(CONFIG_BTRFS_DEBUG) &&
352 	    btrfs_file_extent_compression(leaf, fi) == BTRFS_COMPRESS_NONE &&
353 	    btrfs_file_extent_disk_bytenr(leaf, fi)) {
354 		if (WARN_ON(btrfs_file_extent_ram_bytes(leaf, fi) !=
355 			    btrfs_file_extent_disk_num_bytes(leaf, fi)))
356 			file_extent_err(leaf, slot,
357 "mismatch ram_bytes (%llu) and disk_num_bytes (%llu) for non-compressed extent",
358 					btrfs_file_extent_ram_bytes(leaf, fi),
359 					btrfs_file_extent_disk_num_bytes(leaf, fi));
360 	}
361 
362 	return 0;
363 }
364 
365 static int check_csum_item(struct extent_buffer *leaf, struct btrfs_key *key,
366 			   int slot, struct btrfs_key *prev_key)
367 {
368 	struct btrfs_fs_info *fs_info = leaf->fs_info;
369 	u32 sectorsize = fs_info->sectorsize;
370 	const u32 csumsize = fs_info->csum_size;
371 
372 	if (unlikely(key->objectid != BTRFS_EXTENT_CSUM_OBJECTID)) {
373 		generic_err(leaf, slot,
374 		"invalid key objectid for csum item, have %llu expect %llu",
375 			key->objectid, BTRFS_EXTENT_CSUM_OBJECTID);
376 		return -EUCLEAN;
377 	}
378 	if (unlikely(!IS_ALIGNED(key->offset, sectorsize))) {
379 		generic_err(leaf, slot,
380 	"unaligned key offset for csum item, have %llu should be aligned to %u",
381 			key->offset, sectorsize);
382 		return -EUCLEAN;
383 	}
384 	if (unlikely(!IS_ALIGNED(btrfs_item_size(leaf, slot), csumsize))) {
385 		generic_err(leaf, slot,
386 	"unaligned item size for csum item, have %u should be aligned to %u",
387 			btrfs_item_size(leaf, slot), csumsize);
388 		return -EUCLEAN;
389 	}
390 	if (slot > 0 && prev_key->type == BTRFS_EXTENT_CSUM_KEY) {
391 		u64 prev_csum_end;
392 		u32 prev_item_size;
393 
394 		prev_item_size = btrfs_item_size(leaf, slot - 1);
395 		prev_csum_end = (prev_item_size / csumsize) * sectorsize;
396 		prev_csum_end += prev_key->offset;
397 		if (unlikely(prev_csum_end > key->offset)) {
398 			generic_err(leaf, slot - 1,
399 "csum end range (%llu) goes beyond the start range (%llu) of the next csum item",
400 				    prev_csum_end, key->offset);
401 			return -EUCLEAN;
402 		}
403 	}
404 	return 0;
405 }
406 
407 /* Inode item error output has the same format as dir_item_err() */
408 #define inode_item_err(eb, slot, fmt, ...)			\
409 	dir_item_err(eb, slot, fmt, __VA_ARGS__)
410 
411 static int check_inode_key(struct extent_buffer *leaf, struct btrfs_key *key,
412 			   int slot)
413 {
414 	struct btrfs_key item_key;
415 	bool is_inode_item;
416 
417 	btrfs_item_key_to_cpu(leaf, &item_key, slot);
418 	is_inode_item = (item_key.type == BTRFS_INODE_ITEM_KEY);
419 
420 	/* For XATTR_ITEM, location key should be all 0 */
421 	if (item_key.type == BTRFS_XATTR_ITEM_KEY) {
422 		if (unlikely(key->objectid != 0 || key->type != 0 ||
423 			     key->offset != 0))
424 			return -EUCLEAN;
425 		return 0;
426 	}
427 
428 	if (unlikely((key->objectid < BTRFS_FIRST_FREE_OBJECTID ||
429 		      key->objectid > BTRFS_LAST_FREE_OBJECTID) &&
430 		     key->objectid != BTRFS_ROOT_TREE_DIR_OBJECTID &&
431 		     key->objectid != BTRFS_FREE_INO_OBJECTID)) {
432 		if (is_inode_item) {
433 			generic_err(leaf, slot,
434 	"invalid key objectid: has %llu expect %llu or [%llu, %llu] or %llu",
435 				key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID,
436 				BTRFS_FIRST_FREE_OBJECTID,
437 				BTRFS_LAST_FREE_OBJECTID,
438 				BTRFS_FREE_INO_OBJECTID);
439 		} else {
440 			dir_item_err(leaf, slot,
441 "invalid location key objectid: has %llu expect %llu or [%llu, %llu] or %llu",
442 				key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID,
443 				BTRFS_FIRST_FREE_OBJECTID,
444 				BTRFS_LAST_FREE_OBJECTID,
445 				BTRFS_FREE_INO_OBJECTID);
446 		}
447 		return -EUCLEAN;
448 	}
449 	if (unlikely(key->offset != 0)) {
450 		if (is_inode_item)
451 			inode_item_err(leaf, slot,
452 				       "invalid key offset: has %llu expect 0",
453 				       key->offset);
454 		else
455 			dir_item_err(leaf, slot,
456 				"invalid location key offset:has %llu expect 0",
457 				key->offset);
458 		return -EUCLEAN;
459 	}
460 	return 0;
461 }
462 
463 static int check_root_key(struct extent_buffer *leaf, struct btrfs_key *key,
464 			  int slot)
465 {
466 	struct btrfs_key item_key;
467 	bool is_root_item;
468 
469 	btrfs_item_key_to_cpu(leaf, &item_key, slot);
470 	is_root_item = (item_key.type == BTRFS_ROOT_ITEM_KEY);
471 
472 	/*
473 	 * Bad rootid for reloc trees.
474 	 *
475 	 * Reloc trees are only for subvolume trees, other trees only need
476 	 * to be COWed to be relocated.
477 	 */
478 	if (unlikely(is_root_item && key->objectid == BTRFS_TREE_RELOC_OBJECTID &&
479 		     !btrfs_is_fstree(key->offset))) {
480 		generic_err(leaf, slot,
481 		"invalid reloc tree for root %lld, root id is not a subvolume tree",
482 			    key->offset);
483 		return -EUCLEAN;
484 	}
485 
486 	/* No such tree id */
487 	if (unlikely(key->objectid == 0)) {
488 		if (is_root_item)
489 			generic_err(leaf, slot, "invalid root id 0");
490 		else
491 			dir_item_err(leaf, slot,
492 				     "invalid location key root id 0");
493 		return -EUCLEAN;
494 	}
495 
496 	/* DIR_ITEM/INDEX/INODE_REF is not allowed to point to non-fs trees */
497 	if (unlikely(!btrfs_is_fstree(key->objectid) && !is_root_item)) {
498 		dir_item_err(leaf, slot,
499 		"invalid location key objectid, have %llu expect [%llu, %llu]",
500 				key->objectid, BTRFS_FIRST_FREE_OBJECTID,
501 				BTRFS_LAST_FREE_OBJECTID);
502 		return -EUCLEAN;
503 	}
504 
505 	/*
506 	 * ROOT_ITEM with non-zero offset means this is a snapshot, created at
507 	 * @offset transid.
508 	 * Furthermore, for location key in DIR_ITEM, its offset is always -1.
509 	 *
510 	 * So here we only check offset for reloc tree whose key->offset must
511 	 * be a valid tree.
512 	 */
513 	if (unlikely(key->objectid == BTRFS_TREE_RELOC_OBJECTID &&
514 		     key->offset == 0)) {
515 		generic_err(leaf, slot, "invalid root id 0 for reloc tree");
516 		return -EUCLEAN;
517 	}
518 	return 0;
519 }
520 
521 static int check_dir_item(struct extent_buffer *leaf,
522 			  struct btrfs_key *key, struct btrfs_key *prev_key,
523 			  int slot)
524 {
525 	struct btrfs_fs_info *fs_info = leaf->fs_info;
526 	struct btrfs_dir_item *di;
527 	u32 item_size = btrfs_item_size(leaf, slot);
528 	u32 cur = 0;
529 
530 	if (unlikely(!check_prev_ino(leaf, key, slot, prev_key)))
531 		return -EUCLEAN;
532 
533 	di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
534 	while (cur < item_size) {
535 		struct btrfs_key location_key;
536 		u32 name_len;
537 		u32 data_len;
538 		u32 max_name_len;
539 		u32 total_size;
540 		u32 name_hash;
541 		u8 dir_type;
542 		int ret;
543 
544 		/* header itself should not cross item boundary */
545 		if (unlikely(cur + sizeof(*di) > item_size)) {
546 			dir_item_err(leaf, slot,
547 		"dir item header crosses item boundary, have %zu boundary %u",
548 				cur + sizeof(*di), item_size);
549 			return -EUCLEAN;
550 		}
551 
552 		/* Location key check */
553 		btrfs_dir_item_key_to_cpu(leaf, di, &location_key);
554 		if (location_key.type == BTRFS_ROOT_ITEM_KEY) {
555 			ret = check_root_key(leaf, &location_key, slot);
556 			if (unlikely(ret < 0))
557 				return ret;
558 		} else if (location_key.type == BTRFS_INODE_ITEM_KEY ||
559 			   location_key.type == 0) {
560 			ret = check_inode_key(leaf, &location_key, slot);
561 			if (unlikely(ret < 0))
562 				return ret;
563 		} else {
564 			dir_item_err(leaf, slot,
565 			"invalid location key type, have %u, expect %u or %u",
566 				     location_key.type, BTRFS_ROOT_ITEM_KEY,
567 				     BTRFS_INODE_ITEM_KEY);
568 			return -EUCLEAN;
569 		}
570 
571 		/* dir type check */
572 		dir_type = btrfs_dir_ftype(leaf, di);
573 		if (unlikely(dir_type <= BTRFS_FT_UNKNOWN ||
574 			     dir_type >= BTRFS_FT_MAX)) {
575 			dir_item_err(leaf, slot,
576 			"invalid dir item type, have %u expect (0, %u)",
577 				dir_type, BTRFS_FT_MAX);
578 			return -EUCLEAN;
579 		}
580 
581 		if (unlikely(key->type == BTRFS_XATTR_ITEM_KEY &&
582 			     dir_type != BTRFS_FT_XATTR)) {
583 			dir_item_err(leaf, slot,
584 		"invalid dir item type for XATTR key, have %u expect %u",
585 				dir_type, BTRFS_FT_XATTR);
586 			return -EUCLEAN;
587 		}
588 		if (unlikely(dir_type == BTRFS_FT_XATTR &&
589 			     key->type != BTRFS_XATTR_ITEM_KEY)) {
590 			dir_item_err(leaf, slot,
591 			"xattr dir type found for non-XATTR key");
592 			return -EUCLEAN;
593 		}
594 		if (dir_type == BTRFS_FT_XATTR)
595 			max_name_len = XATTR_NAME_MAX;
596 		else
597 			max_name_len = BTRFS_NAME_LEN;
598 
599 		/* Name/data length check */
600 		name_len = btrfs_dir_name_len(leaf, di);
601 		data_len = btrfs_dir_data_len(leaf, di);
602 		if (unlikely(name_len > max_name_len)) {
603 			dir_item_err(leaf, slot,
604 			"dir item name len too long, have %u max %u",
605 				name_len, max_name_len);
606 			return -EUCLEAN;
607 		}
608 		if (unlikely(name_len + data_len > BTRFS_MAX_XATTR_SIZE(fs_info))) {
609 			dir_item_err(leaf, slot,
610 			"dir item name and data len too long, have %u max %u",
611 				name_len + data_len,
612 				BTRFS_MAX_XATTR_SIZE(fs_info));
613 			return -EUCLEAN;
614 		}
615 
616 		if (unlikely(data_len && dir_type != BTRFS_FT_XATTR)) {
617 			dir_item_err(leaf, slot,
618 			"dir item with invalid data len, have %u expect 0",
619 				data_len);
620 			return -EUCLEAN;
621 		}
622 
623 		total_size = sizeof(*di) + name_len + data_len;
624 
625 		/* header and name/data should not cross item boundary */
626 		if (unlikely(cur + total_size > item_size)) {
627 			dir_item_err(leaf, slot,
628 		"dir item data crosses item boundary, have %u boundary %u",
629 				cur + total_size, item_size);
630 			return -EUCLEAN;
631 		}
632 
633 		/*
634 		 * Special check for XATTR/DIR_ITEM, as key->offset is name
635 		 * hash, should match its name
636 		 */
637 		if (key->type == BTRFS_DIR_ITEM_KEY ||
638 		    key->type == BTRFS_XATTR_ITEM_KEY) {
639 			char namebuf[MAX(BTRFS_NAME_LEN, XATTR_NAME_MAX)];
640 
641 			read_extent_buffer(leaf, namebuf,
642 					(unsigned long)(di + 1), name_len);
643 			name_hash = btrfs_name_hash(namebuf, name_len);
644 			if (unlikely(key->offset != name_hash)) {
645 				dir_item_err(leaf, slot,
646 		"name hash mismatch with key, have 0x%016x expect 0x%016llx",
647 					name_hash, key->offset);
648 				return -EUCLEAN;
649 			}
650 		}
651 		cur += total_size;
652 		di = (struct btrfs_dir_item *)((void *)di + total_size);
653 	}
654 	return 0;
655 }
656 
657 __printf(3, 4)
658 __cold
659 static void block_group_err(const struct extent_buffer *eb, int slot,
660 			    const char *fmt, ...)
661 {
662 	const struct btrfs_fs_info *fs_info = eb->fs_info;
663 	struct btrfs_key key;
664 	struct va_format vaf;
665 	va_list args;
666 
667 	btrfs_item_key_to_cpu(eb, &key, slot);
668 	va_start(args, fmt);
669 
670 	vaf.fmt = fmt;
671 	vaf.va = &args;
672 
673 	dump_page(folio_page(eb->folios[0], 0), "eb page dump");
674 	btrfs_crit(fs_info,
675 	"corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV",
676 		btrfs_header_level(eb) == 0 ? "leaf" : "node",
677 		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
678 		key.objectid, key.offset, &vaf);
679 	va_end(args);
680 }
681 
682 static int check_block_group_item(struct extent_buffer *leaf,
683 				  struct btrfs_key *key, int slot)
684 {
685 	struct btrfs_fs_info *fs_info = leaf->fs_info;
686 	struct btrfs_block_group_item bgi;
687 	u32 item_size = btrfs_item_size(leaf, slot);
688 	u64 chunk_objectid;
689 	u64 flags;
690 	u64 type;
691 	size_t exp_size;
692 
693 	/*
694 	 * Here we don't really care about alignment since extent allocator can
695 	 * handle it.  We care more about the size.
696 	 */
697 	if (unlikely(key->offset == 0)) {
698 		block_group_err(leaf, slot,
699 				"invalid block group size 0");
700 		return -EUCLEAN;
701 	}
702 
703 	if (btrfs_fs_incompat(fs_info, REMAP_TREE))
704 		exp_size = sizeof(struct btrfs_block_group_item_v2);
705 	else
706 		exp_size = sizeof(struct btrfs_block_group_item);
707 
708 	if (unlikely(item_size != exp_size)) {
709 		block_group_err(leaf, slot,
710 			"invalid item size, have %u expect %zu",
711 				item_size, exp_size);
712 		return -EUCLEAN;
713 	}
714 
715 	read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
716 			   sizeof(bgi));
717 	chunk_objectid = btrfs_stack_block_group_chunk_objectid(&bgi);
718 	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
719 		/*
720 		 * We don't init the nr_global_roots until we load the global
721 		 * roots, so this could be 0 at mount time.  If it's 0 we'll
722 		 * just assume we're fine, and later we'll check against our
723 		 * actual value.
724 		 */
725 		if (unlikely(fs_info->nr_global_roots &&
726 			     chunk_objectid >= fs_info->nr_global_roots)) {
727 			block_group_err(leaf, slot,
728 	"invalid block group global root id, have %llu, needs to be <= %llu",
729 					chunk_objectid,
730 					fs_info->nr_global_roots);
731 			return -EUCLEAN;
732 		}
733 	} else if (unlikely(chunk_objectid != BTRFS_FIRST_CHUNK_TREE_OBJECTID)) {
734 		block_group_err(leaf, slot,
735 		"invalid block group chunk objectid, have %llu expect %llu",
736 				btrfs_stack_block_group_chunk_objectid(&bgi),
737 				BTRFS_FIRST_CHUNK_TREE_OBJECTID);
738 		return -EUCLEAN;
739 	}
740 
741 	if (unlikely(btrfs_stack_block_group_used(&bgi) > key->offset)) {
742 		block_group_err(leaf, slot,
743 			"invalid block group used, have %llu expect [0, %llu)",
744 				btrfs_stack_block_group_used(&bgi), key->offset);
745 		return -EUCLEAN;
746 	}
747 
748 	flags = btrfs_stack_block_group_flags(&bgi);
749 	if (unlikely(hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1)) {
750 		block_group_err(leaf, slot,
751 "invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set",
752 			flags & BTRFS_BLOCK_GROUP_PROFILE_MASK,
753 			hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK));
754 		return -EUCLEAN;
755 	}
756 
757 	if (unlikely(flags & BTRFS_BLOCK_GROUP_METADATA_REMAP &&
758 		     !btrfs_fs_incompat(fs_info, REMAP_TREE))) {
759 		block_group_err(leaf, slot,
760 "invalid flags, have 0x%llx (METADATA_REMAP flag set) but no remap-tree incompat flag",
761 				flags);
762 		return -EUCLEAN;
763 	}
764 
765 	type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
766 	if (unlikely(type != BTRFS_BLOCK_GROUP_DATA &&
767 		     type != BTRFS_BLOCK_GROUP_METADATA &&
768 		     type != BTRFS_BLOCK_GROUP_SYSTEM &&
769 		     type != BTRFS_BLOCK_GROUP_METADATA_REMAP &&
770 		     type != (BTRFS_BLOCK_GROUP_METADATA |
771 			      BTRFS_BLOCK_GROUP_DATA))) {
772 		block_group_err(leaf, slot,
773 "invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx, 0x%llx or 0x%llx",
774 			type, hweight64(type),
775 			BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
776 			BTRFS_BLOCK_GROUP_SYSTEM, BTRFS_BLOCK_GROUP_METADATA_REMAP,
777 			BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA);
778 		return -EUCLEAN;
779 	}
780 
781 	if (unlikely(!btrfs_fs_incompat(fs_info, REMAP_TREE) &&
782 		     type == BTRFS_BLOCK_GROUP_METADATA_REMAP)) {
783 		block_group_err(leaf, slot,
784 		"invalid type, METADATA_REMAP set but REMAP_TREE incompat flag not set");
785 		return -EUCLEAN;
786 	}
787 
788 	if (unlikely(!btrfs_fs_incompat(fs_info, REMAP_TREE) &&
789 		     flags & BTRFS_BLOCK_GROUP_REMAPPED)) {
790 		block_group_err(leaf, slot,
791 		"invalid flags, REMAPPED set but REMAP_TREE incompat flag not set");
792 		return -EUCLEAN;
793 	}
794 
795 	if (item_size == sizeof(struct btrfs_block_group_item_v2)) {
796 		struct btrfs_block_group_item_v2 *bgi2;
797 		u64 remap_bytes;
798 		u32 identity_remap_count;
799 
800 		bgi2 = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item_v2);
801 		remap_bytes = btrfs_block_group_v2_remap_bytes(leaf, bgi2);
802 
803 		if (unlikely(remap_bytes > key->offset)) {
804 			block_group_err(leaf, slot,
805 				"invalid remap_bytes, have %llu expect [0, %llu]",
806 					remap_bytes, key->offset);
807 			return -EUCLEAN;
808 		}
809 
810 		identity_remap_count = btrfs_block_group_v2_identity_remap_count(leaf, bgi2);
811 		if (unlikely((u64)identity_remap_count >
812 			     key->offset >> fs_info->sectorsize_bits)) {
813 			block_group_err(leaf, slot,
814 				"invalid identity_remap_count, have %u expect [0, %llu]",
815 					identity_remap_count,
816 					key->offset >> fs_info->sectorsize_bits);
817 			return -EUCLEAN;
818 		}
819 	}
820 
821 	return 0;
822 }
823 
824 __printf(5, 6)
825 __cold
826 static void chunk_err(const struct btrfs_fs_info *fs_info,
827 		      const struct extent_buffer *leaf,
828 		      const struct btrfs_chunk *chunk, u64 logical,
829 		      const char *fmt, ...)
830 {
831 	bool is_sb = !leaf;
832 	struct va_format vaf;
833 	va_list args;
834 	int i;
835 	int slot = -1;
836 
837 	if (!is_sb) {
838 		/*
839 		 * Get the slot number by iterating through all slots, this
840 		 * would provide better readability.
841 		 */
842 		for (i = 0; i < btrfs_header_nritems(leaf); i++) {
843 			if (btrfs_item_ptr_offset(leaf, i) ==
844 					(unsigned long)chunk) {
845 				slot = i;
846 				break;
847 			}
848 		}
849 	}
850 	va_start(args, fmt);
851 	vaf.fmt = fmt;
852 	vaf.va = &args;
853 
854 	if (is_sb)
855 		btrfs_crit(fs_info,
856 		"corrupt superblock syschunk array: chunk_start=%llu, %pV",
857 			   logical, &vaf);
858 	else
859 		btrfs_crit(fs_info,
860 	"corrupt leaf: root=%llu block=%llu slot=%d chunk_start=%llu, %pV",
861 			   BTRFS_CHUNK_TREE_OBJECTID, leaf->start, slot,
862 			   logical, &vaf);
863 	va_end(args);
864 }
865 
866 static bool valid_stripe_count(u64 profile, u16 num_stripes, u16 sub_stripes)
867 {
868 	switch (profile) {
869 	case BTRFS_BLOCK_GROUP_RAID0:
870 		return true;
871 	case BTRFS_BLOCK_GROUP_RAID10:
872 		return sub_stripes == btrfs_raid_array[BTRFS_RAID_RAID10].sub_stripes;
873 	case BTRFS_BLOCK_GROUP_RAID1:
874 		return num_stripes == btrfs_raid_array[BTRFS_RAID_RAID1].devs_min;
875 	case BTRFS_BLOCK_GROUP_RAID1C3:
876 		return num_stripes == btrfs_raid_array[BTRFS_RAID_RAID1C3].devs_min;
877 	case BTRFS_BLOCK_GROUP_RAID1C4:
878 		return num_stripes == btrfs_raid_array[BTRFS_RAID_RAID1C4].devs_min;
879 	case BTRFS_BLOCK_GROUP_RAID5:
880 		return num_stripes >= btrfs_raid_array[BTRFS_RAID_RAID5].devs_min;
881 	case BTRFS_BLOCK_GROUP_RAID6:
882 		return num_stripes >= btrfs_raid_array[BTRFS_RAID_RAID6].devs_min;
883 	case BTRFS_BLOCK_GROUP_DUP:
884 		return num_stripes == btrfs_raid_array[BTRFS_RAID_DUP].dev_stripes;
885 	case 0: /* SINGLE */
886 		return num_stripes == btrfs_raid_array[BTRFS_RAID_SINGLE].dev_stripes;
887 	default:
888 		BUG();
889 	}
890 }
891 
892 /*
893  * The common chunk check which could also work on super block sys chunk array.
894  *
895  * If @leaf is NULL, then @chunk must be an on-stack chunk item.
896  * (For superblock sys_chunk array, and fs_info->sectorsize is unreliable)
897  *
898  * Return -EUCLEAN if anything is corrupted.
899  * Return 0 if everything is OK.
900  */
901 int btrfs_check_chunk_valid(const struct btrfs_fs_info *fs_info,
902 			    const struct extent_buffer *leaf,
903 			    const struct btrfs_chunk *chunk, u64 logical,
904 			    u32 sectorsize)
905 {
906 	u64 length;
907 	u64 chunk_end;
908 	u64 stripe_len;
909 	u16 num_stripes;
910 	u16 sub_stripes;
911 	u64 type;
912 	u64 features;
913 	u32 chunk_sector_size;
914 	bool mixed = false;
915 	bool remapped;
916 	int raid_index;
917 	int nparity;
918 	int ncopies;
919 
920 	if (leaf) {
921 		length = btrfs_chunk_length(leaf, chunk);
922 		stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
923 		num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
924 		sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
925 		type = btrfs_chunk_type(leaf, chunk);
926 		chunk_sector_size = btrfs_chunk_sector_size(leaf, chunk);
927 	} else {
928 		length = btrfs_stack_chunk_length(chunk);
929 		stripe_len = btrfs_stack_chunk_stripe_len(chunk);
930 		num_stripes = btrfs_stack_chunk_num_stripes(chunk);
931 		sub_stripes = btrfs_stack_chunk_sub_stripes(chunk);
932 		type = btrfs_stack_chunk_type(chunk);
933 		chunk_sector_size = btrfs_stack_chunk_sector_size(chunk);
934 	}
935 	raid_index = btrfs_bg_flags_to_raid_index(type);
936 	ncopies = btrfs_raid_array[raid_index].ncopies;
937 	nparity = btrfs_raid_array[raid_index].nparity;
938 	remapped = (type & BTRFS_BLOCK_GROUP_REMAPPED);
939 
940 	if (unlikely(!remapped && !num_stripes)) {
941 		chunk_err(fs_info, leaf, chunk, logical,
942 			  "invalid chunk num_stripes, have %u", num_stripes);
943 		return -EUCLEAN;
944 	}
945 	if (unlikely(num_stripes != 0 && num_stripes < ncopies)) {
946 		chunk_err(fs_info, leaf, chunk, logical,
947 			  "invalid chunk num_stripes < ncopies, have %u < %d",
948 			  num_stripes, ncopies);
949 		return -EUCLEAN;
950 	}
951 	if (unlikely(nparity && num_stripes == nparity)) {
952 		chunk_err(fs_info, leaf, chunk, logical,
953 			  "invalid chunk num_stripes == nparity, have %u == %d",
954 			  num_stripes, nparity);
955 		return -EUCLEAN;
956 	}
957 	if (unlikely(!IS_ALIGNED(logical, sectorsize))) {
958 		chunk_err(fs_info, leaf, chunk, logical,
959 		"invalid chunk logical, have %llu should aligned to %u",
960 			  logical, sectorsize);
961 		return -EUCLEAN;
962 	}
963 	if (unlikely(chunk_sector_size != sectorsize)) {
964 		chunk_err(fs_info, leaf, chunk, logical,
965 			  "invalid chunk sectorsize, have %u expect %u",
966 			  chunk_sector_size, sectorsize);
967 		return -EUCLEAN;
968 	}
969 	if (unlikely(!length || !IS_ALIGNED(length, sectorsize))) {
970 		chunk_err(fs_info, leaf, chunk, logical,
971 			  "invalid chunk length, have %llu", length);
972 		return -EUCLEAN;
973 	}
974 	if (unlikely(check_add_overflow(logical, length, &chunk_end))) {
975 		chunk_err(fs_info, leaf, chunk, logical,
976 "invalid chunk logical start and length, have logical start %llu length %llu",
977 			  logical, length);
978 		return -EUCLEAN;
979 	}
980 	if (unlikely(!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN)) {
981 		chunk_err(fs_info, leaf, chunk, logical,
982 			  "invalid chunk stripe length: %llu",
983 			  stripe_len);
984 		return -EUCLEAN;
985 	}
986 	/*
987 	 * We artificially limit the chunk size, so that the number of stripes
988 	 * inside a chunk can be fit into a U32.  The current limit (256G) is
989 	 * way too large for real world usage anyway, and it's also much larger
990 	 * than our existing limit (10G).
991 	 *
992 	 * Thus it should be a good way to catch obvious bitflips.
993 	 */
994 	if (unlikely(length >= btrfs_stripe_nr_to_offset(U32_MAX))) {
995 		chunk_err(fs_info, leaf, chunk, logical,
996 			  "chunk length too large: have %llu limit %llu",
997 			  length, btrfs_stripe_nr_to_offset(U32_MAX));
998 		return -EUCLEAN;
999 	}
1000 	if (unlikely(type & ~BTRFS_BLOCK_GROUP_VALID)) {
1001 		chunk_err(fs_info, leaf, chunk, logical,
1002 			  "unrecognized chunk type: 0x%llx",
1003 			  type & ~BTRFS_BLOCK_GROUP_VALID);
1004 		return -EUCLEAN;
1005 	}
1006 
1007 	if (unlikely(!has_single_bit_set(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) &&
1008 		     (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0)) {
1009 		chunk_err(fs_info, leaf, chunk, logical,
1010 		"invalid chunk profile flag: 0x%llx, expect 0 or 1 bit set",
1011 			  type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
1012 		return -EUCLEAN;
1013 	}
1014 	if (unlikely((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0)) {
1015 		chunk_err(fs_info, leaf, chunk, logical,
1016 	"missing chunk type flag, have 0x%llx one bit must be set in 0x%llx",
1017 			  type, BTRFS_BLOCK_GROUP_TYPE_MASK);
1018 		return -EUCLEAN;
1019 	}
1020 
1021 	if (unlikely((type & BTRFS_BLOCK_GROUP_SYSTEM) &&
1022 		     (type & (BTRFS_BLOCK_GROUP_METADATA |
1023 			      BTRFS_BLOCK_GROUP_DATA)))) {
1024 		chunk_err(fs_info, leaf, chunk, logical,
1025 			  "system chunk with data or metadata type: 0x%llx",
1026 			  type);
1027 		return -EUCLEAN;
1028 	}
1029 
1030 	features = btrfs_super_incompat_flags(fs_info->super_copy);
1031 	if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
1032 		mixed = true;
1033 
1034 	if (!mixed) {
1035 		if (unlikely((type & BTRFS_BLOCK_GROUP_METADATA) &&
1036 			     (type & BTRFS_BLOCK_GROUP_DATA))) {
1037 			chunk_err(fs_info, leaf, chunk, logical,
1038 			"mixed chunk type in non-mixed mode: 0x%llx", type);
1039 			return -EUCLEAN;
1040 		}
1041 	}
1042 
1043 	if (unlikely((type & BTRFS_BLOCK_GROUP_METADATA_REMAP) &&
1044 		     !(features & BTRFS_FEATURE_INCOMPAT_REMAP_TREE))) {
1045 		chunk_err(fs_info, leaf, chunk, logical,
1046 		"METADATA_REMAP chunk type without REMAP_TREE incompat bit");
1047 		return -EUCLEAN;
1048 	}
1049 
1050 	if (unlikely(remapped &&
1051 		     !(features & BTRFS_FEATURE_INCOMPAT_REMAP_TREE))) {
1052 		chunk_err(fs_info, leaf, chunk, logical,
1053 		"REMAPPED chunk flag without REMAP_TREE incompat bit");
1054 		return -EUCLEAN;
1055 	}
1056 
1057 	if (!remapped &&
1058 	    !valid_stripe_count(type & BTRFS_BLOCK_GROUP_PROFILE_MASK,
1059 				num_stripes, sub_stripes)) {
1060 		chunk_err(fs_info, leaf, chunk, logical,
1061 			"invalid num_stripes:sub_stripes %u:%u for profile %llu",
1062 			num_stripes, sub_stripes,
1063 			type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
1064 		return -EUCLEAN;
1065 	}
1066 
1067 	return 0;
1068 }
1069 
1070 /*
1071  * Enhanced version of chunk item checker.
1072  *
1073  * The common btrfs_check_chunk_valid() doesn't check item size since it needs
1074  * to work on super block sys_chunk_array which doesn't have full item ptr.
1075  */
1076 static int check_leaf_chunk_item(struct extent_buffer *leaf,
1077 				 struct btrfs_chunk *chunk,
1078 				 struct btrfs_key *key, int slot)
1079 {
1080 	struct btrfs_fs_info *fs_info = leaf->fs_info;
1081 	int num_stripes;
1082 
1083 	if (unlikely(btrfs_item_size(leaf, slot) < offsetof(struct btrfs_chunk, stripe))) {
1084 		chunk_err(fs_info, leaf, chunk, key->offset,
1085 			"invalid chunk item size: have %u expect [%zu, %u)",
1086 			btrfs_item_size(leaf, slot),
1087 			offsetof(struct btrfs_chunk, stripe),
1088 			BTRFS_LEAF_DATA_SIZE(fs_info));
1089 		return -EUCLEAN;
1090 	}
1091 
1092 	num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
1093 	/* Let btrfs_check_chunk_valid() handle this error type */
1094 	if (num_stripes == 0)
1095 		goto out;
1096 
1097 	if (unlikely(btrfs_chunk_item_size(num_stripes) !=
1098 		     btrfs_item_size(leaf, slot))) {
1099 		chunk_err(fs_info, leaf, chunk, key->offset,
1100 			"invalid chunk item size: have %u expect %lu",
1101 			btrfs_item_size(leaf, slot),
1102 			btrfs_chunk_item_size(num_stripes));
1103 		return -EUCLEAN;
1104 	}
1105 out:
1106 	return btrfs_check_chunk_valid(fs_info, leaf, chunk, key->offset,
1107 				       fs_info->sectorsize);
1108 }
1109 
1110 __printf(3, 4)
1111 __cold
1112 static void dev_item_err(const struct extent_buffer *eb, int slot,
1113 			 const char *fmt, ...)
1114 {
1115 	struct btrfs_key key;
1116 	struct va_format vaf;
1117 	va_list args;
1118 
1119 	btrfs_item_key_to_cpu(eb, &key, slot);
1120 	va_start(args, fmt);
1121 
1122 	vaf.fmt = fmt;
1123 	vaf.va = &args;
1124 
1125 	dump_page(folio_page(eb->folios[0], 0), "eb page dump");
1126 	btrfs_crit(eb->fs_info,
1127 	"corrupt %s: root=%llu block=%llu slot=%d devid=%llu %pV",
1128 		btrfs_header_level(eb) == 0 ? "leaf" : "node",
1129 		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
1130 		key.objectid, &vaf);
1131 	va_end(args);
1132 }
1133 
1134 static int check_dev_item(struct extent_buffer *leaf,
1135 			  struct btrfs_key *key, int slot)
1136 {
1137 	struct btrfs_dev_item *ditem;
1138 	const u32 item_size = btrfs_item_size(leaf, slot);
1139 
1140 	if (unlikely(key->objectid != BTRFS_DEV_ITEMS_OBJECTID)) {
1141 		dev_item_err(leaf, slot,
1142 			     "invalid objectid: has=%llu expect=%llu",
1143 			     key->objectid, BTRFS_DEV_ITEMS_OBJECTID);
1144 		return -EUCLEAN;
1145 	}
1146 
1147 	if (unlikely(item_size != sizeof(*ditem))) {
1148 		dev_item_err(leaf, slot, "invalid item size: has %u expect %zu",
1149 			     item_size, sizeof(*ditem));
1150 		return -EUCLEAN;
1151 	}
1152 
1153 	ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item);
1154 	if (unlikely(btrfs_device_id(leaf, ditem) != key->offset)) {
1155 		dev_item_err(leaf, slot,
1156 			     "devid mismatch: key has=%llu item has=%llu",
1157 			     key->offset, btrfs_device_id(leaf, ditem));
1158 		return -EUCLEAN;
1159 	}
1160 
1161 	/*
1162 	 * For device total_bytes, we don't have reliable way to check it, as
1163 	 * it can be 0 for device removal. Device size check can only be done
1164 	 * by dev extents check.
1165 	 */
1166 	if (unlikely(btrfs_device_bytes_used(leaf, ditem) >
1167 		     btrfs_device_total_bytes(leaf, ditem))) {
1168 		dev_item_err(leaf, slot,
1169 			     "invalid bytes used: have %llu expect [0, %llu]",
1170 			     btrfs_device_bytes_used(leaf, ditem),
1171 			     btrfs_device_total_bytes(leaf, ditem));
1172 		return -EUCLEAN;
1173 	}
1174 	/*
1175 	 * Remaining members like io_align/type/gen/dev_group aren't really
1176 	 * utilized.  Skip them to make later usage of them easier.
1177 	 */
1178 	return 0;
1179 }
1180 
1181 static int check_inode_item(struct extent_buffer *leaf,
1182 			    struct btrfs_key *key, int slot)
1183 {
1184 	struct btrfs_fs_info *fs_info = leaf->fs_info;
1185 	struct btrfs_inode_item *iitem;
1186 	u64 super_gen = btrfs_super_generation(fs_info->super_copy);
1187 	u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777);
1188 	const u32 item_size = btrfs_item_size(leaf, slot);
1189 	u32 mode;
1190 	int ret;
1191 	u32 flags;
1192 	u32 ro_flags;
1193 
1194 	ret = check_inode_key(leaf, key, slot);
1195 	if (unlikely(ret < 0))
1196 		return ret;
1197 
1198 	if (unlikely(item_size != sizeof(*iitem))) {
1199 		generic_err(leaf, slot, "invalid item size: has %u expect %zu",
1200 			    item_size, sizeof(*iitem));
1201 		return -EUCLEAN;
1202 	}
1203 
1204 	iitem = btrfs_item_ptr(leaf, slot, struct btrfs_inode_item);
1205 
1206 	/* Here we use super block generation + 1 to handle log tree */
1207 	if (unlikely(btrfs_inode_generation(leaf, iitem) > super_gen + 1)) {
1208 		inode_item_err(leaf, slot,
1209 			"invalid inode generation: has %llu expect (0, %llu]",
1210 			       btrfs_inode_generation(leaf, iitem),
1211 			       super_gen + 1);
1212 		return -EUCLEAN;
1213 	}
1214 	/* Note for ROOT_TREE_DIR_ITEM, mkfs could set its transid 0 */
1215 	if (unlikely(btrfs_inode_transid(leaf, iitem) > super_gen + 1)) {
1216 		inode_item_err(leaf, slot,
1217 			"invalid inode transid: has %llu expect [0, %llu]",
1218 			       btrfs_inode_transid(leaf, iitem), super_gen + 1);
1219 		return -EUCLEAN;
1220 	}
1221 
1222 	/*
1223 	 * For size and nbytes it's better not to be too strict, as for dir
1224 	 * item its size/nbytes can easily get wrong, but doesn't affect
1225 	 * anything in the fs. So here we skip the check.
1226 	 */
1227 	mode = btrfs_inode_mode(leaf, iitem);
1228 	if (unlikely(mode & ~valid_mask)) {
1229 		inode_item_err(leaf, slot,
1230 			       "unknown mode bit detected: 0x%x",
1231 			       mode & ~valid_mask);
1232 		return -EUCLEAN;
1233 	}
1234 
1235 	/*
1236 	 * S_IFMT is not bit mapped so we can't completely rely on
1237 	 * is_power_of_2/has_single_bit_set, but it can save us from checking
1238 	 * FIFO/CHR/DIR/REG.  Only needs to check BLK, LNK and SOCKS
1239 	 */
1240 	if (!has_single_bit_set(mode & S_IFMT)) {
1241 		if (unlikely(!S_ISLNK(mode) && !S_ISBLK(mode) && !S_ISSOCK(mode))) {
1242 			inode_item_err(leaf, slot,
1243 			"invalid mode: has 0%o expect valid S_IF* bit(s)",
1244 				       mode & S_IFMT);
1245 			return -EUCLEAN;
1246 		}
1247 	}
1248 	if (unlikely(S_ISDIR(mode) && btrfs_inode_nlink(leaf, iitem) > 1)) {
1249 		inode_item_err(leaf, slot,
1250 		       "invalid nlink: has %u expect no more than 1 for dir",
1251 			btrfs_inode_nlink(leaf, iitem));
1252 		return -EUCLEAN;
1253 	}
1254 	btrfs_inode_split_flags(btrfs_inode_flags(leaf, iitem), &flags, &ro_flags);
1255 	if (unlikely(flags & ~BTRFS_INODE_FLAG_MASK)) {
1256 		inode_item_err(leaf, slot,
1257 			       "unknown incompat flags detected: 0x%x", flags);
1258 		return -EUCLEAN;
1259 	}
1260 	if (unlikely(!sb_rdonly(fs_info->sb) &&
1261 		     (ro_flags & ~BTRFS_INODE_RO_FLAG_MASK))) {
1262 		inode_item_err(leaf, slot,
1263 			"unknown ro-compat flags detected on writeable mount: 0x%x",
1264 			ro_flags);
1265 		return -EUCLEAN;
1266 	}
1267 	return 0;
1268 }
1269 
1270 static int check_root_item(struct extent_buffer *leaf, struct btrfs_key *key,
1271 			   int slot)
1272 {
1273 	struct btrfs_fs_info *fs_info = leaf->fs_info;
1274 	struct btrfs_root_item ri = { 0 };
1275 	const u64 valid_root_flags = BTRFS_ROOT_SUBVOL_RDONLY |
1276 				     BTRFS_ROOT_SUBVOL_DEAD;
1277 	int ret;
1278 
1279 	ret = check_root_key(leaf, key, slot);
1280 	if (unlikely(ret < 0))
1281 		return ret;
1282 
1283 	if (unlikely(btrfs_item_size(leaf, slot) != sizeof(ri) &&
1284 		     btrfs_item_size(leaf, slot) !=
1285 		     btrfs_legacy_root_item_size())) {
1286 		generic_err(leaf, slot,
1287 			    "invalid root item size, have %u expect %zu or %u",
1288 			    btrfs_item_size(leaf, slot), sizeof(ri),
1289 			    btrfs_legacy_root_item_size());
1290 		return -EUCLEAN;
1291 	}
1292 
1293 	/*
1294 	 * For legacy root item, the members starting at generation_v2 will be
1295 	 * all filled with 0.
1296 	 * And since we allow generation_v2 as 0, it will still pass the check.
1297 	 */
1298 	read_extent_buffer(leaf, &ri, btrfs_item_ptr_offset(leaf, slot),
1299 			   btrfs_item_size(leaf, slot));
1300 
1301 	/* Generation related */
1302 	if (unlikely(btrfs_root_generation(&ri) >
1303 		     btrfs_super_generation(fs_info->super_copy) + 1)) {
1304 		generic_err(leaf, slot,
1305 			"invalid root generation, have %llu expect (0, %llu]",
1306 			    btrfs_root_generation(&ri),
1307 			    btrfs_super_generation(fs_info->super_copy) + 1);
1308 		return -EUCLEAN;
1309 	}
1310 	if (unlikely(btrfs_root_generation_v2(&ri) >
1311 		     btrfs_super_generation(fs_info->super_copy) + 1)) {
1312 		generic_err(leaf, slot,
1313 		"invalid root v2 generation, have %llu expect (0, %llu]",
1314 			    btrfs_root_generation_v2(&ri),
1315 			    btrfs_super_generation(fs_info->super_copy) + 1);
1316 		return -EUCLEAN;
1317 	}
1318 	if (unlikely(btrfs_root_last_snapshot(&ri) >
1319 		     btrfs_super_generation(fs_info->super_copy) + 1)) {
1320 		generic_err(leaf, slot,
1321 		"invalid root last_snapshot, have %llu expect (0, %llu]",
1322 			    btrfs_root_last_snapshot(&ri),
1323 			    btrfs_super_generation(fs_info->super_copy) + 1);
1324 		return -EUCLEAN;
1325 	}
1326 
1327 	/* Alignment and level check */
1328 	if (unlikely(!IS_ALIGNED(btrfs_root_bytenr(&ri), fs_info->sectorsize))) {
1329 		generic_err(leaf, slot,
1330 		"invalid root bytenr, have %llu expect to be aligned to %u",
1331 			    btrfs_root_bytenr(&ri), fs_info->sectorsize);
1332 		return -EUCLEAN;
1333 	}
1334 	if (unlikely(btrfs_root_level(&ri) >= BTRFS_MAX_LEVEL)) {
1335 		generic_err(leaf, slot,
1336 			    "invalid root level, have %u expect [0, %u]",
1337 			    btrfs_root_level(&ri), BTRFS_MAX_LEVEL - 1);
1338 		return -EUCLEAN;
1339 	}
1340 	if (unlikely(btrfs_root_drop_level(&ri) >= BTRFS_MAX_LEVEL)) {
1341 		generic_err(leaf, slot,
1342 			    "invalid root drop_level, have %u expect [0, %u]",
1343 			    btrfs_root_drop_level(&ri), BTRFS_MAX_LEVEL - 1);
1344 		return -EUCLEAN;
1345 	}
1346 	/*
1347 	 * If drop_progress.objectid is non-zero, a btrfs_drop_snapshot() was
1348 	 * interrupted and the resume point was recorded in drop_progress and
1349 	 * drop_level.  In that case drop_level must be >= 1: level 0 is the
1350 	 * leaf level and drop_snapshot never saves a checkpoint there (it
1351 	 * only records checkpoints at internal node levels in DROP_REFERENCE
1352 	 * stage).  A zero drop_level combined with a non-zero drop_progress
1353 	 * objectid indicates on-disk corruption and would cause a BUG_ON in
1354 	 * merge_reloc_root() and btrfs_drop_snapshot() at mount time.
1355 	 */
1356 	if (unlikely(btrfs_disk_key_objectid(&ri.drop_progress) != 0 &&
1357 		     btrfs_root_drop_level(&ri) == 0)) {
1358 		generic_err(leaf, slot,
1359 			    "invalid root drop_level 0 with non-zero drop_progress objectid %llu",
1360 			    btrfs_disk_key_objectid(&ri.drop_progress));
1361 		return -EUCLEAN;
1362 	}
1363 
1364 	/* Flags check */
1365 	if (unlikely(btrfs_root_flags(&ri) & ~valid_root_flags)) {
1366 		generic_err(leaf, slot,
1367 			    "invalid root flags, have 0x%llx expect mask 0x%llx",
1368 			    btrfs_root_flags(&ri), valid_root_flags);
1369 		return -EUCLEAN;
1370 	}
1371 	return 0;
1372 }
1373 
1374 __printf(3,4)
1375 __cold
1376 static void extent_err(const struct extent_buffer *eb, int slot,
1377 		       const char *fmt, ...)
1378 {
1379 	struct btrfs_key key;
1380 	struct va_format vaf;
1381 	va_list args;
1382 	u64 bytenr;
1383 	u64 len;
1384 
1385 	btrfs_item_key_to_cpu(eb, &key, slot);
1386 	bytenr = key.objectid;
1387 	if (key.type == BTRFS_METADATA_ITEM_KEY ||
1388 	    key.type == BTRFS_TREE_BLOCK_REF_KEY ||
1389 	    key.type == BTRFS_SHARED_BLOCK_REF_KEY)
1390 		len = eb->fs_info->nodesize;
1391 	else
1392 		len = key.offset;
1393 	va_start(args, fmt);
1394 
1395 	vaf.fmt = fmt;
1396 	vaf.va = &args;
1397 
1398 	dump_page(folio_page(eb->folios[0], 0), "eb page dump");
1399 	btrfs_crit(eb->fs_info,
1400 	"corrupt %s: block=%llu slot=%d extent bytenr=%llu len=%llu %pV",
1401 		btrfs_header_level(eb) == 0 ? "leaf" : "node",
1402 		eb->start, slot, bytenr, len, &vaf);
1403 	va_end(args);
1404 }
1405 
1406 static bool is_valid_dref_root(u64 rootid)
1407 {
1408 	/*
1409 	 * The following tree root objectids are allowed to have a data backref:
1410 	 * - subvolume trees
1411 	 * - data reloc tree
1412 	 * - tree root
1413 	 *   For v1 space cache
1414 	 */
1415 	return btrfs_is_fstree(rootid) || rootid == BTRFS_DATA_RELOC_TREE_OBJECTID ||
1416 	       rootid == BTRFS_ROOT_TREE_OBJECTID;
1417 }
1418 
1419 static int check_extent_item(struct extent_buffer *leaf,
1420 			     struct btrfs_key *key, int slot,
1421 			     struct btrfs_key *prev_key)
1422 {
1423 	struct btrfs_fs_info *fs_info = leaf->fs_info;
1424 	struct btrfs_extent_item *ei;
1425 	bool is_tree_block = false;
1426 	unsigned long ptr;	/* Current pointer inside inline refs */
1427 	unsigned long end;	/* Extent item end */
1428 	const u32 item_size = btrfs_item_size(leaf, slot);
1429 	u8 last_type = 0;
1430 	u64 last_seq = U64_MAX;
1431 	u64 flags;
1432 	u64 generation;
1433 	u64 total_refs;		/* Total refs in btrfs_extent_item */
1434 	u64 inline_refs = 0;	/* found total inline refs */
1435 
1436 	if (unlikely(key->type == BTRFS_METADATA_ITEM_KEY &&
1437 		     !btrfs_fs_incompat(fs_info, SKINNY_METADATA))) {
1438 		generic_err(leaf, slot,
1439 "invalid key type, METADATA_ITEM type invalid when SKINNY_METADATA feature disabled");
1440 		return -EUCLEAN;
1441 	}
1442 	/* key->objectid is the bytenr for both key types */
1443 	if (unlikely(!IS_ALIGNED(key->objectid, fs_info->sectorsize))) {
1444 		generic_err(leaf, slot,
1445 		"invalid key objectid, have %llu expect to be aligned to %u",
1446 			   key->objectid, fs_info->sectorsize);
1447 		return -EUCLEAN;
1448 	}
1449 
1450 	/* key->offset is tree level for METADATA_ITEM_KEY */
1451 	if (unlikely(key->type == BTRFS_METADATA_ITEM_KEY &&
1452 		     key->offset >= BTRFS_MAX_LEVEL)) {
1453 		extent_err(leaf, slot,
1454 			   "invalid tree level, have %llu expect [0, %u]",
1455 			   key->offset, BTRFS_MAX_LEVEL - 1);
1456 		return -EUCLEAN;
1457 	}
1458 
1459 	/*
1460 	 * EXTENT/METADATA_ITEM consists of:
1461 	 * 1) One btrfs_extent_item
1462 	 *    Records the total refs, type and generation of the extent.
1463 	 *
1464 	 * 2) One btrfs_tree_block_info (for EXTENT_ITEM and tree backref only)
1465 	 *    Records the first key and level of the tree block.
1466 	 *
1467 	 * 2) Zero or more btrfs_extent_inline_ref(s)
1468 	 *    Each inline ref has one btrfs_extent_inline_ref shows:
1469 	 *    2.1) The ref type, one of the 4
1470 	 *         TREE_BLOCK_REF	Tree block only
1471 	 *         SHARED_BLOCK_REF	Tree block only
1472 	 *         EXTENT_DATA_REF	Data only
1473 	 *         SHARED_DATA_REF	Data only
1474 	 *    2.2) Ref type specific data
1475 	 *         Either using btrfs_extent_inline_ref::offset, or specific
1476 	 *         data structure.
1477 	 *
1478 	 *    All above inline items should follow the order:
1479 	 *
1480 	 *    - All btrfs_extent_inline_ref::type should be in an ascending
1481 	 *      order
1482 	 *
1483 	 *    - Within the same type, the items should follow a descending
1484 	 *      order by their sequence number. The sequence number is
1485 	 *      determined by:
1486 	 *      * btrfs_extent_inline_ref::offset for all types  other than
1487 	 *        EXTENT_DATA_REF
1488 	 *      * hash_extent_data_ref() for EXTENT_DATA_REF
1489 	 */
1490 	if (unlikely(item_size < sizeof(*ei))) {
1491 		extent_err(leaf, slot,
1492 			   "invalid item size, have %u expect [%zu, %u)",
1493 			   item_size, sizeof(*ei),
1494 			   BTRFS_LEAF_DATA_SIZE(fs_info));
1495 		return -EUCLEAN;
1496 	}
1497 	end = item_size + btrfs_item_ptr_offset(leaf, slot);
1498 
1499 	/* Checks against extent_item */
1500 	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
1501 	flags = btrfs_extent_flags(leaf, ei);
1502 	total_refs = btrfs_extent_refs(leaf, ei);
1503 	generation = btrfs_extent_generation(leaf, ei);
1504 	if (unlikely(generation >
1505 		     btrfs_super_generation(fs_info->super_copy) + 1)) {
1506 		extent_err(leaf, slot,
1507 			   "invalid generation, have %llu expect (0, %llu]",
1508 			   generation,
1509 			   btrfs_super_generation(fs_info->super_copy) + 1);
1510 		return -EUCLEAN;
1511 	}
1512 	if (unlikely(!has_single_bit_set(flags & (BTRFS_EXTENT_FLAG_DATA |
1513 						  BTRFS_EXTENT_FLAG_TREE_BLOCK)))) {
1514 		extent_err(leaf, slot,
1515 		"invalid extent flag, have 0x%llx expect 1 bit set in 0x%llx",
1516 			flags, BTRFS_EXTENT_FLAG_DATA |
1517 			BTRFS_EXTENT_FLAG_TREE_BLOCK);
1518 		return -EUCLEAN;
1519 	}
1520 	is_tree_block = !!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK);
1521 	if (is_tree_block) {
1522 		if (unlikely(key->type == BTRFS_EXTENT_ITEM_KEY &&
1523 			     key->offset != fs_info->nodesize)) {
1524 			extent_err(leaf, slot,
1525 				   "invalid extent length, have %llu expect %u",
1526 				   key->offset, fs_info->nodesize);
1527 			return -EUCLEAN;
1528 		}
1529 	} else {
1530 		if (unlikely(key->type != BTRFS_EXTENT_ITEM_KEY)) {
1531 			extent_err(leaf, slot,
1532 			"invalid key type, have %u expect %u for data backref",
1533 				   key->type, BTRFS_EXTENT_ITEM_KEY);
1534 			return -EUCLEAN;
1535 		}
1536 		if (unlikely(!IS_ALIGNED(key->offset, fs_info->sectorsize))) {
1537 			extent_err(leaf, slot,
1538 			"invalid extent length, have %llu expect aligned to %u",
1539 				   key->offset, fs_info->sectorsize);
1540 			return -EUCLEAN;
1541 		}
1542 		if (unlikely(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
1543 			extent_err(leaf, slot,
1544 			"invalid extent flag, data has full backref set");
1545 			return -EUCLEAN;
1546 		}
1547 	}
1548 	ptr = (unsigned long)(struct btrfs_extent_item *)(ei + 1);
1549 
1550 	/* Check the special case of btrfs_tree_block_info */
1551 	if (is_tree_block && key->type != BTRFS_METADATA_ITEM_KEY) {
1552 		struct btrfs_tree_block_info *info;
1553 
1554 		info = (struct btrfs_tree_block_info *)ptr;
1555 		if (unlikely(btrfs_tree_block_level(leaf, info) >= BTRFS_MAX_LEVEL)) {
1556 			extent_err(leaf, slot,
1557 			"invalid tree block info level, have %u expect [0, %u]",
1558 				   btrfs_tree_block_level(leaf, info),
1559 				   BTRFS_MAX_LEVEL - 1);
1560 			return -EUCLEAN;
1561 		}
1562 		ptr = (unsigned long)(struct btrfs_tree_block_info *)(info + 1);
1563 	}
1564 
1565 	/* Check inline refs */
1566 	while (ptr < end) {
1567 		struct btrfs_extent_inline_ref *iref;
1568 		struct btrfs_extent_data_ref *dref;
1569 		struct btrfs_shared_data_ref *sref;
1570 		u64 seq;
1571 		u64 dref_root;
1572 		u64 dref_objectid;
1573 		u64 dref_offset;
1574 		u64 inline_offset;
1575 		u8 inline_type;
1576 
1577 		if (unlikely(ptr + sizeof(*iref) > end)) {
1578 			extent_err(leaf, slot,
1579 "inline ref item overflows extent item, ptr %lu iref size %zu end %lu",
1580 				   ptr, sizeof(*iref), end);
1581 			return -EUCLEAN;
1582 		}
1583 		iref = (struct btrfs_extent_inline_ref *)ptr;
1584 		inline_type = btrfs_extent_inline_ref_type(leaf, iref);
1585 		inline_offset = btrfs_extent_inline_ref_offset(leaf, iref);
1586 		seq = inline_offset;
1587 		if (unlikely(ptr + btrfs_extent_inline_ref_size(inline_type) > end)) {
1588 			extent_err(leaf, slot,
1589 "inline ref item overflows extent item, ptr %lu iref size %u end %lu",
1590 				   ptr, btrfs_extent_inline_ref_size(inline_type), end);
1591 			return -EUCLEAN;
1592 		}
1593 
1594 		switch (inline_type) {
1595 		/* inline_offset is subvolid of the owner, no need to check */
1596 		case BTRFS_TREE_BLOCK_REF_KEY:
1597 			inline_refs++;
1598 			break;
1599 		/* Contains parent bytenr */
1600 		case BTRFS_SHARED_BLOCK_REF_KEY:
1601 			if (unlikely(!IS_ALIGNED(inline_offset,
1602 						 fs_info->sectorsize))) {
1603 				extent_err(leaf, slot,
1604 		"invalid tree parent bytenr, have %llu expect aligned to %u",
1605 					   inline_offset, fs_info->sectorsize);
1606 				return -EUCLEAN;
1607 			}
1608 			inline_refs++;
1609 			break;
1610 		/*
1611 		 * Contains owner subvolid, owner key objectid, adjusted offset.
1612 		 * The only obvious corruption can happen in that offset.
1613 		 */
1614 		case BTRFS_EXTENT_DATA_REF_KEY:
1615 			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1616 			dref_root = btrfs_extent_data_ref_root(leaf, dref);
1617 			dref_objectid = btrfs_extent_data_ref_objectid(leaf, dref);
1618 			dref_offset = btrfs_extent_data_ref_offset(leaf, dref);
1619 			seq = hash_extent_data_ref(
1620 					btrfs_extent_data_ref_root(leaf, dref),
1621 					btrfs_extent_data_ref_objectid(leaf, dref),
1622 					btrfs_extent_data_ref_offset(leaf, dref));
1623 			if (unlikely(!is_valid_dref_root(dref_root))) {
1624 				extent_err(leaf, slot,
1625 					   "invalid data ref root value %llu",
1626 					   dref_root);
1627 				return -EUCLEAN;
1628 			}
1629 			if (unlikely(dref_objectid < BTRFS_FIRST_FREE_OBJECTID ||
1630 				     dref_objectid > BTRFS_LAST_FREE_OBJECTID)) {
1631 				extent_err(leaf, slot,
1632 					   "invalid data ref objectid value %llu",
1633 					   dref_objectid);
1634 				return -EUCLEAN;
1635 			}
1636 			if (unlikely(!IS_ALIGNED(dref_offset,
1637 						 fs_info->sectorsize))) {
1638 				extent_err(leaf, slot,
1639 		"invalid data ref offset, have %llu expect aligned to %u",
1640 					   dref_offset, fs_info->sectorsize);
1641 				return -EUCLEAN;
1642 			}
1643 			if (unlikely(btrfs_extent_data_ref_count(leaf, dref) == 0)) {
1644 				extent_err(leaf, slot,
1645 			"invalid data ref count, should have non-zero value");
1646 				return -EUCLEAN;
1647 			}
1648 			inline_refs += btrfs_extent_data_ref_count(leaf, dref);
1649 			break;
1650 		/* Contains parent bytenr and ref count */
1651 		case BTRFS_SHARED_DATA_REF_KEY:
1652 			sref = (struct btrfs_shared_data_ref *)(iref + 1);
1653 			if (unlikely(!IS_ALIGNED(inline_offset,
1654 						 fs_info->sectorsize))) {
1655 				extent_err(leaf, slot,
1656 		"invalid data parent bytenr, have %llu expect aligned to %u",
1657 					   inline_offset, fs_info->sectorsize);
1658 				return -EUCLEAN;
1659 			}
1660 			if (unlikely(btrfs_shared_data_ref_count(leaf, sref) == 0)) {
1661 				extent_err(leaf, slot,
1662 			"invalid shared data ref count, should have non-zero value");
1663 				return -EUCLEAN;
1664 			}
1665 			inline_refs += btrfs_shared_data_ref_count(leaf, sref);
1666 			break;
1667 		case BTRFS_EXTENT_OWNER_REF_KEY:
1668 			WARN_ON(!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA));
1669 			break;
1670 		default:
1671 			extent_err(leaf, slot, "unknown inline ref type: %u",
1672 				   inline_type);
1673 			return -EUCLEAN;
1674 		}
1675 		if (unlikely(inline_type < last_type)) {
1676 			extent_err(leaf, slot,
1677 				   "inline ref out-of-order: has type %u, prev type %u",
1678 				   inline_type, last_type);
1679 			return -EUCLEAN;
1680 		}
1681 		/* Type changed, allow the sequence starts from U64_MAX again. */
1682 		if (inline_type > last_type)
1683 			last_seq = U64_MAX;
1684 		if (unlikely(seq > last_seq)) {
1685 			extent_err(leaf, slot,
1686 "inline ref out-of-order: has type %u offset %llu seq 0x%llx, prev type %u seq 0x%llx",
1687 				   inline_type, inline_offset, seq,
1688 				   last_type, last_seq);
1689 			return -EUCLEAN;
1690 		}
1691 		last_type = inline_type;
1692 		last_seq = seq;
1693 		ptr += btrfs_extent_inline_ref_size(inline_type);
1694 	}
1695 	/* No padding is allowed */
1696 	if (unlikely(ptr != end)) {
1697 		extent_err(leaf, slot,
1698 			   "invalid extent item size, padding bytes found");
1699 		return -EUCLEAN;
1700 	}
1701 
1702 	/* Finally, check the inline refs against total refs */
1703 	if (unlikely(inline_refs > total_refs)) {
1704 		extent_err(leaf, slot,
1705 			"invalid extent refs, have %llu expect >= inline %llu",
1706 			   total_refs, inline_refs);
1707 		return -EUCLEAN;
1708 	}
1709 
1710 	if ((prev_key->type == BTRFS_EXTENT_ITEM_KEY) ||
1711 	    (prev_key->type == BTRFS_METADATA_ITEM_KEY)) {
1712 		u64 prev_end = prev_key->objectid;
1713 
1714 		if (prev_key->type == BTRFS_METADATA_ITEM_KEY)
1715 			prev_end += fs_info->nodesize;
1716 		else
1717 			prev_end += prev_key->offset;
1718 
1719 		if (unlikely(prev_end > key->objectid)) {
1720 			extent_err(leaf, slot,
1721 	"previous extent " BTRFS_KEY_FMT " overlaps current extent " BTRFS_KEY_FMT,
1722 				   BTRFS_KEY_FMT_VALUE(prev_key),
1723 				   BTRFS_KEY_FMT_VALUE(key));
1724 			return -EUCLEAN;
1725 		}
1726 	}
1727 
1728 	return 0;
1729 }
1730 
1731 static int check_simple_keyed_refs(struct extent_buffer *leaf,
1732 				   struct btrfs_key *key, int slot)
1733 {
1734 	u32 expect_item_size = 0;
1735 
1736 	if (key->type == BTRFS_SHARED_DATA_REF_KEY) {
1737 		struct btrfs_shared_data_ref *sref;
1738 
1739 		sref = btrfs_item_ptr(leaf, slot, struct btrfs_shared_data_ref);
1740 		if (unlikely(btrfs_shared_data_ref_count(leaf, sref) == 0)) {
1741 			extent_err(leaf, slot,
1742 		"invalid shared data backref count, should have non-zero value");
1743 			return -EUCLEAN;
1744 		}
1745 
1746 		expect_item_size = sizeof(struct btrfs_shared_data_ref);
1747 	}
1748 
1749 	if (unlikely(btrfs_item_size(leaf, slot) != expect_item_size)) {
1750 		generic_err(leaf, slot,
1751 		"invalid item size, have %u expect %u for key type %u",
1752 			    btrfs_item_size(leaf, slot),
1753 			    expect_item_size, key->type);
1754 		return -EUCLEAN;
1755 	}
1756 	if (unlikely(!IS_ALIGNED(key->objectid, leaf->fs_info->sectorsize))) {
1757 		generic_err(leaf, slot,
1758 "invalid key objectid for shared block ref, have %llu expect aligned to %u",
1759 			    key->objectid, leaf->fs_info->sectorsize);
1760 		return -EUCLEAN;
1761 	}
1762 	if (unlikely(key->type != BTRFS_TREE_BLOCK_REF_KEY &&
1763 		     !IS_ALIGNED(key->offset, leaf->fs_info->sectorsize))) {
1764 		extent_err(leaf, slot,
1765 		"invalid tree parent bytenr, have %llu expect aligned to %u",
1766 			   key->offset, leaf->fs_info->sectorsize);
1767 		return -EUCLEAN;
1768 	}
1769 	return 0;
1770 }
1771 
1772 static int check_extent_data_ref(struct extent_buffer *leaf,
1773 				 struct btrfs_key *key, int slot)
1774 {
1775 	struct btrfs_extent_data_ref *dref;
1776 	unsigned long ptr = btrfs_item_ptr_offset(leaf, slot);
1777 	const unsigned long end = ptr + btrfs_item_size(leaf, slot);
1778 
1779 	if (unlikely(btrfs_item_size(leaf, slot) % sizeof(*dref) != 0)) {
1780 		generic_err(leaf, slot,
1781 	"invalid item size, have %u expect aligned to %zu for key type %u",
1782 			    btrfs_item_size(leaf, slot),
1783 			    sizeof(*dref), key->type);
1784 		return -EUCLEAN;
1785 	}
1786 	if (unlikely(!IS_ALIGNED(key->objectid, leaf->fs_info->sectorsize))) {
1787 		generic_err(leaf, slot,
1788 "invalid key objectid for shared block ref, have %llu expect aligned to %u",
1789 			    key->objectid, leaf->fs_info->sectorsize);
1790 		return -EUCLEAN;
1791 	}
1792 	for (; ptr < end; ptr += sizeof(*dref)) {
1793 		u64 root;
1794 		u64 objectid;
1795 		u64 offset;
1796 
1797 		/*
1798 		 * We cannot check the extent_data_ref hash due to possible
1799 		 * overflow from the leaf due to hash collisions.
1800 		 */
1801 		dref = (struct btrfs_extent_data_ref *)ptr;
1802 		root = btrfs_extent_data_ref_root(leaf, dref);
1803 		objectid = btrfs_extent_data_ref_objectid(leaf, dref);
1804 		offset = btrfs_extent_data_ref_offset(leaf, dref);
1805 		if (unlikely(!is_valid_dref_root(root))) {
1806 			extent_err(leaf, slot,
1807 				   "invalid extent data backref root value %llu",
1808 				   root);
1809 			return -EUCLEAN;
1810 		}
1811 		if (unlikely(objectid < BTRFS_FIRST_FREE_OBJECTID ||
1812 			     objectid > BTRFS_LAST_FREE_OBJECTID)) {
1813 			extent_err(leaf, slot,
1814 				   "invalid extent data backref objectid value %llu",
1815 				   objectid);
1816 			return -EUCLEAN;
1817 		}
1818 		if (unlikely(!IS_ALIGNED(offset, leaf->fs_info->sectorsize))) {
1819 			extent_err(leaf, slot,
1820 	"invalid extent data backref offset, have %llu expect aligned to %u",
1821 				   offset, leaf->fs_info->sectorsize);
1822 			return -EUCLEAN;
1823 		}
1824 		if (unlikely(btrfs_extent_data_ref_count(leaf, dref) == 0)) {
1825 			extent_err(leaf, slot,
1826 	"invalid extent data backref count, should have non-zero value");
1827 			return -EUCLEAN;
1828 		}
1829 	}
1830 	return 0;
1831 }
1832 
1833 #define inode_ref_err(eb, slot, fmt, args...)			\
1834 	inode_item_err(eb, slot, fmt, ##args)
1835 static int check_inode_ref(struct extent_buffer *leaf,
1836 			   struct btrfs_key *key, struct btrfs_key *prev_key,
1837 			   int slot)
1838 {
1839 	struct btrfs_inode_ref *iref;
1840 	unsigned long ptr;
1841 	unsigned long end;
1842 
1843 	if (unlikely(!check_prev_ino(leaf, key, slot, prev_key)))
1844 		return -EUCLEAN;
1845 	/* namelen can't be 0, so item_size == sizeof() is also invalid */
1846 	if (unlikely(btrfs_item_size(leaf, slot) <= sizeof(*iref))) {
1847 		inode_ref_err(leaf, slot,
1848 			"invalid item size, have %u expect (%zu, %u)",
1849 			btrfs_item_size(leaf, slot),
1850 			sizeof(*iref), BTRFS_LEAF_DATA_SIZE(leaf->fs_info));
1851 		return -EUCLEAN;
1852 	}
1853 
1854 	ptr = btrfs_item_ptr_offset(leaf, slot);
1855 	end = ptr + btrfs_item_size(leaf, slot);
1856 	while (ptr < end) {
1857 		u16 namelen;
1858 
1859 		if (unlikely(ptr + sizeof(*iref) > end)) {
1860 			inode_ref_err(leaf, slot,
1861 			"inode ref overflow, ptr %lu end %lu inode_ref_size %zu",
1862 				ptr, end, sizeof(*iref));
1863 			return -EUCLEAN;
1864 		}
1865 
1866 		iref = (struct btrfs_inode_ref *)ptr;
1867 		namelen = btrfs_inode_ref_name_len(leaf, iref);
1868 		if (unlikely(ptr + sizeof(*iref) + namelen > end)) {
1869 			inode_ref_err(leaf, slot,
1870 				"inode ref overflow, ptr %lu end %lu namelen %u",
1871 				ptr, end, namelen);
1872 			return -EUCLEAN;
1873 		}
1874 
1875 		/*
1876 		 * NOTE: In theory we should record all found index numbers
1877 		 * to find any duplicated indexes, but that will be too time
1878 		 * consuming for inodes with too many hard links.
1879 		 */
1880 		ptr += sizeof(*iref) + namelen;
1881 	}
1882 	return 0;
1883 }
1884 
1885 static int check_inode_extref(struct extent_buffer *leaf,
1886 			      struct btrfs_key *key, struct btrfs_key *prev_key,
1887 			      int slot)
1888 {
1889 	unsigned long ptr = btrfs_item_ptr_offset(leaf, slot);
1890 	unsigned long end = ptr + btrfs_item_size(leaf, slot);
1891 
1892 	if (unlikely(!check_prev_ino(leaf, key, slot, prev_key)))
1893 		return -EUCLEAN;
1894 
1895 	while (ptr < end) {
1896 		struct btrfs_inode_extref *extref = (struct btrfs_inode_extref *)ptr;
1897 		u16 namelen;
1898 
1899 		if (unlikely(ptr + sizeof(*extref) > end)) {
1900 			inode_ref_err(leaf, slot,
1901 			"inode extref overflow, ptr %lu end %lu inode_extref size %zu",
1902 				      ptr, end, sizeof(*extref));
1903 			return -EUCLEAN;
1904 		}
1905 
1906 		namelen = btrfs_inode_extref_name_len(leaf, extref);
1907 		if (unlikely(ptr + sizeof(*extref) + namelen > end)) {
1908 			inode_ref_err(leaf, slot,
1909 				"inode extref overflow, ptr %lu end %lu namelen %u",
1910 				ptr, end, namelen);
1911 			return -EUCLEAN;
1912 		}
1913 		ptr += sizeof(*extref) + namelen;
1914 	}
1915 	return 0;
1916 }
1917 
1918 static int check_raid_stripe_extent(const struct extent_buffer *leaf,
1919 				    const struct btrfs_key *key, int slot)
1920 {
1921 	if (unlikely(!IS_ALIGNED(key->objectid, leaf->fs_info->sectorsize))) {
1922 		generic_err(leaf, slot,
1923 "invalid key objectid for raid stripe extent, have %llu expect aligned to %u",
1924 			    key->objectid, leaf->fs_info->sectorsize);
1925 		return -EUCLEAN;
1926 	}
1927 
1928 	if (unlikely(!btrfs_fs_incompat(leaf->fs_info, RAID_STRIPE_TREE))) {
1929 		generic_err(leaf, slot,
1930 	"RAID_STRIPE_EXTENT present but RAID_STRIPE_TREE incompat bit unset");
1931 		return -EUCLEAN;
1932 	}
1933 
1934 	return 0;
1935 }
1936 
1937 static int check_remap_key(const struct extent_buffer *leaf,
1938 			   const struct btrfs_key *key, int slot)
1939 {
1940 	const u32 item_size = btrfs_item_size(leaf, slot);
1941 	const u32 sectorsize = leaf->fs_info->sectorsize;
1942 	u64 end;
1943 
1944 	if (unlikely(!btrfs_fs_incompat(leaf->fs_info, REMAP_TREE))) {
1945 		generic_err(leaf, slot,
1946 		"remap key type %u present but REMAP_TREE incompat bit unset",
1947 			    key->type);
1948 		return -EUCLEAN;
1949 	}
1950 
1951 	switch (key->type) {
1952 	case BTRFS_IDENTITY_REMAP_KEY:
1953 		if (unlikely(item_size != 0)) {
1954 			generic_err(leaf, slot,
1955 			"invalid item size for IDENTITY_REMAP, have %u expect 0",
1956 				    item_size);
1957 			return -EUCLEAN;
1958 		}
1959 	break;
1960 	case BTRFS_REMAP_KEY:
1961 	case BTRFS_REMAP_BACKREF_KEY:
1962 		if (unlikely(item_size != sizeof(struct btrfs_remap_item))) {
1963 			generic_err(leaf, slot,
1964 			"invalid item size for remap key type %u, have %u expect %zu",
1965 				    key->type, item_size,
1966 				    sizeof(struct btrfs_remap_item));
1967 			return -EUCLEAN;
1968 		}
1969 		break;
1970 	}
1971 
1972 	if (unlikely(key->offset == 0)) {
1973 		generic_err(leaf, slot,
1974 			    "invalid remap key length, have 0 expect nonzero");
1975 		return -EUCLEAN;
1976 	}
1977 
1978 	if (unlikely(!IS_ALIGNED(key->objectid, sectorsize))) {
1979 		generic_err(leaf, slot,
1980 		"invalid remap key objectid, have %llu expect aligned to %u",
1981 			    key->objectid, sectorsize);
1982 		return -EUCLEAN;
1983 	}
1984 
1985 	if (unlikely(!IS_ALIGNED(key->offset, sectorsize))) {
1986 		generic_err(leaf, slot,
1987 		"invalid remap key offset (length), have %llu expect aligned to %u",
1988 			    key->offset, sectorsize);
1989 		return -EUCLEAN;
1990 	}
1991 
1992 	if (unlikely(check_add_overflow(key->objectid, key->offset, &end))) {
1993 		generic_err(leaf, slot,
1994 		"remap key overflow, objectid %llu + offset %llu wraps",
1995 			    key->objectid, key->offset);
1996 		return -EUCLEAN;
1997 	}
1998 
1999 	return 0;
2000 }
2001 
2002 static int check_dev_extent_item(const struct extent_buffer *leaf,
2003 				 const struct btrfs_key *key,
2004 				 int slot,
2005 				 struct btrfs_key *prev_key)
2006 {
2007 	struct btrfs_dev_extent *de;
2008 	const u32 sectorsize = leaf->fs_info->sectorsize;
2009 
2010 	de = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
2011 	/* Basic fixed member checks. */
2012 	if (unlikely(btrfs_dev_extent_chunk_tree(leaf, de) !=
2013 		     BTRFS_CHUNK_TREE_OBJECTID)) {
2014 		generic_err(leaf, slot,
2015 			    "invalid dev extent chunk tree id, has %llu expect %llu",
2016 			    btrfs_dev_extent_chunk_tree(leaf, de),
2017 			    BTRFS_CHUNK_TREE_OBJECTID);
2018 		return -EUCLEAN;
2019 	}
2020 	if (unlikely(btrfs_dev_extent_chunk_objectid(leaf, de) !=
2021 		     BTRFS_FIRST_CHUNK_TREE_OBJECTID)) {
2022 		generic_err(leaf, slot,
2023 			    "invalid dev extent chunk objectid, has %llu expect %llu",
2024 			    btrfs_dev_extent_chunk_objectid(leaf, de),
2025 			    BTRFS_FIRST_CHUNK_TREE_OBJECTID);
2026 		return -EUCLEAN;
2027 	}
2028 	/* Alignment check. */
2029 	if (unlikely(!IS_ALIGNED(key->offset, sectorsize))) {
2030 		generic_err(leaf, slot,
2031 			    "invalid dev extent key.offset, has %llu not aligned to %u",
2032 			    key->offset, sectorsize);
2033 		return -EUCLEAN;
2034 	}
2035 	if (unlikely(!IS_ALIGNED(btrfs_dev_extent_chunk_offset(leaf, de),
2036 				 sectorsize))) {
2037 		generic_err(leaf, slot,
2038 			    "invalid dev extent chunk offset, has %llu not aligned to %u",
2039 			    btrfs_dev_extent_chunk_objectid(leaf, de),
2040 			    sectorsize);
2041 		return -EUCLEAN;
2042 	}
2043 	if (unlikely(!IS_ALIGNED(btrfs_dev_extent_length(leaf, de),
2044 				 sectorsize))) {
2045 		generic_err(leaf, slot,
2046 			    "invalid dev extent length, has %llu not aligned to %u",
2047 			    btrfs_dev_extent_length(leaf, de), sectorsize);
2048 		return -EUCLEAN;
2049 	}
2050 	/* Overlap check with previous dev extent. */
2051 	if (slot && prev_key->objectid == key->objectid &&
2052 	    prev_key->type == key->type) {
2053 		struct btrfs_dev_extent *prev_de;
2054 		u64 prev_len;
2055 
2056 		prev_de = btrfs_item_ptr(leaf, slot - 1, struct btrfs_dev_extent);
2057 		prev_len = btrfs_dev_extent_length(leaf, prev_de);
2058 		if (unlikely(prev_key->offset + prev_len > key->offset)) {
2059 			generic_err(leaf, slot,
2060 		"dev extent overlap, prev offset %llu len %llu current offset %llu",
2061 				    prev_key->offset, prev_len, key->offset);
2062 			return -EUCLEAN;
2063 		}
2064 	}
2065 	return 0;
2066 }
2067 
2068 static int check_free_space_info(struct extent_buffer *leaf, struct btrfs_key *key,
2069 				 int slot)
2070 {
2071 	struct btrfs_fs_info *fs_info = leaf->fs_info;
2072 	struct btrfs_free_space_info *fsi;
2073 	const u32 blocksize = fs_info->sectorsize;
2074 	u32 flags;
2075 
2076 	if (unlikely(!IS_ALIGNED(key->objectid, blocksize))) {
2077 		generic_err(leaf, slot,
2078 		"free space info key objectid is not aligned to %u, has " BTRFS_KEY_FMT,
2079 			    blocksize, BTRFS_KEY_FMT_VALUE(key));
2080 		return -EUCLEAN;
2081 	}
2082 	if (unlikely(!IS_ALIGNED(key->offset, blocksize))) {
2083 		generic_err(leaf, slot,
2084 		"free space info key offset is not aligned to %u, has " BTRFS_KEY_FMT,
2085 			    blocksize, BTRFS_KEY_FMT_VALUE(key));
2086 		return -EUCLEAN;
2087 	}
2088 	if (unlikely(btrfs_item_size(leaf, slot) !=
2089 		     sizeof(struct btrfs_free_space_info))) {
2090 		generic_err(leaf, slot,
2091 		"invalid item size for free space info, has %u expect %zu",
2092 			    btrfs_item_size(leaf, slot),
2093 			    sizeof(struct btrfs_free_space_info));
2094 		return -EUCLEAN;
2095 	}
2096 	fsi = btrfs_item_ptr(leaf, slot, struct btrfs_free_space_info);
2097 	flags = btrfs_free_space_flags(leaf, fsi);
2098 	if (unlikely(flags & ~BTRFS_FREE_SPACE_FLAGS_MASK)) {
2099 		generic_err(leaf, slot,
2100 		"unknown flags for free space info, has 0x%x valid mask 0x%lx",
2101 			    flags, BTRFS_FREE_SPACE_FLAGS_MASK);
2102 		return -EUCLEAN;
2103 	}
2104 	if (unlikely(btrfs_free_space_extent_count(leaf, fsi) >
2105 		     key->offset >> fs_info->sectorsize_bits)) {
2106 		generic_err(leaf, slot,
2107 			    "suspicious extent count, has %u max valid %llu",
2108 			    btrfs_free_space_extent_count(leaf, fsi),
2109 			    key->offset >> fs_info->sectorsize_bits);
2110 		return -EUCLEAN;
2111 	}
2112 	return 0;
2113 }
2114 
2115 static int check_free_space_extent(struct extent_buffer *leaf, struct btrfs_key *key, int slot)
2116 {
2117 	struct btrfs_fs_info *fs_info = leaf->fs_info;
2118 	const u32 blocksize = fs_info->sectorsize;
2119 
2120 	if (unlikely(!IS_ALIGNED(key->objectid, blocksize))) {
2121 		generic_err(leaf, slot,
2122 		"free space extent key objectid is not aligned to %u, has " BTRFS_KEY_FMT,
2123 			    blocksize, BTRFS_KEY_FMT_VALUE(key));
2124 		return -EUCLEAN;
2125 	}
2126 	if (unlikely(!IS_ALIGNED(key->offset, blocksize))) {
2127 		generic_err(leaf, slot,
2128 		"free space extent key offset is not aligned to %u, has " BTRFS_KEY_FMT,
2129 			    blocksize, BTRFS_KEY_FMT_VALUE(key));
2130 		return -EUCLEAN;
2131 	}
2132 	if (unlikely(btrfs_item_size(leaf, slot) != 0)) {
2133 		generic_err(leaf, slot,
2134 			    "invalid item size for free space info, has %u expect 0",
2135 			    btrfs_item_size(leaf, slot));
2136 		return -EUCLEAN;
2137 	}
2138 	return 0;
2139 }
2140 
2141 static int check_free_space_bitmap(struct extent_buffer *leaf,
2142 				   struct btrfs_key *key, int slot)
2143 {
2144 	struct btrfs_fs_info *fs_info = leaf->fs_info;
2145 	const u32 blocksize = fs_info->sectorsize;
2146 	u32 expected_item_size;
2147 
2148 	if (unlikely(!IS_ALIGNED(key->objectid, blocksize))) {
2149 		generic_err(leaf, slot,
2150 		"free space bitmap key objectid is not aligned to %u, has " BTRFS_KEY_FMT,
2151 			    blocksize, BTRFS_KEY_FMT_VALUE(key));
2152 		return -EUCLEAN;
2153 	}
2154 	if (unlikely(!IS_ALIGNED(key->offset, blocksize))) {
2155 		generic_err(leaf, slot,
2156 		"free space bitmap key offset is not aligned to %u, has " BTRFS_KEY_FMT,
2157 			    blocksize, BTRFS_KEY_FMT_VALUE(key));
2158 		return -EUCLEAN;
2159 	}
2160 	if (unlikely(key->offset == 0)) {
2161 		generic_err(leaf, slot, "free space bitmap length is 0");
2162 		return -EUCLEAN;
2163 	}
2164 	/*
2165 	 * The item must hold exactly the right number of bitmap bytes for the
2166 	 * range described by key->offset.  A mismatch means the item was
2167 	 * truncated or the key is corrupt; either way the bitmap data is not
2168 	 * safe to access.
2169 	 */
2170 	expected_item_size = DIV_ROUND_UP(key->offset >> fs_info->sectorsize_bits,
2171 					  BITS_PER_BYTE);
2172 	if (unlikely(btrfs_item_size(leaf, slot) != expected_item_size)) {
2173 		generic_err(leaf, slot,
2174 			    "invalid item size for free space bitmap, has %u expect %u",
2175 			    btrfs_item_size(leaf, slot), expected_item_size);
2176 		return -EUCLEAN;
2177 	}
2178 	return 0;
2179 }
2180 
2181 /*
2182  * Common point to switch the item-specific validation.
2183  */
2184 static enum btrfs_tree_block_status check_leaf_item(struct extent_buffer *leaf,
2185 						    struct btrfs_key *key,
2186 						    int slot,
2187 						    struct btrfs_key *prev_key)
2188 {
2189 	int ret = 0;
2190 	struct btrfs_chunk *chunk;
2191 
2192 	switch (key->type) {
2193 	case BTRFS_EXTENT_DATA_KEY:
2194 		ret = check_extent_data_item(leaf, key, slot, prev_key);
2195 		break;
2196 	case BTRFS_EXTENT_CSUM_KEY:
2197 		ret = check_csum_item(leaf, key, slot, prev_key);
2198 		break;
2199 	case BTRFS_DIR_ITEM_KEY:
2200 	case BTRFS_DIR_INDEX_KEY:
2201 	case BTRFS_XATTR_ITEM_KEY:
2202 		ret = check_dir_item(leaf, key, prev_key, slot);
2203 		break;
2204 	case BTRFS_INODE_REF_KEY:
2205 		ret = check_inode_ref(leaf, key, prev_key, slot);
2206 		break;
2207 	case BTRFS_INODE_EXTREF_KEY:
2208 		ret = check_inode_extref(leaf, key, prev_key, slot);
2209 		break;
2210 	case BTRFS_BLOCK_GROUP_ITEM_KEY:
2211 		ret = check_block_group_item(leaf, key, slot);
2212 		break;
2213 	case BTRFS_CHUNK_ITEM_KEY:
2214 		chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
2215 		ret = check_leaf_chunk_item(leaf, chunk, key, slot);
2216 		break;
2217 	case BTRFS_DEV_ITEM_KEY:
2218 		ret = check_dev_item(leaf, key, slot);
2219 		break;
2220 	case BTRFS_DEV_EXTENT_KEY:
2221 		ret = check_dev_extent_item(leaf, key, slot, prev_key);
2222 		break;
2223 	case BTRFS_INODE_ITEM_KEY:
2224 		ret = check_inode_item(leaf, key, slot);
2225 		break;
2226 	case BTRFS_ROOT_ITEM_KEY:
2227 		ret = check_root_item(leaf, key, slot);
2228 		break;
2229 	case BTRFS_EXTENT_ITEM_KEY:
2230 	case BTRFS_METADATA_ITEM_KEY:
2231 		ret = check_extent_item(leaf, key, slot, prev_key);
2232 		break;
2233 	case BTRFS_TREE_BLOCK_REF_KEY:
2234 	case BTRFS_SHARED_DATA_REF_KEY:
2235 	case BTRFS_SHARED_BLOCK_REF_KEY:
2236 		ret = check_simple_keyed_refs(leaf, key, slot);
2237 		break;
2238 	case BTRFS_EXTENT_DATA_REF_KEY:
2239 		ret = check_extent_data_ref(leaf, key, slot);
2240 		break;
2241 	case BTRFS_RAID_STRIPE_KEY:
2242 		ret = check_raid_stripe_extent(leaf, key, slot);
2243 		break;
2244 	case BTRFS_FREE_SPACE_INFO_KEY:
2245 		ret = check_free_space_info(leaf, key, slot);
2246 		break;
2247 	case BTRFS_FREE_SPACE_EXTENT_KEY:
2248 		ret = check_free_space_extent(leaf, key, slot);
2249 		break;
2250 	case BTRFS_FREE_SPACE_BITMAP_KEY:
2251 		ret = check_free_space_bitmap(leaf, key, slot);
2252 		break;
2253 	case BTRFS_IDENTITY_REMAP_KEY:
2254 	case BTRFS_REMAP_KEY:
2255 	case BTRFS_REMAP_BACKREF_KEY:
2256 		ret = check_remap_key(leaf, key, slot);
2257 		break;
2258 	}
2259 
2260 	if (unlikely(ret))
2261 		return BTRFS_TREE_BLOCK_INVALID_ITEM;
2262 	return BTRFS_TREE_BLOCK_CLEAN;
2263 }
2264 
2265 enum btrfs_tree_block_status __btrfs_check_leaf(struct extent_buffer *leaf)
2266 {
2267 	struct btrfs_fs_info *fs_info = leaf->fs_info;
2268 	/* No valid key type is 0, so all key should be larger than this key */
2269 	struct btrfs_key prev_key = {0, 0, 0};
2270 	struct btrfs_key key;
2271 	u32 nritems = btrfs_header_nritems(leaf);
2272 	int slot;
2273 
2274 	if (unlikely(btrfs_header_level(leaf) != 0)) {
2275 		generic_err(leaf, 0,
2276 			"invalid level for leaf, have %d expect 0",
2277 			btrfs_header_level(leaf));
2278 		return BTRFS_TREE_BLOCK_INVALID_LEVEL;
2279 	}
2280 
2281 	if (unlikely(!btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_WRITTEN))) {
2282 		generic_err(leaf, 0, "invalid flag for leaf, WRITTEN not set");
2283 		return BTRFS_TREE_BLOCK_WRITTEN_NOT_SET;
2284 	}
2285 
2286 	/*
2287 	 * Extent buffers from a relocation tree have a owner field that
2288 	 * corresponds to the subvolume tree they are based on. So just from an
2289 	 * extent buffer alone we can not find out what is the id of the
2290 	 * corresponding subvolume tree, so we can not figure out if the extent
2291 	 * buffer corresponds to the root of the relocation tree or not. So
2292 	 * skip this check for relocation trees.
2293 	 */
2294 	if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
2295 		u64 owner = btrfs_header_owner(leaf);
2296 
2297 		/* These trees must never be empty */
2298 		if (unlikely(owner == BTRFS_ROOT_TREE_OBJECTID ||
2299 			     owner == BTRFS_CHUNK_TREE_OBJECTID ||
2300 			     owner == BTRFS_DEV_TREE_OBJECTID ||
2301 			     owner == BTRFS_FS_TREE_OBJECTID ||
2302 			     owner == BTRFS_DATA_RELOC_TREE_OBJECTID)) {
2303 			generic_err(leaf, 0,
2304 			"invalid root, root %llu must never be empty",
2305 				    owner);
2306 			return BTRFS_TREE_BLOCK_INVALID_NRITEMS;
2307 		}
2308 
2309 		/* Unknown tree */
2310 		if (unlikely(owner == 0)) {
2311 			generic_err(leaf, 0,
2312 				"invalid owner, root 0 is not defined");
2313 			return BTRFS_TREE_BLOCK_INVALID_OWNER;
2314 		}
2315 
2316 		/* EXTENT_TREE_V2 can have empty extent trees. */
2317 		if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
2318 			return BTRFS_TREE_BLOCK_CLEAN;
2319 
2320 		if (unlikely(owner == BTRFS_EXTENT_TREE_OBJECTID)) {
2321 			generic_err(leaf, 0,
2322 			"invalid root, root %llu must never be empty",
2323 				    owner);
2324 			return BTRFS_TREE_BLOCK_INVALID_NRITEMS;
2325 		}
2326 
2327 		return BTRFS_TREE_BLOCK_CLEAN;
2328 	}
2329 
2330 	if (unlikely(nritems == 0))
2331 		return BTRFS_TREE_BLOCK_CLEAN;
2332 
2333 	/*
2334 	 * Check the following things to make sure this is a good leaf, and
2335 	 * leaf users won't need to bother with similar sanity checks:
2336 	 *
2337 	 * 1) key ordering
2338 	 * 2) item offset and size
2339 	 *    No overlap, no hole, all inside the leaf.
2340 	 * 3) item content
2341 	 *    If possible, do comprehensive sanity check.
2342 	 *    NOTE: All checks must only rely on the item data itself.
2343 	 */
2344 	for (slot = 0; slot < nritems; slot++) {
2345 		u32 item_end_expected;
2346 		u64 item_data_end;
2347 		enum btrfs_tree_block_status ret;
2348 
2349 		btrfs_item_key_to_cpu(leaf, &key, slot);
2350 
2351 		/* Make sure the keys are in the right order */
2352 		if (unlikely(btrfs_comp_cpu_keys(&prev_key, &key) >= 0)) {
2353 			generic_err(leaf, slot,
2354 	"bad key order, prev " BTRFS_KEY_FMT " current " BTRFS_KEY_FMT,
2355 				    BTRFS_KEY_FMT_VALUE(&prev_key),
2356 				    BTRFS_KEY_FMT_VALUE(&key));
2357 			return BTRFS_TREE_BLOCK_BAD_KEY_ORDER;
2358 		}
2359 
2360 		item_data_end = (u64)btrfs_item_offset(leaf, slot) +
2361 				btrfs_item_size(leaf, slot);
2362 		/*
2363 		 * Make sure the offset and ends are right, remember that the
2364 		 * item data starts at the end of the leaf and grows towards the
2365 		 * front.
2366 		 */
2367 		if (slot == 0)
2368 			item_end_expected = BTRFS_LEAF_DATA_SIZE(fs_info);
2369 		else
2370 			item_end_expected = btrfs_item_offset(leaf,
2371 								 slot - 1);
2372 		if (unlikely(item_data_end != item_end_expected)) {
2373 			generic_err(leaf, slot,
2374 				"unexpected item end, have %llu expect %u",
2375 				item_data_end, item_end_expected);
2376 			return BTRFS_TREE_BLOCK_INVALID_OFFSETS;
2377 		}
2378 
2379 		/*
2380 		 * Check to make sure that we don't point outside of the leaf,
2381 		 * just in case all the items are consistent to each other, but
2382 		 * all point outside of the leaf.
2383 		 */
2384 		if (unlikely(item_data_end > BTRFS_LEAF_DATA_SIZE(fs_info))) {
2385 			generic_err(leaf, slot,
2386 			"slot end outside of leaf, have %llu expect range [0, %u]",
2387 				item_data_end, BTRFS_LEAF_DATA_SIZE(fs_info));
2388 			return BTRFS_TREE_BLOCK_INVALID_OFFSETS;
2389 		}
2390 
2391 		/* Also check if the item pointer overlaps with btrfs item. */
2392 		if (unlikely(btrfs_item_ptr_offset(leaf, slot) <
2393 			     btrfs_item_nr_offset(leaf, slot) + sizeof(struct btrfs_item))) {
2394 			generic_err(leaf, slot,
2395 		"slot overlaps with its data, item end %lu data start %lu",
2396 				btrfs_item_nr_offset(leaf, slot) +
2397 				sizeof(struct btrfs_item),
2398 				btrfs_item_ptr_offset(leaf, slot));
2399 			return BTRFS_TREE_BLOCK_INVALID_OFFSETS;
2400 		}
2401 
2402 		/* Check if the item size and content meet other criteria. */
2403 		ret = check_leaf_item(leaf, &key, slot, &prev_key);
2404 		if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN))
2405 			return ret;
2406 
2407 		prev_key.objectid = key.objectid;
2408 		prev_key.type = key.type;
2409 		prev_key.offset = key.offset;
2410 	}
2411 
2412 	return BTRFS_TREE_BLOCK_CLEAN;
2413 }
2414 
2415 int btrfs_check_leaf(struct extent_buffer *leaf)
2416 {
2417 	enum btrfs_tree_block_status ret;
2418 
2419 	ret = __btrfs_check_leaf(leaf);
2420 	if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN))
2421 		return -EUCLEAN;
2422 	return 0;
2423 }
2424 ALLOW_ERROR_INJECTION(btrfs_check_leaf, ERRNO);
2425 
2426 enum btrfs_tree_block_status __btrfs_check_node(struct extent_buffer *node)
2427 {
2428 	struct btrfs_fs_info *fs_info = node->fs_info;
2429 	unsigned long nr = btrfs_header_nritems(node);
2430 	struct btrfs_key key, next_key;
2431 	int slot;
2432 	int level = btrfs_header_level(node);
2433 	u64 bytenr;
2434 
2435 	if (unlikely(!btrfs_header_flag(node, BTRFS_HEADER_FLAG_WRITTEN))) {
2436 		generic_err(node, 0, "invalid flag for node, WRITTEN not set");
2437 		return BTRFS_TREE_BLOCK_WRITTEN_NOT_SET;
2438 	}
2439 
2440 	if (unlikely(level <= 0 || level >= BTRFS_MAX_LEVEL)) {
2441 		generic_err(node, 0,
2442 			"invalid level for node, have %d expect [1, %d]",
2443 			level, BTRFS_MAX_LEVEL - 1);
2444 		return BTRFS_TREE_BLOCK_INVALID_LEVEL;
2445 	}
2446 	if (unlikely(nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(fs_info))) {
2447 		btrfs_crit(fs_info,
2448 "corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%u]",
2449 			   btrfs_header_owner(node), node->start,
2450 			   nr == 0 ? "small" : "large", nr,
2451 			   BTRFS_NODEPTRS_PER_BLOCK(fs_info));
2452 		return BTRFS_TREE_BLOCK_INVALID_NRITEMS;
2453 	}
2454 
2455 	for (slot = 0; slot < nr - 1; slot++) {
2456 		bytenr = btrfs_node_blockptr(node, slot);
2457 		btrfs_node_key_to_cpu(node, &key, slot);
2458 		btrfs_node_key_to_cpu(node, &next_key, slot + 1);
2459 
2460 		if (unlikely(!bytenr)) {
2461 			generic_err(node, slot,
2462 				"invalid NULL node pointer");
2463 			return BTRFS_TREE_BLOCK_INVALID_BLOCKPTR;
2464 		}
2465 		if (unlikely(!IS_ALIGNED(bytenr, fs_info->sectorsize))) {
2466 			generic_err(node, slot,
2467 			"unaligned pointer, have %llu should be aligned to %u",
2468 				bytenr, fs_info->sectorsize);
2469 			return BTRFS_TREE_BLOCK_INVALID_BLOCKPTR;
2470 		}
2471 
2472 		if (unlikely(btrfs_comp_cpu_keys(&key, &next_key) >= 0)) {
2473 			generic_err(node, slot,
2474 	"bad key order, current " BTRFS_KEY_FMT " next " BTRFS_KEY_FMT,
2475 				    BTRFS_KEY_FMT_VALUE(&key),
2476 				    BTRFS_KEY_FMT_VALUE(&next_key));
2477 			return BTRFS_TREE_BLOCK_BAD_KEY_ORDER;
2478 		}
2479 	}
2480 	return BTRFS_TREE_BLOCK_CLEAN;
2481 }
2482 
2483 int btrfs_check_node(struct extent_buffer *node)
2484 {
2485 	enum btrfs_tree_block_status ret;
2486 
2487 	ret = __btrfs_check_node(node);
2488 	if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN))
2489 		return -EUCLEAN;
2490 	return 0;
2491 }
2492 ALLOW_ERROR_INJECTION(btrfs_check_node, ERRNO);
2493 
2494 int btrfs_check_eb_owner(const struct extent_buffer *eb, u64 root_owner)
2495 {
2496 	const bool is_subvol = btrfs_is_fstree(root_owner);
2497 	const u64 eb_owner = btrfs_header_owner(eb);
2498 
2499 	/*
2500 	 * Skip dummy fs, as selftests don't create unique ebs for each dummy
2501 	 * root.
2502 	 */
2503 	if (btrfs_is_testing(eb->fs_info))
2504 		return 0;
2505 	/*
2506 	 * There are several call sites (backref walking, qgroup, and data
2507 	 * reloc) passing 0 as @root_owner, as they are not holding the
2508 	 * tree root.  In that case, we can not do a reliable ownership check,
2509 	 * so just exit.
2510 	 */
2511 	if (root_owner == 0)
2512 		return 0;
2513 	/*
2514 	 * These trees use key.offset as their owner, our callers don't have
2515 	 * the extra capacity to pass key.offset here.  So we just skip them.
2516 	 */
2517 	if (root_owner == BTRFS_TREE_LOG_OBJECTID ||
2518 	    root_owner == BTRFS_TREE_RELOC_OBJECTID)
2519 		return 0;
2520 
2521 	if (!is_subvol) {
2522 		/* For non-subvolume trees, the eb owner should match root owner */
2523 		if (unlikely(root_owner != eb_owner)) {
2524 			btrfs_crit(eb->fs_info,
2525 "corrupted %s, root=%llu block=%llu owner mismatch, have %llu expect %llu",
2526 				btrfs_header_level(eb) == 0 ? "leaf" : "node",
2527 				root_owner, btrfs_header_bytenr(eb), eb_owner,
2528 				root_owner);
2529 			return -EUCLEAN;
2530 		}
2531 		return 0;
2532 	}
2533 
2534 	/*
2535 	 * For subvolume trees, owners can mismatch, but they should all belong
2536 	 * to subvolume trees.
2537 	 */
2538 	if (unlikely(is_subvol != btrfs_is_fstree(eb_owner))) {
2539 		btrfs_crit(eb->fs_info,
2540 "corrupted %s, root=%llu block=%llu owner mismatch, have %llu expect [%llu, %llu]",
2541 			btrfs_header_level(eb) == 0 ? "leaf" : "node",
2542 			root_owner, btrfs_header_bytenr(eb), eb_owner,
2543 			BTRFS_FIRST_FREE_OBJECTID, BTRFS_LAST_FREE_OBJECTID);
2544 		return -EUCLEAN;
2545 	}
2546 	return 0;
2547 }
2548 
2549 int btrfs_verify_level_key(struct extent_buffer *eb,
2550 			   const struct btrfs_tree_parent_check *check)
2551 {
2552 	struct btrfs_fs_info *fs_info = eb->fs_info;
2553 	int found_level;
2554 	struct btrfs_key found_key;
2555 	int ret;
2556 
2557 	found_level = btrfs_header_level(eb);
2558 	if (unlikely(found_level != check->level)) {
2559 		DEBUG_WARN();
2560 		btrfs_err(fs_info,
2561 "tree level mismatch detected, bytenr=%llu level expected=%u has=%u",
2562 			  eb->start, check->level, found_level);
2563 		return -EUCLEAN;
2564 	}
2565 
2566 	if (!check->has_first_key)
2567 		return 0;
2568 
2569 	/*
2570 	 * For live tree block (new tree blocks in current transaction),
2571 	 * we need proper lock context to avoid race, which is impossible here.
2572 	 * So we only checks tree blocks which is read from disk, whose
2573 	 * generation <= fs_info->last_trans_committed.
2574 	 */
2575 	if (btrfs_header_generation(eb) > btrfs_get_last_trans_committed(fs_info))
2576 		return 0;
2577 
2578 	/* We have @first_key, so this @eb must have at least one item */
2579 	if (unlikely(btrfs_header_nritems(eb) == 0)) {
2580 		btrfs_err(fs_info,
2581 		"invalid tree nritems, bytenr=%llu nritems=0 expect >0",
2582 			  eb->start);
2583 		DEBUG_WARN();
2584 		return -EUCLEAN;
2585 	}
2586 
2587 	if (found_level)
2588 		btrfs_node_key_to_cpu(eb, &found_key, 0);
2589 	else
2590 		btrfs_item_key_to_cpu(eb, &found_key, 0);
2591 
2592 	ret = btrfs_comp_cpu_keys(&check->first_key, &found_key);
2593 	if (unlikely(ret)) {
2594 		DEBUG_WARN();
2595 		btrfs_err(fs_info,
2596 "tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)",
2597 			  eb->start, check->transid, check->first_key.objectid,
2598 			  check->first_key.type, check->first_key.offset,
2599 			  found_key.objectid, found_key.type,
2600 			  found_key.offset);
2601 	}
2602 	return ret;
2603 }
2604