1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (c) 2021-2024 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_log_format.h"
13 #include "xfs_trans.h"
14 #include "xfs_inode.h"
15 #include "xfs_ialloc.h"
16 #include "xfs_quota.h"
17 #include "xfs_trans_space.h"
18 #include "xfs_dir2.h"
19 #include "xfs_icache.h"
20 #include "xfs_bmap.h"
21 #include "xfs_bmap_btree.h"
22 #include "xfs_parent.h"
23 #include "xfs_attr_sf.h"
24 #include "scrub/scrub.h"
25 #include "scrub/common.h"
26 #include "scrub/repair.h"
27 #include "scrub/trace.h"
28 #include "scrub/orphanage.h"
29 #include "scrub/readdir.h"
30
31 #include <linux/namei.h>
32
/*
 * The Orphanage
 * =============
 *
 * If a directory in the tree is damaged, children of that directory become
 * inaccessible via that file path.  If a child has no other parents, the file
 * is said to be orphaned.  xfs_repair fixes this situation by creating an
 * orphanage directory (specifically, /lost+found) and creating a directory
 * entry pointing to the orphaned file.
 *
 * Online repair follows this tactic by creating a root-owned /lost+found
 * directory if one does not exist.  If an orphan is found, it will move that
 * file into the orphanage.
 */
47
48 /* Make the orphanage owned by root. */
49 STATIC int
xrep_chown_orphanage(struct xfs_scrub * sc,struct xfs_inode * dp)50 xrep_chown_orphanage(
51 struct xfs_scrub *sc,
52 struct xfs_inode *dp)
53 {
54 struct xfs_trans *tp;
55 struct xfs_mount *mp = sc->mp;
56 struct xfs_dquot *udqp = NULL, *gdqp = NULL, *pdqp = NULL;
57 struct xfs_dquot *oldu = NULL, *oldg = NULL, *oldp = NULL;
58 struct inode *inode = VFS_I(dp);
59 int error;
60
61 error = xfs_qm_vop_dqalloc(dp, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
62 XFS_QMOPT_QUOTALL, &udqp, &gdqp, &pdqp);
63 if (error)
64 return error;
65
66 error = xfs_trans_alloc_ichange(dp, udqp, gdqp, pdqp, true, &tp);
67 if (error)
68 goto out_dqrele;
69
70 /*
71 * Always clear setuid/setgid/sticky on the orphanage since we don't
72 * normally want that functionality on this directory and xfs_repair
73 * doesn't create it this way either. Leave the other access bits
74 * unchanged.
75 */
76 inode->i_mode &= ~(S_ISUID | S_ISGID | S_ISVTX);
77
78 /*
79 * Change the ownerships and register quota modifications
80 * in the transaction.
81 */
82 if (!uid_eq(inode->i_uid, GLOBAL_ROOT_UID)) {
83 if (XFS_IS_UQUOTA_ON(mp))
84 oldu = xfs_qm_vop_chown(tp, dp, &dp->i_udquot, udqp);
85 inode->i_uid = GLOBAL_ROOT_UID;
86 }
87 if (!gid_eq(inode->i_gid, GLOBAL_ROOT_GID)) {
88 if (XFS_IS_GQUOTA_ON(mp))
89 oldg = xfs_qm_vop_chown(tp, dp, &dp->i_gdquot, gdqp);
90 inode->i_gid = GLOBAL_ROOT_GID;
91 }
92 if (dp->i_projid != 0) {
93 if (XFS_IS_PQUOTA_ON(mp))
94 oldp = xfs_qm_vop_chown(tp, dp, &dp->i_pdquot, pdqp);
95 dp->i_projid = 0;
96 }
97
98 dp->i_diflags &= ~(XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT);
99 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
100
101 XFS_STATS_INC(mp, xs_ig_attrchg);
102
103 if (xfs_has_wsync(mp))
104 xfs_trans_set_sync(tp);
105 error = xfs_trans_commit(tp);
106
107 xfs_qm_dqrele(oldu);
108 xfs_qm_dqrele(oldg);
109 xfs_qm_dqrele(oldp);
110
111 out_dqrele:
112 xfs_qm_dqrele(udqp);
113 xfs_qm_dqrele(gdqp);
114 xfs_qm_dqrele(pdqp);
115 return error;
116 }
117
118 #define ORPHANAGE "lost+found"
119
120 /* Create the orphanage directory, and set sc->orphanage to it. */
121 int
xrep_orphanage_create(struct xfs_scrub * sc)122 xrep_orphanage_create(
123 struct xfs_scrub *sc)
124 {
125 struct xfs_mount *mp = sc->mp;
126 struct dentry *root_dentry, *orphanage_dentry;
127 struct inode *root_inode = VFS_I(sc->mp->m_rootip);
128 struct inode *orphanage_inode;
129 int error;
130
131 if (xfs_is_shutdown(mp))
132 return -EIO;
133 if (xfs_is_readonly(mp)) {
134 sc->orphanage = NULL;
135 return 0;
136 }
137
138 ASSERT(sc->tp == NULL);
139 ASSERT(sc->orphanage == NULL);
140
141 /* Find the dentry for the root directory... */
142 root_dentry = d_find_alias(root_inode);
143 if (!root_dentry) {
144 error = -EFSCORRUPTED;
145 goto out;
146 }
147
148 /* ...which is a directory, right? */
149 if (!d_is_dir(root_dentry)) {
150 error = -EFSCORRUPTED;
151 goto out_dput_root;
152 }
153
154 /* Try to find the orphanage directory. */
155 inode_lock_nested(root_inode, I_MUTEX_PARENT);
156 orphanage_dentry = lookup_one_len(ORPHANAGE, root_dentry,
157 strlen(ORPHANAGE));
158 if (IS_ERR(orphanage_dentry)) {
159 error = PTR_ERR(orphanage_dentry);
160 goto out_unlock_root;
161 }
162
163 /*
164 * Nothing found? Call mkdir to create the orphanage. Create the
165 * directory without other-user access because we're live and someone
166 * could have been relying partly on minimal access to a parent
167 * directory to control access to a file we put in here.
168 */
169 if (d_really_is_negative(orphanage_dentry)) {
170 orphanage_dentry = vfs_mkdir(&nop_mnt_idmap, root_inode,
171 orphanage_dentry, 0750);
172 error = PTR_ERR(orphanage_dentry);
173 if (IS_ERR(orphanage_dentry))
174 goto out_unlock_root;
175 }
176
177 /* Not a directory? Bail out. */
178 if (!d_is_dir(orphanage_dentry)) {
179 error = -ENOTDIR;
180 goto out_dput_orphanage;
181 }
182
183 /*
184 * Grab a reference to the orphanage. This /should/ succeed since
185 * we hold the root directory locked and therefore nobody can delete
186 * the orphanage.
187 */
188 orphanage_inode = igrab(d_inode(orphanage_dentry));
189 if (!orphanage_inode) {
190 error = -ENOENT;
191 goto out_dput_orphanage;
192 }
193
194 /* Make sure the orphanage is owned by root. */
195 error = xrep_chown_orphanage(sc, XFS_I(orphanage_inode));
196 if (error)
197 goto out_dput_orphanage;
198
199 /* Stash the reference for later and bail out. */
200 sc->orphanage = XFS_I(orphanage_inode);
201 sc->orphanage_ilock_flags = 0;
202
203 out_dput_orphanage:
204 dput(orphanage_dentry);
205 out_unlock_root:
206 inode_unlock(VFS_I(sc->mp->m_rootip));
207 out_dput_root:
208 dput(root_dentry);
209 out:
210 return error;
211 }
212
213 void
xrep_orphanage_ilock(struct xfs_scrub * sc,unsigned int ilock_flags)214 xrep_orphanage_ilock(
215 struct xfs_scrub *sc,
216 unsigned int ilock_flags)
217 {
218 sc->orphanage_ilock_flags |= ilock_flags;
219 xfs_ilock(sc->orphanage, ilock_flags);
220 }
221
222 bool
xrep_orphanage_ilock_nowait(struct xfs_scrub * sc,unsigned int ilock_flags)223 xrep_orphanage_ilock_nowait(
224 struct xfs_scrub *sc,
225 unsigned int ilock_flags)
226 {
227 if (xfs_ilock_nowait(sc->orphanage, ilock_flags)) {
228 sc->orphanage_ilock_flags |= ilock_flags;
229 return true;
230 }
231
232 return false;
233 }
234
235 void
xrep_orphanage_iunlock(struct xfs_scrub * sc,unsigned int ilock_flags)236 xrep_orphanage_iunlock(
237 struct xfs_scrub *sc,
238 unsigned int ilock_flags)
239 {
240 xfs_iunlock(sc->orphanage, ilock_flags);
241 sc->orphanage_ilock_flags &= ~ilock_flags;
242 }
243
244 /* Grab the IOLOCK of the orphanage and sc->ip. */
245 int
xrep_orphanage_iolock_two(struct xfs_scrub * sc)246 xrep_orphanage_iolock_two(
247 struct xfs_scrub *sc)
248 {
249 int error = 0;
250
251 while (true) {
252 if (xchk_should_terminate(sc, &error))
253 return error;
254
255 /*
256 * Normal XFS takes the IOLOCK before grabbing a transaction.
257 * Scrub holds a transaction, which means that we can't block
258 * on either IOLOCK.
259 */
260 if (xrep_orphanage_ilock_nowait(sc, XFS_IOLOCK_EXCL)) {
261 if (xchk_ilock_nowait(sc, XFS_IOLOCK_EXCL))
262 break;
263 xrep_orphanage_iunlock(sc, XFS_IOLOCK_EXCL);
264 }
265 delay(1);
266 }
267
268 return 0;
269 }
270
271 /* Release the orphanage. */
272 void
xrep_orphanage_rele(struct xfs_scrub * sc)273 xrep_orphanage_rele(
274 struct xfs_scrub *sc)
275 {
276 if (!sc->orphanage)
277 return;
278
279 if (sc->orphanage_ilock_flags)
280 xfs_iunlock(sc->orphanage, sc->orphanage_ilock_flags);
281
282 xchk_irele(sc, sc->orphanage);
283 sc->orphanage = NULL;
284 }
285
286 /* Adoption moves a file into /lost+found */
287
288 /* Can the orphanage adopt @sc->ip? */
289 bool
xrep_orphanage_can_adopt(struct xfs_scrub * sc)290 xrep_orphanage_can_adopt(
291 struct xfs_scrub *sc)
292 {
293 ASSERT(sc->ip != NULL);
294
295 if (!sc->orphanage)
296 return false;
297 if (sc->ip == sc->orphanage)
298 return false;
299 if (xchk_inode_is_sb_rooted(sc->ip))
300 return false;
301 if (xfs_is_internal_inode(sc->ip))
302 return false;
303 return true;
304 }
305
306 /*
307 * Create a new transaction to send a child to the orphanage.
308 *
309 * Allocate a new transaction with sufficient disk space to handle the
310 * adoption, take ILOCK_EXCL of the orphanage and sc->ip, joins them to the
311 * transaction, and reserve quota to reparent the latter. Caller must hold the
312 * IOLOCK of the orphanage and sc->ip.
313 */
314 int
xrep_adoption_trans_alloc(struct xfs_scrub * sc,struct xrep_adoption * adopt)315 xrep_adoption_trans_alloc(
316 struct xfs_scrub *sc,
317 struct xrep_adoption *adopt)
318 {
319 struct xfs_mount *mp = sc->mp;
320 unsigned int child_blkres = 0;
321 int error;
322
323 ASSERT(sc->tp == NULL);
324 ASSERT(sc->ip != NULL);
325 ASSERT(sc->orphanage != NULL);
326 ASSERT(sc->ilock_flags & XFS_IOLOCK_EXCL);
327 ASSERT(sc->orphanage_ilock_flags & XFS_IOLOCK_EXCL);
328 ASSERT(!(sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)));
329 ASSERT(!(sc->orphanage_ilock_flags &
330 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)));
331
332 /* Compute the worst case space reservation that we need. */
333 adopt->sc = sc;
334 adopt->orphanage_blkres = xfs_link_space_res(mp, MAXNAMELEN);
335 if (S_ISDIR(VFS_I(sc->ip)->i_mode))
336 child_blkres = xfs_rename_space_res(mp, 0, false,
337 xfs_name_dotdot.len, false);
338 if (xfs_has_parent(mp))
339 child_blkres += XFS_ADDAFORK_SPACE_RES(mp);
340 adopt->child_blkres = child_blkres;
341
342 /*
343 * Allocate a transaction to link the child into the parent, along with
344 * enough disk space to handle expansion of both the orphanage and the
345 * dotdot entry of a child directory.
346 */
347 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link,
348 adopt->orphanage_blkres + adopt->child_blkres, 0, 0,
349 &sc->tp);
350 if (error)
351 return error;
352
353 xfs_lock_two_inodes(sc->orphanage, XFS_ILOCK_EXCL,
354 sc->ip, XFS_ILOCK_EXCL);
355 sc->ilock_flags |= XFS_ILOCK_EXCL;
356 sc->orphanage_ilock_flags |= XFS_ILOCK_EXCL;
357
358 xfs_trans_ijoin(sc->tp, sc->orphanage, 0);
359 xfs_trans_ijoin(sc->tp, sc->ip, 0);
360
361 /*
362 * Reserve enough quota in the orphan directory to add the new name.
363 * Normally the orphanage should have user/group/project ids of zero
364 * and hence is not subject to quota enforcement, but we're allowed to
365 * exceed quota to reattach disconnected parts of the directory tree.
366 */
367 error = xfs_trans_reserve_quota_nblks(sc->tp, sc->orphanage,
368 adopt->orphanage_blkres, 0, true);
369 if (error)
370 goto out_cancel;
371
372 /*
373 * Reserve enough quota in the child directory to change dotdot.
374 * Here we're also allowed to exceed file quota to repair inconsistent
375 * metadata.
376 */
377 if (adopt->child_blkres) {
378 error = xfs_trans_reserve_quota_nblks(sc->tp, sc->ip,
379 adopt->child_blkres, 0, true);
380 if (error)
381 goto out_cancel;
382 }
383
384 return 0;
385 out_cancel:
386 xchk_trans_cancel(sc);
387 xrep_orphanage_iunlock(sc, XFS_ILOCK_EXCL);
388 xchk_iunlock(sc, XFS_ILOCK_EXCL);
389 return error;
390 }
391
392 /*
393 * Compute the xfs_name for the directory entry that we're adding to the
394 * orphanage. Caller must hold ILOCKs of sc->ip and the orphanage and must not
395 * reuse namebuf until the adoption completes or is dissolved.
396 */
397 int
xrep_adoption_compute_name(struct xrep_adoption * adopt,struct xfs_name * xname)398 xrep_adoption_compute_name(
399 struct xrep_adoption *adopt,
400 struct xfs_name *xname)
401 {
402 struct xfs_scrub *sc = adopt->sc;
403 char *namebuf = (void *)xname->name;
404 xfs_ino_t ino;
405 unsigned int incr = 0;
406 int error = 0;
407
408 adopt->xname = xname;
409 xname->len = snprintf(namebuf, MAXNAMELEN, "%llu", sc->ip->i_ino);
410 xname->type = xfs_mode_to_ftype(VFS_I(sc->ip)->i_mode);
411
412 /* Make sure the filename is unique in the lost+found. */
413 error = xchk_dir_lookup(sc, sc->orphanage, xname, &ino);
414 while (error == 0 && incr < 10000) {
415 xname->len = snprintf(namebuf, MAXNAMELEN, "%llu.%u",
416 sc->ip->i_ino, ++incr);
417 error = xchk_dir_lookup(sc, sc->orphanage, xname, &ino);
418 }
419 if (error == 0) {
420 /* We already have 10,000 entries in the orphanage? */
421 return -EFSCORRUPTED;
422 }
423
424 if (error != -ENOENT)
425 return error;
426 return 0;
427 }
428
429 /*
430 * Make sure the dcache does not have a positive dentry for the name we've
431 * chosen. The caller should have checked with the ondisk directory, so any
432 * discrepancy is a sign that something is seriously wrong.
433 */
434 static int
xrep_adoption_check_dcache(struct xrep_adoption * adopt)435 xrep_adoption_check_dcache(
436 struct xrep_adoption *adopt)
437 {
438 struct qstr qname = QSTR_INIT(adopt->xname->name,
439 adopt->xname->len);
440 struct xfs_scrub *sc = adopt->sc;
441 struct dentry *d_orphanage, *d_child;
442 int error = 0;
443
444 d_orphanage = d_find_alias(VFS_I(sc->orphanage));
445 if (!d_orphanage)
446 return 0;
447
448 d_child = d_hash_and_lookup(d_orphanage, &qname);
449 if (d_child) {
450 trace_xrep_adoption_check_child(sc->mp, d_child);
451
452 if (d_is_positive(d_child)) {
453 ASSERT(d_is_negative(d_child));
454 error = -EFSCORRUPTED;
455 }
456
457 dput(d_child);
458 }
459
460 dput(d_orphanage);
461 return error;
462 }
463
464 /*
465 * Invalidate all dentries for the name that was added to the orphanage
466 * directory, and all dentries pointing to the child inode that was moved.
467 *
468 * There should not be any positive entries for the name, since we've
469 * maintained our lock on the orphanage directory.
470 */
471 static void
xrep_adoption_zap_dcache(struct xrep_adoption * adopt)472 xrep_adoption_zap_dcache(
473 struct xrep_adoption *adopt)
474 {
475 struct qstr qname = QSTR_INIT(adopt->xname->name,
476 adopt->xname->len);
477 struct xfs_scrub *sc = adopt->sc;
478 struct dentry *d_orphanage, *d_child;
479
480 /* Invalidate all dentries for the adoption name */
481 d_orphanage = d_find_alias(VFS_I(sc->orphanage));
482 if (!d_orphanage)
483 return;
484
485 d_child = d_hash_and_lookup(d_orphanage, &qname);
486 while (d_child != NULL) {
487 trace_xrep_adoption_invalidate_child(sc->mp, d_child);
488
489 ASSERT(d_is_negative(d_child));
490 d_invalidate(d_child);
491 dput(d_child);
492 d_child = d_lookup(d_orphanage, &qname);
493 }
494
495 dput(d_orphanage);
496
497 /* Invalidate all the dentries pointing down to this file. */
498 while ((d_child = d_find_alias(VFS_I(sc->ip))) != NULL) {
499 trace_xrep_adoption_invalidate_child(sc->mp, d_child);
500
501 d_invalidate(d_child);
502 dput(d_child);
503 }
504 }
505
506 /*
507 * If we have to add an attr fork ahead of a parent pointer update, how much
508 * space should we ask for?
509 */
510 static inline int
xrep_adoption_attr_sizeof(const struct xrep_adoption * adopt)511 xrep_adoption_attr_sizeof(
512 const struct xrep_adoption *adopt)
513 {
514 return sizeof(struct xfs_attr_sf_hdr) +
515 xfs_attr_sf_entsize_byname(sizeof(struct xfs_parent_rec),
516 adopt->xname->len);
517 }
518
519 /*
520 * Move the current file to the orphanage under the computed name.
521 *
522 * Returns with a dirty transaction so that the caller can handle any other
523 * work, such as fixing up unlinked lists or resetting link counts.
524 */
525 int
xrep_adoption_move(struct xrep_adoption * adopt)526 xrep_adoption_move(
527 struct xrep_adoption *adopt)
528 {
529 struct xfs_scrub *sc = adopt->sc;
530 bool isdir = S_ISDIR(VFS_I(sc->ip)->i_mode);
531 int error;
532
533 trace_xrep_adoption_reparent(sc->orphanage, adopt->xname,
534 sc->ip->i_ino);
535
536 error = xrep_adoption_check_dcache(adopt);
537 if (error)
538 return error;
539
540 /*
541 * If this filesystem has parent pointers, ensure that the file being
542 * moved to the orphanage has an attribute fork. This is required
543 * because the parent pointer code does not itself add attr forks.
544 */
545 if (!xfs_inode_has_attr_fork(sc->ip) && xfs_has_parent(sc->mp)) {
546 int sf_size = xrep_adoption_attr_sizeof(adopt);
547
548 error = xfs_bmap_add_attrfork(sc->tp, sc->ip, sf_size, true);
549 if (error)
550 return error;
551 }
552
553 /* Create the new name in the orphanage. */
554 error = xfs_dir_createname(sc->tp, sc->orphanage, adopt->xname,
555 sc->ip->i_ino, adopt->orphanage_blkres);
556 if (error)
557 return error;
558
559 /*
560 * Bump the link count of the orphanage if we just added a
561 * subdirectory, and update its timestamps.
562 */
563 xfs_trans_ichgtime(sc->tp, sc->orphanage,
564 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
565 if (isdir)
566 xfs_bumplink(sc->tp, sc->orphanage);
567 xfs_trans_log_inode(sc->tp, sc->orphanage, XFS_ILOG_CORE);
568
569 /* Bump the link count of the child. */
570 if (adopt->bump_child_nlink) {
571 xfs_bumplink(sc->tp, sc->ip);
572 xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
573 }
574
575 /* Replace the dotdot entry if the child is a subdirectory. */
576 if (isdir) {
577 error = xfs_dir_replace(sc->tp, sc->ip, &xfs_name_dotdot,
578 sc->orphanage->i_ino, adopt->child_blkres);
579 if (error)
580 return error;
581 }
582
583 /* Add a parent pointer from the file back to the lost+found. */
584 if (xfs_has_parent(sc->mp)) {
585 error = xfs_parent_addname(sc->tp, &adopt->ppargs,
586 sc->orphanage, adopt->xname, sc->ip);
587 if (error)
588 return error;
589 }
590
591 /*
592 * Notify dirent hooks that we moved the file to /lost+found, and
593 * finish all the deferred work so that we know the adoption is fully
594 * recorded in the log.
595 */
596 xfs_dir_update_hook(sc->orphanage, sc->ip, 1, adopt->xname);
597
598 /* Remove negative dentries from the lost+found's dcache */
599 xrep_adoption_zap_dcache(adopt);
600 return 0;
601 }
602
603 /*
604 * Roll to a clean scrub transaction so that we can release the orphanage,
605 * even if xrep_adoption_move was not called.
606 *
607 * Commits all the work and deferred ops attached to an adoption request and
608 * rolls to a clean scrub transaction. On success, returns 0 with the scrub
609 * context holding a clean transaction with no inodes joined. On failure,
610 * returns negative errno with no scrub transaction. All inode locks are
611 * still held after this function returns.
612 */
613 int
xrep_adoption_trans_roll(struct xrep_adoption * adopt)614 xrep_adoption_trans_roll(
615 struct xrep_adoption *adopt)
616 {
617 struct xfs_scrub *sc = adopt->sc;
618 int error;
619
620 trace_xrep_adoption_trans_roll(sc->orphanage, sc->ip,
621 !!(sc->tp->t_flags & XFS_TRANS_DIRTY));
622
623 /* Finish all the deferred ops to commit all repairs. */
624 error = xrep_defer_finish(sc);
625 if (error)
626 return error;
627
628 /* Roll the transaction once more to detach the inodes. */
629 return xfs_trans_roll(&sc->tp);
630 }
631