// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 */

#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_bmap.h"
#include "xfs_alloc.h"
#include "xfs_fsops.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
#include "xfs_dir2.h"
#include "xfs_extfree_item.h"
#include "xfs_mru_cache.h"
#include "xfs_inode_item.h"
#include "xfs_icache.h"
#include "xfs_trace.h"
#include "xfs_icreate_item.h"
#include "xfs_filestream.h"
#include "xfs_quota.h"
#include "xfs_sysfs.h"
#include "xfs_ondisk.h"
#include "xfs_rmap_item.h"
#include "xfs_refcount_item.h"
#include "xfs_bmap_item.h"
#include "xfs_reflink.h"
#include "xfs_pwork.h"
#include "xfs_ag.h"
#include "xfs_defer.h"
#include "xfs_attr_item.h"
#include "xfs_xattr.h"
#include "xfs_iunlink_item.h"
#include "xfs_dahash_test.h"
#include "xfs_rtbitmap.h"
#include "xfs_exchmaps_item.h"
#include "xfs_parent.h"
#include "xfs_rtalloc.h"
#include "xfs_zone_alloc.h"
#include "scrub/stats.h"
#include "scrub/rcbag_btree.h"

#include <linux/magic.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>

static const struct super_operations xfs_super_operations;

static struct dentry *xfs_debugfs;	/* top-level xfs debugfs dir */
static struct kset *xfs_kset;		/* top-level xfs sysfs dir */
#ifdef DEBUG
static struct xfs_kobj xfs_dbg_kobj;	/* global debug sysfs attrs */
#endif

enum xfs_dax_mode {
	XFS_DAX_INODE = 0,
	XFS_DAX_ALWAYS = 1,
	XFS_DAX_NEVER = 2,
};

/* Were quota mount options provided?  Must use the upper 16 bits of qflags. */
#define XFS_QFLAGS_MNTOPTS	(1U << 31)

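/*
 * Apply a dax= mount option to the feature flags.  "inode" clears both
 * override bits so the per-inode flag decides; "always" and "never" are
 * mutually exclusive overrides.
 */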
static void
xfs_mount_set_dax_mode(
	struct xfs_mount	*mp,
	enum xfs_dax_mode	mode)
{
	switch (mode) {
	case XFS_DAX_INODE:
		mp->m_features &= ~(XFS_FEAT_DAX_ALWAYS | XFS_FEAT_DAX_NEVER);
		break;
	case XFS_DAX_ALWAYS:
		mp->m_features |= XFS_FEAT_DAX_ALWAYS;
		mp->m_features &= ~XFS_FEAT_DAX_NEVER;
		break;
	case XFS_DAX_NEVER:
		mp->m_features |= XFS_FEAT_DAX_NEVER;
		mp->m_features &= ~XFS_FEAT_DAX_ALWAYS;
		break;
	}
}

static const struct constant_table dax_param_enums[] = {
	{"inode",	XFS_DAX_INODE },
	{"always",	XFS_DAX_ALWAYS },
	{"never",	XFS_DAX_NEVER },
	{}
};

/*
 * Table driven mount option parser.
 */
enum {
	Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev,
	Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid,
	Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups,
	Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32, Opt_ikeep,
	Opt_noikeep, Opt_largeio, Opt_nolargeio, Opt_attr2, Opt_noattr2,
	Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota,
	Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
	Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
	Opt_discard, Opt_nodiscard, Opt_dax, Opt_dax_enum, Opt_max_open_zones,
	Opt_lifetime, Opt_nolifetime,
};

static const struct fs_parameter_spec xfs_fs_parameters[] = {
	fsparam_u32("logbufs",		Opt_logbufs),
	fsparam_string("logbsize",	Opt_logbsize),
	fsparam_string("logdev",	Opt_logdev),
	fsparam_string("rtdev",		Opt_rtdev),
	fsparam_flag("wsync",		Opt_wsync),
	fsparam_flag("noalign",		Opt_noalign),
	fsparam_flag("swalloc",		Opt_swalloc),
	fsparam_u32("sunit",		Opt_sunit),
	fsparam_u32("swidth",		Opt_swidth),
	fsparam_flag("nouuid",		Opt_nouuid),
	fsparam_flag("grpid",		Opt_grpid),
	fsparam_flag("nogrpid",		Opt_nogrpid),
	fsparam_flag("bsdgroups",	Opt_bsdgroups),
	fsparam_flag("sysvgroups",	Opt_sysvgroups),
	fsparam_string("allocsize",	Opt_allocsize),
	fsparam_flag("norecovery",	Opt_norecovery),
	fsparam_flag("inode64",		Opt_inode64),
	fsparam_flag("inode32",		Opt_inode32),
	fsparam_flag("ikeep",		Opt_ikeep),
	fsparam_flag("noikeep",		Opt_noikeep),
	fsparam_flag("largeio",		Opt_largeio),
	fsparam_flag("nolargeio",	Opt_nolargeio),
	fsparam_flag("attr2",		Opt_attr2),
	fsparam_flag("noattr2",		Opt_noattr2),
	fsparam_flag("filestreams",	Opt_filestreams),
	fsparam_flag("quota",		Opt_quota),
	fsparam_flag("noquota",		Opt_noquota),
	fsparam_flag("usrquota",	Opt_usrquota),
	fsparam_flag("grpquota",	Opt_grpquota),
	fsparam_flag("prjquota",	Opt_prjquota),
	fsparam_flag("uquota",		Opt_uquota),
	fsparam_flag("gquota",		Opt_gquota),
	fsparam_flag("pquota",		Opt_pquota),
	fsparam_flag("uqnoenforce",	Opt_uqnoenforce),
	fsparam_flag("gqnoenforce",	Opt_gqnoenforce),
	fsparam_flag("pqnoenforce",	Opt_pqnoenforce),
	fsparam_flag("qnoenforce",	Opt_qnoenforce),
	fsparam_flag("discard",		Opt_discard),
	fsparam_flag("nodiscard",	Opt_nodiscard),
	fsparam_flag("dax",		Opt_dax),
	fsparam_enum("dax",		Opt_dax_enum, dax_param_enums),
	fsparam_u32("max_open_zones",	Opt_max_open_zones),
	fsparam_flag("lifetime",	Opt_lifetime),
	fsparam_flag("nolifetime",	Opt_nolifetime),
	{}
};

struct proc_xfs_info {
	uint64_t	flag;
	char		*str;
};

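/*
 * Emit the mount options that differ from the defaults to /proc/mounts.
 * Simple feature flags come straight from mp->m_features; everything
 * else is derived from other mount structure fields.
 */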
static int
xfs_fs_show_options(
	struct seq_file		*m,
	struct dentry		*root)
{
	static struct proc_xfs_info xfs_info_set[] = {
		/* the few simple ones we can get from the mount struct */
		{ XFS_FEAT_IKEEP,		",ikeep" },
		{ XFS_FEAT_WSYNC,		",wsync" },
		{ XFS_FEAT_NOALIGN,		",noalign" },
		{ XFS_FEAT_SWALLOC,		",swalloc" },
		{ XFS_FEAT_NOUUID,		",nouuid" },
		{ XFS_FEAT_NORECOVERY,		",norecovery" },
		{ XFS_FEAT_ATTR2,		",attr2" },
		{ XFS_FEAT_FILESTREAMS,		",filestreams" },
		{ XFS_FEAT_GRPID,		",grpid" },
		{ XFS_FEAT_DISCARD,		",discard" },
		{ XFS_FEAT_LARGE_IOSIZE,	",largeio" },
		{ XFS_FEAT_DAX_ALWAYS,		",dax=always" },
		{ XFS_FEAT_DAX_NEVER,		",dax=never" },
		{ XFS_FEAT_NOLIFETIME,		",nolifetime" },
		{ 0, NULL }
	};
	struct xfs_mount	*mp = XFS_M(root->d_sb);
	struct proc_xfs_info	*xfs_infop;

	for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
		if (mp->m_features & xfs_infop->flag)
			seq_puts(m, xfs_infop->str);
	}

	seq_printf(m, ",inode%d", xfs_has_small_inums(mp) ? 32 : 64);

	if (xfs_has_allocsize(mp))
		seq_printf(m, ",allocsize=%dk",
			   (1 << mp->m_allocsize_log) >> 10);

	if (mp->m_logbufs > 0)
		seq_printf(m, ",logbufs=%d", mp->m_logbufs);
	if (mp->m_logbsize > 0)
		seq_printf(m, ",logbsize=%dk", mp->m_logbsize >> 10);

	if (mp->m_logname)
		seq_show_option(m, "logdev", mp->m_logname);
	if (mp->m_rtname)
		seq_show_option(m, "rtdev", mp->m_rtname);

	if (mp->m_dalign > 0)
		seq_printf(m, ",sunit=%d",
				(int)XFS_FSB_TO_BB(mp, mp->m_dalign));
	if (mp->m_swidth > 0)
		seq_printf(m, ",swidth=%d",
				(int)XFS_FSB_TO_BB(mp, mp->m_swidth));

	if (mp->m_qflags & XFS_UQUOTA_ENFD)
		seq_puts(m, ",usrquota");
	else if (mp->m_qflags & XFS_UQUOTA_ACCT)
		seq_puts(m, ",uqnoenforce");

	if (mp->m_qflags & XFS_PQUOTA_ENFD)
		seq_puts(m, ",prjquota");
	else if (mp->m_qflags & XFS_PQUOTA_ACCT)
		seq_puts(m, ",pqnoenforce");

	if (mp->m_qflags & XFS_GQUOTA_ENFD)
		seq_puts(m, ",grpquota");
	else if (mp->m_qflags & XFS_GQUOTA_ACCT)
		seq_puts(m, ",gqnoenforce");

	if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
		seq_puts(m, ",noquota");

	if (mp->m_max_open_zones)
		seq_printf(m, ",max_open_zones=%u", mp->m_max_open_zones);

	return 0;
}

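/*
 * Apply the inode32/inode64 allocation policy to a single perag.  The
 * return value says whether this AG counts towards maxagi, i.e. whether
 * it can hold inodes when the inode32 allocator is active.
 */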
static bool
xfs_set_inode_alloc_perag(
	struct xfs_perag	*pag,
	xfs_ino_t		ino,
	xfs_agnumber_t		max_metadata)
{
	if (!xfs_is_inode32(pag_mount(pag))) {
		set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
		clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
		return false;
	}

	if (ino > XFS_MAXINUMBER_32) {
		clear_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
		clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
		return false;
	}

	set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
	if (pag_agno(pag) < max_metadata)
		set_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
	else
		clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
	return true;
}

/*
 * Set parameters for inode allocation heuristics, taking into account
 * filesystem size and inode32/inode64 mount options; i.e. specifically
 * whether or not XFS_FEAT_SMALL_INUMS is set.
 *
 * Inode allocation patterns are altered only if inode32 is requested
 * (XFS_FEAT_SMALL_INUMS), and the filesystem is sufficiently large.
 * If altered, XFS_OPSTATE_INODE32 is set as well.
 *
 * An agcount independent of that in the mount structure is provided
 * because in the growfs case, mp->m_sb.sb_agcount is not yet updated
 * to the potentially higher ag count.
 *
 * Returns the maximum AG index which may contain inodes.
 */
xfs_agnumber_t
xfs_set_inode_alloc(
	struct xfs_mount *mp,
	xfs_agnumber_t	agcount)
{
	xfs_agnumber_t	index;
	xfs_agnumber_t	maxagi = 0;
	xfs_sb_t	*sbp = &mp->m_sb;
	xfs_agnumber_t	max_metadata;
	xfs_agino_t	agino;
	xfs_ino_t	ino;

	/*
	 * Calculate how much should be reserved for inodes to meet
	 * the max inode percentage.  Used only for inode32.
	 */
	if (M_IGEO(mp)->maxicount) {
		uint64_t	icount;

		icount = sbp->sb_dblocks * sbp->sb_imax_pct;
		do_div(icount, 100);
		icount += sbp->sb_agblocks - 1;
		do_div(icount, sbp->sb_agblocks);
		max_metadata = icount;
	} else {
		max_metadata = agcount;
	}

	/* Get the last possible inode in the filesystem */
	agino = XFS_AGB_TO_AGINO(mp, sbp->sb_agblocks - 1);
	ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);

	/*
	 * If user asked for no more than 32-bit inodes, and the fs is
	 * sufficiently large, set XFS_OPSTATE_INODE32 if we must alter
	 * the allocator to accommodate the request.
	 */
	if (xfs_has_small_inums(mp) && ino > XFS_MAXINUMBER_32)
		xfs_set_inode32(mp);
	else
		xfs_clear_inode32(mp);

	for (index = 0; index < agcount; index++) {
		struct xfs_perag	*pag;

		ino = XFS_AGINO_TO_INO(mp, index, agino);

		pag = xfs_perag_get(mp, index);
		if (xfs_set_inode_alloc_perag(pag, ino, max_metadata))
			maxagi++;
		xfs_perag_put(pag);
	}

	return xfs_is_inode32(mp) ? maxagi : agcount;
}

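/*
 * Validate the dax=always configuration.  Unsupported device or block
 * size setups quietly fall back to dax=never; only the combination of
 * reflink and a partitioned data device is a hard error.
 */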
static int
xfs_setup_dax_always(
	struct xfs_mount	*mp)
{
	if (!mp->m_ddev_targp->bt_daxdev &&
	    (!mp->m_rtdev_targp || !mp->m_rtdev_targp->bt_daxdev)) {
		xfs_alert(mp,
			"DAX unsupported by block device. Turning off DAX.");
		goto disable_dax;
	}

	if (mp->m_super->s_blocksize != PAGE_SIZE) {
		xfs_alert(mp,
			"DAX not supported for blocksize. Turning off DAX.");
		goto disable_dax;
	}

	if (xfs_has_reflink(mp) &&
	    bdev_is_partition(mp->m_ddev_targp->bt_bdev)) {
		xfs_alert(mp,
			"DAX and reflink cannot work with multi-partitions!");
		return -EINVAL;
	}

	return 0;

disable_dax:
	xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER);
	return 0;
}

STATIC int
xfs_blkdev_get(
	xfs_mount_t		*mp,
	const char		*name,
	struct file		**bdev_filep)
{
	int			error = 0;

	*bdev_filep = bdev_file_open_by_path(name,
		BLK_OPEN_READ | BLK_OPEN_WRITE | BLK_OPEN_RESTRICT_WRITES,
		mp->m_super, &fs_holder_ops);
	if (IS_ERR(*bdev_filep)) {
		error = PTR_ERR(*bdev_filep);
		*bdev_filep = NULL;
		xfs_warn(mp, "Invalid device [%s], error=%d", name, error);
	}

	return error;
}

STATIC void
xfs_shutdown_devices(
	struct xfs_mount	*mp)
{
	/*
	 * Udev is triggered whenever anyone closes a block device or unmounts
	 * a file system on a block device.
	 * The default udev rules invoke blkid to read the fs super and create
	 * symlinks to the bdev under /dev/disk.  For this, it uses buffered
	 * reads through the page cache.
	 *
	 * xfs_db also uses buffered reads to examine metadata.  There is no
	 * coordination between xfs_db and udev, which means that they can run
	 * concurrently.  Note there is no coordination between the kernel and
	 * blkid either.
	 *
	 * On a system with 64k pages, the page cache can cache the superblock
	 * and the root inode (and hence the root directory) with the same 64k
	 * page.  If udev spawns blkid after the mkfs and the system is busy
	 * enough that it is still running when xfs_db starts up, they'll both
	 * read from the same page in the pagecache.
	 *
	 * The unmount writes updated inode metadata to disk directly.  The XFS
	 * buffer cache does not use the bdev pagecache, so it needs to
	 * invalidate that pagecache on unmount.  If the above scenario occurs,
	 * the pagecache no longer reflects what's on disk, xfs_db reads the
	 * stale metadata, and fails to find /a.  Most of the time this succeeds
	 * because closing a bdev invalidates the page cache, but when processes
	 * race, everyone loses.
	 */
	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
		blkdev_issue_flush(mp->m_logdev_targp->bt_bdev);
		invalidate_bdev(mp->m_logdev_targp->bt_bdev);
	}
	if (mp->m_rtdev_targp) {
		blkdev_issue_flush(mp->m_rtdev_targp->bt_bdev);
		invalidate_bdev(mp->m_rtdev_targp->bt_bdev);
	}
	blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
	invalidate_bdev(mp->m_ddev_targp->bt_bdev);
}

/*
 * The file system configurations are:
 *	(1) device (partition) with data and internal log
 *	(2) logical volume with data and log subvolumes.
 *	(3) logical volume with data, log, and realtime subvolumes.
 *
 * We only have to handle opening the log and realtime volumes here if
 * they are present.  The data subvolume has already been opened by
 * get_sb_bdev() and is stored in sb->s_bdev.
 */
STATIC int
xfs_open_devices(
	struct xfs_mount	*mp)
{
	struct super_block	*sb = mp->m_super;
	struct block_device	*ddev = sb->s_bdev;
	struct file		*logdev_file = NULL, *rtdev_file = NULL;
	int			error;

	/*
	 * Open real time and log devices - order is important.
	 */
	if (mp->m_logname) {
		error = xfs_blkdev_get(mp, mp->m_logname, &logdev_file);
		if (error)
			return error;
	}

	if (mp->m_rtname) {
		error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev_file);
		if (error)
			goto out_close_logdev;

		if (file_bdev(rtdev_file) == ddev ||
		    (logdev_file &&
		     file_bdev(rtdev_file) == file_bdev(logdev_file))) {
			xfs_warn(mp,
	"Cannot mount filesystem with identical rtdev and ddev/logdev.");
			error = -EINVAL;
			goto out_close_rtdev;
		}
	}

	/*
	 * Setup xfs_mount buffer target pointers
	 */
	error = -ENOMEM;
	mp->m_ddev_targp = xfs_alloc_buftarg(mp, sb->s_bdev_file);
	if (!mp->m_ddev_targp)
		goto out_close_rtdev;

	if (rtdev_file) {
		mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev_file);
		if (!mp->m_rtdev_targp)
			goto out_free_ddev_targ;
	}

	if (logdev_file && file_bdev(logdev_file) != ddev) {
		mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev_file);
		if (!mp->m_logdev_targp)
			goto out_free_rtdev_targ;
	} else {
		mp->m_logdev_targp = mp->m_ddev_targp;
		/* Handle won't be used, drop it */
		if (logdev_file)
			bdev_fput(logdev_file);
	}

	return 0;

 out_free_rtdev_targ:
	if (mp->m_rtdev_targp)
		xfs_free_buftarg(mp->m_rtdev_targp);
 out_free_ddev_targ:
	xfs_free_buftarg(mp->m_ddev_targp);
 out_close_rtdev:
	if (rtdev_file)
		bdev_fput(rtdev_file);
 out_close_logdev:
	if (logdev_file)
		bdev_fput(logdev_file);
	return error;
}

/*
 * Setup xfs_mount buffer target pointers based on superblock
 */
STATIC int
xfs_setup_devices(
	struct xfs_mount	*mp)
{
	int			error;

	error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
	if (error)
		return error;

	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
		unsigned int	log_sector_size = BBSIZE;

		if (xfs_has_sector(mp))
			log_sector_size = mp->m_sb.sb_logsectsize;
		error = xfs_setsize_buftarg(mp->m_logdev_targp,
					    log_sector_size);
		if (error)
			return error;
	}

	if (mp->m_sb.sb_rtstart) {
		if (mp->m_rtdev_targp) {
			xfs_warn(mp,
		"can't use internal and external rtdev at the same time");
			return -EINVAL;
		}
		mp->m_rtdev_targp = mp->m_ddev_targp;
	} else if (mp->m_rtname) {
		error = xfs_setsize_buftarg(mp->m_rtdev_targp,
					    mp->m_sb.sb_sectsize);
		if (error)
			return error;
	}

	return 0;
}

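/*
 * Create the per-mount workqueues.  All of them are freezable so that
 * queued work drains cleanly while the filesystem is frozen, and all but
 * the sync workqueue are tagged WQ_MEM_RECLAIM because they must be able
 * to make progress while the system is under memory pressure.
 */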
STATIC int
xfs_init_mount_workqueues(
	struct xfs_mount	*mp)
{
	mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s",
			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
			1, mp->m_super->s_id);
	if (!mp->m_buf_workqueue)
		goto out;

	mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
			0, mp->m_super->s_id);
	if (!mp->m_unwritten_workqueue)
		goto out_destroy_buf;

	mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
			0, mp->m_super->s_id);
	if (!mp->m_reclaim_workqueue)
		goto out_destroy_unwritten;

	mp->m_blockgc_wq = alloc_workqueue("xfs-blockgc/%s",
			XFS_WQFLAGS(WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM),
			0, mp->m_super->s_id);
	if (!mp->m_blockgc_wq)
		goto out_destroy_reclaim;

	mp->m_inodegc_wq = alloc_workqueue("xfs-inodegc/%s",
			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
			1, mp->m_super->s_id);
	if (!mp->m_inodegc_wq)
		goto out_destroy_blockgc;

	mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s",
			XFS_WQFLAGS(WQ_FREEZABLE), 0, mp->m_super->s_id);
	if (!mp->m_sync_workqueue)
		goto out_destroy_inodegc;

	return 0;

out_destroy_inodegc:
	destroy_workqueue(mp->m_inodegc_wq);
out_destroy_blockgc:
	destroy_workqueue(mp->m_blockgc_wq);
out_destroy_reclaim:
	destroy_workqueue(mp->m_reclaim_workqueue);
out_destroy_unwritten:
	destroy_workqueue(mp->m_unwritten_workqueue);
out_destroy_buf:
	destroy_workqueue(mp->m_buf_workqueue);
out:
	return -ENOMEM;
}

STATIC void
xfs_destroy_mount_workqueues(
	struct xfs_mount	*mp)
{
	destroy_workqueue(mp->m_sync_workqueue);
	destroy_workqueue(mp->m_blockgc_wq);
	destroy_workqueue(mp->m_inodegc_wq);
	destroy_workqueue(mp->m_reclaim_workqueue);
	destroy_workqueue(mp->m_unwritten_workqueue);
	destroy_workqueue(mp->m_buf_workqueue);
}

static void
xfs_flush_inodes_worker(
	struct work_struct	*work)
{
	struct xfs_mount	*mp = container_of(work, struct xfs_mount,
						   m_flush_inodes_work);
	struct super_block	*sb = mp->m_super;

	if (down_read_trylock(&sb->s_umount)) {
		sync_inodes_sb(sb);
		up_read(&sb->s_umount);
	}
}

/*
 * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK
 * or a page lock. We use sync_inodes_sb() here to ensure we block while waiting
 * for IO to complete so that we effectively throttle multiple callers to the
 * rate at which IO is completing.
 */
void
xfs_flush_inodes(
	struct xfs_mount	*mp)
{
	/*
	 * If flush_work() returns true then that means we waited for a flush
	 * which was already in progress.  Don't bother running another scan.
	 */
	if (flush_work(&mp->m_flush_inodes_work))
		return;

	queue_work(mp->m_sync_workqueue, &mp->m_flush_inodes_work);
	flush_work(&mp->m_flush_inodes_work);
}

/* Catch misguided souls that try to use this interface on XFS */
STATIC struct inode *
xfs_fs_alloc_inode(
	struct super_block	*sb)
{
	BUG();
	return NULL;
}

/*
 * Now that the generic code is guaranteed not to be accessing
 * the linux inode, we can inactivate and reclaim the inode.
 */
STATIC void
xfs_fs_destroy_inode(
	struct inode		*inode)
{
	struct xfs_inode	*ip = XFS_I(inode);

	trace_xfs_destroy_inode(ip);

	ASSERT(!rwsem_is_locked(&inode->i_rwsem));
	XFS_STATS_INC(ip->i_mount, vn_rele);
	XFS_STATS_INC(ip->i_mount, vn_remove);
	xfs_inode_mark_reclaimable(ip);
}

static void
xfs_fs_dirty_inode(
	struct inode			*inode,
	int				flags)
{
	struct xfs_inode		*ip = XFS_I(inode);
	struct xfs_mount		*mp = ip->i_mount;
	struct xfs_trans		*tp;

	if (!(inode->i_sb->s_flags & SB_LAZYTIME))
		return;

	/*
	 * Only do the timestamp update if the inode is dirty (I_DIRTY_SYNC)
	 * and has dirty timestamp (I_DIRTY_TIME). I_DIRTY_TIME can be passed
	 * in flags possibly together with I_DIRTY_SYNC.
	 */
	if ((flags & ~I_DIRTY_TIME) != I_DIRTY_SYNC || !(flags & I_DIRTY_TIME))
		return;

	if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp))
		return;
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
	xfs_trans_commit(tp);
}

/*
 * Slab object creation initialisation for the XFS inode.
 * This covers only the idempotent fields in the XFS inode;
 * all other fields need to be initialised on allocation
 * from the slab. This avoids the need to repeatedly initialise
 * fields in the xfs inode that are left in the initialised state
 * when freeing the inode.
 */
STATIC void
xfs_fs_inode_init_once(
	void			*inode)
{
	struct xfs_inode	*ip = inode;

	memset(ip, 0, sizeof(struct xfs_inode));

	/* vfs inode */
	inode_init_once(VFS_I(ip));

	/* xfs inode */
	atomic_set(&ip->i_pincount, 0);
	spin_lock_init(&ip->i_flags_lock);
	init_rwsem(&ip->i_lock);
}

/*
 * We do an unlocked check for XFS_IDONTCACHE here because we are already
 * serialised against cache hits here via the inode->i_lock and igrab() in
 * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be
 * racing with us, and it avoids needing to grab a spinlock here for every inode
 * we drop the final reference on.
 */
STATIC int
xfs_fs_drop_inode(
	struct inode		*inode)
{
	struct xfs_inode	*ip = XFS_I(inode);

	/*
	 * If this unlinked inode is in the middle of recovery, don't
	 * drop the inode just yet; log recovery will take care of
	 * that.  See the comment for this inode flag.
	 */
	if (ip->i_flags & XFS_IRECOVERY) {
		ASSERT(xlog_recovery_needed(ip->i_mount->m_log));
		return 0;
	}

	return generic_drop_inode(inode);
}

STATIC void
xfs_fs_evict_inode(
	struct inode		*inode)
{
	if (IS_DAX(inode))
		dax_break_layout_final(inode);

	truncate_inode_pages_final(&inode->i_data);
	clear_inode(inode);
}

static void
xfs_mount_free(
	struct xfs_mount	*mp)
{
	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
		xfs_free_buftarg(mp->m_logdev_targp);
	if (mp->m_rtdev_targp && mp->m_rtdev_targp != mp->m_ddev_targp)
		xfs_free_buftarg(mp->m_rtdev_targp);
	if (mp->m_ddev_targp)
		xfs_free_buftarg(mp->m_ddev_targp);

	debugfs_remove(mp->m_debugfs);
	kfree(mp->m_rtname);
	kfree(mp->m_logname);
	kfree(mp);
}

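/*
 * ->sync_fs: all the work happens in the wait == 1 pass, where we force
 * the log to stable storage.  This is also the last hook to run before
 * the freeze state machine moves to SB_FREEZE_FS, hence the gc shutdown
 * below.
 */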
STATIC int
xfs_fs_sync_fs(
	struct super_block	*sb,
	int			wait)
{
	struct xfs_mount	*mp = XFS_M(sb);
	int			error;

	trace_xfs_fs_sync_fs(mp, __return_address);

	/*
	 * Doing anything during the async pass would be counterproductive.
	 */
	if (!wait)
		return 0;

	error = xfs_log_force(mp, XFS_LOG_SYNC);
	if (error)
		return error;

	if (laptop_mode) {
		/*
		 * The disk must be active because we're syncing.
		 * We schedule log work now (now that the disk is
		 * active) instead of later (when it might not be).
		 */
		flush_delayed_work(&mp->m_log->l_work);
	}

	/*
	 * If we are called with page faults frozen out, it means we are about
	 * to freeze the transaction subsystem. Take the opportunity to shut
	 * down inodegc because once SB_FREEZE_FS is set it's too late to
	 * prevent inactivation races with freeze. The fs doesn't get called
	 * again by the freezing process until after SB_FREEZE_FS has been set,
	 * so it's now or never.  Same logic applies to speculative allocation
	 * garbage collection.
	 *
	 * We don't care if this is a normal syncfs call that does this or
	 * freeze that does this - we can run this multiple times without issue
	 * and we won't race with a restart because a restart can only occur
	 * when the state is either SB_FREEZE_FS or SB_FREEZE_COMPLETE.
	 */
	if (sb->s_writers.frozen == SB_FREEZE_PAGEFAULT) {
		xfs_inodegc_stop(mp);
		xfs_blockgc_stop(mp);
		xfs_zone_gc_stop(mp);
	}

	return 0;
}

static xfs_extlen_t
xfs_internal_log_size(
	struct xfs_mount	*mp)
{
	if (!mp->m_sb.sb_logstart)
		return 0;
	return mp->m_sb.sb_logblocks;
}

static void
xfs_statfs_data(
	struct xfs_mount	*mp,
	struct kstatfs		*st)
{
	int64_t			fdblocks =
		xfs_sum_freecounter(mp, XC_FREE_BLOCKS);

	/* make sure st->f_bfree does not underflow */
	st->f_bfree = max(0LL,
		fdblocks - xfs_freecounter_unavailable(mp, XC_FREE_BLOCKS));

	/*
	 * sb_dblocks can change during growfs, but nothing cares about
	 * reporting the old or new value during growfs.
	 */
	st->f_blocks = mp->m_sb.sb_dblocks - xfs_internal_log_size(mp);
}

/*
 * When stat(v)fs is called on a file with the realtime bit set or a directory
 * with the rtinherit bit, report freespace information for the RT device
 * instead of the main data device.
 */
static void
xfs_statfs_rt(
	struct xfs_mount	*mp,
	struct kstatfs		*st)
{
	st->f_bfree = xfs_rtbxlen_to_blen(mp,
			xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS));
	st->f_blocks = mp->m_sb.sb_rblocks - xfs_rtbxlen_to_blen(mp,
			mp->m_free[XC_FREE_RTEXTENTS].res_total);
}

static void
xfs_statfs_inodes(
	struct xfs_mount	*mp,
	struct kstatfs		*st)
{
	uint64_t		icount = percpu_counter_sum(&mp->m_icount);
	uint64_t		ifree = percpu_counter_sum(&mp->m_ifree);
	uint64_t		fakeinos = XFS_FSB_TO_INO(mp, st->f_bfree);

	st->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
	if (M_IGEO(mp)->maxicount)
		st->f_files = min_t(typeof(st->f_files), st->f_files,
					M_IGEO(mp)->maxicount);

	/* If sb_icount overshot maxicount, report actual allocation */
	st->f_files = max_t(typeof(st->f_files), st->f_files,
			mp->m_sb.sb_icount);

	/* Make sure st->f_ffree does not underflow */
	st->f_ffree = max_t(int64_t, 0, st->f_files - (icount - ifree));
}

STATIC int
xfs_fs_statfs(
	struct dentry		*dentry,
	struct kstatfs		*st)
{
	struct xfs_mount	*mp = XFS_M(dentry->d_sb);
	struct xfs_inode	*ip = XFS_I(d_inode(dentry));

	/*
	 * Expedite background inodegc but don't wait. We do not want to block
	 * here waiting hours for a billion extent file to be truncated.
	 */
	xfs_inodegc_push(mp);

	st->f_type = XFS_SUPER_MAGIC;
	st->f_namelen = MAXNAMELEN - 1;
	st->f_bsize = mp->m_sb.sb_blocksize;
	st->f_fsid = u64_to_fsid(huge_encode_dev(mp->m_ddev_targp->bt_dev));

	xfs_statfs_data(mp, st);
	xfs_statfs_inodes(mp, st);

	if (XFS_IS_REALTIME_MOUNT(mp) &&
	    (ip->i_diflags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME)))
		xfs_statfs_rt(mp, st);

	if ((ip->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
	    ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
			      (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
		xfs_qm_statvfs(ip, st);

	/*
	 * XFS does not distinguish between blocks available to privileged and
	 * unprivileged users.
	 */
	st->f_bavail = st->f_bfree;
	return 0;
}

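/*
 * Save the current free space reservations and release them, e.g. when
 * freezing the filesystem; xfs_restore_resvblks() re-establishes the
 * saved (or default) reservations afterwards.
 */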
STATIC void
xfs_save_resvblks(
	struct xfs_mount	*mp)
{
	enum xfs_free_counter	i;

	for (i = 0; i < XC_FREE_NR; i++) {
		mp->m_free[i].res_saved = mp->m_free[i].res_total;
		xfs_reserve_blocks(mp, i, 0);
	}
}

STATIC void
xfs_restore_resvblks(
	struct xfs_mount	*mp)
{
	uint64_t		resblks;
	enum xfs_free_counter	i;

	for (i = 0; i < XC_FREE_NR; i++) {
		if (mp->m_free[i].res_saved) {
			resblks = mp->m_free[i].res_saved;
			mp->m_free[i].res_saved = 0;
		} else
			resblks = xfs_default_resblks(mp, i);
		xfs_reserve_blocks(mp, i, resblks);
	}
}

/*
 * Second stage of a freeze. The data is already frozen so we only
 * need to take care of the metadata. Once that's done sync the superblock
 * to the log to dirty it in case of a crash while frozen. This ensures that we
 * will recover the unlinked inode lists on the next mount.
 */
STATIC int
xfs_fs_freeze(
	struct super_block	*sb)
{
	struct xfs_mount	*mp = XFS_M(sb);
	unsigned int		flags;
	int			ret;

	/*
	 * The filesystem is now frozen far enough that memory reclaim
	 * cannot safely operate on the filesystem. Hence we need to
	 * set a GFP_NOFS context here to avoid recursion deadlocks.
	 */
	flags = memalloc_nofs_save();
	xfs_save_resvblks(mp);
	ret = xfs_log_quiesce(mp);
	memalloc_nofs_restore(flags);

	/*
	 * For read-write filesystems, we need to restart the inodegc on error
	 * because we stopped it at SB_FREEZE_PAGEFAULT level and a thaw is not
	 * going to be run to restart it now.  We are at SB_FREEZE_FS level
	 * here, so we can restart safely without racing with a stop in
	 * xfs_fs_sync_fs().
	 */
	if (ret && !xfs_is_readonly(mp)) {
		xfs_blockgc_start(mp);
		xfs_inodegc_start(mp);
		xfs_zone_gc_start(mp);
	}

	return ret;
}

STATIC int
xfs_fs_unfreeze(
	struct super_block	*sb)
{
	struct xfs_mount	*mp = XFS_M(sb);

	xfs_restore_resvblks(mp);
	xfs_log_work_queue(mp);

	/*
	 * Don't reactivate the inodegc worker on a readonly filesystem because
	 * inodes are sent directly to reclaim.  Don't reactivate the blockgc
	 * worker because there are no speculative preallocations on a readonly
	 * filesystem.
	 */
	if (!xfs_is_readonly(mp)) {
		xfs_zone_gc_start(mp);
		xfs_blockgc_start(mp);
		xfs_inodegc_start(mp);
	}

	return 0;
}

/*
 * This function fills in xfs_mount_t fields based on mount args.
 * Note: the superblock _has_ now been read in.
 */
STATIC int
xfs_finish_flags(
	struct xfs_mount	*mp)
{
	/* Fail a mount where the logbuf is smaller than the log stripe */
	if (xfs_has_logv2(mp)) {
		if (mp->m_logbsize <= 0 &&
		    mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
			mp->m_logbsize = mp->m_sb.sb_logsunit;
		} else if (mp->m_logbsize > 0 &&
			   mp->m_logbsize < mp->m_sb.sb_logsunit) {
			xfs_warn(mp,
		"logbuf size must be greater than or equal to log stripe size");
			return -EINVAL;
		}
	} else {
		/* Fail a mount if the logbuf is larger than 32K */
		if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
			xfs_warn(mp,
		"logbuf size for version 1 logs must be 16K or 32K");
			return -EINVAL;
		}
	}

	/*
	 * V5 filesystems always use attr2 format for attributes.
	 */
	if (xfs_has_crc(mp) && xfs_has_noattr2(mp)) {
		xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. "
			     "attr2 is always enabled for V5 filesystems.");
		return -EINVAL;
	}

	/*
	 * prohibit r/w mounts of read-only filesystems
	 */
	if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !xfs_is_readonly(mp)) {
		xfs_warn(mp,
			"cannot mount a read-only filesystem as read-write");
		return -EROFS;
	}

	if ((mp->m_qflags & XFS_GQUOTA_ACCT) &&
	    (mp->m_qflags & XFS_PQUOTA_ACCT) &&
	    !xfs_has_pquotino(mp)) {
		xfs_warn(mp,
		  "Super block does not support project and group quota together");
		return -EINVAL;
	}

	if (!xfs_has_zoned(mp)) {
		if (mp->m_max_open_zones) {
			xfs_warn(mp,
"max_open_zones mount option only supported on zoned file systems.");
			return -EINVAL;
		}
		if (mp->m_features & XFS_FEAT_NOLIFETIME) {
			xfs_warn(mp,
"nolifetime mount option only supported on zoned file systems.");
			return -EINVAL;
		}
	}

	return 0;
}

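/*
 * Set up the per-cpu counters backing the superblock summary counters.
 * Teardown on failure happens in reverse order of initialisation.
 */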
static int
xfs_init_percpu_counters(
	struct xfs_mount	*mp)
{
	int			error;
	int			i;

	error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL);
	if (error)
		return -ENOMEM;

	error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL);
	if (error)
		goto free_icount;

	error = percpu_counter_init(&mp->m_delalloc_blks, 0, GFP_KERNEL);
	if (error)
		goto free_ifree;

	error = percpu_counter_init(&mp->m_delalloc_rtextents, 0, GFP_KERNEL);
	if (error)
		goto free_delalloc;

	for (i = 0; i < XC_FREE_NR; i++) {
		error = percpu_counter_init(&mp->m_free[i].count, 0,
				GFP_KERNEL);
		if (error)
			goto free_freecounters;
	}

	return 0;

free_freecounters:
	while (--i >= 0)
		percpu_counter_destroy(&mp->m_free[i].count);
	percpu_counter_destroy(&mp->m_delalloc_rtextents);
free_delalloc:
	percpu_counter_destroy(&mp->m_delalloc_blks);
free_ifree:
	percpu_counter_destroy(&mp->m_ifree);
free_icount:
	percpu_counter_destroy(&mp->m_icount);
	return -ENOMEM;
}

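/*
 * Resync the per-cpu counters with the current values in the on-disk
 * superblock.
 */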
void
xfs_reinit_percpu_counters(
	struct xfs_mount	*mp)
{
	percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
	percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
	xfs_set_freecounter(mp, XC_FREE_BLOCKS, mp->m_sb.sb_fdblocks);
	if (!xfs_has_zoned(mp))
		xfs_set_freecounter(mp, XC_FREE_RTEXTENTS,
				mp->m_sb.sb_frextents);
}

static void
xfs_destroy_percpu_counters(
	struct xfs_mount	*mp)
{
	enum xfs_free_counter	i;

	for (i = 0; i < XC_FREE_NR; i++)
		percpu_counter_destroy(&mp->m_free[i].count);
	percpu_counter_destroy(&mp->m_icount);
	percpu_counter_destroy(&mp->m_ifree);
	ASSERT(xfs_is_shutdown(mp) ||
	       percpu_counter_sum(&mp->m_delalloc_rtextents) == 0);
	percpu_counter_destroy(&mp->m_delalloc_rtextents);
	ASSERT(xfs_is_shutdown(mp) ||
	       percpu_counter_sum(&mp->m_delalloc_blks) == 0);
	percpu_counter_destroy(&mp->m_delalloc_blks);
}

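/*
 * Allocate the per-cpu structures used to defer inode inactivation and
 * point each one back at its cpu and mount.
 */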
static int
xfs_inodegc_init_percpu(
	struct xfs_mount	*mp)
{
	struct xfs_inodegc	*gc;
	int			cpu;

	mp->m_inodegc = alloc_percpu(struct xfs_inodegc);
	if (!mp->m_inodegc)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		gc = per_cpu_ptr(mp->m_inodegc, cpu);
		gc->cpu = cpu;
		gc->mp = mp;
		init_llist_head(&gc->list);
		gc->items = 0;
		gc->error = 0;
		INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker);
	}
	return 0;
}

static void
xfs_inodegc_free_percpu(
	struct xfs_mount	*mp)
{
	if (!mp->m_inodegc)
		return;
	free_percpu(mp->m_inodegc);
}

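/*
 * Tear the mount down in roughly the reverse order that
 * xfs_fs_fill_super() set it up.
 */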
static void
xfs_fs_put_super(
	struct super_block	*sb)
{
	struct xfs_mount	*mp = XFS_M(sb);

	xfs_notice(mp, "Unmounting Filesystem %pU", &mp->m_sb.sb_uuid);
	xfs_filestream_unmount(mp);
	xfs_unmountfs(mp);

	xfs_rtmount_freesb(mp);
	xfs_freesb(mp);
	xchk_mount_stats_free(mp);
	free_percpu(mp->m_stats.xs_stats);
	xfs_inodegc_free_percpu(mp);
	xfs_destroy_percpu_counters(mp);
	xfs_destroy_mount_workqueues(mp);
	xfs_shutdown_devices(mp);
}

static long
xfs_fs_nr_cached_objects(
	struct super_block	*sb,
	struct shrink_control	*sc)
{
	/* Paranoia: catch incorrect calls during mount setup or teardown */
	if (WARN_ON_ONCE(!sb->s_fs_info))
		return 0;
	return xfs_reclaim_inodes_count(XFS_M(sb));
}

static long
xfs_fs_free_cached_objects(
	struct super_block	*sb,
	struct shrink_control	*sc)
{
	return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan);
}

static void
xfs_fs_shutdown(
	struct super_block	*sb)
{
	xfs_force_shutdown(XFS_M(sb), SHUTDOWN_DEVICE_REMOVED);
}

static int
xfs_fs_show_stats(
	struct seq_file		*m,
	struct dentry		*root)
{
	struct xfs_mount	*mp = XFS_M(root->d_sb);

	if (xfs_has_zoned(mp) && IS_ENABLED(CONFIG_XFS_RT))
		xfs_zoned_show_stats(m, mp);
	return 0;
}

static const struct super_operations xfs_super_operations = {
	.alloc_inode		= xfs_fs_alloc_inode,
	.destroy_inode		= xfs_fs_destroy_inode,
	.dirty_inode		= xfs_fs_dirty_inode,
	.drop_inode		= xfs_fs_drop_inode,
	.evict_inode		= xfs_fs_evict_inode,
	.put_super		= xfs_fs_put_super,
	.sync_fs		= xfs_fs_sync_fs,
	.freeze_fs		= xfs_fs_freeze,
	.unfreeze_fs		= xfs_fs_unfreeze,
	.statfs			= xfs_fs_statfs,
	.show_options		= xfs_fs_show_options,
	.nr_cached_objects	= xfs_fs_nr_cached_objects,
	.free_cached_objects	= xfs_fs_free_cached_objects,
	.shutdown		= xfs_fs_shutdown,
	.show_stats		= xfs_fs_show_stats,
};

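/*
 * Parse an integer mount option argument with an optional K/M/G binary
 * suffix, e.g. "logbsize=32k" stores 32768 in *res.
 */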
static int
suffix_kstrtoint(
	const char	*s,
	unsigned int	base,
	int		*res)
{
	int		last, shift_left_factor = 0, _res;
	char		*value;
	int		ret = 0;

	value = kstrdup(s, GFP_KERNEL);
	if (!value)
		return -ENOMEM;

	last = strlen(value) - 1;
	if (value[last] == 'K' || value[last] == 'k') {
		shift_left_factor = 10;
		value[last] = '\0';
	}
	if (value[last] == 'M' || value[last] == 'm') {
		shift_left_factor = 20;
		value[last] = '\0';
	}
	if (value[last] == 'G' || value[last] == 'g') {
		shift_left_factor = 30;
		value[last] = '\0';
	}

	if (kstrtoint(value, base, &_res))
		ret = -EINVAL;
	kfree(value);
	*res = _res << shift_left_factor;
	return ret;
}

static inline void
xfs_fs_warn_deprecated(
	struct fs_context	*fc,
	struct fs_parameter	*param,
	uint64_t		flag,
	bool			value)
{
	/* Don't print the warning if reconfiguring and current mount point
	 * already had the flag set
	 */
	if ((fc->purpose & FS_CONTEXT_FOR_RECONFIGURE) &&
	    !!(XFS_M(fc->root->d_sb)->m_features & flag) == value)
		return;
	xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key);
}

/*
 * Set mount state from a mount option.
 *
 * NOTE: mp->m_super is NULL here!
 */
static int
xfs_fs_parse_param(
	struct fs_context	*fc,
	struct fs_parameter	*param)
{
	struct xfs_mount	*parsing_mp = fc->s_fs_info;
	struct fs_parse_result	result;
	int			size = 0;
	int			opt;

	BUILD_BUG_ON(XFS_QFLAGS_MNTOPTS & XFS_MOUNT_QUOTA_ALL);

	opt = fs_parse(fc, xfs_fs_parameters, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case Opt_logbufs:
		parsing_mp->m_logbufs = result.uint_32;
		return 0;
	case Opt_logbsize:
		if (suffix_kstrtoint(param->string, 10, &parsing_mp->m_logbsize))
			return -EINVAL;
		return 0;
	case Opt_logdev:
		kfree(parsing_mp->m_logname);
		parsing_mp->m_logname = kstrdup(param->string, GFP_KERNEL);
		if (!parsing_mp->m_logname)
			return -ENOMEM;
		return 0;
	case Opt_rtdev:
		kfree(parsing_mp->m_rtname);
		parsing_mp->m_rtname = kstrdup(param->string, GFP_KERNEL);
		if (!parsing_mp->m_rtname)
			return -ENOMEM;
		return 0;
	case Opt_allocsize:
		if (suffix_kstrtoint(param->string, 10, &size))
			return -EINVAL;
		parsing_mp->m_allocsize_log = ffs(size) - 1;
		parsing_mp->m_features |= XFS_FEAT_ALLOCSIZE;
		return 0;
	case Opt_grpid:
	case Opt_bsdgroups:
		parsing_mp->m_features |= XFS_FEAT_GRPID;
		return 0;
	case Opt_nogrpid:
	case Opt_sysvgroups:
		parsing_mp->m_features &= ~XFS_FEAT_GRPID;
		return 0;
	case Opt_wsync:
		parsing_mp->m_features |= XFS_FEAT_WSYNC;
		return 0;
	case Opt_norecovery:
		parsing_mp->m_features |= XFS_FEAT_NORECOVERY;
		return 0;
	case Opt_noalign:
		parsing_mp->m_features |= XFS_FEAT_NOALIGN;
		return 0;
	case Opt_swalloc:
		parsing_mp->m_features |= XFS_FEAT_SWALLOC;
		return 0;
	case Opt_sunit:
		parsing_mp->m_dalign = result.uint_32;
		return 0;
	case Opt_swidth:
		parsing_mp->m_swidth = result.uint_32;
		return 0;
	case Opt_inode32:
		parsing_mp->m_features |= XFS_FEAT_SMALL_INUMS;
		return 0;
	case Opt_inode64:
		parsing_mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
		return 0;
	case Opt_nouuid:
		parsing_mp->m_features |= XFS_FEAT_NOUUID;
		return 0;
	case Opt_largeio:
		parsing_mp->m_features |= XFS_FEAT_LARGE_IOSIZE;
		return 0;
	case Opt_nolargeio:
		parsing_mp->m_features &= ~XFS_FEAT_LARGE_IOSIZE;
		return 0;
	case Opt_filestreams:
		parsing_mp->m_features |= XFS_FEAT_FILESTREAMS;
		return 0;
	case Opt_noquota:
		parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
		parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
		parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
		return 0;
	case Opt_quota:
	case Opt_uquota:
	case Opt_usrquota:
		parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ENFD);
		parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
		return 0;
	case Opt_qnoenforce:
	case Opt_uqnoenforce:
		parsing_mp->m_qflags |= XFS_UQUOTA_ACCT;
		parsing_mp->m_qflags &= ~XFS_UQUOTA_ENFD;
		parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
		return 0;
	case Opt_pquota:
	case Opt_prjquota:
		parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ENFD);
		parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
		return 0;
	case Opt_pqnoenforce:
		parsing_mp->m_qflags |= XFS_PQUOTA_ACCT;
		parsing_mp->m_qflags &= ~XFS_PQUOTA_ENFD;
		parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
		return 0;
	case Opt_gquota:
	case Opt_grpquota:
		parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ENFD);
		parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
		return 0;
	case Opt_gqnoenforce:
		parsing_mp->m_qflags |= XFS_GQUOTA_ACCT;
		parsing_mp->m_qflags &= ~XFS_GQUOTA_ENFD;
		parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
		return 0;
	case Opt_discard:
		parsing_mp->m_features |= XFS_FEAT_DISCARD;
		return 0;
	case Opt_nodiscard:
		parsing_mp->m_features &= ~XFS_FEAT_DISCARD;
		return 0;
#ifdef CONFIG_FS_DAX
	case Opt_dax:
		xfs_mount_set_dax_mode(parsing_mp, XFS_DAX_ALWAYS);
		return 0;
	case Opt_dax_enum:
		xfs_mount_set_dax_mode(parsing_mp, result.uint_32);
		return 0;
#endif
	/* Following mount options will be removed in September 2025 */
	case Opt_ikeep:
		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, true);
		parsing_mp->m_features |= XFS_FEAT_IKEEP;
		return 0;
	case Opt_noikeep:
		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, false);
		parsing_mp->m_features &= ~XFS_FEAT_IKEEP;
		return 0;
	case Opt_attr2:
		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_ATTR2, true);
		parsing_mp->m_features |= XFS_FEAT_ATTR2;
		return 0;
	case Opt_noattr2:
		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_NOATTR2, true);
		parsing_mp->m_features |= XFS_FEAT_NOATTR2;
		return 0;
	case Opt_max_open_zones:
		parsing_mp->m_max_open_zones = result.uint_32;
		return 0;
	case Opt_lifetime:
		parsing_mp->m_features &= ~XFS_FEAT_NOLIFETIME;
		return 0;
	case Opt_nolifetime:
		parsing_mp->m_features |= XFS_FEAT_NOLIFETIME;
		return 0;
	default:
		xfs_warn(parsing_mp, "unknown mount option [%s].", param->key);
		return -EINVAL;
	}

	return 0;
}

static int
xfs_fs_validate_params(
	struct xfs_mount	*mp)
{
	/* No recovery flag requires a read-only mount */
	if (xfs_has_norecovery(mp) && !xfs_is_readonly(mp)) {
		xfs_warn(mp, "no-recovery mounts must be read-only.");
		return -EINVAL;
	}

	/*
	 * We have not read the superblock at this point, so only the attr2
	 * mount option can set the attr2 feature by this stage.
	 */
	if (xfs_has_attr2(mp) && xfs_has_noattr2(mp)) {
		xfs_warn(mp, "attr2 and noattr2 cannot both be specified.");
		return -EINVAL;
	}

	if (xfs_has_noalign(mp) && (mp->m_dalign || mp->m_swidth)) {
		xfs_warn(mp,
	"sunit and swidth options incompatible with the noalign option");
		return -EINVAL;
	}

	if (!IS_ENABLED(CONFIG_XFS_QUOTA) &&
	    (mp->m_qflags & ~XFS_QFLAGS_MNTOPTS)) {
		xfs_warn(mp, "quota support not available in this kernel.");
		return -EINVAL;
	}

	if ((mp->m_dalign && !mp->m_swidth) ||
	    (!mp->m_dalign && mp->m_swidth)) {
		xfs_warn(mp, "sunit and swidth must be specified together");
		return -EINVAL;
	}

	if (mp->m_dalign && (mp->m_swidth % mp->m_dalign != 0)) {
		xfs_warn(mp,
	"stripe width (%d) must be a multiple of the stripe unit (%d)",
			mp->m_swidth, mp->m_dalign);
		return -EINVAL;
	}

	if (mp->m_logbufs != -1 &&
	    mp->m_logbufs != 0 &&
	    (mp->m_logbufs < XLOG_MIN_ICLOGS ||
	     mp->m_logbufs > XLOG_MAX_ICLOGS)) {
		xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
			mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
		return -EINVAL;
	}

	if (mp->m_logbsize != -1 &&
	    mp->m_logbsize != 0 &&
	    (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
	     mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
	     !is_power_of_2(mp->m_logbsize))) {
		xfs_warn(mp,
			"invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
			mp->m_logbsize);
		return -EINVAL;
	}

	if (xfs_has_allocsize(mp) &&
	    (mp->m_allocsize_log > XFS_MAX_IO_LOG ||
	     mp->m_allocsize_log < XFS_MIN_IO_LOG)) {
		xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
			mp->m_allocsize_log, XFS_MIN_IO_LOG, XFS_MAX_IO_LOG);
		return -EINVAL;
	}

	return 0;
}

struct dentry *
xfs_debugfs_mkdir(
	const char	*name,
	struct dentry	*parent)
{
	struct dentry	*child;

	/* Apparently we're expected to ignore error returns?? */
	child = debugfs_create_dir(name, parent);
	if (IS_ERR(child))
		return NULL;

	return child;
}

static int
xfs_fs_fill_super(
	struct super_block	*sb,
	struct fs_context	*fc)
{
	struct xfs_mount	*mp = sb->s_fs_info;
	struct inode		*root;
	int			flags = 0, error;

	mp->m_super = sb;

	/*
	 * Copy VFS mount flags from the context now that all parameter parsing
	 * is guaranteed to have been completed by either the old mount API or
	 * the newer fsopen/fsconfig API.
	 */
	if (fc->sb_flags & SB_RDONLY)
		xfs_set_readonly(mp);
	if (fc->sb_flags & SB_DIRSYNC)
		mp->m_features |= XFS_FEAT_DIRSYNC;
	if (fc->sb_flags & SB_SYNCHRONOUS)
		mp->m_features |= XFS_FEAT_WSYNC;

	error = xfs_fs_validate_params(mp);
	if (error)
		return error;

	sb_min_blocksize(sb, BBSIZE);
	sb->s_xattr = xfs_xattr_handlers;
	sb->s_export_op = &xfs_export_operations;
#ifdef CONFIG_XFS_QUOTA
	sb->s_qcop = &xfs_quotactl_operations;
	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
#endif
	sb->s_op = &xfs_super_operations;

	/*
	 * Delay mount work if the debug hook is set. This is debug
	 * instrumentation to coordinate simulation of xfs mount failures with
	 * VFS superblock operations.
	 */
	if (xfs_globals.mount_delay) {
		xfs_notice(mp, "Delaying mount for %d seconds.",
			xfs_globals.mount_delay);
		msleep(xfs_globals.mount_delay * 1000);
	}

1667 	if (fc->sb_flags & SB_SILENT)
1668 		flags |= XFS_MFSI_QUIET;
1669 
1670 	error = xfs_open_devices(mp);
1671 	if (error)
1672 		return error;
1673 
1674 	if (xfs_debugfs) {
1675 		mp->m_debugfs = xfs_debugfs_mkdir(mp->m_super->s_id,
1676 						  xfs_debugfs);
1677 	} else {
1678 		mp->m_debugfs = NULL;
1679 	}
1680 
1681 	error = xfs_init_mount_workqueues(mp);
1682 	if (error)
1683 		goto out_shutdown_devices;
1684 
1685 	error = xfs_init_percpu_counters(mp);
1686 	if (error)
1687 		goto out_destroy_workqueues;
1688 
1689 	error = xfs_inodegc_init_percpu(mp);
1690 	if (error)
1691 		goto out_destroy_counters;
1692 
1693 	/* Allocate stats memory before we do operations that might use it */
1694 	mp->m_stats.xs_stats = alloc_percpu(struct xfsstats);
1695 	if (!mp->m_stats.xs_stats) {
1696 		error = -ENOMEM;
1697 		goto out_destroy_inodegc;
1698 	}
1699 
1700 	error = xchk_mount_stats_alloc(mp);
1701 	if (error)
1702 		goto out_free_stats;
1703 
1704 	error = xfs_readsb(mp, flags);
1705 	if (error)
1706 		goto out_free_scrub_stats;
1707 
1708 	error = xfs_finish_flags(mp);
1709 	if (error)
1710 		goto out_free_sb;
1711 
1712 	error = xfs_setup_devices(mp);
1713 	if (error)
1714 		goto out_free_sb;
1715 
1716 	/*
1717 	 * V4 support is undergoing deprecation.
1718 	 *
1719 	 * Note: this has to use an open coded m_features check as xfs_has_crc
1720 	 * always returns false for !CONFIG_XFS_SUPPORT_V4.
1721 	 */
1722 	if (!(mp->m_features & XFS_FEAT_CRC)) {
1723 		if (!IS_ENABLED(CONFIG_XFS_SUPPORT_V4)) {
1724 			xfs_warn(mp,
1725 	"Deprecated V4 format (crc=0) not supported by kernel.");
1726 			error = -EINVAL;
1727 			goto out_free_sb;
1728 		}
1729 		xfs_warn_once(mp,
1730 	"Deprecated V4 format (crc=0) will not be supported after September 2030.");
1731 	}
1732 
1733 	/* ASCII case insensitivity is undergoing deprecation. */
1734 	if (xfs_has_asciici(mp)) {
1735 #ifdef CONFIG_XFS_SUPPORT_ASCII_CI
1736 		xfs_warn_once(mp,
1737 	"Deprecated ASCII case-insensitivity feature (ascii-ci=1) will not be supported after September 2030.");
1738 #else
1739 		xfs_warn(mp,
1740 	"Deprecated ASCII case-insensitivity feature (ascii-ci=1) not supported by kernel.");
1741 		error = -EINVAL;
1742 		goto out_free_sb;
1743 #endif
1744 	}
1745 
1746 	/*
1747 	 * Filesystem claims it needs repair, so refuse the mount unless
1748 	 * norecovery is also specified, in which case the filesystem can
1749 	 * be mounted with no risk of further damage.
1750 	 */
1751 	if (xfs_has_needsrepair(mp) && !xfs_has_norecovery(mp)) {
1752 		xfs_warn(mp, "Filesystem needs repair.  Please run xfs_repair.");
1753 		error = -EFSCORRUPTED;
1754 		goto out_free_sb;
1755 	}
1756 
1757 	/*
1758 	 * Don't touch the filesystem if a user tool thinks it owns the primary
1759 	 * superblock.  mkfs doesn't clear the flag from secondary supers, so
1760 	 * we don't check them at all.
1761 	 */
1762 	if (mp->m_sb.sb_inprogress) {
1763 		xfs_warn(mp, "Offline file system operation in progress!");
1764 		error = -EFSCORRUPTED;
1765 		goto out_free_sb;
1766 	}
1767 
1768 	if (mp->m_sb.sb_blocksize > PAGE_SIZE) {
1769 		size_t max_folio_size = mapping_max_folio_size_supported();
1770 
1771 		if (!xfs_has_crc(mp)) {
1772 			xfs_warn(mp,
1773 "V4 Filesystem with blocksize %d bytes. Only pagesize (%ld) or less is supported.",
1774 				mp->m_sb.sb_blocksize, PAGE_SIZE);
1775 			error = -ENOSYS;
1776 			goto out_free_sb;
1777 		}
1778 
1779 		if (mp->m_sb.sb_blocksize > max_folio_size) {
1780 			xfs_warn(mp,
1781 "block size (%u bytes) not supported; only block size (%zu) or less is supported",
1782 				mp->m_sb.sb_blocksize, max_folio_size);
1783 			error = -ENOSYS;
1784 			goto out_free_sb;
1785 		}
1786 
1787 		xfs_warn_experimental(mp, XFS_EXPERIMENTAL_LBS);
1788 	}
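	/*
	 * For reference, mapping_max_folio_size_supported() used above is
	 * assumed to report the largest folio the page cache can create,
	 * roughly:
	 *
	 *	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
	 *		return 1U << (PAGE_SHIFT + MAX_PAGECACHE_ORDER);
	 *	return PAGE_SIZE;
	 *
	 * i.e. block sizes beyond PAGE_SIZE depend on large folio support.
	 */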
1789 
1790 	/* Ensure this filesystem fits in the page cache limits */
1791 	if (xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_dblocks) ||
1792 	    xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_rblocks)) {
1793 		xfs_warn(mp,
1794 		"file system too large to be mounted on this system.");
1795 		error = -EFBIG;
1796 		goto out_free_sb;
1797 	}
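	/*
	 * Worked example for the check above, assuming a 32-bit kernel with
	 * 4kiB pages: the page cache can index at most ULONG_MAX pages, i.e.
	 * about 2^32 * 2^12 bytes = 16 TiB, so a larger sb_dblocks or
	 * sb_rblocks must fail the mount with -EFBIG here.
	 */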
1798 
1799 	/*
1800 	 * XFS block mappings use 54 bits to store the logical block offset.
1801 	 * This should suffice to handle the maximum file size that the VFS
1802 	 * supports (currently 2^63 bytes on 64-bit and ULONG_MAX << PAGE_SHIFT
1803 	 * bytes on 32-bit), but as XFS and VFS have gotten the s_maxbytes
1804 	 * calculation wrong on 32-bit kernels in the past, we'll add a WARN_ON
1805 	 * to check this assertion.
1806 	 *
1807 	 * Avoid integer overflow by comparing the maximum bmbt offset to the
1808 	 * maximum pagecache offset in units of fs blocks.
1809 	 */
1810 	if (!xfs_verify_fileoff(mp, XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE))) {
1811 		xfs_warn(mp,
1812 "MAX_LFS_FILESIZE block offset (%llu) exceeds extent map maximum (%llu)!",
1813 			 XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE),
1814 			 XFS_MAX_FILEOFF);
1815 		error = -EINVAL;
1816 		goto out_free_sb;
1817 	}
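	/*
	 * Worked example for the assertion above, assuming 4kiB filesystem
	 * blocks on a 64-bit kernel: MAX_LFS_FILESIZE is 2^63 - 1 bytes, so
	 * XFS_B_TO_FSBT() yields a maximum pagecache offset of roughly 2^51
	 * blocks, comfortably below the 54-bit XFS_MAX_FILEOFF of 2^54 - 1.
	 */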
1818 
1819 	error = xfs_rtmount_readsb(mp);
1820 	if (error)
1821 		goto out_free_sb;
1822 
1823 	error = xfs_filestream_mount(mp);
1824 	if (error)
1825 		goto out_free_rtsb;
1826 
1827 	/*
1828 	 * We must configure the block size in the superblock before we run the
1829 	 * full mount process, as the mount process can look up and cache inodes.
1830 	 */
1831 	sb->s_magic = XFS_SUPER_MAGIC;
1832 	sb->s_blocksize = mp->m_sb.sb_blocksize;
1833 	sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
1834 	sb->s_maxbytes = MAX_LFS_FILESIZE;
1835 	sb->s_max_links = XFS_MAXLINK;
1836 	sb->s_time_gran = 1;
1837 	if (xfs_has_bigtime(mp)) {
1838 		sb->s_time_min = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MIN);
1839 		sb->s_time_max = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MAX);
1840 	} else {
1841 		sb->s_time_min = XFS_LEGACY_TIME_MIN;
1842 		sb->s_time_max = XFS_LEGACY_TIME_MAX;
1843 	}
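	/*
	 * In other words: without bigtime, timestamps span the classic signed
	 * 32-bit Unix range (December 1901 to January 2038); with bigtime, the
	 * encoding is assumed to extend the maximum out to the year 2486.
	 */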
1844 	trace_xfs_inode_timestamp_range(mp, sb->s_time_min, sb->s_time_max);
1845 	sb->s_iflags |= SB_I_CGROUPWB | SB_I_ALLOW_HSM;
1846 
1847 	set_posix_acl_flag(sb);
1848 
1849 	/* version 5 superblocks support inode version counters. */
1850 	if (xfs_has_crc(mp))
1851 		sb->s_flags |= SB_I_VERSION;
1852 
1853 	if (xfs_has_dax_always(mp)) {
1854 		error = xfs_setup_dax_always(mp);
1855 		if (error)
1856 			goto out_filestream_unmount;
1857 	}
1858 
1859 	if (xfs_has_discard(mp) && !bdev_max_discard_sectors(sb->s_bdev)) {
1860 		xfs_warn(mp,
1861 	"mounting with \"discard\" option, but the device does not support discard");
1862 		mp->m_features &= ~XFS_FEAT_DISCARD;
1863 	}
1864 
1865 	if (xfs_has_zoned(mp)) {
1866 		if (!xfs_has_metadir(mp)) {
1867 			xfs_alert(mp,
1868 		"metadir feature required for zoned realtime devices.");
1869 			error = -EINVAL;
1870 			goto out_filestream_unmount;
1871 		}
1872 		xfs_warn_experimental(mp, XFS_EXPERIMENTAL_ZONED);
1873 	} else if (xfs_has_metadir(mp)) {
1874 		xfs_warn_experimental(mp, XFS_EXPERIMENTAL_METADIR);
1875 	}
1876 
1877 	if (xfs_has_reflink(mp)) {
1878 		if (xfs_has_realtime(mp) &&
1879 		    !xfs_reflink_supports_rextsize(mp, mp->m_sb.sb_rextsize)) {
1880 			xfs_alert(mp,
1881 	"reflink not compatible with realtime extent size %u!",
1882 					mp->m_sb.sb_rextsize);
1883 			error = -EINVAL;
1884 			goto out_filestream_unmount;
1885 		}
1886 
1887 		if (xfs_has_zoned(mp)) {
1888 			xfs_alert(mp,
1889 	"reflink not compatible with zoned RT device!");
1890 			error = -EINVAL;
1891 			goto out_filestream_unmount;
1892 		}
1893 
1894 		if (xfs_globals.always_cow) {
1895 			xfs_info(mp, "using DEBUG-only always_cow mode.");
1896 			mp->m_always_cow = true;
1897 		}
1898 	}
1899 
1901 	if (xfs_has_exchange_range(mp))
1902 		xfs_warn_experimental(mp, XFS_EXPERIMENTAL_EXCHRANGE);
1903 
1904 	if (xfs_has_parent(mp))
1905 		xfs_warn_experimental(mp, XFS_EXPERIMENTAL_PPTR);
1906 
1907 	/*
1908 	 * If no quota mount options were provided, maybe we'll try to pick
1909 	 * up the quota accounting and enforcement flags from the ondisk sb.
1910 	 */
1911 	if (!(mp->m_qflags & XFS_QFLAGS_MNTOPTS))
1912 		xfs_set_resuming_quotaon(mp);
1913 	mp->m_qflags &= ~XFS_QFLAGS_MNTOPTS;
1914 
1915 	error = xfs_mountfs(mp);
1916 	if (error)
1917 		goto out_filestream_unmount;
1918 
1919 	root = igrab(VFS_I(mp->m_rootip));
1920 	if (!root) {
1921 		error = -ENOENT;
1922 		goto out_unmount;
1923 	}
1924 	sb->s_root = d_make_root(root);
1925 	if (!sb->s_root) {
1926 		error = -ENOMEM;
1927 		goto out_unmount;
1928 	}
1929 
1930 	return 0;
1931 
1932  out_filestream_unmount:
1933 	xfs_filestream_unmount(mp);
1934  out_free_rtsb:
1935 	xfs_rtmount_freesb(mp);
1936  out_free_sb:
1937 	xfs_freesb(mp);
1938  out_free_scrub_stats:
1939 	xchk_mount_stats_free(mp);
1940  out_free_stats:
1941 	free_percpu(mp->m_stats.xs_stats);
1942  out_destroy_inodegc:
1943 	xfs_inodegc_free_percpu(mp);
1944  out_destroy_counters:
1945 	xfs_destroy_percpu_counters(mp);
1946  out_destroy_workqueues:
1947 	xfs_destroy_mount_workqueues(mp);
1948  out_shutdown_devices:
1949 	xfs_shutdown_devices(mp);
1950 	return error;
1951 
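/*
 * This tail is only reachable once xfs_mountfs() has succeeded, so it must
 * unwind the fully set up mount first and then rejoins the common error
 * ladder above at out_free_rtsb.
 */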
1952  out_unmount:
1953 	xfs_filestream_unmount(mp);
1954 	xfs_unmountfs(mp);
1955 	goto out_free_rtsb;
1956 }
1957 
1958 static int
1959 xfs_fs_get_tree(
1960 	struct fs_context	*fc)
1961 {
1962 	return get_tree_bdev(fc, xfs_fs_fill_super);
1963 }
1964 
1965 static int
1966 xfs_remount_rw(
1967 	struct xfs_mount	*mp)
1968 {
1969 	struct xfs_sb		*sbp = &mp->m_sb;
1970 	int error;
1971 
1972 	if (xfs_has_norecovery(mp)) {
1973 		xfs_warn(mp,
1974 			"ro->rw transition prohibited on norecovery mount");
1975 		return -EINVAL;
1976 	}
1977 
1978 	if (xfs_sb_is_v5(sbp) &&
1979 	    xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
1980 		xfs_warn(mp,
1981 	"ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
1982 			(sbp->sb_features_ro_compat &
1983 				XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
1984 		return -EINVAL;
1985 	}
1986 
1987 	xfs_clear_readonly(mp);
1988 
1989 	/*
1990 	 * If this is the first remount to writeable state we might have some
1991 	 * superblock changes to update.
1992 	 */
1993 	if (mp->m_update_sb) {
1994 		error = xfs_sync_sb(mp, false);
1995 		if (error) {
1996 			xfs_warn(mp, "failed to write sb changes");
1997 			return error;
1998 		}
1999 		mp->m_update_sb = false;
2000 	}
2001 
2002 	/*
2003 	 * Fill out the reserve pool if it is empty. Use the stashed value if
2004 	 * it is non-zero, otherwise go with the default.
2005 	 */
2006 	xfs_restore_resvblks(mp);
2007 	xfs_log_work_queue(mp);
2008 	xfs_blockgc_start(mp);
2009 
2010 	/* Create the per-AG metadata reservation pool. */
2011 	error = xfs_fs_reserve_ag_blocks(mp);
2012 	if (error && error != -ENOSPC)
2013 		return error;
2014 
2015 	/* Re-enable the background inode inactivation worker. */
2016 	xfs_inodegc_start(mp);
2017 
2018 	/* Restart zone reclaim */
2019 	xfs_zone_gc_start(mp);
2020 
2021 	return 0;
2022 }
2023 
2024 static int
2025 xfs_remount_ro(
2026 	struct xfs_mount	*mp)
2027 {
2028 	struct xfs_icwalk	icw = {
2029 		.icw_flags	= XFS_ICWALK_FLAG_SYNC,
2030 	};
2031 	int			error;
2032 
2033 	/* Flush all the dirty data to disk. */
2034 	error = sync_filesystem(mp->m_super);
2035 	if (error)
2036 		return error;
2037 
2038 	/*
2039 	 * Cancel background eofb scanning so it cannot race with the final
2040 	 * log force+buftarg wait and deadlock the remount.
2041 	 */
2042 	xfs_blockgc_stop(mp);
2043 
2044 	/*
2045 	 * Clear out all remaining COW staging extents and speculative post-EOF
2046 	 * preallocations so that we don't leave inodes requiring inactivation
2047 	 * cleanups during reclaim on a read-only mount.  We must process every
2048 	 * cached inode, so this requires a synchronous cache scan.
2049 	 */
2050 	error = xfs_blockgc_free_space(mp, &icw);
2051 	if (error) {
2052 		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
2053 		return error;
2054 	}
2055 
2056 	/*
2057 	 * Stop the inodegc background worker.  xfs_fs_reconfigure already
2058 	 * flushed all pending inodegc work when it sync'd the filesystem.
2059 	 * The VFS holds s_umount, so we know that inodes cannot enter
2060 	 * xfs_fs_destroy_inode during a remount operation.  In readonly mode
2061 	 * we send inodes straight to reclaim, so no inodes will be queued.
2062 	 */
2063 	xfs_inodegc_stop(mp);
2064 
2065 	/* Stop zone reclaim */
2066 	xfs_zone_gc_stop(mp);
2067 
2068 	/* Free the per-AG metadata reservation pool. */
2069 	xfs_fs_unreserve_ag_blocks(mp);
2070 
2071 	/*
2072 	 * Before we sync the metadata, we need to free up the reserve block
2073 	 * pool so that the used block count in the superblock on disk is
2074 	 * correct at the end of the remount. Stash the current reserve pool
2075 	 * size so that if we get remounted rw, we can return it to the same
2076 	 * size.
2077 	 */
2078 	xfs_save_resvblks(mp);
2079 
2080 	xfs_log_clean(mp);
2081 	xfs_set_readonly(mp);
2082 
2083 	return 0;
2084 }
2085 
2086 /*
2087  * Logically we would return an error here to prevent users from believing
2088  * they might have changed mount options using remount which can't be changed.
2089  *
2090  * But unfortunately mount(8) adds all options from mtab and fstab to the mount
2091  * arguments in some cases so we can't blindly reject options, but have to
2092  * check for each specified option if it actually differs from the currently
2093  * set option and only reject it if that's the case.
2094  *
2095  * Until that is implemented we return success for every remount request, and
2096  * silently ignore all options that we can't actually change.
2097  */
2098 static int
2099 xfs_fs_reconfigure(
2100 	struct fs_context *fc)
2101 {
2102 	struct xfs_mount	*mp = XFS_M(fc->root->d_sb);
2103 	struct xfs_mount	*new_mp = fc->s_fs_info;
2104 	int			flags = fc->sb_flags;
2105 	int			error;
2106 
2107 	new_mp->m_qflags &= ~XFS_QFLAGS_MNTOPTS;
2108 
2109 	/* version 5 superblocks always support version counters. */
2110 	if (xfs_has_crc(mp))
2111 		fc->sb_flags |= SB_I_VERSION;
2112 
2113 	error = xfs_fs_validate_params(new_mp);
2114 	if (error)
2115 		return error;
2116 
2117 	/* attr2 -> noattr2 */
2118 	if (xfs_has_noattr2(new_mp)) {
2119 		if (xfs_has_crc(mp)) {
2120 			xfs_warn(mp,
2121 			"attr2 is always enabled for a V5 filesystem - can't be changed.");
2122 			return -EINVAL;
2123 		}
2124 		mp->m_features &= ~XFS_FEAT_ATTR2;
2125 		mp->m_features |= XFS_FEAT_NOATTR2;
2126 	} else if (xfs_has_attr2(new_mp)) {
2127 		/* noattr2 -> attr2 */
2128 		mp->m_features &= ~XFS_FEAT_NOATTR2;
2129 		mp->m_features |= XFS_FEAT_ATTR2;
2130 	}
2131 
2132 	/* inode32 -> inode64 */
2133 	if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) {
2134 		mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
2135 		mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount);
2136 	}
2137 
2138 	/* inode64 -> inode32 */
2139 	if (!xfs_has_small_inums(mp) && xfs_has_small_inums(new_mp)) {
2140 		mp->m_features |= XFS_FEAT_SMALL_INUMS;
2141 		mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount);
2142 	}
2143 
2144 	/*
2145 	 * Now that mp has been modified according to the remount options, we
2146 	 * do a final option validation with xfs_finish_flags() just like it is
2148 	 * done during mount. We cannot use xfs_finish_flags() on new_mp as it
2149 	 * contains only the user given options.
2150 	 */
2151 	error = xfs_finish_flags(mp);
2152 	if (error)
2153 		return error;
2154 
2155 	/* ro -> rw */
2156 	if (xfs_is_readonly(mp) && !(flags & SB_RDONLY)) {
2157 		error = xfs_remount_rw(mp);
2158 		if (error)
2159 			return error;
2160 	}
2161 
2162 	/* rw -> ro */
2163 	if (!xfs_is_readonly(mp) && (flags & SB_RDONLY)) {
2164 		error = xfs_remount_ro(mp);
2165 		if (error)
2166 			return error;
2167 	}
2168 
2169 	return 0;
2170 }
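/*
 * For example, a remount initiated from userspace as
 *
 *	mount -o remount,ro /mnt
 *
 * arrives here with SB_RDONLY set in fc->sb_flags and takes the rw -> ro leg
 * above.
 */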
2171 
2172 static void
2173 xfs_fs_free(
2174 	struct fs_context	*fc)
2175 {
2176 	struct xfs_mount	*mp = fc->s_fs_info;
2177 
2178 	/*
2179 	 * mp is stored in the fs_context when it is initialized.
2180 	 * mp is transferred to the superblock on a successful mount,
2181 	 * but if an error occurs before the transfer we have to free
2182 	 * it here.
2183 	 */
2184 	if (mp)
2185 		xfs_mount_free(mp);
2186 }
2187 
2188 static const struct fs_context_operations xfs_context_ops = {
2189 	.parse_param = xfs_fs_parse_param,
2190 	.get_tree    = xfs_fs_get_tree,
2191 	.reconfigure = xfs_fs_reconfigure,
2192 	.free        = xfs_fs_free,
2193 };
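/*
 * The ops above plug into the new (fs_context) mount API.  Driven entirely
 * through the new syscalls, a mount would look roughly like this from
 * userspace (a sketch; the device and mountpoint paths are placeholders):
 *
 *	fd = fsopen("xfs", 0);                            // xfs_init_fs_context()
 *	fsconfig(fd, FSCONFIG_SET_STRING, "source", "/dev/sda1", 0);
 *	fsconfig(fd, FSCONFIG_SET_FLAG, "noikeep", NULL, 0); // xfs_fs_parse_param()
 *	fsconfig(fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0); // xfs_fs_get_tree()
 *	mfd = fsmount(fd, 0, 0);
 *	move_mount(mfd, "", AT_FDCWD, "/mnt", MOVE_MOUNT_F_EMPTY_PATH);
 */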
2194 
2195 /*
2196  * WARNING: do not initialise any parameters in this function that depend on
2197  * mount option parsing having already been performed, as this can be called
2198  * from fsopen() before any parameters have been set.
2199  */
2200 static int
2201 xfs_init_fs_context(
2202 	struct fs_context	*fc)
2203 {
2204 	struct xfs_mount	*mp;
2205 	int			i;
2206 
2207 	mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL | __GFP_NOFAIL);
2208 	if (!mp)
2209 		return -ENOMEM;
2210 
2211 	spin_lock_init(&mp->m_sb_lock);
2212 	for (i = 0; i < XG_TYPE_MAX; i++)
2213 		xa_init(&mp->m_groups[i].xa);
2214 	mutex_init(&mp->m_growlock);
2215 	mutex_init(&mp->m_metafile_resv_lock);
2216 	INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
2217 	INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
2218 	mp->m_kobj.kobject.kset = xfs_kset;
2219 	/*
2220 	 * We don't create the finobt per-ag space reservation until after log
2221 	 * recovery, so we must set this to true so that an ifree transaction
2222 	 * started during log recovery will not depend on space reservations
2223 	 * for finobt expansion.
2224 	 */
2225 	mp->m_finobt_nores = true;
2226 
2227 	/*
2228 	 * These can be overridden by the mount option parsing.
2229 	 */
2230 	mp->m_logbufs = -1;
2231 	mp->m_logbsize = -1;
2232 	mp->m_allocsize_log = 16; /* 64k */
2233 
2234 	xfs_hooks_init(&mp->m_dir_update_hooks);
2235 
2236 	fc->s_fs_info = mp;
2237 	fc->ops = &xfs_context_ops;
2238 
2239 	return 0;
2240 }
2241 
2242 static void
2243 xfs_kill_sb(
2244 	struct super_block		*sb)
2245 {
2246 	kill_block_super(sb);
2247 	xfs_mount_free(XFS_M(sb));
2248 }
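/*
 * Ordering note: kill_block_super() tears down the VFS side (including the
 * ->put_super path) while sb->s_fs_info still points at the xfs_mount; only
 * once the superblock is gone is the mount structure itself freed.
 */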
2249 
2250 static struct file_system_type xfs_fs_type = {
2251 	.owner			= THIS_MODULE,
2252 	.name			= "xfs",
2253 	.init_fs_context	= xfs_init_fs_context,
2254 	.parameters		= xfs_fs_parameters,
2255 	.kill_sb		= xfs_kill_sb,
2256 	.fs_flags		= FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME |
2257 				  FS_LBS,
2258 };
2259 MODULE_ALIAS_FS("xfs");
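/*
 * MODULE_ALIAS_FS() above registers a "fs-xfs" module alias, which is what
 * allows the kernel to demand-load this module the first time userspace asks
 * to mount an xfs filesystem.
 */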
2260 
2261 STATIC int __init
2262 xfs_init_caches(void)
2263 {
2264 	int		error;
2265 
2266 	xfs_buf_cache = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0,
2267 					 SLAB_HWCACHE_ALIGN |
2268 					 SLAB_RECLAIM_ACCOUNT,
2269 					 NULL);
2270 	if (!xfs_buf_cache)
2271 		goto out;
2272 
2273 	xfs_log_ticket_cache = kmem_cache_create("xfs_log_ticket",
2274 						sizeof(struct xlog_ticket),
2275 						0, 0, NULL);
2276 	if (!xfs_log_ticket_cache)
2277 		goto out_destroy_buf_cache;
2278 
2279 	error = xfs_btree_init_cur_caches();
2280 	if (error)
2281 		goto out_destroy_log_ticket_cache;
2282 
2283 	error = rcbagbt_init_cur_cache();
2284 	if (error)
2285 		goto out_destroy_btree_cur_cache;
2286 
2287 	error = xfs_defer_init_item_caches();
2288 	if (error)
2289 		goto out_destroy_rcbagbt_cur_cache;
2290 
2291 	xfs_da_state_cache = kmem_cache_create("xfs_da_state",
2292 					      sizeof(struct xfs_da_state),
2293 					      0, 0, NULL);
2294 	if (!xfs_da_state_cache)
2295 		goto out_destroy_defer_item_cache;
2296 
2297 	xfs_ifork_cache = kmem_cache_create("xfs_ifork",
2298 					   sizeof(struct xfs_ifork),
2299 					   0, 0, NULL);
2300 	if (!xfs_ifork_cache)
2301 		goto out_destroy_da_state_cache;
2302 
2303 	xfs_trans_cache = kmem_cache_create("xfs_trans",
2304 					   sizeof(struct xfs_trans),
2305 					   0, 0, NULL);
2306 	if (!xfs_trans_cache)
2307 		goto out_destroy_ifork_cache;
2308 
2310 	/*
2311 	 * The size of the cache-allocated buf log item is the maximum
2312 	 * size possible under XFS.  This wastes a little bit of memory,
2313 	 * but it is much faster.
2314 	 */
2315 	xfs_buf_item_cache = kmem_cache_create("xfs_buf_item",
2316 					      sizeof(struct xfs_buf_log_item),
2317 					      0, 0, NULL);
2318 	if (!xfs_buf_item_cache)
2319 		goto out_destroy_trans_cache;
2320 
2321 	xfs_efd_cache = kmem_cache_create("xfs_efd_item",
2322 			xfs_efd_log_item_sizeof(XFS_EFD_MAX_FAST_EXTENTS),
2323 			0, 0, NULL);
2324 	if (!xfs_efd_cache)
2325 		goto out_destroy_buf_item_cache;
2326 
2327 	xfs_efi_cache = kmem_cache_create("xfs_efi_item",
2328 			xfs_efi_log_item_sizeof(XFS_EFI_MAX_FAST_EXTENTS),
2329 			0, 0, NULL);
2330 	if (!xfs_efi_cache)
2331 		goto out_destroy_efd_cache;
2332 
2333 	xfs_inode_cache = kmem_cache_create("xfs_inode",
2334 					   sizeof(struct xfs_inode), 0,
2335 					   (SLAB_HWCACHE_ALIGN |
2336 					    SLAB_RECLAIM_ACCOUNT |
2337 					    SLAB_ACCOUNT),
2338 					   xfs_fs_inode_init_once);
2339 	if (!xfs_inode_cache)
2340 		goto out_destroy_efi_cache;
2341 
2342 	xfs_ili_cache = kmem_cache_create("xfs_ili",
2343 					 sizeof(struct xfs_inode_log_item), 0,
2344 					 SLAB_RECLAIM_ACCOUNT,
2345 					 NULL);
2346 	if (!xfs_ili_cache)
2347 		goto out_destroy_inode_cache;
2348 
2349 	xfs_icreate_cache = kmem_cache_create("xfs_icr",
2350 					     sizeof(struct xfs_icreate_item),
2351 					     0, 0, NULL);
2352 	if (!xfs_icreate_cache)
2353 		goto out_destroy_ili_cache;
2354 
2355 	xfs_rud_cache = kmem_cache_create("xfs_rud_item",
2356 					 sizeof(struct xfs_rud_log_item),
2357 					 0, 0, NULL);
2358 	if (!xfs_rud_cache)
2359 		goto out_destroy_icreate_cache;
2360 
2361 	xfs_rui_cache = kmem_cache_create("xfs_rui_item",
2362 			xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS),
2363 			0, 0, NULL);
2364 	if (!xfs_rui_cache)
2365 		goto out_destroy_rud_cache;
2366 
2367 	xfs_cud_cache = kmem_cache_create("xfs_cud_item",
2368 					 sizeof(struct xfs_cud_log_item),
2369 					 0, 0, NULL);
2370 	if (!xfs_cud_cache)
2371 		goto out_destroy_rui_cache;
2372 
2373 	xfs_cui_cache = kmem_cache_create("xfs_cui_item",
2374 			xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS),
2375 			0, 0, NULL);
2376 	if (!xfs_cui_cache)
2377 		goto out_destroy_cud_cache;
2378 
2379 	xfs_bud_cache = kmem_cache_create("xfs_bud_item",
2380 					 sizeof(struct xfs_bud_log_item),
2381 					 0, 0, NULL);
2382 	if (!xfs_bud_cache)
2383 		goto out_destroy_cui_cache;
2384 
2385 	xfs_bui_cache = kmem_cache_create("xfs_bui_item",
2386 			xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS),
2387 			0, 0, NULL);
2388 	if (!xfs_bui_cache)
2389 		goto out_destroy_bud_cache;
2390 
2391 	xfs_attrd_cache = kmem_cache_create("xfs_attrd_item",
2392 					    sizeof(struct xfs_attrd_log_item),
2393 					    0, 0, NULL);
2394 	if (!xfs_attrd_cache)
2395 		goto out_destroy_bui_cache;
2396 
2397 	xfs_attri_cache = kmem_cache_create("xfs_attri_item",
2398 					    sizeof(struct xfs_attri_log_item),
2399 					    0, 0, NULL);
2400 	if (!xfs_attri_cache)
2401 		goto out_destroy_attrd_cache;
2402 
2403 	xfs_iunlink_cache = kmem_cache_create("xfs_iul_item",
2404 					     sizeof(struct xfs_iunlink_item),
2405 					     0, 0, NULL);
2406 	if (!xfs_iunlink_cache)
2407 		goto out_destroy_attri_cache;
2408 
2409 	xfs_xmd_cache = kmem_cache_create("xfs_xmd_item",
2410 					 sizeof(struct xfs_xmd_log_item),
2411 					 0, 0, NULL);
2412 	if (!xfs_xmd_cache)
2413 		goto out_destroy_iul_cache;
2414 
2415 	xfs_xmi_cache = kmem_cache_create("xfs_xmi_item",
2416 					 sizeof(struct xfs_xmi_log_item),
2417 					 0, 0, NULL);
2418 	if (!xfs_xmi_cache)
2419 		goto out_destroy_xmd_cache;
2420 
2421 	xfs_parent_args_cache = kmem_cache_create("xfs_parent_args",
2422 					     sizeof(struct xfs_parent_args),
2423 					     0, 0, NULL);
2424 	if (!xfs_parent_args_cache)
2425 		goto out_destroy_xmi_cache;
2426 
2427 	return 0;
2428 
2429  out_destroy_xmi_cache:
2430 	kmem_cache_destroy(xfs_xmi_cache);
2431  out_destroy_xmd_cache:
2432 	kmem_cache_destroy(xfs_xmd_cache);
2433  out_destroy_iul_cache:
2434 	kmem_cache_destroy(xfs_iunlink_cache);
2435  out_destroy_attri_cache:
2436 	kmem_cache_destroy(xfs_attri_cache);
2437  out_destroy_attrd_cache:
2438 	kmem_cache_destroy(xfs_attrd_cache);
2439  out_destroy_bui_cache:
2440 	kmem_cache_destroy(xfs_bui_cache);
2441  out_destroy_bud_cache:
2442 	kmem_cache_destroy(xfs_bud_cache);
2443  out_destroy_cui_cache:
2444 	kmem_cache_destroy(xfs_cui_cache);
2445  out_destroy_cud_cache:
2446 	kmem_cache_destroy(xfs_cud_cache);
2447  out_destroy_rui_cache:
2448 	kmem_cache_destroy(xfs_rui_cache);
2449  out_destroy_rud_cache:
2450 	kmem_cache_destroy(xfs_rud_cache);
2451  out_destroy_icreate_cache:
2452 	kmem_cache_destroy(xfs_icreate_cache);
2453  out_destroy_ili_cache:
2454 	kmem_cache_destroy(xfs_ili_cache);
2455  out_destroy_inode_cache:
2456 	kmem_cache_destroy(xfs_inode_cache);
2457  out_destroy_efi_cache:
2458 	kmem_cache_destroy(xfs_efi_cache);
2459  out_destroy_efd_cache:
2460 	kmem_cache_destroy(xfs_efd_cache);
2461  out_destroy_buf_item_cache:
2462 	kmem_cache_destroy(xfs_buf_item_cache);
2463  out_destroy_trans_cache:
2464 	kmem_cache_destroy(xfs_trans_cache);
2465  out_destroy_ifork_cache:
2466 	kmem_cache_destroy(xfs_ifork_cache);
2467  out_destroy_da_state_cache:
2468 	kmem_cache_destroy(xfs_da_state_cache);
2469  out_destroy_defer_item_cache:
2470 	xfs_defer_destroy_item_caches();
2471  out_destroy_rcbagbt_cur_cache:
2472 	rcbagbt_destroy_cur_cache();
2473  out_destroy_btree_cur_cache:
2474 	xfs_btree_destroy_cur_caches();
2475  out_destroy_log_ticket_cache:
2476 	kmem_cache_destroy(xfs_log_ticket_cache);
2477  out_destroy_buf_cache:
2478 	kmem_cache_destroy(xfs_buf_cache);
2479  out:
2480 	return -ENOMEM;
2481 }
2482 
2483 STATIC void
2484 xfs_destroy_caches(void)
2485 {
2486 	/*
2487 	 * Make sure all delayed RCU frees have completed before we
2488 	 * destroy the caches.
2489 	 */
2490 	rcu_barrier();
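	/*
	 * A sketch of why this matters: inode freeing is assumed to be
	 * RCU-deferred along the lines of
	 *
	 *	call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
	 *
	 * so without the barrier a late callback could still free into
	 * xfs_inode_cache after kmem_cache_destroy() below has run.
	 */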
2491 	kmem_cache_destroy(xfs_parent_args_cache);
2492 	kmem_cache_destroy(xfs_xmd_cache);
2493 	kmem_cache_destroy(xfs_xmi_cache);
2494 	kmem_cache_destroy(xfs_iunlink_cache);
2495 	kmem_cache_destroy(xfs_attri_cache);
2496 	kmem_cache_destroy(xfs_attrd_cache);
2497 	kmem_cache_destroy(xfs_bui_cache);
2498 	kmem_cache_destroy(xfs_bud_cache);
2499 	kmem_cache_destroy(xfs_cui_cache);
2500 	kmem_cache_destroy(xfs_cud_cache);
2501 	kmem_cache_destroy(xfs_rui_cache);
2502 	kmem_cache_destroy(xfs_rud_cache);
2503 	kmem_cache_destroy(xfs_icreate_cache);
2504 	kmem_cache_destroy(xfs_ili_cache);
2505 	kmem_cache_destroy(xfs_inode_cache);
2506 	kmem_cache_destroy(xfs_efi_cache);
2507 	kmem_cache_destroy(xfs_efd_cache);
2508 	kmem_cache_destroy(xfs_buf_item_cache);
2509 	kmem_cache_destroy(xfs_trans_cache);
2510 	kmem_cache_destroy(xfs_ifork_cache);
2511 	kmem_cache_destroy(xfs_da_state_cache);
2512 	xfs_defer_destroy_item_caches();
2513 	rcbagbt_destroy_cur_cache();
2514 	xfs_btree_destroy_cur_caches();
2515 	kmem_cache_destroy(xfs_log_ticket_cache);
2516 	kmem_cache_destroy(xfs_buf_cache);
2517 }
2518 
2519 STATIC int __init
2520 xfs_init_workqueues(void)
2521 {
2522 	/*
2523 	 * The allocation workqueue can be used in memory reclaim situations
2524 	 * (writepage path), and parallelism is only limited by the number of
2525 	 * AGs in all the filesystems mounted. Hence use the default large
2526 	 * max_active value for this workqueue.
2527 	 */
2528 	xfs_alloc_wq = alloc_workqueue("xfsalloc",
2529 			XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE), 0);
2530 	if (!xfs_alloc_wq)
2531 		return -ENOMEM;
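	/*
	 * Note that WQ_MEM_RECLAIM above guarantees this workqueue a rescuer
	 * thread, so allocation work queued from the writeback path can make
	 * forward progress even when no new kworkers can be spawned.
	 */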
2532 
2533 	xfs_discard_wq = alloc_workqueue("xfsdiscard", XFS_WQFLAGS(WQ_UNBOUND),
2534 			0);
2535 	if (!xfs_discard_wq)
2536 		goto out_free_alloc_wq;
2537 
2538 	return 0;
2539 out_free_alloc_wq:
2540 	destroy_workqueue(xfs_alloc_wq);
2541 	return -ENOMEM;
2542 }
2543 
2544 STATIC void
2545 xfs_destroy_workqueues(void)
2546 {
2547 	destroy_workqueue(xfs_discard_wq);
2548 	destroy_workqueue(xfs_alloc_wq);
2549 }
2550 
2551 STATIC int __init
2552 init_xfs_fs(void)
2553 {
2554 	int			error;
2555 
2556 	xfs_check_ondisk_structs();
2557 
2558 	error = xfs_dahash_test();
2559 	if (error)
2560 		return error;
2561 
2562 	printk(KERN_INFO XFS_VERSION_STRING " with "
2563 			 XFS_BUILD_OPTIONS " enabled\n");
2564 
2565 	xfs_dir_startup();
2566 
2567 	error = xfs_init_caches();
2568 	if (error)
2569 		goto out;
2570 
2571 	error = xfs_init_workqueues();
2572 	if (error)
2573 		goto out_destroy_caches;
2574 
2575 	error = xfs_mru_cache_init();
2576 	if (error)
2577 		goto out_destroy_wq;
2578 
2579 	error = xfs_init_procfs();
2580 	if (error)
2581 		goto out_mru_cache_uninit;
2582 
2583 	error = xfs_sysctl_register();
2584 	if (error)
2585 		goto out_cleanup_procfs;
2586 
2587 	xfs_debugfs = xfs_debugfs_mkdir("xfs", NULL);
2588 
2589 	xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj);
2590 	if (!xfs_kset) {
2591 		error = -ENOMEM;
2592 		goto out_debugfs_unregister;
2593 	}
2594 
2595 	xfsstats.xs_kobj.kobject.kset = xfs_kset;
2596 
2597 	xfsstats.xs_stats = alloc_percpu(struct xfsstats);
2598 	if (!xfsstats.xs_stats) {
2599 		error = -ENOMEM;
2600 		goto out_kset_unregister;
2601 	}
2602 
2603 	error = xfs_sysfs_init(&xfsstats.xs_kobj, &xfs_stats_ktype, NULL,
2604 			       "stats");
2605 	if (error)
2606 		goto out_free_stats;
2607 
2608 	error = xchk_global_stats_setup(xfs_debugfs);
2609 	if (error)
2610 		goto out_remove_stats_kobj;
2611 
2612 #ifdef DEBUG
2613 	xfs_dbg_kobj.kobject.kset = xfs_kset;
2614 	error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug");
2615 	if (error)
2616 		goto out_remove_scrub_stats;
2617 #endif
2618 
2619 	error = xfs_qm_init();
2620 	if (error)
2621 		goto out_remove_dbg_kobj;
2622 
2623 	error = register_filesystem(&xfs_fs_type);
2624 	if (error)
2625 		goto out_qm_exit;
2626 	return 0;
2627 
2628  out_qm_exit:
2629 	xfs_qm_exit();
2630  out_remove_dbg_kobj:
2631 #ifdef DEBUG
2632 	xfs_sysfs_del(&xfs_dbg_kobj);
2633  out_remove_scrub_stats:
2634 #endif
2635 	xchk_global_stats_teardown();
2636  out_remove_stats_kobj:
2637 	xfs_sysfs_del(&xfsstats.xs_kobj);
2638  out_free_stats:
2639 	free_percpu(xfsstats.xs_stats);
2640  out_kset_unregister:
2641 	kset_unregister(xfs_kset);
2642  out_debugfs_unregister:
2643 	debugfs_remove(xfs_debugfs);
2644 	xfs_sysctl_unregister();
2645  out_cleanup_procfs:
2646 	xfs_cleanup_procfs();
2647  out_mru_cache_uninit:
2648 	xfs_mru_cache_uninit();
2649  out_destroy_wq:
2650 	xfs_destroy_workqueues();
2651  out_destroy_caches:
2652 	xfs_destroy_caches();
2653  out:
2654 	return error;
2655 }
2656 
2657 STATIC void __exit
2658 exit_xfs_fs(void)
2659 {
2660 	xfs_qm_exit();
2661 	unregister_filesystem(&xfs_fs_type);
2662 #ifdef DEBUG
2663 	xfs_sysfs_del(&xfs_dbg_kobj);
2664 #endif
2665 	xchk_global_stats_teardown();
2666 	xfs_sysfs_del(&xfsstats.xs_kobj);
2667 	free_percpu(xfsstats.xs_stats);
2668 	kset_unregister(xfs_kset);
2669 	debugfs_remove(xfs_debugfs);
2670 	xfs_sysctl_unregister();
2671 	xfs_cleanup_procfs();
2672 	xfs_mru_cache_uninit();
2673 	xfs_destroy_workqueues();
2674 	xfs_destroy_caches();
2675 	xfs_uuid_table_free();
2676 }
2677 
2678 module_init(init_xfs_fs);
2679 module_exit(exit_xfs_fs);
2680 
2681 MODULE_AUTHOR("Silicon Graphics, Inc.");
2682 MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
2683 MODULE_LICENSE("GPL");
2684