1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #include <sched.h>
4 #include <stdio.h>
5 #include <errno.h>
6 #include <pthread.h>
7 #include <string.h>
8 #include <sys/stat.h>
9 #include <sys/types.h>
10 #include <sys/mount.h>
11 #include <sys/wait.h>
12 #include <sys/vfs.h>
13 #include <sys/statvfs.h>
14 #include <sys/sysinfo.h>
15 #include <stdlib.h>
16 #include <unistd.h>
17 #include <fcntl.h>
18 #include <grp.h>
19 #include <stdbool.h>
20 #include <stdarg.h>
21 #include <linux/mount.h>
22 
23 #include "../filesystems/overlayfs/wrappers.h"
24 #include "../kselftest_harness.h"
25 
/*
 * Fallback values for constants the included headers might not provide.
 * Each one is guarded, so a definition coming from a header always wins.
 */

/* Namespace flags passed to unshare(). */
#if !defined(CLONE_NEWNS)
#define CLONE_NEWNS 0x00020000
#endif

#if !defined(CLONE_NEWUSER)
#define CLONE_NEWUSER 0x10000000
#endif

/* mount() flags. */
#if !defined(MS_REC)
#define MS_REC 16384
#endif

#if !defined(MS_SHARED)
#define MS_SHARED (1 << 20)
#endif

#if !defined(MS_RELATIME)
#define MS_RELATIME (1 << 21)
#endif

#if !defined(MS_STRICTATIME)
#define MS_STRICTATIME (1 << 24)
#endif

/* mount_setattr() attribute bits. */
#if !defined(MOUNT_ATTR_RDONLY)
#define MOUNT_ATTR_RDONLY 0x00000001
#endif

#if !defined(MOUNT_ATTR_NOSUID)
#define MOUNT_ATTR_NOSUID 0x00000002
#endif

#if !defined(MOUNT_ATTR_NOEXEC)
#define MOUNT_ATTR_NOEXEC 0x00000008
#endif

/* Mask covering the access-time modes below. */
#if !defined(MOUNT_ATTR__ATIME)
#define MOUNT_ATTR__ATIME 0x00000070
#endif

#if !defined(MOUNT_ATTR_RELATIME)
#define MOUNT_ATTR_RELATIME 0x00000000
#endif

#if !defined(MOUNT_ATTR_NOATIME)
#define MOUNT_ATTR_NOATIME 0x00000010
#endif

#if !defined(MOUNT_ATTR_STRICTATIME)
#define MOUNT_ATTR_STRICTATIME 0x00000020
#endif

#if !defined(MOUNT_ATTR_NODIRATIME)
#define MOUNT_ATTR_NODIRATIME 0x00000080
#endif

/* Flag asking mount_setattr() to act on a whole mount tree. */
#if !defined(AT_RECURSIVE)
#define AT_RECURSIVE 0x8000
#endif

/* Upper bound on the number of threads the multi-threaded test starts. */
#define DEFAULT_THREADS 4

/* Carry a small integer through a void pointer (thread exit values). */
#define ptr_to_int(p) ((int)((intptr_t)(p)))
#define int_to_ptr(u) ((void *)((intptr_t)(u)))
89 
/*
 * Syscall number for mount_setattr(), used only when no header has
 * defined __NR_mount_setattr. alpha, MIPS (per ABI) and ia64 get their own
 * values; everything else uses 442.
 */
#if !defined(__NR_mount_setattr)
	#if defined(__alpha__)
		#define __NR_mount_setattr 552
	#elif defined(_MIPS_SIM)
		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
			#define __NR_mount_setattr (442 + 4000)
		#elif _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
			#define __NR_mount_setattr (442 + 6000)
		#elif _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
			#define __NR_mount_setattr (442 + 5000)
		#endif
	#elif defined(__ia64__)
		#define __NR_mount_setattr (442 + 1024)
	#else
		#define __NR_mount_setattr 442
	#endif
#endif
109 
/*
 * Syscall number for open_tree(), used only when no header has defined
 * __NR_open_tree. alpha, MIPS (per ABI) and ia64 get their own values;
 * everything else uses 428.
 */
#if !defined(__NR_open_tree)
	#if defined(__alpha__)
		#define __NR_open_tree 538
	#elif defined(_MIPS_SIM)
		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
			#define __NR_open_tree (428 + 4000)
		#elif _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
			#define __NR_open_tree (428 + 6000)
		#elif _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
			#define __NR_open_tree (428 + 5000)
		#endif
	#elif defined(__ia64__)
		#define __NR_open_tree (428 + 1024)
	#else
		#define __NR_open_tree 428
	#endif
#endif
129 
/*
 * Syscall number for move_mount(), used only when no header has defined
 * __NR_move_mount. alpha, MIPS (per ABI) and ia64 get their own values;
 * everything else uses 429.
 */
#ifndef __NR_move_mount
	#if defined __alpha__
		#define __NR_move_mount 539
	#elif defined _MIPS_SIM
		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
			#define __NR_move_mount 4429
		#endif
		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
			#define __NR_move_mount 6429
		#endif
		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
			#define __NR_move_mount 5429
		#endif
	#elif defined __ia64__
		/* 429, not 428: 428 + 1024 is the open_tree() number. */
		#define __NR_move_mount (429 + 1024)
	#else
		#define __NR_move_mount 429
	#endif
#endif
149 
/* Fallbacks for mount attribute bits the included headers might lack. */
#if !defined(MOUNT_ATTR_IDMAP)
#define MOUNT_ATTR_IDMAP 0x00100000
#endif

#if !defined(MOUNT_ATTR_NOSYMFOLLOW)
#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000
#endif
157 
/*
 * Thin wrapper that issues the mount_setattr() system call through
 * syscall(), forwarding every argument unchanged.
 *
 * Returns the value of syscall(), narrowed to int.
 */
static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flags,
				    struct mount_attr *attr, size_t size)
{
	long rc = syscall(__NR_mount_setattr, dfd, path, flags, attr, size);

	return (int)rc;
}
163 
/* Fallbacks for the open_tree() flags the included headers might lack. */
#if !defined(OPEN_TREE_CLONE)
#define OPEN_TREE_CLONE 1
#endif

#if !defined(OPEN_TREE_CLOEXEC)
#define OPEN_TREE_CLOEXEC O_CLOEXEC
#endif

#if !defined(AT_RECURSIVE)
#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
#endif
175 
/*
 * Thin wrapper that issues the open_tree() system call through syscall(),
 * forwarding every argument unchanged.
 *
 * Returns the value of syscall(), narrowed to int.
 */
static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
{
	long rc = syscall(__NR_open_tree, dfd, filename, flags);

	return (int)rc;
}
180 
/*
 * write() that is restarted for as long as it fails with EINTR.
 *
 * Returns the result of the first write() call that either succeeded or
 * failed with an error other than EINTR.
 */
static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	for (;;) {
		ssize_t written = write(fd, buf, count);

		if (written >= 0 || errno != EINTR)
			return written;
	}
}
191 
/*
 * Write exactly @count bytes from @buf to the existing file at @path.
 *
 * The file is opened write-only without following a trailing symlink and
 * is closed again before returning.
 *
 * Returns 0 on success, -1 if the file cannot be opened, the write fails,
 * or fewer than @count bytes were written.
 */
static int write_file(const char *path, const void *buf, size_t count)
{
	ssize_t written;
	int fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);

	if (fd < 0)
		return -1;

	written = write_nointr(fd, buf, count);
	close(fd);

	/* A short write counts as a failure. */
	return (written >= 0 && (size_t)written == count) ? 0 : -1;
}
208 
/*
 * Move the calling process into a new user namespace in which the caller's
 * current uid and gid are mapped to 0, then switch to gid 0 and uid 0.
 *
 * Steps: unshare(CLONE_NEWUSER), write "deny" to /proc/self/setgroups
 * (a missing file is tolerated), write single-entry uid and gid maps, and
 * finally call setgid(0) and setuid(0).
 *
 * Returns 0 on success and -1 as soon as any step fails.
 */
static int create_and_enter_userns(void)
{
	uid_t uid;
	gid_t gid;
	char map[100];

	/* Sample the ids before unsharing; they are the map's outer side. */
	uid = getuid();
	gid = getgid();

	if (unshare(CLONE_NEWUSER))
		return -1;

	if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
	    errno != ENOENT)
		return -1;

	/* uid_t/gid_t are unsigned, so print them with %u rather than %d. */
	snprintf(map, sizeof(map), "0 %u 1", (unsigned int)uid);
	if (write_file("/proc/self/uid_map", map, strlen(map)))
		return -1;

	snprintf(map, sizeof(map), "0 %u 1", (unsigned int)gid);
	if (write_file("/proc/self/gid_map", map, strlen(map)))
		return -1;

	if (setgid(0))
		return -1;

	if (setuid(0))
		return -1;

	return 0;
}
242 
/*
 * Enter a fresh user namespace, then a fresh mount namespace, and mark the
 * whole mount tree below "/" as private.
 *
 * Returns 0 on success and -1 if any of the three steps fails; later steps
 * are not attempted after a failure.
 */
static int prepare_unpriv_mountns(void)
{
	int err;

	err = create_and_enter_userns();
	if (!err)
		err = unshare(CLONE_NEWNS);
	if (!err)
		err = mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0);

	return err ? -1 : 0;
}
256 
#ifndef ST_NOSYMFOLLOW
#define ST_NOSYMFOLLOW 0x2000 /* do not follow symlinks */
#endif

/*
 * Report the mount flags of the filesystem containing @path.
 *
 * The ST_* bits returned by statvfs() are translated into the matching
 * MS_* bits.
 *
 * Returns the translated flags, or -EINVAL if statvfs() fails or reports a
 * bit that is not in the translation table. Note that the return type is
 * int: a caller that stores the result in an unsigned variable cannot
 * detect the error with a "> 0" comparison.
 */
static int read_mnt_flags(const char *path)
{
	static const struct {
		unsigned long st_flag;
		unsigned int mnt_flag;
	} flag_map[] = {
		{ ST_RDONLY,		MS_RDONLY },
		{ ST_NOSUID,		MS_NOSUID },
		{ ST_NODEV,		MS_NODEV },
		{ ST_NOEXEC,		MS_NOEXEC },
		{ ST_NOATIME,		MS_NOATIME },
		{ ST_NODIRATIME,	MS_NODIRATIME },
		{ ST_RELATIME,		MS_RELATIME },
		{ ST_SYNCHRONOUS,	MS_SYNCHRONOUS },
		{ ST_MANDLOCK,		MS_MANDLOCK },
		/*
		 * NOTE(review): reported as the ST_* value on purpose so the
		 * returned bit stays what it has always been; confirm with
		 * callers before switching to an MS_* constant.
		 */
		{ ST_NOSYMFOLLOW,	ST_NOSYMFOLLOW },
	};
	struct statvfs stat;
	unsigned long known = 0;
	unsigned int mnt_flags = 0;
	size_t i;

	if (statvfs(path, &stat) != 0)
		return -EINVAL;

	for (i = 0; i < sizeof(flag_map) / sizeof(flag_map[0]); i++) {
		known |= flag_map[i].st_flag;
		if (stat.f_flag & flag_map[i].st_flag)
			mnt_flags |= flag_map[i].mnt_flag;
	}

	/* Refuse to guess at flags this table knows nothing about. */
	if (stat.f_flag & ~known)
		return -EINVAL;

	return mnt_flags;
}
300 
/*
 * Skip @nfields fields of @src, where a field ends at a single space or
 * tab character.
 *
 * Returns a pointer just past the separator of the last skipped field, or
 * a pointer to the terminating NUL if the string ends first. With
 * @nfields <= 0 the result is @src itself. The result is never NULL.
 */
static char *get_field(char *src, int nfields)
{
	char *cur = src;
	int remaining = nfields;

	while (remaining-- > 0) {
		/* Advance to the end of the current field. */
		for (; *cur != '\0'; cur++) {
			if (*cur == ' ' || *cur == '\t')
				break;
		}

		if (*cur == '\0')
			return cur;

		cur++;	/* step over the separator */
	}

	return cur;
}
318 
/*
 * Terminate @word in place at its first space or tab. A string without
 * either character is left as it was.
 */
static void null_endofword(char *word)
{
	char *end;

	for (end = word; *end != '\0'; end++) {
		if (*end == ' ' || *end == '\t')
			break;
	}

	*end = '\0';
}
325 
/*
 * Tell whether a mount whose mount point equals @path is a shared mount.
 *
 * Scans /proc/self/mountinfo. For each line, the fifth whitespace-separated
 * field is compared against @path; on a match, the seventh field is
 * searched for the substring "shared:".
 *
 * Returns true on the first matching line that carries "shared:", and
 * false if there is none or the file cannot be opened.
 */
static bool is_shared_mount(const char *path)
{
	bool shared = false;
	size_t len = 0;
	char *line = NULL;
	FILE *f = NULL;

	f = fopen("/proc/self/mountinfo", "re");
	if (!f)
		return false;

	while (getline(&line, &len, f) != -1) {
		char *opts, *target;

		/* Skip four fields to reach the mount point. */
		target = get_field(line, 4);
		if (!target)
			continue;

		/* Two fields further on is where "shared:" is looked for. */
		opts = get_field(target, 2);
		if (!opts)
			continue;

		null_endofword(target);

		if (strcmp(target, path) != 0)
			continue;

		null_endofword(opts);
		if (strstr(opts, "shared:")) {
			shared = true;
			break;
		}
	}

	/* Single exit path so the line buffer and stream are always released. */
	free(line);
	fclose(f);

	return shared;
}
362 
mount_setattr_thread(void * data)363 static void *mount_setattr_thread(void *data)
364 {
365 	struct mount_attr attr = {
366 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID,
367 		.attr_clr	= 0,
368 		.propagation	= MS_SHARED,
369 	};
370 
371 	if (sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)))
372 		pthread_exit(int_to_ptr(-1));
373 
374 	pthread_exit(int_to_ptr(0));
375 }
376 
/*
 * Attempt to de-conflict with the selftests tree: if the harness did not
 * define SKIP(), map it onto XFAIL().
 */
#if !defined(SKIP)
#define SKIP(s, ...)	XFAIL(s, ##__VA_ARGS__)
#endif
381 
mount_setattr_supported(void)382 static bool mount_setattr_supported(void)
383 {
384 	int ret;
385 
386 	ret = sys_mount_setattr(-EBADF, "", AT_EMPTY_PATH, NULL, 0);
387 	if (ret < 0 && errno == ENOSYS)
388 		return false;
389 
390 	return true;
391 }
392 
FIXTURE(mount_setattr)393 FIXTURE(mount_setattr) {
394 };
395 
396 #define NOSYMFOLLOW_TARGET "/mnt/A/AA/data"
397 #define NOSYMFOLLOW_SYMLINK "/mnt/A/AA/symlink"
398 
FIXTURE_SETUP(mount_setattr)399 FIXTURE_SETUP(mount_setattr)
400 {
401 	int fd = -EBADF;
402 
403 	if (!mount_setattr_supported())
404 		SKIP(return, "mount_setattr syscall not supported");
405 
406 	ASSERT_EQ(prepare_unpriv_mountns(), 0);
407 
408 	(void)umount2("/mnt", MNT_DETACH);
409 	(void)umount2("/tmp", MNT_DETACH);
410 
411 	ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
412 			"size=100000,mode=700"), 0);
413 
414 	ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
415 
416 	ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
417 			"size=100000,mode=700"), 0);
418 
419 	ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
420 
421 	ASSERT_EQ(mkdir("/tmp/target1", 0777), 0);
422 
423 	ASSERT_EQ(mkdir("/tmp/target2", 0777), 0);
424 
425 	ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
426 			"size=100000,mode=700"), 0);
427 
428 	ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
429 			"size=100000,mode=700"), 0);
430 
431 	ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
432 
433 	ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
434 			"size=100000,mode=700"), 0);
435 
436 	ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
437 
438 	ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
439 
440 	ASSERT_EQ(mkdir("/mnt/B", 0777), 0);
441 
442 	ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
443 			MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);
444 
445 	ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);
446 
447 	ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
448 			MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
449 
450 	fd = creat(NOSYMFOLLOW_TARGET, O_RDWR | O_CLOEXEC);
451 	ASSERT_GT(fd, 0);
452 	ASSERT_EQ(symlink(NOSYMFOLLOW_TARGET, NOSYMFOLLOW_SYMLINK), 0);
453 	ASSERT_EQ(close(fd), 0);
454 }
455 
FIXTURE_TEARDOWN(mount_setattr)456 FIXTURE_TEARDOWN(mount_setattr)
457 {
458 	if (!mount_setattr_supported())
459 		SKIP(return, "mount_setattr syscall not supported");
460 
461 	(void)umount2("/mnt/A", MNT_DETACH);
462 	(void)umount2("/tmp", MNT_DETACH);
463 }
464 
TEST_F(mount_setattr,invalid_attributes)465 TEST_F(mount_setattr, invalid_attributes)
466 {
467 	struct mount_attr invalid_attr = {
468 		.attr_set = (1U << 31),
469 	};
470 
471 	if (!mount_setattr_supported())
472 		SKIP(return, "mount_setattr syscall not supported");
473 
474 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
475 				    sizeof(invalid_attr)), 0);
476 
477 	invalid_attr.attr_set	= 0;
478 	invalid_attr.attr_clr	= (1U << 31);
479 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
480 				    sizeof(invalid_attr)), 0);
481 
482 	invalid_attr.attr_clr		= 0;
483 	invalid_attr.propagation	= (1U << 31);
484 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
485 				    sizeof(invalid_attr)), 0);
486 
487 	invalid_attr.attr_set		= (1U << 31);
488 	invalid_attr.attr_clr		= (1U << 31);
489 	invalid_attr.propagation	= (1U << 31);
490 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
491 				    sizeof(invalid_attr)), 0);
492 
493 	ASSERT_NE(sys_mount_setattr(-1, "mnt/A", AT_RECURSIVE, &invalid_attr,
494 				    sizeof(invalid_attr)), 0);
495 }
496 
TEST_F(mount_setattr,extensibility)497 TEST_F(mount_setattr, extensibility)
498 {
499 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
500 	char *s = "dummy";
501 	struct mount_attr invalid_attr = {};
502 	struct mount_attr_large {
503 		struct mount_attr attr1;
504 		struct mount_attr attr2;
505 		struct mount_attr attr3;
506 	} large_attr = {};
507 
508 	if (!mount_setattr_supported())
509 		SKIP(return, "mount_setattr syscall not supported");
510 
511 	old_flags = read_mnt_flags("/mnt/A");
512 	ASSERT_GT(old_flags, 0);
513 
514 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, NULL,
515 				    sizeof(invalid_attr)), 0);
516 	ASSERT_EQ(errno, EFAULT);
517 
518 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, (void *)s,
519 				    sizeof(invalid_attr)), 0);
520 	ASSERT_EQ(errno, EINVAL);
521 
522 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 0), 0);
523 	ASSERT_EQ(errno, EINVAL);
524 
525 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
526 				    sizeof(invalid_attr) / 2), 0);
527 	ASSERT_EQ(errno, EINVAL);
528 
529 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
530 				    sizeof(invalid_attr) / 2), 0);
531 	ASSERT_EQ(errno, EINVAL);
532 
533 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
534 				    (void *)&large_attr, sizeof(large_attr)), 0);
535 
536 	large_attr.attr3.attr_set = MOUNT_ATTR_RDONLY;
537 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
538 				    (void *)&large_attr, sizeof(large_attr)), 0);
539 
540 	large_attr.attr3.attr_set = 0;
541 	large_attr.attr1.attr_set = MOUNT_ATTR_RDONLY;
542 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
543 				    (void *)&large_attr, sizeof(large_attr)), 0);
544 
545 	expected_flags = old_flags;
546 	expected_flags |= MS_RDONLY;
547 
548 	new_flags = read_mnt_flags("/mnt/A");
549 	ASSERT_EQ(new_flags, expected_flags);
550 
551 	new_flags = read_mnt_flags("/mnt/A/AA");
552 	ASSERT_EQ(new_flags, expected_flags);
553 
554 	new_flags = read_mnt_flags("/mnt/A/AA/B");
555 	ASSERT_EQ(new_flags, expected_flags);
556 
557 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
558 	ASSERT_EQ(new_flags, expected_flags);
559 }
560 
TEST_F(mount_setattr,basic)561 TEST_F(mount_setattr, basic)
562 {
563 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
564 	struct mount_attr attr = {
565 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
566 		.attr_clr	= MOUNT_ATTR__ATIME,
567 	};
568 
569 	if (!mount_setattr_supported())
570 		SKIP(return, "mount_setattr syscall not supported");
571 
572 	old_flags = read_mnt_flags("/mnt/A");
573 	ASSERT_GT(old_flags, 0);
574 
575 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", 0, &attr, sizeof(attr)), 0);
576 
577 	expected_flags = old_flags;
578 	expected_flags |= MS_RDONLY;
579 	expected_flags |= MS_NOEXEC;
580 	expected_flags &= ~MS_NOATIME;
581 	expected_flags |= MS_RELATIME;
582 
583 	new_flags = read_mnt_flags("/mnt/A");
584 	ASSERT_EQ(new_flags, expected_flags);
585 
586 	new_flags = read_mnt_flags("/mnt/A/AA");
587 	ASSERT_EQ(new_flags, old_flags);
588 
589 	new_flags = read_mnt_flags("/mnt/A/AA/B");
590 	ASSERT_EQ(new_flags, old_flags);
591 
592 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
593 	ASSERT_EQ(new_flags, old_flags);
594 }
595 
TEST_F(mount_setattr,basic_recursive)596 TEST_F(mount_setattr, basic_recursive)
597 {
598 	int fd;
599 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
600 	struct mount_attr attr = {
601 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
602 		.attr_clr	= MOUNT_ATTR__ATIME,
603 	};
604 
605 	if (!mount_setattr_supported())
606 		SKIP(return, "mount_setattr syscall not supported");
607 
608 	old_flags = read_mnt_flags("/mnt/A");
609 	ASSERT_GT(old_flags, 0);
610 
611 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
612 
613 	expected_flags = old_flags;
614 	expected_flags |= MS_RDONLY;
615 	expected_flags |= MS_NOEXEC;
616 	expected_flags &= ~MS_NOATIME;
617 	expected_flags |= MS_RELATIME;
618 
619 	new_flags = read_mnt_flags("/mnt/A");
620 	ASSERT_EQ(new_flags, expected_flags);
621 
622 	new_flags = read_mnt_flags("/mnt/A/AA");
623 	ASSERT_EQ(new_flags, expected_flags);
624 
625 	new_flags = read_mnt_flags("/mnt/A/AA/B");
626 	ASSERT_EQ(new_flags, expected_flags);
627 
628 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
629 	ASSERT_EQ(new_flags, expected_flags);
630 
631 	memset(&attr, 0, sizeof(attr));
632 	attr.attr_clr = MOUNT_ATTR_RDONLY;
633 	attr.propagation = MS_SHARED;
634 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
635 
636 	expected_flags &= ~MS_RDONLY;
637 	new_flags = read_mnt_flags("/mnt/A");
638 	ASSERT_EQ(new_flags, expected_flags);
639 
640 	ASSERT_EQ(is_shared_mount("/mnt/A"), true);
641 
642 	new_flags = read_mnt_flags("/mnt/A/AA");
643 	ASSERT_EQ(new_flags, expected_flags);
644 
645 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
646 
647 	new_flags = read_mnt_flags("/mnt/A/AA/B");
648 	ASSERT_EQ(new_flags, expected_flags);
649 
650 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
651 
652 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
653 	ASSERT_EQ(new_flags, expected_flags);
654 
655 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
656 
657 	fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
658 	ASSERT_GE(fd, 0);
659 
660 	/*
661 	 * We're holding a fd open for writing so this needs to fail somewhere
662 	 * in the middle and the mount options need to be unchanged.
663 	 */
664 	attr.attr_set = MOUNT_ATTR_RDONLY;
665 	ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
666 
667 	new_flags = read_mnt_flags("/mnt/A");
668 	ASSERT_EQ(new_flags, expected_flags);
669 
670 	ASSERT_EQ(is_shared_mount("/mnt/A"), true);
671 
672 	new_flags = read_mnt_flags("/mnt/A/AA");
673 	ASSERT_EQ(new_flags, expected_flags);
674 
675 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
676 
677 	new_flags = read_mnt_flags("/mnt/A/AA/B");
678 	ASSERT_EQ(new_flags, expected_flags);
679 
680 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
681 
682 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
683 	ASSERT_EQ(new_flags, expected_flags);
684 
685 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
686 
687 	EXPECT_EQ(close(fd), 0);
688 }
689 
TEST_F(mount_setattr,mount_has_writers)690 TEST_F(mount_setattr, mount_has_writers)
691 {
692 	int fd, dfd;
693 	unsigned int old_flags = 0, new_flags = 0;
694 	struct mount_attr attr = {
695 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
696 		.attr_clr	= MOUNT_ATTR__ATIME,
697 		.propagation	= MS_SHARED,
698 	};
699 
700 	if (!mount_setattr_supported())
701 		SKIP(return, "mount_setattr syscall not supported");
702 
703 	old_flags = read_mnt_flags("/mnt/A");
704 	ASSERT_GT(old_flags, 0);
705 
706 	fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
707 	ASSERT_GE(fd, 0);
708 
709 	/*
710 	 * We're holding a fd open to a mount somwhere in the middle so this
711 	 * needs to fail somewhere in the middle. After this the mount options
712 	 * need to be unchanged.
713 	 */
714 	ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
715 
716 	new_flags = read_mnt_flags("/mnt/A");
717 	ASSERT_EQ(new_flags, old_flags);
718 
719 	ASSERT_EQ(is_shared_mount("/mnt/A"), false);
720 
721 	new_flags = read_mnt_flags("/mnt/A/AA");
722 	ASSERT_EQ(new_flags, old_flags);
723 
724 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), false);
725 
726 	new_flags = read_mnt_flags("/mnt/A/AA/B");
727 	ASSERT_EQ(new_flags, old_flags);
728 
729 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), false);
730 
731 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
732 	ASSERT_EQ(new_flags, old_flags);
733 
734 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), false);
735 
736 	dfd = open("/mnt/A/AA/B", O_DIRECTORY | O_CLOEXEC);
737 	ASSERT_GE(dfd, 0);
738 	EXPECT_EQ(fsync(dfd), 0);
739 	EXPECT_EQ(close(dfd), 0);
740 
741 	EXPECT_EQ(fsync(fd), 0);
742 	EXPECT_EQ(close(fd), 0);
743 
744 	/* All writers are gone so this should succeed. */
745 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
746 }
747 
TEST_F(mount_setattr,mixed_mount_options)748 TEST_F(mount_setattr, mixed_mount_options)
749 {
750 	unsigned int old_flags1 = 0, old_flags2 = 0, new_flags = 0, expected_flags = 0;
751 	struct mount_attr attr = {
752 		.attr_clr = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME,
753 		.attr_set = MOUNT_ATTR_RELATIME,
754 	};
755 
756 	if (!mount_setattr_supported())
757 		SKIP(return, "mount_setattr syscall not supported");
758 
759 	old_flags1 = read_mnt_flags("/mnt/B");
760 	ASSERT_GT(old_flags1, 0);
761 
762 	old_flags2 = read_mnt_flags("/mnt/B/BB");
763 	ASSERT_GT(old_flags2, 0);
764 
765 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/B", AT_RECURSIVE, &attr, sizeof(attr)), 0);
766 
767 	expected_flags = old_flags2;
768 	expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
769 	expected_flags |= MS_RELATIME;
770 
771 	new_flags = read_mnt_flags("/mnt/B");
772 	ASSERT_EQ(new_flags, expected_flags);
773 
774 	expected_flags = old_flags2;
775 	expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
776 	expected_flags |= MS_RELATIME;
777 
778 	new_flags = read_mnt_flags("/mnt/B/BB");
779 	ASSERT_EQ(new_flags, expected_flags);
780 }
781 
TEST_F(mount_setattr,time_changes)782 TEST_F(mount_setattr, time_changes)
783 {
784 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
785 	struct mount_attr attr = {
786 		.attr_set	= MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME,
787 	};
788 
789 	if (!mount_setattr_supported())
790 		SKIP(return, "mount_setattr syscall not supported");
791 
792 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
793 
794 	attr.attr_set = MOUNT_ATTR_STRICTATIME;
795 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
796 
797 	attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
798 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
799 
800 	attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
801 	attr.attr_clr = MOUNT_ATTR__ATIME;
802 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
803 
804 	attr.attr_set = 0;
805 	attr.attr_clr = MOUNT_ATTR_STRICTATIME;
806 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
807 
808 	attr.attr_clr = MOUNT_ATTR_NOATIME;
809 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
810 
811 	old_flags = read_mnt_flags("/mnt/A");
812 	ASSERT_GT(old_flags, 0);
813 
814 	attr.attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME;
815 	attr.attr_clr = MOUNT_ATTR__ATIME;
816 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
817 
818 	expected_flags = old_flags;
819 	expected_flags |= MS_NOATIME;
820 	expected_flags |= MS_NODIRATIME;
821 
822 	new_flags = read_mnt_flags("/mnt/A");
823 	ASSERT_EQ(new_flags, expected_flags);
824 
825 	new_flags = read_mnt_flags("/mnt/A/AA");
826 	ASSERT_EQ(new_flags, expected_flags);
827 
828 	new_flags = read_mnt_flags("/mnt/A/AA/B");
829 	ASSERT_EQ(new_flags, expected_flags);
830 
831 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
832 	ASSERT_EQ(new_flags, expected_flags);
833 
834 	memset(&attr, 0, sizeof(attr));
835 	attr.attr_set &= ~MOUNT_ATTR_NOATIME;
836 	attr.attr_set |= MOUNT_ATTR_RELATIME;
837 	attr.attr_clr |= MOUNT_ATTR__ATIME;
838 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
839 
840 	expected_flags &= ~MS_NOATIME;
841 	expected_flags |= MS_RELATIME;
842 
843 	new_flags = read_mnt_flags("/mnt/A");
844 	ASSERT_EQ(new_flags, expected_flags);
845 
846 	new_flags = read_mnt_flags("/mnt/A/AA");
847 	ASSERT_EQ(new_flags, expected_flags);
848 
849 	new_flags = read_mnt_flags("/mnt/A/AA/B");
850 	ASSERT_EQ(new_flags, expected_flags);
851 
852 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
853 	ASSERT_EQ(new_flags, expected_flags);
854 
855 	memset(&attr, 0, sizeof(attr));
856 	attr.attr_set &= ~MOUNT_ATTR_RELATIME;
857 	attr.attr_set |= MOUNT_ATTR_STRICTATIME;
858 	attr.attr_clr |= MOUNT_ATTR__ATIME;
859 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
860 
861 	expected_flags &= ~MS_RELATIME;
862 
863 	new_flags = read_mnt_flags("/mnt/A");
864 	ASSERT_EQ(new_flags, expected_flags);
865 
866 	new_flags = read_mnt_flags("/mnt/A/AA");
867 	ASSERT_EQ(new_flags, expected_flags);
868 
869 	new_flags = read_mnt_flags("/mnt/A/AA/B");
870 	ASSERT_EQ(new_flags, expected_flags);
871 
872 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
873 	ASSERT_EQ(new_flags, expected_flags);
874 
875 	memset(&attr, 0, sizeof(attr));
876 	attr.attr_set &= ~MOUNT_ATTR_STRICTATIME;
877 	attr.attr_set |= MOUNT_ATTR_NOATIME;
878 	attr.attr_clr |= MOUNT_ATTR__ATIME;
879 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
880 
881 	expected_flags |= MS_NOATIME;
882 	new_flags = read_mnt_flags("/mnt/A");
883 	ASSERT_EQ(new_flags, expected_flags);
884 
885 	new_flags = read_mnt_flags("/mnt/A/AA");
886 	ASSERT_EQ(new_flags, expected_flags);
887 
888 	new_flags = read_mnt_flags("/mnt/A/AA/B");
889 	ASSERT_EQ(new_flags, expected_flags);
890 
891 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
892 	ASSERT_EQ(new_flags, expected_flags);
893 
894 	memset(&attr, 0, sizeof(attr));
895 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
896 
897 	new_flags = read_mnt_flags("/mnt/A");
898 	ASSERT_EQ(new_flags, expected_flags);
899 
900 	new_flags = read_mnt_flags("/mnt/A/AA");
901 	ASSERT_EQ(new_flags, expected_flags);
902 
903 	new_flags = read_mnt_flags("/mnt/A/AA/B");
904 	ASSERT_EQ(new_flags, expected_flags);
905 
906 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
907 	ASSERT_EQ(new_flags, expected_flags);
908 
909 	memset(&attr, 0, sizeof(attr));
910 	attr.attr_clr = MOUNT_ATTR_NODIRATIME;
911 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
912 
913 	expected_flags &= ~MS_NODIRATIME;
914 
915 	new_flags = read_mnt_flags("/mnt/A");
916 	ASSERT_EQ(new_flags, expected_flags);
917 
918 	new_flags = read_mnt_flags("/mnt/A/AA");
919 	ASSERT_EQ(new_flags, expected_flags);
920 
921 	new_flags = read_mnt_flags("/mnt/A/AA/B");
922 	ASSERT_EQ(new_flags, expected_flags);
923 
924 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
925 	ASSERT_EQ(new_flags, expected_flags);
926 }
927 
TEST_F(mount_setattr,multi_threaded)928 TEST_F(mount_setattr, multi_threaded)
929 {
930 	int i, j, nthreads, ret = 0;
931 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
932 	pthread_attr_t pattr;
933 	pthread_t threads[DEFAULT_THREADS];
934 
935 	if (!mount_setattr_supported())
936 		SKIP(return, "mount_setattr syscall not supported");
937 
938 	old_flags = read_mnt_flags("/mnt/A");
939 	ASSERT_GT(old_flags, 0);
940 
941 	/* Try to change mount options from multiple threads. */
942 	nthreads = get_nprocs_conf();
943 	if (nthreads > DEFAULT_THREADS)
944 		nthreads = DEFAULT_THREADS;
945 
946 	pthread_attr_init(&pattr);
947 	for (i = 0; i < nthreads; i++)
948 		ASSERT_EQ(pthread_create(&threads[i], &pattr, mount_setattr_thread, NULL), 0);
949 
950 	for (j = 0; j < i; j++) {
951 		void *retptr = NULL;
952 
953 		EXPECT_EQ(pthread_join(threads[j], &retptr), 0);
954 
955 		ret += ptr_to_int(retptr);
956 		EXPECT_EQ(ret, 0);
957 	}
958 	pthread_attr_destroy(&pattr);
959 
960 	ASSERT_EQ(ret, 0);
961 
962 	expected_flags = old_flags;
963 	expected_flags |= MS_RDONLY;
964 	expected_flags |= MS_NOSUID;
965 	new_flags = read_mnt_flags("/mnt/A");
966 	ASSERT_EQ(new_flags, expected_flags);
967 
968 	ASSERT_EQ(is_shared_mount("/mnt/A"), true);
969 
970 	new_flags = read_mnt_flags("/mnt/A/AA");
971 	ASSERT_EQ(new_flags, expected_flags);
972 
973 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
974 
975 	new_flags = read_mnt_flags("/mnt/A/AA/B");
976 	ASSERT_EQ(new_flags, expected_flags);
977 
978 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
979 
980 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
981 	ASSERT_EQ(new_flags, expected_flags);
982 
983 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
984 }
985 
TEST_F(mount_setattr,wrong_user_namespace)986 TEST_F(mount_setattr, wrong_user_namespace)
987 {
988 	int ret;
989 	struct mount_attr attr = {
990 		.attr_set = MOUNT_ATTR_RDONLY,
991 	};
992 
993 	if (!mount_setattr_supported())
994 		SKIP(return, "mount_setattr syscall not supported");
995 
996 	EXPECT_EQ(create_and_enter_userns(), 0);
997 	ret = sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr));
998 	ASSERT_LT(ret, 0);
999 	ASSERT_EQ(errno, EPERM);
1000 }
1001 
TEST_F(mount_setattr,wrong_mount_namespace)1002 TEST_F(mount_setattr, wrong_mount_namespace)
1003 {
1004 	int fd, ret;
1005 	struct mount_attr attr = {
1006 		.attr_set = MOUNT_ATTR_RDONLY,
1007 	};
1008 
1009 	if (!mount_setattr_supported())
1010 		SKIP(return, "mount_setattr syscall not supported");
1011 
1012 	fd = open("/mnt/A", O_DIRECTORY | O_CLOEXEC);
1013 	ASSERT_GE(fd, 0);
1014 
1015 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1016 
1017 	ret = sys_mount_setattr(fd, "", AT_EMPTY_PATH | AT_RECURSIVE, &attr, sizeof(attr));
1018 	ASSERT_LT(ret, 0);
1019 	ASSERT_EQ(errno, EINVAL);
1020 }
1021 
/* The fixture carries no per-test state. */
FIXTURE(mount_setattr_idmapped) {
};

/*
 * Enter a new mount namespace, make the tree below "/" private, and build
 * the mount tree the idmapped-mount tests operate on, in this order:
 *
 *   /tmp              tmpfs
 *   /tmp/B            directory with file "b", then tmpfs mounted on it
 *   /tmp/B/BB         directory with file "b", then tmpfs mounted on it
 *   /mnt              tmpfs (size=2m)
 *   /mnt/A            tmpfs
 *   /mnt/A/AA         recursive bind mount of /tmp
 *   /mnt/B            ramfs
 *   /mnt/B/BB         directory
 *   /tmp/B/BB         devpts, mounted after the bind mount was taken
 *   /mnt/C/ext4.img   2 MiB image formatted with mkfs.ext4
 *   /mnt/D            loop mount of that image
 *
 * Unlike the plain mount_setattr fixture, no user namespace is created
 * here. The mkfs.ext4 and mount programs are run through system().
 */
FIXTURE_SETUP(mount_setattr_idmapped)
{
	const unsigned long tmpfs_flags = MS_NOATIME | MS_NODEV;
	const char *tmpfs_opts = "size=100000,mode=700";
	int img_fd = -EBADF;

	ASSERT_EQ(unshare(CLONE_NEWNS), 0);

	ASSERT_EQ(mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0), 0);

	/* Best effort: drop whatever is currently mounted on these paths. */
	(void)umount2("/mnt", MNT_DETACH);
	(void)umount2("/tmp", MNT_DETACH);

	ASSERT_EQ(mount("testing", "/tmp", "tmpfs", tmpfs_flags, tmpfs_opts), 0);

	ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
	ASSERT_EQ(mknodat(-EBADF, "/tmp/B/b", S_IFREG | 0644, 0), 0);
	ASSERT_EQ(chown("/tmp/B/b", 0, 0), 0);

	ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", tmpfs_flags, tmpfs_opts), 0);

	ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
	ASSERT_EQ(mknodat(-EBADF, "/tmp/B/BB/b", S_IFREG | 0644, 0), 0);
	ASSERT_EQ(chown("/tmp/B/BB/b", 0, 0), 0);

	ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", tmpfs_flags, tmpfs_opts), 0);

	ASSERT_EQ(mount("testing", "/mnt", "tmpfs", tmpfs_flags,
			"size=2m,mode=700"), 0);

	ASSERT_EQ(mkdir("/mnt/A", 0777), 0);

	ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", tmpfs_flags, tmpfs_opts), 0);

	ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);

	ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);

	ASSERT_EQ(mkdir("/mnt/B", 0777), 0);

	ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
			MS_NOATIME | MS_NODEV | MS_NOSUID, NULL), 0);

	ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);

	/*
	 * NOTE(review): the directory created just above is /mnt/B/BB, but
	 * devpts goes onto /tmp/B/BB. Kept as is; confirm this is intended.
	 */
	ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
			MS_RELATIME | MS_NOEXEC | MS_RDONLY, NULL), 0);

	ASSERT_EQ(mkdir("/mnt/C", 0777), 0);
	ASSERT_EQ(mkdir("/mnt/D", 0777), 0);

	/*
	 * The descriptor is only needed to size the image. Open it
	 * close-on-exec and close it before the external tools run, so they
	 * do not inherit it.
	 */
	img_fd = openat(-EBADF, "/mnt/C/ext4.img",
			O_CREAT | O_WRONLY | O_CLOEXEC, 0600);
	ASSERT_GE(img_fd, 0);
	ASSERT_EQ(ftruncate(img_fd, 1024 * 2048), 0);
	ASSERT_EQ(close(img_fd), 0);

	ASSERT_EQ(system("mkfs.ext4 -q /mnt/C/ext4.img"), 0);
	ASSERT_EQ(system("mount -o loop -t ext4 /mnt/C/ext4.img /mnt/D/"), 0);
}
1084 
/*
 * Lazily detach /mnt/A and /tmp. Both calls are best effort: their return
 * values are explicitly discarded.
 */
FIXTURE_TEARDOWN(mount_setattr_idmapped)
{
	(void)umount2("/mnt/A", MNT_DETACH);
	(void)umount2("/tmp", MNT_DETACH);
}
1090 
1091 /**
1092  * Validate that negative fd values are rejected.
1093  */
TEST_F(mount_setattr_idmapped,invalid_fd_negative)1094 TEST_F(mount_setattr_idmapped, invalid_fd_negative)
1095 {
1096 	struct mount_attr attr = {
1097 		.attr_set	= MOUNT_ATTR_IDMAP,
1098 		.userns_fd	= -EBADF,
1099 	};
1100 
1101 	if (!mount_setattr_supported())
1102 		SKIP(return, "mount_setattr syscall not supported");
1103 
1104 	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1105 		TH_LOG("failure: created idmapped mount with negative fd");
1106 	}
1107 }
1108 
1109 /**
1110  * Validate that excessively large fd values are rejected.
1111  */
TEST_F(mount_setattr_idmapped,invalid_fd_large)1112 TEST_F(mount_setattr_idmapped, invalid_fd_large)
1113 {
1114 	struct mount_attr attr = {
1115 		.attr_set	= MOUNT_ATTR_IDMAP,
1116 		.userns_fd	= INT64_MAX,
1117 	};
1118 
1119 	if (!mount_setattr_supported())
1120 		SKIP(return, "mount_setattr syscall not supported");
1121 
1122 	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1123 		TH_LOG("failure: created idmapped mount with too large fd value");
1124 	}
1125 }
1126 
1127 /**
1128  * Validate that closed fd values are rejected.
1129  */
TEST_F(mount_setattr_idmapped,invalid_fd_closed)1130 TEST_F(mount_setattr_idmapped, invalid_fd_closed)
1131 {
1132 	int fd;
1133 	struct mount_attr attr = {
1134 		.attr_set = MOUNT_ATTR_IDMAP,
1135 	};
1136 
1137 	if (!mount_setattr_supported())
1138 		SKIP(return, "mount_setattr syscall not supported");
1139 
1140 	fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
1141 	ASSERT_GE(fd, 0);
1142 	ASSERT_GE(close(fd), 0);
1143 
1144 	attr.userns_fd = fd;
1145 	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1146 		TH_LOG("failure: created idmapped mount with closed fd");
1147 	}
1148 }
1149 
1150 /**
1151  * Validate that the initial user namespace is rejected.
1152  */
TEST_F(mount_setattr_idmapped,invalid_fd_initial_userns)1153 TEST_F(mount_setattr_idmapped, invalid_fd_initial_userns)
1154 {
1155 	int open_tree_fd = -EBADF;
1156 	struct mount_attr attr = {
1157 		.attr_set = MOUNT_ATTR_IDMAP,
1158 	};
1159 
1160 	if (!mount_setattr_supported())
1161 		SKIP(return, "mount_setattr syscall not supported");
1162 
1163 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1164 				     AT_NO_AUTOMOUNT |
1165 				     AT_SYMLINK_NOFOLLOW |
1166 				     OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
1167 	ASSERT_GE(open_tree_fd, 0);
1168 
1169 	attr.userns_fd = open("/proc/1/ns/user", O_RDONLY | O_CLOEXEC);
1170 	ASSERT_GE(attr.userns_fd, 0);
1171 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1172 	ASSERT_EQ(errno, EPERM);
1173 	ASSERT_EQ(close(attr.userns_fd), 0);
1174 	ASSERT_EQ(close(open_tree_fd), 0);
1175 }
1176 
/*
 * Write the single mapping "<nsid> <hostid> <range>" to /proc/<pid>/uid_map
 * and then to /proc/<pid>/gid_map.
 *
 * Returns 0 when both writes succeed and -1 as soon as write_file() reports
 * a failure.
 */
static int map_ids(pid_t pid, unsigned long nsid, unsigned long hostid,
		   unsigned long range)
{
	char mapping[100], target[256];
	size_t mapping_len;

	/* The same mapping line is used for the uid and the gid map. */
	snprintf(mapping, sizeof(mapping), "%lu %lu %lu", nsid, hostid, range);
	mapping_len = strlen(mapping);

	snprintf(target, sizeof(target), "/proc/%d/uid_map", pid);
	if (write_file(target, mapping, mapping_len) != 0)
		return -1;

	snprintf(target, sizeof(target), "/proc/%d/gid_map", pid);
	if (write_file(target, mapping, mapping_len) != 0)
		return -1;

	return 0;
}
1195 
#define __STACK_SIZE (8 * 1024 * 1024)
/**
 * do_clone() - run @fn in a child created with clone()
 * @fn:    entry point executed by the child
 * @arg:   opaque argument handed to @fn
 * @flags: clone flags; SIGCHLD is always OR-ed in
 *
 * The child runs on a heap-allocated stack of __STACK_SIZE bytes. Without
 * CLONE_VM the child works on its own copy of the address space, so the
 * parent's copy of the stack is released before returning instead of being
 * leaked on every call. The stack is also released when clone() fails. With
 * CLONE_VM the child shares the allocation, which is therefore left alone.
 * The errno value set by clone() is preserved across the free().
 *
 * Return: the child's pid on success, -ENOMEM if the stack cannot be
 * allocated, or the negative value returned by clone() on failure.
 */
static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
{
	void *stack;
	pid_t pid;
	int saved_errno;

	stack = malloc(__STACK_SIZE);
	if (!stack)
		return -ENOMEM;

#ifdef __ia64__
	pid = __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
#else
	/* The stack grows downwards, so hand clone() the top of the buffer. */
	pid = clone(fn, (char *)stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
#endif

	saved_errno = errno;
	if (pid < 0 || !(flags & CLONE_VM))
		free(stack);
	errno = saved_errno;

	return pid;
}
1211 
/*
 * Child entry point used by get_userns_fd(): send SIGSTOP to the calling
 * process and return the result of kill(). @data is not used.
 */
static int get_userns_fd_cb(void *data)
{
	pid_t self = getpid();

	return kill(self, SIGSTOP);
}
1216 
/*
 * Reap @pid, restarting waitpid() whenever it is interrupted (EINTR).
 *
 * Returns the child's exit status if it terminated via exit, and -1 if
 * waitpid() failed for another reason or the child did not exit normally.
 */
static int wait_for_pid(pid_t pid)
{
	int wstatus;

	while (waitpid(pid, &wstatus, 0) == -1) {
		if (errno != EINTR)
			return -1;
	}

	return WIFEXITED(wstatus) ? WEXITSTATUS(wstatus) : -1;
}
1235 
get_userns_fd(unsigned long nsid,unsigned long hostid,unsigned long range)1236 static int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range)
1237 {
1238 	int ret;
1239 	pid_t pid;
1240 	char path[256];
1241 
1242 	pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER);
1243 	if (pid < 0)
1244 		return -errno;
1245 
1246 	ret = map_ids(pid, nsid, hostid, range);
1247 	if (ret < 0)
1248 		return ret;
1249 
1250 	snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
1251 	ret = open(path, O_RDONLY | O_CLOEXEC);
1252 	kill(pid, SIGKILL);
1253 	wait_for_pid(pid);
1254 	return ret;
1255 }
1256 
1257 /**
1258  * Validate that an attached mount in our mount namespace cannot be idmapped.
1259  * (The kernel enforces that the mount's mount namespace and the caller's mount
1260  *  namespace match.)
1261  */
TEST_F(mount_setattr_idmapped,attached_mount_inside_current_mount_namespace)1262 TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace)
1263 {
1264 	int open_tree_fd = -EBADF;
1265 	struct mount_attr attr = {
1266 		.attr_set = MOUNT_ATTR_IDMAP,
1267 	};
1268 
1269 	if (!mount_setattr_supported())
1270 		SKIP(return, "mount_setattr syscall not supported");
1271 
1272 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1273 				     AT_EMPTY_PATH |
1274 				     AT_NO_AUTOMOUNT |
1275 				     AT_SYMLINK_NOFOLLOW |
1276 				     OPEN_TREE_CLOEXEC);
1277 	ASSERT_GE(open_tree_fd, 0);
1278 
1279 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1280 	ASSERT_GE(attr.userns_fd, 0);
1281 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1282 	ASSERT_EQ(close(attr.userns_fd), 0);
1283 	ASSERT_EQ(close(open_tree_fd), 0);
1284 }
1285 
1286 /**
1287  * Validate that idmapping a mount is rejected if the mount's mount namespace
1288  * and our mount namespace don't match.
1289  * (The kernel enforces that the mount's mount namespace and the caller's mount
1290  *  namespace match.)
1291  */
TEST_F(mount_setattr_idmapped,attached_mount_outside_current_mount_namespace)1292 TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace)
1293 {
1294 	int open_tree_fd = -EBADF;
1295 	struct mount_attr attr = {
1296 		.attr_set = MOUNT_ATTR_IDMAP,
1297 	};
1298 
1299 	if (!mount_setattr_supported())
1300 		SKIP(return, "mount_setattr syscall not supported");
1301 
1302 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1303 				     AT_EMPTY_PATH |
1304 				     AT_NO_AUTOMOUNT |
1305 				     AT_SYMLINK_NOFOLLOW |
1306 				     OPEN_TREE_CLOEXEC);
1307 	ASSERT_GE(open_tree_fd, 0);
1308 
1309 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1310 
1311 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1312 	ASSERT_GE(attr.userns_fd, 0);
1313 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr,
1314 				    sizeof(attr)), 0);
1315 	ASSERT_EQ(close(attr.userns_fd), 0);
1316 	ASSERT_EQ(close(open_tree_fd), 0);
1317 }
1318 
1319 /**
 * Validate that a detached mount in our mount namespace can be idmapped.
1321  */
TEST_F(mount_setattr_idmapped,detached_mount_inside_current_mount_namespace)1322 TEST_F(mount_setattr_idmapped, detached_mount_inside_current_mount_namespace)
1323 {
1324 	int open_tree_fd = -EBADF;
1325 	struct mount_attr attr = {
1326 		.attr_set = MOUNT_ATTR_IDMAP,
1327 	};
1328 
1329 	if (!mount_setattr_supported())
1330 		SKIP(return, "mount_setattr syscall not supported");
1331 
1332 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1333 				     AT_EMPTY_PATH |
1334 				     AT_NO_AUTOMOUNT |
1335 				     AT_SYMLINK_NOFOLLOW |
1336 				     OPEN_TREE_CLOEXEC |
1337 				     OPEN_TREE_CLONE);
1338 	ASSERT_GE(open_tree_fd, 0);
1339 
1340 	/* Changing mount properties on a detached mount. */
1341 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1342 	ASSERT_GE(attr.userns_fd, 0);
1343 	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1344 				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1345 	ASSERT_EQ(close(attr.userns_fd), 0);
1346 	ASSERT_EQ(close(open_tree_fd), 0);
1347 }
1348 
1349 /**
1350  * Validate that a detached mount not in our mount namespace can be idmapped.
1351  */
TEST_F(mount_setattr_idmapped,detached_mount_outside_current_mount_namespace)1352 TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace)
1353 {
1354 	int open_tree_fd = -EBADF;
1355 	struct mount_attr attr = {
1356 		.attr_set = MOUNT_ATTR_IDMAP,
1357 	};
1358 
1359 	if (!mount_setattr_supported())
1360 		SKIP(return, "mount_setattr syscall not supported");
1361 
1362 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1363 				     AT_EMPTY_PATH |
1364 				     AT_NO_AUTOMOUNT |
1365 				     AT_SYMLINK_NOFOLLOW |
1366 				     OPEN_TREE_CLOEXEC |
1367 				     OPEN_TREE_CLONE);
1368 	ASSERT_GE(open_tree_fd, 0);
1369 
1370 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1371 
1372 	/* Changing mount properties on a detached mount. */
1373 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1374 	ASSERT_GE(attr.userns_fd, 0);
1375 	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1376 				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1377 	ASSERT_EQ(close(attr.userns_fd), 0);
1378 	ASSERT_EQ(close(open_tree_fd), 0);
1379 }
1380 
1381 /**
1382  * Validate that currently changing the idmapping of an idmapped mount fails.
1383  */
TEST_F(mount_setattr_idmapped,change_idmapping)1384 TEST_F(mount_setattr_idmapped, change_idmapping)
1385 {
1386 	int open_tree_fd = -EBADF;
1387 	struct mount_attr attr = {
1388 		.attr_set = MOUNT_ATTR_IDMAP,
1389 	};
1390 
1391 	if (!mount_setattr_supported())
1392 		SKIP(return, "mount_setattr syscall not supported");
1393 
1394 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1395 				     AT_EMPTY_PATH |
1396 				     AT_NO_AUTOMOUNT |
1397 				     AT_SYMLINK_NOFOLLOW |
1398 				     OPEN_TREE_CLOEXEC |
1399 				     OPEN_TREE_CLONE);
1400 	ASSERT_GE(open_tree_fd, 0);
1401 
1402 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1403 	ASSERT_GE(attr.userns_fd, 0);
1404 	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1405 				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1406 	ASSERT_EQ(close(attr.userns_fd), 0);
1407 
1408 	/* Change idmapping on a detached mount that is already idmapped. */
1409 	attr.userns_fd	= get_userns_fd(0, 20000, 10000);
1410 	ASSERT_GE(attr.userns_fd, 0);
1411 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1412 	ASSERT_EQ(close(attr.userns_fd), 0);
1413 	ASSERT_EQ(close(open_tree_fd), 0);
1414 }
1415 
/*
 * Check the ownership of @path, resolved relative to @dfd with the given
 * fstatat() @flags.
 *
 * Returns true only when fstatat() succeeds and both the owner uid and the
 * owner gid equal the expected values; a failing fstatat() yields false.
 */
static bool expected_uid_gid(int dfd, const char *path, int flags,
			     uid_t expected_uid, gid_t expected_gid)
{
	struct stat sb;

	if (fstatat(dfd, path, &sb, flags) < 0)
		return false;

	if (sb.st_uid != expected_uid)
		return false;

	return sb.st_gid == expected_gid;
}
1428 
TEST_F(mount_setattr_idmapped,idmap_mount_tree_invalid)1429 TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid)
1430 {
1431 	int open_tree_fd = -EBADF;
1432 	struct mount_attr attr = {
1433 		.attr_set = MOUNT_ATTR_IDMAP,
1434 	};
1435 
1436 	if (!mount_setattr_supported())
1437 		SKIP(return, "mount_setattr syscall not supported");
1438 
1439 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
1440 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
1441 
1442 	ASSERT_EQ(mount("testing", "/mnt/A", "ramfs", MS_NOATIME | MS_NODEV,
1443 			"size=100000,mode=700"), 0);
1444 
1445 	ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
1446 
1447 	ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
1448 
1449 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/A",
1450 				     AT_RECURSIVE |
1451 				     AT_EMPTY_PATH |
1452 				     AT_NO_AUTOMOUNT |
1453 				     AT_SYMLINK_NOFOLLOW |
1454 				     OPEN_TREE_CLOEXEC |
1455 				     OPEN_TREE_CLONE);
1456 	ASSERT_GE(open_tree_fd, 0);
1457 
1458 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1459 	ASSERT_GE(attr.userns_fd, 0);
1460 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1461 	ASSERT_EQ(close(attr.userns_fd), 0);
1462 	ASSERT_EQ(close(open_tree_fd), 0);
1463 
1464 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
1465 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
1466 	ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/b", 0, 0, 0), 0);
1467 	ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/BB/b", 0, 0, 0), 0);
1468 
1469 	(void)umount2("/mnt/A", MNT_DETACH);
1470 }
1471 
TEST_F(mount_setattr,mount_attr_nosymfollow)1472 TEST_F(mount_setattr, mount_attr_nosymfollow)
1473 {
1474 	int fd;
1475 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
1476 	struct mount_attr attr = {
1477 		.attr_set	= MOUNT_ATTR_NOSYMFOLLOW,
1478 	};
1479 
1480 	if (!mount_setattr_supported())
1481 		SKIP(return, "mount_setattr syscall not supported");
1482 
1483 	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1484 	ASSERT_GT(fd, 0);
1485 	ASSERT_EQ(close(fd), 0);
1486 
1487 	old_flags = read_mnt_flags("/mnt/A");
1488 	ASSERT_GT(old_flags, 0);
1489 
1490 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
1491 
1492 	expected_flags = old_flags;
1493 	expected_flags |= ST_NOSYMFOLLOW;
1494 
1495 	new_flags = read_mnt_flags("/mnt/A");
1496 	ASSERT_EQ(new_flags, expected_flags);
1497 
1498 	new_flags = read_mnt_flags("/mnt/A/AA");
1499 	ASSERT_EQ(new_flags, expected_flags);
1500 
1501 	new_flags = read_mnt_flags("/mnt/A/AA/B");
1502 	ASSERT_EQ(new_flags, expected_flags);
1503 
1504 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
1505 	ASSERT_EQ(new_flags, expected_flags);
1506 
1507 	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1508 	ASSERT_LT(fd, 0);
1509 	ASSERT_EQ(errno, ELOOP);
1510 
1511 	attr.attr_set &= ~MOUNT_ATTR_NOSYMFOLLOW;
1512 	attr.attr_clr |= MOUNT_ATTR_NOSYMFOLLOW;
1513 
1514 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
1515 
1516 	expected_flags &= ~ST_NOSYMFOLLOW;
1517 	new_flags = read_mnt_flags("/mnt/A");
1518 	ASSERT_EQ(new_flags, expected_flags);
1519 
1520 	new_flags = read_mnt_flags("/mnt/A/AA");
1521 	ASSERT_EQ(new_flags, expected_flags);
1522 
1523 	new_flags = read_mnt_flags("/mnt/A/AA/B");
1524 	ASSERT_EQ(new_flags, expected_flags);
1525 
1526 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
1527 	ASSERT_EQ(new_flags, expected_flags);
1528 
1529 	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1530 	ASSERT_GT(fd, 0);
1531 	ASSERT_EQ(close(fd), 0);
1532 }
1533 
TEST_F(mount_setattr,open_tree_detached)1534 TEST_F(mount_setattr, open_tree_detached)
1535 {
1536 	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
1537 	struct statx stx;
1538 
1539 	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
1540 				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1541 				     AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1542 				     OPEN_TREE_CLONE);
1543 	ASSERT_GE(fd_tree_base, 0);
1544 	/*
1545 	 * /mnt                   testing tmpfs
1546 	 * |-/mnt/A               testing tmpfs
1547 	 * | `-/mnt/A/AA          testing tmpfs
1548 	 * |   `-/mnt/A/AA/B      testing tmpfs
1549 	 * |     `-/mnt/A/AA/B/BB testing tmpfs
1550 	 * `-/mnt/B               testing ramfs
1551 	 */
1552 	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
1553 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1554 	ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
1555 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1556 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
1557 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1558 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
1559 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1560 
1561 	fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
1562 				       AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1563 				       AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1564 				       OPEN_TREE_CLONE);
1565 	ASSERT_GE(fd_tree_subdir, 0);
1566 	/*
1567 	 * /AA          testing tmpfs
1568 	 * `-/AA/B      testing tmpfs
1569 	 *   `-/AA/B/BB testing tmpfs
1570 	 */
1571 	ASSERT_EQ(statx(fd_tree_subdir, "B", 0, 0, &stx), 0);
1572 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1573 	ASSERT_EQ(statx(fd_tree_subdir, "B/BB", 0, 0, &stx), 0);
1574 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1575 
1576 	ASSERT_EQ(move_mount(fd_tree_subdir, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
1577 	/*
1578 	 * /tmp/target1          testing tmpfs
1579 	 * `-/tmp/target1/B      testing tmpfs
1580 	 *   `-/tmp/target1/B/BB testing tmpfs
1581 	 */
1582 	ASSERT_EQ(statx(-EBADF, "/tmp/target1", 0, 0, &stx), 0);
1583 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1584 	ASSERT_EQ(statx(-EBADF, "/tmp/target1/B", 0, 0, &stx), 0);
1585 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1586 	ASSERT_EQ(statx(-EBADF, "/tmp/target1/B/BB", 0, 0, &stx), 0);
1587 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1588 
1589 	ASSERT_EQ(move_mount(fd_tree_base, "", -EBADF, "/tmp/target2", MOVE_MOUNT_F_EMPTY_PATH), 0);
1590 	/*
1591 	 * /tmp/target2                   testing tmpfs
1592 	 * |-/tmp/target2/A               testing tmpfs
1593 	 * | `-/tmp/target2/A/AA          testing tmpfs
1594 	 * |   `-/tmp/target2/A/AA/B      testing tmpfs
1595 	 * |     `-/tmp/target2/A/AA/B/BB testing tmpfs
1596 	 * `-/tmp/target2/B               testing ramfs
1597 	 */
1598 	ASSERT_EQ(statx(-EBADF, "/tmp/target2", 0, 0, &stx), 0);
1599 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1600 	ASSERT_EQ(statx(-EBADF, "/tmp/target2/A", 0, 0, &stx), 0);
1601 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1602 	ASSERT_EQ(statx(-EBADF, "/tmp/target2/A/AA", 0, 0, &stx), 0);
1603 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1604 	ASSERT_EQ(statx(-EBADF, "/tmp/target2/A/AA/B", 0, 0, &stx), 0);
1605 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1606 	ASSERT_EQ(statx(-EBADF, "/tmp/target2/A/AA/B/BB", 0, 0, &stx), 0);
1607 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1608 	ASSERT_EQ(statx(-EBADF, "/tmp/target2/B", 0, 0, &stx), 0);
1609 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1610 
1611 	EXPECT_EQ(close(fd_tree_base), 0);
1612 	EXPECT_EQ(close(fd_tree_subdir), 0);
1613 }
1614 
TEST_F(mount_setattr,open_tree_detached_fail)1615 TEST_F(mount_setattr, open_tree_detached_fail)
1616 {
1617 	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
1618 	struct statx stx;
1619 
1620 	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
1621 				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1622 				     AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1623 				     OPEN_TREE_CLONE);
1624 	ASSERT_GE(fd_tree_base, 0);
1625 	/*
1626 	 * /mnt                   testing tmpfs
1627 	 * |-/mnt/A               testing tmpfs
1628 	 * | `-/mnt/A/AA          testing tmpfs
1629 	 * |   `-/mnt/A/AA/B      testing tmpfs
1630 	 * |     `-/mnt/A/AA/B/BB testing tmpfs
1631 	 * `-/mnt/B               testing ramfs
1632 	 */
1633 	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
1634 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1635 	ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
1636 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1637 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
1638 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1639 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
1640 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1641 
1642 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1643 
1644 	/*
1645 	 * The origin mount namespace of the anonymous mount namespace
1646 	 * of @fd_tree_base doesn't match the caller's mount namespace
1647 	 * anymore so creation of another detached mounts must fail.
1648 	 */
1649 	fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
1650 				       AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1651 				       AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1652 				       OPEN_TREE_CLONE);
1653 	ASSERT_LT(fd_tree_subdir, 0);
1654 	ASSERT_EQ(errno, EINVAL);
1655 }
1656 
TEST_F(mount_setattr,open_tree_detached_fail2)1657 TEST_F(mount_setattr, open_tree_detached_fail2)
1658 {
1659 	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
1660 	struct statx stx;
1661 
1662 	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
1663 				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1664 				     AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1665 				     OPEN_TREE_CLONE);
1666 	ASSERT_GE(fd_tree_base, 0);
1667 	/*
1668 	 * /mnt                   testing tmpfs
1669 	 * |-/mnt/A               testing tmpfs
1670 	 * | `-/mnt/A/AA          testing tmpfs
1671 	 * |   `-/mnt/A/AA/B      testing tmpfs
1672 	 * |     `-/mnt/A/AA/B/BB testing tmpfs
1673 	 * `-/mnt/B               testing ramfs
1674 	 */
1675 	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
1676 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1677 	ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
1678 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1679 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
1680 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1681 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
1682 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1683 
1684 	EXPECT_EQ(create_and_enter_userns(), 0);
1685 
1686 	/*
1687 	 * The caller entered a new user namespace. They will have
1688 	 * CAP_SYS_ADMIN in this user namespace. However, they're still
1689 	 * located in a mount namespace that is owned by an ancestor
1690 	 * user namespace in which they hold no privilege. Creating a
1691 	 * detached mount must thus fail.
1692 	 */
1693 	fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
1694 				       AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1695 				       AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1696 				       OPEN_TREE_CLONE);
1697 	ASSERT_LT(fd_tree_subdir, 0);
1698 	ASSERT_EQ(errno, EPERM);
1699 }
1700 
TEST_F(mount_setattr,open_tree_detached_fail3)1701 TEST_F(mount_setattr, open_tree_detached_fail3)
1702 {
1703 	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
1704 	struct statx stx;
1705 
1706 	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
1707 				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1708 				     AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1709 				     OPEN_TREE_CLONE);
1710 	ASSERT_GE(fd_tree_base, 0);
1711 	/*
1712         * /mnt                   testing tmpfs
1713         * |-/mnt/A               testing tmpfs
1714         * | `-/mnt/A/AA          testing tmpfs
1715         * |   `-/mnt/A/AA/B      testing tmpfs
1716         * |     `-/mnt/A/AA/B/BB testing tmpfs
1717         * `-/mnt/B               testing ramfs
1718         */
1719 	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
1720 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1721 	ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
1722 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1723 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
1724 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1725 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
1726 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1727 
1728 	EXPECT_EQ(prepare_unpriv_mountns(), 0);
1729 
1730 	/*
1731         * The caller entered a new mount namespace. They will have
1732         * CAP_SYS_ADMIN in the owning user namespace of their mount
1733         * namespace.
1734         *
1735         * However, the origin mount namespace of the anonymous mount
1736         * namespace of @fd_tree_base doesn't match the caller's mount
1737         * namespace anymore so creation of another detached mounts must
1738         * fail.
1739         */
1740 	fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
1741 			               AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1742 				       AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1743 				       OPEN_TREE_CLONE);
1744 	ASSERT_LT(fd_tree_subdir, 0);
1745 	ASSERT_EQ(errno, EINVAL);
1746 }
1747 
TEST_F(mount_setattr,open_tree_subfolder)1748 TEST_F(mount_setattr, open_tree_subfolder)
1749 {
1750 	int fd_context, fd_tmpfs, fd_tree;
1751 
1752 	fd_context = sys_fsopen("tmpfs", 0);
1753 	ASSERT_GE(fd_context, 0);
1754 
1755 	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
1756 
1757 	fd_tmpfs = sys_fsmount(fd_context, 0, 0);
1758 	ASSERT_GE(fd_tmpfs, 0);
1759 
1760 	EXPECT_EQ(close(fd_context), 0);
1761 
1762 	ASSERT_EQ(mkdirat(fd_tmpfs, "subdir", 0755), 0);
1763 
1764 	fd_tree = sys_open_tree(fd_tmpfs, "subdir",
1765 				AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1766 				AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1767 				OPEN_TREE_CLONE);
1768 	ASSERT_GE(fd_tree, 0);
1769 
1770 	EXPECT_EQ(close(fd_tmpfs), 0);
1771 
1772 	ASSERT_EQ(mkdirat(-EBADF, "/mnt/open_tree_subfolder", 0755), 0);
1773 
1774 	ASSERT_EQ(sys_move_mount(fd_tree, "", -EBADF, "/mnt/open_tree_subfolder", MOVE_MOUNT_F_EMPTY_PATH), 0);
1775 
1776 	EXPECT_EQ(close(fd_tree), 0);
1777 
1778 	ASSERT_EQ(umount2("/mnt/open_tree_subfolder", 0), 0);
1779 
1780 	EXPECT_EQ(rmdir("/mnt/open_tree_subfolder"), 0);
1781 }
1782 
TEST_F(mount_setattr,mount_detached_mount_on_detached_mount_then_close)1783 TEST_F(mount_setattr, mount_detached_mount_on_detached_mount_then_close)
1784 {
1785 	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
1786 	struct statx stx;
1787 
1788 	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
1789 				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1790 				     OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
1791 	ASSERT_GE(fd_tree_base, 0);
1792 	/*
1793 	 * /mnt testing tmpfs
1794 	 */
1795 	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
1796 	ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1797 
1798 	fd_tree_subdir = sys_open_tree(fd_tree_base, "",
1799 				       AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1800 				       AT_EMPTY_PATH | OPEN_TREE_CLOEXEC |
1801 				       OPEN_TREE_CLONE);
1802 	ASSERT_GE(fd_tree_subdir, 0);
1803 	/*
1804 	 * /mnt testing tmpfs
1805 	 */
1806 	ASSERT_EQ(statx(fd_tree_subdir, "A", 0, 0, &stx), 0);
1807 	ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1808 
1809 	/*
1810 	 * /mnt   testing tmpfs
1811 	 * `-/mnt testing tmpfs
1812 	 */
1813 	ASSERT_EQ(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
1814 	ASSERT_EQ(statx(fd_tree_subdir, "", AT_EMPTY_PATH, 0, &stx), 0);
1815 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1816 
1817 	ASSERT_NE(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
1818 
1819 	EXPECT_EQ(close(fd_tree_base), 0);
1820 	EXPECT_EQ(close(fd_tree_subdir), 0);
1821 }
1822 
TEST_F(mount_setattr,mount_detached_mount_on_detached_mount_and_attach)1823 TEST_F(mount_setattr, mount_detached_mount_on_detached_mount_and_attach)
1824 {
1825 	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
1826 	struct statx stx;
1827 	__u64 mnt_id = 0;
1828 
1829 	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
1830 				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1831 				     OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
1832 	ASSERT_GE(fd_tree_base, 0);
1833 	/*
1834 	 * /mnt testing tmpfs
1835 	 */
1836 	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
1837 	ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1838 
1839 	fd_tree_subdir = sys_open_tree(fd_tree_base, "",
1840 				       AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1841 				       AT_EMPTY_PATH | OPEN_TREE_CLOEXEC |
1842 				       OPEN_TREE_CLONE);
1843 	ASSERT_GE(fd_tree_subdir, 0);
1844 	/*
1845 	 * /mnt testing tmpfs
1846 	 */
1847 	ASSERT_EQ(statx(fd_tree_subdir, "A", 0, 0, &stx), 0);
1848 	ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1849 
1850 	/*
1851 	 * /mnt   testing tmpfs
1852 	 * `-/mnt testing tmpfs
1853 	 */
1854 	ASSERT_EQ(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
1855 	ASSERT_EQ(statx(fd_tree_subdir, "", AT_EMPTY_PATH, STATX_MNT_ID_UNIQUE, &stx), 0);
1856 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1857 	ASSERT_TRUE(stx.stx_mask & STATX_MNT_ID_UNIQUE);
1858 	mnt_id = stx.stx_mnt_id;
1859 
1860 	ASSERT_NE(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
1861 
1862 	ASSERT_EQ(move_mount(fd_tree_base, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
1863 	ASSERT_EQ(statx(-EBADF, "/tmp/target1", 0, STATX_MNT_ID_UNIQUE, &stx), 0);
1864 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1865 	ASSERT_TRUE(stx.stx_mask & STATX_MNT_ID_UNIQUE);
1866 	ASSERT_EQ(stx.stx_mnt_id, mnt_id);
1867 
1868 	EXPECT_EQ(close(fd_tree_base), 0);
1869 	EXPECT_EQ(close(fd_tree_subdir), 0);
1870 }
1871 
TEST_F(mount_setattr,move_mount_detached_fail)1872 TEST_F(mount_setattr, move_mount_detached_fail)
1873 {
1874 	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
1875 	struct statx stx;
1876 
1877 	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
1878 				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1879 				     OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
1880 	ASSERT_GE(fd_tree_base, 0);
1881 
1882 	/* Attach the mount to the caller's mount namespace. */
1883 	ASSERT_EQ(move_mount(fd_tree_base, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
1884 
1885 	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
1886 	ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1887 
1888 	fd_tree_subdir = sys_open_tree(-EBADF, "/tmp/B",
1889 				       AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1890 				       OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
1891 	ASSERT_GE(fd_tree_subdir, 0);
1892 	ASSERT_EQ(statx(fd_tree_subdir, "BB", 0, 0, &stx), 0);
1893 	ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1894 
1895 	/* Not allowed to move an attached mount to a detached mount. */
1896 	ASSERT_NE(move_mount(fd_tree_base, "", fd_tree_subdir, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
1897 	ASSERT_EQ(errno, EINVAL);
1898 
1899 	EXPECT_EQ(close(fd_tree_base), 0);
1900 	EXPECT_EQ(close(fd_tree_subdir), 0);
1901 }
1902 
TEST_F(mount_setattr,attach_detached_mount_then_umount_then_close)1903 TEST_F(mount_setattr, attach_detached_mount_then_umount_then_close)
1904 {
1905 	int fd_tree = -EBADF;
1906 	struct statx stx;
1907 
1908 	fd_tree = sys_open_tree(-EBADF, "/mnt",
1909 				AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1910 				AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1911 				OPEN_TREE_CLONE);
1912 	ASSERT_GE(fd_tree, 0);
1913 
1914 	ASSERT_EQ(statx(fd_tree, "A", 0, 0, &stx), 0);
1915 	/* We copied with AT_RECURSIVE so /mnt/A must be a mountpoint. */
1916 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1917 
1918 	/* Attach the mount to the caller's mount namespace. */
1919 	ASSERT_EQ(move_mount(fd_tree, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
1920 
1921 	ASSERT_EQ(statx(-EBADF, "/tmp/target1", 0, 0, &stx), 0);
1922 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1923 
1924 	ASSERT_EQ(umount2("/tmp/target1", MNT_DETACH), 0);
1925 
1926 	/*
1927 	 * This tests whether dissolve_on_fput() handles a NULL mount
1928 	 * namespace correctly, i.e., that it doesn't splat.
1929 	 */
1930 	EXPECT_EQ(close(fd_tree), 0);
1931 }
1932 
TEST_F(mount_setattr,mount_detached1_onto_detached2_then_close_detached1_then_mount_detached2_onto_attached)1933 TEST_F(mount_setattr, mount_detached1_onto_detached2_then_close_detached1_then_mount_detached2_onto_attached)
1934 {
1935 	int fd_tree1 = -EBADF, fd_tree2 = -EBADF;
1936 
1937 	/*
1938 	 * |-/mnt/A               testing tmpfs
1939 	 *   `-/mnt/A/AA          testing tmpfs
1940 	 *     `-/mnt/A/AA/B      testing tmpfs
1941 	 *       `-/mnt/A/AA/B/BB testing tmpfs
1942 	 */
1943 	fd_tree1 = sys_open_tree(-EBADF, "/mnt/A",
1944 				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1945 				 AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1946 				 OPEN_TREE_CLONE);
1947 	ASSERT_GE(fd_tree1, 0);
1948 
1949 	/*
1950 	 * `-/mnt/B testing ramfs
1951 	 */
1952 	fd_tree2 = sys_open_tree(-EBADF, "/mnt/B",
1953 				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1954 				 AT_EMPTY_PATH | OPEN_TREE_CLOEXEC |
1955 				 OPEN_TREE_CLONE);
1956 	ASSERT_GE(fd_tree2, 0);
1957 
1958 	/*
1959 	 * Move the source detached mount tree to the target detached
1960 	 * mount tree. This will move all the mounts in the source mount
1961 	 * tree from the source anonymous mount namespace to the target
1962 	 * anonymous mount namespace.
1963 	 *
1964 	 * The source detached mount tree and the target detached mount
1965 	 * tree now both refer to the same anonymous mount namespace.
1966 	 *
1967 	 * |-""                 testing ramfs
1968 	 *   `-""               testing tmpfs
1969 	 *     `-""/AA          testing tmpfs
1970 	 *       `-""/AA/B      testing tmpfs
1971 	 *         `-""/AA/B/BB testing tmpfs
1972 	 */
1973 	ASSERT_EQ(move_mount(fd_tree1, "", fd_tree2, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
1974 
1975 	/*
1976 	 * The source detached mount tree @fd_tree1 is now an attached
1977 	 * mount, i.e., it has a parent. Specifically, it now has the
1978 	 * root mount of the mount tree of @fd_tree2 as its parent.
1979 	 *
1980 	 * That means we are no longer allowed to attach it as we only
1981 	 * allow attaching the root of an anonymous mount tree, not
1982 	 * random bits and pieces. Verify that the kernel enforces this.
1983 	 */
1984 	ASSERT_NE(move_mount(fd_tree1, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
1985 
1986 	/*
1987 	 * Closing the source detached mount tree must not unmount and
1988 	 * free the shared anonymous mount namespace. The kernel will
1989 	 * quickly yell at us because the anonymous mount namespace
1990 	 * won't be empty when it's freed.
1991 	 */
1992 	EXPECT_EQ(close(fd_tree1), 0);
1993 
1994 	/*
1995 	 * Attach the mount tree to a non-anonymous mount namespace.
1996 	 * This can only succeed if closing fd_tree1 had proper
1997 	 * semantics and didn't cause the anonymous mount namespace to
1998 	 * be freed. If it did this will trigger a UAF which will be
1999 	 * visible on any KASAN enabled kernel.
2000 	 *
2001 	 * |-/tmp/target1                 testing ramfs
2002 	 *   `-/tmp/target1               testing tmpfs
2003 	 *     `-/tmp/target1/AA          testing tmpfs
2004 	 *       `-/tmp/target1/AA/B      testing tmpfs
2005 	 *         `-/tmp/target1/AA/B/BB testing tmpfs
2006 	 */
2007 	ASSERT_EQ(move_mount(fd_tree2, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
2008 	EXPECT_EQ(close(fd_tree2), 0);
2009 }
2010 
TEST_F(mount_setattr,two_detached_mounts_referring_to_same_anonymous_mount_namespace)2011 TEST_F(mount_setattr, two_detached_mounts_referring_to_same_anonymous_mount_namespace)
2012 {
2013 	int fd_tree1 = -EBADF, fd_tree2 = -EBADF;
2014 
2015 	/*
2016 	 * Copy the following mount tree:
2017 	 *
2018 	 * |-/mnt/A               testing tmpfs
2019 	 *   `-/mnt/A/AA          testing tmpfs
2020 	 *     `-/mnt/A/AA/B      testing tmpfs
2021 	 *       `-/mnt/A/AA/B/BB testing tmpfs
2022 	 */
2023 	fd_tree1 = sys_open_tree(-EBADF, "/mnt/A",
2024 				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
2025 				 AT_RECURSIVE | OPEN_TREE_CLOEXEC |
2026 				 OPEN_TREE_CLONE);
2027 	ASSERT_GE(fd_tree1, 0);
2028 
2029 	/*
2030 	 * Create an O_PATH file descriptors with a separate struct file
2031 	 * that refers to the same detached mount tree as @fd_tree1
2032 	 */
2033 	fd_tree2 = sys_open_tree(fd_tree1, "",
2034 				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
2035 				 AT_EMPTY_PATH | OPEN_TREE_CLOEXEC);
2036 	ASSERT_GE(fd_tree2, 0);
2037 
2038 	/*
2039 	 * Copy the following mount tree:
2040 	 *
2041 	 * |-/tmp/target1               testing tmpfs
2042 	 *   `-/tmp/target1/AA          testing tmpfs
2043 	 *     `-/tmp/target1/AA/B      testing tmpfs
2044 	 *       `-/tmp/target1/AA/B/BB testing tmpfs
2045 	 */
2046 	ASSERT_EQ(move_mount(fd_tree2, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
2047 
2048 	/*
2049 	 * This must fail as this would mean adding the same mount tree
2050 	 * into the same mount tree.
2051 	 */
2052 	ASSERT_NE(move_mount(fd_tree1, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
2053 }
2054 
TEST_F(mount_setattr,two_detached_subtrees_of_same_anonymous_mount_namespace)2055 TEST_F(mount_setattr, two_detached_subtrees_of_same_anonymous_mount_namespace)
2056 {
2057 	int fd_tree1 = -EBADF, fd_tree2 = -EBADF;
2058 
2059 	/*
2060 	 * Copy the following mount tree:
2061 	 *
2062 	 * |-/mnt/A               testing tmpfs
2063 	 *   `-/mnt/A/AA          testing tmpfs
2064 	 *     `-/mnt/A/AA/B      testing tmpfs
2065 	 *       `-/mnt/A/AA/B/BB testing tmpfs
2066 	 */
2067 	fd_tree1 = sys_open_tree(-EBADF, "/mnt/A",
2068 				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
2069 				 AT_RECURSIVE | OPEN_TREE_CLOEXEC |
2070 				 OPEN_TREE_CLONE);
2071 	ASSERT_GE(fd_tree1, 0);
2072 
2073 	/*
2074 	 * Create an O_PATH file descriptors with a separate struct file that
2075 	 * refers to a subtree of the same detached mount tree as @fd_tree1
2076 	 */
2077 	fd_tree2 = sys_open_tree(fd_tree1, "AA",
2078 				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
2079 				 AT_EMPTY_PATH | OPEN_TREE_CLOEXEC);
2080 	ASSERT_GE(fd_tree2, 0);
2081 
2082 	/*
2083 	 * This must fail as it is only possible to attach the root of a
2084 	 * detached mount tree.
2085 	 */
2086 	ASSERT_NE(move_mount(fd_tree2, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
2087 
2088 	ASSERT_EQ(move_mount(fd_tree1, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
2089 }
2090 
TEST_F(mount_setattr,detached_tree_propagation)2091 TEST_F(mount_setattr, detached_tree_propagation)
2092 {
2093 	int fd_tree = -EBADF;
2094 	struct statx stx1, stx2, stx3, stx4;
2095 
2096 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
2097 	ASSERT_EQ(mount(NULL, "/mnt", NULL, MS_REC | MS_SHARED, NULL), 0);
2098 
2099 	/*
2100 	 * Copy the following mount tree:
2101 	 *
2102          * /mnt                   testing tmpfs
2103          * |-/mnt/A               testing tmpfs
2104          * | `-/mnt/A/AA          testing tmpfs
2105          * |   `-/mnt/A/AA/B      testing tmpfs
2106          * |     `-/mnt/A/AA/B/BB testing tmpfs
2107          * `-/mnt/B               testing ramfs
2108 	 */
2109 	fd_tree = sys_open_tree(-EBADF, "/mnt",
2110 				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
2111 				 AT_RECURSIVE | OPEN_TREE_CLOEXEC |
2112 				 OPEN_TREE_CLONE);
2113 	ASSERT_GE(fd_tree, 0);
2114 
2115 	ASSERT_EQ(statx(-EBADF, "/mnt/A", 0, 0, &stx1), 0);
2116 	ASSERT_EQ(statx(fd_tree, "A", 0, 0, &stx2), 0);
2117 
2118 	/*
2119 	 * Copying the mount namespace like done above doesn't alter the
2120 	 * mounts in any way so the filesystem mounted on /mnt must be
2121 	 * identical even though the mounts will differ. Use the device
2122 	 * information to verify that. Note that tmpfs will have a 0
2123 	 * major number so comparing the major number is misleading.
2124 	 */
2125 	ASSERT_EQ(stx1.stx_dev_minor, stx2.stx_dev_minor);
2126 
2127 	/* Mount a tmpfs filesystem over /mnt/A. */
2128 	ASSERT_EQ(mount(NULL, "/mnt/A", "tmpfs", 0, NULL), 0);
2129 
2130 
2131 	ASSERT_EQ(statx(-EBADF, "/mnt/A", 0, 0, &stx3), 0);
2132 	ASSERT_EQ(statx(fd_tree, "A", 0, 0, &stx4), 0);
2133 
2134 	/*
2135 	 * A new filesystem has been mounted on top of /mnt/A which
2136 	 * means that the device information will be different for any
2137 	 * statx() that was taken from /mnt/A before the mount compared
2138 	 * to one after the mount.
2139 	 *
2140 	 * Since we already now that the device information between the
2141 	 * stx1 and stx2 samples are identical we also now that stx2 and
2142 	 * stx3 device information will necessarily differ.
2143 	 */
2144 	ASSERT_NE(stx1.stx_dev_minor, stx3.stx_dev_minor);
2145 
2146 	/*
2147 	 * If mount propagation worked correctly then the tmpfs mount
2148 	 * that was created after the mount namespace was unshared will
2149 	 * have propagated onto /mnt/A in the detached mount tree.
2150 	 *
2151 	 * Verify that the device information for stx3 and stx4 are
2152 	 * identical. It is already established that stx3 is different
2153 	 * from both stx1 and stx2 sampled before the tmpfs mount was
2154 	 * done so if stx3 and stx4 are identical the proof is done.
2155 	 */
2156 	ASSERT_EQ(stx3.stx_dev_minor, stx4.stx_dev_minor);
2157 
2158 	EXPECT_EQ(close(fd_tree), 0);
2159 }
2160 
2161 TEST_HARNESS_MAIN
2162