1 // SPDX-License-Identifier: GPL-2.0-or-later
2 // Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu>
3 
4 #define _GNU_SOURCE
5 #include <fcntl.h>
6 #include <sched.h>
7 #include <stdio.h>
8 #include <string.h>
9 #include <sys/stat.h>
10 #include <sys/mount.h>
11 #include <linux/fanotify.h>
12 #include <unistd.h>
13 #include <sys/fanotify.h>
14 #include <sys/syscall.h>
15 
16 #include "../../kselftest_harness.h"
17 #include "../statmount/statmount.h"
18 
// Fallback definitions, used only when the installed fanotify headers do not
// already provide the mount notification additions.  Each group is guarded by
// its own #ifndef so that header-provided definitions take precedence.
#ifndef FAN_MNT_ATTACH
// Info record that follows the event metadata; expect_notify() reads the
// mount ID of the reported mount from it.
struct fanotify_event_info_mnt {
	struct fanotify_event_info_header hdr;
	__u64 mnt_id;
};
#define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */
#endif

#ifndef FAN_MNT_DETACH
#define FAN_MNT_DETACH 0x02000000 /* Mount was detached */
#endif

#ifndef FAN_REPORT_MNT
#define FAN_REPORT_MNT 0x00004000 /* Report mount events */
#endif

// Mark type passed to fanotify_mark() together with a mount namespace fd
// (see FIXTURE_SETUP).
#ifndef FAN_MARK_MNTNS
#define FAN_MARK_MNTNS 0x00000110
#endif
38 
get_mnt_id(struct __test_metadata * const _metadata,const char * path)39 static uint64_t get_mnt_id(struct __test_metadata *const _metadata,
40 			   const char *path)
41 {
42 	struct statx sx;
43 
44 	ASSERT_EQ(statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx), 0);
45 	ASSERT_TRUE(!!(sx.stx_mask & STATX_MNT_ID_UNIQUE));
46 	return sx.stx_mnt_id;
47 }
48 
// mkdtemp() template for the directory on which each test mounts its private
// tmpfs root (see FIXTURE_SETUP).
static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX";
50 
// Second fanotify_mark() command issued on each group in FIXTURE_SETUP after
// the initial FAN_MARK_ADD.  Entry i applies to fan_fd[i].
static const int mark_cmds[] = {
	FAN_MARK_ADD,
	FAN_MARK_REMOVE,
	FAN_MARK_FLUSH
};

// One fanotify group is created per entry in mark_cmds.
#define NUM_FAN_FDS ARRAY_SIZE(mark_cmds)
58 
// Per-test state shared by setup, teardown and the notification helpers.
FIXTURE(fanotify) {
	// fanotify groups, one per mark_cmds entry
	int fan_fd[NUM_FAN_FDS];
	// Raw event data read from fan_fd[0] by expect_notify()
	char buf[256];
	// Number of bytes in buf that have not been handed out yet
	unsigned int rem;
	// Position in buf of the next event to hand out
	void *next;
	// Directory created from root_mntpoint_templ; the test root is mounted here
	char root_mntpoint[sizeof(root_mntpoint_templ)];
	// O_PATH fd of "/" taken before chroot(); teardown uses it to get back
	int orig_root;
	// fd of /proc/self/ns/mnt, passed to fanotify_mark() with FAN_MARK_MNTNS
	int ns_fd;
	// Unique mount ID of "/" inside the chroot
	uint64_t root_id;
};
69 
FIXTURE_SETUP(fanotify)70 FIXTURE_SETUP(fanotify)
71 {
72 	int i, ret;
73 
74 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
75 
76 	self->ns_fd = open("/proc/self/ns/mnt", O_RDONLY);
77 	ASSERT_GE(self->ns_fd, 0);
78 
79 	ASSERT_EQ(mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL), 0);
80 
81 	strcpy(self->root_mntpoint, root_mntpoint_templ);
82 	ASSERT_NE(mkdtemp(self->root_mntpoint), NULL);
83 
84 	self->orig_root = open("/", O_PATH | O_CLOEXEC);
85 	ASSERT_GE(self->orig_root, 0);
86 
87 	ASSERT_EQ(mount("tmpfs", self->root_mntpoint, "tmpfs", 0, NULL), 0);
88 
89 	ASSERT_EQ(chroot(self->root_mntpoint), 0);
90 
91 	ASSERT_EQ(chdir("/"), 0);
92 
93 	ASSERT_EQ(mkdir("a", 0700), 0);
94 
95 	ASSERT_EQ(mkdir("b", 0700), 0);
96 
97 	self->root_id = get_mnt_id(_metadata, "/");
98 	ASSERT_NE(self->root_id, 0);
99 
100 	for (i = 0; i < NUM_FAN_FDS; i++) {
101 		self->fan_fd[i] = fanotify_init(FAN_REPORT_MNT | FAN_NONBLOCK,
102 						0);
103 		ASSERT_GE(self->fan_fd[i], 0);
104 		ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD |
105 				    FAN_MARK_MNTNS,
106 				    FAN_MNT_ATTACH | FAN_MNT_DETACH,
107 				    self->ns_fd, NULL);
108 		ASSERT_EQ(ret, 0);
109 		// On fd[0] we do an extra ADD that changes nothing.
110 		// On fd[1]/fd[2] we REMOVE/FLUSH which removes the mark.
111 		ret = fanotify_mark(self->fan_fd[i], mark_cmds[i] |
112 				    FAN_MARK_MNTNS,
113 				    FAN_MNT_ATTACH | FAN_MNT_DETACH,
114 				    self->ns_fd, NULL);
115 		ASSERT_EQ(ret, 0);
116 	}
117 
118 	self->rem = 0;
119 }
120 
FIXTURE_TEARDOWN(fanotify)121 FIXTURE_TEARDOWN(fanotify)
122 {
123 	int i;
124 
125 	ASSERT_EQ(self->rem, 0);
126 	for (i = 0; i < NUM_FAN_FDS; i++)
127 		close(self->fan_fd[i]);
128 
129 	ASSERT_EQ(fchdir(self->orig_root), 0);
130 
131 	ASSERT_EQ(chroot("."), 0);
132 
133 	EXPECT_EQ(umount2(self->root_mntpoint, MNT_DETACH), 0);
134 	EXPECT_EQ(chdir(self->root_mntpoint), 0);
135 	EXPECT_EQ(chdir("/"), 0);
136 	EXPECT_EQ(rmdir(self->root_mntpoint), 0);
137 }
138 
// Hand out the next mount notification event.
//
// When the fixture buffer is empty, all groups are read first: groups 1 and 2
// (whose marks were removed in setup) must report EAGAIN, group 0 must deliver
// data.  The event's mask is stored in @mask and the mount ID from the info
// record that follows the event metadata is returned.
static uint64_t expect_notify(struct __test_metadata *const _metadata,
			      FIXTURE_DATA(fanotify) *self,
			      uint64_t *mask)
{
	struct fanotify_event_metadata *ev;
	struct fanotify_event_info_mnt *info;
	unsigned int info_len;

	if (self->rem == 0) {
		ssize_t nread;
		int grp;

		// Walk the groups from last to first, so that group 0 is read
		// last and it is its data that ends up in the buffer.
		for (grp = NUM_FAN_FDS; grp-- > 0; ) {
			nread = read(self->fan_fd[grp], self->buf,
				     sizeof(self->buf));
			if (grp == 0) {
				// Group 0 should get events
				ASSERT_GT(nread, 0);
				continue;
			}
			// Groups 1,2 should get EAGAIN
			ASSERT_EQ(nread, -1);
			ASSERT_EQ(errno, EAGAIN);
		}

		self->rem = nread;
		self->next = (void *) self->buf;
	}

	ev = self->next;
	ASSERT_TRUE(FAN_EVENT_OK(ev, self->rem));

	// Consume this event from the buffer.
	info_len = ev->event_len;
	self->rem -= info_len;
	self->next += info_len;

	*mask = ev->mask;

	// What follows the fixed-size metadata must be exactly one mount info
	// record, which starts right after the metadata.
	info_len -= sizeof(*ev);
	info = (void *) (ev + 1);

	ASSERT_EQ(info_len, sizeof(*info));

	return info->mnt_id;
}
184 
// Fetch @n events in a row, storing the mask of the i-th event in mask[i]
// and its mount ID in mnts[i].  The masks are not checked here.
static void expect_notify_n(struct __test_metadata *const _metadata,
				 FIXTURE_DATA(fanotify) *self,
				 unsigned int n, uint64_t mask[], uint64_t mnts[])
{
	unsigned int idx = 0;

	while (idx < n) {
		mnts[idx] = expect_notify(_metadata, self, mask + idx);
		idx++;
	}
}
194 
// Fetch one event, require its mask to be exactly @expect_mask and return
// the mount ID it reports.
static uint64_t expect_notify_mask(struct __test_metadata *const _metadata,
				   FIXTURE_DATA(fanotify) *self,
				   uint64_t expect_mask)
{
	uint64_t got_mask;
	uint64_t id = expect_notify(_metadata, self, &got_mask);

	ASSERT_EQ(expect_mask, got_mask);
	return id;
}
206 
207 
// Fetch @n events that must all carry exactly @mask, storing their mount IDs
// in mnts[0..n-1] in the order in which they were received.
static void expect_notify_mask_n(struct __test_metadata *const _metadata,
				 FIXTURE_DATA(fanotify) *self,
				 uint64_t mask, unsigned int n, uint64_t mnts[])
{
	uint64_t *out = mnts;
	uint64_t *const end = mnts + n;

	while (out < end)
		*out++ = expect_notify_mask(_metadata, self, mask);
}
217 
// Require that the first @num entries of @list1 and @list2 hold the same set
// of mount IDs, in any order.
static void verify_mount_ids(struct __test_metadata *const _metadata,
			     const uint64_t list1[], const uint64_t list2[],
			     size_t num)
{
	size_t a, b;

	// Check that neither list has any duplicates.  Comparing each
	// unordered pair once is sufficient.
	for (a = 0; a < num; a++) {
		for (b = a + 1; b < num; b++) {
			ASSERT_NE(list1[a], list1[b]);
			ASSERT_NE(list2[a], list2[b]);
		}
	}

	// Check that all list1 members can be found in list2. Together with
	// the above it means that the list1 and list2 represent the same sets.
	for (a = 0; a < num; a++) {
		int found = 0;

		for (b = 0; b < num && !found; b++)
			found = (list1[a] == list2[b]);
		ASSERT_TRUE(found);
	}
}
243 
check_mounted(struct __test_metadata * const _metadata,const uint64_t mnts[],size_t num)244 static void check_mounted(struct __test_metadata *const _metadata,
245 			  const uint64_t mnts[], size_t num)
246 {
247 	ssize_t ret;
248 	uint64_t *list;
249 
250 	list = malloc((num + 1) * sizeof(list[0]));
251 	ASSERT_NE(list, NULL);
252 
253 	ret = listmount(LSMT_ROOT, 0, 0, list, num + 1, 0);
254 	ASSERT_EQ(ret, num);
255 
256 	verify_mount_ids(_metadata, mnts, list, num);
257 
258 	free(list);
259 }
260 
setup_mount_tree(struct __test_metadata * const _metadata,int log2_num)261 static void setup_mount_tree(struct __test_metadata *const _metadata,
262 			    int log2_num)
263 {
264 	int ret, i;
265 
266 	ret = mount("", "/", NULL, MS_SHARED, NULL);
267 	ASSERT_EQ(ret, 0);
268 
269 	for (i = 0; i < log2_num; i++) {
270 		ret = mount("/", "/", NULL, MS_BIND, NULL);
271 		ASSERT_EQ(ret, 0);
272 	}
273 }
274 
TEST_F(fanotify,bind)275 TEST_F(fanotify, bind)
276 {
277 	int ret;
278 	uint64_t mnts[2] = { self->root_id };
279 
280 	ret = mount("/", "/", NULL, MS_BIND, NULL);
281 	ASSERT_EQ(ret, 0);
282 
283 	mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
284 	ASSERT_NE(mnts[0], mnts[1]);
285 
286 	check_mounted(_metadata, mnts, 2);
287 
288 	// Cleanup
289 	uint64_t detach_id;
290 	ret = umount("/");
291 	ASSERT_EQ(ret, 0);
292 
293 	detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH);
294 	ASSERT_EQ(detach_id, mnts[1]);
295 
296 	check_mounted(_metadata, mnts, 1);
297 }
298 
TEST_F(fanotify,move)299 TEST_F(fanotify, move)
300 {
301 	int ret;
302 	uint64_t mnts[2] = { self->root_id };
303 	uint64_t move_id;
304 
305 	ret = mount("/", "/a", NULL, MS_BIND, NULL);
306 	ASSERT_EQ(ret, 0);
307 
308 	mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
309 	ASSERT_NE(mnts[0], mnts[1]);
310 
311 	check_mounted(_metadata, mnts, 2);
312 
313 	ret = move_mount(AT_FDCWD, "/a", AT_FDCWD, "/b", 0);
314 	ASSERT_EQ(ret, 0);
315 
316 	move_id = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH);
317 	ASSERT_EQ(move_id, mnts[1]);
318 
319 	// Cleanup
320 	ret = umount("/b");
321 	ASSERT_EQ(ret, 0);
322 
323 	check_mounted(_metadata, mnts, 1);
324 }
325 
TEST_F(fanotify,propagate)326 TEST_F(fanotify, propagate)
327 {
328 	const unsigned int log2_num = 4;
329 	const unsigned int num = (1 << log2_num);
330 	uint64_t mnts[num];
331 
332 	setup_mount_tree(_metadata, log2_num);
333 
334 	expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, num - 1, mnts + 1);
335 
336 	mnts[0] = self->root_id;
337 	check_mounted(_metadata, mnts, num);
338 
339 	// Cleanup
340 	int ret;
341 	uint64_t mnts2[num];
342 	ret = umount2("/", MNT_DETACH);
343 	ASSERT_EQ(ret, 0);
344 
345 	ret = mount("", "/", NULL, MS_PRIVATE, NULL);
346 	ASSERT_EQ(ret, 0);
347 
348 	mnts2[0] = self->root_id;
349 	expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, num - 1, mnts2 + 1);
350 	verify_mount_ids(_metadata, mnts, mnts2, num);
351 
352 	check_mounted(_metadata, mnts, 1);
353 }
354 
TEST_F(fanotify,fsmount)355 TEST_F(fanotify, fsmount)
356 {
357 	int ret, fs, mnt;
358 	uint64_t mnts[2] = { self->root_id };
359 
360 	fs = fsopen("tmpfs", 0);
361 	ASSERT_GE(fs, 0);
362 
363         ret = fsconfig(fs, FSCONFIG_CMD_CREATE, 0, 0, 0);
364 	ASSERT_EQ(ret, 0);
365 
366         mnt = fsmount(fs, 0, 0);
367 	ASSERT_GE(mnt, 0);
368 
369         close(fs);
370 
371 	ret = move_mount(mnt, "", AT_FDCWD, "/a", MOVE_MOUNT_F_EMPTY_PATH);
372 	ASSERT_EQ(ret, 0);
373 
374         close(mnt);
375 
376 	mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
377 	ASSERT_NE(mnts[0], mnts[1]);
378 
379 	check_mounted(_metadata, mnts, 2);
380 
381 	// Cleanup
382 	uint64_t detach_id;
383 	ret = umount("/a");
384 	ASSERT_EQ(ret, 0);
385 
386 	detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH);
387 	ASSERT_EQ(detach_id, mnts[1]);
388 
389 	check_mounted(_metadata, mnts, 1);
390 }
391 
TEST_F(fanotify,reparent)392 TEST_F(fanotify, reparent)
393 {
394 	uint64_t mnts[6] = { self->root_id };
395 	uint64_t dmnts[3];
396 	uint64_t masks[3];
397 	unsigned int i;
398 	int ret;
399 
400 	// Create setup with a[1] -> b[2] propagation
401 	ret = mount("/", "/a", NULL, MS_BIND, NULL);
402 	ASSERT_EQ(ret, 0);
403 
404 	ret = mount("", "/a", NULL, MS_SHARED, NULL);
405 	ASSERT_EQ(ret, 0);
406 
407 	ret = mount("/a", "/b", NULL, MS_BIND, NULL);
408 	ASSERT_EQ(ret, 0);
409 
410 	ret = mount("", "/b", NULL, MS_SLAVE, NULL);
411 	ASSERT_EQ(ret, 0);
412 
413 	expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1);
414 
415 	check_mounted(_metadata, mnts, 3);
416 
417 	// Mount on a[3], which is propagated to b[4]
418 	ret = mount("/", "/a", NULL, MS_BIND, NULL);
419 	ASSERT_EQ(ret, 0);
420 
421 	expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 3);
422 
423 	check_mounted(_metadata, mnts, 5);
424 
425 	// Mount on b[5], not propagated
426 	ret = mount("/", "/b", NULL, MS_BIND, NULL);
427 	ASSERT_EQ(ret, 0);
428 
429 	mnts[5] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
430 
431 	check_mounted(_metadata, mnts, 6);
432 
433 	// Umount a[3], which is propagated to b[4], but not b[5]
434 	// This will result in b[5] "falling" on b[2]
435 	ret = umount("/a");
436 	ASSERT_EQ(ret, 0);
437 
438 	expect_notify_n(_metadata, self, 3, masks, dmnts);
439 	verify_mount_ids(_metadata, mnts + 3, dmnts, 3);
440 
441 	for (i = 0; i < 3; i++) {
442 		if (dmnts[i] == mnts[5]) {
443 			ASSERT_EQ(masks[i], FAN_MNT_ATTACH | FAN_MNT_DETACH);
444 		} else {
445 			ASSERT_EQ(masks[i], FAN_MNT_DETACH);
446 		}
447 	}
448 
449 	mnts[3] = mnts[5];
450 	check_mounted(_metadata, mnts, 4);
451 
452 	// Cleanup
453 	ret = umount("/b");
454 	ASSERT_EQ(ret, 0);
455 
456 	ret = umount("/a");
457 	ASSERT_EQ(ret, 0);
458 
459 	ret = umount("/b");
460 	ASSERT_EQ(ret, 0);
461 
462 	expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 3, dmnts);
463 	verify_mount_ids(_metadata, mnts + 1, dmnts, 3);
464 
465 	check_mounted(_metadata, mnts, 1);
466 }
467 
TEST_F(fanotify,rmdir)468 TEST_F(fanotify, rmdir)
469 {
470 	uint64_t mnts[3] = { self->root_id };
471 	int ret;
472 
473 	ret = mount("/", "/a", NULL, MS_BIND, NULL);
474 	ASSERT_EQ(ret, 0);
475 
476 	ret = mount("/", "/a/b", NULL, MS_BIND, NULL);
477 	ASSERT_EQ(ret, 0);
478 
479 	expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1);
480 
481 	check_mounted(_metadata, mnts, 3);
482 
483 	ret = chdir("/a");
484 	ASSERT_EQ(ret, 0);
485 
486 	ret = fork();
487 	ASSERT_GE(ret, 0);
488 
489 	if (ret == 0) {
490 		chdir("/");
491 		unshare(CLONE_NEWNS);
492 		mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
493 		umount2("/a", MNT_DETACH);
494 		// This triggers a detach in the other namespace
495 		rmdir("/a");
496 		exit(0);
497 	}
498 	wait(NULL);
499 
500 	expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 2, mnts + 1);
501 	check_mounted(_metadata, mnts, 1);
502 
503 	// Cleanup
504 	ret = chdir("/");
505 	ASSERT_EQ(ret, 0);
506 }
507 
TEST_F(fanotify,pivot_root)508 TEST_F(fanotify, pivot_root)
509 {
510 	uint64_t mnts[3] = { self->root_id };
511 	uint64_t mnts2[3];
512 	int ret;
513 
514 	ret = mount("tmpfs", "/a", "tmpfs", 0, NULL);
515 	ASSERT_EQ(ret, 0);
516 
517 	mnts[2] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
518 
519 	ret = mkdir("/a/new", 0700);
520 	ASSERT_EQ(ret, 0);
521 
522 	ret = mkdir("/a/old", 0700);
523 	ASSERT_EQ(ret, 0);
524 
525 	ret = mount("/a", "/a/new", NULL, MS_BIND, NULL);
526 	ASSERT_EQ(ret, 0);
527 
528 	mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
529 	check_mounted(_metadata, mnts, 3);
530 
531 	ret = syscall(SYS_pivot_root, "/a/new", "/a/new/old");
532 	ASSERT_EQ(ret, 0);
533 
534 	expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH, 2, mnts2);
535 	verify_mount_ids(_metadata, mnts, mnts2, 2);
536 	check_mounted(_metadata, mnts, 3);
537 
538 	// Cleanup
539 	ret = syscall(SYS_pivot_root, "/old", "/old/a/new");
540 	ASSERT_EQ(ret, 0);
541 
542 	ret = umount("/a/new");
543 	ASSERT_EQ(ret, 0);
544 
545 	ret = umount("/a");
546 	ASSERT_EQ(ret, 0);
547 
548 	check_mounted(_metadata, mnts, 1);
549 }
550 
551 TEST_HARNESS_MAIN
552