1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/mount.h> 3 #include <linux/pseudo_fs.h> 4 #include <linux/file.h> 5 #include <linux/fs.h> 6 #include <linux/proc_fs.h> 7 #include <linux/proc_ns.h> 8 #include <linux/magic.h> 9 #include <linux/ktime.h> 10 #include <linux/seq_file.h> 11 #include <linux/pid_namespace.h> 12 #include <linux/user_namespace.h> 13 #include <linux/nsfs.h> 14 #include <linux/uaccess.h> 15 #include <linux/mnt_namespace.h> 16 17 #include "mount.h" 18 #include "internal.h" 19 20 static struct vfsmount *nsfs_mnt; 21 22 static long ns_ioctl(struct file *filp, unsigned int ioctl, 23 unsigned long arg); 24 static const struct file_operations ns_file_operations = { 25 .unlocked_ioctl = ns_ioctl, 26 .compat_ioctl = compat_ptr_ioctl, 27 }; 28 29 static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) 30 { 31 struct inode *inode = d_inode(dentry); 32 struct ns_common *ns = inode->i_private; 33 const struct proc_ns_operations *ns_ops = ns->ops; 34 35 return dynamic_dname(buffer, buflen, "%s:[%lu]", 36 ns_ops->name, inode->i_ino); 37 } 38 39 const struct dentry_operations ns_dentry_operations = { 40 .d_dname = ns_dname, 41 .d_prune = stashed_dentry_prune, 42 }; 43 44 static void nsfs_evict(struct inode *inode) 45 { 46 struct ns_common *ns = inode->i_private; 47 clear_inode(inode); 48 ns->ops->put(ns); 49 } 50 51 int ns_get_path_cb(struct path *path, ns_get_path_helper_t *ns_get_cb, 52 void *private_data) 53 { 54 struct ns_common *ns; 55 56 ns = ns_get_cb(private_data); 57 if (!ns) 58 return -ENOENT; 59 60 return path_from_stashed(&ns->stashed, nsfs_mnt, ns, path); 61 } 62 63 struct ns_get_path_task_args { 64 const struct proc_ns_operations *ns_ops; 65 struct task_struct *task; 66 }; 67 68 static struct ns_common *ns_get_path_task(void *private_data) 69 { 70 struct ns_get_path_task_args *args = private_data; 71 72 return args->ns_ops->get(args->task); 73 } 74 75 int ns_get_path(struct path *path, struct task_struct *task, 76 const struct proc_ns_operations *ns_ops) 77 { 78 struct ns_get_path_task_args args = { 79 .ns_ops = ns_ops, 80 .task = task, 81 }; 82 83 return ns_get_path_cb(path, ns_get_path_task, &args); 84 } 85 86 /** 87 * open_namespace - open a namespace 88 * @ns: the namespace to open 89 * 90 * This will consume a reference to @ns indendent of success or failure. 91 * 92 * Return: A file descriptor on success or a negative error code on failure. 93 */ 94 int open_namespace(struct ns_common *ns) 95 { 96 struct path path __free(path_put) = {}; 97 struct file *f; 98 int err; 99 100 /* call first to consume reference */ 101 err = path_from_stashed(&ns->stashed, nsfs_mnt, ns, &path); 102 if (err < 0) 103 return err; 104 105 CLASS(get_unused_fd, fd)(O_CLOEXEC); 106 if (fd < 0) 107 return fd; 108 109 f = dentry_open(&path, O_RDONLY, current_cred()); 110 if (IS_ERR(f)) 111 return PTR_ERR(f); 112 113 fd_install(fd, f); 114 return take_fd(fd); 115 } 116 117 int open_related_ns(struct ns_common *ns, 118 struct ns_common *(*get_ns)(struct ns_common *ns)) 119 { 120 struct ns_common *relative; 121 122 relative = get_ns(ns); 123 if (IS_ERR(relative)) 124 return PTR_ERR(relative); 125 126 return open_namespace(relative); 127 } 128 EXPORT_SYMBOL_GPL(open_related_ns); 129 130 static int copy_ns_info_to_user(const struct mnt_namespace *mnt_ns, 131 struct mnt_ns_info __user *uinfo, size_t usize, 132 struct mnt_ns_info *kinfo) 133 { 134 /* 135 * If userspace and the kernel have the same struct size it can just 136 * be copied. If userspace provides an older struct, only the bits that 137 * userspace knows about will be copied. If userspace provides a new 138 * struct, only the bits that the kernel knows aobut will be copied and 139 * the size value will be set to the size the kernel knows about. 140 */ 141 kinfo->size = min(usize, sizeof(*kinfo)); 142 kinfo->mnt_ns_id = mnt_ns->seq; 143 kinfo->nr_mounts = READ_ONCE(mnt_ns->nr_mounts); 144 /* Subtract the root mount of the mount namespace. */ 145 if (kinfo->nr_mounts) 146 kinfo->nr_mounts--; 147 148 if (copy_to_user(uinfo, kinfo, kinfo->size)) 149 return -EFAULT; 150 151 return 0; 152 } 153 154 static bool nsfs_ioctl_valid(unsigned int cmd) 155 { 156 switch (cmd) { 157 case NS_GET_USERNS: 158 case NS_GET_PARENT: 159 case NS_GET_NSTYPE: 160 case NS_GET_OWNER_UID: 161 case NS_GET_MNTNS_ID: 162 case NS_GET_PID_FROM_PIDNS: 163 case NS_GET_TGID_FROM_PIDNS: 164 case NS_GET_PID_IN_PIDNS: 165 case NS_GET_TGID_IN_PIDNS: 166 return (_IOC_TYPE(cmd) == _IOC_TYPE(cmd)); 167 } 168 169 /* Extensible ioctls require some extra handling. */ 170 switch (_IOC_NR(cmd)) { 171 case _IOC_NR(NS_MNT_GET_INFO): 172 case _IOC_NR(NS_MNT_GET_NEXT): 173 case _IOC_NR(NS_MNT_GET_PREV): 174 return (_IOC_TYPE(cmd) == _IOC_TYPE(cmd)); 175 } 176 177 return false; 178 } 179 180 static long ns_ioctl(struct file *filp, unsigned int ioctl, 181 unsigned long arg) 182 { 183 struct user_namespace *user_ns; 184 struct pid_namespace *pid_ns; 185 struct task_struct *tsk; 186 struct ns_common *ns; 187 struct mnt_namespace *mnt_ns; 188 bool previous = false; 189 uid_t __user *argp; 190 uid_t uid; 191 int ret; 192 193 if (!nsfs_ioctl_valid(ioctl)) 194 return -ENOIOCTLCMD; 195 196 ns = get_proc_ns(file_inode(filp)); 197 switch (ioctl) { 198 case NS_GET_USERNS: 199 return open_related_ns(ns, ns_get_owner); 200 case NS_GET_PARENT: 201 if (!ns->ops->get_parent) 202 return -EINVAL; 203 return open_related_ns(ns, ns->ops->get_parent); 204 case NS_GET_NSTYPE: 205 return ns->ops->type; 206 case NS_GET_OWNER_UID: 207 if (ns->ops->type != CLONE_NEWUSER) 208 return -EINVAL; 209 user_ns = container_of(ns, struct user_namespace, ns); 210 argp = (uid_t __user *) arg; 211 uid = from_kuid_munged(current_user_ns(), user_ns->owner); 212 return put_user(uid, argp); 213 case NS_GET_MNTNS_ID: { 214 __u64 __user *idp; 215 __u64 id; 216 217 if (ns->ops->type != CLONE_NEWNS) 218 return -EINVAL; 219 220 mnt_ns = container_of(ns, struct mnt_namespace, ns); 221 idp = (__u64 __user *)arg; 222 id = mnt_ns->seq; 223 return put_user(id, idp); 224 } 225 case NS_GET_PID_FROM_PIDNS: 226 fallthrough; 227 case NS_GET_TGID_FROM_PIDNS: 228 fallthrough; 229 case NS_GET_PID_IN_PIDNS: 230 fallthrough; 231 case NS_GET_TGID_IN_PIDNS: { 232 if (ns->ops->type != CLONE_NEWPID) 233 return -EINVAL; 234 235 ret = -ESRCH; 236 pid_ns = container_of(ns, struct pid_namespace, ns); 237 238 guard(rcu)(); 239 240 if (ioctl == NS_GET_PID_IN_PIDNS || 241 ioctl == NS_GET_TGID_IN_PIDNS) 242 tsk = find_task_by_vpid(arg); 243 else 244 tsk = find_task_by_pid_ns(arg, pid_ns); 245 if (!tsk) 246 break; 247 248 switch (ioctl) { 249 case NS_GET_PID_FROM_PIDNS: 250 ret = task_pid_vnr(tsk); 251 break; 252 case NS_GET_TGID_FROM_PIDNS: 253 ret = task_tgid_vnr(tsk); 254 break; 255 case NS_GET_PID_IN_PIDNS: 256 ret = task_pid_nr_ns(tsk, pid_ns); 257 break; 258 case NS_GET_TGID_IN_PIDNS: 259 ret = task_tgid_nr_ns(tsk, pid_ns); 260 break; 261 default: 262 ret = 0; 263 break; 264 } 265 266 if (!ret) 267 ret = -ESRCH; 268 return ret; 269 } 270 } 271 272 /* extensible ioctls */ 273 switch (_IOC_NR(ioctl)) { 274 case _IOC_NR(NS_MNT_GET_INFO): { 275 struct mnt_ns_info kinfo = {}; 276 struct mnt_ns_info __user *uinfo = (struct mnt_ns_info __user *)arg; 277 size_t usize = _IOC_SIZE(ioctl); 278 279 if (ns->ops->type != CLONE_NEWNS) 280 return -EINVAL; 281 282 if (!uinfo) 283 return -EINVAL; 284 285 if (usize < MNT_NS_INFO_SIZE_VER0) 286 return -EINVAL; 287 288 return copy_ns_info_to_user(to_mnt_ns(ns), uinfo, usize, &kinfo); 289 } 290 case _IOC_NR(NS_MNT_GET_PREV): 291 previous = true; 292 fallthrough; 293 case _IOC_NR(NS_MNT_GET_NEXT): { 294 struct mnt_ns_info kinfo = {}; 295 struct mnt_ns_info __user *uinfo = (struct mnt_ns_info __user *)arg; 296 struct path path __free(path_put) = {}; 297 struct file *f __free(fput) = NULL; 298 size_t usize = _IOC_SIZE(ioctl); 299 300 if (ns->ops->type != CLONE_NEWNS) 301 return -EINVAL; 302 303 if (usize < MNT_NS_INFO_SIZE_VER0) 304 return -EINVAL; 305 306 mnt_ns = get_sequential_mnt_ns(to_mnt_ns(ns), previous); 307 if (IS_ERR(mnt_ns)) 308 return PTR_ERR(mnt_ns); 309 310 ns = to_ns_common(mnt_ns); 311 /* Transfer ownership of @mnt_ns reference to @path. */ 312 ret = path_from_stashed(&ns->stashed, nsfs_mnt, ns, &path); 313 if (ret) 314 return ret; 315 316 CLASS(get_unused_fd, fd)(O_CLOEXEC); 317 if (fd < 0) 318 return fd; 319 320 f = dentry_open(&path, O_RDONLY, current_cred()); 321 if (IS_ERR(f)) 322 return PTR_ERR(f); 323 324 if (uinfo) { 325 /* 326 * If @uinfo is passed return all information about the 327 * mount namespace as well. 328 */ 329 ret = copy_ns_info_to_user(to_mnt_ns(ns), uinfo, usize, &kinfo); 330 if (ret) 331 return ret; 332 } 333 334 /* Transfer reference of @f to caller's fdtable. */ 335 fd_install(fd, no_free_ptr(f)); 336 /* File descriptor is live so hand it off to the caller. */ 337 return take_fd(fd); 338 } 339 default: 340 ret = -ENOTTY; 341 } 342 343 return ret; 344 } 345 346 int ns_get_name(char *buf, size_t size, struct task_struct *task, 347 const struct proc_ns_operations *ns_ops) 348 { 349 struct ns_common *ns; 350 int res = -ENOENT; 351 const char *name; 352 ns = ns_ops->get(task); 353 if (ns) { 354 name = ns_ops->real_ns_name ? : ns_ops->name; 355 res = snprintf(buf, size, "%s:[%u]", name, ns->inum); 356 ns_ops->put(ns); 357 } 358 return res; 359 } 360 361 bool proc_ns_file(const struct file *file) 362 { 363 return file->f_op == &ns_file_operations; 364 } 365 366 /** 367 * ns_match() - Returns true if current namespace matches dev/ino provided. 368 * @ns: current namespace 369 * @dev: dev_t from nsfs that will be matched against current nsfs 370 * @ino: ino_t from nsfs that will be matched against current nsfs 371 * 372 * Return: true if dev and ino matches the current nsfs. 373 */ 374 bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino) 375 { 376 return (ns->inum == ino) && (nsfs_mnt->mnt_sb->s_dev == dev); 377 } 378 379 380 static int nsfs_show_path(struct seq_file *seq, struct dentry *dentry) 381 { 382 struct inode *inode = d_inode(dentry); 383 const struct ns_common *ns = inode->i_private; 384 const struct proc_ns_operations *ns_ops = ns->ops; 385 386 seq_printf(seq, "%s:[%lu]", ns_ops->name, inode->i_ino); 387 return 0; 388 } 389 390 static const struct super_operations nsfs_ops = { 391 .statfs = simple_statfs, 392 .evict_inode = nsfs_evict, 393 .show_path = nsfs_show_path, 394 }; 395 396 static int nsfs_init_inode(struct inode *inode, void *data) 397 { 398 struct ns_common *ns = data; 399 400 inode->i_private = data; 401 inode->i_mode |= S_IRUGO; 402 inode->i_fop = &ns_file_operations; 403 inode->i_ino = ns->inum; 404 return 0; 405 } 406 407 static void nsfs_put_data(void *data) 408 { 409 struct ns_common *ns = data; 410 ns->ops->put(ns); 411 } 412 413 static const struct stashed_operations nsfs_stashed_ops = { 414 .init_inode = nsfs_init_inode, 415 .put_data = nsfs_put_data, 416 }; 417 418 static int nsfs_init_fs_context(struct fs_context *fc) 419 { 420 struct pseudo_fs_context *ctx = init_pseudo(fc, NSFS_MAGIC); 421 if (!ctx) 422 return -ENOMEM; 423 ctx->ops = &nsfs_ops; 424 ctx->dops = &ns_dentry_operations; 425 fc->s_fs_info = (void *)&nsfs_stashed_ops; 426 return 0; 427 } 428 429 static struct file_system_type nsfs = { 430 .name = "nsfs", 431 .init_fs_context = nsfs_init_fs_context, 432 .kill_sb = kill_anon_super, 433 }; 434 435 void __init nsfs_init(void) 436 { 437 nsfs_mnt = kern_mount(&nsfs); 438 if (IS_ERR(nsfs_mnt)) 439 panic("can't set nsfs up\n"); 440 nsfs_mnt->mnt_sb->s_flags &= ~SB_NOUSER; 441 } 442