xref: /linux/kernel/user_namespace.c (revision 7ebdfaa52d15b947503f76474477f92854796d96)
1acce292cSCedric Le Goater /*
2acce292cSCedric Le Goater  *  This program is free software; you can redistribute it and/or
3acce292cSCedric Le Goater  *  modify it under the terms of the GNU General Public License as
4acce292cSCedric Le Goater  *  published by the Free Software Foundation, version 2 of the
5acce292cSCedric Le Goater  *  License.
6acce292cSCedric Le Goater  */
7acce292cSCedric Le Goater 
89984de1aSPaul Gortmaker #include <linux/export.h>
9acce292cSCedric Le Goater #include <linux/nsproxy.h>
101aeb272cSRobert P. J. Day #include <linux/slab.h>
11acce292cSCedric Le Goater #include <linux/user_namespace.h>
120bb80f24SDavid Howells #include <linux/proc_ns.h>
135c1469deSEric W. Biederman #include <linux/highuid.h>
1418b6e041SSerge Hallyn #include <linux/cred.h>
15973c5914SEric W. Biederman #include <linux/securebits.h>
1622d917d8SEric W. Biederman #include <linux/keyctl.h>
1722d917d8SEric W. Biederman #include <linux/key-type.h>
1822d917d8SEric W. Biederman #include <keys/user-type.h>
1922d917d8SEric W. Biederman #include <linux/seq_file.h>
2022d917d8SEric W. Biederman #include <linux/fs.h>
2122d917d8SEric W. Biederman #include <linux/uaccess.h>
2222d917d8SEric W. Biederman #include <linux/ctype.h>
23f76d207aSEric W. Biederman #include <linux/projid.h>
24e66eded8SEric W. Biederman #include <linux/fs_struct.h>
25acce292cSCedric Le Goater 
266164281aSPavel Emelyanov static struct kmem_cache *user_ns_cachep __read_mostly;
27f0d62aecSEric W. Biederman static DEFINE_MUTEX(userns_state_mutex);
286164281aSPavel Emelyanov 
296708075fSEric W. Biederman static bool new_idmap_permitted(const struct file *file,
306708075fSEric W. Biederman 				struct user_namespace *ns, int cap_setid,
3122d917d8SEric W. Biederman 				struct uid_gid_map *map);
3222d917d8SEric W. Biederman 
33cde1975bSEric W. Biederman static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
34cde1975bSEric W. Biederman {
35cde1975bSEric W. Biederman 	/* Start with the same capabilities as init but useless for doing
36cde1975bSEric W. Biederman 	 * anything as the capabilities are bound to the new user namespace.
37cde1975bSEric W. Biederman 	 */
38cde1975bSEric W. Biederman 	cred->securebits = SECUREBITS_DEFAULT;
39cde1975bSEric W. Biederman 	cred->cap_inheritable = CAP_EMPTY_SET;
40cde1975bSEric W. Biederman 	cred->cap_permitted = CAP_FULL_SET;
41cde1975bSEric W. Biederman 	cred->cap_effective = CAP_FULL_SET;
42cde1975bSEric W. Biederman 	cred->cap_bset = CAP_FULL_SET;
43cde1975bSEric W. Biederman #ifdef CONFIG_KEYS
44cde1975bSEric W. Biederman 	key_put(cred->request_key_auth);
45cde1975bSEric W. Biederman 	cred->request_key_auth = NULL;
46cde1975bSEric W. Biederman #endif
47cde1975bSEric W. Biederman 	/* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */
48cde1975bSEric W. Biederman 	cred->user_ns = user_ns;
49cde1975bSEric W. Biederman }
50cde1975bSEric W. Biederman 
5177ec739dSSerge E. Hallyn /*
5218b6e041SSerge Hallyn  * Create a new user namespace, deriving the creator from the user in the
5318b6e041SSerge Hallyn  * passed credentials, and replacing that user with the new root user for the
5418b6e041SSerge Hallyn  * new namespace.
5518b6e041SSerge Hallyn  *
5618b6e041SSerge Hallyn  * This is called by copy_creds(), which will finish setting the target task's
5718b6e041SSerge Hallyn  * credentials.
5877ec739dSSerge E. Hallyn  */
5918b6e041SSerge Hallyn int create_user_ns(struct cred *new)
6077ec739dSSerge E. Hallyn {
610093ccb6SEric W. Biederman 	struct user_namespace *ns, *parent_ns = new->user_ns;
62078de5f7SEric W. Biederman 	kuid_t owner = new->euid;
63078de5f7SEric W. Biederman 	kgid_t group = new->egid;
6498f842e6SEric W. Biederman 	int ret;
65783291e6SEric W. Biederman 
668742f229SOleg Nesterov 	if (parent_ns->level > 32)
678742f229SOleg Nesterov 		return -EUSERS;
688742f229SOleg Nesterov 
693151527eSEric W. Biederman 	/*
703151527eSEric W. Biederman 	 * Verify that we can not violate the policy of which files
713151527eSEric W. Biederman 	 * may be accessed that is specified by the root directory,
723151527eSEric W. Biederman 	 * by verifing that the root directory is at the root of the
733151527eSEric W. Biederman 	 * mount namespace which allows all files to be accessed.
743151527eSEric W. Biederman 	 */
753151527eSEric W. Biederman 	if (current_chrooted())
763151527eSEric W. Biederman 		return -EPERM;
773151527eSEric W. Biederman 
78783291e6SEric W. Biederman 	/* The creator needs a mapping in the parent user namespace
79783291e6SEric W. Biederman 	 * or else we won't be able to reasonably tell userspace who
80783291e6SEric W. Biederman 	 * created a user_namespace.
81783291e6SEric W. Biederman 	 */
82783291e6SEric W. Biederman 	if (!kuid_has_mapping(parent_ns, owner) ||
83783291e6SEric W. Biederman 	    !kgid_has_mapping(parent_ns, group))
84783291e6SEric W. Biederman 		return -EPERM;
8577ec739dSSerge E. Hallyn 
8622d917d8SEric W. Biederman 	ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
8777ec739dSSerge E. Hallyn 	if (!ns)
8818b6e041SSerge Hallyn 		return -ENOMEM;
8977ec739dSSerge E. Hallyn 
906344c433SAl Viro 	ret = ns_alloc_inum(&ns->ns);
9198f842e6SEric W. Biederman 	if (ret) {
9298f842e6SEric W. Biederman 		kmem_cache_free(user_ns_cachep, ns);
9398f842e6SEric W. Biederman 		return ret;
9498f842e6SEric W. Biederman 	}
9533c42940SAl Viro 	ns->ns.ops = &userns_operations;
9698f842e6SEric W. Biederman 
97c61a2810SEric W. Biederman 	atomic_set(&ns->count, 1);
98cde1975bSEric W. Biederman 	/* Leave the new->user_ns reference with the new user namespace. */
99aeb3ae9dSEric W. Biederman 	ns->parent = parent_ns;
1008742f229SOleg Nesterov 	ns->level = parent_ns->level + 1;
101783291e6SEric W. Biederman 	ns->owner = owner;
102783291e6SEric W. Biederman 	ns->group = group;
10322d917d8SEric W. Biederman 
1049cc46516SEric W. Biederman 	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
1059cc46516SEric W. Biederman 	mutex_lock(&userns_state_mutex);
1069cc46516SEric W. Biederman 	ns->flags = parent_ns->flags;
1079cc46516SEric W. Biederman 	mutex_unlock(&userns_state_mutex);
1089cc46516SEric W. Biederman 
109cde1975bSEric W. Biederman 	set_cred_user_ns(new, ns);
1100093ccb6SEric W. Biederman 
111f36f8c75SDavid Howells #ifdef CONFIG_PERSISTENT_KEYRINGS
112f36f8c75SDavid Howells 	init_rwsem(&ns->persistent_keyring_register_sem);
113f36f8c75SDavid Howells #endif
11418b6e041SSerge Hallyn 	return 0;
115acce292cSCedric Le Goater }
116acce292cSCedric Le Goater 
117b2e0d987SEric W. Biederman int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
118b2e0d987SEric W. Biederman {
119b2e0d987SEric W. Biederman 	struct cred *cred;
1206160968cSOleg Nesterov 	int err = -ENOMEM;
121b2e0d987SEric W. Biederman 
122b2e0d987SEric W. Biederman 	if (!(unshare_flags & CLONE_NEWUSER))
123b2e0d987SEric W. Biederman 		return 0;
124b2e0d987SEric W. Biederman 
125b2e0d987SEric W. Biederman 	cred = prepare_creds();
1266160968cSOleg Nesterov 	if (cred) {
1276160968cSOleg Nesterov 		err = create_user_ns(cred);
1286160968cSOleg Nesterov 		if (err)
1296160968cSOleg Nesterov 			put_cred(cred);
1306160968cSOleg Nesterov 		else
131b2e0d987SEric W. Biederman 			*new_cred = cred;
1326160968cSOleg Nesterov 	}
1336160968cSOleg Nesterov 
1346160968cSOleg Nesterov 	return err;
135b2e0d987SEric W. Biederman }
136b2e0d987SEric W. Biederman 
137c61a2810SEric W. Biederman void free_user_ns(struct user_namespace *ns)
13851708366SDavid Howells {
139c61a2810SEric W. Biederman 	struct user_namespace *parent;
14051708366SDavid Howells 
141c61a2810SEric W. Biederman 	do {
142783291e6SEric W. Biederman 		parent = ns->parent;
143f36f8c75SDavid Howells #ifdef CONFIG_PERSISTENT_KEYRINGS
144f36f8c75SDavid Howells 		key_put(ns->persistent_keyring_register);
145f36f8c75SDavid Howells #endif
1466344c433SAl Viro 		ns_free_inum(&ns->ns);
147783291e6SEric W. Biederman 		kmem_cache_free(user_ns_cachep, ns);
148c61a2810SEric W. Biederman 		ns = parent;
149c61a2810SEric W. Biederman 	} while (atomic_dec_and_test(&parent->count));
15051708366SDavid Howells }
1516a3fd92eSMichael Halcrow EXPORT_SYMBOL(free_user_ns);
1525c1469deSEric W. Biederman 
15322d917d8SEric W. Biederman static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
1545c1469deSEric W. Biederman {
15522d917d8SEric W. Biederman 	unsigned idx, extents;
15622d917d8SEric W. Biederman 	u32 first, last, id2;
1575c1469deSEric W. Biederman 
15822d917d8SEric W. Biederman 	id2 = id + count - 1;
15922d917d8SEric W. Biederman 
16022d917d8SEric W. Biederman 	/* Find the matching extent */
16122d917d8SEric W. Biederman 	extents = map->nr_extents;
162e79323bdSMikulas Patocka 	smp_rmb();
16322d917d8SEric W. Biederman 	for (idx = 0; idx < extents; idx++) {
16422d917d8SEric W. Biederman 		first = map->extent[idx].first;
16522d917d8SEric W. Biederman 		last = first + map->extent[idx].count - 1;
16622d917d8SEric W. Biederman 		if (id >= first && id <= last &&
16722d917d8SEric W. Biederman 		    (id2 >= first && id2 <= last))
16822d917d8SEric W. Biederman 			break;
16922d917d8SEric W. Biederman 	}
17022d917d8SEric W. Biederman 	/* Map the id or note failure */
17122d917d8SEric W. Biederman 	if (idx < extents)
17222d917d8SEric W. Biederman 		id = (id - first) + map->extent[idx].lower_first;
17322d917d8SEric W. Biederman 	else
17422d917d8SEric W. Biederman 		id = (u32) -1;
17522d917d8SEric W. Biederman 
17622d917d8SEric W. Biederman 	return id;
17722d917d8SEric W. Biederman }
17822d917d8SEric W. Biederman 
17922d917d8SEric W. Biederman static u32 map_id_down(struct uid_gid_map *map, u32 id)
18022d917d8SEric W. Biederman {
18122d917d8SEric W. Biederman 	unsigned idx, extents;
18222d917d8SEric W. Biederman 	u32 first, last;
18322d917d8SEric W. Biederman 
18422d917d8SEric W. Biederman 	/* Find the matching extent */
18522d917d8SEric W. Biederman 	extents = map->nr_extents;
186e79323bdSMikulas Patocka 	smp_rmb();
18722d917d8SEric W. Biederman 	for (idx = 0; idx < extents; idx++) {
18822d917d8SEric W. Biederman 		first = map->extent[idx].first;
18922d917d8SEric W. Biederman 		last = first + map->extent[idx].count - 1;
19022d917d8SEric W. Biederman 		if (id >= first && id <= last)
19122d917d8SEric W. Biederman 			break;
19222d917d8SEric W. Biederman 	}
19322d917d8SEric W. Biederman 	/* Map the id or note failure */
19422d917d8SEric W. Biederman 	if (idx < extents)
19522d917d8SEric W. Biederman 		id = (id - first) + map->extent[idx].lower_first;
19622d917d8SEric W. Biederman 	else
19722d917d8SEric W. Biederman 		id = (u32) -1;
19822d917d8SEric W. Biederman 
19922d917d8SEric W. Biederman 	return id;
20022d917d8SEric W. Biederman }
20122d917d8SEric W. Biederman 
20222d917d8SEric W. Biederman static u32 map_id_up(struct uid_gid_map *map, u32 id)
20322d917d8SEric W. Biederman {
20422d917d8SEric W. Biederman 	unsigned idx, extents;
20522d917d8SEric W. Biederman 	u32 first, last;
20622d917d8SEric W. Biederman 
20722d917d8SEric W. Biederman 	/* Find the matching extent */
20822d917d8SEric W. Biederman 	extents = map->nr_extents;
209e79323bdSMikulas Patocka 	smp_rmb();
21022d917d8SEric W. Biederman 	for (idx = 0; idx < extents; idx++) {
21122d917d8SEric W. Biederman 		first = map->extent[idx].lower_first;
21222d917d8SEric W. Biederman 		last = first + map->extent[idx].count - 1;
21322d917d8SEric W. Biederman 		if (id >= first && id <= last)
21422d917d8SEric W. Biederman 			break;
21522d917d8SEric W. Biederman 	}
21622d917d8SEric W. Biederman 	/* Map the id or note failure */
21722d917d8SEric W. Biederman 	if (idx < extents)
21822d917d8SEric W. Biederman 		id = (id - first) + map->extent[idx].first;
21922d917d8SEric W. Biederman 	else
22022d917d8SEric W. Biederman 		id = (u32) -1;
22122d917d8SEric W. Biederman 
22222d917d8SEric W. Biederman 	return id;
22322d917d8SEric W. Biederman }
22422d917d8SEric W. Biederman 
22522d917d8SEric W. Biederman /**
22622d917d8SEric W. Biederman  *	make_kuid - Map a user-namespace uid pair into a kuid.
22722d917d8SEric W. Biederman  *	@ns:  User namespace that the uid is in
22822d917d8SEric W. Biederman  *	@uid: User identifier
22922d917d8SEric W. Biederman  *
23022d917d8SEric W. Biederman  *	Maps a user-namespace uid pair into a kernel internal kuid,
23122d917d8SEric W. Biederman  *	and returns that kuid.
23222d917d8SEric W. Biederman  *
23322d917d8SEric W. Biederman  *	When there is no mapping defined for the user-namespace uid
23422d917d8SEric W. Biederman  *	pair INVALID_UID is returned.  Callers are expected to test
235b080e047SBrian Campbell  *	for and handle INVALID_UID being returned.  INVALID_UID
23622d917d8SEric W. Biederman  *	may be tested for using uid_valid().
23722d917d8SEric W. Biederman  */
23822d917d8SEric W. Biederman kuid_t make_kuid(struct user_namespace *ns, uid_t uid)
23922d917d8SEric W. Biederman {
24022d917d8SEric W. Biederman 	/* Map the uid to a global kernel uid */
24122d917d8SEric W. Biederman 	return KUIDT_INIT(map_id_down(&ns->uid_map, uid));
24222d917d8SEric W. Biederman }
24322d917d8SEric W. Biederman EXPORT_SYMBOL(make_kuid);
24422d917d8SEric W. Biederman 
24522d917d8SEric W. Biederman /**
24622d917d8SEric W. Biederman  *	from_kuid - Create a uid from a kuid user-namespace pair.
24722d917d8SEric W. Biederman  *	@targ: The user namespace we want a uid in.
24822d917d8SEric W. Biederman  *	@kuid: The kernel internal uid to start with.
24922d917d8SEric W. Biederman  *
25022d917d8SEric W. Biederman  *	Map @kuid into the user-namespace specified by @targ and
25122d917d8SEric W. Biederman  *	return the resulting uid.
25222d917d8SEric W. Biederman  *
25322d917d8SEric W. Biederman  *	There is always a mapping into the initial user_namespace.
25422d917d8SEric W. Biederman  *
25522d917d8SEric W. Biederman  *	If @kuid has no mapping in @targ (uid_t)-1 is returned.
25622d917d8SEric W. Biederman  */
25722d917d8SEric W. Biederman uid_t from_kuid(struct user_namespace *targ, kuid_t kuid)
25822d917d8SEric W. Biederman {
25922d917d8SEric W. Biederman 	/* Map the uid from a global kernel uid */
26022d917d8SEric W. Biederman 	return map_id_up(&targ->uid_map, __kuid_val(kuid));
26122d917d8SEric W. Biederman }
26222d917d8SEric W. Biederman EXPORT_SYMBOL(from_kuid);
26322d917d8SEric W. Biederman 
26422d917d8SEric W. Biederman /**
26522d917d8SEric W. Biederman  *	from_kuid_munged - Create a uid from a kuid user-namespace pair.
26622d917d8SEric W. Biederman  *	@targ: The user namespace we want a uid in.
26722d917d8SEric W. Biederman  *	@kuid: The kernel internal uid to start with.
26822d917d8SEric W. Biederman  *
26922d917d8SEric W. Biederman  *	Map @kuid into the user-namespace specified by @targ and
27022d917d8SEric W. Biederman  *	return the resulting uid.
27122d917d8SEric W. Biederman  *
27222d917d8SEric W. Biederman  *	There is always a mapping into the initial user_namespace.
27322d917d8SEric W. Biederman  *
27422d917d8SEric W. Biederman  *	Unlike from_kuid from_kuid_munged never fails and always
27522d917d8SEric W. Biederman  *	returns a valid uid.  This makes from_kuid_munged appropriate
27622d917d8SEric W. Biederman  *	for use in syscalls like stat and getuid where failing the
27722d917d8SEric W. Biederman  *	system call and failing to provide a valid uid are not an
27822d917d8SEric W. Biederman  *	options.
27922d917d8SEric W. Biederman  *
28022d917d8SEric W. Biederman  *	If @kuid has no mapping in @targ overflowuid is returned.
28122d917d8SEric W. Biederman  */
28222d917d8SEric W. Biederman uid_t from_kuid_munged(struct user_namespace *targ, kuid_t kuid)
28322d917d8SEric W. Biederman {
28422d917d8SEric W. Biederman 	uid_t uid;
28522d917d8SEric W. Biederman 	uid = from_kuid(targ, kuid);
28622d917d8SEric W. Biederman 
28722d917d8SEric W. Biederman 	if (uid == (uid_t) -1)
28822d917d8SEric W. Biederman 		uid = overflowuid;
2895c1469deSEric W. Biederman 	return uid;
29022d917d8SEric W. Biederman }
29122d917d8SEric W. Biederman EXPORT_SYMBOL(from_kuid_munged);
2925c1469deSEric W. Biederman 
29322d917d8SEric W. Biederman /**
29422d917d8SEric W. Biederman  *	make_kgid - Map a user-namespace gid pair into a kgid.
29522d917d8SEric W. Biederman  *	@ns:  User namespace that the gid is in
29668a9a435SFabian Frederick  *	@gid: group identifier
29722d917d8SEric W. Biederman  *
29822d917d8SEric W. Biederman  *	Maps a user-namespace gid pair into a kernel internal kgid,
29922d917d8SEric W. Biederman  *	and returns that kgid.
30022d917d8SEric W. Biederman  *
30122d917d8SEric W. Biederman  *	When there is no mapping defined for the user-namespace gid
30222d917d8SEric W. Biederman  *	pair INVALID_GID is returned.  Callers are expected to test
30322d917d8SEric W. Biederman  *	for and handle INVALID_GID being returned.  INVALID_GID may be
30422d917d8SEric W. Biederman  *	tested for using gid_valid().
3055c1469deSEric W. Biederman  */
30622d917d8SEric W. Biederman kgid_t make_kgid(struct user_namespace *ns, gid_t gid)
3075c1469deSEric W. Biederman {
30822d917d8SEric W. Biederman 	/* Map the gid to a global kernel gid */
30922d917d8SEric W. Biederman 	return KGIDT_INIT(map_id_down(&ns->gid_map, gid));
31022d917d8SEric W. Biederman }
31122d917d8SEric W. Biederman EXPORT_SYMBOL(make_kgid);
3125c1469deSEric W. Biederman 
31322d917d8SEric W. Biederman /**
31422d917d8SEric W. Biederman  *	from_kgid - Create a gid from a kgid user-namespace pair.
31522d917d8SEric W. Biederman  *	@targ: The user namespace we want a gid in.
31622d917d8SEric W. Biederman  *	@kgid: The kernel internal gid to start with.
31722d917d8SEric W. Biederman  *
31822d917d8SEric W. Biederman  *	Map @kgid into the user-namespace specified by @targ and
31922d917d8SEric W. Biederman  *	return the resulting gid.
32022d917d8SEric W. Biederman  *
32122d917d8SEric W. Biederman  *	There is always a mapping into the initial user_namespace.
32222d917d8SEric W. Biederman  *
32322d917d8SEric W. Biederman  *	If @kgid has no mapping in @targ (gid_t)-1 is returned.
3245c1469deSEric W. Biederman  */
32522d917d8SEric W. Biederman gid_t from_kgid(struct user_namespace *targ, kgid_t kgid)
32622d917d8SEric W. Biederman {
32722d917d8SEric W. Biederman 	/* Map the gid from a global kernel gid */
32822d917d8SEric W. Biederman 	return map_id_up(&targ->gid_map, __kgid_val(kgid));
3295c1469deSEric W. Biederman }
33022d917d8SEric W. Biederman EXPORT_SYMBOL(from_kgid);
33122d917d8SEric W. Biederman 
33222d917d8SEric W. Biederman /**
33322d917d8SEric W. Biederman  *	from_kgid_munged - Create a gid from a kgid user-namespace pair.
33422d917d8SEric W. Biederman  *	@targ: The user namespace we want a gid in.
33522d917d8SEric W. Biederman  *	@kgid: The kernel internal gid to start with.
33622d917d8SEric W. Biederman  *
33722d917d8SEric W. Biederman  *	Map @kgid into the user-namespace specified by @targ and
33822d917d8SEric W. Biederman  *	return the resulting gid.
33922d917d8SEric W. Biederman  *
34022d917d8SEric W. Biederman  *	There is always a mapping into the initial user_namespace.
34122d917d8SEric W. Biederman  *
34222d917d8SEric W. Biederman  *	Unlike from_kgid from_kgid_munged never fails and always
34322d917d8SEric W. Biederman  *	returns a valid gid.  This makes from_kgid_munged appropriate
34422d917d8SEric W. Biederman  *	for use in syscalls like stat and getgid where failing the
34522d917d8SEric W. Biederman  *	system call and failing to provide a valid gid are not options.
34622d917d8SEric W. Biederman  *
34722d917d8SEric W. Biederman  *	If @kgid has no mapping in @targ overflowgid is returned.
34822d917d8SEric W. Biederman  */
34922d917d8SEric W. Biederman gid_t from_kgid_munged(struct user_namespace *targ, kgid_t kgid)
35022d917d8SEric W. Biederman {
35122d917d8SEric W. Biederman 	gid_t gid;
35222d917d8SEric W. Biederman 	gid = from_kgid(targ, kgid);
35322d917d8SEric W. Biederman 
35422d917d8SEric W. Biederman 	if (gid == (gid_t) -1)
35522d917d8SEric W. Biederman 		gid = overflowgid;
35622d917d8SEric W. Biederman 	return gid;
35722d917d8SEric W. Biederman }
35822d917d8SEric W. Biederman EXPORT_SYMBOL(from_kgid_munged);
35922d917d8SEric W. Biederman 
360f76d207aSEric W. Biederman /**
361f76d207aSEric W. Biederman  *	make_kprojid - Map a user-namespace projid pair into a kprojid.
362f76d207aSEric W. Biederman  *	@ns:  User namespace that the projid is in
363f76d207aSEric W. Biederman  *	@projid: Project identifier
364f76d207aSEric W. Biederman  *
365f76d207aSEric W. Biederman  *	Maps a user-namespace uid pair into a kernel internal kuid,
366f76d207aSEric W. Biederman  *	and returns that kuid.
367f76d207aSEric W. Biederman  *
368f76d207aSEric W. Biederman  *	When there is no mapping defined for the user-namespace projid
369f76d207aSEric W. Biederman  *	pair INVALID_PROJID is returned.  Callers are expected to test
370f76d207aSEric W. Biederman  *	for and handle handle INVALID_PROJID being returned.  INVALID_PROJID
371f76d207aSEric W. Biederman  *	may be tested for using projid_valid().
372f76d207aSEric W. Biederman  */
373f76d207aSEric W. Biederman kprojid_t make_kprojid(struct user_namespace *ns, projid_t projid)
374f76d207aSEric W. Biederman {
375f76d207aSEric W. Biederman 	/* Map the uid to a global kernel uid */
376f76d207aSEric W. Biederman 	return KPROJIDT_INIT(map_id_down(&ns->projid_map, projid));
377f76d207aSEric W. Biederman }
378f76d207aSEric W. Biederman EXPORT_SYMBOL(make_kprojid);
379f76d207aSEric W. Biederman 
380f76d207aSEric W. Biederman /**
381f76d207aSEric W. Biederman  *	from_kprojid - Create a projid from a kprojid user-namespace pair.
382f76d207aSEric W. Biederman  *	@targ: The user namespace we want a projid in.
383f76d207aSEric W. Biederman  *	@kprojid: The kernel internal project identifier to start with.
384f76d207aSEric W. Biederman  *
385f76d207aSEric W. Biederman  *	Map @kprojid into the user-namespace specified by @targ and
386f76d207aSEric W. Biederman  *	return the resulting projid.
387f76d207aSEric W. Biederman  *
388f76d207aSEric W. Biederman  *	There is always a mapping into the initial user_namespace.
389f76d207aSEric W. Biederman  *
390f76d207aSEric W. Biederman  *	If @kprojid has no mapping in @targ (projid_t)-1 is returned.
391f76d207aSEric W. Biederman  */
392f76d207aSEric W. Biederman projid_t from_kprojid(struct user_namespace *targ, kprojid_t kprojid)
393f76d207aSEric W. Biederman {
394f76d207aSEric W. Biederman 	/* Map the uid from a global kernel uid */
395f76d207aSEric W. Biederman 	return map_id_up(&targ->projid_map, __kprojid_val(kprojid));
396f76d207aSEric W. Biederman }
397f76d207aSEric W. Biederman EXPORT_SYMBOL(from_kprojid);
398f76d207aSEric W. Biederman 
399f76d207aSEric W. Biederman /**
400f76d207aSEric W. Biederman  *	from_kprojid_munged - Create a projiid from a kprojid user-namespace pair.
401f76d207aSEric W. Biederman  *	@targ: The user namespace we want a projid in.
402f76d207aSEric W. Biederman  *	@kprojid: The kernel internal projid to start with.
403f76d207aSEric W. Biederman  *
404f76d207aSEric W. Biederman  *	Map @kprojid into the user-namespace specified by @targ and
405f76d207aSEric W. Biederman  *	return the resulting projid.
406f76d207aSEric W. Biederman  *
407f76d207aSEric W. Biederman  *	There is always a mapping into the initial user_namespace.
408f76d207aSEric W. Biederman  *
409f76d207aSEric W. Biederman  *	Unlike from_kprojid from_kprojid_munged never fails and always
410f76d207aSEric W. Biederman  *	returns a valid projid.  This makes from_kprojid_munged
411f76d207aSEric W. Biederman  *	appropriate for use in syscalls like stat and where
412f76d207aSEric W. Biederman  *	failing the system call and failing to provide a valid projid are
413f76d207aSEric W. Biederman  *	not an options.
414f76d207aSEric W. Biederman  *
415f76d207aSEric W. Biederman  *	If @kprojid has no mapping in @targ OVERFLOW_PROJID is returned.
416f76d207aSEric W. Biederman  */
417f76d207aSEric W. Biederman projid_t from_kprojid_munged(struct user_namespace *targ, kprojid_t kprojid)
418f76d207aSEric W. Biederman {
419f76d207aSEric W. Biederman 	projid_t projid;
420f76d207aSEric W. Biederman 	projid = from_kprojid(targ, kprojid);
421f76d207aSEric W. Biederman 
422f76d207aSEric W. Biederman 	if (projid == (projid_t) -1)
423f76d207aSEric W. Biederman 		projid = OVERFLOW_PROJID;
424f76d207aSEric W. Biederman 	return projid;
425f76d207aSEric W. Biederman }
426f76d207aSEric W. Biederman EXPORT_SYMBOL(from_kprojid_munged);
427f76d207aSEric W. Biederman 
428f76d207aSEric W. Biederman 
42922d917d8SEric W. Biederman static int uid_m_show(struct seq_file *seq, void *v)
43022d917d8SEric W. Biederman {
43122d917d8SEric W. Biederman 	struct user_namespace *ns = seq->private;
43222d917d8SEric W. Biederman 	struct uid_gid_extent *extent = v;
43322d917d8SEric W. Biederman 	struct user_namespace *lower_ns;
43422d917d8SEric W. Biederman 	uid_t lower;
43522d917d8SEric W. Biederman 
436c450f371SEric W. Biederman 	lower_ns = seq_user_ns(seq);
43722d917d8SEric W. Biederman 	if ((lower_ns == ns) && lower_ns->parent)
43822d917d8SEric W. Biederman 		lower_ns = lower_ns->parent;
43922d917d8SEric W. Biederman 
44022d917d8SEric W. Biederman 	lower = from_kuid(lower_ns, KUIDT_INIT(extent->lower_first));
44122d917d8SEric W. Biederman 
44222d917d8SEric W. Biederman 	seq_printf(seq, "%10u %10u %10u\n",
44322d917d8SEric W. Biederman 		extent->first,
44422d917d8SEric W. Biederman 		lower,
44522d917d8SEric W. Biederman 		extent->count);
44622d917d8SEric W. Biederman 
44722d917d8SEric W. Biederman 	return 0;
4485c1469deSEric W. Biederman }
4495c1469deSEric W. Biederman 
45022d917d8SEric W. Biederman static int gid_m_show(struct seq_file *seq, void *v)
45122d917d8SEric W. Biederman {
45222d917d8SEric W. Biederman 	struct user_namespace *ns = seq->private;
45322d917d8SEric W. Biederman 	struct uid_gid_extent *extent = v;
45422d917d8SEric W. Biederman 	struct user_namespace *lower_ns;
45522d917d8SEric W. Biederman 	gid_t lower;
45622d917d8SEric W. Biederman 
457c450f371SEric W. Biederman 	lower_ns = seq_user_ns(seq);
45822d917d8SEric W. Biederman 	if ((lower_ns == ns) && lower_ns->parent)
45922d917d8SEric W. Biederman 		lower_ns = lower_ns->parent;
46022d917d8SEric W. Biederman 
46122d917d8SEric W. Biederman 	lower = from_kgid(lower_ns, KGIDT_INIT(extent->lower_first));
46222d917d8SEric W. Biederman 
46322d917d8SEric W. Biederman 	seq_printf(seq, "%10u %10u %10u\n",
46422d917d8SEric W. Biederman 		extent->first,
46522d917d8SEric W. Biederman 		lower,
46622d917d8SEric W. Biederman 		extent->count);
46722d917d8SEric W. Biederman 
46822d917d8SEric W. Biederman 	return 0;
46922d917d8SEric W. Biederman }
47022d917d8SEric W. Biederman 
471f76d207aSEric W. Biederman static int projid_m_show(struct seq_file *seq, void *v)
472f76d207aSEric W. Biederman {
473f76d207aSEric W. Biederman 	struct user_namespace *ns = seq->private;
474f76d207aSEric W. Biederman 	struct uid_gid_extent *extent = v;
475f76d207aSEric W. Biederman 	struct user_namespace *lower_ns;
476f76d207aSEric W. Biederman 	projid_t lower;
477f76d207aSEric W. Biederman 
478f76d207aSEric W. Biederman 	lower_ns = seq_user_ns(seq);
479f76d207aSEric W. Biederman 	if ((lower_ns == ns) && lower_ns->parent)
480f76d207aSEric W. Biederman 		lower_ns = lower_ns->parent;
481f76d207aSEric W. Biederman 
482f76d207aSEric W. Biederman 	lower = from_kprojid(lower_ns, KPROJIDT_INIT(extent->lower_first));
483f76d207aSEric W. Biederman 
484f76d207aSEric W. Biederman 	seq_printf(seq, "%10u %10u %10u\n",
485f76d207aSEric W. Biederman 		extent->first,
486f76d207aSEric W. Biederman 		lower,
487f76d207aSEric W. Biederman 		extent->count);
488f76d207aSEric W. Biederman 
489f76d207aSEric W. Biederman 	return 0;
490f76d207aSEric W. Biederman }
491f76d207aSEric W. Biederman 
49268a9a435SFabian Frederick static void *m_start(struct seq_file *seq, loff_t *ppos,
49368a9a435SFabian Frederick 		     struct uid_gid_map *map)
49422d917d8SEric W. Biederman {
49522d917d8SEric W. Biederman 	struct uid_gid_extent *extent = NULL;
49622d917d8SEric W. Biederman 	loff_t pos = *ppos;
49722d917d8SEric W. Biederman 
49822d917d8SEric W. Biederman 	if (pos < map->nr_extents)
49922d917d8SEric W. Biederman 		extent = &map->extent[pos];
50022d917d8SEric W. Biederman 
50122d917d8SEric W. Biederman 	return extent;
50222d917d8SEric W. Biederman }
50322d917d8SEric W. Biederman 
50422d917d8SEric W. Biederman static void *uid_m_start(struct seq_file *seq, loff_t *ppos)
50522d917d8SEric W. Biederman {
50622d917d8SEric W. Biederman 	struct user_namespace *ns = seq->private;
50722d917d8SEric W. Biederman 
50822d917d8SEric W. Biederman 	return m_start(seq, ppos, &ns->uid_map);
50922d917d8SEric W. Biederman }
51022d917d8SEric W. Biederman 
51122d917d8SEric W. Biederman static void *gid_m_start(struct seq_file *seq, loff_t *ppos)
51222d917d8SEric W. Biederman {
51322d917d8SEric W. Biederman 	struct user_namespace *ns = seq->private;
51422d917d8SEric W. Biederman 
51522d917d8SEric W. Biederman 	return m_start(seq, ppos, &ns->gid_map);
51622d917d8SEric W. Biederman }
51722d917d8SEric W. Biederman 
518f76d207aSEric W. Biederman static void *projid_m_start(struct seq_file *seq, loff_t *ppos)
519f76d207aSEric W. Biederman {
520f76d207aSEric W. Biederman 	struct user_namespace *ns = seq->private;
521f76d207aSEric W. Biederman 
522f76d207aSEric W. Biederman 	return m_start(seq, ppos, &ns->projid_map);
523f76d207aSEric W. Biederman }
524f76d207aSEric W. Biederman 
52522d917d8SEric W. Biederman static void *m_next(struct seq_file *seq, void *v, loff_t *pos)
52622d917d8SEric W. Biederman {
52722d917d8SEric W. Biederman 	(*pos)++;
52822d917d8SEric W. Biederman 	return seq->op->start(seq, pos);
52922d917d8SEric W. Biederman }
53022d917d8SEric W. Biederman 
/* Shared seq_file ->stop(): there is nothing to do here. */
static void m_stop(struct seq_file *seq, void *v)
{
}
53522d917d8SEric W. Biederman 
536ccf94f1bSFabian Frederick const struct seq_operations proc_uid_seq_operations = {
53722d917d8SEric W. Biederman 	.start = uid_m_start,
53822d917d8SEric W. Biederman 	.stop = m_stop,
53922d917d8SEric W. Biederman 	.next = m_next,
54022d917d8SEric W. Biederman 	.show = uid_m_show,
54122d917d8SEric W. Biederman };
54222d917d8SEric W. Biederman 
543ccf94f1bSFabian Frederick const struct seq_operations proc_gid_seq_operations = {
54422d917d8SEric W. Biederman 	.start = gid_m_start,
54522d917d8SEric W. Biederman 	.stop = m_stop,
54622d917d8SEric W. Biederman 	.next = m_next,
54722d917d8SEric W. Biederman 	.show = gid_m_show,
54822d917d8SEric W. Biederman };
54922d917d8SEric W. Biederman 
550ccf94f1bSFabian Frederick const struct seq_operations proc_projid_seq_operations = {
551f76d207aSEric W. Biederman 	.start = projid_m_start,
552f76d207aSEric W. Biederman 	.stop = m_stop,
553f76d207aSEric W. Biederman 	.next = m_next,
554f76d207aSEric W. Biederman 	.show = projid_m_show,
555f76d207aSEric W. Biederman };
556f76d207aSEric W. Biederman 
55768a9a435SFabian Frederick static bool mappings_overlap(struct uid_gid_map *new_map,
55868a9a435SFabian Frederick 			     struct uid_gid_extent *extent)
5590bd14b4fSEric W. Biederman {
5600bd14b4fSEric W. Biederman 	u32 upper_first, lower_first, upper_last, lower_last;
5610bd14b4fSEric W. Biederman 	unsigned idx;
5620bd14b4fSEric W. Biederman 
5630bd14b4fSEric W. Biederman 	upper_first = extent->first;
5640bd14b4fSEric W. Biederman 	lower_first = extent->lower_first;
5650bd14b4fSEric W. Biederman 	upper_last = upper_first + extent->count - 1;
5660bd14b4fSEric W. Biederman 	lower_last = lower_first + extent->count - 1;
5670bd14b4fSEric W. Biederman 
5680bd14b4fSEric W. Biederman 	for (idx = 0; idx < new_map->nr_extents; idx++) {
5690bd14b4fSEric W. Biederman 		u32 prev_upper_first, prev_lower_first;
5700bd14b4fSEric W. Biederman 		u32 prev_upper_last, prev_lower_last;
5710bd14b4fSEric W. Biederman 		struct uid_gid_extent *prev;
5720bd14b4fSEric W. Biederman 
5730bd14b4fSEric W. Biederman 		prev = &new_map->extent[idx];
5740bd14b4fSEric W. Biederman 
5750bd14b4fSEric W. Biederman 		prev_upper_first = prev->first;
5760bd14b4fSEric W. Biederman 		prev_lower_first = prev->lower_first;
5770bd14b4fSEric W. Biederman 		prev_upper_last = prev_upper_first + prev->count - 1;
5780bd14b4fSEric W. Biederman 		prev_lower_last = prev_lower_first + prev->count - 1;
5790bd14b4fSEric W. Biederman 
5800bd14b4fSEric W. Biederman 		/* Does the upper range intersect a previous extent? */
5810bd14b4fSEric W. Biederman 		if ((prev_upper_first <= upper_last) &&
5820bd14b4fSEric W. Biederman 		    (prev_upper_last >= upper_first))
5830bd14b4fSEric W. Biederman 			return true;
5840bd14b4fSEric W. Biederman 
5850bd14b4fSEric W. Biederman 		/* Does the lower range intersect a previous extent? */
5860bd14b4fSEric W. Biederman 		if ((prev_lower_first <= lower_last) &&
5870bd14b4fSEric W. Biederman 		    (prev_lower_last >= lower_first))
5880bd14b4fSEric W. Biederman 			return true;
5890bd14b4fSEric W. Biederman 	}
5900bd14b4fSEric W. Biederman 	return false;
5910bd14b4fSEric W. Biederman }
5920bd14b4fSEric W. Biederman 
59322d917d8SEric W. Biederman static ssize_t map_write(struct file *file, const char __user *buf,
59422d917d8SEric W. Biederman 			 size_t count, loff_t *ppos,
59522d917d8SEric W. Biederman 			 int cap_setid,
59622d917d8SEric W. Biederman 			 struct uid_gid_map *map,
59722d917d8SEric W. Biederman 			 struct uid_gid_map *parent_map)
59822d917d8SEric W. Biederman {
59922d917d8SEric W. Biederman 	struct seq_file *seq = file->private_data;
60022d917d8SEric W. Biederman 	struct user_namespace *ns = seq->private;
60122d917d8SEric W. Biederman 	struct uid_gid_map new_map;
60222d917d8SEric W. Biederman 	unsigned idx;
6030bd14b4fSEric W. Biederman 	struct uid_gid_extent *extent = NULL;
60422d917d8SEric W. Biederman 	unsigned long page = 0;
60522d917d8SEric W. Biederman 	char *kbuf, *pos, *next_line;
60622d917d8SEric W. Biederman 	ssize_t ret = -EINVAL;
60722d917d8SEric W. Biederman 
60822d917d8SEric W. Biederman 	/*
609f0d62aecSEric W. Biederman 	 * The userns_state_mutex serializes all writes to any given map.
61022d917d8SEric W. Biederman 	 *
61122d917d8SEric W. Biederman 	 * Any map is only ever written once.
61222d917d8SEric W. Biederman 	 *
61322d917d8SEric W. Biederman 	 * An id map fits within 1 cache line on most architectures.
61422d917d8SEric W. Biederman 	 *
61522d917d8SEric W. Biederman 	 * On read nothing needs to be done unless you are on an
61622d917d8SEric W. Biederman 	 * architecture with a crazy cache coherency model like alpha.
61722d917d8SEric W. Biederman 	 *
61822d917d8SEric W. Biederman 	 * There is a one time data dependency between reading the
61922d917d8SEric W. Biederman 	 * count of the extents and the values of the extents.  The
62022d917d8SEric W. Biederman 	 * desired behavior is to see the values of the extents that
62122d917d8SEric W. Biederman 	 * were written before the count of the extents.
62222d917d8SEric W. Biederman 	 *
62322d917d8SEric W. Biederman 	 * To achieve this smp_wmb() is used on guarantee the write
624e79323bdSMikulas Patocka 	 * order and smp_rmb() is guaranteed that we don't have crazy
625e79323bdSMikulas Patocka 	 * architectures returning stale data.
62622d917d8SEric W. Biederman 	 */
627f0d62aecSEric W. Biederman 	mutex_lock(&userns_state_mutex);
62822d917d8SEric W. Biederman 
62922d917d8SEric W. Biederman 	ret = -EPERM;
63022d917d8SEric W. Biederman 	/* Only allow one successful write to the map */
63122d917d8SEric W. Biederman 	if (map->nr_extents != 0)
63222d917d8SEric W. Biederman 		goto out;
63322d917d8SEric W. Biederman 
63441c21e35SAndy Lutomirski 	/*
63541c21e35SAndy Lutomirski 	 * Adjusting namespace settings requires capabilities on the target.
63622d917d8SEric W. Biederman 	 */
63741c21e35SAndy Lutomirski 	if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN))
63822d917d8SEric W. Biederman 		goto out;
63922d917d8SEric W. Biederman 
64022d917d8SEric W. Biederman 	/* Get a buffer */
64122d917d8SEric W. Biederman 	ret = -ENOMEM;
64222d917d8SEric W. Biederman 	page = __get_free_page(GFP_TEMPORARY);
64322d917d8SEric W. Biederman 	kbuf = (char *) page;
64422d917d8SEric W. Biederman 	if (!page)
64522d917d8SEric W. Biederman 		goto out;
64622d917d8SEric W. Biederman 
647*36476beaSEric W. Biederman 	/* Only allow < page size writes at the beginning of the file */
64822d917d8SEric W. Biederman 	ret = -EINVAL;
64922d917d8SEric W. Biederman 	if ((*ppos != 0) || (count >= PAGE_SIZE))
65022d917d8SEric W. Biederman 		goto out;
65122d917d8SEric W. Biederman 
65222d917d8SEric W. Biederman 	/* Slurp in the user data */
65322d917d8SEric W. Biederman 	ret = -EFAULT;
65422d917d8SEric W. Biederman 	if (copy_from_user(kbuf, buf, count))
65522d917d8SEric W. Biederman 		goto out;
65622d917d8SEric W. Biederman 	kbuf[count] = '\0';
65722d917d8SEric W. Biederman 
65822d917d8SEric W. Biederman 	/* Parse the user data */
65922d917d8SEric W. Biederman 	ret = -EINVAL;
66022d917d8SEric W. Biederman 	pos = kbuf;
66122d917d8SEric W. Biederman 	new_map.nr_extents = 0;
66222d917d8SEric W. Biederman 	for (; pos; pos = next_line) {
66322d917d8SEric W. Biederman 		extent = &new_map.extent[new_map.nr_extents];
66422d917d8SEric W. Biederman 
66522d917d8SEric W. Biederman 		/* Find the end of line and ensure I don't look past it */
66622d917d8SEric W. Biederman 		next_line = strchr(pos, '\n');
66722d917d8SEric W. Biederman 		if (next_line) {
66822d917d8SEric W. Biederman 			*next_line = '\0';
66922d917d8SEric W. Biederman 			next_line++;
67022d917d8SEric W. Biederman 			if (*next_line == '\0')
67122d917d8SEric W. Biederman 				next_line = NULL;
67222d917d8SEric W. Biederman 		}
67322d917d8SEric W. Biederman 
67422d917d8SEric W. Biederman 		pos = skip_spaces(pos);
67522d917d8SEric W. Biederman 		extent->first = simple_strtoul(pos, &pos, 10);
67622d917d8SEric W. Biederman 		if (!isspace(*pos))
67722d917d8SEric W. Biederman 			goto out;
67822d917d8SEric W. Biederman 
67922d917d8SEric W. Biederman 		pos = skip_spaces(pos);
68022d917d8SEric W. Biederman 		extent->lower_first = simple_strtoul(pos, &pos, 10);
68122d917d8SEric W. Biederman 		if (!isspace(*pos))
68222d917d8SEric W. Biederman 			goto out;
68322d917d8SEric W. Biederman 
68422d917d8SEric W. Biederman 		pos = skip_spaces(pos);
68522d917d8SEric W. Biederman 		extent->count = simple_strtoul(pos, &pos, 10);
68622d917d8SEric W. Biederman 		if (*pos && !isspace(*pos))
68722d917d8SEric W. Biederman 			goto out;
68822d917d8SEric W. Biederman 
68922d917d8SEric W. Biederman 		/* Verify there is not trailing junk on the line */
69022d917d8SEric W. Biederman 		pos = skip_spaces(pos);
69122d917d8SEric W. Biederman 		if (*pos != '\0')
69222d917d8SEric W. Biederman 			goto out;
69322d917d8SEric W. Biederman 
69422d917d8SEric W. Biederman 		/* Verify we have been given valid starting values */
69522d917d8SEric W. Biederman 		if ((extent->first == (u32) -1) ||
69622d917d8SEric W. Biederman 		    (extent->lower_first == (u32) -1))
69722d917d8SEric W. Biederman 			goto out;
69822d917d8SEric W. Biederman 
69968a9a435SFabian Frederick 		/* Verify count is not zero and does not cause the
70068a9a435SFabian Frederick 		 * extent to wrap
70168a9a435SFabian Frederick 		 */
70222d917d8SEric W. Biederman 		if ((extent->first + extent->count) <= extent->first)
70322d917d8SEric W. Biederman 			goto out;
70468a9a435SFabian Frederick 		if ((extent->lower_first + extent->count) <=
70568a9a435SFabian Frederick 		     extent->lower_first)
70622d917d8SEric W. Biederman 			goto out;
70722d917d8SEric W. Biederman 
7080bd14b4fSEric W. Biederman 		/* Do the ranges in extent overlap any previous extents? */
7090bd14b4fSEric W. Biederman 		if (mappings_overlap(&new_map, extent))
71022d917d8SEric W. Biederman 			goto out;
71122d917d8SEric W. Biederman 
71222d917d8SEric W. Biederman 		new_map.nr_extents++;
71322d917d8SEric W. Biederman 
71422d917d8SEric W. Biederman 		/* Fail if the file contains too many extents */
71522d917d8SEric W. Biederman 		if ((new_map.nr_extents == UID_GID_MAP_MAX_EXTENTS) &&
71622d917d8SEric W. Biederman 		    (next_line != NULL))
71722d917d8SEric W. Biederman 			goto out;
71822d917d8SEric W. Biederman 	}
71922d917d8SEric W. Biederman 	/* Be very certaint the new map actually exists */
72022d917d8SEric W. Biederman 	if (new_map.nr_extents == 0)
72122d917d8SEric W. Biederman 		goto out;
72222d917d8SEric W. Biederman 
72322d917d8SEric W. Biederman 	ret = -EPERM;
72422d917d8SEric W. Biederman 	/* Validate the user is allowed to use user id's mapped to. */
7256708075fSEric W. Biederman 	if (!new_idmap_permitted(file, ns, cap_setid, &new_map))
72622d917d8SEric W. Biederman 		goto out;
72722d917d8SEric W. Biederman 
72822d917d8SEric W. Biederman 	/* Map the lower ids from the parent user namespace to the
72922d917d8SEric W. Biederman 	 * kernel global id space.
73022d917d8SEric W. Biederman 	 */
73122d917d8SEric W. Biederman 	for (idx = 0; idx < new_map.nr_extents; idx++) {
73222d917d8SEric W. Biederman 		u32 lower_first;
73322d917d8SEric W. Biederman 		extent = &new_map.extent[idx];
73422d917d8SEric W. Biederman 
73522d917d8SEric W. Biederman 		lower_first = map_id_range_down(parent_map,
73622d917d8SEric W. Biederman 						extent->lower_first,
73722d917d8SEric W. Biederman 						extent->count);
73822d917d8SEric W. Biederman 
73922d917d8SEric W. Biederman 		/* Fail if we can not map the specified extent to
74022d917d8SEric W. Biederman 		 * the kernel global id space.
74122d917d8SEric W. Biederman 		 */
74222d917d8SEric W. Biederman 		if (lower_first == (u32) -1)
74322d917d8SEric W. Biederman 			goto out;
74422d917d8SEric W. Biederman 
74522d917d8SEric W. Biederman 		extent->lower_first = lower_first;
74622d917d8SEric W. Biederman 	}
74722d917d8SEric W. Biederman 
74822d917d8SEric W. Biederman 	/* Install the map */
74922d917d8SEric W. Biederman 	memcpy(map->extent, new_map.extent,
75022d917d8SEric W. Biederman 		new_map.nr_extents*sizeof(new_map.extent[0]));
75122d917d8SEric W. Biederman 	smp_wmb();
75222d917d8SEric W. Biederman 	map->nr_extents = new_map.nr_extents;
75322d917d8SEric W. Biederman 
75422d917d8SEric W. Biederman 	*ppos = count;
75522d917d8SEric W. Biederman 	ret = count;
75622d917d8SEric W. Biederman out:
757f0d62aecSEric W. Biederman 	mutex_unlock(&userns_state_mutex);
75822d917d8SEric W. Biederman 	if (page)
75922d917d8SEric W. Biederman 		free_page(page);
76022d917d8SEric W. Biederman 	return ret;
76122d917d8SEric W. Biederman }
76222d917d8SEric W. Biederman 
76368a9a435SFabian Frederick ssize_t proc_uid_map_write(struct file *file, const char __user *buf,
76468a9a435SFabian Frederick 			   size_t size, loff_t *ppos)
76522d917d8SEric W. Biederman {
76622d917d8SEric W. Biederman 	struct seq_file *seq = file->private_data;
76722d917d8SEric W. Biederman 	struct user_namespace *ns = seq->private;
768c450f371SEric W. Biederman 	struct user_namespace *seq_ns = seq_user_ns(seq);
76922d917d8SEric W. Biederman 
77022d917d8SEric W. Biederman 	if (!ns->parent)
77122d917d8SEric W. Biederman 		return -EPERM;
77222d917d8SEric W. Biederman 
773c450f371SEric W. Biederman 	if ((seq_ns != ns) && (seq_ns != ns->parent))
774c450f371SEric W. Biederman 		return -EPERM;
775c450f371SEric W. Biederman 
77622d917d8SEric W. Biederman 	return map_write(file, buf, size, ppos, CAP_SETUID,
77722d917d8SEric W. Biederman 			 &ns->uid_map, &ns->parent->uid_map);
77822d917d8SEric W. Biederman }
77922d917d8SEric W. Biederman 
78068a9a435SFabian Frederick ssize_t proc_gid_map_write(struct file *file, const char __user *buf,
78168a9a435SFabian Frederick 			   size_t size, loff_t *ppos)
78222d917d8SEric W. Biederman {
78322d917d8SEric W. Biederman 	struct seq_file *seq = file->private_data;
78422d917d8SEric W. Biederman 	struct user_namespace *ns = seq->private;
785c450f371SEric W. Biederman 	struct user_namespace *seq_ns = seq_user_ns(seq);
78622d917d8SEric W. Biederman 
78722d917d8SEric W. Biederman 	if (!ns->parent)
78822d917d8SEric W. Biederman 		return -EPERM;
78922d917d8SEric W. Biederman 
790c450f371SEric W. Biederman 	if ((seq_ns != ns) && (seq_ns != ns->parent))
791c450f371SEric W. Biederman 		return -EPERM;
792c450f371SEric W. Biederman 
79322d917d8SEric W. Biederman 	return map_write(file, buf, size, ppos, CAP_SETGID,
79422d917d8SEric W. Biederman 			 &ns->gid_map, &ns->parent->gid_map);
79522d917d8SEric W. Biederman }
79622d917d8SEric W. Biederman 
79768a9a435SFabian Frederick ssize_t proc_projid_map_write(struct file *file, const char __user *buf,
79868a9a435SFabian Frederick 			      size_t size, loff_t *ppos)
799f76d207aSEric W. Biederman {
800f76d207aSEric W. Biederman 	struct seq_file *seq = file->private_data;
801f76d207aSEric W. Biederman 	struct user_namespace *ns = seq->private;
802f76d207aSEric W. Biederman 	struct user_namespace *seq_ns = seq_user_ns(seq);
803f76d207aSEric W. Biederman 
804f76d207aSEric W. Biederman 	if (!ns->parent)
805f76d207aSEric W. Biederman 		return -EPERM;
806f76d207aSEric W. Biederman 
807f76d207aSEric W. Biederman 	if ((seq_ns != ns) && (seq_ns != ns->parent))
808f76d207aSEric W. Biederman 		return -EPERM;
809f76d207aSEric W. Biederman 
810f76d207aSEric W. Biederman 	/* Anyone can set any valid project id no capability needed */
811f76d207aSEric W. Biederman 	return map_write(file, buf, size, ppos, -1,
812f76d207aSEric W. Biederman 			 &ns->projid_map, &ns->parent->projid_map);
813f76d207aSEric W. Biederman }
814f76d207aSEric W. Biederman 
8156708075fSEric W. Biederman static bool new_idmap_permitted(const struct file *file,
8166708075fSEric W. Biederman 				struct user_namespace *ns, int cap_setid,
81722d917d8SEric W. Biederman 				struct uid_gid_map *new_map)
81822d917d8SEric W. Biederman {
819f95d7918SEric W. Biederman 	const struct cred *cred = file->f_cred;
8200542f17bSEric W. Biederman 	/* Don't allow mappings that would allow anything that wouldn't
8210542f17bSEric W. Biederman 	 * be allowed without the establishment of unprivileged mappings.
8220542f17bSEric W. Biederman 	 */
823f95d7918SEric W. Biederman 	if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) &&
824f95d7918SEric W. Biederman 	    uid_eq(ns->owner, cred->euid)) {
82537657da3SEric W. Biederman 		u32 id = new_map->extent[0].lower_first;
82637657da3SEric W. Biederman 		if (cap_setid == CAP_SETUID) {
82737657da3SEric W. Biederman 			kuid_t uid = make_kuid(ns->parent, id);
828f95d7918SEric W. Biederman 			if (uid_eq(uid, cred->euid))
82937657da3SEric W. Biederman 				return true;
83068a9a435SFabian Frederick 		} else if (cap_setid == CAP_SETGID) {
83137657da3SEric W. Biederman 			kgid_t gid = make_kgid(ns->parent, id);
83266d2f338SEric W. Biederman 			if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) &&
83366d2f338SEric W. Biederman 			    gid_eq(gid, cred->egid))
83437657da3SEric W. Biederman 				return true;
83537657da3SEric W. Biederman 		}
83637657da3SEric W. Biederman 	}
83737657da3SEric W. Biederman 
838f76d207aSEric W. Biederman 	/* Allow anyone to set a mapping that doesn't require privilege */
839f76d207aSEric W. Biederman 	if (!cap_valid(cap_setid))
840f76d207aSEric W. Biederman 		return true;
841f76d207aSEric W. Biederman 
84222d917d8SEric W. Biederman 	/* Allow the specified ids if we have the appropriate capability
84322d917d8SEric W. Biederman 	 * (CAP_SETUID or CAP_SETGID) over the parent user namespace.
8446708075fSEric W. Biederman 	 * And the opener of the id file also had the approprpiate capability.
84522d917d8SEric W. Biederman 	 */
8466708075fSEric W. Biederman 	if (ns_capable(ns->parent, cap_setid) &&
8476708075fSEric W. Biederman 	    file_ns_capable(file, ns->parent, cap_setid))
84822d917d8SEric W. Biederman 		return true;
84922d917d8SEric W. Biederman 
85022d917d8SEric W. Biederman 	return false;
8515c1469deSEric W. Biederman }
8526164281aSPavel Emelyanov 
8539cc46516SEric W. Biederman int proc_setgroups_show(struct seq_file *seq, void *v)
8549cc46516SEric W. Biederman {
8559cc46516SEric W. Biederman 	struct user_namespace *ns = seq->private;
8569cc46516SEric W. Biederman 	unsigned long userns_flags = ACCESS_ONCE(ns->flags);
8579cc46516SEric W. Biederman 
8589cc46516SEric W. Biederman 	seq_printf(seq, "%s\n",
8599cc46516SEric W. Biederman 		   (userns_flags & USERNS_SETGROUPS_ALLOWED) ?
8609cc46516SEric W. Biederman 		   "allow" : "deny");
8619cc46516SEric W. Biederman 	return 0;
8629cc46516SEric W. Biederman }
8639cc46516SEric W. Biederman 
8649cc46516SEric W. Biederman ssize_t proc_setgroups_write(struct file *file, const char __user *buf,
8659cc46516SEric W. Biederman 			     size_t count, loff_t *ppos)
8669cc46516SEric W. Biederman {
8679cc46516SEric W. Biederman 	struct seq_file *seq = file->private_data;
8689cc46516SEric W. Biederman 	struct user_namespace *ns = seq->private;
8699cc46516SEric W. Biederman 	char kbuf[8], *pos;
8709cc46516SEric W. Biederman 	bool setgroups_allowed;
8719cc46516SEric W. Biederman 	ssize_t ret;
8729cc46516SEric W. Biederman 
8739cc46516SEric W. Biederman 	/* Only allow a very narrow range of strings to be written */
8749cc46516SEric W. Biederman 	ret = -EINVAL;
8759cc46516SEric W. Biederman 	if ((*ppos != 0) || (count >= sizeof(kbuf)))
8769cc46516SEric W. Biederman 		goto out;
8779cc46516SEric W. Biederman 
8789cc46516SEric W. Biederman 	/* What was written? */
8799cc46516SEric W. Biederman 	ret = -EFAULT;
8809cc46516SEric W. Biederman 	if (copy_from_user(kbuf, buf, count))
8819cc46516SEric W. Biederman 		goto out;
8829cc46516SEric W. Biederman 	kbuf[count] = '\0';
8839cc46516SEric W. Biederman 	pos = kbuf;
8849cc46516SEric W. Biederman 
8859cc46516SEric W. Biederman 	/* What is being requested? */
8869cc46516SEric W. Biederman 	ret = -EINVAL;
8879cc46516SEric W. Biederman 	if (strncmp(pos, "allow", 5) == 0) {
8889cc46516SEric W. Biederman 		pos += 5;
8899cc46516SEric W. Biederman 		setgroups_allowed = true;
8909cc46516SEric W. Biederman 	}
8919cc46516SEric W. Biederman 	else if (strncmp(pos, "deny", 4) == 0) {
8929cc46516SEric W. Biederman 		pos += 4;
8939cc46516SEric W. Biederman 		setgroups_allowed = false;
8949cc46516SEric W. Biederman 	}
8959cc46516SEric W. Biederman 	else
8969cc46516SEric W. Biederman 		goto out;
8979cc46516SEric W. Biederman 
8989cc46516SEric W. Biederman 	/* Verify there is not trailing junk on the line */
8999cc46516SEric W. Biederman 	pos = skip_spaces(pos);
9009cc46516SEric W. Biederman 	if (*pos != '\0')
9019cc46516SEric W. Biederman 		goto out;
9029cc46516SEric W. Biederman 
9039cc46516SEric W. Biederman 	ret = -EPERM;
9049cc46516SEric W. Biederman 	mutex_lock(&userns_state_mutex);
9059cc46516SEric W. Biederman 	if (setgroups_allowed) {
9069cc46516SEric W. Biederman 		/* Enabling setgroups after setgroups has been disabled
9079cc46516SEric W. Biederman 		 * is not allowed.
9089cc46516SEric W. Biederman 		 */
9099cc46516SEric W. Biederman 		if (!(ns->flags & USERNS_SETGROUPS_ALLOWED))
9109cc46516SEric W. Biederman 			goto out_unlock;
9119cc46516SEric W. Biederman 	} else {
9129cc46516SEric W. Biederman 		/* Permanently disabling setgroups after setgroups has
9139cc46516SEric W. Biederman 		 * been enabled by writing the gid_map is not allowed.
9149cc46516SEric W. Biederman 		 */
9159cc46516SEric W. Biederman 		if (ns->gid_map.nr_extents != 0)
9169cc46516SEric W. Biederman 			goto out_unlock;
9179cc46516SEric W. Biederman 		ns->flags &= ~USERNS_SETGROUPS_ALLOWED;
9189cc46516SEric W. Biederman 	}
9199cc46516SEric W. Biederman 	mutex_unlock(&userns_state_mutex);
9209cc46516SEric W. Biederman 
9219cc46516SEric W. Biederman 	/* Report a successful write */
9229cc46516SEric W. Biederman 	*ppos = count;
9239cc46516SEric W. Biederman 	ret = count;
9249cc46516SEric W. Biederman out:
9259cc46516SEric W. Biederman 	return ret;
9269cc46516SEric W. Biederman out_unlock:
9279cc46516SEric W. Biederman 	mutex_unlock(&userns_state_mutex);
9289cc46516SEric W. Biederman 	goto out;
9299cc46516SEric W. Biederman }
9309cc46516SEric W. Biederman 
931273d2c67SEric W. Biederman bool userns_may_setgroups(const struct user_namespace *ns)
932273d2c67SEric W. Biederman {
933273d2c67SEric W. Biederman 	bool allowed;
934273d2c67SEric W. Biederman 
935f0d62aecSEric W. Biederman 	mutex_lock(&userns_state_mutex);
936273d2c67SEric W. Biederman 	/* It is not safe to use setgroups until a gid mapping in
937273d2c67SEric W. Biederman 	 * the user namespace has been established.
938273d2c67SEric W. Biederman 	 */
939273d2c67SEric W. Biederman 	allowed = ns->gid_map.nr_extents != 0;
9409cc46516SEric W. Biederman 	/* Is setgroups allowed? */
9419cc46516SEric W. Biederman 	allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED);
942f0d62aecSEric W. Biederman 	mutex_unlock(&userns_state_mutex);
943273d2c67SEric W. Biederman 
944273d2c67SEric W. Biederman 	return allowed;
945273d2c67SEric W. Biederman }
946273d2c67SEric W. Biederman 
9473c041184SAl Viro static inline struct user_namespace *to_user_ns(struct ns_common *ns)
9483c041184SAl Viro {
9493c041184SAl Viro 	return container_of(ns, struct user_namespace, ns);
9503c041184SAl Viro }
9513c041184SAl Viro 
95264964528SAl Viro static struct ns_common *userns_get(struct task_struct *task)
953cde1975bSEric W. Biederman {
954cde1975bSEric W. Biederman 	struct user_namespace *user_ns;
955cde1975bSEric W. Biederman 
956cde1975bSEric W. Biederman 	rcu_read_lock();
957cde1975bSEric W. Biederman 	user_ns = get_user_ns(__task_cred(task)->user_ns);
958cde1975bSEric W. Biederman 	rcu_read_unlock();
959cde1975bSEric W. Biederman 
9603c041184SAl Viro 	return user_ns ? &user_ns->ns : NULL;
961cde1975bSEric W. Biederman }
962cde1975bSEric W. Biederman 
/* ->put hook: hand the user namespace that embeds @ns to put_user_ns(). */
static void userns_put(struct ns_common *ns)
{
	struct user_namespace *user_ns = to_user_ns(ns);

	put_user_ns(user_ns);
}
967cde1975bSEric W. Biederman 
96864964528SAl Viro static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns)
969cde1975bSEric W. Biederman {
9703c041184SAl Viro 	struct user_namespace *user_ns = to_user_ns(ns);
971cde1975bSEric W. Biederman 	struct cred *cred;
972cde1975bSEric W. Biederman 
973cde1975bSEric W. Biederman 	/* Don't allow gaining capabilities by reentering
974cde1975bSEric W. Biederman 	 * the same user namespace.
975cde1975bSEric W. Biederman 	 */
976cde1975bSEric W. Biederman 	if (user_ns == current_user_ns())
977cde1975bSEric W. Biederman 		return -EINVAL;
978cde1975bSEric W. Biederman 
9795155040eSEric W. Biederman 	/* Threaded processes may not enter a different user namespace */
980cde1975bSEric W. Biederman 	if (atomic_read(&current->mm->mm_users) > 1)
981cde1975bSEric W. Biederman 		return -EINVAL;
982cde1975bSEric W. Biederman 
983e66eded8SEric W. Biederman 	if (current->fs->users != 1)
984e66eded8SEric W. Biederman 		return -EINVAL;
985e66eded8SEric W. Biederman 
986cde1975bSEric W. Biederman 	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
987cde1975bSEric W. Biederman 		return -EPERM;
988cde1975bSEric W. Biederman 
989cde1975bSEric W. Biederman 	cred = prepare_creds();
990cde1975bSEric W. Biederman 	if (!cred)
991cde1975bSEric W. Biederman 		return -ENOMEM;
992cde1975bSEric W. Biederman 
993cde1975bSEric W. Biederman 	put_user_ns(cred->user_ns);
994cde1975bSEric W. Biederman 	set_cred_user_ns(cred, get_user_ns(user_ns));
995cde1975bSEric W. Biederman 
996cde1975bSEric W. Biederman 	return commit_creds(cred);
997cde1975bSEric W. Biederman }
998cde1975bSEric W. Biederman 
999cde1975bSEric W. Biederman const struct proc_ns_operations userns_operations = {
1000cde1975bSEric W. Biederman 	.name		= "user",
1001cde1975bSEric W. Biederman 	.type		= CLONE_NEWUSER,
1002cde1975bSEric W. Biederman 	.get		= userns_get,
1003cde1975bSEric W. Biederman 	.put		= userns_put,
1004cde1975bSEric W. Biederman 	.install	= userns_install,
1005cde1975bSEric W. Biederman };
1006cde1975bSEric W. Biederman 
10076164281aSPavel Emelyanov static __init int user_namespaces_init(void)
10086164281aSPavel Emelyanov {
10096164281aSPavel Emelyanov 	user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
10106164281aSPavel Emelyanov 	return 0;
10116164281aSPavel Emelyanov }
1012c96d6660SPaul Gortmaker subsys_initcall(user_namespaces_init);
1013