1 // SPDX-License-Identifier: GPL-2.0-only 2 3 #include <linux/stat.h> 4 #include <linux/sysctl.h> 5 #include <linux/slab.h> 6 #include <linux/cred.h> 7 #include <linux/hash.h> 8 #include <linux/kmemleak.h> 9 #include <linux/user_namespace.h> 10 11 struct ucounts init_ucounts = { 12 .ns = &init_user_ns, 13 .uid = GLOBAL_ROOT_UID, 14 .count = RCUREF_INIT(1), 15 }; 16 17 #define UCOUNTS_HASHTABLE_BITS 10 18 #define UCOUNTS_HASHTABLE_ENTRIES (1 << UCOUNTS_HASHTABLE_BITS) 19 static struct hlist_nulls_head ucounts_hashtable[UCOUNTS_HASHTABLE_ENTRIES] = { 20 [0 ... UCOUNTS_HASHTABLE_ENTRIES - 1] = HLIST_NULLS_HEAD_INIT(0) 21 }; 22 static DEFINE_SPINLOCK(ucounts_lock); 23 24 #define ucounts_hashfn(ns, uid) \ 25 hash_long((unsigned long)__kuid_val(uid) + (unsigned long)(ns), \ 26 UCOUNTS_HASHTABLE_BITS) 27 #define ucounts_hashentry(ns, uid) \ 28 (ucounts_hashtable + ucounts_hashfn(ns, uid)) 29 30 #ifdef CONFIG_SYSCTL 31 static struct ctl_table_set * 32 set_lookup(struct ctl_table_root *root) 33 { 34 return ¤t_user_ns()->set; 35 } 36 37 static int set_is_seen(struct ctl_table_set *set) 38 { 39 return ¤t_user_ns()->set == set; 40 } 41 42 static int set_permissions(struct ctl_table_header *head, 43 const struct ctl_table *table) 44 { 45 struct user_namespace *user_ns = 46 container_of(head->set, struct user_namespace, set); 47 int mode; 48 49 /* Allow users with CAP_SYS_RESOURCE unrestrained access */ 50 if (ns_capable(user_ns, CAP_SYS_RESOURCE)) 51 mode = (table->mode & S_IRWXU) >> 6; 52 else 53 /* Allow all others at most read-only access */ 54 mode = table->mode & S_IROTH; 55 return (mode << 6) | (mode << 3) | mode; 56 } 57 58 static struct ctl_table_root set_root = { 59 .lookup = set_lookup, 60 .permissions = set_permissions, 61 }; 62 63 static long ue_zero = 0; 64 static long ue_int_max = INT_MAX; 65 66 #define UCOUNT_ENTRY(name) \ 67 { \ 68 .procname = name, \ 69 .maxlen = sizeof(long), \ 70 .mode = 0644, \ 71 .proc_handler = proc_doulongvec_minmax, \ 72 .extra1 = &ue_zero, \ 73 .extra2 = &ue_int_max, \ 74 } 75 static const struct ctl_table user_table[] = { 76 UCOUNT_ENTRY("max_user_namespaces"), 77 UCOUNT_ENTRY("max_pid_namespaces"), 78 UCOUNT_ENTRY("max_uts_namespaces"), 79 UCOUNT_ENTRY("max_ipc_namespaces"), 80 UCOUNT_ENTRY("max_net_namespaces"), 81 UCOUNT_ENTRY("max_mnt_namespaces"), 82 UCOUNT_ENTRY("max_cgroup_namespaces"), 83 UCOUNT_ENTRY("max_time_namespaces"), 84 #ifdef CONFIG_INOTIFY_USER 85 UCOUNT_ENTRY("max_inotify_instances"), 86 UCOUNT_ENTRY("max_inotify_watches"), 87 #endif 88 #ifdef CONFIG_FANOTIFY 89 UCOUNT_ENTRY("max_fanotify_groups"), 90 UCOUNT_ENTRY("max_fanotify_marks"), 91 #endif 92 }; 93 #endif /* CONFIG_SYSCTL */ 94 95 bool setup_userns_sysctls(struct user_namespace *ns) 96 { 97 #ifdef CONFIG_SYSCTL 98 struct ctl_table *tbl; 99 100 BUILD_BUG_ON(ARRAY_SIZE(user_table) != UCOUNT_COUNTS); 101 setup_sysctl_set(&ns->set, &set_root, set_is_seen); 102 tbl = kmemdup(user_table, sizeof(user_table), GFP_KERNEL); 103 if (tbl) { 104 int i; 105 for (i = 0; i < UCOUNT_COUNTS; i++) { 106 tbl[i].data = &ns->ucount_max[i]; 107 } 108 ns->sysctls = __register_sysctl_table(&ns->set, "user", tbl, 109 ARRAY_SIZE(user_table)); 110 } 111 if (!ns->sysctls) { 112 kfree(tbl); 113 retire_sysctl_set(&ns->set); 114 return false; 115 } 116 #endif 117 return true; 118 } 119 120 void retire_userns_sysctls(struct user_namespace *ns) 121 { 122 #ifdef CONFIG_SYSCTL 123 const struct ctl_table *tbl; 124 125 tbl = ns->sysctls->ctl_table_arg; 126 unregister_sysctl_table(ns->sysctls); 127 retire_sysctl_set(&ns->set); 128 kfree(tbl); 129 #endif 130 } 131 132 static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid, 133 struct hlist_nulls_head *hashent) 134 { 135 struct ucounts *ucounts; 136 struct hlist_nulls_node *pos; 137 138 guard(rcu)(); 139 hlist_nulls_for_each_entry_rcu(ucounts, pos, hashent, node) { 140 if (uid_eq(ucounts->uid, uid) && (ucounts->ns == ns)) { 141 if (rcuref_get(&ucounts->count)) 142 return ucounts; 143 } 144 } 145 return NULL; 146 } 147 148 static void hlist_add_ucounts(struct ucounts *ucounts) 149 { 150 struct hlist_nulls_head *hashent = ucounts_hashentry(ucounts->ns, ucounts->uid); 151 152 spin_lock_irq(&ucounts_lock); 153 hlist_nulls_add_head_rcu(&ucounts->node, hashent); 154 spin_unlock_irq(&ucounts_lock); 155 } 156 157 struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) 158 { 159 struct hlist_nulls_head *hashent = ucounts_hashentry(ns, uid); 160 struct ucounts *ucounts, *new; 161 162 ucounts = find_ucounts(ns, uid, hashent); 163 if (ucounts) 164 return ucounts; 165 166 new = kzalloc(sizeof(*new), GFP_KERNEL); 167 if (!new) 168 return NULL; 169 170 new->ns = ns; 171 new->uid = uid; 172 rcuref_init(&new->count, 1); 173 174 spin_lock_irq(&ucounts_lock); 175 ucounts = find_ucounts(ns, uid, hashent); 176 if (ucounts) { 177 spin_unlock_irq(&ucounts_lock); 178 kfree(new); 179 return ucounts; 180 } 181 182 hlist_nulls_add_head_rcu(&new->node, hashent); 183 get_user_ns(new->ns); 184 spin_unlock_irq(&ucounts_lock); 185 return new; 186 } 187 188 void put_ucounts(struct ucounts *ucounts) 189 { 190 unsigned long flags; 191 192 if (rcuref_put(&ucounts->count)) { 193 spin_lock_irqsave(&ucounts_lock, flags); 194 hlist_nulls_del_rcu(&ucounts->node); 195 spin_unlock_irqrestore(&ucounts_lock, flags); 196 197 put_user_ns(ucounts->ns); 198 kfree_rcu(ucounts, rcu); 199 } 200 } 201 202 static inline bool atomic_long_inc_below(atomic_long_t *v, int u) 203 { 204 long c, old; 205 c = atomic_long_read(v); 206 for (;;) { 207 if (unlikely(c >= u)) 208 return false; 209 old = atomic_long_cmpxchg(v, c, c+1); 210 if (likely(old == c)) 211 return true; 212 c = old; 213 } 214 } 215 216 struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, 217 enum ucount_type type) 218 { 219 struct ucounts *ucounts, *iter, *bad; 220 struct user_namespace *tns; 221 ucounts = alloc_ucounts(ns, uid); 222 for (iter = ucounts; iter; iter = tns->ucounts) { 223 long max; 224 tns = iter->ns; 225 max = READ_ONCE(tns->ucount_max[type]); 226 if (!atomic_long_inc_below(&iter->ucount[type], max)) 227 goto fail; 228 } 229 return ucounts; 230 fail: 231 bad = iter; 232 for (iter = ucounts; iter != bad; iter = iter->ns->ucounts) 233 atomic_long_dec(&iter->ucount[type]); 234 235 put_ucounts(ucounts); 236 return NULL; 237 } 238 239 void dec_ucount(struct ucounts *ucounts, enum ucount_type type) 240 { 241 struct ucounts *iter; 242 for (iter = ucounts; iter; iter = iter->ns->ucounts) { 243 long dec = atomic_long_dec_if_positive(&iter->ucount[type]); 244 WARN_ON_ONCE(dec < 0); 245 } 246 put_ucounts(ucounts); 247 } 248 249 long inc_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v) 250 { 251 struct ucounts *iter; 252 long max = LONG_MAX; 253 long ret = 0; 254 255 for (iter = ucounts; iter; iter = iter->ns->ucounts) { 256 long new = atomic_long_add_return(v, &iter->rlimit[type]); 257 if (new < 0 || new > max) 258 ret = LONG_MAX; 259 else if (iter == ucounts) 260 ret = new; 261 max = get_userns_rlimit_max(iter->ns, type); 262 } 263 return ret; 264 } 265 266 bool dec_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v) 267 { 268 struct ucounts *iter; 269 long new = -1; /* Silence compiler warning */ 270 for (iter = ucounts; iter; iter = iter->ns->ucounts) { 271 long dec = atomic_long_sub_return(v, &iter->rlimit[type]); 272 WARN_ON_ONCE(dec < 0); 273 if (iter == ucounts) 274 new = dec; 275 } 276 return (new == 0); 277 } 278 279 static void do_dec_rlimit_put_ucounts(struct ucounts *ucounts, 280 struct ucounts *last, enum rlimit_type type) 281 { 282 struct ucounts *iter, *next; 283 for (iter = ucounts; iter != last; iter = next) { 284 long dec = atomic_long_sub_return(1, &iter->rlimit[type]); 285 WARN_ON_ONCE(dec < 0); 286 next = iter->ns->ucounts; 287 if (dec == 0) 288 put_ucounts(iter); 289 } 290 } 291 292 void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum rlimit_type type) 293 { 294 do_dec_rlimit_put_ucounts(ucounts, NULL, type); 295 } 296 297 long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type, 298 bool override_rlimit) 299 { 300 /* Caller must hold a reference to ucounts */ 301 struct ucounts *iter; 302 long max = LONG_MAX; 303 long dec, ret = 0; 304 305 for (iter = ucounts; iter; iter = iter->ns->ucounts) { 306 long new = atomic_long_add_return(1, &iter->rlimit[type]); 307 if (new < 0 || new > max) 308 goto dec_unwind; 309 if (iter == ucounts) 310 ret = new; 311 if (!override_rlimit) 312 max = get_userns_rlimit_max(iter->ns, type); 313 /* 314 * Grab an extra ucount reference for the caller when 315 * the rlimit count was previously 0. 316 */ 317 if (new != 1) 318 continue; 319 if (!get_ucounts(iter)) 320 goto dec_unwind; 321 } 322 return ret; 323 dec_unwind: 324 dec = atomic_long_sub_return(1, &iter->rlimit[type]); 325 WARN_ON_ONCE(dec < 0); 326 do_dec_rlimit_put_ucounts(ucounts, iter, type); 327 return 0; 328 } 329 330 bool is_rlimit_overlimit(struct ucounts *ucounts, enum rlimit_type type, unsigned long rlimit) 331 { 332 struct ucounts *iter; 333 long max = rlimit; 334 if (rlimit > LONG_MAX) 335 max = LONG_MAX; 336 for (iter = ucounts; iter; iter = iter->ns->ucounts) { 337 long val = get_rlimit_value(iter, type); 338 if (val < 0 || val > max) 339 return true; 340 max = get_userns_rlimit_max(iter->ns, type); 341 } 342 return false; 343 } 344 345 static __init int user_namespace_sysctl_init(void) 346 { 347 #ifdef CONFIG_SYSCTL 348 static struct ctl_table_header *user_header; 349 static struct ctl_table empty[1]; 350 /* 351 * It is necessary to register the user directory in the 352 * default set so that registrations in the child sets work 353 * properly. 354 */ 355 user_header = register_sysctl_sz("user", empty, 0); 356 kmemleak_ignore(user_header); 357 BUG_ON(!user_header); 358 BUG_ON(!setup_userns_sysctls(&init_user_ns)); 359 #endif 360 hlist_add_ucounts(&init_ucounts); 361 inc_rlimit_ucounts(&init_ucounts, UCOUNT_RLIMIT_NPROC, 1); 362 return 0; 363 } 364 subsys_initcall(user_namespace_sysctl_init); 365