1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Miscellaneous cgroup controller
4  *
5  * Copyright 2020 Google LLC
6  * Author: Vipin Sharma <vipinsh@google.com>
7  */
8 
9 #include <linux/limits.h>
10 #include <linux/cgroup.h>
11 #include <linux/errno.h>
12 #include <linux/atomic.h>
13 #include <linux/slab.h>
14 #include <linux/misc_cgroup.h>
15 
/* Keyword compared against the value token in misc_cg_max_write(). */
#define MAX_STR "max"
/*
 * Limit stored when MAX_STR is written; also the initial limit assigned to
 * every resource in misc_cg_alloc().
 */
#define MAX_NUM U64_MAX
18 
/*
 * Miscellaneous resource names, indexed by enum misc_res_type. Keep this
 * table in sync with that enum.
 *
 * A name is printed as the key of each line in the interface files and is
 * matched against the first token written to misc.max.
 */
static const char *const misc_res_name[] = {
#ifdef CONFIG_KVM_AMD_SEV
	/* AMD SEV ASIDs resource */
	"sev",
	/* AMD SEV-ES ASIDs resource */
	"sev_es",
#endif
#ifdef CONFIG_INTEL_TDX_HOST
	/* Intel TDX HKIDs resource */
	"tdx",
#endif
};
32 
/*
 * Root misc cgroup. Statically allocated; misc_cg_alloc() hands it out when
 * it is called without a parent css.
 */
static struct misc_cg root_cg;
35 
/*
 * Capacity of each miscellaneous resource for the entire machine. A capacity
 * of 0 means the resource is not initialized or not present on the host.
 *
 * root_cg.max and the capacity are independent of each other: root_cg.max
 * may be larger than the actual capacity. The miscellaneous controller uses
 * the "Limits" resource distribution model of cgroup.
 */
static u64 misc_res_capacity[MISC_CG_RES_TYPES];
45 
46 /**
47  * parent_misc() - Get the parent of the passed misc cgroup.
48  * @cgroup: cgroup whose parent needs to be fetched.
49  *
50  * Context: Any context.
51  * Return:
52  * * struct misc_cg* - Parent of the @cgroup.
53  * * %NULL - If @cgroup is null or the passed cgroup does not have a parent.
54  */
55 static struct misc_cg *parent_misc(struct misc_cg *cgroup)
56 {
57 	return cgroup ? css_misc(cgroup->css.parent) : NULL;
58 }
59 
60 /**
61  * valid_type() - Check if @type is valid or not.
62  * @type: misc res type.
63  *
64  * Context: Any context.
65  * Return:
66  * * true - If valid type.
67  * * false - If not valid type.
68  */
69 static inline bool valid_type(enum misc_res_type type)
70 {
71 	return type >= 0 && type < MISC_CG_RES_TYPES;
72 }
73 
74 /**
75  * misc_cg_set_capacity() - Set the capacity of the misc cgroup res.
76  * @type: Type of the misc res.
77  * @capacity: Supported capacity of the misc res on the host.
78  *
79  * If capacity is 0 then the charging a misc cgroup fails for that type.
80  *
81  * Context: Any context.
82  * Return:
83  * * %0 - Successfully registered the capacity.
84  * * %-EINVAL - If @type is invalid.
85  */
86 int misc_cg_set_capacity(enum misc_res_type type, u64 capacity)
87 {
88 	if (!valid_type(type))
89 		return -EINVAL;
90 
91 	WRITE_ONCE(misc_res_capacity[type], capacity);
92 	return 0;
93 }
94 EXPORT_SYMBOL_GPL(misc_cg_set_capacity);
95 
96 /**
97  * misc_cg_cancel_charge() - Cancel the charge from the misc cgroup.
98  * @type: Misc res type in misc cg to cancel the charge from.
99  * @cg: Misc cgroup to cancel charge from.
100  * @amount: Amount to cancel.
101  *
102  * Context: Any context.
103  */
104 static void misc_cg_cancel_charge(enum misc_res_type type, struct misc_cg *cg,
105 				  u64 amount)
106 {
107 	WARN_ONCE(atomic64_add_negative(-amount, &cg->res[type].usage),
108 		  "misc cgroup resource %s became less than 0",
109 		  misc_res_name[type]);
110 }
111 
112 static void misc_cg_update_watermark(struct misc_res *res, u64 new_usage)
113 {
114 	u64 old;
115 
116 	while (true) {
117 		old = atomic64_read(&res->watermark);
118 		if (new_usage <= old)
119 			break;
120 		if (atomic64_cmpxchg(&res->watermark, old, new_usage) == old)
121 			break;
122 	}
123 }
124 
125 static void misc_cg_event(enum misc_res_type type, struct misc_cg *cg)
126 {
127 	atomic64_inc(&cg->res[type].events_local);
128 	cgroup_file_notify(&cg->events_local_file);
129 
130 	for (; parent_misc(cg); cg = parent_misc(cg)) {
131 		atomic64_inc(&cg->res[type].events);
132 		cgroup_file_notify(&cg->events_file);
133 	}
134 }
135 
136 /**
137  * misc_cg_try_charge() - Try charging the misc cgroup.
138  * @type: Misc res type to charge.
139  * @cg: Misc cgroup which will be charged.
140  * @amount: Amount to charge.
141  *
142  * Charge @amount to the misc cgroup. Caller must use the same cgroup during
143  * the uncharge call.
144  *
145  * Context: Any context.
146  * Return:
147  * * %0 - If successfully charged.
148  * * -EINVAL - If @type is invalid or misc res has 0 capacity.
149  * * -EBUSY - If max limit will be crossed or total usage will be more than the
150  *	      capacity.
151  */
152 int misc_cg_try_charge(enum misc_res_type type, struct misc_cg *cg, u64 amount)
153 {
154 	struct misc_cg *i, *j;
155 	int ret;
156 	struct misc_res *res;
157 	u64 new_usage;
158 
159 	if (!(valid_type(type) && cg && READ_ONCE(misc_res_capacity[type])))
160 		return -EINVAL;
161 
162 	if (!amount)
163 		return 0;
164 
165 	for (i = cg; i; i = parent_misc(i)) {
166 		res = &i->res[type];
167 
168 		new_usage = atomic64_add_return(amount, &res->usage);
169 		if (new_usage > READ_ONCE(res->max) ||
170 		    new_usage > READ_ONCE(misc_res_capacity[type])) {
171 			ret = -EBUSY;
172 			goto err_charge;
173 		}
174 		misc_cg_update_watermark(res, new_usage);
175 	}
176 	return 0;
177 
178 err_charge:
179 	misc_cg_event(type, i);
180 
181 	for (j = cg; j != i; j = parent_misc(j))
182 		misc_cg_cancel_charge(type, j, amount);
183 	misc_cg_cancel_charge(type, i, amount);
184 	return ret;
185 }
186 EXPORT_SYMBOL_GPL(misc_cg_try_charge);
187 
188 /**
189  * misc_cg_uncharge() - Uncharge the misc cgroup.
190  * @type: Misc res type which was charged.
191  * @cg: Misc cgroup which will be uncharged.
192  * @amount: Charged amount.
193  *
194  * Context: Any context.
195  */
196 void misc_cg_uncharge(enum misc_res_type type, struct misc_cg *cg, u64 amount)
197 {
198 	struct misc_cg *i;
199 
200 	if (!(amount && valid_type(type) && cg))
201 		return;
202 
203 	for (i = cg; i; i = parent_misc(i))
204 		misc_cg_cancel_charge(type, i, amount);
205 }
206 EXPORT_SYMBOL_GPL(misc_cg_uncharge);
207 
208 /**
209  * misc_cg_max_show() - Show the misc cgroup max limit.
210  * @sf: Interface file
211  * @v: Arguments passed
212  *
213  * Context: Any context.
214  * Return: 0 to denote successful print.
215  */
216 static int misc_cg_max_show(struct seq_file *sf, void *v)
217 {
218 	int i;
219 	struct misc_cg *cg = css_misc(seq_css(sf));
220 	u64 max;
221 
222 	for (i = 0; i < MISC_CG_RES_TYPES; i++) {
223 		if (READ_ONCE(misc_res_capacity[i])) {
224 			max = READ_ONCE(cg->res[i].max);
225 			if (max == MAX_NUM)
226 				seq_printf(sf, "%s max\n", misc_res_name[i]);
227 			else
228 				seq_printf(sf, "%s %llu\n", misc_res_name[i],
229 					   max);
230 		}
231 	}
232 
233 	return 0;
234 }
235 
236 /**
237  * misc_cg_max_write() - Update the maximum limit of the cgroup.
238  * @of: Handler for the file.
239  * @buf: Data from the user. It should be either "max", 0, or a positive
240  *	 integer.
241  * @nbytes: Number of bytes of the data.
242  * @off: Offset in the file.
243  *
244  * User can pass data like:
245  * echo sev 23 > misc.max, OR
246  * echo sev max > misc.max
247  *
248  * Context: Any context.
249  * Return:
250  * * >= 0 - Number of bytes processed in the input.
251  * * -EINVAL - If buf is not valid.
252  * * -ERANGE - If number is bigger than the u64 capacity.
253  */
254 static ssize_t misc_cg_max_write(struct kernfs_open_file *of, char *buf,
255 				 size_t nbytes, loff_t off)
256 {
257 	struct misc_cg *cg;
258 	u64 max;
259 	int ret = 0, i;
260 	enum misc_res_type type = MISC_CG_RES_TYPES;
261 	char *token;
262 
263 	buf = strstrip(buf);
264 	token = strsep(&buf, " ");
265 
266 	if (!token || !buf)
267 		return -EINVAL;
268 
269 	for (i = 0; i < MISC_CG_RES_TYPES; i++) {
270 		if (!strcmp(misc_res_name[i], token)) {
271 			type = i;
272 			break;
273 		}
274 	}
275 
276 	if (type == MISC_CG_RES_TYPES)
277 		return -EINVAL;
278 
279 	if (!strcmp(MAX_STR, buf)) {
280 		max = MAX_NUM;
281 	} else {
282 		ret = kstrtou64(buf, 0, &max);
283 		if (ret)
284 			return ret;
285 	}
286 
287 	cg = css_misc(of_css(of));
288 
289 	if (READ_ONCE(misc_res_capacity[type]))
290 		WRITE_ONCE(cg->res[type].max, max);
291 	else
292 		ret = -EINVAL;
293 
294 	return ret ? ret : nbytes;
295 }
296 
297 /**
298  * misc_cg_current_show() - Show the current usage of the misc cgroup.
299  * @sf: Interface file
300  * @v: Arguments passed
301  *
302  * Context: Any context.
303  * Return: 0 to denote successful print.
304  */
305 static int misc_cg_current_show(struct seq_file *sf, void *v)
306 {
307 	int i;
308 	u64 usage;
309 	struct misc_cg *cg = css_misc(seq_css(sf));
310 
311 	for (i = 0; i < MISC_CG_RES_TYPES; i++) {
312 		usage = atomic64_read(&cg->res[i].usage);
313 		if (READ_ONCE(misc_res_capacity[i]) || usage)
314 			seq_printf(sf, "%s %llu\n", misc_res_name[i], usage);
315 	}
316 
317 	return 0;
318 }
319 
320 /**
321  * misc_cg_peak_show() - Show the peak usage of the misc cgroup.
322  * @sf: Interface file
323  * @v: Arguments passed
324  *
325  * Context: Any context.
326  * Return: 0 to denote successful print.
327  */
328 static int misc_cg_peak_show(struct seq_file *sf, void *v)
329 {
330 	int i;
331 	u64 watermark;
332 	struct misc_cg *cg = css_misc(seq_css(sf));
333 
334 	for (i = 0; i < MISC_CG_RES_TYPES; i++) {
335 		watermark = atomic64_read(&cg->res[i].watermark);
336 		if (READ_ONCE(misc_res_capacity[i]) || watermark)
337 			seq_printf(sf, "%s %llu\n", misc_res_name[i], watermark);
338 	}
339 
340 	return 0;
341 }
342 
343 /**
344  * misc_cg_capacity_show() - Show the total capacity of misc res on the host.
345  * @sf: Interface file
346  * @v: Arguments passed
347  *
348  * Only present in the root cgroup directory.
349  *
350  * Context: Any context.
351  * Return: 0 to denote successful print.
352  */
353 static int misc_cg_capacity_show(struct seq_file *sf, void *v)
354 {
355 	int i;
356 	u64 cap;
357 
358 	for (i = 0; i < MISC_CG_RES_TYPES; i++) {
359 		cap = READ_ONCE(misc_res_capacity[i]);
360 		if (cap)
361 			seq_printf(sf, "%s %llu\n", misc_res_name[i], cap);
362 	}
363 
364 	return 0;
365 }
366 
367 static int __misc_events_show(struct seq_file *sf, bool local)
368 {
369 	struct misc_cg *cg = css_misc(seq_css(sf));
370 	u64 events;
371 	int i;
372 
373 	for (i = 0; i < MISC_CG_RES_TYPES; i++) {
374 		if (local)
375 			events = atomic64_read(&cg->res[i].events_local);
376 		else
377 			events = atomic64_read(&cg->res[i].events);
378 		if (READ_ONCE(misc_res_capacity[i]) || events)
379 			seq_printf(sf, "%s.max %llu\n", misc_res_name[i], events);
380 	}
381 	return 0;
382 }
383 
384 static int misc_events_show(struct seq_file *sf, void *v)
385 {
386 	return __misc_events_show(sf, false);
387 }
388 
389 static int misc_events_local_show(struct seq_file *sf, void *v)
390 {
391 	return __misc_events_show(sf, true);
392 }
393 
394 /* Misc cgroup interface files */
395 static struct cftype misc_cg_files[] = {
396 	{
397 		.name = "max",
398 		.write = misc_cg_max_write,
399 		.seq_show = misc_cg_max_show,
400 		.flags = CFTYPE_NOT_ON_ROOT,
401 	},
402 	{
403 		.name = "current",
404 		.seq_show = misc_cg_current_show,
405 	},
406 	{
407 		.name = "peak",
408 		.seq_show = misc_cg_peak_show,
409 	},
410 	{
411 		.name = "capacity",
412 		.seq_show = misc_cg_capacity_show,
413 		.flags = CFTYPE_ONLY_ON_ROOT,
414 	},
415 	{
416 		.name = "events",
417 		.flags = CFTYPE_NOT_ON_ROOT,
418 		.file_offset = offsetof(struct misc_cg, events_file),
419 		.seq_show = misc_events_show,
420 	},
421 	{
422 		.name = "events.local",
423 		.flags = CFTYPE_NOT_ON_ROOT,
424 		.file_offset = offsetof(struct misc_cg, events_local_file),
425 		.seq_show = misc_events_local_show,
426 	},
427 	{}
428 };
429 
430 /**
431  * misc_cg_alloc() - Allocate misc cgroup.
432  * @parent_css: Parent cgroup.
433  *
434  * Context: Process context.
435  * Return:
436  * * struct cgroup_subsys_state* - css of the allocated cgroup.
437  * * ERR_PTR(-ENOMEM) - No memory available to allocate.
438  */
439 static struct cgroup_subsys_state *
440 misc_cg_alloc(struct cgroup_subsys_state *parent_css)
441 {
442 	enum misc_res_type i;
443 	struct misc_cg *cg;
444 
445 	if (!parent_css) {
446 		cg = &root_cg;
447 	} else {
448 		cg = kzalloc(sizeof(*cg), GFP_KERNEL);
449 		if (!cg)
450 			return ERR_PTR(-ENOMEM);
451 	}
452 
453 	for (i = 0; i < MISC_CG_RES_TYPES; i++) {
454 		WRITE_ONCE(cg->res[i].max, MAX_NUM);
455 		atomic64_set(&cg->res[i].usage, 0);
456 	}
457 
458 	return &cg->css;
459 }
460 
/**
 * misc_cg_free() - Free the misc cgroup.
 * @css: cgroup subsys object embedded in the misc cgroup to release.
 *
 * Context: Any context.
 */
static void misc_cg_free(struct cgroup_subsys_state *css)
{
	struct misc_cg *cg = css_misc(css);

	kfree(cg);
}
471 
/*
 * Cgroup controller callbacks.
 *
 * The same misc_cg_files table is registered for both legacy_cftypes and
 * dfl_cftypes.
 */
struct cgroup_subsys misc_cgrp_subsys = {
	.css_alloc = misc_cg_alloc,
	.css_free = misc_cg_free,
	.legacy_cftypes = misc_cg_files,
	.dfl_cftypes = misc_cg_files,
};
479