xref: /linux/kernel/sched/core_sched.c (revision bf76f23aa1c178e9115eba17f699fa726aed669b)
1 // SPDX-License-Identifier: GPL-2.0-only
2 
3 /*
4  * A simple wrapper around refcount. An allocated sched_core_cookie's
5  * address is used to compute the cookie of the task.
6  */
7 #include "sched.h"
8 
struct sched_core_cookie {
	refcount_t refcnt;	/* references held by tasks (and transient holders); freed at zero */
};
12 
sched_core_alloc_cookie(void)13 static unsigned long sched_core_alloc_cookie(void)
14 {
15 	struct sched_core_cookie *ck = kmalloc(sizeof(*ck), GFP_KERNEL);
16 	if (!ck)
17 		return 0;
18 
19 	refcount_set(&ck->refcnt, 1);
20 	sched_core_get();
21 
22 	return (unsigned long)ck;
23 }
24 
sched_core_put_cookie(unsigned long cookie)25 static void sched_core_put_cookie(unsigned long cookie)
26 {
27 	struct sched_core_cookie *ptr = (void *)cookie;
28 
29 	if (ptr && refcount_dec_and_test(&ptr->refcnt)) {
30 		kfree(ptr);
31 		sched_core_put();
32 	}
33 }
34 
sched_core_get_cookie(unsigned long cookie)35 static unsigned long sched_core_get_cookie(unsigned long cookie)
36 {
37 	struct sched_core_cookie *ptr = (void *)cookie;
38 
39 	if (ptr)
40 		refcount_inc(&ptr->refcnt);
41 
42 	return cookie;
43 }
44 
/*
 * sched_core_update_cookie - replace the cookie on a task
 * @p: the task to update
 * @cookie: the new cookie
 *
 * Effectively exchange the task cookie; caller is responsible for lifetimes on
 * both ends: the reference for @cookie is donated to @p, and the reference on
 * the returned old cookie now belongs to the caller.
 *
 * Returns: the old cookie
 */
static unsigned long sched_core_update_cookie(struct task_struct *p,
					      unsigned long cookie)
{
	unsigned long old_cookie;
	struct rq_flags rf;
	struct rq *rq;

	/* Serializes against concurrent scheduling of @p and pins its rq. */
	rq = task_rq_lock(p, &rf);

	/*
	 * Since creating a cookie implies sched_core_get(), and we cannot set
	 * a cookie until after we've created it, similarly, we cannot destroy
	 * a cookie until after we've removed it, we must have core scheduling
	 * enabled here.
	 */
	WARN_ON_ONCE((p->core_cookie || cookie) && !sched_core_enabled(rq));

	/* Take @p out of the core tree before its sort key (cookie) changes. */
	if (sched_core_enqueued(p))
		sched_core_dequeue(rq, p, DEQUEUE_SAVE);

	old_cookie = p->core_cookie;
	p->core_cookie = cookie;

	/*
	 * Consider the cases: !prev_cookie and !cookie.
	 * Only a non-zero cookie on a queued task needs re-insertion.
	 */
	if (cookie && task_on_rq_queued(p))
		sched_core_enqueue(rq, p);

	/*
	 * If task is currently running, it may not be compatible anymore after
	 * the cookie change, so enter the scheduler on its CPU to schedule it
	 * away.
	 *
	 * Note that it is possible that as a result of this cookie change, the
	 * core has now entered/left forced idle state. Defer accounting to the
	 * next scheduling edge, rather than always forcing a reschedule here.
	 */
	if (task_on_cpu(rq, p))
		resched_curr(rq);

	task_rq_unlock(rq, p, &rf);

	return old_cookie;
}
100 
sched_core_clone_cookie(struct task_struct * p)101 static unsigned long sched_core_clone_cookie(struct task_struct *p)
102 {
103 	unsigned long cookie, flags;
104 
105 	raw_spin_lock_irqsave(&p->pi_lock, flags);
106 	cookie = sched_core_get_cookie(p->core_cookie);
107 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
108 
109 	return cookie;
110 }
111 
/*
 * Fork-time setup: the child starts outside the core rbtree (clear its
 * rb node) and inherits a reference on the forking task's cookie.
 */
void sched_core_fork(struct task_struct *p)
{
	RB_CLEAR_NODE(&p->core_node);
	p->core_cookie = sched_core_clone_cookie(current);
}
117 
/* Task teardown: drop @p's reference on its cookie (no-op if zero). */
void sched_core_free(struct task_struct *p)
{
	sched_core_put_cookie(p->core_cookie);
}
122 
/*
 * Install @cookie on @p: take a reference for the task, exchange it in,
 * then drop the reference on whatever old cookie came back out.
 */
static void __sched_core_set(struct task_struct *p, unsigned long cookie)
{
	cookie = sched_core_get_cookie(cookie);		/* +1 ref, donated to @p */
	cookie = sched_core_update_cookie(p, cookie);	/* returns the old cookie */
	sched_core_put_cookie(cookie);			/* -1 ref on the old cookie */
}
129 
/*
 * Called from prctl interface: PR_SCHED_CORE.
 *
 * @cmd:   PR_SCHED_CORE_{GET,CREATE,SHARE_TO,SHARE_FROM}
 * @pid:   target pid (0 means current)
 * @type:  scope of the operation (thread / thread-group / process-group)
 * @uaddr: user pointer for GET; must be 0 for all other commands
 *
 * Returns 0 on success or a negative errno.
 */
int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type,
			 unsigned long uaddr)
{
	unsigned long cookie = 0, id = 0;
	struct task_struct *task, *p;
	struct pid *grp;
	int err = 0;

	/* Core scheduling is only meaningful when SMT siblings exist. */
	if (!static_branch_likely(&sched_smt_present))
		return -ENODEV;

	/* The prctl scope constants are defined to alias the pid types. */
	BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_THREAD != PIDTYPE_PID);
	BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_THREAD_GROUP != PIDTYPE_TGID);
	BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_PROCESS_GROUP != PIDTYPE_PGID);

	/* Only PR_SCHED_CORE_GET takes a user address. */
	if (type > PIDTYPE_PGID || cmd >= PR_SCHED_CORE_MAX || pid < 0 ||
	    (cmd != PR_SCHED_CORE_GET && uaddr))
		return -EINVAL;

	/* Resolve the target task and pin it across the operation. */
	rcu_read_lock();
	if (pid == 0) {
		task = current;
	} else {
		task = find_task_by_vpid(pid);
		if (!task) {
			rcu_read_unlock();
			return -ESRCH;
		}
	}
	get_task_struct(task);
	rcu_read_unlock();

	/*
	 * Check if this process has the right to modify the specified
	 * process. Use the regular "ptrace_may_access()" checks.
	 */
	if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
		err = -EPERM;
		goto out;
	}

	switch (cmd) {
	case PR_SCHED_CORE_GET:
		/* GET is thread-scoped and needs an 8-byte-aligned uaddr. */
		if (type != PIDTYPE_PID || uaddr & 7) {
			err = -EINVAL;
			goto out;
		}
		cookie = sched_core_clone_cookie(task);
		if (cookie) {
			/* XXX improve ? - don't leak the kernel address */
			ptr_to_hashval((void *)cookie, &id);
		}
		err = put_user(id, (u64 __user *)uaddr);
		goto out;

	case PR_SCHED_CORE_CREATE:
		/* New cookie; our reference is dropped at 'out'. */
		cookie = sched_core_alloc_cookie();
		if (!cookie) {
			err = -ENOMEM;
			goto out;
		}
		break;

	case PR_SCHED_CORE_SHARE_TO:
		/* Push current's cookie onto the target scope below. */
		cookie = sched_core_clone_cookie(current);
		break;

	case PR_SCHED_CORE_SHARE_FROM:
		/* Pull the target thread's cookie onto current. */
		if (type != PIDTYPE_PID) {
			err = -EINVAL;
			goto out;
		}
		cookie = sched_core_clone_cookie(task);
		__sched_core_set(current, cookie);
		goto out;

	default:
		err = -EINVAL;
		goto out;
	}

	/* Single-thread scope: set the cookie on the one task and be done. */
	if (type == PIDTYPE_PID) {
		__sched_core_set(task, cookie);
		goto out;
	}

	/* Group scope: walk the pid's thread list under tasklist_lock. */
	read_lock(&tasklist_lock);
	grp = task_pid_type(task, type);

	/*
	 * Two passes: verify access to every member first, so the update
	 * below is all-or-nothing rather than partially applied.
	 */
	do_each_pid_thread(grp, type, p) {
		if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) {
			err = -EPERM;
			goto out_tasklist;
		}
	} while_each_pid_thread(grp, type, p);

	do_each_pid_thread(grp, type, p) {
		__sched_core_set(p, cookie);
	} while_each_pid_thread(grp, type, p);
out_tasklist:
	read_unlock(&tasklist_lock);

out:
	sched_core_put_cookie(cookie);
	put_task_struct(task);
	return err;
}
238 
239 #ifdef CONFIG_SCHEDSTATS
240 
/*
 * Charge the force-idle time accumulated since core_forceidle_start to the
 * tasks currently occupying this core's SMT siblings.
 *
 * REQUIRES: rq->core's clock recently updated, and rq lock held.
 */
void __sched_core_account_forceidle(struct rq *rq)
{
	const struct cpumask *smt_mask = cpu_smt_mask(cpu_of(rq));
	u64 delta, now = rq_clock(rq->core);
	struct rq *rq_i;
	struct task_struct *p;
	int i;

	lockdep_assert_rq_held(rq);

	WARN_ON_ONCE(!rq->core->core_forceidle_count);

	/* No accounting window open. */
	if (rq->core->core_forceidle_start == 0)
		return;

	delta = now - rq->core->core_forceidle_start;
	if (unlikely((s64)delta <= 0))
		return;

	/* Restart the window so the consumed delta isn't charged twice. */
	rq->core->core_forceidle_start = now;

	if (WARN_ON_ONCE(!rq->core->core_forceidle_occupation)) {
		/* can't be forced idle without a running task */
	} else if (rq->core->core_forceidle_count > 1 ||
		   rq->core->core_forceidle_occupation > 1) {
		/*
		 * For larger SMT configurations, we need to scale the charged
		 * forced idle amount since there can be more than one forced
		 * idle sibling and more than one running cookied task.
		 */
		delta *= rq->core->core_forceidle_count;
		delta = div_u64(delta, rq->core->core_forceidle_occupation);
	}

	for_each_cpu(i, smt_mask) {
		rq_i = cpu_rq(i);
		/* Prefer the core-selected task over whatever is running now. */
		p = rq_i->core_pick ?: rq_i->curr;

		/* Idle siblings are the ones being forced idle; skip them. */
		if (p == rq_i->idle)
			continue;

		/*
		 * Note: this will account forceidle to the current CPU, even
		 * if it comes from our SMT sibling.
		 */
		__account_forceidle_time(p, delta);
	}
}
290 
/* Scheduler-tick hook: fold accumulated force-idle time into schedstats. */
void __sched_core_tick(struct rq *rq)
{
	/* Nothing to charge unless some sibling is being forced idle. */
	if (!rq->core->core_forceidle_count)
		return;

	/* __sched_core_account_forceidle() requires a fresh core clock. */
	if (rq != rq->core)
		update_rq_clock(rq->core);

	__sched_core_account_forceidle(rq);
}
301 
302 #endif /* CONFIG_SCHEDSTATS */
303