// SPDX-License-Identifier: GPL-2.0
/*
 * A scheduler that validates the behavior of the NUMA-aware
 * functionalities.
 *
 * The scheduler creates a separate DSQ for each NUMA node, ensuring tasks
 * are exclusively processed by CPUs within their respective nodes. Idle
 * CPUs are selected only within the same node, so task migration can only
 * occur between CPUs belonging to the same node.
 *
 * Copyright (c) 2025 Andrea Righi <arighi@nvidia.com>
 */

#include <scx/common.bpf.h>

char _license[] SEC("license") = "GPL";

UEI_DEFINE(uei);

/*
 * Value of SCX_PICK_IDLE_IN_NODE, assumed to be populated by the
 * user-space loader following the usual __COMPAT_ convention, so that the
 * scheduler can also load on kernels where the flag is not defined.
 */
const volatile unsigned int __COMPAT_SCX_PICK_IDLE_IN_NODE;

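/*
 * Return true if @cpu is set in @node's idle cpumask.
 */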
static bool is_cpu_idle(s32 cpu, int node)
{
	const struct cpumask *idle_cpumask;
	bool idle;

	idle_cpumask = __COMPAT_scx_bpf_get_idle_cpumask_node(node);
	idle = bpf_cpumask_test_cpu(cpu, idle_cpumask);
	scx_bpf_put_cpumask(idle_cpumask);

	return idle;
}

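/*
 * Pick a target CPU for @p, restricting the search to the NUMA node of
 * the CPU the task is currently on, then verify that the picked CPU was
 * actually claimed (marked busy) and belongs to the expected node.
 */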
s32 BPF_STRUCT_OPS(numa_select_cpu,
		   struct task_struct *p, s32 prev_cpu, u64 wake_flags)
{
	int node = __COMPAT_scx_bpf_cpu_node(scx_bpf_task_cpu(p));
	s32 cpu;

	/*
	 * We could just use __COMPAT_scx_bpf_pick_any_cpu_node() here,
	 * since it already tries to pick an idle CPU within the node
	 * first, but let's use both functions for better testing coverage.
	 */
	cpu = __COMPAT_scx_bpf_pick_idle_cpu_node(p->cpus_ptr, node,
					__COMPAT_SCX_PICK_IDLE_IN_NODE);
	if (cpu < 0)
		cpu = __COMPAT_scx_bpf_pick_any_cpu_node(p->cpus_ptr, node,
						__COMPAT_SCX_PICK_IDLE_IN_NODE);

	if (is_cpu_idle(cpu, node))
		scx_bpf_error("CPU %d should be marked as busy", cpu);

	if (__COMPAT_scx_bpf_cpu_node(cpu) != node)
		scx_bpf_error("CPU %d should be in node %d", cpu, node);

	return cpu;
}

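/*
 * Queue @p on the DSQ associated with its NUMA node (DSQ IDs match node
 * IDs, see numa_init()).
 */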
void BPF_STRUCT_OPS(numa_enqueue, struct task_struct *p, u64 enq_flags)
{
	int node = __COMPAT_scx_bpf_cpu_node(scx_bpf_task_cpu(p));

	scx_bpf_dsq_insert(p, node, SCX_SLICE_DFL, enq_flags);
}

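/*
 * Consume tasks only from the DSQ of @cpu's own node, so tasks never
 * cross node boundaries.
 */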
void BPF_STRUCT_OPS(numa_dispatch, s32 cpu, struct task_struct *prev)
{
	int node = __COMPAT_scx_bpf_cpu_node(cpu);

	scx_bpf_dsq_move_to_local(node);
}

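/*
 * Create a separate DSQ for each NUMA node, using the node ID as the
 * DSQ ID.
 */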
s32 BPF_STRUCT_OPS_SLEEPABLE(numa_init)
{
	int node, err;

	bpf_for(node, 0, __COMPAT_scx_bpf_nr_node_ids()) {
		err = scx_bpf_create_dsq(node, node);
		if (err)
			return err;
	}

	return 0;
}

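/*
 * Record the exit reason, so that user space can report it.
 */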
void BPF_STRUCT_OPS(numa_exit, struct scx_exit_info *ei)
{
	UEI_RECORD(uei, ei);
}

SEC(".struct_ops.link")
struct sched_ext_ops numa_ops = {
	.select_cpu		= (void *)numa_select_cpu,
	.enqueue		= (void *)numa_enqueue,
	.dispatch		= (void *)numa_dispatch,
	.init			= (void *)numa_init,
	.exit			= (void *)numa_exit,
	.name			= "numa",
};