// SPDX-License-Identifier: GPL-2.0
/*
 * A scheduler that validates the behavior of the NUMA-aware
 * functionalities.
 *
 * The scheduler creates a separate DSQ for each NUMA node, ensuring tasks
 * are exclusively processed by CPUs within their respective nodes. Idle
 * CPUs are selected only within the same node, so task migrations can
 * only occur between CPUs belonging to the same node.
 *
 * Copyright (c) 2025 Andrea Righi <arighi@nvidia.com>
 */

#include <scx/common.bpf.h>

char _license[] SEC("license") = "GPL";

UEI_DEFINE(uei);

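/*
 * Flags passed to the per-node pick-CPU kfuncs below; initialized by
 * userspace before load (presumably to SCX_PICK_IDLE_IN_NODE when the
 * running kernel supports node-restricted idle CPU selection).
 */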
const volatile unsigned int __COMPAT_SCX_PICK_IDLE_IN_NODE;

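/*
 * Return true if @cpu is currently marked idle in @node's idle cpumask.
 *
 * The cpumask returned by __COMPAT_scx_bpf_get_idle_cpumask_node() is
 * reference-counted and must be released with scx_bpf_put_cpumask().
 */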
static bool is_cpu_idle(s32 cpu, int node)
{
	const struct cpumask *idle_cpumask;
	bool idle;

	idle_cpumask = __COMPAT_scx_bpf_get_idle_cpumask_node(node);
	idle = bpf_cpumask_test_cpu(cpu, idle_cpumask);
	scx_bpf_put_cpumask(idle_cpumask);

	return idle;
}

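/*
 * Pick a CPU for @p strictly within its current NUMA node and verify the
 * result: the returned CPU must have been claimed (no longer marked idle)
 * and must actually belong to the expected node.
 */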
s32 BPF_STRUCT_OPS(numa_select_cpu,
		   struct task_struct *p, s32 prev_cpu, u64 wake_flags)
{
	int node = __COMPAT_scx_bpf_cpu_node(scx_bpf_task_cpu(p));
	s32 cpu;

	/*
	 * We could just use __COMPAT_scx_bpf_pick_any_cpu_node() here,
	 * since it already tries to pick an idle CPU within the node
	 * first, but let's use both functions for better testing coverage.
	 */
	cpu = __COMPAT_scx_bpf_pick_idle_cpu_node(p->cpus_ptr, node,
					__COMPAT_SCX_PICK_IDLE_IN_NODE);
	if (cpu < 0)
		cpu = __COMPAT_scx_bpf_pick_any_cpu_node(p->cpus_ptr, node,
					__COMPAT_SCX_PICK_IDLE_IN_NODE);

	if (is_cpu_idle(cpu, node))
		scx_bpf_error("CPU %d should be marked as busy", cpu);

	if (__COMPAT_scx_bpf_cpu_node(cpu) != node)
		scx_bpf_error("CPU %d should be in node %d", cpu, node);

	return cpu;
}

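/*
 * Queue @p on the DSQ of its current node (DSQ ids match NUMA node ids,
 * see numa_init()), so it can only be consumed by CPUs of that node.
 */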
void BPF_STRUCT_OPS(numa_enqueue, struct task_struct *p, u64 enq_flags)
{
	int node = __COMPAT_scx_bpf_cpu_node(scx_bpf_task_cpu(p));

	scx_bpf_dsq_insert(p, node, SCX_SLICE_DFL, enq_flags);
}

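/*
 * Consume tasks only from the local node's DSQ, preserving the per-node
 * isolation that this test validates.
 */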
void BPF_STRUCT_OPS(numa_dispatch, s32 cpu, struct task_struct *prev)
{
	int node = __COMPAT_scx_bpf_cpu_node(cpu);

	scx_bpf_dsq_move_to_local(node);
}

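/*
 * Create a separate DSQ for each NUMA node, using the node id itself as
 * the DSQ id.
 */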
s32 BPF_STRUCT_OPS_SLEEPABLE(numa_init)
{
	int node, err;

	bpf_for(node, 0, __COMPAT_scx_bpf_nr_node_ids()) {
		err = scx_bpf_create_dsq(node, node);
		if (err)
			return err;
	}

	return 0;
}

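/*
 * Record the exit info so that the userspace side of the test can check
 * how the scheduler terminated.
 */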
void BPF_STRUCT_OPS(numa_exit, struct scx_exit_info *ei)
{
	UEI_RECORD(uei, ei);
}

SEC(".struct_ops.link")
struct sched_ext_ops numa_ops = {
	.select_cpu		= (void *)numa_select_cpu,
	.enqueue		= (void *)numa_enqueue,
	.dispatch		= (void *)numa_dispatch,
	.init			= (void *)numa_init,
	.exit			= (void *)numa_exit,
	.name			= "numa",
};