/*-
 * SPDX-License-Identifier: Beerware
 *
 * ----------------------------------------------------------------------------
 * "THE BEER-WARE LICENSE" (Revision 42):
 * <phk@FreeBSD.org> wrote this file.  As long as you retain this notice you
 * can do whatever you want with this stuff. If we meet some day, and you think
 * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
 * ----------------------------------------------------------------------------
 */

#ifndef _SYS_SMP_H_
#define _SYS_SMP_H_

#ifdef _KERNEL

#ifndef LOCORE

#include <sys/types.h>
#include <sys/cpuset.h>
#include <sys/queue.h>

/*
 * Types of nodes in the topological tree.
 */
typedef enum {
	/* No node has this type; can be used in topo API calls. */
	TOPO_TYPE_DUMMY,
	/* Processing unit aka computing unit aka logical CPU. */
	TOPO_TYPE_PU,
	/* Physical subdivision of a package. */
	TOPO_TYPE_CORE,
	/* CPU L1/L2/L3 cache. */
	TOPO_TYPE_CACHE,
	/* Package aka chip, equivalent to socket. */
	TOPO_TYPE_PKG,
	/* NUMA node. */
	TOPO_TYPE_NODE,
	/* Other logical or physical grouping of PUs. */
	/* E.g. PUs on the same die, or PUs sharing an FPU. */
	TOPO_TYPE_GROUP,
	/* The whole system. */
	TOPO_TYPE_SYSTEM
} topo_node_type;

/* Hardware identifier of a topology component. */
typedef	unsigned int hwid_t;
/* Logical CPU identifier. */
typedef	int cpuid_t;

/* A node in the topology. */
struct topo_node {
	struct topo_node			*parent;
	TAILQ_HEAD(topo_children, topo_node)	children;
	TAILQ_ENTRY(topo_node)			siblings;
	cpuset_t				cpuset;
	topo_node_type				type;
	uintptr_t				subtype;
	hwid_t					hwid;
	cpuid_t					id;
	int					nchildren;
	int					cpu_count;
};

/*
 * Scheduling topology of a NUMA or SMP system.
 *
 * The top level topology is an array of pointers to groups.  Each group
 * contains a bitmask of the CPUs in its group or subgroups.  It may also
 * contain a pointer to an array of child groups.
 *
 * The bitmasks at non-leaf groups may be used by consumers that support
 * a smaller depth than the hardware provides.
 *
 * The topology may be omitted on systems where all CPUs are equal.
 */

struct cpu_group {
	struct cpu_group *cg_parent;	/* Our parent group. */
	struct cpu_group *cg_child;	/* Optional array of child groups. */
	cpuset_t	cg_mask;	/* Mask of CPUs in this group. */
	int32_t		cg_count;	/* Count of CPUs in this group. */
	int32_t		cg_first;	/* First CPU in this group. */
	int32_t		cg_last;	/* Last CPU in this group. */
	int16_t		cg_children;	/* Number of child groups. */
	int8_t		cg_level;	/* Shared cache level. */
	int8_t		cg_flags;	/* Traversal modifiers. */
};

typedef struct cpu_group *cpu_group_t;

extern cpu_group_t cpu_top;
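
/*
 * For illustration only: a minimal sketch of a depth-first walk over the
 * cpu_group tree rooted at cpu_top.  The helper name walk_cpu_groups() is
 * hypothetical, not part of this header; child groups are assumed to live
 * in the array pointed to by cg_child, one entry per cg_children.
 *
 *	static void
 *	walk_cpu_groups(const struct cpu_group *cg, int depth)
 *	{
 *		int i;
 *
 *		printf("%*sgroup: %d CPUs, cache level %d\n", depth * 2, "",
 *		    cg->cg_count, cg->cg_level);
 *		for (i = 0; i < cg->cg_children; i++)
 *			walk_cpu_groups(&cg->cg_child[i], depth + 1);
 *	}
 *
 * A caller would typically start from smp_topo() (declared below), e.g.
 * walk_cpu_groups(smp_topo(), 0).
 */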

/*
 * Defines common resources for CPUs in the group.  The highest level
 * resource should be used when multiple are shared.
 */
#define	CG_SHARE_NONE	0
#define	CG_SHARE_L1	1
#define	CG_SHARE_L2	2
#define	CG_SHARE_L3	3

#define	MAX_CACHE_LEVELS	CG_SHARE_L3

/*
 * Behavior modifiers for load balancing and affinity.
 */
#define	CG_FLAG_HTT	0x01		/* Schedule the alternate core last. */
#define	CG_FLAG_SMT	0x02		/* Modern SMT; less constrained than HTT. */
#define	CG_FLAG_THREAD	(CG_FLAG_HTT | CG_FLAG_SMT)	/* Any threading. */
#define	CG_FLAG_NODE	0x04		/* NUMA node. */
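
/*
 * For illustration only: a consumer might test cg_flags to decide whether
 * the CPUs in a group are hardware threads contending for one core.  A
 * minimal sketch, assuming "cg" points to a valid struct cpu_group:
 *
 *	if (cg->cg_flags & CG_FLAG_THREAD)
 *		prefer_spreading_across_groups();  (hypothetical policy hook)
 *
 * CG_FLAG_THREAD matches both the older HTT and the newer SMT variants.
 */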

/*
 * Convenience routines for building and traversing topologies.
 */
#ifdef SMP
void topo_init_node(struct topo_node *node);
void topo_init_root(struct topo_node *root);
struct topo_node * topo_add_node_by_hwid(struct topo_node *parent, int hwid,
    topo_node_type type, uintptr_t subtype);
struct topo_node * topo_find_node_by_hwid(struct topo_node *parent, int hwid,
    topo_node_type type, uintptr_t subtype);
void topo_promote_child(struct topo_node *child);
struct topo_node * topo_next_node(struct topo_node *top,
    struct topo_node *node);
struct topo_node * topo_next_nonchild_node(struct topo_node *top,
    struct topo_node *node);
void topo_set_pu_id(struct topo_node *node, cpuid_t id);
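
/*
 * For illustration only: a minimal sketch of how machine dependent
 * enumeration code might build a one-package, two-core topology with these
 * routines.  The identifiers "root", "pkg" and "core" are local variables
 * of the hypothetical caller, not part of this API.
 *
 *	static struct topo_node root;
 *	struct topo_node *pkg, *core;
 *	int i;
 *
 *	topo_init_root(&root);
 *	pkg = topo_add_node_by_hwid(&root, 0, TOPO_TYPE_PKG, 0);
 *	for (i = 0; i < 2; i++) {
 *		core = topo_add_node_by_hwid(pkg, i, TOPO_TYPE_CORE, 0);
 *		topo_set_pu_id(topo_add_node_by_hwid(core, i,
 *		    TOPO_TYPE_PU, 0), i);
 *	}
 */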

enum topo_level {
	TOPO_LEVEL_PKG = 0,
	/*
	 * Some systems have useful sub-package core organizations.  On these,
	 * a package has one or more subgroups.  Each subgroup contains one or
	 * more cache groups (cores that share a last level cache).
	 */
	TOPO_LEVEL_GROUP,
	TOPO_LEVEL_CACHEGROUP,
	TOPO_LEVEL_CORE,
	TOPO_LEVEL_THREAD,
	TOPO_LEVEL_COUNT	/* Must be last */
};
struct topo_analysis {
	int entities[TOPO_LEVEL_COUNT];
};
int topo_analyze(struct topo_node *topo_root, int all,
    struct topo_analysis *results);

#define	TOPO_FOREACH(i, root)	\
	for (i = root; i != NULL; i = topo_next_node(root, i))
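
/*
 * For illustration only: a minimal sketch of traversing a topology and
 * summarizing it.  The variables are local to the hypothetical caller,
 * "root" is assumed to point at a fully built topology, and topo_analyze()
 * is assumed to return nonzero on success.
 *
 *	struct topo_node *node;
 *	struct topo_analysis a;
 *	int ncores = 0;
 *
 *	TOPO_FOREACH(node, root)
 *		if (node->type == TOPO_TYPE_CORE)
 *			ncores++;
 *	if (topo_analyze(root, 1, &a) != 0)
 *		printf("%d cores, %d threads\n",
 *		    a.entities[TOPO_LEVEL_CORE],
 *		    a.entities[TOPO_LEVEL_THREAD]);
 */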

struct cpu_group *smp_topo_1level(int l1share, int l1count, int l1flags);
struct cpu_group *smp_topo_2level(int l2share, int l2count, int l1share,
    int l1count, int l1flags);
struct cpu_group *smp_topo_find(struct cpu_group *top, int cpu);
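
/*
 * For illustration only: a machine dependent cpu_topo() implementation
 * might describe a system of dual-threaded cores, four cores per shared
 * L2 cache, as a two level topology.  A minimal sketch:
 *
 *	struct cpu_group *
 *	cpu_topo(void)
 *	{
 *
 *		return (smp_topo_2level(CG_SHARE_L2, 4, CG_SHARE_L1, 2,
 *		    CG_FLAG_SMT));
 *	}
 *
 * Platforms where all CPUs are equal can return smp_topo_none() instead
 * (declared below).
 */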

extern void (*cpustop_restartfunc)(void);
/* The suspend/resume cpusets are x86 only, but minimize ifdefs. */
extern volatile cpuset_t resuming_cpus;	/* woken up cpus in suspend pen */
extern volatile cpuset_t started_cpus;	/* cpus to let out of stop pen */
extern volatile cpuset_t stopped_cpus;	/* cpus in stop pen */
extern volatile cpuset_t suspended_cpus; /* cpus [near] sleeping in susp pen */
extern volatile cpuset_t toresume_cpus;	/* cpus to let out of suspend pen */
extern cpuset_t hlt_cpus_mask;		/* XXX 'mask' is detail in old impl */
extern cpuset_t logical_cpus_mask;
#endif /* SMP */

struct cpu_group *smp_topo(void);
struct cpu_group *smp_topo_alloc(u_int count);
struct cpu_group *smp_topo_none(void);

extern u_int mp_maxid;
extern int mp_maxcpus;
extern int mp_ncores;
extern int mp_ncpus;
extern int smp_cpus;
extern volatile int smp_started;
extern int smp_threads_per_core;

extern cpuset_t all_cpus;
extern cpuset_t cpuset_domain[MAXMEMDOM];	/* CPUs in each NUMA domain. */

struct pcb;
extern struct pcb *stoppcbs;

/*
 * Macro allowing us to determine whether a CPU is absent at any given
 * time, thus permitting us to configure sparse maps of cpuid-dependent
 * (per-CPU) structures.
 */
#define	CPU_ABSENT(x_cpu)	(!CPU_ISSET(x_cpu, &all_cpus))
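
/*
 * For illustration only: a minimal sketch of the sparse map usage the
 * comment above describes, allocating per-CPU state only for CPUs that
 * are actually present.  The array "softc" is hypothetical.
 *
 *	u_int cpu;
 *
 *	for (cpu = 0; cpu <= mp_maxid; cpu++) {
 *		if (CPU_ABSENT(cpu))
 *			continue;
 *		softc[cpu] = malloc(sizeof(*softc[cpu]), M_DEVBUF,
 *		    M_WAITOK | M_ZERO);
 *	}
 */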

/*
 * Macros to iterate over non-absent CPUs.  CPU_FOREACH() takes an
 * integer iterator and iterates over the available set of CPUs.
 * CPU_FIRST() returns the id of the first non-absent CPU.  CPU_NEXT()
 * returns the id of the next non-absent CPU.  It will wrap back to
 * CPU_FIRST() once the end of the list is reached.  The iterators are
 * currently implemented via inline functions.
 */
#define	CPU_FOREACH(i)							\
	for ((i) = 0; (i) <= mp_maxid; (i)++)				\
		if (!CPU_ABSENT((i)))
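
/*
 * For illustration only: typical CPU_FOREACH() usage, summing a
 * hypothetical per-CPU counter array "counts" over all present CPUs.
 *
 *	int cpu;
 *	uint64_t total = 0;
 *
 *	CPU_FOREACH(cpu)
 *		total += counts[cpu];
 */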

static __inline int
cpu_first(void)
{
	int i;

	for (i = 0;; i++)
		if (!CPU_ABSENT(i))
			return (i);
}

static __inline int
cpu_next(int i)
{

	for (;;) {
		i++;
		if ((u_int)i > mp_maxid)
			i = 0;
		if (!CPU_ABSENT(i))
			return (i);
	}
}

#define	CPU_FIRST()	cpu_first()
#define	CPU_NEXT(i)	cpu_next((i))

#ifdef SMP
/*
 * Machine dependent functions used to initialize MP support.
 *
 * cpu_mp_probe() should check to see if MP support is present and return
 * zero if it is not or non-zero if it is.  If MP support is present, then
 * cpu_mp_start() will be called so that MP can be enabled.  This function
 * should do things such as start up secondary processors.  It should also
 * set up mp_ncpus, all_cpus, and smp_cpus.  It should also ensure that
 * smp_started is initialized at the appropriate time.
 * Once cpu_mp_start() returns, machine independent MP startup code will be
 * executed and a simple message will be output to the console.  Finally,
 * cpu_mp_announce() will be called so that machine dependent messages about
 * the MP support may be output to the console if desired.
 *
 * The cpu_mp_setmaxid() function is called very early during the boot
 * process so that the MD code may set mp_maxid to provide an upper bound
 * on CPU IDs that other subsystems may use.  If a platform is not able to
 * determine the exact maximum ID that early, then it may set mp_maxid to
 * MAXCPU - 1.
 */
struct thread;

struct cpu_group *cpu_topo(void);
void	cpu_mp_announce(void);
int	cpu_mp_probe(void);
void	cpu_mp_setmaxid(void);
void	cpu_mp_start(void);
void	cpu_mp_stop(void);	/* Go back to single-CPU */
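
/*
 * For illustration only: the rough shape of a machine dependent
 * implementation of the hooks above, per the preceding comment block.
 * Platform details (how CPUs are counted and released) are elided.
 *
 *	void
 *	cpu_mp_setmaxid(void)
 *	{
 *
 *		mp_maxid = MAXCPU - 1;	(exact value unknown this early)
 *		mp_maxcpus = MAXCPU;
 *	}
 *
 *	int
 *	cpu_mp_probe(void)
 *	{
 *
 *		return (mp_ncpus > 1);
 *	}
 */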

void	forward_signal(struct thread *);
int	restart_cpus(cpuset_t);
int	stop_cpus(cpuset_t);
int	stop_cpus_hard(cpuset_t);
#if defined(__amd64__) || defined(__i386__)
int	suspend_cpus(cpuset_t);
int	resume_cpus(cpuset_t);
int	offline_cpus(cpuset_t);
#endif
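
/*
 * For illustration only: stopping every other CPU around a critical
 * machine-wide operation, then releasing them.  A minimal sketch; real
 * callers must respect the locking rules around these calls.
 *
 *	cpuset_t other_cpus;
 *
 *	other_cpus = all_cpus;
 *	CPU_CLR(curcpu, &other_cpus);
 *	stop_cpus(other_cpus);
 *	(perform the operation that requires quiesced CPUs)
 *	restart_cpus(stopped_cpus);
 */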

void	smp_rendezvous_action(void);
extern	struct mtx smp_ipi_mtx;

#endif /* SMP */

int	quiesce_all_cpus(const char *, int);
int	quiesce_cpus(cpuset_t, const char *, int);
void	quiesce_all_critical(void);
void	cpus_fence_seq_cst(void);
void	smp_no_rendezvous_barrier(void *);
void	smp_rendezvous(void (*)(void *),
		       void (*)(void *),
		       void (*)(void *),
		       void *arg);
void	smp_rendezvous_cpus(cpuset_t,
		       void (*)(void *),
		       void (*)(void *),
		       void (*)(void *),
		       void *arg);
void	smp_rendezvous_cpu(u_int,
		       void (*)(void *),
		       void (*)(void *),
		       void (*)(void *),
		       void *arg);
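
/*
 * For illustration only: the three callbacks are the setup, action and
 * teardown functions; smp_no_rendezvous_barrier() can stand in for a
 * stage that needs no synchronization.  A minimal sketch that runs a
 * hypothetical do_flush() on every CPU:
 *
 *	static void
 *	do_flush(void *arg __unused)
 *	{
 *
 *		(flush this CPU's state here)
 *	}
 *
 *	smp_rendezvous(smp_no_rendezvous_barrier, do_flush,
 *	    smp_no_rendezvous_barrier, NULL);
 */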

struct smp_rendezvous_cpus_retry_arg {
	cpuset_t cpus;
};
void	smp_rendezvous_cpus_retry(cpuset_t,
		       void (*)(void *),
		       void (*)(void *),
		       void (*)(void *),
		       void (*)(void *, int),
		       struct smp_rendezvous_cpus_retry_arg *);

void	smp_rendezvous_cpus_done(struct smp_rendezvous_cpus_retry_arg *);

#endif /* !LOCORE */
#endif /* _KERNEL */
#endif /* _SYS_SMP_H_ */