xref: /src/sys/x86/cpufreq/hwpstate_amd.c (revision 191f47bcd65097599a962b46ae293e5ebe4e5b67)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2005 Nate Lawson
5  * Copyright (c) 2004 Colin Percival
6  * Copyright (c) 2004-2005 Bruno Durcot
7  * Copyright (c) 2004 FUKUDA Nobuhiko
8  * Copyright (c) 2009 Michael Reifenberger
9  * Copyright (c) 2009 Norikatsu Shigemura
10  * Copyright (c) 2008-2009 Gen Otsuji
11  * Copyright (c) 2025 ShengYi Hung
12  * Copyright (c) 2026 The FreeBSD Foundation
13  *
14  * Portions of this software were developed by Olivier Certner
15  * <olce@FreeBSD.org> at Kumacom SARL under sponsorship from the FreeBSD
16  * Foundation.
17  *
18  * This code is depending on kern_cpu.c, est.c, powernow.c, p4tcc.c, smist.c
19  * in various parts. The authors of these files are Nate Lawson,
20  * Colin Percival, Bruno Durcot, and FUKUDA Nobuhiko.
21  * This code contains patches by Michael Reifenberger and Norikatsu Shigemura.
22  * Thank you.
23  *
24  * Redistribution and use in source and binary forms, with or without
25  * modification, are permitted providing that the following conditions
26  * are met:
27  * 1. Redistributions of source code must retain the above copyright
28  *    notice, this list of conditions and the following disclaimer.
29  * 2. Redistributions in binary form must reproduce the above copyright
30  *    notice, this list of conditions and the following disclaimer in the
31  *    documentation and/or other materials provided with the distribution.
32  *
33  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
34  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
35  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
37  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
38  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
39  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
42  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
43  * POSSIBILITY OF SUCH DAMAGE.
44  */
45 
46 /*
47  * For more info:
48  * BIOS and Kernel Developer's Guide(BKDG) for AMD Family 10h Processors
49  * 31116 Rev 3.20  February 04, 2009
50  * BIOS and Kernel Developer's Guide(BKDG) for AMD Family 11h Processors
51  * 41256 Rev 3.00 - July 07, 2008
52  * Processor Programming Reference (PPR) for AMD Family 1Ah Model 02h,
53  * Revision C1 Processors Volume 1 of 7 - Sep 29, 2024
54  */
55 
56 #include <sys/param.h>
57 #include <sys/bus.h>
58 #include <sys/cpu.h>
59 #include <sys/kernel.h>
60 #include <sys/malloc.h>
61 #include <sys/module.h>
62 #include <sys/pcpu.h>
63 #include <sys/proc.h>
64 #include <sys/sbuf.h>
65 #include <sys/sched.h>
66 #include <sys/smp.h>
67 
68 #include <machine/_inttypes.h>
69 #include <machine/cputypes.h>
70 #include <machine/md_var.h>
71 #include <machine/specialreg.h>
72 
73 #include <contrib/dev/acpica/include/acpi.h>
74 
75 #include <dev/acpica/acpivar.h>
76 
77 #include <x86/cpufreq/hwpstate_common.h>
78 
79 #include "acpi_if.h"
80 #include "cpufreq_if.h"
81 
82 
83 #define	MSR_AMD_10H_11H_LIMIT	0xc0010061
84 #define	MSR_AMD_10H_11H_CONTROL	0xc0010062
85 #define	MSR_AMD_10H_11H_STATUS	0xc0010063
86 #define	MSR_AMD_10H_11H_CONFIG	0xc0010064
87 
88 #define	MSR_AMD_CPPC_CAPS_1	0xc00102b0
89 #define	MSR_AMD_CPPC_ENABLE	0xc00102b1
90 #define	MSR_AMD_CPPC_CAPS_2	0xc00102b2
91 #define	MSR_AMD_CPPC_REQUEST	0xc00102b3
92 #define	MSR_AMD_CPPC_STATUS	0xc00102b4
93 
94 #define	MSR_AMD_CPPC_CAPS_1_NAME	"CPPC_CAPABILITY_1"
95 #define	MSR_AMD_CPPC_ENABLE_NAME	"CPPC_ENABLE"
96 #define	MSR_AMD_CPPC_REQUEST_NAME	"CPPC_REQUEST"
97 
98 #define	MSR_AMD_PWR_ACC		0xc001007a
99 #define	MSR_AMD_PWR_ACC_MX	0xc001007b
100 
101 #define	AMD_10H_11H_MAX_STATES	16
102 
103 /* for MSR_AMD_10H_11H_LIMIT C001_0061 */
104 #define	AMD_10H_11H_GET_PSTATE_MAX_VAL(msr)	(((msr) >> 4) & 0x7)
105 #define	AMD_10H_11H_GET_PSTATE_LIMIT(msr)	(((msr)) & 0x7)
106 /* for MSR_AMD_10H_11H_CONFIG 10h:C001_0064:68 / 11h:C001_0064:6B */
107 #define	AMD_10H_11H_CUR_VID(msr)		(((msr) >> 9) & 0x7F)
108 #define	AMD_10H_11H_CUR_DID(msr)		(((msr) >> 6) & 0x07)
109 #define	AMD_10H_11H_CUR_FID(msr)		((msr) & 0x3F)
110 
111 #define	AMD_17H_CUR_IDIV(msr)			(((msr) >> 30) & 0x03)
112 #define	AMD_17H_CUR_IDD(msr)			(((msr) >> 22) & 0xFF)
113 #define	AMD_17H_CUR_VID(msr)			(((msr) >> 14) & 0xFF)
114 #define	AMD_17H_CUR_DID(msr)			(((msr) >> 8) & 0x3F)
115 #define	AMD_17H_CUR_FID(msr)			((msr) & 0xFF)
116 
117 #define	AMD_1AH_CUR_FID(msr)			((msr) & 0xFFF)
118 
119 #define	AMD_CPPC_CAPS_1_HIGHEST_PERF_BITS	0xff000000
120 #define	AMD_CPPC_CAPS_1_NOMINAL_PERF_BITS	0x00ff0000
121 #define	AMD_CPPC_CAPS_1_EFFICIENT_PERF_BITS	0x0000ff00
122 #define	AMD_CPPC_CAPS_1_LOWEST_PERF_BITS	0x000000ff
123 
124 #define	AMD_CPPC_REQUEST_EPP_BITS		0xff000000
125 #define	AMD_CPPC_REQUEST_DES_PERF_BITS		0x00ff0000
126 #define	AMD_CPPC_REQUEST_MIN_PERF_BITS		0x0000ff00
127 #define	AMD_CPPC_REQUEST_MAX_PERF_BITS		0x000000ff
128 
129 #define	HWP_AMD_CLASSNAME			"hwpstate_amd"
130 
131 #define	BITS_VALUE(bits, val)						\
132 	(((val) & (bits)) >> (ffsll((bits)) - 1))
133 #define	BITS_WITH_VALUE(bits, val)					\
134 	(((uintmax_t)(val) << (ffsll((bits)) - 1)) & (bits))
135 #define	SET_BITS_VALUE(var, bits, val)					\
136 	((var) = ((var) & ~(bits)) | BITS_WITH_VALUE((bits), (val)))
137 
138 #define	HWPSTATE_DEBUG(dev, msg...)			\
139 	do {						\
140 		if (hwpstate_verbose)			\
141 			device_printf(dev, msg);	\
142 	} while (0)
143 
/*
 * One P-state table entry, mirroring the fields of 'struct cf_setting' that
 * this driver reports to the cpufreq framework.
 */
struct hwpstate_setting {
	int	freq;		/* CPU clock in Mhz or 100ths of a percent. */
	int	volts;		/* Voltage in mV. */
	int	power;		/* Power consumed in mW. */
	int	lat;		/* Transition latency in us. */
	int	pstate_id;	/* P-State id */
};
151 
152 #define HWPFL_USE_CPPC			(1 << 0)
153 #define HWPFL_CPPC_REQUEST_NOT_READ	(1 << 1)
154 
/*
 * cpufreq driver entry points for one backend; the driver has one method
 * table for the legacy P-state backend and one for the CPPC backend, and
 * dispatches through the pointer stored in the softc.
 */
struct hwpstate_cpufreq_methods {
	int (*get)(device_t dev, struct cf_setting *cf);
	int (*set)(device_t dev, const struct cf_setting *cf);
	int (*settings)(device_t dev, struct cf_setting *sets, int *count);
	int (*type)(device_t dev, int *type);
};
161 
162 /*
163  * Atomicity is achieved by only modifying a given softc on its associated CPU
164  * and with interrupts disabled.
165  *
166  * XXX - Only the CPPC support complies at the moment.
167  */
struct hwpstate_softc {
	device_t	dev;
	u_int		flags;		/* HWPFL_* flags. */
	/* Backend dispatch table (P-state or CPPC methods). */
	const struct hwpstate_cpufreq_methods *cpufreq_methods;
	union {
		/* Legacy P-state backend state. */
		struct {
			struct hwpstate_setting
			hwpstate_settings[AMD_10H_11H_MAX_STATES];
			int cfnum;	/* Number of valid entries above. */
		};
		/* CPPC backend state. */
		struct {
			uint64_t request; /* Cached CPPC_REQUEST MSR value. */
		} cppc;
	};
};
183 
184 static void	hwpstate_identify(driver_t *driver, device_t parent);
185 static int	hwpstate_probe(device_t dev);
186 static int	hwpstate_attach(device_t dev);
187 static int	hwpstate_detach(device_t dev);
188 static int	hwpstate_set(device_t dev, const struct cf_setting *cf);
189 static int	hwpstate_get(device_t dev, struct cf_setting *cf);
190 static int	hwpstate_settings(device_t dev, struct cf_setting *sets, int *count);
191 static int	hwpstate_type(device_t dev, int *type);
192 static int	hwpstate_shutdown(device_t dev);
193 static int	hwpstate_features(driver_t *driver, u_int *features);
194 static int	hwpstate_get_info_from_acpi_perf(device_t dev, device_t perf_dev);
195 static int	hwpstate_get_info_from_msr(device_t dev);
196 static int	hwpstate_goto_pstate(device_t dev, int pstate_id);
197 
198 static int	hwpstate_verify;
199 SYSCTL_INT(_debug, OID_AUTO, hwpstate_verify, CTLFLAG_RWTUN,
200     &hwpstate_verify, 0, "Verify P-state after setting");
201 
202 static bool	hwpstate_pstate_limit;
203 SYSCTL_BOOL(_debug, OID_AUTO, hwpstate_pstate_limit, CTLFLAG_RWTUN,
204     &hwpstate_pstate_limit, 0,
205     "If enabled (1), limit administrative control of P-states to the value in "
206     "CurPstateLimit");
207 
208 static bool	hwpstate_amd_cppc_enable = true;
209 SYSCTL_BOOL(_machdep, OID_AUTO, hwpstate_amd_cppc_enable, CTLFLAG_RDTUN,
210     &hwpstate_amd_cppc_enable, 0,
211     "Set 1 (default) to enable AMD CPPC, 0 to disable");
212 
/* Newbus, cpufreq and ACPI method dispatch table for this driver. */
static device_method_t hwpstate_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify,	hwpstate_identify),
	DEVMETHOD(device_probe,		hwpstate_probe),
	DEVMETHOD(device_attach,	hwpstate_attach),
	DEVMETHOD(device_detach,	hwpstate_detach),
	DEVMETHOD(device_shutdown,	hwpstate_shutdown),

	/* cpufreq interface */
	DEVMETHOD(cpufreq_drv_set,	hwpstate_set),
	DEVMETHOD(cpufreq_drv_get,	hwpstate_get),
	DEVMETHOD(cpufreq_drv_settings,	hwpstate_settings),
	DEVMETHOD(cpufreq_drv_type,	hwpstate_type),

	/* ACPI interface */
	DEVMETHOD(acpi_get_features,	hwpstate_features),
	{0, 0}
};
231 
/*
 * Assert that this softc runs the CPPC backend; 'func' is the caller's name,
 * included in the panic message (effective under INVARIANTS only).
 */
static inline void
check_cppc_in_use(const struct hwpstate_softc *const sc, const char *const func)
{
	KASSERT((sc->flags & HWPFL_USE_CPPC) != 0, (HWP_AMD_CLASSNAME
	    ": %s() called but HWPFL_USE_CPPC not set", func));
}
238 
239 static void
print_msr_bits(struct sbuf * const sb,const char * const legend,const uint64_t bits,const uint64_t msr_value)240 print_msr_bits(struct sbuf *const sb, const char *const legend,
241     const uint64_t bits, const uint64_t msr_value)
242 {
243 	sbuf_printf(sb, "\t%s: %" PRIu64 "\n", legend,
244 	    BITS_VALUE(bits, msr_value));
245 }
246 
247 static void
print_cppc_caps_1(struct sbuf * const sb,const uint64_t caps)248 print_cppc_caps_1(struct sbuf *const sb, const uint64_t caps)
249 {
250 	sbuf_printf(sb, MSR_AMD_CPPC_CAPS_1_NAME ": %#016" PRIx64 "\n", caps);
251 	print_msr_bits(sb, "Highest Performance",
252 	    AMD_CPPC_CAPS_1_HIGHEST_PERF_BITS, caps);
253 	print_msr_bits(sb, "Guaranteed Performance",
254 	    AMD_CPPC_CAPS_1_NOMINAL_PERF_BITS, caps);
255 	print_msr_bits(sb, "Efficient Performance",
256 	    AMD_CPPC_CAPS_1_EFFICIENT_PERF_BITS, caps);
257 	print_msr_bits(sb, "Lowest Performance",
258 	    AMD_CPPC_CAPS_1_LOWEST_PERF_BITS, caps);
259 }
260 
261 #define MSR_NOT_READ_MSG	"Not read (fault or previous errors)"
262 
263 static void
print_cppc_no_caps_1(struct sbuf * const sb)264 print_cppc_no_caps_1(struct sbuf *const sb)
265 {
266 	sbuf_printf(sb, MSR_AMD_CPPC_CAPS_1_NAME ": " MSR_NOT_READ_MSG "\n");
267 }
268 
269 static void
print_cppc_request(struct sbuf * const sb,const uint64_t request)270 print_cppc_request(struct sbuf *const sb, const uint64_t request)
271 {
272 	sbuf_printf(sb, MSR_AMD_CPPC_REQUEST_NAME ": %#016" PRIx64 "\n",
273 	    request);
274 	print_msr_bits(sb, "Efficiency / Energy Preference",
275 	    AMD_CPPC_REQUEST_EPP_BITS, request);
276 	print_msr_bits(sb, "Desired Performance",
277 	    AMD_CPPC_REQUEST_DES_PERF_BITS, request);
278 	print_msr_bits(sb, "Minimum Performance",
279 	    AMD_CPPC_REQUEST_MIN_PERF_BITS, request);
280 	print_msr_bits(sb, "Maximum Performance",
281 	    AMD_CPPC_REQUEST_MAX_PERF_BITS, request);
282 }
283 
284 static void
print_cppc_no_request(struct sbuf * const sb)285 print_cppc_no_request(struct sbuf *const sb)
286 {
287 	sbuf_printf(sb, MSR_AMD_CPPC_REQUEST_NAME ": " MSR_NOT_READ_MSG "\n");
288 }
289 
290 /*
291  * Internal errors conveyed by code executing on another CPU.
292  */
293 #define HWP_ERROR_CPPC_ENABLE		(1 << 0)
294 #define HWP_ERROR_CPPC_CAPS		(1 << 1)
295 #define HWP_ERROR_CPPC_REQUEST		(1 << 2)
296 #define HWP_ERROR_CPPC_REQUEST_WRITE	(1 << 3)
297 
298 static inline bool
hwp_has_error(u_int res,u_int err)299 hwp_has_error(u_int res, u_int err)
300 {
301 	return ((res & err) != 0);
302 }
303 
/* Results of a raw CPPC MSR snapshot taken by get_cppc_regs_cb(). */
struct get_cppc_regs_data {
	uint64_t enable;	/* CPPC_ENABLE value, if read. */
	uint64_t caps;		/* CPPC_CAPABILITY_1 value, if read. */
	uint64_t req;		/* CPPC_REQUEST value, if read. */
	/* HWP_ERROR_CPPC_* except HWP_ERROR_*_WRITE */
	u_int res;
};
311 
312 static void
get_cppc_regs_cb(void * args)313 get_cppc_regs_cb(void *args)
314 {
315 	struct get_cppc_regs_data *data = args;
316 	int error;
317 
318 	data->res = 0;
319 
320 	error = rdmsr_safe(MSR_AMD_CPPC_ENABLE, &data->enable);
321 	if (error != 0)
322 		data->res |= HWP_ERROR_CPPC_ENABLE;
323 
324 	error = rdmsr_safe(MSR_AMD_CPPC_CAPS_1, &data->caps);
325 	if (error != 0)
326 		data->res |= HWP_ERROR_CPPC_CAPS;
327 
328 	error = rdmsr_safe(MSR_AMD_CPPC_REQUEST, &data->req);
329 	if (error != 0)
330 		data->res |= HWP_ERROR_CPPC_REQUEST;
331 }
332 
333 /*
334  * Debug: Read all MSRs (bypassing the softc) and dump them.
335  */
336 static int
sysctl_cppc_dump_handler(SYSCTL_HANDLER_ARGS)337 sysctl_cppc_dump_handler(SYSCTL_HANDLER_ARGS)
338 {
339 	const struct hwpstate_softc *const sc = arg1;
340 	const device_t dev = sc->dev;
341 	const u_int cpuid = cpu_get_pcpu(dev)->pc_cpuid;
342 	struct sbuf *sb;
343 	struct sbuf sbs;
344 	struct get_cppc_regs_data data;
345 	int error;
346 
347 	/* Sysctl knob does not exist if HWPFL_USE_CPPC is not set. */
348 	check_cppc_in_use(sc, __func__);
349 
350 	sb = sbuf_new_for_sysctl(&sbs, NULL, 0, req);
351 
352 	smp_rendezvous_cpu(cpuid, smp_no_rendezvous_barrier, get_cppc_regs_cb,
353 	    smp_no_rendezvous_barrier, &data);
354 
355 	if (hwp_has_error(data.res, HWP_ERROR_CPPC_ENABLE))
356 		sbuf_printf(sb, "CPU%u: " MSR_AMD_CPPC_ENABLE_NAME ": "
357 		    MSR_NOT_READ_MSG "\n", cpuid);
358 	else
359 		sbuf_printf(sb, "CPU%u: HWP %sabled (" MSR_AMD_CPPC_REQUEST_NAME
360 		    ": %#" PRIx64 ")\n", cpuid, data.enable & 1 ? "En" : "Dis",
361 		    data.enable);
362 
363 	if (hwp_has_error(data.res, HWP_ERROR_CPPC_CAPS))
364 		print_cppc_no_caps_1(sb);
365 	else
366 		print_cppc_caps_1(sb, data.caps);
367 
368 	if (hwp_has_error(data.res, HWP_ERROR_CPPC_REQUEST))
369 		print_cppc_no_request(sb);
370 	else
371 		print_cppc_request(sb, data.req);
372 
373 	error = sbuf_finish(sb);
374 	sbuf_delete(sb);
375 
376 	return (error);
377 }
378 
379 /*
380  * Read CPPC_REQUEST's value in the softc, if not already present.
381  */
382 static int
get_cppc_request(struct hwpstate_softc * const sc)383 get_cppc_request(struct hwpstate_softc *const sc)
384 {
385 	uint64_t val;
386 	int error;
387 
388 	check_cppc_in_use(sc, __func__);
389 
390 	if ((sc->flags & HWPFL_CPPC_REQUEST_NOT_READ) != 0) {
391 		error = rdmsr_safe(MSR_AMD_CPPC_REQUEST, &val);
392 		if (error != 0)
393 			return (EIO);
394 		sc->flags &= ~HWPFL_CPPC_REQUEST_NOT_READ;
395 		sc->cppc.request = val;
396 	}
397 
398 	return (0);
399 }
400 
/* Arguments/results for the set_cppc_request_cb() rendezvous callback. */
struct set_cppc_request_cb {
	struct hwpstate_softc	*sc;		/* Target device's softc. */
	uint64_t		 request;	/* New bits to install. */
	uint64_t		 mask;		/* Which bits to replace. */
	int			 res; /* 0 or HWP_ERROR_CPPC_REQUEST* */
};
407 
408 static void
set_cppc_request_cb(void * args)409 set_cppc_request_cb(void *args)
410 {
411 	struct set_cppc_request_cb *const data = args;
412 	uint64_t *const sc_req = &data->sc->cppc.request;
413 	uint64_t new_req;
414 	int error;
415 
416 	/* We proceed sequentially, so we'll clear out errors on progress. */
417 	data->res = HWP_ERROR_CPPC_REQUEST | HWP_ERROR_CPPC_REQUEST_WRITE;
418 
419 	error = get_cppc_request(data->sc);
420 	if (error != 0)
421 		return;
422 	data->res &= ~HWP_ERROR_CPPC_REQUEST;
423 
424 	new_req = (*sc_req & ~data->mask) | (data->request & data->mask);
425 
426 	error = wrmsr_safe(MSR_AMD_CPPC_REQUEST, new_req);
427 	if (error != 0)
428 		return;
429 	data->res &= ~HWP_ERROR_CPPC_REQUEST_WRITE;
430 	*sc_req = new_req;
431 }
432 
/*
 * Run set_cppc_request_cb() with 'data' on the CPU that device 'dev' stands
 * for, filling 'data->sc' with that device's softc beforehand.
 */
static inline void
set_cppc_request_send_one(struct set_cppc_request_cb *const data, device_t dev)
{
	const u_int cpuid = cpu_get_pcpu(dev)->pc_cpuid;

	data->sc = device_get_softc(dev);
	smp_rendezvous_cpu(cpuid, smp_no_rendezvous_barrier,
	    set_cppc_request_cb, smp_no_rendezvous_barrier, data);
}
442 
443 static inline void
set_cppc_request_update_error(const struct set_cppc_request_cb * const data,int * const error)444 set_cppc_request_update_error(const struct set_cppc_request_cb *const data,
445     int *const error)
446 {
447 	/* A read error has precedence on a write error. */
448 	if (hwp_has_error(data->res, HWP_ERROR_CPPC_REQUEST))
449 		*error = EIO;
450 	else if (hwp_has_error(data->res, HWP_ERROR_CPPC_REQUEST_WRITE) &&
451 	    *error != EIO)
452 		*error = EOPNOTSUPP;
453 	else if (data->res != 0)
454 		/* Fallback case (normally not needed; defensive). */
455 		*error = EFAULT;
456 }
457 
/*
 * Replace the CPPC_REQUEST bits selected by 'mask' with those of 'request'.
 * With package-level control enabled, the update is broadcast to every
 * hwpstate_amd device; otherwise only 'hwp_dev' is updated.  Returns 0, or
 * an errno chosen by set_cppc_request_update_error() on failure (all devices
 * are still attempted).
 */
static int
set_cppc_request(device_t hwp_dev, uint64_t request, uint64_t mask)
{
	struct set_cppc_request_cb data = {
		.request = request,
		.mask = mask,
		/* 'sc' filled by set_cppc_request_send_one(). */
	};
	int error = 0;

	if (hwpstate_pkg_ctrl_enable) {
		const devclass_t dc = devclass_find(HWP_AMD_CLASSNAME);
		const int units = devclass_get_maxunit(dc);

		for (int i = 0; i < units; ++i) {
			const device_t dev = devclass_get_device(dc, i);

			set_cppc_request_send_one(&data, dev);
			/* Note errors, but always continue. */
			set_cppc_request_update_error(&data, &error);
		}
	} else {
		set_cppc_request_send_one(&data, hwp_dev);
		set_cppc_request_update_error(&data, &error);
	}

	return (error);
}
486 
/*
 * Rendezvous wrapper around get_cppc_request(); errors are reported by
 * HWPFL_CPPC_REQUEST_NOT_READ remaining set in the softc.
 */
static void
get_cppc_request_cb(void *args)
{
	struct hwpstate_softc *const sc = args;

	(void)get_cppc_request(sc);
}
494 
/*
 * Sysctl handler for one CPPC_REQUEST bit field.  'arg1' is the device,
 * 'arg2' the field's bit mask within the MSR.  Reads return the cached
 * (possibly refreshed) field value; writes install a new value via
 * set_cppc_request(), range-checked against the field's width.
 */
static int
sysctl_cppc_request_field_handler(SYSCTL_HANDLER_ARGS)
{
	const u_int max = BITS_VALUE(arg2, (uint64_t)-1);
	const device_t dev = arg1;
	struct hwpstate_softc *const sc = device_get_softc(dev);
	u_int val;
	int error;

	/* Sysctl knob does not exist if HWPFL_USE_CPPC is not set. */
	check_cppc_in_use(sc, __func__);

	/* Refresh the cached CPPC_REQUEST value on its CPU if needed. */
	if ((sc->flags & HWPFL_CPPC_REQUEST_NOT_READ) != 0) {
		const u_int cpuid = cpu_get_pcpu(dev)->pc_cpuid;

		smp_rendezvous_cpu(cpuid, smp_no_rendezvous_barrier,
		    get_cppc_request_cb, smp_no_rendezvous_barrier, sc);

		/* Still not read => the MSR read faulted. */
		if ((sc->flags & HWPFL_CPPC_REQUEST_NOT_READ) != 0)
			return (EIO);
	}

	val = BITS_VALUE(arg2, sc->cppc.request);

	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	if (val > max)
		return (EINVAL);
	error = set_cppc_request(dev, BITS_WITH_VALUE(arg2, val),
	    BITS_WITH_VALUE(arg2, -1));
	return (error);
}
529 
/* Driver glue: class name, method table and softc size. */
static driver_t hwpstate_driver = {
	HWP_AMD_CLASSNAME,
	hwpstate_methods,
	sizeof(struct hwpstate_softc),
};

DRIVER_MODULE(hwpstate, cpu, hwpstate_driver, 0, 0);
537 
/*
 * Scale 'val' down by a power of ten given by the divisor exponent 'div'
 * (0..3), i.e. return val / 10^div using integer division.  Unknown divisor
 * values leave 'val' unscaled.  The switch cascades on purpose: each case
 * falls through to accumulate one division per exponent step.
 */
static int
hwpstate_amd_iscale(int val, int div)
{
	switch (div) {
	case 3: /* divide by 1000 */
		val /= 10;
		/* FALLTHROUGH */
	case 2: /* divide by 100 */
		val /= 10;
		/* FALLTHROUGH */
	case 1: /* divide by 10 */
		val /= 10;
		/* FALLTHROUGH */
	case 0: /* divide by 1 */
	default:
		break;
	}

	return (val);
}
554 
555 /*
556  * Go to Px-state on all cpus, considering the limit register (if so
557  * configured).
558  */
559 static int
hwpstate_goto_pstate(device_t dev,int id)560 hwpstate_goto_pstate(device_t dev, int id)
561 {
562 	sbintime_t sbt;
563 	uint64_t msr;
564 	int cpu, i, j, limit;
565 
566 	if (hwpstate_pstate_limit) {
567 		/* get the current pstate limit */
568 		msr = rdmsr(MSR_AMD_10H_11H_LIMIT);
569 		limit = AMD_10H_11H_GET_PSTATE_LIMIT(msr);
570 		if (limit > id) {
571 			HWPSTATE_DEBUG(dev, "Restricting requested P%d to P%d "
572 			    "due to HW limit\n", id, limit);
573 			id = limit;
574 		}
575 	}
576 
577 	cpu = curcpu;
578 	HWPSTATE_DEBUG(dev, "setting P%d-state on cpu%d\n", id, cpu);
579 	/* Go To Px-state */
580 	wrmsr(MSR_AMD_10H_11H_CONTROL, id);
581 
582 	/*
583 	 * We are going to the same Px-state on all cpus.
584 	 * Probably should take _PSD into account.
585 	 */
586 	CPU_FOREACH(i) {
587 		if (i == cpu)
588 			continue;
589 
590 		/* Bind to each cpu. */
591 		thread_lock(curthread);
592 		sched_bind(curthread, i);
593 		thread_unlock(curthread);
594 		HWPSTATE_DEBUG(dev, "setting P%d-state on cpu%d\n", id, i);
595 		/* Go To Px-state */
596 		wrmsr(MSR_AMD_10H_11H_CONTROL, id);
597 	}
598 
599 	/*
600 	 * Verify whether each core is in the requested P-state.
601 	 */
602 	if (hwpstate_verify) {
603 		CPU_FOREACH(i) {
604 			thread_lock(curthread);
605 			sched_bind(curthread, i);
606 			thread_unlock(curthread);
607 			/* wait loop (100*100 usec is enough ?) */
608 			for (j = 0; j < 100; j++) {
609 				/* get the result. not assure msr=id */
610 				msr = rdmsr(MSR_AMD_10H_11H_STATUS);
611 				if (msr == id)
612 					break;
613 				sbt = SBT_1MS / 10;
614 				tsleep_sbt(dev, PZERO, "pstate_goto", sbt,
615 				    sbt >> tc_precexp, 0);
616 			}
617 			HWPSTATE_DEBUG(dev, "result: P%d-state on cpu%d\n",
618 			    (int)msr, i);
619 			if (msr != id) {
620 				HWPSTATE_DEBUG(dev,
621 				    "error: loop is not enough.\n");
622 				return (ENXIO);
623 			}
624 		}
625 	}
626 
627 	return (0);
628 }
629 
/*
 * In CPPC mode, frequencies cannot be set through the cpufreq interface;
 * control goes through the CPPC_REQUEST sysctl knobs instead.
 */
static int
hwpstate_set_cppc(device_t dev __unused, const struct cf_setting *cf __unused)
{
	return (EOPNOTSUPP);
}
635 
636 static int
hwpstate_set_pstate(device_t dev,const struct cf_setting * cf)637 hwpstate_set_pstate(device_t dev, const struct cf_setting *cf)
638 {
639 	struct hwpstate_softc *sc;
640 	struct hwpstate_setting *set;
641 	int i;
642 
643 	sc = device_get_softc(dev);
644 	set = sc->hwpstate_settings;
645 	for (i = 0; i < sc->cfnum; i++)
646 		if (CPUFREQ_CMP(cf->freq, set[i].freq))
647 			break;
648 	if (i == sc->cfnum)
649 		return (EINVAL);
650 	return (hwpstate_goto_pstate(dev, set[i].pstate_id));
651 }
652 
653 static int
hwpstate_set(device_t dev,const struct cf_setting * cf)654 hwpstate_set(device_t dev, const struct cf_setting *cf)
655 {
656 	struct hwpstate_softc *sc = device_get_softc(dev);
657 
658 	if (cf == NULL)
659 		return (EINVAL);
660 	return (sc->cpufreq_methods->set(dev, cf));
661 }
662 
663 static int
hwpstate_get_cppc(device_t dev,struct cf_setting * cf)664 hwpstate_get_cppc(device_t dev, struct cf_setting *cf)
665 {
666 	struct pcpu *pc;
667 	uint64_t rate;
668 	int ret;
669 
670 	pc = cpu_get_pcpu(dev);
671 	if (pc == NULL)
672 		return (ENXIO);
673 
674 	memset(cf, CPUFREQ_VAL_UNKNOWN, sizeof(*cf));
675 	cf->dev = dev;
676 	if ((ret = cpu_est_clockrate(pc->pc_cpuid, &rate)))
677 		return (ret);
678 	cf->freq = rate / 1000000;
679 	return (0);
680 }
681 
682 static int
hwpstate_get_pstate(device_t dev,struct cf_setting * cf)683 hwpstate_get_pstate(device_t dev, struct cf_setting *cf)
684 {
685 	struct hwpstate_softc *sc;
686 	struct hwpstate_setting set;
687 	uint64_t msr;
688 
689 	sc = device_get_softc(dev);
690 	msr = rdmsr(MSR_AMD_10H_11H_STATUS);
691 	if (msr >= sc->cfnum)
692 		return (EINVAL);
693 	set = sc->hwpstate_settings[msr];
694 
695 	cf->freq = set.freq;
696 	cf->volts = set.volts;
697 	cf->power = set.power;
698 	cf->lat = set.lat;
699 	cf->dev = dev;
700 
701 	return (0);
702 }
703 
704 static int
hwpstate_get(device_t dev,struct cf_setting * cf)705 hwpstate_get(device_t dev, struct cf_setting *cf)
706 {
707 	struct hwpstate_softc *sc;
708 
709 	sc = device_get_softc(dev);
710 	if (cf == NULL)
711 		return (EINVAL);
712 	return (sc->cpufreq_methods->get(dev, cf));
713 }
714 
/*
 * In CPPC mode there is no discrete settings table to report; the driver is
 * info-only (see hwpstate_type_cppc()).
 */
static int
hwpstate_settings_cppc(device_t dev __unused, struct cf_setting *sets __unused,
    int *count __unused)
{
	return (EOPNOTSUPP);
}
721 
722 static int
hwpstate_settings_pstate(device_t dev,struct cf_setting * sets,int * count)723 hwpstate_settings_pstate(device_t dev, struct cf_setting *sets, int *count)
724 {
725 	struct hwpstate_setting set;
726 	struct hwpstate_softc *sc;
727 	int i;
728 
729 	sc = device_get_softc(dev);
730 	if (*count < sc->cfnum)
731 		return (E2BIG);
732 	for (i = 0; i < sc->cfnum; i++, sets++) {
733 		set = sc->hwpstate_settings[i];
734 		sets->freq = set.freq;
735 		sets->volts = set.volts;
736 		sets->power = set.power;
737 		sets->lat = set.lat;
738 		sets->dev = dev;
739 	}
740 	*count = sc->cfnum;
741 
742 	return (0);
743 }
744 
745 static int
hwpstate_settings(device_t dev,struct cf_setting * sets,int * count)746 hwpstate_settings(device_t dev, struct cf_setting *sets, int *count)
747 {
748 	struct hwpstate_softc *sc;
749 
750 	if (sets == NULL || count == NULL)
751 		return (EINVAL);
752 	sc = device_get_softc(dev);
753 	return (sc->cpufreq_methods->settings(dev, sets, count));
754 }
755 
/*
 * cpufreq type for the CPPC backend: absolute frequencies, info-only (no
 * cpufreq-driven transitions), values must not be cached.
 *
 * NOTE(review): this ORs into '*type' whereas hwpstate_type_pstate()
 * assigns; presumably the cpufreq framework pre-initializes the value —
 * confirm against the CPUFREQ_DRV_TYPE callers.
 */
static int
hwpstate_type_cppc(device_t dev, int *type)
{
	*type |= CPUFREQ_TYPE_ABSOLUTE | CPUFREQ_FLAG_INFO_ONLY |
	    CPUFREQ_FLAG_UNCACHED;
	return (0);
}
763 
/* cpufreq type for the P-state backend: plain absolute frequencies. */
static int
hwpstate_type_pstate(device_t dev, int *type)
{
	*type = CPUFREQ_TYPE_ABSOLUTE;
	return (0);
}
770 
771 static int
hwpstate_type(device_t dev,int * type)772 hwpstate_type(device_t dev, int *type)
773 {
774 	struct hwpstate_softc *sc;
775 
776 	sc = device_get_softc(dev);
777 	return (sc->cpufreq_methods->type(dev, type));
778 }
779 
/*
 * Newbus identify: add one hwpstate_amd child per CPU device on AMD family
 * 10h+ and Hygon processors that advertise hardware P-state control
 * (AMDPM_HW_PSTATE), unless disabled via device hints.
 */
static void
hwpstate_identify(driver_t *driver, device_t parent)
{
	/* Already attached under this CPU? */
	if (device_find_child(parent, HWP_AMD_CLASSNAME, DEVICE_UNIT_ANY) !=
	    NULL)
		return;

	if ((cpu_vendor_id != CPU_VENDOR_AMD || CPUID_TO_FAMILY(cpu_id) < 0x10) &&
	    cpu_vendor_id != CPU_VENDOR_HYGON)
		return;

	/*
	 * Check if hardware pstate enable bit is set.
	 */
	if ((amd_pminfo & AMDPM_HW_PSTATE) == 0) {
		HWPSTATE_DEBUG(parent, "hwpstate enable bit is not set.\n");
		return;
	}

	if (resource_disabled(HWP_AMD_CLASSNAME, 0))
		return;

	if (BUS_ADD_CHILD(parent, 10, HWP_AMD_CLASSNAME,
		device_get_unit(parent)) == NULL)
		device_printf(parent, "hwpstate: add child failed\n");
}
806 
/* Arguments/results for the enable_cppc_cb() rendezvous callback. */
struct set_autonomous_hwp_data {
	/* Inputs */
	struct hwpstate_softc *sc;
	/* Outputs */
	/* HWP_ERROR_CPPC_* */
	u_int res;
	/* Below fields filled depending on 'res'. */
	uint64_t caps;		/* CPPC_CAPABILITY_1 snapshot. */
	uint64_t init_request;	/* CPPC_REQUEST before tweaking. */
	uint64_t request;	/* CPPC_REQUEST value we tried to install. */
};
818 
819 static void
enable_cppc_cb(void * args)820 enable_cppc_cb(void *args)
821 {
822 	struct set_autonomous_hwp_data *const data = args;
823 	struct hwpstate_softc *const sc = data->sc;
824 	uint64_t lowest_perf, highest_perf;
825 	int error;
826 
827 	/*
828 	 * We proceed mostly sequentially, so we'll clear out errors on
829 	 * progress.
830 	 */
831 	data->res = HWP_ERROR_CPPC_ENABLE | HWP_ERROR_CPPC_CAPS |
832 	    HWP_ERROR_CPPC_REQUEST | HWP_ERROR_CPPC_REQUEST_WRITE;
833 
834 	sc->flags |= HWPFL_CPPC_REQUEST_NOT_READ;
835 
836 	error = wrmsr_safe(MSR_AMD_CPPC_ENABLE, 1);
837 	if (error != 0)
838 		return;
839 	data->res &= ~HWP_ERROR_CPPC_ENABLE;
840 
841 	error = rdmsr_safe(MSR_AMD_CPPC_CAPS_1, &data->caps);
842 	/* We can do away without CAPABILITY_1, so just continue on error. */
843 	if (error == 0)
844 		data->res &= ~HWP_ERROR_CPPC_CAPS;
845 
846 	error = get_cppc_request(sc);
847 	if (error != 0)
848 		return;
849 	data->res &= ~HWP_ERROR_CPPC_REQUEST;
850 	data->init_request = sc->cppc.request;
851 
852 	data->request = sc->cppc.request;
853 	/*
854 	 * Assuming reading MSR_AMD_CPPC_CAPS_1 succeeded, if it stays at its
855 	 * reset value (0) before CPPC activation (not supposed to happen, but
856 	 * happens in the field), we use reasonable default values that are
857 	 * explicitly described by the ACPI spec (all 0s for the minimum value,
858 	 * all 1s for the maximum one).  Going further, we actually do the same
859 	 * as long as the minimum and maximum performance levels are not sorted
860 	 * or are equal (in which case CPPC is not supposed to make sense at
861 	 * all), which covers the reset value case.  And we also fallback to
862 	 * these if MSR_AMD_CPPC_CAPS_1 could not be read at all.
863 	 */
864 	lowest_perf = 0;
865 	highest_perf = -1;
866 	if (!hwp_has_error(data->res, HWP_ERROR_CPPC_CAPS)) {
867 		const uint64_t lowest_cand =
868 		    BITS_VALUE(AMD_CPPC_CAPS_1_LOWEST_PERF_BITS, data->caps);
869 		const uint64_t highest_cand =
870 		    BITS_VALUE(AMD_CPPC_CAPS_1_HIGHEST_PERF_BITS, data->caps);
871 
872 		if (lowest_cand < highest_cand) {
873 			lowest_perf = lowest_cand;
874 			highest_perf = highest_cand;
875 		}
876 	}
877 	SET_BITS_VALUE(data->request, AMD_CPPC_REQUEST_MIN_PERF_BITS,
878 	    lowest_perf);
879 	SET_BITS_VALUE(data->request, AMD_CPPC_REQUEST_MAX_PERF_BITS,
880 	    highest_perf);
881 	/*
882 	 * Set controls to maximum performance to avoid regressions now that
883 	 * CPPC is activated by default and to match what the P-state support
884 	 * does.
885 	 */
886 	SET_BITS_VALUE(data->request, AMD_CPPC_REQUEST_EPP_BITS, 0);
887 	/* 0 in "Desired Performance" is autonomous mode. */
888 	MPASS(highest_perf != 0);
889 	SET_BITS_VALUE(data->request, AMD_CPPC_REQUEST_DES_PERF_BITS,
890 	    highest_perf);
891 
892 	error = wrmsr_safe(MSR_AMD_CPPC_REQUEST, data->request);
893 	if (error != 0)
894 		return;
895 	data->res &= ~HWP_ERROR_CPPC_REQUEST_WRITE;
896 	sc->cppc.request = data->request;
897 }
898 
899 static int
enable_cppc(struct hwpstate_softc * sc)900 enable_cppc(struct hwpstate_softc *sc)
901 {
902 	const device_t dev = sc->dev;
903 	const u_int cpuid = cpu_get_pcpu(dev)->pc_cpuid;
904 	struct set_autonomous_hwp_data data;
905 	struct sbuf sbs;
906 	struct sbuf *sb;
907 
908 	data.sc = sc;
909 	smp_rendezvous_cpu(cpuid, smp_no_rendezvous_barrier,
910 	    enable_cppc_cb, smp_no_rendezvous_barrier, &data);
911 
912 	if (hwp_has_error(data.res, HWP_ERROR_CPPC_ENABLE)) {
913 		device_printf(dev, "CPU%u: Failed to enable CPPC!\n", cpuid);
914 		return (ENXIO);
915 	}
916 	device_printf(dev, "CPU%u: CPPC enabled.\n", cpuid);
917 
918 	/*
919 	 * Now that we have enabled CPPC, we can't go back (hardware does not
920 	 * support doing so), so we'll attach even in case of further
921 	 * malfunction, allowing the user to retry retrieving/setting MSRs via
922 	 * the sysctl knobs.
923 	 */
924 
925 	sb = sbuf_new(&sbs, NULL, 0, SBUF_AUTOEXTEND);
926 
927 	if (hwpstate_verbose)
928 		sbuf_printf(sb,
929 		    "CPU%u: Initial MSR values after CPPC enable:\n", cpuid);
930 	if (hwp_has_error(data.res, HWP_ERROR_CPPC_CAPS))
931 		print_cppc_no_caps_1(sb);
932 	else if (hwpstate_verbose)
933 		print_cppc_caps_1(sb, data.caps);
934 	if (hwp_has_error(data.res, HWP_ERROR_CPPC_REQUEST))
935 		print_cppc_no_request(sb);
936 	else if (hwpstate_verbose)
937 		print_cppc_request(sb, data.init_request);
938 	if (hwp_has_error(data.res, HWP_ERROR_CPPC_REQUEST_WRITE)) {
939 		const bool request_read = !hwp_has_error(data.res,
940 		    HWP_ERROR_CPPC_REQUEST);
941 
942 		/* This is printed first, as it is not printed into 'sb'. */
943 		device_printf(dev, "CPU%u: %s not write into "
944 		    MSR_AMD_CPPC_REQUEST_NAME "!\n", cpuid,
945 		    request_read ? "Could" : "Did");
946 		if (request_read) {
947 			sbuf_printf(sb, "CPU%u: Failed when trying to set:",
948 			    cpuid);
949 			print_cppc_request(sb, data.request);
950 		}
951 	} else if (hwpstate_verbose) {
952 		sbuf_printf(sb, "CPU%u: Tweaked MSR values:\n", cpuid);
953 		print_cppc_request(sb, data.request);
954 	}
955 
956 	sbuf_finish(sb);
957 	sbuf_putbuf(sb);
958 	sbuf_delete(sb);
959 
960 	return (0);
961 }
962 
963 static int
hwpstate_probe_pstate(device_t dev)964 hwpstate_probe_pstate(device_t dev)
965 {
966 	struct hwpstate_softc *sc;
967 	device_t perf_dev;
968 	int error, type;
969 	uint64_t msr;
970 
971 	sc = device_get_softc(dev);
972 	/*
973 	 * Check if acpi_perf has INFO only flag.
974 	 */
975 	perf_dev = device_find_child(device_get_parent(dev), "acpi_perf",
976 	    DEVICE_UNIT_ANY);
977 	error = TRUE;
978 	if (perf_dev && device_is_attached(perf_dev)) {
979 		error = CPUFREQ_DRV_TYPE(perf_dev, &type);
980 		if (error == 0) {
981 			if ((type & CPUFREQ_FLAG_INFO_ONLY) == 0) {
982 				/*
983 				 * If acpi_perf doesn't have INFO_ONLY flag,
984 				 * it will take care of pstate transitions.
985 				 */
986 				HWPSTATE_DEBUG(dev, "acpi_perf will take care of pstate transitions.\n");
987 				return (ENXIO);
988 			} else {
989 				/*
990 				 * If acpi_perf has INFO_ONLY flag, (_PCT has FFixedHW)
991 				 * we can get _PSS info from acpi_perf
992 				 * without going into ACPI.
993 				 */
994 				HWPSTATE_DEBUG(dev, "going to fetch info from acpi_perf\n");
995 				error = hwpstate_get_info_from_acpi_perf(dev, perf_dev);
996 			}
997 		}
998 	}
999 
1000 	if (error == 0) {
1001 		/*
1002 		 * Now we get _PSS info from acpi_perf without error.
1003 		 * Let's check it.
1004 		 */
1005 		msr = rdmsr(MSR_AMD_10H_11H_LIMIT);
1006 		if (sc->cfnum != 1 + AMD_10H_11H_GET_PSTATE_MAX_VAL(msr)) {
1007 			HWPSTATE_DEBUG(dev, "MSR (%jd) and ACPI _PSS (%d)"
1008 			    " count mismatch\n", (intmax_t)msr, sc->cfnum);
1009 			error = TRUE;
1010 		}
1011 	}
1012 
1013 	/*
1014 	 * If we cannot get info from acpi_perf,
1015 	 * Let's get info from MSRs.
1016 	 */
1017 	if (error)
1018 		error = hwpstate_get_info_from_msr(dev);
1019 	return (error);
1020 }
1021 
/* cpufreq backend operations used when CPPC mode is active. */
static const struct hwpstate_cpufreq_methods cppc_methods = {
	.get = hwpstate_get_cppc,
	.set = hwpstate_set_cppc,
	.settings = hwpstate_settings_cppc,
	.type = hwpstate_type_cppc };

/* cpufreq backend operations used for legacy P-state control. */
static const struct hwpstate_cpufreq_methods pstate_methods = {
	.get = hwpstate_get_pstate,
	.set = hwpstate_set_pstate,
	.settings = hwpstate_settings_pstate,
	.type = hwpstate_type_pstate };
1033 
1034 static int
hwpstate_probe(device_t dev)1035 hwpstate_probe(device_t dev)
1036 {
1037 	struct hwpstate_softc *sc;
1038 	sc = device_get_softc(dev);
1039 
1040 	if (hwpstate_amd_cppc_enable &&
1041 	    (amd_extended_feature_extensions & AMDFEID_CPPC)) {
1042 		sc->flags |= HWPFL_USE_CPPC;
1043 		device_set_desc(dev,
1044 		    "AMD Collaborative Processor Performance Control (CPPC)");
1045 	} else {
1046 		/*
1047 		 * No CPPC support.  Only keep hwpstate0, it goes well with
1048 		 * acpi_throttle.
1049 		 */
1050 		if (device_get_unit(dev) != 0)
1051 			return (ENXIO);
1052 		device_set_desc(dev, "Cool`n'Quiet 2.0");
1053 	}
1054 
1055 	sc->dev = dev;
1056 	if ((sc->flags & HWPFL_USE_CPPC) != 0) {
1057 		sc->cpufreq_methods = &cppc_methods;
1058 		return (0);
1059 	}
1060 	sc->cpufreq_methods = &pstate_methods;
1061 	return (hwpstate_probe_pstate(dev));
1062 }
1063 
1064 static int
hwpstate_attach(device_t dev)1065 hwpstate_attach(device_t dev)
1066 {
1067 	struct hwpstate_softc *sc;
1068 	int res;
1069 
1070 	sc = device_get_softc(dev);
1071 	if ((sc->flags & HWPFL_USE_CPPC) != 0) {
1072 		if ((res = enable_cppc(sc)) != 0)
1073 			return (res);
1074 		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
1075 		    SYSCTL_STATIC_CHILDREN(_debug), OID_AUTO,
1076 		    device_get_nameunit(dev),
1077 		    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE,
1078 		    sc, 0, sysctl_cppc_dump_handler, "A", "");
1079 
1080 		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
1081 		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
1082 		    "epp", CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
1083 		    dev, AMD_CPPC_REQUEST_EPP_BITS,
1084 		    sysctl_cppc_request_field_handler, "IU",
1085 		    "Efficiency/Performance Preference (from 0, "
1086 		    "most performant, to 255, most efficient)");
1087 
1088 		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
1089 		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
1090 		    "minimum_performance",
1091 		    CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
1092 		    dev, AMD_CPPC_REQUEST_MIN_PERF_BITS,
1093 		    sysctl_cppc_request_field_handler, "IU",
1094 		    "Minimum allowed performance level (from 0 to 255; "
1095 		    "should be smaller than 'maximum_performance'; "
1096 		    "effective range limited by CPU)");
1097 
1098 		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
1099 		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
1100 		    "maximum_performance",
1101 		    CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
1102 		    dev, AMD_CPPC_REQUEST_MAX_PERF_BITS,
1103 		    sysctl_cppc_request_field_handler, "IU",
1104 		    "Maximum allowed performance level (from 0 to 255; "
1105 		    "should be larger than 'minimum_performance'; "
1106 		    "effective range limited by CPU)");
1107 
1108 		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
1109 		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
1110 		    "desired_performance",
1111 		    CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, dev,
1112 		    AMD_CPPC_REQUEST_DES_PERF_BITS,
1113 		    sysctl_cppc_request_field_handler, "IU",
1114 		    "Desired performance level (from 0 to 255; "
1115 		    "0 enables autonomous mode, otherwise value should be "
1116 		    "between 'minimum_performance' and 'maximum_performance' "
1117 		    "inclusive)");
1118 	}
1119 	return (cpufreq_register(dev));
1120 }
1121 
/*
 * Build the P-state table directly from the hardware P-state MSRs
 * (MSR_AMD_10H_11H_LIMIT for the count, MSR_AMD_10H_11H_CONFIG + i per
 * entry), converting each entry's frequency ID (FID) / divisor ID (DID)
 * into a core frequency according to the CPU family.  Returns 0 on
 * success, ENXIO on an invalid entry or unsupported family.
 */
static int
hwpstate_get_info_from_msr(device_t dev)
{
	struct hwpstate_softc *sc;
	struct hwpstate_setting *hwpstate_set;
	uint64_t msr;
	int family, i, fid, did;

	family = CPUID_TO_FAMILY(cpu_id);
	sc = device_get_softc(dev);
	/* Get pstate count */
	msr = rdmsr(MSR_AMD_10H_11H_LIMIT);
	sc->cfnum = 1 + AMD_10H_11H_GET_PSTATE_MAX_VAL(msr);
	hwpstate_set = sc->hwpstate_settings;
	for (i = 0; i < sc->cfnum; i++) {
		msr = rdmsr(MSR_AMD_10H_11H_CONFIG + i);
		/* Bit 63 marks the P-state entry as valid/enabled. */
		if ((msr & ((uint64_t)1 << 63)) == 0) {
			HWPSTATE_DEBUG(dev, "msr is not valid.\n");
			return (ENXIO);
		}
		did = AMD_10H_11H_CUR_DID(msr);
		fid = AMD_10H_11H_CUR_FID(msr);

		/*
		 * Voltage/power are only derived for family 17h and later
		 * below; latency is never known from the MSRs.
		 */
		hwpstate_set[i].volts = CPUFREQ_VAL_UNKNOWN;
		hwpstate_set[i].power = CPUFREQ_VAL_UNKNOWN;
		hwpstate_set[i].lat = CPUFREQ_VAL_UNKNOWN;
		/* Convert fid/did to frequency (MHz), per-family formulas. */
		switch (family) {
		case 0x11:
			hwpstate_set[i].freq = (100 * (fid + 0x08)) >> did;
			break;
		case 0x10:
		case 0x12:
		case 0x15:
		case 0x16:
			hwpstate_set[i].freq = (100 * (fid + 0x10)) >> did;
			break;
		case 0x17:
		case 0x18:
		case 0x19:
		case 0x1A:
			/* calculate freq */
			if (family == 0x1A) {
				fid = AMD_1AH_CUR_FID(msr);
				/* 1Ah CPU don't use a divisor */
				hwpstate_set[i].freq = fid;
				if (fid > 0x0f)
					hwpstate_set[i].freq *= 5;
				else {
					/*
					 * A FID this small would yield an
					 * implausible frequency; bail out.
					 */
					HWPSTATE_DEBUG(dev,
					    "unexpected fid: %d\n", fid);
					return (ENXIO);
				}
			} else {
				did = AMD_17H_CUR_DID(msr);
				if (did == 0) {
					/* Avoid dividing by zero below. */
					HWPSTATE_DEBUG(dev,
					    "unexpected did: 0\n");
					did = 1;
				}
				fid = AMD_17H_CUR_FID(msr);
				hwpstate_set[i].freq = (200 * fid) / did;
			}

			/* Vid step is 6.25mV, so scale by 100. */
			hwpstate_set[i].volts =
			    (155000 - (625 * AMD_17H_CUR_VID(msr))) / 100;
			/*
			 * Calculate current first.
			 * This equation is mentioned in
			 * "BKDG for AMD Family 15h Models 70h-7fh Processors",
			 * section 2.5.2.1.6.
			 */
			hwpstate_set[i].power = AMD_17H_CUR_IDD(msr) * 1000;
			hwpstate_set[i].power = hwpstate_amd_iscale(
			    hwpstate_set[i].power, AMD_17H_CUR_IDIV(msr));
			hwpstate_set[i].power *= hwpstate_set[i].volts;
			/* Milli amps * milli volts to milli watts. */
			hwpstate_set[i].power /= 1000;
			break;
		default:
			HWPSTATE_DEBUG(dev, "get_info_from_msr: %s family"
			    " 0x%02x CPUs are not supported yet\n",
			    cpu_vendor_id == CPU_VENDOR_HYGON ? "Hygon" : "AMD",
			    family);
			return (ENXIO);
		}
		hwpstate_set[i].pstate_id = i;
	}
	return (0);
}
1213 
1214 static int
hwpstate_get_info_from_acpi_perf(device_t dev,device_t perf_dev)1215 hwpstate_get_info_from_acpi_perf(device_t dev, device_t perf_dev)
1216 {
1217 	struct hwpstate_softc *sc;
1218 	struct cf_setting *perf_set;
1219 	struct hwpstate_setting *hwpstate_set;
1220 	int count, error, i;
1221 
1222 	perf_set = malloc(MAX_SETTINGS * sizeof(*perf_set), M_TEMP, M_NOWAIT);
1223 	if (perf_set == NULL) {
1224 		HWPSTATE_DEBUG(dev, "nomem\n");
1225 		return (ENOMEM);
1226 	}
1227 	/*
1228 	 * Fetch settings from acpi_perf.
1229 	 * Now it is attached, and has info only flag.
1230 	 */
1231 	count = MAX_SETTINGS;
1232 	error = CPUFREQ_DRV_SETTINGS(perf_dev, perf_set, &count);
1233 	if (error) {
1234 		HWPSTATE_DEBUG(dev, "error: CPUFREQ_DRV_SETTINGS.\n");
1235 		goto out;
1236 	}
1237 	sc = device_get_softc(dev);
1238 	sc->cfnum = count;
1239 	hwpstate_set = sc->hwpstate_settings;
1240 	for (i = 0; i < count; i++) {
1241 		if (i == perf_set[i].spec[0]) {
1242 			hwpstate_set[i].pstate_id = i;
1243 			hwpstate_set[i].freq = perf_set[i].freq;
1244 			hwpstate_set[i].volts = perf_set[i].volts;
1245 			hwpstate_set[i].power = perf_set[i].power;
1246 			hwpstate_set[i].lat = perf_set[i].lat;
1247 		} else {
1248 			HWPSTATE_DEBUG(dev, "ACPI _PSS object mismatch.\n");
1249 			error = ENXIO;
1250 			goto out;
1251 		}
1252 	}
1253 out:
1254 	if (perf_set)
1255 		free(perf_set, M_TEMP);
1256 	return (error);
1257 }
1258 
1259 static int
hwpstate_detach(device_t dev)1260 hwpstate_detach(device_t dev)
1261 {
1262 	struct hwpstate_softc *sc;
1263 
1264 	sc = device_get_softc(dev);
1265 	if ((sc->flags & HWPFL_USE_CPPC) == 0)
1266 		hwpstate_goto_pstate(dev, 0);
1267 	return (cpufreq_unregister(dev));
1268 }
1269 
1270 static int
hwpstate_shutdown(device_t dev)1271 hwpstate_shutdown(device_t dev)
1272 {
1273 
1274 	/* hwpstate_goto_pstate(dev, 0); */
1275 	return (0);
1276 }
1277 
1278 static int
hwpstate_features(driver_t * driver,u_int * features)1279 hwpstate_features(driver_t *driver, u_int *features)
1280 {
1281 
1282 	/* Notify the ACPI CPU that we support direct access to MSRs */
1283 	*features = ACPI_CAP_PERF_MSRS;
1284 	return (0);
1285 }
1286