1 /* 2 * Privileged RAPL MSR helper commands for QEMU 3 * 4 * Copyright (C) 2024 Red Hat, Inc. <aharivel@redhat.com> 5 * 6 * Author: Anthony Harivel <aharivel@redhat.com> 7 * 8 * This program is free software; you can redistribute it and/or modify 9 * it under the terms of the GNU General Public License as published by 10 * the Free Software Foundation; under version 2 of the License. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, see <http://www.gnu.org/licenses/>. 19 */ 20 21 #include "qemu/osdep.h" 22 #include <getopt.h> 23 #include <stdbool.h> 24 #include <sys/ioctl.h> 25 #ifdef CONFIG_LIBCAP_NG 26 #include <cap-ng.h> 27 #endif 28 #include <pwd.h> 29 #include <grp.h> 30 31 #include "qemu/help-texts.h" 32 #include "qapi/error.h" 33 #include "qemu/cutils.h" 34 #include "qemu/main-loop.h" 35 #include "qemu/module.h" 36 #include "qemu/error-report.h" 37 #include "qemu/config-file.h" 38 #include "qemu-version.h" 39 #include "qapi/error.h" 40 #include "qemu/error-report.h" 41 #include "qemu/log.h" 42 #include "qemu/systemd.h" 43 #include "io/channel.h" 44 #include "io/channel-socket.h" 45 #include "trace/control.h" 46 #include "qemu-version.h" 47 #include "rapl-msr-index.h" 48 49 #define MSR_PATH_TEMPLATE "/dev/cpu/%u/msr" 50 51 static char *socket_path; 52 static char *pidfile; 53 static enum { RUNNING, TERMINATE, TERMINATING } state; 54 static QIOChannelSocket *server_ioc; 55 static int server_watch; 56 static int num_active_sockets = 1; 57 static bool verbose; 58 59 #ifdef CONFIG_LIBCAP_NG 60 static int uid = -1; 61 static int gid = -1; 62 #endif 63 64 static void compute_default_paths(void) 65 { 66 g_autofree char *state = qemu_get_local_state_dir(); 67 68 socket_path = g_build_filename(state, "run", "qemu-vmsr-helper.sock", NULL); 69 pidfile = g_build_filename(state, "run", "qemu-vmsr-helper.pid", NULL); 70 } 71 72 static int is_intel_processor(void) 73 { 74 int ebx, ecx, edx; 75 76 /* Execute CPUID instruction with eax=0 (basic identification) */ 77 asm volatile ( 78 "cpuid" 79 : "=b" (ebx), "=c" (ecx), "=d" (edx) 80 : "a" (0) 81 ); 82 83 /* 84 * Check if processor is "GenuineIntel" 85 * 0x756e6547 = "Genu" 86 * 0x49656e69 = "ineI" 87 * 0x6c65746e = "ntel" 88 */ 89 return (ebx == 0x756e6547) && (edx == 0x49656e69) && (ecx == 0x6c65746e); 90 } 91 92 static int is_rapl_enabled(void) 93 { 94 const char *path = "/sys/class/powercap/intel-rapl/enabled"; 95 FILE *file = fopen(path, "r"); 96 int value = 0; 97 98 if (file != NULL) { 99 if (fscanf(file, "%d", &value) != 1) { 100 error_report("INTEL RAPL not enabled"); 101 } 102 fclose(file); 103 } else { 104 error_report("Error opening %s", path); 105 } 106 107 return value; 108 } 109 110 /* 111 * Check if the TID that request the MSR read 112 * belongs to the peer. It be should a TID of a vCPU. 113 */ 114 static bool is_tid_present(pid_t pid, pid_t tid) 115 { 116 g_autofree char *tidPath = g_strdup_printf("/proc/%d/task/%d", pid, tid); 117 118 /* Check if the TID directory exists within the PID directory */ 119 if (access(tidPath, F_OK) == 0) { 120 return true; 121 } 122 123 error_report("Failed to open /proc at %s", tidPath); 124 return false; 125 } 126 127 /* 128 * Only the RAPL MSR in target/i386/cpu.h are allowed 129 */ 130 static bool is_msr_allowed(uint32_t reg) 131 { 132 switch (reg) { 133 case MSR_RAPL_POWER_UNIT: 134 case MSR_PKG_POWER_LIMIT: 135 case MSR_PKG_ENERGY_STATUS: 136 case MSR_PKG_POWER_INFO: 137 return true; 138 default: 139 return false; 140 } 141 } 142 143 static uint64_t vmsr_read_msr(uint32_t msr_register, unsigned int cpu_id) 144 { 145 int fd; 146 uint64_t result = 0; 147 148 g_autofree char *path = g_strdup_printf(MSR_PATH_TEMPLATE, cpu_id); 149 150 fd = open(path, O_RDONLY); 151 if (fd < 0) { 152 error_report("Failed to open MSR file at %s", path); 153 return result; 154 } 155 156 if (pread(fd, &result, sizeof(result), msr_register) != sizeof(result)) { 157 error_report("Failed to read MSR"); 158 result = 0; 159 } 160 161 close(fd); 162 return result; 163 } 164 165 static void usage(const char *name) 166 { 167 (printf) ( 168 "Usage: %s [OPTIONS] FILE\n" 169 "Virtual RAPL MSR helper program for QEMU\n" 170 "\n" 171 " -h, --help display this help and exit\n" 172 " -V, --version output version information and exit\n" 173 "\n" 174 " -d, --daemon run in the background\n" 175 " -f, --pidfile=PATH PID file when running as a daemon\n" 176 " (default '%s')\n" 177 " -k, --socket=PATH path to the unix socket\n" 178 " (default '%s')\n" 179 " -T, --trace [[enable=]<pattern>][,events=<file>][,file=<file>]\n" 180 " specify tracing options\n" 181 #ifdef CONFIG_LIBCAP_NG 182 " -u, --user=USER user to drop privileges to\n" 183 " -g, --group=GROUP group to drop privileges to\n" 184 #endif 185 "\n" 186 QEMU_HELP_BOTTOM "\n" 187 , name, pidfile, socket_path); 188 } 189 190 static void version(const char *name) 191 { 192 printf( 193 "%s " QEMU_FULL_VERSION "\n" 194 "Written by Anthony Harivel.\n" 195 "\n" 196 QEMU_COPYRIGHT "\n" 197 "This is free software; see the source for copying conditions. There is NO\n" 198 "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n" 199 , name); 200 } 201 202 typedef struct VMSRHelperClient { 203 QIOChannelSocket *ioc; 204 Coroutine *co; 205 } VMSRHelperClient; 206 207 static void coroutine_fn vh_co_entry(void *opaque) 208 { 209 VMSRHelperClient *client = opaque; 210 Error *local_err = NULL; 211 unsigned int peer_pid; 212 uint32_t request[3]; 213 uint64_t vmsr; 214 int r; 215 216 qio_channel_set_blocking(QIO_CHANNEL(client->ioc), 217 false, NULL); 218 219 qio_channel_set_follow_coroutine_ctx(QIO_CHANNEL(client->ioc), true); 220 221 /* 222 * Check peer credentials 223 */ 224 r = qio_channel_get_peerpid(QIO_CHANNEL(client->ioc), 225 &peer_pid, 226 &local_err); 227 if (r < 0) { 228 goto out; 229 } 230 231 for (;;) { 232 /* 233 * Read the requested MSR 234 * Only RAPL MSR in rapl-msr-index.h is allowed 235 */ 236 r = qio_channel_read_all_eof(QIO_CHANNEL(client->ioc), 237 (char *) &request, sizeof(request), &local_err); 238 if (r <= 0) { 239 break; 240 } 241 242 if (!is_msr_allowed(request[0])) { 243 error_report("Requested unallowed msr: %d", request[0]); 244 break; 245 } 246 247 vmsr = vmsr_read_msr(request[0], request[1]); 248 249 if (!is_tid_present(peer_pid, request[2])) { 250 error_report("Requested TID not in peer PID: %d %d", 251 peer_pid, request[2]); 252 vmsr = 0; 253 } 254 255 r = qio_channel_write_all(QIO_CHANNEL(client->ioc), 256 (char *) &vmsr, 257 sizeof(vmsr), 258 &local_err); 259 if (r < 0) { 260 break; 261 } 262 } 263 264 out: 265 if (local_err) { 266 if (!verbose) { 267 error_free(local_err); 268 } else { 269 error_report_err(local_err); 270 } 271 } 272 273 object_unref(OBJECT(client->ioc)); 274 g_free(client); 275 } 276 277 static gboolean accept_client(QIOChannel *ioc, 278 GIOCondition cond, 279 gpointer opaque) 280 { 281 QIOChannelSocket *cioc; 282 VMSRHelperClient *vmsrh; 283 284 cioc = qio_channel_socket_accept(QIO_CHANNEL_SOCKET(ioc), 285 NULL); 286 if (!cioc) { 287 return TRUE; 288 } 289 290 vmsrh = g_new(VMSRHelperClient, 1); 291 vmsrh->ioc = cioc; 292 vmsrh->co = qemu_coroutine_create(vh_co_entry, vmsrh); 293 qemu_coroutine_enter(vmsrh->co); 294 295 return TRUE; 296 } 297 298 static void termsig_handler(int signum) 299 { 300 qatomic_cmpxchg(&state, RUNNING, TERMINATE); 301 qemu_notify_event(); 302 } 303 304 static void close_server_socket(void) 305 { 306 assert(server_ioc); 307 308 g_source_remove(server_watch); 309 server_watch = -1; 310 object_unref(OBJECT(server_ioc)); 311 num_active_sockets--; 312 } 313 314 #ifdef CONFIG_LIBCAP_NG 315 static int drop_privileges(void) 316 { 317 /* clear all capabilities */ 318 capng_clear(CAPNG_SELECT_BOTH); 319 320 if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE | CAPNG_PERMITTED, 321 CAP_SYS_RAWIO) < 0) { 322 return -1; 323 } 324 325 return 0; 326 } 327 #endif 328 329 int main(int argc, char **argv) 330 { 331 const char *sopt = "hVk:f:dT:u:g:vq"; 332 struct option lopt[] = { 333 { "help", no_argument, NULL, 'h' }, 334 { "version", no_argument, NULL, 'V' }, 335 { "socket", required_argument, NULL, 'k' }, 336 { "pidfile", required_argument, NULL, 'f' }, 337 { "daemon", no_argument, NULL, 'd' }, 338 { "trace", required_argument, NULL, 'T' }, 339 { "verbose", no_argument, NULL, 'v' }, 340 { NULL, 0, NULL, 0 } 341 }; 342 int opt_ind = 0; 343 int ch; 344 Error *local_err = NULL; 345 bool daemonize = false; 346 bool pidfile_specified = false; 347 bool socket_path_specified = false; 348 unsigned socket_activation; 349 350 struct sigaction sa_sigterm; 351 memset(&sa_sigterm, 0, sizeof(sa_sigterm)); 352 sa_sigterm.sa_handler = termsig_handler; 353 sigaction(SIGTERM, &sa_sigterm, NULL); 354 sigaction(SIGINT, &sa_sigterm, NULL); 355 sigaction(SIGHUP, &sa_sigterm, NULL); 356 357 signal(SIGPIPE, SIG_IGN); 358 359 error_init(argv[0]); 360 module_call_init(MODULE_INIT_TRACE); 361 module_call_init(MODULE_INIT_QOM); 362 qemu_add_opts(&qemu_trace_opts); 363 qemu_init_exec_dir(argv[0]); 364 365 compute_default_paths(); 366 367 /* 368 * Sanity check 369 * 1. cpu must be Intel cpu 370 * 2. RAPL must be enabled 371 */ 372 if (!is_intel_processor()) { 373 error_report("error: CPU is not INTEL cpu"); 374 exit(EXIT_FAILURE); 375 } 376 377 if (!is_rapl_enabled()) { 378 error_report("error: RAPL driver not enable"); 379 exit(EXIT_FAILURE); 380 } 381 382 while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) { 383 switch (ch) { 384 case 'k': 385 g_free(socket_path); 386 socket_path = g_strdup(optarg); 387 socket_path_specified = true; 388 if (socket_path[0] != '/') { 389 error_report("socket path must be absolute"); 390 exit(EXIT_FAILURE); 391 } 392 break; 393 case 'f': 394 g_free(pidfile); 395 pidfile = g_strdup(optarg); 396 pidfile_specified = true; 397 break; 398 #ifdef CONFIG_LIBCAP_NG 399 case 'u': { 400 unsigned long res; 401 struct passwd *userinfo = getpwnam(optarg); 402 if (userinfo) { 403 uid = userinfo->pw_uid; 404 } else if (qemu_strtoul(optarg, NULL, 10, &res) == 0 && 405 (uid_t)res == res) { 406 uid = res; 407 } else { 408 error_report("invalid user '%s'", optarg); 409 exit(EXIT_FAILURE); 410 } 411 break; 412 } 413 case 'g': { 414 unsigned long res; 415 struct group *groupinfo = getgrnam(optarg); 416 if (groupinfo) { 417 gid = groupinfo->gr_gid; 418 } else if (qemu_strtoul(optarg, NULL, 10, &res) == 0 && 419 (gid_t)res == res) { 420 gid = res; 421 } else { 422 error_report("invalid group '%s'", optarg); 423 exit(EXIT_FAILURE); 424 } 425 break; 426 } 427 #else 428 case 'u': 429 case 'g': 430 error_report("-%c not supported by this %s", ch, argv[0]); 431 exit(1); 432 #endif 433 case 'd': 434 daemonize = true; 435 break; 436 case 'v': 437 verbose = true; 438 break; 439 case 'T': 440 trace_opt_parse(optarg); 441 break; 442 case 'V': 443 version(argv[0]); 444 exit(EXIT_SUCCESS); 445 break; 446 case 'h': 447 usage(argv[0]); 448 exit(EXIT_SUCCESS); 449 break; 450 case '?': 451 error_report("Try `%s --help' for more information.", argv[0]); 452 exit(EXIT_FAILURE); 453 } 454 } 455 456 if (!trace_init_backends()) { 457 exit(EXIT_FAILURE); 458 } 459 trace_init_file(); 460 qemu_set_log(LOG_TRACE, &error_fatal); 461 462 socket_activation = check_socket_activation(); 463 if (socket_activation == 0) { 464 SocketAddress saddr; 465 saddr = (SocketAddress){ 466 .type = SOCKET_ADDRESS_TYPE_UNIX, 467 .u.q_unix.path = socket_path, 468 }; 469 server_ioc = qio_channel_socket_new(); 470 if (qio_channel_socket_listen_sync(server_ioc, &saddr, 471 1, &local_err) < 0) { 472 object_unref(OBJECT(server_ioc)); 473 error_report_err(local_err); 474 return 1; 475 } 476 } else { 477 /* Using socket activation - check user didn't use -p etc. */ 478 if (socket_path_specified) { 479 error_report("Unix socket can't be set when" 480 "using socket activation"); 481 exit(EXIT_FAILURE); 482 } 483 484 /* Can only listen on a single socket. */ 485 if (socket_activation > 1) { 486 error_report("%s does not support socket activation" 487 "with LISTEN_FDS > 1", 488 argv[0]); 489 exit(EXIT_FAILURE); 490 } 491 server_ioc = qio_channel_socket_new_fd(FIRST_SOCKET_ACTIVATION_FD, 492 &local_err); 493 if (server_ioc == NULL) { 494 error_reportf_err(local_err, 495 "Failed to use socket activation: "); 496 exit(EXIT_FAILURE); 497 } 498 } 499 500 qemu_init_main_loop(&error_fatal); 501 502 server_watch = qio_channel_add_watch(QIO_CHANNEL(server_ioc), 503 G_IO_IN, 504 accept_client, 505 NULL, NULL); 506 507 if (daemonize) { 508 if (daemon(0, 0) < 0) { 509 error_report("Failed to daemonize: %s", strerror(errno)); 510 exit(EXIT_FAILURE); 511 } 512 } 513 514 if (daemonize || pidfile_specified) { 515 qemu_write_pidfile(pidfile, &error_fatal); 516 } 517 518 #ifdef CONFIG_LIBCAP_NG 519 if (drop_privileges() < 0) { 520 error_report("Failed to drop privileges: %s", strerror(errno)); 521 exit(EXIT_FAILURE); 522 } 523 #endif 524 525 info_report("Listening on %s", socket_path); 526 527 state = RUNNING; 528 do { 529 main_loop_wait(false); 530 if (state == TERMINATE) { 531 state = TERMINATING; 532 close_server_socket(); 533 } 534 } while (num_active_sockets > 0); 535 536 exit(EXIT_SUCCESS); 537 } 538