1 #include <sys/epoll.h> 2 #include <sys/un.h> 3 #include <sys/types.h> 4 #include <sys/socket.h> 5 #include <sys/eventfd.h> 6 #include <dirent.h> 7 8 #include "kvm/kvm-ipc.h" 9 #include "kvm/rwsem.h" 10 #include "kvm/read-write.h" 11 #include "kvm/util.h" 12 #include "kvm/kvm.h" 13 #include "kvm/builtin-debug.h" 14 #include "kvm/strbuf.h" 15 #include "kvm/kvm-cpu.h" 16 #include "kvm/8250-serial.h" 17 18 struct kvm_ipc_head { 19 u32 type; 20 u32 len; 21 }; 22 23 #define KVM_IPC_MAX_MSGS 16 24 25 #define KVM_SOCK_SUFFIX ".sock" 26 #define KVM_SOCK_SUFFIX_LEN ((ssize_t)sizeof(KVM_SOCK_SUFFIX) - 1) 27 28 extern __thread struct kvm_cpu *current_kvm_cpu; 29 static void (*msgs[KVM_IPC_MAX_MSGS])(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg); 30 static DECLARE_RWSEM(msgs_rwlock); 31 static int epoll_fd, server_fd, stop_fd; 32 static pthread_t thread; 33 34 static int kvm__create_socket(struct kvm *kvm) 35 { 36 char full_name[PATH_MAX]; 37 int s; 38 struct sockaddr_un local; 39 int len, r; 40 41 /* This usually 108 bytes long */ 42 BUILD_BUG_ON(sizeof(local.sun_path) < 32); 43 44 snprintf(full_name, sizeof(full_name), "%s/%s%s", 45 kvm__get_dir(), kvm->cfg.guest_name, KVM_SOCK_SUFFIX); 46 47 s = socket(AF_UNIX, SOCK_STREAM, 0); 48 if (s < 0) { 49 perror("socket"); 50 return s; 51 } 52 53 local.sun_family = AF_UNIX; 54 strlcpy(local.sun_path, full_name, sizeof(local.sun_path)); 55 len = strlen(local.sun_path) + sizeof(local.sun_family); 56 r = bind(s, (struct sockaddr *)&local, len); 57 /* Check for an existing socket file */ 58 if (r < 0 && errno == EADDRINUSE) { 59 r = connect(s, (struct sockaddr *)&local, len); 60 if (r == 0) { 61 /* 62 * If we could connect, there is already a guest 63 * using this same name. This should not happen 64 * for PID derived names, but could happen for user 65 * provided guest names. 66 */ 67 pr_err("Guest socket file %s already exists.", 68 full_name); 69 r = -EEXIST; 70 goto fail; 71 } 72 if (errno == ECONNREFUSED) { 73 /* 74 * This is a ghost socket file, with no-one listening 75 * on the other end. Since kvmtool will only bind 76 * above when creating a new guest, there is no 77 * danger in just removing the file and re-trying. 78 */ 79 unlink(full_name); 80 pr_info("Removed ghost socket file \"%s\".", full_name); 81 r = bind(s, (struct sockaddr *)&local, len); 82 } 83 } 84 if (r < 0) { 85 perror("bind"); 86 goto fail; 87 } 88 89 r = listen(s, 5); 90 if (r < 0) { 91 perror("listen"); 92 goto fail; 93 } 94 95 return s; 96 97 fail: 98 close(s); 99 return r; 100 } 101 102 void kvm__remove_socket(const char *name) 103 { 104 char full_name[PATH_MAX]; 105 106 snprintf(full_name, sizeof(full_name), "%s/%s%s", 107 kvm__get_dir(), name, KVM_SOCK_SUFFIX); 108 unlink(full_name); 109 } 110 111 int kvm__get_sock_by_instance(const char *name) 112 { 113 int s, len, r; 114 char sock_file[PATH_MAX]; 115 struct sockaddr_un local; 116 117 snprintf(sock_file, sizeof(sock_file), "%s/%s%s", 118 kvm__get_dir(), name, KVM_SOCK_SUFFIX); 119 s = socket(AF_UNIX, SOCK_STREAM, 0); 120 121 local.sun_family = AF_UNIX; 122 strlcpy(local.sun_path, sock_file, sizeof(local.sun_path)); 123 len = strlen(local.sun_path) + sizeof(local.sun_family); 124 125 r = connect(s, (struct sockaddr *)&local, len); 126 if (r < 0 && errno == ECONNREFUSED) { 127 /* Clean up the ghost socket file */ 128 unlink(local.sun_path); 129 pr_info("Removed ghost socket file \"%s\".", sock_file); 130 return r; 131 } else if (r < 0) { 132 return r; 133 } 134 135 return s; 136 } 137 138 static bool is_socket(const char *base_path, const struct dirent *dent) 139 { 140 switch (dent->d_type) { 141 case DT_SOCK: 142 return true; 143 144 case DT_UNKNOWN: { 145 char path[PATH_MAX]; 146 struct stat st; 147 148 sprintf(path, "%s/%s", base_path, dent->d_name); 149 if (stat(path, &st)) 150 return false; 151 152 return S_ISSOCK(st.st_mode); 153 } 154 default: 155 return false; 156 } 157 } 158 159 int kvm__enumerate_instances(int (*callback)(const char *name, int fd)) 160 { 161 int sock; 162 DIR *dir; 163 struct dirent *entry; 164 int ret = 0; 165 const char *path; 166 167 path = kvm__get_dir(); 168 169 dir = opendir(path); 170 if (!dir) 171 return -errno; 172 173 for (;;) { 174 entry = readdir(dir); 175 if (!entry) 176 break; 177 if (is_socket(path, entry)) { 178 ssize_t name_len = strlen(entry->d_name); 179 char *p; 180 181 if (name_len <= KVM_SOCK_SUFFIX_LEN) 182 continue; 183 184 p = &entry->d_name[name_len - KVM_SOCK_SUFFIX_LEN]; 185 if (memcmp(KVM_SOCK_SUFFIX, p, KVM_SOCK_SUFFIX_LEN)) 186 continue; 187 188 *p = 0; 189 sock = kvm__get_sock_by_instance(entry->d_name); 190 if (sock < 0) 191 continue; 192 ret = callback(entry->d_name, sock); 193 close(sock); 194 if (ret < 0) 195 break; 196 } 197 } 198 199 closedir(dir); 200 201 return ret; 202 } 203 204 int kvm_ipc__register_handler(u32 type, void (*cb)(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg)) 205 { 206 if (type >= KVM_IPC_MAX_MSGS) 207 return -ENOSPC; 208 209 down_write(&msgs_rwlock); 210 msgs[type] = cb; 211 up_write(&msgs_rwlock); 212 213 return 0; 214 } 215 216 int kvm_ipc__send(int fd, u32 type) 217 { 218 struct kvm_ipc_head head = {.type = type, .len = 0,}; 219 220 if (write_in_full(fd, &head, sizeof(head)) < 0) 221 return -1; 222 223 return 0; 224 } 225 226 int kvm_ipc__send_msg(int fd, u32 type, u32 len, u8 *msg) 227 { 228 struct kvm_ipc_head head = {.type = type, .len = len,}; 229 230 if (write_in_full(fd, &head, sizeof(head)) < 0) 231 return -1; 232 233 if (write_in_full(fd, msg, len) < 0) 234 return -1; 235 236 return 0; 237 } 238 239 static int kvm_ipc__handle(struct kvm *kvm, int fd, u32 type, u32 len, u8 *data) 240 { 241 void (*cb)(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg); 242 243 if (type >= KVM_IPC_MAX_MSGS) 244 return -ENOSPC; 245 246 down_read(&msgs_rwlock); 247 cb = msgs[type]; 248 up_read(&msgs_rwlock); 249 250 if (cb == NULL) { 251 pr_warning("No device handles type %u\n", type); 252 return -ENODEV; 253 } 254 255 cb(kvm, fd, type, len, data); 256 257 return 0; 258 } 259 260 static int kvm_ipc__new_conn(int fd) 261 { 262 int client; 263 struct epoll_event ev; 264 265 client = accept(fd, NULL, NULL); 266 if (client < 0) 267 return -1; 268 269 ev.events = EPOLLIN | EPOLLRDHUP; 270 ev.data.fd = client; 271 if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, client, &ev) < 0) { 272 close(client); 273 return -1; 274 } 275 276 return client; 277 } 278 279 static void kvm_ipc__close_conn(int fd) 280 { 281 epoll_ctl(epoll_fd, EPOLL_CTL_DEL, fd, NULL); 282 close(fd); 283 } 284 285 static int kvm_ipc__receive(struct kvm *kvm, int fd) 286 { 287 struct kvm_ipc_head head; 288 u8 *msg = NULL; 289 u32 n; 290 291 n = read(fd, &head, sizeof(head)); 292 if (n != sizeof(head)) 293 goto done; 294 295 msg = malloc(head.len); 296 if (msg == NULL) 297 goto done; 298 299 n = read_in_full(fd, msg, head.len); 300 if (n != head.len) 301 goto done; 302 303 kvm_ipc__handle(kvm, fd, head.type, head.len, msg); 304 305 return 0; 306 307 done: 308 free(msg); 309 return -1; 310 } 311 312 static void *kvm_ipc__thread(void *param) 313 { 314 struct epoll_event event; 315 struct kvm *kvm = param; 316 317 kvm__set_thread_name("kvm-ipc"); 318 319 for (;;) { 320 int nfds; 321 322 nfds = epoll_wait(epoll_fd, &event, 1, -1); 323 if (nfds > 0) { 324 int fd = event.data.fd; 325 326 if (fd == stop_fd && event.events & EPOLLIN) { 327 break; 328 } else if (fd == server_fd) { 329 int client, r; 330 331 client = kvm_ipc__new_conn(fd); 332 /* 333 * Handle multiple IPC cmd at a time 334 */ 335 do { 336 r = kvm_ipc__receive(kvm, client); 337 } while (r == 0); 338 339 } else if (event.events & (EPOLLERR | EPOLLRDHUP | EPOLLHUP)) { 340 kvm_ipc__close_conn(fd); 341 } else { 342 kvm_ipc__receive(kvm, fd); 343 } 344 } 345 } 346 347 return NULL; 348 } 349 350 static void kvm__pid(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg) 351 { 352 pid_t pid = getpid(); 353 int r = 0; 354 355 if (type == KVM_IPC_PID) 356 r = write(fd, &pid, sizeof(pid)); 357 358 if (r < 0) 359 pr_warning("Failed sending PID"); 360 } 361 362 static void handle_stop(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg) 363 { 364 if (WARN_ON(type != KVM_IPC_STOP || len)) 365 return; 366 367 kvm__reboot(kvm); 368 } 369 370 /* Pause/resume the guest using SIGUSR2 */ 371 static int is_paused; 372 373 static void handle_pause(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg) 374 { 375 if (WARN_ON(len)) 376 return; 377 378 if (type == KVM_IPC_RESUME && is_paused) { 379 kvm->vm_state = KVM_VMSTATE_RUNNING; 380 kvm__continue(kvm); 381 } else if (type == KVM_IPC_PAUSE && !is_paused) { 382 kvm->vm_state = KVM_VMSTATE_PAUSED; 383 ioctl(kvm->vm_fd, KVM_KVMCLOCK_CTRL); 384 kvm__pause(kvm); 385 } else { 386 return; 387 } 388 389 is_paused = !is_paused; 390 } 391 392 static void handle_vmstate(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg) 393 { 394 int r = 0; 395 396 if (type == KVM_IPC_VMSTATE) 397 r = write(fd, &kvm->vm_state, sizeof(kvm->vm_state)); 398 399 if (r < 0) 400 pr_warning("Failed sending VMSTATE"); 401 } 402 403 /* 404 * Serialize debug printout so that the output of multiple vcpus does not 405 * get mixed up: 406 */ 407 static int printout_done; 408 409 static void handle_sigusr1(int sig) 410 { 411 struct kvm_cpu *cpu = current_kvm_cpu; 412 int fd = kvm_cpu__get_debug_fd(); 413 414 if (!cpu || cpu->needs_nmi) 415 return; 416 417 dprintf(fd, "\n #\n # vCPU #%ld's dump:\n #\n", cpu->cpu_id); 418 kvm_cpu__show_registers(cpu); 419 kvm_cpu__show_code(cpu); 420 kvm_cpu__show_page_tables(cpu); 421 fflush(stdout); 422 printout_done = 1; 423 } 424 425 static void handle_debug(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg) 426 { 427 int i; 428 struct debug_cmd_params *params; 429 u32 dbg_type; 430 u32 vcpu; 431 432 if (WARN_ON(type != KVM_IPC_DEBUG || len != sizeof(*params))) 433 return; 434 435 params = (void *)msg; 436 dbg_type = params->dbg_type; 437 vcpu = params->cpu; 438 439 if (dbg_type & KVM_DEBUG_CMD_TYPE_SYSRQ) 440 serial8250__inject_sysrq(kvm, params->sysrq); 441 442 if (dbg_type & KVM_DEBUG_CMD_TYPE_NMI) { 443 if ((int)vcpu >= kvm->nrcpus) 444 return; 445 446 kvm->cpus[vcpu]->needs_nmi = 1; 447 pthread_kill(kvm->cpus[vcpu]->thread, SIGUSR1); 448 } 449 450 if (!(dbg_type & KVM_DEBUG_CMD_TYPE_DUMP)) 451 return; 452 453 for (i = 0; i < kvm->nrcpus; i++) { 454 struct kvm_cpu *cpu = kvm->cpus[i]; 455 456 if (!cpu) 457 continue; 458 459 printout_done = 0; 460 461 kvm_cpu__set_debug_fd(fd); 462 pthread_kill(cpu->thread, SIGUSR1); 463 /* 464 * Wait for the vCPU to dump state before signalling 465 * the next thread. Since this is debug code it does 466 * not matter that we are burning CPU time a bit: 467 */ 468 while (!printout_done) 469 sleep(0); 470 } 471 472 close(fd); 473 474 serial8250__inject_sysrq(kvm, 'p'); 475 } 476 477 int kvm_ipc__init(struct kvm *kvm) 478 { 479 int ret; 480 int sock = kvm__create_socket(kvm); 481 struct epoll_event ev = {0}; 482 483 server_fd = sock; 484 485 epoll_fd = epoll_create(KVM_IPC_MAX_MSGS); 486 if (epoll_fd < 0) { 487 perror("epoll_create"); 488 ret = epoll_fd; 489 goto err; 490 } 491 492 ev.events = EPOLLIN | EPOLLET; 493 ev.data.fd = sock; 494 if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, sock, &ev) < 0) { 495 pr_err("Failed adding socket to epoll"); 496 ret = -EFAULT; 497 goto err_epoll; 498 } 499 500 stop_fd = eventfd(0, 0); 501 if (stop_fd < 0) { 502 perror("eventfd"); 503 ret = stop_fd; 504 goto err_epoll; 505 } 506 507 ev.events = EPOLLIN | EPOLLET; 508 ev.data.fd = stop_fd; 509 if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, stop_fd, &ev) < 0) { 510 pr_err("Failed adding stop event to epoll"); 511 ret = -EFAULT; 512 goto err_stop; 513 } 514 515 if (pthread_create(&thread, NULL, kvm_ipc__thread, kvm) != 0) { 516 pr_err("Failed starting IPC thread"); 517 ret = -EFAULT; 518 goto err_stop; 519 } 520 521 kvm_ipc__register_handler(KVM_IPC_PID, kvm__pid); 522 kvm_ipc__register_handler(KVM_IPC_DEBUG, handle_debug); 523 kvm_ipc__register_handler(KVM_IPC_PAUSE, handle_pause); 524 kvm_ipc__register_handler(KVM_IPC_RESUME, handle_pause); 525 kvm_ipc__register_handler(KVM_IPC_STOP, handle_stop); 526 kvm_ipc__register_handler(KVM_IPC_VMSTATE, handle_vmstate); 527 signal(SIGUSR1, handle_sigusr1); 528 529 return 0; 530 531 err_stop: 532 close(stop_fd); 533 err_epoll: 534 close(epoll_fd); 535 err: 536 return ret; 537 } 538 base_init(kvm_ipc__init); 539 540 int kvm_ipc__exit(struct kvm *kvm) 541 { 542 u64 val = 1; 543 int ret; 544 545 ret = write(stop_fd, &val, sizeof(val)); 546 if (ret < 0) 547 return ret; 548 549 close(server_fd); 550 close(epoll_fd); 551 552 kvm__remove_socket(kvm->cfg.guest_name); 553 554 return ret; 555 } 556 base_exit(kvm_ipc__exit); 557