xref: /kvmtool/kvm-ipc.c (revision ef5b941fba75e42a1f7f0a85161c0ed4686ebeee)
1 #include <sys/epoll.h>
2 #include <sys/un.h>
3 #include <sys/types.h>
4 #include <sys/socket.h>
5 #include <sys/eventfd.h>
6 #include <dirent.h>
7 
8 #include "kvm/kvm-ipc.h"
9 #include "kvm/rwsem.h"
10 #include "kvm/read-write.h"
11 #include "kvm/util.h"
12 #include "kvm/kvm.h"
13 #include "kvm/builtin-debug.h"
14 #include "kvm/strbuf.h"
15 #include "kvm/kvm-cpu.h"
16 #include "kvm/8250-serial.h"
17 
/*
 * Fixed-size header that precedes every IPC message on the socket.
 * The sender writes this header followed by exactly 'len' payload bytes.
 */
struct kvm_ipc_head {
	u32 type;	/* message type; used as the index into the handler table */
	u32 len;	/* number of payload bytes following the header */
};
22 
/* Upper bound (exclusive) on message types; also passed to epoll_create(). */
#define KVM_IPC_MAX_MSGS 16

/* File name suffix of every guest socket, and its length without the NUL. */
#define KVM_SOCK_SUFFIX		".sock"
#define KVM_SOCK_SUFFIX_LEN	((ssize_t)sizeof(KVM_SOCK_SUFFIX) - 1)

/* Per-thread vCPU pointer, defined elsewhere; read by the SIGUSR1 handler. */
extern __thread struct kvm_cpu *current_kvm_cpu;
/* Handler table indexed by message type; NULL means "no handler". */
static void (*msgs[KVM_IPC_MAX_MSGS])(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg);
/* Taken for writing when registering a handler, for reading on lookup. */
static DECLARE_RWSEM(msgs_rwlock);
/* epoll instance, listening socket, and eventfd used to stop the thread. */
static int epoll_fd, server_fd, stop_fd;
/* The IPC thread created by kvm_ipc__init(). */
static pthread_t thread;
33 
34 static int kvm__create_socket(struct kvm *kvm)
35 {
36 	char full_name[PATH_MAX];
37 	int s;
38 	struct sockaddr_un local;
39 	int len, r;
40 
41 	/* This usually 108 bytes long */
42 	BUILD_BUG_ON(sizeof(local.sun_path) < 32);
43 
44 	snprintf(full_name, sizeof(full_name), "%s/%s%s",
45 		 kvm__get_dir(), kvm->cfg.guest_name, KVM_SOCK_SUFFIX);
46 
47 	s = socket(AF_UNIX, SOCK_STREAM, 0);
48 	if (s < 0) {
49 		perror("socket");
50 		return s;
51 	}
52 
53 	local.sun_family = AF_UNIX;
54 	strlcpy(local.sun_path, full_name, sizeof(local.sun_path));
55 	len = strlen(local.sun_path) + sizeof(local.sun_family);
56 	r = bind(s, (struct sockaddr *)&local, len);
57 	/* Check for an existing socket file */
58 	if (r < 0 && errno == EADDRINUSE) {
59 		r = connect(s, (struct sockaddr *)&local, len);
60 		if (r == 0) {
61 			/*
62 			 * If we could connect, there is already a guest
63 			 * using this same name. This should not happen
64 			 * for PID derived names, but could happen for user
65 			 * provided guest names.
66 			 */
67 			pr_err("Guest socket file %s already exists.",
68 			       full_name);
69 			r = -EEXIST;
70 			goto fail;
71 		}
72 		if (errno == ECONNREFUSED) {
73 			/*
74 			 * This is a ghost socket file, with no-one listening
75 			 * on the other end. Since kvmtool will only bind
76 			 * above when creating a new guest, there is no
77 			 * danger in just removing the file and re-trying.
78 			 */
79 			unlink(full_name);
80 			pr_info("Removed ghost socket file \"%s\".", full_name);
81 			r = bind(s, (struct sockaddr *)&local, len);
82 		}
83 	}
84 	if (r < 0) {
85 		perror("bind");
86 		goto fail;
87 	}
88 
89 	r = listen(s, 5);
90 	if (r < 0) {
91 		perror("listen");
92 		goto fail;
93 	}
94 
95 	return s;
96 
97 fail:
98 	close(s);
99 	return r;
100 }
101 
102 void kvm__remove_socket(const char *name)
103 {
104 	char full_name[PATH_MAX];
105 
106 	snprintf(full_name, sizeof(full_name), "%s/%s%s",
107 		 kvm__get_dir(), name, KVM_SOCK_SUFFIX);
108 	unlink(full_name);
109 }
110 
111 int kvm__get_sock_by_instance(const char *name)
112 {
113 	int s, len, r;
114 	char sock_file[PATH_MAX];
115 	struct sockaddr_un local;
116 
117 	snprintf(sock_file, sizeof(sock_file), "%s/%s%s",
118 		 kvm__get_dir(), name, KVM_SOCK_SUFFIX);
119 	s = socket(AF_UNIX, SOCK_STREAM, 0);
120 
121 	local.sun_family = AF_UNIX;
122 	strlcpy(local.sun_path, sock_file, sizeof(local.sun_path));
123 	len = strlen(local.sun_path) + sizeof(local.sun_family);
124 
125 	r = connect(s, (struct sockaddr *)&local, len);
126 	if (r < 0 && errno == ECONNREFUSED) {
127 		/* Clean up the ghost socket file */
128 		unlink(local.sun_path);
129 		pr_info("Removed ghost socket file \"%s\".", sock_file);
130 		return r;
131 	} else if (r < 0) {
132 		return r;
133 	}
134 
135 	return s;
136 }
137 
/*
 * Tell whether directory entry 'dent', found in directory 'base_path',
 * is a socket.
 *
 * d_type is trusted when it is DT_SOCK. For DT_UNKNOWN the entry is
 * stat()ed instead. Any other type, a failing stat(), or a combined path
 * that does not fit into PATH_MAX yields false.
 */
static bool is_socket(const char *base_path, const struct dirent *dent)
{
	switch (dent->d_type) {
	case DT_SOCK:
		return true;

	case DT_UNKNOWN: {
		char path[PATH_MAX];
		struct stat st;
		int n;

		/* Bounded formatting: never write past the end of path[]. */
		n = snprintf(path, sizeof(path), "%s/%s",
			     base_path, dent->d_name);
		if (n < 0 || (size_t)n >= sizeof(path))
			return false;

		if (stat(path, &st))
			return false;

		return S_ISSOCK(st.st_mode);
	}
	default:
		return false;
	}
}
158 
159 int kvm__enumerate_instances(int (*callback)(const char *name, int fd))
160 {
161 	int sock;
162 	DIR *dir;
163 	struct dirent *entry;
164 	int ret = 0;
165 	const char *path;
166 
167 	path = kvm__get_dir();
168 
169 	dir = opendir(path);
170 	if (!dir)
171 		return -errno;
172 
173 	for (;;) {
174 		entry = readdir(dir);
175 		if (!entry)
176 			break;
177 		if (is_socket(path, entry)) {
178 			ssize_t name_len = strlen(entry->d_name);
179 			char *p;
180 
181 			if (name_len <= KVM_SOCK_SUFFIX_LEN)
182 				continue;
183 
184 			p = &entry->d_name[name_len - KVM_SOCK_SUFFIX_LEN];
185 			if (memcmp(KVM_SOCK_SUFFIX, p, KVM_SOCK_SUFFIX_LEN))
186 				continue;
187 
188 			*p = 0;
189 			sock = kvm__get_sock_by_instance(entry->d_name);
190 			if (sock < 0)
191 				continue;
192 			ret = callback(entry->d_name, sock);
193 			close(sock);
194 			if (ret < 0)
195 				break;
196 		}
197 	}
198 
199 	closedir(dir);
200 
201 	return ret;
202 }
203 
204 int kvm_ipc__register_handler(u32 type, void (*cb)(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg))
205 {
206 	if (type >= KVM_IPC_MAX_MSGS)
207 		return -ENOSPC;
208 
209 	down_write(&msgs_rwlock);
210 	msgs[type] = cb;
211 	up_write(&msgs_rwlock);
212 
213 	return 0;
214 }
215 
216 int kvm_ipc__send(int fd, u32 type)
217 {
218 	struct kvm_ipc_head head = {.type = type, .len = 0,};
219 
220 	if (write_in_full(fd, &head, sizeof(head)) < 0)
221 		return -1;
222 
223 	return 0;
224 }
225 
226 int kvm_ipc__send_msg(int fd, u32 type, u32 len, u8 *msg)
227 {
228 	struct kvm_ipc_head head = {.type = type, .len = len,};
229 
230 	if (write_in_full(fd, &head, sizeof(head)) < 0)
231 		return -1;
232 
233 	if (write_in_full(fd, msg, len) < 0)
234 		return -1;
235 
236 	return 0;
237 }
238 
239 static int kvm_ipc__handle(struct kvm *kvm, int fd, u32 type, u32 len, u8 *data)
240 {
241 	void (*cb)(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg);
242 
243 	if (type >= KVM_IPC_MAX_MSGS)
244 		return -ENOSPC;
245 
246 	down_read(&msgs_rwlock);
247 	cb = msgs[type];
248 	up_read(&msgs_rwlock);
249 
250 	if (cb == NULL) {
251 		pr_warning("No device handles type %u\n", type);
252 		return -ENODEV;
253 	}
254 
255 	cb(kvm, fd, type, len, data);
256 
257 	return 0;
258 }
259 
260 static int kvm_ipc__new_conn(int fd)
261 {
262 	int client;
263 	struct epoll_event ev;
264 
265 	client = accept(fd, NULL, NULL);
266 	if (client < 0)
267 		return -1;
268 
269 	ev.events = EPOLLIN | EPOLLRDHUP;
270 	ev.data.fd = client;
271 	if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, client, &ev) < 0) {
272 		close(client);
273 		return -1;
274 	}
275 
276 	return client;
277 }
278 
279 static void kvm_ipc__close_conn(int fd)
280 {
281 	epoll_ctl(epoll_fd, EPOLL_CTL_DEL, fd, NULL);
282 	close(fd);
283 }
284 
285 static int kvm_ipc__receive(struct kvm *kvm, int fd)
286 {
287 	struct kvm_ipc_head head;
288 	u8 *msg = NULL;
289 	u32 n;
290 
291 	n = read(fd, &head, sizeof(head));
292 	if (n != sizeof(head))
293 		goto done;
294 
295 	msg = malloc(head.len);
296 	if (msg == NULL)
297 		goto done;
298 
299 	n = read_in_full(fd, msg, head.len);
300 	if (n != head.len)
301 		goto done;
302 
303 	kvm_ipc__handle(kvm, fd, head.type, head.len, msg);
304 
305 	return 0;
306 
307 done:
308 	free(msg);
309 	return -1;
310 }
311 
312 static void *kvm_ipc__thread(void *param)
313 {
314 	struct epoll_event event;
315 	struct kvm *kvm = param;
316 
317 	kvm__set_thread_name("kvm-ipc");
318 
319 	for (;;) {
320 		int nfds;
321 
322 		nfds = epoll_wait(epoll_fd, &event, 1, -1);
323 		if (nfds > 0) {
324 			int fd = event.data.fd;
325 
326 			if (fd == stop_fd && event.events & EPOLLIN) {
327 				break;
328 			} else if (fd == server_fd) {
329 				int client, r;
330 
331 				client = kvm_ipc__new_conn(fd);
332 				/*
333 				 * Handle multiple IPC cmd at a time
334 				 */
335 				do {
336 					r = kvm_ipc__receive(kvm, client);
337 				} while	(r == 0);
338 
339 			} else if (event.events & (EPOLLERR | EPOLLRDHUP | EPOLLHUP)) {
340 				kvm_ipc__close_conn(fd);
341 			} else {
342 				kvm_ipc__receive(kvm, fd);
343 			}
344 		}
345 	}
346 
347 	return NULL;
348 }
349 
350 static void kvm__pid(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg)
351 {
352 	pid_t pid = getpid();
353 	int r = 0;
354 
355 	if (type == KVM_IPC_PID)
356 		r = write(fd, &pid, sizeof(pid));
357 
358 	if (r < 0)
359 		pr_warning("Failed sending PID");
360 }
361 
362 static void handle_stop(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg)
363 {
364 	if (WARN_ON(type != KVM_IPC_STOP || len))
365 		return;
366 
367 	kvm__reboot(kvm);
368 }
369 
370 /* Pause/resume the guest using SIGUSR2 */
371 static int is_paused;
372 
373 static void handle_pause(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg)
374 {
375 	if (WARN_ON(len))
376 		return;
377 
378 	if (type == KVM_IPC_RESUME && is_paused) {
379 		kvm->vm_state = KVM_VMSTATE_RUNNING;
380 		kvm__continue(kvm);
381 	} else if (type == KVM_IPC_PAUSE && !is_paused) {
382 		kvm->vm_state = KVM_VMSTATE_PAUSED;
383 		ioctl(kvm->vm_fd, KVM_KVMCLOCK_CTRL);
384 		kvm__pause(kvm);
385 	} else {
386 		return;
387 	}
388 
389 	is_paused = !is_paused;
390 }
391 
392 static void handle_vmstate(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg)
393 {
394 	int r = 0;
395 
396 	if (type == KVM_IPC_VMSTATE)
397 		r = write(fd, &kvm->vm_state, sizeof(kvm->vm_state));
398 
399 	if (r < 0)
400 		pr_warning("Failed sending VMSTATE");
401 }
402 
403 /*
404  * Serialize debug printout so that the output of multiple vcpus does not
405  * get mixed up:
406  */
407 static int printout_done;
408 
409 static void handle_sigusr1(int sig)
410 {
411 	struct kvm_cpu *cpu = current_kvm_cpu;
412 	int fd = kvm_cpu__get_debug_fd();
413 
414 	if (!cpu || cpu->needs_nmi)
415 		return;
416 
417 	dprintf(fd, "\n #\n # vCPU #%ld's dump:\n #\n", cpu->cpu_id);
418 	kvm_cpu__show_registers(cpu);
419 	kvm_cpu__show_code(cpu);
420 	kvm_cpu__show_page_tables(cpu);
421 	fflush(stdout);
422 	printout_done = 1;
423 }
424 
425 static void handle_debug(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg)
426 {
427 	int i;
428 	struct debug_cmd_params *params;
429 	u32 dbg_type;
430 	u32 vcpu;
431 
432 	if (WARN_ON(type != KVM_IPC_DEBUG || len != sizeof(*params)))
433 		return;
434 
435 	params = (void *)msg;
436 	dbg_type = params->dbg_type;
437 	vcpu = params->cpu;
438 
439 	if (dbg_type & KVM_DEBUG_CMD_TYPE_SYSRQ)
440 		serial8250__inject_sysrq(kvm, params->sysrq);
441 
442 	if (dbg_type & KVM_DEBUG_CMD_TYPE_NMI) {
443 		if ((int)vcpu >= kvm->nrcpus)
444 			return;
445 
446 		kvm->cpus[vcpu]->needs_nmi = 1;
447 		pthread_kill(kvm->cpus[vcpu]->thread, SIGUSR1);
448 	}
449 
450 	if (!(dbg_type & KVM_DEBUG_CMD_TYPE_DUMP))
451 		return;
452 
453 	for (i = 0; i < kvm->nrcpus; i++) {
454 		struct kvm_cpu *cpu = kvm->cpus[i];
455 
456 		if (!cpu)
457 			continue;
458 
459 		printout_done = 0;
460 
461 		kvm_cpu__set_debug_fd(fd);
462 		pthread_kill(cpu->thread, SIGUSR1);
463 		/*
464 		 * Wait for the vCPU to dump state before signalling
465 		 * the next thread. Since this is debug code it does
466 		 * not matter that we are burning CPU time a bit:
467 		 */
468 		while (!printout_done)
469 			sleep(0);
470 	}
471 
472 	close(fd);
473 
474 	serial8250__inject_sysrq(kvm, 'p');
475 }
476 
477 int kvm_ipc__init(struct kvm *kvm)
478 {
479 	int ret;
480 	int sock = kvm__create_socket(kvm);
481 	struct epoll_event ev = {0};
482 
483 	server_fd = sock;
484 
485 	epoll_fd = epoll_create(KVM_IPC_MAX_MSGS);
486 	if (epoll_fd < 0) {
487 		perror("epoll_create");
488 		ret = epoll_fd;
489 		goto err;
490 	}
491 
492 	ev.events = EPOLLIN | EPOLLET;
493 	ev.data.fd = sock;
494 	if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, sock, &ev) < 0) {
495 		pr_err("Failed adding socket to epoll");
496 		ret = -EFAULT;
497 		goto err_epoll;
498 	}
499 
500 	stop_fd = eventfd(0, 0);
501 	if (stop_fd < 0) {
502 		perror("eventfd");
503 		ret = stop_fd;
504 		goto err_epoll;
505 	}
506 
507 	ev.events = EPOLLIN | EPOLLET;
508 	ev.data.fd = stop_fd;
509 	if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, stop_fd, &ev) < 0) {
510 		pr_err("Failed adding stop event to epoll");
511 		ret = -EFAULT;
512 		goto err_stop;
513 	}
514 
515 	if (pthread_create(&thread, NULL, kvm_ipc__thread, kvm) != 0) {
516 		pr_err("Failed starting IPC thread");
517 		ret = -EFAULT;
518 		goto err_stop;
519 	}
520 
521 	kvm_ipc__register_handler(KVM_IPC_PID, kvm__pid);
522 	kvm_ipc__register_handler(KVM_IPC_DEBUG, handle_debug);
523 	kvm_ipc__register_handler(KVM_IPC_PAUSE, handle_pause);
524 	kvm_ipc__register_handler(KVM_IPC_RESUME, handle_pause);
525 	kvm_ipc__register_handler(KVM_IPC_STOP, handle_stop);
526 	kvm_ipc__register_handler(KVM_IPC_VMSTATE, handle_vmstate);
527 	signal(SIGUSR1, handle_sigusr1);
528 
529 	return 0;
530 
531 err_stop:
532 	close(stop_fd);
533 err_epoll:
534 	close(epoll_fd);
535 err:
536 	return ret;
537 }
538 base_init(kvm_ipc__init);
539 
540 int kvm_ipc__exit(struct kvm *kvm)
541 {
542 	u64 val = 1;
543 	int ret;
544 
545 	ret = write(stop_fd, &val, sizeof(val));
546 	if (ret < 0)
547 		return ret;
548 
549 	close(server_fd);
550 	close(epoll_fd);
551 
552 	kvm__remove_socket(kvm->cfg.guest_name);
553 
554 	return ret;
555 }
556 base_exit(kvm_ipc__exit);
557