xref: /kvmtool/kvm-ipc.c (revision a2583dbf82ff8eb913b871fe64c0d691c4c14e3c)
1 #include <sys/epoll.h>
2 #include <sys/un.h>
3 #include <sys/types.h>
4 #include <sys/socket.h>
5 #include <sys/eventfd.h>
6 #include <dirent.h>
7 
8 #include "kvm/kvm-ipc.h"
9 #include "kvm/rwsem.h"
10 #include "kvm/read-write.h"
11 #include "kvm/util.h"
12 #include "kvm/kvm.h"
13 #include "kvm/builtin-debug.h"
14 #include "kvm/strbuf.h"
15 #include "kvm/kvm-cpu.h"
16 #include "kvm/8250-serial.h"
17 
18 struct kvm_ipc_head {
19 	u32 type;
20 	u32 len;
21 };
22 
23 #define KVM_IPC_MAX_MSGS 16
24 
25 #define KVM_SOCK_SUFFIX		".sock"
26 #define KVM_SOCK_SUFFIX_LEN	((ssize_t)sizeof(KVM_SOCK_SUFFIX) - 1)
27 
28 extern __thread struct kvm_cpu *current_kvm_cpu;
29 static void (*msgs[KVM_IPC_MAX_MSGS])(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg);
30 static DECLARE_RWSEM(msgs_rwlock);
31 static int epoll_fd, server_fd, stop_fd;
32 static pthread_t thread;
33 
34 static int kvm__create_socket(struct kvm *kvm)
35 {
36 	char full_name[PATH_MAX];
37 	int s;
38 	struct sockaddr_un local;
39 	int len, r;
40 
41 	/* This usually 108 bytes long */
42 	BUILD_BUG_ON(sizeof(local.sun_path) < 32);
43 
44 	snprintf(full_name, sizeof(full_name), "%s/%s%s",
45 		 kvm__get_dir(), kvm->cfg.guest_name, KVM_SOCK_SUFFIX);
46 	if (access(full_name, F_OK) == 0) {
47 		pr_err("Socket file %s already exist", full_name);
48 		return -EEXIST;
49 	}
50 
51 	s = socket(AF_UNIX, SOCK_STREAM, 0);
52 	if (s < 0) {
53 		perror("socket");
54 		return s;
55 	}
56 
57 	local.sun_family = AF_UNIX;
58 	strlcpy(local.sun_path, full_name, sizeof(local.sun_path));
59 	len = strlen(local.sun_path) + sizeof(local.sun_family);
60 	r = bind(s, (struct sockaddr *)&local, len);
61 	if (r < 0) {
62 		perror("bind");
63 		goto fail;
64 	}
65 
66 	r = listen(s, 5);
67 	if (r < 0) {
68 		perror("listen");
69 		goto fail;
70 	}
71 
72 	return s;
73 
74 fail:
75 	close(s);
76 	return r;
77 }
78 
79 void kvm__remove_socket(const char *name)
80 {
81 	char full_name[PATH_MAX];
82 
83 	snprintf(full_name, sizeof(full_name), "%s/%s%s",
84 		 kvm__get_dir(), name, KVM_SOCK_SUFFIX);
85 	unlink(full_name);
86 }
87 
88 int kvm__get_sock_by_instance(const char *name)
89 {
90 	int s, len, r;
91 	char sock_file[PATH_MAX];
92 	struct sockaddr_un local;
93 
94 	snprintf(sock_file, sizeof(sock_file), "%s/%s%s",
95 		 kvm__get_dir(), name, KVM_SOCK_SUFFIX);
96 	s = socket(AF_UNIX, SOCK_STREAM, 0);
97 
98 	local.sun_family = AF_UNIX;
99 	strlcpy(local.sun_path, sock_file, sizeof(local.sun_path));
100 	len = strlen(local.sun_path) + sizeof(local.sun_family);
101 
102 	r = connect(s, &local, len);
103 	if (r < 0 && errno == ECONNREFUSED) {
104 		/* Tell the user clean ghost socket file */
105 		pr_err("\"%s\" could be a ghost socket file, please remove it",
106 				sock_file);
107 		return r;
108 	} else if (r < 0) {
109 		return r;
110 	}
111 
112 	return s;
113 }
114 
/*
 * Tell whether directory entry @dent, located in directory @base_path, is a
 * socket.
 *
 * d_type is used when it is conclusive. For DT_UNKNOWN the entry is stat()ed
 * instead. Returns false if the entry is not a socket, if stat() fails, or
 * if "<base_path>/<d_name>" does not fit into PATH_MAX bytes.
 */
static bool is_socket(const char *base_path, const struct dirent *dent)
{
	switch (dent->d_type) {
	case DT_SOCK:
		return true;

	case DT_UNKNOWN: {
		char path[PATH_MAX];
		struct stat st;
		int n;

		/* Bounded formatting: never write past the end of path[]. */
		n = snprintf(path, sizeof(path), "%s/%s", base_path, dent->d_name);
		if (n < 0 || (size_t)n >= sizeof(path))
			return false;

		if (stat(path, &st))
			return false;

		return S_ISSOCK(st.st_mode);
	}
	default:
		return false;
	}
}
135 
136 int kvm__enumerate_instances(int (*callback)(const char *name, int fd))
137 {
138 	int sock;
139 	DIR *dir;
140 	struct dirent entry, *result;
141 	int ret = 0;
142 	const char *path;
143 
144 	path = kvm__get_dir();
145 
146 	dir = opendir(path);
147 	if (!dir)
148 		return -errno;
149 
150 	for (;;) {
151 		readdir_r(dir, &entry, &result);
152 		if (result == NULL)
153 			break;
154 		if (is_socket(path, &entry)) {
155 			ssize_t name_len = strlen(entry.d_name);
156 			char *p;
157 
158 			if (name_len <= KVM_SOCK_SUFFIX_LEN)
159 				continue;
160 
161 			p = &entry.d_name[name_len - KVM_SOCK_SUFFIX_LEN];
162 			if (memcmp(KVM_SOCK_SUFFIX, p, KVM_SOCK_SUFFIX_LEN))
163 				continue;
164 
165 			*p = 0;
166 			sock = kvm__get_sock_by_instance(entry.d_name);
167 			if (sock < 0)
168 				continue;
169 			ret = callback(entry.d_name, sock);
170 			close(sock);
171 			if (ret < 0)
172 				break;
173 		}
174 	}
175 
176 	closedir(dir);
177 
178 	return ret;
179 }
180 
181 int kvm_ipc__register_handler(u32 type, void (*cb)(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg))
182 {
183 	if (type >= KVM_IPC_MAX_MSGS)
184 		return -ENOSPC;
185 
186 	down_write(&msgs_rwlock);
187 	msgs[type] = cb;
188 	up_write(&msgs_rwlock);
189 
190 	return 0;
191 }
192 
193 int kvm_ipc__send(int fd, u32 type)
194 {
195 	struct kvm_ipc_head head = {.type = type, .len = 0,};
196 
197 	if (write_in_full(fd, &head, sizeof(head)) < 0)
198 		return -1;
199 
200 	return 0;
201 }
202 
203 int kvm_ipc__send_msg(int fd, u32 type, u32 len, u8 *msg)
204 {
205 	struct kvm_ipc_head head = {.type = type, .len = len,};
206 
207 	if (write_in_full(fd, &head, sizeof(head)) < 0)
208 		return -1;
209 
210 	if (write_in_full(fd, msg, len) < 0)
211 		return -1;
212 
213 	return 0;
214 }
215 
216 static int kvm_ipc__handle(struct kvm *kvm, int fd, u32 type, u32 len, u8 *data)
217 {
218 	void (*cb)(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg);
219 
220 	if (type >= KVM_IPC_MAX_MSGS)
221 		return -ENOSPC;
222 
223 	down_read(&msgs_rwlock);
224 	cb = msgs[type];
225 	up_read(&msgs_rwlock);
226 
227 	if (cb == NULL) {
228 		pr_warning("No device handles type %u\n", type);
229 		return -ENODEV;
230 	}
231 
232 	cb(kvm, fd, type, len, data);
233 
234 	return 0;
235 }
236 
237 static int kvm_ipc__new_conn(int fd)
238 {
239 	int client;
240 	struct epoll_event ev;
241 
242 	client = accept(fd, NULL, NULL);
243 	if (client < 0)
244 		return -1;
245 
246 	ev.events = EPOLLIN | EPOLLRDHUP;
247 	ev.data.fd = client;
248 	if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, client, &ev) < 0) {
249 		close(client);
250 		return -1;
251 	}
252 
253 	return client;
254 }
255 
256 static void kvm_ipc__close_conn(int fd)
257 {
258 	epoll_ctl(epoll_fd, EPOLL_CTL_DEL, fd, NULL);
259 	close(fd);
260 }
261 
262 static int kvm_ipc__receive(struct kvm *kvm, int fd)
263 {
264 	struct kvm_ipc_head head;
265 	u8 *msg = NULL;
266 	u32 n;
267 
268 	n = read(fd, &head, sizeof(head));
269 	if (n != sizeof(head))
270 		goto done;
271 
272 	msg = malloc(head.len);
273 	if (msg == NULL)
274 		goto done;
275 
276 	n = read_in_full(fd, msg, head.len);
277 	if (n != head.len)
278 		goto done;
279 
280 	kvm_ipc__handle(kvm, fd, head.type, head.len, msg);
281 
282 	return 0;
283 
284 done:
285 	free(msg);
286 	return -1;
287 }
288 
289 static void *kvm_ipc__thread(void *param)
290 {
291 	struct epoll_event event;
292 	struct kvm *kvm = param;
293 
294 	kvm__set_thread_name("kvm-ipc");
295 
296 	for (;;) {
297 		int nfds;
298 
299 		nfds = epoll_wait(epoll_fd, &event, 1, -1);
300 		if (nfds > 0) {
301 			int fd = event.data.fd;
302 
303 			if (fd == stop_fd && event.events & EPOLLIN) {
304 				break;
305 			} else if (fd == server_fd) {
306 				int client, r;
307 
308 				client = kvm_ipc__new_conn(fd);
309 				/*
310 				 * Handle multiple IPC cmd at a time
311 				 */
312 				do {
313 					r = kvm_ipc__receive(kvm, client);
314 				} while	(r == 0);
315 
316 			} else if (event.events & (EPOLLERR | EPOLLRDHUP | EPOLLHUP)) {
317 				kvm_ipc__close_conn(fd);
318 			} else {
319 				kvm_ipc__receive(kvm, fd);
320 			}
321 		}
322 	}
323 
324 	return NULL;
325 }
326 
327 static void kvm__pid(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg)
328 {
329 	pid_t pid = getpid();
330 	int r = 0;
331 
332 	if (type == KVM_IPC_PID)
333 		r = write(fd, &pid, sizeof(pid));
334 
335 	if (r < 0)
336 		pr_warning("Failed sending PID");
337 }
338 
339 static void handle_stop(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg)
340 {
341 	if (WARN_ON(type != KVM_IPC_STOP || len))
342 		return;
343 
344 	kvm_cpu__reboot(kvm);
345 }
346 
347 /* Pause/resume the guest using SIGUSR2 */
348 static int is_paused;
349 
350 static void handle_pause(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg)
351 {
352 	if (WARN_ON(len))
353 		return;
354 
355 	if (type == KVM_IPC_RESUME && is_paused) {
356 		kvm->vm_state = KVM_VMSTATE_RUNNING;
357 		kvm__continue(kvm);
358 	} else if (type == KVM_IPC_PAUSE && !is_paused) {
359 		kvm->vm_state = KVM_VMSTATE_PAUSED;
360 		ioctl(kvm->vm_fd, KVM_KVMCLOCK_CTRL);
361 		kvm__pause(kvm);
362 	} else {
363 		return;
364 	}
365 
366 	is_paused = !is_paused;
367 }
368 
369 static void handle_vmstate(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg)
370 {
371 	int r = 0;
372 
373 	if (type == KVM_IPC_VMSTATE)
374 		r = write(fd, &kvm->vm_state, sizeof(kvm->vm_state));
375 
376 	if (r < 0)
377 		pr_warning("Failed sending VMSTATE");
378 }
379 
380 /*
381  * Serialize debug printout so that the output of multiple vcpus does not
382  * get mixed up:
383  */
384 static int printout_done;
385 
386 static void handle_sigusr1(int sig)
387 {
388 	struct kvm_cpu *cpu = current_kvm_cpu;
389 	int fd = kvm_cpu__get_debug_fd();
390 
391 	if (!cpu || cpu->needs_nmi)
392 		return;
393 
394 	dprintf(fd, "\n #\n # vCPU #%ld's dump:\n #\n", cpu->cpu_id);
395 	kvm_cpu__show_registers(cpu);
396 	kvm_cpu__show_code(cpu);
397 	kvm_cpu__show_page_tables(cpu);
398 	fflush(stdout);
399 	printout_done = 1;
400 }
401 
402 static void handle_debug(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg)
403 {
404 	int i;
405 	struct debug_cmd_params *params;
406 	u32 dbg_type;
407 	u32 vcpu;
408 
409 	if (WARN_ON(type != KVM_IPC_DEBUG || len != sizeof(*params)))
410 		return;
411 
412 	params = (void *)msg;
413 	dbg_type = params->dbg_type;
414 	vcpu = params->cpu;
415 
416 	if (dbg_type & KVM_DEBUG_CMD_TYPE_SYSRQ)
417 		serial8250__inject_sysrq(kvm, params->sysrq);
418 
419 	if (dbg_type & KVM_DEBUG_CMD_TYPE_NMI) {
420 		if ((int)vcpu >= kvm->nrcpus)
421 			return;
422 
423 		kvm->cpus[vcpu]->needs_nmi = 1;
424 		pthread_kill(kvm->cpus[vcpu]->thread, SIGUSR1);
425 	}
426 
427 	if (!(dbg_type & KVM_DEBUG_CMD_TYPE_DUMP))
428 		return;
429 
430 	for (i = 0; i < kvm->nrcpus; i++) {
431 		struct kvm_cpu *cpu = kvm->cpus[i];
432 
433 		if (!cpu)
434 			continue;
435 
436 		printout_done = 0;
437 
438 		kvm_cpu__set_debug_fd(fd);
439 		pthread_kill(cpu->thread, SIGUSR1);
440 		/*
441 		 * Wait for the vCPU to dump state before signalling
442 		 * the next thread. Since this is debug code it does
443 		 * not matter that we are burning CPU time a bit:
444 		 */
445 		while (!printout_done)
446 			sleep(0);
447 	}
448 
449 	close(fd);
450 
451 	serial8250__inject_sysrq(kvm, 'p');
452 }
453 
454 int kvm_ipc__init(struct kvm *kvm)
455 {
456 	int ret;
457 	int sock = kvm__create_socket(kvm);
458 	struct epoll_event ev = {0};
459 
460 	server_fd = sock;
461 
462 	epoll_fd = epoll_create(KVM_IPC_MAX_MSGS);
463 	if (epoll_fd < 0) {
464 		perror("epoll_create");
465 		ret = epoll_fd;
466 		goto err;
467 	}
468 
469 	ev.events = EPOLLIN | EPOLLET;
470 	ev.data.fd = sock;
471 	if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, sock, &ev) < 0) {
472 		pr_err("Failed adding socket to epoll");
473 		ret = -EFAULT;
474 		goto err_epoll;
475 	}
476 
477 	stop_fd = eventfd(0, 0);
478 	if (stop_fd < 0) {
479 		perror("eventfd");
480 		ret = stop_fd;
481 		goto err_epoll;
482 	}
483 
484 	ev.events = EPOLLIN | EPOLLET;
485 	ev.data.fd = stop_fd;
486 	if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, stop_fd, &ev) < 0) {
487 		pr_err("Failed adding stop event to epoll");
488 		ret = -EFAULT;
489 		goto err_stop;
490 	}
491 
492 	if (pthread_create(&thread, NULL, kvm_ipc__thread, kvm) != 0) {
493 		pr_err("Failed starting IPC thread");
494 		ret = -EFAULT;
495 		goto err_stop;
496 	}
497 
498 	kvm_ipc__register_handler(KVM_IPC_PID, kvm__pid);
499 	kvm_ipc__register_handler(KVM_IPC_DEBUG, handle_debug);
500 	kvm_ipc__register_handler(KVM_IPC_PAUSE, handle_pause);
501 	kvm_ipc__register_handler(KVM_IPC_RESUME, handle_pause);
502 	kvm_ipc__register_handler(KVM_IPC_STOP, handle_stop);
503 	kvm_ipc__register_handler(KVM_IPC_VMSTATE, handle_vmstate);
504 	signal(SIGUSR1, handle_sigusr1);
505 
506 	return 0;
507 
508 err_stop:
509 	close(stop_fd);
510 err_epoll:
511 	close(epoll_fd);
512 err:
513 	return ret;
514 }
515 base_init(kvm_ipc__init);
516 
517 int kvm_ipc__exit(struct kvm *kvm)
518 {
519 	u64 val = 1;
520 	int ret;
521 
522 	ret = write(stop_fd, &val, sizeof(val));
523 	if (ret < 0)
524 		return ret;
525 
526 	close(server_fd);
527 	close(epoll_fd);
528 
529 	kvm__remove_socket(kvm->cfg.guest_name);
530 
531 	return ret;
532 }
533 base_exit(kvm_ipc__exit);
534