xref: /kvmtool/kvm.c (revision 5e9c654e60e85090ff365e875b2a7269d9301859)
#include "kvm/kvm.h"
#include "kvm/read-write.h"
#include "kvm/util.h"
#include "kvm/strbuf.h"
#include "kvm/mutex.h"
#include "kvm/kvm-cpu.h"
#include "kvm/kvm-ipc.h"

#include <linux/kernel.h>
#include <linux/kvm.h>
#include <linux/list.h>
#include <linux/err.h>

#include <sys/un.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/eventfd.h>
#include <asm/unistd.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
/* Expands to a designated initializer mapping an exit code to its name. */
#define DEFINE_KVM_EXIT_REASON(reason) [reason] = #reason

/*
 * Human-readable names for KVM_EXIT_* codes, indexed by the exit reason
 * value itself (used when reporting unexpected VM exits).
 */
const char *kvm_exit_reasons[] = {
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_UNKNOWN),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_EXCEPTION),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_IO),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_HYPERCALL),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_DEBUG),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_HLT),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_MMIO),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_IRQ_WINDOW_OPEN),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_SHUTDOWN),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_FAIL_ENTRY),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTR),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_SET_TPR),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_TPR_ACCESS),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_S390_SIEIC),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_S390_RESET),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_DCR),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_NMI),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTERNAL_ERROR),
#ifdef CONFIG_PPC64
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_PAPR_HCALL),
#endif
};
59 
/* eventfd used by paused VCPU threads to report back to kvm__pause(). */
static int pause_event;
/* Held from kvm__pause() until kvm__continue(); serializes pause requests. */
static DEFINE_MUTEX(pause_lock);
/* Per-architecture list of required KVM extensions; defined elsewhere. */
extern struct kvm_ext kvm_req_ext[];

/* Canonical runtime directory path, always ending in '/' (see set_dir()). */
static char kvm_dir[PATH_MAX];

extern __thread struct kvm_cpu *current_kvm_cpu;
67 
set_dir(const char * fmt,va_list args)68 static int set_dir(const char *fmt, va_list args)
69 {
70 	char tmp[PATH_MAX];
71 
72 	vsnprintf(tmp, sizeof(tmp), fmt, args);
73 
74 	mkdir(tmp, 0777);
75 
76 	if (!realpath(tmp, kvm_dir))
77 		return -errno;
78 
79 	strcat(kvm_dir, "/");
80 
81 	return 0;
82 }
83 
/* Printf-style front end for set_dir(); errors are deliberately ignored. */
void kvm__set_dir(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	set_dir(fmt, ap);
	va_end(ap);
}
92 
/* Return the runtime directory set by kvm__set_dir() (ends with '/'). */
const char *kvm__get_dir(void)
{
	return kvm_dir;
}
97 
kvm__supports_vm_extension(struct kvm * kvm,unsigned int extension)98 bool kvm__supports_vm_extension(struct kvm *kvm, unsigned int extension)
99 {
100 	static int supports_vm_ext_check = 0;
101 	int ret;
102 
103 	switch (supports_vm_ext_check) {
104 	case 0:
105 		ret = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION,
106 			    KVM_CAP_CHECK_EXTENSION_VM);
107 		if (ret <= 0) {
108 			supports_vm_ext_check = -1;
109 			return false;
110 		}
111 		supports_vm_ext_check = 1;
112 		/* fall through */
113 	case 1:
114 		break;
115 	case -1:
116 		return false;
117 	}
118 
119 	ret = ioctl(kvm->vm_fd, KVM_CHECK_EXTENSION, extension);
120 	if (ret < 0)
121 		return false;
122 
123 	return ret;
124 }
125 
kvm__supports_extension(struct kvm * kvm,unsigned int extension)126 bool kvm__supports_extension(struct kvm *kvm, unsigned int extension)
127 {
128 	int ret;
129 
130 	ret = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, extension);
131 	if (ret < 0)
132 		return false;
133 
134 	return ret;
135 }
136 
kvm__check_extensions(struct kvm * kvm)137 static int kvm__check_extensions(struct kvm *kvm)
138 {
139 	int i;
140 
141 	for (i = 0; ; i++) {
142 		if (!kvm_req_ext[i].name)
143 			break;
144 		if (!kvm__supports_extension(kvm, kvm_req_ext[i].code)) {
145 			pr_err("Unsupported KVM extension detected: %s",
146 				kvm_req_ext[i].name);
147 			return -i;
148 		}
149 	}
150 
151 	return 0;
152 }
153 
kvm__new(void)154 struct kvm *kvm__new(void)
155 {
156 	struct kvm *kvm = calloc(1, sizeof(*kvm));
157 	if (!kvm)
158 		return ERR_PTR(-ENOMEM);
159 
160 	mutex_init(&kvm->mem_banks_lock);
161 	kvm->sys_fd = -1;
162 	kvm->vm_fd = -1;
163 
164 #ifdef KVM_BRLOCK_DEBUG
165 	kvm->brlock_sem = (pthread_rwlock_t) PTHREAD_RWLOCK_INITIALIZER;
166 #endif
167 
168 	return kvm;
169 }
170 
kvm__exit(struct kvm * kvm)171 int kvm__exit(struct kvm *kvm)
172 {
173 	struct kvm_mem_bank *bank, *tmp;
174 
175 	kvm__arch_delete_ram(kvm);
176 
177 	list_for_each_entry_safe(bank, tmp, &kvm->mem_banks, list) {
178 		list_del(&bank->list);
179 		free(bank);
180 	}
181 
182 	free(kvm);
183 	return 0;
184 }
185 core_exit(kvm__exit);
186 
/*
 * Unregister a memory region previously added with kvm__register_mem().
 * The (guest_phys, size, userspace_addr) triple must match an existing
 * bank exactly. Reserved banks cannot be removed.
 *
 * Returns 0 on success, -EINVAL if no matching (or a reserved) bank is
 * found, or a negative errno from the KVM ioctl.
 */
int kvm__destroy_mem(struct kvm *kvm, u64 guest_phys, u64 size,
		     void *userspace_addr)
{
	struct kvm_userspace_memory_region mem;
	struct kvm_mem_bank *bank;
	int ret;

	mutex_lock(&kvm->mem_banks_lock);
	/* Look for a bank matching all three parameters exactly. */
	list_for_each_entry(bank, &kvm->mem_banks, list)
		if (bank->guest_phys_addr == guest_phys &&
		    bank->size == size && bank->host_addr == userspace_addr)
			break;

	/*
	 * If the loop ran to completion, bank's list pointer is the list
	 * head itself — i.e. no match was found.
	 */
	if (&bank->list == &kvm->mem_banks) {
		pr_err("Region [%llx-%llx] not found", guest_phys,
		       guest_phys + size - 1);
		ret = -EINVAL;
		goto out;
	}

	if (bank->type == KVM_MEM_TYPE_RESERVED) {
		pr_err("Cannot delete reserved region [%llx-%llx]",
		       guest_phys, guest_phys + size - 1);
		ret = -EINVAL;
		goto out;
	}

	/* memory_size == 0 tells KVM to delete the slot. */
	mem = (struct kvm_userspace_memory_region) {
		.slot			= bank->slot,
		.guest_phys_addr	= guest_phys,
		.memory_size		= 0,
		.userspace_addr		= (unsigned long)userspace_addr,
	};

	ret = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);
	if (ret < 0) {
		ret = -errno;
		goto out;
	}

	list_del(&bank->list);
	free(bank);
	kvm->mem_slots--;
	ret = 0;

out:
	mutex_unlock(&kvm->mem_banks_lock);
	return ret;
}
236 
/*
 * Register a guest memory region of @size bytes at @guest_phys, backed by
 * host memory at @userspace_addr.
 *
 * Overlaps with existing banks are rejected with -EINVAL, except that two
 * KVM_MEM_TYPE_RESERVED regions are merged into one covering bank.
 * Reserved regions are only tracked internally; non-reserved regions are
 * also installed into KVM via KVM_SET_USER_MEMORY_REGION.
 *
 * Returns 0 on success, -EINVAL on overlap, -ENOMEM on allocation failure,
 * or a negative errno from the ioctl.
 */
int kvm__register_mem(struct kvm *kvm, u64 guest_phys, u64 size,
		      void *userspace_addr, enum kvm_mem_type type)
{
	struct kvm_userspace_memory_region mem;
	struct kvm_mem_bank *merged = NULL;
	struct kvm_mem_bank *bank;
	struct list_head *prev_entry;
	u32 slot;
	u32 flags = 0;
	int ret;

	mutex_lock(&kvm->mem_banks_lock);
	/* Check for overlap and find first empty slot. */
	slot = 0;
	prev_entry = &kvm->mem_banks;
	list_for_each_entry(bank, &kvm->mem_banks, list) {
		u64 bank_end = bank->guest_phys_addr + bank->size - 1;
		u64 end = guest_phys + size - 1;
		if (guest_phys > bank_end || end < bank->guest_phys_addr) {
			/*
			 * Keep the banks sorted ascending by slot, so it's
			 * easier for us to find a free slot.
			 */
			if (bank->slot == slot) {
				slot++;
				prev_entry = &bank->list;
			}
			continue;
		}

		/* Merge overlapping reserved regions */
		if (bank->type == KVM_MEM_TYPE_RESERVED &&
		    type == KVM_MEM_TYPE_RESERVED) {
			/* Grow the existing bank to the union of both. */
			bank->guest_phys_addr = min(bank->guest_phys_addr, guest_phys);
			bank->size = max(bank_end, end) - bank->guest_phys_addr + 1;

			if (merged) {
				/*
				 * This is at least the second merge, remove
				 * previous result.
				 */
				list_del(&merged->list);
				free(merged);
			}

			/* Continue scanning with the enlarged extents. */
			guest_phys = bank->guest_phys_addr;
			size = bank->size;
			merged = bank;

			/* Keep checking that we don't overlap another region */
			continue;
		}

		pr_err("%s region [%llx-%llx] would overlap %s region [%llx-%llx]",
		       kvm_mem_type_to_string(type), guest_phys, guest_phys + size - 1,
		       kvm_mem_type_to_string(bank->type), bank->guest_phys_addr,
		       bank->guest_phys_addr + bank->size - 1);

		ret = -EINVAL;
		goto out;
	}

	/* A successful merge needs no new bank and no KVM slot update. */
	if (merged) {
		ret = 0;
		goto out;
	}

	bank = malloc(sizeof(*bank));
	if (!bank) {
		ret = -ENOMEM;
		goto out;
	}

	INIT_LIST_HEAD(&bank->list);
	bank->guest_phys_addr		= guest_phys;
	bank->host_addr			= userspace_addr;
	bank->size			= size;
	bank->type			= type;
	bank->slot			= slot;

	if (type & KVM_MEM_TYPE_READONLY)
		flags |= KVM_MEM_READONLY;

	/* Reserved regions are bookkeeping only; don't tell KVM about them. */
	if (type != KVM_MEM_TYPE_RESERVED) {
		mem = (struct kvm_userspace_memory_region) {
			.slot			= slot,
			.flags			= flags,
			.guest_phys_addr	= guest_phys,
			.memory_size		= size,
			.userspace_addr		= (unsigned long)userspace_addr,
		};

		ret = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);
		if (ret < 0) {
			ret = -errno;
			goto out;
		}
	}

	/* Insert after prev_entry to keep the list sorted by slot. */
	list_add(&bank->list, prev_entry);
	kvm->mem_slots++;
	ret = 0;

out:
	mutex_unlock(&kvm->mem_banks_lock);
	return ret;
}
344 
/*
 * Translate a guest physical address to the corresponding host virtual
 * address. Returns NULL (after a warning) when no registered bank covers
 * the address.
 */
void *guest_flat_to_host(struct kvm *kvm, u64 offset)
{
	struct kvm_mem_bank *bank;

	list_for_each_entry(bank, &kvm->mem_banks, list) {
		u64 start = bank->guest_phys_addr;

		if (offset < start || offset >= start + bank->size)
			continue;

		return bank->host_addr + (offset - start);
	}

	pr_warning("unable to translate guest address 0x%llx to host",
			(unsigned long long)offset);
	return NULL;
}
361 
host_to_guest_flat(struct kvm * kvm,void * ptr)362 u64 host_to_guest_flat(struct kvm *kvm, void *ptr)
363 {
364 	struct kvm_mem_bank *bank;
365 
366 	list_for_each_entry(bank, &kvm->mem_banks, list) {
367 		void *bank_start = bank->host_addr;
368 		void *bank_end = bank_start + bank->size;
369 
370 		if (ptr >= bank_start && ptr < bank_end)
371 			return bank->guest_phys_addr + (ptr - bank_start);
372 	}
373 
374 	pr_warning("unable to translate host address %p to guest", ptr);
375 	return 0;
376 }
377 
378 /*
379  * Iterate over each registered memory bank. Call @fun for each bank with @data
380  * as argument. @type is a bitmask that allows to filter banks according to
381  * their type.
382  *
383  * If one call to @fun returns a non-zero value, stop iterating and return the
384  * value. Otherwise, return zero.
385  */
kvm__for_each_mem_bank(struct kvm * kvm,enum kvm_mem_type type,int (* fun)(struct kvm * kvm,struct kvm_mem_bank * bank,void * data),void * data)386 int kvm__for_each_mem_bank(struct kvm *kvm, enum kvm_mem_type type,
387 			   int (*fun)(struct kvm *kvm, struct kvm_mem_bank *bank, void *data),
388 			   void *data)
389 {
390 	int ret;
391 	struct kvm_mem_bank *bank;
392 
393 	list_for_each_entry(bank, &kvm->mem_banks, list) {
394 		if (type != KVM_MEM_TYPE_ALL && !(bank->type & type))
395 			continue;
396 
397 		ret = fun(kvm, bank, data);
398 		if (ret)
399 			break;
400 	}
401 
402 	return ret;
403 }
404 
kvm__recommended_cpus(struct kvm * kvm)405 int kvm__recommended_cpus(struct kvm *kvm)
406 {
407 	int ret;
408 
409 	ret = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_NR_VCPUS);
410 	if (ret <= 0)
411 		/*
412 		 * api.txt states that if KVM_CAP_NR_VCPUS does not exist,
413 		 * assume 4.
414 		 */
415 		return 4;
416 
417 	return ret;
418 }
419 
kvm__max_cpus(struct kvm * kvm)420 int kvm__max_cpus(struct kvm *kvm)
421 {
422 	int ret;
423 
424 	ret = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);
425 	if (ret <= 0)
426 		ret = kvm__recommended_cpus(kvm);
427 
428 	return ret;
429 }
430 
/*
 * Default VM type passed to KVM_CREATE_VM; weak so an architecture can
 * override it (e.g. to select an IPA size).
 */
int __attribute__((weak)) kvm__get_vm_type(struct kvm *kvm)
{
	return KVM_VM_TYPE;
}
435 
kvm__init(struct kvm * kvm)436 int kvm__init(struct kvm *kvm)
437 {
438 	int ret;
439 
440 	if (!kvm__arch_cpu_supports_vm()) {
441 		pr_err("Your CPU does not support hardware virtualization");
442 		ret = -ENOSYS;
443 		goto err;
444 	}
445 
446 	kvm->sys_fd = open(kvm->cfg.dev, O_RDWR);
447 	if (kvm->sys_fd < 0) {
448 		if (errno == ENOENT)
449 			pr_err("'%s' not found. Please make sure your kernel has CONFIG_KVM "
450 			       "enabled and that the KVM modules are loaded.", kvm->cfg.dev);
451 		else if (errno == ENODEV)
452 			pr_err("'%s' KVM driver not available.\n  # (If the KVM "
453 			       "module is loaded then 'dmesg' may offer further clues "
454 			       "about the failure.)", kvm->cfg.dev);
455 		else
456 			pr_err("Could not open %s: ", kvm->cfg.dev);
457 
458 		ret = -errno;
459 		goto err_free;
460 	}
461 
462 	ret = ioctl(kvm->sys_fd, KVM_GET_API_VERSION, 0);
463 	if (ret != KVM_API_VERSION) {
464 		pr_err("KVM_API_VERSION ioctl");
465 		ret = -errno;
466 		goto err_sys_fd;
467 	}
468 
469 	kvm->vm_fd = ioctl(kvm->sys_fd, KVM_CREATE_VM, kvm__get_vm_type(kvm));
470 	if (kvm->vm_fd < 0) {
471 		pr_err("KVM_CREATE_VM ioctl");
472 		ret = kvm->vm_fd;
473 		goto err_sys_fd;
474 	}
475 
476 	if (kvm__check_extensions(kvm)) {
477 		pr_err("A required KVM extension is not supported by OS");
478 		ret = -ENOSYS;
479 		goto err_vm_fd;
480 	}
481 
482 	kvm__arch_init(kvm);
483 
484 	INIT_LIST_HEAD(&kvm->mem_banks);
485 	kvm__init_ram(kvm);
486 
487 	if (!kvm->cfg.firmware_filename) {
488 		if (!kvm__load_kernel(kvm, kvm->cfg.kernel_filename,
489 				kvm->cfg.initrd_filename, kvm->cfg.real_cmdline))
490 			die("unable to load kernel %s", kvm->cfg.kernel_filename);
491 	}
492 
493 	if (kvm->cfg.firmware_filename) {
494 		if (!kvm__load_firmware(kvm, kvm->cfg.firmware_filename))
495 			die("unable to load firmware image %s: %s", kvm->cfg.firmware_filename, strerror(errno));
496 	} else {
497 		ret = kvm__arch_setup_firmware(kvm);
498 		if (ret < 0)
499 			die("kvm__arch_setup_firmware() failed with error %d\n", ret);
500 	}
501 
502 	return 0;
503 
504 err_vm_fd:
505 	close(kvm->vm_fd);
506 err_sys_fd:
507 	close(kvm->sys_fd);
508 err_free:
509 	free(kvm);
510 err:
511 	return ret;
512 }
513 core_init(kvm__init);
514 
/*
 * Open the kernel image (and optional initrd) and hand them to the
 * architecture loader. die()s — never returns — on open or load failure,
 * so the function only ever returns true.
 */
bool kvm__load_kernel(struct kvm *kvm, const char *kernel_filename,
		const char *initrd_filename, const char *kernel_cmdline)
{
	int kernel_fd, initrd_fd = -1;
	bool loaded;

	kernel_fd = open(kernel_filename, O_RDONLY);
	if (kernel_fd < 0)
		die("Unable to open kernel %s", kernel_filename);

	if (initrd_filename) {
		initrd_fd = open(initrd_filename, O_RDONLY);
		if (initrd_fd < 0)
			die("Unable to open initrd %s", initrd_filename);
	}

	loaded = kvm__arch_load_kernel_image(kvm, kernel_fd, initrd_fd,
					     kernel_cmdline);

	if (initrd_fd >= 0)
		close(initrd_fd);
	close(kernel_fd);

	if (!loaded)
		die("%s is not a valid kernel image", kernel_filename);

	return loaded;
}
542 
/*
 * Hex-dump @size bytes of guest memory starting at guest physical @addr to
 * @debug_fd, eight bytes per line. @size is rounded down to a multiple of 8.
 */
void kvm__dump_mem(struct kvm *kvm, unsigned long addr, unsigned long size, int debug_fd)
{
	unsigned char *p;
	unsigned long n;

	size &= ~7; /* mod 8 */
	if (!size)
		return;

	p = guest_flat_to_host(kvm, addr);
	/*
	 * Bug fix: guest_flat_to_host() returns NULL for unmapped addresses;
	 * doing pointer arithmetic on NULL below would be undefined behavior.
	 */
	if (!p) {
		dprintf(debug_fd, " 0x%08lx: <unknown>\n", addr);
		return;
	}

	for (n = 0; n < size; n += 8) {
		if (!host_ptr_in_ram(kvm, p + n)) {
			dprintf(debug_fd, " 0x%08lx: <unknown>\n", addr + n);
			continue;
		}
		dprintf(debug_fd, " 0x%08lx: %02x %02x %02x %02x  %02x %02x %02x %02x\n",
			addr + n, p[n + 0], p[n + 1], p[n + 2], p[n + 3],
				  p[n + 4], p[n + 5], p[n + 6], p[n + 7]);
	}
}
564 
kvm__reboot(struct kvm * kvm)565 void kvm__reboot(struct kvm *kvm)
566 {
567 	/* Check if the guest is running */
568 	if (!kvm->cpus[0] || kvm->cpus[0]->thread == 0)
569 		return;
570 
571 	pthread_kill(kvm->cpus[0]->thread, SIGKVMEXIT);
572 }
573 
/*
 * Resume a guest paused by kvm__pause(): releases pause_lock, which the
 * paused VCPU threads are blocked on in kvm__notify_paused().
 */
void kvm__continue(struct kvm *kvm)
{
	mutex_unlock(&pause_lock);
}
578 
/*
 * Pause all running VCPUs and wait until every one has acknowledged.
 *
 * pause_lock is taken here and deliberately NOT released on return: VCPU
 * threads block on it in kvm__notify_paused(), and it is only dropped by
 * the matching kvm__continue() call.
 */
void kvm__pause(struct kvm *kvm)
{
	int i, paused_vcpus = 0;

	mutex_lock(&pause_lock);

	/* Check if the guest is running */
	if (!kvm->cpus || !kvm->cpus[0] || kvm->cpus[0]->thread == 0)
		return;

	pause_event = eventfd(0, 0);
	if (pause_event < 0)
		die("Failed creating pause notification event");
	for (i = 0; i < kvm->nrcpus; i++) {
		/* Signal running VCPUs; already-paused ones count as done. */
		if (kvm->cpus[i]->is_running && kvm->cpus[i]->paused == 0)
			pthread_kill(kvm->cpus[i]->thread, SIGKVMPAUSE);
		else
			paused_vcpus++;
	}

	/* Each VCPU writes 1 to the eventfd once it has stopped. */
	while (paused_vcpus < kvm->nrcpus) {
		u64 cur_read;

		if (read(pause_event, &cur_read, sizeof(cur_read)) < 0)
			die("Failed reading pause event");
		paused_vcpus += cur_read;
	}
	close(pause_event);
}
608 
/*
 * Called from a VCPU thread once it has stopped: report the pause via the
 * eventfd, then block on pause_lock until kvm__continue() releases it,
 * clearing the pause flag before returning to the run loop.
 */
void kvm__notify_paused(void)
{
	u64 p = 1;

	if (write(pause_event, &p, sizeof(p)) < 0)
		die("Failed notifying of paused VCPU.");

	/* Blocks here while the VM is paused; kvm__continue() unblocks us. */
	mutex_lock(&pause_lock);
	current_kvm_cpu->paused = 0;
	mutex_unlock(&pause_lock);
}
620