1 #include "kvm/kvm.h"
2 #include "kvm/read-write.h"
3 #include "kvm/util.h"
4 #include "kvm/strbuf.h"
5 #include "kvm/mutex.h"
6 #include "kvm/kvm-cpu.h"
7 #include "kvm/kvm-ipc.h"
8
9 #include <linux/kernel.h>
10 #include <linux/kvm.h>
11 #include <linux/list.h>
12 #include <linux/err.h>
13
14 #include <sys/un.h>
15 #include <sys/stat.h>
16 #include <sys/types.h>
17 #include <sys/socket.h>
18 #include <sys/ioctl.h>
19 #include <sys/mman.h>
20 #include <stdbool.h>
21 #include <limits.h>
22 #include <signal.h>
23 #include <stdarg.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <unistd.h>
27 #include <stdio.h>
28 #include <fcntl.h>
29 #include <time.h>
30 #include <sys/eventfd.h>
31 #include <asm/unistd.h>
32 #include <dirent.h>
33
/*
 * Table mapping KVM exit reason codes to their symbolic names for debug
 * output. Designated initializers keep the table indexed by the numeric
 * exit code itself.
 */
#define DEFINE_KVM_EXIT_REASON(reason) [reason] = #reason

const char *kvm_exit_reasons[] = {
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_UNKNOWN),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_EXCEPTION),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_IO),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_HYPERCALL),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_DEBUG),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_HLT),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_MMIO),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_IRQ_WINDOW_OPEN),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_SHUTDOWN),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_FAIL_ENTRY),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTR),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_SET_TPR),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_TPR_ACCESS),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_S390_SIEIC),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_S390_RESET),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_DCR),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_NMI),
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTERNAL_ERROR),
#ifdef CONFIG_PPC64
	DEFINE_KVM_EXIT_REASON(KVM_EXIT_PAPR_HCALL),
#endif
};
59
/* eventfd used by paused vCPU threads to acknowledge kvm__pause(). */
static int pause_event;
/* Held for the whole pause interval; released by kvm__continue(). */
static DEFINE_MUTEX(pause_lock);
/* Per-architecture list of required KVM extensions, terminated by a
 * NULL .name entry (see kvm__check_extensions()). */
extern struct kvm_ext kvm_req_ext[];

/* Runtime state directory; set_dir() stores it with a trailing '/'. */
static char kvm_dir[PATH_MAX];

extern __thread struct kvm_cpu *current_kvm_cpu;
67
set_dir(const char * fmt,va_list args)68 static int set_dir(const char *fmt, va_list args)
69 {
70 char tmp[PATH_MAX];
71
72 vsnprintf(tmp, sizeof(tmp), fmt, args);
73
74 mkdir(tmp, 0777);
75
76 if (!realpath(tmp, kvm_dir))
77 return -errno;
78
79 strcat(kvm_dir, "/");
80
81 return 0;
82 }
83
/*
 * Public varargs front end for set_dir(). Any error from set_dir() is
 * silently ignored here; callers observe the result via kvm__get_dir().
 */
void kvm__set_dir(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	set_dir(fmt, ap);
	va_end(ap);
}
92
/* Return the runtime state directory set via kvm__set_dir(); the stored
 * path carries a trailing '/'. */
const char *kvm__get_dir(void)
{
	return kvm_dir;
}
97
kvm__supports_vm_extension(struct kvm * kvm,unsigned int extension)98 bool kvm__supports_vm_extension(struct kvm *kvm, unsigned int extension)
99 {
100 static int supports_vm_ext_check = 0;
101 int ret;
102
103 switch (supports_vm_ext_check) {
104 case 0:
105 ret = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION,
106 KVM_CAP_CHECK_EXTENSION_VM);
107 if (ret <= 0) {
108 supports_vm_ext_check = -1;
109 return false;
110 }
111 supports_vm_ext_check = 1;
112 /* fall through */
113 case 1:
114 break;
115 case -1:
116 return false;
117 }
118
119 ret = ioctl(kvm->vm_fd, KVM_CHECK_EXTENSION, extension);
120 if (ret < 0)
121 return false;
122
123 return ret;
124 }
125
kvm__supports_extension(struct kvm * kvm,unsigned int extension)126 bool kvm__supports_extension(struct kvm *kvm, unsigned int extension)
127 {
128 int ret;
129
130 ret = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, extension);
131 if (ret < 0)
132 return false;
133
134 return ret;
135 }
136
kvm__check_extensions(struct kvm * kvm)137 static int kvm__check_extensions(struct kvm *kvm)
138 {
139 int i;
140
141 for (i = 0; ; i++) {
142 if (!kvm_req_ext[i].name)
143 break;
144 if (!kvm__supports_extension(kvm, kvm_req_ext[i].code)) {
145 pr_err("Unsupported KVM extension detected: %s",
146 kvm_req_ext[i].name);
147 return -i;
148 }
149 }
150
151 return 0;
152 }
153
kvm__new(void)154 struct kvm *kvm__new(void)
155 {
156 struct kvm *kvm = calloc(1, sizeof(*kvm));
157 if (!kvm)
158 return ERR_PTR(-ENOMEM);
159
160 mutex_init(&kvm->mem_banks_lock);
161 kvm->sys_fd = -1;
162 kvm->vm_fd = -1;
163
164 #ifdef KVM_BRLOCK_DEBUG
165 kvm->brlock_sem = (pthread_rwlock_t) PTHREAD_RWLOCK_INITIALIZER;
166 #endif
167
168 return kvm;
169 }
170
/*
 * Tear down a VM: release architecture RAM, free every memory-bank
 * bookkeeping entry and finally the struct kvm itself.
 * Always returns 0.
 */
int kvm__exit(struct kvm *kvm)
{
	struct kvm_mem_bank *bank, *tmp;

	kvm__arch_delete_ram(kvm);

	/* _safe iterator: entries are freed while walking the list. */
	list_for_each_entry_safe(bank, tmp, &kvm->mem_banks, list) {
		list_del(&bank->list);
		free(bank);
	}

	free(kvm);
	return 0;
}
core_exit(kvm__exit);
186
/*
 * Unregister a guest memory region previously set up with
 * kvm__register_mem(). The (guest_phys, size, userspace_addr) triple
 * must match an existing bank exactly; reserved banks cannot be
 * deleted. Returns 0 on success or a negative errno value.
 */
int kvm__destroy_mem(struct kvm *kvm, u64 guest_phys, u64 size,
		     void *userspace_addr)
{
	struct kvm_userspace_memory_region mem;
	struct kvm_mem_bank *bank;
	int ret;

	mutex_lock(&kvm->mem_banks_lock);
	list_for_each_entry(bank, &kvm->mem_banks, list)
		if (bank->guest_phys_addr == guest_phys &&
		    bank->size == size && bank->host_addr == userspace_addr)
			break;

	/*
	 * If the loop ran to completion the iterator points back at the
	 * list head rather than a real bank: no match was found.
	 */
	if (&bank->list == &kvm->mem_banks) {
		pr_err("Region [%llx-%llx] not found", guest_phys,
		       guest_phys + size - 1);
		ret = -EINVAL;
		goto out;
	}

	if (bank->type == KVM_MEM_TYPE_RESERVED) {
		pr_err("Cannot delete reserved region [%llx-%llx]",
		       guest_phys, guest_phys + size - 1);
		ret = -EINVAL;
		goto out;
	}

	/* A zero memory_size asks KVM to remove the slot (see the
	 * KVM_SET_USER_MEMORY_REGION API documentation). */
	mem = (struct kvm_userspace_memory_region) {
		.slot = bank->slot,
		.guest_phys_addr = guest_phys,
		.memory_size = 0,
		.userspace_addr = (unsigned long)userspace_addr,
	};

	ret = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);
	if (ret < 0) {
		ret = -errno;
		goto out;
	}

	list_del(&bank->list);
	free(bank);
	kvm->mem_slots--;
	ret = 0;

out:
	mutex_unlock(&kvm->mem_banks_lock);
	return ret;
}
236
/*
 * Register a guest memory bank. Banks of type KVM_MEM_TYPE_RESERVED
 * only claim the address range in our bookkeeping and are never handed
 * to KVM; overlapping reserved regions are merged into one bank. Any
 * other overlap is an error. Returns 0 on success or a negative errno
 * value.
 */
int kvm__register_mem(struct kvm *kvm, u64 guest_phys, u64 size,
		      void *userspace_addr, enum kvm_mem_type type)
{
	struct kvm_userspace_memory_region mem;
	struct kvm_mem_bank *merged = NULL;
	struct kvm_mem_bank *bank;
	struct list_head *prev_entry;
	u32 slot;
	u32 flags = 0;
	int ret;

	mutex_lock(&kvm->mem_banks_lock);
	/* Check for overlap and find first empty slot. */
	slot = 0;
	prev_entry = &kvm->mem_banks;
	list_for_each_entry(bank, &kvm->mem_banks, list) {
		u64 bank_end = bank->guest_phys_addr + bank->size - 1;
		u64 end = guest_phys + size - 1;
		if (guest_phys > bank_end || end < bank->guest_phys_addr) {
			/*
			 * Keep the banks sorted ascending by slot, so it's
			 * easier for us to find a free slot.
			 */
			if (bank->slot == slot) {
				slot++;
				prev_entry = &bank->list;
			}
			continue;
		}

		/* Merge overlapping reserved regions */
		if (bank->type == KVM_MEM_TYPE_RESERVED &&
		    type == KVM_MEM_TYPE_RESERVED) {
			/* Grow the existing bank to cover both extents. */
			bank->guest_phys_addr = min(bank->guest_phys_addr, guest_phys);
			bank->size = max(bank_end, end) - bank->guest_phys_addr + 1;

			if (merged) {
				/*
				 * This is at least the second merge, remove
				 * previous result.
				 */
				list_del(&merged->list);
				free(merged);
			}

			/* Continue the overlap scan with the merged extent. */
			guest_phys = bank->guest_phys_addr;
			size = bank->size;
			merged = bank;

			/* Keep checking that we don't overlap another region */
			continue;
		}

		pr_err("%s region [%llx-%llx] would overlap %s region [%llx-%llx]",
		       kvm_mem_type_to_string(type), guest_phys, guest_phys + size - 1,
		       kvm_mem_type_to_string(bank->type), bank->guest_phys_addr,
		       bank->guest_phys_addr + bank->size - 1);

		ret = -EINVAL;
		goto out;
	}

	/* The request was fully absorbed into an existing reserved bank. */
	if (merged) {
		ret = 0;
		goto out;
	}

	bank = malloc(sizeof(*bank));
	if (!bank) {
		ret = -ENOMEM;
		goto out;
	}

	INIT_LIST_HEAD(&bank->list);
	bank->guest_phys_addr = guest_phys;
	bank->host_addr = userspace_addr;
	bank->size = size;
	bank->type = type;
	bank->slot = slot;

	if (type & KVM_MEM_TYPE_READONLY)
		flags |= KVM_MEM_READONLY;

	/* Reserved banks exist only in our bookkeeping, not in KVM. */
	if (type != KVM_MEM_TYPE_RESERVED) {
		mem = (struct kvm_userspace_memory_region) {
			.slot = slot,
			.flags = flags,
			.guest_phys_addr = guest_phys,
			.memory_size = size,
			.userspace_addr = (unsigned long)userspace_addr,
		};

		ret = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);
		if (ret < 0) {
			ret = -errno;
			goto out;
		}
	}

	/* Insert after prev_entry so the list stays sorted by slot. */
	list_add(&bank->list, prev_entry);
	kvm->mem_slots++;
	ret = 0;

out:
	mutex_unlock(&kvm->mem_banks_lock);
	return ret;
}
344
/*
 * Translate a guest physical address into the corresponding host
 * virtual address, or NULL (after logging a warning) when the address
 * lies outside every registered memory bank.
 *
 * NOTE(review): walks mem_banks without taking mem_banks_lock —
 * presumably the bank list is stable while vCPUs run; confirm against
 * callers of kvm__register_mem()/kvm__destroy_mem().
 */
void *guest_flat_to_host(struct kvm *kvm, u64 offset)
{
	struct kvm_mem_bank *bank;

	list_for_each_entry(bank, &kvm->mem_banks, list) {
		u64 start = bank->guest_phys_addr;
		u64 end = start + bank->size;

		if (offset < start || offset >= end)
			continue;

		return bank->host_addr + (offset - start);
	}

	pr_warning("unable to translate guest address 0x%llx to host",
		   (unsigned long long)offset);
	return NULL;
}
361
host_to_guest_flat(struct kvm * kvm,void * ptr)362 u64 host_to_guest_flat(struct kvm *kvm, void *ptr)
363 {
364 struct kvm_mem_bank *bank;
365
366 list_for_each_entry(bank, &kvm->mem_banks, list) {
367 void *bank_start = bank->host_addr;
368 void *bank_end = bank_start + bank->size;
369
370 if (ptr >= bank_start && ptr < bank_end)
371 return bank->guest_phys_addr + (ptr - bank_start);
372 }
373
374 pr_warning("unable to translate host address %p to guest", ptr);
375 return 0;
376 }
377
378 /*
379 * Iterate over each registered memory bank. Call @fun for each bank with @data
380 * as argument. @type is a bitmask that allows to filter banks according to
381 * their type.
382 *
383 * If one call to @fun returns a non-zero value, stop iterating and return the
384 * value. Otherwise, return zero.
385 */
kvm__for_each_mem_bank(struct kvm * kvm,enum kvm_mem_type type,int (* fun)(struct kvm * kvm,struct kvm_mem_bank * bank,void * data),void * data)386 int kvm__for_each_mem_bank(struct kvm *kvm, enum kvm_mem_type type,
387 int (*fun)(struct kvm *kvm, struct kvm_mem_bank *bank, void *data),
388 void *data)
389 {
390 int ret;
391 struct kvm_mem_bank *bank;
392
393 list_for_each_entry(bank, &kvm->mem_banks, list) {
394 if (type != KVM_MEM_TYPE_ALL && !(bank->type & type))
395 continue;
396
397 ret = fun(kvm, bank, data);
398 if (ret)
399 break;
400 }
401
402 return ret;
403 }
404
kvm__recommended_cpus(struct kvm * kvm)405 int kvm__recommended_cpus(struct kvm *kvm)
406 {
407 int ret;
408
409 ret = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_NR_VCPUS);
410 if (ret <= 0)
411 /*
412 * api.txt states that if KVM_CAP_NR_VCPUS does not exist,
413 * assume 4.
414 */
415 return 4;
416
417 return ret;
418 }
419
kvm__max_cpus(struct kvm * kvm)420 int kvm__max_cpus(struct kvm *kvm)
421 {
422 int ret;
423
424 ret = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);
425 if (ret <= 0)
426 ret = kvm__recommended_cpus(kvm);
427
428 return ret;
429 }
430
/*
 * VM type passed to KVM_CREATE_VM by kvm__init(). Declared weak so an
 * architecture can supply its own implementation.
 */
int __attribute__((weak)) kvm__get_vm_type(struct kvm *kvm)
{
	return KVM_VM_TYPE;
}
435
kvm__init(struct kvm * kvm)436 int kvm__init(struct kvm *kvm)
437 {
438 int ret;
439
440 if (!kvm__arch_cpu_supports_vm()) {
441 pr_err("Your CPU does not support hardware virtualization");
442 ret = -ENOSYS;
443 goto err;
444 }
445
446 kvm->sys_fd = open(kvm->cfg.dev, O_RDWR);
447 if (kvm->sys_fd < 0) {
448 if (errno == ENOENT)
449 pr_err("'%s' not found. Please make sure your kernel has CONFIG_KVM "
450 "enabled and that the KVM modules are loaded.", kvm->cfg.dev);
451 else if (errno == ENODEV)
452 pr_err("'%s' KVM driver not available.\n # (If the KVM "
453 "module is loaded then 'dmesg' may offer further clues "
454 "about the failure.)", kvm->cfg.dev);
455 else
456 pr_err("Could not open %s: ", kvm->cfg.dev);
457
458 ret = -errno;
459 goto err_free;
460 }
461
462 ret = ioctl(kvm->sys_fd, KVM_GET_API_VERSION, 0);
463 if (ret != KVM_API_VERSION) {
464 pr_err("KVM_API_VERSION ioctl");
465 ret = -errno;
466 goto err_sys_fd;
467 }
468
469 kvm->vm_fd = ioctl(kvm->sys_fd, KVM_CREATE_VM, kvm__get_vm_type(kvm));
470 if (kvm->vm_fd < 0) {
471 pr_err("KVM_CREATE_VM ioctl");
472 ret = kvm->vm_fd;
473 goto err_sys_fd;
474 }
475
476 if (kvm__check_extensions(kvm)) {
477 pr_err("A required KVM extension is not supported by OS");
478 ret = -ENOSYS;
479 goto err_vm_fd;
480 }
481
482 kvm__arch_init(kvm);
483
484 INIT_LIST_HEAD(&kvm->mem_banks);
485 kvm__init_ram(kvm);
486
487 if (!kvm->cfg.firmware_filename) {
488 if (!kvm__load_kernel(kvm, kvm->cfg.kernel_filename,
489 kvm->cfg.initrd_filename, kvm->cfg.real_cmdline))
490 die("unable to load kernel %s", kvm->cfg.kernel_filename);
491 }
492
493 if (kvm->cfg.firmware_filename) {
494 if (!kvm__load_firmware(kvm, kvm->cfg.firmware_filename))
495 die("unable to load firmware image %s: %s", kvm->cfg.firmware_filename, strerror(errno));
496 } else {
497 ret = kvm__arch_setup_firmware(kvm);
498 if (ret < 0)
499 die("kvm__arch_setup_firmware() failed with error %d\n", ret);
500 }
501
502 return 0;
503
504 err_vm_fd:
505 close(kvm->vm_fd);
506 err_sys_fd:
507 close(kvm->sys_fd);
508 err_free:
509 free(kvm);
510 err:
511 return ret;
512 }
513 core_init(kvm__init);
514
/*
 * Open the kernel (and optional initrd) images and hand them to the
 * architecture loader. Calls die() on any failure, so a false return
 * never actually reaches the caller; on success returns true.
 */
bool kvm__load_kernel(struct kvm *kvm, const char *kernel_filename,
		const char *initrd_filename, const char *kernel_cmdline)
{
	int fd_kernel, fd_initrd = -1;
	bool loaded;

	fd_kernel = open(kernel_filename, O_RDONLY);
	if (fd_kernel < 0)
		die("Unable to open kernel %s", kernel_filename);

	if (initrd_filename) {
		fd_initrd = open(initrd_filename, O_RDONLY);
		if (fd_initrd < 0)
			die("Unable to open initrd %s", initrd_filename);
	}

	loaded = kvm__arch_load_kernel_image(kvm, fd_kernel, fd_initrd,
					     kernel_cmdline);

	if (fd_initrd >= 0)
		close(fd_initrd);
	close(fd_kernel);

	if (!loaded)
		die("%s is not a valid kernel image", kernel_filename);

	return loaded;
}
542
/*
 * Hex-dump @size bytes of guest memory starting at guest physical
 * address @addr to @debug_fd, eight bytes per line. The size is rounded
 * down to a multiple of eight.
 */
void kvm__dump_mem(struct kvm *kvm, unsigned long addr, unsigned long size, int debug_fd)
{
	unsigned char *p;
	unsigned long n;

	size &= ~7; /* mod 8 */
	if (!size)
		return;

	p = guest_flat_to_host(kvm, addr);
	/*
	 * Bail out if the address does not map to host memory: the old
	 * code went on to compute p + n on a NULL pointer, which is
	 * undefined behavior. guest_flat_to_host() has already logged
	 * a warning for this address.
	 */
	if (!p)
		return;

	for (n = 0; n < size; n += 8) {
		if (!host_ptr_in_ram(kvm, p + n)) {
			dprintf(debug_fd, " 0x%08lx: <unknown>\n", addr + n);
			continue;
		}
		dprintf(debug_fd, " 0x%08lx: %02x %02x %02x %02x  %02x %02x %02x %02x\n",
			addr + n, p[n + 0], p[n + 1], p[n + 2], p[n + 3],
			p[n + 4], p[n + 5], p[n + 6], p[n + 7]);
	}
}
564
kvm__reboot(struct kvm * kvm)565 void kvm__reboot(struct kvm *kvm)
566 {
567 /* Check if the guest is running */
568 if (!kvm->cpus[0] || kvm->cpus[0]->thread == 0)
569 return;
570
571 pthread_kill(kvm->cpus[0]->thread, SIGKVMEXIT);
572 }
573
/*
 * Resume a guest stopped by kvm__pause(): releases pause_lock, which
 * kvm__pause() intentionally left held for the whole pause interval.
 */
void kvm__continue(struct kvm *kvm)
{
	mutex_unlock(&pause_lock);
}
578
/*
 * Pause all running vCPUs. Takes pause_lock and deliberately returns
 * with it still held — the guest stays paused until kvm__continue()
 * releases it. Each running vCPU thread is signalled with SIGKVMPAUSE
 * and acknowledges through the pause eventfd (kvm__notify_paused()).
 */
void kvm__pause(struct kvm *kvm)
{
	int i, paused_vcpus = 0;

	mutex_lock(&pause_lock);

	/* Check if the guest is running */
	if (!kvm->cpus || !kvm->cpus[0] || kvm->cpus[0]->thread == 0)
		return;

	pause_event = eventfd(0, 0);
	if (pause_event < 0)
		die("Failed creating pause notification event");
	for (i = 0; i < kvm->nrcpus; i++) {
		/* NOTE(review): vCPUs with a non-zero 'paused' flag are
		 * counted as already stopped; presumably the flag is set
		 * elsewhere before this runs — confirm at the call sites. */
		if (kvm->cpus[i]->is_running && kvm->cpus[i]->paused == 0)
			pthread_kill(kvm->cpus[i]->thread, SIGKVMPAUSE);
		else
			paused_vcpus++;
	}

	/* Each acknowledging vCPU adds 1 to the eventfd counter. */
	while (paused_vcpus < kvm->nrcpus) {
		u64 cur_read;

		if (read(pause_event, &cur_read, sizeof(cur_read)) < 0)
			die("Failed reading pause event");
		paused_vcpus += cur_read;
	}
	close(pause_event);
}
608
/*
 * Called from a vCPU thread once it has stopped: signals kvm__pause()
 * through the pause eventfd, then blocks on pause_lock until
 * kvm__continue() releases it, and finally clears the paused flag.
 */
void kvm__notify_paused(void)
{
	u64 p = 1;

	if (write(pause_event, &p, sizeof(p)) < 0)
		die("Failed notifying of paused VCPU.");

	/* Blocks here for the duration of the pause. */
	mutex_lock(&pause_lock);
	current_kvm_cpu->paused = 0;
	mutex_unlock(&pause_lock);
}
620