xref: /kvmtool/kvm-cpu.c (revision fdd26ecb4bc52ac7e06455d5ea2cf5ebf7d500bc)
15c3d55faSPekka Enberg #include "kvm/kvm-cpu.h"
25c3d55faSPekka Enberg 
3b0b42ba0SPekka Enberg #include "kvm/symbol.h"
45c3d55faSPekka Enberg #include "kvm/util.h"
55c3d55faSPekka Enberg #include "kvm/kvm.h"
619d98215SMarc Zyngier #include "kvm/virtio.h"
7e300a5eeSMichael Ellerman #include "kvm/mutex.h"
8e300a5eeSMichael Ellerman #include "kvm/barrier.h"
95c3d55faSPekka Enberg 
105c3d55faSPekka Enberg #include <sys/ioctl.h>
115c3d55faSPekka Enberg #include <sys/mman.h>
12e300a5eeSMichael Ellerman #include <sys/eventfd.h>
135ee154d1SPekka Enberg #include <signal.h>
145c3d55faSPekka Enberg #include <stdlib.h>
15b0b42ba0SPekka Enberg #include <string.h>
165c3d55faSPekka Enberg #include <errno.h>
175c3d55faSPekka Enberg #include <stdio.h>
185c3d55faSPekka Enberg 
19656be1b8SSasha Levin extern __thread struct kvm_cpu *current_kvm_cpu;
20656be1b8SSasha Levin 
2119d98215SMarc Zyngier int __attribute__((weak)) kvm_cpu__get_endianness(struct kvm_cpu *vcpu)
2219d98215SMarc Zyngier {
2319d98215SMarc Zyngier 	return VIRTIO_ENDIAN_HOST;
2419d98215SMarc Zyngier }
2519d98215SMarc Zyngier 
2643835ac9SSasha Levin void kvm_cpu__enable_singlestep(struct kvm_cpu *vcpu)
275c3d55faSPekka Enberg {
285c3d55faSPekka Enberg 	struct kvm_guest_debug debug = {
295c3d55faSPekka Enberg 		.control	= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
305c3d55faSPekka Enberg 	};
315c3d55faSPekka Enberg 
3243835ac9SSasha Levin 	if (ioctl(vcpu->vcpu_fd, KVM_SET_GUEST_DEBUG, &debug) < 0)
334542f276SCyrill Gorcunov 		pr_warning("KVM_SET_GUEST_DEBUG failed");
345c3d55faSPekka Enberg }
355c3d55faSPekka Enberg 
3643835ac9SSasha Levin void kvm_cpu__run(struct kvm_cpu *vcpu)
375c3d55faSPekka Enberg {
385c3d55faSPekka Enberg 	int err;
395c3d55faSPekka Enberg 
40f9fdf5cdSAsias He 	if (!vcpu->is_running)
41f9fdf5cdSAsias He 		return;
42f9fdf5cdSAsias He 
4343835ac9SSasha Levin 	err = ioctl(vcpu->vcpu_fd, KVM_RUN, 0);
4476b75d32SMatt Evans 	if (err < 0 && (errno != EINTR && errno != EAGAIN))
455c3d55faSPekka Enberg 		die_perror("KVM_RUN failed");
465c3d55faSPekka Enberg }
4765bab644SPekka Enberg 
484298ddadSSasha Levin static void kvm_cpu_signal_handler(int signum)
4949e5227dSSasha Levin {
504298ddadSSasha Levin 	if (signum == SIGKVMEXIT) {
512aa76b26SWill Deacon 		if (current_kvm_cpu && current_kvm_cpu->is_running)
52656be1b8SSasha Levin 			current_kvm_cpu->is_running = false;
534298ddadSSasha Levin 	} else if (signum == SIGKVMPAUSE) {
54*fdd26ecbSJulien Thierry 		if (current_kvm_cpu->paused)
55*fdd26ecbSJulien Thierry 			die("Pause signaled for already paused CPU\n");
56*fdd26ecbSJulien Thierry 
57*fdd26ecbSJulien Thierry 		/* pause_lock is held by kvm__pause() */
584298ddadSSasha Levin 		current_kvm_cpu->paused = 1;
59*fdd26ecbSJulien Thierry 
60*fdd26ecbSJulien Thierry 		/*
61*fdd26ecbSJulien Thierry 		 * This is a blocking function and uses locks. It is safe
62*fdd26ecbSJulien Thierry 		 * to call it for this signal as a second pause event should
63*fdd26ecbSJulien Thierry 		 * not be send to this thread until it acquires and releases
64*fdd26ecbSJulien Thierry 		 * the pause_lock.
65*fdd26ecbSJulien Thierry 		 */
66*fdd26ecbSJulien Thierry 		kvm__notify_paused();
674298ddadSSasha Levin 	}
68e300a5eeSMichael Ellerman 
69e300a5eeSMichael Ellerman 	/* For SIGKVMTASK cpu->task is already set */
7049e5227dSSasha Levin }
7149e5227dSSasha Levin 
7273f7e5b3SSasha Levin static void kvm_cpu__handle_coalesced_mmio(struct kvm_cpu *cpu)
7373f7e5b3SSasha Levin {
7473f7e5b3SSasha Levin 	if (cpu->ring) {
7573f7e5b3SSasha Levin 		while (cpu->ring->first != cpu->ring->last) {
7673f7e5b3SSasha Levin 			struct kvm_coalesced_mmio *m;
7773f7e5b3SSasha Levin 			m = &cpu->ring->coalesced_mmio[cpu->ring->first];
789b735910SMarc Zyngier 			kvm_cpu__emulate_mmio(cpu,
7973f7e5b3SSasha Levin 					      m->phys_addr,
8073f7e5b3SSasha Levin 					      m->data,
8173f7e5b3SSasha Levin 					      m->len,
8273f7e5b3SSasha Levin 					      1);
8373f7e5b3SSasha Levin 			cpu->ring->first = (cpu->ring->first + 1) % KVM_COALESCED_MMIO_MAX;
8473f7e5b3SSasha Levin 		}
8573f7e5b3SSasha Levin 	}
8673f7e5b3SSasha Levin }
8773f7e5b3SSasha Levin 
/* Serializes kvm_cpu__run_on_all_cpus() so only one task batch is in flight. */
static DEFINE_MUTEX(task_lock);
/* Counting eventfd: each vcpu writes 1 on task completion; the issuer reads. */
static int task_eventfd;
90e300a5eeSMichael Ellerman 
/*
 * Execute the task pending on @cpu (installed by kvm_cpu__run_on_all_cpus())
 * and signal its completion through task_eventfd. Runs either directly on
 * the issuing vcpu thread or from the SIGKVMTASK-interrupted run loop.
 * The rmb()/wmb() pairing matches the wmb() in kvm_cpu__run_on_all_cpus().
 */
static void kvm_cpu__run_task(struct kvm_cpu *cpu)
{
	u64 inc = 1;

	pr_debug("Running task %p on cpu %lu", cpu->task, cpu->cpu_id);

	/* Make sure we see the store to cpu->task */
	rmb();
	cpu->task->func(cpu, cpu->task->data);

	/* Clear task before we signal completion */
	cpu->task = NULL;
	wmb();

	/* Bump the eventfd counter by one so the issuer can tally completions. */
	if (write(task_eventfd, &inc, sizeof(inc)) < 0)
		die("Failed notifying of completed task.");
}
108e300a5eeSMichael Ellerman 
/*
 * Run @task once on every vcpu and block until all have completed.
 * Remote vcpus are kicked with SIGKVMTASK so they pick the task up in
 * their run loop; if the caller is itself a vcpu thread, its own task is
 * run inline (a thread cannot signal-and-wait on itself). task_lock
 * guarantees at most one batch is outstanding at a time.
 */
void kvm_cpu__run_on_all_cpus(struct kvm *kvm, struct kvm_cpu_task *task)
{
	int i, done = 0;

	pr_debug("Running task %p on all cpus", task);

	mutex_lock(&task_lock);

	for (i = 0; i < kvm->nrcpus; i++) {
		if (kvm->cpus[i]->task) {
			/* Should never happen */
			die("CPU %d already has a task pending!", i);
		}

		kvm->cpus[i]->task = task;
		/* Publish the task before the target thread is signalled. */
		wmb();

		if (kvm->cpus[i] == current_kvm_cpu)
			kvm_cpu__run_task(current_kvm_cpu);
		else
			pthread_kill(kvm->cpus[i]->thread, SIGKVMTASK);
	}

	/* Each completion adds 1 to the eventfd; wait until all have reported. */
	while (done < kvm->nrcpus) {
		u64 count;

		if (read(task_eventfd, &count, sizeof(count)) < 0)
			die("Failed reading task eventfd");

		done += count;
	}

	mutex_unlock(&task_lock);
}
143e300a5eeSMichael Ellerman 
/*
 * Per-vcpu thread entry: set up signal handling, reset the vcpu, then loop
 * entering the guest and dispatching on the KVM exit reason until the vcpu
 * is told to stop. Returns 0 on a clean exit, 1 when an exit could not be
 * handled (caller treats that as a guest panic).
 */
int kvm_cpu__start(struct kvm_cpu *cpu)
{
	sigset_t sigset;

	/* SIGALRM is reserved for other subsystems; keep it away from vcpus. */
	sigemptyset(&sigset);
	sigaddset(&sigset, SIGALRM);

	pthread_sigmask(SIG_BLOCK, &sigset, NULL);

	/* Control signals used to kick this thread out of KVM_RUN. */
	signal(SIGKVMEXIT, kvm_cpu_signal_handler);
	signal(SIGKVMPAUSE, kvm_cpu_signal_handler);
	signal(SIGKVMTASK, kvm_cpu_signal_handler);

	kvm_cpu__reset_vcpu(cpu);

	if (cpu->kvm->cfg.single_step)
		kvm_cpu__enable_singlestep(cpu);

	while (cpu->is_running) {
		/* Deliver a pending NMI request before re-entering the guest. */
		if (cpu->needs_nmi) {
			kvm_cpu__arch_nmi(cpu);
			cpu->needs_nmi = 0;
		}

		/* A task queued by kvm_cpu__run_on_all_cpus() takes priority. */
		if (cpu->task)
			kvm_cpu__run_task(cpu);

		kvm_cpu__run(cpu);

		switch (cpu->kvm_run->exit_reason) {
		case KVM_EXIT_UNKNOWN:
			break;
		case KVM_EXIT_DEBUG:
			/* Single-step trap: dump state and keep going. */
			kvm_cpu__show_registers(cpu);
			kvm_cpu__show_code(cpu);
			break;
		case KVM_EXIT_IO: {
			bool ret;

			/* Data lives inside the shared kvm_run page at data_offset. */
			ret = kvm_cpu__emulate_io(cpu,
						  cpu->kvm_run->io.port,
						  (u8 *)cpu->kvm_run +
						  cpu->kvm_run->io.data_offset,
						  cpu->kvm_run->io.direction,
						  cpu->kvm_run->io.size,
						  cpu->kvm_run->io.count);

			if (!ret)
				goto panic_kvm;
			break;
		}
		case KVM_EXIT_MMIO: {
			bool ret;

			/*
			 * If we had MMIO exit, coalesced ring should be processed
			 * *before* processing the exit itself
			 */
			kvm_cpu__handle_coalesced_mmio(cpu);

			ret = kvm_cpu__emulate_mmio(cpu,
						    cpu->kvm_run->mmio.phys_addr,
						    cpu->kvm_run->mmio.data,
						    cpu->kvm_run->mmio.len,
						    cpu->kvm_run->mmio.is_write);

			if (!ret)
				goto panic_kvm;
			break;
		}
		case KVM_EXIT_INTR:
			/* Interrupted by a signal: only exit if told to stop. */
			if (cpu->is_running)
				break;
			goto exit_kvm;
		case KVM_EXIT_SHUTDOWN:
			goto exit_kvm;
		case KVM_EXIT_SYSTEM_EVENT:
			/*
			 * Print the type of system event and
			 * treat all system events as shutdown request.
			 */
			switch (cpu->kvm_run->system_event.type) {
			default:
				pr_warning("unknown system event type %d",
					   cpu->kvm_run->system_event.type);
				/* fall through for now */
			case KVM_SYSTEM_EVENT_RESET:
				/* Fall through for now */
			case KVM_SYSTEM_EVENT_SHUTDOWN:
				/*
				 * Ensure that all VCPUs are torn down,
				 * regardless of which CPU generated the event.
				 */
				kvm__reboot(cpu->kvm);
				goto exit_kvm;
			};
			break;
		default: {
			bool ret;

			/* Give the architecture backend a chance to handle it. */
			ret = kvm_cpu__handle_exit(cpu);
			if (!ret)
				goto panic_kvm;
			break;
		}
		}
		/* Drain any MMIO coalesced while handling the exit. */
		kvm_cpu__handle_coalesced_mmio(cpu);
	}

exit_kvm:
	return 0;

panic_kvm:
	return 1;
}
259df4239fbSSasha Levin 
260df4239fbSSasha Levin int kvm_cpu__init(struct kvm *kvm)
261df4239fbSSasha Levin {
262df4239fbSSasha Levin 	int max_cpus, recommended_cpus, i;
263df4239fbSSasha Levin 
264df4239fbSSasha Levin 	max_cpus = kvm__max_cpus(kvm);
265df4239fbSSasha Levin 	recommended_cpus = kvm__recommended_cpus(kvm);
266df4239fbSSasha Levin 
267df4239fbSSasha Levin 	if (kvm->cfg.nrcpus > max_cpus) {
268df4239fbSSasha Levin 		printf("  # Limit the number of CPUs to %d\n", max_cpus);
269df4239fbSSasha Levin 		kvm->cfg.nrcpus = max_cpus;
270df4239fbSSasha Levin 	} else if (kvm->cfg.nrcpus > recommended_cpus) {
271df4239fbSSasha Levin 		printf("  # Warning: The maximum recommended amount of VCPUs"
272df4239fbSSasha Levin 			" is %d\n", recommended_cpus);
273df4239fbSSasha Levin 	}
274df4239fbSSasha Levin 
275df4239fbSSasha Levin 	kvm->nrcpus = kvm->cfg.nrcpus;
276df4239fbSSasha Levin 
277e300a5eeSMichael Ellerman 	task_eventfd = eventfd(0, 0);
278e300a5eeSMichael Ellerman 	if (task_eventfd < 0) {
279e300a5eeSMichael Ellerman 		pr_warning("Couldn't create task_eventfd");
280e300a5eeSMichael Ellerman 		return task_eventfd;
281e300a5eeSMichael Ellerman 	}
282e300a5eeSMichael Ellerman 
283df4239fbSSasha Levin 	/* Alloc one pointer too many, so array ends up 0-terminated */
284df4239fbSSasha Levin 	kvm->cpus = calloc(kvm->nrcpus + 1, sizeof(void *));
285df4239fbSSasha Levin 	if (!kvm->cpus) {
286df4239fbSSasha Levin 		pr_warning("Couldn't allocate array for %d CPUs", kvm->nrcpus);
287df4239fbSSasha Levin 		return -ENOMEM;
288df4239fbSSasha Levin 	}
289df4239fbSSasha Levin 
290df4239fbSSasha Levin 	for (i = 0; i < kvm->nrcpus; i++) {
291df4239fbSSasha Levin 		kvm->cpus[i] = kvm_cpu__arch_init(kvm, i);
292df4239fbSSasha Levin 		if (!kvm->cpus[i]) {
293df4239fbSSasha Levin 			pr_warning("unable to initialize KVM VCPU");
294df4239fbSSasha Levin 			goto fail_alloc;
295df4239fbSSasha Levin 		}
296df4239fbSSasha Levin 	}
297df4239fbSSasha Levin 
298df4239fbSSasha Levin 	return 0;
299df4239fbSSasha Levin 
300df4239fbSSasha Levin fail_alloc:
301df4239fbSSasha Levin 	for (i = 0; i < kvm->nrcpus; i++)
302df4239fbSSasha Levin 		free(kvm->cpus[i]);
303df4239fbSSasha Levin 	return -ENOMEM;
304df4239fbSSasha Levin }
30549a8afd1SSasha Levin base_init(kvm_cpu__init);
306df4239fbSSasha Levin 
307df4239fbSSasha Levin int kvm_cpu__exit(struct kvm *kvm)
308df4239fbSSasha Levin {
309df4239fbSSasha Levin 	int i, r;
310df4239fbSSasha Levin 	void *ret = NULL;
311df4239fbSSasha Levin 
312df4239fbSSasha Levin 	kvm_cpu__delete(kvm->cpus[0]);
313df4239fbSSasha Levin 	kvm->cpus[0] = NULL;
314df4239fbSSasha Levin 
315e8cb90fbSWill Deacon 	kvm__pause(kvm);
316df4239fbSSasha Levin 	for (i = 1; i < kvm->nrcpus; i++) {
317df4239fbSSasha Levin 		if (kvm->cpus[i]->is_running) {
318df4239fbSSasha Levin 			pthread_kill(kvm->cpus[i]->thread, SIGKVMEXIT);
319df4239fbSSasha Levin 			if (pthread_join(kvm->cpus[i]->thread, &ret) != 0)
320df4239fbSSasha Levin 				die("pthread_join");
321df4239fbSSasha Levin 			kvm_cpu__delete(kvm->cpus[i]);
322df4239fbSSasha Levin 		}
323df4239fbSSasha Levin 		if (ret == NULL)
324df4239fbSSasha Levin 			r = 0;
325df4239fbSSasha Levin 	}
326e8cb90fbSWill Deacon 	kvm__continue(kvm);
327df4239fbSSasha Levin 
328df4239fbSSasha Levin 	free(kvm->cpus);
329df4239fbSSasha Levin 
330df4239fbSSasha Levin 	kvm->nrcpus = 0;
331df4239fbSSasha Levin 
332e300a5eeSMichael Ellerman 	close(task_eventfd);
333e300a5eeSMichael Ellerman 
334df4239fbSSasha Levin 	return r;
335df4239fbSSasha Levin }
336