1 /*
2  * Kernel-based Virtual Machine driver for Linux
3  *
4  * AMD SVM support
5  *
6  * Copyright (C) 2006 Qumranet, Inc.
7  * Copyright 2010 Red Hat, Inc. and/or its affiliates.
8  *
9  * Authors:
10  *   Yaniv Kamay  <yaniv@qumranet.com>
11  *   Avi Kivity   <avi@qumranet.com>
12  *
13  * This work is licensed under the terms of the GNU GPL, version 2.  See
14  * the COPYING file in the top-level directory.
15  *
16  */
17 #include <linux/kvm_host.h>
18 
19 #include "irq.h"
20 #include "mmu.h"
21 #include "kvm_cache_regs.h"
22 #include "x86.h"
23 
24 #include <linux/module.h>
25 #include <linux/kernel.h>
26 #include <linux/vmalloc.h>
27 #include <linux/highmem.h>
28 #include <linux/sched.h>
29 #include <linux/ftrace_event.h>
30 #include <linux/slab.h>
31 
32 #include <asm/perf_event.h>
33 #include <asm/tlbflush.h>
34 #include <asm/desc.h>
35 #include <asm/kvm_para.h>
36 
37 #include <asm/virtext.h>
38 #include "trace.h"
39 
40 #define __ex(x) __kvm_handle_fault_on_reboot(x)
41 
42 MODULE_AUTHOR("Qumranet");
43 MODULE_LICENSE("GPL");
44 
45 #define IOPM_ALLOC_ORDER 2
46 #define MSRPM_ALLOC_ORDER 1
47 
48 #define SEG_TYPE_LDT 2
49 #define SEG_TYPE_BUSY_TSS16 3
50 
51 #define SVM_FEATURE_NPT            (1 <<  0)
52 #define SVM_FEATURE_LBRV           (1 <<  1)
53 #define SVM_FEATURE_SVML           (1 <<  2)
54 #define SVM_FEATURE_NRIP           (1 <<  3)
55 #define SVM_FEATURE_TSC_RATE       (1 <<  4)
56 #define SVM_FEATURE_VMCB_CLEAN     (1 <<  5)
57 #define SVM_FEATURE_FLUSH_ASID     (1 <<  6)
58 #define SVM_FEATURE_DECODE_ASSIST  (1 <<  7)
59 #define SVM_FEATURE_PAUSE_FILTER   (1 << 10)
60 
61 #define NESTED_EXIT_HOST	0	/* Exit handled on host level */
62 #define NESTED_EXIT_DONE	1	/* Exit caused nested vmexit  */
63 #define NESTED_EXIT_CONTINUE	2	/* Further checks needed      */
64 
65 #define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
66 
67 #define TSC_RATIO_RSVD          0xffffff0000000000ULL
68 #define TSC_RATIO_MIN		0x0000000000000001ULL
69 #define TSC_RATIO_MAX		0x000000ffffffffffULL
70 
71 static bool erratum_383_found __read_mostly;
72 
73 static const u32 host_save_user_msrs[] = {
74 #ifdef CONFIG_X86_64
75 	MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
76 	MSR_FS_BASE,
77 #endif
78 	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
79 };
80 
81 #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
82 
83 struct kvm_vcpu;
84 
85 struct nested_state {
86 	struct vmcb *hsave;
87 	u64 hsave_msr;
88 	u64 vm_cr_msr;
89 	u64 vmcb;
90 
91 	/* These are the merged vectors */
92 	u32 *msrpm;
93 
94 	/* gpa pointers to the real vectors */
95 	u64 vmcb_msrpm;
96 	u64 vmcb_iopm;
97 
98 	/* A VMEXIT is required but not yet emulated */
99 	bool exit_required;
100 
101 	/* cache for intercepts of the guest */
102 	u32 intercept_cr;
103 	u32 intercept_dr;
104 	u32 intercept_exceptions;
105 	u64 intercept;
106 
107 	/* Nested Paging related state */
108 	u64 nested_cr3;
109 };
110 
111 #define MSRPM_OFFSETS	16
112 static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
113 
114 struct vcpu_svm {
115 	struct kvm_vcpu vcpu;
116 	struct vmcb *vmcb;
117 	unsigned long vmcb_pa;
118 	struct svm_cpu_data *svm_data;
119 	uint64_t asid_generation;
120 	uint64_t sysenter_esp;
121 	uint64_t sysenter_eip;
122 
123 	u64 next_rip;
124 
125 	u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
126 	struct {
127 		u16 fs;
128 		u16 gs;
129 		u16 ldt;
130 		u64 gs_base;
131 	} host;
132 
133 	u32 *msrpm;
134 
135 	ulong nmi_iret_rip;
136 
137 	struct nested_state nested;
138 
139 	bool nmi_singlestep;
140 
141 	unsigned int3_injected;
142 	unsigned long int3_rip;
143 	u32 apf_reason;
144 
145 	u64  tsc_ratio;
146 };
147 
148 static DEFINE_PER_CPU(u64, current_tsc_ratio);
149 #define TSC_RATIO_DEFAULT	0x0100000000ULL
150 
151 #define MSR_INVALID			0xffffffffU
152 
153 static struct svm_direct_access_msrs {
154 	u32 index;   /* Index of the MSR */
155 	bool always; /* True if intercept is always on */
156 } direct_access_msrs[] = {
157 	{ .index = MSR_STAR,				.always = true  },
158 	{ .index = MSR_IA32_SYSENTER_CS,		.always = true  },
159 #ifdef CONFIG_X86_64
160 	{ .index = MSR_GS_BASE,				.always = true  },
161 	{ .index = MSR_FS_BASE,				.always = true  },
162 	{ .index = MSR_KERNEL_GS_BASE,			.always = true  },
163 	{ .index = MSR_LSTAR,				.always = true  },
164 	{ .index = MSR_CSTAR,				.always = true  },
165 	{ .index = MSR_SYSCALL_MASK,			.always = true  },
166 #endif
167 	{ .index = MSR_IA32_LASTBRANCHFROMIP,		.always = false },
168 	{ .index = MSR_IA32_LASTBRANCHTOIP,		.always = false },
169 	{ .index = MSR_IA32_LASTINTFROMIP,		.always = false },
170 	{ .index = MSR_IA32_LASTINTTOIP,		.always = false },
171 	{ .index = MSR_INVALID,				.always = false },
172 };
173 
174 /* enable NPT for AMD64 and X86 with PAE */
175 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
176 static bool npt_enabled = true;
177 #else
178 static bool npt_enabled;
179 #endif
180 static int npt = 1;
181 
182 module_param(npt, int, S_IRUGO);
183 
184 static int nested = 1;
185 module_param(nested, int, S_IRUGO);
186 
187 static void svm_flush_tlb(struct kvm_vcpu *vcpu);
188 static void svm_complete_interrupts(struct vcpu_svm *svm);
189 
190 static int nested_svm_exit_handled(struct vcpu_svm *svm);
191 static int nested_svm_intercept(struct vcpu_svm *svm);
192 static int nested_svm_vmexit(struct vcpu_svm *svm);
193 static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
194 				      bool has_error_code, u32 error_code);
195 static u64 __scale_tsc(u64 ratio, u64 tsc);
196 
197 enum {
198 	VMCB_INTERCEPTS, /* Intercept vectors, TSC offset,
199 			    pause filter count */
200 	VMCB_PERM_MAP,   /* IOPM Base and MSRPM Base */
201 	VMCB_ASID,	 /* ASID */
202 	VMCB_INTR,	 /* int_ctl, int_vector */
203 	VMCB_NPT,        /* npt_en, nCR3, gPAT */
204 	VMCB_CR,	 /* CR0, CR3, CR4, EFER */
205 	VMCB_DR,         /* DR6, DR7 */
206 	VMCB_DT,         /* GDT, IDT */
207 	VMCB_SEG,        /* CS, DS, SS, ES, CPL */
208 	VMCB_CR2,        /* CR2 only */
209 	VMCB_LBR,        /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */
210 	VMCB_DIRTY_MAX,
211 };
212 
213 /* TPR and CR2 are always written before VMRUN */
214 #define VMCB_ALWAYS_DIRTY_MASK	((1U << VMCB_INTR) | (1U << VMCB_CR2))
215 
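/*
 * VMCB "clean bits" tell the CPU which parts of the VMCB are unchanged since
 * the last VMRUN so it can skip reloading them. Clearing a bit marks the
 * corresponding field group as dirty and forces a reload on the next VMRUN.
 */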
216 static inline void mark_all_dirty(struct vmcb *vmcb)
217 {
218 	vmcb->control.clean = 0;
219 }
220 
221 static inline void mark_all_clean(struct vmcb *vmcb)
222 {
223 	vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1)
224 			       & ~VMCB_ALWAYS_DIRTY_MASK;
225 }
226 
227 static inline void mark_dirty(struct vmcb *vmcb, int bit)
228 {
229 	vmcb->control.clean &= ~(1 << bit);
230 }
231 
232 static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
233 {
234 	return container_of(vcpu, struct vcpu_svm, vcpu);
235 }
236 
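/*
 * While a nested guest runs, the intercepts the hardware actually sees are
 * the union of what KVM itself needs (kept in the host save area) and what
 * the L1 hypervisor requested for its guest, so either level can trigger an
 * intercept.
 */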
237 static void recalc_intercepts(struct vcpu_svm *svm)
238 {
239 	struct vmcb_control_area *c, *h;
240 	struct nested_state *g;
241 
242 	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
243 
244 	if (!is_guest_mode(&svm->vcpu))
245 		return;
246 
247 	c = &svm->vmcb->control;
248 	h = &svm->nested.hsave->control;
249 	g = &svm->nested;
250 
251 	c->intercept_cr = h->intercept_cr | g->intercept_cr;
252 	c->intercept_dr = h->intercept_dr | g->intercept_dr;
253 	c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
254 	c->intercept = h->intercept | g->intercept;
255 }
256 
257 static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm)
258 {
259 	if (is_guest_mode(&svm->vcpu))
260 		return svm->nested.hsave;
261 	else
262 		return svm->vmcb;
263 }
264 
265 static inline void set_cr_intercept(struct vcpu_svm *svm, int bit)
266 {
267 	struct vmcb *vmcb = get_host_vmcb(svm);
268 
269 	vmcb->control.intercept_cr |= (1U << bit);
270 
271 	recalc_intercepts(svm);
272 }
273 
274 static inline void clr_cr_intercept(struct vcpu_svm *svm, int bit)
275 {
276 	struct vmcb *vmcb = get_host_vmcb(svm);
277 
278 	vmcb->control.intercept_cr &= ~(1U << bit);
279 
280 	recalc_intercepts(svm);
281 }
282 
283 static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit)
284 {
285 	struct vmcb *vmcb = get_host_vmcb(svm);
286 
287 	return vmcb->control.intercept_cr & (1U << bit);
288 }
289 
290 static inline void set_dr_intercept(struct vcpu_svm *svm, int bit)
291 {
292 	struct vmcb *vmcb = get_host_vmcb(svm);
293 
294 	vmcb->control.intercept_dr |= (1U << bit);
295 
296 	recalc_intercepts(svm);
297 }
298 
299 static inline void clr_dr_intercept(struct vcpu_svm *svm, int bit)
300 {
301 	struct vmcb *vmcb = get_host_vmcb(svm);
302 
303 	vmcb->control.intercept_dr &= ~(1U << bit);
304 
305 	recalc_intercepts(svm);
306 }
307 
308 static inline void set_exception_intercept(struct vcpu_svm *svm, int bit)
309 {
310 	struct vmcb *vmcb = get_host_vmcb(svm);
311 
312 	vmcb->control.intercept_exceptions |= (1U << bit);
313 
314 	recalc_intercepts(svm);
315 }
316 
317 static inline void clr_exception_intercept(struct vcpu_svm *svm, int bit)
318 {
319 	struct vmcb *vmcb = get_host_vmcb(svm);
320 
321 	vmcb->control.intercept_exceptions &= ~(1U << bit);
322 
323 	recalc_intercepts(svm);
324 }
325 
326 static inline void set_intercept(struct vcpu_svm *svm, int bit)
327 {
328 	struct vmcb *vmcb = get_host_vmcb(svm);
329 
330 	vmcb->control.intercept |= (1ULL << bit);
331 
332 	recalc_intercepts(svm);
333 }
334 
335 static inline void clr_intercept(struct vcpu_svm *svm, int bit)
336 {
337 	struct vmcb *vmcb = get_host_vmcb(svm);
338 
339 	vmcb->control.intercept &= ~(1ULL << bit);
340 
341 	recalc_intercepts(svm);
342 }
343 
344 static inline void enable_gif(struct vcpu_svm *svm)
345 {
346 	svm->vcpu.arch.hflags |= HF_GIF_MASK;
347 }
348 
349 static inline void disable_gif(struct vcpu_svm *svm)
350 {
351 	svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
352 }
353 
354 static inline bool gif_set(struct vcpu_svm *svm)
355 {
356 	return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
357 }
358 
359 static unsigned long iopm_base;
360 
361 struct kvm_ldttss_desc {
362 	u16 limit0;
363 	u16 base0;
364 	unsigned base1:8, type:5, dpl:2, p:1;
365 	unsigned limit1:4, zero0:3, g:1, base2:8;
366 	u32 base3;
367 	u32 zero1;
368 } __attribute__((packed));
369 
370 struct svm_cpu_data {
371 	int cpu;
372 
373 	u64 asid_generation;
374 	u32 max_asid;
375 	u32 next_asid;
376 	struct kvm_ldttss_desc *tss_desc;
377 
378 	struct page *save_area;
379 };
380 
381 static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
382 
383 struct svm_init_data {
384 	int cpu;
385 	int r;
386 };
387 
388 static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
389 
390 #define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
391 #define MSRS_RANGE_SIZE 2048
392 #define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2)
393 
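/*
 * The MSR permission map covers three MSR ranges (see msrpm_ranges). Each
 * 2048-byte range describes 8192 MSRs using two bits per MSR (one for read
 * intercept, one for write intercept), i.e. four MSRs per byte. The helper
 * below converts an MSR number into an offset into the map, in u32 units.
 */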
394 static u32 svm_msrpm_offset(u32 msr)
395 {
396 	u32 offset;
397 	int i;
398 
399 	for (i = 0; i < NUM_MSR_MAPS; i++) {
400 		if (msr < msrpm_ranges[i] ||
401 		    msr >= msrpm_ranges[i] + MSRS_IN_RANGE)
402 			continue;
403 
404 		offset  = (msr - msrpm_ranges[i]) / 4; /* 4 msrs per u8 */
405 		offset += (i * MSRS_RANGE_SIZE);       /* add range offset */
406 
407 		/* Now we have the u8 offset - but need the u32 offset */
408 		return offset / 4;
409 	}
410 
411 	/* MSR not in any range */
412 	return MSR_INVALID;
413 }
414 
415 #define MAX_INST_SIZE 15
416 
417 static inline void clgi(void)
418 {
419 	asm volatile (__ex(SVM_CLGI));
420 }
421 
422 static inline void stgi(void)
423 {
424 	asm volatile (__ex(SVM_STGI));
425 }
426 
427 static inline void invlpga(unsigned long addr, u32 asid)
428 {
429 	asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid));
430 }
431 
432 static int get_npt_level(void)
433 {
434 #ifdef CONFIG_X86_64
435 	return PT64_ROOT_LEVEL;
436 #else
437 	return PT32E_ROOT_LEVEL;
438 #endif
439 }
440 
441 static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
442 {
443 	vcpu->arch.efer = efer;
444 	if (!npt_enabled && !(efer & EFER_LMA))
445 		efer &= ~EFER_LME;
446 
447 	to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
448 	mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
449 }
450 
451 static int is_external_interrupt(u32 info)
452 {
453 	info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
454 	return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
455 }
456 
457 static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
458 {
459 	struct vcpu_svm *svm = to_svm(vcpu);
460 	u32 ret = 0;
461 
462 	if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
463 		ret |= KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS;
464 	return ret & mask;
465 }
466 
467 static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
468 {
469 	struct vcpu_svm *svm = to_svm(vcpu);
470 
471 	if (mask == 0)
472 		svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
473 	else
474 		svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
475 
476 }
477 
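/*
 * Advance the guest RIP past the instruction that caused the exit. If the
 * CPU supports the NRIPS feature it already provides the next RIP in the
 * VMCB; otherwise fall back to the instruction emulator to compute it.
 */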
478 static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
479 {
480 	struct vcpu_svm *svm = to_svm(vcpu);
481 
482 	if (svm->vmcb->control.next_rip != 0)
483 		svm->next_rip = svm->vmcb->control.next_rip;
484 
485 	if (!svm->next_rip) {
486 		if (emulate_instruction(vcpu, EMULTYPE_SKIP) !=
487 				EMULATE_DONE)
488 			printk(KERN_DEBUG "%s: NOP\n", __func__);
489 		return;
490 	}
491 	if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
492 		printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n",
493 		       __func__, kvm_rip_read(vcpu), svm->next_rip);
494 
495 	kvm_rip_write(vcpu, svm->next_rip);
496 	svm_set_interrupt_shadow(vcpu, 0);
497 }
498 
499 static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
500 				bool has_error_code, u32 error_code,
501 				bool reinject)
502 {
503 	struct vcpu_svm *svm = to_svm(vcpu);
504 
505 	/*
506 	 * If we are within a nested VM we'd better #VMEXIT and let the guest
507 	 * handle the exception
508 	 */
509 	if (!reinject &&
510 	    nested_svm_check_exception(svm, nr, has_error_code, error_code))
511 		return;
512 
513 	if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) {
514 		unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
515 
516 		/*
517 		 * For guest debugging where we have to reinject #BP if some
518 		 * INT3 is guest-owned:
519 		 * Emulate nRIP by moving RIP forward. Will fail if injection
520 		 * raises a fault that is not intercepted. Still better than
521 		 * failing in all cases.
522 		 */
523 		skip_emulated_instruction(&svm->vcpu);
524 		rip = kvm_rip_read(&svm->vcpu);
525 		svm->int3_rip = rip + svm->vmcb->save.cs.base;
526 		svm->int3_injected = rip - old_rip;
527 	}
528 
529 	svm->vmcb->control.event_inj = nr
530 		| SVM_EVTINJ_VALID
531 		| (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
532 		| SVM_EVTINJ_TYPE_EXEPT;
533 	svm->vmcb->control.event_inj_err = error_code;
534 }
535 
536 static void svm_init_erratum_383(void)
537 {
538 	u32 low, high;
539 	int err;
540 	u64 val;
541 
542 	if (!cpu_has_amd_erratum(amd_erratum_383))
543 		return;
544 
545 	/* Use _safe variants to not break nested virtualization */
546 	val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err);
547 	if (err)
548 		return;
549 
550 	val |= (1ULL << 47);
551 
552 	low  = lower_32_bits(val);
553 	high = upper_32_bits(val);
554 
555 	native_write_msr_safe(MSR_AMD64_DC_CFG, low, high);
556 
557 	erratum_383_found = true;
558 }
559 
560 static int has_svm(void)
561 {
562 	const char *msg;
563 
564 	if (!cpu_has_svm(&msg)) {
565 		printk(KERN_INFO "has_svm: %s\n", msg);
566 		return 0;
567 	}
568 
569 	return 1;
570 }
571 
572 static void svm_hardware_disable(void *garbage)
573 {
574 	/* Make sure we clean up behind us */
575 	if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
576 		wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
577 
578 	cpu_svm_disable();
579 
580 	amd_pmu_disable_virt();
581 }
582 
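/*
 * Enable SVM on this CPU: set EFER.SVME and point MSR_VM_HSAVE_PA at the
 * per-CPU host save area the CPU uses across VMRUN/#VMEXIT.
 */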
583 static int svm_hardware_enable(void *garbage)
584 {
585 
586 	struct svm_cpu_data *sd;
587 	uint64_t efer;
588 	struct desc_ptr gdt_descr;
589 	struct desc_struct *gdt;
590 	int me = raw_smp_processor_id();
591 
592 	rdmsrl(MSR_EFER, efer);
593 	if (efer & EFER_SVME)
594 		return -EBUSY;
595 
596 	if (!has_svm()) {
597 		printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n",
598 		       me);
599 		return -EINVAL;
600 	}
601 	sd = per_cpu(svm_data, me);
602 
603 	if (!sd) {
604 		printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n",
605 		       me);
606 		return -EINVAL;
607 	}
608 
609 	sd->asid_generation = 1;
610 	sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
611 	sd->next_asid = sd->max_asid + 1;
612 
613 	native_store_gdt(&gdt_descr);
614 	gdt = (struct desc_struct *)gdt_descr.address;
615 	sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
616 
617 	wrmsrl(MSR_EFER, efer | EFER_SVME);
618 
619 	wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
620 
621 	if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
622 		wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
623 		__get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT;
624 	}
625 
626 	svm_init_erratum_383();
627 
628 	amd_pmu_enable_virt();
629 
630 	return 0;
631 }
632 
633 static void svm_cpu_uninit(int cpu)
634 {
635 	struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id());
636 
637 	if (!sd)
638 		return;
639 
640 	per_cpu(svm_data, raw_smp_processor_id()) = NULL;
641 	__free_page(sd->save_area);
642 	kfree(sd);
643 }
644 
645 static int svm_cpu_init(int cpu)
646 {
647 	struct svm_cpu_data *sd;
648 	int r;
649 
650 	sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
651 	if (!sd)
652 		return -ENOMEM;
653 	sd->cpu = cpu;
654 	sd->save_area = alloc_page(GFP_KERNEL);
655 	r = -ENOMEM;
656 	if (!sd->save_area)
657 		goto err_1;
658 
659 	per_cpu(svm_data, cpu) = sd;
660 
661 	return 0;
662 
663 err_1:
664 	kfree(sd);
665 	return r;
666 
667 }
668 
669 static bool valid_msr_intercept(u32 index)
670 {
671 	int i;
672 
673 	for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++)
674 		if (direct_access_msrs[i].index == index)
675 			return true;
676 
677 	return false;
678 }
679 
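/*
 * Each MSR owns two consecutive bits in the permission map: the lower bit
 * intercepts reads, the higher bit intercepts writes. A set bit means the
 * access is intercepted; a clear bit gives the guest direct access.
 */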
680 static void set_msr_interception(u32 *msrpm, unsigned msr,
681 				 int read, int write)
682 {
683 	u8 bit_read, bit_write;
684 	unsigned long tmp;
685 	u32 offset;
686 
687 	/*
688 	 * If this warning triggers, extend the direct_access_msrs list at
689 	 * the beginning of the file.
690 	 */
691 	WARN_ON(!valid_msr_intercept(msr));
692 
693 	offset    = svm_msrpm_offset(msr);
694 	bit_read  = 2 * (msr & 0x0f);
695 	bit_write = 2 * (msr & 0x0f) + 1;
696 	tmp       = msrpm[offset];
697 
698 	BUG_ON(offset == MSR_INVALID);
699 
700 	read  ? clear_bit(bit_read,  &tmp) : set_bit(bit_read,  &tmp);
701 	write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);
702 
703 	msrpm[offset] = tmp;
704 }
705 
706 static void svm_vcpu_init_msrpm(u32 *msrpm)
707 {
708 	int i;
709 
710 	memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
711 
712 	for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
713 		if (!direct_access_msrs[i].always)
714 			continue;
715 
716 		set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
717 	}
718 }
719 
720 static void add_msr_offset(u32 offset)
721 {
722 	int i;
723 
724 	for (i = 0; i < MSRPM_OFFSETS; ++i) {
725 
726 		/* Offset already in list? */
727 		if (msrpm_offsets[i] == offset)
728 			return;
729 
730 		/* Slot used by another offset? */
731 		if (msrpm_offsets[i] != MSR_INVALID)
732 			continue;
733 
734 		/* Add offset to list */
735 		msrpm_offsets[i] = offset;
736 
737 		return;
738 	}
739 
740 	/*
741 	 * If this BUG triggers, the msrpm_offsets table has overflowed. Just
742 	 * increase MSRPM_OFFSETS in this case.
743 	 */
744 	BUG();
745 }
746 
747 static void init_msrpm_offsets(void)
748 {
749 	int i;
750 
751 	memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets));
752 
753 	for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
754 		u32 offset;
755 
756 		offset = svm_msrpm_offset(direct_access_msrs[i].index);
757 		BUG_ON(offset == MSR_INVALID);
758 
759 		add_msr_offset(offset);
760 	}
761 }
762 
763 static void svm_enable_lbrv(struct vcpu_svm *svm)
764 {
765 	u32 *msrpm = svm->msrpm;
766 
767 	svm->vmcb->control.lbr_ctl = 1;
768 	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
769 	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
770 	set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
771 	set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
772 }
773 
774 static void svm_disable_lbrv(struct vcpu_svm *svm)
775 {
776 	u32 *msrpm = svm->msrpm;
777 
778 	svm->vmcb->control.lbr_ctl = 0;
779 	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
780 	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
781 	set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
782 	set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
783 }
784 
785 static __init int svm_hardware_setup(void)
786 {
787 	int cpu;
788 	struct page *iopm_pages;
789 	void *iopm_va;
790 	int r;
791 
792 	iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);
793 
794 	if (!iopm_pages)
795 		return -ENOMEM;
796 
797 	iopm_va = page_address(iopm_pages);
798 	memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
799 	iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
800 
801 	init_msrpm_offsets();
802 
803 	if (boot_cpu_has(X86_FEATURE_NX))
804 		kvm_enable_efer_bits(EFER_NX);
805 
806 	if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
807 		kvm_enable_efer_bits(EFER_FFXSR);
808 
809 	if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
810 		u64 max;
811 
812 		kvm_has_tsc_control = true;
813 
814 		/*
815 		 * Make sure the user can only configure tsc_khz values that
816 		 * fit into a signed integer.
817 		 * A minimum value is not calculated because it will always
818 		 * be 1 on all machines, and a value of 0 is used to disable
819 		 * TSC scaling for the vcpu.
820 		 */
821 		max = min(0x7fffffffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX));
822 
823 		kvm_max_guest_tsc_khz = max;
824 	}
825 
826 	if (nested) {
827 		printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
828 		kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
829 	}
830 
831 	for_each_possible_cpu(cpu) {
832 		r = svm_cpu_init(cpu);
833 		if (r)
834 			goto err;
835 	}
836 
837 	if (!boot_cpu_has(X86_FEATURE_NPT))
838 		npt_enabled = false;
839 
840 	if (npt_enabled && !npt) {
841 		printk(KERN_INFO "kvm: Nested Paging disabled\n");
842 		npt_enabled = false;
843 	}
844 
845 	if (npt_enabled) {
846 		printk(KERN_INFO "kvm: Nested Paging enabled\n");
847 		kvm_enable_tdp();
848 	} else
849 		kvm_disable_tdp();
850 
851 	return 0;
852 
853 err:
854 	__free_pages(iopm_pages, IOPM_ALLOC_ORDER);
855 	iopm_base = 0;
856 	return r;
857 }
858 
859 static __exit void svm_hardware_unsetup(void)
860 {
861 	int cpu;
862 
863 	for_each_possible_cpu(cpu)
864 		svm_cpu_uninit(cpu);
865 
866 	__free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
867 	iopm_base = 0;
868 }
869 
870 static void init_seg(struct vmcb_seg *seg)
871 {
872 	seg->selector = 0;
873 	seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK |
874 		      SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */
875 	seg->limit = 0xffff;
876 	seg->base = 0;
877 }
878 
879 static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
880 {
881 	seg->selector = 0;
882 	seg->attrib = SVM_SELECTOR_P_MASK | type;
883 	seg->limit = 0xffff;
884 	seg->base = 0;
885 }
886 
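/*
 * Multiply a TSC value by an 8.32 fixed-point ratio (integer part in the
 * upper bits, fraction in the lower 32 bits), splitting the multiplication
 * into 32-bit pieces so the intermediate products cannot overflow 64 bits.
 */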
887 static u64 __scale_tsc(u64 ratio, u64 tsc)
888 {
889 	u64 mult, frac, _tsc;
890 
891 	mult  = ratio >> 32;
892 	frac  = ratio & ((1ULL << 32) - 1);
893 
894 	_tsc  = tsc;
895 	_tsc *= mult;
896 	_tsc += (tsc >> 32) * frac;
897 	_tsc += ((tsc & ((1ULL << 32) - 1)) * frac) >> 32;
898 
899 	return _tsc;
900 }
901 
902 static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
903 {
904 	struct vcpu_svm *svm = to_svm(vcpu);
905 	u64 _tsc = tsc;
906 
907 	if (svm->tsc_ratio != TSC_RATIO_DEFAULT)
908 		_tsc = __scale_tsc(svm->tsc_ratio, tsc);
909 
910 	return _tsc;
911 }
912 
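/*
 * Set up the per-vcpu TSC ratio so the guest sees user_tsc_khz instead of
 * the host frequency: ratio = (user_tsc_khz << 32) / tsc_khz, the 8.32
 * fixed-point format used by __scale_tsc() and the TSC_RATIO MSR.
 */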
913 static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
914 {
915 	struct vcpu_svm *svm = to_svm(vcpu);
916 	u64 ratio;
917 	u64 khz;
918 
919 	/* TSC scaling supported? */
920 	if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR))
921 		return;
922 
923 	/* TSC-Scaling disabled or guest TSC same frequency as host TSC? */
924 	if (user_tsc_khz == 0) {
925 		vcpu->arch.virtual_tsc_khz = 0;
926 		svm->tsc_ratio = TSC_RATIO_DEFAULT;
927 		return;
928 	}
929 
930 	khz = user_tsc_khz;
931 
932 	/* TSC scaling required  - calculate ratio */
933 	ratio = khz << 32;
934 	do_div(ratio, tsc_khz);
935 
936 	if (ratio == 0 || ratio & TSC_RATIO_RSVD) {
937 		WARN_ONCE(1, "Invalid TSC ratio - virtual-tsc-khz=%u\n",
938 				user_tsc_khz);
939 		return;
940 	}
941 	vcpu->arch.virtual_tsc_khz = user_tsc_khz;
942 	svm->tsc_ratio             = ratio;
943 }
944 
945 static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
946 {
947 	struct vcpu_svm *svm = to_svm(vcpu);
948 	u64 g_tsc_offset = 0;
949 
950 	if (is_guest_mode(vcpu)) {
951 		g_tsc_offset = svm->vmcb->control.tsc_offset -
952 			       svm->nested.hsave->control.tsc_offset;
953 		svm->nested.hsave->control.tsc_offset = offset;
954 	}
955 
956 	svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
957 
958 	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
959 }
960 
961 static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
962 {
963 	struct vcpu_svm *svm = to_svm(vcpu);
964 
965 	svm->vmcb->control.tsc_offset += adjustment;
966 	if (is_guest_mode(vcpu))
967 		svm->nested.hsave->control.tsc_offset += adjustment;
968 	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
969 }
970 
971 static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
972 {
973 	u64 tsc;
974 
975 	tsc = svm_scale_tsc(vcpu, native_read_tsc());
976 
977 	return target_tsc - tsc;
978 }
979 
980 static void init_vmcb(struct vcpu_svm *svm)
981 {
982 	struct vmcb_control_area *control = &svm->vmcb->control;
983 	struct vmcb_save_area *save = &svm->vmcb->save;
984 
985 	svm->vcpu.fpu_active = 1;
986 	svm->vcpu.arch.hflags = 0;
987 
988 	set_cr_intercept(svm, INTERCEPT_CR0_READ);
989 	set_cr_intercept(svm, INTERCEPT_CR3_READ);
990 	set_cr_intercept(svm, INTERCEPT_CR4_READ);
991 	set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
992 	set_cr_intercept(svm, INTERCEPT_CR3_WRITE);
993 	set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
994 	set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
995 
996 	set_dr_intercept(svm, INTERCEPT_DR0_READ);
997 	set_dr_intercept(svm, INTERCEPT_DR1_READ);
998 	set_dr_intercept(svm, INTERCEPT_DR2_READ);
999 	set_dr_intercept(svm, INTERCEPT_DR3_READ);
1000 	set_dr_intercept(svm, INTERCEPT_DR4_READ);
1001 	set_dr_intercept(svm, INTERCEPT_DR5_READ);
1002 	set_dr_intercept(svm, INTERCEPT_DR6_READ);
1003 	set_dr_intercept(svm, INTERCEPT_DR7_READ);
1004 
1005 	set_dr_intercept(svm, INTERCEPT_DR0_WRITE);
1006 	set_dr_intercept(svm, INTERCEPT_DR1_WRITE);
1007 	set_dr_intercept(svm, INTERCEPT_DR2_WRITE);
1008 	set_dr_intercept(svm, INTERCEPT_DR3_WRITE);
1009 	set_dr_intercept(svm, INTERCEPT_DR4_WRITE);
1010 	set_dr_intercept(svm, INTERCEPT_DR5_WRITE);
1011 	set_dr_intercept(svm, INTERCEPT_DR6_WRITE);
1012 	set_dr_intercept(svm, INTERCEPT_DR7_WRITE);
1013 
1014 	set_exception_intercept(svm, PF_VECTOR);
1015 	set_exception_intercept(svm, UD_VECTOR);
1016 	set_exception_intercept(svm, MC_VECTOR);
1017 
1018 	set_intercept(svm, INTERCEPT_INTR);
1019 	set_intercept(svm, INTERCEPT_NMI);
1020 	set_intercept(svm, INTERCEPT_SMI);
1021 	set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
1022 	set_intercept(svm, INTERCEPT_RDPMC);
1023 	set_intercept(svm, INTERCEPT_CPUID);
1024 	set_intercept(svm, INTERCEPT_INVD);
1025 	set_intercept(svm, INTERCEPT_HLT);
1026 	set_intercept(svm, INTERCEPT_INVLPG);
1027 	set_intercept(svm, INTERCEPT_INVLPGA);
1028 	set_intercept(svm, INTERCEPT_IOIO_PROT);
1029 	set_intercept(svm, INTERCEPT_MSR_PROT);
1030 	set_intercept(svm, INTERCEPT_TASK_SWITCH);
1031 	set_intercept(svm, INTERCEPT_SHUTDOWN);
1032 	set_intercept(svm, INTERCEPT_VMRUN);
1033 	set_intercept(svm, INTERCEPT_VMMCALL);
1034 	set_intercept(svm, INTERCEPT_VMLOAD);
1035 	set_intercept(svm, INTERCEPT_VMSAVE);
1036 	set_intercept(svm, INTERCEPT_STGI);
1037 	set_intercept(svm, INTERCEPT_CLGI);
1038 	set_intercept(svm, INTERCEPT_SKINIT);
1039 	set_intercept(svm, INTERCEPT_WBINVD);
1040 	set_intercept(svm, INTERCEPT_MONITOR);
1041 	set_intercept(svm, INTERCEPT_MWAIT);
1042 	set_intercept(svm, INTERCEPT_XSETBV);
1043 
1044 	control->iopm_base_pa = iopm_base;
1045 	control->msrpm_base_pa = __pa(svm->msrpm);
1046 	control->int_ctl = V_INTR_MASKING_MASK;
1047 
1048 	init_seg(&save->es);
1049 	init_seg(&save->ss);
1050 	init_seg(&save->ds);
1051 	init_seg(&save->fs);
1052 	init_seg(&save->gs);
1053 
1054 	save->cs.selector = 0xf000;
1055 	/* Executable/Readable Code Segment */
1056 	save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK |
1057 		SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
1058 	save->cs.limit = 0xffff;
1059 	/*
1060 	 * cs.base should really be 0xffff0000, but vmx can't handle that, so
1061 	 * be consistent with it.
1062 	 *
1063 	 * Replace when we have real mode working for vmx.
1064 	 */
1065 	save->cs.base = 0xf0000;
1066 
1067 	save->gdtr.limit = 0xffff;
1068 	save->idtr.limit = 0xffff;
1069 
1070 	init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
1071 	init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
1072 
1073 	svm_set_efer(&svm->vcpu, 0);
1074 	save->dr6 = 0xffff0ff0;
1075 	save->dr7 = 0x400;
1076 	kvm_set_rflags(&svm->vcpu, 2);
1077 	save->rip = 0x0000fff0;
1078 	svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
1079 
1080 	/*
1081 	 * This is the guest-visible cr0 value.
1082 	 * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
1083 	 */
1084 	svm->vcpu.arch.cr0 = 0;
1085 	(void)kvm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
1086 
1087 	save->cr4 = X86_CR4_PAE;
1088 	/* rdx = ?? */
1089 
1090 	if (npt_enabled) {
1091 		/* Setup VMCB for Nested Paging */
1092 		control->nested_ctl = 1;
1093 		clr_intercept(svm, INTERCEPT_INVLPG);
1094 		clr_exception_intercept(svm, PF_VECTOR);
1095 		clr_cr_intercept(svm, INTERCEPT_CR3_READ);
1096 		clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
1097 		save->g_pat = 0x0007040600070406ULL;
1098 		save->cr3 = 0;
1099 		save->cr4 = 0;
1100 	}
1101 	svm->asid_generation = 0;
1102 
1103 	svm->nested.vmcb = 0;
1104 	svm->vcpu.arch.hflags = 0;
1105 
1106 	if (boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
1107 		control->pause_filter_count = 3000;
1108 		set_intercept(svm, INTERCEPT_PAUSE);
1109 	}
1110 
1111 	mark_all_dirty(svm->vmcb);
1112 
1113 	enable_gif(svm);
1114 }
1115 
1116 static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
1117 {
1118 	struct vcpu_svm *svm = to_svm(vcpu);
1119 
1120 	init_vmcb(svm);
1121 
1122 	if (!kvm_vcpu_is_bsp(vcpu)) {
1123 		kvm_rip_write(vcpu, 0);
1124 		svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12;
1125 		svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8;
1126 	}
1127 	vcpu->arch.regs_avail = ~0;
1128 	vcpu->arch.regs_dirty = ~0;
1129 
1130 	return 0;
1131 }
1132 
1133 static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
1134 {
1135 	struct vcpu_svm *svm;
1136 	struct page *page;
1137 	struct page *msrpm_pages;
1138 	struct page *hsave_page;
1139 	struct page *nested_msrpm_pages;
1140 	int err;
1141 
1142 	svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1143 	if (!svm) {
1144 		err = -ENOMEM;
1145 		goto out;
1146 	}
1147 
1148 	svm->tsc_ratio = TSC_RATIO_DEFAULT;
1149 
1150 	err = kvm_vcpu_init(&svm->vcpu, kvm, id);
1151 	if (err)
1152 		goto free_svm;
1153 
1154 	err = -ENOMEM;
1155 	page = alloc_page(GFP_KERNEL);
1156 	if (!page)
1157 		goto uninit;
1158 
1159 	msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
1160 	if (!msrpm_pages)
1161 		goto free_page1;
1162 
1163 	nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
1164 	if (!nested_msrpm_pages)
1165 		goto free_page2;
1166 
1167 	hsave_page = alloc_page(GFP_KERNEL);
1168 	if (!hsave_page)
1169 		goto free_page3;
1170 
1171 	svm->nested.hsave = page_address(hsave_page);
1172 
1173 	svm->msrpm = page_address(msrpm_pages);
1174 	svm_vcpu_init_msrpm(svm->msrpm);
1175 
1176 	svm->nested.msrpm = page_address(nested_msrpm_pages);
1177 	svm_vcpu_init_msrpm(svm->nested.msrpm);
1178 
1179 	svm->vmcb = page_address(page);
1180 	clear_page(svm->vmcb);
1181 	svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
1182 	svm->asid_generation = 0;
1183 	init_vmcb(svm);
1184 	kvm_write_tsc(&svm->vcpu, 0);
1185 
1186 	err = fx_init(&svm->vcpu);
1187 	if (err)
1188 		goto free_page4;
1189 
1190 	svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
1191 	if (kvm_vcpu_is_bsp(&svm->vcpu))
1192 		svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
1193 
1194 	return &svm->vcpu;
1195 
1196 free_page4:
1197 	__free_page(hsave_page);
1198 free_page3:
1199 	__free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
1200 free_page2:
1201 	__free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
1202 free_page1:
1203 	__free_page(page);
1204 uninit:
1205 	kvm_vcpu_uninit(&svm->vcpu);
1206 free_svm:
1207 	kmem_cache_free(kvm_vcpu_cache, svm);
1208 out:
1209 	return ERR_PTR(err);
1210 }
1211 
1212 static void svm_free_vcpu(struct kvm_vcpu *vcpu)
1213 {
1214 	struct vcpu_svm *svm = to_svm(vcpu);
1215 
1216 	__free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
1217 	__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
1218 	__free_page(virt_to_page(svm->nested.hsave));
1219 	__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
1220 	kvm_vcpu_uninit(vcpu);
1221 	kmem_cache_free(kvm_vcpu_cache, svm);
1222 }
1223 
1224 static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1225 {
1226 	struct vcpu_svm *svm = to_svm(vcpu);
1227 	int i;
1228 
1229 	if (unlikely(cpu != vcpu->cpu)) {
1230 		svm->asid_generation = 0;
1231 		mark_all_dirty(svm->vmcb);
1232 	}
1233 
1234 #ifdef CONFIG_X86_64
1235 	rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base);
1236 #endif
1237 	savesegment(fs, svm->host.fs);
1238 	savesegment(gs, svm->host.gs);
1239 	svm->host.ldt = kvm_read_ldt();
1240 
1241 	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
1242 		rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
1243 
1244 	if (static_cpu_has(X86_FEATURE_TSCRATEMSR) &&
1245 	    svm->tsc_ratio != __get_cpu_var(current_tsc_ratio)) {
1246 		__get_cpu_var(current_tsc_ratio) = svm->tsc_ratio;
1247 		wrmsrl(MSR_AMD64_TSC_RATIO, svm->tsc_ratio);
1248 	}
1249 }
1250 
1251 static void svm_vcpu_put(struct kvm_vcpu *vcpu)
1252 {
1253 	struct vcpu_svm *svm = to_svm(vcpu);
1254 	int i;
1255 
1256 	++vcpu->stat.host_state_reload;
1257 	kvm_load_ldt(svm->host.ldt);
1258 #ifdef CONFIG_X86_64
1259 	loadsegment(fs, svm->host.fs);
1260 	wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
1261 	load_gs_index(svm->host.gs);
1262 #else
1263 #ifdef CONFIG_X86_32_LAZY_GS
1264 	loadsegment(gs, svm->host.gs);
1265 #endif
1266 #endif
1267 	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
1268 		wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
1269 }
1270 
1271 static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
1272 {
1273 	return to_svm(vcpu)->vmcb->save.rflags;
1274 }
1275 
1276 static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
1277 {
1278 	to_svm(vcpu)->vmcb->save.rflags = rflags;
1279 }
1280 
1281 static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
1282 {
1283 	switch (reg) {
1284 	case VCPU_EXREG_PDPTR:
1285 		BUG_ON(!npt_enabled);
1286 		load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
1287 		break;
1288 	default:
1289 		BUG();
1290 	}
1291 }
1292 
1293 static void svm_set_vintr(struct vcpu_svm *svm)
1294 {
1295 	set_intercept(svm, INTERCEPT_VINTR);
1296 }
1297 
1298 static void svm_clear_vintr(struct vcpu_svm *svm)
1299 {
1300 	clr_intercept(svm, INTERCEPT_VINTR);
1301 }
1302 
1303 static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
1304 {
1305 	struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
1306 
1307 	switch (seg) {
1308 	case VCPU_SREG_CS: return &save->cs;
1309 	case VCPU_SREG_DS: return &save->ds;
1310 	case VCPU_SREG_ES: return &save->es;
1311 	case VCPU_SREG_FS: return &save->fs;
1312 	case VCPU_SREG_GS: return &save->gs;
1313 	case VCPU_SREG_SS: return &save->ss;
1314 	case VCPU_SREG_TR: return &save->tr;
1315 	case VCPU_SREG_LDTR: return &save->ldtr;
1316 	}
1317 	BUG();
1318 	return NULL;
1319 }
1320 
1321 static u64 svm_get_segment_base(struct kvm_vcpu *vcpu, int seg)
1322 {
1323 	struct vmcb_seg *s = svm_seg(vcpu, seg);
1324 
1325 	return s->base;
1326 }
1327 
1328 static void svm_get_segment(struct kvm_vcpu *vcpu,
1329 			    struct kvm_segment *var, int seg)
1330 {
1331 	struct vmcb_seg *s = svm_seg(vcpu, seg);
1332 
1333 	var->base = s->base;
1334 	var->limit = s->limit;
1335 	var->selector = s->selector;
1336 	var->type = s->attrib & SVM_SELECTOR_TYPE_MASK;
1337 	var->s = (s->attrib >> SVM_SELECTOR_S_SHIFT) & 1;
1338 	var->dpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
1339 	var->present = (s->attrib >> SVM_SELECTOR_P_SHIFT) & 1;
1340 	var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1;
1341 	var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
1342 	var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
1343 	var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
1344 
1345 	/*
1346 	 * AMD's VMCB does not have an explicit unusable field, so emulate it
1347 	 * for cross-vendor migration: unusable means not present or null type
1348 	 */
1349 	var->unusable = !var->present || (var->type == 0);
1350 
1351 	switch (seg) {
1352 	case VCPU_SREG_CS:
1353 		/*
1354 		 * SVM always stores 0 for the 'G' bit in the CS selector in
1355 		 * the VMCB on a VMEXIT. This hurts cross-vendor migration:
1356 		 * Intel's VMENTRY has a check on the 'G' bit.
1357 		 */
1358 		var->g = s->limit > 0xfffff;
1359 		break;
1360 	case VCPU_SREG_TR:
1361 		/*
1362 		 * Work around a bug where the busy flag in the tr selector
1363 		 * isn't exposed
1364 		 */
1365 		var->type |= 0x2;
1366 		break;
1367 	case VCPU_SREG_DS:
1368 	case VCPU_SREG_ES:
1369 	case VCPU_SREG_FS:
1370 	case VCPU_SREG_GS:
1371 		/*
1372 		 * The accessed bit must always be set in the segment
1373 		 * descriptor cache; although it can be cleared in the
1374 		 * descriptor itself, the cached bit always remains 1. Since
1375 		 * Intel has a check on this, set it here to support
1376 		 * cross-vendor migration.
1377 		 */
1378 		if (!var->unusable)
1379 			var->type |= 0x1;
1380 		break;
1381 	case VCPU_SREG_SS:
1382 		/*
1383 		 * On AMD CPUs sometimes the DB bit in the segment
1384 		 * descriptor is left as 1, although the whole segment has
1385 		 * been made unusable. Clear it here to pass an Intel VMX
1386 		 * entry check when cross vendor migrating.
1387 		 */
1388 		if (var->unusable)
1389 			var->db = 0;
1390 		break;
1391 	}
1392 }
1393 
1394 static int svm_get_cpl(struct kvm_vcpu *vcpu)
1395 {
1396 	struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
1397 
1398 	return save->cpl;
1399 }
1400 
1401 static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1402 {
1403 	struct vcpu_svm *svm = to_svm(vcpu);
1404 
1405 	dt->size = svm->vmcb->save.idtr.limit;
1406 	dt->address = svm->vmcb->save.idtr.base;
1407 }
1408 
1409 static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1410 {
1411 	struct vcpu_svm *svm = to_svm(vcpu);
1412 
1413 	svm->vmcb->save.idtr.limit = dt->size;
1414 	svm->vmcb->save.idtr.base = dt->address;
1415 	mark_dirty(svm->vmcb, VMCB_DT);
1416 }
1417 
1418 static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1419 {
1420 	struct vcpu_svm *svm = to_svm(vcpu);
1421 
1422 	dt->size = svm->vmcb->save.gdtr.limit;
1423 	dt->address = svm->vmcb->save.gdtr.base;
1424 }
1425 
1426 static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1427 {
1428 	struct vcpu_svm *svm = to_svm(vcpu);
1429 
1430 	svm->vmcb->save.gdtr.limit = dt->size;
1431 	svm->vmcb->save.gdtr.base = dt->address;
1432 	mark_dirty(svm->vmcb, VMCB_DT);
1433 }
1434 
1435 static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
1436 {
1437 }
1438 
1439 static void svm_decache_cr3(struct kvm_vcpu *vcpu)
1440 {
1441 }
1442 
1443 static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
1444 {
1445 }
1446 
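/*
 * When the CR0 value the guest sees equals the value in the VMCB and the FPU
 * is active, full CR0 read/write intercepts are unnecessary; the selective
 * CR0 write intercept set up in init_vmcb() is sufficient.
 */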
1447 static void update_cr0_intercept(struct vcpu_svm *svm)
1448 {
1449 	ulong gcr0 = svm->vcpu.arch.cr0;
1450 	u64 *hcr0 = &svm->vmcb->save.cr0;
1451 
1452 	if (!svm->vcpu.fpu_active)
1453 		*hcr0 |= SVM_CR0_SELECTIVE_MASK;
1454 	else
1455 		*hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
1456 			| (gcr0 & SVM_CR0_SELECTIVE_MASK);
1457 
1458 	mark_dirty(svm->vmcb, VMCB_CR);
1459 
1460 	if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
1461 		clr_cr_intercept(svm, INTERCEPT_CR0_READ);
1462 		clr_cr_intercept(svm, INTERCEPT_CR0_WRITE);
1463 	} else {
1464 		set_cr_intercept(svm, INTERCEPT_CR0_READ);
1465 		set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
1466 	}
1467 }
1468 
1469 static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1470 {
1471 	struct vcpu_svm *svm = to_svm(vcpu);
1472 
1473 #ifdef CONFIG_X86_64
1474 	if (vcpu->arch.efer & EFER_LME) {
1475 		if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
1476 			vcpu->arch.efer |= EFER_LMA;
1477 			svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
1478 		}
1479 
1480 		if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
1481 			vcpu->arch.efer &= ~EFER_LMA;
1482 			svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
1483 		}
1484 	}
1485 #endif
1486 	vcpu->arch.cr0 = cr0;
1487 
1488 	if (!npt_enabled)
1489 		cr0 |= X86_CR0_PG | X86_CR0_WP;
1490 
1491 	if (!vcpu->fpu_active)
1492 		cr0 |= X86_CR0_TS;
1493 	/*
1494 	 * re-enable caching here because the QEMU bios
1495 	 * does not do it - this results in some delay at
1496 	 * reboot
1497 	 */
1498 	cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
1499 	svm->vmcb->save.cr0 = cr0;
1500 	mark_dirty(svm->vmcb, VMCB_CR);
1501 	update_cr0_intercept(svm);
1502 }
1503 
1504 static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1505 {
1506 	unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE;
1507 	unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;
1508 
1509 	if (cr4 & X86_CR4_VMXE)
1510 		return 1;
1511 
1512 	if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
1513 		svm_flush_tlb(vcpu);
1514 
1515 	vcpu->arch.cr4 = cr4;
1516 	if (!npt_enabled)
1517 		cr4 |= X86_CR4_PAE;
1518 	cr4 |= host_cr4_mce;
1519 	to_svm(vcpu)->vmcb->save.cr4 = cr4;
1520 	mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
1521 	return 0;
1522 }
1523 
1524 static void svm_set_segment(struct kvm_vcpu *vcpu,
1525 			    struct kvm_segment *var, int seg)
1526 {
1527 	struct vcpu_svm *svm = to_svm(vcpu);
1528 	struct vmcb_seg *s = svm_seg(vcpu, seg);
1529 
1530 	s->base = var->base;
1531 	s->limit = var->limit;
1532 	s->selector = var->selector;
1533 	if (var->unusable)
1534 		s->attrib = 0;
1535 	else {
1536 		s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
1537 		s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
1538 		s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
1539 		s->attrib |= (var->present & 1) << SVM_SELECTOR_P_SHIFT;
1540 		s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
1541 		s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
1542 		s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
1543 		s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
1544 	}
1545 	if (seg == VCPU_SREG_CS)
1546 		svm->vmcb->save.cpl
1547 			= (svm->vmcb->save.cs.attrib
1548 			   >> SVM_SELECTOR_DPL_SHIFT) & 3;
1549 
1550 	mark_dirty(svm->vmcb, VMCB_SEG);
1551 }
1552 
1553 static void update_db_intercept(struct kvm_vcpu *vcpu)
1554 {
1555 	struct vcpu_svm *svm = to_svm(vcpu);
1556 
1557 	clr_exception_intercept(svm, DB_VECTOR);
1558 	clr_exception_intercept(svm, BP_VECTOR);
1559 
1560 	if (svm->nmi_singlestep)
1561 		set_exception_intercept(svm, DB_VECTOR);
1562 
1563 	if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
1564 		if (vcpu->guest_debug &
1565 		    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
1566 			set_exception_intercept(svm, DB_VECTOR);
1567 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
1568 			set_exception_intercept(svm, BP_VECTOR);
1569 	} else
1570 		vcpu->guest_debug = 0;
1571 }
1572 
1573 static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
1574 {
1575 	struct vcpu_svm *svm = to_svm(vcpu);
1576 
1577 	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1578 		svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
1579 	else
1580 		svm->vmcb->save.dr7 = vcpu->arch.dr7;
1581 
1582 	mark_dirty(svm->vmcb, VMCB_DR);
1583 
1584 	update_db_intercept(vcpu);
1585 }
1586 
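/*
 * Hand out a fresh ASID for this vcpu. ASIDs tag the guest's TLB entries;
 * when the per-CPU pool is exhausted, request a full TLB flush on the next
 * VMRUN and start a new ASID generation.
 */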
1587 static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
1588 {
1589 	if (sd->next_asid > sd->max_asid) {
1590 		++sd->asid_generation;
1591 		sd->next_asid = 1;
1592 		svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
1593 	}
1594 
1595 	svm->asid_generation = sd->asid_generation;
1596 	svm->vmcb->control.asid = sd->next_asid++;
1597 
1598 	mark_dirty(svm->vmcb, VMCB_ASID);
1599 }
1600 
1601 static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
1602 {
1603 	struct vcpu_svm *svm = to_svm(vcpu);
1604 
1605 	svm->vmcb->save.dr7 = value;
1606 	mark_dirty(svm->vmcb, VMCB_DR);
1607 }
1608 
1609 static int pf_interception(struct vcpu_svm *svm)
1610 {
1611 	u64 fault_address = svm->vmcb->control.exit_info_2;
1612 	u32 error_code;
1613 	int r = 1;
1614 
1615 	switch (svm->apf_reason) {
1616 	default:
1617 		error_code = svm->vmcb->control.exit_info_1;
1618 
1619 		trace_kvm_page_fault(fault_address, error_code);
1620 		if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu))
1621 			kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
1622 		r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
1623 			svm->vmcb->control.insn_bytes,
1624 			svm->vmcb->control.insn_len);
1625 		break;
1626 	case KVM_PV_REASON_PAGE_NOT_PRESENT:
1627 		svm->apf_reason = 0;
1628 		local_irq_disable();
1629 		kvm_async_pf_task_wait(fault_address);
1630 		local_irq_enable();
1631 		break;
1632 	case KVM_PV_REASON_PAGE_READY:
1633 		svm->apf_reason = 0;
1634 		local_irq_disable();
1635 		kvm_async_pf_task_wake(fault_address);
1636 		local_irq_enable();
1637 		break;
1638 	}
1639 	return r;
1640 }
1641 
1642 static int db_interception(struct vcpu_svm *svm)
1643 {
1644 	struct kvm_run *kvm_run = svm->vcpu.run;
1645 
1646 	if (!(svm->vcpu.guest_debug &
1647 	      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
1648 		!svm->nmi_singlestep) {
1649 		kvm_queue_exception(&svm->vcpu, DB_VECTOR);
1650 		return 1;
1651 	}
1652 
1653 	if (svm->nmi_singlestep) {
1654 		svm->nmi_singlestep = false;
1655 		if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
1656 			svm->vmcb->save.rflags &=
1657 				~(X86_EFLAGS_TF | X86_EFLAGS_RF);
1658 		update_db_intercept(&svm->vcpu);
1659 	}
1660 
1661 	if (svm->vcpu.guest_debug &
1662 	    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) {
1663 		kvm_run->exit_reason = KVM_EXIT_DEBUG;
1664 		kvm_run->debug.arch.pc =
1665 			svm->vmcb->save.cs.base + svm->vmcb->save.rip;
1666 		kvm_run->debug.arch.exception = DB_VECTOR;
1667 		return 0;
1668 	}
1669 
1670 	return 1;
1671 }
1672 
1673 static int bp_interception(struct vcpu_svm *svm)
1674 {
1675 	struct kvm_run *kvm_run = svm->vcpu.run;
1676 
1677 	kvm_run->exit_reason = KVM_EXIT_DEBUG;
1678 	kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
1679 	kvm_run->debug.arch.exception = BP_VECTOR;
1680 	return 0;
1681 }
1682 
1683 static int ud_interception(struct vcpu_svm *svm)
1684 {
1685 	int er;
1686 
1687 	er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
1688 	if (er != EMULATE_DONE)
1689 		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
1690 	return 1;
1691 }
1692 
1693 static void svm_fpu_activate(struct kvm_vcpu *vcpu)
1694 {
1695 	struct vcpu_svm *svm = to_svm(vcpu);
1696 
1697 	clr_exception_intercept(svm, NM_VECTOR);
1698 
1699 	svm->vcpu.fpu_active = 1;
1700 	update_cr0_intercept(svm);
1701 }
1702 
1703 static int nm_interception(struct vcpu_svm *svm)
1704 {
1705 	svm_fpu_activate(&svm->vcpu);
1706 	return 1;
1707 }
1708 
1709 static bool is_erratum_383(void)
1710 {
1711 	int err, i;
1712 	u64 value;
1713 
1714 	if (!erratum_383_found)
1715 		return false;
1716 
1717 	value = native_read_msr_safe(MSR_IA32_MC0_STATUS, &err);
1718 	if (err)
1719 		return false;
1720 
1721 	/* Bit 62 may or may not be set for this mce */
1722 	value &= ~(1ULL << 62);
1723 
1724 	if (value != 0xb600000000010015ULL)
1725 		return false;
1726 
1727 	/* Clear MCi_STATUS registers */
1728 	for (i = 0; i < 6; ++i)
1729 		native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0);
1730 
1731 	value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err);
1732 	if (!err) {
1733 		u32 low, high;
1734 
1735 		value &= ~(1ULL << 2);
1736 		low    = lower_32_bits(value);
1737 		high   = upper_32_bits(value);
1738 
1739 		native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high);
1740 	}
1741 
1742 	/* Flush tlb to evict multi-match entries */
1743 	__flush_tlb_all();
1744 
1745 	return true;
1746 }
1747 
1748 static void svm_handle_mce(struct vcpu_svm *svm)
1749 {
1750 	if (is_erratum_383()) {
1751 		/*
1752 		 * Erratum 383 triggered. Guest state is corrupt so kill the
1753 		 * guest.
1754 		 */
1755 		pr_err("KVM: Guest triggered AMD Erratum 383\n");
1756 
1757 		kvm_make_request(KVM_REQ_TRIPLE_FAULT, &svm->vcpu);
1758 
1759 		return;
1760 	}
1761 
1762 	/*
1763 	 * On an #MC intercept the MCE handler is not called automatically in
1764 	 * the host. So do it by hand here.
1765 	 */
1766 	asm volatile (
1767 		"int $0x12\n");
1768 	/* not sure if we ever come back to this point */
1769 
1770 	return;
1771 }
1772 
1773 static int mc_interception(struct vcpu_svm *svm)
1774 {
1775 	return 1;
1776 }
1777 
1778 static int shutdown_interception(struct vcpu_svm *svm)
1779 {
1780 	struct kvm_run *kvm_run = svm->vcpu.run;
1781 
1782 	/*
1783 	 * VMCB is undefined after a SHUTDOWN intercept
1784 	 * so reinitialize it.
1785 	 */
1786 	clear_page(svm->vmcb);
1787 	init_vmcb(svm);
1788 
1789 	kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
1790 	return 0;
1791 }
1792 
1793 static int io_interception(struct vcpu_svm *svm)
1794 {
1795 	struct kvm_vcpu *vcpu = &svm->vcpu;
1796 	u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
1797 	int size, in, string;
1798 	unsigned port;
1799 
1800 	++svm->vcpu.stat.io_exits;
1801 	string = (io_info & SVM_IOIO_STR_MASK) != 0;
1802 	in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
1803 	if (string || in)
1804 		return emulate_instruction(vcpu, 0) == EMULATE_DONE;
1805 
1806 	port = io_info >> 16;
1807 	size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
1808 	svm->next_rip = svm->vmcb->control.exit_info_2;
1809 	skip_emulated_instruction(&svm->vcpu);
1810 
1811 	return kvm_fast_pio_out(vcpu, size, port);
1812 }
1813 
1814 static int nmi_interception(struct vcpu_svm *svm)
1815 {
1816 	return 1;
1817 }
1818 
1819 static int intr_interception(struct vcpu_svm *svm)
1820 {
1821 	++svm->vcpu.stat.irq_exits;
1822 	return 1;
1823 }
1824 
1825 static int nop_on_interception(struct vcpu_svm *svm)
1826 {
1827 	return 1;
1828 }
1829 
1830 static int halt_interception(struct vcpu_svm *svm)
1831 {
1832 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
1833 	skip_emulated_instruction(&svm->vcpu);
1834 	return kvm_emulate_halt(&svm->vcpu);
1835 }
1836 
1837 static int vmmcall_interception(struct vcpu_svm *svm)
1838 {
1839 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1840 	skip_emulated_instruction(&svm->vcpu);
1841 	kvm_emulate_hypercall(&svm->vcpu);
1842 	return 1;
1843 }
1844 
1845 static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
1846 {
1847 	struct vcpu_svm *svm = to_svm(vcpu);
1848 
1849 	return svm->nested.nested_cr3;
1850 }
1851 
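/*
 * Read a PDPTE for the nested guest: the nested CR3 is a guest-physical
 * address, so the entry has to be fetched from guest memory rather than
 * dereferenced through a host pointer.
 */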
1852 static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
1853 {
1854 	struct vcpu_svm *svm = to_svm(vcpu);
1855 	u64 cr3 = svm->nested.nested_cr3;
1856 	u64 pdpte;
1857 	int ret;
1858 
1859 	ret = kvm_read_guest_page(vcpu->kvm, gpa_to_gfn(cr3), &pdpte,
1860 				  offset_in_page(cr3) + index * 8, 8);
1861 	if (ret)
1862 		return 0;
1863 	return pdpte;
1864 }
1865 
1866 static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
1867 				   unsigned long root)
1868 {
1869 	struct vcpu_svm *svm = to_svm(vcpu);
1870 
1871 	svm->vmcb->control.nested_cr3 = root;
1872 	mark_dirty(svm->vmcb, VMCB_NPT);
1873 	svm_flush_tlb(vcpu);
1874 }
1875 
1876 static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
1877 				       struct x86_exception *fault)
1878 {
1879 	struct vcpu_svm *svm = to_svm(vcpu);
1880 
1881 	svm->vmcb->control.exit_code = SVM_EXIT_NPF;
1882 	svm->vmcb->control.exit_code_hi = 0;
1883 	svm->vmcb->control.exit_info_1 = fault->error_code;
1884 	svm->vmcb->control.exit_info_2 = fault->address;
1885 
1886 	nested_svm_vmexit(svm);
1887 }
1888 
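/*
 * Switch the MMU into nested-NPT mode: page table walks use the L1
 * hypervisor's nested CR3 and PDPTEs, and NPT faults are reflected back
 * to L1 as SVM_EXIT_NPF #vmexits instead of being handled by the host.
 */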
1889 static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
1890 {
1891 	int r;
1892 
1893 	r = kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu);
1894 
1895 	vcpu->arch.mmu.set_cr3           = nested_svm_set_tdp_cr3;
1896 	vcpu->arch.mmu.get_cr3           = nested_svm_get_tdp_cr3;
1897 	vcpu->arch.mmu.get_pdptr         = nested_svm_get_tdp_pdptr;
1898 	vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
1899 	vcpu->arch.mmu.shadow_root_level = get_npt_level();
1900 	vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
1901 
1902 	return r;
1903 }
1904 
1905 static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
1906 {
1907 	vcpu->arch.walk_mmu = &vcpu->arch.mmu;
1908 }
1909 
1910 static int nested_svm_check_permissions(struct vcpu_svm *svm)
1911 {
1912 	if (!(svm->vcpu.arch.efer & EFER_SVME)
1913 	    || !is_paging(&svm->vcpu)) {
1914 		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
1915 		return 1;
1916 	}
1917 
1918 	if (svm->vmcb->save.cpl) {
1919 		kvm_inject_gp(&svm->vcpu, 0);
1920 		return 1;
1921 	}
1922 
1923 	return 0;
1924 }
1925 
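/*
 * Check whether an exception queued for the nested guest is intercepted
 * by the L1 hypervisor.  If so, fill in the exception #vmexit fields and
 * flag exit_required so the #vmexit is emulated on the next exit, where
 * sleeping is allowed.
 */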
1926 static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
1927 				      bool has_error_code, u32 error_code)
1928 {
1929 	int vmexit;
1930 
1931 	if (!is_guest_mode(&svm->vcpu))
1932 		return 0;
1933 
1934 	svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
1935 	svm->vmcb->control.exit_code_hi = 0;
1936 	svm->vmcb->control.exit_info_1 = error_code;
1937 	svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
1938 
1939 	vmexit = nested_svm_intercept(svm);
1940 	if (vmexit == NESTED_EXIT_DONE)
1941 		svm->nested.exit_required = true;
1942 
1943 	return vmexit;
1944 }
1945 
1946 /* This function returns true if it is safe to enable the irq window */
1947 static inline bool nested_svm_intr(struct vcpu_svm *svm)
1948 {
1949 	if (!is_guest_mode(&svm->vcpu))
1950 		return true;
1951 
1952 	if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
1953 		return true;
1954 
1955 	if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
1956 		return false;
1957 
1958 	/*
1959 	 * if vmexit was already requested (by intercepted exception
1960 	 * for instance) do not overwrite it with "external interrupt"
1961 	 * vmexit.
1962 	 */
1963 	if (svm->nested.exit_required)
1964 		return false;
1965 
1966 	svm->vmcb->control.exit_code   = SVM_EXIT_INTR;
1967 	svm->vmcb->control.exit_info_1 = 0;
1968 	svm->vmcb->control.exit_info_2 = 0;
1969 
1970 	if (svm->nested.intercept & 1ULL) {
1971 		/*
1972 		 * The #vmexit can't be emulated here directly because this
1973 		 * code path runs with irqs and preemption disabled. A
1974 		 * #vmexit emulation might sleep. Only signal request for
1975 		 * the #vmexit here.
1976 		 */
1977 		svm->nested.exit_required = true;
1978 		trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
1979 		return false;
1980 	}
1981 
1982 	return true;
1983 }
1984 
1985 /* This function returns true if it is safe to enable the nmi window */
1986 static inline bool nested_svm_nmi(struct vcpu_svm *svm)
1987 {
1988 	if (!is_guest_mode(&svm->vcpu))
1989 		return true;
1990 
1991 	if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI)))
1992 		return true;
1993 
1994 	svm->vmcb->control.exit_code = SVM_EXIT_NMI;
1995 	svm->nested.exit_required = true;
1996 
1997 	return false;
1998 }
1999 
2000 static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
2001 {
2002 	struct page *page;
2003 
2004 	might_sleep();
2005 
2006 	page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
2007 	if (is_error_page(page))
2008 		goto error;
2009 
2010 	*_page = page;
2011 
2012 	return kmap(page);
2013 
2014 error:
2015 	kvm_release_page_clean(page);
2016 	kvm_inject_gp(&svm->vcpu, 0);
2017 
2018 	return NULL;
2019 }
2020 
2021 static void nested_svm_unmap(struct page *page)
2022 {
2023 	kunmap(page);
2024 	kvm_release_page_dirty(page);
2025 }
2026 
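/*
 * Consult the L1 hypervisor's I/O permission map (IOPM): one bit per
 * port, so port N is bit (N % 8) of byte (N / 8).  For example, port
 * 0x3f8 maps to byte 0x7f, bit 0.  A set bit, or a failure to read the
 * IOPM byte, reflects the I/O exit to the L1 hypervisor.
 */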
2027 static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
2028 {
2029 	unsigned port;
2030 	u8 val, bit;
2031 	u64 gpa;
2032 
2033 	if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
2034 		return NESTED_EXIT_HOST;
2035 
2036 	port = svm->vmcb->control.exit_info_1 >> 16;
2037 	gpa  = svm->nested.vmcb_iopm + (port / 8);
2038 	bit  = port % 8;
2039 	val  = 0;
2040 
2041 	if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, 1))
2042 		return NESTED_EXIT_DONE;
2043 
2044 	return (val & (1 << bit)) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
2045 }
2046 
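/*
 * Consult the L1 hypervisor's MSR permission map.  svm_msrpm_offset()
 * yields the 32-bit word that covers the MSR; within that word each MSR
 * owns two adjacent bits (read intercept, then write intercept), hence
 * the mask 1 << (2 * (msr & 0xf) + write).  MSRs outside the known
 * ranges are always reflected to L1.
 */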
2047 static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
2048 {
2049 	u32 offset, msr, value;
2050 	int write, mask;
2051 
2052 	if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
2053 		return NESTED_EXIT_HOST;
2054 
2055 	msr    = svm->vcpu.arch.regs[VCPU_REGS_RCX];
2056 	offset = svm_msrpm_offset(msr);
2057 	write  = svm->vmcb->control.exit_info_1 & 1;
2058 	mask   = 1 << ((2 * (msr & 0xf)) + write);
2059 
2060 	if (offset == MSR_INVALID)
2061 		return NESTED_EXIT_DONE;
2062 
2063 	/* Offset is in 32-bit units but we need it in 8-bit (byte) units */
2064 	offset *= 4;
2065 
2066 	if (kvm_read_guest(svm->vcpu.kvm, svm->nested.vmcb_msrpm + offset, &value, 4))
2067 		return NESTED_EXIT_DONE;
2068 
2069 	return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
2070 }
2071 
2072 static int nested_svm_exit_special(struct vcpu_svm *svm)
2073 {
2074 	u32 exit_code = svm->vmcb->control.exit_code;
2075 
2076 	switch (exit_code) {
2077 	case SVM_EXIT_INTR:
2078 	case SVM_EXIT_NMI:
2079 	case SVM_EXIT_EXCP_BASE + MC_VECTOR:
2080 		return NESTED_EXIT_HOST;
2081 	case SVM_EXIT_NPF:
2082 		/* For now we are always handling NPFs when using them */
2083 		if (npt_enabled)
2084 			return NESTED_EXIT_HOST;
2085 		break;
2086 	case SVM_EXIT_EXCP_BASE + PF_VECTOR:
2087 		/* When we're shadowing, trap PFs, but not async PF */
2088 		if (!npt_enabled && svm->apf_reason == 0)
2089 			return NESTED_EXIT_HOST;
2090 		break;
2091 	case SVM_EXIT_EXCP_BASE + NM_VECTOR:
2092 		nm_interception(svm);
2093 		break;
2094 	default:
2095 		break;
2096 	}
2097 
2098 	return NESTED_EXIT_CONTINUE;
2099 }
2100 
2101 /*
2102  * Returns NESTED_EXIT_DONE if the L1 hypervisor intercepts this #vmexit
2103  */
2104 static int nested_svm_intercept(struct vcpu_svm *svm)
2105 {
2106 	u32 exit_code = svm->vmcb->control.exit_code;
2107 	int vmexit = NESTED_EXIT_HOST;
2108 
2109 	switch (exit_code) {
2110 	case SVM_EXIT_MSR:
2111 		vmexit = nested_svm_exit_handled_msr(svm);
2112 		break;
2113 	case SVM_EXIT_IOIO:
2114 		vmexit = nested_svm_intercept_ioio(svm);
2115 		break;
2116 	case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
2117 		u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0);
2118 		if (svm->nested.intercept_cr & bit)
2119 			vmexit = NESTED_EXIT_DONE;
2120 		break;
2121 	}
2122 	case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
2123 		u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0);
2124 		if (svm->nested.intercept_dr & bit)
2125 			vmexit = NESTED_EXIT_DONE;
2126 		break;
2127 	}
2128 	case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
2129 		u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
2130 		if (svm->nested.intercept_exceptions & excp_bits)
2131 			vmexit = NESTED_EXIT_DONE;
2132 		/* an async page fault always causes a vmexit */
2133 		else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
2134 			 svm->apf_reason != 0)
2135 			vmexit = NESTED_EXIT_DONE;
2136 		break;
2137 	}
2138 	case SVM_EXIT_ERR: {
2139 		vmexit = NESTED_EXIT_DONE;
2140 		break;
2141 	}
2142 	default: {
2143 		u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
2144 		if (svm->nested.intercept & exit_bits)
2145 			vmexit = NESTED_EXIT_DONE;
2146 	}
2147 	}
2148 
2149 	return vmexit;
2150 }
2151 
2152 static int nested_svm_exit_handled(struct vcpu_svm *svm)
2153 {
2154 	int vmexit;
2155 
2156 	vmexit = nested_svm_intercept(svm);
2157 
2158 	if (vmexit == NESTED_EXIT_DONE)
2159 		nested_svm_vmexit(svm);
2160 
2161 	return vmexit;
2162 }
2163 
2164 static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb)
2165 {
2166 	struct vmcb_control_area *dst  = &dst_vmcb->control;
2167 	struct vmcb_control_area *from = &from_vmcb->control;
2168 
2169 	dst->intercept_cr         = from->intercept_cr;
2170 	dst->intercept_dr         = from->intercept_dr;
2171 	dst->intercept_exceptions = from->intercept_exceptions;
2172 	dst->intercept            = from->intercept;
2173 	dst->iopm_base_pa         = from->iopm_base_pa;
2174 	dst->msrpm_base_pa        = from->msrpm_base_pa;
2175 	dst->tsc_offset           = from->tsc_offset;
2176 	dst->asid                 = from->asid;
2177 	dst->tlb_ctl              = from->tlb_ctl;
2178 	dst->int_ctl              = from->int_ctl;
2179 	dst->int_vector           = from->int_vector;
2180 	dst->int_state            = from->int_state;
2181 	dst->exit_code            = from->exit_code;
2182 	dst->exit_code_hi         = from->exit_code_hi;
2183 	dst->exit_info_1          = from->exit_info_1;
2184 	dst->exit_info_2          = from->exit_info_2;
2185 	dst->exit_int_info        = from->exit_int_info;
2186 	dst->exit_int_info_err    = from->exit_int_info_err;
2187 	dst->nested_ctl           = from->nested_ctl;
2188 	dst->event_inj            = from->event_inj;
2189 	dst->event_inj_err        = from->event_inj_err;
2190 	dst->nested_cr3           = from->nested_cr3;
2191 	dst->lbr_ctl              = from->lbr_ctl;
2192 }
2193 
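/*
 * Emulate #VMEXIT from the nested guest into the L1 hypervisor: copy the
 * current VMCB state (including the exit code and interrupt state) into
 * the guest's nested VMCB, restore the host state saved in hsave at
 * VMRUN time, leave guest mode and clear GIF.
 */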
2194 static int nested_svm_vmexit(struct vcpu_svm *svm)
2195 {
2196 	struct vmcb *nested_vmcb;
2197 	struct vmcb *hsave = svm->nested.hsave;
2198 	struct vmcb *vmcb = svm->vmcb;
2199 	struct page *page;
2200 
2201 	trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
2202 				       vmcb->control.exit_info_1,
2203 				       vmcb->control.exit_info_2,
2204 				       vmcb->control.exit_int_info,
2205 				       vmcb->control.exit_int_info_err,
2206 				       KVM_ISA_SVM);
2207 
2208 	nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page);
2209 	if (!nested_vmcb)
2210 		return 1;
2211 
2212 	/* Exit Guest-Mode */
2213 	leave_guest_mode(&svm->vcpu);
2214 	svm->nested.vmcb = 0;
2215 
2216 	/* Give the current vmcb to the guest */
2217 	disable_gif(svm);
2218 
2219 	nested_vmcb->save.es     = vmcb->save.es;
2220 	nested_vmcb->save.cs     = vmcb->save.cs;
2221 	nested_vmcb->save.ss     = vmcb->save.ss;
2222 	nested_vmcb->save.ds     = vmcb->save.ds;
2223 	nested_vmcb->save.gdtr   = vmcb->save.gdtr;
2224 	nested_vmcb->save.idtr   = vmcb->save.idtr;
2225 	nested_vmcb->save.efer   = svm->vcpu.arch.efer;
2226 	nested_vmcb->save.cr0    = kvm_read_cr0(&svm->vcpu);
2227 	nested_vmcb->save.cr3    = kvm_read_cr3(&svm->vcpu);
2228 	nested_vmcb->save.cr2    = vmcb->save.cr2;
2229 	nested_vmcb->save.cr4    = svm->vcpu.arch.cr4;
2230 	nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu);
2231 	nested_vmcb->save.rip    = vmcb->save.rip;
2232 	nested_vmcb->save.rsp    = vmcb->save.rsp;
2233 	nested_vmcb->save.rax    = vmcb->save.rax;
2234 	nested_vmcb->save.dr7    = vmcb->save.dr7;
2235 	nested_vmcb->save.dr6    = vmcb->save.dr6;
2236 	nested_vmcb->save.cpl    = vmcb->save.cpl;
2237 
2238 	nested_vmcb->control.int_ctl           = vmcb->control.int_ctl;
2239 	nested_vmcb->control.int_vector        = vmcb->control.int_vector;
2240 	nested_vmcb->control.int_state         = vmcb->control.int_state;
2241 	nested_vmcb->control.exit_code         = vmcb->control.exit_code;
2242 	nested_vmcb->control.exit_code_hi      = vmcb->control.exit_code_hi;
2243 	nested_vmcb->control.exit_info_1       = vmcb->control.exit_info_1;
2244 	nested_vmcb->control.exit_info_2       = vmcb->control.exit_info_2;
2245 	nested_vmcb->control.exit_int_info     = vmcb->control.exit_int_info;
2246 	nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
2247 	nested_vmcb->control.next_rip          = vmcb->control.next_rip;
2248 
2249 	/*
2250 	 * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
2251 	 * to make sure that we do not lose injected events. So check event_inj
2252 	 * here and copy it to exit_int_info if it is valid.
2253 	 * Exit_int_info and event_inj can't both be valid because the case
2254 	 * below only happens on a VMRUN instruction intercept which has
2255 	 * no valid exit_int_info set.
2256 	 */
2257 	if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
2258 		struct vmcb_control_area *nc = &nested_vmcb->control;
2259 
2260 		nc->exit_int_info     = vmcb->control.event_inj;
2261 		nc->exit_int_info_err = vmcb->control.event_inj_err;
2262 	}
2263 
2264 	nested_vmcb->control.tlb_ctl           = 0;
2265 	nested_vmcb->control.event_inj         = 0;
2266 	nested_vmcb->control.event_inj_err     = 0;
2267 
2268 	/* We always set V_INTR_MASKING and remember the old value in hflags */
2269 	if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
2270 		nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
2271 
2272 	/* Restore the original control entries */
2273 	copy_vmcb_control_area(vmcb, hsave);
2274 
2275 	kvm_clear_exception_queue(&svm->vcpu);
2276 	kvm_clear_interrupt_queue(&svm->vcpu);
2277 
2278 	svm->nested.nested_cr3 = 0;
2279 
2280 	/* Restore selected save entries */
2281 	svm->vmcb->save.es = hsave->save.es;
2282 	svm->vmcb->save.cs = hsave->save.cs;
2283 	svm->vmcb->save.ss = hsave->save.ss;
2284 	svm->vmcb->save.ds = hsave->save.ds;
2285 	svm->vmcb->save.gdtr = hsave->save.gdtr;
2286 	svm->vmcb->save.idtr = hsave->save.idtr;
2287 	kvm_set_rflags(&svm->vcpu, hsave->save.rflags);
2288 	svm_set_efer(&svm->vcpu, hsave->save.efer);
2289 	svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
2290 	svm_set_cr4(&svm->vcpu, hsave->save.cr4);
2291 	if (npt_enabled) {
2292 		svm->vmcb->save.cr3 = hsave->save.cr3;
2293 		svm->vcpu.arch.cr3 = hsave->save.cr3;
2294 	} else {
2295 		(void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
2296 	}
2297 	kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);
2298 	kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp);
2299 	kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip);
2300 	svm->vmcb->save.dr7 = 0;
2301 	svm->vmcb->save.cpl = 0;
2302 	svm->vmcb->control.exit_int_info = 0;
2303 
2304 	mark_all_dirty(svm->vmcb);
2305 
2306 	nested_svm_unmap(page);
2307 
2308 	nested_svm_uninit_mmu_context(&svm->vcpu);
2309 	kvm_mmu_reset_context(&svm->vcpu);
2310 	kvm_mmu_load(&svm->vcpu);
2311 
2312 	return 0;
2313 }
2314 
2315 static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
2316 {
2317 	/*
2318 	 * This function merges the msr permission bitmaps of kvm and the
2319 	 * nested vmcb. It is optimized in that it only merges the parts where
2320 	 * the kvm msr permission bitmap may contain zero bits
2321 	 */
2322 	int i;
2323 
2324 	if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
2325 		return true;
2326 
2327 	for (i = 0; i < MSRPM_OFFSETS; i++) {
2328 		u32 value, p;
2329 		u64 offset;
2330 
2331 		if (msrpm_offsets[i] == 0xffffffff)
2332 			break;
2333 
2334 		p      = msrpm_offsets[i];
2335 		offset = svm->nested.vmcb_msrpm + (p * 4);
2336 
2337 		if (kvm_read_guest(svm->vcpu.kvm, offset, &value, 4))
2338 			return false;
2339 
2340 		svm->nested.msrpm[p] = svm->msrpm[p] | value;
2341 	}
2342 
2343 	svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm);
2344 
2345 	return true;
2346 }
2347 
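/*
 * Minimal consistency checks on the VMCB supplied by the L1 hypervisor:
 * the VMRUN intercept must be set, the ASID must be non-zero, and nested
 * paging may only be requested when the host itself uses NPT.
 */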
2348 static bool nested_vmcb_checks(struct vmcb *vmcb)
2349 {
2350 	if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0)
2351 		return false;
2352 
2353 	if (vmcb->control.asid == 0)
2354 		return false;
2355 
2356 	if (vmcb->control.nested_ctl && !npt_enabled)
2357 		return false;
2358 
2359 	return true;
2360 }
2361 
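/*
 * Emulate VMRUN: map the nested VMCB addressed by RAX, validate it, save
 * the current (L1) state into hsave, load the nested guest state and
 * control fields, merge the L1 intercepts with KVM's own, enter guest
 * mode and set GIF.  If the nested VMCB is invalid, an SVM_EXIT_ERR exit
 * code is written back into it for L1 to see.
 */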
2362 static bool nested_svm_vmrun(struct vcpu_svm *svm)
2363 {
2364 	struct vmcb *nested_vmcb;
2365 	struct vmcb *hsave = svm->nested.hsave;
2366 	struct vmcb *vmcb = svm->vmcb;
2367 	struct page *page;
2368 	u64 vmcb_gpa;
2369 
2370 	vmcb_gpa = svm->vmcb->save.rax;
2371 
2372 	nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
2373 	if (!nested_vmcb)
2374 		return false;
2375 
2376 	if (!nested_vmcb_checks(nested_vmcb)) {
2377 		nested_vmcb->control.exit_code    = SVM_EXIT_ERR;
2378 		nested_vmcb->control.exit_code_hi = 0;
2379 		nested_vmcb->control.exit_info_1  = 0;
2380 		nested_vmcb->control.exit_info_2  = 0;
2381 
2382 		nested_svm_unmap(page);
2383 
2384 		return false;
2385 	}
2386 
2387 	trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
2388 			       nested_vmcb->save.rip,
2389 			       nested_vmcb->control.int_ctl,
2390 			       nested_vmcb->control.event_inj,
2391 			       nested_vmcb->control.nested_ctl);
2392 
2393 	trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff,
2394 				    nested_vmcb->control.intercept_cr >> 16,
2395 				    nested_vmcb->control.intercept_exceptions,
2396 				    nested_vmcb->control.intercept);
2397 
2398 	/* Clear internal status */
2399 	kvm_clear_exception_queue(&svm->vcpu);
2400 	kvm_clear_interrupt_queue(&svm->vcpu);
2401 
2402 	/*
2403 	 * Save the old vmcb, so we don't need to pick what we save, but can
2404 	 * restore everything when a VMEXIT occurs
2405 	 */
2406 	hsave->save.es     = vmcb->save.es;
2407 	hsave->save.cs     = vmcb->save.cs;
2408 	hsave->save.ss     = vmcb->save.ss;
2409 	hsave->save.ds     = vmcb->save.ds;
2410 	hsave->save.gdtr   = vmcb->save.gdtr;
2411 	hsave->save.idtr   = vmcb->save.idtr;
2412 	hsave->save.efer   = svm->vcpu.arch.efer;
2413 	hsave->save.cr0    = kvm_read_cr0(&svm->vcpu);
2414 	hsave->save.cr4    = svm->vcpu.arch.cr4;
2415 	hsave->save.rflags = kvm_get_rflags(&svm->vcpu);
2416 	hsave->save.rip    = kvm_rip_read(&svm->vcpu);
2417 	hsave->save.rsp    = vmcb->save.rsp;
2418 	hsave->save.rax    = vmcb->save.rax;
2419 	if (npt_enabled)
2420 		hsave->save.cr3    = vmcb->save.cr3;
2421 	else
2422 		hsave->save.cr3    = kvm_read_cr3(&svm->vcpu);
2423 
2424 	copy_vmcb_control_area(hsave, vmcb);
2425 
2426 	if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
2427 		svm->vcpu.arch.hflags |= HF_HIF_MASK;
2428 	else
2429 		svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
2430 
2431 	if (nested_vmcb->control.nested_ctl) {
2432 		kvm_mmu_unload(&svm->vcpu);
2433 		svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
2434 		nested_svm_init_mmu_context(&svm->vcpu);
2435 	}
2436 
2437 	/* Load the nested guest state */
2438 	svm->vmcb->save.es = nested_vmcb->save.es;
2439 	svm->vmcb->save.cs = nested_vmcb->save.cs;
2440 	svm->vmcb->save.ss = nested_vmcb->save.ss;
2441 	svm->vmcb->save.ds = nested_vmcb->save.ds;
2442 	svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
2443 	svm->vmcb->save.idtr = nested_vmcb->save.idtr;
2444 	kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags);
2445 	svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
2446 	svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
2447 	svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
2448 	if (npt_enabled) {
2449 		svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
2450 		svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
2451 	} else
2452 		(void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
2453 
2454 	/* Guest paging mode is active - reset mmu */
2455 	kvm_mmu_reset_context(&svm->vcpu);
2456 
2457 	svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
2458 	kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
2459 	kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
2460 	kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
2461 
2462 	/* In case we don't even reach vcpu_run, the fields are not updated */
2463 	svm->vmcb->save.rax = nested_vmcb->save.rax;
2464 	svm->vmcb->save.rsp = nested_vmcb->save.rsp;
2465 	svm->vmcb->save.rip = nested_vmcb->save.rip;
2466 	svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
2467 	svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
2468 	svm->vmcb->save.cpl = nested_vmcb->save.cpl;
2469 
2470 	svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
2471 	svm->nested.vmcb_iopm  = nested_vmcb->control.iopm_base_pa  & ~0x0fffULL;
2472 
2473 	/* cache intercepts */
2474 	svm->nested.intercept_cr         = nested_vmcb->control.intercept_cr;
2475 	svm->nested.intercept_dr         = nested_vmcb->control.intercept_dr;
2476 	svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
2477 	svm->nested.intercept            = nested_vmcb->control.intercept;
2478 
2479 	svm_flush_tlb(&svm->vcpu);
2480 	svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
2481 	if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
2482 		svm->vcpu.arch.hflags |= HF_VINTR_MASK;
2483 	else
2484 		svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
2485 
2486 	if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
2487 		/* We only want the cr8 intercept bits of the guest */
2488 		clr_cr_intercept(svm, INTERCEPT_CR8_READ);
2489 		clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
2490 	}
2491 
2492 	/* We don't want to see VMMCALLs from a nested guest */
2493 	clr_intercept(svm, INTERCEPT_VMMCALL);
2494 
2495 	svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl;
2496 	svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
2497 	svm->vmcb->control.int_state = nested_vmcb->control.int_state;
2498 	svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
2499 	svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
2500 	svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
2501 
2502 	nested_svm_unmap(page);
2503 
2504 	/* Enter Guest-Mode */
2505 	enter_guest_mode(&svm->vcpu);
2506 
2507 	/*
2508 	 * Merge guest and host intercepts - must be called with the vcpu in
2509 	 * guest mode to take effect here
2510 	 */
2511 	recalc_intercepts(svm);
2512 
2513 	svm->nested.vmcb = vmcb_gpa;
2514 
2515 	enable_gif(svm);
2516 
2517 	mark_all_dirty(svm->vmcb);
2518 
2519 	return true;
2520 }
2521 
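/*
 * VMLOAD/VMSAVE transfer the state that VMRUN/#VMEXIT do not touch:
 * FS/GS/TR/LDTR (including hidden state), KERNEL_GS_BASE, the SYSCALL
 * MSRs (STAR/LSTAR/CSTAR/SFMASK) and the SYSENTER MSRs.  Here this is
 * emulated by copying those fields between two VMCBs.
 */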
2522 static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
2523 {
2524 	to_vmcb->save.fs = from_vmcb->save.fs;
2525 	to_vmcb->save.gs = from_vmcb->save.gs;
2526 	to_vmcb->save.tr = from_vmcb->save.tr;
2527 	to_vmcb->save.ldtr = from_vmcb->save.ldtr;
2528 	to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
2529 	to_vmcb->save.star = from_vmcb->save.star;
2530 	to_vmcb->save.lstar = from_vmcb->save.lstar;
2531 	to_vmcb->save.cstar = from_vmcb->save.cstar;
2532 	to_vmcb->save.sfmask = from_vmcb->save.sfmask;
2533 	to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
2534 	to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
2535 	to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
2536 }
2537 
2538 static int vmload_interception(struct vcpu_svm *svm)
2539 {
2540 	struct vmcb *nested_vmcb;
2541 	struct page *page;
2542 
2543 	if (nested_svm_check_permissions(svm))
2544 		return 1;
2545 
2546 	nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
2547 	if (!nested_vmcb)
2548 		return 1;
2549 
2550 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2551 	skip_emulated_instruction(&svm->vcpu);
2552 
2553 	nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
2554 	nested_svm_unmap(page);
2555 
2556 	return 1;
2557 }
2558 
2559 static int vmsave_interception(struct vcpu_svm *svm)
2560 {
2561 	struct vmcb *nested_vmcb;
2562 	struct page *page;
2563 
2564 	if (nested_svm_check_permissions(svm))
2565 		return 1;
2566 
2567 	nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
2568 	if (!nested_vmcb)
2569 		return 1;
2570 
2571 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2572 	skip_emulated_instruction(&svm->vcpu);
2573 
2574 	nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
2575 	nested_svm_unmap(page);
2576 
2577 	return 1;
2578 }
2579 
2580 static int vmrun_interception(struct vcpu_svm *svm)
2581 {
2582 	if (nested_svm_check_permissions(svm))
2583 		return 1;
2584 
2585 	/* Save rip after vmrun instruction */
2586 	kvm_rip_write(&svm->vcpu, kvm_rip_read(&svm->vcpu) + 3);
2587 
2588 	if (!nested_svm_vmrun(svm))
2589 		return 1;
2590 
2591 	if (!nested_svm_vmrun_msrpm(svm))
2592 		goto failed;
2593 
2594 	return 1;
2595 
2596 failed:
2597 
2598 	svm->vmcb->control.exit_code    = SVM_EXIT_ERR;
2599 	svm->vmcb->control.exit_code_hi = 0;
2600 	svm->vmcb->control.exit_info_1  = 0;
2601 	svm->vmcb->control.exit_info_2  = 0;
2602 
2603 	nested_svm_vmexit(svm);
2604 
2605 	return 1;
2606 }
2607 
2608 static int stgi_interception(struct vcpu_svm *svm)
2609 {
2610 	if (nested_svm_check_permissions(svm))
2611 		return 1;
2612 
2613 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2614 	skip_emulated_instruction(&svm->vcpu);
2615 	kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
2616 
2617 	enable_gif(svm);
2618 
2619 	return 1;
2620 }
2621 
2622 static int clgi_interception(struct vcpu_svm *svm)
2623 {
2624 	if (nested_svm_check_permissions(svm))
2625 		return 1;
2626 
2627 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2628 	skip_emulated_instruction(&svm->vcpu);
2629 
2630 	disable_gif(svm);
2631 
2632 	/* After a CLGI no interrupts should come */
2633 	svm_clear_vintr(svm);
2634 	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
2635 
2636 	mark_dirty(svm->vmcb, VMCB_INTR);
2637 
2638 	return 1;
2639 }
2640 
2641 static int invlpga_interception(struct vcpu_svm *svm)
2642 {
2643 	struct kvm_vcpu *vcpu = &svm->vcpu;
2644 
2645 	trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX],
2646 			  vcpu->arch.regs[VCPU_REGS_RAX]);
2647 
2648 	/* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
2649 	kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]);
2650 
2651 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2652 	skip_emulated_instruction(&svm->vcpu);
2653 	return 1;
2654 }
2655 
2656 static int skinit_interception(struct vcpu_svm *svm)
2657 {
2658 	trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]);
2659 
2660 	kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2661 	return 1;
2662 }
2663 
2664 static int xsetbv_interception(struct vcpu_svm *svm)
2665 {
2666 	u64 new_bv = kvm_read_edx_eax(&svm->vcpu);
2667 	u32 index = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
2668 
2669 	if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) {
2670 		svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2671 		skip_emulated_instruction(&svm->vcpu);
2672 	}
2673 
2674 	return 1;
2675 }
2676 
2677 static int invalid_op_interception(struct vcpu_svm *svm)
2678 {
2679 	kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2680 	return 1;
2681 }
2682 
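/*
 * Task switch intercept: exit_info_1 carries the target TSS selector and
 * exit_info_2 encodes the cause (IRET, far jump, or a gate reached via
 * an event) plus an optional error code.  The actual switch is carried
 * out by the common kvm_task_switch() emulation.
 */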
2683 static int task_switch_interception(struct vcpu_svm *svm)
2684 {
2685 	u16 tss_selector;
2686 	int reason;
2687 	int int_type = svm->vmcb->control.exit_int_info &
2688 		SVM_EXITINTINFO_TYPE_MASK;
2689 	int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK;
2690 	uint32_t type =
2691 		svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK;
2692 	uint32_t idt_v =
2693 		svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID;
2694 	bool has_error_code = false;
2695 	u32 error_code = 0;
2696 
2697 	tss_selector = (u16)svm->vmcb->control.exit_info_1;
2698 
2699 	if (svm->vmcb->control.exit_info_2 &
2700 	    (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
2701 		reason = TASK_SWITCH_IRET;
2702 	else if (svm->vmcb->control.exit_info_2 &
2703 		 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
2704 		reason = TASK_SWITCH_JMP;
2705 	else if (idt_v)
2706 		reason = TASK_SWITCH_GATE;
2707 	else
2708 		reason = TASK_SWITCH_CALL;
2709 
2710 	if (reason == TASK_SWITCH_GATE) {
2711 		switch (type) {
2712 		case SVM_EXITINTINFO_TYPE_NMI:
2713 			svm->vcpu.arch.nmi_injected = false;
2714 			break;
2715 		case SVM_EXITINTINFO_TYPE_EXEPT:
2716 			if (svm->vmcb->control.exit_info_2 &
2717 			    (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) {
2718 				has_error_code = true;
2719 				error_code =
2720 					(u32)svm->vmcb->control.exit_info_2;
2721 			}
2722 			kvm_clear_exception_queue(&svm->vcpu);
2723 			break;
2724 		case SVM_EXITINTINFO_TYPE_INTR:
2725 			kvm_clear_interrupt_queue(&svm->vcpu);
2726 			break;
2727 		default:
2728 			break;
2729 		}
2730 	}
2731 
2732 	if (reason != TASK_SWITCH_GATE ||
2733 	    int_type == SVM_EXITINTINFO_TYPE_SOFT ||
2734 	    (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
2735 	     (int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
2736 		skip_emulated_instruction(&svm->vcpu);
2737 
2738 	if (kvm_task_switch(&svm->vcpu, tss_selector, reason,
2739 				has_error_code, error_code) == EMULATE_FAIL) {
2740 		svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
2741 		svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
2742 		svm->vcpu.run->internal.ndata = 0;
2743 		return 0;
2744 	}
2745 	return 1;
2746 }
2747 
2748 static int cpuid_interception(struct vcpu_svm *svm)
2749 {
2750 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
2751 	kvm_emulate_cpuid(&svm->vcpu);
2752 	return 1;
2753 }
2754 
2755 static int iret_interception(struct vcpu_svm *svm)
2756 {
2757 	++svm->vcpu.stat.nmi_window_exits;
2758 	clr_intercept(svm, INTERCEPT_IRET);
2759 	svm->vcpu.arch.hflags |= HF_IRET_MASK;
2760 	svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
2761 	return 1;
2762 }
2763 
2764 static int invlpg_interception(struct vcpu_svm *svm)
2765 {
2766 	if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
2767 		return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
2768 
2769 	kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
2770 	skip_emulated_instruction(&svm->vcpu);
2771 	return 1;
2772 }
2773 
2774 static int emulate_on_interception(struct vcpu_svm *svm)
2775 {
2776 	return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
2777 }
2778 
2779 static int rdpmc_interception(struct vcpu_svm *svm)
2780 {
2781 	int err;
2782 
2783 	if (!static_cpu_has(X86_FEATURE_NRIPS))
2784 		return emulate_on_interception(svm);
2785 
2786 	err = kvm_rdpmc(&svm->vcpu);
2787 	kvm_complete_insn_gp(&svm->vcpu, err);
2788 
2789 	return 1;
2790 }
2791 
2792 bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val)
2793 {
2794 	unsigned long cr0 = svm->vcpu.arch.cr0;
2795 	bool ret = false;
2796 	u64 intercept;
2797 
2798 	intercept = svm->nested.intercept;
2799 
2800 	if (!is_guest_mode(&svm->vcpu) ||
2801 	    (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))))
2802 		return false;
2803 
2804 	cr0 &= ~SVM_CR0_SELECTIVE_MASK;
2805 	val &= ~SVM_CR0_SELECTIVE_MASK;
2806 
2807 	if (cr0 ^ val) {
2808 		svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
2809 		ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE);
2810 	}
2811 
2812 	return ret;
2813 }
2814 
2815 #define CR_VALID (1ULL << 63)
2816 
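/*
 * With decode assists the CPU reports the GPR involved in a MOV CRn in
 * exit_info_1; bit 63 (CR_VALID) signals that this information is
 * present.  Without it we fall back to full instruction emulation.
 * Exit codes 0-15 are CR reads, 16-31 are CR writes.
 */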
2817 static int cr_interception(struct vcpu_svm *svm)
2818 {
2819 	int reg, cr;
2820 	unsigned long val;
2821 	int err;
2822 
2823 	if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
2824 		return emulate_on_interception(svm);
2825 
2826 	if (unlikely((svm->vmcb->control.exit_info_1 & CR_VALID) == 0))
2827 		return emulate_on_interception(svm);
2828 
2829 	reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
2830 	cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
2831 
2832 	err = 0;
2833 	if (cr >= 16) { /* mov to cr */
2834 		cr -= 16;
2835 		val = kvm_register_read(&svm->vcpu, reg);
2836 		switch (cr) {
2837 		case 0:
2838 			if (!check_selective_cr0_intercepted(svm, val))
2839 				err = kvm_set_cr0(&svm->vcpu, val);
2840 			else
2841 				return 1;
2842 
2843 			break;
2844 		case 3:
2845 			err = kvm_set_cr3(&svm->vcpu, val);
2846 			break;
2847 		case 4:
2848 			err = kvm_set_cr4(&svm->vcpu, val);
2849 			break;
2850 		case 8:
2851 			err = kvm_set_cr8(&svm->vcpu, val);
2852 			break;
2853 		default:
2854 			WARN(1, "unhandled write to CR%d", cr);
2855 			kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2856 			return 1;
2857 		}
2858 	} else { /* mov from cr */
2859 		switch (cr) {
2860 		case 0:
2861 			val = kvm_read_cr0(&svm->vcpu);
2862 			break;
2863 		case 2:
2864 			val = svm->vcpu.arch.cr2;
2865 			break;
2866 		case 3:
2867 			val = kvm_read_cr3(&svm->vcpu);
2868 			break;
2869 		case 4:
2870 			val = kvm_read_cr4(&svm->vcpu);
2871 			break;
2872 		case 8:
2873 			val = kvm_get_cr8(&svm->vcpu);
2874 			break;
2875 		default:
2876 			WARN(1, "unhandled read from CR%d", cr);
2877 			kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2878 			return 1;
2879 		}
2880 		kvm_register_write(&svm->vcpu, reg, val);
2881 	}
2882 	kvm_complete_insn_gp(&svm->vcpu, err);
2883 
2884 	return 1;
2885 }
2886 
2887 static int dr_interception(struct vcpu_svm *svm)
2888 {
2889 	int reg, dr;
2890 	unsigned long val;
2891 	int err;
2892 
2893 	if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS))
2894 		return emulate_on_interception(svm);
2895 
2896 	reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
2897 	dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0;
2898 
2899 	if (dr >= 16) { /* mov to DRn */
2900 		val = kvm_register_read(&svm->vcpu, reg);
2901 		kvm_set_dr(&svm->vcpu, dr - 16, val);
2902 	} else {
2903 		err = kvm_get_dr(&svm->vcpu, dr, &val);
2904 		if (!err)
2905 			kvm_register_write(&svm->vcpu, reg, val);
2906 	}
2907 
2908 	skip_emulated_instruction(&svm->vcpu);
2909 
2910 	return 1;
2911 }
2912 
2913 static int cr8_write_interception(struct vcpu_svm *svm)
2914 {
2915 	struct kvm_run *kvm_run = svm->vcpu.run;
2916 	int r;
2917 
2918 	u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
2919 	/* instruction emulation calls kvm_set_cr8() */
2920 	r = cr_interception(svm);
2921 	if (irqchip_in_kernel(svm->vcpu.kvm)) {
2922 		clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
2923 		return r;
2924 	}
2925 	if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
2926 		return r;
2927 	kvm_run->exit_reason = KVM_EXIT_SET_TPR;
2928 	return 0;
2929 }
2930 
2931 u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu)
2932 {
2933 	struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu));
2934 	return vmcb->control.tsc_offset +
2935 		svm_scale_tsc(vcpu, native_read_tsc());
2936 }
2937 
2938 static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
2939 {
2940 	struct vcpu_svm *svm = to_svm(vcpu);
2941 
2942 	switch (ecx) {
2943 	case MSR_IA32_TSC: {
2944 		*data = svm->vmcb->control.tsc_offset +
2945 			svm_scale_tsc(vcpu, native_read_tsc());
2946 
2947 		break;
2948 	}
2949 	case MSR_STAR:
2950 		*data = svm->vmcb->save.star;
2951 		break;
2952 #ifdef CONFIG_X86_64
2953 	case MSR_LSTAR:
2954 		*data = svm->vmcb->save.lstar;
2955 		break;
2956 	case MSR_CSTAR:
2957 		*data = svm->vmcb->save.cstar;
2958 		break;
2959 	case MSR_KERNEL_GS_BASE:
2960 		*data = svm->vmcb->save.kernel_gs_base;
2961 		break;
2962 	case MSR_SYSCALL_MASK:
2963 		*data = svm->vmcb->save.sfmask;
2964 		break;
2965 #endif
2966 	case MSR_IA32_SYSENTER_CS:
2967 		*data = svm->vmcb->save.sysenter_cs;
2968 		break;
2969 	case MSR_IA32_SYSENTER_EIP:
2970 		*data = svm->sysenter_eip;
2971 		break;
2972 	case MSR_IA32_SYSENTER_ESP:
2973 		*data = svm->sysenter_esp;
2974 		break;
2975 	/*
2976 	 * Nobody will change the following 5 values in the VMCB so we can
2977 	 * safely return them on rdmsr. They will always be 0 until LBRV is
2978 	 * implemented.
2979 	 */
2980 	case MSR_IA32_DEBUGCTLMSR:
2981 		*data = svm->vmcb->save.dbgctl;
2982 		break;
2983 	case MSR_IA32_LASTBRANCHFROMIP:
2984 		*data = svm->vmcb->save.br_from;
2985 		break;
2986 	case MSR_IA32_LASTBRANCHTOIP:
2987 		*data = svm->vmcb->save.br_to;
2988 		break;
2989 	case MSR_IA32_LASTINTFROMIP:
2990 		*data = svm->vmcb->save.last_excp_from;
2991 		break;
2992 	case MSR_IA32_LASTINTTOIP:
2993 		*data = svm->vmcb->save.last_excp_to;
2994 		break;
2995 	case MSR_VM_HSAVE_PA:
2996 		*data = svm->nested.hsave_msr;
2997 		break;
2998 	case MSR_VM_CR:
2999 		*data = svm->nested.vm_cr_msr;
3000 		break;
3001 	case MSR_IA32_UCODE_REV:
3002 		*data = 0x01000065;
3003 		break;
3004 	default:
3005 		return kvm_get_msr_common(vcpu, ecx, data);
3006 	}
3007 	return 0;
3008 }
3009 
3010 static int rdmsr_interception(struct vcpu_svm *svm)
3011 {
3012 	u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
3013 	u64 data;
3014 
3015 	if (svm_get_msr(&svm->vcpu, ecx, &data)) {
3016 		trace_kvm_msr_read_ex(ecx);
3017 		kvm_inject_gp(&svm->vcpu, 0);
3018 	} else {
3019 		trace_kvm_msr_read(ecx, data);
3020 
3021 		svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff;
3022 		svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32;
3023 		svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
3024 		skip_emulated_instruction(&svm->vcpu);
3025 	}
3026 	return 1;
3027 }
3028 
3029 static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
3030 {
3031 	struct vcpu_svm *svm = to_svm(vcpu);
3032 	int svm_dis, chg_mask;
3033 
3034 	if (data & ~SVM_VM_CR_VALID_MASK)
3035 		return 1;
3036 
3037 	chg_mask = SVM_VM_CR_VALID_MASK;
3038 
3039 	if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK)
3040 		chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK);
3041 
3042 	svm->nested.vm_cr_msr &= ~chg_mask;
3043 	svm->nested.vm_cr_msr |= (data & chg_mask);
3044 
3045 	svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK;
3046 
3047 	/* check for svm_disable while efer.svme is set */
3048 	if (svm_dis && (vcpu->arch.efer & EFER_SVME))
3049 		return 1;
3050 
3051 	return 0;
3052 }
3053 
3054 static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
3055 {
3056 	struct vcpu_svm *svm = to_svm(vcpu);
3057 
3058 	switch (ecx) {
3059 	case MSR_IA32_TSC:
3060 		kvm_write_tsc(vcpu, data);
3061 		break;
3062 	case MSR_STAR:
3063 		svm->vmcb->save.star = data;
3064 		break;
3065 #ifdef CONFIG_X86_64
3066 	case MSR_LSTAR:
3067 		svm->vmcb->save.lstar = data;
3068 		break;
3069 	case MSR_CSTAR:
3070 		svm->vmcb->save.cstar = data;
3071 		break;
3072 	case MSR_KERNEL_GS_BASE:
3073 		svm->vmcb->save.kernel_gs_base = data;
3074 		break;
3075 	case MSR_SYSCALL_MASK:
3076 		svm->vmcb->save.sfmask = data;
3077 		break;
3078 #endif
3079 	case MSR_IA32_SYSENTER_CS:
3080 		svm->vmcb->save.sysenter_cs = data;
3081 		break;
3082 	case MSR_IA32_SYSENTER_EIP:
3083 		svm->sysenter_eip = data;
3084 		svm->vmcb->save.sysenter_eip = data;
3085 		break;
3086 	case MSR_IA32_SYSENTER_ESP:
3087 		svm->sysenter_esp = data;
3088 		svm->vmcb->save.sysenter_esp = data;
3089 		break;
3090 	case MSR_IA32_DEBUGCTLMSR:
3091 		if (!boot_cpu_has(X86_FEATURE_LBRV)) {
3092 			pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
3093 					__func__, data);
3094 			break;
3095 		}
3096 		if (data & DEBUGCTL_RESERVED_BITS)
3097 			return 1;
3098 
3099 		svm->vmcb->save.dbgctl = data;
3100 		mark_dirty(svm->vmcb, VMCB_LBR);
3101 		if (data & (1ULL<<0))
3102 			svm_enable_lbrv(svm);
3103 		else
3104 			svm_disable_lbrv(svm);
3105 		break;
3106 	case MSR_VM_HSAVE_PA:
3107 		svm->nested.hsave_msr = data;
3108 		break;
3109 	case MSR_VM_CR:
3110 		return svm_set_vm_cr(vcpu, data);
3111 	case MSR_VM_IGNNE:
3112 		pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
3113 		break;
3114 	default:
3115 		return kvm_set_msr_common(vcpu, ecx, data);
3116 	}
3117 	return 0;
3118 }
3119 
3120 static int wrmsr_interception(struct vcpu_svm *svm)
3121 {
3122 	u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
3123 	u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
3124 		| ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
3125 
3126 
3127 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
3128 	if (svm_set_msr(&svm->vcpu, ecx, data)) {
3129 		trace_kvm_msr_write_ex(ecx, data);
3130 		kvm_inject_gp(&svm->vcpu, 0);
3131 	} else {
3132 		trace_kvm_msr_write(ecx, data);
3133 		skip_emulated_instruction(&svm->vcpu);
3134 	}
3135 	return 1;
3136 }
3137 
3138 static int msr_interception(struct vcpu_svm *svm)
3139 {
3140 	if (svm->vmcb->control.exit_info_1)
3141 		return wrmsr_interception(svm);
3142 	else
3143 		return rdmsr_interception(svm);
3144 }
3145 
3146 static int interrupt_window_interception(struct vcpu_svm *svm)
3147 {
3148 	struct kvm_run *kvm_run = svm->vcpu.run;
3149 
3150 	kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
3151 	svm_clear_vintr(svm);
3152 	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
3153 	mark_dirty(svm->vmcb, VMCB_INTR);
3154 	/*
3155 	 * If user space is waiting to inject interrupts, exit as soon as
3156 	 * possible
3157 	 */
3158 	if (!irqchip_in_kernel(svm->vcpu.kvm) &&
3159 	    kvm_run->request_interrupt_window &&
3160 	    !kvm_cpu_has_interrupt(&svm->vcpu)) {
3161 		++svm->vcpu.stat.irq_window_exits;
3162 		kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
3163 		return 0;
3164 	}
3165 
3166 	return 1;
3167 }
3168 
3169 static int pause_interception(struct vcpu_svm *svm)
3170 {
3171 	kvm_vcpu_on_spin(&(svm->vcpu));
3172 	return 1;
3173 }
3174 
3175 static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
3176 	[SVM_EXIT_READ_CR0]			= cr_interception,
3177 	[SVM_EXIT_READ_CR3]			= cr_interception,
3178 	[SVM_EXIT_READ_CR4]			= cr_interception,
3179 	[SVM_EXIT_READ_CR8]			= cr_interception,
3180 	[SVM_EXIT_CR0_SEL_WRITE]		= emulate_on_interception,
3181 	[SVM_EXIT_WRITE_CR0]			= cr_interception,
3182 	[SVM_EXIT_WRITE_CR3]			= cr_interception,
3183 	[SVM_EXIT_WRITE_CR4]			= cr_interception,
3184 	[SVM_EXIT_WRITE_CR8]			= cr8_write_interception,
3185 	[SVM_EXIT_READ_DR0]			= dr_interception,
3186 	[SVM_EXIT_READ_DR1]			= dr_interception,
3187 	[SVM_EXIT_READ_DR2]			= dr_interception,
3188 	[SVM_EXIT_READ_DR3]			= dr_interception,
3189 	[SVM_EXIT_READ_DR4]			= dr_interception,
3190 	[SVM_EXIT_READ_DR5]			= dr_interception,
3191 	[SVM_EXIT_READ_DR6]			= dr_interception,
3192 	[SVM_EXIT_READ_DR7]			= dr_interception,
3193 	[SVM_EXIT_WRITE_DR0]			= dr_interception,
3194 	[SVM_EXIT_WRITE_DR1]			= dr_interception,
3195 	[SVM_EXIT_WRITE_DR2]			= dr_interception,
3196 	[SVM_EXIT_WRITE_DR3]			= dr_interception,
3197 	[SVM_EXIT_WRITE_DR4]			= dr_interception,
3198 	[SVM_EXIT_WRITE_DR5]			= dr_interception,
3199 	[SVM_EXIT_WRITE_DR6]			= dr_interception,
3200 	[SVM_EXIT_WRITE_DR7]			= dr_interception,
3201 	[SVM_EXIT_EXCP_BASE + DB_VECTOR]	= db_interception,
3202 	[SVM_EXIT_EXCP_BASE + BP_VECTOR]	= bp_interception,
3203 	[SVM_EXIT_EXCP_BASE + UD_VECTOR]	= ud_interception,
3204 	[SVM_EXIT_EXCP_BASE + PF_VECTOR]	= pf_interception,
3205 	[SVM_EXIT_EXCP_BASE + NM_VECTOR]	= nm_interception,
3206 	[SVM_EXIT_EXCP_BASE + MC_VECTOR]	= mc_interception,
3207 	[SVM_EXIT_INTR]				= intr_interception,
3208 	[SVM_EXIT_NMI]				= nmi_interception,
3209 	[SVM_EXIT_SMI]				= nop_on_interception,
3210 	[SVM_EXIT_INIT]				= nop_on_interception,
3211 	[SVM_EXIT_VINTR]			= interrupt_window_interception,
3212 	[SVM_EXIT_RDPMC]			= rdpmc_interception,
3213 	[SVM_EXIT_CPUID]			= cpuid_interception,
3214 	[SVM_EXIT_IRET]                         = iret_interception,
3215 	[SVM_EXIT_INVD]                         = emulate_on_interception,
3216 	[SVM_EXIT_PAUSE]			= pause_interception,
3217 	[SVM_EXIT_HLT]				= halt_interception,
3218 	[SVM_EXIT_INVLPG]			= invlpg_interception,
3219 	[SVM_EXIT_INVLPGA]			= invlpga_interception,
3220 	[SVM_EXIT_IOIO]				= io_interception,
3221 	[SVM_EXIT_MSR]				= msr_interception,
3222 	[SVM_EXIT_TASK_SWITCH]			= task_switch_interception,
3223 	[SVM_EXIT_SHUTDOWN]			= shutdown_interception,
3224 	[SVM_EXIT_VMRUN]			= vmrun_interception,
3225 	[SVM_EXIT_VMMCALL]			= vmmcall_interception,
3226 	[SVM_EXIT_VMLOAD]			= vmload_interception,
3227 	[SVM_EXIT_VMSAVE]			= vmsave_interception,
3228 	[SVM_EXIT_STGI]				= stgi_interception,
3229 	[SVM_EXIT_CLGI]				= clgi_interception,
3230 	[SVM_EXIT_SKINIT]			= skinit_interception,
3231 	[SVM_EXIT_WBINVD]                       = emulate_on_interception,
3232 	[SVM_EXIT_MONITOR]			= invalid_op_interception,
3233 	[SVM_EXIT_MWAIT]			= invalid_op_interception,
3234 	[SVM_EXIT_XSETBV]			= xsetbv_interception,
3235 	[SVM_EXIT_NPF]				= pf_interception,
3236 };
3237 
3238 static void dump_vmcb(struct kvm_vcpu *vcpu)
3239 {
3240 	struct vcpu_svm *svm = to_svm(vcpu);
3241 	struct vmcb_control_area *control = &svm->vmcb->control;
3242 	struct vmcb_save_area *save = &svm->vmcb->save;
3243 
3244 	pr_err("VMCB Control Area:\n");
3245 	pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff);
3246 	pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16);
3247 	pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff);
3248 	pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16);
3249 	pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions);
3250 	pr_err("%-20s%016llx\n", "intercepts:", control->intercept);
3251 	pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count);
3252 	pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa);
3253 	pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa);
3254 	pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset);
3255 	pr_err("%-20s%d\n", "asid:", control->asid);
3256 	pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl);
3257 	pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl);
3258 	pr_err("%-20s%08x\n", "int_vector:", control->int_vector);
3259 	pr_err("%-20s%08x\n", "int_state:", control->int_state);
3260 	pr_err("%-20s%08x\n", "exit_code:", control->exit_code);
3261 	pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1);
3262 	pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2);
3263 	pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info);
3264 	pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err);
3265 	pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
3266 	pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
3267 	pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
3268 	pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
3269 	pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl);
3270 	pr_err("%-20s%016llx\n", "next_rip:", control->next_rip);
3271 	pr_err("VMCB State Save Area:\n");
3272 	pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3273 	       "es:",
3274 	       save->es.selector, save->es.attrib,
3275 	       save->es.limit, save->es.base);
3276 	pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3277 	       "cs:",
3278 	       save->cs.selector, save->cs.attrib,
3279 	       save->cs.limit, save->cs.base);
3280 	pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3281 	       "ss:",
3282 	       save->ss.selector, save->ss.attrib,
3283 	       save->ss.limit, save->ss.base);
3284 	pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3285 	       "ds:",
3286 	       save->ds.selector, save->ds.attrib,
3287 	       save->ds.limit, save->ds.base);
3288 	pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3289 	       "fs:",
3290 	       save->fs.selector, save->fs.attrib,
3291 	       save->fs.limit, save->fs.base);
3292 	pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3293 	       "gs:",
3294 	       save->gs.selector, save->gs.attrib,
3295 	       save->gs.limit, save->gs.base);
3296 	pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3297 	       "gdtr:",
3298 	       save->gdtr.selector, save->gdtr.attrib,
3299 	       save->gdtr.limit, save->gdtr.base);
3300 	pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3301 	       "ldtr:",
3302 	       save->ldtr.selector, save->ldtr.attrib,
3303 	       save->ldtr.limit, save->ldtr.base);
3304 	pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3305 	       "idtr:",
3306 	       save->idtr.selector, save->idtr.attrib,
3307 	       save->idtr.limit, save->idtr.base);
3308 	pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3309 	       "tr:",
3310 	       save->tr.selector, save->tr.attrib,
3311 	       save->tr.limit, save->tr.base);
3312 	pr_err("cpl:            %d                efer:         %016llx\n",
3313 		save->cpl, save->efer);
3314 	pr_err("%-15s %016llx %-13s %016llx\n",
3315 	       "cr0:", save->cr0, "cr2:", save->cr2);
3316 	pr_err("%-15s %016llx %-13s %016llx\n",
3317 	       "cr3:", save->cr3, "cr4:", save->cr4);
3318 	pr_err("%-15s %016llx %-13s %016llx\n",
3319 	       "dr6:", save->dr6, "dr7:", save->dr7);
3320 	pr_err("%-15s %016llx %-13s %016llx\n",
3321 	       "rip:", save->rip, "rflags:", save->rflags);
3322 	pr_err("%-15s %016llx %-13s %016llx\n",
3323 	       "rsp:", save->rsp, "rax:", save->rax);
3324 	pr_err("%-15s %016llx %-13s %016llx\n",
3325 	       "star:", save->star, "lstar:", save->lstar);
3326 	pr_err("%-15s %016llx %-13s %016llx\n",
3327 	       "cstar:", save->cstar, "sfmask:", save->sfmask);
3328 	pr_err("%-15s %016llx %-13s %016llx\n",
3329 	       "kernel_gs_base:", save->kernel_gs_base,
3330 	       "sysenter_cs:", save->sysenter_cs);
3331 	pr_err("%-15s %016llx %-13s %016llx\n",
3332 	       "sysenter_esp:", save->sysenter_esp,
3333 	       "sysenter_eip:", save->sysenter_eip);
3334 	pr_err("%-15s %016llx %-13s %016llx\n",
3335 	       "gpat:", save->g_pat, "dbgctl:", save->dbgctl);
3336 	pr_err("%-15s %016llx %-13s %016llx\n",
3337 	       "br_from:", save->br_from, "br_to:", save->br_to);
3338 	pr_err("%-15s %016llx %-13s %016llx\n",
3339 	       "excp_from:", save->last_excp_from,
3340 	       "excp_to:", save->last_excp_to);
3341 }
3342 
3343 static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
3344 {
3345 	struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
3346 
3347 	*info1 = control->exit_info_1;
3348 	*info2 = control->exit_info_2;
3349 }
3350 
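/*
 * Top-level exit dispatcher.  Pending nested #vmexits are emulated
 * first; exits that occur in guest mode are offered to the L1 intercept
 * checks and only handled here when L1 does not intercept them.  The
 * remaining exits are dispatched through svm_exit_handlers[].
 */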
3351 static int handle_exit(struct kvm_vcpu *vcpu)
3352 {
3353 	struct vcpu_svm *svm = to_svm(vcpu);
3354 	struct kvm_run *kvm_run = vcpu->run;
3355 	u32 exit_code = svm->vmcb->control.exit_code;
3356 
3357 	if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
3358 		vcpu->arch.cr0 = svm->vmcb->save.cr0;
3359 	if (npt_enabled)
3360 		vcpu->arch.cr3 = svm->vmcb->save.cr3;
3361 
3362 	if (unlikely(svm->nested.exit_required)) {
3363 		nested_svm_vmexit(svm);
3364 		svm->nested.exit_required = false;
3365 
3366 		return 1;
3367 	}
3368 
3369 	if (is_guest_mode(vcpu)) {
3370 		int vmexit;
3371 
3372 		trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
3373 					svm->vmcb->control.exit_info_1,
3374 					svm->vmcb->control.exit_info_2,
3375 					svm->vmcb->control.exit_int_info,
3376 					svm->vmcb->control.exit_int_info_err,
3377 					KVM_ISA_SVM);
3378 
3379 		vmexit = nested_svm_exit_special(svm);
3380 
3381 		if (vmexit == NESTED_EXIT_CONTINUE)
3382 			vmexit = nested_svm_exit_handled(svm);
3383 
3384 		if (vmexit == NESTED_EXIT_DONE)
3385 			return 1;
3386 	}
3387 
3388 	svm_complete_interrupts(svm);
3389 
3390 	if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
3391 		kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
3392 		kvm_run->fail_entry.hardware_entry_failure_reason
3393 			= svm->vmcb->control.exit_code;
3394 		pr_err("KVM: FAILED VMRUN WITH VMCB:\n");
3395 		dump_vmcb(vcpu);
3396 		return 0;
3397 	}
3398 
3399 	if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
3400 	    exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
3401 	    exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH &&
3402 	    exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI)
3403 		printk(KERN_ERR "%s: unexpected exit_int_info 0x%x "
3404 		       "exit_code 0x%x\n",
3405 		       __func__, svm->vmcb->control.exit_int_info,
3406 		       exit_code);
3407 
3408 	if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
3409 	    || !svm_exit_handlers[exit_code]) {
3410 		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
3411 		kvm_run->hw.hardware_exit_reason = exit_code;
3412 		return 0;
3413 	}
3414 
3415 	return svm_exit_handlers[exit_code](svm);
3416 }
3417 
3418 static void reload_tss(struct kvm_vcpu *vcpu)
3419 {
3420 	int cpu = raw_smp_processor_id();
3421 
3422 	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
3423 	sd->tss_desc->type = 9; /* available 32/64-bit TSS */
3424 	load_TR_desc();
3425 }
3426 
3427 static void pre_svm_run(struct vcpu_svm *svm)
3428 {
3429 	int cpu = raw_smp_processor_id();
3430 
3431 	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
3432 
3433 	/* FIXME: handle wraparound of asid_generation */
3434 	if (svm->asid_generation != sd->asid_generation)
3435 		new_asid(svm, sd);
3436 }
3437 
3438 static void svm_inject_nmi(struct kvm_vcpu *vcpu)
3439 {
3440 	struct vcpu_svm *svm = to_svm(vcpu);
3441 
3442 	svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
3443 	vcpu->arch.hflags |= HF_NMI_MASK;
3444 	set_intercept(svm, INTERCEPT_IRET);
3445 	++vcpu->stat.nmi_injections;
3446 }
3447 
3448 static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
3449 {
3450 	struct vmcb_control_area *control;
3451 
3452 	control = &svm->vmcb->control;
3453 	control->int_vector = irq;
3454 	control->int_ctl &= ~V_INTR_PRIO_MASK;
3455 	control->int_ctl |= V_IRQ_MASK |
3456 		((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
3457 	mark_dirty(svm->vmcb, VMCB_INTR);
3458 }
3459 
3460 static void svm_set_irq(struct kvm_vcpu *vcpu)
3461 {
3462 	struct vcpu_svm *svm = to_svm(vcpu);
3463 
3464 	BUG_ON(!(gif_set(svm)));
3465 
3466 	trace_kvm_inj_virq(vcpu->arch.interrupt.nr);
3467 	++vcpu->stat.irq_injections;
3468 
3469 	svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr |
3470 		SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
3471 }
3472 
3473 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
3474 {
3475 	struct vcpu_svm *svm = to_svm(vcpu);
3476 
3477 	if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
3478 		return;
3479 
3480 	if (irr == -1)
3481 		return;
3482 
3483 	if (tpr >= irr)
3484 		set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
3485 }
3486 
3487 static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
3488 {
3489 	struct vcpu_svm *svm = to_svm(vcpu);
3490 	struct vmcb *vmcb = svm->vmcb;
3491 	int ret;
3492 	ret = !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
3493 	      !(svm->vcpu.arch.hflags & HF_NMI_MASK);
3494 	ret = ret && gif_set(svm) && nested_svm_nmi(svm);
3495 
3496 	return ret;
3497 }
3498 
3499 static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
3500 {
3501 	struct vcpu_svm *svm = to_svm(vcpu);
3502 
3503 	return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
3504 }
3505 
3506 static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
3507 {
3508 	struct vcpu_svm *svm = to_svm(vcpu);
3509 
3510 	if (masked) {
3511 		svm->vcpu.arch.hflags |= HF_NMI_MASK;
3512 		set_intercept(svm, INTERCEPT_IRET);
3513 	} else {
3514 		svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
3515 		clr_intercept(svm, INTERCEPT_IRET);
3516 	}
3517 }
3518 
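/*
 * An external interrupt may be injected if GIF is set, the guest is not in
 * an interrupt shadow and RFLAGS.IF is set; for a nested guest running with
 * V_INTR masking (HF_VINTR_MASK) injection is left to the nested interrupt
 * logic instead.
 */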
3519 static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
3520 {
3521 	struct vcpu_svm *svm = to_svm(vcpu);
3522 	struct vmcb *vmcb = svm->vmcb;
3523 	int ret;
3524 
3525 	if (!gif_set(svm) ||
3526 	     (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
3527 		return 0;
3528 
3529 	ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF);
3530 
3531 	if (is_guest_mode(vcpu))
3532 		return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);
3533 
3534 	return ret;
3535 }
3536 
3537 static void enable_irq_window(struct kvm_vcpu *vcpu)
3538 {
3539 	struct vcpu_svm *svm = to_svm(vcpu);
3540 
3541 	/*
3542 	 * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes
3543 	 * 1, because that is a separate STGI/VMRUN intercept.  The next time
3544 	 * we get that intercept, this function will be called again and we
3545 	 * will then set up the VINTR intercept.
3546 	 */
3547 	if (gif_set(svm) && nested_svm_intr(svm)) {
3548 		svm_set_vintr(svm);
3549 		svm_inject_irq(svm, 0x0);
3550 	}
3551 }
3552 
3553 static void enable_nmi_window(struct kvm_vcpu *vcpu)
3554 {
3555 	struct vcpu_svm *svm = to_svm(vcpu);
3556 
3557 	if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
3558 	    == HF_NMI_MASK)
3559 		return; /* IRET will cause a vm exit */
3560 
3561 	/*
3562 	 * Something prevents the NMI from being injected.  Single-step over
3563 	 * the possible problem (IRET, exception injection or interrupt shadow).
3564 	 */
3565 	svm->nmi_singlestep = true;
3566 	svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
3567 	update_db_intercept(vcpu);
3568 }
3569 
3570 static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
3571 {
3572 	return 0;
3573 }
3574 
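/*
 * Flush the guest's TLB entries: either request a flush of the current ASID
 * on the next VMRUN (FLUSHBYASID), or retire the current ASID so that
 * pre_svm_run() assigns a fresh one.
 */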
3575 static void svm_flush_tlb(struct kvm_vcpu *vcpu)
3576 {
3577 	struct vcpu_svm *svm = to_svm(vcpu);
3578 
3579 	if (static_cpu_has(X86_FEATURE_FLUSHBYASID))
3580 		svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
3581 	else
3582 		svm->asid_generation--;
3583 }
3584 
3585 static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
3586 {
3587 }
3588 
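/*
 * sync_cr8_to_lapic()/sync_lapic_to_cr8() keep the local APIC TPR and the
 * V_TPR field of the VMCB in sync around a guest run.  Both are skipped
 * while a nested guest runs with V_INTR masking.
 */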
3589 static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
3590 {
3591 	struct vcpu_svm *svm = to_svm(vcpu);
3592 
3593 	if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
3594 		return;
3595 
3596 	if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) {
3597 		int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
3598 		kvm_set_cr8(vcpu, cr8);
3599 	}
3600 }
3601 
3602 static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
3603 {
3604 	struct vcpu_svm *svm = to_svm(vcpu);
3605 	u64 cr8;
3606 
3607 	if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
3608 		return;
3609 
3610 	cr8 = kvm_get_cr8(vcpu);
3611 	svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
3612 	svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
3613 }
3614 
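/*
 * Process EXITINTINFO after a #VMEXIT: re-queue any exception, NMI or
 * external interrupt whose delivery was cut short by the exit so that it is
 * re-injected on the next entry, and unmask NMIs once the vcpu has made
 * progress past a previously intercepted IRET.
 */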
3615 static void svm_complete_interrupts(struct vcpu_svm *svm)
3616 {
3617 	u8 vector;
3618 	int type;
3619 	u32 exitintinfo = svm->vmcb->control.exit_int_info;
3620 	unsigned int3_injected = svm->int3_injected;
3621 
3622 	svm->int3_injected = 0;
3623 
3624 	/*
3625 	 * If we've made progress since setting HF_IRET_MASK, we've
3626 	 * executed an IRET and can allow NMI injection.
3627 	 */
3628 	if ((svm->vcpu.arch.hflags & HF_IRET_MASK)
3629 	    && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) {
3630 		svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
3631 		kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
3632 	}
3633 
3634 	svm->vcpu.arch.nmi_injected = false;
3635 	kvm_clear_exception_queue(&svm->vcpu);
3636 	kvm_clear_interrupt_queue(&svm->vcpu);
3637 
3638 	if (!(exitintinfo & SVM_EXITINTINFO_VALID))
3639 		return;
3640 
3641 	kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
3642 
3643 	vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
3644 	type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
3645 
3646 	switch (type) {
3647 	case SVM_EXITINTINFO_TYPE_NMI:
3648 		svm->vcpu.arch.nmi_injected = true;
3649 		break;
3650 	case SVM_EXITINTINFO_TYPE_EXEPT:
3651 		/*
3652 		 * In case of software exceptions, do not reinject the vector,
3653 		 * but re-execute the instruction instead. Rewind RIP first
3654 		 * if we emulated INT3 before.
3655 		 */
3656 		if (kvm_exception_is_soft(vector)) {
3657 			if (vector == BP_VECTOR && int3_injected &&
3658 			    kvm_is_linear_rip(&svm->vcpu, svm->int3_rip))
3659 				kvm_rip_write(&svm->vcpu,
3660 					      kvm_rip_read(&svm->vcpu) -
3661 					      int3_injected);
3662 			break;
3663 		}
3664 		if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
3665 			u32 err = svm->vmcb->control.exit_int_info_err;
3666 			kvm_requeue_exception_e(&svm->vcpu, vector, err);
3667 
3668 		} else
3669 			kvm_requeue_exception(&svm->vcpu, vector);
3670 		break;
3671 	case SVM_EXITINTINFO_TYPE_INTR:
3672 		kvm_queue_interrupt(&svm->vcpu, vector, false);
3673 		break;
3674 	default:
3675 		break;
3676 	}
3677 }
3678 
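/*
 * Called by the generic code when a vmentry is abandoned after an event was
 * already set up for injection: move event_inj into exit_int_info so that
 * svm_complete_interrupts() re-queues the event for the next entry.
 */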
3679 static void svm_cancel_injection(struct kvm_vcpu *vcpu)
3680 {
3681 	struct vcpu_svm *svm = to_svm(vcpu);
3682 	struct vmcb_control_area *control = &svm->vmcb->control;
3683 
3684 	control->exit_int_info = control->event_inj;
3685 	control->exit_int_info_err = control->event_inj_err;
3686 	control->event_inj = 0;
3687 	svm_complete_interrupts(svm);
3688 }
3689 
3690 #ifdef CONFIG_X86_64
3691 #define R "r"
3692 #else
3693 #define R "e"
3694 #endif
3695 
3696 static void svm_vcpu_run(struct kvm_vcpu *vcpu)
3697 {
3698 	struct vcpu_svm *svm = to_svm(vcpu);
3699 
3700 	svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
3701 	svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
3702 	svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
3703 
3704 	/*
3705 	 * A vmexit emulation is required before the vcpu can be executed
3706 	 * again.
3707 	 */
3708 	if (unlikely(svm->nested.exit_required))
3709 		return;
3710 
3711 	pre_svm_run(svm);
3712 
3713 	sync_lapic_to_cr8(vcpu);
3714 
3715 	svm->vmcb->save.cr2 = vcpu->arch.cr2;
3716 
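	/*
	 * Clearing GIF makes the CPU hold physical interrupts and NMIs
	 * pending across the world switch, even though IF is re-enabled
	 * below.
	 */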
3717 	clgi();
3718 
3719 	local_irq_enable();
3720 
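	/*
	 * The world switch: load the guest GPRs that are not kept in the
	 * VMCB, VMLOAD the additional guest state (FS/GS, TR, LDTR and the
	 * SYSCALL/SYSENTER MSRs), VMRUN until the next #VMEXIT, VMSAVE that
	 * state back, then store the guest GPRs and restore the host rbp.
	 */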
3721 	asm volatile (
3722 		"push %%"R"bp; \n\t"
3723 		"mov %c[rbx](%[svm]), %%"R"bx \n\t"
3724 		"mov %c[rcx](%[svm]), %%"R"cx \n\t"
3725 		"mov %c[rdx](%[svm]), %%"R"dx \n\t"
3726 		"mov %c[rsi](%[svm]), %%"R"si \n\t"
3727 		"mov %c[rdi](%[svm]), %%"R"di \n\t"
3728 		"mov %c[rbp](%[svm]), %%"R"bp \n\t"
3729 #ifdef CONFIG_X86_64
3730 		"mov %c[r8](%[svm]),  %%r8  \n\t"
3731 		"mov %c[r9](%[svm]),  %%r9  \n\t"
3732 		"mov %c[r10](%[svm]), %%r10 \n\t"
3733 		"mov %c[r11](%[svm]), %%r11 \n\t"
3734 		"mov %c[r12](%[svm]), %%r12 \n\t"
3735 		"mov %c[r13](%[svm]), %%r13 \n\t"
3736 		"mov %c[r14](%[svm]), %%r14 \n\t"
3737 		"mov %c[r15](%[svm]), %%r15 \n\t"
3738 #endif
3739 
3740 		/* Enter guest mode */
3741 		"push %%"R"ax \n\t"
3742 		"mov %c[vmcb](%[svm]), %%"R"ax \n\t"
3743 		__ex(SVM_VMLOAD) "\n\t"
3744 		__ex(SVM_VMRUN) "\n\t"
3745 		__ex(SVM_VMSAVE) "\n\t"
3746 		"pop %%"R"ax \n\t"
3747 
3748 		/* Save guest registers, load host registers */
3749 		"mov %%"R"bx, %c[rbx](%[svm]) \n\t"
3750 		"mov %%"R"cx, %c[rcx](%[svm]) \n\t"
3751 		"mov %%"R"dx, %c[rdx](%[svm]) \n\t"
3752 		"mov %%"R"si, %c[rsi](%[svm]) \n\t"
3753 		"mov %%"R"di, %c[rdi](%[svm]) \n\t"
3754 		"mov %%"R"bp, %c[rbp](%[svm]) \n\t"
3755 #ifdef CONFIG_X86_64
3756 		"mov %%r8,  %c[r8](%[svm]) \n\t"
3757 		"mov %%r9,  %c[r9](%[svm]) \n\t"
3758 		"mov %%r10, %c[r10](%[svm]) \n\t"
3759 		"mov %%r11, %c[r11](%[svm]) \n\t"
3760 		"mov %%r12, %c[r12](%[svm]) \n\t"
3761 		"mov %%r13, %c[r13](%[svm]) \n\t"
3762 		"mov %%r14, %c[r14](%[svm]) \n\t"
3763 		"mov %%r15, %c[r15](%[svm]) \n\t"
3764 #endif
3765 		"pop %%"R"bp"
3766 		:
3767 		: [svm]"a"(svm),
3768 		  [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
3769 		  [rbx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBX])),
3770 		  [rcx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RCX])),
3771 		  [rdx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDX])),
3772 		  [rsi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RSI])),
3773 		  [rdi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDI])),
3774 		  [rbp]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBP]))
3775 #ifdef CONFIG_X86_64
3776 		  , [r8]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R8])),
3777 		  [r9]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R9])),
3778 		  [r10]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R10])),
3779 		  [r11]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R11])),
3780 		  [r12]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R12])),
3781 		  [r13]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R13])),
3782 		  [r14]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R14])),
3783 		  [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15]))
3784 #endif
3785 		: "cc", "memory"
3786 		, R"bx", R"cx", R"dx", R"si", R"di"
3787 #ifdef CONFIG_X86_64
3788 		, "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
3789 #endif
3790 		);
3791 
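	/*
	 * The VMLOAD above replaced the host's FS/GS state with the guest's.
	 * Restore the pieces saved at vcpu load time: the GS base on 64-bit,
	 * the fs/gs selectors on 32-bit.
	 */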
3792 #ifdef CONFIG_X86_64
3793 	wrmsrl(MSR_GS_BASE, svm->host.gs_base);
3794 #else
3795 	loadsegment(fs, svm->host.fs);
3796 #ifndef CONFIG_X86_32_LAZY_GS
3797 	loadsegment(gs, svm->host.gs);
3798 #endif
3799 #endif
3800 
3801 	reload_tss(vcpu);
3802 
3803 	local_irq_disable();
3804 
3805 	vcpu->arch.cr2 = svm->vmcb->save.cr2;
3806 	vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
3807 	vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
3808 	vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
3809 
3810 	trace_kvm_exit(svm->vmcb->control.exit_code, vcpu, KVM_ISA_SVM);
3811 
3812 	if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
3813 		kvm_before_handle_nmi(&svm->vcpu);
3814 
3815 	stgi();
3816 
3817 	/* Any pending NMI will happen here */
3818 
3819 	if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
3820 		kvm_after_handle_nmi(&svm->vcpu);
3821 
3822 	sync_cr8_to_lapic(vcpu);
3823 
3824 	svm->next_rip = 0;
3825 
3826 	svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
3827 
3828 	/* if exit due to PF check for async PF */
3829 	if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
3830 		svm->apf_reason = kvm_read_and_reset_pf_reason();
3831 
3832 	if (npt_enabled) {
3833 		vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
3834 		vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR);
3835 	}
3836 
3837 	/*
3838 	 * We need to handle MC intercepts here before the vcpu has a chance to
3839 	 * change the physical cpu
3840 	 */
3841 	if (unlikely(svm->vmcb->control.exit_code ==
3842 		     SVM_EXIT_EXCP_BASE + MC_VECTOR))
3843 		svm_handle_mce(svm);
3844 
3845 	mark_all_clean(svm->vmcb);
3846 }
3847 
3848 #undef R
3849 
3850 static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
3851 {
3852 	struct vcpu_svm *svm = to_svm(vcpu);
3853 
3854 	svm->vmcb->save.cr3 = root;
3855 	mark_dirty(svm->vmcb, VMCB_CR);
3856 	svm_flush_tlb(vcpu);
3857 }
3858 
3859 static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
3860 {
3861 	struct vcpu_svm *svm = to_svm(vcpu);
3862 
3863 	svm->vmcb->control.nested_cr3 = root;
3864 	mark_dirty(svm->vmcb, VMCB_NPT);
3865 
3866 	/* Also sync guest cr3 here in case we live migrate */
3867 	svm->vmcb->save.cr3 = kvm_read_cr3(vcpu);
3868 	mark_dirty(svm->vmcb, VMCB_CR);
3869 
3870 	svm_flush_tlb(vcpu);
3871 }
3872 
3873 static int is_disabled(void)
3874 {
3875 	u64 vm_cr;
3876 
3877 	rdmsrl(MSR_VM_CR, vm_cr);
3878 	if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE))
3879 		return 1;
3880 
3881 	return 0;
3882 }
3883 
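/*
 * Used by the generic hypercall code to patch the guest's hypercall
 * instruction to VMMCALL, whose opcode is 0f 01 d9.
 */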
3884 static void
3885 svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
3886 {
3887 	/*
3888 	 * Patch in the VMMCALL instruction:
3889 	 */
3890 	hypercall[0] = 0x0f;
3891 	hypercall[1] = 0x01;
3892 	hypercall[2] = 0xd9;
3893 }
3894 
3895 static void svm_check_processor_compat(void *rtn)
3896 {
3897 	*(int *)rtn = 0;
3898 }
3899 
3900 static bool svm_cpu_has_accelerated_tpr(void)
3901 {
3902 	return false;
3903 }
3904 
3905 static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
3906 {
3907 	return 0;
3908 }
3909 
3910 static void svm_cpuid_update(struct kvm_vcpu *vcpu)
3911 {
3912 }
3913 
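/*
 * Adjust the CPUID bits reported to the guest: advertise the SVM bit in
 * leaf 0x80000001 when nesting is enabled, and fill in the SVM feature leaf
 * 0x8000000A (revision, number of ASIDs, NRIPS and NPT support) for a
 * nested hypervisor.
 */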
3914 static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
3915 {
3916 	switch (func) {
3917 	case 0x80000001:
3918 		if (nested)
3919 			entry->ecx |= (1 << 2); /* Set SVM bit */
3920 		break;
3921 	case 0x8000000A:
3922 		entry->eax = 1; /* SVM revision 1 */
3923 		entry->ebx = 8; /* Let's support 8 ASIDs in case we add proper
3924 				   ASID emulation to nested SVM */
3925 		entry->ecx = 0; /* Reserved */
3926 		entry->edx = 0; /* By default do not support any
3927 				   additional features */
3928 
3929 		/* Support next_rip if host supports it */
3930 		if (boot_cpu_has(X86_FEATURE_NRIPS))
3931 			entry->edx |= SVM_FEATURE_NRIP;
3932 
3933 		/* Support NPT for the guest if enabled */
3934 		if (npt_enabled)
3935 			entry->edx |= SVM_FEATURE_NPT;
3936 
3937 		break;
3938 	}
3939 }
3940 
3941 static int svm_get_lpage_level(void)
3942 {
3943 	return PT_PDPE_LEVEL;
3944 }
3945 
3946 static bool svm_rdtscp_supported(void)
3947 {
3948 	return false;
3949 }
3950 
3951 static bool svm_has_wbinvd_exit(void)
3952 {
3953 	return true;
3954 }
3955 
3956 static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
3957 {
3958 	struct vcpu_svm *svm = to_svm(vcpu);
3959 
3960 	set_exception_intercept(svm, NM_VECTOR);
3961 	update_cr0_intercept(svm);
3962 }
3963 
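/*
 * Map the x86_intercept codes used by the instruction emulator to SVM exit
 * codes, together with the emulation stage at which a nested hypervisor's
 * intercept has to be checked.
 */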
3964 #define PRE_EX(exit)  { .exit_code = (exit), \
3965 			.stage = X86_ICPT_PRE_EXCEPT, }
3966 #define POST_EX(exit) { .exit_code = (exit), \
3967 			.stage = X86_ICPT_POST_EXCEPT, }
3968 #define POST_MEM(exit) { .exit_code = (exit), \
3969 			.stage = X86_ICPT_POST_MEMACCESS, }
3970 
3971 static struct __x86_intercept {
3972 	u32 exit_code;
3973 	enum x86_intercept_stage stage;
3974 } x86_intercept_map[] = {
3975 	[x86_intercept_cr_read]		= POST_EX(SVM_EXIT_READ_CR0),
3976 	[x86_intercept_cr_write]	= POST_EX(SVM_EXIT_WRITE_CR0),
3977 	[x86_intercept_clts]		= POST_EX(SVM_EXIT_WRITE_CR0),
3978 	[x86_intercept_lmsw]		= POST_EX(SVM_EXIT_WRITE_CR0),
3979 	[x86_intercept_smsw]		= POST_EX(SVM_EXIT_READ_CR0),
3980 	[x86_intercept_dr_read]		= POST_EX(SVM_EXIT_READ_DR0),
3981 	[x86_intercept_dr_write]	= POST_EX(SVM_EXIT_WRITE_DR0),
3982 	[x86_intercept_sldt]		= POST_EX(SVM_EXIT_LDTR_READ),
3983 	[x86_intercept_str]		= POST_EX(SVM_EXIT_TR_READ),
3984 	[x86_intercept_lldt]		= POST_EX(SVM_EXIT_LDTR_WRITE),
3985 	[x86_intercept_ltr]		= POST_EX(SVM_EXIT_TR_WRITE),
3986 	[x86_intercept_sgdt]		= POST_EX(SVM_EXIT_GDTR_READ),
3987 	[x86_intercept_sidt]		= POST_EX(SVM_EXIT_IDTR_READ),
3988 	[x86_intercept_lgdt]		= POST_EX(SVM_EXIT_GDTR_WRITE),
3989 	[x86_intercept_lidt]		= POST_EX(SVM_EXIT_IDTR_WRITE),
3990 	[x86_intercept_vmrun]		= POST_EX(SVM_EXIT_VMRUN),
3991 	[x86_intercept_vmmcall]		= POST_EX(SVM_EXIT_VMMCALL),
3992 	[x86_intercept_vmload]		= POST_EX(SVM_EXIT_VMLOAD),
3993 	[x86_intercept_vmsave]		= POST_EX(SVM_EXIT_VMSAVE),
3994 	[x86_intercept_stgi]		= POST_EX(SVM_EXIT_STGI),
3995 	[x86_intercept_clgi]		= POST_EX(SVM_EXIT_CLGI),
3996 	[x86_intercept_skinit]		= POST_EX(SVM_EXIT_SKINIT),
3997 	[x86_intercept_invlpga]		= POST_EX(SVM_EXIT_INVLPGA),
3998 	[x86_intercept_rdtscp]		= POST_EX(SVM_EXIT_RDTSCP),
3999 	[x86_intercept_monitor]		= POST_MEM(SVM_EXIT_MONITOR),
4000 	[x86_intercept_mwait]		= POST_EX(SVM_EXIT_MWAIT),
4001 	[x86_intercept_invlpg]		= POST_EX(SVM_EXIT_INVLPG),
4002 	[x86_intercept_invd]		= POST_EX(SVM_EXIT_INVD),
4003 	[x86_intercept_wbinvd]		= POST_EX(SVM_EXIT_WBINVD),
4004 	[x86_intercept_wrmsr]		= POST_EX(SVM_EXIT_MSR),
4005 	[x86_intercept_rdtsc]		= POST_EX(SVM_EXIT_RDTSC),
4006 	[x86_intercept_rdmsr]		= POST_EX(SVM_EXIT_MSR),
4007 	[x86_intercept_rdpmc]		= POST_EX(SVM_EXIT_RDPMC),
4008 	[x86_intercept_cpuid]		= PRE_EX(SVM_EXIT_CPUID),
4009 	[x86_intercept_rsm]		= PRE_EX(SVM_EXIT_RSM),
4010 	[x86_intercept_pause]		= PRE_EX(SVM_EXIT_PAUSE),
4011 	[x86_intercept_pushf]		= PRE_EX(SVM_EXIT_PUSHF),
4012 	[x86_intercept_popf]		= PRE_EX(SVM_EXIT_POPF),
4013 	[x86_intercept_intn]		= PRE_EX(SVM_EXIT_SWINT),
4014 	[x86_intercept_iret]		= PRE_EX(SVM_EXIT_IRET),
4015 	[x86_intercept_icebp]		= PRE_EX(SVM_EXIT_ICEBP),
4016 	[x86_intercept_hlt]		= POST_EX(SVM_EXIT_HLT),
4017 	[x86_intercept_in]		= POST_EX(SVM_EXIT_IOIO),
4018 	[x86_intercept_ins]		= POST_EX(SVM_EXIT_IOIO),
4019 	[x86_intercept_out]		= POST_EX(SVM_EXIT_IOIO),
4020 	[x86_intercept_outs]		= POST_EX(SVM_EXIT_IOIO),
4021 };
4022 
4023 #undef PRE_EX
4024 #undef POST_EX
4025 #undef POST_MEM
4026 
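/*
 * Called by the instruction emulator while emulating on behalf of a nested
 * guest: build the exit code and exit_info the instruction would have
 * produced and ask nested_svm_exit_handled() whether L1 intercepts it.  If
 * so, X86EMUL_INTERCEPTED makes the emulator abandon the instruction.
 */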
4027 static int svm_check_intercept(struct kvm_vcpu *vcpu,
4028 			       struct x86_instruction_info *info,
4029 			       enum x86_intercept_stage stage)
4030 {
4031 	struct vcpu_svm *svm = to_svm(vcpu);
4032 	int vmexit, ret = X86EMUL_CONTINUE;
4033 	struct __x86_intercept icpt_info;
4034 	struct vmcb *vmcb = svm->vmcb;
4035 
4036 	if (info->intercept >= ARRAY_SIZE(x86_intercept_map))
4037 		goto out;
4038 
4039 	icpt_info = x86_intercept_map[info->intercept];
4040 
4041 	if (stage != icpt_info.stage)
4042 		goto out;
4043 
4044 	switch (icpt_info.exit_code) {
4045 	case SVM_EXIT_READ_CR0:
4046 		if (info->intercept == x86_intercept_cr_read)
4047 			icpt_info.exit_code += info->modrm_reg;
4048 		break;
4049 	case SVM_EXIT_WRITE_CR0: {
4050 		unsigned long cr0, val;
4051 		u64 intercept;
4052 
4053 		if (info->intercept == x86_intercept_cr_write)
4054 			icpt_info.exit_code += info->modrm_reg;
4055 
4056 		if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0)
4057 			break;
4058 
4059 		intercept = svm->nested.intercept;
4060 
4061 		if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))
4062 			break;
4063 
4064 		cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK;
4065 		val = info->src_val  & ~SVM_CR0_SELECTIVE_MASK;
4066 
4067 		if (info->intercept == x86_intercept_lmsw) {
4068 			cr0 &= 0xfUL;
4069 			val &= 0xfUL;
4070 			/* lmsw can't clear PE - catch this here */
4071 			if (cr0 & X86_CR0_PE)
4072 				val |= X86_CR0_PE;
4073 		}
4074 
4075 		if (cr0 ^ val)
4076 			icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE;
4077 
4078 		break;
4079 	}
4080 	case SVM_EXIT_READ_DR0:
4081 	case SVM_EXIT_WRITE_DR0:
4082 		icpt_info.exit_code += info->modrm_reg;
4083 		break;
4084 	case SVM_EXIT_MSR:
4085 		if (info->intercept == x86_intercept_wrmsr)
4086 			vmcb->control.exit_info_1 = 1;
4087 		else
4088 			vmcb->control.exit_info_1 = 0;
4089 		break;
4090 	case SVM_EXIT_PAUSE:
4091 		/*
4092 		 * We only get this intercept for NOP; PAUSE is REP NOP, so
4093 		 * check for the REP prefix here.
4094 		 */
4095 		if (info->rep_prefix != REPE_PREFIX)
4096 			goto out;
		/* Do not fall through into the IOIO handling below. */
		break;
4097 	case SVM_EXIT_IOIO: {
4098 		u64 exit_info;
4099 		u32 bytes;
4100 
4101 		exit_info = (vcpu->arch.regs[VCPU_REGS_RDX] & 0xffff) << 16;
4102 
4103 		if (info->intercept == x86_intercept_in ||
4104 		    info->intercept == x86_intercept_ins) {
4105 			exit_info |= SVM_IOIO_TYPE_MASK;
4106 			bytes = info->src_bytes;
4107 		} else {
4108 			bytes = info->dst_bytes;
4109 		}
4110 
4111 		if (info->intercept == x86_intercept_outs ||
4112 		    info->intercept == x86_intercept_ins)
4113 			exit_info |= SVM_IOIO_STR_MASK;
4114 
4115 		if (info->rep_prefix)
4116 			exit_info |= SVM_IOIO_REP_MASK;
4117 
4118 		bytes = min(bytes, 4u);
4119 
4120 		exit_info |= bytes << SVM_IOIO_SIZE_SHIFT;
4121 
4122 		exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1);
4123 
4124 		vmcb->control.exit_info_1 = exit_info;
4125 		vmcb->control.exit_info_2 = info->next_rip;
4126 
4127 		break;
4128 	}
4129 	default:
4130 		break;
4131 	}
4132 
4133 	vmcb->control.next_rip  = info->next_rip;
4134 	vmcb->control.exit_code = icpt_info.exit_code;
4135 	vmexit = nested_svm_exit_handled(svm);
4136 
4137 	ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED
4138 					   : X86EMUL_CONTINUE;
4139 
4140 out:
4141 	return ret;
4142 }
4143 
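/*
 * The kvm_x86_ops table that plugs the SVM implementation into the generic
 * x86 KVM code; it is registered via kvm_init() in svm_init() below.
 */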
4144 static struct kvm_x86_ops svm_x86_ops = {
4145 	.cpu_has_kvm_support = has_svm,
4146 	.disabled_by_bios = is_disabled,
4147 	.hardware_setup = svm_hardware_setup,
4148 	.hardware_unsetup = svm_hardware_unsetup,
4149 	.check_processor_compatibility = svm_check_processor_compat,
4150 	.hardware_enable = svm_hardware_enable,
4151 	.hardware_disable = svm_hardware_disable,
4152 	.cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,
4153 
4154 	.vcpu_create = svm_create_vcpu,
4155 	.vcpu_free = svm_free_vcpu,
4156 	.vcpu_reset = svm_vcpu_reset,
4157 
4158 	.prepare_guest_switch = svm_prepare_guest_switch,
4159 	.vcpu_load = svm_vcpu_load,
4160 	.vcpu_put = svm_vcpu_put,
4161 
4162 	.set_guest_debug = svm_guest_debug,
4163 	.get_msr = svm_get_msr,
4164 	.set_msr = svm_set_msr,
4165 	.get_segment_base = svm_get_segment_base,
4166 	.get_segment = svm_get_segment,
4167 	.set_segment = svm_set_segment,
4168 	.get_cpl = svm_get_cpl,
4169 	.get_cs_db_l_bits = kvm_get_cs_db_l_bits,
4170 	.decache_cr0_guest_bits = svm_decache_cr0_guest_bits,
4171 	.decache_cr3 = svm_decache_cr3,
4172 	.decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
4173 	.set_cr0 = svm_set_cr0,
4174 	.set_cr3 = svm_set_cr3,
4175 	.set_cr4 = svm_set_cr4,
4176 	.set_efer = svm_set_efer,
4177 	.get_idt = svm_get_idt,
4178 	.set_idt = svm_set_idt,
4179 	.get_gdt = svm_get_gdt,
4180 	.set_gdt = svm_set_gdt,
4181 	.set_dr7 = svm_set_dr7,
4182 	.cache_reg = svm_cache_reg,
4183 	.get_rflags = svm_get_rflags,
4184 	.set_rflags = svm_set_rflags,
4185 	.fpu_activate = svm_fpu_activate,
4186 	.fpu_deactivate = svm_fpu_deactivate,
4187 
4188 	.tlb_flush = svm_flush_tlb,
4189 
4190 	.run = svm_vcpu_run,
4191 	.handle_exit = handle_exit,
4192 	.skip_emulated_instruction = skip_emulated_instruction,
4193 	.set_interrupt_shadow = svm_set_interrupt_shadow,
4194 	.get_interrupt_shadow = svm_get_interrupt_shadow,
4195 	.patch_hypercall = svm_patch_hypercall,
4196 	.set_irq = svm_set_irq,
4197 	.set_nmi = svm_inject_nmi,
4198 	.queue_exception = svm_queue_exception,
4199 	.cancel_injection = svm_cancel_injection,
4200 	.interrupt_allowed = svm_interrupt_allowed,
4201 	.nmi_allowed = svm_nmi_allowed,
4202 	.get_nmi_mask = svm_get_nmi_mask,
4203 	.set_nmi_mask = svm_set_nmi_mask,
4204 	.enable_nmi_window = enable_nmi_window,
4205 	.enable_irq_window = enable_irq_window,
4206 	.update_cr8_intercept = update_cr8_intercept,
4207 
4208 	.set_tss_addr = svm_set_tss_addr,
4209 	.get_tdp_level = get_npt_level,
4210 	.get_mt_mask = svm_get_mt_mask,
4211 
4212 	.get_exit_info = svm_get_exit_info,
4213 
4214 	.get_lpage_level = svm_get_lpage_level,
4215 
4216 	.cpuid_update = svm_cpuid_update,
4217 
4218 	.rdtscp_supported = svm_rdtscp_supported,
4219 
4220 	.set_supported_cpuid = svm_set_supported_cpuid,
4221 
4222 	.has_wbinvd_exit = svm_has_wbinvd_exit,
4223 
4224 	.set_tsc_khz = svm_set_tsc_khz,
4225 	.write_tsc_offset = svm_write_tsc_offset,
4226 	.adjust_tsc_offset = svm_adjust_tsc_offset,
4227 	.compute_tsc_offset = svm_compute_tsc_offset,
4228 	.read_l1_tsc = svm_read_l1_tsc,
4229 
4230 	.set_tdp_cr3 = set_tdp_cr3,
4231 
4232 	.check_intercept = svm_check_intercept,
4233 };
4234 
4235 static int __init svm_init(void)
4236 {
4237 	return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm),
4238 			__alignof__(struct vcpu_svm), THIS_MODULE);
4239 }
4240 
4241 static void __exit svm_exit(void)
4242 {
4243 	kvm_exit();
4244 }
4245 
4246 module_init(svm_init)
4247 module_exit(svm_exit)
4248