1  // SPDX-License-Identifier: GPL-2.0
2  /*
3   * hosting IBM Z kernel virtual machines (s390x)
4   *
5   * Copyright IBM Corp. 2008, 2020
6   *
7   *    Author(s): Carsten Otte <cotte@de.ibm.com>
8   *               Christian Borntraeger <borntraeger@de.ibm.com>
9   *               Christian Ehrhardt <ehrhardt@de.ibm.com>
10   *               Jason J. Herne <jjherne@us.ibm.com>
11   */
12  
13  #define KMSG_COMPONENT "kvm-s390"
14  #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
15  
16  #include <linux/compiler.h>
17  #include <linux/export.h>
18  #include <linux/err.h>
19  #include <linux/fs.h>
20  #include <linux/hrtimer.h>
21  #include <linux/init.h>
22  #include <linux/kvm.h>
23  #include <linux/kvm_host.h>
24  #include <linux/mman.h>
25  #include <linux/module.h>
26  #include <linux/moduleparam.h>
27  #include <linux/cpufeature.h>
28  #include <linux/random.h>
29  #include <linux/slab.h>
30  #include <linux/timer.h>
31  #include <linux/vmalloc.h>
32  #include <linux/bitmap.h>
33  #include <linux/sched/signal.h>
34  #include <linux/string.h>
35  #include <linux/pgtable.h>
36  #include <linux/mmu_notifier.h>
37  
38  #include <asm/access-regs.h>
39  #include <asm/asm-offsets.h>
40  #include <asm/lowcore.h>
41  #include <asm/machine.h>
42  #include <asm/stp.h>
43  #include <asm/gmap.h>
44  #include <asm/gmap_helpers.h>
45  #include <asm/nmi.h>
46  #include <asm/isc.h>
47  #include <asm/sclp.h>
48  #include <asm/cpacf.h>
49  #include <asm/timex.h>
50  #include <asm/asm.h>
51  #include <asm/fpu.h>
52  #include <asm/ap.h>
53  #include <asm/uv.h>
54  #include "kvm-s390.h"
55  #include "gaccess.h"
56  #include "pci.h"
57  
58  #define CREATE_TRACE_POINTS
59  #include "trace.h"
60  #include "trace-s390.h"
61  
62  #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
63  #define LOCAL_IRQS 32
64  #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
65  			   (KVM_MAX_VCPUS + LOCAL_IRQS))
66  
67  const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
68  	KVM_GENERIC_VM_STATS(),
69  	STATS_DESC_COUNTER(VM, inject_io),
70  	STATS_DESC_COUNTER(VM, inject_float_mchk),
71  	STATS_DESC_COUNTER(VM, inject_pfault_done),
72  	STATS_DESC_COUNTER(VM, inject_service_signal),
73  	STATS_DESC_COUNTER(VM, inject_virtio),
74  	STATS_DESC_COUNTER(VM, aen_forward),
75  	STATS_DESC_COUNTER(VM, gmap_shadow_reuse),
76  	STATS_DESC_COUNTER(VM, gmap_shadow_create),
77  	STATS_DESC_COUNTER(VM, gmap_shadow_r1_entry),
78  	STATS_DESC_COUNTER(VM, gmap_shadow_r2_entry),
79  	STATS_DESC_COUNTER(VM, gmap_shadow_r3_entry),
80  	STATS_DESC_COUNTER(VM, gmap_shadow_sg_entry),
81  	STATS_DESC_COUNTER(VM, gmap_shadow_pg_entry),
82  };
83  
84  const struct kvm_stats_header kvm_vm_stats_header = {
85  	.name_size = KVM_STATS_NAME_SIZE,
86  	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
87  	.id_offset = sizeof(struct kvm_stats_header),
88  	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
89  	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
90  		       sizeof(kvm_vm_stats_desc),
91  };
92  
93  const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
94  	KVM_GENERIC_VCPU_STATS(),
95  	STATS_DESC_COUNTER(VCPU, exit_userspace),
96  	STATS_DESC_COUNTER(VCPU, exit_null),
97  	STATS_DESC_COUNTER(VCPU, exit_external_request),
98  	STATS_DESC_COUNTER(VCPU, exit_io_request),
99  	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
100  	STATS_DESC_COUNTER(VCPU, exit_stop_request),
101  	STATS_DESC_COUNTER(VCPU, exit_validity),
102  	STATS_DESC_COUNTER(VCPU, exit_instruction),
103  	STATS_DESC_COUNTER(VCPU, exit_pei),
104  	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
105  	STATS_DESC_COUNTER(VCPU, instruction_lctl),
106  	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
107  	STATS_DESC_COUNTER(VCPU, instruction_stctl),
108  	STATS_DESC_COUNTER(VCPU, instruction_stctg),
109  	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
110  	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
111  	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
112  	STATS_DESC_COUNTER(VCPU, deliver_ckc),
113  	STATS_DESC_COUNTER(VCPU, deliver_cputm),
114  	STATS_DESC_COUNTER(VCPU, deliver_external_call),
115  	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
116  	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
117  	STATS_DESC_COUNTER(VCPU, deliver_virtio),
118  	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
119  	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
120  	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
121  	STATS_DESC_COUNTER(VCPU, deliver_program),
122  	STATS_DESC_COUNTER(VCPU, deliver_io),
123  	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
124  	STATS_DESC_COUNTER(VCPU, exit_wait_state),
125  	STATS_DESC_COUNTER(VCPU, inject_ckc),
126  	STATS_DESC_COUNTER(VCPU, inject_cputm),
127  	STATS_DESC_COUNTER(VCPU, inject_external_call),
128  	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
129  	STATS_DESC_COUNTER(VCPU, inject_mchk),
130  	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
131  	STATS_DESC_COUNTER(VCPU, inject_program),
132  	STATS_DESC_COUNTER(VCPU, inject_restart),
133  	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
134  	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
135  	STATS_DESC_COUNTER(VCPU, instruction_epsw),
136  	STATS_DESC_COUNTER(VCPU, instruction_gs),
137  	STATS_DESC_COUNTER(VCPU, instruction_io_other),
138  	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
139  	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
140  	STATS_DESC_COUNTER(VCPU, instruction_lpswey),
141  	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
142  	STATS_DESC_COUNTER(VCPU, instruction_ptff),
143  	STATS_DESC_COUNTER(VCPU, instruction_sck),
144  	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
145  	STATS_DESC_COUNTER(VCPU, instruction_stidp),
146  	STATS_DESC_COUNTER(VCPU, instruction_spx),
147  	STATS_DESC_COUNTER(VCPU, instruction_stpx),
148  	STATS_DESC_COUNTER(VCPU, instruction_stap),
149  	STATS_DESC_COUNTER(VCPU, instruction_iske),
150  	STATS_DESC_COUNTER(VCPU, instruction_ri),
151  	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
152  	STATS_DESC_COUNTER(VCPU, instruction_sske),
153  	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
154  	STATS_DESC_COUNTER(VCPU, instruction_stsi),
155  	STATS_DESC_COUNTER(VCPU, instruction_stfl),
156  	STATS_DESC_COUNTER(VCPU, instruction_tb),
157  	STATS_DESC_COUNTER(VCPU, instruction_tpi),
158  	STATS_DESC_COUNTER(VCPU, instruction_tprot),
159  	STATS_DESC_COUNTER(VCPU, instruction_tsch),
160  	STATS_DESC_COUNTER(VCPU, instruction_sie),
161  	STATS_DESC_COUNTER(VCPU, instruction_essa),
162  	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
163  	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
164  	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
165  	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
166  	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
167  	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
168  	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
169  	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
170  	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
171  	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
172  	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
173  	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
174  	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
175  	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
176  	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
177  	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
178  	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
179  	STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
180  	STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
181  	STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
182  	STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
183  	STATS_DESC_COUNTER(VCPU, diag_9c_forward),
184  	STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
185  	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
186  	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
187  	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
188  	STATS_DESC_COUNTER(VCPU, pfault_sync)
189  };
190  
191  const struct kvm_stats_header kvm_vcpu_stats_header = {
192  	.name_size = KVM_STATS_NAME_SIZE,
193  	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
194  	.id_offset = sizeof(struct kvm_stats_header),
195  	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
196  	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
197  		       sizeof(kvm_vcpu_stats_desc),
198  };
199  
200  /* allow nested virtualization in KVM (if enabled by user space) */
201  static int nested;
202  module_param(nested, int, S_IRUGO);
203  MODULE_PARM_DESC(nested, "Nested virtualization support");
204  
205  /* allow 1m huge page guest backing, if !nested */
206  static int hpage;
207  module_param(hpage, int, 0444);
208  MODULE_PARM_DESC(hpage, "1m huge page backing support");
209  
210  /* maximum percentage of steal time for polling.  >100 is treated like 100 */
211  static u8 halt_poll_max_steal = 10;
212  module_param(halt_poll_max_steal, byte, 0644);
213  MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
214  
215  /* if set to true, the GISA will be initialized and used if available */
216  static bool use_gisa  = true;
217  module_param(use_gisa, bool, 0644);
218  MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
219  
220  /* maximum diag9c forwarding per second */
221  unsigned int diag9c_forwarding_hz;
222  module_param(diag9c_forwarding_hz, uint, 0644);
223  MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
224  
225  /*
226   * allow asynchronous deinit for protected guests; enable by default since
227   * the feature is opt-in anyway
228   */
229  static int async_destroy = 1;
230  module_param(async_destroy, int, 0444);
231  MODULE_PARM_DESC(async_destroy, "Asynchronous destroy for protected guests");
232  
233  /*
234   * For now we handle at most 16 double words as this is what the s390 base
235   * kernel handles and stores in the prefix page. If we ever need to go beyond
236   * this, this requires changes to code, but the external uapi can stay.
237   */
238  #define SIZE_INTERNAL 16
239  
240  /*
241   * Base feature mask that defines default mask for facilities. Consists of the
242   * defines in FACILITIES_KVM and the non-hypervisor managed bits.
243   */
244  static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
245  /*
246   * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
247   * and defines the facilities that can be enabled via a cpu model.
248   */
249  static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
250  
251  static unsigned long kvm_s390_fac_size(void)
252  {
253  	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
254  	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
255  	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
256  		sizeof(stfle_fac_list));
257  
258  	return SIZE_INTERNAL;
259  }
260  
261  /* available cpu features supported by kvm */
262  static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
263  /* available subfunctions indicated via query / "test bit" */
264  static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
265  
266  static struct gmap_notifier gmap_notifier;
267  static struct gmap_notifier vsie_gmap_notifier;
268  debug_info_t *kvm_s390_dbf;
269  debug_info_t *kvm_s390_dbf_uv;
270  
271  /* Section: not file related */
272  /* forward declarations */
273  static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
274  			      unsigned long end);
275  static int sca_switch_to_extended(struct kvm *kvm);
276  
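/*
 * Adjust the guest epoch in a SIE control block after the host TOD clock
 * has jumped by @delta. With the multiple-epoch facility (ECD_MEF), a
 * carry out of the 64-bit epoch also has to be propagated into the
 * epoch index (epdx).
 */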
277  static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
278  {
279  	u8 delta_idx = 0;
280  
281  	/*
282  	 * The TOD jumps by delta, we have to compensate this by adding
283  	 * -delta to the epoch.
284  	 */
285  	delta = -delta;
286  
287  	/* sign-extension - we're adding to signed values below */
288  	if ((s64)delta < 0)
289  		delta_idx = -1;
290  
291  	scb->epoch += delta;
292  	if (scb->ecd & ECD_MEF) {
293  		scb->epdx += delta_idx;
294  		if (scb->epoch < delta)
295  			scb->epdx += 1;
296  	}
297  }
298  
299  /*
300   * This callback is executed during stop_machine(). All CPUs are therefore
301   * temporarily stopped. In order not to change guest behavior, we have to
302   * disable preemption whenever we touch the epoch of kvm and the VCPUs,
303   * so a CPU won't be stopped while calculating with the epoch.
304   */
305  static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
306  			  void *v)
307  {
308  	struct kvm *kvm;
309  	struct kvm_vcpu *vcpu;
310  	unsigned long i;
311  	unsigned long long *delta = v;
312  
313  	list_for_each_entry(kvm, &vm_list, vm_list) {
314  		kvm_for_each_vcpu(i, vcpu, kvm) {
315  			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
316  			if (i == 0) {
317  				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
318  				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
319  			}
320  			if (vcpu->arch.cputm_enabled)
321  				vcpu->arch.cputm_start += *delta;
322  			if (vcpu->arch.vsie_block)
323  				kvm_clock_sync_scb(vcpu->arch.vsie_block,
324  						   *delta);
325  		}
326  	}
327  	return NOTIFY_OK;
328  }
329  
330  static struct notifier_block kvm_clock_notifier = {
331  	.notifier_call = kvm_clock_sync,
332  };
333  
334  static void allow_cpu_feat(unsigned long nr)
335  {
336  	set_bit_inv(nr, kvm_s390_available_cpu_feat);
337  }
338  
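/*
 * Test whether PLO (PERFORM LOCKED OPERATION) function code @nr is
 * installed by executing PLO with the test bit (0x100) set in the
 * function code; condition code 0 means the function is available.
 */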
339  static inline int plo_test_bit(unsigned char nr)
340  {
341  	unsigned long function = (unsigned long)nr | 0x100;
342  	int cc;
343  
344  	asm volatile(
345  		"	lgr	0,%[function]\n"
346  		/* Parameter registers are ignored for "test bit" */
347  		"	plo	0,0,0,0(0)\n"
348  		CC_IPM(cc)
349  		: CC_OUT(cc, cc)
350  		: [function] "d" (function)
351  		: CC_CLOBBER_LIST("0"));
352  	return CC_TRANSFORM(cc) == 0;
353  }
354  
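/*
 * pfcr_query(), __sortl_query() and __dfltcc_query() execute the query
 * function (function code 0 in GR0) of the respective instruction and
 * store the installed-function indication bits in *query.
 */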
355  static __always_inline void pfcr_query(u8 (*query)[16])
356  {
357  	asm volatile(
358  		"	lghi	0,0\n"
359  		"	.insn   rsy,0xeb0000000016,0,0,%[query]\n"
360  		: [query] "=QS" (*query)
361  		:
362  		: "cc", "0");
363  }
364  
365  static __always_inline void __sortl_query(u8 (*query)[32])
366  {
367  	asm volatile(
368  		"	lghi	0,0\n"
369  		"	la	1,%[query]\n"
370  		/* Parameter registers are ignored */
371  		"	.insn	rre,0xb9380000,2,4\n"
372  		: [query] "=R" (*query)
373  		:
374  		: "cc", "0", "1");
375  }
376  
377  static __always_inline void __dfltcc_query(u8 (*query)[32])
378  {
379  	asm volatile(
380  		"	lghi	0,0\n"
381  		"	la	1,%[query]\n"
382  		/* Parameter registers are ignored */
383  		"	.insn	rrf,0xb9390000,2,4,6,0\n"
384  		: [query] "=R" (*query)
385  		:
386  		: "cc", "0", "1");
387  }
388  
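/*
 * Probe which subfunctions (PLO, PTFF, CPACF, SORTL, DFLTCC, PFCR) and
 * SIE features the host provides, so they can later be offered to guests
 * via the CPU model. The interpretation features needed for nested
 * virtualization (vSIE) are only advertised if the "nested" module
 * parameter is set and the required host facilities are present.
 */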
389  static void __init kvm_s390_cpu_feat_init(void)
390  {
391  	int i;
392  
393  	for (i = 0; i < 256; ++i) {
394  		if (plo_test_bit(i))
395  			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
396  	}
397  
398  	if (test_facility(28)) /* TOD-clock steering */
399  		ptff(kvm_s390_available_subfunc.ptff,
400  		     sizeof(kvm_s390_available_subfunc.ptff),
401  		     PTFF_QAF);
402  
403  	if (test_facility(17)) { /* MSA */
404  		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
405  			      kvm_s390_available_subfunc.kmac);
406  		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
407  			      kvm_s390_available_subfunc.kmc);
408  		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
409  			      kvm_s390_available_subfunc.km);
410  		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
411  			      kvm_s390_available_subfunc.kimd);
412  		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
413  			      kvm_s390_available_subfunc.klmd);
414  	}
415  	if (test_facility(76)) /* MSA3 */
416  		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
417  			      kvm_s390_available_subfunc.pckmo);
418  	if (test_facility(77)) { /* MSA4 */
419  		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
420  			      kvm_s390_available_subfunc.kmctr);
421  		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
422  			      kvm_s390_available_subfunc.kmf);
423  		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
424  			      kvm_s390_available_subfunc.kmo);
425  		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
426  			      kvm_s390_available_subfunc.pcc);
427  	}
428  	if (test_facility(57)) /* MSA5 */
429  		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
430  			      kvm_s390_available_subfunc.ppno);
431  
432  	if (test_facility(146)) /* MSA8 */
433  		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
434  			      kvm_s390_available_subfunc.kma);
435  
436  	if (test_facility(155)) /* MSA9 */
437  		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
438  			      kvm_s390_available_subfunc.kdsa);
439  
440  	if (test_facility(150)) /* SORTL */
441  		__sortl_query(&kvm_s390_available_subfunc.sortl);
442  
443  	if (test_facility(151)) /* DFLTCC */
444  		__dfltcc_query(&kvm_s390_available_subfunc.dfltcc);
445  
446  	if (test_facility(201))	/* PFCR */
447  		pfcr_query(&kvm_s390_available_subfunc.pfcr);
448  
449  	if (machine_has_esop())
450  		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
451  	/*
452  	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
453  	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
454  	 */
455  	if (!sclp.has_sief2 || !machine_has_esop() || !sclp.has_64bscao ||
456  	    !test_facility(3) || !nested)
457  		return;
458  	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
459  	if (sclp.has_64bscao)
460  		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
461  	if (sclp.has_siif)
462  		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
463  	if (sclp.has_gpere)
464  		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
465  	if (sclp.has_gsls)
466  		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
467  	if (sclp.has_ib)
468  		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
469  	if (sclp.has_cei)
470  		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
471  	if (sclp.has_ibs)
472  		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
473  	if (sclp.has_kss)
474  		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
475  	/*
476  	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
477  	 * all skey handling functions read/set the skey from the PGSTE
478  	 * instead of the real storage key.
479  	 *
480  	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
481  	 * pages being detected as preserved although they are resident.
482  	 *
483  	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
484  	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
485  	 *
486  	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
487  	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
488  	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
489  	 *
490  	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
491  	 * cannot easily shadow the SCA because of the ipte lock.
492  	 */
493  }
494  
495  static int __init __kvm_s390_init(void)
496  {
497  	int rc = -ENOMEM;
498  
499  	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
500  	if (!kvm_s390_dbf)
501  		return -ENOMEM;
502  
503  	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
504  	if (!kvm_s390_dbf_uv)
505  		goto err_kvm_uv;
506  
507  	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
508  	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
509  		goto err_debug_view;
510  
511  	kvm_s390_cpu_feat_init();
512  
513  	/* Register floating interrupt controller interface. */
514  	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
515  	if (rc) {
516  		pr_err("A FLIC registration call failed with rc=%d\n", rc);
517  		goto err_flic;
518  	}
519  
520  	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
521  		rc = kvm_s390_pci_init();
522  		if (rc) {
523  			pr_err("Unable to allocate AIFT for PCI\n");
524  			goto err_pci;
525  		}
526  	}
527  
528  	rc = kvm_s390_gib_init(GAL_ISC);
529  	if (rc)
530  		goto err_gib;
531  
532  	gmap_notifier.notifier_call = kvm_gmap_notifier;
533  	gmap_register_pte_notifier(&gmap_notifier);
534  	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
535  	gmap_register_pte_notifier(&vsie_gmap_notifier);
536  	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
537  				       &kvm_clock_notifier);
538  
539  	return 0;
540  
541  err_gib:
542  	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
543  		kvm_s390_pci_exit();
544  err_pci:
545  err_flic:
546  err_debug_view:
547  	debug_unregister(kvm_s390_dbf_uv);
548  err_kvm_uv:
549  	debug_unregister(kvm_s390_dbf);
550  	return rc;
551  }
552  
553  static void __kvm_s390_exit(void)
554  {
555  	gmap_unregister_pte_notifier(&gmap_notifier);
556  	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
557  	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
558  					 &kvm_clock_notifier);
559  
560  	kvm_s390_gib_destroy();
561  	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
562  		kvm_s390_pci_exit();
563  	debug_unregister(kvm_s390_dbf);
564  	debug_unregister(kvm_s390_dbf_uv);
565  }
566  
567  /* Section: device related */
568  long kvm_arch_dev_ioctl(struct file *filp,
569  			unsigned int ioctl, unsigned long arg)
570  {
571  	if (ioctl == KVM_S390_ENABLE_SIE)
572  		return s390_enable_sie();
573  	return -EINVAL;
574  }
575  
576  int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
577  {
578  	int r;
579  
580  	switch (ext) {
581  	case KVM_CAP_S390_PSW:
582  	case KVM_CAP_S390_GMAP:
583  	case KVM_CAP_SYNC_MMU:
584  #ifdef CONFIG_KVM_S390_UCONTROL
585  	case KVM_CAP_S390_UCONTROL:
586  #endif
587  	case KVM_CAP_ASYNC_PF:
588  	case KVM_CAP_SYNC_REGS:
589  	case KVM_CAP_ONE_REG:
590  	case KVM_CAP_ENABLE_CAP:
591  	case KVM_CAP_S390_CSS_SUPPORT:
592  	case KVM_CAP_IOEVENTFD:
593  	case KVM_CAP_S390_IRQCHIP:
594  	case KVM_CAP_VM_ATTRIBUTES:
595  	case KVM_CAP_MP_STATE:
596  	case KVM_CAP_IMMEDIATE_EXIT:
597  	case KVM_CAP_S390_INJECT_IRQ:
598  	case KVM_CAP_S390_USER_SIGP:
599  	case KVM_CAP_S390_USER_STSI:
600  	case KVM_CAP_S390_SKEYS:
601  	case KVM_CAP_S390_IRQ_STATE:
602  	case KVM_CAP_S390_USER_INSTR0:
603  	case KVM_CAP_S390_CMMA_MIGRATION:
604  	case KVM_CAP_S390_AIS:
605  	case KVM_CAP_S390_AIS_MIGRATION:
606  	case KVM_CAP_S390_VCPU_RESETS:
607  	case KVM_CAP_SET_GUEST_DEBUG:
608  	case KVM_CAP_S390_DIAG318:
609  	case KVM_CAP_IRQFD_RESAMPLE:
610  		r = 1;
611  		break;
612  	case KVM_CAP_SET_GUEST_DEBUG2:
613  		r = KVM_GUESTDBG_VALID_MASK;
614  		break;
615  	case KVM_CAP_S390_HPAGE_1M:
616  		r = 0;
617  		if (hpage && !(kvm && kvm_is_ucontrol(kvm)))
618  			r = 1;
619  		break;
620  	case KVM_CAP_S390_MEM_OP:
621  		r = MEM_OP_MAX_SIZE;
622  		break;
623  	case KVM_CAP_S390_MEM_OP_EXTENSION:
624  		/*
625  		 * Flag bits indicating which extensions are supported.
626  		 * If r > 0, the base extension must also be supported/indicated,
627  		 * in order to maintain backwards compatibility.
628  		 */
629  		r = KVM_S390_MEMOP_EXTENSION_CAP_BASE |
630  		    KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG;
631  		break;
632  	case KVM_CAP_NR_VCPUS:
633  	case KVM_CAP_MAX_VCPUS:
634  	case KVM_CAP_MAX_VCPU_ID:
635  		r = KVM_S390_BSCA_CPU_SLOTS;
636  		if (!kvm_s390_use_sca_entries())
637  			r = KVM_MAX_VCPUS;
638  		else if (sclp.has_esca && sclp.has_64bscao)
639  			r = KVM_S390_ESCA_CPU_SLOTS;
640  		if (ext == KVM_CAP_NR_VCPUS)
641  			r = min_t(unsigned int, num_online_cpus(), r);
642  		break;
643  	case KVM_CAP_S390_COW:
644  		r = machine_has_esop();
645  		break;
646  	case KVM_CAP_S390_VECTOR_REGISTERS:
647  		r = test_facility(129);
648  		break;
649  	case KVM_CAP_S390_RI:
650  		r = test_facility(64);
651  		break;
652  	case KVM_CAP_S390_GS:
653  		r = test_facility(133);
654  		break;
655  	case KVM_CAP_S390_BPB:
656  		r = test_facility(82);
657  		break;
658  	case KVM_CAP_S390_PROTECTED_ASYNC_DISABLE:
659  		r = async_destroy && is_prot_virt_host();
660  		break;
661  	case KVM_CAP_S390_PROTECTED:
662  		r = is_prot_virt_host();
663  		break;
664  	case KVM_CAP_S390_PROTECTED_DUMP: {
665  		u64 pv_cmds_dump[] = {
666  			BIT_UVC_CMD_DUMP_INIT,
667  			BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE,
668  			BIT_UVC_CMD_DUMP_CPU,
669  			BIT_UVC_CMD_DUMP_COMPLETE,
670  		};
671  		int i;
672  
673  		r = is_prot_virt_host();
674  
675  		for (i = 0; i < ARRAY_SIZE(pv_cmds_dump); i++) {
676  			if (!test_bit_inv(pv_cmds_dump[i],
677  					  (unsigned long *)&uv_info.inst_calls_list)) {
678  				r = 0;
679  				break;
680  			}
681  		}
682  		break;
683  	}
684  	case KVM_CAP_S390_ZPCI_OP:
685  		r = kvm_s390_pci_interp_allowed();
686  		break;
687  	case KVM_CAP_S390_CPU_TOPOLOGY:
688  		r = test_facility(11);
689  		break;
690  	default:
691  		r = 0;
692  	}
693  	return r;
694  }
695  
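/*
 * Transfer the dirty bits collected in the gmap/PGSTEs into KVM's dirty
 * bitmap, walking the memslot one segment (_PAGE_ENTRIES pages) at a
 * time and rescheduling between segments.
 */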
696  void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
697  {
698  	int i;
699  	gfn_t cur_gfn, last_gfn;
700  	unsigned long gaddr, vmaddr;
701  	struct gmap *gmap = kvm->arch.gmap;
702  	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
703  
704  	/* Loop over all guest segments */
705  	cur_gfn = memslot->base_gfn;
706  	last_gfn = memslot->base_gfn + memslot->npages;
707  	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
708  		gaddr = gfn_to_gpa(cur_gfn);
709  		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
710  		if (kvm_is_error_hva(vmaddr))
711  			continue;
712  
713  		bitmap_zero(bitmap, _PAGE_ENTRIES);
714  		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
715  		for (i = 0; i < _PAGE_ENTRIES; i++) {
716  			if (test_bit(i, bitmap))
717  				mark_page_dirty(kvm, cur_gfn + i);
718  		}
719  
720  		if (fatal_signal_pending(current))
721  			return;
722  		cond_resched();
723  	}
724  }
725  
726  /* Section: vm related */
727  static void sca_del_vcpu(struct kvm_vcpu *vcpu);
728  
729  /*
730   * Get (and clear) the dirty memory log for a memory slot.
731   */
732  int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
733  			       struct kvm_dirty_log *log)
734  {
735  	int r;
736  	unsigned long n;
737  	struct kvm_memory_slot *memslot;
738  	int is_dirty;
739  
740  	if (kvm_is_ucontrol(kvm))
741  		return -EINVAL;
742  
743  	mutex_lock(&kvm->slots_lock);
744  
745  	r = -EINVAL;
746  	if (log->slot >= KVM_USER_MEM_SLOTS)
747  		goto out;
748  
749  	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
750  	if (r)
751  		goto out;
752  
753  	/* Clear the dirty log */
754  	if (is_dirty) {
755  		n = kvm_dirty_bitmap_bytes(memslot);
756  		memset(memslot->dirty_bitmap, 0, n);
757  	}
758  	r = 0;
759  out:
760  	mutex_unlock(&kvm->slots_lock);
761  	return r;
762  }
763  
764  static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
765  {
766  	unsigned long i;
767  	struct kvm_vcpu *vcpu;
768  
769  	kvm_for_each_vcpu(i, vcpu, kvm) {
770  		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
771  	}
772  }
773  
774  int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
775  {
776  	int r;
777  
778  	if (cap->flags)
779  		return -EINVAL;
780  
781  	switch (cap->cap) {
782  	case KVM_CAP_S390_IRQCHIP:
783  		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
784  		kvm->arch.use_irqchip = 1;
785  		r = 0;
786  		break;
787  	case KVM_CAP_S390_USER_SIGP:
788  		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
789  		kvm->arch.user_sigp = 1;
790  		r = 0;
791  		break;
792  	case KVM_CAP_S390_VECTOR_REGISTERS:
793  		mutex_lock(&kvm->lock);
794  		if (kvm->created_vcpus) {
795  			r = -EBUSY;
796  		} else if (cpu_has_vx()) {
797  			set_kvm_facility(kvm->arch.model.fac_mask, 129);
798  			set_kvm_facility(kvm->arch.model.fac_list, 129);
799  			if (test_facility(134)) {
800  				set_kvm_facility(kvm->arch.model.fac_mask, 134);
801  				set_kvm_facility(kvm->arch.model.fac_list, 134);
802  			}
803  			if (test_facility(135)) {
804  				set_kvm_facility(kvm->arch.model.fac_mask, 135);
805  				set_kvm_facility(kvm->arch.model.fac_list, 135);
806  			}
807  			if (test_facility(148)) {
808  				set_kvm_facility(kvm->arch.model.fac_mask, 148);
809  				set_kvm_facility(kvm->arch.model.fac_list, 148);
810  			}
811  			if (test_facility(152)) {
812  				set_kvm_facility(kvm->arch.model.fac_mask, 152);
813  				set_kvm_facility(kvm->arch.model.fac_list, 152);
814  			}
815  			if (test_facility(192)) {
816  				set_kvm_facility(kvm->arch.model.fac_mask, 192);
817  				set_kvm_facility(kvm->arch.model.fac_list, 192);
818  			}
819  			if (test_facility(198)) {
820  				set_kvm_facility(kvm->arch.model.fac_mask, 198);
821  				set_kvm_facility(kvm->arch.model.fac_list, 198);
822  			}
823  			if (test_facility(199)) {
824  				set_kvm_facility(kvm->arch.model.fac_mask, 199);
825  				set_kvm_facility(kvm->arch.model.fac_list, 199);
826  			}
827  			r = 0;
828  		} else
829  			r = -EINVAL;
830  		mutex_unlock(&kvm->lock);
831  		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
832  			 r ? "(not available)" : "(success)");
833  		break;
834  	case KVM_CAP_S390_RI:
835  		r = -EINVAL;
836  		mutex_lock(&kvm->lock);
837  		if (kvm->created_vcpus) {
838  			r = -EBUSY;
839  		} else if (test_facility(64)) {
840  			set_kvm_facility(kvm->arch.model.fac_mask, 64);
841  			set_kvm_facility(kvm->arch.model.fac_list, 64);
842  			r = 0;
843  		}
844  		mutex_unlock(&kvm->lock);
845  		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
846  			 r ? "(not available)" : "(success)");
847  		break;
848  	case KVM_CAP_S390_AIS:
849  		mutex_lock(&kvm->lock);
850  		if (kvm->created_vcpus) {
851  			r = -EBUSY;
852  		} else {
853  			set_kvm_facility(kvm->arch.model.fac_mask, 72);
854  			set_kvm_facility(kvm->arch.model.fac_list, 72);
855  			r = 0;
856  		}
857  		mutex_unlock(&kvm->lock);
858  		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
859  			 r ? "(not available)" : "(success)");
860  		break;
861  	case KVM_CAP_S390_GS:
862  		r = -EINVAL;
863  		mutex_lock(&kvm->lock);
864  		if (kvm->created_vcpus) {
865  			r = -EBUSY;
866  		} else if (test_facility(133)) {
867  			set_kvm_facility(kvm->arch.model.fac_mask, 133);
868  			set_kvm_facility(kvm->arch.model.fac_list, 133);
869  			r = 0;
870  		}
871  		mutex_unlock(&kvm->lock);
872  		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
873  			 r ? "(not available)" : "(success)");
874  		break;
875  	case KVM_CAP_S390_HPAGE_1M:
876  		mutex_lock(&kvm->lock);
877  		if (kvm->created_vcpus)
878  			r = -EBUSY;
879  		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
880  			r = -EINVAL;
881  		else {
882  			r = 0;
883  			mmap_write_lock(kvm->mm);
884  			kvm->mm->context.allow_gmap_hpage_1m = 1;
885  			mmap_write_unlock(kvm->mm);
886  			/*
887  			 * We might have to create fake 4k page
888  			 * tables. To avoid that the hardware works on
889  			 * stale PGSTEs, we emulate these instructions.
890  			 */
891  			kvm->arch.use_skf = 0;
892  			kvm->arch.use_pfmfi = 0;
893  		}
894  		mutex_unlock(&kvm->lock);
895  		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
896  			 r ? "(not available)" : "(success)");
897  		break;
898  	case KVM_CAP_S390_USER_STSI:
899  		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
900  		kvm->arch.user_stsi = 1;
901  		r = 0;
902  		break;
903  	case KVM_CAP_S390_USER_INSTR0:
904  		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
905  		kvm->arch.user_instr0 = 1;
906  		icpt_operexc_on_all_vcpus(kvm);
907  		r = 0;
908  		break;
909  	case KVM_CAP_S390_CPU_TOPOLOGY:
910  		r = -EINVAL;
911  		mutex_lock(&kvm->lock);
912  		if (kvm->created_vcpus) {
913  			r = -EBUSY;
914  		} else if (test_facility(11)) {
915  			set_kvm_facility(kvm->arch.model.fac_mask, 11);
916  			set_kvm_facility(kvm->arch.model.fac_list, 11);
917  			r = 0;
918  		}
919  		mutex_unlock(&kvm->lock);
920  		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s",
921  			 r ? "(not available)" : "(success)");
922  		break;
923  	default:
924  		r = -EINVAL;
925  		break;
926  	}
927  	return r;
928  }
929  
930  static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
931  {
932  	int ret;
933  
934  	switch (attr->attr) {
935  	case KVM_S390_VM_MEM_LIMIT_SIZE:
936  		ret = 0;
937  		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
938  			 kvm->arch.mem_limit);
939  		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
940  			ret = -EFAULT;
941  		break;
942  	default:
943  		ret = -ENXIO;
944  		break;
945  	}
946  	return ret;
947  }
948  
949  static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
950  {
951  	int ret;
952  	unsigned int idx;
953  	switch (attr->attr) {
954  	case KVM_S390_VM_MEM_ENABLE_CMMA:
955  		ret = -ENXIO;
956  		if (!sclp.has_cmma)
957  			break;
958  
959  		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
960  		mutex_lock(&kvm->lock);
961  		if (kvm->created_vcpus)
962  			ret = -EBUSY;
963  		else if (kvm->mm->context.allow_gmap_hpage_1m)
964  			ret = -EINVAL;
965  		else {
966  			kvm->arch.use_cmma = 1;
967  			/* Not compatible with cmma. */
968  			kvm->arch.use_pfmfi = 0;
969  			ret = 0;
970  		}
971  		mutex_unlock(&kvm->lock);
972  		break;
973  	case KVM_S390_VM_MEM_CLR_CMMA:
974  		ret = -ENXIO;
975  		if (!sclp.has_cmma)
976  			break;
977  		ret = -EINVAL;
978  		if (!kvm->arch.use_cmma)
979  			break;
980  
981  		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
982  		mutex_lock(&kvm->lock);
983  		idx = srcu_read_lock(&kvm->srcu);
984  		s390_reset_cmma(kvm->arch.gmap->mm);
985  		srcu_read_unlock(&kvm->srcu, idx);
986  		mutex_unlock(&kvm->lock);
987  		ret = 0;
988  		break;
989  	case KVM_S390_VM_MEM_LIMIT_SIZE: {
990  		unsigned long new_limit;
991  
992  		if (kvm_is_ucontrol(kvm))
993  			return -EINVAL;
994  
995  		if (get_user(new_limit, (u64 __user *)attr->addr))
996  			return -EFAULT;
997  
998  		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
999  		    new_limit > kvm->arch.mem_limit)
1000  			return -E2BIG;
1001  
1002  		if (!new_limit)
1003  			return -EINVAL;
1004  
1005  		/* gmap_create takes last usable address */
1006  		if (new_limit != KVM_S390_NO_MEM_LIMIT)
1007  			new_limit -= 1;
1008  
1009  		ret = -EBUSY;
1010  		mutex_lock(&kvm->lock);
1011  		if (!kvm->created_vcpus) {
1012  			/* gmap_create will round the limit up */
1013  			struct gmap *new = gmap_create(current->mm, new_limit);
1014  
1015  			if (!new) {
1016  				ret = -ENOMEM;
1017  			} else {
1018  				gmap_remove(kvm->arch.gmap);
1019  				new->private = kvm;
1020  				kvm->arch.gmap = new;
1021  				ret = 0;
1022  			}
1023  		}
1024  		mutex_unlock(&kvm->lock);
1025  		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
1026  		VM_EVENT(kvm, 3, "New guest asce: 0x%p",
1027  			 (void *) kvm->arch.gmap->asce);
1028  		break;
1029  	}
1030  	default:
1031  		ret = -ENXIO;
1032  		break;
1033  	}
1034  	return ret;
1035  }
1036  
1037  static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
1038  
1039  void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
1040  {
1041  	struct kvm_vcpu *vcpu;
1042  	unsigned long i;
1043  
1044  	kvm_s390_vcpu_block_all(kvm);
1045  
1046  	kvm_for_each_vcpu(i, vcpu, kvm) {
1047  		kvm_s390_vcpu_crypto_setup(vcpu);
1048  		/* recreate the shadow crycb by leaving the VSIE handler */
1049  		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
1050  	}
1051  
1052  	kvm_s390_vcpu_unblock_all(kvm);
1053  }
1054  
1055  static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
1056  {
1057  	mutex_lock(&kvm->lock);
1058  	switch (attr->attr) {
1059  	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1060  		if (!test_kvm_facility(kvm, 76)) {
1061  			mutex_unlock(&kvm->lock);
1062  			return -EINVAL;
1063  		}
1064  		get_random_bytes(
1065  			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1066  			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1067  		kvm->arch.crypto.aes_kw = 1;
1068  		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
1069  		break;
1070  	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1071  		if (!test_kvm_facility(kvm, 76)) {
1072  			mutex_unlock(&kvm->lock);
1073  			return -EINVAL;
1074  		}
1075  		get_random_bytes(
1076  			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1077  			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1078  		kvm->arch.crypto.dea_kw = 1;
1079  		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
1080  		break;
1081  	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1082  		if (!test_kvm_facility(kvm, 76)) {
1083  			mutex_unlock(&kvm->lock);
1084  			return -EINVAL;
1085  		}
1086  		kvm->arch.crypto.aes_kw = 0;
1087  		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
1088  			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1089  		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
1090  		break;
1091  	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1092  		if (!test_kvm_facility(kvm, 76)) {
1093  			mutex_unlock(&kvm->lock);
1094  			return -EINVAL;
1095  		}
1096  		kvm->arch.crypto.dea_kw = 0;
1097  		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
1098  			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1099  		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
1100  		break;
1101  	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1102  		if (!ap_instructions_available()) {
1103  			mutex_unlock(&kvm->lock);
1104  			return -EOPNOTSUPP;
1105  		}
1106  		kvm->arch.crypto.apie = 1;
1107  		break;
1108  	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1109  		if (!ap_instructions_available()) {
1110  			mutex_unlock(&kvm->lock);
1111  			return -EOPNOTSUPP;
1112  		}
1113  		kvm->arch.crypto.apie = 0;
1114  		break;
1115  	default:
1116  		mutex_unlock(&kvm->lock);
1117  		return -ENXIO;
1118  	}
1119  
1120  	kvm_s390_vcpu_crypto_reset_all(kvm);
1121  	mutex_unlock(&kvm->lock);
1122  	return 0;
1123  }
1124  
1125  static void kvm_s390_vcpu_pci_setup(struct kvm_vcpu *vcpu)
1126  {
1127  	/* Only set the ECB bits after guest requests zPCI interpretation */
1128  	if (!vcpu->kvm->arch.use_zpci_interp)
1129  		return;
1130  
1131  	vcpu->arch.sie_block->ecb2 |= ECB2_ZPCI_LSI;
1132  	vcpu->arch.sie_block->ecb3 |= ECB3_AISII + ECB3_AISI;
1133  }
1134  
1135  void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm)
1136  {
1137  	struct kvm_vcpu *vcpu;
1138  	unsigned long i;
1139  
1140  	lockdep_assert_held(&kvm->lock);
1141  
1142  	if (!kvm_s390_pci_interp_allowed())
1143  		return;
1144  
1145  	/*
1146  	 * If host is configured for PCI and the necessary facilities are
1147  	 * available, turn on interpretation for the life of this guest
1148  	 */
1149  	kvm->arch.use_zpci_interp = 1;
1150  
1151  	kvm_s390_vcpu_block_all(kvm);
1152  
1153  	kvm_for_each_vcpu(i, vcpu, kvm) {
1154  		kvm_s390_vcpu_pci_setup(vcpu);
1155  		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
1156  	}
1157  
1158  	kvm_s390_vcpu_unblock_all(kvm);
1159  }
1160  
1161  static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1162  {
1163  	unsigned long cx;
1164  	struct kvm_vcpu *vcpu;
1165  
1166  	kvm_for_each_vcpu(cx, vcpu, kvm)
1167  		kvm_s390_sync_request(req, vcpu);
1168  }
1169  
1170  /*
1171   * Must be called with kvm->srcu held to avoid races on memslots, and with
1172   * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1173   */
1174  static int kvm_s390_vm_start_migration(struct kvm *kvm)
1175  {
1176  	struct kvm_memory_slot *ms;
1177  	struct kvm_memslots *slots;
1178  	unsigned long ram_pages = 0;
1179  	int bkt;
1180  
1181  	/* migration mode already enabled */
1182  	if (kvm->arch.migration_mode)
1183  		return 0;
1184  	slots = kvm_memslots(kvm);
1185  	if (!slots || kvm_memslots_empty(slots))
1186  		return -EINVAL;
1187  
1188  	if (!kvm->arch.use_cmma) {
1189  		kvm->arch.migration_mode = 1;
1190  		return 0;
1191  	}
1192  	/* mark all the pages in active slots as dirty */
1193  	kvm_for_each_memslot(ms, bkt, slots) {
1194  		if (!ms->dirty_bitmap)
1195  			return -EINVAL;
1196  		/*
1197  		 * The second half of the bitmap is only used on x86,
1198  		 * and would be wasted otherwise, so we put it to good
1199  		 * use here to keep track of the state of the storage
1200  		 * attributes.
1201  		 */
1202  		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1203  		ram_pages += ms->npages;
1204  	}
1205  	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1206  	kvm->arch.migration_mode = 1;
1207  	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1208  	return 0;
1209  }
1210  
1211  /*
1212   * Must be called with kvm->slots_lock to avoid races with ourselves and
1213   * kvm_s390_vm_start_migration.
1214   */
1215  static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1216  {
1217  	/* migration mode already disabled */
1218  	if (!kvm->arch.migration_mode)
1219  		return 0;
1220  	kvm->arch.migration_mode = 0;
1221  	if (kvm->arch.use_cmma)
1222  		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1223  	return 0;
1224  }
1225  
1226  static int kvm_s390_vm_set_migration(struct kvm *kvm,
1227  				     struct kvm_device_attr *attr)
1228  {
1229  	int res = -ENXIO;
1230  
1231  	mutex_lock(&kvm->slots_lock);
1232  	switch (attr->attr) {
1233  	case KVM_S390_VM_MIGRATION_START:
1234  		res = kvm_s390_vm_start_migration(kvm);
1235  		break;
1236  	case KVM_S390_VM_MIGRATION_STOP:
1237  		res = kvm_s390_vm_stop_migration(kvm);
1238  		break;
1239  	default:
1240  		break;
1241  	}
1242  	mutex_unlock(&kvm->slots_lock);
1243  
1244  	return res;
1245  }
1246  
1247  static int kvm_s390_vm_get_migration(struct kvm *kvm,
1248  				     struct kvm_device_attr *attr)
1249  {
1250  	u64 mig = kvm->arch.migration_mode;
1251  
1252  	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1253  		return -ENXIO;
1254  
1255  	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1256  		return -EFAULT;
1257  	return 0;
1258  }
1259  
1260  static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
1261  
1262  static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1263  {
1264  	struct kvm_s390_vm_tod_clock gtod;
1265  
1266  	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1267  		return -EFAULT;
1268  
1269  	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1270  		return -EINVAL;
1271  	__kvm_s390_set_tod_clock(kvm, &gtod);
1272  
1273  	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1274  		gtod.epoch_idx, gtod.tod);
1275  
1276  	return 0;
1277  }
1278  
1279  static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1280  {
1281  	u8 gtod_high;
1282  
1283  	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1284  					   sizeof(gtod_high)))
1285  		return -EFAULT;
1286  
1287  	if (gtod_high != 0)
1288  		return -EINVAL;
1289  	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1290  
1291  	return 0;
1292  }
1293  
1294  static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1295  {
1296  	struct kvm_s390_vm_tod_clock gtod = { 0 };
1297  
1298  	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1299  			   sizeof(gtod.tod)))
1300  		return -EFAULT;
1301  
1302  	__kvm_s390_set_tod_clock(kvm, &gtod);
1303  	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1304  	return 0;
1305  }
1306  
1307  static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1308  {
1309  	int ret;
1310  
1311  	if (attr->flags)
1312  		return -EINVAL;
1313  
1314  	mutex_lock(&kvm->lock);
1315  	/*
1316  	 * For protected guests, the TOD is managed by the ultravisor, so trying
1317  	 * to change it will never bring the expected results.
1318  	 */
1319  	if (kvm_s390_pv_is_protected(kvm)) {
1320  		ret = -EOPNOTSUPP;
1321  		goto out_unlock;
1322  	}
1323  
1324  	switch (attr->attr) {
1325  	case KVM_S390_VM_TOD_EXT:
1326  		ret = kvm_s390_set_tod_ext(kvm, attr);
1327  		break;
1328  	case KVM_S390_VM_TOD_HIGH:
1329  		ret = kvm_s390_set_tod_high(kvm, attr);
1330  		break;
1331  	case KVM_S390_VM_TOD_LOW:
1332  		ret = kvm_s390_set_tod_low(kvm, attr);
1333  		break;
1334  	default:
1335  		ret = -ENXIO;
1336  		break;
1337  	}
1338  
1339  out_unlock:
1340  	mutex_unlock(&kvm->lock);
1341  	return ret;
1342  }
1343  
1344  static void kvm_s390_get_tod_clock(struct kvm *kvm,
1345  				   struct kvm_s390_vm_tod_clock *gtod)
1346  {
1347  	union tod_clock clk;
1348  
1349  	preempt_disable();
1350  
1351  	store_tod_clock_ext(&clk);
1352  
1353  	gtod->tod = clk.tod + kvm->arch.epoch;
1354  	gtod->epoch_idx = 0;
1355  	if (test_kvm_facility(kvm, 139)) {
1356  		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1357  		if (gtod->tod < clk.tod)
1358  			gtod->epoch_idx += 1;
1359  	}
1360  
1361  	preempt_enable();
1362  }
1363  
1364  static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1365  {
1366  	struct kvm_s390_vm_tod_clock gtod;
1367  
1368  	memset(&gtod, 0, sizeof(gtod));
1369  	kvm_s390_get_tod_clock(kvm, &gtod);
1370  	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1371  		return -EFAULT;
1372  
1373  	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1374  		gtod.epoch_idx, gtod.tod);
1375  	return 0;
1376  }
1377  
1378  static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1379  {
1380  	u8 gtod_high = 0;
1381  
1382  	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1383  					 sizeof(gtod_high)))
1384  		return -EFAULT;
1385  	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1386  
1387  	return 0;
1388  }
1389  
1390  static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1391  {
1392  	u64 gtod;
1393  
1394  	gtod = kvm_s390_get_tod_clock_fast(kvm);
1395  	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1396  		return -EFAULT;
1397  	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1398  
1399  	return 0;
1400  }
1401  
1402  static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1403  {
1404  	int ret;
1405  
1406  	if (attr->flags)
1407  		return -EINVAL;
1408  
1409  	switch (attr->attr) {
1410  	case KVM_S390_VM_TOD_EXT:
1411  		ret = kvm_s390_get_tod_ext(kvm, attr);
1412  		break;
1413  	case KVM_S390_VM_TOD_HIGH:
1414  		ret = kvm_s390_get_tod_high(kvm, attr);
1415  		break;
1416  	case KVM_S390_VM_TOD_LOW:
1417  		ret = kvm_s390_get_tod_low(kvm, attr);
1418  		break;
1419  	default:
1420  		ret = -ENXIO;
1421  		break;
1422  	}
1423  	return ret;
1424  }
1425  
1426  static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1427  {
1428  	struct kvm_s390_vm_cpu_processor *proc;
1429  	u16 lowest_ibc, unblocked_ibc;
1430  	int ret = 0;
1431  
1432  	mutex_lock(&kvm->lock);
1433  	if (kvm->created_vcpus) {
1434  		ret = -EBUSY;
1435  		goto out;
1436  	}
1437  	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1438  	if (!proc) {
1439  		ret = -ENOMEM;
1440  		goto out;
1441  	}
1442  	if (!copy_from_user(proc, (void __user *)attr->addr,
1443  			    sizeof(*proc))) {
1444  		kvm->arch.model.cpuid = proc->cpuid;
1445  		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1446  		unblocked_ibc = sclp.ibc & 0xfff;
1447  		if (lowest_ibc && proc->ibc) {
1448  			if (proc->ibc > unblocked_ibc)
1449  				kvm->arch.model.ibc = unblocked_ibc;
1450  			else if (proc->ibc < lowest_ibc)
1451  				kvm->arch.model.ibc = lowest_ibc;
1452  			else
1453  				kvm->arch.model.ibc = proc->ibc;
1454  		}
1455  		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1456  		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1457  		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1458  			 kvm->arch.model.ibc,
1459  			 kvm->arch.model.cpuid);
1460  		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1461  			 kvm->arch.model.fac_list[0],
1462  			 kvm->arch.model.fac_list[1],
1463  			 kvm->arch.model.fac_list[2]);
1464  	} else
1465  		ret = -EFAULT;
1466  	kfree(proc);
1467  out:
1468  	mutex_unlock(&kvm->lock);
1469  	return ret;
1470  }
1471  
1472  static int kvm_s390_set_processor_feat(struct kvm *kvm,
1473  				       struct kvm_device_attr *attr)
1474  {
1475  	struct kvm_s390_vm_cpu_feat data;
1476  
1477  	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1478  		return -EFAULT;
1479  	if (!bitmap_subset((unsigned long *) data.feat,
1480  			   kvm_s390_available_cpu_feat,
1481  			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1482  		return -EINVAL;
1483  
1484  	mutex_lock(&kvm->lock);
1485  	if (kvm->created_vcpus) {
1486  		mutex_unlock(&kvm->lock);
1487  		return -EBUSY;
1488  	}
1489  	bitmap_from_arr64(kvm->arch.cpu_feat, data.feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1490  	mutex_unlock(&kvm->lock);
1491  	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1492  			 data.feat[0],
1493  			 data.feat[1],
1494  			 data.feat[2]);
1495  	return 0;
1496  }
1497  
1498  static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1499  					  struct kvm_device_attr *attr)
1500  {
1501  	mutex_lock(&kvm->lock);
1502  	if (kvm->created_vcpus) {
1503  		mutex_unlock(&kvm->lock);
1504  		return -EBUSY;
1505  	}
1506  
1507  	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1508  			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1509  		mutex_unlock(&kvm->lock);
1510  		return -EFAULT;
1511  	}
1512  	mutex_unlock(&kvm->lock);
1513  
1514  	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1515  		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1516  		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1517  		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1518  		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1519  	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1520  		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1521  		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1522  	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1523  		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1524  		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1525  	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1526  		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1527  		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1528  	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1529  		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1530  		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1531  	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1532  		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1533  		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1534  	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1535  		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1536  		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1537  	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1538  		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1539  		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1540  	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1541  		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1542  		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1543  	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1544  		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1545  		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1546  	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1547  		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1548  		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1549  	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1550  		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1551  		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1552  	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1553  		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1554  		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1555  	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1556  		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1557  		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1558  	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1559  		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1560  		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1561  	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1562  		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1563  		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1564  		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1565  		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1566  	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1567  		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1568  		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1569  		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1570  		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1571  	VM_EVENT(kvm, 3, "GET: guest PFCR   subfunc 0x%16.16lx.%16.16lx",
1572  		 ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[0],
1573  		 ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[1]);
1574  
1575  	return 0;
1576  }
1577  
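/*
 * Mask of ultravisor feature bits (currently the ap and ap_intr bits)
 * that user space may enable for a protected guest; built from a
 * compound literal so it stays in sync with the bit layout of
 * struct kvm_s390_vm_cpu_uv_feat.
 */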
1578  #define KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK	\
1579  (						\
1580  	((struct kvm_s390_vm_cpu_uv_feat){	\
1581  		.ap = 1,			\
1582  		.ap_intr = 1,			\
1583  	})					\
1584  	.feat					\
1585  )
1586  
1587  static int kvm_s390_set_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
1588  {
1589  	struct kvm_s390_vm_cpu_uv_feat __user *ptr = (void __user *)attr->addr;
1590  	unsigned long data, filter;
1591  
1592  	filter = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK;
1593  	if (get_user(data, &ptr->feat))
1594  		return -EFAULT;
1595  	if (!bitmap_subset(&data, &filter, KVM_S390_VM_CPU_UV_FEAT_NR_BITS))
1596  		return -EINVAL;
1597  
1598  	mutex_lock(&kvm->lock);
1599  	if (kvm->created_vcpus) {
1600  		mutex_unlock(&kvm->lock);
1601  		return -EBUSY;
1602  	}
1603  	kvm->arch.model.uv_feat_guest.feat = data;
1604  	mutex_unlock(&kvm->lock);
1605  
1606  	VM_EVENT(kvm, 3, "SET: guest UV-feat: 0x%16.16lx", data);
1607  
1608  	return 0;
1609  }
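
/*
 * Illustrative user-space sketch (editorial addition, not part of this
 * file): one plausible way to reach the handler above through
 * KVM_SET_DEVICE_ATTR on the VM file descriptor. vm_fd and the error
 * handling are assumptions of the example (usual headers such as
 * <linux/kvm.h>, <sys/ioctl.h> and <err.h> are assumed); group, attribute
 * and structure are the ones handled above.
 *
 *	struct kvm_s390_vm_cpu_uv_feat uv_feat = { .ap = 1, .ap_intr = 1 };
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_MODEL,
 *		.attr = KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST,
 *		.addr = (__u64)&uv_feat,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr))
 *		err(1, "KVM_SET_DEVICE_ATTR");
 *
 * This has to happen before the first vCPU is created, otherwise the
 * handler above returns -EBUSY.
 */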
1610  
1611  static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1612  {
1613  	int ret = -ENXIO;
1614  
1615  	switch (attr->attr) {
1616  	case KVM_S390_VM_CPU_PROCESSOR:
1617  		ret = kvm_s390_set_processor(kvm, attr);
1618  		break;
1619  	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1620  		ret = kvm_s390_set_processor_feat(kvm, attr);
1621  		break;
1622  	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1623  		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1624  		break;
1625  	case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
1626  		ret = kvm_s390_set_uv_feat(kvm, attr);
1627  		break;
1628  	}
1629  	return ret;
1630  }
1631  
1632  static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1633  {
1634  	struct kvm_s390_vm_cpu_processor *proc;
1635  	int ret = 0;
1636  
1637  	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1638  	if (!proc) {
1639  		ret = -ENOMEM;
1640  		goto out;
1641  	}
1642  	proc->cpuid = kvm->arch.model.cpuid;
1643  	proc->ibc = kvm->arch.model.ibc;
1644  	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1645  	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1646  	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1647  		 kvm->arch.model.ibc,
1648  		 kvm->arch.model.cpuid);
1649  	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1650  		 kvm->arch.model.fac_list[0],
1651  		 kvm->arch.model.fac_list[1],
1652  		 kvm->arch.model.fac_list[2]);
1653  	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1654  		ret = -EFAULT;
1655  	kfree(proc);
1656  out:
1657  	return ret;
1658  }
1659  
1660  static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1661  {
1662  	struct kvm_s390_vm_cpu_machine *mach;
1663  	int ret = 0;
1664  
1665  	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1666  	if (!mach) {
1667  		ret = -ENOMEM;
1668  		goto out;
1669  	}
1670  	get_cpu_id((struct cpuid *) &mach->cpuid);
1671  	mach->ibc = sclp.ibc;
1672  	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1673  	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1674  	memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1675  	       sizeof(stfle_fac_list));
1676  	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1677  		 kvm->arch.model.ibc,
1678  		 kvm->arch.model.cpuid);
1679  	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1680  		 mach->fac_mask[0],
1681  		 mach->fac_mask[1],
1682  		 mach->fac_mask[2]);
1683  	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1684  		 mach->fac_list[0],
1685  		 mach->fac_list[1],
1686  		 mach->fac_list[2]);
1687  	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1688  		ret = -EFAULT;
1689  	kfree(mach);
1690  out:
1691  	return ret;
1692  }
1693  
1694  static int kvm_s390_get_processor_feat(struct kvm *kvm,
1695  				       struct kvm_device_attr *attr)
1696  {
1697  	struct kvm_s390_vm_cpu_feat data;
1698  
1699  	bitmap_to_arr64(data.feat, kvm->arch.cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1700  	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1701  		return -EFAULT;
1702  	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1703  			 data.feat[0],
1704  			 data.feat[1],
1705  			 data.feat[2]);
1706  	return 0;
1707  }
1708  
1709  static int kvm_s390_get_machine_feat(struct kvm *kvm,
1710  				     struct kvm_device_attr *attr)
1711  {
1712  	struct kvm_s390_vm_cpu_feat data;
1713  
1714  	bitmap_to_arr64(data.feat, kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1715  	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1716  		return -EFAULT;
1717  	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1718  			 data.feat[0],
1719  			 data.feat[1],
1720  			 data.feat[2]);
1721  	return 0;
1722  }
1723  
1724  static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1725  					  struct kvm_device_attr *attr)
1726  {
1727  	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1728  	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1729  		return -EFAULT;
1730  
1731  	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1732  		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1733  		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1734  		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1735  		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1736  	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1737  		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1738  		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1739  	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1740  		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1741  		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1742  	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1743  		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1744  		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1745  	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1746  		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1747  		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1748  	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1749  		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1750  		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1751  	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1752  		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1753  		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1754  	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1755  		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1756  		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1757  	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1758  		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1759  		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1760  	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1761  		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1762  		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1763  	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1764  		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1765  		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1766  	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1767  		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1768  		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1769  	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1770  		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1771  		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1772  	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1773  		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1774  		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1775  	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1776  		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1777  		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1778  	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1779  		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1780  		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1781  		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1782  		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1783  	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1784  		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1785  		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1786  		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1787  		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1788  	VM_EVENT(kvm, 3, "GET: guest PFCR   subfunc 0x%16.16lx.%16.16lx",
1789  		 ((unsigned long *) &kvm->arch.model.subfuncs.pfcr)[0],
1790  		 ((unsigned long *) &kvm->arch.model.subfuncs.pfcr)[1]);
1791  
1792  	return 0;
1793  }
1794  
1795  static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1796  					struct kvm_device_attr *attr)
1797  {
1798  	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1799  	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1800  		return -EFAULT;
1801  
1802  	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1803  		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1804  		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1805  		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1806  		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1807  	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1808  		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1809  		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1810  	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1811  		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1812  		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1813  	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1814  		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1815  		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1816  	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1817  		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1818  		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1819  	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1820  		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1821  		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1822  	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1823  		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1824  		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1825  	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1826  		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1827  		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1828  	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1829  		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1830  		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1831  	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1832  		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1833  		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1834  	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1835  		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1836  		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1837  	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1838  		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1839  		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1840  	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1841  		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1842  		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1843  	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1844  		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1845  		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1846  	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1847  		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1848  		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1849  	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1850  		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1851  		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1852  		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1853  		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1854  	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1855  		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1856  		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1857  		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1858  		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1859  	VM_EVENT(kvm, 3, "GET: host  PFCR   subfunc 0x%16.16lx.%16.16lx",
1860  		 ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[0],
1861  		 ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[1]);
1862  
1863  	return 0;
1864  }
1865  
1866  static int kvm_s390_get_processor_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
1867  {
1868  	struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr;
1869  	unsigned long feat = kvm->arch.model.uv_feat_guest.feat;
1870  
1871  	if (put_user(feat, &dst->feat))
1872  		return -EFAULT;
1873  	VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat);
1874  
1875  	return 0;
1876  }
1877  
1878  static int kvm_s390_get_machine_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
1879  {
1880  	struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr;
1881  	unsigned long feat;
1882  
1883  	BUILD_BUG_ON(sizeof(*dst) != sizeof(uv_info.uv_feature_indications));
1884  
1885  	feat = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK;
1886  	if (put_user(feat, &dst->feat))
1887  		return -EFAULT;
1888  	VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat);
1889  
1890  	return 0;
1891  }
1892  
1893  static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1894  {
1895  	int ret = -ENXIO;
1896  
1897  	switch (attr->attr) {
1898  	case KVM_S390_VM_CPU_PROCESSOR:
1899  		ret = kvm_s390_get_processor(kvm, attr);
1900  		break;
1901  	case KVM_S390_VM_CPU_MACHINE:
1902  		ret = kvm_s390_get_machine(kvm, attr);
1903  		break;
1904  	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1905  		ret = kvm_s390_get_processor_feat(kvm, attr);
1906  		break;
1907  	case KVM_S390_VM_CPU_MACHINE_FEAT:
1908  		ret = kvm_s390_get_machine_feat(kvm, attr);
1909  		break;
1910  	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1911  		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1912  		break;
1913  	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1914  		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1915  		break;
1916  	case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
1917  		ret = kvm_s390_get_processor_uv_feat(kvm, attr);
1918  		break;
1919  	case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST:
1920  		ret = kvm_s390_get_machine_uv_feat(kvm, attr);
1921  		break;
1922  	}
1923  	return ret;
1924  }
1925  
1926  /**
1927   * kvm_s390_update_topology_change_report - update CPU topology change report
1928   * @kvm: guest KVM description
1929   * @val: set or clear the MTCR bit
1930   *
1931   * Updates the Multiprocessor Topology-Change-Report bit to signal
1932   * a topology change to the guest.
1933   * This is only relevant if the topology facility is present.
1934   *
1935   * The SCA version, bsca or esca, doesn't matter as the offset is the same.
1936   */
1937  static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
1938  {
1939  	union sca_utility new, old;
1940  	struct bsca_block *sca;
1941  
1942  	read_lock(&kvm->arch.sca_lock);
1943  	sca = kvm->arch.sca;
1944  	old = READ_ONCE(sca->utility);
1945  	do {
1946  		new = old;
1947  		new.mtcr = val;
1948  	} while (!try_cmpxchg(&sca->utility.val, &old.val, new.val));
1949  	read_unlock(&kvm->arch.sca_lock);
1950  }
1951  
1952  static int kvm_s390_set_topo_change_indication(struct kvm *kvm,
1953  					       struct kvm_device_attr *attr)
1954  {
1955  	if (!test_kvm_facility(kvm, 11))
1956  		return -ENXIO;
1957  
1958  	kvm_s390_update_topology_change_report(kvm, !!attr->attr);
1959  	return 0;
1960  }
1961  
1962  static int kvm_s390_get_topo_change_indication(struct kvm *kvm,
1963  					       struct kvm_device_attr *attr)
1964  {
1965  	u8 topo;
1966  
1967  	if (!test_kvm_facility(kvm, 11))
1968  		return -ENXIO;
1969  
1970  	read_lock(&kvm->arch.sca_lock);
1971  	topo = ((struct bsca_block *)kvm->arch.sca)->utility.mtcr;
1972  	read_unlock(&kvm->arch.sca_lock);
1973  
1974  	return put_user(topo, (u8 __user *)attr->addr);
1975  }
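
/*
 * Illustrative user-space sketch (editorial addition, not part of this
 * file): reading the MTCR bit through KVM_GET_DEVICE_ATTR. vm_fd and the
 * printf() are assumptions of the example; group and semantics follow the
 * two handlers above. Note that for the SET direction the value travels in
 * attr.attr itself, not through attr.addr.
 *
 *	__u8 mtcr;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_TOPOLOGY,
 *		.addr = (__u64)&mtcr,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr) == 0)
 *		printf("MTCR: %u\n", mtcr);
 */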
1976  
1977  static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1978  {
1979  	int ret;
1980  
1981  	switch (attr->group) {
1982  	case KVM_S390_VM_MEM_CTRL:
1983  		ret = kvm_s390_set_mem_control(kvm, attr);
1984  		break;
1985  	case KVM_S390_VM_TOD:
1986  		ret = kvm_s390_set_tod(kvm, attr);
1987  		break;
1988  	case KVM_S390_VM_CPU_MODEL:
1989  		ret = kvm_s390_set_cpu_model(kvm, attr);
1990  		break;
1991  	case KVM_S390_VM_CRYPTO:
1992  		ret = kvm_s390_vm_set_crypto(kvm, attr);
1993  		break;
1994  	case KVM_S390_VM_MIGRATION:
1995  		ret = kvm_s390_vm_set_migration(kvm, attr);
1996  		break;
1997  	case KVM_S390_VM_CPU_TOPOLOGY:
1998  		ret = kvm_s390_set_topo_change_indication(kvm, attr);
1999  		break;
2000  	default:
2001  		ret = -ENXIO;
2002  		break;
2003  	}
2004  
2005  	return ret;
2006  }
2007  
2008  static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
2009  {
2010  	int ret;
2011  
2012  	switch (attr->group) {
2013  	case KVM_S390_VM_MEM_CTRL:
2014  		ret = kvm_s390_get_mem_control(kvm, attr);
2015  		break;
2016  	case KVM_S390_VM_TOD:
2017  		ret = kvm_s390_get_tod(kvm, attr);
2018  		break;
2019  	case KVM_S390_VM_CPU_MODEL:
2020  		ret = kvm_s390_get_cpu_model(kvm, attr);
2021  		break;
2022  	case KVM_S390_VM_MIGRATION:
2023  		ret = kvm_s390_vm_get_migration(kvm, attr);
2024  		break;
2025  	case KVM_S390_VM_CPU_TOPOLOGY:
2026  		ret = kvm_s390_get_topo_change_indication(kvm, attr);
2027  		break;
2028  	default:
2029  		ret = -ENXIO;
2030  		break;
2031  	}
2032  
2033  	return ret;
2034  }
2035  
2036  static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
2037  {
2038  	int ret;
2039  
2040  	switch (attr->group) {
2041  	case KVM_S390_VM_MEM_CTRL:
2042  		switch (attr->attr) {
2043  		case KVM_S390_VM_MEM_ENABLE_CMMA:
2044  		case KVM_S390_VM_MEM_CLR_CMMA:
2045  			ret = sclp.has_cmma ? 0 : -ENXIO;
2046  			break;
2047  		case KVM_S390_VM_MEM_LIMIT_SIZE:
2048  			ret = 0;
2049  			break;
2050  		default:
2051  			ret = -ENXIO;
2052  			break;
2053  		}
2054  		break;
2055  	case KVM_S390_VM_TOD:
2056  		switch (attr->attr) {
2057  		case KVM_S390_VM_TOD_LOW:
2058  		case KVM_S390_VM_TOD_HIGH:
2059  			ret = 0;
2060  			break;
2061  		default:
2062  			ret = -ENXIO;
2063  			break;
2064  		}
2065  		break;
2066  	case KVM_S390_VM_CPU_MODEL:
2067  		switch (attr->attr) {
2068  		case KVM_S390_VM_CPU_PROCESSOR:
2069  		case KVM_S390_VM_CPU_MACHINE:
2070  		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
2071  		case KVM_S390_VM_CPU_MACHINE_FEAT:
2072  		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
2073  		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
2074  		case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST:
2075  		case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
2076  			ret = 0;
2077  			break;
2078  		default:
2079  			ret = -ENXIO;
2080  			break;
2081  		}
2082  		break;
2083  	case KVM_S390_VM_CRYPTO:
2084  		switch (attr->attr) {
2085  		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
2086  		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
2087  		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
2088  		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
2089  			ret = 0;
2090  			break;
2091  		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
2092  		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
2093  			ret = ap_instructions_available() ? 0 : -ENXIO;
2094  			break;
2095  		default:
2096  			ret = -ENXIO;
2097  			break;
2098  		}
2099  		break;
2100  	case KVM_S390_VM_MIGRATION:
2101  		ret = 0;
2102  		break;
2103  	case KVM_S390_VM_CPU_TOPOLOGY:
2104  		ret = test_kvm_facility(kvm, 11) ? 0 : -ENXIO;
2105  		break;
2106  	default:
2107  		ret = -ENXIO;
2108  		break;
2109  	}
2110  
2111  	return ret;
2112  }
2113  
2114  static int kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
2115  {
2116  	uint8_t *keys;
2117  	uint64_t hva;
2118  	int srcu_idx, i, r = 0;
2119  
2120  	if (args->flags != 0)
2121  		return -EINVAL;
2122  
2123  	/* Is this guest using storage keys? */
2124  	if (!mm_uses_skeys(current->mm))
2125  		return KVM_S390_GET_SKEYS_NONE;
2126  
2127  	/* Enforce sane limit on memory allocation */
2128  	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
2129  		return -EINVAL;
2130  
2131  	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
2132  	if (!keys)
2133  		return -ENOMEM;
2134  
2135  	mmap_read_lock(current->mm);
2136  	srcu_idx = srcu_read_lock(&kvm->srcu);
2137  	for (i = 0; i < args->count; i++) {
2138  		hva = gfn_to_hva(kvm, args->start_gfn + i);
2139  		if (kvm_is_error_hva(hva)) {
2140  			r = -EFAULT;
2141  			break;
2142  		}
2143  
2144  		r = get_guest_storage_key(current->mm, hva, &keys[i]);
2145  		if (r)
2146  			break;
2147  	}
2148  	srcu_read_unlock(&kvm->srcu, srcu_idx);
2149  	mmap_read_unlock(current->mm);
2150  
2151  	if (!r) {
2152  		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
2153  				 sizeof(uint8_t) * args->count);
2154  		if (r)
2155  			r = -EFAULT;
2156  	}
2157  
2158  	kvfree(keys);
2159  	return r;
2160  }
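
/*
 * Illustrative user-space sketch (editorial addition, not part of this
 * file): fetching guest storage keys. vm_fd and npages are assumptions of
 * the example; the ioctl, structure and limits follow kvm_s390_get_skeys()
 * above. A return value of KVM_S390_GET_SKEYS_NONE means the guest does
 * not use storage keys at all.
 *
 *	__u8 *keys = calloc(npages, 1);
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = npages,
 *		.skeydata_addr = (__u64)keys,
 *	};
 *	int ret = ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 */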
2161  
2162  static int kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
2163  {
2164  	uint8_t *keys;
2165  	uint64_t hva;
2166  	int srcu_idx, i, r = 0;
2167  	bool unlocked;
2168  
2169  	if (args->flags != 0)
2170  		return -EINVAL;
2171  
2172  	/* Enforce sane limit on memory allocation */
2173  	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
2174  		return -EINVAL;
2175  
2176  	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
2177  	if (!keys)
2178  		return -ENOMEM;
2179  
2180  	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
2181  			   sizeof(uint8_t) * args->count);
2182  	if (r) {
2183  		r = -EFAULT;
2184  		goto out;
2185  	}
2186  
2187  	/* Enable storage key handling for the guest */
2188  	r = s390_enable_skey();
2189  	if (r)
2190  		goto out;
2191  
2192  	i = 0;
2193  	mmap_read_lock(current->mm);
2194  	srcu_idx = srcu_read_lock(&kvm->srcu);
2195  	while (i < args->count) {
2196  		unlocked = false;
2197  		hva = gfn_to_hva(kvm, args->start_gfn + i);
2198  		if (kvm_is_error_hva(hva)) {
2199  			r = -EFAULT;
2200  			break;
2201  		}
2202  
2203  		/* Lowest order bit is reserved */
2204  		if (keys[i] & 0x01) {
2205  			r = -EINVAL;
2206  			break;
2207  		}
2208  
2209  		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
2210  		if (r) {
2211  			r = fixup_user_fault(current->mm, hva,
2212  					     FAULT_FLAG_WRITE, &unlocked);
2213  			if (r)
2214  				break;
2215  		}
2216  		if (!r)
2217  			i++;
2218  	}
2219  	srcu_read_unlock(&kvm->srcu, srcu_idx);
2220  	mmap_read_unlock(current->mm);
2221  out:
2222  	kvfree(keys);
2223  	return r;
2224  }
2225  
2226  /*
2227   * Base address and length must be sent at the start of each block; therefore
2228   * it's cheaper to send some clean data, as long as it's less than the size of
2229   * two longs.
2230   */
2231  #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
2232  /* for consistency */
2233  #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
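
/*
 * Worked example for the trade-off above (editorial note, assuming a
 * 64-bit host): KVM_S390_MAX_BIT_DISTANCE is 2 * sizeof(void *) = 16.
 * Starting a new block costs a fresh base address and length, i.e. two
 * 8-byte values, so padding the current block with up to 16 clean
 * one-byte CMMA values is never more expensive than terminating it and
 * opening a new one.
 */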
2234  
2235  static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2236  			      u8 *res, unsigned long bufsize)
2237  {
2238  	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
2239  
2240  	args->count = 0;
2241  	while (args->count < bufsize) {
2242  		hva = gfn_to_hva(kvm, cur_gfn);
2243  		/*
2244  		 * We return an error if the first value was invalid, but we
2245  		 * return successfully if at least one value was copied.
2246  		 */
2247  		if (kvm_is_error_hva(hva))
2248  			return args->count ? 0 : -EFAULT;
2249  		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2250  			pgstev = 0;
2251  		res[args->count++] = (pgstev >> 24) & 0x43;
2252  		cur_gfn++;
2253  	}
2254  
2255  	return 0;
2256  }
2257  
2258  static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
2259  						     gfn_t gfn)
2260  {
2261  	return ____gfn_to_memslot(slots, gfn, true);
2262  }
2263  
2264  static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
2265  					      unsigned long cur_gfn)
2266  {
2267  	struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
2268  	unsigned long ofs = cur_gfn - ms->base_gfn;
2269  	struct rb_node *mnode = &ms->gfn_node[slots->node_idx];
2270  
2271  	if (ms->base_gfn + ms->npages <= cur_gfn) {
2272  		mnode = rb_next(mnode);
2273  		/* If we are above the highest slot, wrap around */
2274  		if (!mnode)
2275  			mnode = rb_first(&slots->gfn_tree);
2276  
2277  		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
2278  		ofs = 0;
2279  	}
2280  
2281  	if (cur_gfn < ms->base_gfn)
2282  		ofs = 0;
2283  
2284  	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2285  	while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
2286  		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
2287  		ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages);
2288  	}
2289  	return ms->base_gfn + ofs;
2290  }
2291  
2292  static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2293  			     u8 *res, unsigned long bufsize)
2294  {
2295  	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2296  	struct kvm_memslots *slots = kvm_memslots(kvm);
2297  	struct kvm_memory_slot *ms;
2298  
2299  	if (unlikely(kvm_memslots_empty(slots)))
2300  		return 0;
2301  
2302  	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2303  	ms = gfn_to_memslot(kvm, cur_gfn);
2304  	args->count = 0;
2305  	args->start_gfn = cur_gfn;
2306  	if (!ms)
2307  		return 0;
2308  	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2309  	mem_end = kvm_s390_get_gfn_end(slots);
2310  
2311  	while (args->count < bufsize) {
2312  		hva = gfn_to_hva(kvm, cur_gfn);
2313  		if (kvm_is_error_hva(hva))
2314  			return 0;
2315  		/* Decrement only if we actually flipped the bit to 0 */
2316  		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2317  			atomic64_dec(&kvm->arch.cmma_dirty_pages);
2318  		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2319  			pgstev = 0;
2320  		/* Save the value */
2321  		res[args->count++] = (pgstev >> 24) & 0x43;
2322  		/* If the next bit is too far away, stop. */
2323  		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2324  			return 0;
2325  		/* If we reached the previous "next", find the next one */
2326  		if (cur_gfn == next_gfn)
2327  			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2328  		/* Reached the end of memory or of the buffer, stop */
2329  		if ((next_gfn >= mem_end) ||
2330  		    (next_gfn - args->start_gfn >= bufsize))
2331  			return 0;
2332  		cur_gfn++;
2333  		/* Reached the end of the current memslot, take the next one. */
2334  		if (cur_gfn - ms->base_gfn >= ms->npages) {
2335  			ms = gfn_to_memslot(kvm, cur_gfn);
2336  			if (!ms)
2337  				return 0;
2338  		}
2339  	}
2340  	return 0;
2341  }
2342  
2343  /*
2344   * This function searches for the next page with dirty CMMA attributes, and
2345   * saves the attributes in the buffer up to either the end of the buffer or
2346   * the first block of at least KVM_S390_MAX_BIT_DISTANCE clean bits;
2347   * no trailing clean bytes are saved.
2348   * In case no dirty bits were found, or if CMMA was not enabled or used, the
2349   * output buffer will indicate 0 as length.
2350   */
2351  static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2352  				  struct kvm_s390_cmma_log *args)
2353  {
2354  	unsigned long bufsize;
2355  	int srcu_idx, peek, ret;
2356  	u8 *values;
2357  
2358  	if (!kvm->arch.use_cmma)
2359  		return -ENXIO;
2360  	/* Invalid/unsupported flags were specified */
2361  	if (args->flags & ~KVM_S390_CMMA_PEEK)
2362  		return -EINVAL;
2363  	/* Migration mode query, and we are not doing a migration */
2364  	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2365  	if (!peek && !kvm->arch.migration_mode)
2366  		return -EINVAL;
2367  	/* CMMA is disabled or was not used, or the buffer has length zero */
2368  	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2369  	if (!bufsize || !kvm->mm->context.uses_cmm) {
2370  		memset(args, 0, sizeof(*args));
2371  		return 0;
2372  	}
2373  	/* We are not peeking, and there are no dirty pages */
2374  	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2375  		memset(args, 0, sizeof(*args));
2376  		return 0;
2377  	}
2378  
2379  	values = vmalloc(bufsize);
2380  	if (!values)
2381  		return -ENOMEM;
2382  
2383  	mmap_read_lock(kvm->mm);
2384  	srcu_idx = srcu_read_lock(&kvm->srcu);
2385  	if (peek)
2386  		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2387  	else
2388  		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2389  	srcu_read_unlock(&kvm->srcu, srcu_idx);
2390  	mmap_read_unlock(kvm->mm);
2391  
2392  	if (kvm->arch.migration_mode)
2393  		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2394  	else
2395  		args->remaining = 0;
2396  
2397  	if (copy_to_user((void __user *)args->values, values, args->count))
2398  		ret = -EFAULT;
2399  
2400  	vfree(values);
2401  	return ret;
2402  }
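
/*
 * Illustrative user-space sketch (editorial addition, not part of this
 * file): peeking at CMMA values without being in migration mode, assuming
 * CMMA has been enabled for the VM and vm_fd is the VM file descriptor.
 * The ioctl, structure and KVM_S390_CMMA_PEEK flag come from the handler
 * above; on success the kernel updates count to the number of values
 * actually stored.
 *
 *	__u8 values[4096];
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = sizeof(values),
 *		.flags = KVM_S390_CMMA_PEEK,
 *		.values = (__u64)values,
 *	};
 *	int ret = ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 */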
2403  
2404  /*
2405   * This function sets the CMMA attributes for the given pages. If the input
2406   * buffer has zero length, no action is taken; otherwise the attributes are
2407   * set and the mm->context.uses_cmm flag is set.
2408   */
2409  static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2410  				  const struct kvm_s390_cmma_log *args)
2411  {
2412  	unsigned long hva, mask, pgstev, i;
2413  	uint8_t *bits;
2414  	int srcu_idx, r = 0;
2415  
2416  	mask = args->mask;
2417  
2418  	if (!kvm->arch.use_cmma)
2419  		return -ENXIO;
2420  	/* invalid/unsupported flags */
2421  	if (args->flags != 0)
2422  		return -EINVAL;
2423  	/* Enforce sane limit on memory allocation */
2424  	if (args->count > KVM_S390_CMMA_SIZE_MAX)
2425  		return -EINVAL;
2426  	/* Nothing to do */
2427  	if (args->count == 0)
2428  		return 0;
2429  
2430  	bits = vmalloc(array_size(sizeof(*bits), args->count));
2431  	if (!bits)
2432  		return -ENOMEM;
2433  
2434  	r = copy_from_user(bits, (void __user *)args->values, args->count);
2435  	if (r) {
2436  		r = -EFAULT;
2437  		goto out;
2438  	}
2439  
2440  	mmap_read_lock(kvm->mm);
2441  	srcu_idx = srcu_read_lock(&kvm->srcu);
2442  	for (i = 0; i < args->count; i++) {
2443  		hva = gfn_to_hva(kvm, args->start_gfn + i);
2444  		if (kvm_is_error_hva(hva)) {
2445  			r = -EFAULT;
2446  			break;
2447  		}
2448  
2449  		pgstev = bits[i];
2450  		pgstev = pgstev << 24;
2451  		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2452  		set_pgste_bits(kvm->mm, hva, mask, pgstev);
2453  	}
2454  	srcu_read_unlock(&kvm->srcu, srcu_idx);
2455  	mmap_read_unlock(kvm->mm);
2456  
2457  	if (!kvm->mm->context.uses_cmm) {
2458  		mmap_write_lock(kvm->mm);
2459  		kvm->mm->context.uses_cmm = 1;
2460  		mmap_write_unlock(kvm->mm);
2461  	}
2462  out:
2463  	vfree(bits);
2464  	return r;
2465  }
2466  
2467  /**
2468   * kvm_s390_cpus_from_pv - Convert all protected vCPUs in a protected VM to
2469   * non-protected.
2470   * @kvm: the VM whose protected vCPUs are to be converted
2471   * @rc: return value for the RC field of the UVC (in case of error)
2472   * @rrc: return value for the RRC field of the UVC (in case of error)
2473   *
2474   * Does not stop in case of error; it tries to convert as many
2475   * CPUs as possible. In case of error, the RC and RRC of the first error are
2476   * returned.
2477   *
2478   * Return: 0 in case of success, otherwise -EIO
2479   */
2480  int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2481  {
2482  	struct kvm_vcpu *vcpu;
2483  	unsigned long i;
2484  	u16 _rc, _rrc;
2485  	int ret = 0;
2486  
2487  	/*
2488  	 * We ignore failures and try to destroy as many CPUs as possible.
2489  	 * At the same time we must not free the assigned resources when
2490  	 * this fails, as the ultravisor still has access to that memory.
2491  	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2492  	 * behind.
2493  	 * We want to return the first failure rc and rrc, though.
2494  	 */
2495  	kvm_for_each_vcpu(i, vcpu, kvm) {
2496  		mutex_lock(&vcpu->mutex);
2497  		if (kvm_s390_pv_destroy_cpu(vcpu, &_rc, &_rrc) && !ret) {
2498  			*rc = _rc;
2499  			*rrc = _rrc;
2500  			ret = -EIO;
2501  		}
2502  		mutex_unlock(&vcpu->mutex);
2503  	}
2504  	/* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */
2505  	if (use_gisa)
2506  		kvm_s390_gisa_enable(kvm);
2507  	return ret;
2508  }
2509  
2510  /**
2511   * kvm_s390_cpus_to_pv - Convert all non-protected vCPUs in a protected VM
2512   * to protected.
2513   * @kvm: the VM whose protected vCPUs are to be converted
2514   * @rc: return value for the RC field of the UVC (in case of error)
2515   * @rrc: return value for the RRC field of the UVC (in case of error)
2516   *
2517   * Tries to undo the conversion in case of error.
2518   *
2519   * Return: 0 in case of success, otherwise -EIO
2520   */
2521  static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2522  {
2523  	unsigned long i;
2524  	int r = 0;
2525  	u16 dummy;
2526  
2527  	struct kvm_vcpu *vcpu;
2528  
2529  	/* Disable the GISA if the ultravisor does not support AIV. */
2530  	if (!uv_has_feature(BIT_UV_FEAT_AIV))
2531  		kvm_s390_gisa_disable(kvm);
2532  
2533  	kvm_for_each_vcpu(i, vcpu, kvm) {
2534  		mutex_lock(&vcpu->mutex);
2535  		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2536  		mutex_unlock(&vcpu->mutex);
2537  		if (r)
2538  			break;
2539  	}
2540  	if (r)
2541  		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2542  	return r;
2543  }
2544  
2545  /*
2546   * Here we provide user space with a direct interface to query UV
2547   * related data like UV maxima and available features as well as
2548   * feature specific data.
2549   *
2550   * To facilitate future extension of the data structures we'll try to
2551   * write data up to the maximum requested length.
2552   */
2553  static ssize_t kvm_s390_handle_pv_info(struct kvm_s390_pv_info *info)
2554  {
2555  	ssize_t len_min;
2556  
2557  	switch (info->header.id) {
2558  	case KVM_PV_INFO_VM: {
2559  		len_min =  sizeof(info->header) + sizeof(info->vm);
2560  
2561  		if (info->header.len_max < len_min)
2562  			return -EINVAL;
2563  
2564  		memcpy(info->vm.inst_calls_list,
2565  		       uv_info.inst_calls_list,
2566  		       sizeof(uv_info.inst_calls_list));
2567  
2568  		/* It's max cpuid not max cpus, so it's off by one */
2569  		info->vm.max_cpus = uv_info.max_guest_cpu_id + 1;
2570  		info->vm.max_guests = uv_info.max_num_sec_conf;
2571  		info->vm.max_guest_addr = uv_info.max_sec_stor_addr;
2572  		info->vm.feature_indication = uv_info.uv_feature_indications;
2573  
2574  		return len_min;
2575  	}
2576  	case KVM_PV_INFO_DUMP: {
2577  		len_min =  sizeof(info->header) + sizeof(info->dump);
2578  
2579  		if (info->header.len_max < len_min)
2580  			return -EINVAL;
2581  
2582  		info->dump.dump_cpu_buffer_len = uv_info.guest_cpu_stor_len;
2583  		info->dump.dump_config_mem_buffer_per_1m = uv_info.conf_dump_storage_state_len;
2584  		info->dump.dump_config_finalize_len = uv_info.conf_dump_finalize_len;
2585  		return len_min;
2586  	}
2587  	default:
2588  		return -EINVAL;
2589  	}
2590  }
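
/*
 * Illustrative user-space sketch (editorial addition, not part of this
 * file): how the len_max/len_written contract above can look from user
 * space on a host with protected virtualization enabled. vm_fd is an
 * assumption of the example; command, structures and semantics follow
 * kvm_s390_handle_pv_info() and the KVM_PV_INFO case further below.
 *
 *	struct kvm_s390_pv_info info = {
 *		.header.id = KVM_PV_INFO_VM,
 *		.header.len_max = sizeof(info),
 *	};
 *	struct kvm_pv_cmd cmd = {
 *		.cmd = KVM_PV_INFO,
 *		.data = (__u64)&info,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd) == 0)
 *		printf("%u bytes of PV info are valid\n",
 *		       info.header.len_written);
 */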
2591  
2592  static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd,
2593  			   struct kvm_s390_pv_dmp dmp)
2594  {
2595  	int r = -EINVAL;
2596  	void __user *result_buff = (void __user *)dmp.buff_addr;
2597  
2598  	switch (dmp.subcmd) {
2599  	case KVM_PV_DUMP_INIT: {
2600  		if (kvm->arch.pv.dumping)
2601  			break;
2602  
2603  		/*
2604  		 * Block SIE entry as concurrent dump UVCs could lead
2605  		 * to validities.
2606  		 */
2607  		kvm_s390_vcpu_block_all(kvm);
2608  
2609  		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2610  				  UVC_CMD_DUMP_INIT, &cmd->rc, &cmd->rrc);
2611  		KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP INIT: rc %x rrc %x",
2612  			     cmd->rc, cmd->rrc);
2613  		if (!r) {
2614  			kvm->arch.pv.dumping = true;
2615  		} else {
2616  			kvm_s390_vcpu_unblock_all(kvm);
2617  			r = -EINVAL;
2618  		}
2619  		break;
2620  	}
2621  	case KVM_PV_DUMP_CONFIG_STOR_STATE: {
2622  		if (!kvm->arch.pv.dumping)
2623  			break;
2624  
2625  		/*
2626  		 * gaddr is an output parameter since we might stop
2627  		 * early. As dmp will be copied back in our caller, we
2628  		 * don't need to do it ourselves.
2629  		 */
2630  		r = kvm_s390_pv_dump_stor_state(kvm, result_buff, &dmp.gaddr, dmp.buff_len,
2631  						&cmd->rc, &cmd->rrc);
2632  		break;
2633  	}
2634  	case KVM_PV_DUMP_COMPLETE: {
2635  		if (!kvm->arch.pv.dumping)
2636  			break;
2637  
2638  		r = -EINVAL;
2639  		if (dmp.buff_len < uv_info.conf_dump_finalize_len)
2640  			break;
2641  
2642  		r = kvm_s390_pv_dump_complete(kvm, result_buff,
2643  					      &cmd->rc, &cmd->rrc);
2644  		break;
2645  	}
2646  	default:
2647  		r = -ENOTTY;
2648  		break;
2649  	}
2650  
2651  	return r;
2652  }
2653  
2654  static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2655  {
2656  	const bool need_lock = (cmd->cmd != KVM_PV_ASYNC_CLEANUP_PERFORM);
2657  	void __user *argp = (void __user *)cmd->data;
2658  	int r = 0;
2659  	u16 dummy;
2660  
2661  	if (need_lock)
2662  		mutex_lock(&kvm->lock);
2663  
2664  	switch (cmd->cmd) {
2665  	case KVM_PV_ENABLE: {
2666  		r = -EINVAL;
2667  		if (kvm_s390_pv_is_protected(kvm))
2668  			break;
2669  
2670  		/*
2671  		 * FMT 4 SIE needs esca. As we never switch back to bsca from
2672  		 * esca, we need no cleanup in the error cases below.
2673  		 */
2674  		r = sca_switch_to_extended(kvm);
2675  		if (r)
2676  			break;
2677  
2678  		mmap_write_lock(kvm->mm);
2679  		r = gmap_helper_disable_cow_sharing();
2680  		mmap_write_unlock(kvm->mm);
2681  		if (r)
2682  			break;
2683  
2684  		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2685  		if (r)
2686  			break;
2687  
2688  		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2689  		if (r)
2690  			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2691  
2692  		/* we need to block service interrupts from now on */
2693  		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2694  		break;
2695  	}
2696  	case KVM_PV_ASYNC_CLEANUP_PREPARE:
2697  		r = -EINVAL;
2698  		if (!kvm_s390_pv_is_protected(kvm) || !async_destroy)
2699  			break;
2700  
2701  		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2702  		/*
2703  		 * If a CPU could not be destroyed, destroy VM will also fail.
2704  		 * There is no point in trying to destroy it. Instead return
2705  		 * the rc and rrc from the first CPU that failed destroying.
2706  		 */
2707  		if (r)
2708  			break;
2709  		r = kvm_s390_pv_set_aside(kvm, &cmd->rc, &cmd->rrc);
2710  
2711  		/* no need to block service interrupts any more */
2712  		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2713  		break;
2714  	case KVM_PV_ASYNC_CLEANUP_PERFORM:
2715  		r = -EINVAL;
2716  		if (!async_destroy)
2717  			break;
2718  		/* kvm->lock must not be held; this is asserted inside the function. */
2719  		r = kvm_s390_pv_deinit_aside_vm(kvm, &cmd->rc, &cmd->rrc);
2720  		break;
2721  	case KVM_PV_DISABLE: {
2722  		r = -EINVAL;
2723  		if (!kvm_s390_pv_is_protected(kvm))
2724  			break;
2725  
2726  		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2727  		/*
2728  		 * If a CPU could not be destroyed, destroy VM will also fail.
2729  		 * There is no point in trying to destroy it. Instead return
2730  		 * the rc and rrc from the first CPU that failed destroying.
2731  		 */
2732  		if (r)
2733  			break;
2734  		r = kvm_s390_pv_deinit_cleanup_all(kvm, &cmd->rc, &cmd->rrc);
2735  
2736  		/* no need to block service interrupts any more */
2737  		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2738  		break;
2739  	}
2740  	case KVM_PV_SET_SEC_PARMS: {
2741  		struct kvm_s390_pv_sec_parm parms = {};
2742  		void *hdr;
2743  
2744  		r = -EINVAL;
2745  		if (!kvm_s390_pv_is_protected(kvm))
2746  			break;
2747  
2748  		r = -EFAULT;
2749  		if (copy_from_user(&parms, argp, sizeof(parms)))
2750  			break;
2751  
2752  		/* Currently restricted to 8KB */
2753  		r = -EINVAL;
2754  		if (parms.length > PAGE_SIZE * 2)
2755  			break;
2756  
2757  		r = -ENOMEM;
2758  		hdr = vmalloc(parms.length);
2759  		if (!hdr)
2760  			break;
2761  
2762  		r = -EFAULT;
2763  		if (!copy_from_user(hdr, (void __user *)parms.origin,
2764  				    parms.length))
2765  			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2766  						      &cmd->rc, &cmd->rrc);
2767  
2768  		vfree(hdr);
2769  		break;
2770  	}
2771  	case KVM_PV_UNPACK: {
2772  		struct kvm_s390_pv_unp unp = {};
2773  
2774  		r = -EINVAL;
2775  		if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2776  			break;
2777  
2778  		r = -EFAULT;
2779  		if (copy_from_user(&unp, argp, sizeof(unp)))
2780  			break;
2781  
2782  		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2783  				       &cmd->rc, &cmd->rrc);
2784  		break;
2785  	}
2786  	case KVM_PV_VERIFY: {
2787  		r = -EINVAL;
2788  		if (!kvm_s390_pv_is_protected(kvm))
2789  			break;
2790  
2791  		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2792  				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2793  		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2794  			     cmd->rrc);
2795  		break;
2796  	}
2797  	case KVM_PV_PREP_RESET: {
2798  		r = -EINVAL;
2799  		if (!kvm_s390_pv_is_protected(kvm))
2800  			break;
2801  
2802  		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2803  				  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2804  		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2805  			     cmd->rc, cmd->rrc);
2806  		break;
2807  	}
2808  	case KVM_PV_UNSHARE_ALL: {
2809  		r = -EINVAL;
2810  		if (!kvm_s390_pv_is_protected(kvm))
2811  			break;
2812  
2813  		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2814  				  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2815  		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2816  			     cmd->rc, cmd->rrc);
2817  		break;
2818  	}
2819  	case KVM_PV_INFO: {
2820  		struct kvm_s390_pv_info info = {};
2821  		ssize_t data_len;
2822  
2823  		/*
2824  		 * No need to check the VM protection here.
2825  		 *
2826  		 * Maybe user space wants to query some of the data
2827  		 * when the VM is still unprotected. If we see the
2828  		 * need to fence a new data command we can still
2829  		 * return an error in the info handler.
2830  		 */
2831  
2832  		r = -EFAULT;
2833  		if (copy_from_user(&info, argp, sizeof(info.header)))
2834  			break;
2835  
2836  		r = -EINVAL;
2837  		if (info.header.len_max < sizeof(info.header))
2838  			break;
2839  
2840  		data_len = kvm_s390_handle_pv_info(&info);
2841  		if (data_len < 0) {
2842  			r = data_len;
2843  			break;
2844  		}
2845  		/*
2846  		 * If a data command struct is extended (multiple
2847  		 * times) this can be used to determine how much of it
2848  		 * is valid.
2849  		 */
2850  		info.header.len_written = data_len;
2851  
2852  		r = -EFAULT;
2853  		if (copy_to_user(argp, &info, data_len))
2854  			break;
2855  
2856  		r = 0;
2857  		break;
2858  	}
2859  	case KVM_PV_DUMP: {
2860  		struct kvm_s390_pv_dmp dmp;
2861  
2862  		r = -EINVAL;
2863  		if (!kvm_s390_pv_is_protected(kvm))
2864  			break;
2865  
2866  		r = -EFAULT;
2867  		if (copy_from_user(&dmp, argp, sizeof(dmp)))
2868  			break;
2869  
2870  		r = kvm_s390_pv_dmp(kvm, cmd, dmp);
2871  		if (r)
2872  			break;
2873  
2874  		if (copy_to_user(argp, &dmp, sizeof(dmp))) {
2875  			r = -EFAULT;
2876  			break;
2877  		}
2878  
2879  		break;
2880  	}
2881  	default:
2882  		r = -ENOTTY;
2883  	}
2884  	if (need_lock)
2885  		mutex_unlock(&kvm->lock);
2886  
2887  	return r;
2888  }
2889  
2890  static int mem_op_validate_common(struct kvm_s390_mem_op *mop, u64 supported_flags)
2891  {
2892  	if (mop->flags & ~supported_flags || !mop->size)
2893  		return -EINVAL;
2894  	if (mop->size > MEM_OP_MAX_SIZE)
2895  		return -E2BIG;
2896  	if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
2897  		if (mop->key > 0xf)
2898  			return -EINVAL;
2899  	} else {
2900  		mop->key = 0;
2901  	}
2902  	return 0;
2903  }
2904  
2905  static int kvm_s390_vm_mem_op_abs(struct kvm *kvm, struct kvm_s390_mem_op *mop)
2906  {
2907  	void __user *uaddr = (void __user *)mop->buf;
2908  	enum gacc_mode acc_mode;
2909  	void *tmpbuf = NULL;
2910  	int r, srcu_idx;
2911  
2912  	r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_SKEY_PROTECTION |
2913  					KVM_S390_MEMOP_F_CHECK_ONLY);
2914  	if (r)
2915  		return r;
2916  
2917  	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2918  		tmpbuf = vmalloc(mop->size);
2919  		if (!tmpbuf)
2920  			return -ENOMEM;
2921  	}
2922  
2923  	srcu_idx = srcu_read_lock(&kvm->srcu);
2924  
2925  	if (!kvm_is_gpa_in_memslot(kvm, mop->gaddr)) {
2926  		r = PGM_ADDRESSING;
2927  		goto out_unlock;
2928  	}
2929  
2930  	acc_mode = mop->op == KVM_S390_MEMOP_ABSOLUTE_READ ? GACC_FETCH : GACC_STORE;
2931  	if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2932  		r = check_gpa_range(kvm, mop->gaddr, mop->size, acc_mode, mop->key);
2933  		goto out_unlock;
2934  	}
2935  	if (acc_mode == GACC_FETCH) {
2936  		r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2937  					      mop->size, GACC_FETCH, mop->key);
2938  		if (r)
2939  			goto out_unlock;
2940  		if (copy_to_user(uaddr, tmpbuf, mop->size))
2941  			r = -EFAULT;
2942  	} else {
2943  		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2944  			r = -EFAULT;
2945  			goto out_unlock;
2946  		}
2947  		r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2948  					      mop->size, GACC_STORE, mop->key);
2949  	}
2950  
2951  out_unlock:
2952  	srcu_read_unlock(&kvm->srcu, srcu_idx);
2953  
2954  	vfree(tmpbuf);
2955  	return r;
2956  }
2957  
2958  static int kvm_s390_vm_mem_op_cmpxchg(struct kvm *kvm, struct kvm_s390_mem_op *mop)
2959  {
2960  	void __user *uaddr = (void __user *)mop->buf;
2961  	void __user *old_addr = (void __user *)mop->old_addr;
2962  	union {
2963  		__uint128_t quad;
2964  		char raw[sizeof(__uint128_t)];
2965  	} old = { .quad = 0}, new = { .quad = 0 };
2966  	unsigned int off_in_quad = sizeof(new) - mop->size;
2967  	int r, srcu_idx;
2968  	bool success;
2969  
2970  	r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_SKEY_PROTECTION);
2971  	if (r)
2972  		return r;
2973  	/*
2974  	 * This validates off_in_quad. Checking that size is a power
2975  	 * of two is not necessary, as cmpxchg_guest_abs_with_key
2976  	 * takes care of that.
2977  	 */
2978  	if (mop->size > sizeof(new))
2979  		return -EINVAL;
2980  	if (copy_from_user(&new.raw[off_in_quad], uaddr, mop->size))
2981  		return -EFAULT;
2982  	if (copy_from_user(&old.raw[off_in_quad], old_addr, mop->size))
2983  		return -EFAULT;
2984  
2985  	srcu_idx = srcu_read_lock(&kvm->srcu);
2986  
2987  	if (!kvm_is_gpa_in_memslot(kvm, mop->gaddr)) {
2988  		r = PGM_ADDRESSING;
2989  		goto out_unlock;
2990  	}
2991  
2992  	r = cmpxchg_guest_abs_with_key(kvm, mop->gaddr, mop->size, &old.quad,
2993  				       new.quad, mop->key, &success);
2994  	if (!success && copy_to_user(old_addr, &old.raw[off_in_quad], mop->size))
2995  		r = -EFAULT;
2996  
2997  out_unlock:
2998  	srcu_read_unlock(&kvm->srcu, srcu_idx);
2999  	return r;
3000  }
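
/*
 * Worked example for the off_in_quad placement above (editorial note):
 * for a 4-byte cmpxchg, off_in_quad = sizeof(__uint128_t) - 4 = 12, so the
 * user-supplied old and new values are copied into raw[12..15]. On
 * big-endian s390 these are the least significant bytes of old.quad and
 * new.quad, i.e. the values end up in the low-order part of the 128-bit
 * quantity handed to cmpxchg_guest_abs_with_key().
 */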
3001  
3002  static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
3003  {
3004  	/*
3005  	 * This is technically a heuristic only: if the kvm->lock is not
3006  	 * taken, it is not guaranteed that the vm is/remains non-protected.
3007  	 * This is ok from a kernel perspective, as wrongdoing is detected
3008  	 * on the access: -EFAULT is returned and the vm may crash the
3009  	 * next time it accesses the memory in question.
3010  	 * There is no sane use case for doing a protection switch and a
3011  	 * memop on two different CPUs at the same time.
3012  	 */
3013  	if (kvm_s390_pv_get_handle(kvm))
3014  		return -EINVAL;
3015  
3016  	switch (mop->op) {
3017  	case KVM_S390_MEMOP_ABSOLUTE_READ:
3018  	case KVM_S390_MEMOP_ABSOLUTE_WRITE:
3019  		return kvm_s390_vm_mem_op_abs(kvm, mop);
3020  	case KVM_S390_MEMOP_ABSOLUTE_CMPXCHG:
3021  		return kvm_s390_vm_mem_op_cmpxchg(kvm, mop);
3022  	default:
3023  		return -EINVAL;
3024  	}
3025  }
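
/*
 * Illustrative user-space sketch (editorial addition, not part of this
 * file): an absolute read of guest memory through KVM_S390_MEM_OP on the
 * VM file descriptor. vm_fd and the guest address are assumptions of the
 * example; op code and fields follow the handlers above. A positive
 * return value is a program exception code (e.g. PGM_ADDRESSING when the
 * address is outside every memslot), a negative one is a regular errno.
 *
 *	__u8 buf[256];
 *	struct kvm_s390_mem_op mop = {
 *		.op = KVM_S390_MEMOP_ABSOLUTE_READ,
 *		.gaddr = 0x10000,
 *		.size = sizeof(buf),
 *		.buf = (__u64)buf,
 *	};
 *	int ret = ioctl(vm_fd, KVM_S390_MEM_OP, &mop);
 */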
3026  
3027  int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
3028  {
3029  	struct kvm *kvm = filp->private_data;
3030  	void __user *argp = (void __user *)arg;
3031  	struct kvm_device_attr attr;
3032  	int r;
3033  
3034  	switch (ioctl) {
3035  	case KVM_S390_INTERRUPT: {
3036  		struct kvm_s390_interrupt s390int;
3037  
3038  		r = -EFAULT;
3039  		if (copy_from_user(&s390int, argp, sizeof(s390int)))
3040  			break;
3041  		r = kvm_s390_inject_vm(kvm, &s390int);
3042  		break;
3043  	}
3044  	case KVM_CREATE_IRQCHIP: {
3045  		r = -EINVAL;
3046  		if (kvm->arch.use_irqchip)
3047  			r = 0;
3048  		break;
3049  	}
3050  	case KVM_SET_DEVICE_ATTR: {
3051  		r = -EFAULT;
3052  		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
3053  			break;
3054  		r = kvm_s390_vm_set_attr(kvm, &attr);
3055  		break;
3056  	}
3057  	case KVM_GET_DEVICE_ATTR: {
3058  		r = -EFAULT;
3059  		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
3060  			break;
3061  		r = kvm_s390_vm_get_attr(kvm, &attr);
3062  		break;
3063  	}
3064  	case KVM_HAS_DEVICE_ATTR: {
3065  		r = -EFAULT;
3066  		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
3067  			break;
3068  		r = kvm_s390_vm_has_attr(kvm, &attr);
3069  		break;
3070  	}
3071  	case KVM_S390_GET_SKEYS: {
3072  		struct kvm_s390_skeys args;
3073  
3074  		r = -EFAULT;
3075  		if (copy_from_user(&args, argp,
3076  				   sizeof(struct kvm_s390_skeys)))
3077  			break;
3078  		r = kvm_s390_get_skeys(kvm, &args);
3079  		break;
3080  	}
3081  	case KVM_S390_SET_SKEYS: {
3082  		struct kvm_s390_skeys args;
3083  
3084  		r = -EFAULT;
3085  		if (copy_from_user(&args, argp,
3086  				   sizeof(struct kvm_s390_skeys)))
3087  			break;
3088  		r = kvm_s390_set_skeys(kvm, &args);
3089  		break;
3090  	}
3091  	case KVM_S390_GET_CMMA_BITS: {
3092  		struct kvm_s390_cmma_log args;
3093  
3094  		r = -EFAULT;
3095  		if (copy_from_user(&args, argp, sizeof(args)))
3096  			break;
3097  		mutex_lock(&kvm->slots_lock);
3098  		r = kvm_s390_get_cmma_bits(kvm, &args);
3099  		mutex_unlock(&kvm->slots_lock);
3100  		if (!r) {
3101  			r = copy_to_user(argp, &args, sizeof(args));
3102  			if (r)
3103  				r = -EFAULT;
3104  		}
3105  		break;
3106  	}
3107  	case KVM_S390_SET_CMMA_BITS: {
3108  		struct kvm_s390_cmma_log args;
3109  
3110  		r = -EFAULT;
3111  		if (copy_from_user(&args, argp, sizeof(args)))
3112  			break;
3113  		mutex_lock(&kvm->slots_lock);
3114  		r = kvm_s390_set_cmma_bits(kvm, &args);
3115  		mutex_unlock(&kvm->slots_lock);
3116  		break;
3117  	}
3118  	case KVM_S390_PV_COMMAND: {
3119  		struct kvm_pv_cmd args;
3120  
3121  		/* protvirt means user cpu state */
3122  		kvm_s390_set_user_cpu_state_ctrl(kvm);
3123  		r = 0;
3124  		if (!is_prot_virt_host()) {
3125  			r = -EINVAL;
3126  			break;
3127  		}
3128  		if (copy_from_user(&args, argp, sizeof(args))) {
3129  			r = -EFAULT;
3130  			break;
3131  		}
3132  		if (args.flags) {
3133  			r = -EINVAL;
3134  			break;
3135  		}
3136  		/* must be called without kvm->lock */
3137  		r = kvm_s390_handle_pv(kvm, &args);
3138  		if (copy_to_user(argp, &args, sizeof(args))) {
3139  			r = -EFAULT;
3140  			break;
3141  		}
3142  		break;
3143  	}
3144  	case KVM_S390_MEM_OP: {
3145  		struct kvm_s390_mem_op mem_op;
3146  
3147  		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3148  			r = kvm_s390_vm_mem_op(kvm, &mem_op);
3149  		else
3150  			r = -EFAULT;
3151  		break;
3152  	}
3153  	case KVM_S390_ZPCI_OP: {
3154  		struct kvm_s390_zpci_op args;
3155  
3156  		r = -EINVAL;
3157  		if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
3158  			break;
3159  		if (copy_from_user(&args, argp, sizeof(args))) {
3160  			r = -EFAULT;
3161  			break;
3162  		}
3163  		r = kvm_s390_pci_zpci_op(kvm, &args);
3164  		break;
3165  	}
3166  	default:
3167  		r = -ENOTTY;
3168  	}
3169  
3170  	return r;
3171  }
3172  
3173  static int kvm_s390_apxa_installed(void)
3174  {
3175  	struct ap_config_info info;
3176  
3177  	if (ap_instructions_available()) {
3178  		if (ap_qci(&info) == 0)
3179  			return info.apxa;
3180  	}
3181  
3182  	return 0;
3183  }
3184  
3185  /*
3186   * The format of the crypto control block (CRYCB) is specified in the 3 low
3187   * order bits of the CRYCB designation (CRYCBD) field as follows:
3188   * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
3189   *	     AP extended addressing (APXA) facility is installed.
3190   * Format 1: The APXA facility is not installed but the MSAX3 facility is.
3191   * Format 2: Both the APXA and MSAX3 facilities are installed.
3192   */
3193  static void kvm_s390_set_crycb_format(struct kvm *kvm)
3194  {
3195  	kvm->arch.crypto.crycbd = virt_to_phys(kvm->arch.crypto.crycb);
3196  
3197  	/* Clear the CRYCB format bits - i.e., set format 0 by default */
3198  	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
3199  
3200  	/* Check whether MSAX3 is installed */
3201  	if (!test_kvm_facility(kvm, 76))
3202  		return;
3203  
3204  	if (kvm_s390_apxa_installed())
3205  		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
3206  	else
3207  		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
3208  }
3209  
3210  /*
3211   * kvm_arch_crypto_set_masks
3212   *
3213   * @kvm: pointer to the target guest's KVM struct containing the crypto masks
3214   *	 to be set.
3215   * @apm: the mask identifying the accessible AP adapters
3216   * @aqm: the mask identifying the accessible AP domains
3217   * @adm: the mask identifying the accessible AP control domains
3218   *
3219   * Set the masks that identify the adapters, domains and control domains to
3220   * which the KVM guest is granted access.
3221   *
3222   * Note: The kvm->lock mutex must be locked by the caller before invoking this
3223   *	 function.
3224   */
3225  void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
3226  			       unsigned long *aqm, unsigned long *adm)
3227  {
3228  	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
3229  
3230  	kvm_s390_vcpu_block_all(kvm);
3231  
3232  	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
3233  	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
3234  		memcpy(crycb->apcb1.apm, apm, 32);
3235  		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
3236  			 apm[0], apm[1], apm[2], apm[3]);
3237  		memcpy(crycb->apcb1.aqm, aqm, 32);
3238  		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
3239  			 aqm[0], aqm[1], aqm[2], aqm[3]);
3240  		memcpy(crycb->apcb1.adm, adm, 32);
3241  		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
3242  			 adm[0], adm[1], adm[2], adm[3]);
3243  		break;
3244  	case CRYCB_FORMAT1:
3245  	case CRYCB_FORMAT0: /* Fall through both use APCB0 */
3246  		memcpy(crycb->apcb0.apm, apm, 8);
3247  		memcpy(crycb->apcb0.aqm, aqm, 2);
3248  		memcpy(crycb->apcb0.adm, adm, 2);
3249  		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
3250  			 apm[0], *((unsigned short *)aqm),
3251  			 *((unsigned short *)adm));
3252  		break;
3253  	default:	/* Can not happen */
3254  		break;
3255  	}
3256  
3257  	/* recreate the shadow crycb for each vcpu */
3258  	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
3259  	kvm_s390_vcpu_unblock_all(kvm);
3260  }
3261  EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
3262  
3263  /*
3264   * kvm_arch_crypto_clear_masks
3265   *
3266   * @kvm: pointer to the target guest's KVM struct containing the crypto masks
3267   *	 to be cleared.
3268   *
3269   * Clear the masks that identify the adapters, domains and control domains to
3270   * which the KVM guest is granted access.
3271   *
3272   * Note: The kvm->lock mutex must be locked by the caller before invoking this
3273   *	 function.
3274   */
3275  void kvm_arch_crypto_clear_masks(struct kvm *kvm)
3276  {
3277  	kvm_s390_vcpu_block_all(kvm);
3278  
3279  	memset(&kvm->arch.crypto.crycb->apcb0, 0,
3280  	       sizeof(kvm->arch.crypto.crycb->apcb0));
3281  	memset(&kvm->arch.crypto.crycb->apcb1, 0,
3282  	       sizeof(kvm->arch.crypto.crycb->apcb1));
3283  
3284  	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
3285  	/* recreate the shadow crycb for each vcpu */
3286  	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
3287  	kvm_s390_vcpu_unblock_all(kvm);
3288  }
3289  EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
3290  
3291  static u64 kvm_s390_get_initial_cpuid(void)
3292  {
3293  	struct cpuid cpuid;
3294  
3295  	get_cpu_id(&cpuid);
3296  	cpuid.version = 0xff;
3297  	return *((u64 *) &cpuid);
3298  }
3299  
3300  static void kvm_s390_crypto_init(struct kvm *kvm)
3301  {
3302  	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
3303  	kvm_s390_set_crycb_format(kvm);
3304  	init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
3305  
3306  	if (!test_kvm_facility(kvm, 76))
3307  		return;
3308  
3309  	/* Enable AES/DEA protected key functions by default */
3310  	kvm->arch.crypto.aes_kw = 1;
3311  	kvm->arch.crypto.dea_kw = 1;
3312  	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
3313  			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
3314  	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
3315  			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
3316  }
3317  
3318  static void sca_dispose(struct kvm *kvm)
3319  {
3320  	if (kvm->arch.use_esca)
3321  		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
3322  	else
3323  		free_page((unsigned long)(kvm->arch.sca));
3324  	kvm->arch.sca = NULL;
3325  }
3326  
3327  void kvm_arch_free_vm(struct kvm *kvm)
3328  {
3329  	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
3330  		kvm_s390_pci_clear_list(kvm);
3331  
3332  	__kvm_arch_free_vm(kvm);
3333  }
3334  
3335  int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
3336  {
3337  	gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
3338  	int i, rc;
3339  	char debug_name[16];
3340  	static unsigned long sca_offset;
3341  
3342  	rc = -EINVAL;
3343  #ifdef CONFIG_KVM_S390_UCONTROL
3344  	if (type & ~KVM_VM_S390_UCONTROL)
3345  		goto out_err;
3346  	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
3347  		goto out_err;
3348  #else
3349  	if (type)
3350  		goto out_err;
3351  #endif
3352  
3353  	rc = s390_enable_sie();
3354  	if (rc)
3355  		goto out_err;
3356  
3357  	rc = -ENOMEM;
3358  
3359  	if (!sclp.has_64bscao)
3360  		alloc_flags |= GFP_DMA;
3361  	rwlock_init(&kvm->arch.sca_lock);
3362  	/* start with basic SCA */
3363  	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
3364  	if (!kvm->arch.sca)
3365  		goto out_err;
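	/*
	 * sca_offset advances by 16 bytes per created VM and wraps within a
	 * page, so each VM's basic SCA ends up at a different offset inside
	 * its own zeroed page.
	 */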
3366  	mutex_lock(&kvm_lock);
3367  	sca_offset += 16;
3368  	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
3369  		sca_offset = 0;
3370  	kvm->arch.sca = (struct bsca_block *)
3371  			((char *) kvm->arch.sca + sca_offset);
3372  	mutex_unlock(&kvm_lock);
3373  
3374  	sprintf(debug_name, "kvm-%u", current->pid);
3375  
3376  	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
3377  	if (!kvm->arch.dbf)
3378  		goto out_err;
3379  
3380  	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
3381  	kvm->arch.sie_page2 =
3382  	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
3383  	if (!kvm->arch.sie_page2)
3384  		goto out_err;
3385  
3386  	kvm->arch.sie_page2->kvm = kvm;
3387  	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
3388  
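	/*
	 * fac_mask limits which facilities user space may enable via the CPU
	 * model; fac_list is the facility list initially presented to the
	 * guest.
	 */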
3389  	for (i = 0; i < kvm_s390_fac_size(); i++) {
3390  		kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
3391  					      (kvm_s390_fac_base[i] |
3392  					       kvm_s390_fac_ext[i]);
3393  		kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
3394  					      kvm_s390_fac_base[i];
3395  	}
3396  	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
3397  
3398  	/* we are always in czam mode - even on pre z14 machines */
3399  	set_kvm_facility(kvm->arch.model.fac_mask, 138);
3400  	set_kvm_facility(kvm->arch.model.fac_list, 138);
3401  	/* we emulate STHYI in kvm */
3402  	set_kvm_facility(kvm->arch.model.fac_mask, 74);
3403  	set_kvm_facility(kvm->arch.model.fac_list, 74);
3404  	if (machine_has_tlb_guest()) {
3405  		set_kvm_facility(kvm->arch.model.fac_mask, 147);
3406  		set_kvm_facility(kvm->arch.model.fac_list, 147);
3407  	}
3408  
3409  	if (css_general_characteristics.aiv && test_facility(65))
3410  		set_kvm_facility(kvm->arch.model.fac_mask, 65);
3411  
3412  	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
3413  	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
3414  
3415  	kvm->arch.model.uv_feat_guest.feat = 0;
3416  
3417  	kvm_s390_crypto_init(kvm);
3418  
3419  	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
3420  		mutex_lock(&kvm->lock);
3421  		kvm_s390_pci_init_list(kvm);
3422  		kvm_s390_vcpu_pci_enable_interp(kvm);
3423  		mutex_unlock(&kvm->lock);
3424  	}
3425  
3426  	mutex_init(&kvm->arch.float_int.ais_lock);
3427  	spin_lock_init(&kvm->arch.float_int.lock);
3428  	for (i = 0; i < FIRQ_LIST_COUNT; i++)
3429  		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
3430  	init_waitqueue_head(&kvm->arch.ipte_wq);
3431  	mutex_init(&kvm->arch.ipte_mutex);
3432  
3433  	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
3434  	VM_EVENT(kvm, 3, "vm created with type %lu", type);
3435  
3436  	if (type & KVM_VM_S390_UCONTROL) {
3437  		struct kvm_userspace_memory_region2 fake_memslot = {
3438  			.slot = KVM_S390_UCONTROL_MEMSLOT,
3439  			.guest_phys_addr = 0,
3440  			.userspace_addr = 0,
3441  			.memory_size = ALIGN_DOWN(TASK_SIZE, _SEGMENT_SIZE),
3442  			.flags = 0,
3443  		};
3444  
3445  		kvm->arch.gmap = NULL;
3446  		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
3447  		/* one flat fake memslot covering the whole address-space */
3448  		mutex_lock(&kvm->slots_lock);
3449  		KVM_BUG_ON(kvm_set_internal_memslot(kvm, &fake_memslot), kvm);
3450  		mutex_unlock(&kvm->slots_lock);
3451  	} else {
3452  		if (sclp.hamax == U64_MAX)
3453  			kvm->arch.mem_limit = TASK_SIZE_MAX;
3454  		else
3455  			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
3456  						    sclp.hamax + 1);
3457  		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
3458  		if (!kvm->arch.gmap)
3459  			goto out_err;
3460  		kvm->arch.gmap->private = kvm;
3461  		kvm->arch.gmap->pfault_enabled = 0;
3462  	}
3463  
3464  	kvm->arch.use_pfmfi = sclp.has_pfmfi;
3465  	kvm->arch.use_skf = sclp.has_skey;
3466  	spin_lock_init(&kvm->arch.start_stop_lock);
3467  	kvm_s390_vsie_init(kvm);
3468  	if (use_gisa)
3469  		kvm_s390_gisa_init(kvm);
3470  	INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup);
3471  	kvm->arch.pv.set_aside = NULL;
3472  	KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
3473  
3474  	return 0;
3475  out_err:
3476  	free_page((unsigned long)kvm->arch.sie_page2);
3477  	debug_unregister(kvm->arch.dbf);
3478  	sca_dispose(kvm);
3479  	KVM_EVENT(3, "creation of vm failed: %d", rc);
3480  	return rc;
3481  }
3482  
3483  void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
3484  {
3485  	u16 rc, rrc;
3486  
3487  	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
3488  	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
3489  	kvm_s390_clear_local_irqs(vcpu);
3490  	kvm_clear_async_pf_completion_queue(vcpu);
3491  	if (!kvm_is_ucontrol(vcpu->kvm))
3492  		sca_del_vcpu(vcpu);
3493  	kvm_s390_update_topology_change_report(vcpu->kvm, 1);
3494  
3495  	if (kvm_is_ucontrol(vcpu->kvm))
3496  		gmap_remove(vcpu->arch.gmap);
3497  
3498  	if (vcpu->kvm->arch.use_cmma)
3499  		kvm_s390_vcpu_unsetup_cmma(vcpu);
3500  	/* We can not hold the vcpu mutex here, we are already dying */
3501  	if (kvm_s390_pv_cpu_get_handle(vcpu))
3502  		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
3503  	free_page((unsigned long)(vcpu->arch.sie_block));
3504  }
3505  
3506  void kvm_arch_destroy_vm(struct kvm *kvm)
3507  {
3508  	u16 rc, rrc;
3509  
3510  	kvm_destroy_vcpus(kvm);
3511  	sca_dispose(kvm);
3512  	kvm_s390_gisa_destroy(kvm);
3513  	/*
3514  	 * We are already at the end of life and kvm->lock is not taken.
3515  	 * This is ok as the file descriptor is closed by now and nobody
3516  	 * can mess with the pv state.
3517  	 */
3518  	kvm_s390_pv_deinit_cleanup_all(kvm, &rc, &rrc);
3519  	/*
3520  	 * Remove the mmu notifier only when the whole KVM VM is torn down,
3521  	 * and only if one was registered to begin with. If the VM is
3522  	 * currently not protected, but has previously been protected,
3523  	 * then it's possible that the notifier is still registered.
3524  	 */
3525  	if (kvm->arch.pv.mmu_notifier.ops)
3526  		mmu_notifier_unregister(&kvm->arch.pv.mmu_notifier, kvm->mm);
3527  
3528  	debug_unregister(kvm->arch.dbf);
3529  	free_page((unsigned long)kvm->arch.sie_page2);
3530  	if (!kvm_is_ucontrol(kvm))
3531  		gmap_remove(kvm->arch.gmap);
3532  	kvm_s390_destroy_adapters(kvm);
3533  	kvm_s390_clear_float_irqs(kvm);
3534  	kvm_s390_vsie_destroy(kvm);
3535  	KVM_EVENT(3, "vm 0x%p destroyed", kvm);
3536  }
3537  
3538  /* Section: vcpu related */
3539  static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
3540  {
3541  	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
3542  	if (!vcpu->arch.gmap)
3543  		return -ENOMEM;
3544  	vcpu->arch.gmap->private = vcpu->kvm;
3545  
3546  	return 0;
3547  }
3548  
3549  static void sca_del_vcpu(struct kvm_vcpu *vcpu)
3550  {
3551  	if (!kvm_s390_use_sca_entries())
3552  		return;
3553  	read_lock(&vcpu->kvm->arch.sca_lock);
3554  	if (vcpu->kvm->arch.use_esca) {
3555  		struct esca_block *sca = vcpu->kvm->arch.sca;
3556  
3557  		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
3558  		sca->cpu[vcpu->vcpu_id].sda = 0;
3559  	} else {
3560  		struct bsca_block *sca = vcpu->kvm->arch.sca;
3561  
3562  		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
3563  		sca->cpu[vcpu->vcpu_id].sda = 0;
3564  	}
3565  	read_unlock(&vcpu->kvm->arch.sca_lock);
3566  }
3567  
3568  static void sca_add_vcpu(struct kvm_vcpu *vcpu)
3569  {
3570  	if (!kvm_s390_use_sca_entries()) {
3571  		phys_addr_t sca_phys = virt_to_phys(vcpu->kvm->arch.sca);
3572  
3573  		/* we still need the basic sca for the ipte control */
3574  		vcpu->arch.sie_block->scaoh = sca_phys >> 32;
3575  		vcpu->arch.sie_block->scaol = sca_phys;
3576  		return;
3577  	}
3578  	read_lock(&vcpu->kvm->arch.sca_lock);
3579  	if (vcpu->kvm->arch.use_esca) {
3580  		struct esca_block *sca = vcpu->kvm->arch.sca;
3581  		phys_addr_t sca_phys = virt_to_phys(sca);
3582  
3583  		sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
3584  		vcpu->arch.sie_block->scaoh = sca_phys >> 32;
3585  		vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK;
3586  		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3587  		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
3588  	} else {
3589  		struct bsca_block *sca = vcpu->kvm->arch.sca;
3590  		phys_addr_t sca_phys = virt_to_phys(sca);
3591  
3592  		sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
3593  		vcpu->arch.sie_block->scaoh = sca_phys >> 32;
3594  		vcpu->arch.sie_block->scaol = sca_phys;
3595  		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
3596  	}
3597  	read_unlock(&vcpu->kvm->arch.sca_lock);
3598  }
3599  
3600  /* Basic SCA to Extended SCA data copy routines */
3601  static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
3602  {
3603  	d->sda = s->sda;
3604  	d->sigp_ctrl.c = s->sigp_ctrl.c;
3605  	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
3606  }
3607  
3608  static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
3609  {
3610  	int i;
3611  
3612  	d->ipte_control = s->ipte_control;
3613  	d->mcn[0] = s->mcn;
3614  	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
3615  		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
3616  }
3617  
3618  static int sca_switch_to_extended(struct kvm *kvm)
3619  {
3620  	struct bsca_block *old_sca = kvm->arch.sca;
3621  	struct esca_block *new_sca;
3622  	struct kvm_vcpu *vcpu;
3623  	unsigned long vcpu_idx;
3624  	u32 scaol, scaoh;
3625  	phys_addr_t new_sca_phys;
3626  
3627  	if (kvm->arch.use_esca)
3628  		return 0;
3629  
3630  	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
3631  	if (!new_sca)
3632  		return -ENOMEM;
3633  
3634  	new_sca_phys = virt_to_phys(new_sca);
3635  	scaoh = new_sca_phys >> 32;
3636  	scaol = new_sca_phys & ESCA_SCAOL_MASK;
3637  
3638  	kvm_s390_vcpu_block_all(kvm);
3639  	write_lock(&kvm->arch.sca_lock);
3640  
3641  	sca_copy_b_to_e(new_sca, old_sca);
3642  
3643  	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
3644  		vcpu->arch.sie_block->scaoh = scaoh;
3645  		vcpu->arch.sie_block->scaol = scaol;
3646  		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3647  	}
3648  	kvm->arch.sca = new_sca;
3649  	kvm->arch.use_esca = 1;
3650  
3651  	write_unlock(&kvm->arch.sca_lock);
3652  	kvm_s390_vcpu_unblock_all(kvm);
3653  
3654  	free_page((unsigned long)old_sca);
3655  
3656  	VM_EVENT(kvm, 2, "Switched to ESCA (0x%p -> 0x%p)",
3657  		 old_sca, kvm->arch.sca);
3658  	return 0;
3659  }
3660  
3661  static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
3662  {
3663  	int rc;
3664  
3665  	if (!kvm_s390_use_sca_entries()) {
3666  		if (id < KVM_MAX_VCPUS)
3667  			return true;
3668  		return false;
3669  	}
3670  	if (id < KVM_S390_BSCA_CPU_SLOTS)
3671  		return true;
3672  	if (!sclp.has_esca || !sclp.has_64bscao)
3673  		return false;
3674  
3675  	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
3676  
3677  	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3678  }
3679  
3680  /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3681  static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3682  {
3683  	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3684  	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3685  	vcpu->arch.cputm_start = get_tod_clock_fast();
3686  	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3687  }
3688  
3689  /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3690  static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3691  {
3692  	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3693  	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3694  	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3695  	vcpu->arch.cputm_start = 0;
3696  	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3697  }
3698  
3699  /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3700  static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3701  {
3702  	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3703  	vcpu->arch.cputm_enabled = true;
3704  	__start_cpu_timer_accounting(vcpu);
3705  }
3706  
3707  /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3708  static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3709  {
3710  	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3711  	__stop_cpu_timer_accounting(vcpu);
3712  	vcpu->arch.cputm_enabled = false;
3713  }
3714  
3715  static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3716  {
3717  	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3718  	__enable_cpu_timer_accounting(vcpu);
3719  	preempt_enable();
3720  }
3721  
3722  static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3723  {
3724  	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3725  	__disable_cpu_timer_accounting(vcpu);
3726  	preempt_enable();
3727  }
3728  
3729  /* set the cpu timer - may only be called from the VCPU thread itself */
3730  void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3731  {
3732  	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3733  	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3734  	if (vcpu->arch.cputm_enabled)
3735  		vcpu->arch.cputm_start = get_tod_clock_fast();
3736  	vcpu->arch.sie_block->cputm = cputm;
3737  	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3738  	preempt_enable();
3739  }
3740  
3741  /* update and get the cpu timer - can also be called from other VCPU threads */
3742  __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3743  {
3744  	unsigned int seq;
3745  	__u64 value;
3746  
3747  	if (unlikely(!vcpu->arch.cputm_enabled))
3748  		return vcpu->arch.sie_block->cputm;
3749  
3750  	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3751  	do {
3752  		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3753  		/*
3754  		 * If the writer would ever execute a read in the critical
3755  		 * section, e.g. in irq context, we have a deadlock.
3756  		 */
3757  		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3758  		value = vcpu->arch.sie_block->cputm;
3759  		/* if cputm_start is 0, accounting is being started/stopped */
3760  		if (likely(vcpu->arch.cputm_start))
3761  			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3762  	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3763  	preempt_enable();
3764  	return value;
3765  }
3766  
3767  void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3768  {
3769  
3770  	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3771  	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3772  		__start_cpu_timer_accounting(vcpu);
3773  	vcpu->cpu = cpu;
3774  }
3775  
3776  void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3777  {
3778  	vcpu->cpu = -1;
3779  	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3780  		__stop_cpu_timer_accounting(vcpu);
3781  	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3782  
3783  }
3784  
3785  void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3786  {
3787  	mutex_lock(&vcpu->kvm->lock);
3788  	preempt_disable();
3789  	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3790  	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3791  	preempt_enable();
3792  	mutex_unlock(&vcpu->kvm->lock);
3793  	if (!kvm_is_ucontrol(vcpu->kvm)) {
3794  		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3795  		sca_add_vcpu(vcpu);
3796  	}
3797  	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3798  		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3799  }
3800  
3801  static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3802  {
3803  	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3804  	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3805  		return true;
3806  	return false;
3807  }
3808  
3809  static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3810  {
3811  	/* At least one ECC subfunction must be present */
3812  	return kvm_has_pckmo_subfunc(kvm, 32) ||
3813  	       kvm_has_pckmo_subfunc(kvm, 33) ||
3814  	       kvm_has_pckmo_subfunc(kvm, 34) ||
3815  	       kvm_has_pckmo_subfunc(kvm, 40) ||
3816  	       kvm_has_pckmo_subfunc(kvm, 41);
3817  
3818  }
3819  
3820  static bool kvm_has_pckmo_hmac(struct kvm *kvm)
3821  {
3822  	/* At least one HMAC subfunction must be present */
3823  	return kvm_has_pckmo_subfunc(kvm, 118) ||
3824  	       kvm_has_pckmo_subfunc(kvm, 122);
3825  }
3826  
3827  static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3828  {
3829  	/*
3830  	 * If the AP instructions are not being interpreted and the MSAX3
3831  	 * facility is not configured for the guest, there is nothing to set up.
3832  	 */
3833  	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3834  		return;
3835  
3836  	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3837  	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3838  	vcpu->arch.sie_block->eca &= ~ECA_APIE;
3839  	vcpu->arch.sie_block->ecd &= ~(ECD_ECC | ECD_HMAC);
3840  
3841  	if (vcpu->kvm->arch.crypto.apie)
3842  		vcpu->arch.sie_block->eca |= ECA_APIE;
3843  
3844  	/* Set up protected key support */
3845  	if (vcpu->kvm->arch.crypto.aes_kw) {
3846  		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3847  		/* ecc/hmac is also wrapped with AES key */
3848  		if (kvm_has_pckmo_ecc(vcpu->kvm))
3849  			vcpu->arch.sie_block->ecd |= ECD_ECC;
3850  		if (kvm_has_pckmo_hmac(vcpu->kvm))
3851  			vcpu->arch.sie_block->ecd |= ECD_HMAC;
3852  	}
3853  
3854  	if (vcpu->kvm->arch.crypto.dea_kw)
3855  		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3856  }
3857  
3858  void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3859  {
3860  	free_page((unsigned long)phys_to_virt(vcpu->arch.sie_block->cbrlo));
3861  	vcpu->arch.sie_block->cbrlo = 0;
3862  }
3863  
3864  int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3865  {
3866  	void *cbrlo_page = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
3867  
3868  	if (!cbrlo_page)
3869  		return -ENOMEM;
3870  
3871  	vcpu->arch.sie_block->cbrlo = virt_to_phys(cbrlo_page);
3872  	return 0;
3873  }
3874  
3875  static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3876  {
3877  	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3878  
3879  	vcpu->arch.sie_block->ibc = model->ibc;
3880  	if (test_kvm_facility(vcpu->kvm, 7))
3881  		vcpu->arch.sie_block->fac = virt_to_phys(model->fac_list);
3882  }
3883  
3884  static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3885  {
3886  	int rc = 0;
3887  	u16 uvrc, uvrrc;
3888  
3889  	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3890  						    CPUSTAT_SM |
3891  						    CPUSTAT_STOPPED);
3892  
3893  	if (test_kvm_facility(vcpu->kvm, 78))
3894  		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3895  	else if (test_kvm_facility(vcpu->kvm, 8))
3896  		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3897  
3898  	kvm_s390_vcpu_setup_model(vcpu);
3899  
3900  	/* pgste_set_pte has special handling for !machine_has_esop() */
3901  	if (machine_has_esop())
3902  		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3903  	if (test_kvm_facility(vcpu->kvm, 9))
3904  		vcpu->arch.sie_block->ecb |= ECB_SRSI;
3905  	if (test_kvm_facility(vcpu->kvm, 11))
3906  		vcpu->arch.sie_block->ecb |= ECB_PTF;
3907  	if (test_kvm_facility(vcpu->kvm, 73))
3908  		vcpu->arch.sie_block->ecb |= ECB_TE;
3909  	if (!kvm_is_ucontrol(vcpu->kvm))
3910  		vcpu->arch.sie_block->ecb |= ECB_SPECI;
3911  
3912  	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3913  		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3914  	if (test_kvm_facility(vcpu->kvm, 130))
3915  		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3916  	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3917  	if (sclp.has_cei)
3918  		vcpu->arch.sie_block->eca |= ECA_CEI;
3919  	if (sclp.has_ib)
3920  		vcpu->arch.sie_block->eca |= ECA_IB;
3921  	if (sclp.has_siif)
3922  		vcpu->arch.sie_block->eca |= ECA_SII;
3923  	if (sclp.has_sigpif)
3924  		vcpu->arch.sie_block->eca |= ECA_SIGPI;
3925  	if (test_kvm_facility(vcpu->kvm, 129)) {
3926  		vcpu->arch.sie_block->eca |= ECA_VX;
3927  		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3928  	}
3929  	if (test_kvm_facility(vcpu->kvm, 139))
3930  		vcpu->arch.sie_block->ecd |= ECD_MEF;
3931  	if (test_kvm_facility(vcpu->kvm, 156))
3932  		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3933  	if (vcpu->arch.sie_block->gd) {
3934  		vcpu->arch.sie_block->eca |= ECA_AIV;
3935  		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3936  			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3937  	}
3938  	vcpu->arch.sie_block->sdnxo = virt_to_phys(&vcpu->run->s.regs.sdnx) | SDNXC;
3939  	vcpu->arch.sie_block->riccbd = virt_to_phys(&vcpu->run->s.regs.riccb);
3940  
3941  	if (sclp.has_kss)
3942  		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3943  	else
3944  		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3945  
3946  	if (vcpu->kvm->arch.use_cmma) {
3947  		rc = kvm_s390_vcpu_setup_cmma(vcpu);
3948  		if (rc)
3949  			return rc;
3950  	}
3951  	hrtimer_setup(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup, CLOCK_MONOTONIC,
3952  		      HRTIMER_MODE_REL);
3953  
3954  	vcpu->arch.sie_block->hpid = HPID_KVM;
3955  
3956  	kvm_s390_vcpu_crypto_setup(vcpu);
3957  
3958  	kvm_s390_vcpu_pci_setup(vcpu);
3959  
3960  	mutex_lock(&vcpu->kvm->lock);
3961  	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3962  		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3963  		if (rc)
3964  			kvm_s390_vcpu_unsetup_cmma(vcpu);
3965  	}
3966  	mutex_unlock(&vcpu->kvm->lock);
3967  
3968  	return rc;
3969  }
3970  
3971  int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3972  {
3973  	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3974  		return -EINVAL;
3975  	return 0;
3976  }
3977  
3978  int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3979  {
3980  	struct sie_page *sie_page;
3981  	int rc;
3982  
3983  	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3984  	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3985  	if (!sie_page)
3986  		return -ENOMEM;
3987  
3988  	vcpu->arch.sie_block = &sie_page->sie_block;
3989  	vcpu->arch.sie_block->itdba = virt_to_phys(&sie_page->itdb);
3990  
3991  	/* the real guest size will always be smaller than msl */
3992  	vcpu->arch.sie_block->mso = 0;
3993  	vcpu->arch.sie_block->msl = sclp.hamax;
3994  
3995  	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3996  	spin_lock_init(&vcpu->arch.local_int.lock);
3997  	vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm);
3998  	seqcount_init(&vcpu->arch.cputm_seqcount);
3999  
4000  	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
4001  	kvm_clear_async_pf_completion_queue(vcpu);
4002  	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
4003  				    KVM_SYNC_GPRS |
4004  				    KVM_SYNC_ACRS |
4005  				    KVM_SYNC_CRS |
4006  				    KVM_SYNC_ARCH0 |
4007  				    KVM_SYNC_PFAULT |
4008  				    KVM_SYNC_DIAG318;
4009  	vcpu->arch.acrs_loaded = false;
4010  	kvm_s390_set_prefix(vcpu, 0);
4011  	if (test_kvm_facility(vcpu->kvm, 64))
4012  		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
4013  	if (test_kvm_facility(vcpu->kvm, 82))
4014  		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
4015  	if (test_kvm_facility(vcpu->kvm, 133))
4016  		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
4017  	if (test_kvm_facility(vcpu->kvm, 156))
4018  		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
4019  	/* fprs can be synchronized via vrs, even if the guest has no vx. With
4020  	 * cpu_has_vx(), (load|store)_fpu_regs() will work with vrs format.
4021  	 */
4022  	if (cpu_has_vx())
4023  		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
4024  	else
4025  		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
4026  
4027  	if (kvm_is_ucontrol(vcpu->kvm)) {
4028  		rc = __kvm_ucontrol_vcpu_init(vcpu);
4029  		if (rc)
4030  			goto out_free_sie_block;
4031  	}
4032  
4033  	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%p, sie block at 0x%p",
4034  		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
4035  	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
4036  
4037  	rc = kvm_s390_vcpu_setup(vcpu);
4038  	if (rc)
4039  		goto out_ucontrol_uninit;
4040  
4041  	kvm_s390_update_topology_change_report(vcpu->kvm, 1);
4042  	return 0;
4043  
4044  out_ucontrol_uninit:
4045  	if (kvm_is_ucontrol(vcpu->kvm))
4046  		gmap_remove(vcpu->arch.gmap);
4047  out_free_sie_block:
4048  	free_page((unsigned long)(vcpu->arch.sie_block));
4049  	return rc;
4050  }
4051  
4052  int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
4053  {
4054  	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4055  	return kvm_s390_vcpu_has_irq(vcpu, 0);
4056  }
4057  
4058  bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
4059  {
4060  	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
4061  }
4062  
4063  void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
4064  {
4065  	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
4066  	exit_sie(vcpu);
4067  }
4068  
4069  void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
4070  {
4071  	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
4072  }
4073  
4074  static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
4075  {
4076  	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
4077  	exit_sie(vcpu);
4078  }
4079  
4080  bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
4081  {
4082  	return atomic_read(&vcpu->arch.sie_block->prog20) &
4083  	       (PROG_BLOCK_SIE | PROG_REQUEST);
4084  }
4085  
4086  static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
4087  {
4088  	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
4089  }
4090  
4091  /*
4092   * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
4093   * If the CPU is not running (e.g. waiting as idle) the function will
4094   * return immediately. */
4095  void exit_sie(struct kvm_vcpu *vcpu)
4096  {
4097  	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
4098  	kvm_s390_vsie_kick(vcpu);
4099  	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
4100  		cpu_relax();
4101  }
4102  
4103  /* Kick a guest cpu out of SIE to process a request synchronously */
4104  void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
4105  {
4106  	__kvm_make_request(req, vcpu);
4107  	kvm_s390_vcpu_request(vcpu);
4108  }
4109  
4110  static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
4111  			      unsigned long end)
4112  {
4113  	struct kvm *kvm = gmap->private;
4114  	struct kvm_vcpu *vcpu;
4115  	unsigned long prefix;
4116  	unsigned long i;
4117  
4118  	trace_kvm_s390_gmap_notifier(start, end, gmap_is_shadow(gmap));
4119  
4120  	if (gmap_is_shadow(gmap))
4121  		return;
4122  	if (start >= 1UL << 31)
4123  		/* We are only interested in prefix pages */
4124  		return;
4125  	kvm_for_each_vcpu(i, vcpu, kvm) {
4126  		/* match against both prefix pages */
4127  		prefix = kvm_s390_get_prefix(vcpu);
4128  		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
4129  			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
4130  				   start, end);
4131  			kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
4132  		}
4133  	}
4134  }
4135  
4136  bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
4137  {
4138  	/* do not poll with more than halt_poll_max_steal percent of steal time */
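	/*
	 * avg_steal_timer is in CPU-timer units (1 unit = 2^-12 microseconds),
	 * so TICK_USEC << 12 corresponds to one timer tick in the same units.
	 */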
4139  	if (get_lowcore()->avg_steal_timer * 100 / (TICK_USEC << 12) >=
4140  	    READ_ONCE(halt_poll_max_steal)) {
4141  		vcpu->stat.halt_no_poll_steal++;
4142  		return true;
4143  	}
4144  	return false;
4145  }
4146  
4147  int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
4148  {
4149  	/* kvm common code refers to this, but never calls it */
4150  	BUG();
4151  	return 0;
4152  }
4153  
4154  static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
4155  					   struct kvm_one_reg *reg)
4156  {
4157  	int r = -EINVAL;
4158  
4159  	switch (reg->id) {
4160  	case KVM_REG_S390_TODPR:
4161  		r = put_user(vcpu->arch.sie_block->todpr,
4162  			     (u32 __user *)reg->addr);
4163  		break;
4164  	case KVM_REG_S390_EPOCHDIFF:
4165  		r = put_user(vcpu->arch.sie_block->epoch,
4166  			     (u64 __user *)reg->addr);
4167  		break;
4168  	case KVM_REG_S390_CPU_TIMER:
4169  		r = put_user(kvm_s390_get_cpu_timer(vcpu),
4170  			     (u64 __user *)reg->addr);
4171  		break;
4172  	case KVM_REG_S390_CLOCK_COMP:
4173  		r = put_user(vcpu->arch.sie_block->ckc,
4174  			     (u64 __user *)reg->addr);
4175  		break;
4176  	case KVM_REG_S390_PFTOKEN:
4177  		r = put_user(vcpu->arch.pfault_token,
4178  			     (u64 __user *)reg->addr);
4179  		break;
4180  	case KVM_REG_S390_PFCOMPARE:
4181  		r = put_user(vcpu->arch.pfault_compare,
4182  			     (u64 __user *)reg->addr);
4183  		break;
4184  	case KVM_REG_S390_PFSELECT:
4185  		r = put_user(vcpu->arch.pfault_select,
4186  			     (u64 __user *)reg->addr);
4187  		break;
4188  	case KVM_REG_S390_PP:
4189  		r = put_user(vcpu->arch.sie_block->pp,
4190  			     (u64 __user *)reg->addr);
4191  		break;
4192  	case KVM_REG_S390_GBEA:
4193  		r = put_user(vcpu->arch.sie_block->gbea,
4194  			     (u64 __user *)reg->addr);
4195  		break;
4196  	default:
4197  		break;
4198  	}
4199  
4200  	return r;
4201  }
4202  
4203  static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
4204  					   struct kvm_one_reg *reg)
4205  {
4206  	int r = -EINVAL;
4207  	__u64 val;
4208  
4209  	switch (reg->id) {
4210  	case KVM_REG_S390_TODPR:
4211  		r = get_user(vcpu->arch.sie_block->todpr,
4212  			     (u32 __user *)reg->addr);
4213  		break;
4214  	case KVM_REG_S390_EPOCHDIFF:
4215  		r = get_user(vcpu->arch.sie_block->epoch,
4216  			     (u64 __user *)reg->addr);
4217  		break;
4218  	case KVM_REG_S390_CPU_TIMER:
4219  		r = get_user(val, (u64 __user *)reg->addr);
4220  		if (!r)
4221  			kvm_s390_set_cpu_timer(vcpu, val);
4222  		break;
4223  	case KVM_REG_S390_CLOCK_COMP:
4224  		r = get_user(vcpu->arch.sie_block->ckc,
4225  			     (u64 __user *)reg->addr);
4226  		break;
4227  	case KVM_REG_S390_PFTOKEN:
4228  		r = get_user(vcpu->arch.pfault_token,
4229  			     (u64 __user *)reg->addr);
4230  		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4231  			kvm_clear_async_pf_completion_queue(vcpu);
4232  		break;
4233  	case KVM_REG_S390_PFCOMPARE:
4234  		r = get_user(vcpu->arch.pfault_compare,
4235  			     (u64 __user *)reg->addr);
4236  		break;
4237  	case KVM_REG_S390_PFSELECT:
4238  		r = get_user(vcpu->arch.pfault_select,
4239  			     (u64 __user *)reg->addr);
4240  		break;
4241  	case KVM_REG_S390_PP:
4242  		r = get_user(vcpu->arch.sie_block->pp,
4243  			     (u64 __user *)reg->addr);
4244  		break;
4245  	case KVM_REG_S390_GBEA:
4246  		r = get_user(vcpu->arch.sie_block->gbea,
4247  			     (u64 __user *)reg->addr);
4248  		break;
4249  	default:
4250  		break;
4251  	}
4252  
4253  	return r;
4254  }
4255  
4256  static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
4257  {
4258  	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
4259  	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
4260  	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
4261  
4262  	kvm_clear_async_pf_completion_queue(vcpu);
4263  	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
4264  		kvm_s390_vcpu_stop(vcpu);
4265  	kvm_s390_clear_local_irqs(vcpu);
4266  }
4267  
4268  static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
4269  {
4270  	/* Initial reset is a superset of the normal reset */
4271  	kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4272  
4273  	/*
4274  	 * This equals initial cpu reset in pop, but we don't switch to ESA.
4275  	 * We do not only reset the internal data, but also ...
4276  	 */
4277  	vcpu->arch.sie_block->gpsw.mask = 0;
4278  	vcpu->arch.sie_block->gpsw.addr = 0;
4279  	kvm_s390_set_prefix(vcpu, 0);
4280  	kvm_s390_set_cpu_timer(vcpu, 0);
4281  	vcpu->arch.sie_block->ckc = 0;
4282  	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
4283  	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
4284  	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
4285  
4286  	/* ... the data in sync regs */
4287  	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
4288  	vcpu->run->s.regs.ckc = 0;
4289  	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
4290  	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
4291  	vcpu->run->psw_addr = 0;
4292  	vcpu->run->psw_mask = 0;
4293  	vcpu->run->s.regs.todpr = 0;
4294  	vcpu->run->s.regs.cputm = 0;
4295  	vcpu->run->s.regs.ckc = 0;
4296  	vcpu->run->s.regs.pp = 0;
4297  	vcpu->run->s.regs.gbea = 1;
4298  	vcpu->run->s.regs.fpc = 0;
4299  	/*
4300  	 * Do not reset these registers in the protected case, as some of
4301  	 * them are overlaid and they are not accessible in this case
4302  	 * anyway.
4303  	 */
4304  	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
4305  		vcpu->arch.sie_block->gbea = 1;
4306  		vcpu->arch.sie_block->pp = 0;
4307  		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4308  		vcpu->arch.sie_block->todpr = 0;
4309  	}
4310  }
4311  
4312  static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
4313  {
4314  	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
4315  
4316  	/* Clear reset is a superset of the initial reset */
4317  	kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4318  
4319  	memset(&regs->gprs, 0, sizeof(regs->gprs));
4320  	memset(&regs->vrs, 0, sizeof(regs->vrs));
4321  	memset(&regs->acrs, 0, sizeof(regs->acrs));
4322  	memset(&regs->gscb, 0, sizeof(regs->gscb));
4323  
4324  	regs->etoken = 0;
4325  	regs->etoken_extension = 0;
4326  }
4327  
4328  int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4329  {
4330  	vcpu_load(vcpu);
4331  	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
4332  	vcpu_put(vcpu);
4333  	return 0;
4334  }
4335  
4336  int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4337  {
4338  	vcpu_load(vcpu);
4339  	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
4340  	vcpu_put(vcpu);
4341  	return 0;
4342  }
4343  
4344  int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
4345  				  struct kvm_sregs *sregs)
4346  {
4347  	vcpu_load(vcpu);
4348  
4349  	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
4350  	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
4351  
4352  	vcpu_put(vcpu);
4353  	return 0;
4354  }
4355  
4356  int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
4357  				  struct kvm_sregs *sregs)
4358  {
4359  	vcpu_load(vcpu);
4360  
4361  	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
4362  	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
4363  
4364  	vcpu_put(vcpu);
4365  	return 0;
4366  }
4367  
4368  int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
4369  {
4370  	int ret = 0;
4371  
4372  	vcpu_load(vcpu);
4373  
4374  	vcpu->run->s.regs.fpc = fpu->fpc;
4375  	if (cpu_has_vx())
4376  		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
4377  				 (freg_t *) fpu->fprs);
4378  	else
4379  		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
4380  
4381  	vcpu_put(vcpu);
4382  	return ret;
4383  }
4384  
4385  int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
4386  {
4387  	vcpu_load(vcpu);
4388  
4389  	if (cpu_has_vx())
4390  		convert_vx_to_fp((freg_t *) fpu->fprs,
4391  				 (__vector128 *) vcpu->run->s.regs.vrs);
4392  	else
4393  		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
4394  	fpu->fpc = vcpu->run->s.regs.fpc;
4395  
4396  	vcpu_put(vcpu);
4397  	return 0;
4398  }
4399  
4400  static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
4401  {
4402  	int rc = 0;
4403  
4404  	if (!is_vcpu_stopped(vcpu))
4405  		rc = -EBUSY;
4406  	else {
4407  		vcpu->run->psw_mask = psw.mask;
4408  		vcpu->run->psw_addr = psw.addr;
4409  	}
4410  	return rc;
4411  }
4412  
4413  int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
4414  				  struct kvm_translation *tr)
4415  {
4416  	return -EINVAL; /* not implemented yet */
4417  }
4418  
4419  #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
4420  			      KVM_GUESTDBG_USE_HW_BP | \
4421  			      KVM_GUESTDBG_ENABLE)
4422  
4423  int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
4424  					struct kvm_guest_debug *dbg)
4425  {
4426  	int rc = 0;
4427  
4428  	vcpu_load(vcpu);
4429  
4430  	vcpu->guest_debug = 0;
4431  	kvm_s390_clear_bp_data(vcpu);
4432  
4433  	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
4434  		rc = -EINVAL;
4435  		goto out;
4436  	}
4437  	if (!sclp.has_gpere) {
4438  		rc = -EINVAL;
4439  		goto out;
4440  	}
4441  
4442  	if (dbg->control & KVM_GUESTDBG_ENABLE) {
4443  		vcpu->guest_debug = dbg->control;
4444  		/* enforce guest PER */
4445  		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
4446  
4447  		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
4448  			rc = kvm_s390_import_bp_data(vcpu, dbg);
4449  	} else {
4450  		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
4451  		vcpu->arch.guestdbg.last_bp = 0;
4452  	}
4453  
4454  	if (rc) {
4455  		vcpu->guest_debug = 0;
4456  		kvm_s390_clear_bp_data(vcpu);
4457  		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
4458  	}
4459  
4460  out:
4461  	vcpu_put(vcpu);
4462  	return rc;
4463  }
4464  
4465  int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
4466  				    struct kvm_mp_state *mp_state)
4467  {
4468  	int ret;
4469  
4470  	vcpu_load(vcpu);
4471  
4472  	/* CHECK_STOP and LOAD are not supported yet */
4473  	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
4474  				      KVM_MP_STATE_OPERATING;
4475  
4476  	vcpu_put(vcpu);
4477  	return ret;
4478  }
4479  
4480  int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
4481  				    struct kvm_mp_state *mp_state)
4482  {
4483  	int rc = 0;
4484  
4485  	vcpu_load(vcpu);
4486  
4487  	/* user space knows about this interface - let it control the state */
4488  	kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
4489  
4490  	switch (mp_state->mp_state) {
4491  	case KVM_MP_STATE_STOPPED:
4492  		rc = kvm_s390_vcpu_stop(vcpu);
4493  		break;
4494  	case KVM_MP_STATE_OPERATING:
4495  		rc = kvm_s390_vcpu_start(vcpu);
4496  		break;
4497  	case KVM_MP_STATE_LOAD:
4498  		if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
4499  			rc = -ENXIO;
4500  			break;
4501  		}
4502  		rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
4503  		break;
4504  	case KVM_MP_STATE_CHECK_STOP:
4505  		fallthrough;	/* CHECK_STOP and LOAD are not supported yet */
4506  	default:
4507  		rc = -ENXIO;
4508  	}
4509  
4510  	vcpu_put(vcpu);
4511  	return rc;
4512  }
4513  
4514  static bool ibs_enabled(struct kvm_vcpu *vcpu)
4515  {
4516  	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
4517  }
4518  
4519  static int __kvm_s390_fixup_fault_sync(struct gmap *gmap, gpa_t gaddr, unsigned int flags)
4520  {
4521  	struct kvm *kvm = gmap->private;
4522  	gfn_t gfn = gpa_to_gfn(gaddr);
4523  	bool unlocked;
4524  	hva_t vmaddr;
4525  	gpa_t tmp;
4526  	int rc;
4527  
4528  	if (kvm_is_ucontrol(kvm)) {
4529  		tmp = __gmap_translate(gmap, gaddr);
4530  		gfn = gpa_to_gfn(tmp);
4531  	}
4532  
4533  	vmaddr = gfn_to_hva(kvm, gfn);
4534  	rc = fixup_user_fault(gmap->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked);
4535  	if (!rc)
4536  		rc = __gmap_link(gmap, gaddr, vmaddr);
4537  	return rc;
4538  }
4539  
4540  /**
4541   * __kvm_s390_mprotect_many() - Apply specified protection to guest pages
4542   * @gmap: the gmap of the guest
4543   * @gpa: the starting guest address
4544   * @npages: how many pages to protect
4545   * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
4546   * @bits: pgste notification bits to set
4547   *
4548   * Returns: 0 in case of success, < 0 in case of error - see gmap_protect_one()
4549   *
4550   * Context: kvm->srcu and gmap->mm need to be held in read mode
4551   */
4552  int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot,
4553  			     unsigned long bits)
4554  {
4555  	unsigned int fault_flag = (prot & PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
4556  	gpa_t end = gpa + npages * PAGE_SIZE;
4557  	int rc;
4558  
4559  	for (; gpa < end; gpa = ALIGN(gpa + 1, rc)) {
4560  		rc = gmap_protect_one(gmap, gpa, prot, bits);
4561  		if (rc == -EAGAIN) {
4562  			__kvm_s390_fixup_fault_sync(gmap, gpa, fault_flag);
4563  			rc = gmap_protect_one(gmap, gpa, prot, bits);
4564  		}
4565  		if (rc < 0)
4566  			return rc;
4567  	}
4568  
4569  	return 0;
4570  }
4571  
4572  static int kvm_s390_mprotect_notify_prefix(struct kvm_vcpu *vcpu)
4573  {
4574  	gpa_t gaddr = kvm_s390_get_prefix(vcpu);
4575  	int idx, rc;
4576  
4577  	idx = srcu_read_lock(&vcpu->kvm->srcu);
4578  	mmap_read_lock(vcpu->arch.gmap->mm);
4579  
4580  	rc = __kvm_s390_mprotect_many(vcpu->arch.gmap, gaddr, 2, PROT_WRITE, GMAP_NOTIFY_MPROT);
4581  
4582  	mmap_read_unlock(vcpu->arch.gmap->mm);
4583  	srcu_read_unlock(&vcpu->kvm->srcu, idx);
4584  
4585  	return rc;
4586  }
4587  
4588  static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
4589  {
4590  retry:
4591  	kvm_s390_vcpu_request_handled(vcpu);
4592  	if (!kvm_request_pending(vcpu))
4593  		return 0;
4594  	/*
4595  	 * If the guest prefix changed, re-arm the ipte notifier for the
4596  	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
4597  	 * This ensures that the ipte instruction for this request has
4598  	 * already finished. We might race against a second unmapper that
4599  	 * wants to set the blocking bit. Lets just retry the request loop.
4600  	 */
4601  	if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
4602  		int rc;
4603  
4604  		rc = kvm_s390_mprotect_notify_prefix(vcpu);
4605  		if (rc) {
4606  			kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
4607  			return rc;
4608  		}
4609  		goto retry;
4610  	}
4611  
4612  	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
4613  		vcpu->arch.sie_block->ihcpu = 0xffff;
4614  		goto retry;
4615  	}
4616  
4617  	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
4618  		if (!ibs_enabled(vcpu)) {
4619  			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
4620  			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
4621  		}
4622  		goto retry;
4623  	}
4624  
4625  	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
4626  		if (ibs_enabled(vcpu)) {
4627  			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
4628  			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
4629  		}
4630  		goto retry;
4631  	}
4632  
4633  	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
4634  		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
4635  		goto retry;
4636  	}
4637  
4638  	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
4639  		/*
4640  		 * Disable CMM virtualization; we will emulate the ESSA
4641  		 * instruction manually, in order to provide additional
4642  		 * functionalities needed for live migration.
4643  		 */
4644  		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
4645  		goto retry;
4646  	}
4647  
4648  	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
4649  		/*
4650  		 * Re-enable CMM virtualization if CMMA is available and
4651  		 * CMM has been used.
4652  		 */
4653  		if ((vcpu->kvm->arch.use_cmma) &&
4654  		    (vcpu->kvm->mm->context.uses_cmm))
4655  			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
4656  		goto retry;
4657  	}
4658  
4659  	/* we left the vsie handler, nothing to do, just clear the request */
4660  	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
4661  
4662  	return 0;
4663  }
4664  
4665  static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4666  {
4667  	struct kvm_vcpu *vcpu;
4668  	union tod_clock clk;
4669  	unsigned long i;
4670  
4671  	preempt_disable();
4672  
4673  	store_tod_clock_ext(&clk);
4674  
4675  	kvm->arch.epoch = gtod->tod - clk.tod;
4676  	kvm->arch.epdx = 0;
4677  	if (test_kvm_facility(kvm, 139)) {
4678  		kvm->arch.epdx = gtod->epoch_idx - clk.ei;
4679  		if (kvm->arch.epoch > gtod->tod)
4680  			kvm->arch.epdx -= 1;
4681  	}
4682  
4683  	kvm_s390_vcpu_block_all(kvm);
4684  	kvm_for_each_vcpu(i, vcpu, kvm) {
4685  		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
4686  		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
4687  	}
4688  
4689  	kvm_s390_vcpu_unblock_all(kvm);
4690  	preempt_enable();
4691  }
4692  
4693  int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4694  {
4695  	if (!mutex_trylock(&kvm->lock))
4696  		return 0;
4697  	__kvm_s390_set_tod_clock(kvm, gtod);
4698  	mutex_unlock(&kvm->lock);
4699  	return 1;
4700  }
4701  
4702  static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
4703  				      unsigned long token)
4704  {
4705  	struct kvm_s390_interrupt inti;
4706  	struct kvm_s390_irq irq;
4707  
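	/*
	 * PFAULT_INIT is delivered to the faulting vCPU itself, while
	 * PFAULT_DONE is injected as a floating interrupt for the whole VM.
	 */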
4708  	if (start_token) {
4709  		irq.u.ext.ext_params2 = token;
4710  		irq.type = KVM_S390_INT_PFAULT_INIT;
4711  		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
4712  	} else {
4713  		inti.type = KVM_S390_INT_PFAULT_DONE;
4714  		inti.parm64 = token;
4715  		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
4716  	}
4717  }
4718  
4719  bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
4720  				     struct kvm_async_pf *work)
4721  {
4722  	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
4723  	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
4724  
4725  	return true;
4726  }
4727  
4728  void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
4729  				 struct kvm_async_pf *work)
4730  {
4731  	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
4732  	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
4733  }
4734  
4735  void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
4736  			       struct kvm_async_pf *work)
4737  {
4738  	/* s390 will always inject the page directly */
4739  }
4740  
4741  bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
4742  {
4743  	/*
4744  	 * s390 will always inject the page directly,
4745  	 * but we still want check_async_completion to clean up
4746  	 */
4747  	return true;
4748  }
4749  
4750  static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4751  {
4752  	hva_t hva;
4753  	struct kvm_arch_async_pf arch;
4754  
4755  	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4756  		return false;
4757  	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4758  	    vcpu->arch.pfault_compare)
4759  		return false;
4760  	if (psw_extint_disabled(vcpu))
4761  		return false;
4762  	if (kvm_s390_vcpu_has_irq(vcpu, 0))
4763  		return false;
4764  	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4765  		return false;
4766  	if (!vcpu->arch.gmap->pfault_enabled)
4767  		return false;
4768  
4769  	hva = gfn_to_hva(vcpu->kvm, current->thread.gmap_teid.addr);
4770  	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4771  		return false;
4772  
4773  	return kvm_setup_async_pf(vcpu, current->thread.gmap_teid.addr * PAGE_SIZE, hva, &arch);
4774  }
4775  
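/*
 * Prepare the vCPU for the next SIE entry: finish completed async pfaults,
 * deliver pending interrupts, process requests and arm guest debugging.
 */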
4776  static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4777  {
4778  	int rc, cpuflags;
4779  
4780  	/*
4781  	 * On s390 notifications for arriving pages will be delivered directly
4782  	 * to the guest, but the housekeeping for completed pfaults is
4783  	 * handled outside the worker.
4784  	 */
4785  	kvm_check_async_pf_completion(vcpu);
4786  
4787  	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4788  	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4789  
4790  	if (need_resched())
4791  		schedule();
4792  
4793  	if (!kvm_is_ucontrol(vcpu->kvm)) {
4794  		rc = kvm_s390_deliver_pending_interrupts(vcpu);
4795  		if (rc || guestdbg_exit_pending(vcpu))
4796  			return rc;
4797  	}
4798  
4799  	rc = kvm_s390_handle_requests(vcpu);
4800  	if (rc)
4801  		return rc;
4802  
4803  	if (guestdbg_enabled(vcpu)) {
4804  		kvm_s390_backup_guest_per_regs(vcpu);
4805  		kvm_s390_patch_guest_per_regs(vcpu);
4806  	}
4807  
4808  	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4809  
4810  	vcpu->arch.sie_block->icptcode = 0;
4811  	current->thread.gmap_int_code = 0;
4812  	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4813  	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4814  	trace_kvm_s390_sie_enter(vcpu, cpuflags);
4815  
4816  	return 0;
4817  }
4818  
4819  static int vcpu_post_run_addressing_exception(struct kvm_vcpu *vcpu)
4820  {
4821  	struct kvm_s390_pgm_info pgm_info = {
4822  		.code = PGM_ADDRESSING,
4823  	};
4824  	u8 opcode, ilen;
4825  	int rc;
4826  
4827  	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4828  	trace_kvm_s390_sie_fault(vcpu);
4829  
4830  	/*
4831  	 * We want to inject an addressing exception, which is defined as a
4832  	 * suppressing or terminating exception. However, since we came here
4833  	 * by a DAT access exception, the PSW still points to the faulting
4834  	 * instruction since DAT exceptions are nullifying. So we've got
4835  	 * to look up the current opcode to get the length of the instruction
4836  	 * to be able to forward the PSW.
4837  	 */
4838  	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4839  	ilen = insn_length(opcode);
4840  	if (rc < 0) {
4841  		return rc;
4842  	} else if (rc) {
4843  		/* Instruction-Fetching Exceptions - we can't detect the ilen.
4844  		 * Forward by arbitrary ilc, injection will take care of
4845  		 * nullification if necessary.
4846  		 */
4847  		pgm_info = vcpu->arch.pgm;
4848  		ilen = 4;
4849  	}
4850  	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4851  	kvm_s390_forward_psw(vcpu, ilen);
4852  	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4853  }
4854  
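/*
 * Guest faults are always reported against the primary address space;
 * anything else hitting this path indicates a KVM bug.
 */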
4855  static void kvm_s390_assert_primary_as(struct kvm_vcpu *vcpu)
4856  {
4857  	KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
4858  		"Unexpected program interrupt 0x%x, TEID 0x%016lx",
4859  		current->thread.gmap_int_code, current->thread.gmap_teid.val);
4860  }
4861  
4862  /*
4863   * __kvm_s390_handle_dat_fault() - handle a DAT fault for the gmap of a vcpu
4864   * @vcpu: the vCPU whose gmap is to be fixed up
4865   * @gfn: the guest frame number used for memslots (including fake memslots)
4866   * @gaddr: the gmap address, does not have to match @gfn for ucontrol gmaps
4867   * @flags: FOLL_* flags
4868   *
4869   * Return: 0 on success, < 0 in case of error.
4870   * Context: The mm lock must not be held before calling. May sleep.
4871   */
4872  int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags)
4873  {
4874  	struct kvm_memory_slot *slot;
4875  	unsigned int fault_flags;
4876  	bool writable, unlocked;
4877  	unsigned long vmaddr;
4878  	struct page *page;
4879  	kvm_pfn_t pfn;
4880  	int rc;
4881  
4882  	slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
4883  	if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
4884  		return vcpu_post_run_addressing_exception(vcpu);
4885  
4886  	fault_flags = flags & FOLL_WRITE ? FAULT_FLAG_WRITE : 0;
4887  	if (vcpu->arch.gmap->pfault_enabled)
4888  		flags |= FOLL_NOWAIT;
4889  	vmaddr = __gfn_to_hva_memslot(slot, gfn);
4890  
4891  try_again:
4892  	pfn = __kvm_faultin_pfn(slot, gfn, flags, &writable, &page);
4893  
4894  	/* Access outside memory, inject addressing exception */
4895  	if (is_noslot_pfn(pfn))
4896  		return vcpu_post_run_addressing_exception(vcpu);
4897  	/* Signal pending: try again */
4898  	if (pfn == KVM_PFN_ERR_SIGPENDING)
4899  		return -EAGAIN;
4900  
4901  	/* Needs I/O, try to setup async pfault (only possible with FOLL_NOWAIT) */
4902  	if (pfn == KVM_PFN_ERR_NEEDS_IO) {
4903  		trace_kvm_s390_major_guest_pfault(vcpu);
4904  		if (kvm_arch_setup_async_pf(vcpu))
4905  			return 0;
4906  		vcpu->stat.pfault_sync++;
4907  		/* Could not setup async pfault, try again synchronously */
4908  		flags &= ~FOLL_NOWAIT;
4909  		goto try_again;
4910  	}
4911  	/* Any other error */
4912  	if (is_error_pfn(pfn))
4913  		return -EFAULT;
4914  
4915  	/* Success */
4916  	mmap_read_lock(vcpu->arch.gmap->mm);
4917  	/* Mark the userspace PTEs as young and/or dirty, to avoid page fault loops */
4918  	rc = fixup_user_fault(vcpu->arch.gmap->mm, vmaddr, fault_flags, &unlocked);
4919  	if (!rc)
4920  		rc = __gmap_link(vcpu->arch.gmap, gaddr, vmaddr);
4921  	scoped_guard(spinlock, &vcpu->kvm->mmu_lock) {
4922  		kvm_release_faultin_page(vcpu->kvm, page, false, writable);
4923  	}
4924  	mmap_read_unlock(vcpu->arch.gmap->mm);
4925  	return rc;
4926  }
4927  
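/*
 * Resolve a guest DAT fault at @gaddr. For ucontrol VMs the address is
 * first translated through the per-vCPU gmap; a failing translation is
 * reported to userspace as a KVM_EXIT_S390_UCONTROL exit.
 */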
4928  static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, unsigned int flags)
4929  {
4930  	unsigned long gaddr_tmp;
4931  	gfn_t gfn;
4932  
4933  	gfn = gpa_to_gfn(gaddr);
4934  	if (kvm_is_ucontrol(vcpu->kvm)) {
4935  		/*
4936  		 * This translates the per-vCPU guest address into a
4937  		 * fake guest address, which can then be used with the
4938  		 * fake memslots that are identity mapping userspace.
4939  		 * This allows ucontrol VMs to use the normal fault
4940  		 * resolution path, like normal VMs.
4941  		 */
4942  		mmap_read_lock(vcpu->arch.gmap->mm);
4943  		gaddr_tmp = __gmap_translate(vcpu->arch.gmap, gaddr);
4944  		mmap_read_unlock(vcpu->arch.gmap->mm);
4945  		if (gaddr_tmp == -EFAULT) {
4946  			vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4947  			vcpu->run->s390_ucontrol.trans_exc_code = gaddr;
4948  			vcpu->run->s390_ucontrol.pgm_code = PGM_SEGMENT_TRANSLATION;
4949  			return -EREMOTE;
4950  		}
4951  		gfn = gpa_to_gfn(gaddr_tmp);
4952  	}
4953  	return __kvm_s390_handle_dat_fault(vcpu, gfn, gaddr, flags);
4954  }
4955  
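/*
 * Handle the program interrupt that caused the last SIE exit: resolve DAT
 * faults, convert or destroy pages of protected guests, and treat any
 * unexpected interruption code as a KVM bug.
 */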
4956  static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
4957  {
4958  	unsigned int flags = 0;
4959  	unsigned long gaddr;
4960  	int rc;
4961  
4962  	gaddr = current->thread.gmap_teid.addr * PAGE_SIZE;
4963  	if (kvm_s390_cur_gmap_fault_is_write())
4964  		flags = FAULT_FLAG_WRITE;
4965  
4966  	switch (current->thread.gmap_int_code & PGM_INT_CODE_MASK) {
4967  	case 0:
4968  		vcpu->stat.exit_null++;
4969  		break;
4970  	case PGM_SECURE_STORAGE_ACCESS:
4971  	case PGM_SECURE_STORAGE_VIOLATION:
4972  		kvm_s390_assert_primary_as(vcpu);
4973  		/*
4974  		 * This can happen after a reboot with asynchronous teardown;
4975  		 * the new guest (normal or protected) will run on top of the
4976  		 * previous protected guest. The old pages need to be destroyed
4977  		 * so the new guest can use them.
4978  		 */
4979  		if (kvm_s390_pv_destroy_page(vcpu->kvm, gaddr)) {
4980  			/*
4981  			 * Either KVM messed up the secure guest mapping or the
4982  			 * same page is mapped into multiple secure guests.
4983  			 *
4984  			 * This exception is only triggered when a guest 2 is
4985  			 * running and can therefore never occur in kernel
4986  			 * context.
4987  			 */
4988  			pr_warn_ratelimited("Secure storage violation (%x) in task: %s, pid %d\n",
4989  					    current->thread.gmap_int_code, current->comm,
4990  					    current->pid);
4991  			send_sig(SIGSEGV, current, 0);
4992  		}
4993  		break;
4994  	case PGM_NON_SECURE_STORAGE_ACCESS:
4995  		kvm_s390_assert_primary_as(vcpu);
4996  		/*
4997  		 * This is normal operation; a page belonging to a protected
4998  		 * guest has not been imported yet. Try to import the page into
4999  		 * the protected guest.
5000  		 */
5001  		rc = kvm_s390_pv_convert_to_secure(vcpu->kvm, gaddr);
5002  		if (rc == -EINVAL)
5003  			send_sig(SIGSEGV, current, 0);
5004  		if (rc != -ENXIO)
5005  			break;
5006  		flags = FAULT_FLAG_WRITE;
5007  		fallthrough;
5008  	case PGM_PROTECTION:
5009  	case PGM_SEGMENT_TRANSLATION:
5010  	case PGM_PAGE_TRANSLATION:
5011  	case PGM_ASCE_TYPE:
5012  	case PGM_REGION_FIRST_TRANS:
5013  	case PGM_REGION_SECOND_TRANS:
5014  	case PGM_REGION_THIRD_TRANS:
5015  		kvm_s390_assert_primary_as(vcpu);
5016  		return vcpu_dat_fault_handler(vcpu, gaddr, flags);
5017  	default:
5018  		KVM_BUG(1, vcpu->kvm, "Unexpected program interrupt 0x%x, TEID 0x%016lx",
5019  			current->thread.gmap_int_code, current->thread.gmap_teid.val);
5020  		send_sig(SIGSEGV, current, 0);
5021  		break;
5022  	}
5023  	return 0;
5024  }
5025  
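/*
 * Post-process a SIE exit: sync registers back to kvm_run, re-inject
 * machine checks signalled via -EINTR, handle known intercepts, and fall
 * back to host fault handling or a KVM_EXIT_S390_SIEIC userspace exit.
 */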
5026  static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
5027  {
5028  	struct mcck_volatile_info *mcck_info;
5029  	struct sie_page *sie_page;
5030  	int rc;
5031  
5032  	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
5033  		   vcpu->arch.sie_block->icptcode);
5034  	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
5035  
5036  	if (guestdbg_enabled(vcpu))
5037  		kvm_s390_restore_guest_per_regs(vcpu);
5038  
5039  	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
5040  	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
5041  
5042  	if (exit_reason == -EINTR) {
5043  		VCPU_EVENT(vcpu, 3, "%s", "machine check");
5044  		sie_page = container_of(vcpu->arch.sie_block,
5045  					struct sie_page, sie_block);
5046  		mcck_info = &sie_page->mcck_info;
5047  		kvm_s390_reinject_machine_check(vcpu, mcck_info);
5048  		return 0;
5049  	}
5050  
5051  	if (vcpu->arch.sie_block->icptcode > 0) {
5052  		rc = kvm_handle_sie_intercept(vcpu);
5053  
5054  		if (rc != -EOPNOTSUPP)
5055  			return rc;
5056  		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
5057  		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
5058  		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
5059  		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
5060  		return -EREMOTE;
5061  	}
5062  
5063  	return vcpu_post_run_handle_fault(vcpu);
5064  }
5065  
5066  int noinstr kvm_s390_enter_exit_sie(struct kvm_s390_sie_block *scb,
5067  				    u64 *gprs, unsigned long gasce)
5068  {
5069  	int ret;
5070  
5071  	guest_state_enter_irqoff();
5072  
5073  	/*
5074  	 * The guest_state_{enter,exit}_irqoff() functions inform lockdep and
5075  	 * tracing that entry to the guest will enable host IRQs, and exit from
5076  	 * the guest will disable host IRQs.
5077  	 *
5078  	 * We must not use lockdep/tracing/RCU in this critical section, so we
5079  	 * use the low-level arch_local_irq_*() helpers to enable/disable IRQs.
5080  	 */
5081  	arch_local_irq_enable();
5082  	ret = sie64a(scb, gprs, gasce);
5083  	arch_local_irq_disable();
5084  
5085  	guest_state_exit_irqoff();
5086  
5087  	return ret;
5088  }
5089  
5090  #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
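/*
 * The main run loop: prepare the vCPU, enter SIE with SRCU dropped, then
 * post-process each exit until a signal, a pending guest-debug exit or an
 * error stops the loop.
 */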
5091  static int __vcpu_run(struct kvm_vcpu *vcpu)
5092  {
5093  	int rc, exit_reason;
5094  	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
5095  
5096  	/*
5097  	 * We try to hold kvm->srcu during most of vcpu_run (except when
5098  	 * running the guest), so that memslots (and other stuff) are protected.
5099  	 */
5100  	kvm_vcpu_srcu_read_lock(vcpu);
5101  
5102  	do {
5103  		rc = vcpu_pre_run(vcpu);
5104  		if (rc || guestdbg_exit_pending(vcpu))
5105  			break;
5106  
5107  		kvm_vcpu_srcu_read_unlock(vcpu);
5108  		/*
5109  		 * As PF_VCPU will be used in the fault handler, there must be
5110  		 * no uaccess between guest_timing_enter_irqoff and
5111  		 * guest_timing_exit_irqoff.
5112  		 */
5113  		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5114  			memcpy(sie_page->pv_grregs,
5115  			       vcpu->run->s.regs.gprs,
5116  			       sizeof(sie_page->pv_grregs));
5117  		}
5118  
5119  		local_irq_disable();
5120  		guest_timing_enter_irqoff();
5121  		__disable_cpu_timer_accounting(vcpu);
5122  
5123  		exit_reason = kvm_s390_enter_exit_sie(vcpu->arch.sie_block,
5124  						      vcpu->run->s.regs.gprs,
5125  						      vcpu->arch.gmap->asce);
5126  
5127  		__enable_cpu_timer_accounting(vcpu);
5128  		guest_timing_exit_irqoff();
5129  		local_irq_enable();
5130  
5131  		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5132  			memcpy(vcpu->run->s.regs.gprs,
5133  			       sie_page->pv_grregs,
5134  			       sizeof(sie_page->pv_grregs));
5135  			/*
5136  			 * We're not allowed to inject interrupts on intercepts
5137  			 * that leave the guest state in an "in-between" state
5138  			 * where the next SIE entry will do a continuation.
5139  			 * Fence interrupts in our "internal" PSW.
5140  			 */
5141  			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
5142  			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
5143  				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
5144  			}
5145  		}
5146  		kvm_vcpu_srcu_read_lock(vcpu);
5147  
5148  		rc = vcpu_post_run(vcpu, exit_reason);
5149  	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
5150  
5151  	kvm_vcpu_srcu_read_unlock(vcpu);
5152  	return rc;
5153  }
5154  
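/*
 * Sync the format-2 only parts of kvm_run that userspace marked dirty
 * into the SIE control block; not used for protected (PV) guests.
 */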
5155  static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
5156  {
5157  	struct kvm_run *kvm_run = vcpu->run;
5158  	struct runtime_instr_cb *riccb;
5159  	struct gs_cb *gscb;
5160  
5161  	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
5162  	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
5163  	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
5164  	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
5165  	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
5166  		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
5167  		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
5168  		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
5169  	}
5170  	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
5171  		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
5172  		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
5173  		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
5174  		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
5175  			kvm_clear_async_pf_completion_queue(vcpu);
5176  	}
5177  	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
5178  		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
5179  		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
5180  		VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
5181  	}
5182  	/*
5183  	 * If userspace sets the riccb (e.g. after migration) to a valid state,
5184  	 * we should enable RI here instead of doing the lazy enablement.
5185  	 */
5186  	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
5187  	    test_kvm_facility(vcpu->kvm, 64) &&
5188  	    riccb->v &&
5189  	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
5190  		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
5191  		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
5192  	}
5193  	/*
5194  	 * If userspace sets the gscb (e.g. after migration) to non-zero,
5195  	 * we should enable GS here instead of doing the lazy enablement.
5196  	 */
5197  	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
5198  	    test_kvm_facility(vcpu->kvm, 133) &&
5199  	    gscb->gssm &&
5200  	    !vcpu->arch.gs_enabled) {
5201  		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
5202  		vcpu->arch.sie_block->ecb |= ECB_GS;
5203  		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
5204  		vcpu->arch.gs_enabled = 1;
5205  	}
5206  	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
5207  	    test_kvm_facility(vcpu->kvm, 82)) {
5208  		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
5209  		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
5210  	}
5211  	if (cpu_has_gs()) {
5212  		preempt_disable();
5213  		local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
5214  		if (current->thread.gs_cb) {
5215  			vcpu->arch.host_gscb = current->thread.gs_cb;
5216  			save_gs_cb(vcpu->arch.host_gscb);
5217  		}
5218  		if (vcpu->arch.gs_enabled) {
5219  			current->thread.gs_cb = (struct gs_cb *)
5220  						&vcpu->run->s.regs.gscb;
5221  			restore_gs_cb(current->thread.gs_cb);
5222  		}
5223  		preempt_enable();
5224  	}
5225  	/* SIE will load etoken directly from SDNX and therefore kvm_run */
5226  }
5227  
5228  static void sync_regs(struct kvm_vcpu *vcpu)
5229  {
5230  	struct kvm_run *kvm_run = vcpu->run;
5231  
5232  	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
5233  		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
5234  	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
5235  		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
5236  		/* some control register changes require a tlb flush */
5237  		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
5238  	}
5239  	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
5240  		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
5241  		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
5242  	}
5243  	save_access_regs(vcpu->arch.host_acrs);
5244  	restore_access_regs(vcpu->run->s.regs.acrs);
5245  	vcpu->arch.acrs_loaded = true;
5246  	kvm_s390_fpu_load(vcpu->run);
5247  	/* Sync fmt2 only data */
5248  	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
5249  		sync_regs_fmt2(vcpu);
5250  	} else {
5251  		/*
5252  		 * In several places we have to modify our internal view to
5253  		 * not do things that are disallowed by the ultravisor. For
5254  		 * example we must not inject interrupts after specific exits
5255  		 * (e.g. 112 prefix page not secure). We do this by turning
5256  		 * off the machine check, external and I/O interrupt bits
5257  		 * of our PSW copy. To avoid getting validity intercepts, we
5258  		 * do only accept the condition code from userspace.
5259  		 */
5260  		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
5261  		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
5262  						   PSW_MASK_CC;
5263  	}
5264  
5265  	kvm_run->kvm_dirty_regs = 0;
5266  }
5267  
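/* Counterpart of sync_regs_fmt2(): write format-2 only state back to kvm_run. */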
5268  static void store_regs_fmt2(struct kvm_vcpu *vcpu)
5269  {
5270  	struct kvm_run *kvm_run = vcpu->run;
5271  
5272  	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
5273  	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
5274  	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
5275  	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
5276  	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
5277  	if (cpu_has_gs()) {
5278  		preempt_disable();
5279  		local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
5280  		if (vcpu->arch.gs_enabled)
5281  			save_gs_cb(current->thread.gs_cb);
5282  		current->thread.gs_cb = vcpu->arch.host_gscb;
5283  		restore_gs_cb(vcpu->arch.host_gscb);
5284  		if (!vcpu->arch.host_gscb)
5285  			local_ctl_clear_bit(2, CR2_GUARDED_STORAGE_BIT);
5286  		vcpu->arch.host_gscb = NULL;
5287  		preempt_enable();
5288  	}
5289  	/* SIE will save etoken directly into SDNX and therefore kvm_run */
5290  }
5291  
5292  static void store_regs(struct kvm_vcpu *vcpu)
5293  {
5294  	struct kvm_run *kvm_run = vcpu->run;
5295  
5296  	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
5297  	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
5298  	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
5299  	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
5300  	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
5301  	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
5302  	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
5303  	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
5304  	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
5305  	save_access_regs(vcpu->run->s.regs.acrs);
5306  	restore_access_regs(vcpu->arch.host_acrs);
5307  	vcpu->arch.acrs_loaded = false;
5308  	kvm_s390_fpu_store(vcpu->run);
5309  	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
5310  		store_regs_fmt2(vcpu);
5311  }
5312  
5313  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
5314  {
5315  	struct kvm_run *kvm_run = vcpu->run;
5316  	DECLARE_KERNEL_FPU_ONSTACK32(fpu);
5317  	int rc;
5318  
5319  	/*
5320  	 * Running a VM while dumping always has the potential to
5321  	 * produce inconsistent dump data. But for PV vcpus a SIE
5322  	 * entry while dumping could also lead to a fatal validity
5323  	 * intercept which we absolutely want to avoid.
5324  	 */
5325  	if (vcpu->kvm->arch.pv.dumping)
5326  		return -EINVAL;
5327  
5328  	if (!vcpu->wants_to_run)
5329  		return -EINTR;
5330  
5331  	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
5332  	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
5333  		return -EINVAL;
5334  
5335  	vcpu_load(vcpu);
5336  
5337  	if (guestdbg_exit_pending(vcpu)) {
5338  		kvm_s390_prepare_debug_exit(vcpu);
5339  		rc = 0;
5340  		goto out;
5341  	}
5342  
5343  	kvm_sigset_activate(vcpu);
5344  
5345  	/*
5346  	 * No need to check the return value of vcpu_start: it can only fail
5347  	 * for protvirt, but protvirt implies user-controlled cpu state, so
5348  	 * this branch is not taken in that case.
5348  	 */
5349  	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
5350  		kvm_s390_vcpu_start(vcpu);
5351  	} else if (is_vcpu_stopped(vcpu)) {
5352  		pr_err_ratelimited("can't run stopped vcpu %d\n",
5353  				   vcpu->vcpu_id);
5354  		rc = -EINVAL;
5355  		goto out;
5356  	}
5357  
5358  	kernel_fpu_begin(&fpu, KERNEL_FPC | KERNEL_VXR);
5359  	sync_regs(vcpu);
5360  	enable_cpu_timer_accounting(vcpu);
5361  
5362  	might_fault();
5363  	rc = __vcpu_run(vcpu);
5364  
5365  	if (signal_pending(current) && !rc) {
5366  		kvm_run->exit_reason = KVM_EXIT_INTR;
5367  		rc = -EINTR;
5368  	}
5369  
5370  	if (guestdbg_exit_pending(vcpu) && !rc)  {
5371  		kvm_s390_prepare_debug_exit(vcpu);
5372  		rc = 0;
5373  	}
5374  
5375  	if (rc == -EREMOTE) {
5376  		/* userspace support is needed, kvm_run has been prepared */
5377  		rc = 0;
5378  	}
5379  
5380  	disable_cpu_timer_accounting(vcpu);
5381  	store_regs(vcpu);
5382  	kernel_fpu_end(&fpu, KERNEL_FPC | KERNEL_VXR);
5383  
5384  	kvm_sigset_deactivate(vcpu);
5385  
5386  	vcpu->stat.exit_userspace++;
5387  out:
5388  	vcpu_put(vcpu);
5389  	return rc;
5390  }
5391  
5392  /*
5393   * store status at address
5394   * we use have two special cases:
5395   * we have two special cases:
5396   * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
5397   */
5398  int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
5399  {
5400  	unsigned char archmode = 1;
5401  	freg_t fprs[NUM_FPRS];
5402  	unsigned int px;
5403  	u64 clkcomp, cputm;
5404  	int rc;
5405  
5406  	px = kvm_s390_get_prefix(vcpu);
5407  	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
5408  		if (write_guest_abs(vcpu, 163, &archmode, 1))
5409  			return -EFAULT;
5410  		gpa = 0;
5411  	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
5412  		if (write_guest_real(vcpu, 163, &archmode, 1))
5413  			return -EFAULT;
5414  		gpa = px;
5415  	} else
5416  		gpa -= __LC_FPREGS_SAVE_AREA;
5417  
5418  	/* manually convert vector registers if necessary */
5419  	if (cpu_has_vx()) {
5420  		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
5421  		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
5422  				     fprs, 128);
5423  	} else {
5424  		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
5425  				     vcpu->run->s.regs.fprs, 128);
5426  	}
5427  	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
5428  			      vcpu->run->s.regs.gprs, 128);
5429  	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
5430  			      &vcpu->arch.sie_block->gpsw, 16);
5431  	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
5432  			      &px, 4);
5433  	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
5434  			      &vcpu->run->s.regs.fpc, 4);
5435  	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
5436  			      &vcpu->arch.sie_block->todpr, 4);
5437  	cputm = kvm_s390_get_cpu_timer(vcpu);
5438  	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
5439  			      &cputm, 8);
5440  	clkcomp = vcpu->arch.sie_block->ckc >> 8;
5441  	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
5442  			      &clkcomp, 8);
5443  	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
5444  			      &vcpu->run->s.regs.acrs, 64);
5445  	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
5446  			      &vcpu->arch.sie_block->gcr, 128);
5447  	return rc ? -EFAULT : 0;
5448  }
5449  
5450  int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
5451  {
5452  	/*
5453  	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
5454  	 * switch in the run ioctl. Let's update our copies before we save
5455  	 * it into the save area
5456  	 */
5457  	kvm_s390_fpu_store(vcpu->run);
5458  	save_access_regs(vcpu->run->s.regs.acrs);
5459  
5460  	return kvm_s390_store_status_unloaded(vcpu, addr);
5461  }
5462  
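/*
 * IBS is used to speed up a VM while only a single vCPU is running; the
 * helpers below request enabling it when one vCPU remains started and
 * disabling it on all vCPUs as soon as a second vCPU starts.
 */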
5463  static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
5464  {
5465  	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
5466  	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
5467  }
5468  
5469  static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
5470  {
5471  	unsigned long i;
5472  	struct kvm_vcpu *vcpu;
5473  
5474  	kvm_for_each_vcpu(i, vcpu, kvm) {
5475  		__disable_ibs_on_vcpu(vcpu);
5476  	}
5477  }
5478  
5479  static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
5480  {
5481  	if (!sclp.has_ibs)
5482  		return;
5483  	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
5484  	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
5485  }
5486  
5487  int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
5488  {
5489  	int i, online_vcpus, r = 0, started_vcpus = 0;
5490  
5491  	if (!is_vcpu_stopped(vcpu))
5492  		return 0;
5493  
5494  	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
5495  	/* Only one cpu at a time may enter/leave the STOPPED state. */
5496  	spin_lock(&vcpu->kvm->arch.start_stop_lock);
5497  	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
5498  
5499  	/* Let's tell the UV that we want to change into the operating state */
5500  	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5501  		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
5502  		if (r) {
5503  			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5504  			return r;
5505  		}
5506  	}
5507  
5508  	for (i = 0; i < online_vcpus; i++) {
5509  		if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
5510  			started_vcpus++;
5511  	}
5512  
5513  	if (started_vcpus == 0) {
5514  		/* we're the only active VCPU -> speed it up */
5515  		__enable_ibs_on_vcpu(vcpu);
5516  	} else if (started_vcpus == 1) {
5517  		/*
5518  		 * As we are starting a second VCPU, we have to disable
5519  		 * the IBS facility on all VCPUs to remove potentially
5520  		 * outstanding ENABLE requests.
5521  		 */
5522  		__disable_ibs_on_all_vcpus(vcpu->kvm);
5523  	}
5524  
5525  	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
5526  	/*
5527  	 * The real PSW might have changed due to a RESTART interpreted by the
5528  	 * ultravisor. We block all interrupts and let the next sie exit
5529  	 * refresh our view.
5530  	 */
5531  	if (kvm_s390_pv_cpu_is_protected(vcpu))
5532  		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
5533  	/*
5534  	 * Another VCPU might have used IBS while we were offline.
5535  	 * Let's play safe and flush the VCPU at startup.
5536  	 */
5537  	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
5538  	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5539  	return 0;
5540  }
5541  
5542  int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
5543  {
5544  	int i, online_vcpus, r = 0, started_vcpus = 0;
5545  	struct kvm_vcpu *started_vcpu = NULL;
5546  
5547  	if (is_vcpu_stopped(vcpu))
5548  		return 0;
5549  
5550  	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
5551  	/* Only one cpu at a time may enter/leave the STOPPED state. */
5552  	spin_lock(&vcpu->kvm->arch.start_stop_lock);
5553  	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
5554  
5555  	/* Let's tell the UV that we want to change into the stopped state */
5556  	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5557  		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
5558  		if (r) {
5559  			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5560  			return r;
5561  		}
5562  	}
5563  
5564  	/*
5565  	 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
5566  	 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
5567  	 * have been fully processed. This will ensure that the VCPU
5568  	 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
5569  	 */
5570  	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
5571  	kvm_s390_clear_stop_irq(vcpu);
5572  
5573  	__disable_ibs_on_vcpu(vcpu);
5574  
5575  	for (i = 0; i < online_vcpus; i++) {
5576  		struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);
5577  
5578  		if (!is_vcpu_stopped(tmp)) {
5579  			started_vcpus++;
5580  			started_vcpu = tmp;
5581  		}
5582  	}
5583  
5584  	if (started_vcpus == 1) {
5585  		/*
5586  		 * As we only have one VCPU left, we want to enable the
5587  		 * IBS facility for that VCPU to speed it up.
5588  		 */
5589  		__enable_ibs_on_vcpu(started_vcpu);
5590  	}
5591  
5592  	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5593  	return 0;
5594  }
5595  
5596  static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
5597  				     struct kvm_enable_cap *cap)
5598  {
5599  	int r;
5600  
5601  	if (cap->flags)
5602  		return -EINVAL;
5603  
5604  	switch (cap->cap) {
5605  	case KVM_CAP_S390_CSS_SUPPORT:
5606  		if (!vcpu->kvm->arch.css_support) {
5607  			vcpu->kvm->arch.css_support = 1;
5608  			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
5609  			trace_kvm_s390_enable_css(vcpu->kvm);
5610  		}
5611  		r = 0;
5612  		break;
5613  	default:
5614  		r = -EINVAL;
5615  		break;
5616  	}
5617  	return r;
5618  }
5619  
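/*
 * Read from or write to the SIDA of a protected vCPU at the given offset;
 * only valid while the vCPU is protected.
 */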
5620  static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
5621  				  struct kvm_s390_mem_op *mop)
5622  {
5623  	void __user *uaddr = (void __user *)mop->buf;
5624  	void *sida_addr;
5625  	int r = 0;
5626  
5627  	if (mop->flags || !mop->size)
5628  		return -EINVAL;
5629  	if (mop->size + mop->sida_offset < mop->size)
5630  		return -EINVAL;
5631  	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
5632  		return -E2BIG;
5633  	if (!kvm_s390_pv_cpu_is_protected(vcpu))
5634  		return -EINVAL;
5635  
5636  	sida_addr = (char *)sida_addr(vcpu->arch.sie_block) + mop->sida_offset;
5637  
5638  	switch (mop->op) {
5639  	case KVM_S390_MEMOP_SIDA_READ:
5640  		if (copy_to_user(uaddr, sida_addr, mop->size))
5641  			r = -EFAULT;
5642  
5643  		break;
5644  	case KVM_S390_MEMOP_SIDA_WRITE:
5645  		if (copy_from_user(sida_addr, uaddr, mop->size))
5646  			r = -EFAULT;
5647  		break;
5648  	}
5649  	return r;
5650  }
5651  
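/*
 * KVM_S390_MEM_OP on logical guest addresses: after validating the
 * request, either only check access permissions (CHECK_ONLY) or copy the
 * data through a temporary buffer, honouring storage keys if requested.
 */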
5652  static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
5653  				 struct kvm_s390_mem_op *mop)
5654  {
5655  	void __user *uaddr = (void __user *)mop->buf;
5656  	enum gacc_mode acc_mode;
5657  	void *tmpbuf = NULL;
5658  	int r;
5659  
5660  	r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_INJECT_EXCEPTION |
5661  					KVM_S390_MEMOP_F_CHECK_ONLY |
5662  					KVM_S390_MEMOP_F_SKEY_PROTECTION);
5663  	if (r)
5664  		return r;
5665  	if (mop->ar >= NUM_ACRS)
5666  		return -EINVAL;
5667  	if (kvm_s390_pv_cpu_is_protected(vcpu))
5668  		return -EINVAL;
5669  	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
5670  		tmpbuf = vmalloc(mop->size);
5671  		if (!tmpbuf)
5672  			return -ENOMEM;
5673  	}
5674  
5675  	acc_mode = mop->op == KVM_S390_MEMOP_LOGICAL_READ ? GACC_FETCH : GACC_STORE;
5676  	if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
5677  		r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
5678  				    acc_mode, mop->key);
5679  		goto out_inject;
5680  	}
5681  	if (acc_mode == GACC_FETCH) {
5682  		r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
5683  					mop->size, mop->key);
5684  		if (r)
5685  			goto out_inject;
5686  		if (copy_to_user(uaddr, tmpbuf, mop->size)) {
5687  			r = -EFAULT;
5688  			goto out_free;
5689  		}
5690  	} else {
5691  		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
5692  			r = -EFAULT;
5693  			goto out_free;
5694  		}
5695  		r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
5696  					 mop->size, mop->key);
5697  	}
5698  
5699  out_inject:
5700  	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
5701  		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
5702  
5703  out_free:
5704  	vfree(tmpbuf);
5705  	return r;
5706  }
5707  
5708  static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
5709  				     struct kvm_s390_mem_op *mop)
5710  {
5711  	int r, srcu_idx;
5712  
5713  	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
5714  
5715  	switch (mop->op) {
5716  	case KVM_S390_MEMOP_LOGICAL_READ:
5717  	case KVM_S390_MEMOP_LOGICAL_WRITE:
5718  		r = kvm_s390_vcpu_mem_op(vcpu, mop);
5719  		break;
5720  	case KVM_S390_MEMOP_SIDA_READ:
5721  	case KVM_S390_MEMOP_SIDA_WRITE:
5722  		/* we are locked against sida going away by the vcpu->mutex */
5723  		r = kvm_s390_vcpu_sida_op(vcpu, mop);
5724  		break;
5725  	default:
5726  		r = -EINVAL;
5727  	}
5728  
5729  	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
5730  	return r;
5731  }
5732  
5733  long kvm_arch_vcpu_async_ioctl(struct file *filp,
5734  			       unsigned int ioctl, unsigned long arg)
5735  {
5736  	struct kvm_vcpu *vcpu = filp->private_data;
5737  	void __user *argp = (void __user *)arg;
5738  	int rc;
5739  
5740  	switch (ioctl) {
5741  	case KVM_S390_IRQ: {
5742  		struct kvm_s390_irq s390irq;
5743  
5744  		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
5745  			return -EFAULT;
5746  		rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
5747  		break;
5748  	}
5749  	case KVM_S390_INTERRUPT: {
5750  		struct kvm_s390_interrupt s390int;
5751  		struct kvm_s390_irq s390irq = {};
5752  
5753  		if (copy_from_user(&s390int, argp, sizeof(s390int)))
5754  			return -EFAULT;
5755  		if (s390int_to_s390irq(&s390int, &s390irq))
5756  			return -EINVAL;
5757  		rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
5758  		break;
5759  	}
5760  	default:
5761  		rc = -ENOIOCTLCMD;
5762  		break;
5763  	}
5764  
5765  	/*
5766  	 * To simplify single stepping of userspace-emulated instructions,
5767  	 * KVM_EXIT_S390_SIEIC exit sets KVM_GUESTDBG_EXIT_PENDING (see
5768  	 * should_handle_per_ifetch()). However, if userspace emulation injects
5769  	 * an interrupt, it needs to be cleared, so that KVM_EXIT_DEBUG happens
5770  	 * after (and not before) the interrupt delivery.
5771  	 */
5772  	if (!rc)
5773  		vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING;
5774  
5775  	return rc;
5776  }
5777  
5778  static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu,
5779  					struct kvm_pv_cmd *cmd)
5780  {
5781  	struct kvm_s390_pv_dmp dmp;
5782  	void *data;
5783  	int ret;
5784  
5785  	/* Dump initialization is a prerequisite */
5786  	if (!vcpu->kvm->arch.pv.dumping)
5787  		return -EINVAL;
5788  
5789  	if (copy_from_user(&dmp, (__u8 __user *)cmd->data, sizeof(dmp)))
5790  		return -EFAULT;
5791  
5792  	/* We only handle this subcmd right now */
5793  	if (dmp.subcmd != KVM_PV_DUMP_CPU)
5794  		return -EINVAL;
5795  
5796  	/* CPU dump length is the same as create cpu storage donation. */
5797  	if (dmp.buff_len != uv_info.guest_cpu_stor_len)
5798  		return -EINVAL;
5799  
5800  	data = kvzalloc(uv_info.guest_cpu_stor_len, GFP_KERNEL);
5801  	if (!data)
5802  		return -ENOMEM;
5803  
5804  	ret = kvm_s390_pv_dump_cpu(vcpu, data, &cmd->rc, &cmd->rrc);
5805  
5806  	VCPU_EVENT(vcpu, 3, "PROTVIRT DUMP CPU %d rc %x rrc %x",
5807  		   vcpu->vcpu_id, cmd->rc, cmd->rrc);
5808  
5809  	if (ret)
5810  		ret = -EINVAL;
5811  
5812  	/* On success copy over the dump data */
5813  	if (!ret && copy_to_user((__u8 __user *)dmp.buff_addr, data, uv_info.guest_cpu_stor_len))
5814  		ret = -EFAULT;
5815  
5816  	kvfree(data);
5817  	return ret;
5818  }
5819  
5820  long kvm_arch_vcpu_ioctl(struct file *filp,
5821  			 unsigned int ioctl, unsigned long arg)
5822  {
5823  	struct kvm_vcpu *vcpu = filp->private_data;
5824  	void __user *argp = (void __user *)arg;
5825  	int idx;
5826  	long r;
5827  	u16 rc, rrc;
5828  
5829  	vcpu_load(vcpu);
5830  
5831  	switch (ioctl) {
5832  	case KVM_S390_STORE_STATUS:
5833  		idx = srcu_read_lock(&vcpu->kvm->srcu);
5834  		r = kvm_s390_store_status_unloaded(vcpu, arg);
5835  		srcu_read_unlock(&vcpu->kvm->srcu, idx);
5836  		break;
5837  	case KVM_S390_SET_INITIAL_PSW: {
5838  		psw_t psw;
5839  
5840  		r = -EFAULT;
5841  		if (copy_from_user(&psw, argp, sizeof(psw)))
5842  			break;
5843  		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
5844  		break;
5845  	}
5846  	case KVM_S390_CLEAR_RESET:
5847  		r = 0;
5848  		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
5849  		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5850  			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5851  					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
5852  			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
5853  				   rc, rrc);
5854  		}
5855  		break;
5856  	case KVM_S390_INITIAL_RESET:
5857  		r = 0;
5858  		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
5859  		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5860  			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5861  					  UVC_CMD_CPU_RESET_INITIAL,
5862  					  &rc, &rrc);
5863  			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
5864  				   rc, rrc);
5865  		}
5866  		break;
5867  	case KVM_S390_NORMAL_RESET:
5868  		r = 0;
5869  		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
5870  		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5871  			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5872  					  UVC_CMD_CPU_RESET, &rc, &rrc);
5873  			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
5874  				   rc, rrc);
5875  		}
5876  		break;
5877  	case KVM_SET_ONE_REG:
5878  	case KVM_GET_ONE_REG: {
5879  		struct kvm_one_reg reg;
5880  		r = -EINVAL;
5881  		if (kvm_s390_pv_cpu_is_protected(vcpu))
5882  			break;
5883  		r = -EFAULT;
5884  		if (copy_from_user(&reg, argp, sizeof(reg)))
5885  			break;
5886  		if (ioctl == KVM_SET_ONE_REG)
5887  			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
5888  		else
5889  			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
5890  		break;
5891  	}
5892  #ifdef CONFIG_KVM_S390_UCONTROL
5893  	case KVM_S390_UCAS_MAP: {
5894  		struct kvm_s390_ucas_mapping ucasmap;
5895  
5896  		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
5897  			r = -EFAULT;
5898  			break;
5899  		}
5900  
5901  		if (!kvm_is_ucontrol(vcpu->kvm)) {
5902  			r = -EINVAL;
5903  			break;
5904  		}
5905  
5906  		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
5907  				     ucasmap.vcpu_addr, ucasmap.length);
5908  		break;
5909  	}
5910  	case KVM_S390_UCAS_UNMAP: {
5911  		struct kvm_s390_ucas_mapping ucasmap;
5912  
5913  		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
5914  			r = -EFAULT;
5915  			break;
5916  		}
5917  
5918  		if (!kvm_is_ucontrol(vcpu->kvm)) {
5919  			r = -EINVAL;
5920  			break;
5921  		}
5922  
5923  		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
5924  			ucasmap.length);
5925  		break;
5926  	}
5927  #endif
5928  	case KVM_S390_VCPU_FAULT: {
5929  		idx = srcu_read_lock(&vcpu->kvm->srcu);
5930  		r = vcpu_dat_fault_handler(vcpu, arg, 0);
5931  		srcu_read_unlock(&vcpu->kvm->srcu, idx);
5932  		break;
5933  	}
5934  	case KVM_ENABLE_CAP:
5935  	{
5936  		struct kvm_enable_cap cap;
5937  		r = -EFAULT;
5938  		if (copy_from_user(&cap, argp, sizeof(cap)))
5939  			break;
5940  		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
5941  		break;
5942  	}
5943  	case KVM_S390_MEM_OP: {
5944  		struct kvm_s390_mem_op mem_op;
5945  
5946  		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
5947  			r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op);
5948  		else
5949  			r = -EFAULT;
5950  		break;
5951  	}
5952  	case KVM_S390_SET_IRQ_STATE: {
5953  		struct kvm_s390_irq_state irq_state;
5954  
5955  		r = -EFAULT;
5956  		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5957  			break;
5958  		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
5959  		    irq_state.len == 0 ||
5960  		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
5961  			r = -EINVAL;
5962  			break;
5963  		}
5964  		/* do not use irq_state.flags, it will break old QEMUs */
5965  		r = kvm_s390_set_irq_state(vcpu,
5966  					   (void __user *) irq_state.buf,
5967  					   irq_state.len);
5968  		break;
5969  	}
5970  	case KVM_S390_GET_IRQ_STATE: {
5971  		struct kvm_s390_irq_state irq_state;
5972  
5973  		r = -EFAULT;
5974  		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5975  			break;
5976  		if (irq_state.len == 0) {
5977  			r = -EINVAL;
5978  			break;
5979  		}
5980  		/* do not use irq_state.flags, it will break old QEMUs */
5981  		r = kvm_s390_get_irq_state(vcpu,
5982  					   (__u8 __user *)  irq_state.buf,
5983  					   irq_state.len);
5984  		break;
5985  	}
5986  	case KVM_S390_PV_CPU_COMMAND: {
5987  		struct kvm_pv_cmd cmd;
5988  
5989  		r = -EINVAL;
5990  		if (!is_prot_virt_host())
5991  			break;
5992  
5993  		r = -EFAULT;
5994  		if (copy_from_user(&cmd, argp, sizeof(cmd)))
5995  			break;
5996  
5997  		r = -EINVAL;
5998  		if (cmd.flags)
5999  			break;
6000  
6001  		/* We only handle this cmd right now */
6002  		if (cmd.cmd != KVM_PV_DUMP)
6003  			break;
6004  
6005  		r = kvm_s390_handle_pv_vcpu_dump(vcpu, &cmd);
6006  
6007  		/* Always copy over UV rc / rrc data */
6008  		if (copy_to_user((__u8 __user *)argp, &cmd.rc,
6009  				 sizeof(cmd.rc) + sizeof(cmd.rrc)))
6010  			r = -EFAULT;
6011  		break;
6012  	}
6013  	default:
6014  		r = -ENOTTY;
6015  	}
6016  
6017  	vcpu_put(vcpu);
6018  	return r;
6019  }
6020  
6021  vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
6022  {
6023  #ifdef CONFIG_KVM_S390_UCONTROL
6024  	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
6025  		 && (kvm_is_ucontrol(vcpu->kvm))) {
6026  		vmf->page = virt_to_page(vcpu->arch.sie_block);
6027  		get_page(vmf->page);
6028  		return 0;
6029  	}
6030  #endif
6031  	return VM_FAULT_SIGBUS;
6032  }
6033  
6034  bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
6035  {
6036  	return true;
6037  }
6038  
6039  /* Section: memory related */
6040  int kvm_arch_prepare_memory_region(struct kvm *kvm,
6041  				   const struct kvm_memory_slot *old,
6042  				   struct kvm_memory_slot *new,
6043  				   enum kvm_mr_change change)
6044  {
6045  	gpa_t size;
6046  
6047  	if (kvm_is_ucontrol(kvm) && new->id < KVM_USER_MEM_SLOTS)
6048  		return -EINVAL;
6049  
6050  	/* When we are protected, we should not change the memory slots */
6051  	if (kvm_s390_pv_get_handle(kvm))
6052  		return -EINVAL;
6053  
6054  	if (change != KVM_MR_DELETE && change != KVM_MR_FLAGS_ONLY) {
6055  		/*
6056  		 * A few sanity checks. Memory slots have to start and end at a
6057  		 * segment boundary (1 MB). The memory in userland may be fragmented
6058  		 * into various different vmas. It is okay to mmap() and munmap()
6059  		 * memory in this slot after doing this call at any time.
6060  		 */
6061  
6062  		if (new->userspace_addr & 0xffffful)
6063  			return -EINVAL;
6064  
6065  		size = new->npages * PAGE_SIZE;
6066  		if (size & 0xffffful)
6067  			return -EINVAL;
6068  
6069  		if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
6070  			return -EINVAL;
6071  	}
6072  
6073  	if (!kvm->arch.migration_mode)
6074  		return 0;
6075  
6076  	/*
6077  	 * Turn off migration mode when:
6078  	 * - userspace creates a new memslot with dirty logging off,
6079  	 * - userspace modifies an existing memslot (MOVE or FLAGS_ONLY) and
6080  	 *   dirty logging is turned off.
6081  	 * Migration mode expects dirty page logging being enabled to store
6082  	 * its dirty bitmap.
6083  	 */
6084  	if (change != KVM_MR_DELETE &&
6085  	    !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
6086  		WARN(kvm_s390_vm_stop_migration(kvm),
6087  		     "Failed to stop migration mode");
6088  
6089  	return 0;
6090  }
6091  
6092  void kvm_arch_commit_memory_region(struct kvm *kvm,
6093  				struct kvm_memory_slot *old,
6094  				const struct kvm_memory_slot *new,
6095  				enum kvm_mr_change change)
6096  {
6097  	int rc = 0;
6098  
6099  	if (kvm_is_ucontrol(kvm))
6100  		return;
6101  
6102  	switch (change) {
6103  	case KVM_MR_DELETE:
6104  		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
6105  					old->npages * PAGE_SIZE);
6106  		break;
6107  	case KVM_MR_MOVE:
6108  		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
6109  					old->npages * PAGE_SIZE);
6110  		if (rc)
6111  			break;
6112  		fallthrough;
6113  	case KVM_MR_CREATE:
6114  		rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
6115  				      new->base_gfn * PAGE_SIZE,
6116  				      new->npages * PAGE_SIZE);
6117  		break;
6118  	case KVM_MR_FLAGS_ONLY:
6119  		break;
6120  	default:
6121  		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
6122  	}
6123  	if (rc)
6124  		pr_warn("failed to commit memory region\n");
6125  	return;
6126  }
6127  
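/*
 * Derive, from the SCLP hmfai information, the mask of facility bits in
 * doubleword @i that may be included in the base facility list for guests.
 */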
6128  static inline unsigned long nonhyp_mask(int i)
6129  {
6130  	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
6131  
6132  	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
6133  }
6134  
6135  static int __init kvm_s390_init(void)
6136  {
6137  	int i, r;
6138  
6139  	if (!sclp.has_sief2) {
6140  		pr_info("SIE is not available\n");
6141  		return -ENODEV;
6142  	}
6143  
6144  	if (nested && hpage) {
6145  		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
6146  		return -EINVAL;
6147  	}
6148  
6149  	for (i = 0; i < 16; i++)
6150  		kvm_s390_fac_base[i] |=
6151  			stfle_fac_list[i] & nonhyp_mask(i);
6152  
6153  	r = __kvm_s390_init();
6154  	if (r)
6155  		return r;
6156  
6157  	r = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE);
6158  	if (r) {
6159  		__kvm_s390_exit();
6160  		return r;
6161  	}
6162  	return 0;
6163  }
6164  
6165  static void __exit kvm_s390_exit(void)
6166  {
6167  	kvm_exit();
6168  
6169  	__kvm_s390_exit();
6170  }
6171  
6172  module_init(kvm_s390_init);
6173  module_exit(kvm_s390_exit);
6174  
6175  /*
6176   * Enable autoloading of the kvm module.
6177   * Note that we add the module alias here instead of virt/kvm/kvm_main.c
6178   * since x86 takes a different approach.
6179   */
6180  #include <linux/miscdevice.h>
6181  MODULE_ALIAS_MISCDEV(KVM_MINOR);
6182  MODULE_ALIAS("devname:kvm");
6183