xref: /linux/arch/s390/kvm/kvm-s390.c (revision dd093fb0)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2020
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
10  *               Jason J. Herne <jjherne@us.ibm.com>
11  */
12 
13 #define KMSG_COMPONENT "kvm-s390"
14 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
15 
16 #include <linux/compiler.h>
17 #include <linux/err.h>
18 #include <linux/fs.h>
19 #include <linux/hrtimer.h>
20 #include <linux/init.h>
21 #include <linux/kvm.h>
22 #include <linux/kvm_host.h>
23 #include <linux/mman.h>
24 #include <linux/module.h>
25 #include <linux/moduleparam.h>
26 #include <linux/random.h>
27 #include <linux/slab.h>
28 #include <linux/timer.h>
29 #include <linux/vmalloc.h>
30 #include <linux/bitmap.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/pgtable.h>
34 #include <linux/mmu_notifier.h>
35 
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
38 #include <asm/stp.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include <asm/uv.h>
48 #include <asm/fpu/api.h>
49 #include "kvm-s390.h"
50 #include "gaccess.h"
51 #include "pci.h"
52 
53 #define CREATE_TRACE_POINTS
54 #include "trace.h"
55 #include "trace-s390.h"
56 
57 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
58 #define LOCAL_IRQS 32
59 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
60 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
61 
62 const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
63 	KVM_GENERIC_VM_STATS(),
64 	STATS_DESC_COUNTER(VM, inject_io),
65 	STATS_DESC_COUNTER(VM, inject_float_mchk),
66 	STATS_DESC_COUNTER(VM, inject_pfault_done),
67 	STATS_DESC_COUNTER(VM, inject_service_signal),
68 	STATS_DESC_COUNTER(VM, inject_virtio),
69 	STATS_DESC_COUNTER(VM, aen_forward)
70 };
71 
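/*
 * The offsets below describe the layout of the binary stats file for a VM:
 * the header is followed by the id string of KVM_STATS_NAME_SIZE bytes,
 * then the descriptor array, then the counter data (see the generic KVM
 * binary stats interface).
 */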
72 const struct kvm_stats_header kvm_vm_stats_header = {
73 	.name_size = KVM_STATS_NAME_SIZE,
74 	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
75 	.id_offset = sizeof(struct kvm_stats_header),
76 	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
77 	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
78 		       sizeof(kvm_vm_stats_desc),
79 };
80 
81 const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
82 	KVM_GENERIC_VCPU_STATS(),
83 	STATS_DESC_COUNTER(VCPU, exit_userspace),
84 	STATS_DESC_COUNTER(VCPU, exit_null),
85 	STATS_DESC_COUNTER(VCPU, exit_external_request),
86 	STATS_DESC_COUNTER(VCPU, exit_io_request),
87 	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
88 	STATS_DESC_COUNTER(VCPU, exit_stop_request),
89 	STATS_DESC_COUNTER(VCPU, exit_validity),
90 	STATS_DESC_COUNTER(VCPU, exit_instruction),
91 	STATS_DESC_COUNTER(VCPU, exit_pei),
92 	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
93 	STATS_DESC_COUNTER(VCPU, instruction_lctl),
94 	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
95 	STATS_DESC_COUNTER(VCPU, instruction_stctl),
96 	STATS_DESC_COUNTER(VCPU, instruction_stctg),
97 	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
98 	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
99 	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
100 	STATS_DESC_COUNTER(VCPU, deliver_ckc),
101 	STATS_DESC_COUNTER(VCPU, deliver_cputm),
102 	STATS_DESC_COUNTER(VCPU, deliver_external_call),
103 	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
104 	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
105 	STATS_DESC_COUNTER(VCPU, deliver_virtio),
106 	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
107 	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
108 	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
109 	STATS_DESC_COUNTER(VCPU, deliver_program),
110 	STATS_DESC_COUNTER(VCPU, deliver_io),
111 	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
112 	STATS_DESC_COUNTER(VCPU, exit_wait_state),
113 	STATS_DESC_COUNTER(VCPU, inject_ckc),
114 	STATS_DESC_COUNTER(VCPU, inject_cputm),
115 	STATS_DESC_COUNTER(VCPU, inject_external_call),
116 	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
117 	STATS_DESC_COUNTER(VCPU, inject_mchk),
118 	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
119 	STATS_DESC_COUNTER(VCPU, inject_program),
120 	STATS_DESC_COUNTER(VCPU, inject_restart),
121 	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
122 	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
123 	STATS_DESC_COUNTER(VCPU, instruction_epsw),
124 	STATS_DESC_COUNTER(VCPU, instruction_gs),
125 	STATS_DESC_COUNTER(VCPU, instruction_io_other),
126 	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
127 	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
128 	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
129 	STATS_DESC_COUNTER(VCPU, instruction_ptff),
130 	STATS_DESC_COUNTER(VCPU, instruction_sck),
131 	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
132 	STATS_DESC_COUNTER(VCPU, instruction_stidp),
133 	STATS_DESC_COUNTER(VCPU, instruction_spx),
134 	STATS_DESC_COUNTER(VCPU, instruction_stpx),
135 	STATS_DESC_COUNTER(VCPU, instruction_stap),
136 	STATS_DESC_COUNTER(VCPU, instruction_iske),
137 	STATS_DESC_COUNTER(VCPU, instruction_ri),
138 	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
139 	STATS_DESC_COUNTER(VCPU, instruction_sske),
140 	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
141 	STATS_DESC_COUNTER(VCPU, instruction_stsi),
142 	STATS_DESC_COUNTER(VCPU, instruction_stfl),
143 	STATS_DESC_COUNTER(VCPU, instruction_tb),
144 	STATS_DESC_COUNTER(VCPU, instruction_tpi),
145 	STATS_DESC_COUNTER(VCPU, instruction_tprot),
146 	STATS_DESC_COUNTER(VCPU, instruction_tsch),
147 	STATS_DESC_COUNTER(VCPU, instruction_sie),
148 	STATS_DESC_COUNTER(VCPU, instruction_essa),
149 	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
150 	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
151 	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
152 	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
153 	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
154 	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
155 	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
156 	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
157 	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
158 	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
159 	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
160 	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
161 	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
162 	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
163 	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
164 	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
165 	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
166 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
167 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
168 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
169 	STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
170 	STATS_DESC_COUNTER(VCPU, diag_9c_forward),
171 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
172 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
173 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
174 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
175 	STATS_DESC_COUNTER(VCPU, pfault_sync)
176 };
177 
178 const struct kvm_stats_header kvm_vcpu_stats_header = {
179 	.name_size = KVM_STATS_NAME_SIZE,
180 	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
181 	.id_offset = sizeof(struct kvm_stats_header),
182 	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
183 	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
184 		       sizeof(kvm_vcpu_stats_desc),
185 };
186 
187 /* allow nested virtualization in KVM (if enabled by user space) */
188 static int nested;
189 module_param(nested, int, S_IRUGO);
190 MODULE_PARM_DESC(nested, "Nested virtualization support");
191 
192 /* allow 1m huge page guest backing, if !nested */
193 static int hpage;
194 module_param(hpage, int, 0444);
195 MODULE_PARM_DESC(hpage, "1m huge page backing support");
196 
197 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
198 static u8 halt_poll_max_steal = 10;
199 module_param(halt_poll_max_steal, byte, 0644);
200 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
201 
202 /* if set to true, the GISA will be initialized and used if available */
203 static bool use_gisa = true;
204 module_param(use_gisa, bool, 0644);
205 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
206 
207 /* maximum diag9c forwarding per second */
208 unsigned int diag9c_forwarding_hz;
209 module_param(diag9c_forwarding_hz, uint, 0644);
210 MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
211 
212 /*
213  * allow asynchronous deinit for protected guests; enable by default since
214  * the feature is opt-in anyway
215  */
216 static int async_destroy = 1;
217 module_param(async_destroy, int, 0444);
218 MODULE_PARM_DESC(async_destroy, "Asynchronous destroy for protected guests");
219 
220 /*
221  * For now we handle at most 16 double words as this is what the s390 base
222  * kernel handles and stores in the prefix page. If we ever need to go beyond
223  * this, it will require code changes, but the external uapi can stay.
224  */
225 #define SIZE_INTERNAL 16
226 
227 /*
228  * Base feature mask that defines default mask for facilities. Consists of the
229  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
230  */
231 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
232 /*
233  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
234  * and defines the facilities that can be enabled via a cpu model.
235  */
236 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
237 
238 static unsigned long kvm_s390_fac_size(void)
239 {
240 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
241 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
242 	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
243 		sizeof(stfle_fac_list));
244 
245 	return SIZE_INTERNAL;
246 }
247 
248 /* available cpu features supported by kvm */
249 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
250 /* available subfunctions indicated via query / "test bit" */
251 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
252 
253 static struct gmap_notifier gmap_notifier;
254 static struct gmap_notifier vsie_gmap_notifier;
255 debug_info_t *kvm_s390_dbf;
256 debug_info_t *kvm_s390_dbf_uv;
257 
258 /* Section: not file related */
259 int kvm_arch_hardware_enable(void)
260 {
261 	/* every s390 is virtualization enabled ;-) */
262 	return 0;
263 }
264 
265 int kvm_arch_check_processor_compat(void *opaque)
266 {
267 	return 0;
268 }
269 
270 /* forward declarations */
271 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
272 			      unsigned long end);
273 static int sca_switch_to_extended(struct kvm *kvm);
274 
275 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
276 {
277 	u8 delta_idx = 0;
278 
279 	/*
280 	 * The TOD jumps by delta, we have to compensate this by adding
281 	 * -delta to the epoch.
282 	 */
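	/*
	 * Illustration with 8-bit values: adding delta = 0x03 to
	 * epoch = 0xfe wraps to 0x01; the new epoch is then below delta,
	 * which signals a carry into the epoch index (epdx). Together
	 * with the sign extension of delta below, this implements a
	 * 128-bit signed addition to the (epdx, epoch) pair.
	 */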
283 	delta = -delta;
284 
285 	/* sign-extension - we're adding to signed values below */
286 	if ((s64)delta < 0)
287 		delta_idx = -1;
288 
289 	scb->epoch += delta;
290 	if (scb->ecd & ECD_MEF) {
291 		scb->epdx += delta_idx;
292 		if (scb->epoch < delta)
293 			scb->epdx += 1;
294 	}
295 }
296 
297 /*
298  * This callback is executed during stop_machine(). All CPUs are therefore
299  * temporarily stopped. In order not to change guest behavior, we have to
300  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
301  * so a CPU won't be stopped while calculating with the epoch.
302  */
303 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
304 			  void *v)
305 {
306 	struct kvm *kvm;
307 	struct kvm_vcpu *vcpu;
308 	unsigned long i;
309 	unsigned long long *delta = v;
310 
311 	list_for_each_entry(kvm, &vm_list, vm_list) {
312 		kvm_for_each_vcpu(i, vcpu, kvm) {
313 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
314 			if (i == 0) {
315 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
316 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
317 			}
318 			if (vcpu->arch.cputm_enabled)
319 				vcpu->arch.cputm_start += *delta;
320 			if (vcpu->arch.vsie_block)
321 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
322 						   *delta);
323 		}
324 	}
325 	return NOTIFY_OK;
326 }
327 
328 static struct notifier_block kvm_clock_notifier = {
329 	.notifier_call = kvm_clock_sync,
330 };
331 
332 int kvm_arch_hardware_setup(void *opaque)
333 {
334 	gmap_notifier.notifier_call = kvm_gmap_notifier;
335 	gmap_register_pte_notifier(&gmap_notifier);
336 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
337 	gmap_register_pte_notifier(&vsie_gmap_notifier);
338 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
339 				       &kvm_clock_notifier);
340 	return 0;
341 }
342 
343 void kvm_arch_hardware_unsetup(void)
344 {
345 	gmap_unregister_pte_notifier(&gmap_notifier);
346 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
347 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
348 					 &kvm_clock_notifier);
349 }
350 
351 static void allow_cpu_feat(unsigned long nr)
352 {
353 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
354 }
355 
356 static inline int plo_test_bit(unsigned char nr)
357 {
358 	unsigned long function = (unsigned long)nr | 0x100;
359 	int cc;
360 
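	/*
	 * Bit 0x100 in general register 0 selects the "test bit" form of
	 * PLO: the instruction only indicates via cc 0 whether function
	 * code nr is installed; no locked operation is performed.
	 */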
361 	asm volatile(
362 		"	lgr	0,%[function]\n"
363 		/* Parameter registers are ignored for "test bit" */
364 		"	plo	0,0,0,0(0)\n"
365 		"	ipm	%0\n"
366 		"	srl	%0,28\n"
367 		: "=d" (cc)
368 		: [function] "d" (function)
369 		: "cc", "0");
370 	return cc == 0;
371 }
372 
373 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
374 {
375 	asm volatile(
376 		"	lghi	0,0\n"
377 		"	lgr	1,%[query]\n"
378 		/* Parameter registers are ignored */
379 		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
380 		:
381 		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
382 		: "cc", "memory", "0", "1");
383 }
384 
385 #define INSN_SORTL 0xb938
386 #define INSN_DFLTCC 0xb939
387 
388 static void kvm_s390_cpu_feat_init(void)
389 {
390 	int i;
391 
392 	for (i = 0; i < 256; ++i) {
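	/*
	 * Probe all 256 PLO (PERFORM LOCKED OPERATION) function codes and
	 * record the available ones MSB-first: function code i lands in
	 * bit (0x80 >> (i & 7)) of byte i >> 3, e.g. code 10 sets 0x20 in
	 * plo[1].
	 */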
393 		if (plo_test_bit(i))
394 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
395 	}
396 
397 	if (test_facility(28)) /* TOD-clock steering */
398 		ptff(kvm_s390_available_subfunc.ptff,
399 		     sizeof(kvm_s390_available_subfunc.ptff),
400 		     PTFF_QAF);
401 
402 	if (test_facility(17)) { /* MSA */
403 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
404 			      kvm_s390_available_subfunc.kmac);
405 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
406 			      kvm_s390_available_subfunc.kmc);
407 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
408 			      kvm_s390_available_subfunc.km);
409 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
410 			      kvm_s390_available_subfunc.kimd);
411 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
412 			      kvm_s390_available_subfunc.klmd);
413 	}
414 	if (test_facility(76)) /* MSA3 */
415 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
416 			      kvm_s390_available_subfunc.pckmo);
417 	if (test_facility(77)) { /* MSA4 */
418 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
419 			      kvm_s390_available_subfunc.kmctr);
420 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
421 			      kvm_s390_available_subfunc.kmf);
422 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
423 			      kvm_s390_available_subfunc.kmo);
424 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
425 			      kvm_s390_available_subfunc.pcc);
426 	}
427 	if (test_facility(57)) /* MSA5 */
428 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
429 			      kvm_s390_available_subfunc.ppno);
430 
431 	if (test_facility(146)) /* MSA8 */
432 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
433 			      kvm_s390_available_subfunc.kma);
434 
435 	if (test_facility(155)) /* MSA9 */
436 		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
437 			      kvm_s390_available_subfunc.kdsa);
438 
439 	if (test_facility(150)) /* SORTL */
440 		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
441 
442 	if (test_facility(151)) /* DFLTCC */
443 		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
444 
445 	if (MACHINE_HAS_ESOP)
446 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
447 	/*
448 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
449 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
450 	 */
451 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
452 	    !test_facility(3) || !nested)
453 		return;
454 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
455 	if (sclp.has_64bscao)
456 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
457 	if (sclp.has_siif)
458 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
459 	if (sclp.has_gpere)
460 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
461 	if (sclp.has_gsls)
462 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
463 	if (sclp.has_ib)
464 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
465 	if (sclp.has_cei)
466 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
467 	if (sclp.has_ibs)
468 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
469 	if (sclp.has_kss)
470 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
471 	/*
472 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
473 	 * all skey handling functions read/set the skey from the PGSTE
474 	 * instead of the real storage key.
475 	 *
476 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
477 	 * pages to be detected as preserved although they are resident.
478 	 *
479 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
480 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
481 	 *
482 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
483 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
484 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
485 	 *
486 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
487 	 * cannot easily shadow the SCA because of the ipte lock.
488 	 */
489 }
490 
491 int kvm_arch_init(void *opaque)
492 {
493 	int rc = -ENOMEM;
494 
495 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
496 	if (!kvm_s390_dbf)
497 		return -ENOMEM;
498 
499 	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
500 	if (!kvm_s390_dbf_uv)
501 		goto out;
502 
503 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
504 	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
505 		goto out;
506 
507 	kvm_s390_cpu_feat_init();
508 
509 	/* Register floating interrupt controller interface. */
510 	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
511 	if (rc) {
512 		pr_err("A FLIC registration call failed with rc=%d\n", rc);
513 		goto out;
514 	}
515 
516 	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
517 		rc = kvm_s390_pci_init();
518 		if (rc) {
519 			pr_err("Unable to allocate AIFT for PCI\n");
520 			goto out;
521 		}
522 	}
523 
524 	rc = kvm_s390_gib_init(GAL_ISC);
525 	if (rc)
526 		goto out;
527 
528 	return 0;
529 
530 out:
531 	kvm_arch_exit();
532 	return rc;
533 }
534 
535 void kvm_arch_exit(void)
536 {
537 	kvm_s390_gib_destroy();
538 	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
539 		kvm_s390_pci_exit();
540 	debug_unregister(kvm_s390_dbf);
541 	debug_unregister(kvm_s390_dbf_uv);
542 }
543 
544 /* Section: device related */
545 long kvm_arch_dev_ioctl(struct file *filp,
546 			unsigned int ioctl, unsigned long arg)
547 {
548 	if (ioctl == KVM_S390_ENABLE_SIE)
549 		return s390_enable_sie();
550 	return -EINVAL;
551 }
552 
553 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
554 {
555 	int r;
556 
557 	switch (ext) {
558 	case KVM_CAP_S390_PSW:
559 	case KVM_CAP_S390_GMAP:
560 	case KVM_CAP_SYNC_MMU:
561 #ifdef CONFIG_KVM_S390_UCONTROL
562 	case KVM_CAP_S390_UCONTROL:
563 #endif
564 	case KVM_CAP_ASYNC_PF:
565 	case KVM_CAP_SYNC_REGS:
566 	case KVM_CAP_ONE_REG:
567 	case KVM_CAP_ENABLE_CAP:
568 	case KVM_CAP_S390_CSS_SUPPORT:
569 	case KVM_CAP_IOEVENTFD:
570 	case KVM_CAP_DEVICE_CTRL:
571 	case KVM_CAP_S390_IRQCHIP:
572 	case KVM_CAP_VM_ATTRIBUTES:
573 	case KVM_CAP_MP_STATE:
574 	case KVM_CAP_IMMEDIATE_EXIT:
575 	case KVM_CAP_S390_INJECT_IRQ:
576 	case KVM_CAP_S390_USER_SIGP:
577 	case KVM_CAP_S390_USER_STSI:
578 	case KVM_CAP_S390_SKEYS:
579 	case KVM_CAP_S390_IRQ_STATE:
580 	case KVM_CAP_S390_USER_INSTR0:
581 	case KVM_CAP_S390_CMMA_MIGRATION:
582 	case KVM_CAP_S390_AIS:
583 	case KVM_CAP_S390_AIS_MIGRATION:
584 	case KVM_CAP_S390_VCPU_RESETS:
585 	case KVM_CAP_SET_GUEST_DEBUG:
586 	case KVM_CAP_S390_DIAG318:
587 	case KVM_CAP_S390_MEM_OP_EXTENSION:
588 		r = 1;
589 		break;
590 	case KVM_CAP_SET_GUEST_DEBUG2:
591 		r = KVM_GUESTDBG_VALID_MASK;
592 		break;
593 	case KVM_CAP_S390_HPAGE_1M:
594 		r = 0;
595 		if (hpage && !kvm_is_ucontrol(kvm))
596 			r = 1;
597 		break;
598 	case KVM_CAP_S390_MEM_OP:
599 		r = MEM_OP_MAX_SIZE;
600 		break;
601 	case KVM_CAP_NR_VCPUS:
602 	case KVM_CAP_MAX_VCPUS:
603 	case KVM_CAP_MAX_VCPU_ID:
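		/*
		 * Default to the basic SCA limit. If SCA entries are not
		 * used at all, KVM_MAX_VCPUS applies; with the extended SCA
		 * and 64-bit SCAO the larger ESCA limit applies. For
		 * KVM_CAP_NR_VCPUS the result is additionally capped by the
		 * number of online host CPUs.
		 */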
604 		r = KVM_S390_BSCA_CPU_SLOTS;
605 		if (!kvm_s390_use_sca_entries())
606 			r = KVM_MAX_VCPUS;
607 		else if (sclp.has_esca && sclp.has_64bscao)
608 			r = KVM_S390_ESCA_CPU_SLOTS;
609 		if (ext == KVM_CAP_NR_VCPUS)
610 			r = min_t(unsigned int, num_online_cpus(), r);
611 		break;
612 	case KVM_CAP_S390_COW:
613 		r = MACHINE_HAS_ESOP;
614 		break;
615 	case KVM_CAP_S390_VECTOR_REGISTERS:
616 		r = MACHINE_HAS_VX;
617 		break;
618 	case KVM_CAP_S390_RI:
619 		r = test_facility(64);
620 		break;
621 	case KVM_CAP_S390_GS:
622 		r = test_facility(133);
623 		break;
624 	case KVM_CAP_S390_BPB:
625 		r = test_facility(82);
626 		break;
627 	case KVM_CAP_S390_PROTECTED_ASYNC_DISABLE:
628 		r = async_destroy && is_prot_virt_host();
629 		break;
630 	case KVM_CAP_S390_PROTECTED:
631 		r = is_prot_virt_host();
632 		break;
633 	case KVM_CAP_S390_PROTECTED_DUMP: {
634 		u64 pv_cmds_dump[] = {
635 			BIT_UVC_CMD_DUMP_INIT,
636 			BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE,
637 			BIT_UVC_CMD_DUMP_CPU,
638 			BIT_UVC_CMD_DUMP_COMPLETE,
639 		};
640 		int i;
641 
642 		r = is_prot_virt_host();
643 
644 		for (i = 0; i < ARRAY_SIZE(pv_cmds_dump); i++) {
645 			if (!test_bit_inv(pv_cmds_dump[i],
646 					  (unsigned long *)&uv_info.inst_calls_list)) {
647 				r = 0;
648 				break;
649 			}
650 		}
651 		break;
652 	}
653 	case KVM_CAP_S390_ZPCI_OP:
654 		r = kvm_s390_pci_interp_allowed();
655 		break;
656 	case KVM_CAP_S390_CPU_TOPOLOGY:
657 		r = test_facility(11);
658 		break;
659 	default:
660 		r = 0;
661 	}
662 	return r;
663 }
664 
665 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
666 {
667 	int i;
668 	gfn_t cur_gfn, last_gfn;
669 	unsigned long gaddr, vmaddr;
670 	struct gmap *gmap = kvm->arch.gmap;
671 	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
672 
673 	/* Loop over all guest segments */
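	/* One iteration handles one segment, i.e. _PAGE_ENTRIES 4k guest pages. */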
674 	cur_gfn = memslot->base_gfn;
675 	last_gfn = memslot->base_gfn + memslot->npages;
676 	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
677 		gaddr = gfn_to_gpa(cur_gfn);
678 		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
679 		if (kvm_is_error_hva(vmaddr))
680 			continue;
681 
682 		bitmap_zero(bitmap, _PAGE_ENTRIES);
683 		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
684 		for (i = 0; i < _PAGE_ENTRIES; i++) {
685 			if (test_bit(i, bitmap))
686 				mark_page_dirty(kvm, cur_gfn + i);
687 		}
688 
689 		if (fatal_signal_pending(current))
690 			return;
691 		cond_resched();
692 	}
693 }
694 
695 /* Section: vm related */
696 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
697 
698 /*
699  * Get (and clear) the dirty memory log for a memory slot.
700  */
701 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
702 			       struct kvm_dirty_log *log)
703 {
704 	int r;
705 	unsigned long n;
706 	struct kvm_memory_slot *memslot;
707 	int is_dirty;
708 
709 	if (kvm_is_ucontrol(kvm))
710 		return -EINVAL;
711 
712 	mutex_lock(&kvm->slots_lock);
713 
714 	r = -EINVAL;
715 	if (log->slot >= KVM_USER_MEM_SLOTS)
716 		goto out;
717 
718 	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
719 	if (r)
720 		goto out;
721 
722 	/* Clear the dirty log */
723 	if (is_dirty) {
724 		n = kvm_dirty_bitmap_bytes(memslot);
725 		memset(memslot->dirty_bitmap, 0, n);
726 	}
727 	r = 0;
728 out:
729 	mutex_unlock(&kvm->slots_lock);
730 	return r;
731 }
732 
733 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
734 {
735 	unsigned long i;
736 	struct kvm_vcpu *vcpu;
737 
738 	kvm_for_each_vcpu(i, vcpu, kvm) {
739 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
740 	}
741 }
742 
743 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
744 {
745 	int r;
746 
747 	if (cap->flags)
748 		return -EINVAL;
749 
750 	switch (cap->cap) {
751 	case KVM_CAP_S390_IRQCHIP:
752 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
753 		kvm->arch.use_irqchip = 1;
754 		r = 0;
755 		break;
756 	case KVM_CAP_S390_USER_SIGP:
757 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
758 		kvm->arch.user_sigp = 1;
759 		r = 0;
760 		break;
761 	case KVM_CAP_S390_VECTOR_REGISTERS:
762 		mutex_lock(&kvm->lock);
763 		if (kvm->created_vcpus) {
764 			r = -EBUSY;
765 		} else if (MACHINE_HAS_VX) {
766 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
767 			set_kvm_facility(kvm->arch.model.fac_list, 129);
768 			if (test_facility(134)) {
769 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
770 				set_kvm_facility(kvm->arch.model.fac_list, 134);
771 			}
772 			if (test_facility(135)) {
773 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
774 				set_kvm_facility(kvm->arch.model.fac_list, 135);
775 			}
776 			if (test_facility(148)) {
777 				set_kvm_facility(kvm->arch.model.fac_mask, 148);
778 				set_kvm_facility(kvm->arch.model.fac_list, 148);
779 			}
780 			if (test_facility(152)) {
781 				set_kvm_facility(kvm->arch.model.fac_mask, 152);
782 				set_kvm_facility(kvm->arch.model.fac_list, 152);
783 			}
784 			if (test_facility(192)) {
785 				set_kvm_facility(kvm->arch.model.fac_mask, 192);
786 				set_kvm_facility(kvm->arch.model.fac_list, 192);
787 			}
788 			r = 0;
789 		} else
790 			r = -EINVAL;
791 		mutex_unlock(&kvm->lock);
792 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
793 			 r ? "(not available)" : "(success)");
794 		break;
795 	case KVM_CAP_S390_RI:
796 		r = -EINVAL;
797 		mutex_lock(&kvm->lock);
798 		if (kvm->created_vcpus) {
799 			r = -EBUSY;
800 		} else if (test_facility(64)) {
801 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
802 			set_kvm_facility(kvm->arch.model.fac_list, 64);
803 			r = 0;
804 		}
805 		mutex_unlock(&kvm->lock);
806 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
807 			 r ? "(not available)" : "(success)");
808 		break;
809 	case KVM_CAP_S390_AIS:
810 		mutex_lock(&kvm->lock);
811 		if (kvm->created_vcpus) {
812 			r = -EBUSY;
813 		} else {
814 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
815 			set_kvm_facility(kvm->arch.model.fac_list, 72);
816 			r = 0;
817 		}
818 		mutex_unlock(&kvm->lock);
819 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
820 			 r ? "(not available)" : "(success)");
821 		break;
822 	case KVM_CAP_S390_GS:
823 		r = -EINVAL;
824 		mutex_lock(&kvm->lock);
825 		if (kvm->created_vcpus) {
826 			r = -EBUSY;
827 		} else if (test_facility(133)) {
828 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
829 			set_kvm_facility(kvm->arch.model.fac_list, 133);
830 			r = 0;
831 		}
832 		mutex_unlock(&kvm->lock);
833 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
834 			 r ? "(not available)" : "(success)");
835 		break;
836 	case KVM_CAP_S390_HPAGE_1M:
837 		mutex_lock(&kvm->lock);
838 		if (kvm->created_vcpus)
839 			r = -EBUSY;
840 		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
841 			r = -EINVAL;
842 		else {
843 			r = 0;
844 			mmap_write_lock(kvm->mm);
845 			kvm->mm->context.allow_gmap_hpage_1m = 1;
846 			mmap_write_unlock(kvm->mm);
847 			/*
848 			 * We might have to create fake 4k page
849 			 * tables. To prevent the hardware from working on
850 			 * stale PGSTEs, we emulate these instructions.
851 			 */
852 			kvm->arch.use_skf = 0;
853 			kvm->arch.use_pfmfi = 0;
854 		}
855 		mutex_unlock(&kvm->lock);
856 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
857 			 r ? "(not available)" : "(success)");
858 		break;
859 	case KVM_CAP_S390_USER_STSI:
860 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
861 		kvm->arch.user_stsi = 1;
862 		r = 0;
863 		break;
864 	case KVM_CAP_S390_USER_INSTR0:
865 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
866 		kvm->arch.user_instr0 = 1;
867 		icpt_operexc_on_all_vcpus(kvm);
868 		r = 0;
869 		break;
870 	case KVM_CAP_S390_CPU_TOPOLOGY:
871 		r = -EINVAL;
872 		mutex_lock(&kvm->lock);
873 		if (kvm->created_vcpus) {
874 			r = -EBUSY;
875 		} else if (test_facility(11)) {
876 			set_kvm_facility(kvm->arch.model.fac_mask, 11);
877 			set_kvm_facility(kvm->arch.model.fac_list, 11);
878 			r = 0;
879 		}
880 		mutex_unlock(&kvm->lock);
881 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s",
882 			 r ? "(not available)" : "(success)");
883 		break;
884 	default:
885 		r = -EINVAL;
886 		break;
887 	}
888 	return r;
889 }
890 
891 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
892 {
893 	int ret;
894 
895 	switch (attr->attr) {
896 	case KVM_S390_VM_MEM_LIMIT_SIZE:
897 		ret = 0;
898 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
899 			 kvm->arch.mem_limit);
900 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
901 			ret = -EFAULT;
902 		break;
903 	default:
904 		ret = -ENXIO;
905 		break;
906 	}
907 	return ret;
908 }
909 
910 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
911 {
912 	int ret;
913 	unsigned int idx;
914 	switch (attr->attr) {
915 	case KVM_S390_VM_MEM_ENABLE_CMMA:
916 		ret = -ENXIO;
917 		if (!sclp.has_cmma)
918 			break;
919 
920 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
921 		mutex_lock(&kvm->lock);
922 		if (kvm->created_vcpus)
923 			ret = -EBUSY;
924 		else if (kvm->mm->context.allow_gmap_hpage_1m)
925 			ret = -EINVAL;
926 		else {
927 			kvm->arch.use_cmma = 1;
928 			/* Not compatible with cmma. */
929 			kvm->arch.use_pfmfi = 0;
930 			ret = 0;
931 		}
932 		mutex_unlock(&kvm->lock);
933 		break;
934 	case KVM_S390_VM_MEM_CLR_CMMA:
935 		ret = -ENXIO;
936 		if (!sclp.has_cmma)
937 			break;
938 		ret = -EINVAL;
939 		if (!kvm->arch.use_cmma)
940 			break;
941 
942 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
943 		mutex_lock(&kvm->lock);
944 		idx = srcu_read_lock(&kvm->srcu);
945 		s390_reset_cmma(kvm->arch.gmap->mm);
946 		srcu_read_unlock(&kvm->srcu, idx);
947 		mutex_unlock(&kvm->lock);
948 		ret = 0;
949 		break;
950 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
951 		unsigned long new_limit;
952 
953 		if (kvm_is_ucontrol(kvm))
954 			return -EINVAL;
955 
956 		if (get_user(new_limit, (u64 __user *)attr->addr))
957 			return -EFAULT;
958 
959 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
960 		    new_limit > kvm->arch.mem_limit)
961 			return -E2BIG;
962 
963 		if (!new_limit)
964 			return -EINVAL;
965 
966 		/* gmap_create takes last usable address */
967 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
968 			new_limit -= 1;
969 
970 		ret = -EBUSY;
971 		mutex_lock(&kvm->lock);
972 		if (!kvm->created_vcpus) {
973 			/* gmap_create will round the limit up */
974 			struct gmap *new = gmap_create(current->mm, new_limit);
975 
976 			if (!new) {
977 				ret = -ENOMEM;
978 			} else {
979 				gmap_remove(kvm->arch.gmap);
980 				new->private = kvm;
981 				kvm->arch.gmap = new;
982 				ret = 0;
983 			}
984 		}
985 		mutex_unlock(&kvm->lock);
986 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
987 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
988 			 (void *) kvm->arch.gmap->asce);
989 		break;
990 	}
991 	default:
992 		ret = -ENXIO;
993 		break;
994 	}
995 	return ret;
996 }
997 
998 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
999 
1000 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
1001 {
1002 	struct kvm_vcpu *vcpu;
1003 	unsigned long i;
1004 
1005 	kvm_s390_vcpu_block_all(kvm);
1006 
1007 	kvm_for_each_vcpu(i, vcpu, kvm) {
1008 		kvm_s390_vcpu_crypto_setup(vcpu);
1009 		/* recreate the shadow crycb by leaving the VSIE handler */
1010 		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
1011 	}
1012 
1013 	kvm_s390_vcpu_unblock_all(kvm);
1014 }
1015 
1016 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
1017 {
1018 	mutex_lock(&kvm->lock);
1019 	switch (attr->attr) {
1020 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1021 		if (!test_kvm_facility(kvm, 76)) {
1022 			mutex_unlock(&kvm->lock);
1023 			return -EINVAL;
1024 		}
1025 		get_random_bytes(
1026 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1027 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1028 		kvm->arch.crypto.aes_kw = 1;
1029 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
1030 		break;
1031 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1032 		if (!test_kvm_facility(kvm, 76)) {
1033 			mutex_unlock(&kvm->lock);
1034 			return -EINVAL;
1035 		}
1036 		get_random_bytes(
1037 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1038 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1039 		kvm->arch.crypto.dea_kw = 1;
1040 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
1041 		break;
1042 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1043 		if (!test_kvm_facility(kvm, 76)) {
1044 			mutex_unlock(&kvm->lock);
1045 			return -EINVAL;
1046 		}
1047 		kvm->arch.crypto.aes_kw = 0;
1048 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
1049 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1050 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
1051 		break;
1052 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1053 		if (!test_kvm_facility(kvm, 76)) {
1054 			mutex_unlock(&kvm->lock);
1055 			return -EINVAL;
1056 		}
1057 		kvm->arch.crypto.dea_kw = 0;
1058 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
1059 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1060 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
1061 		break;
1062 	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1063 		if (!ap_instructions_available()) {
1064 			mutex_unlock(&kvm->lock);
1065 			return -EOPNOTSUPP;
1066 		}
1067 		kvm->arch.crypto.apie = 1;
1068 		break;
1069 	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1070 		if (!ap_instructions_available()) {
1071 			mutex_unlock(&kvm->lock);
1072 			return -EOPNOTSUPP;
1073 		}
1074 		kvm->arch.crypto.apie = 0;
1075 		break;
1076 	default:
1077 		mutex_unlock(&kvm->lock);
1078 		return -ENXIO;
1079 	}
1080 
1081 	kvm_s390_vcpu_crypto_reset_all(kvm);
1082 	mutex_unlock(&kvm->lock);
1083 	return 0;
1084 }
1085 
1086 static void kvm_s390_vcpu_pci_setup(struct kvm_vcpu *vcpu)
1087 {
1088 	/* Only set the ECB bits after guest requests zPCI interpretation */
1089 	if (!vcpu->kvm->arch.use_zpci_interp)
1090 		return;
1091 
1092 	vcpu->arch.sie_block->ecb2 |= ECB2_ZPCI_LSI;
1093 	vcpu->arch.sie_block->ecb3 |= ECB3_AISII + ECB3_AISI;
1094 }
1095 
1096 void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm)
1097 {
1098 	struct kvm_vcpu *vcpu;
1099 	unsigned long i;
1100 
1101 	lockdep_assert_held(&kvm->lock);
1102 
1103 	if (!kvm_s390_pci_interp_allowed())
1104 		return;
1105 
1106 	/*
1107 	 * If the host is configured for PCI and the necessary facilities
1108 	 * are available, turn on interpretation for the life of this guest.
1109 	 */
1110 	kvm->arch.use_zpci_interp = 1;
1111 
1112 	kvm_s390_vcpu_block_all(kvm);
1113 
1114 	kvm_for_each_vcpu(i, vcpu, kvm) {
1115 		kvm_s390_vcpu_pci_setup(vcpu);
1116 		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
1117 	}
1118 
1119 	kvm_s390_vcpu_unblock_all(kvm);
1120 }
1121 
1122 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1123 {
1124 	unsigned long cx;
1125 	struct kvm_vcpu *vcpu;
1126 
1127 	kvm_for_each_vcpu(cx, vcpu, kvm)
1128 		kvm_s390_sync_request(req, vcpu);
1129 }
1130 
1131 /*
1132  * Must be called with kvm->srcu held to avoid races on memslots, and with
1133  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1134  */
1135 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1136 {
1137 	struct kvm_memory_slot *ms;
1138 	struct kvm_memslots *slots;
1139 	unsigned long ram_pages = 0;
1140 	int bkt;
1141 
1142 	/* migration mode already enabled */
1143 	if (kvm->arch.migration_mode)
1144 		return 0;
1145 	slots = kvm_memslots(kvm);
1146 	if (!slots || kvm_memslots_empty(slots))
1147 		return -EINVAL;
1148 
1149 	if (!kvm->arch.use_cmma) {
1150 		kvm->arch.migration_mode = 1;
1151 		return 0;
1152 	}
1153 	/* mark all the pages in active slots as dirty */
1154 	kvm_for_each_memslot(ms, bkt, slots) {
1155 		if (!ms->dirty_bitmap)
1156 			return -EINVAL;
1157 		/*
1158 		 * The second half of the bitmap is only used on x86,
1159 		 * and would be wasted otherwise, so we put it to good
1160 		 * use here to keep track of the state of the storage
1161 		 * attributes.
1162 		 */
1163 		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1164 		ram_pages += ms->npages;
1165 	}
1166 	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1167 	kvm->arch.migration_mode = 1;
1168 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1169 	return 0;
1170 }
1171 
1172 /*
1173  * Must be called with kvm->slots_lock to avoid races with ourselves and
1174  * kvm_s390_vm_start_migration.
1175  */
1176 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1177 {
1178 	/* migration mode already disabled */
1179 	if (!kvm->arch.migration_mode)
1180 		return 0;
1181 	kvm->arch.migration_mode = 0;
1182 	if (kvm->arch.use_cmma)
1183 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1184 	return 0;
1185 }
1186 
1187 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1188 				     struct kvm_device_attr *attr)
1189 {
1190 	int res = -ENXIO;
1191 
1192 	mutex_lock(&kvm->slots_lock);
1193 	switch (attr->attr) {
1194 	case KVM_S390_VM_MIGRATION_START:
1195 		res = kvm_s390_vm_start_migration(kvm);
1196 		break;
1197 	case KVM_S390_VM_MIGRATION_STOP:
1198 		res = kvm_s390_vm_stop_migration(kvm);
1199 		break;
1200 	default:
1201 		break;
1202 	}
1203 	mutex_unlock(&kvm->slots_lock);
1204 
1205 	return res;
1206 }
1207 
1208 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1209 				     struct kvm_device_attr *attr)
1210 {
1211 	u64 mig = kvm->arch.migration_mode;
1212 
1213 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1214 		return -ENXIO;
1215 
1216 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1217 		return -EFAULT;
1218 	return 0;
1219 }
1220 
1221 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
1222 
1223 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1224 {
1225 	struct kvm_s390_vm_tod_clock gtod;
1226 
1227 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1228 		return -EFAULT;
1229 
1230 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1231 		return -EINVAL;
1232 	__kvm_s390_set_tod_clock(kvm, &gtod);
1233 
1234 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1235 		gtod.epoch_idx, gtod.tod);
1236 
1237 	return 0;
1238 }
1239 
1240 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1241 {
1242 	u8 gtod_high;
1243 
1244 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1245 					   sizeof(gtod_high)))
1246 		return -EFAULT;
1247 
1248 	if (gtod_high != 0)
1249 		return -EINVAL;
1250 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1251 
1252 	return 0;
1253 }
1254 
1255 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1256 {
1257 	struct kvm_s390_vm_tod_clock gtod = { 0 };
1258 
1259 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1260 			   sizeof(gtod.tod)))
1261 		return -EFAULT;
1262 
1263 	__kvm_s390_set_tod_clock(kvm, &gtod);
1264 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1265 	return 0;
1266 }
1267 
1268 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1269 {
1270 	int ret;
1271 
1272 	if (attr->flags)
1273 		return -EINVAL;
1274 
1275 	mutex_lock(&kvm->lock);
1276 	/*
1277 	 * For protected guests, the TOD is managed by the ultravisor, so trying
1278 	 * to change it will never bring the expected results.
1279 	 */
1280 	if (kvm_s390_pv_is_protected(kvm)) {
1281 		ret = -EOPNOTSUPP;
1282 		goto out_unlock;
1283 	}
1284 
1285 	switch (attr->attr) {
1286 	case KVM_S390_VM_TOD_EXT:
1287 		ret = kvm_s390_set_tod_ext(kvm, attr);
1288 		break;
1289 	case KVM_S390_VM_TOD_HIGH:
1290 		ret = kvm_s390_set_tod_high(kvm, attr);
1291 		break;
1292 	case KVM_S390_VM_TOD_LOW:
1293 		ret = kvm_s390_set_tod_low(kvm, attr);
1294 		break;
1295 	default:
1296 		ret = -ENXIO;
1297 		break;
1298 	}
1299 
1300 out_unlock:
1301 	mutex_unlock(&kvm->lock);
1302 	return ret;
1303 }
1304 
1305 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1306 				   struct kvm_s390_vm_tod_clock *gtod)
1307 {
1308 	union tod_clock clk;
1309 
1310 	preempt_disable();
1311 
1312 	store_tod_clock_ext(&clk);
1313 
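	/*
	 * Guest TOD = host TOD + guest epoch. If the 64-bit addition
	 * wraps (the result is below clk.tod), carry into the epoch
	 * index, matching the 128-bit arithmetic in kvm_clock_sync_scb().
	 */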
1314 	gtod->tod = clk.tod + kvm->arch.epoch;
1315 	gtod->epoch_idx = 0;
1316 	if (test_kvm_facility(kvm, 139)) {
1317 		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1318 		if (gtod->tod < clk.tod)
1319 			gtod->epoch_idx += 1;
1320 	}
1321 
1322 	preempt_enable();
1323 }
1324 
1325 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1326 {
1327 	struct kvm_s390_vm_tod_clock gtod;
1328 
1329 	memset(&gtod, 0, sizeof(gtod));
1330 	kvm_s390_get_tod_clock(kvm, &gtod);
1331 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1332 		return -EFAULT;
1333 
1334 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1335 		gtod.epoch_idx, gtod.tod);
1336 	return 0;
1337 }
1338 
1339 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1340 {
1341 	u8 gtod_high = 0;
1342 
1343 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1344 					 sizeof(gtod_high)))
1345 		return -EFAULT;
1346 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1347 
1348 	return 0;
1349 }
1350 
1351 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1352 {
1353 	u64 gtod;
1354 
1355 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1356 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1357 		return -EFAULT;
1358 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1359 
1360 	return 0;
1361 }
1362 
1363 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1364 {
1365 	int ret;
1366 
1367 	if (attr->flags)
1368 		return -EINVAL;
1369 
1370 	switch (attr->attr) {
1371 	case KVM_S390_VM_TOD_EXT:
1372 		ret = kvm_s390_get_tod_ext(kvm, attr);
1373 		break;
1374 	case KVM_S390_VM_TOD_HIGH:
1375 		ret = kvm_s390_get_tod_high(kvm, attr);
1376 		break;
1377 	case KVM_S390_VM_TOD_LOW:
1378 		ret = kvm_s390_get_tod_low(kvm, attr);
1379 		break;
1380 	default:
1381 		ret = -ENXIO;
1382 		break;
1383 	}
1384 	return ret;
1385 }
1386 
1387 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1388 {
1389 	struct kvm_s390_vm_cpu_processor *proc;
1390 	u16 lowest_ibc, unblocked_ibc;
1391 	int ret = 0;
1392 
1393 	mutex_lock(&kvm->lock);
1394 	if (kvm->created_vcpus) {
1395 		ret = -EBUSY;
1396 		goto out;
1397 	}
1398 	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1399 	if (!proc) {
1400 		ret = -ENOMEM;
1401 		goto out;
1402 	}
1403 	if (!copy_from_user(proc, (void __user *)attr->addr,
1404 			    sizeof(*proc))) {
1405 		kvm->arch.model.cpuid = proc->cpuid;
1406 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1407 		unblocked_ibc = sclp.ibc & 0xfff;
1408 		if (lowest_ibc && proc->ibc) {
1409 			if (proc->ibc > unblocked_ibc)
1410 				kvm->arch.model.ibc = unblocked_ibc;
1411 			else if (proc->ibc < lowest_ibc)
1412 				kvm->arch.model.ibc = lowest_ibc;
1413 			else
1414 				kvm->arch.model.ibc = proc->ibc;
1415 		}
1416 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1417 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1418 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1419 			 kvm->arch.model.ibc,
1420 			 kvm->arch.model.cpuid);
1421 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1422 			 kvm->arch.model.fac_list[0],
1423 			 kvm->arch.model.fac_list[1],
1424 			 kvm->arch.model.fac_list[2]);
1425 	} else
1426 		ret = -EFAULT;
1427 	kfree(proc);
1428 out:
1429 	mutex_unlock(&kvm->lock);
1430 	return ret;
1431 }
1432 
1433 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1434 				       struct kvm_device_attr *attr)
1435 {
1436 	struct kvm_s390_vm_cpu_feat data;
1437 
1438 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1439 		return -EFAULT;
1440 	if (!bitmap_subset((unsigned long *) data.feat,
1441 			   kvm_s390_available_cpu_feat,
1442 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1443 		return -EINVAL;
1444 
1445 	mutex_lock(&kvm->lock);
1446 	if (kvm->created_vcpus) {
1447 		mutex_unlock(&kvm->lock);
1448 		return -EBUSY;
1449 	}
1450 	bitmap_from_arr64(kvm->arch.cpu_feat, data.feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1451 	mutex_unlock(&kvm->lock);
1452 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1453 			 data.feat[0],
1454 			 data.feat[1],
1455 			 data.feat[2]);
1456 	return 0;
1457 }
1458 
1459 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1460 					  struct kvm_device_attr *attr)
1461 {
1462 	mutex_lock(&kvm->lock);
1463 	if (kvm->created_vcpus) {
1464 		mutex_unlock(&kvm->lock);
1465 		return -EBUSY;
1466 	}
1467 
1468 	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1469 			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1470 		mutex_unlock(&kvm->lock);
1471 		return -EFAULT;
1472 	}
1473 	mutex_unlock(&kvm->lock);
1474 
1475 	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1476 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1477 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1478 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1479 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1480 	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1481 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1482 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1483 	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1484 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1485 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1486 	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1487 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1488 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1489 	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1490 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1491 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1492 	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1493 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1494 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1495 	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1496 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1497 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1498 	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1499 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1500 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1501 	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1502 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1503 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1504 	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1505 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1506 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1507 	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1508 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1509 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1510 	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1511 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1512 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1513 	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1514 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1515 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1516 	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1517 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1518 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1519 	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1520 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1521 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1522 	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1523 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1524 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1525 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1526 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1527 	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1528 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1529 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1530 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1531 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1532 
1533 	return 0;
1534 }
1535 
1536 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1537 {
1538 	int ret = -ENXIO;
1539 
1540 	switch (attr->attr) {
1541 	case KVM_S390_VM_CPU_PROCESSOR:
1542 		ret = kvm_s390_set_processor(kvm, attr);
1543 		break;
1544 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1545 		ret = kvm_s390_set_processor_feat(kvm, attr);
1546 		break;
1547 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1548 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1549 		break;
1550 	}
1551 	return ret;
1552 }
1553 
1554 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1555 {
1556 	struct kvm_s390_vm_cpu_processor *proc;
1557 	int ret = 0;
1558 
1559 	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1560 	if (!proc) {
1561 		ret = -ENOMEM;
1562 		goto out;
1563 	}
1564 	proc->cpuid = kvm->arch.model.cpuid;
1565 	proc->ibc = kvm->arch.model.ibc;
1566 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1567 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1568 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1569 		 kvm->arch.model.ibc,
1570 		 kvm->arch.model.cpuid);
1571 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1572 		 kvm->arch.model.fac_list[0],
1573 		 kvm->arch.model.fac_list[1],
1574 		 kvm->arch.model.fac_list[2]);
1575 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1576 		ret = -EFAULT;
1577 	kfree(proc);
1578 out:
1579 	return ret;
1580 }
1581 
1582 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1583 {
1584 	struct kvm_s390_vm_cpu_machine *mach;
1585 	int ret = 0;
1586 
1587 	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1588 	if (!mach) {
1589 		ret = -ENOMEM;
1590 		goto out;
1591 	}
1592 	get_cpu_id((struct cpuid *) &mach->cpuid);
1593 	mach->ibc = sclp.ibc;
1594 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1595 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1596 	memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1597 	       sizeof(stfle_fac_list));
1598 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1599 		 kvm->arch.model.ibc,
1600 		 kvm->arch.model.cpuid);
1601 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1602 		 mach->fac_mask[0],
1603 		 mach->fac_mask[1],
1604 		 mach->fac_mask[2]);
1605 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1606 		 mach->fac_list[0],
1607 		 mach->fac_list[1],
1608 		 mach->fac_list[2]);
1609 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1610 		ret = -EFAULT;
1611 	kfree(mach);
1612 out:
1613 	return ret;
1614 }
1615 
1616 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1617 				       struct kvm_device_attr *attr)
1618 {
1619 	struct kvm_s390_vm_cpu_feat data;
1620 
1621 	bitmap_to_arr64(data.feat, kvm->arch.cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1622 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1623 		return -EFAULT;
1624 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1625 			 data.feat[0],
1626 			 data.feat[1],
1627 			 data.feat[2]);
1628 	return 0;
1629 }
1630 
1631 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1632 				     struct kvm_device_attr *attr)
1633 {
1634 	struct kvm_s390_vm_cpu_feat data;
1635 
1636 	bitmap_to_arr64(data.feat, kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1637 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1638 		return -EFAULT;
1639 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1640 			 data.feat[0],
1641 			 data.feat[1],
1642 			 data.feat[2]);
1643 	return 0;
1644 }
1645 
1646 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1647 					  struct kvm_device_attr *attr)
1648 {
1649 	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1650 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1651 		return -EFAULT;
1652 
1653 	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1654 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1655 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1656 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1657 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1658 	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1659 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1660 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1661 	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1662 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1663 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1664 	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1665 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1666 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1667 	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1668 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1669 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1670 	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1671 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1672 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1673 	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1674 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1675 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1676 	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1677 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1678 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1679 	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1680 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1681 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1682 	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1683 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1684 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1685 	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1686 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1687 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1688 	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1689 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1690 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1691 	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1692 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1693 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1694 	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1695 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1696 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1697 	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1698 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1699 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1700 	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1701 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1702 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1703 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1704 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1705 	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1706 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1707 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1708 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1709 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1710 
1711 	return 0;
1712 }
1713 
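/*
 * Copy the subfunction masks supported by the host to user space and log
 * them in the trace buffer.
 */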
1714 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1715 					struct kvm_device_attr *attr)
1716 {
1717 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1718 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1719 		return -EFAULT;
1720 
1721 	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1722 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1723 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1724 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1725 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1726 	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1727 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1728 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1729 	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1730 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1731 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1732 	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1733 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1734 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1735 	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1736 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1737 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1738 	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1739 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1740 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1741 	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1742 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1743 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1744 	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1745 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1746 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1747 	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1748 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1749 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1750 	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1751 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1752 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1753 	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1754 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1755 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1756 	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1757 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1758 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1759 	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1760 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1761 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1762 	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1763 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1764 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1765 	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1766 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1767 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1768 	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1769 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1770 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1771 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1772 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1773 	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1774 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1775 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1776 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1777 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1778 
1779 	return 0;
1780 }
1781 
1782 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1783 {
1784 	int ret = -ENXIO;
1785 
1786 	switch (attr->attr) {
1787 	case KVM_S390_VM_CPU_PROCESSOR:
1788 		ret = kvm_s390_get_processor(kvm, attr);
1789 		break;
1790 	case KVM_S390_VM_CPU_MACHINE:
1791 		ret = kvm_s390_get_machine(kvm, attr);
1792 		break;
1793 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1794 		ret = kvm_s390_get_processor_feat(kvm, attr);
1795 		break;
1796 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1797 		ret = kvm_s390_get_machine_feat(kvm, attr);
1798 		break;
1799 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1800 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1801 		break;
1802 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1803 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1804 		break;
1805 	}
1806 	return ret;
1807 }
1808 
1809 /**
1810  * kvm_s390_update_topology_change_report - update CPU topology change report
1811  * @kvm: guest KVM description
1812  * @val: set or clear the MTCR bit
1813  *
1814  * Updates the Multiprocessor Topology-Change-Report bit to signal
1815  * a topology change to the guest.
1816  * This is only relevant if the topology facility is present.
1817  *
1818  * The SCA version, bsca or esca, doesn't matter as the offset is the same.
1819  */
1820 static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
1821 {
1822 	union sca_utility new, old;
1823 	struct bsca_block *sca;
1824 
1825 	read_lock(&kvm->arch.sca_lock);
1826 	sca = kvm->arch.sca;
1827 	do {
1828 		old = READ_ONCE(sca->utility);
1829 		new = old;
1830 		new.mtcr = val;
1831 	} while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val);
1832 	read_unlock(&kvm->arch.sca_lock);
1833 }
1834 
1835 static int kvm_s390_set_topo_change_indication(struct kvm *kvm,
1836 					       struct kvm_device_attr *attr)
1837 {
1838 	if (!test_kvm_facility(kvm, 11))
1839 		return -ENXIO;
1840 
1841 	kvm_s390_update_topology_change_report(kvm, !!attr->attr);
1842 	return 0;
1843 }
1844 
1845 static int kvm_s390_get_topo_change_indication(struct kvm *kvm,
1846 					       struct kvm_device_attr *attr)
1847 {
1848 	u8 topo;
1849 
1850 	if (!test_kvm_facility(kvm, 11))
1851 		return -ENXIO;
1852 
1853 	read_lock(&kvm->arch.sca_lock);
1854 	topo = ((struct bsca_block *)kvm->arch.sca)->utility.mtcr;
1855 	read_unlock(&kvm->arch.sca_lock);
1856 
1857 	return put_user(topo, (u8 __user *)attr->addr);
1858 }
1859 
1860 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1861 {
1862 	int ret;
1863 
1864 	switch (attr->group) {
1865 	case KVM_S390_VM_MEM_CTRL:
1866 		ret = kvm_s390_set_mem_control(kvm, attr);
1867 		break;
1868 	case KVM_S390_VM_TOD:
1869 		ret = kvm_s390_set_tod(kvm, attr);
1870 		break;
1871 	case KVM_S390_VM_CPU_MODEL:
1872 		ret = kvm_s390_set_cpu_model(kvm, attr);
1873 		break;
1874 	case KVM_S390_VM_CRYPTO:
1875 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1876 		break;
1877 	case KVM_S390_VM_MIGRATION:
1878 		ret = kvm_s390_vm_set_migration(kvm, attr);
1879 		break;
1880 	case KVM_S390_VM_CPU_TOPOLOGY:
1881 		ret = kvm_s390_set_topo_change_indication(kvm, attr);
1882 		break;
1883 	default:
1884 		ret = -ENXIO;
1885 		break;
1886 	}
1887 
1888 	return ret;
1889 }
1890 
1891 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1892 {
1893 	int ret;
1894 
1895 	switch (attr->group) {
1896 	case KVM_S390_VM_MEM_CTRL:
1897 		ret = kvm_s390_get_mem_control(kvm, attr);
1898 		break;
1899 	case KVM_S390_VM_TOD:
1900 		ret = kvm_s390_get_tod(kvm, attr);
1901 		break;
1902 	case KVM_S390_VM_CPU_MODEL:
1903 		ret = kvm_s390_get_cpu_model(kvm, attr);
1904 		break;
1905 	case KVM_S390_VM_MIGRATION:
1906 		ret = kvm_s390_vm_get_migration(kvm, attr);
1907 		break;
1908 	case KVM_S390_VM_CPU_TOPOLOGY:
1909 		ret = kvm_s390_get_topo_change_indication(kvm, attr);
1910 		break;
1911 	default:
1912 		ret = -ENXIO;
1913 		break;
1914 	}
1915 
1916 	return ret;
1917 }
1918 
1919 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1920 {
1921 	int ret;
1922 
1923 	switch (attr->group) {
1924 	case KVM_S390_VM_MEM_CTRL:
1925 		switch (attr->attr) {
1926 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1927 		case KVM_S390_VM_MEM_CLR_CMMA:
1928 			ret = sclp.has_cmma ? 0 : -ENXIO;
1929 			break;
1930 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1931 			ret = 0;
1932 			break;
1933 		default:
1934 			ret = -ENXIO;
1935 			break;
1936 		}
1937 		break;
1938 	case KVM_S390_VM_TOD:
1939 		switch (attr->attr) {
1940 		case KVM_S390_VM_TOD_LOW:
1941 		case KVM_S390_VM_TOD_HIGH:
1942 			ret = 0;
1943 			break;
1944 		default:
1945 			ret = -ENXIO;
1946 			break;
1947 		}
1948 		break;
1949 	case KVM_S390_VM_CPU_MODEL:
1950 		switch (attr->attr) {
1951 		case KVM_S390_VM_CPU_PROCESSOR:
1952 		case KVM_S390_VM_CPU_MACHINE:
1953 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1954 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1955 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1956 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1957 			ret = 0;
1958 			break;
1959 		default:
1960 			ret = -ENXIO;
1961 			break;
1962 		}
1963 		break;
1964 	case KVM_S390_VM_CRYPTO:
1965 		switch (attr->attr) {
1966 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1967 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1968 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1969 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1970 			ret = 0;
1971 			break;
1972 		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1973 		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1974 			ret = ap_instructions_available() ? 0 : -ENXIO;
1975 			break;
1976 		default:
1977 			ret = -ENXIO;
1978 			break;
1979 		}
1980 		break;
1981 	case KVM_S390_VM_MIGRATION:
1982 		ret = 0;
1983 		break;
1984 	case KVM_S390_VM_CPU_TOPOLOGY:
1985 		ret = test_kvm_facility(kvm, 11) ? 0 : -ENXIO;
1986 		break;
1987 	default:
1988 		ret = -ENXIO;
1989 		break;
1990 	}
1991 
1992 	return ret;
1993 }
1994 
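/*
 * Read the guest storage keys for args->count frames starting at
 * args->start_gfn and copy them to the user buffer at skeydata_addr.
 * Returns KVM_S390_GET_SKEYS_NONE if the guest does not use storage keys.
 */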
1995 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1996 {
1997 	uint8_t *keys;
1998 	uint64_t hva;
1999 	int srcu_idx, i, r = 0;
2000 
2001 	if (args->flags != 0)
2002 		return -EINVAL;
2003 
2004 	/* Is this guest using storage keys? */
2005 	if (!mm_uses_skeys(current->mm))
2006 		return KVM_S390_GET_SKEYS_NONE;
2007 
2008 	/* Enforce sane limit on memory allocation */
2009 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
2010 		return -EINVAL;
2011 
2012 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
2013 	if (!keys)
2014 		return -ENOMEM;
2015 
2016 	mmap_read_lock(current->mm);
2017 	srcu_idx = srcu_read_lock(&kvm->srcu);
2018 	for (i = 0; i < args->count; i++) {
2019 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2020 		if (kvm_is_error_hva(hva)) {
2021 			r = -EFAULT;
2022 			break;
2023 		}
2024 
2025 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
2026 		if (r)
2027 			break;
2028 	}
2029 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2030 	mmap_read_unlock(current->mm);
2031 
2032 	if (!r) {
2033 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
2034 				 sizeof(uint8_t) * args->count);
2035 		if (r)
2036 			r = -EFAULT;
2037 	}
2038 
2039 	kvfree(keys);
2040 	return r;
2041 }
2042 
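/*
 * Copy storage keys from the user buffer at skeydata_addr and set them for
 * args->count frames starting at args->start_gfn. Enables storage key
 * handling for the guest first and faults in pages on demand.
 */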
2043 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
2044 {
2045 	uint8_t *keys;
2046 	uint64_t hva;
2047 	int srcu_idx, i, r = 0;
2048 	bool unlocked;
2049 
2050 	if (args->flags != 0)
2051 		return -EINVAL;
2052 
2053 	/* Enforce sane limit on memory allocation */
2054 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
2055 		return -EINVAL;
2056 
2057 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
2058 	if (!keys)
2059 		return -ENOMEM;
2060 
2061 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
2062 			   sizeof(uint8_t) * args->count);
2063 	if (r) {
2064 		r = -EFAULT;
2065 		goto out;
2066 	}
2067 
2068 	/* Enable storage key handling for the guest */
2069 	r = s390_enable_skey();
2070 	if (r)
2071 		goto out;
2072 
2073 	i = 0;
2074 	mmap_read_lock(current->mm);
2075 	srcu_idx = srcu_read_lock(&kvm->srcu);
2076 	while (i < args->count) {
2077 		unlocked = false;
2078 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2079 		if (kvm_is_error_hva(hva)) {
2080 			r = -EFAULT;
2081 			break;
2082 		}
2083 
2084 		/* Lowest order bit is reserved */
2085 		if (keys[i] & 0x01) {
2086 			r = -EINVAL;
2087 			break;
2088 		}
2089 
2090 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
2091 		if (r) {
2092 			r = fixup_user_fault(current->mm, hva,
2093 					     FAULT_FLAG_WRITE, &unlocked);
2094 			if (r)
2095 				break;
2096 		}
2097 		if (!r)
2098 			i++;
2099 	}
2100 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2101 	mmap_read_unlock(current->mm);
2102 out:
2103 	kvfree(keys);
2104 	return r;
2105 }
2106 
2107 /*
2108  * Base address and length must be sent at the start of each block, so it's
2109  * cheaper to send some clean data, as long as it's less than the size of
2110  * two longs.
2111  */
2112 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
2113 /* for consistency */
2114 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
2115 
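/*
 * Read the CMMA page usage state for bufsize consecutive guest frames
 * starting at args->start_gfn, without touching the dirty bitmap.
 */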
2116 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2117 			      u8 *res, unsigned long bufsize)
2118 {
2119 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
2120 
2121 	args->count = 0;
2122 	while (args->count < bufsize) {
2123 		hva = gfn_to_hva(kvm, cur_gfn);
2124 		/*
2125 		 * We return an error if the first value was invalid, but we
2126 		 * return successfully if at least one value was copied.
2127 		 */
2128 		if (kvm_is_error_hva(hva))
2129 			return args->count ? 0 : -EFAULT;
2130 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2131 			pgstev = 0;
2132 		res[args->count++] = (pgstev >> 24) & 0x43;
2133 		cur_gfn++;
2134 	}
2135 
2136 	return 0;
2137 }
2138 
2139 static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
2140 						     gfn_t gfn)
2141 {
2142 	return ____gfn_to_memslot(slots, gfn, true);
2143 }
2144 
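/*
 * Find the guest frame number of the next page with a set bit in the CMMA
 * dirty bitmap, starting at cur_gfn and wrapping around to the first
 * memslot if needed.
 */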
2145 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
2146 					      unsigned long cur_gfn)
2147 {
2148 	struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
2149 	unsigned long ofs = cur_gfn - ms->base_gfn;
2150 	struct rb_node *mnode = &ms->gfn_node[slots->node_idx];
2151 
2152 	if (ms->base_gfn + ms->npages <= cur_gfn) {
2153 		mnode = rb_next(mnode);
2154 		/* If we are above the highest slot, wrap around */
2155 		if (!mnode)
2156 			mnode = rb_first(&slots->gfn_tree);
2157 
2158 		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
2159 		ofs = 0;
2160 	}
2161 	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2162 	while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
2163 		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
2164 		ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages);
2165 	}
2166 	return ms->base_gfn + ofs;
2167 }
2168 
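/*
 * Like kvm_s390_peek_cmma(), but start at the next dirty page, clear the
 * dirty bits that are read, and stop once the distance to the next dirty
 * page exceeds KVM_S390_MAX_BIT_DISTANCE.
 */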
2169 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2170 			     u8 *res, unsigned long bufsize)
2171 {
2172 	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2173 	struct kvm_memslots *slots = kvm_memslots(kvm);
2174 	struct kvm_memory_slot *ms;
2175 
2176 	if (unlikely(kvm_memslots_empty(slots)))
2177 		return 0;
2178 
2179 	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2180 	ms = gfn_to_memslot(kvm, cur_gfn);
2181 	args->count = 0;
2182 	args->start_gfn = cur_gfn;
2183 	if (!ms)
2184 		return 0;
2185 	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2186 	mem_end = kvm_s390_get_gfn_end(slots);
2187 
2188 	while (args->count < bufsize) {
2189 		hva = gfn_to_hva(kvm, cur_gfn);
2190 		if (kvm_is_error_hva(hva))
2191 			return 0;
2192 		/* Decrement only if we actually flipped the bit to 0 */
2193 		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2194 			atomic64_dec(&kvm->arch.cmma_dirty_pages);
2195 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2196 			pgstev = 0;
2197 		/* Save the value */
2198 		res[args->count++] = (pgstev >> 24) & 0x43;
2199 		/* If the next bit is too far away, stop. */
2200 		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2201 			return 0;
2202 		/* If we reached the previous "next", find the next one */
2203 		if (cur_gfn == next_gfn)
2204 			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2205 		/* Reached the end of memory or of the buffer, stop */
2206 		if ((next_gfn >= mem_end) ||
2207 		    (next_gfn - args->start_gfn >= bufsize))
2208 			return 0;
2209 		cur_gfn++;
2210 		/* Reached the end of the current memslot, take the next one. */
2211 		if (cur_gfn - ms->base_gfn >= ms->npages) {
2212 			ms = gfn_to_memslot(kvm, cur_gfn);
2213 			if (!ms)
2214 				return 0;
2215 		}
2216 	}
2217 	return 0;
2218 }
2219 
2220 /*
2221  * This function searches for the next page with dirty CMMA attributes, and
2222  * saves the attributes in the buffer up to either the end of the buffer or
2223  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2224  * no trailing clean bytes are saved.
2225  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2226  * output buffer will indicate 0 as length.
2227  */
2228 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2229 				  struct kvm_s390_cmma_log *args)
2230 {
2231 	unsigned long bufsize;
2232 	int srcu_idx, peek, ret;
2233 	u8 *values;
2234 
2235 	if (!kvm->arch.use_cmma)
2236 		return -ENXIO;
2237 	/* Invalid/unsupported flags were specified */
2238 	if (args->flags & ~KVM_S390_CMMA_PEEK)
2239 		return -EINVAL;
2240 	/* Migration mode query, and we are not doing a migration */
2241 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2242 	if (!peek && !kvm->arch.migration_mode)
2243 		return -EINVAL;
2244 	/* CMMA is disabled or was not used, or the buffer has length zero */
2245 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2246 	if (!bufsize || !kvm->mm->context.uses_cmm) {
2247 		memset(args, 0, sizeof(*args));
2248 		return 0;
2249 	}
2250 	/* We are not peeking, and there are no dirty pages */
2251 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2252 		memset(args, 0, sizeof(*args));
2253 		return 0;
2254 	}
2255 
2256 	values = vmalloc(bufsize);
2257 	if (!values)
2258 		return -ENOMEM;
2259 
2260 	mmap_read_lock(kvm->mm);
2261 	srcu_idx = srcu_read_lock(&kvm->srcu);
2262 	if (peek)
2263 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2264 	else
2265 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2266 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2267 	mmap_read_unlock(kvm->mm);
2268 
2269 	if (kvm->arch.migration_mode)
2270 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2271 	else
2272 		args->remaining = 0;
2273 
2274 	if (copy_to_user((void __user *)args->values, values, args->count))
2275 		ret = -EFAULT;
2276 
2277 	vfree(values);
2278 	return ret;
2279 }
2280 
2281 /*
2282  * This function sets the CMMA attributes for the given pages. If the input
2283  * buffer has zero length, no action is taken, otherwise the attributes are
2284  * set and the mm->context.uses_cmm flag is set.
2285  */
2286 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2287 				  const struct kvm_s390_cmma_log *args)
2288 {
2289 	unsigned long hva, mask, pgstev, i;
2290 	uint8_t *bits;
2291 	int srcu_idx, r = 0;
2292 
2293 	mask = args->mask;
2294 
2295 	if (!kvm->arch.use_cmma)
2296 		return -ENXIO;
2297 	/* invalid/unsupported flags */
2298 	if (args->flags != 0)
2299 		return -EINVAL;
2300 	/* Enforce sane limit on memory allocation */
2301 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
2302 		return -EINVAL;
2303 	/* Nothing to do */
2304 	if (args->count == 0)
2305 		return 0;
2306 
2307 	bits = vmalloc(array_size(sizeof(*bits), args->count));
2308 	if (!bits)
2309 		return -ENOMEM;
2310 
2311 	r = copy_from_user(bits, (void __user *)args->values, args->count);
2312 	if (r) {
2313 		r = -EFAULT;
2314 		goto out;
2315 	}
2316 
2317 	mmap_read_lock(kvm->mm);
2318 	srcu_idx = srcu_read_lock(&kvm->srcu);
2319 	for (i = 0; i < args->count; i++) {
2320 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2321 		if (kvm_is_error_hva(hva)) {
2322 			r = -EFAULT;
2323 			break;
2324 		}
2325 
2326 		pgstev = bits[i];
2327 		pgstev = pgstev << 24;
2328 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2329 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
2330 	}
2331 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2332 	mmap_read_unlock(kvm->mm);
2333 
2334 	if (!kvm->mm->context.uses_cmm) {
2335 		mmap_write_lock(kvm->mm);
2336 		kvm->mm->context.uses_cmm = 1;
2337 		mmap_write_unlock(kvm->mm);
2338 	}
2339 out:
2340 	vfree(bits);
2341 	return r;
2342 }
2343 
2344 /**
2345  * kvm_s390_cpus_from_pv - Convert all protected vCPUs in a protected VM to
2346  * non-protected.
2347  * @kvm: the VM whose protected vCPUs are to be converted
2348  * @rc: return value for the RC field of the UVC (in case of error)
2349  * @rrc: return value for the RRC field of the UVC (in case of error)
2350  *
2351  * Does not stop in case of error, tries to convert as many
2352  * CPUs as possible. In case of error, the RC and RRC of the last error are
2353  * CPUs as possible. In case of error, the RC and RRC of the first error are
2354  *
2355  * Return: 0 in case of success, otherwise -EIO
2356  */
2357 int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2358 {
2359 	struct kvm_vcpu *vcpu;
2360 	unsigned long i;
2361 	u16 _rc, _rrc;
2362 	int ret = 0;
2363 
2364 	/*
2365 	 * We ignore failures and try to destroy as many CPUs as possible.
2366 	 * At the same time we must not free the assigned resources when
2367 	 * this fails, as the ultravisor still has access to that memory.
2368 	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2369 	 * behind.
2370 	 * We want to return the first failure rc and rrc, though.
2371 	 */
2372 	kvm_for_each_vcpu(i, vcpu, kvm) {
2373 		mutex_lock(&vcpu->mutex);
2374 		if (kvm_s390_pv_destroy_cpu(vcpu, &_rc, &_rrc) && !ret) {
2375 			*rc = _rc;
2376 			*rrc = _rrc;
2377 			ret = -EIO;
2378 		}
2379 		mutex_unlock(&vcpu->mutex);
2380 	}
2381 	/* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */
2382 	if (use_gisa)
2383 		kvm_s390_gisa_enable(kvm);
2384 	return ret;
2385 }
2386 
2387 /**
2388  * kvm_s390_cpus_to_pv - Convert all non-protected vCPUs in a protected VM
2389  * to protected.
2390  * @kvm: the VM whose protected vCPUs are to be converted
2391  * @rc: return value for the RC field of the UVC (in case of error)
2392  * @rrc: return value for the RRC field of the UVC (in case of error)
2393  *
2394  * Tries to undo the conversion in case of error.
2395  *
2396  * Return: 0 in case of success, otherwise -EIO
2397  */
2398 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2399 {
2400 	unsigned long i;
2401 	int r = 0;
2402 	u16 dummy;
2403 
2404 	struct kvm_vcpu *vcpu;
2405 
2406 	/* Disable the GISA if the ultravisor does not support AIV. */
2407 	if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications))
2408 		kvm_s390_gisa_disable(kvm);
2409 
2410 	kvm_for_each_vcpu(i, vcpu, kvm) {
2411 		mutex_lock(&vcpu->mutex);
2412 		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2413 		mutex_unlock(&vcpu->mutex);
2414 		if (r)
2415 			break;
2416 	}
2417 	if (r)
2418 		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2419 	return r;
2420 }
2421 
2422 /*
2423  * Here we provide user space with a direct interface to query UV
2424  * related data like UV maxima and available features as well as
2425  * feature specific data.
2426  *
2427  * To facilitate future extension of the data structures we'll try to
2428  * write data up to the maximum requested length.
2429  */
2430 static ssize_t kvm_s390_handle_pv_info(struct kvm_s390_pv_info *info)
2431 {
2432 	ssize_t len_min;
2433 
2434 	switch (info->header.id) {
2435 	case KVM_PV_INFO_VM: {
2436 		len_min =  sizeof(info->header) + sizeof(info->vm);
2437 
2438 		if (info->header.len_max < len_min)
2439 			return -EINVAL;
2440 
2441 		memcpy(info->vm.inst_calls_list,
2442 		       uv_info.inst_calls_list,
2443 		       sizeof(uv_info.inst_calls_list));
2444 
2445 		/* It's max cpuid, not max cpus, so it's off by one */
2446 		info->vm.max_cpus = uv_info.max_guest_cpu_id + 1;
2447 		info->vm.max_guests = uv_info.max_num_sec_conf;
2448 		info->vm.max_guest_addr = uv_info.max_sec_stor_addr;
2449 		info->vm.feature_indication = uv_info.uv_feature_indications;
2450 
2451 		return len_min;
2452 	}
2453 	case KVM_PV_INFO_DUMP: {
2454 		len_min =  sizeof(info->header) + sizeof(info->dump);
2455 
2456 		if (info->header.len_max < len_min)
2457 			return -EINVAL;
2458 
2459 		info->dump.dump_cpu_buffer_len = uv_info.guest_cpu_stor_len;
2460 		info->dump.dump_config_mem_buffer_per_1m = uv_info.conf_dump_storage_state_len;
2461 		info->dump.dump_config_finalize_len = uv_info.conf_dump_finalize_len;
2462 		return len_min;
2463 	}
2464 	default:
2465 		return -EINVAL;
2466 	}
2467 }
2468 
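/*
 * Handle the KVM_PV_DUMP subcommands: initialize a dump of the protected
 * VM, dump its configuration storage state, or finalize the dump.
 */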
2469 static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd,
2470 			   struct kvm_s390_pv_dmp dmp)
2471 {
2472 	int r = -EINVAL;
2473 	void __user *result_buff = (void __user *)dmp.buff_addr;
2474 
2475 	switch (dmp.subcmd) {
2476 	case KVM_PV_DUMP_INIT: {
2477 		if (kvm->arch.pv.dumping)
2478 			break;
2479 
2480 		/*
2481 		 * Block SIE entry as concurrent dump UVCs could lead
2482 		 * to validities.
2483 		 */
2484 		kvm_s390_vcpu_block_all(kvm);
2485 
2486 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2487 				  UVC_CMD_DUMP_INIT, &cmd->rc, &cmd->rrc);
2488 		KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP INIT: rc %x rrc %x",
2489 			     cmd->rc, cmd->rrc);
2490 		if (!r) {
2491 			kvm->arch.pv.dumping = true;
2492 		} else {
2493 			kvm_s390_vcpu_unblock_all(kvm);
2494 			r = -EINVAL;
2495 		}
2496 		break;
2497 	}
2498 	case KVM_PV_DUMP_CONFIG_STOR_STATE: {
2499 		if (!kvm->arch.pv.dumping)
2500 			break;
2501 
2502 		/*
2503 		 * gaddr is an output parameter since we might stop
2504 		 * early. As dmp will be copied back in our caller, we
2505 		 * don't need to do it ourselves.
2506 		 */
2507 		r = kvm_s390_pv_dump_stor_state(kvm, result_buff, &dmp.gaddr, dmp.buff_len,
2508 						&cmd->rc, &cmd->rrc);
2509 		break;
2510 	}
2511 	case KVM_PV_DUMP_COMPLETE: {
2512 		if (!kvm->arch.pv.dumping)
2513 			break;
2514 
2515 		r = -EINVAL;
2516 		if (dmp.buff_len < uv_info.conf_dump_finalize_len)
2517 			break;
2518 
2519 		r = kvm_s390_pv_dump_complete(kvm, result_buff,
2520 					      &cmd->rc, &cmd->rrc);
2521 		break;
2522 	}
2523 	default:
2524 		r = -ENOTTY;
2525 		break;
2526 	}
2527 
2528 	return r;
2529 }
2530 
2531 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2532 {
2533 	const bool need_lock = (cmd->cmd != KVM_PV_ASYNC_CLEANUP_PERFORM);
2534 	void __user *argp = (void __user *)cmd->data;
2535 	int r = 0;
2536 	u16 dummy;
2537 
2538 	if (need_lock)
2539 		mutex_lock(&kvm->lock);
2540 
2541 	switch (cmd->cmd) {
2542 	case KVM_PV_ENABLE: {
2543 		r = -EINVAL;
2544 		if (kvm_s390_pv_is_protected(kvm))
2545 			break;
2546 
2547 		/*
2548 		 * FMT 4 SIE needs esca. As we never switch back to bsca from
2549 		 * esca, we need no cleanup in the error cases below.
2550 		 */
2551 		r = sca_switch_to_extended(kvm);
2552 		if (r)
2553 			break;
2554 
2555 		mmap_write_lock(current->mm);
2556 		r = gmap_mark_unmergeable();
2557 		mmap_write_unlock(current->mm);
2558 		if (r)
2559 			break;
2560 
2561 		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2562 		if (r)
2563 			break;
2564 
2565 		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2566 		if (r)
2567 			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2568 
2569 		/* we need to block service interrupts from now on */
2570 		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2571 		break;
2572 	}
2573 	case KVM_PV_ASYNC_CLEANUP_PREPARE:
2574 		r = -EINVAL;
2575 		if (!kvm_s390_pv_is_protected(kvm) || !async_destroy)
2576 			break;
2577 
2578 		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2579 		/*
2580 		 * If a CPU could not be destroyed, destroy VM will also fail.
2581 		 * There is no point in trying to destroy it. Instead return
2582 		 * the rc and rrc from the first CPU that failed destroying.
2583 		 */
2584 		if (r)
2585 			break;
2586 		r = kvm_s390_pv_set_aside(kvm, &cmd->rc, &cmd->rrc);
2587 
2588 		/* no need to block service interrupts any more */
2589 		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2590 		break;
2591 	case KVM_PV_ASYNC_CLEANUP_PERFORM:
2592 		r = -EINVAL;
2593 		if (!async_destroy)
2594 			break;
2595 		/* kvm->lock must not be held; this is asserted inside the function. */
2596 		r = kvm_s390_pv_deinit_aside_vm(kvm, &cmd->rc, &cmd->rrc);
2597 		break;
2598 	case KVM_PV_DISABLE: {
2599 		r = -EINVAL;
2600 		if (!kvm_s390_pv_is_protected(kvm))
2601 			break;
2602 
2603 		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2604 		/*
2605 		 * If a CPU could not be destroyed, destroy VM will also fail.
2606 		 * There is no point in trying to destroy it. Instead return
2607 		 * the rc and rrc from the first CPU that failed destroying.
2608 		 */
2609 		if (r)
2610 			break;
2611 		r = kvm_s390_pv_deinit_cleanup_all(kvm, &cmd->rc, &cmd->rrc);
2612 
2613 		/* no need to block service interrupts any more */
2614 		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2615 		break;
2616 	}
2617 	case KVM_PV_SET_SEC_PARMS: {
2618 		struct kvm_s390_pv_sec_parm parms = {};
2619 		void *hdr;
2620 
2621 		r = -EINVAL;
2622 		if (!kvm_s390_pv_is_protected(kvm))
2623 			break;
2624 
2625 		r = -EFAULT;
2626 		if (copy_from_user(&parms, argp, sizeof(parms)))
2627 			break;
2628 
2629 		/* Currently restricted to 8KB */
2630 		r = -EINVAL;
2631 		if (parms.length > PAGE_SIZE * 2)
2632 			break;
2633 
2634 		r = -ENOMEM;
2635 		hdr = vmalloc(parms.length);
2636 		if (!hdr)
2637 			break;
2638 
2639 		r = -EFAULT;
2640 		if (!copy_from_user(hdr, (void __user *)parms.origin,
2641 				    parms.length))
2642 			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2643 						      &cmd->rc, &cmd->rrc);
2644 
2645 		vfree(hdr);
2646 		break;
2647 	}
2648 	case KVM_PV_UNPACK: {
2649 		struct kvm_s390_pv_unp unp = {};
2650 
2651 		r = -EINVAL;
2652 		if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2653 			break;
2654 
2655 		r = -EFAULT;
2656 		if (copy_from_user(&unp, argp, sizeof(unp)))
2657 			break;
2658 
2659 		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2660 				       &cmd->rc, &cmd->rrc);
2661 		break;
2662 	}
2663 	case KVM_PV_VERIFY: {
2664 		r = -EINVAL;
2665 		if (!kvm_s390_pv_is_protected(kvm))
2666 			break;
2667 
2668 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2669 				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2670 		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2671 			     cmd->rrc);
2672 		break;
2673 	}
2674 	case KVM_PV_PREP_RESET: {
2675 		r = -EINVAL;
2676 		if (!kvm_s390_pv_is_protected(kvm))
2677 			break;
2678 
2679 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2680 				  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2681 		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2682 			     cmd->rc, cmd->rrc);
2683 		break;
2684 	}
2685 	case KVM_PV_UNSHARE_ALL: {
2686 		r = -EINVAL;
2687 		if (!kvm_s390_pv_is_protected(kvm))
2688 			break;
2689 
2690 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2691 				  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2692 		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2693 			     cmd->rc, cmd->rrc);
2694 		break;
2695 	}
2696 	case KVM_PV_INFO: {
2697 		struct kvm_s390_pv_info info = {};
2698 		ssize_t data_len;
2699 
2700 		/*
2701 		 * No need to check the VM protection here.
2702 		 *
2703 		 * Maybe user space wants to query some of the data
2704 		 * when the VM is still unprotected. If we see the
2705 		 * need to fence a new data command we can still
2706 		 * return an error in the info handler.
2707 		 */
2708 
2709 		r = -EFAULT;
2710 		if (copy_from_user(&info, argp, sizeof(info.header)))
2711 			break;
2712 
2713 		r = -EINVAL;
2714 		if (info.header.len_max < sizeof(info.header))
2715 			break;
2716 
2717 		data_len = kvm_s390_handle_pv_info(&info);
2718 		if (data_len < 0) {
2719 			r = data_len;
2720 			break;
2721 		}
2722 		/*
2723 		 * If a data command struct is extended (multiple
2724 		 * times) this can be used to determine how much of it
2725 		 * is valid.
2726 		 */
2727 		info.header.len_written = data_len;
2728 
2729 		r = -EFAULT;
2730 		if (copy_to_user(argp, &info, data_len))
2731 			break;
2732 
2733 		r = 0;
2734 		break;
2735 	}
2736 	case KVM_PV_DUMP: {
2737 		struct kvm_s390_pv_dmp dmp;
2738 
2739 		r = -EINVAL;
2740 		if (!kvm_s390_pv_is_protected(kvm))
2741 			break;
2742 
2743 		r = -EFAULT;
2744 		if (copy_from_user(&dmp, argp, sizeof(dmp)))
2745 			break;
2746 
2747 		r = kvm_s390_pv_dmp(kvm, cmd, dmp);
2748 		if (r)
2749 			break;
2750 
2751 		if (copy_to_user(argp, &dmp, sizeof(dmp))) {
2752 			r = -EFAULT;
2753 			break;
2754 		}
2755 
2756 		break;
2757 	}
2758 	default:
2759 		r = -ENOTTY;
2760 	}
2761 	if (need_lock)
2762 		mutex_unlock(&kvm->lock);
2763 
2764 	return r;
2765 }
2766 
2767 static bool access_key_invalid(u8 access_key)
2768 {
2769 	return access_key > 0xf;
2770 }
2771 
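/*
 * Perform an absolute-memory read, write or access check on guest memory
 * on behalf of user space, honoring storage-key protection if requested.
 * Not supported for protected VMs.
 */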
2772 static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
2773 {
2774 	void __user *uaddr = (void __user *)mop->buf;
2775 	u64 supported_flags;
2776 	void *tmpbuf = NULL;
2777 	int r, srcu_idx;
2778 
2779 	supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION
2780 			  | KVM_S390_MEMOP_F_CHECK_ONLY;
2781 	if (mop->flags & ~supported_flags || !mop->size)
2782 		return -EINVAL;
2783 	if (mop->size > MEM_OP_MAX_SIZE)
2784 		return -E2BIG;
2785 	/*
2786 	 * This is technically a heuristic only: if the kvm->lock is not
2787 	 * taken, it is not guaranteed that the vm is/remains non-protected.
2788 	 * This is ok from a kernel perspective; wrongdoing is detected
2789 	 * on the access, -EFAULT is returned, and the vm may crash the
2790 	 * next time it accesses the memory in question.
2791 	 * There is no sane use case for switching and doing a memop on two
2792 	 * different CPUs at the same time.
2793 	 */
2794 	if (kvm_s390_pv_get_handle(kvm))
2795 		return -EINVAL;
2796 	if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
2797 		if (access_key_invalid(mop->key))
2798 			return -EINVAL;
2799 	} else {
2800 		mop->key = 0;
2801 	}
2802 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2803 		tmpbuf = vmalloc(mop->size);
2804 		if (!tmpbuf)
2805 			return -ENOMEM;
2806 	}
2807 
2808 	srcu_idx = srcu_read_lock(&kvm->srcu);
2809 
2810 	if (kvm_is_error_gpa(kvm, mop->gaddr)) {
2811 		r = PGM_ADDRESSING;
2812 		goto out_unlock;
2813 	}
2814 
2815 	switch (mop->op) {
2816 	case KVM_S390_MEMOP_ABSOLUTE_READ: {
2817 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2818 			r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key);
2819 		} else {
2820 			r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2821 						      mop->size, GACC_FETCH, mop->key);
2822 			if (r == 0) {
2823 				if (copy_to_user(uaddr, tmpbuf, mop->size))
2824 					r = -EFAULT;
2825 			}
2826 		}
2827 		break;
2828 	}
2829 	case KVM_S390_MEMOP_ABSOLUTE_WRITE: {
2830 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2831 			r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key);
2832 		} else {
2833 			if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2834 				r = -EFAULT;
2835 				break;
2836 			}
2837 			r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2838 						      mop->size, GACC_STORE, mop->key);
2839 		}
2840 		break;
2841 	}
2842 	default:
2843 		r = -EINVAL;
2844 	}
2845 
2846 out_unlock:
2847 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2848 
2849 	vfree(tmpbuf);
2850 	return r;
2851 }
2852 
2853 long kvm_arch_vm_ioctl(struct file *filp,
2854 		       unsigned int ioctl, unsigned long arg)
2855 {
2856 	struct kvm *kvm = filp->private_data;
2857 	void __user *argp = (void __user *)arg;
2858 	struct kvm_device_attr attr;
2859 	int r;
2860 
2861 	switch (ioctl) {
2862 	case KVM_S390_INTERRUPT: {
2863 		struct kvm_s390_interrupt s390int;
2864 
2865 		r = -EFAULT;
2866 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2867 			break;
2868 		r = kvm_s390_inject_vm(kvm, &s390int);
2869 		break;
2870 	}
2871 	case KVM_CREATE_IRQCHIP: {
2872 		struct kvm_irq_routing_entry routing;
2873 
2874 		r = -EINVAL;
2875 		if (kvm->arch.use_irqchip) {
2876 			/* Set up dummy routing. */
2877 			memset(&routing, 0, sizeof(routing));
2878 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2879 		}
2880 		break;
2881 	}
2882 	case KVM_SET_DEVICE_ATTR: {
2883 		r = -EFAULT;
2884 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2885 			break;
2886 		r = kvm_s390_vm_set_attr(kvm, &attr);
2887 		break;
2888 	}
2889 	case KVM_GET_DEVICE_ATTR: {
2890 		r = -EFAULT;
2891 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2892 			break;
2893 		r = kvm_s390_vm_get_attr(kvm, &attr);
2894 		break;
2895 	}
2896 	case KVM_HAS_DEVICE_ATTR: {
2897 		r = -EFAULT;
2898 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2899 			break;
2900 		r = kvm_s390_vm_has_attr(kvm, &attr);
2901 		break;
2902 	}
2903 	case KVM_S390_GET_SKEYS: {
2904 		struct kvm_s390_skeys args;
2905 
2906 		r = -EFAULT;
2907 		if (copy_from_user(&args, argp,
2908 				   sizeof(struct kvm_s390_skeys)))
2909 			break;
2910 		r = kvm_s390_get_skeys(kvm, &args);
2911 		break;
2912 	}
2913 	case KVM_S390_SET_SKEYS: {
2914 		struct kvm_s390_skeys args;
2915 
2916 		r = -EFAULT;
2917 		if (copy_from_user(&args, argp,
2918 				   sizeof(struct kvm_s390_skeys)))
2919 			break;
2920 		r = kvm_s390_set_skeys(kvm, &args);
2921 		break;
2922 	}
2923 	case KVM_S390_GET_CMMA_BITS: {
2924 		struct kvm_s390_cmma_log args;
2925 
2926 		r = -EFAULT;
2927 		if (copy_from_user(&args, argp, sizeof(args)))
2928 			break;
2929 		mutex_lock(&kvm->slots_lock);
2930 		r = kvm_s390_get_cmma_bits(kvm, &args);
2931 		mutex_unlock(&kvm->slots_lock);
2932 		if (!r) {
2933 			r = copy_to_user(argp, &args, sizeof(args));
2934 			if (r)
2935 				r = -EFAULT;
2936 		}
2937 		break;
2938 	}
2939 	case KVM_S390_SET_CMMA_BITS: {
2940 		struct kvm_s390_cmma_log args;
2941 
2942 		r = -EFAULT;
2943 		if (copy_from_user(&args, argp, sizeof(args)))
2944 			break;
2945 		mutex_lock(&kvm->slots_lock);
2946 		r = kvm_s390_set_cmma_bits(kvm, &args);
2947 		mutex_unlock(&kvm->slots_lock);
2948 		break;
2949 	}
2950 	case KVM_S390_PV_COMMAND: {
2951 		struct kvm_pv_cmd args;
2952 
2953 		/* protvirt means user cpu state */
2954 		kvm_s390_set_user_cpu_state_ctrl(kvm);
2955 		r = 0;
2956 		if (!is_prot_virt_host()) {
2957 			r = -EINVAL;
2958 			break;
2959 		}
2960 		if (copy_from_user(&args, argp, sizeof(args))) {
2961 			r = -EFAULT;
2962 			break;
2963 		}
2964 		if (args.flags) {
2965 			r = -EINVAL;
2966 			break;
2967 		}
2968 		/* must be called without kvm->lock */
2969 		r = kvm_s390_handle_pv(kvm, &args);
2970 		if (copy_to_user(argp, &args, sizeof(args))) {
2971 			r = -EFAULT;
2972 			break;
2973 		}
2974 		break;
2975 	}
2976 	case KVM_S390_MEM_OP: {
2977 		struct kvm_s390_mem_op mem_op;
2978 
2979 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2980 			r = kvm_s390_vm_mem_op(kvm, &mem_op);
2981 		else
2982 			r = -EFAULT;
2983 		break;
2984 	}
2985 	case KVM_S390_ZPCI_OP: {
2986 		struct kvm_s390_zpci_op args;
2987 
2988 		r = -EINVAL;
2989 		if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
2990 			break;
2991 		if (copy_from_user(&args, argp, sizeof(args))) {
2992 			r = -EFAULT;
2993 			break;
2994 		}
2995 		r = kvm_s390_pci_zpci_op(kvm, &args);
2996 		break;
2997 	}
2998 	default:
2999 		r = -ENOTTY;
3000 	}
3001 
3002 	return r;
3003 }
3004 
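/* Query whether the AP extended addressing (APXA) facility is installed. */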
3005 static int kvm_s390_apxa_installed(void)
3006 {
3007 	struct ap_config_info info;
3008 
3009 	if (ap_instructions_available()) {
3010 		if (ap_qci(&info) == 0)
3011 			return info.apxa;
3012 	}
3013 
3014 	return 0;
3015 }
3016 
3017 /*
3018  * The format of the crypto control block (CRYCB) is specified in the 3 low
3019  * order bits of the CRYCB designation (CRYCBD) field as follows:
3020  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
3021  *	     AP extended addressing (APXA) facility are installed.
3022  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
3023  * Format 2: Both the APXA and MSAX3 facilities are installed.
3024  */
3025 static void kvm_s390_set_crycb_format(struct kvm *kvm)
3026 {
3027 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
3028 
3029 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
3030 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
3031 
3032 	/* Check whether MSAX3 is installed */
3033 	if (!test_kvm_facility(kvm, 76))
3034 		return;
3035 
3036 	if (kvm_s390_apxa_installed())
3037 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
3038 	else
3039 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
3040 }
3041 
3042 /*
3043  * kvm_arch_crypto_set_masks
3044  *
3045  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
3046  *	 to be set.
3047  * @apm: the mask identifying the accessible AP adapters
3048  * @aqm: the mask identifying the accessible AP domains
3049  * @adm: the mask identifying the accessible AP control domains
3050  *
3051  * Set the masks that identify the adapters, domains and control domains to
3052  * which the KVM guest is granted access.
3053  *
3054  * Note: The kvm->lock mutex must be locked by the caller before invoking this
3055  *	 function.
3056  */
3057 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
3058 			       unsigned long *aqm, unsigned long *adm)
3059 {
3060 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
3061 
3062 	kvm_s390_vcpu_block_all(kvm);
3063 
3064 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
3065 	case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
3066 		memcpy(crycb->apcb1.apm, apm, 32);
3067 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
3068 			 apm[0], apm[1], apm[2], apm[3]);
3069 		memcpy(crycb->apcb1.aqm, aqm, 32);
3070 		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
3071 			 aqm[0], aqm[1], aqm[2], aqm[3]);
3072 		memcpy(crycb->apcb1.adm, adm, 32);
3073 		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
3074 			 adm[0], adm[1], adm[2], adm[3]);
3075 		break;
3076 	case CRYCB_FORMAT1:
3077 	case CRYCB_FORMAT0: /* Fall through - both use APCB0 */
3078 		memcpy(crycb->apcb0.apm, apm, 8);
3079 		memcpy(crycb->apcb0.aqm, aqm, 2);
3080 		memcpy(crycb->apcb0.adm, adm, 2);
3081 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
3082 			 apm[0], *((unsigned short *)aqm),
3083 			 *((unsigned short *)adm));
3084 		break;
3085 	default:	/* Cannot happen */
3086 		break;
3087 	}
3088 
3089 	/* recreate the shadow crycb for each vcpu */
3090 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
3091 	kvm_s390_vcpu_unblock_all(kvm);
3092 }
3093 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
3094 
3095 /*
3096  * kvm_arch_crypto_clear_masks
3097  *
3098  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
3099  *	 to be cleared.
3100  *
3101  * Clear the masks that identify the adapters, domains and control domains to
3102  * which the KVM guest is granted access.
3103  *
3104  * Note: The kvm->lock mutex must be locked by the caller before invoking this
3105  *	 function.
3106  */
3107 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
3108 {
3109 	kvm_s390_vcpu_block_all(kvm);
3110 
3111 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
3112 	       sizeof(kvm->arch.crypto.crycb->apcb0));
3113 	memset(&kvm->arch.crypto.crycb->apcb1, 0,
3114 	       sizeof(kvm->arch.crypto.crycb->apcb1));
3115 
3116 	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
3117 	/* recreate the shadow crycb for each vcpu */
3118 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
3119 	kvm_s390_vcpu_unblock_all(kvm);
3120 }
3121 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
3122 
3123 static u64 kvm_s390_get_initial_cpuid(void)
3124 {
3125 	struct cpuid cpuid;
3126 
3127 	get_cpu_id(&cpuid);
3128 	cpuid.version = 0xff;
3129 	return *((u64 *) &cpuid);
3130 }
3131 
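/*
 * Set up the guest's crypto control block: choose the CRYCB format and,
 * if the MSAX3 facility is available, enable AES/DEA protected key
 * functions with freshly generated wrapping key masks.
 */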
3132 static void kvm_s390_crypto_init(struct kvm *kvm)
3133 {
3134 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
3135 	kvm_s390_set_crycb_format(kvm);
3136 	init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
3137 
3138 	if (!test_kvm_facility(kvm, 76))
3139 		return;
3140 
3141 	/* Enable AES/DEA protected key functions by default */
3142 	kvm->arch.crypto.aes_kw = 1;
3143 	kvm->arch.crypto.dea_kw = 1;
3144 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
3145 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
3146 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
3147 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
3148 }
3149 
3150 static void sca_dispose(struct kvm *kvm)
3151 {
3152 	if (kvm->arch.use_esca)
3153 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
3154 	else
3155 		free_page((unsigned long)(kvm->arch.sca));
3156 	kvm->arch.sca = NULL;
3157 }
3158 
3159 void kvm_arch_free_vm(struct kvm *kvm)
3160 {
3161 	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
3162 		kvm_s390_pci_clear_list(kvm);
3163 
3164 	__kvm_arch_free_vm(kvm);
3165 }
3166 
3167 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
3168 {
3169 	gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
3170 	int i, rc;
3171 	char debug_name[16];
3172 	static unsigned long sca_offset;
3173 
3174 	rc = -EINVAL;
3175 #ifdef CONFIG_KVM_S390_UCONTROL
3176 	if (type & ~KVM_VM_S390_UCONTROL)
3177 		goto out_err;
3178 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
3179 		goto out_err;
3180 #else
3181 	if (type)
3182 		goto out_err;
3183 #endif
3184 
3185 	rc = s390_enable_sie();
3186 	if (rc)
3187 		goto out_err;
3188 
3189 	rc = -ENOMEM;
3190 
3191 	if (!sclp.has_64bscao)
3192 		alloc_flags |= GFP_DMA;
3193 	rwlock_init(&kvm->arch.sca_lock);
3194 	/* start with basic SCA */
3195 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
3196 	if (!kvm->arch.sca)
3197 		goto out_err;
3198 	mutex_lock(&kvm_lock);
3199 	sca_offset += 16;
3200 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
3201 		sca_offset = 0;
3202 	kvm->arch.sca = (struct bsca_block *)
3203 			((char *) kvm->arch.sca + sca_offset);
3204 	mutex_unlock(&kvm_lock);
3205 
3206 	sprintf(debug_name, "kvm-%u", current->pid);
3207 
3208 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
3209 	if (!kvm->arch.dbf)
3210 		goto out_err;
3211 
3212 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
3213 	kvm->arch.sie_page2 =
3214 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
3215 	if (!kvm->arch.sie_page2)
3216 		goto out_err;
3217 
3218 	kvm->arch.sie_page2->kvm = kvm;
3219 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
3220 
3221 	for (i = 0; i < kvm_s390_fac_size(); i++) {
3222 		kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
3223 					      (kvm_s390_fac_base[i] |
3224 					       kvm_s390_fac_ext[i]);
3225 		kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
3226 					      kvm_s390_fac_base[i];
3227 	}
3228 	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
3229 
3230 	/* we are always in czam mode - even on pre z14 machines */
3231 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
3232 	set_kvm_facility(kvm->arch.model.fac_list, 138);
3233 	/* we emulate STHYI in kvm */
3234 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
3235 	set_kvm_facility(kvm->arch.model.fac_list, 74);
3236 	if (MACHINE_HAS_TLB_GUEST) {
3237 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
3238 		set_kvm_facility(kvm->arch.model.fac_list, 147);
3239 	}
3240 
3241 	if (css_general_characteristics.aiv && test_facility(65))
3242 		set_kvm_facility(kvm->arch.model.fac_mask, 65);
3243 
3244 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
3245 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
3246 
3247 	kvm_s390_crypto_init(kvm);
3248 
3249 	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
3250 		mutex_lock(&kvm->lock);
3251 		kvm_s390_pci_init_list(kvm);
3252 		kvm_s390_vcpu_pci_enable_interp(kvm);
3253 		mutex_unlock(&kvm->lock);
3254 	}
3255 
3256 	mutex_init(&kvm->arch.float_int.ais_lock);
3257 	spin_lock_init(&kvm->arch.float_int.lock);
3258 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
3259 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
3260 	init_waitqueue_head(&kvm->arch.ipte_wq);
3261 	mutex_init(&kvm->arch.ipte_mutex);
3262 
3263 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
3264 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
3265 
3266 	if (type & KVM_VM_S390_UCONTROL) {
3267 		kvm->arch.gmap = NULL;
3268 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
3269 	} else {
3270 		if (sclp.hamax == U64_MAX)
3271 			kvm->arch.mem_limit = TASK_SIZE_MAX;
3272 		else
3273 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
3274 						    sclp.hamax + 1);
3275 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
3276 		if (!kvm->arch.gmap)
3277 			goto out_err;
3278 		kvm->arch.gmap->private = kvm;
3279 		kvm->arch.gmap->pfault_enabled = 0;
3280 	}
3281 
3282 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
3283 	kvm->arch.use_skf = sclp.has_skey;
3284 	spin_lock_init(&kvm->arch.start_stop_lock);
3285 	kvm_s390_vsie_init(kvm);
3286 	if (use_gisa)
3287 		kvm_s390_gisa_init(kvm);
3288 	INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup);
3289 	kvm->arch.pv.set_aside = NULL;
3290 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
3291 
3292 	return 0;
3293 out_err:
3294 	free_page((unsigned long)kvm->arch.sie_page2);
3295 	debug_unregister(kvm->arch.dbf);
3296 	sca_dispose(kvm);
3297 	KVM_EVENT(3, "creation of vm failed: %d", rc);
3298 	return rc;
3299 }
3300 
3301 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
3302 {
3303 	u16 rc, rrc;
3304 
3305 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
3306 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
3307 	kvm_s390_clear_local_irqs(vcpu);
3308 	kvm_clear_async_pf_completion_queue(vcpu);
3309 	if (!kvm_is_ucontrol(vcpu->kvm))
3310 		sca_del_vcpu(vcpu);
3311 	kvm_s390_update_topology_change_report(vcpu->kvm, 1);
3312 
3313 	if (kvm_is_ucontrol(vcpu->kvm))
3314 		gmap_remove(vcpu->arch.gmap);
3315 
3316 	if (vcpu->kvm->arch.use_cmma)
3317 		kvm_s390_vcpu_unsetup_cmma(vcpu);
3318 	/* We cannot hold the vcpu mutex here, we are already dying */
3319 	if (kvm_s390_pv_cpu_get_handle(vcpu))
3320 		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
3321 	free_page((unsigned long)(vcpu->arch.sie_block));
3322 }
3323 
3324 void kvm_arch_destroy_vm(struct kvm *kvm)
3325 {
3326 	u16 rc, rrc;
3327 
3328 	kvm_destroy_vcpus(kvm);
3329 	sca_dispose(kvm);
3330 	kvm_s390_gisa_destroy(kvm);
3331 	/*
3332 	 * We are already at the end of life and kvm->lock is not taken.
3333 	 * This is ok as the file descriptor is closed by now and nobody
3334 	 * can mess with the pv state.
3335 	 */
3336 	kvm_s390_pv_deinit_cleanup_all(kvm, &rc, &rrc);
3337 	/*
3338 	 * Remove the mmu notifier only when the whole KVM VM is torn down,
3339 	 * and only if one was registered to begin with. If the VM is
3340 	 * currently not protected, but has previously been protected,
3341 	 * then it's possible that the notifier is still registered.
3342 	 */
3343 	if (kvm->arch.pv.mmu_notifier.ops)
3344 		mmu_notifier_unregister(&kvm->arch.pv.mmu_notifier, kvm->mm);
3345 
3346 	debug_unregister(kvm->arch.dbf);
3347 	free_page((unsigned long)kvm->arch.sie_page2);
3348 	if (!kvm_is_ucontrol(kvm))
3349 		gmap_remove(kvm->arch.gmap);
3350 	kvm_s390_destroy_adapters(kvm);
3351 	kvm_s390_clear_float_irqs(kvm);
3352 	kvm_s390_vsie_destroy(kvm);
3353 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
3354 }
3355 
3356 /* Section: vcpu related */
3357 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
3358 {
3359 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
3360 	if (!vcpu->arch.gmap)
3361 		return -ENOMEM;
3362 	vcpu->arch.gmap->private = vcpu->kvm;
3363 
3364 	return 0;
3365 }
3366 
3367 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
3368 {
3369 	if (!kvm_s390_use_sca_entries())
3370 		return;
3371 	read_lock(&vcpu->kvm->arch.sca_lock);
3372 	if (vcpu->kvm->arch.use_esca) {
3373 		struct esca_block *sca = vcpu->kvm->arch.sca;
3374 
3375 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
3376 		sca->cpu[vcpu->vcpu_id].sda = 0;
3377 	} else {
3378 		struct bsca_block *sca = vcpu->kvm->arch.sca;
3379 
3380 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
3381 		sca->cpu[vcpu->vcpu_id].sda = 0;
3382 	}
3383 	read_unlock(&vcpu->kvm->arch.sca_lock);
3384 }
3385 
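/*
 * Register the vCPU's SIE control block in the SCA and point the SIE block
 * at the SCA origin, for both the basic and the extended SCA format.
 */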
3386 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
3387 {
3388 	if (!kvm_s390_use_sca_entries()) {
3389 		phys_addr_t sca_phys = virt_to_phys(vcpu->kvm->arch.sca);
3390 
3391 		/* we still need the basic sca for the ipte control */
3392 		vcpu->arch.sie_block->scaoh = sca_phys >> 32;
3393 		vcpu->arch.sie_block->scaol = sca_phys;
3394 		return;
3395 	}
3396 	read_lock(&vcpu->kvm->arch.sca_lock);
3397 	if (vcpu->kvm->arch.use_esca) {
3398 		struct esca_block *sca = vcpu->kvm->arch.sca;
3399 		phys_addr_t sca_phys = virt_to_phys(sca);
3400 
3401 		sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
3402 		vcpu->arch.sie_block->scaoh = sca_phys >> 32;
3403 		vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK;
3404 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3405 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
3406 	} else {
3407 		struct bsca_block *sca = vcpu->kvm->arch.sca;
3408 		phys_addr_t sca_phys = virt_to_phys(sca);
3409 
3410 		sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
3411 		vcpu->arch.sie_block->scaoh = sca_phys >> 32;
3412 		vcpu->arch.sie_block->scaol = sca_phys;
3413 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
3414 	}
3415 	read_unlock(&vcpu->kvm->arch.sca_lock);
3416 }
3417 
3418 /* Basic SCA to Extended SCA data copy routines */
3419 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
3420 {
3421 	d->sda = s->sda;
3422 	d->sigp_ctrl.c = s->sigp_ctrl.c;
3423 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
3424 }
3425 
3426 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
3427 {
3428 	int i;
3429 
3430 	d->ipte_control = s->ipte_control;
3431 	d->mcn[0] = s->mcn;
3432 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
3433 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
3434 }
3435 
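/*
 * Replace the basic SCA with an extended SCA: allocate the new block, copy
 * over the existing entries, rewrite the SCA origin in every vCPU's SIE
 * control block and free the old basic SCA.
 */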
3436 static int sca_switch_to_extended(struct kvm *kvm)
3437 {
3438 	struct bsca_block *old_sca = kvm->arch.sca;
3439 	struct esca_block *new_sca;
3440 	struct kvm_vcpu *vcpu;
3441 	unsigned long vcpu_idx;
3442 	u32 scaol, scaoh;
3443 	phys_addr_t new_sca_phys;
3444 
3445 	if (kvm->arch.use_esca)
3446 		return 0;
3447 
3448 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
3449 	if (!new_sca)
3450 		return -ENOMEM;
3451 
3452 	new_sca_phys = virt_to_phys(new_sca);
3453 	scaoh = new_sca_phys >> 32;
3454 	scaol = new_sca_phys & ESCA_SCAOL_MASK;
3455 
3456 	kvm_s390_vcpu_block_all(kvm);
3457 	write_lock(&kvm->arch.sca_lock);
3458 
3459 	sca_copy_b_to_e(new_sca, old_sca);
3460 
3461 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
3462 		vcpu->arch.sie_block->scaoh = scaoh;
3463 		vcpu->arch.sie_block->scaol = scaol;
3464 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3465 	}
3466 	kvm->arch.sca = new_sca;
3467 	kvm->arch.use_esca = 1;
3468 
3469 	write_unlock(&kvm->arch.sca_lock);
3470 	kvm_s390_vcpu_unblock_all(kvm);
3471 
3472 	free_page((unsigned long)old_sca);
3473 
3474 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
3475 		 old_sca, kvm->arch.sca);
3476 	return 0;
3477 }
3478 
3479 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
3480 {
3481 	int rc;
3482 
3483 	if (!kvm_s390_use_sca_entries()) {
3484 		if (id < KVM_MAX_VCPUS)
3485 			return true;
3486 		return false;
3487 	}
3488 	if (id < KVM_S390_BSCA_CPU_SLOTS)
3489 		return true;
3490 	if (!sclp.has_esca || !sclp.has_64bscao)
3491 		return false;
3492 
3493 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
3494 
3495 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3496 }
3497 
3498 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3499 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3500 {
3501 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3502 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3503 	vcpu->arch.cputm_start = get_tod_clock_fast();
3504 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3505 }
3506 
3507 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3508 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3509 {
3510 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3511 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3512 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3513 	vcpu->arch.cputm_start = 0;
3514 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3515 }
3516 
3517 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3518 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3519 {
3520 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3521 	vcpu->arch.cputm_enabled = true;
3522 	__start_cpu_timer_accounting(vcpu);
3523 }
3524 
3525 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3526 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3527 {
3528 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3529 	__stop_cpu_timer_accounting(vcpu);
3530 	vcpu->arch.cputm_enabled = false;
3531 }
3532 
3533 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3534 {
3535 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3536 	__enable_cpu_timer_accounting(vcpu);
3537 	preempt_enable();
3538 }
3539 
3540 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3541 {
3542 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3543 	__disable_cpu_timer_accounting(vcpu);
3544 	preempt_enable();
3545 }
3546 
3547 /* set the cpu timer - may only be called from the VCPU thread itself */
3548 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3549 {
3550 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3551 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3552 	if (vcpu->arch.cputm_enabled)
3553 		vcpu->arch.cputm_start = get_tod_clock_fast();
3554 	vcpu->arch.sie_block->cputm = cputm;
3555 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3556 	preempt_enable();
3557 }
3558 
3559 /* update and get the cpu timer - can also be called from other VCPU threads */
3560 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3561 {
3562 	unsigned int seq;
3563 	__u64 value;
3564 
3565 	if (unlikely(!vcpu->arch.cputm_enabled))
3566 		return vcpu->arch.sie_block->cputm;
3567 
3568 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3569 	do {
3570 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3571 		/*
3572 		 * If the writer would ever execute a read in the critical
3573 		 * section, e.g. in irq context, we have a deadlock.
3574 		 */
3575 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3576 		value = vcpu->arch.sie_block->cputm;
3577 		/* if cputm_start is 0, accounting is being started/stopped */
3578 		if (likely(vcpu->arch.cputm_start))
3579 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3580 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3581 	preempt_enable();
3582 	return value;
3583 }
3584 
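/*
 * vcpu_load callback: enable the last used gmap, mark the VCPU as running
 * and resume CPU timer accounting if it is enabled and the VCPU is not idle.
 */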
3585 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3586 {
3587 
3588 	gmap_enable(vcpu->arch.enabled_gmap);
3589 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3590 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3591 		__start_cpu_timer_accounting(vcpu);
3592 	vcpu->cpu = cpu;
3593 }
3594 
3595 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3596 {
3597 	vcpu->cpu = -1;
3598 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3599 		__stop_cpu_timer_accounting(vcpu);
3600 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3601 	vcpu->arch.enabled_gmap = gmap_get_enabled();
3602 	gmap_disable(vcpu->arch.enabled_gmap);
3603 
3604 }
3605 
3606 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3607 {
3608 	mutex_lock(&vcpu->kvm->lock);
3609 	preempt_disable();
3610 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3611 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3612 	preempt_enable();
3613 	mutex_unlock(&vcpu->kvm->lock);
3614 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3615 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3616 		sca_add_vcpu(vcpu);
3617 	}
3618 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3619 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3620 	/* make vcpu_load load the right gmap on the first trigger */
3621 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3622 }
3623 
3624 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3625 {
3626 	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3627 	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3628 		return true;
3629 	return false;
3630 }
3631 
3632 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3633 {
3634 	/* At least one ECC subfunction must be present */
3635 	return kvm_has_pckmo_subfunc(kvm, 32) ||
3636 	       kvm_has_pckmo_subfunc(kvm, 33) ||
3637 	       kvm_has_pckmo_subfunc(kvm, 34) ||
3638 	       kvm_has_pckmo_subfunc(kvm, 40) ||
3639 	       kvm_has_pckmo_subfunc(kvm, 41);
3640 
3641 }
3642 
3643 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3644 {
3645 	/*
3646 	 * If the AP instructions are not being interpreted and the MSAX3
3647 	 * facility is not configured for the guest, there is nothing to set up.
3648 	 */
3649 	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3650 		return;
3651 
3652 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3653 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3654 	vcpu->arch.sie_block->eca &= ~ECA_APIE;
3655 	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3656 
3657 	if (vcpu->kvm->arch.crypto.apie)
3658 		vcpu->arch.sie_block->eca |= ECA_APIE;
3659 
3660 	/* Set up protected key support */
3661 	if (vcpu->kvm->arch.crypto.aes_kw) {
3662 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3663 		/* ECC is also wrapped with the AES key */
3664 		if (kvm_has_pckmo_ecc(vcpu->kvm))
3665 			vcpu->arch.sie_block->ecd |= ECD_ECC;
3666 	}
3667 
3668 	if (vcpu->kvm->arch.crypto.dea_kw)
3669 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3670 }
3671 
3672 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3673 {
3674 	free_page((unsigned long)phys_to_virt(vcpu->arch.sie_block->cbrlo));
3675 	vcpu->arch.sie_block->cbrlo = 0;
3676 }
3677 
3678 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3679 {
3680 	void *cbrlo_page = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
3681 
3682 	if (!cbrlo_page)
3683 		return -ENOMEM;
3684 
3685 	vcpu->arch.sie_block->cbrlo = virt_to_phys(cbrlo_page);
3686 	return 0;
3687 }
3688 
3689 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3690 {
3691 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3692 
3693 	vcpu->arch.sie_block->ibc = model->ibc;
3694 	if (test_kvm_facility(vcpu->kvm, 7))
3695 		vcpu->arch.sie_block->fac = virt_to_phys(model->fac_list);
3696 }
3697 
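/*
 * Initialize the SIE control block of a freshly created VCPU: set the
 * execution controls according to the configured facilities and SCLP
 * features, set up CMMA, the clock comparator timer, crypto and PCI state,
 * and create the matching ultravisor CPU if the VM is already protected.
 */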
3698 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3699 {
3700 	int rc = 0;
3701 	u16 uvrc, uvrrc;
3702 
3703 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3704 						    CPUSTAT_SM |
3705 						    CPUSTAT_STOPPED);
3706 
3707 	if (test_kvm_facility(vcpu->kvm, 78))
3708 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3709 	else if (test_kvm_facility(vcpu->kvm, 8))
3710 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3711 
3712 	kvm_s390_vcpu_setup_model(vcpu);
3713 
3714 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3715 	if (MACHINE_HAS_ESOP)
3716 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3717 	if (test_kvm_facility(vcpu->kvm, 9))
3718 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
3719 	if (test_kvm_facility(vcpu->kvm, 11))
3720 		vcpu->arch.sie_block->ecb |= ECB_PTF;
3721 	if (test_kvm_facility(vcpu->kvm, 73))
3722 		vcpu->arch.sie_block->ecb |= ECB_TE;
3723 	if (!kvm_is_ucontrol(vcpu->kvm))
3724 		vcpu->arch.sie_block->ecb |= ECB_SPECI;
3725 
3726 	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3727 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3728 	if (test_kvm_facility(vcpu->kvm, 130))
3729 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3730 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3731 	if (sclp.has_cei)
3732 		vcpu->arch.sie_block->eca |= ECA_CEI;
3733 	if (sclp.has_ib)
3734 		vcpu->arch.sie_block->eca |= ECA_IB;
3735 	if (sclp.has_siif)
3736 		vcpu->arch.sie_block->eca |= ECA_SII;
3737 	if (sclp.has_sigpif)
3738 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
3739 	if (test_kvm_facility(vcpu->kvm, 129)) {
3740 		vcpu->arch.sie_block->eca |= ECA_VX;
3741 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3742 	}
3743 	if (test_kvm_facility(vcpu->kvm, 139))
3744 		vcpu->arch.sie_block->ecd |= ECD_MEF;
3745 	if (test_kvm_facility(vcpu->kvm, 156))
3746 		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3747 	if (vcpu->arch.sie_block->gd) {
3748 		vcpu->arch.sie_block->eca |= ECA_AIV;
3749 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3750 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3751 	}
3752 	vcpu->arch.sie_block->sdnxo = virt_to_phys(&vcpu->run->s.regs.sdnx) | SDNXC;
3753 	vcpu->arch.sie_block->riccbd = virt_to_phys(&vcpu->run->s.regs.riccb);
3754 
3755 	if (sclp.has_kss)
3756 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3757 	else
3758 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3759 
3760 	if (vcpu->kvm->arch.use_cmma) {
3761 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
3762 		if (rc)
3763 			return rc;
3764 	}
3765 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3766 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3767 
3768 	vcpu->arch.sie_block->hpid = HPID_KVM;
3769 
3770 	kvm_s390_vcpu_crypto_setup(vcpu);
3771 
3772 	kvm_s390_vcpu_pci_setup(vcpu);
3773 
3774 	mutex_lock(&vcpu->kvm->lock);
3775 	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3776 		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3777 		if (rc)
3778 			kvm_s390_vcpu_unsetup_cmma(vcpu);
3779 	}
3780 	mutex_unlock(&vcpu->kvm->lock);
3781 
3782 	return rc;
3783 }
3784 
3785 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3786 {
3787 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3788 		return -EINVAL;
3789 	return 0;
3790 }
3791 
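/*
 * Allocate the SIE control block and set up the register synchronization
 * area for a new VCPU; ucontrol VMs additionally get a per-VCPU gmap.
 */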
3792 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3793 {
3794 	struct sie_page *sie_page;
3795 	int rc;
3796 
3797 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3798 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3799 	if (!sie_page)
3800 		return -ENOMEM;
3801 
3802 	vcpu->arch.sie_block = &sie_page->sie_block;
3803 	vcpu->arch.sie_block->itdba = virt_to_phys(&sie_page->itdb);
3804 
3805 	/* the real guest size will always be smaller than msl */
3806 	vcpu->arch.sie_block->mso = 0;
3807 	vcpu->arch.sie_block->msl = sclp.hamax;
3808 
3809 	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3810 	spin_lock_init(&vcpu->arch.local_int.lock);
3811 	vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm);
3812 	seqcount_init(&vcpu->arch.cputm_seqcount);
3813 
3814 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3815 	kvm_clear_async_pf_completion_queue(vcpu);
3816 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3817 				    KVM_SYNC_GPRS |
3818 				    KVM_SYNC_ACRS |
3819 				    KVM_SYNC_CRS |
3820 				    KVM_SYNC_ARCH0 |
3821 				    KVM_SYNC_PFAULT |
3822 				    KVM_SYNC_DIAG318;
3823 	kvm_s390_set_prefix(vcpu, 0);
3824 	if (test_kvm_facility(vcpu->kvm, 64))
3825 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3826 	if (test_kvm_facility(vcpu->kvm, 82))
3827 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3828 	if (test_kvm_facility(vcpu->kvm, 133))
3829 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3830 	if (test_kvm_facility(vcpu->kvm, 156))
3831 		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3832 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
3833 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3834 	 */
3835 	if (MACHINE_HAS_VX)
3836 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3837 	else
3838 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3839 
3840 	if (kvm_is_ucontrol(vcpu->kvm)) {
3841 		rc = __kvm_ucontrol_vcpu_init(vcpu);
3842 		if (rc)
3843 			goto out_free_sie_block;
3844 	}
3845 
3846 	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3847 		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3848 	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3849 
3850 	rc = kvm_s390_vcpu_setup(vcpu);
3851 	if (rc)
3852 		goto out_ucontrol_uninit;
3853 
3854 	kvm_s390_update_topology_change_report(vcpu->kvm, 1);
3855 	return 0;
3856 
3857 out_ucontrol_uninit:
3858 	if (kvm_is_ucontrol(vcpu->kvm))
3859 		gmap_remove(vcpu->arch.gmap);
3860 out_free_sie_block:
3861 	free_page((unsigned long)(vcpu->arch.sie_block));
3862 	return rc;
3863 }
3864 
3865 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3866 {
3867 	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3868 	return kvm_s390_vcpu_has_irq(vcpu, 0);
3869 }
3870 
3871 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3872 {
3873 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3874 }
3875 
3876 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3877 {
3878 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3879 	exit_sie(vcpu);
3880 }
3881 
3882 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3883 {
3884 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3885 }
3886 
3887 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3888 {
3889 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3890 	exit_sie(vcpu);
3891 }
3892 
3893 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3894 {
3895 	return atomic_read(&vcpu->arch.sie_block->prog20) &
3896 	       (PROG_BLOCK_SIE | PROG_REQUEST);
3897 }
3898 
3899 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3900 {
3901 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3902 }
3903 
3904 /*
3905  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3906  * If the CPU is not running (e.g. waiting as idle) the function will
3907  * return immediately. */
3908 void exit_sie(struct kvm_vcpu *vcpu)
3909 {
3910 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3911 	kvm_s390_vsie_kick(vcpu);
3912 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3913 		cpu_relax();
3914 }
3915 
3916 /* Kick a guest cpu out of SIE to process a request synchronously */
3917 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3918 {
3919 	__kvm_make_request(req, vcpu);
3920 	kvm_s390_vcpu_request(vcpu);
3921 }
3922 
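/*
 * gmap invalidation notifier: if the invalidated range overlaps the prefix
 * pages of a VCPU, synchronously request a prefix refresh for that VCPU.
 */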
3923 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3924 			      unsigned long end)
3925 {
3926 	struct kvm *kvm = gmap->private;
3927 	struct kvm_vcpu *vcpu;
3928 	unsigned long prefix;
3929 	unsigned long i;
3930 
3931 	if (gmap_is_shadow(gmap))
3932 		return;
3933 	if (start >= 1UL << 31)
3934 		/* We are only interested in prefix pages */
3935 		return;
3936 	kvm_for_each_vcpu(i, vcpu, kvm) {
3937 		/* match against both prefix pages */
3938 		prefix = kvm_s390_get_prefix(vcpu);
3939 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3940 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3941 				   start, end);
3942 			kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
3943 		}
3944 	}
3945 }
3946 
3947 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3948 {
3949 	/* do not poll with more than halt_poll_max_steal percent of steal time */
3950 	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3951 	    READ_ONCE(halt_poll_max_steal)) {
3952 		vcpu->stat.halt_no_poll_steal++;
3953 		return true;
3954 	}
3955 	return false;
3956 }
3957 
3958 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3959 {
3960 	/* kvm common code refers to this, but never calls it */
3961 	BUG();
3962 	return 0;
3963 }
3964 
3965 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3966 					   struct kvm_one_reg *reg)
3967 {
3968 	int r = -EINVAL;
3969 
3970 	switch (reg->id) {
3971 	case KVM_REG_S390_TODPR:
3972 		r = put_user(vcpu->arch.sie_block->todpr,
3973 			     (u32 __user *)reg->addr);
3974 		break;
3975 	case KVM_REG_S390_EPOCHDIFF:
3976 		r = put_user(vcpu->arch.sie_block->epoch,
3977 			     (u64 __user *)reg->addr);
3978 		break;
3979 	case KVM_REG_S390_CPU_TIMER:
3980 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
3981 			     (u64 __user *)reg->addr);
3982 		break;
3983 	case KVM_REG_S390_CLOCK_COMP:
3984 		r = put_user(vcpu->arch.sie_block->ckc,
3985 			     (u64 __user *)reg->addr);
3986 		break;
3987 	case KVM_REG_S390_PFTOKEN:
3988 		r = put_user(vcpu->arch.pfault_token,
3989 			     (u64 __user *)reg->addr);
3990 		break;
3991 	case KVM_REG_S390_PFCOMPARE:
3992 		r = put_user(vcpu->arch.pfault_compare,
3993 			     (u64 __user *)reg->addr);
3994 		break;
3995 	case KVM_REG_S390_PFSELECT:
3996 		r = put_user(vcpu->arch.pfault_select,
3997 			     (u64 __user *)reg->addr);
3998 		break;
3999 	case KVM_REG_S390_PP:
4000 		r = put_user(vcpu->arch.sie_block->pp,
4001 			     (u64 __user *)reg->addr);
4002 		break;
4003 	case KVM_REG_S390_GBEA:
4004 		r = put_user(vcpu->arch.sie_block->gbea,
4005 			     (u64 __user *)reg->addr);
4006 		break;
4007 	default:
4008 		break;
4009 	}
4010 
4011 	return r;
4012 }
4013 
4014 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
4015 					   struct kvm_one_reg *reg)
4016 {
4017 	int r = -EINVAL;
4018 	__u64 val;
4019 
4020 	switch (reg->id) {
4021 	case KVM_REG_S390_TODPR:
4022 		r = get_user(vcpu->arch.sie_block->todpr,
4023 			     (u32 __user *)reg->addr);
4024 		break;
4025 	case KVM_REG_S390_EPOCHDIFF:
4026 		r = get_user(vcpu->arch.sie_block->epoch,
4027 			     (u64 __user *)reg->addr);
4028 		break;
4029 	case KVM_REG_S390_CPU_TIMER:
4030 		r = get_user(val, (u64 __user *)reg->addr);
4031 		if (!r)
4032 			kvm_s390_set_cpu_timer(vcpu, val);
4033 		break;
4034 	case KVM_REG_S390_CLOCK_COMP:
4035 		r = get_user(vcpu->arch.sie_block->ckc,
4036 			     (u64 __user *)reg->addr);
4037 		break;
4038 	case KVM_REG_S390_PFTOKEN:
4039 		r = get_user(vcpu->arch.pfault_token,
4040 			     (u64 __user *)reg->addr);
4041 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4042 			kvm_clear_async_pf_completion_queue(vcpu);
4043 		break;
4044 	case KVM_REG_S390_PFCOMPARE:
4045 		r = get_user(vcpu->arch.pfault_compare,
4046 			     (u64 __user *)reg->addr);
4047 		break;
4048 	case KVM_REG_S390_PFSELECT:
4049 		r = get_user(vcpu->arch.pfault_select,
4050 			     (u64 __user *)reg->addr);
4051 		break;
4052 	case KVM_REG_S390_PP:
4053 		r = get_user(vcpu->arch.sie_block->pp,
4054 			     (u64 __user *)reg->addr);
4055 		break;
4056 	case KVM_REG_S390_GBEA:
4057 		r = get_user(vcpu->arch.sie_block->gbea,
4058 			     (u64 __user *)reg->addr);
4059 		break;
4060 	default:
4061 		break;
4062 	}
4063 
4064 	return r;
4065 }
4066 
4067 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
4068 {
4069 	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
4070 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
4071 	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
4072 
4073 	kvm_clear_async_pf_completion_queue(vcpu);
4074 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
4075 		kvm_s390_vcpu_stop(vcpu);
4076 	kvm_s390_clear_local_irqs(vcpu);
4077 }
4078 
4079 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
4080 {
4081 	/* Initial reset is a superset of the normal reset */
4082 	kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4083 
4084 	/*
4085 	 * This equals the initial cpu reset in the PoP, but we don't switch
4086 	 * to ESA. We not only reset the internal data, but also ...
4087 	 */
4088 	vcpu->arch.sie_block->gpsw.mask = 0;
4089 	vcpu->arch.sie_block->gpsw.addr = 0;
4090 	kvm_s390_set_prefix(vcpu, 0);
4091 	kvm_s390_set_cpu_timer(vcpu, 0);
4092 	vcpu->arch.sie_block->ckc = 0;
4093 	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
4094 	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
4095 	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
4096 
4097 	/* ... the data in sync regs */
4098 	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
4099 	vcpu->run->s.regs.ckc = 0;
4100 	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
4101 	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
4102 	vcpu->run->psw_addr = 0;
4103 	vcpu->run->psw_mask = 0;
4104 	vcpu->run->s.regs.todpr = 0;
4105 	vcpu->run->s.regs.cputm = 0;
4106 	vcpu->run->s.regs.ckc = 0;
4107 	vcpu->run->s.regs.pp = 0;
4108 	vcpu->run->s.regs.gbea = 1;
4109 	vcpu->run->s.regs.fpc = 0;
4110 	/*
4111 	 * Do not reset these registers in the protected case, as some of
4112 	 * them are overlaid and they are not accessible in this case
4113 	 * anyway.
4114 	 */
4115 	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
4116 		vcpu->arch.sie_block->gbea = 1;
4117 		vcpu->arch.sie_block->pp = 0;
4118 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4119 		vcpu->arch.sie_block->todpr = 0;
4120 	}
4121 }
4122 
4123 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
4124 {
4125 	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
4126 
4127 	/* Clear reset is a superset of the initial reset */
4128 	kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4129 
4130 	memset(&regs->gprs, 0, sizeof(regs->gprs));
4131 	memset(&regs->vrs, 0, sizeof(regs->vrs));
4132 	memset(&regs->acrs, 0, sizeof(regs->acrs));
4133 	memset(&regs->gscb, 0, sizeof(regs->gscb));
4134 
4135 	regs->etoken = 0;
4136 	regs->etoken_extension = 0;
4137 }
4138 
4139 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4140 {
4141 	vcpu_load(vcpu);
4142 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
4143 	vcpu_put(vcpu);
4144 	return 0;
4145 }
4146 
4147 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4148 {
4149 	vcpu_load(vcpu);
4150 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
4151 	vcpu_put(vcpu);
4152 	return 0;
4153 }
4154 
4155 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
4156 				  struct kvm_sregs *sregs)
4157 {
4158 	vcpu_load(vcpu);
4159 
4160 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
4161 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
4162 
4163 	vcpu_put(vcpu);
4164 	return 0;
4165 }
4166 
4167 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
4168 				  struct kvm_sregs *sregs)
4169 {
4170 	vcpu_load(vcpu);
4171 
4172 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
4173 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
4174 
4175 	vcpu_put(vcpu);
4176 	return 0;
4177 }
4178 
4179 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
4180 {
4181 	int ret = 0;
4182 
4183 	vcpu_load(vcpu);
4184 
4185 	if (test_fp_ctl(fpu->fpc)) {
4186 		ret = -EINVAL;
4187 		goto out;
4188 	}
4189 	vcpu->run->s.regs.fpc = fpu->fpc;
4190 	if (MACHINE_HAS_VX)
4191 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
4192 				 (freg_t *) fpu->fprs);
4193 	else
4194 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
4195 
4196 out:
4197 	vcpu_put(vcpu);
4198 	return ret;
4199 }
4200 
4201 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
4202 {
4203 	vcpu_load(vcpu);
4204 
4205 	/* make sure we have the latest values */
4206 	save_fpu_regs();
4207 	if (MACHINE_HAS_VX)
4208 		convert_vx_to_fp((freg_t *) fpu->fprs,
4209 				 (__vector128 *) vcpu->run->s.regs.vrs);
4210 	else
4211 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
4212 	fpu->fpc = vcpu->run->s.regs.fpc;
4213 
4214 	vcpu_put(vcpu);
4215 	return 0;
4216 }
4217 
4218 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
4219 {
4220 	int rc = 0;
4221 
4222 	if (!is_vcpu_stopped(vcpu))
4223 		rc = -EBUSY;
4224 	else {
4225 		vcpu->run->psw_mask = psw.mask;
4226 		vcpu->run->psw_addr = psw.addr;
4227 	}
4228 	return rc;
4229 }
4230 
4231 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
4232 				  struct kvm_translation *tr)
4233 {
4234 	return -EINVAL; /* not implemented yet */
4235 }
4236 
4237 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
4238 			      KVM_GUESTDBG_USE_HW_BP | \
4239 			      KVM_GUESTDBG_ENABLE)
4240 
4241 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
4242 					struct kvm_guest_debug *dbg)
4243 {
4244 	int rc = 0;
4245 
4246 	vcpu_load(vcpu);
4247 
4248 	vcpu->guest_debug = 0;
4249 	kvm_s390_clear_bp_data(vcpu);
4250 
4251 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
4252 		rc = -EINVAL;
4253 		goto out;
4254 	}
4255 	if (!sclp.has_gpere) {
4256 		rc = -EINVAL;
4257 		goto out;
4258 	}
4259 
4260 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
4261 		vcpu->guest_debug = dbg->control;
4262 		/* enforce guest PER */
4263 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
4264 
4265 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
4266 			rc = kvm_s390_import_bp_data(vcpu, dbg);
4267 	} else {
4268 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
4269 		vcpu->arch.guestdbg.last_bp = 0;
4270 	}
4271 
4272 	if (rc) {
4273 		vcpu->guest_debug = 0;
4274 		kvm_s390_clear_bp_data(vcpu);
4275 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
4276 	}
4277 
4278 out:
4279 	vcpu_put(vcpu);
4280 	return rc;
4281 }
4282 
4283 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
4284 				    struct kvm_mp_state *mp_state)
4285 {
4286 	int ret;
4287 
4288 	vcpu_load(vcpu);
4289 
4290 	/* CHECK_STOP and LOAD are not supported yet */
4291 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
4292 				      KVM_MP_STATE_OPERATING;
4293 
4294 	vcpu_put(vcpu);
4295 	return ret;
4296 }
4297 
4298 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
4299 				    struct kvm_mp_state *mp_state)
4300 {
4301 	int rc = 0;
4302 
4303 	vcpu_load(vcpu);
4304 
4305 	/* user space knows about this interface - let it control the state */
4306 	kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
4307 
4308 	switch (mp_state->mp_state) {
4309 	case KVM_MP_STATE_STOPPED:
4310 		rc = kvm_s390_vcpu_stop(vcpu);
4311 		break;
4312 	case KVM_MP_STATE_OPERATING:
4313 		rc = kvm_s390_vcpu_start(vcpu);
4314 		break;
4315 	case KVM_MP_STATE_LOAD:
4316 		if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
4317 			rc = -ENXIO;
4318 			break;
4319 		}
4320 		rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
4321 		break;
4322 	case KVM_MP_STATE_CHECK_STOP:
4323 		fallthrough;	/* CHECK_STOP and LOAD are not supported yet */
4324 	default:
4325 		rc = -ENXIO;
4326 	}
4327 
4328 	vcpu_put(vcpu);
4329 	return rc;
4330 }
4331 
4332 static bool ibs_enabled(struct kvm_vcpu *vcpu)
4333 {
4334 	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
4335 }
4336 
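/*
 * Process all pending VCPU requests before (re-)entering SIE, e.g. prefix
 * refresh, TLB flush, IBS toggling and CMM handling for migration.
 */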
4337 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
4338 {
4339 retry:
4340 	kvm_s390_vcpu_request_handled(vcpu);
4341 	if (!kvm_request_pending(vcpu))
4342 		return 0;
4343 	/*
4344 	 * If the guest prefix changed, re-arm the ipte notifier for the
4345 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
4346 	 * This ensures that the ipte instruction for this request has
4347 	 * already finished. We might race against a second unmapper that
4348 	 * wants to set the blocking bit. Let's just retry the request loop.
4349 	 */
4350 	if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
4351 		int rc;
4352 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
4353 					  kvm_s390_get_prefix(vcpu),
4354 					  PAGE_SIZE * 2, PROT_WRITE);
4355 		if (rc) {
4356 			kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
4357 			return rc;
4358 		}
4359 		goto retry;
4360 	}
4361 
4362 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
4363 		vcpu->arch.sie_block->ihcpu = 0xffff;
4364 		goto retry;
4365 	}
4366 
4367 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
4368 		if (!ibs_enabled(vcpu)) {
4369 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
4370 			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
4371 		}
4372 		goto retry;
4373 	}
4374 
4375 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
4376 		if (ibs_enabled(vcpu)) {
4377 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
4378 			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
4379 		}
4380 		goto retry;
4381 	}
4382 
4383 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
4384 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
4385 		goto retry;
4386 	}
4387 
4388 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
4389 		/*
4390 		 * Disable CMM virtualization; we will emulate the ESSA
4391 		 * instruction manually, in order to provide additional
4392 		 * functionality needed for live migration.
4393 		 */
4394 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
4395 		goto retry;
4396 	}
4397 
4398 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
4399 		/*
4400 		 * Re-enable CMM virtualization if CMMA is available and
4401 		 * CMM has been used.
4402 		 */
4403 		if ((vcpu->kvm->arch.use_cmma) &&
4404 		    (vcpu->kvm->mm->context.uses_cmm))
4405 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
4406 		goto retry;
4407 	}
4408 
4409 	/* we left the vsie handler, nothing to do, just clear the request */
4410 	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
4411 
4412 	return 0;
4413 }
4414 
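/*
 * Set the guest TOD epoch (and the epoch index if the multiple-epoch
 * facility is available) from the requested guest TOD while all VCPUs
 * are blocked.
 */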
4415 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4416 {
4417 	struct kvm_vcpu *vcpu;
4418 	union tod_clock clk;
4419 	unsigned long i;
4420 
4421 	preempt_disable();
4422 
4423 	store_tod_clock_ext(&clk);
4424 
4425 	kvm->arch.epoch = gtod->tod - clk.tod;
4426 	kvm->arch.epdx = 0;
4427 	if (test_kvm_facility(kvm, 139)) {
4428 		kvm->arch.epdx = gtod->epoch_idx - clk.ei;
4429 		if (kvm->arch.epoch > gtod->tod)
4430 			kvm->arch.epdx -= 1;
4431 	}
4432 
4433 	kvm_s390_vcpu_block_all(kvm);
4434 	kvm_for_each_vcpu(i, vcpu, kvm) {
4435 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
4436 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
4437 	}
4438 
4439 	kvm_s390_vcpu_unblock_all(kvm);
4440 	preempt_enable();
4441 }
4442 
4443 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4444 {
4445 	if (!mutex_trylock(&kvm->lock))
4446 		return 0;
4447 	__kvm_s390_set_tod_clock(kvm, gtod);
4448 	mutex_unlock(&kvm->lock);
4449 	return 1;
4450 }
4451 
4452 /**
4453  * kvm_arch_fault_in_page - fault-in guest page if necessary
4454  * @vcpu: The corresponding virtual cpu
4455  * @gpa: Guest physical address
4456  * @writable: Whether the page should be writable or not
4457  *
4458  * Make sure that a guest page has been faulted-in on the host.
4459  *
4460  * Return: Zero on success, negative error code otherwise.
4461  */
4462 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
4463 {
4464 	return gmap_fault(vcpu->arch.gmap, gpa,
4465 			  writable ? FAULT_FLAG_WRITE : 0);
4466 }
4467 
4468 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
4469 				      unsigned long token)
4470 {
4471 	struct kvm_s390_interrupt inti;
4472 	struct kvm_s390_irq irq;
4473 
4474 	if (start_token) {
4475 		irq.u.ext.ext_params2 = token;
4476 		irq.type = KVM_S390_INT_PFAULT_INIT;
4477 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
4478 	} else {
4479 		inti.type = KVM_S390_INT_PFAULT_DONE;
4480 		inti.parm64 = token;
4481 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
4482 	}
4483 }
4484 
4485 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
4486 				     struct kvm_async_pf *work)
4487 {
4488 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
4489 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
4490 
4491 	return true;
4492 }
4493 
4494 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
4495 				 struct kvm_async_pf *work)
4496 {
4497 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
4498 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
4499 }
4500 
4501 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
4502 			       struct kvm_async_pf *work)
4503 {
4504 	/* s390 will always inject the page directly */
4505 }
4506 
4507 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
4508 {
4509 	/*
4510 	 * but we still want check_async_completion to clean up
4511 	 * but we still want check_async_completion to cleanup
4512 	 */
4513 	return true;
4514 }
4515 
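/*
 * Check whether the current host fault may be handled asynchronously via
 * the pfault mechanism and, if so, queue the async page fault work.
 */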
4516 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4517 {
4518 	hva_t hva;
4519 	struct kvm_arch_async_pf arch;
4520 
4521 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4522 		return false;
4523 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4524 	    vcpu->arch.pfault_compare)
4525 		return false;
4526 	if (psw_extint_disabled(vcpu))
4527 		return false;
4528 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
4529 		return false;
4530 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4531 		return false;
4532 	if (!vcpu->arch.gmap->pfault_enabled)
4533 		return false;
4534 
4535 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4536 	hva += current->thread.gmap_addr & ~PAGE_MASK;
4537 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4538 		return false;
4539 
4540 	return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4541 }
4542 
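/*
 * Prepare for entering SIE: handle completed pfaults, deliver pending
 * interrupts, process VCPU requests and arm guest debugging if enabled.
 */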
4543 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4544 {
4545 	int rc, cpuflags;
4546 
4547 	/*
4548 	 * On s390 notifications for arriving pages will be delivered directly
4549 	 * to the guest but the housekeeping for completed pfaults is
4550 	 * handled outside the worker.
4551 	 */
4552 	kvm_check_async_pf_completion(vcpu);
4553 
4554 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4555 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4556 
4557 	if (need_resched())
4558 		schedule();
4559 
4560 	if (!kvm_is_ucontrol(vcpu->kvm)) {
4561 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
4562 		if (rc)
4563 			return rc;
4564 	}
4565 
4566 	rc = kvm_s390_handle_requests(vcpu);
4567 	if (rc)
4568 		return rc;
4569 
4570 	if (guestdbg_enabled(vcpu)) {
4571 		kvm_s390_backup_guest_per_regs(vcpu);
4572 		kvm_s390_patch_guest_per_regs(vcpu);
4573 	}
4574 
4575 	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4576 
4577 	vcpu->arch.sie_block->icptcode = 0;
4578 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4579 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4580 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
4581 
4582 	return 0;
4583 }
4584 
4585 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4586 {
4587 	struct kvm_s390_pgm_info pgm_info = {
4588 		.code = PGM_ADDRESSING,
4589 	};
4590 	u8 opcode, ilen;
4591 	int rc;
4592 
4593 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4594 	trace_kvm_s390_sie_fault(vcpu);
4595 
4596 	/*
4597 	 * We want to inject an addressing exception, which is defined as a
4598 	 * suppressing or terminating exception. However, since we came here
4599 	 * by a DAT access exception, the PSW still points to the faulting
4600 	 * instruction since DAT exceptions are nullifying. So we've got
4601 	 * to look up the current opcode to get the length of the instruction
4602 	 * to be able to forward the PSW.
4603 	 */
4604 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4605 	ilen = insn_length(opcode);
4606 	if (rc < 0) {
4607 		return rc;
4608 	} else if (rc) {
4609 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
4610 		 * Forward by arbitrary ilc, injection will take care of
4611 		 * nullification if necessary.
4612 		 */
4613 		pgm_info = vcpu->arch.pgm;
4614 		ilen = 4;
4615 	}
4616 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4617 	kvm_s390_forward_psw(vcpu, ilen);
4618 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4619 }
4620 
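/*
 * Evaluate a SIE exit: forward machine checks, handle intercepts, ucontrol
 * translation exceptions and guest page faults. Returns -EREMOTE when the
 * exit has to be completed by userspace.
 */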
4621 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4622 {
4623 	struct mcck_volatile_info *mcck_info;
4624 	struct sie_page *sie_page;
4625 
4626 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4627 		   vcpu->arch.sie_block->icptcode);
4628 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4629 
4630 	if (guestdbg_enabled(vcpu))
4631 		kvm_s390_restore_guest_per_regs(vcpu);
4632 
4633 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4634 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4635 
4636 	if (exit_reason == -EINTR) {
4637 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
4638 		sie_page = container_of(vcpu->arch.sie_block,
4639 					struct sie_page, sie_block);
4640 		mcck_info = &sie_page->mcck_info;
4641 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
4642 		return 0;
4643 	}
4644 
4645 	if (vcpu->arch.sie_block->icptcode > 0) {
4646 		int rc = kvm_handle_sie_intercept(vcpu);
4647 
4648 		if (rc != -EOPNOTSUPP)
4649 			return rc;
4650 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4651 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4652 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4653 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4654 		return -EREMOTE;
4655 	} else if (exit_reason != -EFAULT) {
4656 		vcpu->stat.exit_null++;
4657 		return 0;
4658 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
4659 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4660 		vcpu->run->s390_ucontrol.trans_exc_code =
4661 						current->thread.gmap_addr;
4662 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
4663 		return -EREMOTE;
4664 	} else if (current->thread.gmap_pfault) {
4665 		trace_kvm_s390_major_guest_pfault(vcpu);
4666 		current->thread.gmap_pfault = 0;
4667 		if (kvm_arch_setup_async_pf(vcpu))
4668 			return 0;
4669 		vcpu->stat.pfault_sync++;
4670 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4671 	}
4672 	return vcpu_post_run_fault_in_sie(vcpu);
4673 }
4674 
4675 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
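/*
 * The inner run loop: enter SIE repeatedly until a signal is pending, a
 * guest debug exit is requested or vcpu_pre_run/vcpu_post_run fail.
 */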
4676 static int __vcpu_run(struct kvm_vcpu *vcpu)
4677 {
4678 	int rc, exit_reason;
4679 	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4680 
4681 	/*
4682 	 * We try to hold kvm->srcu during most of vcpu_run (except when
4683 	 * running the guest), so that memslots (and other stuff) are protected
4684 	 */
4685 	kvm_vcpu_srcu_read_lock(vcpu);
4686 
4687 	do {
4688 		rc = vcpu_pre_run(vcpu);
4689 		if (rc)
4690 			break;
4691 
4692 		kvm_vcpu_srcu_read_unlock(vcpu);
4693 		/*
4694 		 * As PF_VCPU will be used in the fault handler, there should
4695 		 * be no uaccess between guest_enter and guest_exit.
4696 		 */
4697 		local_irq_disable();
4698 		guest_enter_irqoff();
4699 		__disable_cpu_timer_accounting(vcpu);
4700 		local_irq_enable();
4701 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4702 			memcpy(sie_page->pv_grregs,
4703 			       vcpu->run->s.regs.gprs,
4704 			       sizeof(sie_page->pv_grregs));
4705 		}
4706 		if (test_cpu_flag(CIF_FPU))
4707 			load_fpu_regs();
4708 		exit_reason = sie64a(vcpu->arch.sie_block,
4709 				     vcpu->run->s.regs.gprs);
4710 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4711 			memcpy(vcpu->run->s.regs.gprs,
4712 			       sie_page->pv_grregs,
4713 			       sizeof(sie_page->pv_grregs));
4714 			/*
4715 			 * We're not allowed to inject interrupts on intercepts
4716 			 * that leave the guest state in an "in-between" state
4717 			 * where the next SIE entry will do a continuation.
4718 			 * Fence interrupts in our "internal" PSW.
4719 			 */
4720 			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4721 			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4722 				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4723 			}
4724 		}
4725 		local_irq_disable();
4726 		__enable_cpu_timer_accounting(vcpu);
4727 		guest_exit_irqoff();
4728 		local_irq_enable();
4729 		kvm_vcpu_srcu_read_lock(vcpu);
4730 
4731 		rc = vcpu_post_run(vcpu, exit_reason);
4732 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4733 
4734 	kvm_vcpu_srcu_read_unlock(vcpu);
4735 	return rc;
4736 }
4737 
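/*
 * Sync the format-2 (non-protected) parts of the register state that
 * userspace marked dirty in kvm_run into the SIE block, enabling RI and
 * guarded storage when userspace provides valid control blocks.
 */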
4738 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4739 {
4740 	struct kvm_run *kvm_run = vcpu->run;
4741 	struct runtime_instr_cb *riccb;
4742 	struct gs_cb *gscb;
4743 
4744 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4745 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4746 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4747 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4748 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4749 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4750 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4751 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4752 	}
4753 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4754 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4755 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4756 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4757 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4758 			kvm_clear_async_pf_completion_queue(vcpu);
4759 	}
4760 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4761 		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4762 		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4763 		VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
4764 	}
4765 	/*
4766 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
4767 	 * we should enable RI here instead of doing the lazy enablement.
4768 	 */
4769 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4770 	    test_kvm_facility(vcpu->kvm, 64) &&
4771 	    riccb->v &&
4772 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4773 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4774 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4775 	}
4776 	/*
4777 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
4778 	 * we should enable GS here instead of doing the lazy enablement.
4779 	 */
4780 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4781 	    test_kvm_facility(vcpu->kvm, 133) &&
4782 	    gscb->gssm &&
4783 	    !vcpu->arch.gs_enabled) {
4784 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4785 		vcpu->arch.sie_block->ecb |= ECB_GS;
4786 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4787 		vcpu->arch.gs_enabled = 1;
4788 	}
4789 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4790 	    test_kvm_facility(vcpu->kvm, 82)) {
4791 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4792 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4793 	}
4794 	if (MACHINE_HAS_GS) {
4795 		preempt_disable();
4796 		__ctl_set_bit(2, 4);
4797 		if (current->thread.gs_cb) {
4798 			vcpu->arch.host_gscb = current->thread.gs_cb;
4799 			save_gs_cb(vcpu->arch.host_gscb);
4800 		}
4801 		if (vcpu->arch.gs_enabled) {
4802 			current->thread.gs_cb = (struct gs_cb *)
4803 						&vcpu->run->s.regs.gscb;
4804 			restore_gs_cb(current->thread.gs_cb);
4805 		}
4806 		preempt_enable();
4807 	}
4808 	/* SIE will load etoken directly from SDNX and therefore kvm_run */
4809 }
4810 
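/* Copy register state that userspace marked dirty in kvm_run into the VCPU. */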
4811 static void sync_regs(struct kvm_vcpu *vcpu)
4812 {
4813 	struct kvm_run *kvm_run = vcpu->run;
4814 
4815 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4816 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4817 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4818 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4819 		/* some control register changes require a tlb flush */
4820 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4821 	}
4822 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4823 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4824 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4825 	}
4826 	save_access_regs(vcpu->arch.host_acrs);
4827 	restore_access_regs(vcpu->run->s.regs.acrs);
4828 	/* save host (userspace) fprs/vrs */
4829 	save_fpu_regs();
4830 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4831 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4832 	if (MACHINE_HAS_VX)
4833 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4834 	else
4835 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4836 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4837 	if (test_fp_ctl(current->thread.fpu.fpc))
4838 		/* User space provided an invalid FPC, let's clear it */
4839 		current->thread.fpu.fpc = 0;
4840 
4841 	/* Sync fmt2 only data */
4842 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4843 		sync_regs_fmt2(vcpu);
4844 	} else {
4845 		/*
4846 		 * In several places we have to modify our internal view to
4847 		 * not do things that are disallowed by the ultravisor. For
4848 		 * example we must not inject interrupts after specific exits
4849 		 * (e.g. 112 prefix page not secure). We do this by turning
4850 		 * off the machine check, external and I/O interrupt bits
4851 		 * of our PSW copy. To avoid getting validity intercepts, we
4852 		 * only accept the condition code from userspace.
4853 		 */
4854 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4855 		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4856 						   PSW_MASK_CC;
4857 	}
4858 
4859 	kvm_run->kvm_dirty_regs = 0;
4860 }
4861 
4862 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4863 {
4864 	struct kvm_run *kvm_run = vcpu->run;
4865 
4866 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4867 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4868 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4869 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4870 	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4871 	if (MACHINE_HAS_GS) {
4872 		preempt_disable();
4873 		__ctl_set_bit(2, 4);
4874 		if (vcpu->arch.gs_enabled)
4875 			save_gs_cb(current->thread.gs_cb);
4876 		current->thread.gs_cb = vcpu->arch.host_gscb;
4877 		restore_gs_cb(vcpu->arch.host_gscb);
4878 		if (!vcpu->arch.host_gscb)
4879 			__ctl_clear_bit(2, 4);
4880 		vcpu->arch.host_gscb = NULL;
4881 		preempt_enable();
4882 	}
4883 	/* SIE will save etoken directly into SDNX and therefore kvm_run */
4884 }
4885 
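/* Write the current guest register state back into kvm_run after a run. */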
4886 static void store_regs(struct kvm_vcpu *vcpu)
4887 {
4888 	struct kvm_run *kvm_run = vcpu->run;
4889 
4890 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4891 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4892 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4893 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4894 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4895 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4896 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4897 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4898 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4899 	save_access_regs(vcpu->run->s.regs.acrs);
4900 	restore_access_regs(vcpu->arch.host_acrs);
4901 	/* Save guest register state */
4902 	save_fpu_regs();
4903 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4904 	/* Restore will be done lazily at return */
4905 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4906 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4907 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4908 		store_regs_fmt2(vcpu);
4909 }
4910 
4911 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4912 {
4913 	struct kvm_run *kvm_run = vcpu->run;
4914 	int rc;
4915 
4916 	/*
4917 	 * Running a VM while dumping always has the potential to
4918 	 * produce inconsistent dump data. But for PV vcpus a SIE
4919 	 * entry while dumping could also lead to a fatal validity
4920 	 * intercept which we absolutely want to avoid.
4921 	 */
4922 	if (vcpu->kvm->arch.pv.dumping)
4923 		return -EINVAL;
4924 
4925 	if (kvm_run->immediate_exit)
4926 		return -EINTR;
4927 
4928 	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4929 	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4930 		return -EINVAL;
4931 
4932 	vcpu_load(vcpu);
4933 
4934 	if (guestdbg_exit_pending(vcpu)) {
4935 		kvm_s390_prepare_debug_exit(vcpu);
4936 		rc = 0;
4937 		goto out;
4938 	}
4939 
4940 	kvm_sigset_activate(vcpu);
4941 
4942 	/*
4943 	 * no need to check the return value of vcpu_start as it can only
4944 	 * fail for protvirt, but protvirt implies user-controlled cpu state
4945 	 */
4946 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4947 		kvm_s390_vcpu_start(vcpu);
4948 	} else if (is_vcpu_stopped(vcpu)) {
4949 		pr_err_ratelimited("can't run stopped vcpu %d\n",
4950 				   vcpu->vcpu_id);
4951 		rc = -EINVAL;
4952 		goto out;
4953 	}
4954 
4955 	sync_regs(vcpu);
4956 	enable_cpu_timer_accounting(vcpu);
4957 
4958 	might_fault();
4959 	rc = __vcpu_run(vcpu);
4960 
4961 	if (signal_pending(current) && !rc) {
4962 		kvm_run->exit_reason = KVM_EXIT_INTR;
4963 		rc = -EINTR;
4964 	}
4965 
4966 	if (guestdbg_exit_pending(vcpu) && !rc)  {
4967 		kvm_s390_prepare_debug_exit(vcpu);
4968 		rc = 0;
4969 	}
4970 
4971 	if (rc == -EREMOTE) {
4972 		/* userspace support is needed, kvm_run has been prepared */
4973 		rc = 0;
4974 	}
4975 
4976 	disable_cpu_timer_accounting(vcpu);
4977 	store_regs(vcpu);
4978 
4979 	kvm_sigset_deactivate(vcpu);
4980 
4981 	vcpu->stat.exit_userspace++;
4982 out:
4983 	vcpu_put(vcpu);
4984 	return rc;
4985 }
4986 
4987 /*
4988  * store status at address
4989  * we have two special cases:
4990  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4991  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4992  */
4993 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4994 {
4995 	unsigned char archmode = 1;
4996 	freg_t fprs[NUM_FPRS];
4997 	unsigned int px;
4998 	u64 clkcomp, cputm;
4999 	int rc;
5000 
5001 	px = kvm_s390_get_prefix(vcpu);
5002 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
5003 		if (write_guest_abs(vcpu, 163, &archmode, 1))
5004 			return -EFAULT;
5005 		gpa = 0;
5006 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
5007 		if (write_guest_real(vcpu, 163, &archmode, 1))
5008 			return -EFAULT;
5009 		gpa = px;
5010 	} else
5011 		gpa -= __LC_FPREGS_SAVE_AREA;
5012 
5013 	/* manually convert vector registers if necessary */
5014 	if (MACHINE_HAS_VX) {
5015 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
5016 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
5017 				     fprs, 128);
5018 	} else {
5019 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
5020 				     vcpu->run->s.regs.fprs, 128);
5021 	}
5022 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
5023 			      vcpu->run->s.regs.gprs, 128);
5024 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
5025 			      &vcpu->arch.sie_block->gpsw, 16);
5026 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
5027 			      &px, 4);
5028 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
5029 			      &vcpu->run->s.regs.fpc, 4);
5030 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
5031 			      &vcpu->arch.sie_block->todpr, 4);
5032 	cputm = kvm_s390_get_cpu_timer(vcpu);
5033 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
5034 			      &cputm, 8);
5035 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
5036 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
5037 			      &clkcomp, 8);
5038 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
5039 			      &vcpu->run->s.regs.acrs, 64);
5040 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
5041 			      &vcpu->arch.sie_block->gcr, 128);
5042 	return rc ? -EFAULT : 0;
5043 }
5044 
5045 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
5046 {
5047 	/*
5048 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
5049 	 * switch in the run ioctl. Let's update our copies before we save
5050 	 * them into the save area
5051 	 */
5052 	save_fpu_regs();
5053 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
5054 	save_access_regs(vcpu->run->s.regs.acrs);
5055 
5056 	return kvm_s390_store_status_unloaded(vcpu, addr);
5057 }
5058 
5059 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
5060 {
5061 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
5062 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
5063 }
5064 
5065 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
5066 {
5067 	unsigned long i;
5068 	struct kvm_vcpu *vcpu;
5069 
5070 	kvm_for_each_vcpu(i, vcpu, kvm) {
5071 		__disable_ibs_on_vcpu(vcpu);
5072 	}
5073 }
5074 
5075 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
5076 {
5077 	if (!sclp.has_ibs)
5078 		return;
5079 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
5080 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
5081 }
5082 
5083 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
5084 {
5085 	int i, online_vcpus, r = 0, started_vcpus = 0;
5086 
5087 	if (!is_vcpu_stopped(vcpu))
5088 		return 0;
5089 
5090 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
5091 	/* Only one cpu at a time may enter/leave the STOPPED state. */
5092 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
5093 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
5094 
5095 	/* Let's tell the UV that we want to change into the operating state */
5096 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5097 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
5098 		if (r) {
5099 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5100 			return r;
5101 		}
5102 	}
5103 
5104 	for (i = 0; i < online_vcpus; i++) {
5105 		if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
5106 			started_vcpus++;
5107 	}
5108 
5109 	if (started_vcpus == 0) {
5110 		/* we're the only active VCPU -> speed it up */
5111 		__enable_ibs_on_vcpu(vcpu);
5112 	} else if (started_vcpus == 1) {
5113 		/*
5114 		 * As we are starting a second VCPU, we have to disable
5115 		 * the IBS facility on all VCPUs to remove potentially
5116 		 * outstanding ENABLE requests.
5117 		 */
5118 		__disable_ibs_on_all_vcpus(vcpu->kvm);
5119 	}
5120 
5121 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
5122 	/*
5123 	 * The real PSW might have changed due to a RESTART interpreted by the
5124 	 * ultravisor. We block all interrupts and let the next sie exit
5125 	 * refresh our view.
5126 	 */
5127 	if (kvm_s390_pv_cpu_is_protected(vcpu))
5128 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
5129 	/*
5130 	 * Another VCPU might have used IBS while we were offline.
5131 	 * Let's play safe and flush the VCPU at startup.
5132 	 */
5133 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
5134 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5135 	return 0;
5136 }
5137 
5138 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
5139 {
5140 	int i, online_vcpus, r = 0, started_vcpus = 0;
5141 	struct kvm_vcpu *started_vcpu = NULL;
5142 
5143 	if (is_vcpu_stopped(vcpu))
5144 		return 0;
5145 
5146 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
5147 	/* Only one cpu at a time may enter/leave the STOPPED state. */
5148 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
5149 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
5150 
5151 	/* Let's tell the UV that we want to change into the stopped state */
5152 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5153 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
5154 		if (r) {
5155 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5156 			return r;
5157 		}
5158 	}
5159 
5160 	/*
5161 	 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
5162 	 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
5163 	 * have been fully processed. This will ensure that the VCPU
5164 	 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
5165 	 */
5166 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
5167 	kvm_s390_clear_stop_irq(vcpu);
5168 
5169 	__disable_ibs_on_vcpu(vcpu);
5170 
5171 	for (i = 0; i < online_vcpus; i++) {
5172 		struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);
5173 
5174 		if (!is_vcpu_stopped(tmp)) {
5175 			started_vcpus++;
5176 			started_vcpu = tmp;
5177 		}
5178 	}
5179 
5180 	if (started_vcpus == 1) {
5181 		/*
5182 		 * As we only have one VCPU left, we want to enable the
5183 		 * IBS facility for that VCPU to speed it up.
5184 		 */
5185 		__enable_ibs_on_vcpu(started_vcpu);
5186 	}
5187 
5188 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5189 	return 0;
5190 }
5191 
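/*
 * Per-VCPU KVM_ENABLE_CAP handler. Only KVM_CAP_S390_CSS_SUPPORT is
 * accepted; enabling it marks the VM as handling channel I/O instructions
 * in userspace. Illustrative userspace sketch (not kernel code, vcpu_fd is
 * a hypothetical VCPU file descriptor, error handling omitted):
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_CSS_SUPPORT };
 *
 *	ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
 */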
5192 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
5193 				     struct kvm_enable_cap *cap)
5194 {
5195 	int r;
5196 
5197 	if (cap->flags)
5198 		return -EINVAL;
5199 
5200 	switch (cap->cap) {
5201 	case KVM_CAP_S390_CSS_SUPPORT:
5202 		if (!vcpu->kvm->arch.css_support) {
5203 			vcpu->kvm->arch.css_support = 1;
5204 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
5205 			trace_kvm_s390_enable_css(vcpu->kvm);
5206 		}
5207 		r = 0;
5208 		break;
5209 	default:
5210 		r = -EINVAL;
5211 		break;
5212 	}
5213 	return r;
5214 }
5215 
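/*
 * KVM_S390_MEM_OP access to the SIDA (secure instruction data area) of a
 * protected VCPU. The offset and size are checked against the SIDA size,
 * including an overflow check on size + sida_offset, and the request is
 * rejected for non-protected VCPUs.
 */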
5216 static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
5217 				  struct kvm_s390_mem_op *mop)
5218 {
5219 	void __user *uaddr = (void __user *)mop->buf;
5220 	void *sida_addr;
5221 	int r = 0;
5222 
5223 	if (mop->flags || !mop->size)
5224 		return -EINVAL;
5225 	if (mop->size + mop->sida_offset < mop->size)
5226 		return -EINVAL;
5227 	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
5228 		return -E2BIG;
5229 	if (!kvm_s390_pv_cpu_is_protected(vcpu))
5230 		return -EINVAL;
5231 
5232 	sida_addr = (char *)sida_addr(vcpu->arch.sie_block) + mop->sida_offset;
5233 
5234 	switch (mop->op) {
5235 	case KVM_S390_MEMOP_SIDA_READ:
5236 		if (copy_to_user(uaddr, sida_addr, mop->size))
5237 			r = -EFAULT;
5238 
5239 		break;
5240 	case KVM_S390_MEMOP_SIDA_WRITE:
5241 		if (copy_from_user(sida_addr, uaddr, mop->size))
5242 			r = -EFAULT;
5243 		break;
5244 	}
5245 	return r;
5246 }
5247 
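/*
 * KVM_S390_MEM_OP logical reads/writes for non-protected VCPUs. With
 * KVM_S390_MEMOP_F_CHECK_ONLY only the access check is performed;
 * otherwise the data is staged through a temporary kernel buffer.
 * Illustrative userspace sketch (not kernel code, guest_addr/buf/vcpu_fd
 * are hypothetical, error handling omitted):
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size  = sizeof(buf),
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)buf,
 *		.ar    = 0,
 *	};
 *
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */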
5248 static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
5249 				 struct kvm_s390_mem_op *mop)
5250 {
5251 	void __user *uaddr = (void __user *)mop->buf;
5252 	void *tmpbuf = NULL;
5253 	int r = 0;
5254 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
5255 				    | KVM_S390_MEMOP_F_CHECK_ONLY
5256 				    | KVM_S390_MEMOP_F_SKEY_PROTECTION;
5257 
5258 	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
5259 		return -EINVAL;
5260 	if (mop->size > MEM_OP_MAX_SIZE)
5261 		return -E2BIG;
5262 	if (kvm_s390_pv_cpu_is_protected(vcpu))
5263 		return -EINVAL;
5264 	if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
5265 		if (access_key_invalid(mop->key))
5266 			return -EINVAL;
5267 	} else {
5268 		mop->key = 0;
5269 	}
5270 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
5271 		tmpbuf = vmalloc(mop->size);
5272 		if (!tmpbuf)
5273 			return -ENOMEM;
5274 	}
5275 
5276 	switch (mop->op) {
5277 	case KVM_S390_MEMOP_LOGICAL_READ:
5278 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
5279 			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
5280 					    GACC_FETCH, mop->key);
5281 			break;
5282 		}
5283 		r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
5284 					mop->size, mop->key);
5285 		if (r == 0) {
5286 			if (copy_to_user(uaddr, tmpbuf, mop->size))
5287 				r = -EFAULT;
5288 		}
5289 		break;
5290 	case KVM_S390_MEMOP_LOGICAL_WRITE:
5291 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
5292 			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
5293 					    GACC_STORE, mop->key);
5294 			break;
5295 		}
5296 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
5297 			r = -EFAULT;
5298 			break;
5299 		}
5300 		r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
5301 					 mop->size, mop->key);
5302 		break;
5303 	}
5304 
5305 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
5306 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
5307 
5308 	vfree(tmpbuf);
5309 	return r;
5310 }
5311 
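/*
 * Dispatch a KVM_S390_MEM_OP request to the logical or SIDA handler while
 * holding the kvm->srcu read lock, so the memslots cannot change under us.
 */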
5312 static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
5313 				     struct kvm_s390_mem_op *mop)
5314 {
5315 	int r, srcu_idx;
5316 
5317 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
5318 
5319 	switch (mop->op) {
5320 	case KVM_S390_MEMOP_LOGICAL_READ:
5321 	case KVM_S390_MEMOP_LOGICAL_WRITE:
5322 		r = kvm_s390_vcpu_mem_op(vcpu, mop);
5323 		break;
5324 	case KVM_S390_MEMOP_SIDA_READ:
5325 	case KVM_S390_MEMOP_SIDA_WRITE:
5326 		/* the vcpu->mutex protects us against the SIDA going away */
5327 		r = kvm_s390_vcpu_sida_op(vcpu, mop);
5328 		break;
5329 	default:
5330 		r = -EINVAL;
5331 	}
5332 
5333 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
5334 	return r;
5335 }
5336 
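/*
 * Asynchronous VCPU ioctls are handled without holding the vcpu->mutex so
 * that interrupts can be injected into a currently running VCPU; anything
 * else falls through to kvm_arch_vcpu_ioctl() via -ENOIOCTLCMD.
 */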
5337 long kvm_arch_vcpu_async_ioctl(struct file *filp,
5338 			       unsigned int ioctl, unsigned long arg)
5339 {
5340 	struct kvm_vcpu *vcpu = filp->private_data;
5341 	void __user *argp = (void __user *)arg;
5342 
5343 	switch (ioctl) {
5344 	case KVM_S390_IRQ: {
5345 		struct kvm_s390_irq s390irq;
5346 
5347 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
5348 			return -EFAULT;
5349 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
5350 	}
5351 	case KVM_S390_INTERRUPT: {
5352 		struct kvm_s390_interrupt s390int;
5353 		struct kvm_s390_irq s390irq = {};
5354 
5355 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
5356 			return -EFAULT;
5357 		if (s390int_to_s390irq(&s390int, &s390irq))
5358 			return -EINVAL;
5359 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
5360 	}
5361 	}
5362 	return -ENOIOCTLCMD;
5363 }
5364 
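/*
 * KVM_PV_DUMP subcommand for a single VCPU of a protected guest. Dump
 * initialization must have been performed beforehand and the buffer must
 * be exactly uv_info.guest_cpu_stor_len bytes; on success the CPU dump
 * data obtained from the ultravisor is copied out to userspace.
 */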
5365 static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu,
5366 					struct kvm_pv_cmd *cmd)
5367 {
5368 	struct kvm_s390_pv_dmp dmp;
5369 	void *data;
5370 	int ret;
5371 
5372 	/* Dump initialization is a prerequisite */
5373 	if (!vcpu->kvm->arch.pv.dumping)
5374 		return -EINVAL;
5375 
5376 	if (copy_from_user(&dmp, (__u8 __user *)cmd->data, sizeof(dmp)))
5377 		return -EFAULT;
5378 
5379 	/* We only handle this subcmd right now */
5380 	if (dmp.subcmd != KVM_PV_DUMP_CPU)
5381 		return -EINVAL;
5382 
5383 	/* The CPU dump length equals the CPU storage donated at creation. */
5384 	if (dmp.buff_len != uv_info.guest_cpu_stor_len)
5385 		return -EINVAL;
5386 
5387 	data = kvzalloc(uv_info.guest_cpu_stor_len, GFP_KERNEL);
5388 	if (!data)
5389 		return -ENOMEM;
5390 
5391 	ret = kvm_s390_pv_dump_cpu(vcpu, data, &cmd->rc, &cmd->rrc);
5392 
5393 	VCPU_EVENT(vcpu, 3, "PROTVIRT DUMP CPU %d rc %x rrc %x",
5394 		   vcpu->vcpu_id, cmd->rc, cmd->rrc);
5395 
5396 	if (ret)
5397 		ret = -EINVAL;
5398 
5399 	/* On success, copy the dump data over to userspace */
5400 	if (!ret && copy_to_user((__u8 __user *)dmp.buff_addr, data, uv_info.guest_cpu_stor_len))
5401 		ret = -EFAULT;
5402 
5403 	kvfree(data);
5404 	return ret;
5405 }
5406 
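/*
 * Synchronous VCPU ioctls; the VCPU is loaded for the duration of the call
 * via vcpu_load()/vcpu_put(). For protected VCPUs the reset ioctls are
 * additionally forwarded to the ultravisor.
 */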
5407 long kvm_arch_vcpu_ioctl(struct file *filp,
5408 			 unsigned int ioctl, unsigned long arg)
5409 {
5410 	struct kvm_vcpu *vcpu = filp->private_data;
5411 	void __user *argp = (void __user *)arg;
5412 	int idx;
5413 	long r;
5414 	u16 rc, rrc;
5415 
5416 	vcpu_load(vcpu);
5417 
5418 	switch (ioctl) {
5419 	case KVM_S390_STORE_STATUS:
5420 		idx = srcu_read_lock(&vcpu->kvm->srcu);
5421 		r = kvm_s390_store_status_unloaded(vcpu, arg);
5422 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
5423 		break;
5424 	case KVM_S390_SET_INITIAL_PSW: {
5425 		psw_t psw;
5426 
5427 		r = -EFAULT;
5428 		if (copy_from_user(&psw, argp, sizeof(psw)))
5429 			break;
5430 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
5431 		break;
5432 	}
5433 	case KVM_S390_CLEAR_RESET:
5434 		r = 0;
5435 		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
5436 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5437 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5438 					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
5439 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
5440 				   rc, rrc);
5441 		}
5442 		break;
5443 	case KVM_S390_INITIAL_RESET:
5444 		r = 0;
5445 		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
5446 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5447 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5448 					  UVC_CMD_CPU_RESET_INITIAL,
5449 					  &rc, &rrc);
5450 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
5451 				   rc, rrc);
5452 		}
5453 		break;
5454 	case KVM_S390_NORMAL_RESET:
5455 		r = 0;
5456 		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
5457 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5458 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5459 					  UVC_CMD_CPU_RESET, &rc, &rrc);
5460 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
5461 				   rc, rrc);
5462 		}
5463 		break;
5464 	case KVM_SET_ONE_REG:
5465 	case KVM_GET_ONE_REG: {
5466 		struct kvm_one_reg reg;
5467 		r = -EINVAL;
5468 		if (kvm_s390_pv_cpu_is_protected(vcpu))
5469 			break;
5470 		r = -EFAULT;
5471 		if (copy_from_user(&reg, argp, sizeof(reg)))
5472 			break;
5473 		if (ioctl == KVM_SET_ONE_REG)
5474 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
5475 		else
5476 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
5477 		break;
5478 	}
5479 #ifdef CONFIG_KVM_S390_UCONTROL
5480 	case KVM_S390_UCAS_MAP: {
5481 		struct kvm_s390_ucas_mapping ucasmap;
5482 
5483 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
5484 			r = -EFAULT;
5485 			break;
5486 		}
5487 
5488 		if (!kvm_is_ucontrol(vcpu->kvm)) {
5489 			r = -EINVAL;
5490 			break;
5491 		}
5492 
5493 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
5494 				     ucasmap.vcpu_addr, ucasmap.length);
5495 		break;
5496 	}
5497 	case KVM_S390_UCAS_UNMAP: {
5498 		struct kvm_s390_ucas_mapping ucasmap;
5499 
5500 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
5501 			r = -EFAULT;
5502 			break;
5503 		}
5504 
5505 		if (!kvm_is_ucontrol(vcpu->kvm)) {
5506 			r = -EINVAL;
5507 			break;
5508 		}
5509 
5510 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
5511 			ucasmap.length);
5512 		break;
5513 	}
5514 #endif
5515 	case KVM_S390_VCPU_FAULT: {
5516 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
5517 		break;
5518 	}
5519 	case KVM_ENABLE_CAP:
5520 	{
5521 		struct kvm_enable_cap cap;
5522 		r = -EFAULT;
5523 		if (copy_from_user(&cap, argp, sizeof(cap)))
5524 			break;
5525 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
5526 		break;
5527 	}
5528 	case KVM_S390_MEM_OP: {
5529 		struct kvm_s390_mem_op mem_op;
5530 
5531 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
5532 			r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op);
5533 		else
5534 			r = -EFAULT;
5535 		break;
5536 	}
5537 	case KVM_S390_SET_IRQ_STATE: {
5538 		struct kvm_s390_irq_state irq_state;
5539 
5540 		r = -EFAULT;
5541 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5542 			break;
5543 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
5544 		    irq_state.len == 0 ||
5545 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
5546 			r = -EINVAL;
5547 			break;
5548 		}
5549 		/* do not use irq_state.flags; it would break old QEMUs */
5550 		r = kvm_s390_set_irq_state(vcpu,
5551 					   (void __user *) irq_state.buf,
5552 					   irq_state.len);
5553 		break;
5554 	}
5555 	case KVM_S390_GET_IRQ_STATE: {
5556 		struct kvm_s390_irq_state irq_state;
5557 
5558 		r = -EFAULT;
5559 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5560 			break;
5561 		if (irq_state.len == 0) {
5562 			r = -EINVAL;
5563 			break;
5564 		}
5565 		/* do not use irq_state.flags; it would break old QEMUs */
5566 		r = kvm_s390_get_irq_state(vcpu,
5567 					   (__u8 __user *)  irq_state.buf,
5568 					   irq_state.len);
5569 		break;
5570 	}
5571 	case KVM_S390_PV_CPU_COMMAND: {
5572 		struct kvm_pv_cmd cmd;
5573 
5574 		r = -EINVAL;
5575 		if (!is_prot_virt_host())
5576 			break;
5577 
5578 		r = -EFAULT;
5579 		if (copy_from_user(&cmd, argp, sizeof(cmd)))
5580 			break;
5581 
5582 		r = -EINVAL;
5583 		if (cmd.flags)
5584 			break;
5585 
5586 		/* We only handle this cmd right now */
5587 		if (cmd.cmd != KVM_PV_DUMP)
5588 			break;
5589 
5590 		r = kvm_s390_handle_pv_vcpu_dump(vcpu, &cmd);
5591 
5592 		/* Always copy over UV rc / rrc data */
5593 		if (copy_to_user((__u8 __user *)argp, &cmd.rc,
5594 				 sizeof(cmd.rc) + sizeof(cmd.rrc)))
5595 			r = -EFAULT;
5596 		break;
5597 	}
5598 	default:
5599 		r = -ENOTTY;
5600 	}
5601 
5602 	vcpu_put(vcpu);
5603 	return r;
5604 }
5605 
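/*
 * Fault handler for the VCPU fd mapping: user-controlled VMs may map the
 * SIE control block at KVM_S390_SIE_PAGE_OFFSET; everything else raises
 * SIGBUS.
 */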
5606 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5607 {
5608 #ifdef CONFIG_KVM_S390_UCONTROL
5609 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
5610 		 && (kvm_is_ucontrol(vcpu->kvm))) {
5611 		vmf->page = virt_to_page(vcpu->arch.sie_block);
5612 		get_page(vmf->page);
5613 		return 0;
5614 	}
5615 #endif
5616 	return VM_FAULT_SIGBUS;
5617 }
5618 
5619 bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
5620 {
5621 	return true;
5622 }
5623 
5624 /* Section: memory related */
5625 int kvm_arch_prepare_memory_region(struct kvm *kvm,
5626 				   const struct kvm_memory_slot *old,
5627 				   struct kvm_memory_slot *new,
5628 				   enum kvm_mr_change change)
5629 {
5630 	gpa_t size;
5631 
5632 	/* When we are protected, we should not change the memory slots */
5633 	if (kvm_s390_pv_get_handle(kvm))
5634 		return -EINVAL;
5635 
5636 	if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY)
5637 		return 0;
5638 
5639 	/* A few sanity checks. Memory slots have to start and end on a
5640 	   segment boundary (1 MB). The memory in userland may be fragmented
5641 	   into various different VMAs. It is fine to mmap() and munmap()
5642 	   memory in this slot at any time after this call. */
5643 
5644 	if (new->userspace_addr & 0xffffful)
5645 		return -EINVAL;
5646 
5647 	size = new->npages * PAGE_SIZE;
5648 	if (size & 0xffffful)
5649 		return -EINVAL;
5650 
5651 	if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
5652 		return -EINVAL;
5653 
5654 	return 0;
5655 }
5656 
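/*
 * Mirror a committed memslot change in the guest address space: the gmap
 * segment mapping is removed for deleted/moved slots and (re)established
 * for created/moved ones; KVM_MR_FLAGS_ONLY needs no gmap update.
 */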
5657 void kvm_arch_commit_memory_region(struct kvm *kvm,
5658 				struct kvm_memory_slot *old,
5659 				const struct kvm_memory_slot *new,
5660 				enum kvm_mr_change change)
5661 {
5662 	int rc = 0;
5663 
5664 	switch (change) {
5665 	case KVM_MR_DELETE:
5666 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5667 					old->npages * PAGE_SIZE);
5668 		break;
5669 	case KVM_MR_MOVE:
5670 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5671 					old->npages * PAGE_SIZE);
5672 		if (rc)
5673 			break;
5674 		fallthrough;
5675 	case KVM_MR_CREATE:
5676 		rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
5677 				      new->base_gfn * PAGE_SIZE,
5678 				      new->npages * PAGE_SIZE);
5679 		break;
5680 	case KVM_MR_FLAGS_ONLY:
5681 		break;
5682 	default:
5683 		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5684 	}
5685 	if (rc)
5686 		pr_warn("failed to commit memory region\n");
5687 	return;
5688 }
5689 
5690 static inline unsigned long nonhyp_mask(int i)
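/*
 * sclp.hmfai contains one 2-bit field per 64-bit facility word; together
 * with the base mask below it determines how many 16-bit blocks of word i,
 * counted from the most-significant end, are withheld when the host
 * facility list is merged into the guest facility base in kvm_s390_init().
 */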
5691 {
5692 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5693 
5694 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5695 }
5696 
5697 static int __init kvm_s390_init(void)
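/*
 * Module init: bail out without SIE support (sclp.has_sief2), reject the
 * unsupported nested=1 + hpage=1 combination, merge the host facilities
 * allowed for guests into kvm_s390_fac_base and register with the generic
 * KVM layer.
 */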
5698 {
5699 	int i;
5700 
5701 	if (!sclp.has_sief2) {
5702 		pr_info("SIE is not available\n");
5703 		return -ENODEV;
5704 	}
5705 
5706 	if (nested && hpage) {
5707 		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5708 		return -EINVAL;
5709 	}
5710 
5711 	for (i = 0; i < 16; i++)
5712 		kvm_s390_fac_base[i] |=
5713 			stfle_fac_list[i] & nonhyp_mask(i);
5714 
5715 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5716 }
5717 
5718 static void __exit kvm_s390_exit(void)
5719 {
5720 	kvm_exit();
5721 }
5722 
5723 module_init(kvm_s390_init);
5724 module_exit(kvm_s390_exit);
5725 
5726 /*
5727  * Enable autoloading of the kvm module.
5728  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5729  * since x86 takes a different approach.
5730  */
5731 #include <linux/miscdevice.h>
5732 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5733 MODULE_ALIAS("devname:kvm");
5734