arch/s390/kvm/kvm-s390.c
1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/module.h>
25 #include <linux/random.h>
26 #include <linux/slab.h>
27 #include <linux/timer.h>
28 #include <linux/vmalloc.h>
29 #include <asm/asm-offsets.h>
30 #include <asm/lowcore.h>
31 #include <asm/etr.h>
32 #include <asm/pgtable.h>
33 #include <asm/nmi.h>
34 #include <asm/switch_to.h>
35 #include <asm/isc.h>
36 #include <asm/sclp.h>
37 #include "kvm-s390.h"
38 #include "gaccess.h"
39
40 #define KMSG_COMPONENT "kvm-s390"
41 #undef pr_fmt
42 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
43
44 #define CREATE_TRACE_POINTS
45 #include "trace.h"
46 #include "trace-s390.h"
47
48 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
49 #define LOCAL_IRQS 32
50 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
51                            (KVM_MAX_VCPUS + LOCAL_IRQS))
52
53 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
54
55 struct kvm_stats_debugfs_item debugfs_entries[] = {
56         { "userspace_handled", VCPU_STAT(exit_userspace) },
57         { "exit_null", VCPU_STAT(exit_null) },
58         { "exit_validity", VCPU_STAT(exit_validity) },
59         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
60         { "exit_external_request", VCPU_STAT(exit_external_request) },
61         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
62         { "exit_instruction", VCPU_STAT(exit_instruction) },
63         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
64         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
65         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
66         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
67         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
68         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
69         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
70         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
71         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
72         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
73         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
74         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
75         { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
76         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
77         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
78         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
79         { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
80         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
81         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
82         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
83         { "instruction_spx", VCPU_STAT(instruction_spx) },
84         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
85         { "instruction_stap", VCPU_STAT(instruction_stap) },
86         { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
87         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
88         { "instruction_stsch", VCPU_STAT(instruction_stsch) },
89         { "instruction_chsc", VCPU_STAT(instruction_chsc) },
90         { "instruction_essa", VCPU_STAT(instruction_essa) },
91         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
92         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
93         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
94         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
95         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
96         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
97         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
98         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
99         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
100         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
101         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
102         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
103         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
104         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
105         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
106         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
107         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
108         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
109         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
110         { "diagnose_10", VCPU_STAT(diagnose_10) },
111         { "diagnose_44", VCPU_STAT(diagnose_44) },
112         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
113         { "diagnose_258", VCPU_STAT(diagnose_258) },
114         { "diagnose_308", VCPU_STAT(diagnose_308) },
115         { "diagnose_500", VCPU_STAT(diagnose_500) },
116         { NULL }
117 };
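/*
 * Note (background, not taken from this file): the counters listed above are
 * the s390 KVM statistics. The generic KVM code exposes each entry as a file
 * under debugfs, typically /sys/kernel/debug/kvm/<name>; VCPU_STAT entries
 * are summed over all vcpus when read. The mount point is the usual default
 * and may differ on a given system.
 */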
118
119 /* upper facilities limit for kvm */
120 unsigned long kvm_s390_fac_list_mask[] = {
121         0xffe6fffbfcfdfc40UL,
122         0x005e800000000000UL,
123 };
124
125 unsigned long kvm_s390_fac_list_mask_size(void)
126 {
127         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
128         return ARRAY_SIZE(kvm_s390_fac_list_mask);
129 }
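/*
 * Note: each bit in kvm_s390_fac_list_mask corresponds to an STFLE facility
 * bit (bit 0 being the leftmost bit of the first doubleword). During VM
 * creation (see kvm_arch_init_vm below) the host facility list is ANDed with
 * this mask to form the machine facility mask, and the initial guest facility
 * list is copied from that mask, so only facilities whitelisted here can ever
 * be offered to a guest. The BUILD_BUG_ON above keeps the array within
 * S390_ARCH_FAC_MASK_SIZE_U64 doublewords.
 */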
130
131 static struct gmap_notifier gmap_notifier;
132 debug_info_t *kvm_s390_dbf;
133
134 /* Section: not file related */
135 int kvm_arch_hardware_enable(void)
136 {
137         /* every s390 is virtualization enabled ;-) */
138         return 0;
139 }
140
141 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
142
143 /*
144  * This callback is executed during stop_machine(). All CPUs are therefore
145  * temporarily stopped. In order not to change guest behavior, we have to
146  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
147  * so a CPU won't be stopped while calculating with the epoch.
148  */
149 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
150                           void *v)
151 {
152         struct kvm *kvm;
153         struct kvm_vcpu *vcpu;
154         int i;
155         unsigned long long *delta = v;
156
157         list_for_each_entry(kvm, &vm_list, vm_list) {
158                 kvm->arch.epoch -= *delta;
159                 kvm_for_each_vcpu(i, vcpu, kvm) {
160                         vcpu->arch.sie_block->epoch -= *delta;
161                 }
162         }
163         return NOTIFY_OK;
164 }
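/*
 * A worked example of the epoch arithmetic above: the guest TOD clock is
 * host_tod + epoch. When the host clock is stepped by *delta (e.g. by an
 * ETR/STP sync check), subtracting delta from every epoch keeps the guest
 * view stable:
 *   (host_tod + delta) + (epoch - delta) == host_tod + epoch
 */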
165
166 static struct notifier_block kvm_clock_notifier = {
167         .notifier_call = kvm_clock_sync,
168 };
169
170 int kvm_arch_hardware_setup(void)
171 {
172         gmap_notifier.notifier_call = kvm_gmap_notifier;
173         gmap_register_ipte_notifier(&gmap_notifier);
174         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
175                                        &kvm_clock_notifier);
176         return 0;
177 }
178
179 void kvm_arch_hardware_unsetup(void)
180 {
181         gmap_unregister_ipte_notifier(&gmap_notifier);
182         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
183                                          &kvm_clock_notifier);
184 }
185
186 int kvm_arch_init(void *opaque)
187 {
188         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
189         if (!kvm_s390_dbf)
190                 return -ENOMEM;
191
192         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
193                 debug_unregister(kvm_s390_dbf);
194                 return -ENOMEM;
195         }
196
197         /* Register floating interrupt controller interface. */
198         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
199 }
200
201 void kvm_arch_exit(void)
202 {
203         debug_unregister(kvm_s390_dbf);
204 }
205
206 /* Section: device related */
207 long kvm_arch_dev_ioctl(struct file *filp,
208                         unsigned int ioctl, unsigned long arg)
209 {
210         if (ioctl == KVM_S390_ENABLE_SIE)
211                 return s390_enable_sie();
212         return -EINVAL;
213 }
214
215 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
216 {
217         int r;
218
219         switch (ext) {
220         case KVM_CAP_S390_PSW:
221         case KVM_CAP_S390_GMAP:
222         case KVM_CAP_SYNC_MMU:
223 #ifdef CONFIG_KVM_S390_UCONTROL
224         case KVM_CAP_S390_UCONTROL:
225 #endif
226         case KVM_CAP_ASYNC_PF:
227         case KVM_CAP_SYNC_REGS:
228         case KVM_CAP_ONE_REG:
229         case KVM_CAP_ENABLE_CAP:
230         case KVM_CAP_S390_CSS_SUPPORT:
231         case KVM_CAP_IOEVENTFD:
232         case KVM_CAP_DEVICE_CTRL:
233         case KVM_CAP_ENABLE_CAP_VM:
234         case KVM_CAP_S390_IRQCHIP:
235         case KVM_CAP_VM_ATTRIBUTES:
236         case KVM_CAP_MP_STATE:
237         case KVM_CAP_S390_INJECT_IRQ:
238         case KVM_CAP_S390_USER_SIGP:
239         case KVM_CAP_S390_USER_STSI:
240         case KVM_CAP_S390_SKEYS:
241         case KVM_CAP_S390_IRQ_STATE:
242                 r = 1;
243                 break;
244         case KVM_CAP_S390_MEM_OP:
245                 r = MEM_OP_MAX_SIZE;
246                 break;
247         case KVM_CAP_NR_VCPUS:
248         case KVM_CAP_MAX_VCPUS:
249                 r = KVM_MAX_VCPUS;
250                 break;
251         case KVM_CAP_NR_MEMSLOTS:
252                 r = KVM_USER_MEM_SLOTS;
253                 break;
254         case KVM_CAP_S390_COW:
255                 r = MACHINE_HAS_ESOP;
256                 break;
257         case KVM_CAP_S390_VECTOR_REGISTERS:
258                 r = MACHINE_HAS_VX;
259                 break;
260         default:
261                 r = 0;
262         }
263         return r;
264 }
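/*
 * Sketch of the userspace side of the handler above (assumed usage, the
 * names vm_fd/max are illustrative): capabilities are queried with the
 * KVM_CHECK_EXTENSION ioctl, and for some s390 caps the return value carries
 * more than a boolean:
 *
 *   int max = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *   // max is MEM_OP_MAX_SIZE (65536) when the mem-op interface is present
 */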
265
266 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
267                                         struct kvm_memory_slot *memslot)
268 {
269         gfn_t cur_gfn, last_gfn;
270         unsigned long address;
271         struct gmap *gmap = kvm->arch.gmap;
272
273         down_read(&gmap->mm->mmap_sem);
274         /* Loop over all guest pages */
275         last_gfn = memslot->base_gfn + memslot->npages;
276         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
277                 address = gfn_to_hva_memslot(memslot, cur_gfn);
278
279                 if (gmap_test_and_clear_dirty(address, gmap))
280                         mark_page_dirty(kvm, cur_gfn);
281         }
282         up_read(&gmap->mm->mmap_sem);
283 }
284
285 /* Section: vm related */
286 /*
287  * Get (and clear) the dirty memory log for a memory slot.
288  */
289 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
290                                struct kvm_dirty_log *log)
291 {
292         int r;
293         unsigned long n;
294         struct kvm_memslots *slots;
295         struct kvm_memory_slot *memslot;
296         int is_dirty = 0;
297
298         if (kvm_is_ucontrol(kvm))
299                 return -EINVAL;
300
301         mutex_lock(&kvm->slots_lock);
302
303         r = -EINVAL;
304         if (log->slot >= KVM_USER_MEM_SLOTS)
305                 goto out;
306
307         slots = kvm_memslots(kvm);
308         memslot = id_to_memslot(slots, log->slot);
309         r = -ENOENT;
310         if (!memslot->dirty_bitmap)
311                 goto out;
312
313         kvm_s390_sync_dirty_log(kvm, memslot);
314         r = kvm_get_dirty_log(kvm, log, &is_dirty);
315         if (r)
316                 goto out;
317
318         /* Clear the dirty log */
319         if (is_dirty) {
320                 n = kvm_dirty_bitmap_bytes(memslot);
321                 memset(memslot->dirty_bitmap, 0, n);
322         }
323         r = 0;
324 out:
325         mutex_unlock(&kvm->slots_lock);
326         return r;
327 }
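/*
 * Note: dirty logging here is a two step process. kvm_s390_sync_dirty_log()
 * first transfers the per-page dirty state tracked in the gmap/host page
 * tables into the memslot's dirty_bitmap, then kvm_get_dirty_log() copies
 * that bitmap to userspace and the bitmap is cleared above, so each
 * KVM_GET_DIRTY_LOG call reports the changes since the previous one.
 */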
328
329 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
330 {
331         int r;
332
333         if (cap->flags)
334                 return -EINVAL;
335
336         switch (cap->cap) {
337         case KVM_CAP_S390_IRQCHIP:
338                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
339                 kvm->arch.use_irqchip = 1;
340                 r = 0;
341                 break;
342         case KVM_CAP_S390_USER_SIGP:
343                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
344                 kvm->arch.user_sigp = 1;
345                 r = 0;
346                 break;
347         case KVM_CAP_S390_VECTOR_REGISTERS:
348                 mutex_lock(&kvm->lock);
349                 if (atomic_read(&kvm->online_vcpus)) {
350                         r = -EBUSY;
351                 } else if (MACHINE_HAS_VX) {
352                         set_kvm_facility(kvm->arch.model.fac->mask, 129);
353                         set_kvm_facility(kvm->arch.model.fac->list, 129);
354                         r = 0;
355                 } else
356                         r = -EINVAL;
357                 mutex_unlock(&kvm->lock);
358                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
359                          r ? "(not available)" : "(success)");
360                 break;
361         case KVM_CAP_S390_USER_STSI:
362                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
363                 kvm->arch.user_stsi = 1;
364                 r = 0;
365                 break;
366         default:
367                 r = -EINVAL;
368                 break;
369         }
370         return r;
371 }
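/*
 * Sketch of how userspace might enable one of the caps handled above
 * (assumed usage, vm_fd is illustrative):
 *
 *   struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *   ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 *
 * KVM_CAP_S390_VECTOR_REGISTERS must be enabled before any vcpu is created,
 * otherwise the handler above returns -EBUSY.
 */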
372
373 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
374 {
375         int ret;
376
377         switch (attr->attr) {
378         case KVM_S390_VM_MEM_LIMIT_SIZE:
379                 ret = 0;
380                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
381                          kvm->arch.gmap->asce_end);
382                 if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
383                         ret = -EFAULT;
384                 break;
385         default:
386                 ret = -ENXIO;
387                 break;
388         }
389         return ret;
390 }
391
392 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
393 {
394         int ret;
395         unsigned int idx;
396         switch (attr->attr) {
397         case KVM_S390_VM_MEM_ENABLE_CMMA:
398                 /* enable CMMA only for z10 and later (EDAT_1) */
399                 ret = -EINVAL;
400                 if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
401                         break;
402
403                 ret = -EBUSY;
404                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
405                 mutex_lock(&kvm->lock);
406                 if (atomic_read(&kvm->online_vcpus) == 0) {
407                         kvm->arch.use_cmma = 1;
408                         ret = 0;
409                 }
410                 mutex_unlock(&kvm->lock);
411                 break;
412         case KVM_S390_VM_MEM_CLR_CMMA:
413                 ret = -EINVAL;
414                 if (!kvm->arch.use_cmma)
415                         break;
416
417                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
418                 mutex_lock(&kvm->lock);
419                 idx = srcu_read_lock(&kvm->srcu);
420                 s390_reset_cmma(kvm->arch.gmap->mm);
421                 srcu_read_unlock(&kvm->srcu, idx);
422                 mutex_unlock(&kvm->lock);
423                 ret = 0;
424                 break;
425         case KVM_S390_VM_MEM_LIMIT_SIZE: {
426                 unsigned long new_limit;
427
428                 if (kvm_is_ucontrol(kvm))
429                         return -EINVAL;
430
431                 if (get_user(new_limit, (u64 __user *)attr->addr))
432                         return -EFAULT;
433
434                 if (new_limit > kvm->arch.gmap->asce_end)
435                         return -E2BIG;
436
437                 ret = -EBUSY;
438                 mutex_lock(&kvm->lock);
439                 if (atomic_read(&kvm->online_vcpus) == 0) {
440                         /* gmap_alloc will round the limit up */
441                         struct gmap *new = gmap_alloc(current->mm, new_limit);
442
443                         if (!new) {
444                                 ret = -ENOMEM;
445                         } else {
446                                 gmap_free(kvm->arch.gmap);
447                                 new->private = kvm;
448                                 kvm->arch.gmap = new;
449                                 ret = 0;
450                         }
451                 }
452                 mutex_unlock(&kvm->lock);
453                 VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit);
454                 break;
455         }
456         default:
457                 ret = -ENXIO;
458                 break;
459         }
460         return ret;
461 }
462
463 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
464
465 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
466 {
467         struct kvm_vcpu *vcpu;
468         int i;
469
470         if (!test_kvm_facility(kvm, 76))
471                 return -EINVAL;
472
473         mutex_lock(&kvm->lock);
474         switch (attr->attr) {
475         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
476                 get_random_bytes(
477                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
478                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
479                 kvm->arch.crypto.aes_kw = 1;
480                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
481                 break;
482         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
483                 get_random_bytes(
484                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
485                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
486                 kvm->arch.crypto.dea_kw = 1;
487                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
488                 break;
489         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
490                 kvm->arch.crypto.aes_kw = 0;
491                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
492                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
493                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
494                 break;
495         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
496                 kvm->arch.crypto.dea_kw = 0;
497                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
498                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
499                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
500                 break;
501         default:
502                 mutex_unlock(&kvm->lock);
503                 return -ENXIO;
504         }
505
506         kvm_for_each_vcpu(i, vcpu, kvm) {
507                 kvm_s390_vcpu_crypto_setup(vcpu);
508                 exit_sie(vcpu);
509         }
510         mutex_unlock(&kvm->lock);
511         return 0;
512 }
513
514 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
515 {
516         u8 gtod_high;
517
518         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
519                                            sizeof(gtod_high)))
520                 return -EFAULT;
521
522         if (gtod_high != 0)
523                 return -EINVAL;
524         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
525
526         return 0;
527 }
528
529 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
530 {
531         u64 gtod;
532
533         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
534                 return -EFAULT;
535
536         kvm_s390_set_tod_clock(kvm, gtod);
537         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
538         return 0;
539 }
540
541 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
542 {
543         int ret;
544
545         if (attr->flags)
546                 return -EINVAL;
547
548         switch (attr->attr) {
549         case KVM_S390_VM_TOD_HIGH:
550                 ret = kvm_s390_set_tod_high(kvm, attr);
551                 break;
552         case KVM_S390_VM_TOD_LOW:
553                 ret = kvm_s390_set_tod_low(kvm, attr);
554                 break;
555         default:
556                 ret = -ENXIO;
557                 break;
558         }
559         return ret;
560 }
561
562 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
563 {
564         u8 gtod_high = 0;
565
566         if (copy_to_user((void __user *)attr->addr, &gtod_high,
567                                          sizeof(gtod_high)))
568                 return -EFAULT;
569         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
570
571         return 0;
572 }
573
574 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
575 {
576         u64 gtod;
577
578         gtod = kvm_s390_get_tod_clock_fast(kvm);
579         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
580                 return -EFAULT;
581         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
582
583         return 0;
584 }
585
586 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
587 {
588         int ret;
589
590         if (attr->flags)
591                 return -EINVAL;
592
593         switch (attr->attr) {
594         case KVM_S390_VM_TOD_HIGH:
595                 ret = kvm_s390_get_tod_high(kvm, attr);
596                 break;
597         case KVM_S390_VM_TOD_LOW:
598                 ret = kvm_s390_get_tod_low(kvm, attr);
599                 break;
600         default:
601                 ret = -ENXIO;
602                 break;
603         }
604         return ret;
605 }
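/*
 * Note: KVM_S390_VM_TOD_LOW transports the 64-bit guest TOD clock value,
 * while KVM_S390_VM_TOD_HIGH is the epoch extension byte. The setters and
 * getters above only accept and report 0 for the high part, i.e. this
 * implementation does not support an extended TOD epoch.
 */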
606
607 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
608 {
609         struct kvm_s390_vm_cpu_processor *proc;
610         int ret = 0;
611
612         mutex_lock(&kvm->lock);
613         if (atomic_read(&kvm->online_vcpus)) {
614                 ret = -EBUSY;
615                 goto out;
616         }
617         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
618         if (!proc) {
619                 ret = -ENOMEM;
620                 goto out;
621         }
622         if (!copy_from_user(proc, (void __user *)attr->addr,
623                             sizeof(*proc))) {
624                 memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
625                        sizeof(struct cpuid));
626                 kvm->arch.model.ibc = proc->ibc;
627                 memcpy(kvm->arch.model.fac->list, proc->fac_list,
628                        S390_ARCH_FAC_LIST_SIZE_BYTE);
629         } else
630                 ret = -EFAULT;
631         kfree(proc);
632 out:
633         mutex_unlock(&kvm->lock);
634         return ret;
635 }
636
637 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
638 {
639         int ret = -ENXIO;
640
641         switch (attr->attr) {
642         case KVM_S390_VM_CPU_PROCESSOR:
643                 ret = kvm_s390_set_processor(kvm, attr);
644                 break;
645         }
646         return ret;
647 }
648
649 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
650 {
651         struct kvm_s390_vm_cpu_processor *proc;
652         int ret = 0;
653
654         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
655         if (!proc) {
656                 ret = -ENOMEM;
657                 goto out;
658         }
659         memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
660         proc->ibc = kvm->arch.model.ibc;
661         memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
662         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
663                 ret = -EFAULT;
664         kfree(proc);
665 out:
666         return ret;
667 }
668
669 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
670 {
671         struct kvm_s390_vm_cpu_machine *mach;
672         int ret = 0;
673
674         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
675         if (!mach) {
676                 ret = -ENOMEM;
677                 goto out;
678         }
679         get_cpu_id((struct cpuid *) &mach->cpuid);
680         mach->ibc = sclp.ibc;
681         memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
682                S390_ARCH_FAC_LIST_SIZE_BYTE);
683         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
684                S390_ARCH_FAC_LIST_SIZE_BYTE);
685         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
686                 ret = -EFAULT;
687         kfree(mach);
688 out:
689         return ret;
690 }
691
692 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
693 {
694         int ret = -ENXIO;
695
696         switch (attr->attr) {
697         case KVM_S390_VM_CPU_PROCESSOR:
698                 ret = kvm_s390_get_processor(kvm, attr);
699                 break;
700         case KVM_S390_VM_CPU_MACHINE:
701                 ret = kvm_s390_get_machine(kvm, attr);
702                 break;
703         }
704         return ret;
705 }
706
707 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
708 {
709         int ret;
710
711         switch (attr->group) {
712         case KVM_S390_VM_MEM_CTRL:
713                 ret = kvm_s390_set_mem_control(kvm, attr);
714                 break;
715         case KVM_S390_VM_TOD:
716                 ret = kvm_s390_set_tod(kvm, attr);
717                 break;
718         case KVM_S390_VM_CPU_MODEL:
719                 ret = kvm_s390_set_cpu_model(kvm, attr);
720                 break;
721         case KVM_S390_VM_CRYPTO:
722                 ret = kvm_s390_vm_set_crypto(kvm, attr);
723                 break;
724         default:
725                 ret = -ENXIO;
726                 break;
727         }
728
729         return ret;
730 }
731
732 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
733 {
734         int ret;
735
736         switch (attr->group) {
737         case KVM_S390_VM_MEM_CTRL:
738                 ret = kvm_s390_get_mem_control(kvm, attr);
739                 break;
740         case KVM_S390_VM_TOD:
741                 ret = kvm_s390_get_tod(kvm, attr);
742                 break;
743         case KVM_S390_VM_CPU_MODEL:
744                 ret = kvm_s390_get_cpu_model(kvm, attr);
745                 break;
746         default:
747                 ret = -ENXIO;
748                 break;
749         }
750
751         return ret;
752 }
753
754 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
755 {
756         int ret;
757
758         switch (attr->group) {
759         case KVM_S390_VM_MEM_CTRL:
760                 switch (attr->attr) {
761                 case KVM_S390_VM_MEM_ENABLE_CMMA:
762                 case KVM_S390_VM_MEM_CLR_CMMA:
763                 case KVM_S390_VM_MEM_LIMIT_SIZE:
764                         ret = 0;
765                         break;
766                 default:
767                         ret = -ENXIO;
768                         break;
769                 }
770                 break;
771         case KVM_S390_VM_TOD:
772                 switch (attr->attr) {
773                 case KVM_S390_VM_TOD_LOW:
774                 case KVM_S390_VM_TOD_HIGH:
775                         ret = 0;
776                         break;
777                 default:
778                         ret = -ENXIO;
779                         break;
780                 }
781                 break;
782         case KVM_S390_VM_CPU_MODEL:
783                 switch (attr->attr) {
784                 case KVM_S390_VM_CPU_PROCESSOR:
785                 case KVM_S390_VM_CPU_MACHINE:
786                         ret = 0;
787                         break;
788                 default:
789                         ret = -ENXIO;
790                         break;
791                 }
792                 break;
793         case KVM_S390_VM_CRYPTO:
794                 switch (attr->attr) {
795                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
796                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
797                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
798                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
799                         ret = 0;
800                         break;
801                 default:
802                         ret = -ENXIO;
803                         break;
804                 }
805                 break;
806         default:
807                 ret = -ENXIO;
808                 break;
809         }
810
811         return ret;
812 }
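/*
 * Sketch of the VM attribute interface served by the three handlers above
 * (assumed userspace usage; vm_fd and limit are illustrative names):
 *
 *   __u64 limit;
 *   struct kvm_device_attr attr = {
 *           .group = KVM_S390_VM_MEM_CTRL,
 *           .attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
 *           .addr  = (__u64)(unsigned long)&limit,
 *   };
 *   if (ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr) == 0)
 *           ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);   // fills "limit"
 */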
813
814 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
815 {
816         uint8_t *keys;
817         uint64_t hva;
818         unsigned long curkey;
819         int i, r = 0;
820
821         if (args->flags != 0)
822                 return -EINVAL;
823
824         /* Is this guest using storage keys? */
825         if (!mm_use_skey(current->mm))
826                 return KVM_S390_GET_SKEYS_NONE;
827
828         /* Enforce sane limit on memory allocation */
829         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
830                 return -EINVAL;
831
832         keys = kmalloc_array(args->count, sizeof(uint8_t),
833                              GFP_KERNEL | __GFP_NOWARN);
834         if (!keys)
835                 keys = vmalloc(sizeof(uint8_t) * args->count);
836         if (!keys)
837                 return -ENOMEM;
838
839         for (i = 0; i < args->count; i++) {
840                 hva = gfn_to_hva(kvm, args->start_gfn + i);
841                 if (kvm_is_error_hva(hva)) {
842                         r = -EFAULT;
843                         goto out;
844                 }
845
846                 curkey = get_guest_storage_key(current->mm, hva);
847                 if (IS_ERR_VALUE(curkey)) {
848                         r = curkey;
849                         goto out;
850                 }
851                 keys[i] = curkey;
852         }
853
854         r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
855                          sizeof(uint8_t) * args->count);
856         if (r)
857                 r = -EFAULT;
858 out:
859         kvfree(keys);
860         return r;
861 }
862
863 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
864 {
865         uint8_t *keys;
866         uint64_t hva;
867         int i, r = 0;
868
869         if (args->flags != 0)
870                 return -EINVAL;
871
872         /* Enforce sane limit on memory allocation */
873         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
874                 return -EINVAL;
875
876         keys = kmalloc_array(args->count, sizeof(uint8_t),
877                              GFP_KERNEL | __GFP_NOWARN);
878         if (!keys)
879                 keys = vmalloc(sizeof(uint8_t) * args->count);
880         if (!keys)
881                 return -ENOMEM;
882
883         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
884                            sizeof(uint8_t) * args->count);
885         if (r) {
886                 r = -EFAULT;
887                 goto out;
888         }
889
890         /* Enable storage key handling for the guest */
891         r = s390_enable_skey();
892         if (r)
893                 goto out;
894
895         for (i = 0; i < args->count; i++) {
896                 hva = gfn_to_hva(kvm, args->start_gfn + i);
897                 if (kvm_is_error_hva(hva)) {
898                         r = -EFAULT;
899                         goto out;
900                 }
901
902                 /* Lowest order bit is reserved */
903                 if (keys[i] & 0x01) {
904                         r = -EINVAL;
905                         goto out;
906                 }
907
908                 r = set_guest_storage_key(current->mm, hva,
909                                           (unsigned long)keys[i], 0);
910                 if (r)
911                         goto out;
912         }
913 out:
914         kvfree(keys);
915         return r;
916 }
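/*
 * Note on the storage key format used by the two helpers above (architecture
 * background, stated from memory rather than from this file): a key byte
 * holds the access-control bits in the high nibble, followed by the
 * fetch-protection, reference and change bits; the lowest-order bit is not
 * defined, which is why set_skeys rejects any key with 0x01 set.
 */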
917
918 long kvm_arch_vm_ioctl(struct file *filp,
919                        unsigned int ioctl, unsigned long arg)
920 {
921         struct kvm *kvm = filp->private_data;
922         void __user *argp = (void __user *)arg;
923         struct kvm_device_attr attr;
924         int r;
925
926         switch (ioctl) {
927         case KVM_S390_INTERRUPT: {
928                 struct kvm_s390_interrupt s390int;
929
930                 r = -EFAULT;
931                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
932                         break;
933                 r = kvm_s390_inject_vm(kvm, &s390int);
934                 break;
935         }
936         case KVM_ENABLE_CAP: {
937                 struct kvm_enable_cap cap;
938                 r = -EFAULT;
939                 if (copy_from_user(&cap, argp, sizeof(cap)))
940                         break;
941                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
942                 break;
943         }
944         case KVM_CREATE_IRQCHIP: {
945                 struct kvm_irq_routing_entry routing;
946
947                 r = -EINVAL;
948                 if (kvm->arch.use_irqchip) {
949                         /* Set up dummy routing. */
950                         memset(&routing, 0, sizeof(routing));
951                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
952                 }
953                 break;
954         }
955         case KVM_SET_DEVICE_ATTR: {
956                 r = -EFAULT;
957                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
958                         break;
959                 r = kvm_s390_vm_set_attr(kvm, &attr);
960                 break;
961         }
962         case KVM_GET_DEVICE_ATTR: {
963                 r = -EFAULT;
964                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
965                         break;
966                 r = kvm_s390_vm_get_attr(kvm, &attr);
967                 break;
968         }
969         case KVM_HAS_DEVICE_ATTR: {
970                 r = -EFAULT;
971                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
972                         break;
973                 r = kvm_s390_vm_has_attr(kvm, &attr);
974                 break;
975         }
976         case KVM_S390_GET_SKEYS: {
977                 struct kvm_s390_skeys args;
978
979                 r = -EFAULT;
980                 if (copy_from_user(&args, argp,
981                                    sizeof(struct kvm_s390_skeys)))
982                         break;
983                 r = kvm_s390_get_skeys(kvm, &args);
984                 break;
985         }
986         case KVM_S390_SET_SKEYS: {
987                 struct kvm_s390_skeys args;
988
989                 r = -EFAULT;
990                 if (copy_from_user(&args, argp,
991                                    sizeof(struct kvm_s390_skeys)))
992                         break;
993                 r = kvm_s390_set_skeys(kvm, &args);
994                 break;
995         }
996         default:
997                 r = -ENOTTY;
998         }
999
1000         return r;
1001 }
1002
1003 static int kvm_s390_query_ap_config(u8 *config)
1004 {
1005         u32 fcn_code = 0x04000000UL;
1006         u32 cc = 0;
1007
1008         memset(config, 0, 128);
1009         asm volatile(
1010                 "lgr 0,%1\n"
1011                 "lgr 2,%2\n"
1012                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1013                 "0: ipm %0\n"
1014                 "srl %0,28\n"
1015                 "1:\n"
1016                 EX_TABLE(0b, 1b)
1017                 : "+r" (cc)
1018                 : "r" (fcn_code), "r" (config)
1019                 : "cc", "0", "2", "memory"
1020         );
1021
1022         return cc;
1023 }
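/*
 * Note: the inline assembly above loads the QCI function code into GR0 and
 * the address of the 128-byte config buffer into GR2, issues PQAP (opcode
 * 0xb2af) and extracts the resulting condition code via IPM/SRL. The
 * EX_TABLE entry is intended to catch a faulting PQAP (e.g. AP instructions
 * not installed), in which case the function returns the initial cc of 0
 * together with the zeroed config block and APXA is reported as absent.
 */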
1024
1025 static int kvm_s390_apxa_installed(void)
1026 {
1027         u8 config[128];
1028         int cc;
1029
1030         if (test_facility(2) && test_facility(12)) {
1031                 cc = kvm_s390_query_ap_config(config);
1032
1033                 if (cc)
1034                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1035                 else
1036                         return config[0] & 0x40;
1037         }
1038
1039         return 0;
1040 }
1041
1042 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1043 {
1044         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1045
1046         if (kvm_s390_apxa_installed())
1047                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1048         else
1049                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1050 }
1051
1052 static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
1053 {
1054         get_cpu_id(cpu_id);
1055         cpu_id->version = 0xff;
1056 }
1057
1058 static int kvm_s390_crypto_init(struct kvm *kvm)
1059 {
1060         if (!test_kvm_facility(kvm, 76))
1061                 return 0;
1062
1063         kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
1064                                          GFP_KERNEL | GFP_DMA);
1065         if (!kvm->arch.crypto.crycb)
1066                 return -ENOMEM;
1067
1068         kvm_s390_set_crycb_format(kvm);
1069
1070         /* Enable AES/DEA protected key functions by default */
1071         kvm->arch.crypto.aes_kw = 1;
1072         kvm->arch.crypto.dea_kw = 1;
1073         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1074                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1075         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1076                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1077
1078         return 0;
1079 }
1080
1081 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1082 {
1083         int i, rc;
1084         char debug_name[16];
1085         static unsigned long sca_offset;
1086
1087         rc = -EINVAL;
1088 #ifdef CONFIG_KVM_S390_UCONTROL
1089         if (type & ~KVM_VM_S390_UCONTROL)
1090                 goto out_err;
1091         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1092                 goto out_err;
1093 #else
1094         if (type)
1095                 goto out_err;
1096 #endif
1097
1098         rc = s390_enable_sie();
1099         if (rc)
1100                 goto out_err;
1101
1102         rc = -ENOMEM;
1103
1104         kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
1105         if (!kvm->arch.sca)
1106                 goto out_err;
1107         spin_lock(&kvm_lock);
1108         sca_offset += 16;
1109         if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE)
1110                 sca_offset = 0;
1111         kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
1112         spin_unlock(&kvm_lock);
1113
1114         sprintf(debug_name, "kvm-%u", current->pid);
1115
1116         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1117         if (!kvm->arch.dbf)
1118                 goto out_err;
1119
1120         /*
1121          * The architectural maximum amount of facilities is 16 kbit. To store
1122          * this amount, 2 kbyte of memory is required. Thus we need a full
1123          * page to hold the guest facility list (arch.model.fac->list) and the
1124          * facility mask (arch.model.fac->mask). Its address size has to be
1125          * 31 bits and word aligned.
1126          */
1127         kvm->arch.model.fac =
1128                 (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1129         if (!kvm->arch.model.fac)
1130                 goto out_err;
1131
1132         /* Populate the facility mask initially. */
1133         memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
1134                S390_ARCH_FAC_LIST_SIZE_BYTE);
1135         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1136                 if (i < kvm_s390_fac_list_mask_size())
1137                         kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
1138                 else
1139                         kvm->arch.model.fac->mask[i] = 0UL;
1140         }
1141
1142         /* Populate the facility list initially. */
1143         memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
1144                S390_ARCH_FAC_LIST_SIZE_BYTE);
1145
1146         kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
1147         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1148
1149         if (kvm_s390_crypto_init(kvm) < 0)
1150                 goto out_err;
1151
1152         spin_lock_init(&kvm->arch.float_int.lock);
1153         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1154                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1155         init_waitqueue_head(&kvm->arch.ipte_wq);
1156         mutex_init(&kvm->arch.ipte_mutex);
1157
1158         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1159         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1160
1161         if (type & KVM_VM_S390_UCONTROL) {
1162                 kvm->arch.gmap = NULL;
1163         } else {
1164                 kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
1165                 if (!kvm->arch.gmap)
1166                         goto out_err;
1167                 kvm->arch.gmap->private = kvm;
1168                 kvm->arch.gmap->pfault_enabled = 0;
1169         }
1170
1171         kvm->arch.css_support = 0;
1172         kvm->arch.use_irqchip = 0;
1173         kvm->arch.epoch = 0;
1174
1175         spin_lock_init(&kvm->arch.start_stop_lock);
1176         KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
1177
1178         return 0;
1179 out_err:
1180         kfree(kvm->arch.crypto.crycb);
1181         free_page((unsigned long)kvm->arch.model.fac);
1182         debug_unregister(kvm->arch.dbf);
1183         free_page((unsigned long)(kvm->arch.sca));
1184         KVM_EVENT(3, "creation of vm failed: %d", rc);
1185         return rc;
1186 }
1187
1188 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1189 {
1190         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1191         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1192         kvm_s390_clear_local_irqs(vcpu);
1193         kvm_clear_async_pf_completion_queue(vcpu);
1194         if (!kvm_is_ucontrol(vcpu->kvm)) {
1195                 clear_bit(63 - vcpu->vcpu_id,
1196                           (unsigned long *) &vcpu->kvm->arch.sca->mcn);
1197                 if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
1198                     (__u64) vcpu->arch.sie_block)
1199                         vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
1200         }
1201         smp_mb();
1202
1203         if (kvm_is_ucontrol(vcpu->kvm))
1204                 gmap_free(vcpu->arch.gmap);
1205
1206         if (vcpu->kvm->arch.use_cmma)
1207                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1208         free_page((unsigned long)(vcpu->arch.sie_block));
1209
1210         kvm_vcpu_uninit(vcpu);
1211         kmem_cache_free(kvm_vcpu_cache, vcpu);
1212 }
1213
1214 static void kvm_free_vcpus(struct kvm *kvm)
1215 {
1216         unsigned int i;
1217         struct kvm_vcpu *vcpu;
1218
1219         kvm_for_each_vcpu(i, vcpu, kvm)
1220                 kvm_arch_vcpu_destroy(vcpu);
1221
1222         mutex_lock(&kvm->lock);
1223         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1224                 kvm->vcpus[i] = NULL;
1225
1226         atomic_set(&kvm->online_vcpus, 0);
1227         mutex_unlock(&kvm->lock);
1228 }
1229
1230 void kvm_arch_destroy_vm(struct kvm *kvm)
1231 {
1232         kvm_free_vcpus(kvm);
1233         free_page((unsigned long)kvm->arch.model.fac);
1234         free_page((unsigned long)(kvm->arch.sca));
1235         debug_unregister(kvm->arch.dbf);
1236         kfree(kvm->arch.crypto.crycb);
1237         if (!kvm_is_ucontrol(kvm))
1238                 gmap_free(kvm->arch.gmap);
1239         kvm_s390_destroy_adapters(kvm);
1240         kvm_s390_clear_float_irqs(kvm);
1241         KVM_EVENT(3, "vm 0x%p destroyed", kvm);
1242 }
1243
1244 /* Section: vcpu related */
1245 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1246 {
1247         vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1248         if (!vcpu->arch.gmap)
1249                 return -ENOMEM;
1250         vcpu->arch.gmap->private = vcpu->kvm;
1251
1252         return 0;
1253 }
1254
1255 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1256 {
1257         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1258         kvm_clear_async_pf_completion_queue(vcpu);
1259         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1260                                     KVM_SYNC_GPRS |
1261                                     KVM_SYNC_ACRS |
1262                                     KVM_SYNC_CRS |
1263                                     KVM_SYNC_ARCH0 |
1264                                     KVM_SYNC_PFAULT;
1265         if (test_kvm_facility(vcpu->kvm, 129))
1266                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1267
1268         if (kvm_is_ucontrol(vcpu->kvm))
1269                 return __kvm_ucontrol_vcpu_init(vcpu);
1270
1271         return 0;
1272 }
1273
1274 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1275 {
1276         /* Save host register state */
1277         save_fpu_regs();
1278         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
1279         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
1280
1281         /* Depending on MACHINE_HAS_VX, data stored to vrs either
1282          * has vector register or floating point register format.
1283          */
1284         current->thread.fpu.regs = vcpu->run->s.regs.vrs;
1285         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1286         if (test_fp_ctl(current->thread.fpu.fpc))
1287                 /* User space provided an invalid FPC, let's clear it */
1288                 current->thread.fpu.fpc = 0;
1289
1290         save_access_regs(vcpu->arch.host_acrs);
1291         restore_access_regs(vcpu->run->s.regs.acrs);
1292         gmap_enable(vcpu->arch.gmap);
1293         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1294 }
1295
1296 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1297 {
1298         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1299         gmap_disable(vcpu->arch.gmap);
1300
1301         /* Save guest register state */
1302         save_fpu_regs();
1303         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1304
1305         /* Restore host register state */
1306         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
1307         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
1308
1309         save_access_regs(vcpu->run->s.regs.acrs);
1310         restore_access_regs(vcpu->arch.host_acrs);
1311 }
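/*
 * Note: kvm_arch_vcpu_load() and kvm_arch_vcpu_put() above form a symmetric
 * pair. On load, the host FPU/vector state and access registers are saved
 * and current->thread.fpu is pointed at the guest register save area, so the
 * regular lazy FPU handling operates on guest state while the vcpu is
 * scheduled in; on put, the guest state is saved back into the run structure
 * and the host pointers and access registers are restored.
 */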
1312
1313 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1314 {
1315         /* this equals initial cpu reset in pop, but we don't switch to ESA */
1316         vcpu->arch.sie_block->gpsw.mask = 0UL;
1317         vcpu->arch.sie_block->gpsw.addr = 0UL;
1318         kvm_s390_set_prefix(vcpu, 0);
1319         vcpu->arch.sie_block->cputm     = 0UL;
1320         vcpu->arch.sie_block->ckc       = 0UL;
1321         vcpu->arch.sie_block->todpr     = 0;
1322         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1323         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1324         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1325         /* make sure the new fpc will be lazily loaded */
1326         save_fpu_regs();
1327         current->thread.fpu.fpc = 0;
1328         vcpu->arch.sie_block->gbea = 1;
1329         vcpu->arch.sie_block->pp = 0;
1330         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1331         kvm_clear_async_pf_completion_queue(vcpu);
1332         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1333                 kvm_s390_vcpu_stop(vcpu);
1334         kvm_s390_clear_local_irqs(vcpu);
1335 }
1336
1337 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1338 {
1339         mutex_lock(&vcpu->kvm->lock);
1340         preempt_disable();
1341         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1342         preempt_enable();
1343         mutex_unlock(&vcpu->kvm->lock);
1344         if (!kvm_is_ucontrol(vcpu->kvm))
1345                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1346 }
1347
1348 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1349 {
1350         if (!test_kvm_facility(vcpu->kvm, 76))
1351                 return;
1352
1353         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1354
1355         if (vcpu->kvm->arch.crypto.aes_kw)
1356                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1357         if (vcpu->kvm->arch.crypto.dea_kw)
1358                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1359
1360         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1361 }
1362
1363 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1364 {
1365         free_page(vcpu->arch.sie_block->cbrlo);
1366         vcpu->arch.sie_block->cbrlo = 0;
1367 }
1368
1369 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1370 {
1371         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1372         if (!vcpu->arch.sie_block->cbrlo)
1373                 return -ENOMEM;
1374
1375         vcpu->arch.sie_block->ecb2 |= 0x80;
1376         vcpu->arch.sie_block->ecb2 &= ~0x08;
1377         return 0;
1378 }
1379
1380 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1381 {
1382         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1383
1384         vcpu->arch.cpu_id = model->cpu_id;
1385         vcpu->arch.sie_block->ibc = model->ibc;
1386         vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
1387 }
1388
1389 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1390 {
1391         int rc = 0;
1392
1393         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1394                                                     CPUSTAT_SM |
1395                                                     CPUSTAT_STOPPED);
1396
1397         if (test_kvm_facility(vcpu->kvm, 78))
1398                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1399         else if (test_kvm_facility(vcpu->kvm, 8))
1400                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1401
1402         kvm_s390_vcpu_setup_model(vcpu);
1403
1404         vcpu->arch.sie_block->ecb   = 6;
1405         if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1406                 vcpu->arch.sie_block->ecb |= 0x10;
1407
1408         vcpu->arch.sie_block->ecb2  = 8;
1409         vcpu->arch.sie_block->eca   = 0xC1002000U;
1410         if (sclp.has_siif)
1411                 vcpu->arch.sie_block->eca |= 1;
1412         if (sclp.has_sigpif)
1413                 vcpu->arch.sie_block->eca |= 0x10000000U;
1414         if (test_kvm_facility(vcpu->kvm, 129)) {
1415                 vcpu->arch.sie_block->eca |= 0x00020000;
1416                 vcpu->arch.sie_block->ecd |= 0x20000000;
1417         }
1418         vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1419
1420         if (vcpu->kvm->arch.use_cmma) {
1421                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1422                 if (rc)
1423                         return rc;
1424         }
1425         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1426         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1427
1428         kvm_s390_vcpu_crypto_setup(vcpu);
1429
1430         return rc;
1431 }
1432
1433 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1434                                       unsigned int id)
1435 {
1436         struct kvm_vcpu *vcpu;
1437         struct sie_page *sie_page;
1438         int rc = -EINVAL;
1439
1440         if (id >= KVM_MAX_VCPUS)
1441                 goto out;
1442
1443         rc = -ENOMEM;
1444
1445         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1446         if (!vcpu)
1447                 goto out;
1448
1449         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1450         if (!sie_page)
1451                 goto out_free_cpu;
1452
1453         vcpu->arch.sie_block = &sie_page->sie_block;
1454         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1455
1456         vcpu->arch.sie_block->icpua = id;
1457         if (!kvm_is_ucontrol(kvm)) {
1458                 if (!kvm->arch.sca) {
1459                         WARN_ON_ONCE(1);
1460                         goto out_free_cpu;
1461                 }
1462                 if (!kvm->arch.sca->cpu[id].sda)
1463                         kvm->arch.sca->cpu[id].sda =
1464                                 (__u64) vcpu->arch.sie_block;
1465                 vcpu->arch.sie_block->scaoh =
1466                         (__u32)(((__u64)kvm->arch.sca) >> 32);
1467                 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
1468                 set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
1469         }
1470
1471         spin_lock_init(&vcpu->arch.local_int.lock);
1472         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1473         vcpu->arch.local_int.wq = &vcpu->wq;
1474         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1475
1476         rc = kvm_vcpu_init(vcpu, kvm, id);
1477         if (rc)
1478                 goto out_free_sie_block;
1479         VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
1480                  vcpu->arch.sie_block);
1481         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1482
1483         return vcpu;
1484 out_free_sie_block:
1485         free_page((unsigned long)(vcpu->arch.sie_block));
1486 out_free_cpu:
1487         kmem_cache_free(kvm_vcpu_cache, vcpu);
1488 out:
1489         return ERR_PTR(rc);
1490 }
1491
1492 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1493 {
1494         return kvm_s390_vcpu_has_irq(vcpu, 0);
1495 }
1496
1497 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1498 {
1499         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1500         exit_sie(vcpu);
1501 }
1502
1503 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1504 {
1505         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1506 }
1507
1508 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1509 {
1510         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1511         exit_sie(vcpu);
1512 }
1513
1514 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1515 {
1516         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1517 }
1518
1519 /*
1520  * Kick a guest cpu out of SIE and wait until SIE is not running.
1521  * If the CPU is not running (e.g. waiting as idle) the function will
1522  * return immediately. */
1523 void exit_sie(struct kvm_vcpu *vcpu)
1524 {
1525         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1526         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1527                 cpu_relax();
1528 }
1529
1530 /* Kick a guest cpu out of SIE to process a request synchronously */
1531 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
1532 {
1533         kvm_make_request(req, vcpu);
1534         kvm_s390_vcpu_request(vcpu);
1535 }
1536
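     /*
      * Called by the gmap code when a guest mapping is invalidated. If the
      * address covers a vcpu's prefix pages, ask that vcpu to re-arm its
      * ipte notifier via a synchronous MMU_RELOAD request.
      */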
1537 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1538 {
1539         int i;
1540         struct kvm *kvm = gmap->private;
1541         struct kvm_vcpu *vcpu;
1542
1543         kvm_for_each_vcpu(i, vcpu, kvm) {
1544                 /* match against both prefix pages */
1545                 if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1546                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1547                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
1548                 }
1549         }
1550 }
1551
1552 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1553 {
1554         /* kvm common code refers to this, but never calls it */
1555         BUG();
1556         return 0;
1557 }
1558
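     /* Handler for KVM_GET_ONE_REG: copy one s390 register value to userspace. */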
1559 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1560                                            struct kvm_one_reg *reg)
1561 {
1562         int r = -EINVAL;
1563
1564         switch (reg->id) {
1565         case KVM_REG_S390_TODPR:
1566                 r = put_user(vcpu->arch.sie_block->todpr,
1567                              (u32 __user *)reg->addr);
1568                 break;
1569         case KVM_REG_S390_EPOCHDIFF:
1570                 r = put_user(vcpu->arch.sie_block->epoch,
1571                              (u64 __user *)reg->addr);
1572                 break;
1573         case KVM_REG_S390_CPU_TIMER:
1574                 r = put_user(vcpu->arch.sie_block->cputm,
1575                              (u64 __user *)reg->addr);
1576                 break;
1577         case KVM_REG_S390_CLOCK_COMP:
1578                 r = put_user(vcpu->arch.sie_block->ckc,
1579                              (u64 __user *)reg->addr);
1580                 break;
1581         case KVM_REG_S390_PFTOKEN:
1582                 r = put_user(vcpu->arch.pfault_token,
1583                              (u64 __user *)reg->addr);
1584                 break;
1585         case KVM_REG_S390_PFCOMPARE:
1586                 r = put_user(vcpu->arch.pfault_compare,
1587                              (u64 __user *)reg->addr);
1588                 break;
1589         case KVM_REG_S390_PFSELECT:
1590                 r = put_user(vcpu->arch.pfault_select,
1591                              (u64 __user *)reg->addr);
1592                 break;
1593         case KVM_REG_S390_PP:
1594                 r = put_user(vcpu->arch.sie_block->pp,
1595                              (u64 __user *)reg->addr);
1596                 break;
1597         case KVM_REG_S390_GBEA:
1598                 r = put_user(vcpu->arch.sie_block->gbea,
1599                              (u64 __user *)reg->addr);
1600                 break;
1601         default:
1602                 break;
1603         }
1604
1605         return r;
1606 }
1607
1608 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
1609                                            struct kvm_one_reg *reg)
1610 {
1611         int r = -EINVAL;
1612
1613         switch (reg->id) {
1614         case KVM_REG_S390_TODPR:
1615                 r = get_user(vcpu->arch.sie_block->todpr,
1616                              (u32 __user *)reg->addr);
1617                 break;
1618         case KVM_REG_S390_EPOCHDIFF:
1619                 r = get_user(vcpu->arch.sie_block->epoch,
1620                              (u64 __user *)reg->addr);
1621                 break;
1622         case KVM_REG_S390_CPU_TIMER:
1623                 r = get_user(vcpu->arch.sie_block->cputm,
1624                              (u64 __user *)reg->addr);
1625                 break;
1626         case KVM_REG_S390_CLOCK_COMP:
1627                 r = get_user(vcpu->arch.sie_block->ckc,
1628                              (u64 __user *)reg->addr);
1629                 break;
1630         case KVM_REG_S390_PFTOKEN:
1631                 r = get_user(vcpu->arch.pfault_token,
1632                              (u64 __user *)reg->addr);
1633                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1634                         kvm_clear_async_pf_completion_queue(vcpu);
1635                 break;
1636         case KVM_REG_S390_PFCOMPARE:
1637                 r = get_user(vcpu->arch.pfault_compare,
1638                              (u64 __user *)reg->addr);
1639                 break;
1640         case KVM_REG_S390_PFSELECT:
1641                 r = get_user(vcpu->arch.pfault_select,
1642                              (u64 __user *)reg->addr);
1643                 break;
1644         case KVM_REG_S390_PP:
1645                 r = get_user(vcpu->arch.sie_block->pp,
1646                              (u64 __user *)reg->addr);
1647                 break;
1648         case KVM_REG_S390_GBEA:
1649                 r = get_user(vcpu->arch.sie_block->gbea,
1650                              (u64 __user *)reg->addr);
1651                 break;
1652         default:
1653                 break;
1654         }
1655
1656         return r;
1657 }
1658
1659 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
1660 {
1661         kvm_s390_vcpu_initial_reset(vcpu);
1662         return 0;
1663 }
1664
1665 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1666 {
1667         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
1668         return 0;
1669 }
1670
1671 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1672 {
1673         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
1674         return 0;
1675 }
1676
1677 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1678                                   struct kvm_sregs *sregs)
1679 {
1680         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
1681         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
1682         restore_access_regs(vcpu->run->s.regs.acrs);
1683         return 0;
1684 }
1685
1686 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1687                                   struct kvm_sregs *sregs)
1688 {
1689         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
1690         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
1691         return 0;
1692 }
1693
1694 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1695 {
1696         /* make sure the new values will be lazily loaded */
1697         save_fpu_regs();
1698         if (test_fp_ctl(fpu->fpc))
1699                 return -EINVAL;
1700         current->thread.fpu.fpc = fpu->fpc;
1701         if (MACHINE_HAS_VX)
1702                 convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
1703         else
1704                 memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
1705         return 0;
1706 }
1707
1708 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1709 {
1710         /* make sure we have the latest values */
1711         save_fpu_regs();
1712         if (MACHINE_HAS_VX)
1713                 convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
1714         else
1715                 memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
1716         fpu->fpc = current->thread.fpu.fpc;
1717         return 0;
1718 }
1719
1720 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
1721 {
1722         int rc = 0;
1723
1724         if (!is_vcpu_stopped(vcpu))
1725                 rc = -EBUSY;
1726         else {
1727                 vcpu->run->psw_mask = psw.mask;
1728                 vcpu->run->psw_addr = psw.addr;
1729         }
1730         return rc;
1731 }
1732
1733 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1734                                   struct kvm_translation *tr)
1735 {
1736         return -EINVAL; /* not implemented yet */
1737 }
1738
1739 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
1740                               KVM_GUESTDBG_USE_HW_BP | \
1741                               KVM_GUESTDBG_ENABLE)
1742
1743 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1744                                         struct kvm_guest_debug *dbg)
1745 {
1746         int rc = 0;
1747
1748         vcpu->guest_debug = 0;
1749         kvm_s390_clear_bp_data(vcpu);
1750
1751         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
1752                 return -EINVAL;
1753
1754         if (dbg->control & KVM_GUESTDBG_ENABLE) {
1755                 vcpu->guest_debug = dbg->control;
1756                 /* enforce guest PER */
1757                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1758
1759                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
1760                         rc = kvm_s390_import_bp_data(vcpu, dbg);
1761         } else {
1762                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1763                 vcpu->arch.guestdbg.last_bp = 0;
1764         }
1765
1766         if (rc) {
1767                 vcpu->guest_debug = 0;
1768                 kvm_s390_clear_bp_data(vcpu);
1769                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1770         }
1771
1772         return rc;
1773 }
1774
1775 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
1776                                     struct kvm_mp_state *mp_state)
1777 {
1778         /* CHECK_STOP and LOAD are not supported yet */
1779         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
1780                                        KVM_MP_STATE_OPERATING;
1781 }
1782
1783 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
1784                                     struct kvm_mp_state *mp_state)
1785 {
1786         int rc = 0;
1787
1788         /* user space knows about this interface - let it control the state */
1789         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
1790
1791         switch (mp_state->mp_state) {
1792         case KVM_MP_STATE_STOPPED:
1793                 kvm_s390_vcpu_stop(vcpu);
1794                 break;
1795         case KVM_MP_STATE_OPERATING:
1796                 kvm_s390_vcpu_start(vcpu);
1797                 break;
1798         case KVM_MP_STATE_LOAD:
1799         case KVM_MP_STATE_CHECK_STOP:
1800                 /* fall through - CHECK_STOP and LOAD are not supported yet */
1801         default:
1802                 rc = -ENXIO;
1803         }
1804
1805         return rc;
1806 }
1807
1808 static bool ibs_enabled(struct kvm_vcpu *vcpu)
1809 {
1810         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
1811 }
1812
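     /*
      * Process all requests pending for this vcpu before (re)entering SIE.
      * Returns 0 on success or a negative error code.
      */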
1813 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
1814 {
1815 retry:
1816         kvm_s390_vcpu_request_handled(vcpu);
1817         if (!vcpu->requests)
1818                 return 0;
1819         /*
1820          * We use MMU_RELOAD just to re-arm the ipte notifier for the
1821          * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
1822          * This ensures that the ipte instruction for this request has
1823          * already finished. We might race against a second unmapper that
1824          * wants to set the blocking bit. Let's just retry the request loop.
1825          */
1826         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
1827                 int rc;
1828                 rc = gmap_ipte_notify(vcpu->arch.gmap,
1829                                       kvm_s390_get_prefix(vcpu),
1830                                       PAGE_SIZE * 2);
1831                 if (rc)
1832                         return rc;
1833                 goto retry;
1834         }
1835
1836         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
1837                 vcpu->arch.sie_block->ihcpu = 0xffff;
1838                 goto retry;
1839         }
1840
1841         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
1842                 if (!ibs_enabled(vcpu)) {
1843                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
1844                         atomic_or(CPUSTAT_IBS,
1845                                         &vcpu->arch.sie_block->cpuflags);
1846                 }
1847                 goto retry;
1848         }
1849
1850         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
1851                 if (ibs_enabled(vcpu)) {
1852                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
1853                         atomic_andnot(CPUSTAT_IBS,
1854                                           &vcpu->arch.sie_block->cpuflags);
1855                 }
1856                 goto retry;
1857         }
1858
1859         /* nothing to do, just clear the request */
1860         clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
1861
1862         return 0;
1863 }
1864
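     /*
      * Set the guest TOD clock: store the new epoch delta in the kvm
      * structure and propagate it to all vcpus while they are blocked
      * out of SIE.
      */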
1865 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
1866 {
1867         struct kvm_vcpu *vcpu;
1868         int i;
1869
1870         mutex_lock(&kvm->lock);
1871         preempt_disable();
1872         kvm->arch.epoch = tod - get_tod_clock();
1873         kvm_s390_vcpu_block_all(kvm);
1874         kvm_for_each_vcpu(i, vcpu, kvm)
1875                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
1876         kvm_s390_vcpu_unblock_all(kvm);
1877         preempt_enable();
1878         mutex_unlock(&kvm->lock);
1879 }
1880
1881 /**
1882  * kvm_arch_fault_in_page - fault-in guest page if necessary
1883  * @vcpu: The corresponding virtual cpu
1884  * @gpa: Guest physical address
1885  * @writable: Whether the page should be writable or not
1886  *
1887  * Make sure that a guest page has been faulted-in on the host.
1888  *
1889  * Return: Zero on success, negative error code otherwise.
1890  */
1891 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
1892 {
1893         return gmap_fault(vcpu->arch.gmap, gpa,
1894                           writable ? FAULT_FLAG_WRITE : 0);
1895 }
1896
1897 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
1898                                       unsigned long token)
1899 {
1900         struct kvm_s390_interrupt inti;
1901         struct kvm_s390_irq irq;
1902
1903         if (start_token) {
1904                 irq.u.ext.ext_params2 = token;
1905                 irq.type = KVM_S390_INT_PFAULT_INIT;
1906                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
1907         } else {
1908                 inti.type = KVM_S390_INT_PFAULT_DONE;
1909                 inti.parm64 = token;
1910                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
1911         }
1912 }
1913
1914 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
1915                                      struct kvm_async_pf *work)
1916 {
1917         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
1918         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
1919 }
1920
1921 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
1922                                  struct kvm_async_pf *work)
1923 {
1924         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
1925         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
1926 }
1927
1928 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
1929                                struct kvm_async_pf *work)
1930 {
1931         /* s390 will always inject the page directly */
1932 }
1933
1934 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
1935 {
1936         /*
1937          * s390 will always inject the page directly,
1938          * but we still want check_async_completion to clean up
1939          */
1940         return true;
1941 }
1942
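     /*
      * Set up an async pfault for the current host fault, but only if pfault
      * handling is enabled and the guest is able to take the notification
      * interrupt. Returns non-zero when the async pfault was queued, in which
      * case the caller does not have to fault the page in synchronously.
      */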
1943 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
1944 {
1945         hva_t hva;
1946         struct kvm_arch_async_pf arch;
1947         int rc;
1948
1949         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1950                 return 0;
1951         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
1952             vcpu->arch.pfault_compare)
1953                 return 0;
1954         if (psw_extint_disabled(vcpu))
1955                 return 0;
1956         if (kvm_s390_vcpu_has_irq(vcpu, 0))
1957                 return 0;
1958         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
1959                 return 0;
1960         if (!vcpu->arch.gmap->pfault_enabled)
1961                 return 0;
1962
1963         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
1964         hva += current->thread.gmap_addr & ~PAGE_MASK;
1965         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
1966                 return 0;
1967
1968         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
1969         return rc;
1970 }
1971
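     /*
      * Prepare for entering SIE: handle completed pfaults, pending machine
      * checks, interrupts and requests, and set up guest debugging state.
      * Returns 0 when SIE can be entered.
      */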
1972 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
1973 {
1974         int rc, cpuflags;
1975
1976         /*
1977          * On s390, notifications for arriving pages will be delivered
1978          * directly to the guest, but the housekeeping for completed
1979          * pfaults is handled outside the worker.
1980          */
1981         kvm_check_async_pf_completion(vcpu);
1982
1983         memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
1984
1985         if (need_resched())
1986                 schedule();
1987
1988         if (test_cpu_flag(CIF_MCCK_PENDING))
1989                 s390_handle_mcck();
1990
1991         if (!kvm_is_ucontrol(vcpu->kvm)) {
1992                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
1993                 if (rc)
1994                         return rc;
1995         }
1996
1997         rc = kvm_s390_handle_requests(vcpu);
1998         if (rc)
1999                 return rc;
2000
2001         if (guestdbg_enabled(vcpu)) {
2002                 kvm_s390_backup_guest_per_regs(vcpu);
2003                 kvm_s390_patch_guest_per_regs(vcpu);
2004         }
2005
2006         vcpu->arch.sie_block->icptcode = 0;
2007         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2008         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2009         trace_kvm_s390_sie_enter(vcpu, cpuflags);
2010
2011         return 0;
2012 }
2013
2014 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2015 {
2016         psw_t *psw = &vcpu->arch.sie_block->gpsw;
2017         u8 opcode;
2018         int rc;
2019
2020         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2021         trace_kvm_s390_sie_fault(vcpu);
2022
2023         /*
2024          * We want to inject an addressing exception, which is defined as a
2025          * suppressing or terminating exception. However, since we came here
2026          * by a DAT access exception, the PSW still points to the faulting
2027          * instruction since DAT exceptions are nullifying. So we've got
2028          * to look up the current opcode to get the length of the instruction
2029          * to be able to forward the PSW.
2030          */
2031         rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
2032         if (rc)
2033                 return kvm_s390_inject_prog_cond(vcpu, rc);
2034         psw->addr = __rewind_psw(*psw, -insn_length(opcode));
2035
2036         return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
2037 }
2038
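     /*
      * Handle the outcome of a SIE exit: host faults, ucontrol translation
      * exceptions and guest pfaults are dealt with here, everything else is
      * handed to the intercept handlers.
      */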
2039 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2040 {
2041         int rc = -1;
2042
2043         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2044                    vcpu->arch.sie_block->icptcode);
2045         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2046
2047         if (guestdbg_enabled(vcpu))
2048                 kvm_s390_restore_guest_per_regs(vcpu);
2049
2050         if (exit_reason >= 0) {
2051                 rc = 0;
2052         } else if (kvm_is_ucontrol(vcpu->kvm)) {
2053                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2054                 vcpu->run->s390_ucontrol.trans_exc_code =
2055                                                 current->thread.gmap_addr;
2056                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2057                 rc = -EREMOTE;
2058
2059         } else if (current->thread.gmap_pfault) {
2060                 trace_kvm_s390_major_guest_pfault(vcpu);
2061                 current->thread.gmap_pfault = 0;
2062                 if (kvm_arch_setup_async_pf(vcpu)) {
2063                         rc = 0;
2064                 } else {
2065                         gpa_t gpa = current->thread.gmap_addr;
2066                         rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
2067                 }
2068         }
2069
2070         if (rc == -1)
2071                 rc = vcpu_post_run_fault_in_sie(vcpu);
2072
2073         memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
2074
2075         if (rc == 0) {
2076                 if (kvm_is_ucontrol(vcpu->kvm))
2077                         /* Don't exit for host interrupts. */
2078                         rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
2079                 else
2080                         rc = kvm_handle_sie_intercept(vcpu);
2081         }
2082
2083         return rc;
2084 }
2085
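     /*
      * The main run loop: enter SIE repeatedly until an error, a pending
      * signal or a guest debug exit stops us.
      */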
2086 static int __vcpu_run(struct kvm_vcpu *vcpu)
2087 {
2088         int rc, exit_reason;
2089
2090         /*
2091          * We try to hold kvm->srcu during most of vcpu_run (except when
2092          * running the guest), so that memslots (and other stuff) are protected
2093          */
2094         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2095
2096         do {
2097                 rc = vcpu_pre_run(vcpu);
2098                 if (rc)
2099                         break;
2100
2101                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2102                 /*
2103                  * As PF_VCPU will be used in the fault handler, there must
2104                  * be no uaccess between guest_enter and guest_exit.
2105                  */
2106                 local_irq_disable();
2107                 __kvm_guest_enter();
2108                 local_irq_enable();
2109                 exit_reason = sie64a(vcpu->arch.sie_block,
2110                                      vcpu->run->s.regs.gprs);
2111                 local_irq_disable();
2112                 __kvm_guest_exit();
2113                 local_irq_enable();
2114                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2115
2116                 rc = vcpu_post_run(vcpu, exit_reason);
2117         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2118
2119         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2120         return rc;
2121 }
2122
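     /* Copy the register state that userspace marked dirty into the vcpu. */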
2123 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2124 {
2125         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2126         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2127         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2128                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2129         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2130                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2131                 /* some control register changes require a tlb flush */
2132                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2133         }
2134         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2135                 vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
2136                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2137                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2138                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2139                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2140         }
2141         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2142                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2143                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2144                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2145                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2146                         kvm_clear_async_pf_completion_queue(vcpu);
2147         }
2148         kvm_run->kvm_dirty_regs = 0;
2149 }
2150
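     /* Copy the current vcpu register state back into kvm_run for userspace. */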
2151 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2152 {
2153         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2154         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2155         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2156         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2157         kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
2158         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2159         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2160         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2161         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2162         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2163         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2164         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2165 }
2166
2167 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2168 {
2169         int rc;
2170         sigset_t sigsaved;
2171
2172         if (guestdbg_exit_pending(vcpu)) {
2173                 kvm_s390_prepare_debug_exit(vcpu);
2174                 return 0;
2175         }
2176
2177         if (vcpu->sigset_active)
2178                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2179
2180         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2181                 kvm_s390_vcpu_start(vcpu);
2182         } else if (is_vcpu_stopped(vcpu)) {
2183                 pr_err_ratelimited("can't run stopped vcpu %d\n",
2184                                    vcpu->vcpu_id);
2185                 return -EINVAL;
2186         }
2187
2188         sync_regs(vcpu, kvm_run);
2189
2190         might_fault();
2191         rc = __vcpu_run(vcpu);
2192
2193         if (signal_pending(current) && !rc) {
2194                 kvm_run->exit_reason = KVM_EXIT_INTR;
2195                 rc = -EINTR;
2196         }
2197
2198         if (guestdbg_exit_pending(vcpu) && !rc)  {
2199                 kvm_s390_prepare_debug_exit(vcpu);
2200                 rc = 0;
2201         }
2202
2203         if (rc == -EOPNOTSUPP) {
2204                 /* intercept cannot be handled in-kernel, prepare kvm-run */
2205                 kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
2206                 kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2207                 kvm_run->s390_sieic.ipa      = vcpu->arch.sie_block->ipa;
2208                 kvm_run->s390_sieic.ipb      = vcpu->arch.sie_block->ipb;
2209                 rc = 0;
2210         }
2211
2212         if (rc == -EREMOTE) {
2213                 /* intercept was handled, but userspace support is needed;
2214                  * kvm_run has been prepared by the handler */
2215                 rc = 0;
2216         }
2217
2218         store_regs(vcpu, kvm_run);
2219
2220         if (vcpu->sigset_active)
2221                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2222
2223         vcpu->stat.exit_userspace++;
2224         return rc;
2225 }
2226
2227 /*
2228  * store status at address
2229  * we have two special cases:
2230  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2231  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2232  */
2233 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2234 {
2235         unsigned char archmode = 1;
2236         freg_t fprs[NUM_FPRS];
2237         unsigned int px;
2238         u64 clkcomp;
2239         int rc;
2240
2241         px = kvm_s390_get_prefix(vcpu);
2242         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2243                 if (write_guest_abs(vcpu, 163, &archmode, 1))
2244                         return -EFAULT;
2245                 gpa = 0;
2246         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2247                 if (write_guest_real(vcpu, 163, &archmode, 1))
2248                         return -EFAULT;
2249                 gpa = px;
2250         } else
2251                 gpa -= __LC_FPREGS_SAVE_AREA;
2252
2253         /* manually convert vector registers if necessary */
2254         if (MACHINE_HAS_VX) {
2255                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2256                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2257                                      fprs, 128);
2258         } else {
2259                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2260                                      vcpu->run->s.regs.vrs, 128);
2261         }
2262         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2263                               vcpu->run->s.regs.gprs, 128);
2264         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2265                               &vcpu->arch.sie_block->gpsw, 16);
2266         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2267                               &px, 4);
2268         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2269                               &vcpu->run->s.regs.fpc, 4);
2270         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2271                               &vcpu->arch.sie_block->todpr, 4);
2272         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2273                               &vcpu->arch.sie_block->cputm, 8);
2274         clkcomp = vcpu->arch.sie_block->ckc >> 8;
2275         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2276                               &clkcomp, 8);
2277         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2278                               &vcpu->run->s.regs.acrs, 64);
2279         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2280                               &vcpu->arch.sie_block->gcr, 128);
2281         return rc ? -EFAULT : 0;
2282 }
2283
2284 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2285 {
2286         /*
2287          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2288          * copying in vcpu load/put. Let's update our copies before we save
2289          * them into the save area
2290          */
2291         save_fpu_regs();
2292         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2293         save_access_regs(vcpu->run->s.regs.acrs);
2294
2295         return kvm_s390_store_status_unloaded(vcpu, addr);
2296 }
2297
2298 /*
2299  * store additional status at address
2300  */
2301 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2302                                         unsigned long gpa)
2303 {
2304         /* Only bits 0-53 are used for address formation */
2305         if (!(gpa & ~0x3ff))
2306                 return 0;
2307
2308         return write_guest_abs(vcpu, gpa & ~0x3ff,
2309                                (void *)&vcpu->run->s.regs.vrs, 512);
2310 }
2311
2312 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2313 {
2314         if (!test_kvm_facility(vcpu->kvm, 129))
2315                 return 0;
2316
2317         /*
2318          * The guest VXRS are in the host VXRS due to the lazy
2319          * copying in vcpu load/put. We can simply call save_fpu_regs()
2320          * to save the current register state because we are in the
2321          * middle of a load/put cycle.
2322          *
2323          * Let's update our copies before we save it into the save area.
2324          */
2325         save_fpu_regs();
2326
2327         return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2328 }
2329
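     /*
      * Cancel a not yet handled ENABLE request and synchronously ask the
      * vcpu to drop the IBS facility.
      */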
2330 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2331 {
2332         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2333         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2334 }
2335
2336 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2337 {
2338         unsigned int i;
2339         struct kvm_vcpu *vcpu;
2340
2341         kvm_for_each_vcpu(i, vcpu, kvm) {
2342                 __disable_ibs_on_vcpu(vcpu);
2343         }
2344 }
2345
2346 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2347 {
2348         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2349         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2350 }
2351
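     /*
      * Move the vcpu out of the STOPPED state. If it becomes the only
      * running vcpu, IBS is enabled to speed it up.
      */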
2352 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2353 {
2354         int i, online_vcpus, started_vcpus = 0;
2355
2356         if (!is_vcpu_stopped(vcpu))
2357                 return;
2358
2359         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2360         /* Only one cpu at a time may enter/leave the STOPPED state. */
2361         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2362         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2363
2364         for (i = 0; i < online_vcpus; i++) {
2365                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2366                         started_vcpus++;
2367         }
2368
2369         if (started_vcpus == 0) {
2370                 /* we're the only active VCPU -> speed it up */
2371                 __enable_ibs_on_vcpu(vcpu);
2372         } else if (started_vcpus == 1) {
2373                 /*
2374                  * As we are starting a second VCPU, we have to disable
2375                  * the IBS facility on all VCPUs to remove potentially
2376                  * outstanding ENABLE requests.
2377                  */
2378                 __disable_ibs_on_all_vcpus(vcpu->kvm);
2379         }
2380
2381         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2382         /*
2383          * Another VCPU might have used IBS while we were offline.
2384          * Let's play safe and flush the VCPU at startup.
2385          */
2386         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2387         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2388         return;
2389 }
2390
2391 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2392 {
2393         int i, online_vcpus, started_vcpus = 0;
2394         struct kvm_vcpu *started_vcpu = NULL;
2395
2396         if (is_vcpu_stopped(vcpu))
2397                 return;
2398
2399         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2400         /* Only one cpu at a time may enter/leave the STOPPED state. */
2401         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2402         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2403
2404         /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
2405         kvm_s390_clear_stop_irq(vcpu);
2406
2407         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2408         __disable_ibs_on_vcpu(vcpu);
2409
2410         for (i = 0; i < online_vcpus; i++) {
2411                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2412                         started_vcpus++;
2413                         started_vcpu = vcpu->kvm->vcpus[i];
2414                 }
2415         }
2416
2417         if (started_vcpus == 1) {
2418                 /*
2419                  * As we only have one VCPU left, we want to enable the
2420                  * IBS facility for that VCPU to speed it up.
2421                  */
2422                 __enable_ibs_on_vcpu(started_vcpu);
2423         }
2424
2425         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2426         return;
2427 }
2428
2429 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2430                                      struct kvm_enable_cap *cap)
2431 {
2432         int r;
2433
2434         if (cap->flags)
2435                 return -EINVAL;
2436
2437         switch (cap->cap) {
2438         case KVM_CAP_S390_CSS_SUPPORT:
2439                 if (!vcpu->kvm->arch.css_support) {
2440                         vcpu->kvm->arch.css_support = 1;
2441                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2442                         trace_kvm_s390_enable_css(vcpu->kvm);
2443                 }
2444                 r = 0;
2445                 break;
2446         default:
2447                 r = -EINVAL;
2448                 break;
2449         }
2450         return r;
2451 }
2452
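     /*
      * Handler for the KVM_S390_MEM_OP vcpu ioctl: read or write guest
      * logical memory on behalf of userspace, or just check the access.
      * A rough userspace sketch (illustrative only, field names as used
      * below; the exact uapi layout is defined in <linux/kvm.h>):
      *
      *     struct kvm_s390_mem_op op = {
      *             .gaddr = guest_addr,
      *             .size  = len,
      *             .op    = KVM_S390_MEMOP_LOGICAL_READ,
      *             .buf   = (__u64)(unsigned long)buffer,
      *             .ar    = 0,
      *     };
      *     ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
      */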
2453 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2454                                   struct kvm_s390_mem_op *mop)
2455 {
2456         void __user *uaddr = (void __user *)mop->buf;
2457         void *tmpbuf = NULL;
2458         int r, srcu_idx;
2459         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2460                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
2461
2462         if (mop->flags & ~supported_flags)
2463                 return -EINVAL;
2464
2465         if (mop->size > MEM_OP_MAX_SIZE)
2466                 return -E2BIG;
2467
2468         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2469                 tmpbuf = vmalloc(mop->size);
2470                 if (!tmpbuf)
2471                         return -ENOMEM;
2472         }
2473
2474         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2475
2476         switch (mop->op) {
2477         case KVM_S390_MEMOP_LOGICAL_READ:
2478                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2479                         r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
2480                         break;
2481                 }
2482                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2483                 if (r == 0) {
2484                         if (copy_to_user(uaddr, tmpbuf, mop->size))
2485                                 r = -EFAULT;
2486                 }
2487                 break;
2488         case KVM_S390_MEMOP_LOGICAL_WRITE:
2489                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2490                         r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
2491                         break;
2492                 }
2493                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2494                         r = -EFAULT;
2495                         break;
2496                 }
2497                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2498                 break;
2499         default:
2500                 r = -EINVAL;
2501         }
2502
2503         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2504
2505         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2506                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2507
2508         vfree(tmpbuf);
2509         return r;
2510 }
2511
2512 long kvm_arch_vcpu_ioctl(struct file *filp,
2513                          unsigned int ioctl, unsigned long arg)
2514 {
2515         struct kvm_vcpu *vcpu = filp->private_data;
2516         void __user *argp = (void __user *)arg;
2517         int idx;
2518         long r;
2519
2520         switch (ioctl) {
2521         case KVM_S390_IRQ: {
2522                 struct kvm_s390_irq s390irq;
2523
2524                 r = -EFAULT;
2525                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2526                         break;
2527                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2528                 break;
2529         }
2530         case KVM_S390_INTERRUPT: {
2531                 struct kvm_s390_interrupt s390int;
2532                 struct kvm_s390_irq s390irq;
2533
2534                 r = -EFAULT;
2535                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2536                         break;
2537                 if (s390int_to_s390irq(&s390int, &s390irq))
2538                         return -EINVAL;
2539                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2540                 break;
2541         }
2542         case KVM_S390_STORE_STATUS:
2543                 idx = srcu_read_lock(&vcpu->kvm->srcu);
2544                 r = kvm_s390_vcpu_store_status(vcpu, arg);
2545                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2546                 break;
2547         case KVM_S390_SET_INITIAL_PSW: {
2548                 psw_t psw;
2549
2550                 r = -EFAULT;
2551                 if (copy_from_user(&psw, argp, sizeof(psw)))
2552                         break;
2553                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
2554                 break;
2555         }
2556         case KVM_S390_INITIAL_RESET:
2557                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
2558                 break;
2559         case KVM_SET_ONE_REG:
2560         case KVM_GET_ONE_REG: {
2561                 struct kvm_one_reg reg;
2562                 r = -EFAULT;
2563                 if (copy_from_user(&reg, argp, sizeof(reg)))
2564                         break;
2565                 if (ioctl == KVM_SET_ONE_REG)
2566                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
2567                 else
2568                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
2569                 break;
2570         }
2571 #ifdef CONFIG_KVM_S390_UCONTROL
2572         case KVM_S390_UCAS_MAP: {
2573                 struct kvm_s390_ucas_mapping ucasmap;
2574
2575                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2576                         r = -EFAULT;
2577                         break;
2578                 }
2579
2580                 if (!kvm_is_ucontrol(vcpu->kvm)) {
2581                         r = -EINVAL;
2582                         break;
2583                 }
2584
2585                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
2586                                      ucasmap.vcpu_addr, ucasmap.length);
2587                 break;
2588         }
2589         case KVM_S390_UCAS_UNMAP: {
2590                 struct kvm_s390_ucas_mapping ucasmap;
2591
2592                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2593                         r = -EFAULT;
2594                         break;
2595                 }
2596
2597                 if (!kvm_is_ucontrol(vcpu->kvm)) {
2598                         r = -EINVAL;
2599                         break;
2600                 }
2601
2602                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
2603                         ucasmap.length);
2604                 break;
2605         }
2606 #endif
2607         case KVM_S390_VCPU_FAULT: {
2608                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
2609                 break;
2610         }
2611         case KVM_ENABLE_CAP:
2612         {
2613                 struct kvm_enable_cap cap;
2614                 r = -EFAULT;
2615                 if (copy_from_user(&cap, argp, sizeof(cap)))
2616                         break;
2617                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
2618                 break;
2619         }
2620         case KVM_S390_MEM_OP: {
2621                 struct kvm_s390_mem_op mem_op;
2622
2623                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2624                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
2625                 else
2626                         r = -EFAULT;
2627                 break;
2628         }
2629         case KVM_S390_SET_IRQ_STATE: {
2630                 struct kvm_s390_irq_state irq_state;
2631
2632                 r = -EFAULT;
2633                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2634                         break;
2635                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
2636                     irq_state.len == 0 ||
2637                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
2638                         r = -EINVAL;
2639                         break;
2640                 }
2641                 r = kvm_s390_set_irq_state(vcpu,
2642                                            (void __user *) irq_state.buf,
2643                                            irq_state.len);
2644                 break;
2645         }
2646         case KVM_S390_GET_IRQ_STATE: {
2647                 struct kvm_s390_irq_state irq_state;
2648
2649                 r = -EFAULT;
2650                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2651                         break;
2652                 if (irq_state.len == 0) {
2653                         r = -EINVAL;
2654                         break;
2655                 }
2656                 r = kvm_s390_get_irq_state(vcpu,
2657                                            (__u8 __user *)  irq_state.buf,
2658                                            irq_state.len);
2659                 break;
2660         }
2661         default:
2662                 r = -ENOTTY;
2663         }
2664         return r;
2665 }
2666
2667 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2668 {
2669 #ifdef CONFIG_KVM_S390_UCONTROL
2670         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
2671                  && (kvm_is_ucontrol(vcpu->kvm))) {
2672                 vmf->page = virt_to_page(vcpu->arch.sie_block);
2673                 get_page(vmf->page);
2674                 return 0;
2675         }
2676 #endif
2677         return VM_FAULT_SIGBUS;
2678 }
2679
2680 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
2681                             unsigned long npages)
2682 {
2683         return 0;
2684 }
2685
2686 /* Section: memory related */
2687 int kvm_arch_prepare_memory_region(struct kvm *kvm,
2688                                    struct kvm_memory_slot *memslot,
2689                                    const struct kvm_userspace_memory_region *mem,
2690                                    enum kvm_mr_change change)
2691 {
2692         /* A few sanity checks. Memory slots have to start and end on a
2693            segment boundary (1MB). The memory in userland may be fragmented
2694            into various different vmas. It is okay to mmap() and munmap()
2695            stuff in this slot after doing this call at any time */
2696
2697         if (mem->userspace_addr & 0xffffful)
2698                 return -EINVAL;
2699
2700         if (mem->memory_size & 0xffffful)
2701                 return -EINVAL;
2702
2703         return 0;
2704 }
2705
2706 void kvm_arch_commit_memory_region(struct kvm *kvm,
2707                                 const struct kvm_userspace_memory_region *mem,
2708                                 const struct kvm_memory_slot *old,
2709                                 const struct kvm_memory_slot *new,
2710                                 enum kvm_mr_change change)
2711 {
2712         int rc;
2713
2714         /* If the basics of the memslot do not change, we do not want
2715          * to update the gmap. Every update causes several unnecessary
2716          * segment translation exceptions. This is usually handled just
2717          * fine by the normal fault handler + gmap, but it will also
2718          * cause faults on the prefix page of running guest CPUs.
2719          */
2720         if (old->userspace_addr == mem->userspace_addr &&
2721             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
2722             old->npages * PAGE_SIZE == mem->memory_size)
2723                 return;
2724
2725         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
2726                 mem->guest_phys_addr, mem->memory_size);
2727         if (rc)
2728                 pr_warn("failed to commit memory region\n");
2729         return;
2730 }
2731
2732 static int __init kvm_s390_init(void)
2733 {
2734         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
2735 }
2736
2737 static void __exit kvm_s390_exit(void)
2738 {
2739         kvm_exit();
2740 }
2741
2742 module_init(kvm_s390_init);
2743 module_exit(kvm_s390_exit);
2744
2745 /*
2746  * Enable autoloading of the kvm module.
2747  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
2748  * since x86 takes a different approach.
2749  */
2750 #include <linux/miscdevice.h>
2751 MODULE_ALIAS_MISCDEV(KVM_MINOR);
2752 MODULE_ALIAS("devname:kvm");