/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/etr.h>
#include <asm/pgtable.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include "kvm-s390.h"
#include "gaccess.h"
#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
	{ NULL }
};
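/*
 * Illustrative note (not part of the build): each entry above shows up as a
 * file under debugfs once the kvm module is loaded, so the counters can be
 * read from userspace. A hypothetical shell session, assuming debugfs is
 * mounted at /sys/kernel/debug:
 *
 *	# cat /sys/kernel/debug/kvm/exit_instruction
 *	# cat /sys/kernel/debug/kvm/halt_wakeup
 */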
/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[] = {
	0xffe6fffbfcfdfc40UL,
	0x005e800000000000UL,
};

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}
static struct gmap_notifier gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;
		}
	}
	return NOTIFY_OK;
}
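/*
 * Worked example (illustrative): the guest TOD clock is derived as
 * host_tod + epoch. If the host TOD steps forward by *delta, subtracting
 * *delta from every epoch keeps the sum, and thus the guest-visible time,
 * unchanged:
 *
 *	guest_tod = (host_tod + delta) + (epoch - delta) = host_tod + epoch
 */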
static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_ipte_notifier(&gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_ipte_notifier(&gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}
int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}
/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_MAX_VCPUS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	default:
		r = 0;
	}
	return r;
}
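/*
 * Illustrative userspace sketch (not part of this file): probing one of the
 * extensions answered above via the KVM_CHECK_EXTENSION ioctl. "kvm_fd" is
 * assumed to be an open file descriptor for /dev/kvm.
 *
 *	int r = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *	if (r > 0)
 *		printf("KVM_S390_MEM_OP supported, max size %d\n", r);
 */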
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	down_read(&gmap->mm->mmap_sem);
	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (gmap_test_and_clear_dirty(address, gmap))
			mark_page_dirty(kvm, cur_gfn);
	}
	up_read(&gmap->mm->mmap_sem);
}
/* Section: vm related */
/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
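/*
 * Illustrative userspace sketch (not part of this file): fetching the dirty
 * bitmap for memslot 0 through the ioctl above. "vm_fd" is assumed to come
 * from KVM_CREATE_VM, and "bitmap" must be large enough for the slot (one
 * bit per guest page). On success the kernel has also cleared the log.
 *
 *	struct kvm_dirty_log log = { .slot = 0, .dirty_bitmap = bitmap };
 *	ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
 */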
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac->mask, 129);
			set_kvm_facility(kvm->arch.model.fac->list, 129);
			r = 0;
		} else
			r = -EINVAL;
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
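/*
 * Illustrative userspace sketch (not part of this file): enabling one of the
 * VM capabilities handled above. "vm_fd" is assumed to come from
 * KVM_CREATE_VM; capabilities that change the CPU model, such as vector
 * support, should be enabled before any VCPU is created.
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_VECTOR_REGISTERS };
 *	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0)
 *		perror("vector registers not available");
 */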
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.gmap->asce_end);
		if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		/* enable CMMA only for z10 and later (EDAT_1) */
		ret = -EINVAL;
		if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
			break;

		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus) == 0) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (new_limit > kvm->arch.gmap->asce_end)
			return -E2BIG;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus) == 0) {
			/* gmap_alloc will round the limit up */
			struct gmap *new = gmap_alloc(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_free(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
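/*
 * Illustrative userspace sketch (not part of this file): shrinking the guest
 * memory limit through the attribute interface above. "vm_fd" is assumed to
 * come from KVM_CREATE_VM; the limit can only be changed while no VCPUs
 * exist, as enforced by the online_vcpus check.
 *
 *	__u64 limit = 1ULL << 31;	(2 GB, hypothetical value)
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
 *		.addr  = (__u64) &limit,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */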
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x\n", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx\n", gtod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x\n", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx\n", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
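/*
 * Illustrative userspace sketch (not part of this file): reading the guest
 * TOD base through the attribute interface above. Setting it works the same
 * way with KVM_SET_DEVICE_ATTR and feeds kvm_s390_set_tod_clock(), which
 * recomputes the epoch as guest_tod - host_tod. "vm_fd" is an assumed VM fd.
 *
 *	__u64 gtod;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_LOW,
 *		.addr  = (__u64) &gtod,
 *	};
 *	ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 */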
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (atomic_read(&kvm->online_vcpus)) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
		       sizeof(struct cpuid));
		kvm->arch.model.ibc = proc->ibc;
		memcpy(kvm->arch.model.fac->list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}
static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}
static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	unsigned long curkey;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			goto out;
		}

		curkey = get_guest_storage_key(current->mm, hva);
		if (IS_ERR_VALUE(curkey)) {
			r = curkey;
			goto out;
		}
		keys[i] = curkey;
	}

	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
			 sizeof(uint8_t) * args->count);
	if (r)
		r = -EFAULT;
out:
	kvfree(keys);
	return r;
}
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			goto out;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			goto out;
		}

		r = set_guest_storage_key(current->mm, hva,
					  (unsigned long)keys[i], 0);
		if (r)
			goto out;
	}
out:
	kvfree(keys);
	return r;
}
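/*
 * Illustrative userspace sketch (not part of this file): reading the storage
 * keys of the first 16 guest pages with the ioctl handled below. "vm_fd" is
 * an assumed VM fd; a return of KVM_S390_GET_SKEYS_NONE means the guest is
 * not using storage keys at all.
 *
 *	__u8 keys[16];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn     = 0,
 *		.count         = 16,
 *		.skeydata_addr = (__u64) keys,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 */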
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}

static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(2) && test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}
static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
{
	get_cpu_id(cpu_id);
	cpu_id->version = 0xff;
}

static int kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return 0;

	kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
					 GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.crypto.crycb)
		return -ENOMEM;

	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));

	return 0;
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset = (sca_offset + 16) & 0x7f0;
	kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	/*
	 * The architectural maximum amount of facilities is 16 kbit. To store
	 * this amount, 2 kbyte of memory is required. Thus we need a full
	 * page to hold the guest facility list (arch.model.fac->list) and the
	 * facility mask (arch.model.fac->mask). Its address size has to be
	 * 31 bits and word aligned.
	 */
	kvm->arch.model.fac =
		(struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.model.fac)
		goto out_err;

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
		else
			kvm->arch.model.fac->mask[i] = 0UL;
	}

	/* Populate the facility list initially. */
	memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	if (kvm_s390_crypto_init(kvm) < 0)
		goto out_err;

	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
	} else {
		kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);
	KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);

	return 0;
out_err:
	kfree(kvm->arch.crypto.crycb);
	free_page((unsigned long)kvm->arch.model.fac);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)(kvm->arch.sca));
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		clear_bit(63 - vcpu->vcpu_id,
			  (unsigned long *) &vcpu->kvm->arch.sca->mcn);
		if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
		    (__u64) vcpu->arch.sie_block)
			vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	smp_mb();

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_free(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));
	kfree(vcpu->arch.guest_fpregs.fprs);
	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	free_page((unsigned long)kvm->arch.model.fac);
	free_page((unsigned long)(kvm->arch.sca));
	debug_unregister(kvm->arch.dbf);
	kfree(kvm->arch.crypto.crycb);
	if (!kvm_is_ucontrol(kvm))
		gmap_free(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	KVM_EVENT(3, "vm 0x%p destroyed", kvm);
}
/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}

int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	if (test_kvm_facility(vcpu->kvm, 129))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

	return 0;
}
/*
 * Backs up the current FP/VX register save area to a particular
 * destination. Used to switch between different register save
 * areas.
 */
static inline void save_fpu_to(struct fpu *dst)
{
	dst->fpc = current->thread.fpu.fpc;
	dst->flags = current->thread.fpu.flags;
	dst->regs = current->thread.fpu.regs;
}

/*
 * Switches the FP/VX register save area from which to lazily
 * restore register contents.
 */
static inline void load_fpu_from(struct fpu *from)
{
	current->thread.fpu.fpc = from->fpc;
	current->thread.fpu.flags = from->flags;
	current->thread.fpu.regs = from->regs;
}
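/*
 * Descriptive summary of the lazy switch protocol built on these two
 * helpers: kvm_arch_vcpu_load() saves the host area with
 * save_fpu_to(&vcpu->arch.host_fpregs) and points current->thread.fpu at
 * the guest state; kvm_arch_vcpu_put() reverses this with
 * load_fpu_from(&vcpu->arch.host_fpregs). No register contents are copied
 * here, only the pointers and control fields that select the active save
 * area.
 */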
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	/* Save host register state */
	save_fpu_regs();
	save_fpu_to(&vcpu->arch.host_fpregs);

	if (test_kvm_facility(vcpu->kvm, 129)) {
		current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
		current->thread.fpu.flags = FPU_USE_VX;
		/*
		 * Use the register save area in the SIE-control block
		 * for register restore and save in kvm_arch_vcpu_put()
		 */
		current->thread.fpu.vxrs =
			(__vector128 *)&vcpu->run->s.regs.vrs;
		/* Always enable the vector extension for KVM */
		__ctl_set_vx();
	} else
		load_fpu_from(&vcpu->arch.guest_fpregs);

	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;

	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	gmap_enable(vcpu->arch.gmap);
	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	gmap_disable(vcpu->arch.gmap);

	save_fpu_regs();

	if (test_kvm_facility(vcpu->kvm, 129))
		/*
		 * kvm_arch_vcpu_load() set up the register save area to
		 * point at &vcpu->run->s.regs.vrs; thus the vector registers
		 * are already saved. Only the floating-point control must be
		 * stored.
		 */
		vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	else
		save_fpu_to(&vcpu->arch.guest_fpregs);
	load_fpu_from(&vcpu->arch.host_fpregs);

	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
}
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	vcpu->arch.sie_block->cputm = 0UL;
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	vcpu->arch.guest_fpregs.fpc = 0;
	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm))
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	if (!test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;

	vcpu->arch.sie_block->ecb2 |= 0x80;
	vcpu->arch.sie_block->ecb2 &= ~0x08;
	return 0;
}
static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.cpu_id = model->cpu_id;
	vcpu->arch.sie_block->ibc = model->ibc;
	vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
	else if (test_kvm_facility(vcpu->kvm, 8))
		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);

	kvm_s390_vcpu_setup_model(vcpu);

	vcpu->arch.sie_block->ecb = 6;
	if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= 0x10;

	vcpu->arch.sie_block->ecb2 = 8;
	vcpu->arch.sie_block->eca = 0xC1002000U;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= 1;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= 0x10000000U;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= 0x00020000;
		vcpu->arch.sie_block->ecd |= 0x20000000;
	}
	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	kvm_s390_vcpu_crypto_setup(vcpu);

	return rc;
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (id >= KVM_MAX_VCPUS)
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	vcpu->arch.sie_block->icpua = id;
	if (!kvm_is_ucontrol(kvm)) {
		if (!kvm->arch.sca) {
			WARN_ON_ONCE(1);
			goto out_free_cpu;
		}
		if (!kvm->arch.sca->cpu[id].sda)
			kvm->arch.sca->cpu[id].sda =
				(__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh =
			(__u32)(((__u64)kvm->arch.sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
		set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
	}

	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;

	/*
	 * Allocate a save area for floating-point registers. If the vector
	 * extension is available, register contents are saved in the SIE
	 * control block. The allocated save area is still required in
	 * particular places, for example, in kvm_s390_vcpu_store_status().
	 */
	vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
					       GFP_KERNEL);
	if (!vcpu->arch.guest_fpregs.fprs) {
		rc = -ENOMEM;
		goto out_free_sie_block;
	}

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}

void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}

/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}

/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
{
	int i;
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
		}
	}
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
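/*
 * Illustrative userspace sketch (not part of this file): reading one of the
 * registers exported above via KVM_GET_ONE_REG. "vcpu_fd" is assumed to come
 * from KVM_CREATE_VCPU.
 *
 *	__u64 cputm;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64) &cputm,
 *	};
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 */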
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	restore_access_regs(vcpu->run->s.regs.acrs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	if (test_fp_ctl(fpu->fpc))
		return -EINVAL;
	memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
	save_fpu_regs();
	load_fpu_from(&vcpu->arch.guest_fpregs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
	return 0;
}
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
		return -EINVAL;

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
	}

	return rc;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	/* CHECK_STOP and LOAD are not supported yet */
	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				       KVM_MP_STATE_OPERATING;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	return rc;
}
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
}

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!vcpu->requests)
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_ipte_notify(vcpu->arch.gmap,
				      kvm_s390_get_prefix(vcpu),
				      PAGE_SIZE * 2);
		if (rc)
			return rc;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			atomic_or(CPUSTAT_IBS,
				  &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			atomic_andnot(CPUSTAT_IBS,
				      &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	/* nothing to do, just clear the request */
	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);

	return 0;
}
void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
{
	struct kvm_vcpu *vcpu;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();
	kvm->arch.epoch = tod - get_tod_clock();
	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm)
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to clean up
	 */
	return true;
}
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the housekeeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);

	if (need_resched())
		schedule();

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	psw_t *psw = &vcpu->arch.sie_block->gpsw;
	u8 opcode;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
	if (rc)
		return kvm_s390_inject_prog_cond(vcpu, rc);
	psw->addr = __rewind_psw(*psw, -insn_length(opcode));

	return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
}
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	int rc = -1;

	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	if (exit_reason >= 0) {
		rc = 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		rc = -EREMOTE;

	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu)) {
			rc = 0;
		} else {
			gpa_t gpa = current->thread.gmap_addr;
			rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
		}
	}

	if (rc == -1)
		rc = vcpu_post_run_fault_in_sie(vcpu);

	memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
	if (rc == 0) {
		if (kvm_is_ucontrol(vcpu->kvm))
			/* Don't exit for host interrupts. */
			rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
		else
			rc = kvm_handle_sie_intercept(vcpu);
	}

	return rc;
}
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		local_irq_disable();
		__kvm_guest_enter();
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__kvm_guest_exit();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	kvm_run->kvm_dirty_regs = 0;
}
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		return 0;
	}

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		return -EINVAL;
	}

	sync_regs(vcpu, kvm_run);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EOPNOTSUPP) {
		/* intercept cannot be handled in-kernel, prepare kvm-run */
		kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
		kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* intercept was handled, but userspace support is needed;
		 * kvm_run has been prepared by the handler */
		rc = 0;
	}

	store_regs(vcpu, kvm_run);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
	return rc;
}
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	unsigned int px;
	u64 clkcomp;
	int rc;

	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = SAVE_AREA_BASE;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
	}
	rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
			     vcpu->arch.guest_fpregs.fprs, 128);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
			      &vcpu->arch.sie_block->gpsw, 16);
	px = kvm_s390_get_prefix(vcpu);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
			      &px, 4);
	rc |= write_guest_abs(vcpu,
			      gpa + offsetof(struct save_area, fp_ctrl_reg),
			      &vcpu->arch.guest_fpregs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
			      &vcpu->arch.sie_block->todpr, 4);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
			      &vcpu->arch.sie_block->cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * copying in vcpu load/put. Let's update our copies before we save
	 * them into the save area.
	 */
	save_fpu_regs();
	if (test_kvm_facility(vcpu->kvm, 129)) {
		/*
		 * If the vector extension is available, the vector registers
		 * which overlap with floating-point registers are saved in
		 * the SIE-control block. Hence, extract the floating-point
		 * registers and the FPC value and store them in the
		 * guest_fpregs structure.
		 */
		WARN_ON(!is_vx_task(current));	/* XXX remove later */
		vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
		convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
				 current->thread.fpu.vxrs);
	} else
		save_fpu_to(&vcpu->arch.guest_fpregs);
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
/*
 * store additional status at address
 */
int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
					unsigned long gpa)
{
	/* Only bits 0-53 are used for address formation */
	if (!(gpa & ~0x3ff))
		return 0;

	return write_guest_abs(vcpu, gpa & ~0x3ff,
			       (void *)&vcpu->run->s.regs.vrs, 512);
}

int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	if (!test_kvm_facility(vcpu->kvm, 129))
		return 0;

	/*
	 * The guest VXRS are in the host VXRs due to the lazy
	 * copying in vcpu load/put. We can simply call save_fpu_regs()
	 * to save the current register state because we are in the
	 * middle of a load/put cycle.
	 *
	 * Let's update our copies before we save it into the save area.
	 */
	save_fpu_regs();

	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
}
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
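
/*
 * Start/stop bookkeeping, as implemented below: while exactly one VCPU
 * is running it gets ENABLE_IBS as a fast path; as soon as a second
 * VCPU starts, DISABLE_IBS is broadcast to all VCPUs so that no stale
 * ENABLE request survives; and when stopping leaves exactly one VCPU
 * running, IBS is re-enabled for that survivor.
 */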
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}
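
/*
 * Illustrative sketch (hedged; not built as part of this file): a
 * userspace read of guest logical memory through KVM_S390_MEM_OP as
 * handled above. `vcpu_fd` and `read_guest_example` are assumptions.
 */
#if 0
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int read_guest_example(int vcpu_fd, __u64 gaddr, void *buf, __u32 len)
{
	struct kvm_s390_mem_op op;

	memset(&op, 0, sizeof(op));
	op.gaddr = gaddr;			/* guest logical address */
	op.size  = len;				/* bounded by MEM_OP_MAX_SIZE */
	op.op    = KVM_S390_MEMOP_LOGICAL_READ;
	op.buf   = (__u64)(unsigned long)buf;	/* userspace destination */
	op.ar    = 0;				/* access register number */

	return ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
}
#endif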
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}
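
/*
 * Illustrative sketch (hedged; not built as part of this file): sizing
 * a buffer for KVM_S390_GET_IRQ_STATE. The kernel-side cap is
 * VCPU_IRQS_MAX_BUF, i.e. sizeof(struct kvm_s390_irq) *
 * (KVM_MAX_VCPUS + LOCAL_IRQS). `vcpu_fd` and the helper name are
 * assumptions; the return-value note reflects the documented API.
 */
#if 0
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int get_irq_state_example(int vcpu_fd, struct kvm_s390_irq *buf,
				 int n_irqs)
{
	struct kvm_s390_irq_state irq_state;

	memset(&irq_state, 0, sizeof(irq_state));
	irq_state.buf = (__u64)(unsigned long)buf;
	irq_state.len = n_irqs * sizeof(struct kvm_s390_irq);

	/* >= 0: number of bytes stored in buf; < 0: error */
	return ioctl(vcpu_fd, KVM_S390_GET_IRQ_STATE, &irq_state);
}
#endif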
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. Memory slots have to start and end at a
	   segment boundary (1MB). The memory in userland may be fragmented
	   into various different vmas. It is okay to mmap() and munmap()
	   stuff in this slot after doing this call at any time */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	return 0;
}
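
/*
 * Example of the checks above: userspace_addr = 0x200000 with
 * memory_size = 0x100000 passes (both 1MB-aligned), while a
 * memory_size of 0x180000 fails with -EINVAL.
 */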
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}
static int __init kvm_s390_init(void)
{
	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);
/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");