/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include <asm/switch_to.h>

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
	{ NULL }
};
/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[] = {
	0xffe6fffbfcfdfc40UL,
	0x005e800000000000UL,
};

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}
static struct gmap_notifier gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
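/*
 * Illustrative sketch (not in the original file): the guest's TOD clock
 * is derived as
 *
 *	guest_tod = get_tod_clock() + epoch
 *
 * so when stop_machine() shifts the host TOD by *delta, subtracting
 * *delta from every epoch below leaves guest_tod unchanged from the
 * guest's point of view.
 */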
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_ipte_notifier(&gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_ipte_notifier(&gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	else
		return -EINVAL;
}
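/*
 * Illustrative userspace call (not in the original file; "kvm_fd" is an
 * assumed name and error handling is omitted):
 *
 *	int kvm_fd = open("/dev/kvm", O_RDWR);
 *	ioctl(kvm_fd, KVM_S390_ENABLE_SIE, 0);
 */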
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_MAX_VCPUS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	default:
		r = 0;
	}
	return r;
}

static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	down_read(&gmap->mm->mmap_sem);
	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (gmap_test_and_clear_dirty(address, gmap))
			mark_page_dirty(kvm, cur_gfn);
	}
	up_read(&gmap->mm->mmap_sem);
}

/* Section: vm related */
/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
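/*
 * Illustrative userspace call (not in the original file): with "vm_fd"
 * an open VM file descriptor and "bitmap" sized for the memslot,
 *
 *	struct kvm_dirty_log log = { .slot = 0, .dirty_bitmap = bitmap };
 *	ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
 *
 * retrieves and clears the per-page dirty state synced above.
 */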
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac->mask, 129);
			set_kvm_facility(kvm->arch.model.fac->list, 129);
			r = 0;
		} else
			r = -EINVAL;
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
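/*
 * Illustrative userspace call (not in the original file): a VM-wide
 * capability such as KVM_CAP_S390_USER_SIGP is enabled with
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 *
 * where "vm_fd" is an assumed name for the VM file descriptor.
 */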
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.gmap->asce_end);
		if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		/* enable CMMA only for z10 and later (EDAT_1) */
		ret = -EINVAL;
		if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
			break;

		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus) == 0) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (new_limit > kvm->arch.gmap->asce_end)
			return -E2BIG;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus) == 0) {
			/* gmap_alloc will round the limit up */
			struct gmap *new = gmap_alloc(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_free(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
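/*
 * Illustrative userspace call (not in the original file): the memory
 * limit handled above is set via the device-attr interface, e.g.
 *
 *	__u64 limit = 1ULL << 31;	(example value: 2 GB)
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
 *		.addr  = (__u64)&limit,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 * "vm_fd" is an assumed name; error handling is omitted.
 */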
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x\n", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *cur_vcpu;
	unsigned int vcpu_idx;
	u64 gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	mutex_lock(&kvm->lock);
	kvm->arch.epoch = gtod - get_tod_clock();
	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm)
		cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch;
	kvm_s390_vcpu_unblock_all(kvm);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx\n", gtod);
	return 0;
}
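/*
 * Illustrative userspace call (not in the original file): the TOD base
 * set above travels the same device-attr path, e.g.
 *
 *	__u64 tod = new_guest_tod;	(assumed variable)
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_LOW,
 *		.addr  = (__u64)&tod,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */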
static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x\n", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = get_tod_clock() + kvm->arch.epoch;
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx\n", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
614 struct kvm_s390_vm_cpu_processor *proc;
617 mutex_lock(&kvm->lock);
618 if (atomic_read(&kvm->online_vcpus)) {
622 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
627 if (!copy_from_user(proc, (void __user *)attr->addr,
629 memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
630 sizeof(struct cpuid));
631 kvm->arch.model.ibc = proc->ibc;
632 memcpy(kvm->arch.model.fac->list, proc->fac_list,
633 S390_ARCH_FAC_LIST_SIZE_BYTE);
638 mutex_unlock(&kvm->lock);
642 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
646 switch (attr->attr) {
647 case KVM_S390_VM_CPU_PROCESSOR:
648 ret = kvm_s390_set_processor(kvm, attr);
654 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
656 struct kvm_s390_vm_cpu_processor *proc;
659 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
664 memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
665 proc->ibc = kvm->arch.model.ibc;
666 memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
667 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
674 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
676 struct kvm_s390_vm_cpu_machine *mach;
679 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
684 get_cpu_id((struct cpuid *) &mach->cpuid);
685 mach->ibc = sclp.ibc;
686 memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
687 S390_ARCH_FAC_LIST_SIZE_BYTE);
688 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
689 S390_ARCH_FAC_LIST_SIZE_BYTE);
690 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
697 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
701 switch (attr->attr) {
702 case KVM_S390_VM_CPU_PROCESSOR:
703 ret = kvm_s390_get_processor(kvm, attr);
705 case KVM_S390_VM_CPU_MACHINE:
706 ret = kvm_s390_get_machine(kvm, attr);
712 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
716 switch (attr->group) {
717 case KVM_S390_VM_MEM_CTRL:
718 ret = kvm_s390_set_mem_control(kvm, attr);
720 case KVM_S390_VM_TOD:
721 ret = kvm_s390_set_tod(kvm, attr);
723 case KVM_S390_VM_CPU_MODEL:
724 ret = kvm_s390_set_cpu_model(kvm, attr);
726 case KVM_S390_VM_CRYPTO:
727 ret = kvm_s390_vm_set_crypto(kvm, attr);
737 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
741 switch (attr->group) {
742 case KVM_S390_VM_MEM_CTRL:
743 ret = kvm_s390_get_mem_control(kvm, attr);
745 case KVM_S390_VM_TOD:
746 ret = kvm_s390_get_tod(kvm, attr);
748 case KVM_S390_VM_CPU_MODEL:
749 ret = kvm_s390_get_cpu_model(kvm, attr);
759 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
763 switch (attr->group) {
764 case KVM_S390_VM_MEM_CTRL:
765 switch (attr->attr) {
766 case KVM_S390_VM_MEM_ENABLE_CMMA:
767 case KVM_S390_VM_MEM_CLR_CMMA:
768 case KVM_S390_VM_MEM_LIMIT_SIZE:
776 case KVM_S390_VM_TOD:
777 switch (attr->attr) {
778 case KVM_S390_VM_TOD_LOW:
779 case KVM_S390_VM_TOD_HIGH:
787 case KVM_S390_VM_CPU_MODEL:
788 switch (attr->attr) {
789 case KVM_S390_VM_CPU_PROCESSOR:
790 case KVM_S390_VM_CPU_MACHINE:
798 case KVM_S390_VM_CRYPTO:
799 switch (attr->attr) {
800 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
801 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
802 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
803 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
819 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
823 unsigned long curkey;
826 if (args->flags != 0)
829 /* Is this guest using storage keys? */
830 if (!mm_use_skey(current->mm))
831 return KVM_S390_GET_SKEYS_NONE;
833 /* Enforce sane limit on memory allocation */
834 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
837 keys = kmalloc_array(args->count, sizeof(uint8_t),
838 GFP_KERNEL | __GFP_NOWARN);
840 keys = vmalloc(sizeof(uint8_t) * args->count);
844 for (i = 0; i < args->count; i++) {
845 hva = gfn_to_hva(kvm, args->start_gfn + i);
846 if (kvm_is_error_hva(hva)) {
851 curkey = get_guest_storage_key(current->mm, hva);
852 if (IS_ERR_VALUE(curkey)) {
859 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
860 sizeof(uint8_t) * args->count);
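/*
 * Illustrative userspace call (not in the original file): with "args" a
 * struct kvm_s390_skeys whose start_gfn/count describe the range and
 * whose skeydata_addr points at a count-byte buffer,
 *
 *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 *
 * returns 0 and fills the buffer, or KVM_S390_GET_SKEYS_NONE when the
 * guest does not use storage keys.
 */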
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			goto out;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			goto out;
		}

		r = set_guest_storage_key(current->mm, hva,
					  (unsigned long)keys[i], 0);
		if (r)
			goto out;
	}
out:
	kvfree(keys);
	return r;
}

long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}

static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(2) && test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}

static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
{
	get_cpu_id(cpu_id);
	cpu_id->version = 0xff;
}

static int kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return 0;

	kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
					 GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.crypto.crycb)
		return -ENOMEM;

	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));

	return 0;
}

int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset = (sca_offset + 16) & 0x7f0;
	kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;
	/*
	 * The architectural maximum amount of facilities is 16 kbit. To store
	 * this amount, 2 kbyte of memory is required. Thus we need a full
	 * page to hold the guest facility list (arch.model.fac->list) and the
	 * facility mask (arch.model.fac->mask). Its address size has to be
	 * 31 bits and word aligned.
	 */
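	/*
	 * Worked out (editorial note, not in the original file): 16 kbit =
	 * 16384 facility bits = 2048 bytes per array, so list plus mask is
	 * 2 * 2048 = 4096 bytes, i.e. exactly one 4K page.
	 */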
	kvm->arch.model.fac =
		(struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.model.fac)
		goto out_err;

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
		else
			kvm->arch.model.fac->mask[i] = 0UL;
	}

	/* Populate the facility list initially. */
	memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	if (kvm_s390_crypto_init(kvm) < 0)
		goto out_err;

	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
	} else {
		kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);
	KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);

	return 0;
out_err:
	kfree(kvm->arch.crypto.crycb);
	free_page((unsigned long)kvm->arch.model.fac);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)(kvm->arch.sca));
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		clear_bit(63 - vcpu->vcpu_id,
			  (unsigned long *) &vcpu->kvm->arch.sca->mcn);
		if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
		    (__u64) vcpu->arch.sie_block)
			vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
	}

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_free(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}

static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	free_page((unsigned long)kvm->arch.model.fac);
	free_page((unsigned long)(kvm->arch.sca));
	debug_unregister(kvm->arch.dbf);
	kfree(kvm->arch.crypto.crycb);
	if (!kvm_is_ucontrol(kvm))
		gmap_free(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	KVM_EVENT(3, "vm 0x%p destroyed", kvm);
}

/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}

int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	if (test_kvm_facility(vcpu->kvm, 129))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

	return 0;
}
/*
 * Backs up the current FP/VX register save area on a particular
 * destination. Used to switch between different register save
 * areas.
 */
static inline void save_fpu_to(struct fpu *dst)
{
	dst->fpc = current->thread.fpu.fpc;
	dst->flags = current->thread.fpu.flags;
	dst->regs = current->thread.fpu.regs;
}

/*
 * Switches the FP/VX register save area from which to lazy
 * restore register contents.
 */
static inline void load_fpu_from(struct fpu *from)
{
	current->thread.fpu.fpc = from->fpc;
	current->thread.fpu.flags = from->flags;
	current->thread.fpu.regs = from->regs;
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	/* Save host register state */
	save_fpu_regs();
	save_fpu_to(&vcpu->arch.host_fpregs);

	if (test_kvm_facility(vcpu->kvm, 129)) {
		current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
		current->thread.fpu.flags = FPU_USE_VX;
		/*
		 * Use the register save area in the SIE-control block
		 * for register restore and save in kvm_arch_vcpu_put()
		 */
		current->thread.fpu.vxrs =
			(__vector128 *)&vcpu->run->s.regs.vrs;
		/* Always enable the vector extension for KVM */
		__ctl_set_vx();
	} else
		load_fpu_from(&vcpu->arch.guest_fpregs);

	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;

	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	gmap_enable(vcpu->arch.gmap);
	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	gmap_disable(vcpu->arch.gmap);

	save_fpu_regs();

	if (test_kvm_facility(vcpu->kvm, 129))
		/*
		 * kvm_arch_vcpu_load() set up the register save area to
		 * the &vcpu->run->s.regs.vrs and, thus, the vector registers
		 * are already saved. Only the floating-point control must be
		 * stored.
		 */
		vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	else
		save_fpu_to(&vcpu->arch.guest_fpregs);
	load_fpu_from(&vcpu->arch.host_fpregs);

	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
}
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	vcpu->arch.sie_block->cputm = 0UL;
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	vcpu->arch.guest_fpregs.fpc = 0;
	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm))
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	if (!test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}

void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;

	vcpu->arch.sie_block->ecb2 |= 0x80;
	vcpu->arch.sie_block->ecb2 &= ~0x08;
	return 0;
}

static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.cpu_id = model->cpu_id;
	vcpu->arch.sie_block->ibc = model->ibc;
	vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
}

int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);
	if (test_kvm_facility(vcpu->kvm, 78))
		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
	else if (test_kvm_facility(vcpu->kvm, 8))
		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);

	kvm_s390_vcpu_setup_model(vcpu);

	vcpu->arch.sie_block->ecb = 6;
	if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= 0x10;

	vcpu->arch.sie_block->ecb2 = 8;
	vcpu->arch.sie_block->eca = 0xC1002000U;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= 1;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= 0x10000000U;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= 0x00020000;
		vcpu->arch.sie_block->ecd |= 0x20000000;
	}
	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	kvm_s390_vcpu_crypto_setup(vcpu);

	return rc;
}

struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (id >= KVM_MAX_VCPUS)
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	vcpu->arch.sie_block->icpua = id;
	if (!kvm_is_ucontrol(kvm)) {
		if (!kvm->arch.sca) {
			WARN_ON_ONCE(1);
			goto out_free_cpu;
		}
		if (!kvm->arch.sca->cpu[id].sda)
			kvm->arch.sca->cpu[id].sda =
				(__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh =
			(__u32)(((__u64)kvm->arch.sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
		set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
	}

	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;

	/*
	 * Allocate a save area for floating-point registers. If the vector
	 * extension is available, register contents are saved in the SIE
	 * control block. The allocated save area is still required in
	 * particular places, for example, in kvm_s390_vcpu_store_status().
	 */
	vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
					       GFP_KERNEL);
	if (!vcpu->arch.guest_fpregs.fprs) {
		rc = -ENOMEM;
		goto out_free_sie_block;
	}

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}
void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}

/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}

/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
{
	int i;
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
		}
	}
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
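/*
 * Illustrative userspace call (not in the original file): one of the
 * registers handled above is read with
 *
 *	__u64 val;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)&val,
 *	};
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 *
 * where "vcpu_fd" is an assumed name for the VCPU file descriptor.
 */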
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	return 0;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	restore_access_regs(vcpu->run->s.regs.acrs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	if (test_fp_ctl(fpu->fpc))
		return -EINVAL;
	memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
	save_fpu_regs();
	load_fpu_from(&vcpu->arch.guest_fpregs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
	return 0;
}

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
		return -EINVAL;

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
	}

	return rc;
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	/* CHECK_STOP and LOAD are not supported yet */
	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				       KVM_MP_STATE_OPERATING;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	return rc;
}

static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
}

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!vcpu->requests)
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_ipte_notify(vcpu->arch.gmap,
				      kvm_s390_get_prefix(vcpu),
				      PAGE_SIZE * 2);
		if (rc)
			return rc;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			atomic_or(CPUSTAT_IBS,
				  &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			atomic_andnot(CPUSTAT_IBS,
				      &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	/* nothing to do, just clear the request */
	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);

	return 0;
}
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}

static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */
	return true;
}

static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}

static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the housekeeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);

	if (need_resched())
		schedule();

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}

static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	psw_t *psw = &vcpu->arch.sie_block->gpsw;
	u8 opcode;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
	if (rc)
		return kvm_s390_inject_prog_cond(vcpu, rc);
	psw->addr = __rewind_psw(*psw, -insn_length(opcode));

	return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
}
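/*
 * Editorial note (not in the original file): __rewind_psw() moves the
 * PSW address backwards by its second argument, so passing the negated
 * instruction length, -insn_length(opcode), advances the PSW past the
 * faulting instruction, exactly as the comment above requires.
 */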
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	int rc = -1;

	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	if (exit_reason >= 0) {
		rc = 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
			current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		rc = -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu)) {
			rc = 0;
		} else {
			gpa_t gpa = current->thread.gmap_addr;
			rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
		}
	}

	if (rc == -1)
		rc = vcpu_post_run_fault_in_sie(vcpu);

	memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);

	if (rc == 0) {
		if (kvm_is_ucontrol(vcpu->kvm))
			/* Don't exit for host interrupts. */
			rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
		else
			rc = kvm_handle_sie_intercept(vcpu);
	}

	return rc;
}

static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected.
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in the fault handler, there must be
		 * no uaccess between guest_enter and guest_exit.
		 */
		local_irq_disable();
		__kvm_guest_enter();
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__kvm_guest_exit();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	kvm_run->kvm_dirty_regs = 0;
}

static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
}

int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		return 0;
	}

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		return -EINVAL;
	}

	sync_regs(vcpu, kvm_run);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EOPNOTSUPP) {
		/* intercept cannot be handled in-kernel, prepare kvm-run */
		kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
		kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/*
		 * intercept was handled, but userspace support is needed;
		 * kvm_run has been prepared by the handler
		 */
		rc = 0;
	}

	store_regs(vcpu, kvm_run);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
	return rc;
}
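/*
 * Illustrative userspace loop (not in the original file): a VCPU thread
 * drives this function through the KVM_RUN ioctl, e.g.
 *
 *	while (ioctl(vcpu_fd, KVM_RUN, 0) == 0) {
 *		if (run->exit_reason == KVM_EXIT_S390_SIEIC)
 *			break;	(hand the intercept to userspace code)
 *	}
 *
 * "vcpu_fd" and "run" (the mmap'ed struct kvm_run) are assumed names;
 * an intercept the kernel cannot handle (rc == -EOPNOTSUPP above) is
 * what surfaces as KVM_EXIT_S390_SIEIC.
 */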
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	unsigned int px;
	u64 clkcomp;
	int rc;

	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = SAVE_AREA_BASE;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
	}
	rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
			     vcpu->arch.guest_fpregs.fprs, 128);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
			      &vcpu->arch.sie_block->gpsw, 16);
	px = kvm_s390_get_prefix(vcpu);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
			      &px, 4);
	rc |= write_guest_abs(vcpu,
			      gpa + offsetof(struct save_area, fp_ctrl_reg),
			      &vcpu->arch.guest_fpregs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
			      &vcpu->arch.sie_block->todpr, 4);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
			      &vcpu->arch.sie_block->cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * copying in vcpu load/put. Let's update our copies before we save
	 * it into the save area.
	 */
	save_fpu_regs();

	if (test_kvm_facility(vcpu->kvm, 129)) {
		/*
		 * If the vector extension is available, the vector registers
		 * which overlap with floating-point registers are saved in
		 * the SIE-control block. Hence, extract the floating-point
		 * registers and the FPC value and store them in the
		 * guest_fpregs structure.
		 */
		WARN_ON(!is_vx_task(current));	/* XXX remove later */
		vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
		convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
				 current->thread.fpu.vxrs);
	} else
		save_fpu_to(&vcpu->arch.guest_fpregs);
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}

/*
 * store additional status at address
 */
int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
					unsigned long gpa)
{
	/* Only bits 0-53 are used for address formation */
	if (!(gpa & ~0x3ff))
		return 0;

	return write_guest_abs(vcpu, gpa & ~0x3ff,
			       (void *)&vcpu->run->s.regs.vrs, 512);
}

int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	if (!test_kvm_facility(vcpu->kvm, 129))
		return 0;

	/*
	 * The guest VXRS are in the host VXRs due to the lazy
	 * copying in vcpu load/put. We can simply call save_fpu_regs()
	 * to save the current register state because we are in the
	 * middle of a load/put cycle.
	 *
	 * Let's update our copies before we save it into the save area.
	 */
	save_fpu_regs();

	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
}
/*
 * The helpers below toggle the IBS facility, which is only used here
 * while a single VCPU is running. Each helper first cancels a possibly
 * still pending opposite request before queueing its own synchronous
 * VCPU request.
 */
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
        kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
        kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
        unsigned int i;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(i, vcpu, kvm) {
                __disable_ibs_on_vcpu(vcpu);
        }
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
        kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
        kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
        int i, online_vcpus, started_vcpus = 0;

        if (!is_vcpu_stopped(vcpu))
                return;

        trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
        /* Only one cpu at a time may enter/leave the STOPPED state. */
        spin_lock(&vcpu->kvm->arch.start_stop_lock);
        online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

        for (i = 0; i < online_vcpus; i++) {
                if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
                        started_vcpus++;
        }

        if (started_vcpus == 0) {
                /* we're the only active VCPU -> speed it up */
                __enable_ibs_on_vcpu(vcpu);
        } else if (started_vcpus == 1) {
                /*
                 * As we are starting a second VCPU, we have to disable
                 * the IBS facility on all VCPUs to remove potentially
                 * outstanding ENABLE requests.
                 */
                __disable_ibs_on_all_vcpus(vcpu->kvm);
        }

        atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
        /*
         * Another VCPU might have used IBS while we were offline.
         * Let's play safe and flush the VCPU at startup.
         */
        kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
        spin_unlock(&vcpu->kvm->arch.start_stop_lock);
        return;
}
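
/*
 * Summary of the IBS policy implemented by kvm_s390_vcpu_start() above
 * and kvm_s390_vcpu_stop() below (derived from the code, for orientation):
 *
 *	started VCPUs	transition	action
 *	0 -> 1		start		enable IBS on the starting VCPU
 *	1 -> 2		start		disable IBS on all VCPUs
 *	2 -> 1		stop		enable IBS on the last running VCPU
 */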
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
        int i, online_vcpus, started_vcpus = 0;
        struct kvm_vcpu *started_vcpu = NULL;

        if (is_vcpu_stopped(vcpu))
                return;

        trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
        /* Only one cpu at a time may enter/leave the STOPPED state. */
        spin_lock(&vcpu->kvm->arch.start_stop_lock);
        online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

        /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
        kvm_s390_clear_stop_irq(vcpu);

        atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
        __disable_ibs_on_vcpu(vcpu);

        for (i = 0; i < online_vcpus; i++) {
                if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
                        started_vcpus++;
                        started_vcpu = vcpu->kvm->vcpus[i];
                }
        }

        if (started_vcpus == 1) {
                /*
                 * As we only have one VCPU left, we want to enable the
                 * IBS facility for that VCPU to speed it up.
                 */
                __enable_ibs_on_vcpu(started_vcpu);
        }

        spin_unlock(&vcpu->kvm->arch.start_stop_lock);
        return;
}
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
                                     struct kvm_enable_cap *cap)
{
        int r;

        if (cap->flags)
                return -EINVAL;

        switch (cap->cap) {
        case KVM_CAP_S390_CSS_SUPPORT:
                if (!vcpu->kvm->arch.css_support) {
                        vcpu->kvm->arch.css_support = 1;
                        VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
                        trace_kvm_s390_enable_css(vcpu->kvm);
                }
                r = 0;
                break;
        default:
                r = -EINVAL;
                break;
        }
        return r;
}
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
                                  struct kvm_s390_mem_op *mop)
{
        void __user *uaddr = (void __user *)mop->buf;
        void *tmpbuf = NULL;
        int r, srcu_idx;
        const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
                                    | KVM_S390_MEMOP_F_CHECK_ONLY;

        if (mop->flags & ~supported_flags)
                return -EINVAL;

        if (mop->size > MEM_OP_MAX_SIZE)
                return -E2BIG;

        if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
                tmpbuf = vmalloc(mop->size);
                if (!tmpbuf)
                        return -ENOMEM;
        }

        srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

        switch (mop->op) {
        case KVM_S390_MEMOP_LOGICAL_READ:
                if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
                        r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
                        break;
                }
                r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
                if (r == 0) {
                        if (copy_to_user(uaddr, tmpbuf, mop->size))
                                r = -EFAULT;
                }
                break;
        case KVM_S390_MEMOP_LOGICAL_WRITE:
                if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
                        r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
                        break;
                }
                if (copy_from_user(tmpbuf, uaddr, mop->size)) {
                        r = -EFAULT;
                        break;
                }
                r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
                break;
        default:
                r = -EINVAL;
        }

        srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

        if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
                kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

        vfree(tmpbuf);
        return r;
}
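
/*
 * Illustrative usage sketch, not part of the original source (vcpu_fd
 * and the field values are assumptions): userspace reaches the handler
 * above through the KVM_S390_MEM_OP vcpu ioctl, e.g. to read 256 bytes
 * of guest logical memory:
 *
 *	char buf[256];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x10000,			// guest logical address
 *		.buf   = (__u64)(unsigned long)buf,	// userspace buffer
 *		.size  = sizeof(buf),
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.ar    = 0,				// access register number
 *	};
 *	int ret = ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 *
 * Setting KVM_S390_MEMOP_F_CHECK_ONLY in .flags instead validates the
 * range without copying data, as the check_gva_range() branches show.
 */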
long kvm_arch_vcpu_ioctl(struct file *filp,
                         unsigned int ioctl, unsigned long arg)
{
        struct kvm_vcpu *vcpu = filp->private_data;
        void __user *argp = (void __user *)arg;
        int idx;
        long r;

        switch (ioctl) {
        case KVM_S390_IRQ: {
                struct kvm_s390_irq s390irq;

                r = -EFAULT;
                if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
                        break;
                r = kvm_s390_inject_vcpu(vcpu, &s390irq);
                break;
        }
        case KVM_S390_INTERRUPT: {
                struct kvm_s390_interrupt s390int;
                struct kvm_s390_irq s390irq;

                r = -EFAULT;
                if (copy_from_user(&s390int, argp, sizeof(s390int)))
                        break;
                if (s390int_to_s390irq(&s390int, &s390irq))
                        return -EINVAL;
                r = kvm_s390_inject_vcpu(vcpu, &s390irq);
                break;
        }
        case KVM_S390_STORE_STATUS:
                idx = srcu_read_lock(&vcpu->kvm->srcu);
                r = kvm_s390_vcpu_store_status(vcpu, arg);
                srcu_read_unlock(&vcpu->kvm->srcu, idx);
                break;
        case KVM_S390_SET_INITIAL_PSW: {
                psw_t psw;

                r = -EFAULT;
                if (copy_from_user(&psw, argp, sizeof(psw)))
                        break;
                r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
                break;
        }
        case KVM_S390_INITIAL_RESET:
                r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
                break;
        case KVM_SET_ONE_REG:
        case KVM_GET_ONE_REG: {
                struct kvm_one_reg reg;

                r = -EFAULT;
                if (copy_from_user(&reg, argp, sizeof(reg)))
                        break;
                if (ioctl == KVM_SET_ONE_REG)
                        r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
                else
                        r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
                break;
        }
#ifdef CONFIG_KVM_S390_UCONTROL
        case KVM_S390_UCAS_MAP: {
                struct kvm_s390_ucas_mapping ucasmap;

                if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
                        r = -EFAULT;
                        break;
                }

                if (!kvm_is_ucontrol(vcpu->kvm)) {
                        r = -EINVAL;
                        break;
                }

                r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
                                     ucasmap.vcpu_addr, ucasmap.length);
                break;
        }
        case KVM_S390_UCAS_UNMAP: {
                struct kvm_s390_ucas_mapping ucasmap;

                if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
                        r = -EFAULT;
                        break;
                }

                if (!kvm_is_ucontrol(vcpu->kvm)) {
                        r = -EINVAL;
                        break;
                }

                r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
                                       ucasmap.length);
                break;
        }
#endif
        case KVM_S390_VCPU_FAULT: {
                r = gmap_fault(vcpu->arch.gmap, arg, 0);
                break;
        }
        case KVM_ENABLE_CAP:
        {
                struct kvm_enable_cap cap;

                r = -EFAULT;
                if (copy_from_user(&cap, argp, sizeof(cap)))
                        break;
                r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
                break;
        }
        case KVM_S390_MEM_OP: {
                struct kvm_s390_mem_op mem_op;

                if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
                        r = kvm_s390_guest_mem_op(vcpu, &mem_op);
                else
                        r = -EFAULT;
                break;
        }
        case KVM_S390_SET_IRQ_STATE: {
                struct kvm_s390_irq_state irq_state;

                r = -EFAULT;
                if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
                        break;
                if (irq_state.len > VCPU_IRQS_MAX_BUF ||
                    irq_state.len == 0 ||
                    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
                        r = -EINVAL;
                        break;
                }
                r = kvm_s390_set_irq_state(vcpu,
                                           (void __user *) irq_state.buf,
                                           irq_state.len);
                break;
        }
        case KVM_S390_GET_IRQ_STATE: {
                struct kvm_s390_irq_state irq_state;

                r = -EFAULT;
                if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
                        break;
                if (irq_state.len == 0) {
                        r = -EINVAL;
                        break;
                }
                r = kvm_s390_get_irq_state(vcpu,
                                           (__u8 __user *) irq_state.buf,
                                           irq_state.len);
                break;
        }
        default:
                r = -ENOTTY;
        }
        return r;
}
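
/*
 * Illustrative usage sketch, not part of the original source (vcpu_fd
 * and the emergency-signal code are assumptions): injecting an
 * interrupt through the KVM_S390_IRQ case handled above:
 *
 *	struct kvm_s390_irq irq = {
 *		.type = KVM_S390_INT_EMERGENCY,
 *		.u.emerg.code = 1,	// address of the signalling CPU
 *	};
 *	ioctl(vcpu_fd, KVM_S390_IRQ, &irq);
 */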
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
        if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
            && (kvm_is_ucontrol(vcpu->kvm))) {
                vmf->page = virt_to_page(vcpu->arch.sie_block);
                get_page(vmf->page);
                return 0;
        }
#endif
        return VM_FAULT_SIGBUS;
}
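
/*
 * Illustrative usage sketch, not part of the original source (vcpu_fd
 * and the 4KB page size are assumptions): for user-controlled VMs, the
 * fault handler above lets userspace map the SIE control block by
 * mmap()ing the vcpu fd at page offset KVM_S390_SIE_PAGE_OFFSET:
 *
 *	void *sie = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
 *			 vcpu_fd, KVM_S390_SIE_PAGE_OFFSET * 4096);
 */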
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
                            unsigned long npages)
{
        return 0;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
                                   struct kvm_memory_slot *memslot,
                                   const struct kvm_userspace_memory_region *mem,
                                   enum kvm_mr_change change)
{
        /* A few sanity checks: memory slots have to start and end on a
           segment boundary (1MB). The memory in userland may be fragmented
           into various different vmas, and it is fine to mmap() and munmap()
           ranges within this slot at any time after this call. */
        if (mem->userspace_addr & 0xffffful)
                return -EINVAL;

        if (mem->memory_size & 0xffffful)
                return -EINVAL;

        return 0;
}
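
/*
 * A worked example of the checks above (illustration only): 0xfffff
 * masks the low 20 bits, so both tests enforce 1MB (2^20) alignment.
 * A slot of 0x00200000 bytes at userspace address 0x3ff00100000
 * passes, while address 0x3ff00100800 fails because 0x800 survives
 * the & 0xfffff test.
 */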
void kvm_arch_commit_memory_region(struct kvm *kvm,
                                   const struct kvm_userspace_memory_region *mem,
                                   const struct kvm_memory_slot *old,
                                   const struct kvm_memory_slot *new,
                                   enum kvm_mr_change change)
{
        int rc;

        /* If the basics of the memslot do not change, we do not want
         * to update the gmap. Every update causes several unnecessary
         * segment translation exceptions. This is usually handled just
         * fine by the normal fault handler + gmap, but it will also
         * cause faults on the prefix page of running guest CPUs.
         */
        if (old->userspace_addr == mem->userspace_addr &&
            old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
            old->npages * PAGE_SIZE == mem->memory_size)
                return;

        rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
                              mem->guest_phys_addr, mem->memory_size);
        if (rc)
                pr_warn("failed to commit memory region\n");
        return;
}
static int __init kvm_s390_init(void)
{
        return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
        kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);
/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");