2 * hosting zSeries kernel virtual machines
4 * Copyright IBM Corp. 2008, 2009
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
16 #include <linux/compiler.h>
17 #include <linux/err.h>
19 #include <linux/hrtimer.h>
20 #include <linux/init.h>
21 #include <linux/kvm.h>
22 #include <linux/kvm_host.h>
23 #include <linux/module.h>
24 #include <linux/slab.h>
25 #include <linux/timer.h>
26 #include <asm/asm-offsets.h>
27 #include <asm/lowcore.h>
28 #include <asm/pgtable.h>
30 #include <asm/switch_to.h>
31 #include <asm/facility.h>
36 #define CREATE_TRACE_POINTS
38 #include "trace-s390.h"
40 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
42 struct kvm_stats_debugfs_item debugfs_entries[] = {
43 { "userspace_handled", VCPU_STAT(exit_userspace) },
44 { "exit_null", VCPU_STAT(exit_null) },
45 { "exit_validity", VCPU_STAT(exit_validity) },
46 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
47 { "exit_external_request", VCPU_STAT(exit_external_request) },
48 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
49 { "exit_instruction", VCPU_STAT(exit_instruction) },
50 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
51 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
52 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
53 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
54 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
55 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
56 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
57 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
58 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
59 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
60 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
61 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
62 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
63 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
64 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
65 { "instruction_spx", VCPU_STAT(instruction_spx) },
66 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
67 { "instruction_stap", VCPU_STAT(instruction_stap) },
68 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
69 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
70 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
71 { "instruction_essa", VCPU_STAT(instruction_essa) },
72 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
73 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
74 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
75 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
76 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
77 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
78 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
79 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
80 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
81 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
82 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
83 { "diagnose_10", VCPU_STAT(diagnose_10) },
84 { "diagnose_44", VCPU_STAT(diagnose_44) },
85 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
89 unsigned long *vfacilities;
90 static struct gmap_notifier gmap_notifier;
92 /* test availability of vfacility */
93 static inline int test_vfacility(unsigned long nr)
95 return __test_facility(nr, (void *) vfacilities);
98 /* Section: not file related */
99 int kvm_arch_hardware_enable(void *garbage)
101 /* every s390 is virtualization enabled ;-) */
105 void kvm_arch_hardware_disable(void *garbage)
109 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
111 int kvm_arch_hardware_setup(void)
113 gmap_notifier.notifier_call = kvm_gmap_notifier;
114 gmap_register_ipte_notifier(&gmap_notifier);
118 void kvm_arch_hardware_unsetup(void)
120 gmap_unregister_ipte_notifier(&gmap_notifier);
123 void kvm_arch_check_processor_compat(void *rtn)
127 int kvm_arch_init(void *opaque)
132 void kvm_arch_exit(void)
136 /* Section: device related */
137 long kvm_arch_dev_ioctl(struct file *filp,
138 unsigned int ioctl, unsigned long arg)
140 if (ioctl == KVM_S390_ENABLE_SIE)
141 return s390_enable_sie();
145 int kvm_dev_ioctl_check_extension(long ext)
150 case KVM_CAP_S390_PSW:
151 case KVM_CAP_S390_GMAP:
152 case KVM_CAP_SYNC_MMU:
153 #ifdef CONFIG_KVM_S390_UCONTROL
154 case KVM_CAP_S390_UCONTROL:
156 case KVM_CAP_SYNC_REGS:
157 case KVM_CAP_ONE_REG:
158 case KVM_CAP_ENABLE_CAP:
159 case KVM_CAP_S390_CSS_SUPPORT:
160 case KVM_CAP_IOEVENTFD:
163 case KVM_CAP_NR_VCPUS:
164 case KVM_CAP_MAX_VCPUS:
167 case KVM_CAP_NR_MEMSLOTS:
168 r = KVM_USER_MEM_SLOTS;
170 case KVM_CAP_S390_COW:
171 r = MACHINE_HAS_ESOP;
/* Section: vm related */
/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	/* dirty logging is not implemented on s390 at this level */
	return 0;
}
189 long kvm_arch_vm_ioctl(struct file *filp,
190 unsigned int ioctl, unsigned long arg)
192 struct kvm *kvm = filp->private_data;
193 void __user *argp = (void __user *)arg;
197 case KVM_S390_INTERRUPT: {
198 struct kvm_s390_interrupt s390int;
201 if (copy_from_user(&s390int, argp, sizeof(s390int)))
203 r = kvm_s390_inject_vm(kvm, &s390int);
213 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
219 #ifdef CONFIG_KVM_S390_UCONTROL
220 if (type & ~KVM_VM_S390_UCONTROL)
222 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
229 rc = s390_enable_sie();
235 kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
239 sprintf(debug_name, "kvm-%u", current->pid);
241 kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
245 spin_lock_init(&kvm->arch.float_int.lock);
246 INIT_LIST_HEAD(&kvm->arch.float_int.list);
248 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
249 VM_EVENT(kvm, 3, "%s", "vm created");
251 if (type & KVM_VM_S390_UCONTROL) {
252 kvm->arch.gmap = NULL;
254 kvm->arch.gmap = gmap_alloc(current->mm);
257 kvm->arch.gmap->private = kvm;
260 kvm->arch.css_support = 0;
264 debug_unregister(kvm->arch.dbf);
266 free_page((unsigned long)(kvm->arch.sca));
271 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
273 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
274 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
275 if (!kvm_is_ucontrol(vcpu->kvm)) {
276 clear_bit(63 - vcpu->vcpu_id,
277 (unsigned long *) &vcpu->kvm->arch.sca->mcn);
278 if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
279 (__u64) vcpu->arch.sie_block)
280 vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
284 if (kvm_is_ucontrol(vcpu->kvm))
285 gmap_free(vcpu->arch.gmap);
287 if (vcpu->arch.sie_block->cbrlo)
288 __free_page(__pfn_to_page(
289 vcpu->arch.sie_block->cbrlo >> PAGE_SHIFT));
290 free_page((unsigned long)(vcpu->arch.sie_block));
292 kvm_vcpu_uninit(vcpu);
293 kmem_cache_free(kvm_vcpu_cache, vcpu);
296 static void kvm_free_vcpus(struct kvm *kvm)
299 struct kvm_vcpu *vcpu;
301 kvm_for_each_vcpu(i, vcpu, kvm)
302 kvm_arch_vcpu_destroy(vcpu);
304 mutex_lock(&kvm->lock);
305 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
306 kvm->vcpus[i] = NULL;
308 atomic_set(&kvm->online_vcpus, 0);
309 mutex_unlock(&kvm->lock);
void kvm_arch_sync_events(struct kvm *kvm)
{
}
316 void kvm_arch_destroy_vm(struct kvm *kvm)
319 free_page((unsigned long)(kvm->arch.sca));
320 debug_unregister(kvm->arch.dbf);
321 if (!kvm_is_ucontrol(kvm))
322 gmap_free(kvm->arch.gmap);
325 /* Section: vcpu related */
326 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
328 if (kvm_is_ucontrol(vcpu->kvm)) {
329 vcpu->arch.gmap = gmap_alloc(current->mm);
330 if (!vcpu->arch.gmap)
332 vcpu->arch.gmap->private = vcpu->kvm;
336 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
337 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
	/* Nothing todo */
}
349 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
351 save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
352 save_fp_regs(vcpu->arch.host_fpregs.fprs);
353 save_access_regs(vcpu->arch.host_acrs);
354 restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
355 restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
356 restore_access_regs(vcpu->run->s.regs.acrs);
357 gmap_enable(vcpu->arch.gmap);
358 atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
361 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
363 atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
364 gmap_disable(vcpu->arch.gmap);
365 save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
366 save_fp_regs(vcpu->arch.guest_fpregs.fprs);
367 save_access_regs(vcpu->run->s.regs.acrs);
368 restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
369 restore_fp_regs(vcpu->arch.host_fpregs.fprs);
370 restore_access_regs(vcpu->arch.host_acrs);
373 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
375 /* this equals initial cpu reset in pop, but we don't switch to ESA */
376 vcpu->arch.sie_block->gpsw.mask = 0UL;
377 vcpu->arch.sie_block->gpsw.addr = 0UL;
378 kvm_s390_set_prefix(vcpu, 0);
379 vcpu->arch.sie_block->cputm = 0UL;
380 vcpu->arch.sie_block->ckc = 0UL;
381 vcpu->arch.sie_block->todpr = 0;
382 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
383 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
384 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
385 vcpu->arch.guest_fpregs.fpc = 0;
386 asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
387 vcpu->arch.sie_block->gbea = 1;
388 atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	return 0;
}
396 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
400 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
404 vcpu->arch.sie_block->ecb = 6;
405 if (test_vfacility(50) && test_vfacility(73))
406 vcpu->arch.sie_block->ecb |= 0x10;
408 vcpu->arch.sie_block->ecb2 = 8;
409 vcpu->arch.sie_block->eca = 0xC1002001U;
410 vcpu->arch.sie_block->fac = (int) (long) vfacilities;
411 if (kvm_enabled_cmma()) {
412 cbrl = alloc_page(GFP_KERNEL | __GFP_ZERO);
414 vcpu->arch.sie_block->ecb2 |= 0x80;
415 vcpu->arch.sie_block->ecb2 &= ~0x08;
416 vcpu->arch.sie_block->cbrlo = page_to_phys(cbrl);
419 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
420 tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
421 (unsigned long) vcpu);
422 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
423 get_cpu_id(&vcpu->arch.cpu_id);
424 vcpu->arch.cpu_id.version = 0xff;
428 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
431 struct kvm_vcpu *vcpu;
432 struct sie_page *sie_page;
435 if (id >= KVM_MAX_VCPUS)
440 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
444 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
448 vcpu->arch.sie_block = &sie_page->sie_block;
449 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
451 vcpu->arch.sie_block->icpua = id;
452 if (!kvm_is_ucontrol(kvm)) {
453 if (!kvm->arch.sca) {
457 if (!kvm->arch.sca->cpu[id].sda)
458 kvm->arch.sca->cpu[id].sda =
459 (__u64) vcpu->arch.sie_block;
460 vcpu->arch.sie_block->scaoh =
461 (__u32)(((__u64)kvm->arch.sca) >> 32);
462 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
463 set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
466 spin_lock_init(&vcpu->arch.local_int.lock);
467 INIT_LIST_HEAD(&vcpu->arch.local_int.list);
468 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
469 spin_lock(&kvm->arch.float_int.lock);
470 kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
471 vcpu->arch.local_int.wq = &vcpu->wq;
472 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
473 spin_unlock(&kvm->arch.float_int.lock);
475 rc = kvm_vcpu_init(vcpu, kvm, id);
477 goto out_free_sie_block;
478 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
479 vcpu->arch.sie_block);
480 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
484 free_page((unsigned long)(vcpu->arch.sie_block));
486 kmem_cache_free(kvm_vcpu_cache, vcpu);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
498 void s390_vcpu_block(struct kvm_vcpu *vcpu)
500 atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
503 void s390_vcpu_unblock(struct kvm_vcpu *vcpu)
505 atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
509 * Kick a guest cpu out of SIE and wait until SIE is not running.
510 * If the CPU is not running (e.g. waiting as idle) the function will
511 * return immediately. */
512 void exit_sie(struct kvm_vcpu *vcpu)
514 atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
515 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
519 /* Kick a guest cpu out of SIE and prevent SIE-reentry */
520 void exit_sie_sync(struct kvm_vcpu *vcpu)
522 s390_vcpu_block(vcpu);
526 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
529 struct kvm *kvm = gmap->private;
530 struct kvm_vcpu *vcpu;
532 kvm_for_each_vcpu(i, vcpu, kvm) {
533 /* match against both prefix pages */
534 if (vcpu->arch.sie_block->prefix == (address & ~0x1000UL)) {
535 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
536 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
549 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
550 struct kvm_one_reg *reg)
555 case KVM_REG_S390_TODPR:
556 r = put_user(vcpu->arch.sie_block->todpr,
557 (u32 __user *)reg->addr);
559 case KVM_REG_S390_EPOCHDIFF:
560 r = put_user(vcpu->arch.sie_block->epoch,
561 (u64 __user *)reg->addr);
563 case KVM_REG_S390_CPU_TIMER:
564 r = put_user(vcpu->arch.sie_block->cputm,
565 (u64 __user *)reg->addr);
567 case KVM_REG_S390_CLOCK_COMP:
568 r = put_user(vcpu->arch.sie_block->ckc,
569 (u64 __user *)reg->addr);
578 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
579 struct kvm_one_reg *reg)
584 case KVM_REG_S390_TODPR:
585 r = get_user(vcpu->arch.sie_block->todpr,
586 (u32 __user *)reg->addr);
588 case KVM_REG_S390_EPOCHDIFF:
589 r = get_user(vcpu->arch.sie_block->epoch,
590 (u64 __user *)reg->addr);
592 case KVM_REG_S390_CPU_TIMER:
593 r = get_user(vcpu->arch.sie_block->cputm,
594 (u64 __user *)reg->addr);
596 case KVM_REG_S390_CLOCK_COMP:
597 r = get_user(vcpu->arch.sie_block->ckc,
598 (u64 __user *)reg->addr);
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}
613 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
615 memcpy(&vcpu->run->s.regs.gprs, ®s->gprs, sizeof(regs->gprs));
619 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
621 memcpy(®s->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
625 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
626 struct kvm_sregs *sregs)
628 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
629 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
630 restore_access_regs(vcpu->run->s.regs.acrs);
634 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
635 struct kvm_sregs *sregs)
637 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
638 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
642 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
644 if (test_fp_ctl(fpu->fpc))
646 memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
647 vcpu->arch.guest_fpregs.fpc = fpu->fpc;
648 restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
649 restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
653 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
655 memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
656 fpu->fpc = vcpu->arch.guest_fpregs.fpc;
660 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
664 if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED))
667 vcpu->run->psw_mask = psw.mask;
668 vcpu->run->psw_addr = psw.addr;
673 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
674 struct kvm_translation *tr)
676 return -EINVAL; /* not implemented yet */
679 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
680 struct kvm_guest_debug *dbg)
682 return -EINVAL; /* not implemented yet */
685 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
686 struct kvm_mp_state *mp_state)
688 return -EINVAL; /* not implemented yet */
691 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
692 struct kvm_mp_state *mp_state)
694 return -EINVAL; /* not implemented yet */
697 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
700 * We use MMU_RELOAD just to re-arm the ipte notifier for the
701 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
702 * This ensures that the ipte instruction for this request has
703 * already finished. We might race against a second unmapper that
704 * wants to set the blocking bit. Lets just retry the request loop.
706 while (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
708 rc = gmap_ipte_notify(vcpu->arch.gmap,
709 vcpu->arch.sie_block->prefix,
713 s390_vcpu_unblock(vcpu);
718 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
722 memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
727 if (test_thread_flag(TIF_MCCK_PENDING))
730 if (!kvm_is_ucontrol(vcpu->kvm))
731 kvm_s390_deliver_pending_interrupts(vcpu);
733 rc = kvm_s390_handle_requests(vcpu);
737 vcpu->arch.sie_block->icptcode = 0;
738 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
739 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
740 trace_kvm_s390_sie_enter(vcpu, cpuflags);
745 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
749 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
750 vcpu->arch.sie_block->icptcode);
751 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
753 if (exit_reason >= 0) {
755 } else if (kvm_is_ucontrol(vcpu->kvm)) {
756 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
757 vcpu->run->s390_ucontrol.trans_exc_code =
758 current->thread.gmap_addr;
759 vcpu->run->s390_ucontrol.pgm_code = 0x10;
762 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
763 trace_kvm_s390_sie_fault(vcpu);
764 rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
767 memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
770 if (kvm_is_ucontrol(vcpu->kvm))
773 rc = kvm_handle_sie_intercept(vcpu);
779 bool kvm_enabled_cmma(void)
781 if (!MACHINE_IS_LPAR)
783 /* only enable for z10 and later */
784 if (!MACHINE_HAS_EDAT1)
789 static int __vcpu_run(struct kvm_vcpu *vcpu)
794 * We try to hold kvm->srcu during most of vcpu_run (except when run-
795 * ning the guest), so that memslots (and other stuff) are protected
797 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
800 rc = vcpu_pre_run(vcpu);
804 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
806 * As PF_VCPU will be used in fault handler, between
807 * guest_enter and guest_exit should be no uaccess.
812 exit_reason = sie64a(vcpu->arch.sie_block,
813 vcpu->run->s.regs.gprs);
815 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
817 rc = vcpu_post_run(vcpu, exit_reason);
818 } while (!signal_pending(current) && !rc);
820 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
824 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
829 if (vcpu->sigset_active)
830 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
832 atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
834 BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);
836 switch (kvm_run->exit_reason) {
837 case KVM_EXIT_S390_SIEIC:
838 case KVM_EXIT_UNKNOWN:
840 case KVM_EXIT_S390_RESET:
841 case KVM_EXIT_S390_UCONTROL:
842 case KVM_EXIT_S390_TSCH:
848 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
849 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
850 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) {
851 kvm_run->kvm_dirty_regs &= ~KVM_SYNC_PREFIX;
852 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
854 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
855 kvm_run->kvm_dirty_regs &= ~KVM_SYNC_CRS;
856 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
857 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
861 rc = __vcpu_run(vcpu);
863 if (signal_pending(current) && !rc) {
864 kvm_run->exit_reason = KVM_EXIT_INTR;
868 if (rc == -EOPNOTSUPP) {
869 /* intercept cannot be handled in-kernel, prepare kvm-run */
870 kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
871 kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
872 kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
873 kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
877 if (rc == -EREMOTE) {
878 /* intercept was handled, but userspace support is needed
879 * kvm_run has been prepared by the handler */
883 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
884 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
885 kvm_run->s.regs.prefix = vcpu->arch.sie_block->prefix;
886 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
888 if (vcpu->sigset_active)
889 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
891 vcpu->stat.exit_userspace++;
895 static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, void *from,
896 unsigned long n, int prefix)
899 return copy_to_guest(vcpu, guestdest, from, n);
901 return copy_to_guest_absolute(vcpu, guestdest, from, n);
905 * store status at address
906 * we use have two special cases:
907 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
908 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
910 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr)
912 unsigned char archmode = 1;
916 if (addr == KVM_S390_STORE_STATUS_NOADDR) {
917 if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
919 addr = SAVE_AREA_BASE;
921 } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
922 if (copy_to_guest(vcpu, 163ul, &archmode, 1))
924 addr = SAVE_AREA_BASE;
929 if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs),
930 vcpu->arch.guest_fpregs.fprs, 128, prefix))
933 if (__guestcopy(vcpu, addr + offsetof(struct save_area, gp_regs),
934 vcpu->run->s.regs.gprs, 128, prefix))
937 if (__guestcopy(vcpu, addr + offsetof(struct save_area, psw),
938 &vcpu->arch.sie_block->gpsw, 16, prefix))
941 if (__guestcopy(vcpu, addr + offsetof(struct save_area, pref_reg),
942 &vcpu->arch.sie_block->prefix, 4, prefix))
945 if (__guestcopy(vcpu,
946 addr + offsetof(struct save_area, fp_ctrl_reg),
947 &vcpu->arch.guest_fpregs.fpc, 4, prefix))
950 if (__guestcopy(vcpu, addr + offsetof(struct save_area, tod_reg),
951 &vcpu->arch.sie_block->todpr, 4, prefix))
954 if (__guestcopy(vcpu, addr + offsetof(struct save_area, timer),
955 &vcpu->arch.sie_block->cputm, 8, prefix))
958 clkcomp = vcpu->arch.sie_block->ckc >> 8;
959 if (__guestcopy(vcpu, addr + offsetof(struct save_area, clk_cmp),
960 &clkcomp, 8, prefix))
963 if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs),
964 &vcpu->run->s.regs.acrs, 64, prefix))
967 if (__guestcopy(vcpu,
968 addr + offsetof(struct save_area, ctrl_regs),
969 &vcpu->arch.sie_block->gcr, 128, prefix))
974 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
977 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
978 * copying in vcpu load/put. Lets update our copies before we save
979 * it into the save area
981 save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
982 save_fp_regs(vcpu->arch.guest_fpregs.fprs);
983 save_access_regs(vcpu->run->s.regs.acrs);
985 return kvm_s390_store_status_unloaded(vcpu, addr);
988 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
989 struct kvm_enable_cap *cap)
997 case KVM_CAP_S390_CSS_SUPPORT:
998 if (!vcpu->kvm->arch.css_support) {
999 vcpu->kvm->arch.css_support = 1;
1000 trace_kvm_s390_enable_css(vcpu->kvm);
1011 long kvm_arch_vcpu_ioctl(struct file *filp,
1012 unsigned int ioctl, unsigned long arg)
1014 struct kvm_vcpu *vcpu = filp->private_data;
1015 void __user *argp = (void __user *)arg;
1020 case KVM_S390_INTERRUPT: {
1021 struct kvm_s390_interrupt s390int;
1024 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1026 r = kvm_s390_inject_vcpu(vcpu, &s390int);
1029 case KVM_S390_STORE_STATUS:
1030 idx = srcu_read_lock(&vcpu->kvm->srcu);
1031 r = kvm_s390_vcpu_store_status(vcpu, arg);
1032 srcu_read_unlock(&vcpu->kvm->srcu, idx);
1034 case KVM_S390_SET_INITIAL_PSW: {
1038 if (copy_from_user(&psw, argp, sizeof(psw)))
1040 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
1043 case KVM_S390_INITIAL_RESET:
1044 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
1046 case KVM_SET_ONE_REG:
1047 case KVM_GET_ONE_REG: {
1048 struct kvm_one_reg reg;
1050 if (copy_from_user(®, argp, sizeof(reg)))
1052 if (ioctl == KVM_SET_ONE_REG)
1053 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, ®);
1055 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, ®);
1058 #ifdef CONFIG_KVM_S390_UCONTROL
1059 case KVM_S390_UCAS_MAP: {
1060 struct kvm_s390_ucas_mapping ucasmap;
1062 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
1067 if (!kvm_is_ucontrol(vcpu->kvm)) {
1072 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
1073 ucasmap.vcpu_addr, ucasmap.length);
1076 case KVM_S390_UCAS_UNMAP: {
1077 struct kvm_s390_ucas_mapping ucasmap;
1079 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
1084 if (!kvm_is_ucontrol(vcpu->kvm)) {
1089 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
1094 case KVM_S390_VCPU_FAULT: {
1095 r = gmap_fault(arg, vcpu->arch.gmap);
1096 if (!IS_ERR_VALUE(r))
1100 case KVM_ENABLE_CAP:
1102 struct kvm_enable_cap cap;
1104 if (copy_from_user(&cap, argp, sizeof(cap)))
1106 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
1115 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
1117 #ifdef CONFIG_KVM_S390_UCONTROL
1118 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
1119 && (kvm_is_ucontrol(vcpu->kvm))) {
1120 vmf->page = virt_to_page(vcpu->arch.sie_block);
1121 get_page(vmf->page);
1125 return VM_FAULT_SIGBUS;
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
			   struct kvm_memory_slot *dont)
{
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

void kvm_arch_memslots_updated(struct kvm *kvm)
{
}
1143 /* Section: memory related */
1144 int kvm_arch_prepare_memory_region(struct kvm *kvm,
1145 struct kvm_memory_slot *memslot,
1146 struct kvm_userspace_memory_region *mem,
1147 enum kvm_mr_change change)
1149 /* A few sanity checks. We can have memory slots which have to be
1150 located/ended at a segment boundary (1MB). The memory in userland is
1151 ok to be fragmented into various different vmas. It is okay to mmap()
1152 and munmap() stuff in this slot after doing this call at any time */
1154 if (mem->userspace_addr & 0xffffful)
1157 if (mem->memory_size & 0xffffful)
1163 void kvm_arch_commit_memory_region(struct kvm *kvm,
1164 struct kvm_userspace_memory_region *mem,
1165 const struct kvm_memory_slot *old,
1166 enum kvm_mr_change change)
1170 /* If the basics of the memslot do not change, we do not want
1171 * to update the gmap. Every update causes several unnecessary
1172 * segment translation exceptions. This is usually handled just
1173 * fine by the normal fault handler + gmap, but it will also
1174 * cause faults on the prefix page of running guest CPUs.
1176 if (old->userspace_addr == mem->userspace_addr &&
1177 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
1178 old->npages * PAGE_SIZE == mem->memory_size)
1181 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
1182 mem->guest_phys_addr, mem->memory_size);
1184 printk(KERN_WARNING "kvm-s390: failed to commit memory region\n");
void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
}

void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
				   struct kvm_memory_slot *slot)
{
}
1197 static int __init kvm_s390_init(void)
1200 ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
1205 * guests can ask for up to 255+1 double words, we need a full page
1206 * to hold the maximum amount of facilities. On the other hand, we
1207 * only set facilities that are known to work in KVM.
1209 vfacilities = (unsigned long *) get_zeroed_page(GFP_KERNEL|GFP_DMA);
1214 memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16);
1215 vfacilities[0] &= 0xff82fff3f4fc2000UL;
1216 vfacilities[1] &= 0x005c000000000000UL;
1220 static void __exit kvm_s390_exit(void)
1222 free_page((unsigned long) vfacilities);
1226 module_init(kvm_s390_init);
1227 module_exit(kvm_s390_exit);
1230 * Enable autoloading of the kvm module.
1231 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
1232 * since x86 takes a different approach.
1234 #include <linux/miscdevice.h>
1235 MODULE_ALIAS_MISCDEV(KVM_MINOR);
1236 MODULE_ALIAS("devname:kvm");