/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 */
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/facility.h>
#include <asm/sclp.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"
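/*
 * The table below exports per-vcpu event counters under debugfs.
 * VCPU_STAT expands to the offset of the counter within struct kvm_vcpu
 * plus the KVM_STAT_VCPU kind tag, which the generic KVM debugfs code
 * uses to locate and aggregate the counters across vcpus.
 */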
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ NULL }
};
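/*
 * vfacilities points to a zeroed page holding the subset of the host's
 * STFLE facility bits that KVM exposes to guests. It is filled and
 * masked in kvm_s390_init() below and referenced by each vcpu's SIE
 * block through the fac field (hence the GFP_DMA allocation, as that
 * field holds a 31-bit address).
 */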
unsigned long *vfacilities;
static struct gmap_notifier gmap_notifier;

/* test availability of vfacility */
static inline int test_vfacility(unsigned long nr)
{
	return __test_facility(nr, (void *) vfacilities);
}
/* Section: not file related */
int kvm_arch_hardware_enable(void *garbage)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

void kvm_arch_hardware_disable(void *garbage)
{
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_ipte_notifier(&gmap_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_ipte_notifier(&gmap_notifier);
}

void kvm_arch_check_processor_compat(void *rtn)
{
}

int kvm_arch_init(void *opaque)
{
	return 0;
}

void kvm_arch_exit(void)
{
}
/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
int kvm_dev_ioctl_check_extension(long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
		r = 1;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_MAX_VCPUS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	default:
		r = 0;
	}
	return r;
}
/* Section: vm related */
/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	return 0;
}
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	int rc;
	char debug_name[16];

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
	if (!kvm->arch.sca)
		goto out_err;

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_nodbf;

	spin_lock_init(&kvm->arch.float_int.lock);
	INIT_LIST_HEAD(&kvm->arch.float_int.list);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "%s", "vm created");

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
	} else {
		kvm->arch.gmap = gmap_alloc(current->mm);
		if (!kvm->arch.gmap)
			goto out_nogmap;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;

	return 0;
out_nogmap:
	debug_unregister(kvm->arch.dbf);
out_nodbf:
	free_page((unsigned long)(kvm->arch.sca));
out_err:
	return rc;
}
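/*
 * For a regular VM the single kvm->arch.gmap above provides the
 * guest-real to user-virtual translation shared by all vcpus; for a
 * ucontrol VM (KVM_VM_S390_UCONTROL) gmap stays NULL here and each vcpu
 * allocates its own gmap in kvm_arch_vcpu_init(), which user space then
 * manages via KVM_S390_UCAS_MAP/KVM_S390_UCAS_UNMAP.
 */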
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		clear_bit(63 - vcpu->vcpu_id,
			  (unsigned long *) &vcpu->kvm->arch.sca->mcn);
		if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
		    (__u64) vcpu->arch.sie_block)
			vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	smp_mb();

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_free(vcpu->arch.gmap);

	free_page((unsigned long)(vcpu->arch.sie_block));
	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}
void kvm_arch_sync_events(struct kvm *kvm)
{
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	free_page((unsigned long)(kvm->arch.sca));
	debug_unregister(kvm->arch.dbf);
	if (!kvm_is_ucontrol(kvm))
		gmap_free(kvm->arch.gmap);
}
/* Section: vcpu related */
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = gmap_alloc(current->mm);
		if (!vcpu->arch.gmap)
			return -ENOMEM;
		vcpu->arch.gmap->private = vcpu->kvm;
		return 0;
	}

	vcpu->arch.gmap = vcpu->kvm->arch.gmap;
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS;
	return 0;
}
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
	/* Nothing todo */
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
	save_fp_regs(vcpu->arch.host_fpregs.fprs);
	save_access_regs(vcpu->arch.host_acrs);
	restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
	restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	gmap_enable(vcpu->arch.gmap);
	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	gmap_disable(vcpu->arch.gmap);
	save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
	save_fp_regs(vcpu->arch.guest_fpregs.fprs);
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
	restore_fp_regs(vcpu->arch.host_fpregs.fprs);
	restore_access_regs(vcpu->arch.host_acrs);
}
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	vcpu->arch.sie_block->cputm = 0UL;
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	vcpu->arch.guest_fpregs.fpc = 0;
	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
}
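/*
 * After an initial reset the vcpu is left in the stopped state; it only
 * executes guest code again once CPUSTAT_STOPPED is cleared, e.g. by
 * the run ioctl below or a SIGP restart.
 */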
int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	return 0;
}

int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED |
						    CPUSTAT_GED);
	vcpu->arch.sie_block->ecb = 6;
	if (test_vfacility(50) && test_vfacility(73))
		vcpu->arch.sie_block->ecb |= 0x10;

	vcpu->arch.sie_block->ecb2 = 8;
	vcpu->arch.sie_block->eca = 0xC1002001U;
	vcpu->arch.sie_block->fac = (int) (long) vfacilities;
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
	tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
		     (unsigned long) vcpu);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
	get_cpu_id(&vcpu->arch.cpu_id);
	vcpu->arch.cpu_id.version = 0xff;
	return 0;
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (id >= KVM_MAX_VCPUS)
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	vcpu->arch.sie_block->icpua = id;
	if (!kvm_is_ucontrol(kvm)) {
		if (!kvm->arch.sca) {
			WARN_ON_ONCE(1);
			goto out_free_cpu;
		}
		if (!kvm->arch.sca->cpu[id].sda)
			kvm->arch.sca->cpu[id].sda =
				(__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh =
			(__u32)(((__u64)kvm->arch.sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
		set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
	}

	spin_lock_init(&vcpu->arch.local_int.lock);
	INIT_LIST_HEAD(&vcpu->arch.local_int.list);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_cpu_has_interrupt(vcpu);
}

void s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

void s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}
/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}

/* Kick a guest cpu out of SIE and prevent SIE-reentry */
void exit_sie_sync(struct kvm_vcpu *vcpu)
{
	s390_vcpu_block(vcpu);
	exit_sie(vcpu);
}
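/*
 * The ipte notifier below fires when a guest page armed via
 * gmap_ipte_notify() is about to be unmapped. Matching masks off the
 * 0x1000 bit of the notified address because the prefix area spans two
 * consecutive 4K pages; affected vcpus are kicked out of SIE and get a
 * KVM_REQ_MMU_RELOAD request so the notifier is re-armed before the
 * next SIE entry.
 */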
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
{
	int i;
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		if (vcpu->arch.sie_block->prefix == (address & ~0x1000UL)) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			exit_sie_sync(vcpu);
		}
	}
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	return 0;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	restore_access_regs(vcpu->run->s.regs.acrs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	return 0;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	if (test_fp_ctl(fpu->fpc))
		return -EINVAL;
	memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
	restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
	restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
	return 0;
}
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	return -EINVAL; /* not implemented yet */
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	return -EINVAL; /* not implemented yet */
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	return -EINVAL; /* not implemented yet */
}
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
	 */
	while (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_ipte_notify(vcpu->arch.gmap,
				      vcpu->arch.sie_block->prefix,
				      PAGE_SIZE * 2);
		if (rc)
			return rc;
		s390_vcpu_unblock(vcpu);
	}
	return 0;
}
static long kvm_arch_fault_in_sync(struct kvm_vcpu *vcpu)
{
	long rc;
	hva_t fault = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
	struct mm_struct *mm = current->mm;

	down_read(&mm->mmap_sem);
	rc = get_user_pages(current, mm, fault, 1, 1, 0, NULL, NULL);
	up_read(&mm->mmap_sem);
	return rc;
}
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;

	inti.parm64 = token;
	if (start_token) {
		inti.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */
	return true;
}
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_cpu_has_interrupt(vcpu))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
	if (copy_from_guest(vcpu, &arch.pfault_token, vcpu->arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}
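/*
 * Pfault is only usable if the guest negotiated it (a valid token set
 * up via DIAG 0x258) and can currently take the external interruption;
 * the checks above fall back to synchronous fault-in otherwise.
 */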
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the house keeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);

	if (need_resched())
		schedule();

	if (test_thread_flag(TIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm))
		kvm_s390_deliver_pending_interrupts(vcpu);

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}
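/*
 * vcpu_post_run() below classifies the sie64a() return value: a
 * non-negative exit_reason is a regular intercept (icptcode is valid),
 * while a negative value means the host faulted while the guest was in
 * SIE, e.g. a gmap pseudo page fault that may be handled asynchronously.
 */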
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	int rc = -1;

	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (exit_reason >= 0) {
		rc = 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		rc = -EREMOTE;

	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu) ||
		    (kvm_arch_fault_in_sync(vcpu) >= 0))
			rc = 0;
	}

	if (rc == -1) {
		VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
		trace_kvm_s390_sie_fault(vcpu);
		rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
	}

	memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);

	if (rc == 0) {
		if (kvm_is_ucontrol(vcpu->kvm))
			/* Don't exit for host interrupts. */
			rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
		else
			rc = kvm_handle_sie_intercept(vcpu);
	}

	return rc;
}
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		preempt_disable();
		kvm_guest_enter();
		preempt_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		kvm_guest_exit();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);

	switch (kvm_run->exit_reason) {
	case KVM_EXIT_S390_SIEIC:
	case KVM_EXIT_UNKNOWN:
	case KVM_EXIT_INTR:
	case KVM_EXIT_S390_RESET:
	case KVM_EXIT_S390_UCONTROL:
	case KVM_EXIT_S390_TSCH:
		break;
	default:
		BUG();
	}

	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) {
		kvm_run->kvm_dirty_regs &= ~KVM_SYNC_PREFIX;
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		kvm_run->kvm_dirty_regs &= ~KVM_SYNC_CRS;
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	}

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (rc == -EOPNOTSUPP) {
		/* intercept cannot be handled in-kernel, prepare kvm-run */
		kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
		kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		kvm_run->s390_sieic.ipa      = vcpu->arch.sie_block->ipa;
		kvm_run->s390_sieic.ipb      = vcpu->arch.sie_block->ipb;
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* intercept was handled, but userspace support is needed
		 * kvm_run has been prepared by the handler */
		rc = 0;
	}

	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = vcpu->arch.sie_block->prefix;
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
	return rc;
}
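/*
 * Note that the PSW, prefix and control registers are copied back to
 * kvm_run unconditionally above, so user space always observes current
 * values after the ioctl returns, independent of kvm_dirty_regs.
 */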
static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, void *from,
		       unsigned long n, int prefix)
{
	if (prefix)
		return copy_to_guest(vcpu, guestdest, from, n);
	else
		return copy_to_guest_absolute(vcpu, guestdest, from, n);
}
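/*
 * prefix == 1 stores through the guest's logical address space, so
 * lowcore prefixing applies; prefix == 0 stores to guest absolute
 * memory. This matches the two STORE STATUS variants handled below.
 */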
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr)
{
	unsigned char archmode = 1;
	int prefix;
	u64 clkcomp;

	if (addr == KVM_S390_STORE_STATUS_NOADDR) {
		if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
			return -EFAULT;
		addr = SAVE_AREA_BASE;
		prefix = 0;
	} else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
		if (copy_to_guest(vcpu, 163ul, &archmode, 1))
			return -EFAULT;
		addr = SAVE_AREA_BASE;
		prefix = 1;
	} else
		prefix = 0;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs),
			vcpu->arch.guest_fpregs.fprs, 128, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area, gp_regs),
			vcpu->run->s.regs.gprs, 128, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area, psw),
			&vcpu->arch.sie_block->gpsw, 16, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area, pref_reg),
			&vcpu->arch.sie_block->prefix, 4, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu,
			addr + offsetof(struct save_area, fp_ctrl_reg),
			&vcpu->arch.guest_fpregs.fpc, 4, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area, tod_reg),
			&vcpu->arch.sie_block->todpr, 4, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area, timer),
			&vcpu->arch.sie_block->cputm, 8, prefix))
		return -EFAULT;

	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	if (__guestcopy(vcpu, addr + offsetof(struct save_area, clk_cmp),
			&clkcomp, 8, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs),
			&vcpu->run->s.regs.acrs, 64, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu,
			addr + offsetof(struct save_area, ctrl_regs),
			&vcpu->arch.sie_block->gcr, 128, prefix))
		return -EFAULT;
	return 0;
}
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * copying in vcpu load/put. Let's update our copies before we save
	 * them into the save area.
	 */
	save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
	save_fp_regs(vcpu->arch.guest_fpregs.fprs);
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
	}
	return r;
}
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390int);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(arg, vcpu->arch.gmap);
		if (!IS_ERR_VALUE(r))
			r = 0;
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
			   struct kvm_memory_slot *dont)
{
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

void kvm_arch_memslots_updated(struct kvm *kvm)
{
}
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. We can have memory slots which have to be
	   located/ended at a segment boundary (1MB). The memory in userland is
	   ok to be fragmented into various different vmas. It is okay to mmap()
	   and munmap() stuff in this slot after doing this call at any time */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	return 0;
}
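/*
 * Example: userspace_addr = 0x80000000 with memory_size = 0x200000 is
 * accepted; any value not aligned to the 1MB segment size fails the
 * 0xfffff masks above and is rejected with -EINVAL.
 */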
void kvm_arch_commit_memory_region(struct kvm *kvm,
				   struct kvm_userspace_memory_region *mem,
				   const struct kvm_memory_slot *old,
				   enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
		mem->guest_phys_addr, mem->memory_size);
	if (rc)
		printk(KERN_WARNING "kvm-s390: failed to commit memory region\n");
	return;
}
void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
}

void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
				   struct kvm_memory_slot *slot)
{
}
static int __init kvm_s390_init(void)
{
	int ret;

	ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
	if (ret)
		return ret;

	/*
	 * guests can ask for up to 255+1 double words, we need a full page
	 * to hold the maximum amount of facilities. On the other hand, we
	 * only set facilities that are known to work in KVM.
	 */
	vfacilities = (unsigned long *) get_zeroed_page(GFP_KERNEL|GFP_DMA);
	if (!vfacilities) {
		kvm_exit();
		return -ENOMEM;
	}
	memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16);
	vfacilities[0] &= 0xff82fff3f4fc2000UL;
	vfacilities[1] &= 0x005c000000000000UL;
	return 0;
}
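/*
 * The masks keep only facility bits known to work under KVM; anything
 * outside them (e.g. facilities that would need additional emulation
 * or setup) stays hidden from guests even if the host supports it.
 */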
static void __exit kvm_s390_exit(void)
{
	free_page((unsigned long) vfacilities);
	kvm_exit();
}
module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");