arch/s390/kvm/kvm-s390.c

   1 /*
   2  * hosting zSeries kernel virtual machines
   3  *
   4  * Copyright IBM Corp. 2008, 2009
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License (version 2 only)
   8  * as published by the Free Software Foundation.
   9  *
  10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  11  *               Christian Borntraeger <borntraeger@de.ibm.com>
  12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
  13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  14  *               Jason J. Herne <jjherne@us.ibm.com>
  15  */
  16
  17 #include <linux/compiler.h>
  18 #include <linux/err.h>
  19 #include <linux/fs.h>
  20 #include <linux/hrtimer.h>
  21 #include <linux/init.h>
  22 #include <linux/kvm.h>
  23 #include <linux/kvm_host.h>
  24 #include <linux/module.h>
  25 #include <linux/random.h>
  26 #include <linux/slab.h>
  27 #include <linux/timer.h>
  28 #include <linux/vmalloc.h>
  29 #include <asm/asm-offsets.h>
  30 #include <asm/lowcore.h>
  31 #include <asm/etr.h>
  32 #include <asm/pgtable.h>
  33 #include <asm/nmi.h>
  34 #include <asm/switch_to.h>
  35 #include <asm/isc.h>
  36 #include <asm/sclp.h>
  37 #include "kvm-s390.h"
  38 #include "gaccess.h"
  39
  40 #define KMSG_COMPONENT "kvm-s390"
  41 #undef pr_fmt
  42 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  43
  44 #define CREATE_TRACE_POINTS
  45 #include "trace.h"
  46 #include "trace-s390.h"
  47
  48 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
  49 #define LOCAL_IRQS 32
  50 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
  51                            (KVM_MAX_VCPUS + LOCAL_IRQS))
  52
  53 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
  54
  55 struct kvm_stats_debugfs_item debugfs_entries[] = {
  56         { "userspace_handled", VCPU_STAT(exit_userspace) },
  57         { "exit_null", VCPU_STAT(exit_null) },
  58         { "exit_validity", VCPU_STAT(exit_validity) },
  59         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
  60         { "exit_external_request", VCPU_STAT(exit_external_request) },
  61         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
  62         { "exit_instruction", VCPU_STAT(exit_instruction) },
  63         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
  64         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
  65         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
  66         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
  67         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
  68         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
  69         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
  70         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
  71         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
  72         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
  73         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
  74         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
  75         { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
  76         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
  77         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
  78         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
  79         { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
  80         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
  81         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
  82         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
  83         { "instruction_spx", VCPU_STAT(instruction_spx) },
  84         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
  85         { "instruction_stap", VCPU_STAT(instruction_stap) },
  86         { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
  87         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
  88         { "instruction_stsch", VCPU_STAT(instruction_stsch) },
  89         { "instruction_chsc", VCPU_STAT(instruction_chsc) },
  90         { "instruction_essa", VCPU_STAT(instruction_essa) },
  91         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
  92         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
  93         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
  94         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
  95         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
  96         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
  97         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
  98         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
  99         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
 100         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
 101         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
 102         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
 103         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
 104         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
 105         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
 106         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
 107         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
 108         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
 109         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
 110         { "diagnose_10", VCPU_STAT(diagnose_10) },
 111         { "diagnose_44", VCPU_STAT(diagnose_44) },
 112         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
 113         { "diagnose_258", VCPU_STAT(diagnose_258) },
 114         { "diagnose_308", VCPU_STAT(diagnose_308) },
 115         { "diagnose_500", VCPU_STAT(diagnose_500) },
 116         { NULL }
 117 };
 118
 119 /* upper facilities limit for kvm */
 120 unsigned long kvm_s390_fac_list_mask[] = {
 121         0xffe6fffbfcfdfc40UL,
 122         0x005e800000000000UL,
 123 };
 124
 125 unsigned long kvm_s390_fac_list_mask_size(void)
 126 {
 127         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
 128         return ARRAY_SIZE(kvm_s390_fac_list_mask);
 129 }
 130
 131 static struct gmap_notifier gmap_notifier;
 132 debug_info_t *kvm_s390_dbf;
 133
 134 /* Section: not file related */
 135 int kvm_arch_hardware_enable(void)
 136 {
 137         /* every s390 is virtualization enabled ;-) */
 138         return 0;
 139 }
 140
 141 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
 142
 143 /*
 144  * This callback is executed during stop_machine(). All CPUs are therefore
 145  * temporarily stopped. In order not to change guest behavior, we have to
 146  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 147  * so a CPU won't be stopped while calculating with the epoch.
 148  */
 149 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 150                           void *v)
 151 {
 152         struct kvm *kvm;
 153         struct kvm_vcpu *vcpu;
 154         int i;
 155         unsigned long long *delta = v;
 156
 157         list_for_each_entry(kvm, &vm_list, vm_list) {
 158                 kvm->arch.epoch -= *delta;
 159                 kvm_for_each_vcpu(i, vcpu, kvm) {
 160                         vcpu->arch.sie_block->epoch -= *delta;
 161                 }
 162         }
 163         return NOTIFY_OK;
 164 }
 165
 166 static struct notifier_block kvm_clock_notifier = {
 167         .notifier_call = kvm_clock_sync,
 168 };
 169
 170 int kvm_arch_hardware_setup(void)
 171 {
 172         gmap_notifier.notifier_call = kvm_gmap_notifier;
 173         gmap_register_ipte_notifier(&gmap_notifier);
 174         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 175                                        &kvm_clock_notifier);
 176         return 0;
 177 }
 178
 179 void kvm_arch_hardware_unsetup(void)
 180 {
 181         gmap_unregister_ipte_notifier(&gmap_notifier);
 182         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 183                                          &kvm_clock_notifier);
 184 }
 185
 186 int kvm_arch_init(void *opaque)
 187 {
 188         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 189         if (!kvm_s390_dbf)
 190                 return -ENOMEM;
 191
 192         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
 193                 debug_unregister(kvm_s390_dbf);
 194                 return -ENOMEM;
 195         }
 196
 197         /* Register floating interrupt controller interface. */
 198         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 199 }
 200
 201 void kvm_arch_exit(void)
 202 {
 203         debug_unregister(kvm_s390_dbf);
 204 }
 205
 206 /* Section: device related */
 207 long kvm_arch_dev_ioctl(struct file *filp,
 208                         unsigned int ioctl, unsigned long arg)
 209 {
 210         if (ioctl == KVM_S390_ENABLE_SIE)
 211                 return s390_enable_sie();
 212         return -EINVAL;
 213 }
 214
 215 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 216 {
 217         int r;
 218
 219         switch (ext) {
 220         case KVM_CAP_S390_PSW:
 221         case KVM_CAP_S390_GMAP:
 222         case KVM_CAP_SYNC_MMU:
 223 #ifdef CONFIG_KVM_S390_UCONTROL
 224         case KVM_CAP_S390_UCONTROL:
 225 #endif
 226         case KVM_CAP_ASYNC_PF:
 227         case KVM_CAP_SYNC_REGS:
 228         case KVM_CAP_ONE_REG:
 229         case KVM_CAP_ENABLE_CAP:
 230         case KVM_CAP_S390_CSS_SUPPORT:
 231         case KVM_CAP_IOEVENTFD:
 232         case KVM_CAP_DEVICE_CTRL:
 233         case KVM_CAP_ENABLE_CAP_VM:
 234         case KVM_CAP_S390_IRQCHIP:
 235         case KVM_CAP_VM_ATTRIBUTES:
 236         case KVM_CAP_MP_STATE:
 237         case KVM_CAP_S390_INJECT_IRQ:
 238         case KVM_CAP_S390_USER_SIGP:
 239         case KVM_CAP_S390_USER_STSI:
 240         case KVM_CAP_S390_SKEYS:
 241         case KVM_CAP_S390_IRQ_STATE:
 242                 r = 1;
 243                 break;
 244         case KVM_CAP_S390_MEM_OP:
 245                 r = MEM_OP_MAX_SIZE;
 246                 break;
 247         case KVM_CAP_NR_VCPUS:
 248         case KVM_CAP_MAX_VCPUS:
 249                 r = KVM_MAX_VCPUS;
 250                 break;
 251         case KVM_CAP_NR_MEMSLOTS:
 252                 r = KVM_USER_MEM_SLOTS;
 253                 break;
 254         case KVM_CAP_S390_COW:
 255                 r = MACHINE_HAS_ESOP;
 256                 break;
 257         case KVM_CAP_S390_VECTOR_REGISTERS:
 258                 r = MACHINE_HAS_VX;
 259                 break;
 260         default:
 261                 r = 0;
 262         }
 263         return r;
 264 }
 265
 266 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
 267                                         struct kvm_memory_slot *memslot)
 268 {
 269         gfn_t cur_gfn, last_gfn;
 270         unsigned long address;
 271         struct gmap *gmap = kvm->arch.gmap;
 272
 273         down_read(&gmap->mm->mmap_sem);
 274         /* Loop over all guest pages */
 275         last_gfn = memslot->base_gfn + memslot->npages;
 276         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
 277                 address = gfn_to_hva_memslot(memslot, cur_gfn);
 278
 279                 if (gmap_test_and_clear_dirty(address, gmap))
 280                         mark_page_dirty(kvm, cur_gfn);
 281         }
 282         up_read(&gmap->mm->mmap_sem);
 283 }
 284
 285 /* Section: vm related */
 286 /*
 287  * Get (and clear) the dirty memory log for a memory slot.
 288  */
 289 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 290                                struct kvm_dirty_log *log)
 291 {
 292         int r;
 293         unsigned long n;
 294         struct kvm_memslots *slots;
 295         struct kvm_memory_slot *memslot;
 296         int is_dirty = 0;
 297
 298         mutex_lock(&kvm->slots_lock);
 299
 300         r = -EINVAL;
 301         if (log->slot >= KVM_USER_MEM_SLOTS)
 302                 goto out;
 303
 304         slots = kvm_memslots(kvm);
 305         memslot = id_to_memslot(slots, log->slot);
 306         r = -ENOENT;
 307         if (!memslot->dirty_bitmap)
 308                 goto out;
 309
 310         kvm_s390_sync_dirty_log(kvm, memslot);
 311         r = kvm_get_dirty_log(kvm, log, &is_dirty);
 312         if (r)
 313                 goto out;
 314
 315         /* Clear the dirty log */
 316         if (is_dirty) {
 317                 n = kvm_dirty_bitmap_bytes(memslot);
 318                 memset(memslot->dirty_bitmap, 0, n);
 319         }
 320         r = 0;
 321 out:
 322         mutex_unlock(&kvm->slots_lock);
 323         return r;
 324 }
 325
 326 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 327 {
 328         int r;
 329
 330         if (cap->flags)
 331                 return -EINVAL;
 332
 333         switch (cap->cap) {
 334         case KVM_CAP_S390_IRQCHIP:
 335                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 336                 kvm->arch.use_irqchip = 1;
 337                 r = 0;
 338                 break;
 339         case KVM_CAP_S390_USER_SIGP:
 340                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 341                 kvm->arch.user_sigp = 1;
 342                 r = 0;
 343                 break;
 344         case KVM_CAP_S390_VECTOR_REGISTERS:
 345                 if (MACHINE_HAS_VX) {
 346                         set_kvm_facility(kvm->arch.model.fac->mask, 129);
 347                         set_kvm_facility(kvm->arch.model.fac->list, 129);
 348                         r = 0;
 349                 } else
 350                         r = -EINVAL;
 351                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 352                          r ? "(not available)" : "(success)");
 353                 break;
 354         case KVM_CAP_S390_USER_STSI:
 355                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 356                 kvm->arch.user_stsi = 1;
 357                 r = 0;
 358                 break;
 359         default:
 360                 r = -EINVAL;
 361                 break;
 362         }
 363         return r;
 364 }
 365
 366 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 367 {
 368         int ret;
 369
 370         switch (attr->attr) {
 371         case KVM_S390_VM_MEM_LIMIT_SIZE:
 372                 ret = 0;
 373                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 374                          kvm->arch.gmap->asce_end);
 375                 if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
 376                         ret = -EFAULT;
 377                 break;
 378         default:
 379                 ret = -ENXIO;
 380                 break;
 381         }
 382         return ret;
 383 }
 384
 385 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 386 {
 387         int ret;
 388         unsigned int idx;
 389         switch (attr->attr) {
 390         case KVM_S390_VM_MEM_ENABLE_CMMA:
 391                 /* enable CMMA only for z10 and later (EDAT_1) */
 392                 ret = -EINVAL;
 393                 if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
 394                         break;
 395
 396                 ret = -EBUSY;
 397                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 398                 mutex_lock(&kvm->lock);
 399                 if (atomic_read(&kvm->online_vcpus) == 0) {
 400                         kvm->arch.use_cmma = 1;
 401                         ret = 0;
 402                 }
 403                 mutex_unlock(&kvm->lock);
 404                 break;
 405         case KVM_S390_VM_MEM_CLR_CMMA:
 406                 ret = -EINVAL;
 407                 if (!kvm->arch.use_cmma)
 408                         break;
 409
 410                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 411                 mutex_lock(&kvm->lock);
 412                 idx = srcu_read_lock(&kvm->srcu);
 413                 s390_reset_cmma(kvm->arch.gmap->mm);
 414                 srcu_read_unlock(&kvm->srcu, idx);
 415                 mutex_unlock(&kvm->lock);
 416                 ret = 0;
 417                 break;
 418         case KVM_S390_VM_MEM_LIMIT_SIZE: {
 419                 unsigned long new_limit;
 420
 421                 if (kvm_is_ucontrol(kvm))
 422                         return -EINVAL;
 423
 424                 if (get_user(new_limit, (u64 __user *)attr->addr))
 425                         return -EFAULT;
 426
 427                 if (new_limit > kvm->arch.gmap->asce_end)
 428                         return -E2BIG;
 429
 430                 ret = -EBUSY;
 431                 mutex_lock(&kvm->lock);
 432                 if (atomic_read(&kvm->online_vcpus) == 0) {
 433                         /* gmap_alloc will round the limit up */
 434                         struct gmap *new = gmap_alloc(current->mm, new_limit);
 435
 436                         if (!new) {
 437                                 ret = -ENOMEM;
 438                         } else {
 439                                 gmap_free(kvm->arch.gmap);
 440                                 new->private = kvm;
 441                                 kvm->arch.gmap = new;
 442                                 ret = 0;
 443                         }
 444                 }
 445                 mutex_unlock(&kvm->lock);
 446                 VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit);
 447                 break;
 448         }
 449         default:
 450                 ret = -ENXIO;
 451                 break;
 452         }
 453         return ret;
 454 }
 455
 456 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 457
 458 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 459 {
 460         struct kvm_vcpu *vcpu;
 461         int i;
 462
 463         if (!test_kvm_facility(kvm, 76))
 464                 return -EINVAL;
 465
 466         mutex_lock(&kvm->lock);
 467         switch (attr->attr) {
 468         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 469                 get_random_bytes(
 470                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 471                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 472                 kvm->arch.crypto.aes_kw = 1;
 473                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 474                 break;
 475         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 476                 get_random_bytes(
 477                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 478                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 479                 kvm->arch.crypto.dea_kw = 1;
 480                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 481                 break;
 482         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 483                 kvm->arch.crypto.aes_kw = 0;
 484                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 485                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 486                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 487                 break;
 488         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 489                 kvm->arch.crypto.dea_kw = 0;
 490                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 491                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 492                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 493                 break;
 494         default:
 495                 mutex_unlock(&kvm->lock);
 496                 return -ENXIO;
 497         }
 498
 499         kvm_for_each_vcpu(i, vcpu, kvm) {
 500                 kvm_s390_vcpu_crypto_setup(vcpu);
 501                 exit_sie(vcpu);
 502         }
 503         mutex_unlock(&kvm->lock);
 504         return 0;
 505 }
 506
 507 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 508 {
 509         u8 gtod_high;
 510
 511         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
 512                                            sizeof(gtod_high)))
 513                 return -EFAULT;
 514
 515         if (gtod_high != 0)
 516                 return -EINVAL;
 517         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x\n", gtod_high);
 518
 519         return 0;
 520 }
 521
 522 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 523 {
 524         struct kvm_vcpu *cur_vcpu;
 525         unsigned int vcpu_idx;
 526         u64 gtod;
 527
 528         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
 529                 return -EFAULT;
 530
 531         mutex_lock(&kvm->lock);
 532         preempt_disable();
 533         kvm->arch.epoch = gtod - get_tod_clock();
 534         kvm_s390_vcpu_block_all(kvm);
 535         kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm)
 536                 cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch;
 537         kvm_s390_vcpu_unblock_all(kvm);
 538         preempt_enable();
 539         mutex_unlock(&kvm->lock);
 540         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx\n", gtod);
 541         return 0;
 542 }
 543
 544 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 545 {
 546         int ret;
 547
 548         if (attr->flags)
 549                 return -EINVAL;
 550
 551         switch (attr->attr) {
 552         case KVM_S390_VM_TOD_HIGH:
 553                 ret = kvm_s390_set_tod_high(kvm, attr);
 554                 break;
 555         case KVM_S390_VM_TOD_LOW:
 556                 ret = kvm_s390_set_tod_low(kvm, attr);
 557                 break;
 558         default:
 559                 ret = -ENXIO;
 560                 break;
 561         }
 562         return ret;
 563 }
 564
 565 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 566 {
 567         u8 gtod_high = 0;
 568
 569         if (copy_to_user((void __user *)attr->addr, &gtod_high,
 570                                          sizeof(gtod_high)))
 571                 return -EFAULT;
 572         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x\n", gtod_high);
 573
 574         return 0;
 575 }
 576
 577 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 578 {
 579         u64 gtod;
 580
 581         preempt_disable();
 582         gtod = get_tod_clock() + kvm->arch.epoch;
 583         preempt_enable();
 584         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
 585                 return -EFAULT;
 586         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx\n", gtod);
 587
 588         return 0;
 589 }
 590
 591 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 592 {
 593         int ret;
 594
 595         if (attr->flags)
 596                 return -EINVAL;
 597
 598         switch (attr->attr) {
 599         case KVM_S390_VM_TOD_HIGH:
 600                 ret = kvm_s390_get_tod_high(kvm, attr);
 601                 break;
 602         case KVM_S390_VM_TOD_LOW:
 603                 ret = kvm_s390_get_tod_low(kvm, attr);
 604                 break;
 605         default:
 606                 ret = -ENXIO;
 607                 break;
 608         }
 609         return ret;
 610 }
 611
 612 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 613 {
 614         struct kvm_s390_vm_cpu_processor *proc;
 615         int ret = 0;
 616
 617         mutex_lock(&kvm->lock);
 618         if (atomic_read(&kvm->online_vcpus)) {
 619                 ret = -EBUSY;
 620                 goto out;
 621         }
 622         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 623         if (!proc) {
 624                 ret = -ENOMEM;
 625                 goto out;
 626         }
 627         if (!copy_from_user(proc, (void __user *)attr->addr,
 628                             sizeof(*proc))) {
 629                 memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
 630                        sizeof(struct cpuid));
 631                 kvm->arch.model.ibc = proc->ibc;
 632                 memcpy(kvm->arch.model.fac->list, proc->fac_list,
 633                        S390_ARCH_FAC_LIST_SIZE_BYTE);
 634         } else
 635                 ret = -EFAULT;
 636         kfree(proc);
 637 out:
 638         mutex_unlock(&kvm->lock);
 639         return ret;
 640 }
 641
 642 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 643 {
 644         int ret = -ENXIO;
 645
 646         switch (attr->attr) {
 647         case KVM_S390_VM_CPU_PROCESSOR:
 648                 ret = kvm_s390_set_processor(kvm, attr);
 649                 break;
 650         }
 651         return ret;
 652 }
 653
 654 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 655 {
 656         struct kvm_s390_vm_cpu_processor *proc;
 657         int ret = 0;
 658
 659         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 660         if (!proc) {
 661                 ret = -ENOMEM;
 662                 goto out;
 663         }
 664         memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
 665         proc->ibc = kvm->arch.model.ibc;
 666         memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
 667         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
 668                 ret = -EFAULT;
 669         kfree(proc);
 670 out:
 671         return ret;
 672 }
 673
 674 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
 675 {
 676         struct kvm_s390_vm_cpu_machine *mach;
 677         int ret = 0;
 678
 679         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
 680         if (!mach) {
 681                 ret = -ENOMEM;
 682                 goto out;
 683         }
 684         get_cpu_id((struct cpuid *) &mach->cpuid);
 685         mach->ibc = sclp.ibc;
 686         memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
 687                S390_ARCH_FAC_LIST_SIZE_BYTE);
 688         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
 689                S390_ARCH_FAC_LIST_SIZE_BYTE);
 690         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
 691                 ret = -EFAULT;
 692         kfree(mach);
 693 out:
 694         return ret;
 695 }
 696
 697 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 698 {
 699         int ret = -ENXIO;
 700
 701         switch (attr->attr) {
 702         case KVM_S390_VM_CPU_PROCESSOR:
 703                 ret = kvm_s390_get_processor(kvm, attr);
 704                 break;
 705         case KVM_S390_VM_CPU_MACHINE:
 706                 ret = kvm_s390_get_machine(kvm, attr);
 707                 break;
 708         }
 709         return ret;
 710 }
 711
 712 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 713 {
 714         int ret;
 715
 716         switch (attr->group) {
 717         case KVM_S390_VM_MEM_CTRL:
 718                 ret = kvm_s390_set_mem_control(kvm, attr);
 719                 break;
 720         case KVM_S390_VM_TOD:
 721                 ret = kvm_s390_set_tod(kvm, attr);
 722                 break;
 723         case KVM_S390_VM_CPU_MODEL:
 724                 ret = kvm_s390_set_cpu_model(kvm, attr);
 725                 break;
 726         case KVM_S390_VM_CRYPTO:
 727                 ret = kvm_s390_vm_set_crypto(kvm, attr);
 728                 break;
 729         default:
 730                 ret = -ENXIO;
 731                 break;
 732         }
 733
 734         return ret;
 735 }
 736
 737 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 738 {
 739         int ret;
 740
 741         switch (attr->group) {
 742         case KVM_S390_VM_MEM_CTRL:
 743                 ret = kvm_s390_get_mem_control(kvm, attr);
 744                 break;
 745         case KVM_S390_VM_TOD:
 746                 ret = kvm_s390_get_tod(kvm, attr);
 747                 break;
 748         case KVM_S390_VM_CPU_MODEL:
 749                 ret = kvm_s390_get_cpu_model(kvm, attr);
 750                 break;
 751         default:
 752                 ret = -ENXIO;
 753                 break;
 754         }
 755
 756         return ret;
 757 }
 758
 759 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 760 {
 761         int ret;
 762
 763         switch (attr->group) {
 764         case KVM_S390_VM_MEM_CTRL:
 765                 switch (attr->attr) {
 766                 case KVM_S390_VM_MEM_ENABLE_CMMA:
 767                 case KVM_S390_VM_MEM_CLR_CMMA:
 768                 case KVM_S390_VM_MEM_LIMIT_SIZE:
 769                         ret = 0;
 770                         break;
 771                 default:
 772                         ret = -ENXIO;
 773                         break;
 774                 }
 775                 break;
 776         case KVM_S390_VM_TOD:
 777                 switch (attr->attr) {
 778                 case KVM_S390_VM_TOD_LOW:
 779                 case KVM_S390_VM_TOD_HIGH:
 780                         ret = 0;
 781                         break;
 782                 default:
 783                         ret = -ENXIO;
 784                         break;
 785                 }
 786                 break;
 787         case KVM_S390_VM_CPU_MODEL:
 788                 switch (attr->attr) {
 789                 case KVM_S390_VM_CPU_PROCESSOR:
 790                 case KVM_S390_VM_CPU_MACHINE:
 791                         ret = 0;
 792                         break;
 793                 default:
 794                         ret = -ENXIO;
 795                         break;
 796                 }
 797                 break;
 798         case KVM_S390_VM_CRYPTO:
 799                 switch (attr->attr) {
 800                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 801                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 802                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 803                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 804                         ret = 0;
 805                         break;
 806                 default:
 807                         ret = -ENXIO;
 808                         break;
 809                 }
 810                 break;
 811         default:
 812                 ret = -ENXIO;
 813                 break;
 814         }
 815
 816         return ret;
 817 }
 818
 819 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 820 {
 821         uint8_t *keys;
 822         uint64_t hva;
 823         unsigned long curkey;
 824         int i, r = 0;
 825
 826         if (args->flags != 0)
 827                 return -EINVAL;
 828
 829         /* Is this guest using storage keys? */
 830         if (!mm_use_skey(current->mm))
 831                 return KVM_S390_GET_SKEYS_NONE;
 832
 833         /* Enforce sane limit on memory allocation */
 834         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
 835                 return -EINVAL;
 836
 837         keys = kmalloc_array(args->count, sizeof(uint8_t),
 838                              GFP_KERNEL | __GFP_NOWARN);
 839         if (!keys)
 840                 keys = vmalloc(sizeof(uint8_t) * args->count);
 841         if (!keys)
 842                 return -ENOMEM;
 843
 844         for (i = 0; i < args->count; i++) {
 845                 hva = gfn_to_hva(kvm, args->start_gfn + i);
 846                 if (kvm_is_error_hva(hva)) {
 847                         r = -EFAULT;
 848                         goto out;
 849                 }
 850
 851                 curkey = get_guest_storage_key(current->mm, hva);
 852                 if (IS_ERR_VALUE(curkey)) {
 853                         r = curkey;
 854                         goto out;
 855                 }
 856                 keys[i] = curkey;
 857         }
 858
 859         r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
 860                          sizeof(uint8_t) * args->count);
 861         if (r)
 862                 r = -EFAULT;
 863 out:
 864         kvfree(keys);
 865         return r;
 866 }
 867
 868 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 869 {
 870         uint8_t *keys;
 871         uint64_t hva;
 872         int i, r = 0;
 873
 874         if (args->flags != 0)
 875                 return -EINVAL;
 876
 877         /* Enforce sane limit on memory allocation */
 878         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
 879                 return -EINVAL;
 880
 881         keys = kmalloc_array(args->count, sizeof(uint8_t),
 882                              GFP_KERNEL | __GFP_NOWARN);
 883         if (!keys)
 884                 keys = vmalloc(sizeof(uint8_t) * args->count);
 885         if (!keys)
 886                 return -ENOMEM;
 887
 888         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
 889                            sizeof(uint8_t) * args->count);
 890         if (r) {
 891                 r = -EFAULT;
 892                 goto out;
 893         }
 894
 895         /* Enable storage key handling for the guest */
 896         r = s390_enable_skey();
 897         if (r)
 898                 goto out;
 899
 900         for (i = 0; i < args->count; i++) {
 901                 hva = gfn_to_hva(kvm, args->start_gfn + i);
 902                 if (kvm_is_error_hva(hva)) {
 903                         r = -EFAULT;
 904                         goto out;
 905                 }
 906
 907                 /* Lowest order bit is reserved */
 908                 if (keys[i] & 0x01) {
 909                         r = -EINVAL;
 910                         goto out;
 911                 }
 912
 913                 r = set_guest_storage_key(current->mm, hva,
 914                                           (unsigned long)keys[i], 0);
 915                 if (r)
 916                         goto out;
 917         }
 918 out:
 919         kvfree(keys);
 920         return r;
 921 }
 922
 923 long kvm_arch_vm_ioctl(struct file *filp,
 924                        unsigned int ioctl, unsigned long arg)
 925 {
 926         struct kvm *kvm = filp->private_data;
 927         void __user *argp = (void __user *)arg;
 928         struct kvm_device_attr attr;
 929         int r;
 930
 931         switch (ioctl) {
 932         case KVM_S390_INTERRUPT: {
 933                 struct kvm_s390_interrupt s390int;
 934
 935                 r = -EFAULT;
 936                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
 937                         break;
 938                 r = kvm_s390_inject_vm(kvm, &s390int);
 939                 break;
 940         }
 941         case KVM_ENABLE_CAP: {
 942                 struct kvm_enable_cap cap;
 943                 r = -EFAULT;
 944                 if (copy_from_user(&cap, argp, sizeof(cap)))
 945                         break;
 946                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
 947                 break;
 948         }
 949         case KVM_CREATE_IRQCHIP: {
 950                 struct kvm_irq_routing_entry routing;
 951
 952                 r = -EINVAL;
 953                 if (kvm->arch.use_irqchip) {
 954                         /* Set up dummy routing. */
 955                         memset(&routing, 0, sizeof(routing));
 956                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
 957                 }
 958                 break;
 959         }
 960         case KVM_SET_DEVICE_ATTR: {
 961                 r = -EFAULT;
 962                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
 963                         break;
 964                 r = kvm_s390_vm_set_attr(kvm, &attr);
 965                 break;
 966         }
 967         case KVM_GET_DEVICE_ATTR: {
 968                 r = -EFAULT;
 969                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
 970                         break;
 971                 r = kvm_s390_vm_get_attr(kvm, &attr);
 972                 break;
 973         }
 974         case KVM_HAS_DEVICE_ATTR: {
 975                 r = -EFAULT;
 976                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
 977                         break;
 978                 r = kvm_s390_vm_has_attr(kvm, &attr);
 979                 break;
 980         }
 981         case KVM_S390_GET_SKEYS: {
 982                 struct kvm_s390_skeys args;
 983
 984                 r = -EFAULT;
 985                 if (copy_from_user(&args, argp,
 986                                    sizeof(struct kvm_s390_skeys)))
 987                         break;
 988                 r = kvm_s390_get_skeys(kvm, &args);
 989                 break;
 990         }
 991         case KVM_S390_SET_SKEYS: {
 992                 struct kvm_s390_skeys args;
 993
 994                 r = -EFAULT;
 995                 if (copy_from_user(&args, argp,
 996                                    sizeof(struct kvm_s390_skeys)))
 997                         break;
 998                 r = kvm_s390_set_skeys(kvm, &args);
 999                 break;
1000         }
1001         default:
1002                 r = -ENOTTY;
1003         }
1004
1005         return r;
1006 }
1007
1008 static int kvm_s390_query_ap_config(u8 *config)
1009 {
1010         u32 fcn_code = 0x04000000UL;
1011         u32 cc = 0;
1012
1013         memset(config, 0, 128);
1014         asm volatile(
1015                 "lgr 0,%1\n"
1016                 "lgr 2,%2\n"
1017                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1018                 "0: ipm %0\n"
1019                 "srl %0,28\n"
1020                 "1:\n"
1021                 EX_TABLE(0b, 1b)
1022                 : "+r" (cc)
1023                 : "r" (fcn_code), "r" (config)
1024                 : "cc", "0", "2", "memory"
1025         );
1026
1027         return cc;
1028 }
1029
1030 static int kvm_s390_apxa_installed(void)
1031 {
1032         u8 config[128];
1033         int cc;
1034
1035         if (test_facility(2) && test_facility(12)) {
1036                 cc = kvm_s390_query_ap_config(config);
1037
1038                 if (cc)
1039                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1040                 else
1041                         return config[0] & 0x40;
1042         }
1043
1044         return 0;
1045 }
1046
1047 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1048 {
1049         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1050
1051         if (kvm_s390_apxa_installed())
1052                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1053         else
1054                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1055 }
1056
1057 static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
1058 {
1059         get_cpu_id(cpu_id);
1060         cpu_id->version = 0xff;
1061 }
1062
1063 static int kvm_s390_crypto_init(struct kvm *kvm)
1064 {
1065         if (!test_kvm_facility(kvm, 76))
1066                 return 0;
1067
1068         kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
1069                                          GFP_KERNEL | GFP_DMA);
1070         if (!kvm->arch.crypto.crycb)
1071                 return -ENOMEM;
1072
1073         kvm_s390_set_crycb_format(kvm);
1074
1075         /* Enable AES/DEA protected key functions by default */
1076         kvm->arch.crypto.aes_kw = 1;
1077         kvm->arch.crypto.dea_kw = 1;
1078         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1079                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1080         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1081                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1082
1083         return 0;
1084 }
1085
1086 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1087 {
1088         int i, rc;
1089         char debug_name[16];
1090         static unsigned long sca_offset;
1091
1092         rc = -EINVAL;
1093 #ifdef CONFIG_KVM_S390_UCONTROL
1094         if (type & ~KVM_VM_S390_UCONTROL)
1095                 goto out_err;
1096         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1097                 goto out_err;
1098 #else
1099         if (type)
1100                 goto out_err;
1101 #endif
1102
1103         rc = s390_enable_sie();
1104         if (rc)
1105                 goto out_err;
1106
1107         rc = -ENOMEM;
1108
1109         kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
1110         if (!kvm->arch.sca)
1111                 goto out_err;
1112         spin_lock(&kvm_lock);
1113         sca_offset = (sca_offset + 16) & 0x7f0;
1114         kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
1115         spin_unlock(&kvm_lock);
1116
1117         sprintf(debug_name, "kvm-%u", current->pid);
1118
1119         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1120         if (!kvm->arch.dbf)
1121                 goto out_err;
1122
1123         /*
1124          * The architectural maximum amount of facilities is 16 kbit. To store
1125          * this amount, 2 kbyte of memory is required. Thus we need a full
1126          * page to hold the guest facility list (arch.model.fac->list) and the
1127          * facility mask (arch.model.fac->mask). Its address size has to be
1128          * 31 bits and word aligned.
1129          */
1130         kvm->arch.model.fac =
1131                 (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1132         if (!kvm->arch.model.fac)
1133                 goto out_err;
1134
1135         /* Populate the facility mask initially. */
1136         memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
1137                S390_ARCH_FAC_LIST_SIZE_BYTE);
1138         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1139                 if (i < kvm_s390_fac_list_mask_size())
1140                         kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
1141                 else
1142                         kvm->arch.model.fac->mask[i] = 0UL;
1143         }
1144
1145         /* Populate the facility list initially. */
1146         memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
1147                S390_ARCH_FAC_LIST_SIZE_BYTE);
1148
1149         kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
1150         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1151
1152         if (kvm_s390_crypto_init(kvm) < 0)
1153                 goto out_err;
1154
1155         spin_lock_init(&kvm->arch.float_int.lock);
1156         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1157                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1158         init_waitqueue_head(&kvm->arch.ipte_wq);
1159         mutex_init(&kvm->arch.ipte_mutex);
1160
1161         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1162         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1163
1164         if (type & KVM_VM_S390_UCONTROL) {
1165                 kvm->arch.gmap = NULL;
1166         } else {
1167                 kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
1168                 if (!kvm->arch.gmap)
1169                         goto out_err;
1170                 kvm->arch.gmap->private = kvm;
1171                 kvm->arch.gmap->pfault_enabled = 0;
1172         }
1173
1174         kvm->arch.css_support = 0;
1175         kvm->arch.use_irqchip = 0;
1176         kvm->arch.epoch = 0;
1177
1178         spin_lock_init(&kvm->arch.start_stop_lock);
1179         KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
1180
1181         return 0;
1182 out_err:
1183         kfree(kvm->arch.crypto.crycb);
1184         free_page((unsigned long)kvm->arch.model.fac);
1185         debug_unregister(kvm->arch.dbf);
1186         free_page((unsigned long)(kvm->arch.sca));
1187         KVM_EVENT(3, "creation of vm failed: %d", rc);
1188         return rc;
1189 }
1190
1191 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1192 {
1193         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1194         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1195         kvm_s390_clear_local_irqs(vcpu);
1196         kvm_clear_async_pf_completion_queue(vcpu);
1197         if (!kvm_is_ucontrol(vcpu->kvm)) {
1198                 clear_bit(63 - vcpu->vcpu_id,
1199                           (unsigned long *) &vcpu->kvm->arch.sca->mcn);
1200                 if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
1201                     (__u64) vcpu->arch.sie_block)
1202                         vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
1203         }
1204         smp_mb();
1205
1206         if (kvm_is_ucontrol(vcpu->kvm))
1207                 gmap_free(vcpu->arch.gmap);
1208
1209         if (vcpu->kvm->arch.use_cmma)
1210                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1211         free_page((unsigned long)(vcpu->arch.sie_block));
1212
1213         kvm_vcpu_uninit(vcpu);
1214         kmem_cache_free(kvm_vcpu_cache, vcpu);
1215 }
1216
1217 static void kvm_free_vcpus(struct kvm *kvm)
1218 {
1219         unsigned int i;
1220         struct kvm_vcpu *vcpu;
1221
1222         kvm_for_each_vcpu(i, vcpu, kvm)
1223                 kvm_arch_vcpu_destroy(vcpu);
1224
1225         mutex_lock(&kvm->lock);
1226         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1227                 kvm->vcpus[i] = NULL;
1228
1229         atomic_set(&kvm->online_vcpus, 0);
1230         mutex_unlock(&kvm->lock);
1231 }
1232
1233 void kvm_arch_destroy_vm(struct kvm *kvm)
1234 {
1235         kvm_free_vcpus(kvm);
1236         free_page((unsigned long)kvm->arch.model.fac);
1237         free_page((unsigned long)(kvm->arch.sca));
1238         debug_unregister(kvm->arch.dbf);
1239         kfree(kvm->arch.crypto.crycb);
1240         if (!kvm_is_ucontrol(kvm))
1241                 gmap_free(kvm->arch.gmap);
1242         kvm_s390_destroy_adapters(kvm);
1243         kvm_s390_clear_float_irqs(kvm);
1244         KVM_EVENT(3, "vm 0x%p destroyed", kvm);
1245 }
1246
1247 /* Section: vcpu related */
1248 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1249 {
1250         vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1251         if (!vcpu->arch.gmap)
1252                 return -ENOMEM;
1253         vcpu->arch.gmap->private = vcpu->kvm;
1254
1255         return 0;
1256 }
1257
1258 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1259 {
1260         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1261         kvm_clear_async_pf_completion_queue(vcpu);
1262         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1263                                     KVM_SYNC_GPRS |
1264                                     KVM_SYNC_ACRS |
1265                                     KVM_SYNC_CRS |
1266                                     KVM_SYNC_ARCH0 |
1267                                     KVM_SYNC_PFAULT;
1268         if (test_kvm_facility(vcpu->kvm, 129))
1269                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1270
1271         if (kvm_is_ucontrol(vcpu->kvm))
1272                 return __kvm_ucontrol_vcpu_init(vcpu);
1273
1274         return 0;
1275 }
1276
1277 /*
1278  * Backs up the current FP/VX register save area on a particular
1279  * destination.  Used to switch between different register save
1280  * areas.
1281  */
1282 static inline void save_fpu_to(struct fpu *dst)
1283 {
1284         dst->fpc = current->thread.fpu.fpc;
1285         dst->flags = current->thread.fpu.flags;
1286         dst->regs = current->thread.fpu.regs;
1287 }
1288
1289 /*
1290  * Switches the FP/VX register save area from which to lazy
1291  * restore register contents.
1292  */
1293 static inline void load_fpu_from(struct fpu *from)
1294 {
1295         current->thread.fpu.fpc = from->fpc;
1296         current->thread.fpu.flags = from->flags;
1297         current->thread.fpu.regs = from->regs;
1298 }
1299
1300 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1301 {
1302         /* Save host register state */
1303         save_fpu_regs();
1304         save_fpu_to(&vcpu->arch.host_fpregs);
1305
1306         if (test_kvm_facility(vcpu->kvm, 129)) {
1307                 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1308                 current->thread.fpu.flags = FPU_USE_VX;
1309                 /*
1310                  * Use the register save area in the SIE-control block
1311                  * for register restore and save in kvm_arch_vcpu_put()
1312                  */
1313                 current->thread.fpu.vxrs =
1314                         (__vector128 *)&vcpu->run->s.regs.vrs;
1315                 /* Always enable the vector extension for KVM */
1316                 __ctl_set_vx();
1317         } else
1318                 load_fpu_from(&vcpu->arch.guest_fpregs);
1319
1320         if (test_fp_ctl(current->thread.fpu.fpc))
1321                 /* User space provided an invalid FPC, let's clear it */
1322                 current->thread.fpu.fpc = 0;
1323
1324         save_access_regs(vcpu->arch.host_acrs);
1325         restore_access_regs(vcpu->run->s.regs.acrs);
1326         gmap_enable(vcpu->arch.gmap);
1327         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1328 }
1329
1330 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1331 {
1332         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1333         gmap_disable(vcpu->arch.gmap);
1334
1335         save_fpu_regs();
1336
1337         if (test_kvm_facility(vcpu->kvm, 129))
1338                 /*
1339                  * kvm_arch_vcpu_load() set up the register save area to
1340                  * the &vcpu->run->s.regs.vrs and, thus, the vector registers
1341                  * are already saved.  Only the floating-point control must be
1342                  * copied.
1343                  */
1344                 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1345         else
1346                 save_fpu_to(&vcpu->arch.guest_fpregs);
1347         load_fpu_from(&vcpu->arch.host_fpregs);
1348
1349         save_access_regs(vcpu->run->s.regs.acrs);
1350         restore_access_regs(vcpu->arch.host_acrs);
1351 }
1352
1353 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1354 {
1355         /* this equals initial cpu reset in pop, but we don't switch to ESA */
1356         vcpu->arch.sie_block->gpsw.mask = 0UL;
1357         vcpu->arch.sie_block->gpsw.addr = 0UL;
1358         kvm_s390_set_prefix(vcpu, 0);
1359         vcpu->arch.sie_block->cputm     = 0UL;
1360         vcpu->arch.sie_block->ckc       = 0UL;
1361         vcpu->arch.sie_block->todpr     = 0;
1362         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1363         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1364         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1365         vcpu->arch.guest_fpregs.fpc = 0;
1366         asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
1367         vcpu->arch.sie_block->gbea = 1;
1368         vcpu->arch.sie_block->pp = 0;
1369         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1370         kvm_clear_async_pf_completion_queue(vcpu);
1371         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1372                 kvm_s390_vcpu_stop(vcpu);
1373         kvm_s390_clear_local_irqs(vcpu);
1374 }
1375
1376 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1377 {
1378         mutex_lock(&vcpu->kvm->lock);
1379         preempt_disable();
1380         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1381         preempt_enable();
1382         mutex_unlock(&vcpu->kvm->lock);
1383         if (!kvm_is_ucontrol(vcpu->kvm))
1384                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1385 }
1386
1387 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1388 {
1389         if (!test_kvm_facility(vcpu->kvm, 76))
1390                 return;
1391
1392         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1393
1394         if (vcpu->kvm->arch.crypto.aes_kw)
1395                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1396         if (vcpu->kvm->arch.crypto.dea_kw)
1397                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1398
1399         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1400 }
1401
1402 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1403 {
1404         free_page(vcpu->arch.sie_block->cbrlo);
1405         vcpu->arch.sie_block->cbrlo = 0;
1406 }
1407
1408 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1409 {
1410         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1411         if (!vcpu->arch.sie_block->cbrlo)
1412                 return -ENOMEM;
1413
1414         vcpu->arch.sie_block->ecb2 |= 0x80;
1415         vcpu->arch.sie_block->ecb2 &= ~0x08;
1416         return 0;
1417 }
1418
1419 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1420 {
1421         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1422
1423         vcpu->arch.cpu_id = model->cpu_id;
1424         vcpu->arch.sie_block->ibc = model->ibc;
1425         vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
1426 }
1427
1428 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1429 {
1430         int rc = 0;
1431
1432         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1433                                                     CPUSTAT_SM |
1434                                                     CPUSTAT_STOPPED);
1435
1436         if (test_kvm_facility(vcpu->kvm, 78))
1437                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1438         else if (test_kvm_facility(vcpu->kvm, 8))
1439                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1440
1441         kvm_s390_vcpu_setup_model(vcpu);
1442
1443         vcpu->arch.sie_block->ecb   = 6;
1444         if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1445                 vcpu->arch.sie_block->ecb |= 0x10;
1446
1447         vcpu->arch.sie_block->ecb2  = 8;
1448         vcpu->arch.sie_block->eca   = 0xC1002000U;
1449         if (sclp.has_siif)
1450                 vcpu->arch.sie_block->eca |= 1;
1451         if (sclp.has_sigpif)
1452                 vcpu->arch.sie_block->eca |= 0x10000000U;
1453         if (test_kvm_facility(vcpu->kvm, 129)) {
1454                 vcpu->arch.sie_block->eca |= 0x00020000;
1455                 vcpu->arch.sie_block->ecd |= 0x20000000;
1456         }
1457         vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1458
1459         if (vcpu->kvm->arch.use_cmma) {
1460                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1461                 if (rc)
1462                         return rc;
1463         }
1464         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1465         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1466
1467         kvm_s390_vcpu_crypto_setup(vcpu);
1468
1469         return rc;
1470 }
1471
1472 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1473                                       unsigned int id)
1474 {
1475         struct kvm_vcpu *vcpu;
1476         struct sie_page *sie_page;
1477         int rc = -EINVAL;
1478
1479         if (id >= KVM_MAX_VCPUS)
1480                 goto out;
1481
1482         rc = -ENOMEM;
1483
1484         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1485         if (!vcpu)
1486                 goto out;
1487
1488         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1489         if (!sie_page)
1490                 goto out_free_cpu;
1491
1492         vcpu->arch.sie_block = &sie_page->sie_block;
1493         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1494
1495         vcpu->arch.sie_block->icpua = id;
1496         if (!kvm_is_ucontrol(kvm)) {
1497                 if (!kvm->arch.sca) {
1498                         WARN_ON_ONCE(1);
1499                         goto out_free_cpu;
1500                 }
1501                 if (!kvm->arch.sca->cpu[id].sda)
1502                         kvm->arch.sca->cpu[id].sda =
1503                                 (__u64) vcpu->arch.sie_block;
1504                 vcpu->arch.sie_block->scaoh =
1505                         (__u32)(((__u64)kvm->arch.sca) >> 32);
1506                 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
1507                 set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
1508         }
1509
1510         spin_lock_init(&vcpu->arch.local_int.lock);
1511         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1512         vcpu->arch.local_int.wq = &vcpu->wq;
1513         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1514
1515         /*
1516          * Allocate a save area for floating-point registers.  If the vector
1517          * extension is available, register contents are saved in the SIE
1518          * control block.  The allocated save area is still required in
1519          * particular places, for example, in kvm_s390_vcpu_store_status().
1520          */
1521         vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
1522                                                GFP_KERNEL);
1523         if (!vcpu->arch.guest_fpregs.fprs) {
1524                 rc = -ENOMEM;
1525                 goto out_free_sie_block;
1526         }
1527
1528         rc = kvm_vcpu_init(vcpu, kvm, id);
1529         if (rc)
1530                 goto out_free_sie_block;
1531         VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
1532                  vcpu->arch.sie_block);
1533         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1534
1535         return vcpu;
1536 out_free_sie_block:
1537         free_page((unsigned long)(vcpu->arch.sie_block));
1538 out_free_cpu:
1539         kmem_cache_free(kvm_vcpu_cache, vcpu);
1540 out:
1541         return ERR_PTR(rc);
1542 }
1543
1544 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1545 {
1546         return kvm_s390_vcpu_has_irq(vcpu, 0);
1547 }
1548
1549 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1550 {
1551         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1552         exit_sie(vcpu);
1553 }
1554
1555 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1556 {
1557         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1558 }
1559
1560 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1561 {
1562         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1563         exit_sie(vcpu);
1564 }
1565
1566 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1567 {
1568         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1569 }
1570
1571 /*
1572  * Kick a guest cpu out of SIE and wait until SIE is not running.
1573  * If the CPU is not running (e.g. waiting as idle) the function will
1574  * return immediately. */
1575 void exit_sie(struct kvm_vcpu *vcpu)
1576 {
1577         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1578         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1579                 cpu_relax();
1580 }
1581
1582 /* Kick a guest cpu out of SIE to process a request synchronously */
1583 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
1584 {
1585         kvm_make_request(req, vcpu);
1586         kvm_s390_vcpu_request(vcpu);
1587 }
1588
1589 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1590 {
1591         int i;
1592         struct kvm *kvm = gmap->private;
1593         struct kvm_vcpu *vcpu;
1594
1595         kvm_for_each_vcpu(i, vcpu, kvm) {
1596                 /* match against both prefix pages */
1597                 if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1598                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1599                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
1600                 }
1601         }
1602 }
1603
1604 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1605 {
1606         /* kvm common code refers to this, but never calls it */
1607         BUG();
1608         return 0;
1609 }
1610
1611 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1612                                            struct kvm_one_reg *reg)
1613 {
1614         int r = -EINVAL;
1615
1616         switch (reg->id) {
1617         case KVM_REG_S390_TODPR:
1618                 r = put_user(vcpu->arch.sie_block->todpr,
1619                              (u32 __user *)reg->addr);
1620                 break;
1621         case KVM_REG_S390_EPOCHDIFF:
1622                 r = put_user(vcpu->arch.sie_block->epoch,
1623                              (u64 __user *)reg->addr);
1624                 break;
1625         case KVM_REG_S390_CPU_TIMER:
1626                 r = put_user(vcpu->arch.sie_block->cputm,
1627                              (u64 __user *)reg->addr);
1628                 break;
1629         case KVM_REG_S390_CLOCK_COMP:
1630                 r = put_user(vcpu->arch.sie_block->ckc,
1631                              (u64 __user *)reg->addr);
1632                 break;
1633         case KVM_REG_S390_PFTOKEN:
1634                 r = put_user(vcpu->arch.pfault_token,
1635                              (u64 __user *)reg->addr);
1636                 break;
1637         case KVM_REG_S390_PFCOMPARE:
1638                 r = put_user(vcpu->arch.pfault_compare,
1639                              (u64 __user *)reg->addr);
1640                 break;
1641         case KVM_REG_S390_PFSELECT:
1642                 r = put_user(vcpu->arch.pfault_select,
1643                              (u64 __user *)reg->addr);
1644                 break;
1645         case KVM_REG_S390_PP:
1646                 r = put_user(vcpu->arch.sie_block->pp,
1647                              (u64 __user *)reg->addr);
1648                 break;
1649         case KVM_REG_S390_GBEA:
1650                 r = put_user(vcpu->arch.sie_block->gbea,
1651                              (u64 __user *)reg->addr);
1652                 break;
1653         default:
1654                 break;
1655         }
1656
1657         return r;
1658 }
1659
1660 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
1661                                            struct kvm_one_reg *reg)
1662 {
1663         int r = -EINVAL;
1664
1665         switch (reg->id) {
1666         case KVM_REG_S390_TODPR:
1667                 r = get_user(vcpu->arch.sie_block->todpr,
1668                              (u32 __user *)reg->addr);
1669                 break;
1670         case KVM_REG_S390_EPOCHDIFF:
1671                 r = get_user(vcpu->arch.sie_block->epoch,
1672                              (u64 __user *)reg->addr);
1673                 break;
1674         case KVM_REG_S390_CPU_TIMER:
1675                 r = get_user(vcpu->arch.sie_block->cputm,
1676                              (u64 __user *)reg->addr);
1677                 break;
1678         case KVM_REG_S390_CLOCK_COMP:
1679                 r = get_user(vcpu->arch.sie_block->ckc,
1680                              (u64 __user *)reg->addr);
1681                 break;
1682         case KVM_REG_S390_PFTOKEN:
1683                 r = get_user(vcpu->arch.pfault_token,
1684                              (u64 __user *)reg->addr);
1685                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1686                         kvm_clear_async_pf_completion_queue(vcpu);
1687                 break;
1688         case KVM_REG_S390_PFCOMPARE:
1689                 r = get_user(vcpu->arch.pfault_compare,
1690                              (u64 __user *)reg->addr);
1691                 break;
1692         case KVM_REG_S390_PFSELECT:
1693                 r = get_user(vcpu->arch.pfault_select,
1694                              (u64 __user *)reg->addr);
1695                 break;
1696         case KVM_REG_S390_PP:
1697                 r = get_user(vcpu->arch.sie_block->pp,
1698                              (u64 __user *)reg->addr);
1699                 break;
1700         case KVM_REG_S390_GBEA:
1701                 r = get_user(vcpu->arch.sie_block->gbea,
1702                              (u64 __user *)reg->addr);
1703                 break;
1704         default:
1705                 break;
1706         }
1707
1708         return r;
1709 }
1710
1711 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
1712 {
1713         kvm_s390_vcpu_initial_reset(vcpu);
1714         return 0;
1715 }
1716
1717 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1718 {
1719         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
1720         return 0;
1721 }
1722
1723 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1724 {
1725         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
1726         return 0;
1727 }
1728
1729 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1730                                   struct kvm_sregs *sregs)
1731 {
1732         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
1733         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
1734         restore_access_regs(vcpu->run->s.regs.acrs);
1735         return 0;
1736 }
1737
1738 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1739                                   struct kvm_sregs *sregs)
1740 {
1741         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
1742         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
1743         return 0;
1744 }
1745
1746 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1747 {
1748         if (test_fp_ctl(fpu->fpc))
1749                 return -EINVAL;
1750         memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
1751         vcpu->arch.guest_fpregs.fpc = fpu->fpc;
1752         save_fpu_regs();
1753         load_fpu_from(&vcpu->arch.guest_fpregs);
1754         return 0;
1755 }
1756
1757 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1758 {
1759         memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
1760         fpu->fpc = vcpu->arch.guest_fpregs.fpc;
1761         return 0;
1762 }
1763
1764 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
1765 {
1766         int rc = 0;
1767
1768         if (!is_vcpu_stopped(vcpu))
1769                 rc = -EBUSY;
1770         else {
1771                 vcpu->run->psw_mask = psw.mask;
1772                 vcpu->run->psw_addr = psw.addr;
1773         }
1774         return rc;
1775 }
1776
1777 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1778                                   struct kvm_translation *tr)
1779 {
1780         return -EINVAL; /* not implemented yet */
1781 }
1782
1783 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
1784                               KVM_GUESTDBG_USE_HW_BP | \
1785                               KVM_GUESTDBG_ENABLE)
1786
1787 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1788                                         struct kvm_guest_debug *dbg)
1789 {
1790         int rc = 0;
1791
1792         vcpu->guest_debug = 0;
1793         kvm_s390_clear_bp_data(vcpu);
1794
1795         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
1796                 return -EINVAL;
1797
1798         if (dbg->control & KVM_GUESTDBG_ENABLE) {
1799                 vcpu->guest_debug = dbg->control;
1800                 /* enforce guest PER */
1801                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1802
1803                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
1804                         rc = kvm_s390_import_bp_data(vcpu, dbg);
1805         } else {
1806                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1807                 vcpu->arch.guestdbg.last_bp = 0;
1808         }
1809
1810         if (rc) {
1811                 vcpu->guest_debug = 0;
1812                 kvm_s390_clear_bp_data(vcpu);
1813                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1814         }
1815
1816         return rc;
1817 }
1818
1819 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
1820                                     struct kvm_mp_state *mp_state)
1821 {
1822         /* CHECK_STOP and LOAD are not supported yet */
1823         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
1824                                        KVM_MP_STATE_OPERATING;
1825 }
1826
1827 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
1828                                     struct kvm_mp_state *mp_state)
1829 {
1830         int rc = 0;
1831
1832         /* user space knows about this interface - let it control the state */
1833         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
1834
1835         switch (mp_state->mp_state) {
1836         case KVM_MP_STATE_STOPPED:
1837                 kvm_s390_vcpu_stop(vcpu);
1838                 break;
1839         case KVM_MP_STATE_OPERATING:
1840                 kvm_s390_vcpu_start(vcpu);
1841                 break;
1842         case KVM_MP_STATE_LOAD:
1843         case KVM_MP_STATE_CHECK_STOP:
1844                 /* fall through - CHECK_STOP and LOAD are not supported yet */
1845         default:
1846                 rc = -ENXIO;
1847         }
1848
1849         return rc;
1850 }
1851
1852 static bool ibs_enabled(struct kvm_vcpu *vcpu)
1853 {
1854         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
1855 }
1856
1857 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
1858 {
1859 retry:
1860         kvm_s390_vcpu_request_handled(vcpu);
1861         if (!vcpu->requests)
1862                 return 0;
1863         /*
1864          * We use MMU_RELOAD just to re-arm the ipte notifier for the
1865          * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
1866          * This ensures that the ipte instruction for this request has
1867          * already finished. We might race against a second unmapper that
1868          * wants to set the blocking bit. Lets just retry the request loop.
1869          */
1870         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
1871                 int rc;
1872                 rc = gmap_ipte_notify(vcpu->arch.gmap,
1873                                       kvm_s390_get_prefix(vcpu),
1874                                       PAGE_SIZE * 2);
1875                 if (rc)
1876                         return rc;
1877                 goto retry;
1878         }
1879
1880         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
1881                 vcpu->arch.sie_block->ihcpu = 0xffff;
1882                 goto retry;
1883         }
1884
1885         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
1886                 if (!ibs_enabled(vcpu)) {
1887                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
1888                         atomic_or(CPUSTAT_IBS,
1889                                         &vcpu->arch.sie_block->cpuflags);
1890                 }
1891                 goto retry;
1892         }
1893
1894         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
1895                 if (ibs_enabled(vcpu)) {
1896                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
1897                         atomic_andnot(CPUSTAT_IBS,
1898                                           &vcpu->arch.sie_block->cpuflags);
1899                 }
1900                 goto retry;
1901         }
1902
1903         /* nothing to do, just clear the request */
1904         clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
1905
1906         return 0;
1907 }
1908
1909 /**
1910  * kvm_arch_fault_in_page - fault-in guest page if necessary
1911  * @vcpu: The corresponding virtual cpu
1912  * @gpa: Guest physical address
1913  * @writable: Whether the page should be writable or not
1914  *
1915  * Make sure that a guest page has been faulted-in on the host.
1916  *
1917  * Return: Zero on success, negative error code otherwise.
1918  */
1919 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
1920 {
1921         return gmap_fault(vcpu->arch.gmap, gpa,
1922                           writable ? FAULT_FLAG_WRITE : 0);
1923 }
1924
1925 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
1926                                       unsigned long token)
1927 {
1928         struct kvm_s390_interrupt inti;
1929         struct kvm_s390_irq irq;
1930
1931         if (start_token) {
1932                 irq.u.ext.ext_params2 = token;
1933                 irq.type = KVM_S390_INT_PFAULT_INIT;
1934                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
1935         } else {
1936                 inti.type = KVM_S390_INT_PFAULT_DONE;
1937                 inti.parm64 = token;
1938                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
1939         }
1940 }
1941
1942 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
1943                                      struct kvm_async_pf *work)
1944 {
1945         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
1946         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
1947 }
1948
1949 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
1950                                  struct kvm_async_pf *work)
1951 {
1952         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
1953         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
1954 }
1955
1956 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
1957                                struct kvm_async_pf *work)
1958 {
1959         /* s390 will always inject the page directly */
1960 }
1961
1962 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
1963 {
1964         /*
1965          * s390 will always inject the page directly,
1966          * but we still want check_async_completion to cleanup
1967          */
1968         return true;
1969 }
1970
1971 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
1972 {
1973         hva_t hva;
1974         struct kvm_arch_async_pf arch;
1975         int rc;
1976
1977         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1978                 return 0;
1979         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
1980             vcpu->arch.pfault_compare)
1981                 return 0;
1982         if (psw_extint_disabled(vcpu))
1983                 return 0;
1984         if (kvm_s390_vcpu_has_irq(vcpu, 0))
1985                 return 0;
1986         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
1987                 return 0;
1988         if (!vcpu->arch.gmap->pfault_enabled)
1989                 return 0;
1990
1991         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
1992         hva += current->thread.gmap_addr & ~PAGE_MASK;
1993         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
1994                 return 0;
1995
1996         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
1997         return rc;
1998 }
1999
2000 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2001 {
2002         int rc, cpuflags;
2003
2004         /*
2005          * On s390 notifications for arriving pages will be delivered directly
2006          * to the guest but the house keeping for completed pfaults is
2007          * handled outside the worker.
2008          */
2009         kvm_check_async_pf_completion(vcpu);
2010
2011         memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
2012
2013         if (need_resched())
2014                 schedule();
2015
2016         if (test_cpu_flag(CIF_MCCK_PENDING))
2017                 s390_handle_mcck();
2018
2019         if (!kvm_is_ucontrol(vcpu->kvm)) {
2020                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2021                 if (rc)
2022                         return rc;
2023         }
2024
2025         rc = kvm_s390_handle_requests(vcpu);
2026         if (rc)
2027                 return rc;
2028
2029         if (guestdbg_enabled(vcpu)) {
2030                 kvm_s390_backup_guest_per_regs(vcpu);
2031                 kvm_s390_patch_guest_per_regs(vcpu);
2032         }
2033
2034         vcpu->arch.sie_block->icptcode = 0;
2035         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2036         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2037         trace_kvm_s390_sie_enter(vcpu, cpuflags);
2038
2039         return 0;
2040 }
2041
2042 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2043 {
2044         psw_t *psw = &vcpu->arch.sie_block->gpsw;
2045         u8 opcode;
2046         int rc;
2047
2048         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2049         trace_kvm_s390_sie_fault(vcpu);
2050
2051         /*
2052          * We want to inject an addressing exception, which is defined as a
2053          * suppressing or terminating exception. However, since we came here
2054          * by a DAT access exception, the PSW still points to the faulting
2055          * instruction since DAT exceptions are nullifying. So we've got
2056          * to look up the current opcode to get the length of the instruction
2057          * to be able to forward the PSW.
2058          */
2059         rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
2060         if (rc)
2061                 return kvm_s390_inject_prog_cond(vcpu, rc);
2062         psw->addr = __rewind_psw(*psw, -insn_length(opcode));
2063
2064         return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
2065 }
2066
2067 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2068 {
2069         int rc = -1;
2070
2071         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2072                    vcpu->arch.sie_block->icptcode);
2073         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2074
2075         if (guestdbg_enabled(vcpu))
2076                 kvm_s390_restore_guest_per_regs(vcpu);
2077
2078         if (exit_reason >= 0) {
2079                 rc = 0;
2080         } else if (kvm_is_ucontrol(vcpu->kvm)) {
2081                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2082                 vcpu->run->s390_ucontrol.trans_exc_code =
2083                                                 current->thread.gmap_addr;
2084                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2085                 rc = -EREMOTE;
2086
2087         } else if (current->thread.gmap_pfault) {
2088                 trace_kvm_s390_major_guest_pfault(vcpu);
2089                 current->thread.gmap_pfault = 0;
2090                 if (kvm_arch_setup_async_pf(vcpu)) {
2091                         rc = 0;
2092                 } else {
2093                         gpa_t gpa = current->thread.gmap_addr;
2094                         rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
2095                 }
2096         }
2097
2098         if (rc == -1)
2099                 rc = vcpu_post_run_fault_in_sie(vcpu);
2100
2101         memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
2102
2103         if (rc == 0) {
2104                 if (kvm_is_ucontrol(vcpu->kvm))
2105                         /* Don't exit for host interrupts. */
2106                         rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
2107                 else
2108                         rc = kvm_handle_sie_intercept(vcpu);
2109         }
2110
2111         return rc;
2112 }
2113
2114 static int __vcpu_run(struct kvm_vcpu *vcpu)
2115 {
2116         int rc, exit_reason;
2117
2118         /*
2119          * We try to hold kvm->srcu during most of vcpu_run (except when run-
2120          * ning the guest), so that memslots (and other stuff) are protected
2121          */
2122         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2123
2124         do {
2125                 rc = vcpu_pre_run(vcpu);
2126                 if (rc)
2127                         break;
2128
2129                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2130                 /*
2131                  * As PF_VCPU will be used in fault handler, between
2132                  * guest_enter and guest_exit should be no uaccess.
2133                  */
2134                 local_irq_disable();
2135                 __kvm_guest_enter();
2136                 local_irq_enable();
2137                 exit_reason = sie64a(vcpu->arch.sie_block,
2138                                      vcpu->run->s.regs.gprs);
2139                 local_irq_disable();
2140                 __kvm_guest_exit();
2141                 local_irq_enable();
2142                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2143
2144                 rc = vcpu_post_run(vcpu, exit_reason);
2145         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2146
2147         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2148         return rc;
2149 }
2150
2151 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2152 {
2153         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2154         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2155         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2156                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2157         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2158                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2159                 /* some control register changes require a tlb flush */
2160                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2161         }
2162         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2163                 vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
2164                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2165                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2166                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2167                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2168         }
2169         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2170                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2171                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2172                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2173                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2174                         kvm_clear_async_pf_completion_queue(vcpu);
2175         }
2176         kvm_run->kvm_dirty_regs = 0;
2177 }
2178
2179 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2180 {
2181         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2182         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2183         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2184         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2185         kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
2186         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2187         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2188         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2189         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2190         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2191         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2192         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2193 }
2194
2195 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2196 {
2197         int rc;
2198         sigset_t sigsaved;
2199
2200         if (guestdbg_exit_pending(vcpu)) {
2201                 kvm_s390_prepare_debug_exit(vcpu);
2202                 return 0;
2203         }
2204
2205         if (vcpu->sigset_active)
2206                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2207
2208         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2209                 kvm_s390_vcpu_start(vcpu);
2210         } else if (is_vcpu_stopped(vcpu)) {
2211                 pr_err_ratelimited("can't run stopped vcpu %d\n",
2212                                    vcpu->vcpu_id);
2213                 return -EINVAL;
2214         }
2215
2216         sync_regs(vcpu, kvm_run);
2217
2218         might_fault();
2219         rc = __vcpu_run(vcpu);
2220
2221         if (signal_pending(current) && !rc) {
2222                 kvm_run->exit_reason = KVM_EXIT_INTR;
2223                 rc = -EINTR;
2224         }
2225
2226         if (guestdbg_exit_pending(vcpu) && !rc)  {
2227                 kvm_s390_prepare_debug_exit(vcpu);
2228                 rc = 0;
2229         }
2230
2231         if (rc == -EOPNOTSUPP) {
2232                 /* intercept cannot be handled in-kernel, prepare kvm-run */
2233                 kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
2234                 kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2235                 kvm_run->s390_sieic.ipa      = vcpu->arch.sie_block->ipa;
2236                 kvm_run->s390_sieic.ipb      = vcpu->arch.sie_block->ipb;
2237                 rc = 0;
2238         }
2239
2240         if (rc == -EREMOTE) {
2241                 /* intercept was handled, but userspace support is needed
2242                  * kvm_run has been prepared by the handler */
2243                 rc = 0;
2244         }
2245
2246         store_regs(vcpu, kvm_run);
2247
2248         if (vcpu->sigset_active)
2249                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2250
2251         vcpu->stat.exit_userspace++;
2252         return rc;
2253 }
2254
2255 /*
2256  * store status at address
2257  * we use have two special cases:
2258  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2259  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2260  */
2261 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2262 {
2263         unsigned char archmode = 1;
2264         unsigned int px;
2265         u64 clkcomp;
2266         int rc;
2267
2268         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2269                 if (write_guest_abs(vcpu, 163, &archmode, 1))
2270                         return -EFAULT;
2271                 gpa = SAVE_AREA_BASE;
2272         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2273                 if (write_guest_real(vcpu, 163, &archmode, 1))
2274                         return -EFAULT;
2275                 gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
2276         }
2277         rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
2278                              vcpu->arch.guest_fpregs.fprs, 128);
2279         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
2280                               vcpu->run->s.regs.gprs, 128);
2281         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
2282                               &vcpu->arch.sie_block->gpsw, 16);
2283         px = kvm_s390_get_prefix(vcpu);
2284         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
2285                               &px, 4);
2286         rc |= write_guest_abs(vcpu,
2287                               gpa + offsetof(struct save_area, fp_ctrl_reg),
2288                               &vcpu->arch.guest_fpregs.fpc, 4);
2289         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
2290                               &vcpu->arch.sie_block->todpr, 4);
2291         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
2292                               &vcpu->arch.sie_block->cputm, 8);
2293         clkcomp = vcpu->arch.sie_block->ckc >> 8;
2294         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
2295                               &clkcomp, 8);
2296         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
2297                               &vcpu->run->s.regs.acrs, 64);
2298         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
2299                               &vcpu->arch.sie_block->gcr, 128);
2300         return rc ? -EFAULT : 0;
2301 }
2302
2303 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2304 {
2305         /*
2306          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2307          * copying in vcpu load/put. Lets update our copies before we save
2308          * it into the save area
2309          */
2310         save_fpu_regs();
2311         if (test_kvm_facility(vcpu->kvm, 129)) {
2312                 /*
2313                  * If the vector extension is available, the vector registers
2314                  * which overlaps with floating-point registers are saved in
2315                  * the SIE-control block.  Hence, extract the floating-point
2316                  * registers and the FPC value and store them in the
2317                  * guest_fpregs structure.
2318                  */
2319                 WARN_ON(!is_vx_task(current));    /* XXX remove later */
2320                 vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
2321                 convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
2322                                  current->thread.fpu.vxrs);
2323         } else
2324                 save_fpu_to(&vcpu->arch.guest_fpregs);
2325         save_access_regs(vcpu->run->s.regs.acrs);
2326
2327         return kvm_s390_store_status_unloaded(vcpu, addr);
2328 }
2329
2330 /*
2331  * store additional status at address
2332  */
2333 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2334                                         unsigned long gpa)
2335 {
2336         /* Only bits 0-53 are used for address formation */
2337         if (!(gpa & ~0x3ff))
2338                 return 0;
2339
2340         return write_guest_abs(vcpu, gpa & ~0x3ff,
2341                                (void *)&vcpu->run->s.regs.vrs, 512);
2342 }
2343
2344 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2345 {
2346         if (!test_kvm_facility(vcpu->kvm, 129))
2347                 return 0;
2348
2349         /*
2350          * The guest VXRS are in the host VXRs due to the lazy
2351          * copying in vcpu load/put. We can simply call save_fpu_regs()
2352          * to save the current register state because we are in the
2353          * middle of a load/put cycle.
2354          *
2355          * Let's update our copies before we save it into the save area.
2356          */
2357         save_fpu_regs();
2358
2359         return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2360 }
2361
2362 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2363 {
2364         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2365         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2366 }
2367
2368 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2369 {
2370         unsigned int i;
2371         struct kvm_vcpu *vcpu;
2372
2373         kvm_for_each_vcpu(i, vcpu, kvm) {
2374                 __disable_ibs_on_vcpu(vcpu);
2375         }
2376 }
2377
2378 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2379 {
2380         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2381         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2382 }
2383
2384 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2385 {
2386         int i, online_vcpus, started_vcpus = 0;
2387
2388         if (!is_vcpu_stopped(vcpu))
2389                 return;
2390
2391         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2392         /* Only one cpu at a time may enter/leave the STOPPED state. */
2393         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2394         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2395
2396         for (i = 0; i < online_vcpus; i++) {
2397                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2398                         started_vcpus++;
2399         }
2400
2401         if (started_vcpus == 0) {
2402                 /* we're the only active VCPU -> speed it up */
2403                 __enable_ibs_on_vcpu(vcpu);
2404         } else if (started_vcpus == 1) {
2405                 /*
2406                  * As we are starting a second VCPU, we have to disable
2407                  * the IBS facility on all VCPUs to remove potentially
2408                  * oustanding ENABLE requests.
2409                  */
2410                 __disable_ibs_on_all_vcpus(vcpu->kvm);
2411         }
2412
2413         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2414         /*
2415          * Another VCPU might have used IBS while we were offline.
2416          * Let's play safe and flush the VCPU at startup.
2417          */
2418         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2419         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2420         return;
2421 }
2422
2423 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2424 {
2425         int i, online_vcpus, started_vcpus = 0;
2426         struct kvm_vcpu *started_vcpu = NULL;
2427
2428         if (is_vcpu_stopped(vcpu))
2429                 return;
2430
2431         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2432         /* Only one cpu at a time may enter/leave the STOPPED state. */
2433         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2434         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2435
2436         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
2437         kvm_s390_clear_stop_irq(vcpu);
2438
2439         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2440         __disable_ibs_on_vcpu(vcpu);
2441
2442         for (i = 0; i < online_vcpus; i++) {
2443                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2444                         started_vcpus++;
2445                         started_vcpu = vcpu->kvm->vcpus[i];
2446                 }
2447         }
2448
2449         if (started_vcpus == 1) {
2450                 /*
2451                  * As we only have one VCPU left, we want to enable the
2452                  * IBS facility for that VCPU to speed it up.
2453                  */
2454                 __enable_ibs_on_vcpu(started_vcpu);
2455         }
2456
2457         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2458         return;
2459 }
2460
2461 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2462                                      struct kvm_enable_cap *cap)
2463 {
2464         int r;
2465
2466         if (cap->flags)
2467                 return -EINVAL;
2468
2469         switch (cap->cap) {
2470         case KVM_CAP_S390_CSS_SUPPORT:
2471                 if (!vcpu->kvm->arch.css_support) {
2472                         vcpu->kvm->arch.css_support = 1;
2473                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2474                         trace_kvm_s390_enable_css(vcpu->kvm);
2475                 }
2476                 r = 0;
2477                 break;
2478         default:
2479                 r = -EINVAL;
2480                 break;
2481         }
2482         return r;
2483 }
2484
2485 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2486                                   struct kvm_s390_mem_op *mop)
2487 {
2488         void __user *uaddr = (void __user *)mop->buf;
2489         void *tmpbuf = NULL;
2490         int r, srcu_idx;
2491         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2492                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
2493
2494         if (mop->flags & ~supported_flags)
2495                 return -EINVAL;
2496
2497         if (mop->size > MEM_OP_MAX_SIZE)
2498                 return -E2BIG;
2499
2500         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2501                 tmpbuf = vmalloc(mop->size);
2502                 if (!tmpbuf)
2503                         return -ENOMEM;
2504         }
2505
2506         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2507
2508         switch (mop->op) {
2509         case KVM_S390_MEMOP_LOGICAL_READ:
2510                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2511                         r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
2512                         break;
2513                 }
2514                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2515                 if (r == 0) {
2516                         if (copy_to_user(uaddr, tmpbuf, mop->size))
2517                                 r = -EFAULT;
2518                 }
2519                 break;
2520         case KVM_S390_MEMOP_LOGICAL_WRITE:
2521                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2522                         r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
2523                         break;
2524                 }
2525                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2526                         r = -EFAULT;
2527                         break;
2528                 }
2529                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2530                 break;
2531         default:
2532                 r = -EINVAL;
2533         }
2534
2535         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2536
2537         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2538                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2539
2540         vfree(tmpbuf);
2541         return r;
2542 }
2543
2544 long kvm_arch_vcpu_ioctl(struct file *filp,
2545                          unsigned int ioctl, unsigned long arg)
2546 {
2547         struct kvm_vcpu *vcpu = filp->private_data;
2548         void __user *argp = (void __user *)arg;
2549         int idx;
2550         long r;
2551
2552         switch (ioctl) {
2553         case KVM_S390_IRQ: {
2554                 struct kvm_s390_irq s390irq;
2555
2556                 r = -EFAULT;
2557                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2558                         break;
2559                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2560                 break;
2561         }
2562         case KVM_S390_INTERRUPT: {
2563                 struct kvm_s390_interrupt s390int;
2564                 struct kvm_s390_irq s390irq;
2565
2566                 r = -EFAULT;
2567                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2568                         break;
2569                 if (s390int_to_s390irq(&s390int, &s390irq))
2570                         return -EINVAL;
2571                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2572                 break;
2573         }
2574         case KVM_S390_STORE_STATUS:
2575                 idx = srcu_read_lock(&vcpu->kvm->srcu);
2576                 r = kvm_s390_vcpu_store_status(vcpu, arg);
2577                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2578                 break;
2579         case KVM_S390_SET_INITIAL_PSW: {
2580                 psw_t psw;
2581
2582                 r = -EFAULT;
2583                 if (copy_from_user(&psw, argp, sizeof(psw)))
2584                         break;
2585                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
2586                 break;
2587         }
2588         case KVM_S390_INITIAL_RESET:
2589                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
2590                 break;
2591         case KVM_SET_ONE_REG:
2592         case KVM_GET_ONE_REG: {
2593                 struct kvm_one_reg reg;
2594                 r = -EFAULT;
2595                 if (copy_from_user(&reg, argp, sizeof(reg)))
2596                         break;
2597                 if (ioctl == KVM_SET_ONE_REG)
2598                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
2599                 else
2600                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
2601                 break;
2602         }
2603 #ifdef CONFIG_KVM_S390_UCONTROL
2604         case KVM_S390_UCAS_MAP: {
2605                 struct kvm_s390_ucas_mapping ucasmap;
2606
2607                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2608                         r = -EFAULT;
2609                         break;
2610                 }
2611
2612                 if (!kvm_is_ucontrol(vcpu->kvm)) {
2613                         r = -EINVAL;
2614                         break;
2615                 }
2616
2617                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
2618                                      ucasmap.vcpu_addr, ucasmap.length);
2619                 break;
2620         }
2621         case KVM_S390_UCAS_UNMAP: {
2622                 struct kvm_s390_ucas_mapping ucasmap;
2623
2624                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2625                         r = -EFAULT;
2626                         break;
2627                 }
2628
2629                 if (!kvm_is_ucontrol(vcpu->kvm)) {
2630                         r = -EINVAL;
2631                         break;
2632                 }
2633
2634                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
2635                         ucasmap.length);
2636                 break;
2637         }
2638 #endif
2639         case KVM_S390_VCPU_FAULT: {
2640                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
2641                 break;
2642         }
2643         case KVM_ENABLE_CAP:
2644         {
2645                 struct kvm_enable_cap cap;
2646                 r = -EFAULT;
2647                 if (copy_from_user(&cap, argp, sizeof(cap)))
2648                         break;
2649                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
2650                 break;
2651         }
2652         case KVM_S390_MEM_OP: {
2653                 struct kvm_s390_mem_op mem_op;
2654
2655                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2656                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
2657                 else
2658                         r = -EFAULT;
2659                 break;
2660         }
2661         case KVM_S390_SET_IRQ_STATE: {
2662                 struct kvm_s390_irq_state irq_state;
2663
2664                 r = -EFAULT;
2665                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2666                         break;
2667                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
2668                     irq_state.len == 0 ||
2669                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
2670                         r = -EINVAL;
2671                         break;
2672                 }
2673                 r = kvm_s390_set_irq_state(vcpu,
2674                                            (void __user *) irq_state.buf,
2675                                            irq_state.len);
2676                 break;
2677         }
2678         case KVM_S390_GET_IRQ_STATE: {
2679                 struct kvm_s390_irq_state irq_state;
2680
2681                 r = -EFAULT;
2682                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2683                         break;
2684                 if (irq_state.len == 0) {
2685                         r = -EINVAL;
2686                         break;
2687                 }
2688                 r = kvm_s390_get_irq_state(vcpu,
2689                                            (__u8 __user *)  irq_state.buf,
2690                                            irq_state.len);
2691                 break;
2692         }
2693         default:
2694                 r = -ENOTTY;
2695         }
2696         return r;
2697 }
2698
2699 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2700 {
2701 #ifdef CONFIG_KVM_S390_UCONTROL
2702         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
2703                  && (kvm_is_ucontrol(vcpu->kvm))) {
2704                 vmf->page = virt_to_page(vcpu->arch.sie_block);
2705                 get_page(vmf->page);
2706                 return 0;
2707         }
2708 #endif
2709         return VM_FAULT_SIGBUS;
2710 }
2711
2712 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
2713                             unsigned long npages)
2714 {
2715         return 0;
2716 }
2717
2718 /* Section: memory related */
2719 int kvm_arch_prepare_memory_region(struct kvm *kvm,
2720                                    struct kvm_memory_slot *memslot,
2721                                    const struct kvm_userspace_memory_region *mem,
2722                                    enum kvm_mr_change change)
2723 {
2724         /* A few sanity checks. We can have memory slots which have to be
2725            located/ended at a segment boundary (1MB). The memory in userland is
2726            ok to be fragmented into various different vmas. It is okay to mmap()
2727            and munmap() stuff in this slot after doing this call at any time */
2728
2729         if (mem->userspace_addr & 0xffffful)
2730                 return -EINVAL;
2731
2732         if (mem->memory_size & 0xffffful)
2733                 return -EINVAL;
2734
2735         return 0;
2736 }
2737
2738 void kvm_arch_commit_memory_region(struct kvm *kvm,
2739                                 const struct kvm_userspace_memory_region *mem,
2740                                 const struct kvm_memory_slot *old,
2741                                 const struct kvm_memory_slot *new,
2742                                 enum kvm_mr_change change)
2743 {
2744         int rc;
2745
2746         /* If the basics of the memslot do not change, we do not want
2747          * to update the gmap. Every update causes several unnecessary
2748          * segment translation exceptions. This is usually handled just
2749          * fine by the normal fault handler + gmap, but it will also
2750          * cause faults on the prefix page of running guest CPUs.
2751          */
2752         if (old->userspace_addr == mem->userspace_addr &&
2753             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
2754             old->npages * PAGE_SIZE == mem->memory_size)
2755                 return;
2756
2757         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
2758                 mem->guest_phys_addr, mem->memory_size);
2759         if (rc)
2760                 pr_warn("failed to commit memory region\n");
2761         return;
2762 }
2763
2764 static int __init kvm_s390_init(void)
2765 {
2766         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
2767 }
2768
2769 static void __exit kvm_s390_exit(void)
2770 {
2771         kvm_exit();
2772 }
2773
2774 module_init(kvm_s390_init);
2775 module_exit(kvm_s390_exit);
2776
2777 /*
2778  * Enable autoloading of the kvm module.
2779  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
2780  * since x86 takes a different approach.
2781  */
2782 #include <linux/miscdevice.h>
2783 MODULE_ALIAS_MISCDEV(KVM_MINOR);
2784 MODULE_ALIAS("devname:kvm");