2 * Copyright (C) 2012 ARM Ltd.
3 * Author: Marc Zyngier <marc.zyngier@arm.com>
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 #include <linux/cpu.h>
20 #include <linux/kvm.h>
21 #include <linux/kvm_host.h>
22 #include <linux/interrupt.h>
25 #include <linux/of_address.h>
26 #include <linux/of_irq.h>
27 #include <linux/rculist.h>
28 #include <linux/uaccess.h>
30 #include <asm/kvm_emulate.h>
31 #include <asm/kvm_arm.h>
32 #include <asm/kvm_mmu.h>
33 #include <trace/events/kvm.h>
35 #include <kvm/iodev.h>
37 #define CREATE_TRACE_POINTS
41 * How the whole thing works (courtesy of Christoffer Dall):
43 * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if
44 * something is pending on the CPU interface.
45 * - Interrupts that are pending on the distributor are stored on the
46 * vgic.irq_pending vgic bitmap (this bitmap is updated by both userland
47 * ioctls and guest mmio ops, and other in-kernel peripherals such as the
49 * - Every time the bitmap changes, the irq_pending_on_cpu oracle is
51 * - To calculate the oracle, we need info for each cpu from
52 * compute_pending_for_cpu, which considers:
53 * - PPI: dist->irq_pending & dist->irq_enable
54 * - SPI: dist->irq_pending & dist->irq_enable & dist->irq_spi_target
55 * - irq_spi_target is a 'formatted' version of the GICD_ITARGETSRn
56 * registers, stored on each vcpu. We only keep one bit of
57 * information per interrupt, making sure that only one vcpu can
58 * accept the interrupt.
59 * - If any of the above state changes, we must recalculate the oracle.
60 * - The same is true when injecting an interrupt, except that we only
61 * consider a single interrupt at a time. The irq_spi_cpu array
62 * contains the target CPU for each SPI.
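 *
 *   Worked example (illustrative, in terms of the fields above): SPI n
 *   is pending for vcpu v iff, at shared-bitmap index (n - 32), the
 *   irq_pending & irq_enabled & irq_spi_target[v] bit is set; this is
 *   exactly the bitmap_and() chain in compute_pending_for_cpu().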
64 * The handling of level interrupts adds some extra complexity. We
65 * need to track when the interrupt has been EOIed, so we can sample
66 * the 'line' again. This is achieved as follows:
68 * - When a level interrupt is moved onto a vcpu, the corresponding
69 * bit in irq_queued is set. As long as this bit is set, the line
70 * will be ignored for further interrupts. The interrupt is injected
71 * into the vcpu with the GICH_LR_EOI bit set (generate a
72 * maintenance interrupt on EOI).
73 * - When the interrupt is EOIed, the maintenance interrupt fires,
74 * and clears the corresponding bit in irq_queued. This allows the
75 * interrupt line to be sampled again.
76 * - Note that level-triggered interrupts can also be set to pending from
77 * writes to GICD_ISPENDRn and lowering the external input line does not
78 * cause the interrupt to become inactive in such a situation.
79 * Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become
80 * inactive as long as the external input line is held high.
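 *
 *   Timeline sketch (illustrative): a level interrupt fires, so
 *   irq_pending and irq_queued are set and the IRQ is queued with
 *   GICH_LR_EOI; the guest EOIs it; the maintenance interrupt clears
 *   irq_queued; if the line is still high, the IRQ is sampled and
 *   queued again on the next flush.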
83 * Initialization rules: there are multiple stages to the vgic
84 * initialization, both for the distributor and the CPU interfaces.
88 * - kvm_vgic_early_init(): initialization of static data that doesn't
89 * depend on any sizing information or emulation type. No allocation
92 * - vgic_init(): allocation and initialization of the generic data
93 * structures that depend on sizing information (number of CPUs,
94 * number of interrupts). Also initializes the vcpu specific data
95 * structures. Can be executed lazily for GICv2.
96 * [to be renamed to kvm_vgic_init??]
100 * - kvm_vgic_cpu_early_init(): initialization of static data that
101 * doesn't depend on any sizing information or emulation type. No
102 * allocation is allowed there.
107 static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
108 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
109 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
110 static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
111 static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu,
113 static int compute_pending_for_cpu(struct kvm_vcpu *vcpu);
115 static const struct vgic_ops *vgic_ops;
116 static const struct vgic_params *vgic;
118 static void add_sgi_source(struct kvm_vcpu *vcpu, int irq, int source)
120 vcpu->kvm->arch.vgic.vm_ops.add_sgi_source(vcpu, irq, source);
123 static bool queue_sgi(struct kvm_vcpu *vcpu, int irq)
125 return vcpu->kvm->arch.vgic.vm_ops.queue_sgi(vcpu, irq);
128 int kvm_vgic_map_resources(struct kvm *kvm)
130 return kvm->arch.vgic.vm_ops.map_resources(kvm, vgic);
134 * struct vgic_bitmap contains a bitmap made of unsigned longs, but
135 * extracts u32s out of them.
137 * This does not work on 64-bit BE systems, because the bitmap access
138 * will store two consecutive 32-bit words with the higher-addressed
139 * register's bits at the lower index and the lower-addressed register's
140 * bits at the higher index.
142 * Therefore, swizzle the register index when accessing the 32-bit word
143 * registers to access the right register's value.
145 #if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 64
146 #define REG_OFFSET_SWIZZLE 1
148 #define REG_OFFSET_SWIZZLE 0
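
/*
 * Minimal sketch of the swizzle above (illustrative only; this helper
 * is not part of the original file): on a 64-bit BE kernel the two u32
 * halves of each unsigned long are stored high-word-first, so a 32-bit
 * word index must be flipped within its pair before dereferencing.
 */
static inline int example_swizzled_u32_index(int index)
{
	return index ^ REG_OFFSET_SWIZZLE;	/* no-op unless 64-bit BE */
}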
151 static int vgic_init_bitmap(struct vgic_bitmap *b, int nr_cpus, int nr_irqs)
155 nr_longs = nr_cpus + BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS);
157 b->private = kzalloc(sizeof(unsigned long) * nr_longs, GFP_KERNEL);
161 b->shared = b->private + nr_cpus;
166 static void vgic_free_bitmap(struct vgic_bitmap *b)
174 * Call this function to convert a u64 value to an unsigned long * bitmask
175 * in a way that works on both 32-bit and 64-bit LE and BE platforms.
177 * Warning: Calling this function may modify *val.
179 static unsigned long *u64_to_bitmask(u64 *val)
181 #if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 32
182 *val = (*val >> 32) | (*val << 32);
184 return (unsigned long *)val;
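
/*
 * Usage sketch (hedged; this mirrors how the EISR/ELRSR values are
 * consumed later in this file): copy the register into a local u64
 * first, because the conversion may swap its halves in place.
 *
 *	u64 eisr = vgic_get_eisr(vcpu);
 *	unsigned long *eisr_ptr = u64_to_bitmask(&eisr);
 *
 *	for_each_set_bit(lr, eisr_ptr, vgic->nr_lr)
 *		...
 */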
187 u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset)
191 return (u32 *)(x->private + cpuid) + REG_OFFSET_SWIZZLE;
193 return (u32 *)(x->shared) + ((offset - 1) ^ REG_OFFSET_SWIZZLE);
196 static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x,
199 if (irq < VGIC_NR_PRIVATE_IRQS)
200 return test_bit(irq, x->private + cpuid);
202 return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared);
205 void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
210 if (irq < VGIC_NR_PRIVATE_IRQS) {
211 reg = x->private + cpuid;
214 irq -= VGIC_NR_PRIVATE_IRQS;
223 static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid)
225 return x->private + cpuid;
228 unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x)
233 static int vgic_init_bytemap(struct vgic_bytemap *x, int nr_cpus, int nr_irqs)
237 size = nr_cpus * VGIC_NR_PRIVATE_IRQS;
238 size += nr_irqs - VGIC_NR_PRIVATE_IRQS;
240 x->private = kzalloc(size, GFP_KERNEL);
244 x->shared = x->private + nr_cpus * VGIC_NR_PRIVATE_IRQS / sizeof(u32);
248 static void vgic_free_bytemap(struct vgic_bytemap *b)
255 u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset)
259 if (offset < VGIC_NR_PRIVATE_IRQS) {
261 offset += cpuid * VGIC_NR_PRIVATE_IRQS;
264 offset -= VGIC_NR_PRIVATE_IRQS;
267 return reg + (offset / sizeof(u32));
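
/*
 * Worked example (illustrative): byte offset 36, e.g. the priority
 * byte of IRQ 36, is not a private IRQ, so it resolves to the shared
 * region at u32 index (36 - VGIC_NR_PRIVATE_IRQS) / sizeof(u32) == 1,
 * the word covering IRQs 36-39.
 */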
270 #define VGIC_CFG_LEVEL 0
271 #define VGIC_CFG_EDGE 1
273 static bool vgic_irq_is_edge(struct kvm_vcpu *vcpu, int irq)
275 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
278 irq_val = vgic_bitmap_get_irq_val(&dist->irq_cfg, vcpu->vcpu_id, irq);
279 return irq_val == VGIC_CFG_EDGE;
282 static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq)
284 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
286 return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq);
289 static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq)
291 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
293 return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq);
296 static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq)
298 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
300 return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq);
303 static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq)
305 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
307 vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 1);
310 static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq)
312 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
314 vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0);
317 static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq)
319 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
321 vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1);
324 static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq)
326 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
328 vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0);
331 static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq)
333 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
335 return vgic_bitmap_get_irq_val(&dist->irq_level, vcpu->vcpu_id, irq);
338 static void vgic_dist_irq_set_level(struct kvm_vcpu *vcpu, int irq)
340 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
342 vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 1);
345 static void vgic_dist_irq_clear_level(struct kvm_vcpu *vcpu, int irq)
347 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
349 vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 0);
352 static int vgic_dist_irq_soft_pend(struct kvm_vcpu *vcpu, int irq)
354 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
356 return vgic_bitmap_get_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq);
359 static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq)
361 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
363 vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0);
364 if (!vgic_dist_irq_get_level(vcpu, irq)) {
365 vgic_dist_irq_clear_pending(vcpu, irq);
366 if (!compute_pending_for_cpu(vcpu))
367 clear_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
371 static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
373 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
375 return vgic_bitmap_get_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq);
378 void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq)
380 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
382 vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 1);
385 void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq)
387 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
389 vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 0);
392 static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq)
394 if (irq < VGIC_NR_PRIVATE_IRQS)
395 set_bit(irq, vcpu->arch.vgic_cpu.pending_percpu);
397 set_bit(irq - VGIC_NR_PRIVATE_IRQS,
398 vcpu->arch.vgic_cpu.pending_shared);
401 void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq)
403 if (irq < VGIC_NR_PRIVATE_IRQS)
404 clear_bit(irq, vcpu->arch.vgic_cpu.pending_percpu);
406 clear_bit(irq - VGIC_NR_PRIVATE_IRQS,
407 vcpu->arch.vgic_cpu.pending_shared);
410 static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq)
412 return !vgic_irq_is_queued(vcpu, irq);
416 * vgic_reg_access - access vgic register
417 * @mmio: pointer to the data describing the mmio access
418 * @reg: pointer to the virtual backing of vgic distributor data
419 * @offset: least significant 2 bits used for word offset
420 * @mode: ACCESS_ mode (see defines above)
422 * Helper to make vgic register access easier using one of the access
423 * modes defined for vgic register access
424 * (read,raz,write-ignored,setbit,clearbit,write)
426 void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg,
427 phys_addr_t offset, int mode)
429 int word_offset = (offset & 3) * 8;
430 u32 mask = (1UL << (mmio->len * 8)) - 1;
434 * Any alignment fault should have been delivered to the guest
435 * directly (ARM ARM B3.12.7 "Prioritization of aborts").
441 BUG_ON(mode != (ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED));
445 if (mmio->is_write) {
446 u32 data = mmio_data_read(mmio, mask) << word_offset;
447 switch (ACCESS_WRITE_MASK(mode)) {
448 case ACCESS_WRITE_IGNORED:
451 case ACCESS_WRITE_SETBIT:
455 case ACCESS_WRITE_CLEARBIT:
459 case ACCESS_WRITE_VALUE:
460 regval = (regval & ~(mask << word_offset)) | data;
465 switch (ACCESS_READ_MASK(mode)) {
466 case ACCESS_READ_RAZ:
470 case ACCESS_READ_VALUE:
471 mmio_data_write(mmio, mask, regval >> word_offset);
476 bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
479 vgic_reg_access(mmio, NULL, offset,
480 ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
484 bool vgic_handle_enable_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
485 phys_addr_t offset, int vcpu_id, int access)
488 int mode = ACCESS_READ_VALUE | access;
489 struct kvm_vcpu *target_vcpu = kvm_get_vcpu(kvm, vcpu_id);
491 reg = vgic_bitmap_get_reg(&kvm->arch.vgic.irq_enabled, vcpu_id, offset);
492 vgic_reg_access(mmio, reg, offset, mode);
493 if (mmio->is_write) {
494 if (access & ACCESS_WRITE_CLEARBIT) {
495 if (offset < 4) /* Force SGI enabled */
497 vgic_retire_disabled_irqs(target_vcpu);
499 vgic_update_state(kvm);
506 bool vgic_handle_set_pending_reg(struct kvm *kvm,
507 struct kvm_exit_mmio *mmio,
508 phys_addr_t offset, int vcpu_id)
512 int mode = ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT;
513 struct vgic_dist *dist = &kvm->arch.vgic;
515 reg = vgic_bitmap_get_reg(&dist->irq_cfg, vcpu_id, offset);
516 level_mask = (~(*reg));
518 /* Mark both level and edge triggered irqs as pending */
519 reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu_id, offset);
521 vgic_reg_access(mmio, reg, offset, mode);
523 if (mmio->is_write) {
524 /* Set the soft-pending flag only for level-triggered irqs */
525 reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
527 vgic_reg_access(mmio, reg, offset, mode);
530 /* Ignore writes to SGIs */
533 *reg |= orig & 0xffff;
536 vgic_update_state(kvm);
543 bool vgic_handle_clear_pending_reg(struct kvm *kvm,
544 struct kvm_exit_mmio *mmio,
545 phys_addr_t offset, int vcpu_id)
549 int mode = ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT;
550 struct vgic_dist *dist = &kvm->arch.vgic;
552 reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu_id, offset);
554 vgic_reg_access(mmio, reg, offset, mode);
555 if (mmio->is_write) {
556 /* Re-set level triggered level-active interrupts */
557 level_active = vgic_bitmap_get_reg(&dist->irq_level,
559 reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu_id, offset);
560 *reg |= *level_active;
562 /* Ignore writes to SGIs */
565 *reg |= orig & 0xffff;
568 /* Clear soft-pending flags */
569 reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
571 vgic_reg_access(mmio, reg, offset, mode);
573 vgic_update_state(kvm);
579 bool vgic_handle_set_active_reg(struct kvm *kvm,
580 struct kvm_exit_mmio *mmio,
581 phys_addr_t offset, int vcpu_id)
584 struct vgic_dist *dist = &kvm->arch.vgic;
586 reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset);
587 vgic_reg_access(mmio, reg, offset,
588 ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
590 if (mmio->is_write) {
591 vgic_update_state(kvm);
598 bool vgic_handle_clear_active_reg(struct kvm *kvm,
599 struct kvm_exit_mmio *mmio,
600 phys_addr_t offset, int vcpu_id)
603 struct vgic_dist *dist = &kvm->arch.vgic;
605 reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset);
606 vgic_reg_access(mmio, reg, offset,
607 ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
609 if (mmio->is_write) {
610 vgic_update_state(kvm);
617 static u32 vgic_cfg_expand(u16 val)
623 * Turn a 16bit value like abcd...mnop into a 32bit word
624 * a0b0c0d0...m0n0o0p0, which is what the HW cfg register is.
626 for (i = 0; i < 16; i++)
627 res |= ((val >> i) & VGIC_CFG_EDGE) << (2 * i + 1);
632 static u16 vgic_cfg_compress(u32 val)
638 * Turn a 32bit word a0b0c0d0...m0n0o0p0 into 16bit value like
639 * abcd...mnop which is what we really care about.
641 for (i = 0; i < 16; i++)
642 res |= ((val >> (i * 2 + 1)) & VGIC_CFG_EDGE) << i;
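
/*
 * Round-trip example for the two helpers above (worked by hand,
 * illustrative only):
 *
 *	vgic_cfg_expand(0x0003)   == 0x0000000a	(bits 1 and 3 set)
 *	vgic_cfg_compress(0x000a) == 0x0003
 */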
648 * The distributor uses 2 bits per IRQ for the CFG register, but the
649 * LSB is always 0. As such, we only keep the upper bit, and use the
650 * two above functions to compress/expand the bits
652 bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
662 val = vgic_cfg_expand(val);
663 vgic_reg_access(mmio, &val, offset,
664 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
665 if (mmio->is_write) {
666 /* Ignore writes to read-only SGI and PPI bits */
670 val = vgic_cfg_compress(val);
675 *reg &= 0xffff << 16;
684 * vgic_unqueue_irqs - move pending/active IRQs from LRs to the distributor
685 * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs
687 * Move any IRQs that have already been assigned to LRs back to the
688 * emulated distributor state so that the complete emulated state can be read
689 * from the main emulation structures without investigating the LRs.
691 void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
693 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
696 for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
697 struct vgic_lr lr = vgic_get_lr(vcpu, i);
700 * There are three options for the state bits:
702 * 01: pending
703 * 10: active
704 * 11: pending and active
706 BUG_ON(!(lr.state & LR_STATE_MASK));
708 /* Reestablish SGI source for pending and active IRQs */
709 if (lr.irq < VGIC_NR_SGIS)
710 add_sgi_source(vcpu, lr.irq, lr.source);
713 * If the LR holds an active (10) or a pending and active (11)
714 * interrupt then move the active state to the
715 * distributor tracking bit.
717 if (lr.state & LR_STATE_ACTIVE) {
718 vgic_irq_set_active(vcpu, lr.irq);
719 lr.state &= ~LR_STATE_ACTIVE;
723 * Reestablish the pending state on the distributor and the
724 * CPU interface. It may have already been pending, but that
725 * is fine, then we are only setting a few bits that were
728 if (lr.state & LR_STATE_PENDING) {
729 vgic_dist_irq_set_pending(vcpu, lr.irq);
730 lr.state &= ~LR_STATE_PENDING;
733 vgic_set_lr(vcpu, i, lr);
736 * Mark the LR as free for other use.
738 BUG_ON(lr.state & LR_STATE_MASK);
739 vgic_retire_lr(i, lr.irq, vcpu);
740 vgic_irq_clear_queued(vcpu, lr.irq);
742 /* Finally update the VGIC state. */
743 vgic_update_state(vcpu->kvm);
748 struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges,
749 int len, gpa_t offset)
751 while (ranges->len) {
752 if (offset >= ranges->base &&
753 (offset + len) <= (ranges->base + ranges->len))
761 static bool vgic_validate_access(const struct vgic_dist *dist,
762 const struct vgic_io_range *range,
763 unsigned long offset)
767 if (!range->bits_per_irq)
768 return true; /* Not an irq-based access */
770 irq = offset * 8 / range->bits_per_irq;
771 if (irq >= dist->nr_irqs)
778 * Call the respective handler function for the given range.
779 * We split up any 64 bit accesses into two consecutive 32 bit
780 * handler calls and merge the result afterwards.
781 * We do this in a little endian fashion regardless of the host's
782 * or guest's endianness, because the GIC is always LE and the rest of
783 * the code (vgic_reg_access) also puts it in a LE fashion already.
784 * At this point we have already identified the handle function, so
785 * range points to that one entry and offset is relative to this.
787 static bool call_range_handler(struct kvm_vcpu *vcpu,
788 struct kvm_exit_mmio *mmio,
789 unsigned long offset,
790 const struct vgic_io_range *range)
792 struct kvm_exit_mmio mmio32;
795 if (likely(mmio->len <= 4))
796 return range->handle_mmio(vcpu, mmio, offset);
799 * Any access bigger than 4 bytes (that we currently handle in KVM)
800 * is actually 8 bytes long, caused by a 64-bit access
804 mmio32.is_write = mmio->is_write;
805 mmio32.private = mmio->private;
807 mmio32.phys_addr = mmio->phys_addr + 4;
808 mmio32.data = &((u32 *)mmio->data)[1];
809 ret = range->handle_mmio(vcpu, &mmio32, offset + 4);
811 mmio32.phys_addr = mmio->phys_addr;
812 mmio32.data = &((u32 *)mmio->data)[0];
813 ret |= range->handle_mmio(vcpu, &mmio32, offset);
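
/*
 * Example (illustrative): an 8-byte access at register offset 0x100 is
 * dispatched as two 32-bit handler calls in little-endian order,
 * matching the code above:
 *
 *	range->handle_mmio(vcpu, &mmio32, 0x104);	with data[1]
 *	range->handle_mmio(vcpu, &mmio32, 0x100);	with data[0]
 */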
819 * vgic_handle_mmio_access - handle an in-kernel MMIO access
820 * This is called by the read/write KVM IO device wrappers below.
821 * @vcpu: pointer to the vcpu performing the access
822 * @this: pointer to the KVM IO device in charge
823 * @addr: guest physical address of the access
824 * @len: size of the access
825 * @val: pointer to the data region
826 * @is_write: read or write access
828 * returns true if the MMIO access could be performed
830 static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu,
831 struct kvm_io_device *this, gpa_t addr,
832 int len, void *val, bool is_write)
834 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
835 struct vgic_io_device *iodev = container_of(this,
836 struct vgic_io_device, dev);
837 struct kvm_run *run = vcpu->run;
838 const struct vgic_io_range *range;
839 struct kvm_exit_mmio mmio;
843 offset = addr - iodev->addr;
844 range = vgic_find_range(iodev->reg_ranges, len, offset);
845 if (unlikely(!range || !range->handle_mmio)) {
846 pr_warn("Unhandled access %d %08llx %d\n", is_write, addr, len);
850 mmio.phys_addr = addr;
852 mmio.is_write = is_write;
854 mmio.private = iodev->redist_vcpu;
856 spin_lock(&dist->lock);
857 offset -= range->base;
858 if (vgic_validate_access(dist, range, offset)) {
859 updated_state = call_range_handler(vcpu, &mmio, offset, range);
863 updated_state = false;
865 spin_unlock(&dist->lock);
866 run->mmio.is_write = is_write;
868 run->mmio.phys_addr = addr;
869 memcpy(run->mmio.data, val, len);
871 kvm_handle_mmio_return(vcpu, run);
874 vgic_kick_vcpus(vcpu->kvm);
879 static int vgic_handle_mmio_read(struct kvm_vcpu *vcpu,
880 struct kvm_io_device *this,
881 gpa_t addr, int len, void *val)
883 return vgic_handle_mmio_access(vcpu, this, addr, len, val, false);
886 static int vgic_handle_mmio_write(struct kvm_vcpu *vcpu,
887 struct kvm_io_device *this,
888 gpa_t addr, int len, const void *val)
890 return vgic_handle_mmio_access(vcpu, this, addr, len, (void *)val,
894 struct kvm_io_device_ops vgic_io_ops = {
895 .read = vgic_handle_mmio_read,
896 .write = vgic_handle_mmio_write,
900 * vgic_register_kvm_io_dev - register VGIC register frame on the KVM I/O bus
901 * @kvm: The VM structure pointer
902 * @base: The (guest) base address for the register frame
903 * @len: Length of the register frame window
904 * @ranges: Describing the handler functions for each register
905 * @redist_vcpu_id: The VCPU ID to pass on to the handlers on call
906 * @iodev: Points to memory to be passed on to the handler
908 * @iodev stores the parameters of this function so that the handler and
909 * the dispatcher function can use them (since the KVM I/O bus framework lacks
910 * an opaque parameter). Initialization is done in this function, but the
911 * reference should be valid and unique for the whole VGIC lifetime.
912 * If the register frame is not mapped for a specific VCPU, pass -1 to @redist_vcpu_id.
915 int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len,
916 const struct vgic_io_range *ranges,
918 struct vgic_io_device *iodev)
920 struct kvm_vcpu *vcpu = NULL;
923 if (redist_vcpu_id >= 0)
924 vcpu = kvm_get_vcpu(kvm, redist_vcpu_id);
928 iodev->reg_ranges = ranges;
929 iodev->redist_vcpu = vcpu;
931 kvm_iodevice_init(&iodev->dev, &vgic_io_ops);
933 mutex_lock(&kvm->slots_lock);
935 ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, base, len,
937 mutex_unlock(&kvm->slots_lock);
939 /* Mark the iodev as invalid if registration fails. */
941 iodev->dev.ops = NULL;
946 static int vgic_nr_shared_irqs(struct vgic_dist *dist)
948 return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS;
951 static int compute_active_for_cpu(struct kvm_vcpu *vcpu)
953 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
954 unsigned long *active, *enabled, *act_percpu, *act_shared;
955 unsigned long active_private, active_shared;
956 int nr_shared = vgic_nr_shared_irqs(dist);
959 vcpu_id = vcpu->vcpu_id;
960 act_percpu = vcpu->arch.vgic_cpu.active_percpu;
961 act_shared = vcpu->arch.vgic_cpu.active_shared;
963 active = vgic_bitmap_get_cpu_map(&dist->irq_active, vcpu_id);
964 enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
965 bitmap_and(act_percpu, active, enabled, VGIC_NR_PRIVATE_IRQS);
967 active = vgic_bitmap_get_shared_map(&dist->irq_active);
968 enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
969 bitmap_and(act_shared, active, enabled, nr_shared);
970 bitmap_and(act_shared, act_shared,
971 vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]),
974 active_private = find_first_bit(act_percpu, VGIC_NR_PRIVATE_IRQS);
975 active_shared = find_first_bit(act_shared, nr_shared);
977 return (active_private < VGIC_NR_PRIVATE_IRQS ||
978 active_shared < nr_shared);
981 static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
983 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
984 unsigned long *pending, *enabled, *pend_percpu, *pend_shared;
985 unsigned long pending_private, pending_shared;
986 int nr_shared = vgic_nr_shared_irqs(dist);
989 vcpu_id = vcpu->vcpu_id;
990 pend_percpu = vcpu->arch.vgic_cpu.pending_percpu;
991 pend_shared = vcpu->arch.vgic_cpu.pending_shared;
993 if (!dist->enabled) {
994 bitmap_zero(pend_percpu, VGIC_NR_PRIVATE_IRQS);
995 bitmap_zero(pend_shared, nr_shared);
999 pending = vgic_bitmap_get_cpu_map(&dist->irq_pending, vcpu_id);
1000 enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
1001 bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS);
1003 pending = vgic_bitmap_get_shared_map(&dist->irq_pending);
1004 enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
1005 bitmap_and(pend_shared, pending, enabled, nr_shared);
1006 bitmap_and(pend_shared, pend_shared,
1007 vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]),
1010 pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS);
1011 pending_shared = find_first_bit(pend_shared, nr_shared);
1012 return (pending_private < VGIC_NR_PRIVATE_IRQS ||
1013 pending_shared < vgic_nr_shared_irqs(dist));
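
/*
 * Worked example (illustrative): if SPI 40 is pending and enabled and
 * targets this vcpu, bit (40 - VGIC_NR_PRIVATE_IRQS) == 8 is set in
 * pend_shared, find_first_bit() returns 8 < nr_shared, and the
 * function returns true.
 */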
1017 * Update the interrupt state and determine which CPUs have pending
1018 * or active interrupts. Must be called with distributor lock held.
1020 void vgic_update_state(struct kvm *kvm)
1022 struct vgic_dist *dist = &kvm->arch.vgic;
1023 struct kvm_vcpu *vcpu;
1026 kvm_for_each_vcpu(c, vcpu, kvm) {
1027 if (compute_pending_for_cpu(vcpu))
1028 set_bit(c, dist->irq_pending_on_cpu);
1030 if (compute_active_for_cpu(vcpu))
1031 set_bit(c, dist->irq_active_on_cpu);
1033 clear_bit(c, dist->irq_active_on_cpu);
1037 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr)
1039 return vgic_ops->get_lr(vcpu, lr);
1042 static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr,
1045 vgic_ops->set_lr(vcpu, lr, vlr);
1048 static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
1051 vgic_ops->sync_lr_elrsr(vcpu, lr, vlr);
1054 static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu)
1056 return vgic_ops->get_elrsr(vcpu);
1059 static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu)
1061 return vgic_ops->get_eisr(vcpu);
1064 static inline void vgic_clear_eisr(struct kvm_vcpu *vcpu)
1066 vgic_ops->clear_eisr(vcpu);
1069 static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu)
1071 return vgic_ops->get_interrupt_status(vcpu);
1074 static inline void vgic_enable_underflow(struct kvm_vcpu *vcpu)
1076 vgic_ops->enable_underflow(vcpu);
1079 static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu)
1081 vgic_ops->disable_underflow(vcpu);
1084 void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
1086 vgic_ops->get_vmcr(vcpu, vmcr);
1089 void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
1091 vgic_ops->set_vmcr(vcpu, vmcr);
1094 static inline void vgic_enable(struct kvm_vcpu *vcpu)
1096 vgic_ops->enable(vcpu);
1099 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
1101 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1102 struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr);
1105 * We must transfer the pending state back to the distributor before
1106 * retiring the LR, otherwise we may lose edge-triggered interrupts.
1108 if (vlr.state & LR_STATE_PENDING) {
1109 vgic_dist_irq_set_pending(vcpu, irq);
1114 vgic_set_lr(vcpu, lr_nr, vlr);
1115 clear_bit(lr_nr, vgic_cpu->lr_used);
1116 vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
1117 vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
1121 * An interrupt may have been disabled after being made pending on the
1122 * CPU interface (the classic case is a timer running while we're
1123 * rebooting the guest - the interrupt would kick as soon as the CPU
1124 * interface gets enabled, with deadly consequences).
1126 * The solution is to examine already active LRs, and check the
1127 * interrupt is still enabled. If not, just retire it.
1129 static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
1131 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1134 for_each_set_bit(lr, vgic_cpu->lr_used, vgic->nr_lr) {
1135 struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
1137 if (!vgic_irq_is_enabled(vcpu, vlr.irq)) {
1138 vgic_retire_lr(lr, vlr.irq, vcpu);
1139 if (vgic_irq_is_queued(vcpu, vlr.irq))
1140 vgic_irq_clear_queued(vcpu, vlr.irq);
1145 static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
1146 int lr_nr, struct vgic_lr vlr)
1148 if (vgic_irq_is_active(vcpu, irq)) {
1149 vlr.state |= LR_STATE_ACTIVE;
1150 kvm_debug("Set active, clear distributor: 0x%x\n", vlr.state);
1151 vgic_irq_clear_active(vcpu, irq);
1152 vgic_update_state(vcpu->kvm);
1154 WARN_ON(!vgic_dist_irq_is_pending(vcpu, irq));
1155 vlr.state |= LR_STATE_PENDING;
1156 kvm_debug("Set pending: 0x%x\n", vlr.state);
1159 if (!vgic_irq_is_edge(vcpu, irq))
1160 vlr.state |= LR_EOI_INT;
1162 if (vlr.irq >= VGIC_NR_SGIS) {
1163 struct irq_phys_map *map;
1164 map = vgic_irq_map_search(vcpu, irq);
1167 vlr.hwirq = map->phys_irq;
1169 vlr.state &= ~LR_EOI_INT;
1172 * Make sure we're not going to sample this
1173 * again, as a HW-backed interrupt cannot be
1174 * in the PENDING_ACTIVE stage.
1176 vgic_irq_set_queued(vcpu, irq);
1180 vgic_set_lr(vcpu, lr_nr, vlr);
1181 vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
1185 * Queue an interrupt to a CPU virtual interface. Return true on success,
1186 * or false if it wasn't possible to queue it.
1187 * sgi_source must be zero for any non-SGI interrupts.
1189 bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
1191 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1192 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1196 /* Sanitize the input... */
1197 BUG_ON(sgi_source_id & ~7);
1198 BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS);
1199 BUG_ON(irq >= dist->nr_irqs);
1201 kvm_debug("Queue IRQ%d\n", irq);
1203 lr = vgic_cpu->vgic_irq_lr_map[irq];
1205 /* Do we have an active interrupt for the same CPUID? */
1206 if (lr != LR_EMPTY) {
1207 vlr = vgic_get_lr(vcpu, lr);
1208 if (vlr.source == sgi_source_id) {
1209 kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq);
1210 BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
1211 vgic_queue_irq_to_lr(vcpu, irq, lr, vlr);
1216 /* Try to use another LR for this interrupt */
1217 lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used,
1219 if (lr >= vgic->nr_lr)
1222 kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
1223 vgic_cpu->vgic_irq_lr_map[irq] = lr;
1224 set_bit(lr, vgic_cpu->lr_used);
1227 vlr.source = sgi_source_id;
1229 vgic_queue_irq_to_lr(vcpu, irq, lr, vlr);
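
/*
 * Illustrative call patterns (hedged, derived from the sanity checks
 * above): an SGI from vcpu 1 is queued as vgic_queue_irq(vcpu, 1, sgi)
 * with sgi < VGIC_NR_SGIS; every other interrupt must be queued with
 * sgi_source_id == 0.
 */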
1234 static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
1236 if (!vgic_can_sample_irq(vcpu, irq))
1237 return true; /* level interrupt, already queued */
1239 if (vgic_queue_irq(vcpu, 0, irq)) {
1240 if (vgic_irq_is_edge(vcpu, irq)) {
1241 vgic_dist_irq_clear_pending(vcpu, irq);
1242 vgic_cpu_irq_clear(vcpu, irq);
1244 vgic_irq_set_queued(vcpu, irq);
1254 * Fill the list registers with pending interrupts before running the
1257 static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
1259 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1260 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1261 unsigned long *pa_percpu, *pa_shared;
1264 int nr_shared = vgic_nr_shared_irqs(dist);
1266 vcpu_id = vcpu->vcpu_id;
1268 pa_percpu = vcpu->arch.vgic_cpu.pend_act_percpu;
1269 pa_shared = vcpu->arch.vgic_cpu.pend_act_shared;
1271 bitmap_or(pa_percpu, vgic_cpu->pending_percpu, vgic_cpu->active_percpu,
1272 VGIC_NR_PRIVATE_IRQS);
1273 bitmap_or(pa_shared, vgic_cpu->pending_shared, vgic_cpu->active_shared,
1276 * We may not have any pending interrupt, or the interrupts
1277 * may have been serviced from another vcpu. In all cases,
1280 if (!kvm_vgic_vcpu_pending_irq(vcpu) && !kvm_vgic_vcpu_active_irq(vcpu))
1284 for_each_set_bit(i, pa_percpu, VGIC_NR_SGIS) {
1285 if (!queue_sgi(vcpu, i))
1290 for_each_set_bit_from(i, pa_percpu, VGIC_NR_PRIVATE_IRQS) {
1291 if (!vgic_queue_hwirq(vcpu, i))
1296 for_each_set_bit(i, pa_shared, nr_shared) {
1297 if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS))
1306 vgic_enable_underflow(vcpu);
1308 vgic_disable_underflow(vcpu);
1310 * We're about to run this VCPU, and we've consumed
1311 * everything the distributor had in store for
1312 * us. Claim we don't have anything pending. We'll
1313 * adjust that if needed while exiting.
1315 clear_bit(vcpu_id, dist->irq_pending_on_cpu);
1319 static int process_queued_irq(struct kvm_vcpu *vcpu,
1320 int lr, struct vgic_lr vlr)
1325 * If the IRQ was EOIed (called from vgic_process_maintenance) or it
1326 * went from active to non-active (called from vgic_sync_hwirq) it was
1327 * also ACKed and we therefore assume we can clear the soft-pending
1328 * state (should it have been set) for this interrupt.
1330 * Note: if the IRQ soft pending state was set after the IRQ was
1331 * acked, it actually shouldn't be cleared, but we have no way of
1332 * knowing that unless we start trapping ACKs when the soft-pending state is set.
1335 vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
1338 * Tell the GIC to start sampling this interrupt again.
1340 vgic_irq_clear_queued(vcpu, vlr.irq);
1342 /* Any additional pending interrupt? */
1343 if (vgic_irq_is_edge(vcpu, vlr.irq)) {
1344 BUG_ON(!(vlr.state & LR_HW));
1345 pending = vgic_dist_irq_is_pending(vcpu, vlr.irq);
1347 if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
1348 vgic_cpu_irq_set(vcpu, vlr.irq);
1351 vgic_dist_irq_clear_pending(vcpu, vlr.irq);
1352 vgic_cpu_irq_clear(vcpu, vlr.irq);
1357 * Despite being EOIed, the LR may not have
1358 * been marked as empty.
1362 vgic_set_lr(vcpu, lr, vlr);
1364 vgic_sync_lr_elrsr(vcpu, lr, vlr);
1369 static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
1371 u32 status = vgic_get_interrupt_status(vcpu);
1372 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1373 struct kvm *kvm = vcpu->kvm;
1374 int level_pending = 0;
1376 kvm_debug("STATUS = %08x\n", status);
1378 if (status & INT_STATUS_EOI) {
1380 * Some level interrupts have been EOIed. Clear their
1383 u64 eisr = vgic_get_eisr(vcpu);
1384 unsigned long *eisr_ptr = u64_to_bitmask(&eisr);
1387 for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
1388 struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
1390 WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
1391 WARN_ON(vlr.state & LR_STATE_MASK);
1395 * kvm_notify_acked_irq calls kvm_set_irq()
1396 * to reset the IRQ level, which grabs the dist->lock
1397 * so we call this before taking the dist->lock.
1399 kvm_notify_acked_irq(kvm, 0,
1400 vlr.irq - VGIC_NR_PRIVATE_IRQS);
1402 spin_lock(&dist->lock);
1403 level_pending |= process_queued_irq(vcpu, lr, vlr);
1404 spin_unlock(&dist->lock);
1408 if (status & INT_STATUS_UNDERFLOW)
1409 vgic_disable_underflow(vcpu);
1412 * In the next iterations of the vcpu loop, if we sync the vgic state
1413 * after flushing it, but before entering the guest (this happens for
1414 * pending signals and vmid rollovers), then make sure we don't pick
1415 * up any old maintenance interrupts here.
1417 vgic_clear_eisr(vcpu);
1419 return level_pending;
1423 * Save the physical active state, and reset it to inactive.
1425 * Return true if there's a pending forwarded interrupt to queue.
1427 static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr)
1429 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1430 struct irq_phys_map *map;
1435 if (!(vlr.state & LR_HW))
1438 map = vgic_irq_map_search(vcpu, vlr.irq);
1441 ret = irq_get_irqchip_state(map->irq,
1442 IRQCHIP_STATE_ACTIVE,
1450 spin_lock(&dist->lock);
1451 level_pending = process_queued_irq(vcpu, lr, vlr);
1452 spin_unlock(&dist->lock);
1453 return level_pending;
1456 /* Sync back the VGIC state after a guest run */
1457 static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
1459 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1460 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1462 unsigned long *elrsr_ptr;
1466 level_pending = vgic_process_maintenance(vcpu);
1467 elrsr = vgic_get_elrsr(vcpu);
1468 elrsr_ptr = u64_to_bitmask(&elrsr);
1470 /* Deal with HW interrupts, and clear mappings for empty LRs */
1471 for (lr = 0; lr < vgic->nr_lr; lr++) {
1474 if (!test_bit(lr, vgic_cpu->lr_used))
1477 vlr = vgic_get_lr(vcpu, lr);
1478 if (vgic_sync_hwirq(vcpu, lr, vlr))
1479 level_pending = true;
1481 if (!test_bit(lr, elrsr_ptr))
1484 clear_bit(lr, vgic_cpu->lr_used);
1486 BUG_ON(vlr.irq >= dist->nr_irqs);
1487 vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY;
1490 /* Check if we still have something up our sleeve... */
1491 pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr);
1492 if (level_pending || pending < vgic->nr_lr)
1493 set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
1496 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
1498 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1500 if (!irqchip_in_kernel(vcpu->kvm))
1503 spin_lock(&dist->lock);
1504 __kvm_vgic_flush_hwstate(vcpu);
1505 spin_unlock(&dist->lock);
1508 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
1510 if (!irqchip_in_kernel(vcpu->kvm))
1513 __kvm_vgic_sync_hwstate(vcpu);
1516 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
1518 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1520 if (!irqchip_in_kernel(vcpu->kvm))
1523 return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
1526 int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu)
1528 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1530 if (!irqchip_in_kernel(vcpu->kvm))
1533 return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu);
1537 void vgic_kick_vcpus(struct kvm *kvm)
1539 struct kvm_vcpu *vcpu;
1543 * We've injected an interrupt, time to find out who deserves
1546 kvm_for_each_vcpu(c, vcpu, kvm) {
1547 if (kvm_vgic_vcpu_pending_irq(vcpu))
1548 kvm_vcpu_kick(vcpu);
1552 static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
1554 int edge_triggered = vgic_irq_is_edge(vcpu, irq);
1557 * Only inject an interrupt if:
1558 * - edge triggered and we have a rising edge
1559 * - level triggered and we change level
1561 if (edge_triggered) {
1562 int state = vgic_dist_irq_is_pending(vcpu, irq);
1563 return level > state;
1565 int state = vgic_dist_irq_get_level(vcpu, irq);
1566 return level != state;
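
/*
 * Worked truth table for the check above (illustrative): an
 * edge-triggered IRQ that is already pending (state == 1) rejects
 * level == 1, since there is no new rising edge; a level-triggered IRQ
 * whose line is high (state == 1) accepts level == 0, because the
 * level changed.
 */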
1570 static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
1571 struct irq_phys_map *map,
1572 unsigned int irq_num, bool level)
1574 struct vgic_dist *dist = &kvm->arch.vgic;
1575 struct kvm_vcpu *vcpu;
1576 int edge_triggered, level_triggered;
1578 bool ret = true, can_inject = true;
1580 trace_vgic_update_irq_pending(cpuid, irq_num, level);
1582 if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020))
1585 spin_lock(&dist->lock);
1587 vcpu = kvm_get_vcpu(kvm, cpuid);
1588 edge_triggered = vgic_irq_is_edge(vcpu, irq_num);
1589 level_triggered = !edge_triggered;
1591 if (!vgic_validate_injection(vcpu, irq_num, level)) {
1596 if (irq_num >= VGIC_NR_PRIVATE_IRQS) {
1597 cpuid = dist->irq_spi_cpu[irq_num - VGIC_NR_PRIVATE_IRQS];
1598 if (cpuid == VCPU_NOT_ALLOCATED) {
1599 /* Pretend we use CPU0, and prevent injection */
1603 vcpu = kvm_get_vcpu(kvm, cpuid);
1606 kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid);
1609 if (level_triggered)
1610 vgic_dist_irq_set_level(vcpu, irq_num);
1611 vgic_dist_irq_set_pending(vcpu, irq_num);
1613 if (level_triggered) {
1614 vgic_dist_irq_clear_level(vcpu, irq_num);
1615 if (!vgic_dist_irq_soft_pend(vcpu, irq_num)) {
1616 vgic_dist_irq_clear_pending(vcpu, irq_num);
1617 vgic_cpu_irq_clear(vcpu, irq_num);
1618 if (!compute_pending_for_cpu(vcpu))
1619 clear_bit(cpuid, dist->irq_pending_on_cpu);
1627 enabled = vgic_irq_is_enabled(vcpu, irq_num);
1629 if (!enabled || !can_inject) {
1634 if (!vgic_can_sample_irq(vcpu, irq_num)) {
1636 * Level interrupt in progress, will be picked up
1644 vgic_cpu_irq_set(vcpu, irq_num);
1645 set_bit(cpuid, dist->irq_pending_on_cpu);
1649 spin_unlock(&dist->lock);
1652 /* kick the specified vcpu */
1653 kvm_vcpu_kick(kvm_get_vcpu(kvm, cpuid));
1659 static int vgic_lazy_init(struct kvm *kvm)
1663 if (unlikely(!vgic_initialized(kvm))) {
1665 * We only provide the automatic initialization of the VGIC
1666 * for the legacy case of a GICv2. Any other type must
1667 * be explicitly initialized once set up with the respective KVM device call.
1670 if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2)
1673 mutex_lock(&kvm->lock);
1674 ret = vgic_init(kvm);
1675 mutex_unlock(&kvm->lock);
1682 * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
1683 * @kvm: The VM structure pointer
1684 * @cpuid: The CPU for PPIs
1685 * @irq_num: The IRQ number that is assigned to the device. This IRQ
1686 * must not be mapped to a HW interrupt.
1687 * @level: Edge-triggered: true: to trigger the interrupt
1688 * false: to ignore the call
1689 * Level-sensitive true: raise the input signal
1690 * false: lower the input signal
1692 * The GIC is not concerned with devices being active-LOW or active-HIGH for
1693 * level-sensitive interrupts. You can think of the level parameter as 1
1694 * being HIGH and 0 being LOW and all devices being active-HIGH.
1696 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
1699 struct irq_phys_map *map;
1702 ret = vgic_lazy_init(kvm);
1706 map = vgic_irq_map_search(kvm_get_vcpu(kvm, cpuid), irq_num);
1710 return vgic_update_irq_pending(kvm, cpuid, NULL, irq_num, level);
1714 * kvm_vgic_inject_mapped_irq - Inject a physically mapped IRQ to the vgic
1715 * @kvm: The VM structure pointer
1716 * @cpuid: The CPU for PPIs
1717 * @map: Pointer to an irq_phys_map structure describing the mapping
1718 * @level: Edge-triggered: true: to trigger the interrupt
1719 * false: to ignore the call
1720 * Level-sensitive true: raise the input signal
1721 * false: lower the input signal
1723 * The GIC is not concerned with devices being active-LOW or active-HIGH for
1724 * level-sensitive interrupts. You can think of the level parameter as 1
1725 * being HIGH and 0 being LOW and all devices being active-HIGH.
1727 int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
1728 struct irq_phys_map *map, bool level)
1732 ret = vgic_lazy_init(kvm);
1736 return vgic_update_irq_pending(kvm, cpuid, map, map->virt_irq, level);
1739 static irqreturn_t vgic_maintenance_handler(int irq, void *data)
1742 * We cannot rely on the vgic maintenance interrupt to be
1743 * delivered synchronously. This means we can only use it to
1744 * exit the VM, and we perform the handling of EOIed
1745 * interrupts on the exit path (see vgic_process_maintenance).
1750 static struct list_head *vgic_get_irq_phys_map_list(struct kvm_vcpu *vcpu,
1753 if (virt_irq < VGIC_NR_PRIVATE_IRQS)
1754 return &vcpu->arch.vgic_cpu.irq_phys_map_list;
1756 return &vcpu->kvm->arch.vgic.irq_phys_map_list;
1760 * kvm_vgic_map_phys_irq - map a virtual IRQ to a physical IRQ
1761 * @vcpu: The VCPU pointer
1762 * @virt_irq: The virtual irq number
1763 * @irq: The Linux IRQ number
1765 * Establish a mapping between a guest visible irq (@virt_irq) and a
1766 * Linux irq (@irq). On injection, @virt_irq will be associated with
1767 * the physical interrupt represented by @irq. This mapping can be
1768 * established multiple times as long as the parameters are the same.
1770 * Returns a valid pointer on success, and an error pointer otherwise
1772 struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
1773 int virt_irq, int irq)
1775 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1776 struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq);
1777 struct irq_phys_map *map;
1778 struct irq_phys_map_entry *entry;
1779 struct irq_desc *desc;
1780 struct irq_data *data;
1783 desc = irq_to_desc(irq);
1785 kvm_err("%s: no interrupt descriptor\n", __func__);
1786 return ERR_PTR(-EINVAL);
1789 data = irq_desc_get_irq_data(desc);
1790 while (data->parent_data)
1791 data = data->parent_data;
1793 phys_irq = data->hwirq;
1795 /* Create a new mapping */
1796 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1798 return ERR_PTR(-ENOMEM);
1800 spin_lock(&dist->irq_phys_map_lock);
1802 /* Try to match an existing mapping */
1803 map = vgic_irq_map_search(vcpu, virt_irq);
1805 /* Make sure this mapping matches */
1806 if (map->phys_irq != phys_irq ||
1808 map = ERR_PTR(-EINVAL);
1810 /* Found an existing, valid mapping */
1815 map->virt_irq = virt_irq;
1816 map->phys_irq = phys_irq;
1819 list_add_tail_rcu(&entry->entry, root);
1822 spin_unlock(&dist->irq_phys_map_lock);
1823 /* If we've found a hit in the existing list, free the useless entry */
1825 if (IS_ERR(map) || map != &entry->map)
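
/*
 * Usage sketch for the mapping API (hedged; the arch timer is the kind
 * of in-kernel user this is meant for): map once, inject through the
 * returned handle, unmap on teardown.
 *
 *	struct irq_phys_map *map;
 *
 *	map = kvm_vgic_map_phys_irq(vcpu, virt_irq, host_irq);
 *	if (IS_ERR(map))
 *		return PTR_ERR(map);
 *	kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id, map, true);
 *	...
 *	kvm_vgic_unmap_phys_irq(vcpu, map);
 */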
1830 static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu,
1833 struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq);
1834 struct irq_phys_map_entry *entry;
1835 struct irq_phys_map *map;
1839 list_for_each_entry_rcu(entry, root, entry) {
1841 if (map->virt_irq == virt_irq) {
1852 static void vgic_free_phys_irq_map_rcu(struct rcu_head *rcu)
1854 struct irq_phys_map_entry *entry;
1856 entry = container_of(rcu, struct irq_phys_map_entry, rcu);
1861 * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping
1862 * @vcpu: The VCPU pointer
1863 * @map: The pointer to a mapping obtained through kvm_vgic_map_phys_irq
1865 * Remove an existing mapping between virtual and physical interrupts.
1867 int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map)
1869 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1870 struct irq_phys_map_entry *entry;
1871 struct list_head *root;
1876 root = vgic_get_irq_phys_map_list(vcpu, map->virt_irq);
1878 spin_lock(&dist->irq_phys_map_lock);
1880 list_for_each_entry(entry, root, entry) {
1881 if (&entry->map == map) {
1882 list_del_rcu(&entry->entry);
1883 call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu);
1888 spin_unlock(&dist->irq_phys_map_lock);
1893 static void vgic_destroy_irq_phys_map(struct kvm *kvm, struct list_head *root)
1895 struct vgic_dist *dist = &kvm->arch.vgic;
1896 struct irq_phys_map_entry *entry;
1898 spin_lock(&dist->irq_phys_map_lock);
1900 list_for_each_entry(entry, root, entry) {
1901 list_del_rcu(&entry->entry);
1902 call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu);
1905 spin_unlock(&dist->irq_phys_map_lock);
1908 void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
1910 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1912 kfree(vgic_cpu->pending_shared);
1913 kfree(vgic_cpu->active_shared);
1914 kfree(vgic_cpu->pend_act_shared);
1915 kfree(vgic_cpu->vgic_irq_lr_map);
1916 vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list);
1917 vgic_cpu->pending_shared = NULL;
1918 vgic_cpu->active_shared = NULL;
1919 vgic_cpu->pend_act_shared = NULL;
1920 vgic_cpu->vgic_irq_lr_map = NULL;
1923 static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
1925 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1927 int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8;
1928 vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
1929 vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL);
1930 vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL);
1931 vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL);
1933 if (!vgic_cpu->pending_shared
1934 || !vgic_cpu->active_shared
1935 || !vgic_cpu->pend_act_shared
1936 || !vgic_cpu->vgic_irq_lr_map) {
1937 kvm_vgic_vcpu_destroy(vcpu);
1941 memset(vgic_cpu->vgic_irq_lr_map, LR_EMPTY, nr_irqs);
1944 * Store the number of LRs per vcpu, so we don't have to go
1945 * all the way to the distributor structure to find out. Only
1946 * assembly code should use this one.
1948 vgic_cpu->nr_lr = vgic->nr_lr;
1954 * kvm_vgic_vcpu_early_init - Earliest possible per-vcpu vgic init stage
1956 * No memory allocation should be performed here, only static init.
1958 void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu)
1960 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1961 INIT_LIST_HEAD(&vgic_cpu->irq_phys_map_list);
1965 * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW
1967 * The host's GIC naturally limits the maximum number of VCPUs a guest can use.
1970 int kvm_vgic_get_max_vcpus(void)
1972 return vgic->max_gic_vcpus;
1975 void kvm_vgic_destroy(struct kvm *kvm)
1977 struct vgic_dist *dist = &kvm->arch.vgic;
1978 struct kvm_vcpu *vcpu;
1981 kvm_for_each_vcpu(i, vcpu, kvm)
1982 kvm_vgic_vcpu_destroy(vcpu);
1984 vgic_free_bitmap(&dist->irq_enabled);
1985 vgic_free_bitmap(&dist->irq_level);
1986 vgic_free_bitmap(&dist->irq_pending);
1987 vgic_free_bitmap(&dist->irq_soft_pend);
1988 vgic_free_bitmap(&dist->irq_queued);
1989 vgic_free_bitmap(&dist->irq_cfg);
1990 vgic_free_bytemap(&dist->irq_priority);
1991 if (dist->irq_spi_target) {
1992 for (i = 0; i < dist->nr_cpus; i++)
1993 vgic_free_bitmap(&dist->irq_spi_target[i]);
1995 kfree(dist->irq_sgi_sources);
1996 kfree(dist->irq_spi_cpu);
1997 kfree(dist->irq_spi_mpidr);
1998 kfree(dist->irq_spi_target);
1999 kfree(dist->irq_pending_on_cpu);
2000 kfree(dist->irq_active_on_cpu);
2001 vgic_destroy_irq_phys_map(kvm, &dist->irq_phys_map_list);
2002 dist->irq_sgi_sources = NULL;
2003 dist->irq_spi_cpu = NULL;
2004 dist->irq_spi_target = NULL;
2005 dist->irq_pending_on_cpu = NULL;
2006 dist->irq_active_on_cpu = NULL;
2011 * Allocate and initialize the various data structures. Must be called
2012 * with kvm->lock held!
2014 int vgic_init(struct kvm *kvm)
2016 struct vgic_dist *dist = &kvm->arch.vgic;
2017 struct kvm_vcpu *vcpu;
2018 int nr_cpus, nr_irqs;
2019 int ret, i, vcpu_id;
2021 if (vgic_initialized(kvm))
2024 nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus);
2025 if (!nr_cpus) /* No vcpus? Can't be good... */
2029 * If nobody configured the number of interrupts, use the
2033 dist->nr_irqs = VGIC_NR_IRQS_LEGACY;
2035 nr_irqs = dist->nr_irqs;
2037 ret = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs);
2038 ret |= vgic_init_bitmap(&dist->irq_level, nr_cpus, nr_irqs);
2039 ret |= vgic_init_bitmap(&dist->irq_pending, nr_cpus, nr_irqs);
2040 ret |= vgic_init_bitmap(&dist->irq_soft_pend, nr_cpus, nr_irqs);
2041 ret |= vgic_init_bitmap(&dist->irq_queued, nr_cpus, nr_irqs);
2042 ret |= vgic_init_bitmap(&dist->irq_active, nr_cpus, nr_irqs);
2043 ret |= vgic_init_bitmap(&dist->irq_cfg, nr_cpus, nr_irqs);
2044 ret |= vgic_init_bytemap(&dist->irq_priority, nr_cpus, nr_irqs);
2049 dist->irq_sgi_sources = kzalloc(nr_cpus * VGIC_NR_SGIS, GFP_KERNEL);
2050 dist->irq_spi_cpu = kzalloc(nr_irqs - VGIC_NR_PRIVATE_IRQS, GFP_KERNEL);
2051 dist->irq_spi_target = kzalloc(sizeof(*dist->irq_spi_target) * nr_cpus,
2053 dist->irq_pending_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long),
2055 dist->irq_active_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long),
2057 if (!dist->irq_sgi_sources ||
2058 !dist->irq_spi_cpu ||
2059 !dist->irq_spi_target ||
2060 !dist->irq_pending_on_cpu ||
2061 !dist->irq_active_on_cpu) {
2066 for (i = 0; i < nr_cpus; i++)
2067 ret |= vgic_init_bitmap(&dist->irq_spi_target[i],
2073 ret = kvm->arch.vgic.vm_ops.init_model(kvm);
2077 kvm_for_each_vcpu(vcpu_id, vcpu, kvm) {
2078 ret = vgic_vcpu_init_maps(vcpu, nr_irqs);
2080 kvm_err("VGIC: Failed to allocate vcpu memory\n");
2085 * Enable and configure all SGIs to be edge-triggered and
2086 * configure all PPIs as level-triggered.
2088 for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
2089 if (i < VGIC_NR_SGIS) {
2091 vgic_bitmap_set_irq_val(&dist->irq_enabled,
2092 vcpu->vcpu_id, i, 1);
2093 vgic_bitmap_set_irq_val(&dist->irq_cfg,
2096 } else if (i < VGIC_NR_PRIVATE_IRQS) {
2098 vgic_bitmap_set_irq_val(&dist->irq_cfg,
2109 kvm_vgic_destroy(kvm);
2114 static int init_vgic_model(struct kvm *kvm, int type)
2117 case KVM_DEV_TYPE_ARM_VGIC_V2:
2118 vgic_v2_init_emulation(kvm);
2120 #ifdef CONFIG_ARM_GIC_V3
2121 case KVM_DEV_TYPE_ARM_VGIC_V3:
2122 vgic_v3_init_emulation(kvm);
2129 if (atomic_read(&kvm->online_vcpus) > kvm->arch.max_vcpus)
2136 * kvm_vgic_early_init - Earliest possible vgic initialization stage
2138 * No memory allocation should be performed here, only static init.
2140 void kvm_vgic_early_init(struct kvm *kvm)
2142 spin_lock_init(&kvm->arch.vgic.lock);
2143 spin_lock_init(&kvm->arch.vgic.irq_phys_map_lock);
2144 INIT_LIST_HEAD(&kvm->arch.vgic.irq_phys_map_list);
2147 int kvm_vgic_create(struct kvm *kvm, u32 type)
2149 int i, vcpu_lock_idx = -1, ret;
2150 struct kvm_vcpu *vcpu;
2152 mutex_lock(&kvm->lock);
2154 if (irqchip_in_kernel(kvm)) {
2160 * This function is also called by the KVM_CREATE_IRQCHIP handler,
2161 * which had no chance yet to check the availability of the GICv2
2162 * emulation. So check this here again. KVM_CREATE_DEVICE does
2163 * the proper checks already.
2165 if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && !vgic->can_emulate_gicv2) {
2171 * Any time a vcpu is run, vcpu_load is called which tries to grab the
2172 * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure
2173 * that no other VCPUs are run while we create the vgic.
2176 kvm_for_each_vcpu(i, vcpu, kvm) {
2177 if (!mutex_trylock(&vcpu->mutex))
2182 kvm_for_each_vcpu(i, vcpu, kvm) {
2183 if (vcpu->arch.has_run_once)
2188 ret = init_vgic_model(kvm, type);
2192 kvm->arch.vgic.in_kernel = true;
2193 kvm->arch.vgic.vgic_model = type;
2194 kvm->arch.vgic.vctrl_base = vgic->vctrl_base;
2195 kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
2196 kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
2197 kvm->arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF;
2200 for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
2201 vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
2202 mutex_unlock(&vcpu->mutex);
2206 mutex_unlock(&kvm->lock);
2210 static int vgic_ioaddr_overlap(struct kvm *kvm)
2212 phys_addr_t dist = kvm->arch.vgic.vgic_dist_base;
2213 phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base;
2215 if (IS_VGIC_ADDR_UNDEF(dist) || IS_VGIC_ADDR_UNDEF(cpu))
2217 if ((dist <= cpu && dist + KVM_VGIC_V2_DIST_SIZE > cpu) ||
2218 (cpu <= dist && cpu + KVM_VGIC_V2_CPU_SIZE > dist))
2223 static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr,
2224 phys_addr_t addr, phys_addr_t size)
2228 if (addr & ~KVM_PHYS_MASK)
2231 if (addr & (SZ_4K - 1))
2234 if (!IS_VGIC_ADDR_UNDEF(*ioaddr))
2236 if (addr + size < addr)
2240 ret = vgic_ioaddr_overlap(kvm);
2242 *ioaddr = VGIC_ADDR_UNDEF;
2248 * kvm_vgic_addr - set or get vgic VM base addresses
2249 * @kvm: pointer to the vm struct
2250 * @type: the VGIC addr type, one of KVM_VGIC_V[23]_ADDR_TYPE_XXX
2251 * @addr: pointer to address value
2252 * @write: if true set the address in the VM address space, if false read the address
2255 * Set or get the vgic base addresses for the distributor and the virtual CPU
2256 * interface in the VM physical address space. These addresses are properties
2257 * of the emulated core/SoC and therefore user space initially knows this information.
2260 int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
2263 struct vgic_dist *vgic = &kvm->arch.vgic;
2265 phys_addr_t *addr_ptr, block_size;
2266 phys_addr_t alignment;
2268 mutex_lock(&kvm->lock);
2270 case KVM_VGIC_V2_ADDR_TYPE_DIST:
2271 type_needed = KVM_DEV_TYPE_ARM_VGIC_V2;
2272 addr_ptr = &vgic->vgic_dist_base;
2273 block_size = KVM_VGIC_V2_DIST_SIZE;
2276 case KVM_VGIC_V2_ADDR_TYPE_CPU:
2277 type_needed = KVM_DEV_TYPE_ARM_VGIC_V2;
2278 addr_ptr = &vgic->vgic_cpu_base;
2279 block_size = KVM_VGIC_V2_CPU_SIZE;
2282 #ifdef CONFIG_ARM_GIC_V3
2283 case KVM_VGIC_V3_ADDR_TYPE_DIST:
2284 type_needed = KVM_DEV_TYPE_ARM_VGIC_V3;
2285 addr_ptr = &vgic->vgic_dist_base;
2286 block_size = KVM_VGIC_V3_DIST_SIZE;
2289 case KVM_VGIC_V3_ADDR_TYPE_REDIST:
2290 type_needed = KVM_DEV_TYPE_ARM_VGIC_V3;
2291 addr_ptr = &vgic->vgic_redist_base;
2292 block_size = KVM_VGIC_V3_REDIST_SIZE;
2301 if (vgic->vgic_model != type_needed) {
2307 if (!IS_ALIGNED(*addr, alignment))
2310 r = vgic_ioaddr_assign(kvm, addr_ptr, *addr,
2317 mutex_unlock(&kvm->lock);
2321 int vgic_set_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
2325 switch (attr->group) {
2326 case KVM_DEV_ARM_VGIC_GRP_ADDR: {
2327 u64 __user *uaddr = (u64 __user *)(long)attr->addr;
2329 unsigned long type = (unsigned long)attr->attr;
2331 if (copy_from_user(&addr, uaddr, sizeof(addr)))
2334 r = kvm_vgic_addr(dev->kvm, type, &addr, true);
2335 return (r == -ENODEV) ? -ENXIO : r;
2337 case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
2338 u32 __user *uaddr = (u32 __user *)(long)attr->addr;
2342 if (get_user(val, uaddr))
2347 * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs
2348 * - at most 1024 interrupts
2349 * - a multiple of 32 interrupts
2351 if (val < (VGIC_NR_PRIVATE_IRQS + 32) ||
2352 val > VGIC_MAX_IRQS ||
2356 mutex_lock(&dev->kvm->lock);
2358 if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_irqs)
2361 dev->kvm->arch.vgic.nr_irqs = val;
2363 mutex_unlock(&dev->kvm->lock);
2367 case KVM_DEV_ARM_VGIC_GRP_CTRL: {
2368 switch (attr->attr) {
2369 case KVM_DEV_ARM_VGIC_CTRL_INIT:
2370 r = vgic_init(dev->kvm);
2380 int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
2384 switch (attr->group) {
2385 case KVM_DEV_ARM_VGIC_GRP_ADDR: {
2386 u64 __user *uaddr = (u64 __user *)(long)attr->addr;
2388 unsigned long type = (unsigned long)attr->attr;
2390 r = kvm_vgic_addr(dev->kvm, type, &addr, false);
2392 return (r == -ENODEV) ? -ENXIO : r;
2394 if (copy_to_user(uaddr, &addr, sizeof(addr)))
2398 case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
2399 u32 __user *uaddr = (u32 __user *)(long)attr->addr;
2401 r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr);
2410 int vgic_has_attr_regs(const struct vgic_io_range *ranges, phys_addr_t offset)
2412 if (vgic_find_range(ranges, 4, offset))
2418 static void vgic_init_maintenance_interrupt(void *info)
2420 enable_percpu_irq(vgic->maint_irq, 0);
2423 static int vgic_cpu_notify(struct notifier_block *self,
2424 unsigned long action, void *cpu)
2428 case CPU_STARTING_FROZEN:
2429 vgic_init_maintenance_interrupt(NULL);
2432 case CPU_DYING_FROZEN:
2433 disable_percpu_irq(vgic->maint_irq);
2440 static struct notifier_block vgic_cpu_nb = {
2441 .notifier_call = vgic_cpu_notify,
2444 static const struct of_device_id vgic_ids[] = {
2445 { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
2446 { .compatible = "arm,cortex-a7-gic", .data = vgic_v2_probe, },
2447 { .compatible = "arm,gic-400", .data = vgic_v2_probe, },
2448 { .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
2452 int kvm_vgic_hyp_init(void)
2454 const struct of_device_id *matched_id;
2455 const int (*vgic_probe)(struct device_node *, const struct vgic_ops **,
2456 const struct vgic_params **);
2457 struct device_node *vgic_node;
2460 vgic_node = of_find_matching_node_and_match(NULL,
2461 vgic_ids, &matched_id);
2463 kvm_err("error: no compatible GIC node found\n");
2467 vgic_probe = matched_id->data;
2468 ret = vgic_probe(vgic_node, &vgic_ops, &vgic);
2472 ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler,
2473 "vgic", kvm_get_running_vcpus());
2475 kvm_err("Cannot register interrupt %d\n", vgic->maint_irq);
2479 ret = __register_cpu_notifier(&vgic_cpu_nb);
2481 kvm_err("Cannot register vgic CPU notifier\n");
2485 on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
2490 free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
2494 int kvm_irq_map_gsi(struct kvm *kvm,
2495 struct kvm_kernel_irq_routing_entry *entries,
2501 int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
2506 int kvm_set_irq(struct kvm *kvm, int irq_source_id,
2507 u32 irq, int level, bool line_status)
2509 unsigned int spi = irq + VGIC_NR_PRIVATE_IRQS;
2511 trace_kvm_set_irq(irq, level, irq_source_id);
2513 BUG_ON(!vgic_initialized(kvm));
2515 return kvm_vgic_inject_irq(kvm, 0, spi, level);
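
/*
 * Example (illustrative): userspace GSI numbering starts at the first
 * shared interrupt, so KVM_IRQ_LINE with irq == 0 injects SPI 32
 * (0 + VGIC_NR_PRIVATE_IRQS) into the guest.
 */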
2518 /* MSI not implemented yet */
2519 int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
2520 struct kvm *kvm, int irq_source_id,
2521 int level, bool line_status)