From 663594aafb438f8c4e51d4bf2dbf48b9f68aedb7 Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Thu, 19 Mar 2026 15:51:01 +0000 Subject: KVM: arm64: vgic: Split out mapping IRQs and setting irq_ops Prior to this change, the act of mapping a virtual IRQ to a physical one also set the irq_ops. Unmapping then reset the irq_ops to NULL. So far, this has been fine and hasn't caused any major issues. Now, however, as GICv5 support is being added to KVM, it has become apparent that conflating mapping/unmapping IRQs and setting/clearing irq_ops can cause issues. The reason is that the upcoming GICv5 support introduces a set of default irq_ops for PPIs, and removing this when unmapping will cause things to break rather horribly. Split out the mapping/unmapping of IRQs from the setting/clearing of irq_ops. The arch timer code is updated to set the irq_ops following a successful map. The irq_ops are intentionally not removed again on an unmap as the only irq_op introduced by the arch timer only takes effect if the hw bit in struct vgic_irq is set. Therefore, it is safe to leave this in place, and it avoids additional complexity when GICv5 support is introduced. 
Signed-off-by: Sascha Bischoff Link: https://patch.msgid.link/20260319154937.3619520-6-sascha.bischoff@arm.com Signed-off-by: Marc Zyngier --- include/kvm/arm_vgic.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index f2eafc65bbf4..46262d1433bc 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -397,8 +397,11 @@ void kvm_vgic_init_cpu_hardware(void); int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, unsigned int intid, bool level, void *owner); +void kvm_vgic_set_irq_ops(struct kvm_vcpu *vcpu, u32 vintid, + struct irq_ops *ops); +void kvm_vgic_clear_irq_ops(struct kvm_vcpu *vcpu, u32 vintid); int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, - u32 vintid, struct irq_ops *ops); + u32 vintid); int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid); int kvm_vgic_get_map(struct kvm_vcpu *vcpu, unsigned int vintid); bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid); -- cgit v1.2.3 From c547c51ff4d44c787330506737c5ce7808e536cc Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Thu, 19 Mar 2026 15:51:47 +0000 Subject: KVM: arm64: gic-v5: Add ARM_VGIC_V5 device to KVM headers This is the base GICv5 device which is to be used with the KVM_CREATE_DEVICE ioctl to create a GICv5-based vgic. 
Signed-off-by: Sascha Bischoff Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20260319154937.3619520-9-sascha.bischoff@arm.com Signed-off-by: Marc Zyngier --- include/uapi/linux/kvm.h | 2 ++ tools/include/uapi/linux/kvm.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 80364d4dbebb..d0c0c8605976 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1224,6 +1224,8 @@ enum kvm_device_type { #define KVM_DEV_TYPE_LOONGARCH_EIOINTC KVM_DEV_TYPE_LOONGARCH_EIOINTC KVM_DEV_TYPE_LOONGARCH_PCHPIC, #define KVM_DEV_TYPE_LOONGARCH_PCHPIC KVM_DEV_TYPE_LOONGARCH_PCHPIC + KVM_DEV_TYPE_ARM_VGIC_V5, +#define KVM_DEV_TYPE_ARM_VGIC_V5 KVM_DEV_TYPE_ARM_VGIC_V5 KVM_DEV_TYPE_MAX, diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 65500f5db379..713e4360eca0 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -1220,6 +1220,8 @@ enum kvm_device_type { #define KVM_DEV_TYPE_LOONGARCH_EIOINTC KVM_DEV_TYPE_LOONGARCH_EIOINTC KVM_DEV_TYPE_LOONGARCH_PCHPIC, #define KVM_DEV_TYPE_LOONGARCH_PCHPIC KVM_DEV_TYPE_LOONGARCH_PCHPIC + KVM_DEV_TYPE_ARM_VGIC_V5, +#define KVM_DEV_TYPE_ARM_VGIC_V5 KVM_DEV_TYPE_ARM_VGIC_V5 KVM_DEV_TYPE_MAX, -- cgit v1.2.3 From eb8bce08ecb12fa0e76af23432f1adb162248ca6 Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Thu, 19 Mar 2026 15:52:03 +0000 Subject: KVM: arm64: gic: Introduce interrupt type helpers GICv5 has moved from using interrupt ranges for different interrupt types to using some of the upper bits of the interrupt ID to denote the interrupt type. This is not compatible with older GICs (which rely on ranges of interrupts to determine the type), and hence a set of helpers is introduced. These helpers take a struct kvm*, and use the vgic model to determine how to interpret the interrupt ID. Helpers are introduced for PPIs, SPIs, and LPIs. 
Additionally, a helper is introduced to determine if an interrupt is private - SGIs and PPIs for older GICs, and PPIs only for GICv5. Additionally, vgic_is_v5() is introduced (which unsurprisingly returns true when running a GICv5 guest), and the existing vgic_is_v3() check is moved from vgic.h to arm_vgic.h (to live alongside the vgic_is_v5() one), and has been converted into a macro. The helpers are plumbed into the core vgic code, as well as the Arch Timer and PMU code. There should be no functional changes as part of this change. Signed-off-by: Sascha Bischoff Reviewed-by: Joey Gouly Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20260319154937.3619520-10-sascha.bischoff@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arch_timer.c | 2 +- arch/arm64/kvm/pmu-emul.c | 7 ++- arch/arm64/kvm/vgic/vgic-kvm-device.c | 2 +- arch/arm64/kvm/vgic/vgic.c | 14 ++--- arch/arm64/kvm/vgic/vgic.h | 5 -- include/kvm/arm_vgic.h | 102 ++++++++++++++++++++++++++++++++-- 6 files changed, 110 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index d31bc1e7a13c..92870ee6dacd 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -1603,7 +1603,7 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) if (get_user(irq, uaddr)) return -EFAULT; - if (!(irq_is_ppi(irq))) + if (!(irq_is_ppi(vcpu->kvm, irq))) return -EINVAL; mutex_lock(&vcpu->kvm->arch.config_lock); diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 93cc9bbb5cec..41a3c5dc2bca 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -939,7 +939,8 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) * number against the dimensions of the vgic and make sure * it's valid. 
*/ - if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq)) + if (!irq_is_ppi(vcpu->kvm, irq) && + !vgic_valid_spi(vcpu->kvm, irq)) return -EINVAL; } else if (kvm_arm_pmu_irq_initialized(vcpu)) { return -EINVAL; @@ -991,7 +992,7 @@ static bool pmu_irq_is_valid(struct kvm *kvm, int irq) if (!kvm_arm_pmu_irq_initialized(vcpu)) continue; - if (irq_is_ppi(irq)) { + if (irq_is_ppi(vcpu->kvm, irq)) { if (vcpu->arch.pmu.irq_num != irq) return false; } else { @@ -1142,7 +1143,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) return -EFAULT; /* The PMU overflow interrupt can be a PPI or a valid SPI. */ - if (!(irq_is_ppi(irq) || irq_is_spi(irq))) + if (!(irq_is_ppi(vcpu->kvm, irq) || irq_is_spi(vcpu->kvm, irq))) return -EINVAL; if (!pmu_irq_is_valid(kvm, irq)) diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index 3d1a776b716d..b12ba99a423e 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -639,7 +639,7 @@ static int vgic_v3_set_attr(struct kvm_device *dev, if (vgic_initialized(dev->kvm)) return -EBUSY; - if (!irq_is_ppi(val)) + if (!irq_is_ppi(dev->kvm, val)) return -EINVAL; dev->kvm->arch.vgic.mi_intid = val; diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 9e0d26348a2a..2f3f892cbddc 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -94,7 +94,7 @@ struct vgic_irq *vgic_get_irq(struct kvm *kvm, u32 intid) } /* LPIs */ - if (intid >= VGIC_MIN_LPI) + if (irq_is_lpi(kvm, intid)) return vgic_get_lpi(kvm, intid); return NULL; @@ -123,7 +123,7 @@ static void vgic_release_lpi_locked(struct vgic_dist *dist, struct vgic_irq *irq static __must_check bool __vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) { - if (irq->intid < VGIC_MIN_LPI) + if (!irq_is_lpi(kvm, irq->intid)) return false; return refcount_dec_and_test(&irq->refcount); @@ -148,7 +148,7 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq 
*irq) * Acquire/release it early on lockdep kernels to make locking issues * in rare release paths a bit more obvious. */ - if (IS_ENABLED(CONFIG_LOCKDEP) && irq->intid >= VGIC_MIN_LPI) { + if (IS_ENABLED(CONFIG_LOCKDEP) && irq_is_lpi(kvm, irq->intid)) { guard(spinlock_irqsave)(&dist->lpi_xa.xa_lock); } @@ -186,7 +186,7 @@ void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu) raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags); list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) { - if (irq->intid >= VGIC_MIN_LPI) { + if (irq_is_lpi(vcpu->kvm, irq->intid)) { raw_spin_lock(&irq->irq_lock); list_del(&irq->ap_list); irq->vcpu = NULL; @@ -521,12 +521,12 @@ int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, if (ret) return ret; - if (!vcpu && intid < VGIC_NR_PRIVATE_IRQS) + if (!vcpu && irq_is_private(kvm, intid)) return -EINVAL; trace_vgic_update_irq_pending(vcpu ? vcpu->vcpu_idx : 0, intid, level); - if (intid < VGIC_NR_PRIVATE_IRQS) + if (irq_is_private(kvm, intid)) irq = vgic_get_vcpu_irq(vcpu, intid); else irq = vgic_get_irq(kvm, intid); @@ -700,7 +700,7 @@ int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner) return -EAGAIN; /* SGIs and LPIs cannot be wired up to any device */ - if (!irq_is_ppi(intid) && !vgic_valid_spi(vcpu->kvm, intid)) + if (!irq_is_ppi(vcpu->kvm, intid) && !vgic_valid_spi(vcpu->kvm, intid)) return -EINVAL; irq = vgic_get_vcpu_irq(vcpu, intid); diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 0bb8fa10bb4e..f2924f821197 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -454,11 +454,6 @@ void vgic_v3_put_nested(struct kvm_vcpu *vcpu); void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu *vcpu); void vgic_v3_nested_update_mi(struct kvm_vcpu *vcpu); -static inline bool vgic_is_v3(struct kvm *kvm) -{ - return kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3; -} - static inline bool vgic_host_has_gicv3(void) { /* diff --git 
a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 46262d1433bc..b8011b395796 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -19,6 +19,7 @@ #include #include +#include #define VGIC_V3_MAX_CPUS 512 #define VGIC_V2_MAX_CPUS 8 @@ -31,9 +32,88 @@ #define VGIC_MIN_LPI 8192 #define KVM_IRQCHIP_NUM_PINS (1020 - 32) -#define irq_is_ppi(irq) ((irq) >= VGIC_NR_SGIS && (irq) < VGIC_NR_PRIVATE_IRQS) -#define irq_is_spi(irq) ((irq) >= VGIC_NR_PRIVATE_IRQS && \ - (irq) <= VGIC_MAX_SPI) +#define is_v5_type(t, i) (FIELD_GET(GICV5_HWIRQ_TYPE, (i)) == (t)) + +#define __irq_is_sgi(t, i) \ + ({ \ + bool __ret; \ + \ + switch (t) { \ + case KVM_DEV_TYPE_ARM_VGIC_V5: \ + __ret = false; \ + break; \ + default: \ + __ret = (i) < VGIC_NR_SGIS; \ + } \ + \ + __ret; \ + }) + +#define __irq_is_ppi(t, i) \ + ({ \ + bool __ret; \ + \ + switch (t) { \ + case KVM_DEV_TYPE_ARM_VGIC_V5: \ + __ret = is_v5_type(GICV5_HWIRQ_TYPE_PPI, (i)); \ + break; \ + default: \ + __ret = (i) >= VGIC_NR_SGIS; \ + __ret &= (i) < VGIC_NR_PRIVATE_IRQS; \ + } \ + \ + __ret; \ + }) + +#define __irq_is_spi(t, i) \ + ({ \ + bool __ret; \ + \ + switch (t) { \ + case KVM_DEV_TYPE_ARM_VGIC_V5: \ + __ret = is_v5_type(GICV5_HWIRQ_TYPE_SPI, (i)); \ + break; \ + default: \ + __ret = (i) <= VGIC_MAX_SPI; \ + __ret &= (i) >= VGIC_NR_PRIVATE_IRQS; \ + } \ + \ + __ret; \ + }) + +#define __irq_is_lpi(t, i) \ + ({ \ + bool __ret; \ + \ + switch (t) { \ + case KVM_DEV_TYPE_ARM_VGIC_V5: \ + __ret = is_v5_type(GICV5_HWIRQ_TYPE_LPI, (i)); \ + break; \ + default: \ + __ret = (i) >= 8192; \ + } \ + \ + __ret; \ + }) + +#define irq_is_sgi(k, i) __irq_is_sgi((k)->arch.vgic.vgic_model, i) +#define irq_is_ppi(k, i) __irq_is_ppi((k)->arch.vgic.vgic_model, i) +#define irq_is_spi(k, i) __irq_is_spi((k)->arch.vgic.vgic_model, i) +#define irq_is_lpi(k, i) __irq_is_lpi((k)->arch.vgic.vgic_model, i) + +#define irq_is_private(k, i) (irq_is_ppi(k, i) || irq_is_sgi(k, i)) + +#define vgic_v5_get_hwirq_id(x) 
FIELD_GET(GICV5_HWIRQ_ID, (x)) +#define vgic_v5_set_hwirq_id(x) FIELD_PREP(GICV5_HWIRQ_ID, (x)) + +#define __vgic_v5_set_type(t) (FIELD_PREP(GICV5_HWIRQ_TYPE, GICV5_HWIRQ_TYPE_##t)) +#define vgic_v5_make_ppi(x) (__vgic_v5_set_type(PPI) | vgic_v5_set_hwirq_id(x)) +#define vgic_v5_make_spi(x) (__vgic_v5_set_type(SPI) | vgic_v5_set_hwirq_id(x)) +#define vgic_v5_make_lpi(x) (__vgic_v5_set_type(LPI) | vgic_v5_set_hwirq_id(x)) + +#define __vgic_is_v(k, v) ((k)->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V##v) +#define vgic_is_v3(k) (__vgic_is_v(k, 3)) +#define vgic_is_v5(k) (__vgic_is_v(k, 5)) enum vgic_type { VGIC_V2, /* Good ol' GICv2 */ @@ -417,8 +497,20 @@ u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu); #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) #define vgic_initialized(k) ((k)->arch.vgic.initialized) -#define vgic_valid_spi(k, i) (((i) >= VGIC_NR_PRIVATE_IRQS) && \ - ((i) < (k)->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) +#define vgic_valid_spi(k, i) \ + ({ \ + bool __ret = irq_is_spi(k, i); \ + \ + switch ((k)->arch.vgic.vgic_model) { \ + case KVM_DEV_TYPE_ARM_VGIC_V5: \ + __ret &= FIELD_GET(GICV5_HWIRQ_ID, i) < (k)->arch.vgic.nr_spis; \ + break; \ + default: \ + __ret &= (i) < ((k)->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS); \ + } \ + \ + __ret; \ + }) bool kvm_vcpu_has_pending_irqs(struct kvm_vcpu *vcpu); void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From f656807150e3e1c6f76cab918e5adfad6d881d58 Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Thu, 19 Mar 2026 15:52:34 +0000 Subject: KVM: arm64: gic-v5: Detect implemented PPIs on boot As part of booting the system and initialising KVM, create and populate a mask of the implemented PPIs. This mask allows future PPI operations (such as save/restore or state, or syncing back into the shadow state) to only consider PPIs that are actually implemented on the host. The set of implemented virtual PPIs matches the set of implemented physical PPIs for a GICv5 host. 
Therefore, this mask represents all PPIs that could ever be used by a GICv5-based guest on a specific host, albeit pre-filtered by what we support in KVM (see next paragraph). Only architected PPIs are currently supported in KVM with GICv5. Moreover, as KVM only supports a subset of all possible PPIs (Timers, PMU, GICv5 SW_PPI) the PPI mask only includes these PPIs, if present. The timers are always assumed to be present; if we have KVM we have EL2, which means that we have the EL1 & EL2 Timer PPIs. If we have a PMU (v3), then the PMUIRQ is present. The GICv5 SW_PPI is always assumed to be present. Signed-off-by: Sascha Bischoff Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20260319154937.3619520-12-sascha.bischoff@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic-v5.c | 31 +++++++++++++++++++++++++++++++ include/kvm/arm_vgic.h | 13 +++++++++++++ include/linux/irqchip/arm-gic-v5.h | 22 ++++++++++++++++++++++ 3 files changed, 66 insertions(+) (limited to 'include') diff --git a/arch/arm64/kvm/vgic/vgic-v5.c b/arch/arm64/kvm/vgic/vgic-v5.c index 9d9aa5774e63..cf8382a954bb 100644 --- a/arch/arm64/kvm/vgic/vgic-v5.c +++ b/arch/arm64/kvm/vgic/vgic-v5.c @@ -4,10 +4,39 @@ */ #include + +#include #include #include "vgic.h" +static struct vgic_v5_ppi_caps ppi_caps; + +/* + * Not all PPIs are guaranteed to be implemented for GICv5. Determine which + * ones are, and generate a mask. + */ +static void vgic_v5_get_implemented_ppis(void) +{ + if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF)) + return; + + /* + * If we have KVM, we have EL2, which means that we have support for the + * EL1 and EL2 Physical & Virtual timers. 
+ */ + __assign_bit(GICV5_ARCH_PPI_CNTHP, ppi_caps.impl_ppi_mask, 1); + __assign_bit(GICV5_ARCH_PPI_CNTV, ppi_caps.impl_ppi_mask, 1); + __assign_bit(GICV5_ARCH_PPI_CNTHV, ppi_caps.impl_ppi_mask, 1); + __assign_bit(GICV5_ARCH_PPI_CNTP, ppi_caps.impl_ppi_mask, 1); + + /* The SW_PPI should be available */ + __assign_bit(GICV5_ARCH_PPI_SW_PPI, ppi_caps.impl_ppi_mask, 1); + + /* The PMUIRQ is available if we have the PMU */ + __assign_bit(GICV5_ARCH_PPI_PMUIRQ, ppi_caps.impl_ppi_mask, system_supports_pmuv3()); +} + /* * Probe for a vGICv5 compatible interrupt controller, returning 0 on success. * Currently only supports GICv3-based VMs on a GICv5 host, and hence only @@ -18,6 +47,8 @@ int vgic_v5_probe(const struct gic_kvm_info *info) u64 ich_vtr_el2; int ret; + vgic_v5_get_implemented_ppis(); + if (!cpus_have_final_cap(ARM64_HAS_GICV5_LEGACY)) return -ENODEV; diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index b8011b395796..0fabeabedd6d 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -32,6 +32,14 @@ #define VGIC_MIN_LPI 8192 #define KVM_IRQCHIP_NUM_PINS (1020 - 32) +/* + * GICv5 supports 128 PPIs, but only the first 64 are architected. We only + * support the timers and PMU in KVM, both of which are architected. Rather than + * handling twice the state, we instead opt to only support the architected set + * in KVM for now. At a future stage, this can be bumped up to 128, if required. 
+ */ +#define VGIC_V5_NR_PRIVATE_IRQS 64 + #define is_v5_type(t, i) (FIELD_GET(GICV5_HWIRQ_TYPE, (i)) == (t)) #define __irq_is_sgi(t, i) \ @@ -420,6 +428,11 @@ struct vgic_v3_cpu_if { unsigned int used_lrs; }; +/* What PPI capabilities does a GICv5 host have */ +struct vgic_v5_ppi_caps { + DECLARE_BITMAP(impl_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS); +}; + struct vgic_cpu { /* CPU vif control registers for world switch */ union { diff --git a/include/linux/irqchip/arm-gic-v5.h b/include/linux/irqchip/arm-gic-v5.h index b78488df6c98..b1566a7c93ec 100644 --- a/include/linux/irqchip/arm-gic-v5.h +++ b/include/linux/irqchip/arm-gic-v5.h @@ -24,6 +24,28 @@ #define GICV5_HWIRQ_TYPE_LPI UL(0x2) #define GICV5_HWIRQ_TYPE_SPI UL(0x3) +/* + * Architected PPIs + */ +#define GICV5_ARCH_PPI_S_DB_PPI 0x0 +#define GICV5_ARCH_PPI_RL_DB_PPI 0x1 +#define GICV5_ARCH_PPI_NS_DB_PPI 0x2 +#define GICV5_ARCH_PPI_SW_PPI 0x3 +#define GICV5_ARCH_PPI_HACDBSIRQ 0xf +#define GICV5_ARCH_PPI_CNTHVS 0x13 +#define GICV5_ARCH_PPI_CNTHPS 0x14 +#define GICV5_ARCH_PPI_PMBIRQ 0x15 +#define GICV5_ARCH_PPI_COMMIRQ 0x16 +#define GICV5_ARCH_PPI_PMUIRQ 0x17 +#define GICV5_ARCH_PPI_CTIIRQ 0x18 +#define GICV5_ARCH_PPI_GICMNT 0x19 +#define GICV5_ARCH_PPI_CNTHP 0x1a +#define GICV5_ARCH_PPI_CNTV 0x1b +#define GICV5_ARCH_PPI_CNTHV 0x1c +#define GICV5_ARCH_PPI_CNTPS 0x1d +#define GICV5_ARCH_PPI_CNTP 0x1e +#define GICV5_ARCH_PPI_TRBIRQ 0x1f + /* * Tables attributes */ -- cgit v1.2.3 From a258a383b91774ac646517ec1003a442964d8946 Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Thu, 19 Mar 2026 15:52:50 +0000 Subject: KVM: arm64: gic-v5: Sanitize ID_AA64PFR2_EL1.GCIE Add in a sanitization function for ID_AA64PFR2_EL1, preserving the already-present behaviour for the FPMR, MTEFAR, and MTESTOREONLY fields. Add sanitisation for the GCIE field, which is set to IMP if the host supports a GICv5 guest and NI, otherwise. 
Extend the sanitisation that takes place in kvm_vgic_create() to zero the ID_AA64PFR2.GCIE field when a non-GICv5 GIC is created. More importantly, move this sanitisation to a separate function, kvm_vgic_finalize_sysregs(), and call it from kvm_finalize_sys_regs(). We are required to finalize the GIC and GCIE fields a second time in kvm_finalize_sys_regs() due to how QEMU blindly reads out then verbatim restores the system register state. This avoids the issue where both the GCIE and GIC features are marked as present (an architecturally invalid combination), and hence guests fall over. See the comment in kvm_finalize_sys_regs() for more details. Overall, the following happens: * Before an irqchip is created, FEAT_GCIE is presented if the host supports GICv5-based guests. * Once an irqchip is created, all other supported irqchips are hidden from the guest; system register state reflects the guest's irqchip. * Userspace is allowed to set invalid irqchip feature combinations in the system registers, but... * ...invalid combinations are removed a second time prior to the first run of the guest, and things hopefully just work. All of this extra work is required to make sure that "legacy" GICv3 guests based on QEMU transparently work on compatible GICv5 hosts without modification. 
Signed-off-by: Sascha Bischoff Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20260319154937.3619520-13-sascha.bischoff@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 70 ++++++++++++++++++++++++++++++++++++----- arch/arm64/kvm/vgic/vgic-init.c | 49 ++++++++++++++++++++--------- include/kvm/arm_vgic.h | 1 + 3 files changed, 98 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 42c84b7900ff..140cf35f4eeb 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1758,6 +1758,7 @@ static u8 pmuver_to_perfmon(u8 pmuver) static u64 sanitise_id_aa64pfr0_el1(const struct kvm_vcpu *vcpu, u64 val); static u64 sanitise_id_aa64pfr1_el1(const struct kvm_vcpu *vcpu, u64 val); +static u64 sanitise_id_aa64pfr2_el1(const struct kvm_vcpu *vcpu, u64 val); static u64 sanitise_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val); /* Read a sanitised cpufeature ID register by sys_reg_desc */ @@ -1783,10 +1784,7 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu, val = sanitise_id_aa64pfr1_el1(vcpu, val); break; case SYS_ID_AA64PFR2_EL1: - val &= ID_AA64PFR2_EL1_FPMR | - (kvm_has_mte(vcpu->kvm) ? 
- ID_AA64PFR2_EL1_MTEFAR | ID_AA64PFR2_EL1_MTESTOREONLY : - 0); + val = sanitise_id_aa64pfr2_el1(vcpu, val); break; case SYS_ID_AA64ISAR1_EL1: if (!vcpu_has_ptrauth(vcpu)) @@ -2027,6 +2025,23 @@ static u64 sanitise_id_aa64pfr1_el1(const struct kvm_vcpu *vcpu, u64 val) return val; } +static u64 sanitise_id_aa64pfr2_el1(const struct kvm_vcpu *vcpu, u64 val) +{ + val &= ID_AA64PFR2_EL1_FPMR | + ID_AA64PFR2_EL1_MTEFAR | + ID_AA64PFR2_EL1_MTESTOREONLY; + + if (!kvm_has_mte(vcpu->kvm)) { + val &= ~ID_AA64PFR2_EL1_MTEFAR; + val &= ~ID_AA64PFR2_EL1_MTESTOREONLY; + } + + if (vgic_host_has_gicv5()) + val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR2_EL1, GCIE, IMP); + + return val; +} + static u64 sanitise_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val) { val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64DFR0_EL1, DebugVer, V8P8); @@ -2216,6 +2231,12 @@ static int set_id_aa64pfr1_el1(struct kvm_vcpu *vcpu, return set_id_reg(vcpu, rd, user_val); } +static int set_id_aa64pfr2_el1(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, u64 user_val) +{ + return set_id_reg(vcpu, rd, user_val); +} + /* * Allow userspace to de-feature a stage-2 translation granule but prevent it * from claiming the impossible. 
@@ -3197,10 +3218,11 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_AA64PFR1_EL1_RES0 | ID_AA64PFR1_EL1_MPAM_frac | ID_AA64PFR1_EL1_MTE)), - ID_WRITABLE(ID_AA64PFR2_EL1, - ID_AA64PFR2_EL1_FPMR | - ID_AA64PFR2_EL1_MTEFAR | - ID_AA64PFR2_EL1_MTESTOREONLY), + ID_FILTERED(ID_AA64PFR2_EL1, id_aa64pfr2_el1, + ~(ID_AA64PFR2_EL1_FPMR | + ID_AA64PFR2_EL1_MTEFAR | + ID_AA64PFR2_EL1_MTESTOREONLY | + ID_AA64PFR2_EL1_GCIE)), ID_UNALLOCATED(4,3), ID_WRITABLE(ID_AA64ZFR0_EL1, ~ID_AA64ZFR0_EL1_RES0), ID_HIDDEN(ID_AA64SMFR0_EL1), @@ -5671,8 +5693,40 @@ int kvm_finalize_sys_regs(struct kvm_vcpu *vcpu) val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC; kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, val); + val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR2_EL1) & ~ID_AA64PFR2_EL1_GCIE; + kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR2_EL1, val); val = kvm_read_vm_id_reg(kvm, SYS_ID_PFR1_EL1) & ~ID_PFR1_EL1_GIC; kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, val); + } else { + /* + * Certain userspace software - QEMU - samples the system + * register state without creating an irqchip, then blindly + * restores the state prior to running the final guest. This + * means that it restores the virtualization & emulation + * capabilities of the host system, rather than something that + * reflects the final guest state. Moreover, it checks that the + * state was "correctly" restored (i.e., verbatim), bailing if + * it isn't, so masking off invalid state isn't an option. + * + * On GICv5 hardware that supports FEAT_GCIE_LEGACY we can run + * both GICv3- and GICv5-based guests. Therefore, we initially + * present both ID_AA64PFR0.GIC and ID_AA64PFR2.GCIE as IMP to + * reflect that userspace can create EITHER a vGICv3 or a + * vGICv5. This is an architecturally invalid combination, of + * course. Once an in-kernel GIC is created, the sysreg state is + * updated to reflect the actual, valid configuration. 
+ * + * Setting both the GIC and GCIE features to IMP unsurprisingly + * results in guests falling over, and hence we need to fix up + * this mess in KVM. Before running for the first time we yet + * again ensure that the GIC and GCIE fields accurately reflect + * the actual hardware the guest should see. + * + * This hack allows legacy QEMU-based GICv3 guests to run + * unmodified on compatible GICv5 hosts, and avoids the inverse + * problem for GICv5-based guests in the future. + */ + kvm_vgic_finalize_idregs(kvm); } if (vcpu_has_nv(vcpu)) { diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index e9b8b5fc480c..e1be9c5ada7b 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -71,7 +71,6 @@ static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type); int kvm_vgic_create(struct kvm *kvm, u32 type) { struct kvm_vcpu *vcpu; - u64 aa64pfr0, pfr1; unsigned long i; int ret; @@ -145,19 +144,11 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) kvm->arch.vgic.implementation_rev = KVM_VGIC_IMP_REV_LATEST; kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; - aa64pfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC; - pfr1 = kvm_read_vm_id_reg(kvm, SYS_ID_PFR1_EL1) & ~ID_PFR1_EL1_GIC; - - if (type == KVM_DEV_TYPE_ARM_VGIC_V2) { - kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; - } else { - INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions); - aa64pfr0 |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, GIC, IMP); - pfr1 |= SYS_FIELD_PREP_ENUM(ID_PFR1_EL1, GIC, GICv3); - } - - kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, aa64pfr0); - kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, pfr1); + /* + * We've now created the GIC. Update the system register state + * to accurately reflect what we've created. 
+ */ + kvm_vgic_finalize_idregs(kvm); kvm_for_each_vcpu(i, vcpu, kvm) { ret = vgic_allocate_private_irqs_locked(vcpu, type); @@ -617,6 +608,36 @@ out_slots: return ret; } +void kvm_vgic_finalize_idregs(struct kvm *kvm) +{ + u32 type = kvm->arch.vgic.vgic_model; + u64 aa64pfr0, aa64pfr2, pfr1; + + aa64pfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC; + aa64pfr2 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR2_EL1) & ~ID_AA64PFR2_EL1_GCIE; + pfr1 = kvm_read_vm_id_reg(kvm, SYS_ID_PFR1_EL1) & ~ID_PFR1_EL1_GIC; + + switch (type) { + case KVM_DEV_TYPE_ARM_VGIC_V2: + kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; + break; + case KVM_DEV_TYPE_ARM_VGIC_V3: + INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions); + aa64pfr0 |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, GIC, IMP); + pfr1 |= SYS_FIELD_PREP_ENUM(ID_PFR1_EL1, GIC, GICv3); + break; + case KVM_DEV_TYPE_ARM_VGIC_V5: + aa64pfr2 |= SYS_FIELD_PREP_ENUM(ID_AA64PFR2_EL1, GCIE, IMP); + break; + default: + WARN_ONCE(1, "Unknown VGIC type!!!\n"); + } + + kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, aa64pfr0); + kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR2_EL1, aa64pfr2); + kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, pfr1); +} + /* GENERIC PROBE */ void kvm_vgic_cpu_up(void) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 0fabeabedd6d..24969fa8d02d 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -485,6 +485,7 @@ int kvm_vgic_create(struct kvm *kvm, u32 type); void kvm_vgic_destroy(struct kvm *kvm); void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); int kvm_vgic_map_resources(struct kvm *kvm); +void kvm_vgic_finalize_idregs(struct kvm *kvm); int kvm_vgic_hyp_init(void); void kvm_vgic_init_cpu_hardware(void); -- cgit v1.2.3 From af325e87af5da2f686d1ad547edc96f731418f2a Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Thu, 19 Mar 2026 15:53:52 +0000 Subject: KVM: arm64: gic-v5: Add vgic-v5 save/restore hyp interface Introduce the following hyp functions to save/restore GICv5 state: * 
__vgic_v5_save_apr() * __vgic_v5_restore_vmcr_apr() * __vgic_v5_save_ppi_state() - no hypercall required * __vgic_v5_restore_ppi_state() - no hypercall required * __vgic_v5_save_state() - no hypercall required * __vgic_v5_restore_state() - no hypercall required Note that the functions tagged as not requiring hypercalls are always called directly from the same context. They are either called via the vgic_save_state()/vgic_restore_state() path when running with VHE, or via __hyp_vgic_save_state()/__hyp_vgic_restore_state() otherwise. This mimics how vgic_v3_save_state()/vgic_v3_restore_state() are implemented. Overall, the state of the following registers is saved/restored: * ICC_ICSR_EL1 * ICH_APR_EL2 * ICH_PPI_ACTIVERx_EL2 * ICH_PPI_DVIRx_EL2 * ICH_PPI_ENABLERx_EL2 * ICH_PPI_PENDRx_EL2 * ICH_PPI_PRIORITYRx_EL2 * ICH_VMCR_EL2 All of these are saved/restored to/from the KVM vgic_v5 CPUIF shadow state, with the exception of the PPI active, pending, and enable state. The pending state is saved and restored from kvm_host_data as any changes here need to be tracked and propagated back to the vgic_irq shadow structures (coming in a future commit). Therefore, an entry and an exit copy is required. The active and enable state is restored from the vgic_v5 CPUIF, but is saved to kvm_host_data. Again, this needs to be synced back into the shadow data structures. The ICSR must be save/restored as this register is shared between host and guest. Therefore, to avoid leaking host state to the guest, this must be saved and restored. Moreover, as this can be used by the host at any time, it must be save/restored eagerly. Note: the host state is not preserved as the host should only use this register when preemption is disabled. As with GICv3, the VMCR is eagerly saved as this is required when checking if interrupts can be injected or not, and therefore impacts things such as WFI. 
As part of restoring the ICH_VMCR_EL2 and ICH_APR_EL2, GICv3-compat mode is also disabled by setting the ICH_VCTLR_EL2.V3 bit to 0. The corresponding GICv3-compat mode enable is part of the VMCR & APR restore for a GICv3 guest as it only takes effect when actually running a guest. Co-authored-by: Timothy Hayes Signed-off-by: Timothy Hayes Signed-off-by: Sascha Bischoff Link: https://patch.msgid.link/20260319154937.3619520-17-sascha.bischoff@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_asm.h | 2 + arch/arm64/include/asm/kvm_host.h | 16 ++++ arch/arm64/include/asm/kvm_hyp.h | 9 ++ arch/arm64/kvm/hyp/nvhe/Makefile | 2 +- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 16 ++++ arch/arm64/kvm/hyp/vgic-v5-sr.c | 170 +++++++++++++++++++++++++++++++++++++ arch/arm64/kvm/hyp/vhe/Makefile | 2 +- include/kvm/arm_vgic.h | 22 +++++ 8 files changed, 237 insertions(+), 2 deletions(-) create mode 100644 arch/arm64/kvm/hyp/vgic-v5-sr.c (limited to 'include') diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index a1ad12c72ebf..44e4696ca113 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -89,6 +89,8 @@ enum __kvm_host_smccc_func { __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load, __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put, __KVM_HOST_SMCCC_FUNC___pkvm_tlb_flush_vmid, + __KVM_HOST_SMCCC_FUNC___vgic_v5_save_apr, + __KVM_HOST_SMCCC_FUNC___vgic_v5_restore_vmcr_apr, }; #define DECLARE_KVM_VHE_SYM(sym) extern char sym[] diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 64a1ee6c442f..c4a172b70206 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -800,6 +800,22 @@ struct kvm_host_data { /* Last vgic_irq part of the AP list recorded in an LR */ struct vgic_irq *last_lr_irq; + + /* PPI state tracking for GICv5-based guests */ + struct { + /* + * For tracking the PPI pending state, we need both the entry + * state and exit state to correctly detect 
edges as it is + * possible that an interrupt has been injected in software in + * the interim. + */ + DECLARE_BITMAP(pendr_entry, VGIC_V5_NR_PRIVATE_IRQS); + DECLARE_BITMAP(pendr_exit, VGIC_V5_NR_PRIVATE_IRQS); + + /* The saved state of the regs when leaving the guest */ + DECLARE_BITMAP(activer_exit, VGIC_V5_NR_PRIVATE_IRQS); + DECLARE_BITMAP(enabler_exit, VGIC_V5_NR_PRIVATE_IRQS); + } vgic_v5_ppi_state; }; struct kvm_host_psci_config { diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 76ce2b94bd97..2d8dfd534bd9 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -87,6 +87,15 @@ void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if); void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if); int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu); +/* GICv5 */ +void __vgic_v5_save_apr(struct vgic_v5_cpu_if *cpu_if); +void __vgic_v5_restore_vmcr_apr(struct vgic_v5_cpu_if *cpu_if); +/* No hypercalls for the following */ +void __vgic_v5_save_ppi_state(struct vgic_v5_cpu_if *cpu_if); +void __vgic_v5_restore_ppi_state(struct vgic_v5_cpu_if *cpu_if); +void __vgic_v5_save_state(struct vgic_v5_cpu_if *cpu_if); +void __vgic_v5_restore_state(struct vgic_v5_cpu_if *cpu_if); + #ifdef __KVM_NVHE_HYPERVISOR__ void __timer_enable_traps(struct kvm_vcpu *vcpu); void __timer_disable_traps(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index a244ec25f8c5..84a3bf96def6 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -26,7 +26,7 @@ hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \ cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ - ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o + ../fpsimd.o ../hyp-entry.o 
../exception.o ../pgtable.o ../vgic-v5-sr.o hyp-obj-y += ../../../kernel/smccc-call.o hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o hyp-obj-y += $(lib-objs) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index e7790097db93..007fc993f231 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -589,6 +589,20 @@ static void handle___pkvm_teardown_vm(struct kvm_cpu_context *host_ctxt) cpu_reg(host_ctxt, 1) = __pkvm_teardown_vm(handle); } +static void handle___vgic_v5_save_apr(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct vgic_v5_cpu_if *, cpu_if, host_ctxt, 1); + + __vgic_v5_save_apr(kern_hyp_va(cpu_if)); +} + +static void handle___vgic_v5_restore_vmcr_apr(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct vgic_v5_cpu_if *, cpu_if, host_ctxt, 1); + + __vgic_v5_restore_vmcr_apr(kern_hyp_va(cpu_if)); +} + typedef void (*hcall_t)(struct kvm_cpu_context *); #define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x @@ -630,6 +644,8 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__pkvm_vcpu_load), HANDLE_FUNC(__pkvm_vcpu_put), HANDLE_FUNC(__pkvm_tlb_flush_vmid), + HANDLE_FUNC(__vgic_v5_save_apr), + HANDLE_FUNC(__vgic_v5_restore_vmcr_apr), }; static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) diff --git a/arch/arm64/kvm/hyp/vgic-v5-sr.c b/arch/arm64/kvm/hyp/vgic-v5-sr.c new file mode 100644 index 000000000000..f34ea219cc4e --- /dev/null +++ b/arch/arm64/kvm/hyp/vgic-v5-sr.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025, 2026 - Arm Ltd + */ + +#include + +#include + +void __vgic_v5_save_apr(struct vgic_v5_cpu_if *cpu_if) +{ + cpu_if->vgic_apr = read_sysreg_s(SYS_ICH_APR_EL2); +} + +static void __vgic_v5_compat_mode_disable(void) +{ + sysreg_clear_set_s(SYS_ICH_VCTLR_EL2, ICH_VCTLR_EL2_V3, 0); + isb(); +} + +void __vgic_v5_restore_vmcr_apr(struct vgic_v5_cpu_if *cpu_if) +{ + __vgic_v5_compat_mode_disable(); 
+ + write_sysreg_s(cpu_if->vgic_vmcr, SYS_ICH_VMCR_EL2); + write_sysreg_s(cpu_if->vgic_apr, SYS_ICH_APR_EL2); +} + +void __vgic_v5_save_ppi_state(struct vgic_v5_cpu_if *cpu_if) +{ + /* + * The following code assumes that the bitmap storage that we have for + * PPIs is either 64 (architected PPIs, only) or 128 bits (architected & + * impdef PPIs). + */ + BUILD_BUG_ON(VGIC_V5_NR_PRIVATE_IRQS % 64); + + bitmap_write(host_data_ptr(vgic_v5_ppi_state)->activer_exit, + read_sysreg_s(SYS_ICH_PPI_ACTIVER0_EL2), 0, 64); + bitmap_write(host_data_ptr(vgic_v5_ppi_state)->enabler_exit, + read_sysreg_s(SYS_ICH_PPI_ENABLER0_EL2), 0, 64); + bitmap_write(host_data_ptr(vgic_v5_ppi_state)->pendr_exit, + read_sysreg_s(SYS_ICH_PPI_PENDR0_EL2), 0, 64); + + cpu_if->vgic_ppi_priorityr[0] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR0_EL2); + cpu_if->vgic_ppi_priorityr[1] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR1_EL2); + cpu_if->vgic_ppi_priorityr[2] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR2_EL2); + cpu_if->vgic_ppi_priorityr[3] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR3_EL2); + cpu_if->vgic_ppi_priorityr[4] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR4_EL2); + cpu_if->vgic_ppi_priorityr[5] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR5_EL2); + cpu_if->vgic_ppi_priorityr[6] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR6_EL2); + cpu_if->vgic_ppi_priorityr[7] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR7_EL2); + + if (VGIC_V5_NR_PRIVATE_IRQS == 128) { + bitmap_write(host_data_ptr(vgic_v5_ppi_state)->activer_exit, + read_sysreg_s(SYS_ICH_PPI_ACTIVER1_EL2), 64, 64); + bitmap_write(host_data_ptr(vgic_v5_ppi_state)->enabler_exit, + read_sysreg_s(SYS_ICH_PPI_ENABLER1_EL2), 64, 64); + bitmap_write(host_data_ptr(vgic_v5_ppi_state)->pendr_exit, + read_sysreg_s(SYS_ICH_PPI_PENDR1_EL2), 64, 64); + + cpu_if->vgic_ppi_priorityr[8] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR8_EL2); + cpu_if->vgic_ppi_priorityr[9] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR9_EL2); + cpu_if->vgic_ppi_priorityr[10] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR10_EL2); + 
cpu_if->vgic_ppi_priorityr[11] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR11_EL2); + cpu_if->vgic_ppi_priorityr[12] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR12_EL2); + cpu_if->vgic_ppi_priorityr[13] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR13_EL2); + cpu_if->vgic_ppi_priorityr[14] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR14_EL2); + cpu_if->vgic_ppi_priorityr[15] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR15_EL2); + } + + /* Now that we are done, disable DVI */ + write_sysreg_s(0, SYS_ICH_PPI_DVIR0_EL2); + write_sysreg_s(0, SYS_ICH_PPI_DVIR1_EL2); +} + +void __vgic_v5_restore_ppi_state(struct vgic_v5_cpu_if *cpu_if) +{ + DECLARE_BITMAP(pendr, VGIC_V5_NR_PRIVATE_IRQS); + + /* We assume 64 or 128 PPIs - see above comment */ + BUILD_BUG_ON(VGIC_V5_NR_PRIVATE_IRQS % 64); + + /* Enable DVI so that the guest's interrupt config takes over */ + write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_dvir, 0, 64), + SYS_ICH_PPI_DVIR0_EL2); + + write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_activer, 0, 64), + SYS_ICH_PPI_ACTIVER0_EL2); + write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_enabler, 0, 64), + SYS_ICH_PPI_ENABLER0_EL2); + + /* Update the pending state of the NON-DVI'd PPIs, only */ + bitmap_andnot(pendr, host_data_ptr(vgic_v5_ppi_state)->pendr_entry, + cpu_if->vgic_ppi_dvir, VGIC_V5_NR_PRIVATE_IRQS); + write_sysreg_s(bitmap_read(pendr, 0, 64), SYS_ICH_PPI_PENDR0_EL2); + + write_sysreg_s(cpu_if->vgic_ppi_priorityr[0], + SYS_ICH_PPI_PRIORITYR0_EL2); + write_sysreg_s(cpu_if->vgic_ppi_priorityr[1], + SYS_ICH_PPI_PRIORITYR1_EL2); + write_sysreg_s(cpu_if->vgic_ppi_priorityr[2], + SYS_ICH_PPI_PRIORITYR2_EL2); + write_sysreg_s(cpu_if->vgic_ppi_priorityr[3], + SYS_ICH_PPI_PRIORITYR3_EL2); + write_sysreg_s(cpu_if->vgic_ppi_priorityr[4], + SYS_ICH_PPI_PRIORITYR4_EL2); + write_sysreg_s(cpu_if->vgic_ppi_priorityr[5], + SYS_ICH_PPI_PRIORITYR5_EL2); + write_sysreg_s(cpu_if->vgic_ppi_priorityr[6], + SYS_ICH_PPI_PRIORITYR6_EL2); + write_sysreg_s(cpu_if->vgic_ppi_priorityr[7], + SYS_ICH_PPI_PRIORITYR7_EL2); + + if 
(VGIC_V5_NR_PRIVATE_IRQS == 128) { + /* Enable DVI so that the guest's interrupt config takes over */ + write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_dvir, 64, 64), + SYS_ICH_PPI_DVIR1_EL2); + + write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_activer, 64, 64), + SYS_ICH_PPI_ACTIVER1_EL2); + write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_enabler, 64, 64), + SYS_ICH_PPI_ENABLER1_EL2); + write_sysreg_s(bitmap_read(pendr, 64, 64), + SYS_ICH_PPI_PENDR1_EL2); + + write_sysreg_s(cpu_if->vgic_ppi_priorityr[8], + SYS_ICH_PPI_PRIORITYR8_EL2); + write_sysreg_s(cpu_if->vgic_ppi_priorityr[9], + SYS_ICH_PPI_PRIORITYR9_EL2); + write_sysreg_s(cpu_if->vgic_ppi_priorityr[10], + SYS_ICH_PPI_PRIORITYR10_EL2); + write_sysreg_s(cpu_if->vgic_ppi_priorityr[11], + SYS_ICH_PPI_PRIORITYR11_EL2); + write_sysreg_s(cpu_if->vgic_ppi_priorityr[12], + SYS_ICH_PPI_PRIORITYR12_EL2); + write_sysreg_s(cpu_if->vgic_ppi_priorityr[13], + SYS_ICH_PPI_PRIORITYR13_EL2); + write_sysreg_s(cpu_if->vgic_ppi_priorityr[14], + SYS_ICH_PPI_PRIORITYR14_EL2); + write_sysreg_s(cpu_if->vgic_ppi_priorityr[15], + SYS_ICH_PPI_PRIORITYR15_EL2); + } else { + write_sysreg_s(0, SYS_ICH_PPI_DVIR1_EL2); + + write_sysreg_s(0, SYS_ICH_PPI_ACTIVER1_EL2); + write_sysreg_s(0, SYS_ICH_PPI_ENABLER1_EL2); + write_sysreg_s(0, SYS_ICH_PPI_PENDR1_EL2); + + write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR8_EL2); + write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR9_EL2); + write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR10_EL2); + write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR11_EL2); + write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR12_EL2); + write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR13_EL2); + write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR14_EL2); + write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR15_EL2); + } +} + +void __vgic_v5_save_state(struct vgic_v5_cpu_if *cpu_if) +{ + cpu_if->vgic_vmcr = read_sysreg_s(SYS_ICH_VMCR_EL2); + cpu_if->vgic_icsr = read_sysreg_s(SYS_ICC_ICSR_EL1); +} + +void __vgic_v5_restore_state(struct vgic_v5_cpu_if *cpu_if) +{ + write_sysreg_s(cpu_if->vgic_icsr, SYS_ICC_ICSR_EL1); +} 
diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile index afc4aed9231a..9695328bbd96 100644 --- a/arch/arm64/kvm/hyp/vhe/Makefile +++ b/arch/arm64/kvm/hyp/vhe/Makefile @@ -10,4 +10,4 @@ CFLAGS_switch.o += -Wno-override-init obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ - ../fpsimd.o ../hyp-entry.o ../exception.o + ../fpsimd.o ../hyp-entry.o ../exception.o ../vgic-v5-sr.o diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 24969fa8d02d..07e394690dcc 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -428,6 +428,27 @@ struct vgic_v3_cpu_if { unsigned int used_lrs; }; +struct vgic_v5_cpu_if { + u64 vgic_apr; + u64 vgic_vmcr; + + /* PPI register state */ + DECLARE_BITMAP(vgic_ppi_dvir, VGIC_V5_NR_PRIVATE_IRQS); + DECLARE_BITMAP(vgic_ppi_activer, VGIC_V5_NR_PRIVATE_IRQS); + DECLARE_BITMAP(vgic_ppi_enabler, VGIC_V5_NR_PRIVATE_IRQS); + /* We have one byte (of which 5 bits are used) per PPI for priority */ + u64 vgic_ppi_priorityr[VGIC_V5_NR_PRIVATE_IRQS / 8]; + + /* + * The ICSR is re-used across host and guest, and hence it needs to be + * saved/restored. Only one copy is required as the host should block + * preemption between executing GIC CDRCFG and acccessing the + * ICC_ICSR_EL1. A guest, of course, can never guarantee this, and hence + * it is the hyp's responsibility to keep the state constistent. 
+ */ + u64 vgic_icsr; +}; + /* What PPI capabilities does a GICv5 host have */ struct vgic_v5_ppi_caps { DECLARE_BITMAP(impl_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS); @@ -438,6 +459,7 @@ struct vgic_cpu { union { struct vgic_v2_cpu_if vgic_v2; struct vgic_v3_cpu_if vgic_v3; + struct vgic_v5_cpu_if vgic_v5; }; struct vgic_irq *private_irqs; -- cgit v1.2.3 From 9b8e3d4ca0e734dd13dc261c5f888b359f8f5983 Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Thu, 19 Mar 2026 15:54:08 +0000 Subject: KVM: arm64: gic-v5: Implement GICv5 load/put and save/restore This change introduces GICv5 load/put. Additionally, it plumbs in save/restore for: * PPIs (ICH_PPI_x_EL2 regs) * ICH_VMCR_EL2 * ICH_APR_EL2 * ICC_ICSR_EL1 A GICv5-specific enable bit is added to struct vgic_vmcr as this differs from previous GICs. On GICv5-native systems, the VMCR only contains the enable bit (driven by the guest via ICC_CR0_EL1.EN) and the priority mask (PCR). A struct gicv5_vpe is also introduced. This currently only contains a single field - bool resident - which is used to track if a VPE is currently running or not, and is used to avoid a case of double load or double put on the WFI path for a vCPU. This struct will be extended as additional GICv5 support is merged, specifically for VPE doorbells. 
Co-authored-by: Timothy Hayes Signed-off-by: Timothy Hayes Signed-off-by: Sascha Bischoff Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20260319154937.3619520-18-sascha.bischoff@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/switch.c | 12 +++++++ arch/arm64/kvm/vgic/vgic-mmio.c | 40 +++++++++++++++++---- arch/arm64/kvm/vgic/vgic-v5.c | 74 ++++++++++++++++++++++++++++++++++++++ arch/arm64/kvm/vgic/vgic.c | 74 ++++++++++++++++++++++++++++++-------- arch/arm64/kvm/vgic/vgic.h | 7 ++++ include/kvm/arm_vgic.h | 2 ++ include/linux/irqchip/arm-gic-v5.h | 5 +++ 7 files changed, 193 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index b41485ce295a..a88da302b6d0 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -113,6 +113,12 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) /* Save VGICv3 state on non-VHE systems */ static void __hyp_vgic_save_state(struct kvm_vcpu *vcpu) { + if (vgic_is_v5(kern_hyp_va(vcpu->kvm))) { + __vgic_v5_save_state(&vcpu->arch.vgic_cpu.vgic_v5); + __vgic_v5_save_ppi_state(&vcpu->arch.vgic_cpu.vgic_v5); + return; + } + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3); __vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3); @@ -122,6 +128,12 @@ static void __hyp_vgic_save_state(struct kvm_vcpu *vcpu) /* Restore VGICv3 state on non-VHE systems */ static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) { + if (vgic_is_v5(kern_hyp_va(vcpu->kvm))) { + __vgic_v5_restore_state(&vcpu->arch.vgic_cpu.vgic_v5); + __vgic_v5_restore_ppi_state(&vcpu->arch.vgic_cpu.vgic_v5); + return; + } + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { __vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3); __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3); diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c 
b/arch/arm64/kvm/vgic/vgic-mmio.c index a573b1f0c6cb..74d76dec9730 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio.c +++ b/arch/arm64/kvm/vgic/vgic-mmio.c @@ -842,18 +842,46 @@ vgic_find_mmio_region(const struct vgic_register_region *regions, void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) { - if (kvm_vgic_global_state.type == VGIC_V2) - vgic_v2_set_vmcr(vcpu, vmcr); - else + const struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + switch (dist->vgic_model) { + case KVM_DEV_TYPE_ARM_VGIC_V5: + vgic_v5_set_vmcr(vcpu, vmcr); + break; + case KVM_DEV_TYPE_ARM_VGIC_V3: vgic_v3_set_vmcr(vcpu, vmcr); + break; + case KVM_DEV_TYPE_ARM_VGIC_V2: + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + vgic_v3_set_vmcr(vcpu, vmcr); + else + vgic_v2_set_vmcr(vcpu, vmcr); + break; + default: + BUG(); + } } void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) { - if (kvm_vgic_global_state.type == VGIC_V2) - vgic_v2_get_vmcr(vcpu, vmcr); - else + const struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + switch (dist->vgic_model) { + case KVM_DEV_TYPE_ARM_VGIC_V5: + vgic_v5_get_vmcr(vcpu, vmcr); + break; + case KVM_DEV_TYPE_ARM_VGIC_V3: vgic_v3_get_vmcr(vcpu, vmcr); + break; + case KVM_DEV_TYPE_ARM_VGIC_V2: + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + vgic_v3_get_vmcr(vcpu, vmcr); + else + vgic_v2_get_vmcr(vcpu, vmcr); + break; + default: + BUG(); + } } /* diff --git a/arch/arm64/kvm/vgic/vgic-v5.c b/arch/arm64/kvm/vgic/vgic-v5.c index cf8382a954bb..41317e1d94a2 100644 --- a/arch/arm64/kvm/vgic/vgic-v5.c +++ b/arch/arm64/kvm/vgic/vgic-v5.c @@ -86,3 +86,77 @@ int vgic_v5_probe(const struct gic_kvm_info *info) return 0; } + +void vgic_v5_load(struct kvm_vcpu *vcpu) +{ + struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5; + + /* + * On the WFI path, vgic_load is called a second time. The first is when + * scheduling in the vcpu thread again, and the second is when leaving + * WFI. 
Skip the second instance as it serves no purpose and just + * restores the same state again. + */ + if (cpu_if->gicv5_vpe.resident) + return; + + kvm_call_hyp(__vgic_v5_restore_vmcr_apr, cpu_if); + + cpu_if->gicv5_vpe.resident = true; +} + +void vgic_v5_put(struct kvm_vcpu *vcpu) +{ + struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5; + + /* + * Do nothing if we're not resident. This can happen in the WFI path + * where we do a vgic_put in the WFI path and again later when + * descheduling the thread. We risk losing VMCR state if we sync it + * twice, so instead return early in this case. + */ + if (!cpu_if->gicv5_vpe.resident) + return; + + kvm_call_hyp(__vgic_v5_save_apr, cpu_if); + + cpu_if->gicv5_vpe.resident = false; +} + +void vgic_v5_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5; + u64 vmcr = cpu_if->vgic_vmcr; + + vmcrp->en = FIELD_GET(FEAT_GCIE_ICH_VMCR_EL2_EN, vmcr); + vmcrp->pmr = FIELD_GET(FEAT_GCIE_ICH_VMCR_EL2_VPMR, vmcr); +} + +void vgic_v5_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5; + u64 vmcr; + + vmcr = FIELD_PREP(FEAT_GCIE_ICH_VMCR_EL2_VPMR, vmcrp->pmr) | + FIELD_PREP(FEAT_GCIE_ICH_VMCR_EL2_EN, vmcrp->en); + + cpu_if->vgic_vmcr = vmcr; +} + +void vgic_v5_restore_state(struct kvm_vcpu *vcpu) +{ + struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5; + + __vgic_v5_restore_state(cpu_if); + __vgic_v5_restore_ppi_state(cpu_if); + dsb(sy); +} + +void vgic_v5_save_state(struct kvm_vcpu *vcpu) +{ + struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5; + + __vgic_v5_save_state(cpu_if); + __vgic_v5_save_ppi_state(cpu_if); + dsb(sy); +} diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 2f3f892cbddc..84199d2df80a 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -1017,7 +1017,10 @@ static inline bool 
can_access_vgic_from_kernel(void) static inline void vgic_save_state(struct kvm_vcpu *vcpu) { - if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + /* No switch statement here. See comment in vgic_restore_state() */ + if (vgic_is_v5(vcpu->kvm)) + vgic_v5_save_state(vcpu); + else if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) vgic_v2_save_state(vcpu); else __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3); @@ -1026,14 +1029,16 @@ static inline void vgic_save_state(struct kvm_vcpu *vcpu) /* Sync back the hardware VGIC state into our emulation after a guest's run. */ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { - /* If nesting, emulate the HW effect from L0 to L1 */ - if (vgic_state_is_nested(vcpu)) { - vgic_v3_sync_nested(vcpu); - return; - } + if (vgic_is_v3(vcpu->kvm)) { + /* If nesting, emulate the HW effect from L0 to L1 */ + if (vgic_state_is_nested(vcpu)) { + vgic_v3_sync_nested(vcpu); + return; + } - if (vcpu_has_nv(vcpu)) - vgic_v3_nested_update_mi(vcpu); + if (vcpu_has_nv(vcpu)) + vgic_v3_nested_update_mi(vcpu); + } if (can_access_vgic_from_kernel()) vgic_save_state(vcpu); @@ -1055,7 +1060,18 @@ void kvm_vgic_process_async_update(struct kvm_vcpu *vcpu) static inline void vgic_restore_state(struct kvm_vcpu *vcpu) { - if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + /* + * As nice as it would be to restructure this code into a switch + * statement as can be found elsewhere, the logic quickly gets ugly. + * + * __vgic_v3_restore_state() is doing a lot of heavy lifting here. It is + * required for GICv3-on-GICv3, GICv2-on-GICv3, GICv3-on-GICv5, and the + * no-in-kernel-irqchip case on GICv3 hardware. Hence, adding a switch + * here results in much more complex code. 
+ */ + if (vgic_is_v5(vcpu->kvm)) + vgic_v5_restore_state(vcpu); + else if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) vgic_v2_restore_state(vcpu); else __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3); @@ -1109,30 +1125,58 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) void kvm_vgic_load(struct kvm_vcpu *vcpu) { + const struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + if (unlikely(!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))) { if (has_vhe() && static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) __vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3); return; } - if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) - vgic_v2_load(vcpu); - else + switch (dist->vgic_model) { + case KVM_DEV_TYPE_ARM_VGIC_V5: + vgic_v5_load(vcpu); + break; + case KVM_DEV_TYPE_ARM_VGIC_V3: vgic_v3_load(vcpu); + break; + case KVM_DEV_TYPE_ARM_VGIC_V2: + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + vgic_v3_load(vcpu); + else + vgic_v2_load(vcpu); + break; + default: + BUG(); + } } void kvm_vgic_put(struct kvm_vcpu *vcpu) { + const struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + if (unlikely(!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))) { if (has_vhe() && static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) __vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3); return; } - if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) - vgic_v2_put(vcpu); - else + switch (dist->vgic_model) { + case KVM_DEV_TYPE_ARM_VGIC_V5: + vgic_v5_put(vcpu); + break; + case KVM_DEV_TYPE_ARM_VGIC_V3: vgic_v3_put(vcpu); + break; + case KVM_DEV_TYPE_ARM_VGIC_V2: + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + vgic_v3_put(vcpu); + else + vgic_v2_put(vcpu); + break; + default: + BUG(); + } } int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 7b7eed69d797..cc487a69d038 100644 --- 
a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -187,6 +187,7 @@ static inline u64 vgic_ich_hcr_trap_bits(void) * registers regardless of the hardware backed GIC used. */ struct vgic_vmcr { + u32 en; /* GICv5-specific */ u32 grpen0; u32 grpen1; @@ -363,6 +364,12 @@ void vgic_debug_init(struct kvm *kvm); void vgic_debug_destroy(struct kvm *kvm); int vgic_v5_probe(const struct gic_kvm_info *info); +void vgic_v5_load(struct kvm_vcpu *vcpu); +void vgic_v5_put(struct kvm_vcpu *vcpu); +void vgic_v5_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +void vgic_v5_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +void vgic_v5_restore_state(struct kvm_vcpu *vcpu); +void vgic_v5_save_state(struct kvm_vcpu *vcpu); static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu) { diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 07e394690dcc..b27bfc463a31 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -447,6 +447,8 @@ struct vgic_v5_cpu_if { * it is the hyp's responsibility to keep the state constistent. */ u64 vgic_icsr; + + struct gicv5_vpe gicv5_vpe; }; /* What PPI capabilities does a GICv5 host have */ diff --git a/include/linux/irqchip/arm-gic-v5.h b/include/linux/irqchip/arm-gic-v5.h index b1566a7c93ec..40d2fce68294 100644 --- a/include/linux/irqchip/arm-gic-v5.h +++ b/include/linux/irqchip/arm-gic-v5.h @@ -387,6 +387,11 @@ int gicv5_spi_irq_set_type(struct irq_data *d, unsigned int type); int gicv5_irs_iste_alloc(u32 lpi); void gicv5_irs_syncr(void); +/* Embedded in kvm.arch */ +struct gicv5_vpe { + bool resident; +}; + struct gicv5_its_devtab_cfg { union { struct { -- cgit v1.2.3 From 8f1fbe2fd279240d6999e3a975d0a51d816e080a Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Thu, 19 Mar 2026 15:54:23 +0000 Subject: KVM: arm64: gic-v5: Finalize GICv5 PPIs and generate mask We only want to expose a subset of the PPIs to a guest. 
If a PPI does not have an owner, it is not being actively driven by a device. The SW_PPI is a special case, as it is likely for userspace to wish to inject that. Therefore, just prior to running the guest for the first time, we need to finalize the PPIs. A mask is generated which, when combined with trapping a guest's PPI accesses, allows for the guest's view of the PPI to be filtered. This mask is global to the VM as all VCPUs PPI configurations must match. In addition, the PPI HMR is calculated. Signed-off-by: Sascha Bischoff Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20260319154937.3619520-19-sascha.bischoff@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arm.c | 4 ++++ arch/arm64/kvm/vgic/vgic-v5.c | 35 +++++++++++++++++++++++++++++++++++ include/kvm/arm_vgic.h | 24 ++++++++++++++++++++++++ 3 files changed, 63 insertions(+) (limited to 'include') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index aa69fd5b372f..5bbc1adb705e 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -934,6 +934,10 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) return ret; } + ret = vgic_v5_finalize_ppi_state(kvm); + if (ret) + return ret; + if (is_protected_kvm_enabled()) { ret = pkvm_create_hyp_vm(kvm); if (ret) diff --git a/arch/arm64/kvm/vgic/vgic-v5.c b/arch/arm64/kvm/vgic/vgic-v5.c index 41317e1d94a2..07f416fbc4bc 100644 --- a/arch/arm64/kvm/vgic/vgic-v5.c +++ b/arch/arm64/kvm/vgic/vgic-v5.c @@ -87,6 +87,41 @@ int vgic_v5_probe(const struct gic_kvm_info *info) return 0; } +int vgic_v5_finalize_ppi_state(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu0; + int i; + + if (!vgic_is_v5(kvm)) + return 0; + + /* The PPI state for all VCPUs should be the same. Pick the first. 
*/ + vcpu0 = kvm_get_vcpu(kvm, 0); + + bitmap_zero(kvm->arch.vgic.gicv5_vm.vgic_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS); + bitmap_zero(kvm->arch.vgic.gicv5_vm.vgic_ppi_hmr, VGIC_V5_NR_PRIVATE_IRQS); + + for_each_set_bit(i, ppi_caps.impl_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS) { + const u32 intid = vgic_v5_make_ppi(i); + struct vgic_irq *irq; + + irq = vgic_get_vcpu_irq(vcpu0, intid); + + /* Expose PPIs with an owner or the SW_PPI, only */ + scoped_guard(raw_spinlock_irqsave, &irq->irq_lock) { + if (irq->owner || i == GICV5_ARCH_PPI_SW_PPI) { + __assign_bit(i, kvm->arch.vgic.gicv5_vm.vgic_ppi_mask, 1); + __assign_bit(i, kvm->arch.vgic.gicv5_vm.vgic_ppi_hmr, + irq->config == VGIC_CONFIG_LEVEL); + } + } + + vgic_put_irq(vcpu0->kvm, irq); + } + + return 0; +} + void vgic_v5_load(struct kvm_vcpu *vcpu) { struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5; diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index b27bfc463a31..fdad0263499b 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -326,6 +326,23 @@ struct vgic_redist_region { struct list_head list; }; +struct vgic_v5_vm { + /* + * We only expose a subset of PPIs to the guest. This subset is a + * combination of the PPIs that are actually implemented and what we + * actually choose to expose. + */ + DECLARE_BITMAP(vgic_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS); + + /* + * The HMR itself is handled by the hardware, but we still need to have + * a mask that we can use when merging in pending state (only the state + * of Edge PPIs is merged back in from the guest an the HMR provides a + * convenient way to do that). + */ + DECLARE_BITMAP(vgic_ppi_hmr, VGIC_V5_NR_PRIVATE_IRQS); +}; + struct vgic_dist { bool in_kernel; bool ready; @@ -398,6 +415,11 @@ struct vgic_dist { * else. */ struct its_vm its_vm; + + /* + * GICv5 per-VM data. 
+ */ + struct vgic_v5_vm gicv5_vm; }; struct vgic_v2_cpu_if { @@ -588,6 +610,8 @@ int vgic_v4_load(struct kvm_vcpu *vcpu); void vgic_v4_commit(struct kvm_vcpu *vcpu); int vgic_v4_put(struct kvm_vcpu *vcpu); +int vgic_v5_finalize_ppi_state(struct kvm *kvm); + bool vgic_state_is_nested(struct kvm_vcpu *vcpu); /* CPU HP callbacks */ -- cgit v1.2.3 From 4a9a32d3538a9d800067be113b0196271a478c6a Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Thu, 19 Mar 2026 15:54:39 +0000 Subject: KVM: arm64: gic: Introduce queue_irq_unlock to irq_ops There are times when the default behaviour of vgic_queue_irq_unlock() is undesirable. This is because some GICs, such as GICv5 which is the main driver for this change, handle the majority of the interrupt lifecycle in hardware. In this case, there is no need for a per-VCPU AP list as the interrupt can be made pending directly. This is done either via the ICH_PPI_x_EL2 registers for PPIs, or with the VDPEND system instruction for SPIs and LPIs. The vgic_queue_irq_unlock() function is made overridable using a new function pointer in struct irq_ops. vgic_queue_irq_unlock() is overridden if the function pointer is non-null. This new irq_op is unused in this change - it is purely providing the infrastructure itself. The subsequent PPI injection changes provide a demonstration of the usage of the queue_irq_unlock irq_op. 
Signed-off-by: Sascha Bischoff Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20260319154937.3619520-20-sascha.bischoff@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic.c | 3 +++ include/kvm/arm_vgic.h | 8 ++++++++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 84199d2df80a..c46c0e1db436 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -404,6 +404,9 @@ bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, lockdep_assert_held(&irq->irq_lock); + if (irq->ops && irq->ops->queue_irq_unlock) + return irq->ops->queue_irq_unlock(kvm, irq, flags); + retry: vcpu = vgic_target_oracle(irq); if (irq->vcpu || !vcpu) { diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index fdad0263499b..e9797c5dbbf0 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -189,6 +189,8 @@ enum vgic_irq_config { VGIC_CONFIG_LEVEL }; +struct vgic_irq; + /* * Per-irq ops overriding some common behavious. * @@ -207,6 +209,12 @@ struct irq_ops { * peaking into the physical GIC. */ bool (*get_input_level)(int vintid); + + /* + * Function pointer to override the queuing of an IRQ. + */ + bool (*queue_irq_unlock)(struct kvm *kvm, struct vgic_irq *irq, + unsigned long flags) __releases(&irq->irq_lock); }; struct vgic_irq { -- cgit v1.2.3 From 4a5444d23979b69e466f8080477112c264f194f2 Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Thu, 19 Mar 2026 15:56:12 +0000 Subject: KVM: arm64: Introduce set_direct_injection irq_op GICv5 adds support for directly injected PPIs. The mechanism for setting this up is GICv5 specific, so rather than adding GICv5-specific code to the common vgic code, we introduce a new irq_op. This new irq_op is intended to be used to enable or disable direct injection for interrupts that support it. 
As it is an irq_op, it has no effect unless explicitly populated in the irq_ops structure for a particular interrupt. The usage is demonstrated in the subsequent change. Signed-off-by: Sascha Bischoff Link: https://patch.msgid.link/20260319154937.3619520-26-sascha.bischoff@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic.c | 7 +++++++ include/kvm/arm_vgic.h | 7 +++++++ 2 files changed, 14 insertions(+) (limited to 'include') diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index d9ca5509147a..9ac0ff60aa8a 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -608,12 +608,19 @@ static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq, irq->hw = true; irq->host_irq = host_irq; irq->hwintid = data->hwirq; + + if (irq->ops && irq->ops->set_direct_injection) + irq->ops->set_direct_injection(vcpu, irq, true); + return 0; } /* @irq->irq_lock must be held */ static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq) { + if (irq->ops && irq->ops->set_direct_injection) + irq->ops->set_direct_injection(irq->target_vcpu, irq, false); + irq->hw = false; irq->hwintid = 0; } diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index e9797c5dbbf0..a28cf765f3eb 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -215,6 +215,13 @@ struct irq_ops { */ bool (*queue_irq_unlock)(struct kvm *kvm, struct vgic_irq *irq, unsigned long flags) __releases(&irq->irq_lock); + + /* + * Callback function pointer to either enable or disable direct + * injection for a mapped interrupt. + */ + void (*set_direct_injection)(struct kvm_vcpu *vcpu, + struct vgic_irq *irq, bool direct); }; struct vgic_irq { -- cgit v1.2.3 From f4d37c7c35769579c51aa5fe00161c690b89811d Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Thu, 19 Mar 2026 15:56:59 +0000 Subject: KVM: arm64: gic-v5: Create and initialise vgic_v5 Update kvm_vgic_create to create a vgic_v5 device. 
When creating a vgic, FEAT_GCIE in the ID_AA64PFR2 is only exposed to vgic_v5-based guests, and is hidden otherwise. GIC in ~ID_AA64PFR0_EL1 is never exposed for a vgic_v5 guest. When initialising a vgic_v5, skip kvm_vgic_dist_init as GICv5 doesn't support one. The current vgic_v5 implementation only supports PPIs, so no SPIs are initialised either. The current vgic_v5 support doesn't extend to nested guests. Therefore, the init of vgic_v5 for a nested guest is failed in vgic_v5_init. As the current vgic_v5 doesn't require any resources to be mapped, vgic_v5_map_resources is simply used to check that the vgic has indeed been initialised. Again, this will change as more GICv5 support is merged in. Signed-off-by: Sascha Bischoff Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20260319154937.3619520-29-sascha.bischoff@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic-init.c | 54 ++++++++++++++++++++++++++--------------- arch/arm64/kvm/vgic/vgic-v5.c | 26 ++++++++++++++++++++ arch/arm64/kvm/vgic/vgic.h | 2 ++ include/kvm/arm_vgic.h | 1 + 4 files changed, 63 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index e0366e8c144d..75185651ff64 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -66,7 +66,7 @@ static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type); * or through the generic KVM_CREATE_DEVICE API ioctl. * irqchip_in_kernel() tells you if this function succeeded or not. 
* @kvm: kvm struct pointer - * @type: KVM_DEV_TYPE_ARM_VGIC_V[23] + * @type: KVM_DEV_TYPE_ARM_VGIC_V[235] */ int kvm_vgic_create(struct kvm *kvm, u32 type) { @@ -131,8 +131,11 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) if (type == KVM_DEV_TYPE_ARM_VGIC_V2) kvm->max_vcpus = VGIC_V2_MAX_CPUS; - else + else if (type == KVM_DEV_TYPE_ARM_VGIC_V3) kvm->max_vcpus = VGIC_V3_MAX_CPUS; + else if (type == KVM_DEV_TYPE_ARM_VGIC_V5) + kvm->max_vcpus = min(VGIC_V5_MAX_CPUS, + kvm_vgic_global_state.max_gic_vcpus); if (atomic_read(&kvm->online_vcpus) > kvm->max_vcpus) { ret = -E2BIG; @@ -426,22 +429,28 @@ int vgic_init(struct kvm *kvm) if (kvm->created_vcpus != atomic_read(&kvm->online_vcpus)) return -EBUSY; - /* freeze the number of spis */ - if (!dist->nr_spis) - dist->nr_spis = VGIC_NR_IRQS_LEGACY - VGIC_NR_PRIVATE_IRQS; + if (!vgic_is_v5(kvm)) { + /* freeze the number of spis */ + if (!dist->nr_spis) + dist->nr_spis = VGIC_NR_IRQS_LEGACY - VGIC_NR_PRIVATE_IRQS; - ret = kvm_vgic_dist_init(kvm, dist->nr_spis); - if (ret) - goto out; + ret = kvm_vgic_dist_init(kvm, dist->nr_spis); + if (ret) + return ret; - /* - * Ensure vPEs are allocated if direct IRQ injection (e.g. vSGIs, - * vLPIs) is supported. - */ - if (vgic_supports_direct_irqs(kvm)) { - ret = vgic_v4_init(kvm); + /* + * Ensure vPEs are allocated if direct IRQ injection (e.g. vSGIs, + * vLPIs) is supported. 
+ */ + if (vgic_supports_direct_irqs(kvm)) { + ret = vgic_v4_init(kvm); + if (ret) + return ret; + } + } else { + ret = vgic_v5_init(kvm); if (ret) - goto out; + return ret; } kvm_for_each_vcpu(idx, vcpu, kvm) @@ -449,12 +458,12 @@ int vgic_init(struct kvm *kvm) ret = kvm_vgic_setup_default_irq_routing(kvm); if (ret) - goto out; + return ret; vgic_debug_init(kvm); dist->initialized = true; -out: - return ret; + + return 0; } static void kvm_vgic_dist_destroy(struct kvm *kvm) @@ -598,6 +607,7 @@ int vgic_lazy_init(struct kvm *kvm) int kvm_vgic_map_resources(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; + bool needs_dist = true; enum vgic_type type; gpa_t dist_base; int ret = 0; @@ -616,12 +626,16 @@ int kvm_vgic_map_resources(struct kvm *kvm) if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) { ret = vgic_v2_map_resources(kvm); type = VGIC_V2; - } else { + } else if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { ret = vgic_v3_map_resources(kvm); type = VGIC_V3; + } else { + ret = vgic_v5_map_resources(kvm); + type = VGIC_V5; + needs_dist = false; } - if (ret) + if (ret || !needs_dist) goto out; dist_base = dist->vgic_dist_base; diff --git a/arch/arm64/kvm/vgic/vgic-v5.c b/arch/arm64/kvm/vgic/vgic-v5.c index b84324f0a311..14e1fad913f0 100644 --- a/arch/arm64/kvm/vgic/vgic-v5.c +++ b/arch/arm64/kvm/vgic/vgic-v5.c @@ -87,6 +87,32 @@ int vgic_v5_probe(const struct gic_kvm_info *info) return 0; } +int vgic_v5_init(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu; + unsigned long idx; + + if (vgic_initialized(kvm)) + return 0; + + kvm_for_each_vcpu(idx, vcpu, kvm) { + if (vcpu_has_nv(vcpu)) { + kvm_err("Nested GICv5 VMs are currently unsupported\n"); + return -EINVAL; + } + } + + return 0; +} + +int vgic_v5_map_resources(struct kvm *kvm) +{ + if (!vgic_initialized(kvm)) + return -EBUSY; + + return 0; +} + int vgic_v5_finalize_ppi_state(struct kvm *kvm) { struct kvm_vcpu *vcpu0; diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 
8f15f7472458..0f1986fcd7d0 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -364,6 +364,8 @@ void vgic_debug_init(struct kvm *kvm); void vgic_debug_destroy(struct kvm *kvm); int vgic_v5_probe(const struct gic_kvm_info *info); +int vgic_v5_init(struct kvm *kvm); +int vgic_v5_map_resources(struct kvm *kvm); void vgic_v5_set_ppi_ops(struct kvm_vcpu *vcpu, u32 vintid); bool vgic_v5_has_pending_ppi(struct kvm_vcpu *vcpu); void vgic_v5_flush_ppi_state(struct kvm_vcpu *vcpu); diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index a28cf765f3eb..a5ddccf7ef3b 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -21,6 +21,7 @@ #include #include +#define VGIC_V5_MAX_CPUS 512 #define VGIC_V3_MAX_CPUS 512 #define VGIC_V2_MAX_CPUS 8 #define VGIC_NR_IRQS_LEGACY 256 -- cgit v1.2.3 From 9491c63b6cd7bdae97cd29c7c6bf400adbd3578f Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Thu, 19 Mar 2026 15:57:45 +0000 Subject: KVM: arm64: gic-v5: Enlighten arch timer for GICv5 Now that GICv5 has arrived, the arch timer requires some TLC to address some of the key differences introduced with GICv5. For PPIs on GICv5, the queue_irq_unlock irq_op is used as AP lists are not required at all for GICv5. The arch timer also introduces an irq_op - get_input_level. Extend the arch-timer-provided irq_ops to include the PPI op for vgic_v5 guests. When possible, DVI (Direct Virtual Interrupt) is set for PPIs when using a vgic_v5, which directly inject the pending state into the guest. This means that the host never sees the interrupt for the guest for these interrupts. This has three impacts. * First of all, the kvm_cpu_has_pending_timer check is updated to explicitly check if the timers are expected to fire. * Secondly, for mapped timers (which use DVI) they must be masked on the host prior to entering a GICv5 guest, and unmasked on the return path. This is handled in set_timer_irq_phys_masked. 
* Thirdly, it makes zero sense to attempt to inject state for a DVI'd interrupt. Track which timers are direct, and skip the call to kvm_vgic_inject_irq() for these. The final, but rather important, change is that the architected PPIs for the timers are made mandatory for a GICv5 guest. Attempts to set them to anything else are actively rejected. Once a vgic_v5 is initialised, the arch timer PPIs are also explicitly reinitialised to ensure the correct GICv5-compatible PPIs are used - this also adds in the GICv5 PPI type to the intid. Signed-off-by: Sascha Bischoff Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20260319154937.3619520-32-sascha.bischoff@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arch_timer.c | 86 +++++++++++++++++++++++++++++++++-------- arch/arm64/kvm/vgic/vgic-init.c | 9 +++++ arch/arm64/kvm/vgic/vgic-v5.c | 7 ++-- include/kvm/arm_arch_timer.h | 11 +++++- include/kvm/arm_vgic.h | 3 ++ 5 files changed, 94 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 92870ee6dacd..67b989671b41 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -56,6 +56,12 @@ static struct irq_ops arch_timer_irq_ops = { .get_input_level = kvm_arch_timer_get_input_level, }; +static struct irq_ops arch_timer_irq_ops_vgic_v5 = { + .get_input_level = kvm_arch_timer_get_input_level, + .queue_irq_unlock = vgic_v5_ppi_queue_irq_unlock, + .set_direct_injection = vgic_v5_set_ppi_dvi, +}; + static int nr_timers(struct kvm_vcpu *vcpu) { if (!vcpu_has_nv(vcpu)) @@ -177,6 +183,10 @@ void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map) map->emul_ptimer = vcpu_ptimer(vcpu); } + map->direct_vtimer->direct = true; + if (map->direct_ptimer) + map->direct_ptimer->direct = true; + trace_kvm_get_timer_map(vcpu->vcpu_id, map); } @@ -396,7 +406,11 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) int kvm_cpu_has_pending_timer(struct 
kvm_vcpu *vcpu) { - return vcpu_has_wfit_active(vcpu) && wfit_delay_ns(vcpu) == 0; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + + return kvm_timer_should_fire(vtimer) || kvm_timer_should_fire(ptimer) || + (vcpu_has_wfit_active(vcpu) && wfit_delay_ns(vcpu) == 0); } /* @@ -447,6 +461,10 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, if (userspace_irqchip(vcpu->kvm)) return; + /* Skip injecting on GICv5 for directly injected (DVI'd) timers */ + if (vgic_is_v5(vcpu->kvm) && timer_ctx->direct) + return; + kvm_vgic_inject_irq(vcpu->kvm, vcpu, timer_irq(timer_ctx), timer_ctx->irq.level, @@ -674,6 +692,7 @@ static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx) phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx)); phys_active |= ctx->irq.level; + phys_active |= vgic_is_v5(vcpu->kvm); set_timer_irq_phys_active(ctx, phys_active); } @@ -862,7 +881,8 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) get_timer_map(vcpu, &map); if (static_branch_likely(&has_gic_active_state)) { - if (vcpu_has_nv(vcpu)) + /* We don't do NV on GICv5, yet */ + if (vcpu_has_nv(vcpu) && !vgic_is_v5(vcpu->kvm)) kvm_timer_vcpu_load_nested_switch(vcpu, &map); kvm_timer_vcpu_load_gic(map.direct_vtimer); @@ -932,6 +952,12 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) if (kvm_vcpu_is_blocking(vcpu)) kvm_timer_blocking(vcpu); + + if (vgic_is_v5(vcpu->kvm)) { + set_timer_irq_phys_active(map.direct_vtimer, false); + if (map.direct_ptimer) + set_timer_irq_phys_active(map.direct_ptimer, false); + } } void kvm_timer_sync_nested(struct kvm_vcpu *vcpu) @@ -1095,10 +1121,19 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) HRTIMER_MODE_ABS_HARD); } +/* + * This is always called during kvm_arch_init_vm, but will also be + * called from kvm_vgic_create if we have a vGICv5. 
+ */ void kvm_timer_init_vm(struct kvm *kvm) { + /* + * Set up the default PPIs - note that we adjust them based on + * the model of the GIC as GICv5 uses a different way to + * describing interrupts. + */ for (int i = 0; i < NR_KVM_TIMERS; i++) - kvm->arch.timer_data.ppi[i] = default_ppi[i]; + kvm->arch.timer_data.ppi[i] = get_vgic_ppi(kvm, default_ppi[i]); } void kvm_timer_cpu_up(void) @@ -1267,7 +1302,15 @@ static int timer_irq_set_irqchip_state(struct irq_data *d, static void timer_irq_eoi(struct irq_data *d) { - if (!irqd_is_forwarded_to_vcpu(d)) + /* + * On a GICv5 host, we still need to call EOI on the parent for + * PPIs. The host driver already handles irqs which are forwarded to + * vcpus, and skips the GIC CDDI while still doing the GIC CDEOI. This + * is required to emulate the EOIMode=1 on GICv5 hardware. Failure to + * call EOI unsurprisingly results in *BAD* lock-ups. + */ + if (!irqd_is_forwarded_to_vcpu(d) || + kvm_vgic_global_state.type == VGIC_V5) irq_chip_eoi_parent(d); } @@ -1331,7 +1374,8 @@ static int kvm_irq_init(struct arch_timer_kvm_info *info) host_vtimer_irq = info->virtual_irq; kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags); - if (kvm_vgic_global_state.no_hw_deactivation) { + if (kvm_vgic_global_state.no_hw_deactivation || + kvm_vgic_global_state.type == VGIC_V5) { struct fwnode_handle *fwnode; struct irq_data *data; @@ -1349,7 +1393,8 @@ static int kvm_irq_init(struct arch_timer_kvm_info *info) return -ENOMEM; } - arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE; + if (kvm_vgic_global_state.no_hw_deactivation) + arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE; WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq, (void *)TIMER_VTIMER)); } @@ -1500,10 +1545,13 @@ static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu) break; /* - * We know by construction that we only have PPIs, so - * all values are less than 32. + * We know by construction that we only have PPIs, so all values + * are less than 32 for non-GICv5 VGICs. 
On GICv5, they are + * architecturally defined to be under 32 too. However, we mask + * off most of the bits as we might be presented with a GICv5 + * style PPI where the type is encoded in the top-bits. */ - ppis |= BIT(irq); + ppis |= BIT(irq & 0x1f); } valid = hweight32(ppis) == nr_timers(vcpu); @@ -1562,7 +1610,8 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) get_timer_map(vcpu, &map); - ops = &arch_timer_irq_ops; + ops = vgic_is_v5(vcpu->kvm) ? &arch_timer_irq_ops_vgic_v5 : + &arch_timer_irq_ops; for (int i = 0; i < nr_timers(vcpu); i++) kvm_vgic_set_irq_ops(vcpu, timer_irq(vcpu_get_timer(vcpu, i)), ops); @@ -1606,12 +1655,11 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) if (!(irq_is_ppi(vcpu->kvm, irq))) return -EINVAL; - mutex_lock(&vcpu->kvm->arch.config_lock); + guard(mutex)(&vcpu->kvm->arch.config_lock); if (test_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &vcpu->kvm->arch.flags)) { - ret = -EBUSY; - goto out; + return -EBUSY; } switch (attr->attr) { @@ -1628,10 +1676,16 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) idx = TIMER_HPTIMER; break; default: - ret = -ENXIO; - goto out; + return -ENXIO; } + /* + * The PPIs for the Arch Timers are architecturally defined for + * GICv5. Reject anything that changes them from the specified value. + */ + if (vgic_is_v5(vcpu->kvm) && vcpu->kvm->arch.timer_data.ppi[idx] != irq) + return -EINVAL; + /* * We cannot validate the IRQ unicity before we run, so take it at * face value. 
The verdict will be given on first vcpu run, for each @@ -1639,8 +1693,6 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) */ vcpu->kvm->arch.timer_data.ppi[idx] = irq; -out: - mutex_unlock(&vcpu->kvm->arch.config_lock); return ret; } diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index fe854cac5272..47169604100f 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -173,6 +173,15 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) if (type == KVM_DEV_TYPE_ARM_VGIC_V3) kvm->arch.vgic.nassgicap = system_supports_direct_sgis(); + /* + * We now know that we have a GICv5. The Arch Timer PPI interrupts may + * have been initialised at this stage, but will have done so assuming + * that we have an older GIC, meaning that the IntIDs won't be + * correct. We init them again, and this time they will be correct. + */ + if (type == KVM_DEV_TYPE_ARM_VGIC_V5) + kvm_timer_init_vm(kvm); + out_unlock: mutex_unlock(&kvm->arch.config_lock); kvm_unlock_all_vcpus(kvm); diff --git a/arch/arm64/kvm/vgic/vgic-v5.c b/arch/arm64/kvm/vgic/vgic-v5.c index c263e097786f..9384c7fcb1aa 100644 --- a/arch/arm64/kvm/vgic/vgic-v5.c +++ b/arch/arm64/kvm/vgic/vgic-v5.c @@ -200,8 +200,8 @@ static u32 vgic_v5_get_effective_priority_mask(struct kvm_vcpu *vcpu) * need the PPIs to be queued on a per-VCPU AP list. Therefore, sanity check the * state, unlock, and return. */ -static bool vgic_v5_ppi_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, - unsigned long flags) +bool vgic_v5_ppi_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, + unsigned long flags) __releases(&irq->irq_lock) { struct kvm_vcpu *vcpu; @@ -232,8 +232,7 @@ out_unlock_fail: /* * Sets/clears the corresponding bit in the ICH_PPI_DVIR register. 
*/ -static void vgic_v5_set_ppi_dvi(struct kvm_vcpu *vcpu, struct vgic_irq *irq, - bool dvi) +void vgic_v5_set_ppi_dvi(struct kvm_vcpu *vcpu, struct vgic_irq *irq, bool dvi) { struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5; u32 ppi; diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 7310841f4512..a7754e0a2ef4 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -10,6 +10,8 @@ #include #include +#include + enum kvm_arch_timers { TIMER_PTIMER, TIMER_VTIMER, @@ -47,7 +49,7 @@ struct arch_timer_vm_data { u64 poffset; /* The PPI for each timer, global to the VM */ - u8 ppi[NR_KVM_TIMERS]; + u32 ppi[NR_KVM_TIMERS]; }; struct arch_timer_context { @@ -74,6 +76,9 @@ struct arch_timer_context { /* Duplicated state from arch_timer.c for convenience */ u32 host_timer_irq; + + /* Is this a direct timer? */ + bool direct; }; struct timer_map { @@ -130,6 +135,10 @@ void kvm_timer_init_vhe(void); #define timer_vm_data(ctx) (&(timer_context_to_vcpu(ctx)->kvm->arch.timer_data)) #define timer_irq(ctx) (timer_vm_data(ctx)->ppi[arch_timer_ctx_index(ctx)]) +#define get_vgic_ppi(k, i) (((k)->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V5) ? 
\ + (i) : (FIELD_PREP(GICV5_HWIRQ_ID, i) | \ + FIELD_PREP(GICV5_HWIRQ_TYPE, GICV5_HWIRQ_TYPE_PPI))) + u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu, enum kvm_arch_timers tmr, enum kvm_arch_timer_regs treg); diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index a5ddccf7ef3b..8cc3a7b4d815 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -627,6 +627,9 @@ void vgic_v4_commit(struct kvm_vcpu *vcpu); int vgic_v4_put(struct kvm_vcpu *vcpu); int vgic_v5_finalize_ppi_state(struct kvm *kvm); +bool vgic_v5_ppi_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, + unsigned long flags); +void vgic_v5_set_ppi_dvi(struct kvm_vcpu *vcpu, struct vgic_irq *irq, bool dvi); bool vgic_state_is_nested(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From 7c31c06e2d2d75859d773ba940e56d1db2bd1fcd Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Thu, 19 Mar 2026 15:58:01 +0000 Subject: KVM: arm64: gic-v5: Mandate architected PPI for PMU emulation on GICv5 Make it mandatory to use the architected PPI when running a GICv5 guest. Attempts to set anything other than the architected PPI (23) are rejected. Additionally, KVM_ARM_VCPU_PMU_V3_INIT is relaxed to no longer require KVM_ARM_VCPU_PMU_V3_IRQ to be called for GICv5-based guests. In this case, the architected PPI is automatically used. Documentation is bumped accordingly.
Signed-off-by: Sascha Bischoff Reviewed-by: Jonathan Cameron Reviewed-by: Joey Gouly Link: https://patch.msgid.link/20260319154937.3619520-33-sascha.bischoff@arm.com Signed-off-by: Marc Zyngier --- Documentation/virt/kvm/devices/vcpu.rst | 5 +++-- arch/arm64/kvm/pmu-emul.c | 13 +++++++++++-- include/kvm/arm_pmu.h | 5 ++++- 3 files changed, 18 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/Documentation/virt/kvm/devices/vcpu.rst b/Documentation/virt/kvm/devices/vcpu.rst index 60bf205cb373..5e3805820010 100644 --- a/Documentation/virt/kvm/devices/vcpu.rst +++ b/Documentation/virt/kvm/devices/vcpu.rst @@ -37,7 +37,8 @@ Returns: A value describing the PMUv3 (Performance Monitor Unit v3) overflow interrupt number for this vcpu. This interrupt could be a PPI or SPI, but the interrupt type must be same for each vcpu. As a PPI, the interrupt number is the same for -all vcpus, while as an SPI it must be a separate number per vcpu. +all vcpus, while as an SPI it must be a separate number per vcpu. For +GICv5-based guests, the architected PPI (23) must be used. 1.2 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_INIT --------------------------------------- @@ -50,7 +51,7 @@ Returns: -EEXIST Interrupt number already used -ENODEV PMUv3 not supported or GIC not initialized -ENXIO PMUv3 not supported, missing VCPU feature or interrupt - number not set + number not set (non-GICv5 guests, only) -EBUSY PMUv3 already initialized ======= ====================================================== diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 41a3c5dc2bca..e1860acae641 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -962,8 +962,13 @@ static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu) if (!vgic_initialized(vcpu->kvm)) return -ENODEV; - if (!kvm_arm_pmu_irq_initialized(vcpu)) - return -ENXIO; + if (!kvm_arm_pmu_irq_initialized(vcpu)) { + if (!vgic_is_v5(vcpu->kvm)) + return -ENXIO; + + /* Use the architected irq number for GICv5. 
*/ + vcpu->arch.pmu.irq_num = KVM_ARMV8_PMU_GICV5_IRQ; + } ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num, &vcpu->arch.pmu); @@ -988,6 +993,10 @@ static bool pmu_irq_is_valid(struct kvm *kvm, int irq) unsigned long i; struct kvm_vcpu *vcpu; + /* On GICv5, the PMUIRQ is architecturally mandated to be PPI 23 */ + if (vgic_is_v5(kvm) && irq != KVM_ARMV8_PMU_GICV5_IRQ) + return false; + kvm_for_each_vcpu(i, vcpu, kvm) { if (!kvm_arm_pmu_irq_initialized(vcpu)) continue; diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 96754b51b411..0a36a3d5c894 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -12,6 +12,9 @@ #define KVM_ARMV8_PMU_MAX_COUNTERS 32 +/* PPI #23 - architecturally specified for GICv5 */ +#define KVM_ARMV8_PMU_GICV5_IRQ 0x20000017 + #if IS_ENABLED(CONFIG_HW_PERF_EVENTS) && IS_ENABLED(CONFIG_KVM) struct kvm_pmc { u8 idx; /* index into the pmu->pmc array */ @@ -38,7 +41,7 @@ struct arm_pmu_entry { }; bool kvm_supports_guest_pmuv3(void); -#define kvm_arm_pmu_irq_initialized(v) ((v)->arch.pmu.irq_num >= VGIC_NR_SGIS) +#define kvm_arm_pmu_irq_initialized(v) ((v)->arch.pmu.irq_num != 0) u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx); void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val); void kvm_pmu_set_counter_value_user(struct kvm_vcpu *vcpu, u64 select_idx, u64 val); -- cgit v1.2.3 From 37a25294682d28ef3bd131566602450a72c4d839 Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Thu, 19 Mar 2026 15:58:48 +0000 Subject: KVM: arm64: gic-v5: Introduce kvm_arm_vgic_v5_ops and register them Only the KVM_DEV_ARM_VGIC_GRP_CTRL->KVM_DEV_ARM_VGIC_CTRL_INIT op is currently supported. All other ops are stubbed out. 
Co-authored-by: Timothy Hayes Signed-off-by: Timothy Hayes Signed-off-by: Sascha Bischoff Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20260319154937.3619520-36-sascha.bischoff@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic-kvm-device.c | 74 +++++++++++++++++++++++++++++++++++ include/linux/kvm_host.h | 1 + 2 files changed, 75 insertions(+) (limited to 'include') diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index b12ba99a423e..772da54c1518 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -336,6 +336,10 @@ int kvm_register_vgic_device(unsigned long type) break; ret = kvm_vgic_register_its_device(); break; + case KVM_DEV_TYPE_ARM_VGIC_V5: + ret = kvm_register_device_ops(&kvm_arm_vgic_v5_ops, + KVM_DEV_TYPE_ARM_VGIC_V5); + break; } return ret; @@ -715,3 +719,73 @@ struct kvm_device_ops kvm_arm_vgic_v3_ops = { .get_attr = vgic_v3_get_attr, .has_attr = vgic_v3_has_attr, }; + +static int vgic_v5_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: + return -ENXIO; + case KVM_DEV_ARM_VGIC_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_ARM_VGIC_CTRL_INIT: + return vgic_set_common_attr(dev, attr); + default: + return -ENXIO; + } + default: + return -ENXIO; + } + +} + +static int vgic_v5_get_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: + return -ENXIO; + case KVM_DEV_ARM_VGIC_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_ARM_VGIC_CTRL_INIT: + return vgic_get_common_attr(dev, attr); + default: + return -ENXIO; + } + default: + return -ENXIO; + } +} + +static int vgic_v5_has_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + 
switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: + return -ENXIO; + case KVM_DEV_ARM_VGIC_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_ARM_VGIC_CTRL_INIT: + return 0; + default: + return -ENXIO; + } + default: + return -ENXIO; + } +} + +struct kvm_device_ops kvm_arm_vgic_v5_ops = { + .name = "kvm-arm-vgic-v5", + .create = vgic_create, + .destroy = vgic_destroy, + .set_attr = vgic_v5_set_attr, + .get_attr = vgic_v5_get_attr, + .has_attr = vgic_v5_has_attr, +}; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6b76e7a6f4c2..779d9ed85cbf 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2366,6 +2366,7 @@ void kvm_unregister_device_ops(u32 type); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_arm_vgic_v2_ops; extern struct kvm_device_ops kvm_arm_vgic_v3_ops; +extern struct kvm_device_ops kvm_arm_vgic_v5_ops; #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT -- cgit v1.2.3 From d51c978b7d3e143381f871d28d8a0437d446b51b Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Thu, 19 Mar 2026 15:59:50 +0000 Subject: KVM: arm64: gic-v5: Communicate userspace-driveable PPIs via a UAPI GICv5 systems will likely not support the full set of PPIs. The presence of any virtual PPI is tied to the presence of the physical PPI. Therefore, the available PPIs will be limited by the physical host. Userspace cannot drive any PPIs that are not implemented. Moreover, it is not desirable to expose all PPIs to the guest in the first place, even if they are supported in hardware. Some devices, such as the arch timer, are implemented in KVM, and hence those PPIs shouldn't be driven by userspace, either. Provided a new UAPI: KVM_DEV_ARM_VGIC_GRP_CTRL => KVM_DEV_ARM_VGIC_USERSPACE_PPIs This allows userspace to query which PPIs it is able to drive via KVM_IRQ_LINE.
Additionally, introduce a check in kvm_vm_ioctl_irq_line() to reject any PPIs not in the userspace mask. Signed-off-by: Sascha Bischoff Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20260319154937.3619520-40-sascha.bischoff@arm.com Signed-off-by: Marc Zyngier --- Documentation/virt/kvm/devices/arm-vgic-v5.rst | 13 +++++++++++ arch/arm64/include/uapi/asm/kvm.h | 1 + arch/arm64/kvm/arm.c | 11 ++++++++- arch/arm64/kvm/vgic/vgic-kvm-device.c | 31 ++++++++++++++++++++++++++ arch/arm64/kvm/vgic/vgic-v5.c | 10 +++++++++ include/kvm/arm_vgic.h | 3 +++ tools/arch/arm64/include/uapi/asm/kvm.h | 1 + 7 files changed, 69 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/virt/kvm/devices/arm-vgic-v5.rst b/Documentation/virt/kvm/devices/arm-vgic-v5.rst index 9904cb888277..29335ea823fc 100644 --- a/Documentation/virt/kvm/devices/arm-vgic-v5.rst +++ b/Documentation/virt/kvm/devices/arm-vgic-v5.rst @@ -25,6 +25,19 @@ Groups: request the initialization of the VGIC, no additional parameter in kvm_device_attr.addr. Must be called after all VCPUs have been created. + KVM_DEV_ARM_VGIC_USERSPACE_PPIs + request the mask of userspace-drivable PPIs. Only a subset of the PPIs can + be directly driven from userspace with GICv5, and the returned mask + informs userspace of which it is allowed to drive via KVM_IRQ_LINE. + + Userspace must allocate and point to __u64[2] of data in + kvm_device_attr.addr. When this call returns, the provided memory will be + populated with the userspace PPI mask. The lower __u64 contains the mask + for the lower 64 PPIs, with the remaining 64 being in the second __u64. + + This is a read-only attribute, and cannot be set. Attempts to set it are + rejected.
+ Errors: ======= ======================================================== diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index a792a599b9d6..1c13bfa2d38a 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -428,6 +428,7 @@ enum { #define KVM_DEV_ARM_ITS_RESTORE_TABLES 2 #define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3 #define KVM_DEV_ARM_ITS_CTRL_RESET 4 +#define KVM_DEV_ARM_VGIC_USERSPACE_PPIS 5 /* Device Control API on vcpu fd */ #define KVM_ARM_VCPU_PMU_V3_CTRL 0 diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index cb22bed9c85d..36410f7cd2ad 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1449,10 +1449,11 @@ static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level) int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, bool line_status) { - u32 irq = irq_level->irq; unsigned int irq_type, vcpu_id, irq_num; struct kvm_vcpu *vcpu = NULL; bool level = irq_level->level; + u32 irq = irq_level->irq; + unsigned long *mask; irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK; vcpu_id = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK; @@ -1486,6 +1487,14 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, if (irq_num >= VGIC_V5_NR_PRIVATE_IRQS) return -EINVAL; + /* + * Only allow PPIs that are explicitly exposed to + * userspace to be driven via KVM_IRQ_LINE + */ + mask = kvm->arch.vgic.gicv5_vm.userspace_ppis; + if (!test_bit(irq_num, mask)) + return -EINVAL; + /* Build a GICv5-style IntID here */ irq_num = vgic_v5_make_ppi(irq_num); } else if (irq_num < VGIC_NR_SGIS || diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index 772da54c1518..a96c77dccf35 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -720,6 +720,32 @@ struct kvm_device_ops kvm_arm_vgic_v3_ops = { .has_attr = vgic_v3_has_attr, };
+static int vgic_v5_get_userspace_ppis(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + struct vgic_v5_vm *gicv5_vm = &dev->kvm->arch.vgic.gicv5_vm; + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + int ret; + + guard(mutex)(&dev->kvm->arch.config_lock); + + /* + * We either support 64 or 128 PPIs. In the former case, we need to + * return 0s for the second 64 bits as we have no storage backing those. + */ + ret = put_user(bitmap_read(gicv5_vm->userspace_ppis, 0, 64), uaddr); + if (ret) + return ret; + uaddr++; + + if (VGIC_V5_NR_PRIVATE_IRQS == 128) + ret = put_user(bitmap_read(gicv5_vm->userspace_ppis, 64, 128), uaddr); + else + ret = put_user(0, uaddr); + + return ret; +} + static int vgic_v5_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { @@ -732,6 +758,7 @@ static int vgic_v5_set_attr(struct kvm_device *dev, switch (attr->attr) { case KVM_DEV_ARM_VGIC_CTRL_INIT: return vgic_set_common_attr(dev, attr); + case KVM_DEV_ARM_VGIC_USERSPACE_PPIS: default: return -ENXIO; } @@ -753,6 +780,8 @@ static int vgic_v5_get_attr(struct kvm_device *dev, switch (attr->attr) { case KVM_DEV_ARM_VGIC_CTRL_INIT: return vgic_get_common_attr(dev, attr); + case KVM_DEV_ARM_VGIC_USERSPACE_PPIS: + return vgic_v5_get_userspace_ppis(dev, attr); default: return -ENXIO; } @@ -773,6 +802,8 @@ static int vgic_v5_has_attr(struct kvm_device *dev, switch (attr->attr) { case KVM_DEV_ARM_VGIC_CTRL_INIT: return 0; + case KVM_DEV_ARM_VGIC_USERSPACE_PPIS: + return 0; default: return -ENXIO; } diff --git a/arch/arm64/kvm/vgic/vgic-v5.c b/arch/arm64/kvm/vgic/vgic-v5.c index f7a24ea6ad78..2b6cd5c3f9c2 100644 --- a/arch/arm64/kvm/vgic/vgic-v5.c +++ b/arch/arm64/kvm/vgic/vgic-v5.c @@ -143,6 +143,16 @@ int vgic_v5_init(struct kvm *kvm) } } + /* We only allow userspace to drive the SW_PPI, if it is implemented. 
*/ + bitmap_zero(kvm->arch.vgic.gicv5_vm.userspace_ppis, + VGIC_V5_NR_PRIVATE_IRQS); + __assign_bit(GICV5_ARCH_PPI_SW_PPI, + kvm->arch.vgic.gicv5_vm.userspace_ppis, + VGIC_V5_NR_PRIVATE_IRQS); + bitmap_and(kvm->arch.vgic.gicv5_vm.userspace_ppis, + kvm->arch.vgic.gicv5_vm.userspace_ppis, + ppi_caps.impl_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS); + return 0; } diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 8cc3a7b4d815..1388dc6028a9 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -350,6 +350,9 @@ struct vgic_v5_vm { */ DECLARE_BITMAP(vgic_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS); + /* A mask of the PPIs that are exposed for userspace to drive. */ + DECLARE_BITMAP(userspace_ppis, VGIC_V5_NR_PRIVATE_IRQS); + /* * The HMR itself is handled by the hardware, but we still need to have * a mask that we can use when merging in pending state (only the state diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index a792a599b9d6..1c13bfa2d38a 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -428,6 +428,7 @@ enum { #define KVM_DEV_ARM_ITS_RESTORE_TABLES 2 #define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3 #define KVM_DEV_ARM_ITS_CTRL_RESET 4 +#define KVM_DEV_ARM_VGIC_USERSPACE_PPIS 5 /* Device Control API on vcpu fd */ #define KVM_ARM_VCPU_PMU_V3_CTRL 0 -- cgit v1.2.3