From d802057c7c553ad426520a053da9f9fe08e2c35a Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Tue, 5 Apr 2022 19:50:38 +0100
Subject: genirq/msi: Shutdown managed interrupts with unsatisfiable affinities

When booting with maxcpus=, interrupt controllers such as the GICv3 ITS
may not be able to satisfy the affinity of some managed interrupts, as
some of the HW resources are simply not available. The same thing happens
when loading a driver using managed interrupts while CPUs are offline.

In order to deal with this, do not try to activate such an interrupt if
there is no online CPU capable of handling it. Instead, place it in
shutdown state. Once a capable CPU shows up, it will be activated.

Reported-by: John Garry
Reported-by: David Decotigny
Signed-off-by: Marc Zyngier
Signed-off-by: Thomas Gleixner
Tested-by: John Garry
Link: https://lore.kernel.org/r/20220405185040.206297-2-maz@kernel.org
---
 kernel/irq/msi.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'kernel')

diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 2bdfce5edafd..a9ee535293eb 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -818,6 +818,21 @@ static int msi_init_virq(struct irq_domain *domain, int virq, unsigned int vflag
 			irqd_clr_can_reserve(irqd);
 		if (vflags & VIRQ_NOMASK_QUIRK)
 			irqd_set_msi_nomask_quirk(irqd);
+
+		/*
+		 * If the interrupt is managed but no CPU is available to
+		 * service it, shut it down until better times. Note that
+		 * we only do this on the !RESERVE path as x86 (the only
+		 * architecture using this flag) deals with this in a
+		 * different way by using a catch-all vector.
+		 */
+		if ((vflags & VIRQ_ACTIVATE) &&
+		    irqd_affinity_is_managed(irqd) &&
+		    !cpumask_intersects(irq_data_get_affinity_mask(irqd),
+					cpu_online_mask)) {
+			irqd_set_managed_shutdown(irqd);
+			return 0;
+		}
 	}
 
 	if (!(vflags & VIRQ_ACTIVATE))
--
cgit v1.2.3
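For context, the scenario that triggers this path is a driver spreading
managed interrupts across all possible CPUs. Below is a minimal sketch of
that driver-side pattern, assuming a hypothetical PCI device; the function
and interrupt names are illustrative and not part of the patch:

#include <linux/cpumask.h>
#include <linux/interrupt.h>
#include <linux/pci.h>

static int example_setup_queue_irqs(struct pci_dev *pdev,
				    irq_handler_t handler, void *data)
{
	int nvec, i, ret;

	/*
	 * PCI_IRQ_AFFINITY makes these vectors "managed": the core
	 * spreads their affinity masks over all *possible* CPUs, so
	 * with maxcpus= some masks may contain no online CPU at all.
	 */
	nvec = pci_alloc_irq_vectors(pdev, 1, num_possible_cpus(),
				     PCI_IRQ_MSIX | PCI_IRQ_AFFINITY);
	if (nvec < 0)
		return nvec;

	for (i = 0; i < nvec; i++) {
		ret = request_irq(pci_irq_vector(pdev, i), handler, 0,
				  "example-queue", data);
		if (ret)
			return ret;
	}
	return nvec;
}

With this patch, a vector whose mask contains no online CPU is left in
managed-shutdown state instead of failing activation, and is started once
a matching CPU comes online.
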
From 33de0aa4bae982ed6f7c777f86b5af3e627ac937 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Tue, 5 Apr 2022 19:50:39 +0100
Subject: genirq: Always limit the affinity to online CPUs

When booting with maxcpus= (or even loading a driver while most CPUs
are offline), it is pretty easy to observe managed affinities containing
a mix of online and offline CPUs being passed to the irqchip driver.

This means that the irqchip cannot trust the affinity passed down from
the core code, which is a bit annoying and requires (at least in theory)
all drivers to implement some sort of affinity narrowing.

In order to address this, always limit the cpumask to the set of online
CPUs.

Signed-off-by: Marc Zyngier
Signed-off-by: Thomas Gleixner
Link: https://lore.kernel.org/r/20220405185040.206297-3-maz@kernel.org
---
 kernel/irq/manage.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

(limited to 'kernel')

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index c03f71d5ec10..f71ecc100545 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -222,11 +222,16 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
 {
 	struct irq_desc *desc = irq_data_to_desc(data);
 	struct irq_chip *chip = irq_data_get_irq_chip(data);
+	const struct cpumask *prog_mask;
 	int ret;
 
+	static DEFINE_RAW_SPINLOCK(tmp_mask_lock);
+	static struct cpumask tmp_mask;
+
 	if (!chip || !chip->irq_set_affinity)
 		return -EINVAL;
 
+	raw_spin_lock(&tmp_mask_lock);
 	/*
 	 * If this is a managed interrupt and housekeeping is enabled on
 	 * it check whether the requested affinity mask intersects with
@@ -248,24 +253,28 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
 	 */
 	if (irqd_affinity_is_managed(data) &&
 	    housekeeping_enabled(HK_TYPE_MANAGED_IRQ)) {
-		const struct cpumask *hk_mask, *prog_mask;
-
-		static DEFINE_RAW_SPINLOCK(tmp_mask_lock);
-		static struct cpumask tmp_mask;
+		const struct cpumask *hk_mask;
 
 		hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ);
 
-		raw_spin_lock(&tmp_mask_lock);
 		cpumask_and(&tmp_mask, mask, hk_mask);
 		if (!cpumask_intersects(&tmp_mask, cpu_online_mask))
 			prog_mask = mask;
 		else
 			prog_mask = &tmp_mask;
-		ret = chip->irq_set_affinity(data, prog_mask, force);
-		raw_spin_unlock(&tmp_mask_lock);
 	} else {
-		ret = chip->irq_set_affinity(data, mask, force);
+		prog_mask = mask;
 	}
+
+	/* Make sure we only provide online CPUs to the irqchip */
+	cpumask_and(&tmp_mask, prog_mask, cpu_online_mask);
+	if (!cpumask_empty(&tmp_mask))
+		ret = chip->irq_set_affinity(data, &tmp_mask, force);
+	else
+		ret = -EINVAL;
+
+	raw_spin_unlock(&tmp_mask_lock);
+
 	switch (ret) {
 	case IRQ_SET_MASK_OK:
 	case IRQ_SET_MASK_OK_DONE:
--
cgit v1.2.3
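The effect of the change, condensed into a standalone helper for
illustration (a sketch of the resulting logic, not the actual kernel
function; the helper name is hypothetical):

#include <linux/cpumask.h>
#include <linux/irq.h>
#include <linux/spinlock.h>

/* Narrow @mask to online CPUs before handing it to the irqchip. */
static int example_program_affinity(struct irq_data *data,
				    const struct cpumask *mask, bool force)
{
	/* A static scratch mask keeps large NR_CPUS builds off the stack,
	 * so a lock must serialize concurrent affinity changes. */
	static DEFINE_RAW_SPINLOCK(scratch_lock);
	static struct cpumask scratch;
	struct irq_chip *chip = irq_data_get_irq_chip(data);
	int ret;

	raw_spin_lock(&scratch_lock);
	cpumask_and(&scratch, mask, cpu_online_mask);
	if (cpumask_empty(&scratch))
		ret = -EINVAL;	/* nothing online to target */
	else
		ret = chip->irq_set_affinity(data, &scratch, force);
	raw_spin_unlock(&scratch_lock);

	return ret;
}

Moving the scratch mask and its lock out of the housekeeping branch is
what lets the same narrowing cover the non-managed path as well.
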
From 911488de0565f1d53bd36174d20917ebc4b44c0e Mon Sep 17 00:00:00 2001
From: Yury Norov
Date: Thu, 10 Feb 2022 14:49:05 -0800
Subject: genirq/affinity: Replace cpumask_weight() with cpumask_empty() where
 appropriate

__irq_build_affinity_masks() calls cpumask_weight() to check if any bit
of a given cpumask is set. This can be done more efficiently with
cpumask_empty() because cpumask_empty() stops traversing the cpumask as
soon as it finds the first set bit, while cpumask_weight() counts all
bits unconditionally.

Signed-off-by: Yury Norov
Signed-off-by: Thomas Gleixner
Link: https://lore.kernel.org/r/20220210224933.379149-22-yury.norov@gmail.com
---
 kernel/irq/affinity.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index f7ff8919dc9b..18740faf0eb1 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -258,7 +258,7 @@ static int __irq_build_affinity_masks(unsigned int startvec,
 	nodemask_t nodemsk = NODE_MASK_NONE;
 	struct node_vectors *node_vectors;
 
-	if (!cpumask_weight(cpu_mask))
+	if (cpumask_empty(cpu_mask))
 		return 0;
 
 	nodes = get_nodes_in_cpumask(node_to_cpumask, cpu_mask, &nodemsk);
--
cgit v1.2.3

From c48c8b829d2b966a6649827426bcdba082ccf922 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Thu, 14 Apr 2022 15:00:11 +0100
Subject: genirq: Take the proposed affinity at face value if force==true

Although setting the affinity of an interrupt to a set of CPUs that
doesn't have any online CPU is generally frowned upon, there are a few
limited cases where such affinity is set from a CPUHP notifier, setting
the affinity to a CPU that isn't online yet.

The saving grace is that this is always done using the 'force' attribute,
which gives a hint that the affinity setting can be outside of the online
CPU mask, and the callsite sets this flag with the knowledge that the
underlying interrupt controller knows how to handle it.

This restores the expected behaviour on Marek's system.

Fixes: 33de0aa4bae9 ("genirq: Always limit the affinity to online CPUs")
Reported-by: Marek Szyprowski
Signed-off-by: Marc Zyngier
Signed-off-by: Thomas Gleixner
Tested-by: Marek Szyprowski
Link: https://lore.kernel.org/r/4b7fc13c-887b-a664-26e8-45aed13f048a@samsung.com
Link: https://lore.kernel.org/r/20220414140011.541725-1-maz@kernel.org
---
 kernel/irq/manage.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index f71ecc100545..f1d5a94c6c9f 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -266,10 +266,16 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
 		prog_mask = mask;
 	}
 
-	/* Make sure we only provide online CPUs to the irqchip */
+	/*
+	 * Make sure we only provide online CPUs to the irqchip,
+	 * unless we are being asked to force the affinity (in which
+	 * case we do as we are told).
+	 */
 	cpumask_and(&tmp_mask, prog_mask, cpu_online_mask);
-	if (!cpumask_empty(&tmp_mask))
+	if (!force && !cpumask_empty(&tmp_mask))
 		ret = chip->irq_set_affinity(data, &tmp_mask, force);
+	else if (force)
+		ret = chip->irq_set_affinity(data, mask, force);
 	else
 		ret = -EINVAL;
 
--
cgit v1.2.3
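The 'limited cases' in question are CPU hotplug callbacks such as per-CPU
timer bring-up. A sketch of the callsite pattern this patch caters for
follows; the irq number and function names are hypothetical, while
irq_force_affinity() is the real API:

#include <linux/cpumask.h>
#include <linux/interrupt.h>

static unsigned int example_timer_irq;	/* hypothetical per-CPU timer irq */

/* CPUHP "starting" callback: runs while @cpu is still being brought up. */
static int example_timer_starting_cpu(unsigned int cpu)
{
	enable_irq(example_timer_irq);
	/*
	 * @cpu is not yet in cpu_online_mask. force==true tells
	 * irq_do_set_affinity() to hand the mask to the irqchip as-is
	 * instead of narrowing it to online CPUs and failing.
	 */
	return irq_force_affinity(example_timer_irq, cpumask_of(cpu));
}
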
From 6c846d026d490b2383d395bc8e7b06336219667b Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Tue, 19 Apr 2022 15:18:37 +0100
Subject: gpio: Don't fiddle with irqchips marked as immutable

In order to move away from gpiolib messing with the internals of
unsuspecting irqchips, add a flag by which irqchips advertise that they
are not to be messed with, and do solemnly swear that they correctly
call into the gpiolib helpers when required.

Also nudge the users into converting their drivers to the new model.

Reviewed-by: Andy Shevchenko
Reviewed-by: Bartosz Golaszewski
Signed-off-by: Marc Zyngier
Link: https://lore.kernel.org/r/20220419141846.598305-2-maz@kernel.org
---
 drivers/gpio/gpiolib.c | 7 ++++++-
 include/linux/irq.h    | 2 ++
 kernel/irq/debugfs.c   | 1 +
 3 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index e59884cc12a7..48191e62a3cc 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1475,6 +1475,11 @@ static void gpiochip_set_irq_hooks(struct gpio_chip *gc)
 {
 	struct irq_chip *irqchip = gc->irq.chip;
 
+	if (irqchip->flags & IRQCHIP_IMMUTABLE)
+		return;
+
+	chip_warn(gc, "not an immutable chip, please consider fixing it!\n");
+
 	if (!irqchip->irq_request_resources &&
 	    !irqchip->irq_release_resources) {
 		irqchip->irq_request_resources = gpiochip_irq_reqres;
@@ -1633,7 +1638,7 @@ static void gpiochip_irqchip_remove(struct gpio_chip *gc)
 		irq_domain_remove(gc->irq.domain);
 	}
 
-	if (irqchip) {
+	if (irqchip && !(irqchip->flags & IRQCHIP_IMMUTABLE)) {
 		if (irqchip->irq_request_resources == gpiochip_irq_reqres) {
 			irqchip->irq_request_resources = NULL;
 			irqchip->irq_release_resources = NULL;
diff --git a/include/linux/irq.h b/include/linux/irq.h
index f92788ccdba2..505308253d23 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -569,6 +569,7 @@ struct irq_chip {
  * IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND:  Invokes __enable_irq()/__disable_irq() for wake irqs
  *                                    in the suspend path if they are in disabled state
  * IRQCHIP_AFFINITY_PRE_STARTUP:      Default affinity update before startup
+ * IRQCHIP_IMMUTABLE:                 Don't ever change anything in this chip
  */
 enum {
 	IRQCHIP_SET_TYPE_MASKED			= (1 << 0),
@@ -582,6 +583,7 @@ enum {
 	IRQCHIP_SUPPORTS_NMI			= (1 << 8),
 	IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND	= (1 << 9),
 	IRQCHIP_AFFINITY_PRE_STARTUP		= (1 << 10),
+	IRQCHIP_IMMUTABLE			= (1 << 11),
 };
 
 #include
diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c
index 2b43f5f5033d..bc8e40cf2b65 100644
--- a/kernel/irq/debugfs.c
+++ b/kernel/irq/debugfs.c
@@ -58,6 +58,7 @@ static const struct irq_bit_descr irqchip_flags[] = {
 	BIT_MASK_DESCR(IRQCHIP_SUPPORTS_LEVEL_MSI),
 	BIT_MASK_DESCR(IRQCHIP_SUPPORTS_NMI),
 	BIT_MASK_DESCR(IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND),
+	BIT_MASK_DESCR(IRQCHIP_IMMUTABLE),
 };
 
 static void
--
cgit v1.2.3
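The conversion that the new warning nudges driver authors toward looks
roughly as follows. This is a sketch assuming the
gpiochip_enable_irq()/gpiochip_disable_irq() helpers and the
GPIOCHIP_IRQ_RESOURCE_HELPERS macro introduced alongside this flag; the
chip itself is hypothetical:

#include <linux/gpio/driver.h>
#include <linux/irq.h>

static void example_gpio_irq_mask(struct irq_data *d)
{
	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);

	/* ... mask the line in hardware ... */
	gpiochip_disable_irq(gc, irqd_to_hwirq(d));
}

static void example_gpio_irq_unmask(struct irq_data *d)
{
	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);

	gpiochip_enable_irq(gc, irqd_to_hwirq(d));
	/* ... unmask the line in hardware ... */
}

static const struct irq_chip example_gpio_irq_chip = {
	.name		= "example-gpio",
	.irq_mask	= example_gpio_irq_mask,
	.irq_unmask	= example_gpio_irq_unmask,
	/* Promise gpiolib that we call its helpers ourselves. */
	.flags		= IRQCHIP_IMMUTABLE,
	GPIOCHIP_IRQ_RESOURCE_HELPERS,
};

Because the chip is const and flagged immutable, gpiochip_set_irq_hooks()
now returns early instead of patching its callbacks.
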
From 1adb4d7ad3a585b451f5cf6b0a90c5917af3eac5 Mon Sep 17 00:00:00 2001
From: Colin Ian King
Date: Fri, 22 Apr 2022 12:04:18 +0100
Subject: genirq/matrix: Remove redundant assignment to variable 'end'

Variable 'end' is initialized with a value that is never read; it is
re-assigned later with the same value. The initialization is redundant
and can be removed.

Cleans up the clang scan build warning:

kernel/irq/matrix.c:289:25: warning: Value stored to 'end' during its
initialization is never read [deadcode.DeadStores]

Signed-off-by: Colin Ian King
Signed-off-by: Thomas Gleixner
Reviewed-by: Tom Rix
Link: https://lore.kernel.org/r/20220422110418.1264778-1-colin.i.king@gmail.com
---
 kernel/irq/matrix.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c
index bbfb26489aa1..1698e77645ac 100644
--- a/kernel/irq/matrix.c
+++ b/kernel/irq/matrix.c
@@ -286,7 +286,7 @@ void irq_matrix_remove_managed(struct irq_matrix *m, const struct cpumask *msk)
 int irq_matrix_alloc_managed(struct irq_matrix *m, const struct cpumask *msk,
 			     unsigned int *mapped_cpu)
 {
-	unsigned int bit, cpu, end = m->alloc_end;
+	unsigned int bit, cpu, end;
 	struct cpumap *cm;
 
 	if (cpumask_empty(msk))
--
cgit v1.2.3

From ce4818957fdc5bca57fc2c92b0dfe109d26bcc47 Mon Sep 17 00:00:00 2001
From: Minghao Chi
Date: Mon, 18 Apr 2022 11:07:16 +0000
Subject: genirq: Use pm_runtime_resume_and_get() instead of pm_runtime_get_sync()

pm_runtime_resume_and_get() achieves the same result and simplifies the
code.

[ tglx: Simplify it further by presetting retval ]

Reported-by: Zeal Robot
Signed-off-by: Minghao Chi
Signed-off-by: Thomas Gleixner
Link: https://lore.kernel.org/r/20220418110716.2559453-1-chi.minghao@zte.com.cn
---
 kernel/irq/chip.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

(limited to 'kernel')

diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 54af0deb239b..e6b8e564b37f 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -1573,17 +1573,12 @@ static struct device *irq_get_parent_device(struct irq_data *data)
 int irq_chip_pm_get(struct irq_data *data)
 {
 	struct device *dev = irq_get_parent_device(data);
-	int retval;
+	int retval = 0;
 
-	if (IS_ENABLED(CONFIG_PM) && dev) {
-		retval = pm_runtime_get_sync(dev);
-		if (retval < 0) {
-			pm_runtime_put_noidle(dev);
-			return retval;
-		}
-	}
+	if (IS_ENABLED(CONFIG_PM) && dev)
+		retval = pm_runtime_resume_and_get(dev);
 
-	return 0;
+	return retval;
 }
 
 /**
--
cgit v1.2.3
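The two runtime-PM idioms side by side (a sketch; both helpers are the
real runtime-PM API, only the wrapper names are illustrative):

#include <linux/pm_runtime.h>

/* Old idiom: the caller must drop the usage count on failure. */
static int example_get_old(struct device *dev)
{
	int ret = pm_runtime_get_sync(dev);

	if (ret < 0) {
		pm_runtime_put_noidle(dev);	/* easy to forget */
		return ret;
	}
	return 0;
}

/* New idiom: pm_runtime_resume_and_get() drops the count itself. */
static int example_get_new(struct device *dev)
{
	return pm_runtime_resume_and_get(dev);	/* 0 or negative errno */
}
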
From 21673fcb2532dcd189905ff5a5389eb7dcd0e57a Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Wed, 11 May 2022 13:07:50 +0200
Subject: genirq/irq_sim: Make the irq_work always run in hard irq context

The IRQ simulator uses irq_work to trigger an interrupt. Without the
IRQ_WORK_HARD_IRQ flag the irq_work will be performed in thread context
on PREEMPT_RT. This causes locking errors later in handle_simple_irq()
which expects to be invoked with disabled interrupts.

Triggering individual interrupts in hardirq context should not lead to
unexpected high latencies since this is also what the hardware
controller does. Also it is used as a simulator so...

Use IRQ_WORK_INIT_HARD() to carry out the irq_work in hardirq context on
PREEMPT_RT.

Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: Thomas Gleixner
Link: https://lore.kernel.org/r/YnuZBoEVMGwKkLm+@linutronix.de
---
 kernel/irq/irq_sim.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/irq/irq_sim.c b/kernel/irq/irq_sim.c
index 0cd02efa3a74..dd76323ea3fd 100644
--- a/kernel/irq/irq_sim.c
+++ b/kernel/irq/irq_sim.c
@@ -181,7 +181,7 @@ struct irq_domain *irq_domain_create_sim(struct fwnode_handle *fwnode,
 		goto err_free_bitmap;
 
 	work_ctx->irq_count = num_irqs;
-	init_irq_work(&work_ctx->work, irq_sim_handle_irq);
+	work_ctx->work = IRQ_WORK_INIT_HARD(irq_sim_handle_irq);
 
 	return work_ctx->domain;
--
cgit v1.2.3
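For reference, the two initializers side by side (a sketch; the callback
and variable names are hypothetical):

#include <linux/irq_work.h>

static void example_cb(struct irq_work *work)
{
	/* the actual work to perform */
}

/* May be deferred to a kernel thread on PREEMPT_RT: */
static struct irq_work deferred_work;
/* initialized at runtime with init_irq_work(&deferred_work, example_cb) */

/* Always runs in hard interrupt context, PREEMPT_RT included: */
static struct irq_work hard_work = IRQ_WORK_INIT_HARD(example_cb);

Queueing is unchanged in both cases: irq_work_queue(&hard_work);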