From 501302d5cee0d8e8ec2c4a5919c37e0df9abc99b Mon Sep 17 00:00:00 2001 From: Xiao Liang Date: Sun, 17 Aug 2025 00:30:15 +0800 Subject: padata: Reset next CPU when reorder sequence wraps around When seq_nr wraps around, the next reorder job with seq 0 is hashed to the first CPU in padata_do_serial(). Correspondingly, need reset pd->cpu to the first one when pd->processed wraps around. Otherwise, if the number of used CPUs is not a power of 2, padata_find_next() will be checking a wrong list, hence deadlock. Fixes: 6fc4dbcf0276 ("padata: Replace delayed timer with immediate workqueue in padata_reorder") Cc: Signed-off-by: Xiao Liang Signed-off-by: Herbert Xu --- kernel/padata.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'kernel/padata.c') diff --git a/kernel/padata.c b/kernel/padata.c index f85f8bd788d0..833740d75483 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -291,8 +291,12 @@ static void padata_reorder(struct padata_priv *padata) struct padata_serial_queue *squeue; int cb_cpu; - cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu); processed++; + /* When sequence wraps around, reset to the first CPU. */ + if (unlikely(processed == 0)) + cpu = cpumask_first(pd->cpumask.pcpu); + else + cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu); cb_cpu = padata->cb_cpu; squeue = per_cpu_ptr(pd->squeue, cb_cpu); -- cgit v1.2.3 From b6d02e0e41aa87aaa46b9b995e71a585284c5d27 Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Fri, 5 Sep 2025 11:05:32 +0200 Subject: padata: replace use of system_unbound_wq with system_dfl_wq Currently if a user enqueue a work item using schedule_delayed_work() the used wq is "system_wq" (per-cpu wq) while queue_delayed_work() use WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to schedule_work() that is using system_wq and queue_work(), that makes use again of WORK_CPU_UNBOUND. This lack of consistentcy cannot be addressed without refactoring the API. system_unbound_wq should be the default workqueue so as not to enforce locality constraints for random work whenever it's not required. Adding system_dfl_wq to encourage its use when unbound work should be used. queue_work() / queue_delayed_work() / mod_delayed_work() will now use the new unbound wq: whether the user still use the old wq a warn will be printed along with a wq redirect to the new one. The old system_unbound_wq will be kept for a few release cycles. Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Signed-off-by: Herbert Xu --- kernel/padata.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/padata.c') diff --git a/kernel/padata.c b/kernel/padata.c index 833740d75483..d7cc95367209 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -490,9 +490,9 @@ void __init padata_do_multithreaded(struct padata_mt_job *job) do { nid = next_node_in(old_node, node_states[N_CPU]); } while (!atomic_try_cmpxchg(&last_used_nid, &old_node, nid)); - queue_work_node(nid, system_unbound_wq, &pw->pw_work); + queue_work_node(nid, system_dfl_wq, &pw->pw_work); } else { - queue_work(system_unbound_wq, &pw->pw_work); + queue_work(system_dfl_wq, &pw->pw_work); } /* Use the current thread, which saves starting a workqueue worker. */ -- cgit v1.2.3 From 4fcd322914068f2c6aec7bb5cfd12ce0207b3a21 Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Fri, 5 Sep 2025 11:05:33 +0200 Subject: padata: WQ_PERCPU added to alloc_workqueue users MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently if a user enqueue a work item using schedule_delayed_work() the used wq is "system_wq" (per-cpu wq) while queue_delayed_work() use WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to schedule_work() that is using system_wq and queue_work(), that makes use again of WORK_CPU_UNBOUND. This lack of consistentcy cannot be addressed without refactoring the API. alloc_workqueue() treats all queues as per-CPU by default, while unbound workqueues must opt-in via WQ_UNBOUND. This default is suboptimal: most workloads benefit from unbound queues, allowing the scheduler to place worker threads where they’re needed and reducing noise when CPUs are isolated. This default is suboptimal: most workloads benefit from unbound queues, allowing the scheduler to place worker threads where they’re needed and reducing noise when CPUs are isolated. This patch adds a new WQ_PERCPU flag to explicitly request the use of the per-CPU behavior. Both flags coexist for one release cycle to allow callers to transition their calls. Once migration is complete, WQ_UNBOUND can be removed and unbound will become the implicit default. With the introduction of the WQ_PERCPU flag (equivalent to !WQ_UNBOUND), any alloc_workqueue() caller that doesn’t explicitly specify WQ_UNBOUND must now use WQ_PERCPU. All existing users have been updated accordingly. Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Signed-off-by: Herbert Xu --- kernel/padata.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel/padata.c') diff --git a/kernel/padata.c b/kernel/padata.c index d7cc95367209..f4def028c48c 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -967,8 +967,9 @@ struct padata_instance *padata_alloc(const char *name) cpus_read_lock(); - pinst->serial_wq = alloc_workqueue("%s_serial", WQ_MEM_RECLAIM | - WQ_CPU_INTENSIVE, 1, name); + pinst->serial_wq = alloc_workqueue("%s_serial", + WQ_MEM_RECLAIM | WQ_CPU_INTENSIVE | WQ_PERCPU, + 1, name); if (!pinst->serial_wq) goto err_put_cpus; -- cgit v1.2.3