From cd097df4596f3a1e9d75eb8520162de1eb8485b2 Mon Sep 17 00:00:00 2001
From: "Ritesh Harjani (IBM)"
Date: Tue, 10 Jun 2025 07:42:26 +0530
Subject: powerpc/powernv/memtrace: Fix out of bounds issue in memtrace mmap

memtrace mmap has an out-of-bounds issue. This patch fixes it by
checking that the requested mapping region size stays within the
allocated region size.

Reported-by: Jonathan Greental
Fixes: 08a022ad3dfa ("powerpc/powernv/memtrace: Allow mmaping trace buffers")
Signed-off-by: Ritesh Harjani (IBM)
Signed-off-by: Madhavan Srinivasan
Link: https://patch.msgid.link/20250610021227.361980-1-maddy@linux.ibm.com
---
 arch/powerpc/platforms/powernv/memtrace.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
index 4ac9808e55a4..2ea30b343354 100644
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -48,11 +48,15 @@ static ssize_t memtrace_read(struct file *filp, char __user *ubuf,
 static int memtrace_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	struct memtrace_entry *ent = filp->private_data;
+	unsigned long ent_nrpages = ent->size >> PAGE_SHIFT;
+	unsigned long vma_nrpages = vma_pages(vma);
 
-	if (ent->size < vma->vm_end - vma->vm_start)
+	/* The requested page offset should be within object's page count */
+	if (vma->vm_pgoff >= ent_nrpages)
 		return -EINVAL;
 
-	if (vma->vm_pgoff << PAGE_SHIFT >= ent->size)
+	/* The requested mapping range should remain within the bounds */
+	if (vma_nrpages > ent_nrpages - vma->vm_pgoff)
 		return -EINVAL;
 
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-- cgit v1.2.3
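
The ordering of the two checks in memtrace_mmap() above is what makes
them overflow-safe. A stand-alone sketch of the same idiom, with
illustrative names that are not kernel API:

#include <stdbool.h>

/*
 * Validate a (pgoff, nr_pages) request against an object that is
 * obj_pages pages long, without risking unsigned wrap-around.
 */
static bool range_within_object(unsigned long pgoff, unsigned long nr_pages,
				unsigned long obj_pages)
{
	/* The requested offset must lie inside the object... */
	if (pgoff >= obj_pages)
		return false;

	/*
	 * ...and the length must fit in what remains. The subtraction
	 * cannot underflow after the check above, whereas the naive
	 * (pgoff + nr_pages > obj_pages) could wrap and falsely pass.
	 */
	return nr_pages <= obj_pages - pgoff;
}

Rejecting the offset first is what licenses the subtraction; swapping
the two checks would reintroduce the possibility of underflow.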
From 0d67f0dee6c9176bc09a5482dd7346e3a0f14d0b Mon Sep 17 00:00:00 2001
From: Haren Myneni
Date: Tue, 10 Jun 2025 07:42:27 +0530
Subject: powerpc/vas: Return -EINVAL if the offset is non-zero in mmap()

User space calls mmap() to map the VAS window paste address, and the
kernel returns the complete mapped page for each window. So return
-EINVAL if a non-zero value is passed for the offset parameter to
mmap().

See Documentation/arch/powerpc/vas-api.rst for mmap() restrictions.

Co-developed-by: Jonathan Greental
Signed-off-by: Jonathan Greental
Reported-by: Jonathan Greental
Fixes: dda44eb29c23 ("powerpc/vas: Add VAS user space API")
Signed-off-by: Haren Myneni
Signed-off-by: Madhavan Srinivasan
Link: https://patch.msgid.link/20250610021227.361980-2-maddy@linux.ibm.com
---
 arch/powerpc/platforms/book3s/vas-api.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c
index 0b6365d85d11..dc6f75d3ac6e 100644
--- a/arch/powerpc/platforms/book3s/vas-api.c
+++ b/arch/powerpc/platforms/book3s/vas-api.c
@@ -521,6 +521,15 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma)
 		return -EINVAL;
 	}
 
+	/*
+	 * Map complete page to the paste address. So the user
+	 * space should pass 0ULL to the offset parameter.
+	 */
+	if (vma->vm_pgoff) {
+		pr_debug("Page offset unsupported to map paste address\n");
+		return -EINVAL;
+	}
+
 	/* Ensure instance has an open send window */
 	if (!txwin) {
 		pr_err("No send window open?\n");
-- cgit v1.2.3

From aef17cb3d3c43854002956f24c24ec8e1a0e3546 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Tue, 10 Jun 2025 10:22:15 -0700
Subject: Revert "mm/damon/Kconfig: enable CONFIG_DAMON by default"

This reverts commit 28615e6eed152f2fda5486680090b74aeed7b554.

No, we don't make random features default to being on.

Reported-by: Geert Uytterhoeven
Cc: Andrew Morton
Cc: SeongJae Park
Signed-off-by: Linus Torvalds
---
 mm/damon/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mm/damon/Kconfig b/mm/damon/Kconfig
index 551745df011b..c93d0c56b963 100644
--- a/mm/damon/Kconfig
+++ b/mm/damon/Kconfig
@@ -4,7 +4,6 @@ menu "Data Access Monitoring"
 
 config DAMON
 	bool "DAMON: Data Access Monitoring Framework"
-	default y
 	help
 	  This builds a framework that allows kernel subsystems to monitor
 	  access frequency of each memory region. The information can be useful
-- cgit v1.2.3

From 488ef3560196ee10fc1c5547e1574a87068c3494 Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Wed, 28 May 2025 13:18:17 +0100
Subject: KEYS: Invert FINAL_PUT bit

Invert the FINAL_PUT bit so that test_bit_acquire and clear_bit_unlock
can be used instead of smp_mb.

Signed-off-by: Herbert Xu
Signed-off-by: David Howells
Reviewed-by: Jarkko Sakkinen
Signed-off-by: Linus Torvalds
---
 include/linux/key.h | 2 +-
 security/keys/gc.c  | 4 ++--
 security/keys/key.c | 5 +++--
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/include/linux/key.h b/include/linux/key.h
index ba05de8579ec..81b8f05c6898 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -236,7 +236,7 @@ struct key {
 #define KEY_FLAG_ROOT_CAN_INVAL	7	/* set if key can be invalidated by root without permission */
 #define KEY_FLAG_KEEP		8	/* set if key should not be removed */
 #define KEY_FLAG_UID_KEYRING	9	/* set if key is a user or user session keyring */
-#define KEY_FLAG_FINAL_PUT	10	/* set if final put has happened on key */
+#define KEY_FLAG_USER_ALIVE	10	/* set if final put has not happened on key yet */
 
 /* the key type and key description string
  * - the desc is used to match a key against search criteria
diff --git a/security/keys/gc.c b/security/keys/gc.c
index f27223ea4578..748e83818a76 100644
--- a/security/keys/gc.c
+++ b/security/keys/gc.c
@@ -218,8 +218,8 @@ continue_scanning:
 		key = rb_entry(cursor, struct key, serial_node);
 		cursor = rb_next(cursor);
 
-		if (test_bit(KEY_FLAG_FINAL_PUT, &key->flags)) {
-			smp_mb(); /* Clobber key->user after FINAL_PUT seen. */
+		if (!test_bit_acquire(KEY_FLAG_USER_ALIVE, &key->flags)) {
+			/* Clobber key->user after final put seen. */
 			goto found_unreferenced_key;
 		}
 
diff --git a/security/keys/key.c b/security/keys/key.c
index 7198cd2ac3a3..3bbdde778631 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -298,6 +298,7 @@ struct key *key_alloc(struct key_type *type, const char *desc,
 	key->restrict_link = restrict_link;
 	key->last_used_at = ktime_get_real_seconds();
 
+	key->flags |= 1 << KEY_FLAG_USER_ALIVE;
 	if (!(flags & KEY_ALLOC_NOT_IN_QUOTA))
 		key->flags |= 1 << KEY_FLAG_IN_QUOTA;
 	if (flags & KEY_ALLOC_BUILT_IN)
@@ -658,8 +659,8 @@ void key_put(struct key *key)
 			key->user->qnbytes -= key->quotalen;
 			spin_unlock_irqrestore(&key->user->lock, flags);
 		}
-		smp_mb(); /* key->user before FINAL_PUT set. */
-		set_bit(KEY_FLAG_FINAL_PUT, &key->flags);
+		/* Mark key as safe for GC after key->user done. */
+		clear_bit_unlock(KEY_FLAG_USER_ALIVE, &key->flags);
 		schedule_work(&key_gc_work);
 	}
 }
-- cgit v1.2.3
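
The KEYS change above trades two full barriers (smp_mb() on both the
put and GC sides) for a release/acquire pair, which is why the flag's
sense had to be inverted: clear_bit_unlock() provides release semantics
only for clearing a bit. A user-space sketch of the same pattern using
C11 atomics; the names are illustrative, not the keys API:

#include <stdatomic.h>
#include <stdbool.h>

struct object {
	_Atomic bool alive;	/* plays the role of KEY_FLAG_USER_ALIVE */
	int payload;		/* stands in for the key->user bookkeeping */
};

static void final_put(struct object *obj)
{
	obj->payload = 0;	/* last writes to the object */

	/* Release: orders the payload writes before the flag clear. */
	atomic_store_explicit(&obj->alive, false, memory_order_release);
}

static bool gc_may_reap(struct object *obj)
{
	/*
	 * Acquire: pairs with the release store in final_put(), so a
	 * cleared flag guarantees the payload writes are visible too.
	 */
	return !atomic_load_explicit(&obj->alive, memory_order_acquire);
}

On weakly ordered architectures the release/acquire pair is cheaper
than the standalone full barriers it replaces, while giving the same
ordering guarantee between the final put and the garbage collector.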
Signed-off-by: Tejun Heo
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index a92290fffa16..8841f24d408b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -22171,7 +22171,7 @@ R:	Tejun Heo
 R:	David Vernet
 R:	Andrea Righi
 R:	Changwoo Min
-L:	linux-kernel@vger.kernel.org
+L:	sched-ext@lists.linux.dev
 S:	Maintained
 W:	https://github.com/sched-ext/scx
 T:	git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext.git
-- cgit v1.2.3

From c50784e99f0e7199cdb12dbddf02229b102744ef Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 13 Jun 2025 13:23:07 -1000
Subject: sched_ext: Make scx_group_set_weight() always update tg->scx_weight

Otherwise, tg->scx_weight can go out of sync while scx_cgroup is not
enabled, and ops.cgroup_init() may then be called with a stale weight
value.

Signed-off-by: Tejun Heo
Fixes: 819513666966 ("sched_ext: Add cgroup support")
Cc: stable@vger.kernel.org # v6.12+
---
 kernel/sched/ext.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 2c41c78be61e..33a0d8c6ff95 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -4241,12 +4241,12 @@ void scx_group_set_weight(struct task_group *tg, unsigned long weight)
 
 	percpu_down_read(&scx_cgroup_rwsem);
 
-	if (scx_cgroup_enabled && tg->scx_weight != weight) {
-		if (SCX_HAS_OP(sch, cgroup_set_weight))
-			SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_set_weight, NULL,
-				    tg_cgrp(tg), weight);
-		tg->scx_weight = weight;
-	}
+	if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_set_weight) &&
+	    tg->scx_weight != weight)
+		SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_set_weight, NULL,
+			    tg_cgrp(tg), weight);
+
+	tg->scx_weight = weight;
 
 	percpu_up_read(&scx_cgroup_rwsem);
 }
-- cgit v1.2.3
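
The shape of the scx_group_set_weight() fix above generalizes: a cached
value that later paths read must be refreshed unconditionally, with only
the notification kept conditional. A minimal sketch under simplified,
illustrative types (not the scheduler's):

struct group {
	unsigned long weight;	/* cached; read later by init paths */
};

typedef void (*set_weight_cb)(struct group *g, unsigned long weight);

static void group_set_weight(struct group *g, unsigned long weight,
			     set_weight_cb cb, int enabled)
{
	/* Notify only while enabled and when the value actually changes. */
	if (enabled && cb && g->weight != weight)
		cb(g, weight);

	/* Always keep the cache in sync, even while disabled. */
	g->weight = weight;
}

Had the cache update stayed inside the conditional, a group whose weight
changed while the feature was disabled would present the stale value the
next time an init path consulted it.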
From 33796b91871ad4010c8188372dd1faf97cf0f1c0 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 16 Jun 2025 10:13:25 -1000
Subject: sched_ext, sched/core: Don't call scx_group_set_weight() prematurely
 from sched_create_group()

During task_group creation, sched_create_group() calls
scx_group_set_weight() with CGROUP_WEIGHT_DFL to initialize the
sched_ext portion. This is premature and ends up calling
ops.cgroup_set_weight() with an incorrect @cgrp before
ops.cgroup_init() is called.

sched_create_group() should just initialize SCX related fields in the
new task_group. Fix it by factoring out scx_tg_init() from sched_init()
and making sched_create_group() call that function instead of
scx_group_set_weight().

v2: Retain CONFIG_EXT_GROUP_SCHED ifdef in sched_init() as removing it
    leads to build failures on !CONFIG_GROUP_SCHED configs.

Signed-off-by: Tejun Heo
Fixes: 819513666966 ("sched_ext: Add cgroup support")
Cc: stable@vger.kernel.org # v6.12+
---
 kernel/sched/core.c | 4 ++--
 kernel/sched/ext.c  | 5 +++++
 kernel/sched/ext.h  | 2 ++
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index dce50fa57471..8988d38d46a3 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8545,7 +8545,7 @@ void __init sched_init(void)
 		init_cfs_bandwidth(&root_task_group.cfs_bandwidth, NULL);
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 #ifdef CONFIG_EXT_GROUP_SCHED
-		root_task_group.scx_weight = CGROUP_WEIGHT_DFL;
+		scx_tg_init(&root_task_group);
 #endif /* CONFIG_EXT_GROUP_SCHED */
 #ifdef CONFIG_RT_GROUP_SCHED
 		root_task_group.rt_se = (struct sched_rt_entity **)ptr;
@@ -8985,7 +8985,7 @@ struct task_group *sched_create_group(struct task_group *parent)
 	if (!alloc_rt_sched_group(tg, parent))
 		goto err;
 
-	scx_group_set_weight(tg, CGROUP_WEIGHT_DFL);
+	scx_tg_init(tg);
 	alloc_uclamp_sched_group(tg, parent);
 
 	return tg;
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 33a0d8c6ff95..b498d867ba21 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -4092,6 +4092,11 @@ bool scx_can_stop_tick(struct rq *rq)
 DEFINE_STATIC_PERCPU_RWSEM(scx_cgroup_rwsem);
 static bool scx_cgroup_enabled;
 
+void scx_tg_init(struct task_group *tg)
+{
+	tg->scx_weight = CGROUP_WEIGHT_DFL;
+}
+
 int scx_tg_online(struct task_group *tg)
 {
 	struct scx_sched *sch = scx_root;
diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h
index 6e5072f57771..a75835c23f15 100644
--- a/kernel/sched/ext.h
+++ b/kernel/sched/ext.h
@@ -79,6 +79,7 @@ static inline void scx_update_idle(struct rq *rq, bool idle, bool do_notify) {}
 
 #ifdef CONFIG_CGROUP_SCHED
 #ifdef CONFIG_EXT_GROUP_SCHED
+void scx_tg_init(struct task_group *tg);
 int scx_tg_online(struct task_group *tg);
 void scx_tg_offline(struct task_group *tg);
 int scx_cgroup_can_attach(struct cgroup_taskset *tset);
@@ -88,6 +89,7 @@ void scx_cgroup_cancel_attach(struct cgroup_taskset *tset);
 void scx_group_set_weight(struct task_group *tg, unsigned long cgrp_weight);
 void scx_group_set_idle(struct task_group *tg, bool idle);
 #else	/* CONFIG_EXT_GROUP_SCHED */
+static inline void scx_tg_init(struct task_group *tg) {}
 static inline int scx_tg_online(struct task_group *tg) { return 0; }
 static inline void scx_tg_offline(struct task_group *tg) {}
 static inline int scx_cgroup_can_attach(struct cgroup_taskset *tset) { return 0; }
-- cgit v1.2.3
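
The scx_tg_init() factoring follows a common init-versus-setter split:
creation paths call a plain field initializer that cannot fire callbacks
on a half-built object, while the setter that may invoke ops callbacks
is reserved for groups that are already live. A minimal sketch with
illustrative names (not the kernel's):

#define GROUP_WEIGHT_DFL 100UL	/* stand-in for CGROUP_WEIGHT_DFL */

struct group {
	unsigned long weight;
};

/* Safe during construction: plain stores only, no callbacks can fire. */
static void group_init(struct group *g)
{
	g->weight = GROUP_WEIGHT_DFL;
}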