diff options
| author | Suren Baghdasaryan <surenb@google.com> | 2025-08-08 08:28:49 -0700 |
|---|---|---|
| committer | Andrew Morton <akpm@linux-foundation.org> | 2025-09-13 16:54:48 -0700 |
| commit | d9d1c2d81797250270babcf108d990e3a4799a9d (patch) | |
| tree | ce5332c779b7276e2eec9cbae454679db4099f01 /fs/proc | |
| parent | fs/proc/task_mmu: factor out proc_maps_private fields used by PROCMAP_QUERY (diff) | |
| download | linux-d9d1c2d81797250270babcf108d990e3a4799a9d.tar.gz linux-d9d1c2d81797250270babcf108d990e3a4799a9d.zip | |
fs/proc/task_mmu: execute PROCMAP_QUERY ioctl under per-vma locks
Utilize per-vma locks to stabilize vma after lookup without taking
mmap_lock during PROCMAP_QUERY ioctl execution. If vma lock is
contended, we fall back to mmap_lock but take it only momentarily
to lock the vma and release the mmap_lock. In a very unlikely case
of vm_refcnt overflow, this fallback path will fail and the ioctl is
done under mmap_lock protection.
This change is designed to reduce mmap_lock contention and prevent
PROCMAP_QUERY ioctl calls from blocking address space updates.
Link: https://lkml.kernel.org/r/20250808152850.2580887-4-surenb@google.com
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Acked-by: SeongJae Park <sj@kernel.org>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: David Hildenbrand <david@redhat.com>
Cc: Jann Horn <jannh@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Josef Bacik <josef@toxicpanda.com>
Cc: Kalesh Singh <kaleshsingh@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Thomas Weißschuh <linux@weissschuh.net>
Cc: T.J. Mercier <tjmercier@google.com>
Cc: Ye Bin <yebin10@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'fs/proc')
| -rw-r--r-- | fs/proc/task_mmu.c | 103 |
1 file changed, 85 insertions, 18 deletions
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index c0968d293b61..e64cf40ce9c4 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -132,6 +132,12 @@ static void release_task_mempolicy(struct proc_maps_private *priv)
 
 #ifdef CONFIG_PER_VMA_LOCK
 
+static void reset_lock_ctx(struct proc_maps_locking_ctx *lock_ctx)
+{
+	lock_ctx->locked_vma = NULL;
+	lock_ctx->mmap_locked = false;
+}
+
 static void unlock_ctx_vma(struct proc_maps_locking_ctx *lock_ctx)
 {
 	if (lock_ctx->locked_vma) {
@@ -157,8 +163,7 @@ static inline bool lock_vma_range(struct seq_file *m,
 		lock_ctx->mmap_locked = true;
 	} else {
 		rcu_read_lock();
-		lock_ctx->locked_vma = NULL;
-		lock_ctx->mmap_locked = false;
+		reset_lock_ctx(lock_ctx);
 	}
 
 	return true;
@@ -522,28 +527,90 @@ static int pid_maps_open(struct inode *inode, struct file *file)
 	PROCMAP_QUERY_VMA_FLAGS				\
 )
 
-static int query_vma_setup(struct mm_struct *mm)
+#ifdef CONFIG_PER_VMA_LOCK
+
+static int query_vma_setup(struct proc_maps_locking_ctx *lock_ctx)
 {
-	return mmap_read_lock_killable(mm);
+	reset_lock_ctx(lock_ctx);
+
+	return 0;
 }
 
-static void query_vma_teardown(struct mm_struct *mm, struct vm_area_struct *vma)
+static void query_vma_teardown(struct proc_maps_locking_ctx *lock_ctx)
 {
-	mmap_read_unlock(mm);
+	if (lock_ctx->mmap_locked) {
+		mmap_read_unlock(lock_ctx->mm);
+		lock_ctx->mmap_locked = false;
+	} else {
+		unlock_ctx_vma(lock_ctx);
+	}
+}
+
+static struct vm_area_struct *query_vma_find_by_addr(struct proc_maps_locking_ctx *lock_ctx,
+						     unsigned long addr)
+{
+	struct mm_struct *mm = lock_ctx->mm;
+	struct vm_area_struct *vma;
+	struct vma_iterator vmi;
+
+	if (lock_ctx->mmap_locked)
+		return find_vma(mm, addr);
+
+	/* Unlock previously locked VMA and find the next one under RCU */
+	unlock_ctx_vma(lock_ctx);
+	rcu_read_lock();
+	vma_iter_init(&vmi, mm, addr);
+	vma = lock_next_vma(mm, &vmi, addr);
+	rcu_read_unlock();
+
+	if (!vma)
+		return NULL;
+
+	if (!IS_ERR(vma)) {
+		lock_ctx->locked_vma = vma;
+		return vma;
+	}
+
+	if (PTR_ERR(vma) == -EAGAIN) {
+		/* Fallback to mmap_lock on vma->vm_refcnt overflow */
+		mmap_read_lock(mm);
+		vma = find_vma(mm, addr);
+		lock_ctx->mmap_locked = true;
+	}
+
+	return vma;
 }
 
-static struct vm_area_struct *query_vma_find_by_addr(struct mm_struct *mm, unsigned long addr)
+#else /* CONFIG_PER_VMA_LOCK */
+
+static int query_vma_setup(struct proc_maps_locking_ctx *lock_ctx)
+{
+	return mmap_read_lock_killable(lock_ctx->mm);
+}
+
+static void query_vma_teardown(struct proc_maps_locking_ctx *lock_ctx)
+{
+	mmap_read_unlock(lock_ctx->mm);
+}
+
+static struct vm_area_struct *query_vma_find_by_addr(struct proc_maps_locking_ctx *lock_ctx,
+						     unsigned long addr)
 {
-	return find_vma(mm, addr);
+	return find_vma(lock_ctx->mm, addr);
 }
 
-static struct vm_area_struct *query_matching_vma(struct mm_struct *mm,
+#endif /* CONFIG_PER_VMA_LOCK */
+
+static struct vm_area_struct *query_matching_vma(struct proc_maps_locking_ctx *lock_ctx,
 						 unsigned long addr, u32 flags)
 {
 	struct vm_area_struct *vma;
 
 next_vma:
-	vma = query_vma_find_by_addr(mm, addr);
+	vma = query_vma_find_by_addr(lock_ctx, addr);
+	if (IS_ERR(vma))
+		return vma;
+
 	if (!vma)
 		goto no_vma;
 
@@ -584,11 +651,11 @@ no_vma:
 	return ERR_PTR(-ENOENT);
 }
 
-static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg)
+static int do_procmap_query(struct mm_struct *mm, void __user *uarg)
 {
+	struct proc_maps_locking_ctx lock_ctx = { .mm = mm };
 	struct procmap_query karg;
 	struct vm_area_struct *vma;
-	struct mm_struct *mm;
 	const char *name = NULL;
 	char build_id_buf[BUILD_ID_SIZE_MAX], *name_buf = NULL;
 	__u64 usize;
@@ -615,17 +682,16 @@ static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg)
 	if (!!karg.build_id_size != !!karg.build_id_addr)
 		return -EINVAL;
 
-	mm = priv->lock_ctx.mm;
 	if (!mm || !mmget_not_zero(mm))
 		return -ESRCH;
 
-	err = query_vma_setup(mm);
+	err = query_vma_setup(&lock_ctx);
 	if (err) {
 		mmput(mm);
 		return err;
 	}
 
-	vma = query_matching_vma(mm, karg.query_addr, karg.query_flags);
+	vma = query_matching_vma(&lock_ctx, karg.query_addr, karg.query_flags);
 	if (IS_ERR(vma)) {
 		err = PTR_ERR(vma);
 		vma = NULL;
@@ -710,7 +776,7 @@ static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg)
 	}
 
 	/* unlock vma or mmap_lock, and put mm_struct before copying data to user */
-	query_vma_teardown(mm, vma);
+	query_vma_teardown(&lock_ctx);
 	mmput(mm);
 
 	if (karg.vma_name_size && copy_to_user(u64_to_user_ptr(karg.vma_name_addr),
@@ -730,7 +796,7 @@ static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg)
 	return 0;
 
 out:
-	query_vma_teardown(mm, vma);
+	query_vma_teardown(&lock_ctx);
 	mmput(mm);
 	kfree(name_buf);
 	return err;
@@ -743,7 +809,8 @@ static long procfs_procmap_ioctl(struct file *file, unsigned int cmd, unsigned l
 
 	switch (cmd) {
 	case PROCMAP_QUERY:
-		return do_procmap_query(priv, (void __user *)arg);
+		/* priv->lock_ctx.mm is set during file open operation */
+		return do_procmap_query(priv->lock_ctx.mm, (void __user *)arg);
 	default:
 		return -ENOIOCTLCMD;
 	}
