From 62685ab071de7c39499212bff19f1b5bc0148bc7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 1 Oct 2025 15:24:49 +0200 Subject: uprobe: Move arch_uprobe_optimize right after handlers execution It's less confusing to optimize uprobe right after handlers execution and before we do the check for changed ip register to avoid situations where changed ip register would skip uprobe optimization. Suggested-by: Linus Torvalds Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Masami Hiramatsu (Google) Acked-by: Andrii Nakryiko Acked-by: Oleg Nesterov --- kernel/events/uprobes.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 8709c69118b5..f11ceb8be8c4 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -2765,6 +2765,9 @@ static void handle_swbp(struct pt_regs *regs) handler_chain(uprobe, regs); + /* Try to optimize after first hit. */ + arch_uprobe_optimize(&uprobe->arch, bp_vaddr); + /* * If user decided to take execution elsewhere, it makes little sense * to execute the original instruction, so let's skip it. @@ -2772,9 +2775,6 @@ static void handle_swbp(struct pt_regs *regs) if (instruction_pointer(regs) != bp_vaddr) goto out; - /* Try to optimize after first hit. */ - arch_uprobe_optimize(&uprobe->arch, bp_vaddr); - if (arch_uprobe_skip_sstep(&uprobe->arch, regs)) goto out; -- cgit v1.2.3 From ebfc8542ad62d066771e46c8aa30f5624b89cad8 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 13 Oct 2025 10:22:42 +0300 Subject: perf/core: Fix address filter match with backing files It was reported that Intel PT address filters do not work in Docker containers. That relates to the use of overlayfs. overlayfs records the backing file in struct vm_area_struct vm_file, instead of the user file that the user mmapped. In order for an address filter to match, it must compare to the user file inode. There is an existing helper file_user_inode() for that situation. Use file_user_inode() instead of file_inode() to get the inode for address filter matching. Example: Setup: # cd /root # mkdir test ; cd test ; mkdir lower upper work merged # cp `which cat` lower # mount -t overlay overlay -olowerdir=lower,upperdir=upper,workdir=work merged # perf record --buildid-mmap -e intel_pt//u --filter 'filter * @ /root/test/merged/cat' -- /root/test/merged/cat /proc/self/maps ... 55d61d246000-55d61d2e1000 r-xp 00018000 00:1a 3418 /root/test/merged/cat ... [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.015 MB perf.data ] # perf buildid-cache --add /root/test/merged/cat Before: Address filter does not match so there are no control flow packets # perf script --itrace=e # perf script --itrace=b | wc -l 0 # perf script -D | grep 'TIP.PGE' | wc -l 0 # After: Address filter does match so there are control flow packets # perf script --itrace=e # perf script --itrace=b | wc -l 235 # perf script -D | grep 'TIP.PGE' | wc -l 57 # With respect to stable kernels, overlayfs mmap function ovl_mmap() was added in v4.19 but file_user_inode() was not added until v6.8 and never back-ported to stable kernels. FMODE_BACKING that it depends on was added in v6.5. This issue has gone largely unnoticed, so back-porting before v6.8 is probably not worth it, so put 6.8 as the stable kernel prerequisite version, although in practice the next long term kernel is 6.12. Closes: https://lore.kernel.org/linux-perf-users/aBCwoq7w8ohBRQCh@fremen.lan Reported-by: Edd Barrett Signed-off-by: Adrian Hunter Signed-off-by: Peter Zijlstra (Intel) Acked-by: Amir Goldstein Cc: stable@vger.kernel.org # 6.8 --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/events/core.c b/kernel/events/core.c index 7541f6f85fcb..cd63ec84e386 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9492,7 +9492,7 @@ static bool perf_addr_filter_match(struct perf_addr_filter *filter, if (!filter->path.dentry) return false; - if (d_inode(filter->path.dentry) != file_inode(file)) + if (d_inode(filter->path.dentry) != file_user_inode(file)) return false; if (filter->offset > offset + size) -- cgit v1.2.3 From 8818f507a9391019a3ec7c57b1a32e4b386e48a5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 13 Oct 2025 10:22:43 +0300 Subject: perf/core: Fix MMAP event path names with backing files Some file systems like FUSE-based ones or overlayfs may record the backing file in struct vm_area_struct vm_file, instead of the user file that the user mmapped. Since commit def3ae83da02f ("fs: store real path instead of fake path in backing file f_path"), file_path() no longer returns the user file path when applied to a backing file. There is an existing helper file_user_path() for that situation. Use file_user_path() instead of file_path() to get the path for MMAP and MMAP2 events. Example: Setup: # cd /root # mkdir test ; cd test ; mkdir lower upper work merged # cp `which cat` lower # mount -t overlay overlay -olowerdir=lower,upperdir=upper,workdir=work merged # perf record -e intel_pt//u -- /root/test/merged/cat /proc/self/maps ... 55b0ba399000-55b0ba434000 r-xp 00018000 00:1a 3419 /root/test/merged/cat ... [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.060 MB perf.data ] # Before: File name is wrong (/cat), so decoding fails: # perf script --no-itrace --show-mmap-events cat 367 [016] 100.491492: PERF_RECORD_MMAP2 367/367: [0x55b0ba399000(0x9b000) @ 0x18000 00:02 3419 489959280]: r-xp /cat ... # perf script --itrace=e | wc -l Warning: 19 instruction trace errors 19 # After: File name is correct (/root/test/merged/cat), so decoding is ok: # perf script --no-itrace --show-mmap-events cat 364 [016] 72.153006: PERF_RECORD_MMAP2 364/364: [0x55ce4003d000(0x9b000) @ 0x18000 00:02 3419 3132534314]: r-xp /root/test/merged/cat # perf script --itrace=e # perf script --itrace=e | wc -l 0 # Fixes: def3ae83da02f ("fs: store real path instead of fake path in backing file f_path") Signed-off-by: Adrian Hunter Signed-off-by: Peter Zijlstra (Intel) Acked-by: Amir Goldstein Cc: stable@vger.kernel.org --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/events/core.c b/kernel/events/core.c index cd63ec84e386..7b5c2373a8d7 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9416,7 +9416,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) * need to add enough zero bytes after the string to handle * the 64bit alignment we do later. */ - name = file_path(file, buf, PATH_MAX - sizeof(u64)); + name = d_path(file_user_path(file), buf, PATH_MAX - sizeof(u64)); if (IS_ERR(name)) { name = "//toolong"; goto cpy_name; -- cgit v1.2.3 From fa4f4bae893fbce8a3edfff1ab7ece0c01dc1328 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 13 Oct 2025 10:22:44 +0300 Subject: perf/core: Fix MMAP2 event device with backing files Some file systems like FUSE-based ones or overlayfs may record the backing file in struct vm_area_struct vm_file, instead of the user file that the user mmapped. That causes perf to misreport the device major/minor numbers of the file system of the file, and the generation of the file, and potentially other inode details. There is an existing helper file_user_inode() for that situation. Use file_user_inode() instead of file_inode() to get the inode for MMAP2 events. Example: Setup: # cd /root # mkdir test ; cd test ; mkdir lower upper work merged # cp `which cat` lower # mount -t overlay overlay -olowerdir=lower,upperdir=upper,workdir=work merged # perf record -e cycles:u -- /root/test/merged/cat /proc/self/maps ... 55b2c91d0000-55b2c926b000 r-xp 00018000 00:1a 3419 /root/test/merged/cat ... [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.004 MB perf.data (5 samples) ] # # stat /root/test/merged/cat File: /root/test/merged/cat Size: 1127792 Blocks: 2208 IO Block: 4096 regular file Device: 0,26 Inode: 3419 Links: 1 Access: (0755/-rwxr-xr-x) Uid: ( 0/ root) Gid: ( 0/ root) Access: 2025-09-08 12:23:59.453309624 +0000 Modify: 2025-09-08 12:23:59.454309624 +0000 Change: 2025-09-08 12:23:59.454309624 +0000 Birth: 2025-09-08 12:23:59.453309624 +0000 Before: Device reported 00:02 differs from stat output and /proc/self/maps # perf script --show-mmap-events | grep /root/test/merged/cat cat 377 [-01] 243.078558: PERF_RECORD_MMAP2 377/377: [0x55b2c91d0000(0x9b000) @ 0x18000 00:02 3419 2068525940]: r-xp /root/test/merged/cat After: Device reported 00:1a is the same as stat output and /proc/self/maps # perf script --show-mmap-events | grep /root/test/merged/cat cat 362 [-01] 127.755167: PERF_RECORD_MMAP2 362/362: [0x55ba6e781000(0x9b000) @ 0x18000 00:1a 3419 0]: r-xp /root/test/merged/cat With respect to stable kernels, overlayfs mmap function ovl_mmap() was added in v4.19 but file_user_inode() was not added until v6.8 and never back-ported to stable kernels. FMODE_BACKING that it depends on was added in v6.5. This issue has gone largely unnoticed, so back-porting before v6.8 is probably not worth it, so put 6.8 as the stable kernel prerequisite version, although in practice the next long term kernel is 6.12. Signed-off-by: Adrian Hunter Signed-off-by: Peter Zijlstra (Intel) Acked-by: Amir Goldstein Cc: stable@vger.kernel.org # 6.8 --- kernel/events/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/events/core.c b/kernel/events/core.c index 7b5c2373a8d7..177e57c1a362 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9403,7 +9403,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) flags |= MAP_HUGETLB; if (file) { - struct inode *inode; + const struct inode *inode; dev_t dev; buf = kmalloc(PATH_MAX, GFP_KERNEL); @@ -9421,7 +9421,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) name = "//toolong"; goto cpy_name; } - inode = file_inode(vma->vm_file); + inode = file_user_inode(vma->vm_file); dev = inode->i_sb->s_dev; ino = inode->i_ino; gen = inode->i_generation; -- cgit v1.2.3