From 62aa5790cec89108a23052cc2f00fdd31f9adbac Mon Sep 17 00:00:00 2001 From: Anton Protopopov Date: Mon, 31 Mar 2025 20:36:17 +0000 Subject: bpf: Fix a comment describing bpf_attr The map_fd field of the bpf_attr union is used in the BPF_MAP_FREEZE syscall. Explicitly mention this in the comments. Signed-off-by: Anton Protopopov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250331203618.1973691-2-a.s.protopopov@gmail.com --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 28705ae67784..07ee73cdf97b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1506,7 +1506,7 @@ union bpf_attr { __s32 map_token_fd; }; - struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ + struct { /* anonymous struct used by BPF_MAP_*_ELEM and BPF_MAP_FREEZE commands */ __u32 map_fd; __aligned_u64 key; union { -- cgit v1.2.3 From b412fd6bcc4c1696b0674434f56b198950c0e2bf Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Tue, 8 Apr 2025 11:00:04 +0200 Subject: bpf: Clarify role of BPF_F_RECOMPUTE_CSUM BPF_F_RECOMPUTE_CSUM doesn't update the actual L3 and L4 checksums in the packet, but simply updates skb->csum (according to skb->ip_summed). This patch clarifies that to avoid confusions. Signed-off-by: Paul Chaignon Link: https://lore.kernel.org/r/ff6895d42936f03dbb82334d8bcfd50e00c79086.1744102490.git.paul.chaignon@gmail.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 14 +++++++++----- tools/include/uapi/linux/bpf.h | 14 +++++++++----- 2 files changed, 18 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 07ee73cdf97b..14ef3db844fa 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1995,11 +1995,15 @@ union bpf_attr { * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) * Description * Store *len* bytes from address *from* into the packet - * associated to *skb*, at *offset*. *flags* are a combination of - * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the - * checksum for the packet after storing the bytes) and - * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\ - * **->swhash** and *skb*\ **->l4hash** to 0). + * associated to *skb*, at *offset*. The *flags* are a combination + * of the following values: + * + * **BPF_F_RECOMPUTE_CSUM** + * Automatically update *skb*\ **->csum** after storing the + * bytes. + * **BPF_F_INVALIDATE_HASH** + * Set *skb*\ **->hash**, *skb*\ **->swhash** and *skb*\ + * **->l4hash** to 0. * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 07ee73cdf97b..14ef3db844fa 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1995,11 +1995,15 @@ union bpf_attr { * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) * Description * Store *len* bytes from address *from* into the packet - * associated to *skb*, at *offset*. *flags* are a combination of - * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the - * checksum for the packet after storing the bytes) and - * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\ - * **->swhash** and *skb*\ **->l4hash** to 0). + * associated to *skb*, at *offset*. 
The *flags* are a combination + * of the following values: + * + * **BPF_F_RECOMPUTE_CSUM** + * Automatically update *skb*\ **->csum** after storing the + * bytes. + * **BPF_F_INVALIDATE_HASH** + * Set *skb*\ **->hash**, *skb*\ **->swhash** and *skb*\ + * **->l4hash** to 0. * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers -- cgit v1.2.3 From 5a15a050df714959f0d5a57ac3201bd1c6594984 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Tue, 8 Apr 2025 11:00:51 +0200 Subject: bpf: Clarify the meaning of BPF_F_PSEUDO_HDR In the bpf_l4_csum_replace helper, the BPF_F_PSEUDO_HDR flag should only be set if the modified header field is part of the pseudo-header. If you modify for example the UDP ports and pass BPF_F_PSEUDO_HDR, inet_proto_csum_replace4 will update skb->csum even though it shouldn't (the port and the UDP checksum updates null each other). Signed-off-by: Paul Chaignon Link: https://lore.kernel.org/r/5126ef84ba75425b689482cbc98bffe75e5d8ab0.1744102490.git.paul.chaignon@gmail.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 2 +- tools/include/uapi/linux/bpf.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 14ef3db844fa..71d5ac83cf5d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2055,7 +2055,7 @@ union bpf_attr { * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and * for updates resulting in a null checksum the value is set to * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates - * the checksum is to be computed against a pseudo-header. + * that the modified header field is part of the pseudo-header. * * This helper works in combination with **bpf_csum_diff**\ (), * which does not update the checksum in-place, but offers more diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 14ef3db844fa..71d5ac83cf5d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2055,7 +2055,7 @@ union bpf_attr { * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and * for updates resulting in a null checksum the value is set to * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates - * the checksum is to be computed against a pseudo-header. + * that the modified header field is part of the pseudo-header. * * This helper works in combination with **bpf_csum_diff**\ (), * which does not update the checksum in-place, but offers more -- cgit v1.2.3 From ee971630f20fd421fffcdc4543731ebcb54ed6d0 Mon Sep 17 00:00:00 2001 From: Feng Yang Date: Tue, 6 May 2025 14:14:33 +0800 Subject: bpf: Allow some trace helpers for all prog types if it works under NMI and doesn't use any context-dependent things, should be fine for any program type. The detailed discussion is in [1]. 
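As an illustration (a hedged sketch, not part of this patch): sk_msg programs run in
process context, so with helpers like bpf_get_current_comm() moved into
bpf_base_func_proto() they become usable there too, given CAP_PERFMON. The
program name below is hypothetical:

  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  char LICENSE[] SEC("license") = "GPL";

  SEC("sk_msg")
  int log_comm(struct sk_msg_md *msg)
  {
      char comm[16] = {};

      /* Before this change, loading an sk_msg program using this helper
       * failed; now it resolves through bpf_base_func_proto().
       */
      if (!bpf_get_current_comm(comm, sizeof(comm)))
          bpf_printk("sendmsg by %s", comm);
      return SK_PASS;
  }
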
[1] https://lore.kernel.org/all/CAEf4Bza6gK3dsrTosk6k3oZgtHesNDSrDd8sdeQ-GiS6oJixQg@mail.gmail.com/ Suggested-by: Andrii Nakryiko Signed-off-by: Feng Yang Signed-off-by: Andrii Nakryiko Acked-by: Tejun Heo Link: https://lore.kernel.org/bpf/20250506061434.94277-2-yangfeng59949@163.com --- include/linux/bpf-cgroup.h | 8 -------- kernel/bpf/cgroup.c | 32 -------------------------------- kernel/bpf/helpers.c | 42 ++++++++++++++++++++++++++++++++++++++++++ kernel/trace/bpf_trace.c | 41 ++++------------------------------------- net/core/filter.c | 14 -------------- 5 files changed, 46 insertions(+), 91 deletions(-) (limited to 'include') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 9de7adb68294..4847dcade917 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -427,8 +427,6 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr, const struct bpf_func_proto * cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog); -const struct bpf_func_proto * -cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog); #else static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } @@ -465,12 +463,6 @@ cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return NULL; } -static inline const struct bpf_func_proto * -cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) -{ - return NULL; -} - static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *map) { return 0; } static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc( diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 84f58f3d028a..62a1d8deb3dc 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1653,10 +1653,6 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) if (func_proto) return func_proto; - func_proto = cgroup_current_func_proto(func_id, prog); - if (func_proto) - return func_proto; - switch (func_id) { case BPF_FUNC_perf_event_output: return &bpf_event_output_data_proto; @@ -2204,10 +2200,6 @@ sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) if (func_proto) return func_proto; - func_proto = cgroup_current_func_proto(func_id, prog); - if (func_proto) - return func_proto; - switch (func_id) { case BPF_FUNC_sysctl_get_name: return &bpf_sysctl_get_name_proto; @@ -2351,10 +2343,6 @@ cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) if (func_proto) return func_proto; - func_proto = cgroup_current_func_proto(func_id, prog); - if (func_proto) - return func_proto; - switch (func_id) { #ifdef CONFIG_NET case BPF_FUNC_get_netns_cookie: @@ -2601,23 +2589,3 @@ cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return NULL; } } - -/* Common helpers for cgroup hooks with valid process context. 
*/ -const struct bpf_func_proto * -cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) -{ - switch (func_id) { - case BPF_FUNC_get_current_uid_gid: - return &bpf_get_current_uid_gid_proto; - case BPF_FUNC_get_current_comm: - return &bpf_get_current_comm_proto; -#ifdef CONFIG_CGROUP_NET_CLASSID - case BPF_FUNC_get_cgroup_classid: - return &bpf_get_cgroup_classid_curr_proto; -#endif - case BPF_FUNC_current_task_under_cgroup: - return &bpf_current_task_under_cgroup_proto; - default: - return NULL; - } -} diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 78cefb41266a..fed53da75025 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "../../lib/kstrtox.h" @@ -1912,6 +1913,12 @@ const struct bpf_func_proto bpf_probe_read_user_str_proto __weak; const struct bpf_func_proto bpf_probe_read_kernel_proto __weak; const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak; const struct bpf_func_proto bpf_task_pt_regs_proto __weak; +const struct bpf_func_proto bpf_perf_event_read_proto __weak; +const struct bpf_func_proto bpf_send_signal_proto __weak; +const struct bpf_func_proto bpf_send_signal_thread_proto __weak; +const struct bpf_func_proto bpf_get_task_stack_sleepable_proto __weak; +const struct bpf_func_proto bpf_get_task_stack_proto __weak; +const struct bpf_func_proto bpf_get_branch_snapshot_proto __weak; const struct bpf_func_proto * bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) @@ -1965,6 +1972,8 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_current_pid_tgid_proto; case BPF_FUNC_get_ns_current_pid_tgid: return &bpf_get_ns_current_pid_tgid_proto; + case BPF_FUNC_get_current_uid_gid: + return &bpf_get_current_uid_gid_proto; default: break; } @@ -2022,7 +2031,21 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_current_cgroup_id_proto; case BPF_FUNC_get_current_ancestor_cgroup_id: return &bpf_get_current_ancestor_cgroup_id_proto; + case BPF_FUNC_current_task_under_cgroup: + return &bpf_current_task_under_cgroup_proto; #endif +#ifdef CONFIG_CGROUP_NET_CLASSID + case BPF_FUNC_get_cgroup_classid: + return &bpf_get_cgroup_classid_curr_proto; +#endif + case BPF_FUNC_task_storage_get: + if (bpf_prog_check_recur(prog)) + return &bpf_task_storage_get_recur_proto; + return &bpf_task_storage_get_proto; + case BPF_FUNC_task_storage_delete: + if (bpf_prog_check_recur(prog)) + return &bpf_task_storage_delete_recur_proto; + return &bpf_task_storage_delete_proto; default: break; } @@ -2037,6 +2060,8 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_current_task_proto; case BPF_FUNC_get_current_task_btf: return &bpf_get_current_task_btf_proto; + case BPF_FUNC_get_current_comm: + return &bpf_get_current_comm_proto; case BPF_FUNC_probe_read_user: return &bpf_probe_read_user_proto; case BPF_FUNC_probe_read_kernel: @@ -2047,6 +2072,10 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_probe_read_kernel_str: return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? 
NULL : &bpf_probe_read_kernel_str_proto; + case BPF_FUNC_copy_from_user: + return &bpf_copy_from_user_proto; + case BPF_FUNC_copy_from_user_task: + return &bpf_copy_from_user_task_proto; case BPF_FUNC_snprintf_btf: return &bpf_snprintf_btf_proto; case BPF_FUNC_snprintf: @@ -2057,6 +2086,19 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return bpf_get_trace_vprintk_proto(); case BPF_FUNC_perf_event_read_value: return bpf_get_perf_event_read_value_proto(); + case BPF_FUNC_perf_event_read: + return &bpf_perf_event_read_proto; + case BPF_FUNC_send_signal: + return &bpf_send_signal_proto; + case BPF_FUNC_send_signal_thread: + return &bpf_send_signal_thread_proto; + case BPF_FUNC_get_task_stack: + return prog->sleepable ? &bpf_get_task_stack_sleepable_proto + : &bpf_get_task_stack_proto; + case BPF_FUNC_get_branch_snapshot: + return &bpf_get_branch_snapshot_proto; + case BPF_FUNC_find_vma: + return &bpf_find_vma_proto; default: return NULL; } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 52c432a44aeb..868920994517 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -572,7 +572,7 @@ BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags) return value; } -static const struct bpf_func_proto bpf_perf_event_read_proto = { +const struct bpf_func_proto bpf_perf_event_read_proto = { .func = bpf_perf_event_read, .gpl_only = true, .ret_type = RET_INTEGER, @@ -882,7 +882,7 @@ BPF_CALL_1(bpf_send_signal, u32, sig) return bpf_send_signal_common(sig, PIDTYPE_TGID, NULL, 0); } -static const struct bpf_func_proto bpf_send_signal_proto = { +const struct bpf_func_proto bpf_send_signal_proto = { .func = bpf_send_signal, .gpl_only = false, .ret_type = RET_INTEGER, @@ -894,7 +894,7 @@ BPF_CALL_1(bpf_send_signal_thread, u32, sig) return bpf_send_signal_common(sig, PIDTYPE_PID, NULL, 0); } -static const struct bpf_func_proto bpf_send_signal_thread_proto = { +const struct bpf_func_proto bpf_send_signal_thread_proto = { .func = bpf_send_signal_thread, .gpl_only = false, .ret_type = RET_INTEGER, @@ -1185,7 +1185,7 @@ BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags) return entry_cnt * br_entry_size; } -static const struct bpf_func_proto bpf_get_branch_snapshot_proto = { +const struct bpf_func_proto bpf_get_branch_snapshot_proto = { .func = bpf_get_branch_snapshot, .gpl_only = true, .ret_type = RET_INTEGER, @@ -1430,14 +1430,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) const struct bpf_func_proto *func_proto; switch (func_id) { - case BPF_FUNC_get_current_uid_gid: - return &bpf_get_current_uid_gid_proto; - case BPF_FUNC_get_current_comm: - return &bpf_get_current_comm_proto; case BPF_FUNC_get_smp_processor_id: return &bpf_get_smp_processor_id_proto; - case BPF_FUNC_perf_event_read: - return &bpf_perf_event_read_proto; #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE case BPF_FUNC_probe_read: return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? @@ -1446,35 +1440,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? NULL : &bpf_probe_read_compat_str_proto; #endif -#ifdef CONFIG_CGROUPS - case BPF_FUNC_current_task_under_cgroup: - return &bpf_current_task_under_cgroup_proto; -#endif - case BPF_FUNC_send_signal: - return &bpf_send_signal_proto; - case BPF_FUNC_send_signal_thread: - return &bpf_send_signal_thread_proto; - case BPF_FUNC_get_task_stack: - return prog->sleepable ? 
&bpf_get_task_stack_sleepable_proto - : &bpf_get_task_stack_proto; - case BPF_FUNC_copy_from_user: - return &bpf_copy_from_user_proto; - case BPF_FUNC_copy_from_user_task: - return &bpf_copy_from_user_task_proto; - case BPF_FUNC_task_storage_get: - if (bpf_prog_check_recur(prog)) - return &bpf_task_storage_get_recur_proto; - return &bpf_task_storage_get_proto; - case BPF_FUNC_task_storage_delete: - if (bpf_prog_check_recur(prog)) - return &bpf_task_storage_delete_recur_proto; - return &bpf_task_storage_delete_proto; case BPF_FUNC_get_func_ip: return &bpf_get_func_ip_proto_tracing; - case BPF_FUNC_get_branch_snapshot: - return &bpf_get_branch_snapshot_proto; - case BPF_FUNC_find_vma: - return &bpf_find_vma_proto; default: break; } diff --git a/net/core/filter.c b/net/core/filter.c index 79cab4d78dc3..30e7d3679088 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -8022,10 +8022,6 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) if (func_proto) return func_proto; - func_proto = cgroup_current_func_proto(func_id, prog); - if (func_proto) - return func_proto; - switch (func_id) { case BPF_FUNC_get_socket_cookie: return &bpf_get_socket_cookie_sock_proto; @@ -8051,10 +8047,6 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) if (func_proto) return func_proto; - func_proto = cgroup_current_func_proto(func_id, prog); - if (func_proto) - return func_proto; - switch (func_id) { case BPF_FUNC_bind: switch (prog->expected_attach_type) { @@ -8488,18 +8480,12 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_msg_pop_data_proto; case BPF_FUNC_perf_event_output: return &bpf_event_output_data_proto; - case BPF_FUNC_get_current_uid_gid: - return &bpf_get_current_uid_gid_proto; case BPF_FUNC_sk_storage_get: return &bpf_sk_storage_get_proto; case BPF_FUNC_sk_storage_delete: return &bpf_sk_storage_delete_proto; case BPF_FUNC_get_netns_cookie: return &bpf_get_netns_cookie_sk_msg_proto; -#ifdef CONFIG_CGROUP_NET_CLASSID - case BPF_FUNC_get_cgroup_classid: - return &bpf_get_cgroup_classid_curr_proto; -#endif default: return bpf_sk_base_func_proto(func_id, prog); } -- cgit v1.2.3 From 823153334042746604fdb416ea358a90940c1d83 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 9 May 2025 17:35:37 +0200 Subject: bpf: Add support to retrieve ref_ctr_offset for uprobe perf link Adding support to retrieve ref_ctr_offset for uprobe perf link, which got somehow omitted from the initial uprobe link info changes. 
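For example, userspace can read the new field back with libbpf's
bpf_link_get_info_by_fd(); the sketch below assumes link_fd refers to a uprobe
perf link created elsewhere, and the function name is illustrative:

  #include <stdio.h>
  #include <string.h>
  #include <bpf/bpf.h>

  static int print_uprobe_ref_ctr(int link_fd)
  {
      struct bpf_link_info info;
      __u32 len = sizeof(info);
      int err;

      memset(&info, 0, sizeof(info));
      err = bpf_link_get_info_by_fd(link_fd, &info, &len);
      if (err)
          return err;

      if (info.type == BPF_LINK_TYPE_PERF_EVENT &&
          (info.perf_event.type == BPF_PERF_EVENT_UPROBE ||
           info.perf_event.type == BPF_PERF_EVENT_URETPROBE))
          printf("offset: 0x%x ref_ctr_offset: 0x%llx\n",
                 info.perf_event.uprobe.offset,
                 (unsigned long long)info.perf_event.uprobe.ref_ctr_offset);
      return 0;
  }
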
Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Acked-by: Yafang Shao Link: https://lore.kernel.org/bpf/20250509153539.779599-2-jolsa@kernel.org --- include/uapi/linux/bpf.h | 1 + kernel/bpf/syscall.c | 5 +++-- kernel/trace/trace_uprobe.c | 2 +- tools/include/uapi/linux/bpf.h | 1 + 4 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 71d5ac83cf5d..16e95398c91c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6724,6 +6724,7 @@ struct bpf_link_info { __u32 name_len; __u32 offset; /* offset from file_name */ __u64 cookie; + __u64 ref_ctr_offset; } uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */ struct { __aligned_u64 func_name; /* in/out */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index df33d19c5c3b..4b5f29168618 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3800,14 +3800,14 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event, static int bpf_perf_link_fill_uprobe(const struct perf_event *event, struct bpf_link_info *info) { + u64 ref_ctr_offset, offset; char __user *uname; - u64 addr, offset; u32 ulen, type; int err; uname = u64_to_user_ptr(info->perf_event.uprobe.file_name); ulen = info->perf_event.uprobe.name_len; - err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &addr, + err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &ref_ctr_offset, &type, NULL); if (err) return err; @@ -3819,6 +3819,7 @@ static int bpf_perf_link_fill_uprobe(const struct perf_event *event, info->perf_event.uprobe.name_len = ulen; info->perf_event.uprobe.offset = offset; info->perf_event.uprobe.cookie = event->bpf_cookie; + info->perf_event.uprobe.ref_ctr_offset = ref_ctr_offset; return 0; } #endif diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 3386439ec9f6..d9cf6ed2c106 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1489,7 +1489,7 @@ int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type, : BPF_FD_TYPE_UPROBE; *filename = tu->filename; *probe_offset = tu->offset; - *probe_addr = 0; + *probe_addr = tu->ref_ctr_offset; return 0; } #endif /* CONFIG_PERF_EVENTS */ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 71d5ac83cf5d..16e95398c91c 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6724,6 +6724,7 @@ struct bpf_link_info { __u32 name_len; __u32 offset; /* offset from file_name */ __u64 cookie; + __u64 ref_ctr_offset; } uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */ struct { __aligned_u64 func_name; /* in/out */ -- cgit v1.2.3 From d060b6aab031b6113f78cd3d1585115f13386eec Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Mon, 12 May 2025 21:53:46 +0100 Subject: helpers: make few bpf helpers public Make bpf_dynptr_slice_rdwr, bpf_dynptr_check_off_len and __bpf_dynptr_write available outside of the helpers.c by adding their prototypes into linux/include/bpf.h. bpf_dynptr_check_off_len() implementation is moved to header and made inline explicitly, as small function should typically be inlined. These functions are going to be used from bpf_trace.c in the next patch of this series. 
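As a usage sketch (the caller below is hypothetical, not from this series),
code outside helpers.c can now bounds-check and write through a dynptr; note
that __bpf_dynptr_write() validates internally as well, so the explicit check
here only makes the failure early and explicit:

  #include <linux/bpf.h>

  static int copy_into_dynptr(const struct bpf_dynptr_kern *dst, u32 off,
                              void *src, u32 len)
  {
      /* Inline bounds check from bpf.h; returns -E2BIG if out of range. */
      int err = bpf_dynptr_check_off_len(dst, off, len);

      if (err)
          return err;

      return __bpf_dynptr_write(dst, off, src, len, 0);
  }
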
Acked-by: Andrii Nakryiko Signed-off-by: Mykyta Yatsenko Link: https://lore.kernel.org/r/20250512205348.191079-2-mykyta.yatsenko5@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 14 ++++++++++++++ kernel/bpf/helpers.c | 14 ++------------ 2 files changed, 16 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3f0cc89c0622..83c56f40842b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1349,6 +1349,20 @@ u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr); const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len); void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len); bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr); +int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, + void *src, u32 len, u64 flags); +void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset, + void *buffer__opt, u32 buffer__szk); + +static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len) +{ + u32 size = __bpf_dynptr_size(ptr); + + if (len > size || offset > size - len) + return -E2BIG; + + return 0; +} #ifdef CONFIG_BPF_JIT int bpf_trampoline_link_prog(struct bpf_tramp_link *link, diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index fed53da75025..c96cc6aeae99 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1714,16 +1714,6 @@ void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr) memset(ptr, 0, sizeof(*ptr)); } -static int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len) -{ - u32 size = __bpf_dynptr_size(ptr); - - if (len > size || offset > size - len) - return -E2BIG; - - return 0; -} - BPF_CALL_4(bpf_dynptr_from_mem, void *, data, u32, size, u64, flags, struct bpf_dynptr_kern *, ptr) { int err; @@ -1810,8 +1800,8 @@ static const struct bpf_func_proto bpf_dynptr_read_proto = { .arg5_type = ARG_ANYTHING, }; -static int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src, - u32 len, u64 flags) +int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src, + u32 len, u64 flags) { enum bpf_dynptr_type type; int err; -- cgit v1.2.3 From bc049387b41f41bee61e8cc338a5e99ca9798a09 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 13 May 2025 07:28:12 -0700 Subject: bpf: Add support for __prog argument suffix to pass in prog->aux Instead of hardcoding the list of kfuncs that need prog->aux passed to them with a combination of fixup_kfunc_call adjustment + __ign suffix, combine both in __prog suffix, which ignores the argument passed in, and fixes it up to the prog->aux. This allows kfuncs to have the prog->aux passed into them without having to touch the verifier. Cc: Tejun Heo Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20250513142812.1021591-1-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- Documentation/bpf/kfuncs.rst | 17 +++++++++++++++++ include/linux/bpf_verifier.h | 1 + kernel/bpf/helpers.c | 4 ++-- kernel/bpf/verifier.c | 33 +++++++++++++++++++++++++++------ 4 files changed, 47 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst index a8f5782bd833..ae468b781d31 100644 --- a/Documentation/bpf/kfuncs.rst +++ b/Documentation/bpf/kfuncs.rst @@ -160,6 +160,23 @@ Or:: ... 
} +2.2.6 __prog Annotation +--------------------------- +This annotation is used to indicate that the argument needs to be fixed up to +the bpf_prog_aux of the caller BPF program. Any value passed into this argument +is ignored, and rewritten by the verifier. + +An example is given below:: + + __bpf_kfunc int bpf_wq_set_callback_impl(struct bpf_wq *wq, + int (callback_fn)(void *map, int *key, void *value), + unsigned int flags, + void *aux__prog) + { + struct bpf_prog_aux *aux = aux__prog; + ... + } + .. _BPF_kfunc_nodef: 2.3 Using an existing kernel function diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 9734544b6957..cedd66867ecf 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -591,6 +591,7 @@ struct bpf_insn_aux_data { * bpf_fastcall pattern. */ u8 fastcall_spills_num:3; + u8 arg_prog:4; /* below fields are initialized once */ unsigned int orig_idx; /* original instruction index */ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index ebb604e39eb1..c1113b74e1e2 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -3002,9 +3002,9 @@ __bpf_kfunc int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __bpf_kfunc int bpf_wq_set_callback_impl(struct bpf_wq *wq, int (callback_fn)(void *map, int *key, void *value), unsigned int flags, - void *aux__ign) + void *aux__prog) { - struct bpf_prog_aux *aux = (struct bpf_prog_aux *)aux__ign; + struct bpf_prog_aux *aux = (struct bpf_prog_aux *)aux__prog; struct bpf_async_kern *async = (struct bpf_async_kern *)wq; if (flags) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 28f5a7899bd6..f6d3655b3a7a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -322,6 +322,7 @@ struct bpf_kfunc_call_arg_meta { struct btf *arg_btf; u32 arg_btf_id; bool arg_owning_ref; + bool arg_prog; struct { struct btf_field *field; @@ -11897,6 +11898,11 @@ static bool is_kfunc_arg_irq_flag(const struct btf *btf, const struct btf_param return btf_param_match_suffix(btf, arg, "__irq_flag"); } +static bool is_kfunc_arg_prog(const struct btf *btf, const struct btf_param *arg) +{ + return btf_param_match_suffix(btf, arg, "__prog"); +} + static bool is_kfunc_arg_scalar_with_name(const struct btf *btf, const struct btf_param *arg, const char *name) @@ -12938,6 +12944,17 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ if (is_kfunc_arg_ignore(btf, &args[i])) continue; + if (is_kfunc_arg_prog(btf, &args[i])) { + /* Used to reject repeated use of __prog. 
*/ + if (meta->arg_prog) { + verbose(env, "Only 1 prog->aux argument supported per-kfunc\n"); + return -EFAULT; + } + meta->arg_prog = true; + cur_aux(env)->arg_prog = regno; + continue; + } + if (btf_type_is_scalar(t)) { if (reg->type != SCALAR_VALUE) { verbose(env, "R%d is not a scalar\n", regno); @@ -21517,13 +21534,17 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) { insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1); *cnt = 1; - } else if (is_bpf_wq_set_callback_impl_kfunc(desc->func_id)) { - struct bpf_insn ld_addrs[2] = { BPF_LD_IMM64(BPF_REG_4, (long)env->prog->aux) }; + } - insn_buf[0] = ld_addrs[0]; - insn_buf[1] = ld_addrs[1]; - insn_buf[2] = *insn; - *cnt = 3; + if (env->insn_aux_data[insn_idx].arg_prog) { + u32 regno = env->insn_aux_data[insn_idx].arg_prog; + struct bpf_insn ld_addrs[2] = { BPF_LD_IMM64(regno, (long)env->prog->aux) }; + int idx = *cnt; + + insn_buf[idx++] = ld_addrs[0]; + insn_buf[idx++] = ld_addrs[1]; + insn_buf[idx++] = *insn; + *cnt = idx; } return 0; } -- cgit v1.2.3 From 1cb0f56d96185cb20e63e191fc291191823e6f52 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Mon, 19 May 2025 15:43:57 +0200 Subject: bpf: WARN_ONCE on verifier bugs Throughout the verifier's logic, there are multiple checks for inconsistent states that should never happen and would indicate a verifier bug. These bugs are typically logged in the verifier logs and sometimes preceded by a WARN_ONCE. This patch reworks these checks to consistently emit a verifier log AND a warning when CONFIG_DEBUG_KERNEL is enabled. The consistent use of WARN_ONCE should help fuzzers (ex. syzkaller) expose any situation where they are actually able to reach one of those buggy verifier states. Acked-by: Andrii Nakryiko Signed-off-by: Paul Chaignon Link: https://lore.kernel.org/r/aCs1nYvNNMq8dAWP@mail.gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 6 ++ include/linux/bpf_verifier.h | 11 ++++ kernel/bpf/btf.c | 4 +- kernel/bpf/verifier.c | 141 ++++++++++++++++++------------------------- 4 files changed, 79 insertions(+), 83 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 83c56f40842b..5b25d278409b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -346,6 +346,12 @@ static inline const char *btf_field_type_name(enum btf_field_type type) } } +#if IS_ENABLED(CONFIG_DEBUG_KERNEL) +#define BPF_WARN_ONCE(cond, format...) WARN_ONCE(cond, format) +#else +#define BPF_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond) +#endif + static inline u32 btf_field_type_size(enum btf_field_type type) { switch (type) { diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index cedd66867ecf..78c97e12ea4e 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -839,6 +839,17 @@ __printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env, u32 insn_off, const char *prefix_fmt, ...); +#define verifier_bug_if(cond, env, fmt, args...) \ + ({ \ + bool __cond = (cond); \ + if (unlikely(__cond)) { \ + BPF_WARN_ONCE(1, "verifier bug: " fmt "(" #cond ")\n", ##args); \ + bpf_log(&env->log, "verifier bug: " fmt "(" #cond ")\n", ##args); \ + } \ + (__cond); \ + }) +#define verifier_bug(env, fmt, args...) 
verifier_bug_if(1, env, fmt, ##args) + static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env) { struct bpf_verifier_state *cur = env->cur_state; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 6b21ca67070c..0f7828380895 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -7659,7 +7659,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog) return 0; if (!prog->aux->func_info) { - bpf_log(log, "Verifier bug\n"); + verifier_bug(env, "func_info undefined"); return -EFAULT; } @@ -7683,7 +7683,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog) tname = btf_name_by_offset(btf, fn_t->name_off); if (prog->aux->func_info_aux[subprog].unreliable) { - bpf_log(log, "Verifier bug in function %s()\n", tname); + verifier_bug(env, "unreliable BTF for function %s()", tname); return -EFAULT; } if (prog_type == BPF_PROG_TYPE_EXT) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f6d3655b3a7a..d5807d2efc92 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1924,11 +1924,8 @@ static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_env *env, u32 steps = 0; while (topmost && topmost->loop_entry) { - if (steps++ > st->dfs_depth) { - WARN_ONCE(true, "verifier bug: infinite loop in get_loop_entry\n"); - verbose(env, "verifier bug: infinite loop in get_loop_entry()\n"); + if (verifier_bug_if(steps++ > st->dfs_depth, env, "infinite loop")) return ERR_PTR(-EFAULT); - } topmost = topmost->loop_entry; } return topmost; @@ -3460,12 +3457,11 @@ static int mark_reg_read(struct bpf_verifier_env *env, /* if read wasn't screened by an earlier write ... */ if (writes && state->live & REG_LIVE_WRITTEN) break; - if (parent->live & REG_LIVE_DONE) { - verbose(env, "verifier BUG type %s var_off %lld off %d\n", - reg_type_str(env, parent->type), - parent->var_off.value, parent->off); + if (verifier_bug_if(parent->live & REG_LIVE_DONE, env, + "type %s var_off %lld off %d", + reg_type_str(env, parent->type), + parent->var_off.value, parent->off)) return -EFAULT; - } /* The first condition is more likely to be true than the * second, checked it first. 
*/ @@ -3858,14 +3854,14 @@ static int push_insn_history(struct bpf_verifier_env *env, struct bpf_verifier_s /* atomic instructions push insn_flags twice, for READ and * WRITE sides, but they should agree on stack slot */ - WARN_ONCE((env->cur_hist_ent->flags & insn_flags) && - (env->cur_hist_ent->flags & insn_flags) != insn_flags, - "verifier insn history bug: insn_idx %d cur flags %x new flags %x\n", - env->insn_idx, env->cur_hist_ent->flags, insn_flags); + verifier_bug_if((env->cur_hist_ent->flags & insn_flags) && + (env->cur_hist_ent->flags & insn_flags) != insn_flags, + env, "insn history: insn_idx %d cur flags %x new flags %x", + env->insn_idx, env->cur_hist_ent->flags, insn_flags); env->cur_hist_ent->flags |= insn_flags; - WARN_ONCE(env->cur_hist_ent->linked_regs != 0, - "verifier insn history bug: insn_idx %d linked_regs != 0: %#llx\n", - env->insn_idx, env->cur_hist_ent->linked_regs); + verifier_bug_if(env->cur_hist_ent->linked_regs != 0, env, + "insn history: insn_idx %d linked_regs: %#llx", + env->insn_idx, env->cur_hist_ent->linked_regs); env->cur_hist_ent->linked_regs = linked_regs; return 0; } @@ -3988,8 +3984,7 @@ static inline u32 bt_empty(struct backtrack_state *bt) static inline int bt_subprog_enter(struct backtrack_state *bt) { if (bt->frame == MAX_CALL_FRAMES - 1) { - verbose(bt->env, "BUG subprog enter from frame %d\n", bt->frame); - WARN_ONCE(1, "verifier backtracking bug"); + verifier_bug(bt->env, "subprog enter from frame %d", bt->frame); return -EFAULT; } bt->frame++; @@ -3999,8 +3994,7 @@ static inline int bt_subprog_enter(struct backtrack_state *bt) static inline int bt_subprog_exit(struct backtrack_state *bt) { if (bt->frame == 0) { - verbose(bt->env, "BUG subprog exit from frame 0\n"); - WARN_ONCE(1, "verifier backtracking bug"); + verifier_bug(bt->env, "subprog exit from frame 0"); return -EFAULT; } bt->frame--; @@ -4278,14 +4272,15 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, * should be literally next instruction in * caller program */ - WARN_ONCE(idx + 1 != subseq_idx, "verifier backtracking bug"); + verifier_bug_if(idx + 1 != subseq_idx, env, + "extra insn from subprog"); /* r1-r5 are invalidated after subprog call, * so for global func call it shouldn't be set * anymore */ if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) { - verbose(env, "BUG regs %x\n", bt_reg_mask(bt)); - WARN_ONCE(1, "verifier backtracking bug"); + verifier_bug(env, "global subprog unexpected regs %x", + bt_reg_mask(bt)); return -EFAULT; } /* global subprog always sets R0 */ @@ -4299,16 +4294,17 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, * the current frame should be zero by now */ if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) { - verbose(env, "BUG regs %x\n", bt_reg_mask(bt)); - WARN_ONCE(1, "verifier backtracking bug"); + verifier_bug(env, "static subprog unexpected regs %x", + bt_reg_mask(bt)); return -EFAULT; } /* we are now tracking register spills correctly, * so any instance of leftover slots is a bug */ if (bt_stack_mask(bt) != 0) { - verbose(env, "BUG stack slots %llx\n", bt_stack_mask(bt)); - WARN_ONCE(1, "verifier backtracking bug (subprog leftover stack slots)"); + verifier_bug(env, + "static subprog leftover stack slots %llx", + bt_stack_mask(bt)); return -EFAULT; } /* propagate r1-r5 to the caller */ @@ -4331,13 +4327,13 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, * not actually arguments passed directly to callback subprogs */ if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) 
{ - verbose(env, "BUG regs %x\n", bt_reg_mask(bt)); - WARN_ONCE(1, "verifier backtracking bug"); + verifier_bug(env, "callback unexpected regs %x", + bt_reg_mask(bt)); return -EFAULT; } if (bt_stack_mask(bt) != 0) { - verbose(env, "BUG stack slots %llx\n", bt_stack_mask(bt)); - WARN_ONCE(1, "verifier backtracking bug (callback leftover stack slots)"); + verifier_bug(env, "callback leftover stack slots %llx", + bt_stack_mask(bt)); return -EFAULT; } /* clear r1-r5 in callback subprog's mask */ @@ -4356,11 +4352,11 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, /* regular helper call sets R0 */ bt_clear_reg(bt, BPF_REG_0); if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) { - /* if backtracing was looking for registers R1-R5 + /* if backtracking was looking for registers R1-R5 * they should have been found already. */ - verbose(env, "BUG regs %x\n", bt_reg_mask(bt)); - WARN_ONCE(1, "verifier backtracking bug"); + verifier_bug(env, "backtracking call unexpected regs %x", + bt_reg_mask(bt)); return -EFAULT; } } else if (opcode == BPF_EXIT) { @@ -4378,8 +4374,8 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, for (i = BPF_REG_1; i <= BPF_REG_5; i++) bt_clear_reg(bt, i); if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) { - verbose(env, "BUG regs %x\n", bt_reg_mask(bt)); - WARN_ONCE(1, "verifier backtracking bug"); + verifier_bug(env, "backtracking exit unexpected regs %x", + bt_reg_mask(bt)); return -EFAULT; } @@ -4720,9 +4716,8 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) return 0; } - verbose(env, "BUG backtracking func entry subprog %d reg_mask %x stack_mask %llx\n", - st->frame[0]->subprogno, bt_reg_mask(bt), bt_stack_mask(bt)); - WARN_ONCE(1, "verifier backtracking bug"); + verifier_bug(env, "backtracking func entry subprog %d reg_mask %x stack_mask %llx", + st->frame[0]->subprogno, bt_reg_mask(bt), bt_stack_mask(bt)); return -EFAULT; } @@ -4758,8 +4753,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) * It means the backtracking missed the spot where * particular register was initialized with a constant. */ - verbose(env, "BUG backtracking idx %d\n", i); - WARN_ONCE(1, "verifier backtracking bug"); + verifier_bug(env, "backtracking idx %d", i); return -EFAULT; } } @@ -4784,12 +4778,10 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr)); for_each_set_bit(i, mask, 64) { - if (i >= func->allocated_stack / BPF_REG_SIZE) { - verbose(env, "BUG backtracking (stack slot %d, total slots %d)\n", - i, func->allocated_stack / BPF_REG_SIZE); - WARN_ONCE(1, "verifier backtracking bug (stack slot out of bounds)"); + if (verifier_bug_if(i >= func->allocated_stack / BPF_REG_SIZE, + env, "stack slot %d, total slots %d", + i, func->allocated_stack / BPF_REG_SIZE)) return -EFAULT; - } if (!is_spilled_scalar_reg(&func->stack[i])) { bt_clear_frame_slot(bt, fr, i); @@ -6562,21 +6554,18 @@ continue_func: /* find the callee */ next_insn = i + insn[i].imm + 1; sidx = find_subprog(env, next_insn); - if (sidx < 0) { - WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", - next_insn); + if (verifier_bug_if(sidx < 0, env, "callee not found at insn %d", next_insn)) return -EFAULT; - } if (subprog[sidx].is_async_cb) { if (subprog[sidx].has_tail_call) { - verbose(env, "verifier bug. 
subprog has tail_call and async cb\n"); + verifier_bug(env, "subprog has tail_call and async cb"); return -EFAULT; } /* async callbacks don't increase bpf prog stack size unless called directly */ if (!bpf_pseudo_call(insn + i)) continue; if (subprog[sidx].is_exception_cb) { - verbose(env, "insn %d cannot call exception cb directly\n", i); + verbose(env, "insn %d cannot call exception cb directly", i); return -EINVAL; } } @@ -6676,11 +6665,8 @@ static int get_callee_stack_depth(struct bpf_verifier_env *env, int start = idx + insn->imm + 1, subprog; subprog = find_subprog(env, start); - if (subprog < 0) { - WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", - start); + if (verifier_bug_if(subprog < 0, env, "get stack depth: no program at insn %d", start)) return -EFAULT; - } return env->subprog_info[subprog].stack_depth; } #endif @@ -7985,7 +7971,7 @@ static int check_stack_range_initialized( slot = -i - 1; spi = slot / BPF_REG_SIZE; if (state->allocated_stack <= slot) { - verbose(env, "verifier bug: allocated_stack too small\n"); + verbose(env, "allocated_stack too small\n"); return -EFAULT; } @@ -8414,7 +8400,7 @@ static int process_timer_func(struct bpf_verifier_env *env, int regno, return -EINVAL; } if (meta->map_ptr) { - verbose(env, "verifier bug. Two map pointers in a timer helper\n"); + verifier_bug(env, "Two map pointers in a timer helper"); return -EFAULT; } meta->map_uid = reg->map_uid; @@ -10286,8 +10272,7 @@ static int setup_func_entry(struct bpf_verifier_env *env, int subprog, int calls } if (state->frame[state->curframe + 1]) { - verbose(env, "verifier bug. Frame %d already allocated\n", - state->curframe + 1); + verifier_bug(env, "Frame %d already allocated", state->curframe + 1); return -EFAULT; } @@ -10401,8 +10386,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog, if (err) return err; } else { - bpf_log(log, "verifier bug: unrecognized arg#%d type %d\n", - i, arg->arg_type); + verifier_bug(env, "unrecognized arg#%d type %d", i, arg->arg_type); return -EFAULT; } } @@ -10465,13 +10449,13 @@ static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *ins env->subprog_info[subprog].is_cb = true; if (bpf_pseudo_kfunc_call(insn) && !is_callback_calling_kfunc(insn->imm)) { - verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n", - func_id_name(insn->imm), insn->imm); + verifier_bug(env, "kfunc %s#%d not marked as callback-calling", + func_id_name(insn->imm), insn->imm); return -EFAULT; } else if (!bpf_pseudo_kfunc_call(insn) && !is_callback_calling_function(insn->imm)) { /* helper */ - verbose(env, "verifier bug: helper %s#%d not marked as callback-calling\n", - func_id_name(insn->imm), insn->imm); + verifier_bug(env, "helper %s#%d not marked as callback-calling", + func_id_name(insn->imm), insn->imm); return -EFAULT; } @@ -10523,10 +10507,9 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, target_insn = *insn_idx + insn->imm + 1; subprog = find_subprog(env, target_insn); - if (subprog < 0) { - verbose(env, "verifier bug. 
No program starts at insn %d\n", target_insn); + if (verifier_bug_if(subprog < 0, env, "target of func call at insn %d is not a program", + target_insn)) return -EFAULT; - } caller = state->frame[state->curframe]; err = btf_check_subprog_call(env, subprog, caller->regs); @@ -11125,7 +11108,7 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env, err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr, fmt_map_off); if (err) { - verbose(env, "verifier bug\n"); + verbose(env, "failed to retrieve map value address\n"); return -EFAULT; } fmt = (char *)(long)fmt_addr + fmt_map_off; @@ -19706,10 +19689,9 @@ process_bpf_exit: return err; break; } else { - if (WARN_ON_ONCE(env->cur_state->loop_entry)) { - verbose(env, "verifier bug: env->cur_state->loop_entry != NULL\n"); + if (verifier_bug_if(env->cur_state->loop_entry, env, + "broken loop detection")) return -EFAULT; - } do_print_state = true; continue; } @@ -20767,10 +20749,9 @@ static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env, if (bpf_pseudo_kfunc_call(&insn)) continue; - if (WARN_ON(load_reg == -1)) { - verbose(env, "verifier bug. zext_dst is set, but no reg is defined\n"); + if (verifier_bug_if(load_reg == -1, env, + "zext_dst is set, but no reg is defined")) return -EFAULT; - } zext_patch[0] = insn; zext_patch[1].dst_reg = load_reg; @@ -21087,11 +21068,9 @@ static int jit_subprogs(struct bpf_verifier_env *env) * propagated in any case. */ subprog = find_subprog(env, i + insn->imm + 1); - if (subprog < 0) { - WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", - i + insn->imm + 1); + if (verifier_bug_if(subprog < 0, env, "No program to jit at insn %d", + i + insn->imm + 1)) return -EFAULT; - } /* temporarily remember subprog id inside insn instead of * aux_data, since next loop will split up all insns into funcs */ @@ -22454,7 +22433,7 @@ next_insn: continue; /* We need two slots in case timed may_goto is supported. */ if (stack_slots > slots) { - verbose(env, "verifier bug: stack_slots supports may_goto only\n"); + verifier_bug(env, "stack_slots supports may_goto only"); return -EFAULT; } -- cgit v1.2.3 From a539e2a6d51d1c12d89eec149ccc72ec561639bc Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Tue, 20 May 2025 14:01:17 +0100 Subject: btf: Allow mmap of vmlinux btf User space needs access to kernel BTF for many modern features of BPF. Right now each process needs to read the BTF blob either in pieces or as a whole. Allow mmaping the sysfs file so that processes can directly access the memory allocated for it in the kernel. remap_pfn_range is used instead of vm_insert_page due to aarch64 compatibility issues. Signed-off-by: Lorenz Bauer Signed-off-by: Andrii Nakryiko Tested-by: Alan Maguire Reviewed-by: Shakeel Butt Link: https://lore.kernel.org/bpf/20250520-vmlinux-mmap-v5-1-e8c941acc414@isovalent.com --- include/asm-generic/vmlinux.lds.h | 3 ++- kernel/bpf/sysfs_btf.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 58a635a6d5bd..1750390735fa 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -667,10 +667,11 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) */ #ifdef CONFIG_DEBUG_INFO_BTF #define BTF \ + . = ALIGN(PAGE_SIZE); \ .BTF : AT(ADDR(.BTF) - LOAD_OFFSET) { \ BOUNDED_SECTION_BY(.BTF, _BTF) \ } \ - . = ALIGN(4); \ + . 
= ALIGN(PAGE_SIZE); \ .BTF_ids : AT(ADDR(.BTF_ids) - LOAD_OFFSET) { \ *(.BTF_ids) \ } diff --git a/kernel/bpf/sysfs_btf.c b/kernel/bpf/sysfs_btf.c index 81d6cf90584a..941d0d2427e3 100644 --- a/kernel/bpf/sysfs_btf.c +++ b/kernel/bpf/sysfs_btf.c @@ -7,14 +7,46 @@ #include #include #include +#include +#include +#include /* See scripts/link-vmlinux.sh, gen_btf() func for details */ extern char __start_BTF[]; extern char __stop_BTF[]; +static int btf_sysfs_vmlinux_mmap(struct file *filp, struct kobject *kobj, + const struct bin_attribute *attr, + struct vm_area_struct *vma) +{ + unsigned long pages = PAGE_ALIGN(attr->size) >> PAGE_SHIFT; + size_t vm_size = vma->vm_end - vma->vm_start; + phys_addr_t addr = virt_to_phys(__start_BTF); + unsigned long pfn = addr >> PAGE_SHIFT; + + if (attr->private != __start_BTF || !PAGE_ALIGNED(addr)) + return -EINVAL; + + if (vma->vm_pgoff) + return -EINVAL; + + if (vma->vm_flags & (VM_WRITE | VM_EXEC | VM_MAYSHARE)) + return -EACCES; + + if (pfn + pages < pfn) + return -EINVAL; + + if ((vm_size >> PAGE_SHIFT) > pages) + return -EINVAL; + + vm_flags_mod(vma, VM_DONTDUMP, VM_MAYEXEC | VM_MAYWRITE); + return remap_pfn_range(vma, vma->vm_start, pfn, vm_size, vma->vm_page_prot); +} + static struct bin_attribute bin_attr_btf_vmlinux __ro_after_init = { .attr = { .name = "vmlinux", .mode = 0444, }, .read_new = sysfs_bin_attr_simple_read, + .mmap = btf_sysfs_vmlinux_mmap, }; struct kobject *btf_kobj; -- cgit v1.2.3 From 89f9dba365e1b85caef556d28654c6d336629eef Mon Sep 17 00:00:00 2001 From: "T.J. Mercier" Date: Thu, 22 May 2025 23:04:25 +0000 Subject: dma-buf: Rename debugfs symbols MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename the debugfs list and mutex so it's clear they are now usable without the need for CONFIG_DEBUG_FS. The list will always be populated to support the creation of a BPF iterator for dmabufs. Signed-off-by: T.J. 
Mercier Reviewed-by: Christian König Acked-by: Song Liu Link: https://lore.kernel.org/r/20250522230429.941193-2-tjmercier@google.com Signed-off-by: Alexei Starovoitov --- drivers/dma-buf/dma-buf.c | 40 +++++++++++++++------------------------- include/linux/dma-buf.h | 2 -- 2 files changed, 15 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 5baa83b85515..8d151784e302 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -35,35 +35,25 @@ static inline int is_dma_buf_file(struct file *); -#if IS_ENABLED(CONFIG_DEBUG_FS) -static DEFINE_MUTEX(debugfs_list_mutex); -static LIST_HEAD(debugfs_list); +static DEFINE_MUTEX(dmabuf_list_mutex); +static LIST_HEAD(dmabuf_list); -static void __dma_buf_debugfs_list_add(struct dma_buf *dmabuf) +static void __dma_buf_list_add(struct dma_buf *dmabuf) { - mutex_lock(&debugfs_list_mutex); - list_add(&dmabuf->list_node, &debugfs_list); - mutex_unlock(&debugfs_list_mutex); + mutex_lock(&dmabuf_list_mutex); + list_add(&dmabuf->list_node, &dmabuf_list); + mutex_unlock(&dmabuf_list_mutex); } -static void __dma_buf_debugfs_list_del(struct dma_buf *dmabuf) +static void __dma_buf_list_del(struct dma_buf *dmabuf) { if (!dmabuf) return; - mutex_lock(&debugfs_list_mutex); + mutex_lock(&dmabuf_list_mutex); list_del(&dmabuf->list_node); - mutex_unlock(&debugfs_list_mutex); + mutex_unlock(&dmabuf_list_mutex); } -#else -static void __dma_buf_debugfs_list_add(struct dma_buf *dmabuf) -{ -} - -static void __dma_buf_debugfs_list_del(struct dma_buf *dmabuf) -{ -} -#endif static char *dmabuffs_dname(struct dentry *dentry, char *buffer, int buflen) { @@ -115,7 +105,7 @@ static int dma_buf_file_release(struct inode *inode, struct file *file) if (!is_dma_buf_file(file)) return -EINVAL; - __dma_buf_debugfs_list_del(file->private_data); + __dma_buf_list_del(file->private_data); return 0; } @@ -689,7 +679,7 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) file->f_path.dentry->d_fsdata = dmabuf; dmabuf->file = file; - __dma_buf_debugfs_list_add(dmabuf); + __dma_buf_list_add(dmabuf); return dmabuf; @@ -1630,7 +1620,7 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused) size_t size = 0; int ret; - ret = mutex_lock_interruptible(&debugfs_list_mutex); + ret = mutex_lock_interruptible(&dmabuf_list_mutex); if (ret) return ret; @@ -1639,7 +1629,7 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused) seq_printf(s, "%-8s\t%-8s\t%-8s\t%-8s\texp_name\t%-8s\tname\n", "size", "flags", "mode", "count", "ino"); - list_for_each_entry(buf_obj, &debugfs_list, list_node) { + list_for_each_entry(buf_obj, &dmabuf_list, list_node) { ret = dma_resv_lock_interruptible(buf_obj->resv, NULL); if (ret) @@ -1676,11 +1666,11 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused) seq_printf(s, "\nTotal %d objects, %zu bytes\n", count, size); - mutex_unlock(&debugfs_list_mutex); + mutex_unlock(&dmabuf_list_mutex); return 0; error_unlock: - mutex_unlock(&debugfs_list_mutex); + mutex_unlock(&dmabuf_list_mutex); return ret; } diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 36216d28d8bd..8ff4add71f88 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -370,10 +370,8 @@ struct dma_buf { */ struct module *owner; -#if IS_ENABLED(CONFIG_DEBUG_FS) /** @list_node: node for dma_buf accounting and debugging. */ struct list_head list_node; -#endif /** @priv: exporter specific private data for this buffer object. 
*/ void *priv; -- cgit v1.2.3 From 76ea95534995adde1aa3cb1aa97ef33f50a617a9 Mon Sep 17 00:00:00 2001 From: "T.J. Mercier" Date: Thu, 22 May 2025 23:04:26 +0000 Subject: bpf: Add dmabuf iterator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dmabuf iterator traverses the list of all DMA buffers. DMA buffers are refcounted through their associated struct file. A reference is taken on each buffer as the list is iterated to ensure each buffer persists for the duration of the bpf program execution without holding the list mutex. Signed-off-by: T.J. Mercier Reviewed-by: Christian König Acked-by: Song Liu Link: https://lore.kernel.org/r/20250522230429.941193-3-tjmercier@google.com Signed-off-by: Alexei Starovoitov --- drivers/dma-buf/dma-buf.c | 68 +++++++++++++++++++++++++++++++ include/linux/dma-buf.h | 2 + kernel/bpf/Makefile | 3 ++ kernel/bpf/dmabuf_iter.c | 102 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 175 insertions(+) create mode 100644 kernel/bpf/dmabuf_iter.c (limited to 'include') diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 8d151784e302..6b59506a1b94 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -19,7 +19,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -55,6 +57,72 @@ static void __dma_buf_list_del(struct dma_buf *dmabuf) mutex_unlock(&dmabuf_list_mutex); } +/** + * dma_buf_iter_begin - begin iteration through global list of all DMA buffers + * + * Returns the first buffer in the global list of DMA-bufs that's not in the + * process of being destroyed. Increments that buffer's reference count to + * prevent buffer destruction. Callers must release the reference, either by + * continuing iteration with dma_buf_iter_next(), or with dma_buf_put(). + * + * Return: + * * First buffer from global list, with refcount elevated + * * NULL if no active buffers are present + */ +struct dma_buf *dma_buf_iter_begin(void) +{ + struct dma_buf *ret = NULL, *dmabuf; + + /* + * The list mutex does not protect a dmabuf's refcount, so it can be + * zeroed while we are iterating. We cannot call get_dma_buf() since the + * caller may not already own a reference to the buffer. + */ + mutex_lock(&dmabuf_list_mutex); + list_for_each_entry(dmabuf, &dmabuf_list, list_node) { + if (file_ref_get(&dmabuf->file->f_ref)) { + ret = dmabuf; + break; + } + } + mutex_unlock(&dmabuf_list_mutex); + return ret; +} + +/** + * dma_buf_iter_next - continue iteration through global list of all DMA buffers + * @dmabuf: [in] pointer to dma_buf + * + * Decrements the reference count on the provided buffer. Returns the next + * buffer from the remainder of the global list of DMA-bufs with its reference + * count incremented. Callers must release the reference, either by continuing + * iteration with dma_buf_iter_next(), or with dma_buf_put(). + * + * Return: + * * Next buffer from global list, with refcount elevated + * * NULL if no additional active buffers are present + */ +struct dma_buf *dma_buf_iter_next(struct dma_buf *dmabuf) +{ + struct dma_buf *ret = NULL; + + /* + * The list mutex does not protect a dmabuf's refcount, so it can be + * zeroed while we are iterating. We cannot call get_dma_buf() since the + * caller may not already own a reference to the buffer. 
+ */ + mutex_lock(&dmabuf_list_mutex); + dma_buf_put(dmabuf); + list_for_each_entry_continue(dmabuf, &dmabuf_list, list_node) { + if (file_ref_get(&dmabuf->file->f_ref)) { + ret = dmabuf; + break; + } + } + mutex_unlock(&dmabuf_list_mutex); + return ret; +} + static char *dmabuffs_dname(struct dentry *dentry, char *buffer, int buflen) { struct dma_buf *dmabuf; diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 8ff4add71f88..7af2ea839f58 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -634,4 +634,6 @@ int dma_buf_vmap(struct dma_buf *dmabuf, struct iosys_map *map); void dma_buf_vunmap(struct dma_buf *dmabuf, struct iosys_map *map); int dma_buf_vmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map); void dma_buf_vunmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map); +struct dma_buf *dma_buf_iter_begin(void); +struct dma_buf *dma_buf_iter_next(struct dma_buf *dmbuf); #endif /* __DMA_BUF_H__ */ diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 70502f038b92..3a335c50e6e3 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -53,6 +53,9 @@ obj-$(CONFIG_BPF_SYSCALL) += relo_core.o obj-$(CONFIG_BPF_SYSCALL) += btf_iter.o obj-$(CONFIG_BPF_SYSCALL) += btf_relocate.o obj-$(CONFIG_BPF_SYSCALL) += kmem_cache_iter.o +ifeq ($(CONFIG_DMA_SHARED_BUFFER),y) +obj-$(CONFIG_BPF_SYSCALL) += dmabuf_iter.o +endif CFLAGS_REMOVE_percpu_freelist.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_bpf_lru_list.o = $(CC_FLAGS_FTRACE) diff --git a/kernel/bpf/dmabuf_iter.c b/kernel/bpf/dmabuf_iter.c new file mode 100644 index 000000000000..83ef54d78b62 --- /dev/null +++ b/kernel/bpf/dmabuf_iter.c @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2025 Google LLC */ +#include +#include +#include +#include +#include + +static void *dmabuf_iter_seq_start(struct seq_file *seq, loff_t *pos) +{ + if (*pos) + return NULL; + + return dma_buf_iter_begin(); +} + +static void *dmabuf_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct dma_buf *dmabuf = v; + + ++*pos; + + return dma_buf_iter_next(dmabuf); +} + +struct bpf_iter__dmabuf { + __bpf_md_ptr(struct bpf_iter_meta *, meta); + __bpf_md_ptr(struct dma_buf *, dmabuf); +}; + +static int __dmabuf_seq_show(struct seq_file *seq, void *v, bool in_stop) +{ + struct bpf_iter_meta meta = { + .seq = seq, + }; + struct bpf_iter__dmabuf ctx = { + .meta = &meta, + .dmabuf = v, + }; + struct bpf_prog *prog = bpf_iter_get_info(&meta, in_stop); + + if (prog) + return bpf_iter_run_prog(prog, &ctx); + + return 0; +} + +static int dmabuf_iter_seq_show(struct seq_file *seq, void *v) +{ + return __dmabuf_seq_show(seq, v, false); +} + +static void dmabuf_iter_seq_stop(struct seq_file *seq, void *v) +{ + struct dma_buf *dmabuf = v; + + if (dmabuf) + dma_buf_put(dmabuf); +} + +static const struct seq_operations dmabuf_iter_seq_ops = { + .start = dmabuf_iter_seq_start, + .next = dmabuf_iter_seq_next, + .stop = dmabuf_iter_seq_stop, + .show = dmabuf_iter_seq_show, +}; + +static void bpf_iter_dmabuf_show_fdinfo(const struct bpf_iter_aux_info *aux, + struct seq_file *seq) +{ + seq_puts(seq, "dmabuf iter\n"); +} + +static const struct bpf_iter_seq_info dmabuf_iter_seq_info = { + .seq_ops = &dmabuf_iter_seq_ops, + .init_seq_private = NULL, + .fini_seq_private = NULL, + .seq_priv_size = 0, +}; + +static struct bpf_iter_reg bpf_dmabuf_reg_info = { + .target = "dmabuf", + .feature = BPF_ITER_RESCHED, + .show_fdinfo = bpf_iter_dmabuf_show_fdinfo, + .ctx_arg_info_size = 1, + .ctx_arg_info = { + { 
offsetof(struct bpf_iter__dmabuf, dmabuf), + PTR_TO_BTF_ID_OR_NULL }, + }, + .seq_info = &dmabuf_iter_seq_info, +}; + +DEFINE_BPF_ITER_FUNC(dmabuf, struct bpf_iter_meta *meta, struct dma_buf *dmabuf) +BTF_ID_LIST_SINGLE(bpf_dmabuf_btf_id, struct, dma_buf) + +static int __init dmabuf_iter_init(void) +{ + bpf_dmabuf_reg_info.ctx_arg_info[0].btf_id = bpf_dmabuf_btf_id[0]; + return bpf_iter_reg_target(&bpf_dmabuf_reg_info); +} + +late_initcall(dmabuf_iter_init); -- cgit v1.2.3 From e2d2115e56c4a02377189bfc3a9a7933552a7b0f Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 23 May 2025 21:13:35 -0700 Subject: bpf: Do not include stack ptr register in precision backtracking bookkeeping Yi Lai reported an issue ([1]) where the following warning appears in kernel dmesg: [ 60.643604] verifier backtracking bug [ 60.643635] WARNING: CPU: 10 PID: 2315 at kernel/bpf/verifier.c:4302 __mark_chain_precision+0x3a6c/0x3e10 [ 60.648428] Modules linked in: bpf_testmod(OE) [ 60.650471] CPU: 10 UID: 0 PID: 2315 Comm: test_progs Tainted: G OE 6.15.0-rc4-gef11287f8289-dirty #327 PREEMPT(full) [ 60.654385] Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE [ 60.656682] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 [ 60.660475] RIP: 0010:__mark_chain_precision+0x3a6c/0x3e10 [ 60.662814] Code: 5a 30 84 89 ea e8 c4 d9 01 00 80 3d 3e 7d d8 04 00 0f 85 60 fa ff ff c6 05 31 7d d8 04 01 48 c7 c7 00 58 30 84 e8 c4 06 a5 ff <0f> 0b e9 46 fa ff ff 48 ... [ 60.668720] RSP: 0018:ffff888116cc7298 EFLAGS: 00010246 [ 60.671075] RAX: 54d70e82dfd31900 RBX: ffff888115b65e20 RCX: 0000000000000000 [ 60.673659] RDX: 0000000000000001 RSI: 0000000000000004 RDI: 00000000ffffffff [ 60.676241] RBP: 0000000000000400 R08: ffff8881f6f23bd3 R09: 1ffff1103ede477a [ 60.678787] R10: dffffc0000000000 R11: ffffed103ede477b R12: ffff888115b60ae8 [ 60.681420] R13: 1ffff11022b6cbc4 R14: 00000000fffffff2 R15: 0000000000000001 [ 60.684030] FS: 00007fc2aedd80c0(0000) GS:ffff88826fa8a000(0000) knlGS:0000000000000000 [ 60.686837] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 60.689027] CR2: 000056325369e000 CR3: 000000011088b002 CR4: 0000000000370ef0 [ 60.691623] Call Trace: [ 60.692821] [ 60.693960] ? __pfx_verbose+0x10/0x10 [ 60.695656] ? __pfx_disasm_kfunc_name+0x10/0x10 [ 60.697495] check_cond_jmp_op+0x16f7/0x39b0 [ 60.699237] do_check+0x58fa/0xab10 ... 
Further analysis shows the warning is at line 4302 as below: 4294 /* static subprog call instruction, which 4295 * means that we are exiting current subprog, 4296 * so only r1-r5 could be still requested as 4297 * precise, r0 and r6-r10 or any stack slot in 4298 * the current frame should be zero by now 4299 */ 4300 if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) { 4301 verbose(env, "BUG regs %x\n", bt_reg_mask(bt)); 4302 WARN_ONCE(1, "verifier backtracking bug"); 4303 return -EFAULT; 4304 } With the below test (also in the next patch): __used __naked static void __bpf_jmp_r10(void) { asm volatile ( "r2 = 2314885393468386424 ll;" "goto +0;" "if r2 <= r10 goto +3;" "if r1 >= -1835016 goto +0;" "if r2 <= 8 goto +0;" "if r3 <= 0 goto +0;" "exit;" ::: __clobber_all); } SEC("?raw_tp") __naked void bpf_jmp_r10(void) { asm volatile ( "r3 = 0 ll;" "call __bpf_jmp_r10;" "r0 = 0;" "exit;" ::: __clobber_all); } The following is the verifier failure log: 0: (18) r3 = 0x0 ; R3_w=0 2: (85) call pc+2 caller: R10=fp0 callee: frame1: R1=ctx() R3_w=0 R10=fp0 5: frame1: R1=ctx() R3_w=0 R10=fp0 ; asm volatile (" \ @ verifier_precision.c:184 5: (18) r2 = 0x20202000256c6c78 ; frame1: R2_w=0x20202000256c6c78 7: (05) goto pc+0 8: (bd) if r2 <= r10 goto pc+3 ; frame1: R2_w=0x20202000256c6c78 R10=fp0 9: (35) if r1 >= 0xffe3fff8 goto pc+0 ; frame1: R1=ctx() 10: (b5) if r2 <= 0x8 goto pc+0 mark_precise: frame1: last_idx 10 first_idx 0 subseq_idx -1 mark_precise: frame1: regs=r2 stack= before 9: (35) if r1 >= 0xffe3fff8 goto pc+0 mark_precise: frame1: regs=r2 stack= before 8: (bd) if r2 <= r10 goto pc+3 mark_precise: frame1: regs=r2,r10 stack= before 7: (05) goto pc+0 mark_precise: frame1: regs=r2,r10 stack= before 5: (18) r2 = 0x20202000256c6c78 mark_precise: frame1: regs=r10 stack= before 2: (85) call pc+2 BUG regs 400 The failure is caused by including r10 in the precision backtracking bookkeeping. However, r10 is always precise, so there is no need to add it to the precision backtracking bookkeeping at all. One way to fix the issue is to prevent bt_set_reg() if any src/dst reg is r10. Andrii suggested going with the push_insn_history() approach instead, to avoid explicitly checking r10 in backtrack_insn(). This patch adds push_insn_history() support for cond_jmp operations like 'rX <op> rY'. In check_cond_jmp_op(), if rX or rY is a stack pointer, push_insn_history() records that information, and backtrack_insn() later skips bt_set_reg() for the register(s) so marked. [1] https://lore.kernel.org/bpf/Z%2F8q3xzpU59CIYQE@ly-workstation/ Reported-by: Yi Lai Fixes: 407958a0e980 ("bpf: encapsulate precision backtracking bookkeeping") Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250524041335.4046126-1-yonghong.song@linux.dev --- include/linux/bpf_verifier.h | 12 ++++++++---- kernel/bpf/verifier.c | 18 ++++++++++++++++-- 2 files changed, 24 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 78c97e12ea4e..256274acb1d8 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -356,7 +356,11 @@ enum { INSN_F_SPI_MASK = 0x3f, /* 6 bits */ INSN_F_SPI_SHIFT = 3, /* shifted 3 bits to the left */ - INSN_F_STACK_ACCESS = BIT(9), /* we need 10 bits total */ + INSN_F_STACK_ACCESS = BIT(9), + + INSN_F_DST_REG_STACK = BIT(10), /* dst_reg is PTR_TO_STACK */ + INSN_F_SRC_REG_STACK = BIT(11), /* src_reg is PTR_TO_STACK */ + /* total 12 bits are used now.
*/ }; static_assert(INSN_F_FRAMENO_MASK + 1 >= MAX_CALL_FRAMES); @@ -365,9 +369,9 @@ static_assert(INSN_F_SPI_MASK + 1 >= MAX_BPF_STACK / 8); struct bpf_insn_hist_entry { u32 idx; /* insn idx can't be bigger than 1 million */ - u32 prev_idx : 22; - /* special flags, e.g., whether insn is doing register stack spill/load */ - u32 flags : 10; + u32 prev_idx : 20; + /* special INSN_F_xxx flags */ + u32 flags : 12; /* additional registers that need precision tracking when this * jump is backtracked, vector of six 10-bit records */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 99582e5a8c69..98c52829936e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4410,8 +4410,10 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, * before it would be equally necessary to * propagate it to dreg. */ - bt_set_reg(bt, dreg); - bt_set_reg(bt, sreg); + if (!hist || !(hist->flags & INSN_F_SRC_REG_STACK)) + bt_set_reg(bt, sreg); + if (!hist || !(hist->flags & INSN_F_DST_REG_STACK)) + bt_set_reg(bt, dreg); } else if (BPF_SRC(insn->code) == BPF_K) { /* dreg K * Only dreg still needs precision before @@ -16392,6 +16394,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_reg_state *eq_branch_regs; struct linked_regs linked_regs = {}; u8 opcode = BPF_OP(insn->code); + int insn_flags = 0; bool is_jmp32; int pred = -1; int err; @@ -16450,6 +16453,9 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, insn->src_reg); return -EACCES; } + + if (src_reg->type == PTR_TO_STACK) + insn_flags |= INSN_F_SRC_REG_STACK; } else { if (insn->src_reg != BPF_REG_0) { verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); @@ -16461,6 +16467,14 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, __mark_reg_known(src_reg, insn->imm); } + if (dst_reg->type == PTR_TO_STACK) + insn_flags |= INSN_F_DST_REG_STACK; + if (insn_flags) { + err = push_insn_history(env, this_branch, insn_flags, 0); + if (err) + return err; + } + is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; pred = is_branch_taken(dst_reg, src_reg, opcode, is_jmp32); if (pred >= 0) { -- cgit v1.2.3
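A quick cross-check of the bitfield arithmetic in the last hunk, as a standalone sketch rather than kernel code: the INSN_F_* values below are copied from the hunk, except INSN_F_FRAMENO_MASK, which is assumed to be 0x7 (3 bits), as the static_assert against MAX_CALL_FRAMES implies.

#include <assert.h>
#include <stdint.h>

/* Flag layout mirrored from include/linux/bpf_verifier.h after this
 * patch: bits 0-2 frame number, bits 3-8 stack slot index (SPI),
 * bit 9 stack access, bit 10 dst_reg is PTR_TO_STACK, bit 11 src_reg
 * is PTR_TO_STACK.
 */
#define INSN_F_FRAMENO_MASK   0x7u   /* assumption: 3 bits */
#define INSN_F_SPI_MASK       0x3fu
#define INSN_F_SPI_SHIFT      3
#define INSN_F_STACK_ACCESS   (1u << 9)
#define INSN_F_DST_REG_STACK  (1u << 10)
#define INSN_F_SRC_REG_STACK  (1u << 11)

int main(void)
{
	uint32_t all = INSN_F_FRAMENO_MASK |
		       (INSN_F_SPI_MASK << INSN_F_SPI_SHIFT) |
		       INSN_F_STACK_ACCESS |
		       INSN_F_DST_REG_STACK |
		       INSN_F_SRC_REG_STACK;

	/* Every flag fits in the 12-bit 'flags' bitfield created by
	 * shrinking prev_idx from 22 to 20 bits:
	 * 0x7 | 0x1f8 | 0x200 | 0x400 | 0x800 == 0xfff. */
	assert(all == 0xfffu);
	return 0;
}

Incidentally, this is also how to read the "BUG regs 400" line in the failure log above: bt_reg_mask(bt) prints the register mask in hex, and 0x400 is bit 10, i.e. r10, exactly the register the fix stops adding to the backtracking mask.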
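Stepping back to the dmabuf iterator added earlier in this log: for illustration, a BPF-side consumer of the new "dmabuf" target could look roughly like the sketch below. The program name, the printed fields (exp_name and size, both members of struct dma_buf), and the vmlinux.h setup are assumptions of this sketch, not part of the series; only the section name and the bpf_iter__dmabuf context struct come from the patch.

// SPDX-License-Identifier: GPL-2.0
// Minimal sketch: walk every DMA buffer in the system and print
// its size and exporter name through the seq_file interface.
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

SEC("iter/dmabuf")
int dump_dmabuf(struct bpf_iter__dmabuf *ctx)
{
	struct seq_file *seq = ctx->meta->seq;
	struct dma_buf *dmabuf = ctx->dmabuf;

	/* ctx->dmabuf is declared PTR_TO_BTF_ID_OR_NULL in the
	 * registration above, so the verifier requires a NULL check
	 * before any dereference. */
	if (!dmabuf)
		return 0;

	BPF_SEQ_PRINTF(seq, "%lu %s\n",
		       (unsigned long)dmabuf->size, dmabuf->exp_name);
	return 0;
}

Userspace would attach this like any other bpf_iter program: create a link, obtain an iterator fd with bpf_iter_create(), and read(2) the formatted output, with the refcounting in dma_buf_iter_begin()/dma_buf_iter_next() guaranteeing that each buffer stays alive while the program runs on it.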