From 8b62645b09f870d70c7910e7550289d444239a46 Mon Sep 17 00:00:00 2001
From: Wander Lairson Costa <wander.lairson@gmail.com>
Date: Fri, 20 Sep 2024 16:06:59 -0300
Subject: bpf: Use raw_spinlock_t in ringbuf

The function __bpf_ringbuf_reserve is invoked from a tracepoint, which
disables preemption. Using spinlock_t in this context can lead to a
"sleep in atomic" warning in the RT variant. This issue is illustrated
in the example below:

BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48
in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 556208, name: test_progs
preempt_count: 1, expected: 0
RCU nest depth: 1, expected: 1
INFO: lockdep is turned off.
Preemption disabled at:
[<ffffd33a5c88ea44>] migrate_enable+0xc0/0x39c
CPU: 7 PID: 556208 Comm: test_progs Tainted: G
Hardware name: Qualcomm SA8775P Ride (DT)
Call trace:
 dump_backtrace+0xac/0x130
 show_stack+0x1c/0x30
 dump_stack_lvl+0xac/0xe8
 dump_stack+0x18/0x30
 __might_resched+0x3bc/0x4fc
 rt_spin_lock+0x8c/0x1a4
 __bpf_ringbuf_reserve+0xc4/0x254
 bpf_ringbuf_reserve_dynptr+0x5c/0xdc
 bpf_prog_ac3d15160d62622a_test_read_write+0x104/0x238
 trace_call_bpf+0x238/0x774
 perf_call_bpf_enter.isra.0+0x104/0x194
 perf_syscall_enter+0x2f8/0x510
 trace_sys_enter+0x39c/0x564
 syscall_trace_enter+0x220/0x3c0
 do_el0_svc+0x138/0x1dc
 el0_svc+0x54/0x130
 el0t_64_sync_handler+0x134/0x150
 el0t_64_sync+0x17c/0x180

Switch the spinlock to raw_spinlock_t to avoid this error.

Fixes: 457f44363a88 ("bpf: Implement BPF ring buffer and verifier support for it")
Reported-by: Brian Grech <bgrech@redhat.com>
Signed-off-by: Wander Lairson Costa <wander.lairson@gmail.com>
Signed-off-by: Wander Lairson Costa <wander@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/r/20240920190700.617253-1-wander@redhat.com
---
 kernel/bpf/ringbuf.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'kernel')
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index e20b90c36131..de3b681d1d13 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -29,7 +29,7 @@ struct bpf_ringbuf {
 	u64 mask;
 	struct page **pages;
 	int nr_pages;
-	spinlock_t spinlock ____cacheline_aligned_in_smp;
+	raw_spinlock_t spinlock ____cacheline_aligned_in_smp;
 	/* For user-space producer ring buffers, an atomic_t busy bit is used
 	 * to synchronize access to the ring buffers in the kernel, rather than
 	 * the spinlock that is used for kernel-producer ring buffers. This is
@@ -173,7 +173,7 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node)
 	if (!rb)
 		return NULL;
 
-	spin_lock_init(&rb->spinlock);
+	raw_spin_lock_init(&rb->spinlock);
 	atomic_set(&rb->busy, 0);
 	init_waitqueue_head(&rb->waitq);
 	init_irq_work(&rb->work, bpf_ringbuf_notify);
@@ -421,10 +421,10 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
 	cons_pos = smp_load_acquire(&rb->consumer_pos);
 
 	if (in_nmi()) {
-		if (!spin_trylock_irqsave(&rb->spinlock, flags))
+		if (!raw_spin_trylock_irqsave(&rb->spinlock, flags))
 			return NULL;
 	} else {
-		spin_lock_irqsave(&rb->spinlock, flags);
+		raw_spin_lock_irqsave(&rb->spinlock, flags);
 	}
 
 	pend_pos = rb->pending_pos;
@@ -450,7 +450,7 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
 	 */
 	if (new_prod_pos - cons_pos > rb->mask ||
 	    new_prod_pos - pend_pos > rb->mask) {
-		spin_unlock_irqrestore(&rb->spinlock, flags);
+		raw_spin_unlock_irqrestore(&rb->spinlock, flags);
 		return NULL;
 	}
 
@@ -462,7 +462,7 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
 	/* pairs with consumer's smp_load_acquire() */
 	smp_store_release(&rb->producer_pos, new_prod_pos);
 
-	spin_unlock_irqrestore(&rb->spinlock, flags);
+	raw_spin_unlock_irqrestore(&rb->spinlock, flags);
 
 	return (void *)hdr + BPF_RINGBUF_HDR_SZ;
 }
-- 
cgit v1.2.3


From e9bd9c498cb0f5843996dbe5cbce7a1836a83c70 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87@gmail.com>
Date: Tue, 24 Sep 2024 14:08:43 -0700
Subject: bpf: sync_linked_regs() must preserve subreg_def

Range propagation must not affect subreg_def marks, otherwise the
following example is rewritten by verifier incorrectly when
BPF_F_TEST_RND_HI32 flag is set:

  0: call bpf_ktime_get_ns                   call bpf_ktime_get_ns
  1: r0 &= 0x7fffffff       after verifier   r0 &= 0x7fffffff
  2: w1 = w0                rewrites         w1 = w0
  3: if w0 < 10 goto +0     -------------->  r11 = 0x2f5674a6     (r)
  4: r1 >>= 32                               r11 <<= 32           (r)
  5: r0 = r1                                 r1 |= r11            (r)
  6: exit;                                   if w0 < 0xa goto pc+0
                                             r1 >>= 32
                                             r0 = r1
                                             exit

(or zero extension of w1 at (2) is missing for architectures that
 require zero extension for upper register half).

The following happens w/o this patch:
- r0 is marked as not a subreg at (0);
- w1 is marked as subreg at (2);
- w1 subreg_def is overridden at (3) by copy_register_state();
- w1 is read at (5) but mark_insn_zext() does not mark (2)
  for zero extension, because w1 subreg_def is not set;
- because of BPF_F_TEST_RND_HI32 flag verifier inserts random
  value for hi32 bits of (2) (marked (r));
- this random value is read at (5).

Fixes: 75748837b7e5 ("bpf: Propagate scalar ranges through register assignments.")
Reported-by: Lonial Con <kongln9170@gmail.com>
Signed-off-by: Lonial Con <kongln9170@gmail.com>
Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Closes: https://lore.kernel.org/bpf/7e2aa30a62d740db182c170fdd8f81c596df280d.camel@gmail.com
Link: https://lore.kernel.org/bpf/20240924210844.1758441-1-eddyz87@gmail.com
---
 kernel/bpf/verifier.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'kernel')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9a7ed527e47e..434de48cd24b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -15326,8 +15326,12 @@ static void sync_linked_regs(struct bpf_verifier_state *vstate, struct bpf_reg_s
 			continue;
 		if ((!(reg->id & BPF_ADD_CONST) && !(known_reg->id & BPF_ADD_CONST)) ||
 		    reg->off == known_reg->off) {
+			s32 saved_subreg_def = reg->subreg_def;
+
 			copy_register_state(reg, known_reg);
+			reg->subreg_def = saved_subreg_def;
 		} else {
+			s32 saved_subreg_def = reg->subreg_def;
 			s32 saved_off = reg->off;
 
 			fake_reg.type = SCALAR_VALUE;
@@ -15340,6 +15344,7 @@ static void sync_linked_regs(struct bpf_verifier_state *vstate, struct bpf_reg_s
 			 * otherwise another sync_linked_regs() will be incorrect.
 			 */
 			reg->off = saved_off;
+			reg->subreg_def = saved_subreg_def;
 
 			scalar32_min_max_add(reg, &fake_reg);
 			scalar_min_max_add(reg, &fake_reg);
-- 
cgit v1.2.3


From ca9984c5f0ab3690d98b13937b2485a978c8dd73 Mon Sep 17 00:00:00 2001
From: Florian Kauer <florian.kauer@linutronix.de>
Date: Wed, 11 Sep 2024 10:41:18 +0200
Subject: bpf: devmap: provide rxq after redirect
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

rxq contains a pointer to the device from where
the redirect happened. Currently, the BPF program
that was executed after a redirect via BPF_MAP_TYPE_DEVMAP*
does not have it set.

This is particularly bad since accessing ingress_ifindex, e.g.

SEC("xdp")
int prog(struct xdp_md *pkt)
{
        return bpf_redirect_map(&dev_redirect_map, 0, 0);
}

SEC("xdp/devmap")
int prog_after_redirect(struct xdp_md *pkt)
{
        bpf_printk("ifindex %i", pkt->ingress_ifindex);
        return XDP_PASS;
}

depends on access to rxq, so a NULL pointer gets dereferenced:

<1>[  574.475170] BUG: kernel NULL pointer dereference, address: 0000000000000000
<1>[  574.475188] #PF: supervisor read access in kernel mode
<1>[  574.475194] #PF: error_code(0x0000) - not-present page
<6>[  574.475199] PGD 0 P4D 0
<4>[  574.475207] Oops: Oops: 0000 [#1] PREEMPT SMP NOPTI
<4>[  574.475217] CPU: 4 UID: 0 PID: 217 Comm: kworker/4:1 Not tainted 6.11.0-rc5-reduced-00859-g780801200300 #23
<4>[  574.475226] Hardware name: Intel(R) Client Systems NUC13ANHi7/NUC13ANBi7, BIOS ANRPL357.0026.2023.0314.1458 03/14/2023
<4>[  574.475231] Workqueue: mld mld_ifc_work
<4>[  574.475247] RIP: 0010:bpf_prog_5e13354d9cf5018a_prog_after_redirect+0x17/0x3c
<4>[  574.475257] Code: cc cc cc cc cc cc cc 80 00 00 00 cc cc cc cc cc cc cc cc f3 0f 1e fa 0f 1f 44 00 00 66 90 55 48 89 e5 f3 0f 1e fa 48 8b 57 20 <48> 8b 52 00 8b 92 e0 00 00 00 48 bf f8 a6 d5 c4 5d a0 ff ff be 0b
<4>[  574.475263] RSP: 0018:ffffa62440280c98 EFLAGS: 00010206
<4>[  574.475269] RAX: ffffa62440280cd8 RBX: 0000000000000001 RCX: 0000000000000000
<4>[  574.475274] RDX: 0000000000000000 RSI: ffffa62440549048 RDI: ffffa62440280ce0
<4>[  574.475278] RBP: ffffa62440280c98 R08: 0000000000000002 R09: 0000000000000001
<4>[  574.475281] R10: ffffa05dc8b98000 R11: ffffa05f577fca40 R12: ffffa05dcab24000
<4>[  574.475285] R13: ffffa62440280ce0 R14: ffffa62440549048 R15: ffffa62440549000
<4>[  574.475289] FS:  0000000000000000(0000) GS:ffffa05f4f700000(0000) knlGS:0000000000000000
<4>[  574.475294] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
<4>[  574.475298] CR2: 0000000000000000 CR3: 000000025522e000 CR4: 0000000000f50ef0
<4>[  574.475303] PKRU: 55555554
<4>[  574.475306] Call Trace:
<4>[  574.475313]  <IRQ>
<4>[  574.475318]  ? __die+0x23/0x70
<4>[  574.475329]  ? page_fault_oops+0x180/0x4c0
<4>[  574.475339]  ? skb_pp_cow_data+0x34c/0x490
<4>[  574.475346]  ? kmem_cache_free+0x257/0x280
<4>[  574.475357]  ? exc_page_fault+0x67/0x150
<4>[  574.475368]  ? asm_exc_page_fault+0x26/0x30
<4>[  574.475381]  ? bpf_prog_5e13354d9cf5018a_prog_after_redirect+0x17/0x3c
<4>[  574.475386]  bq_xmit_all+0x158/0x420
<4>[  574.475397]  __dev_flush+0x30/0x90
<4>[  574.475407]  veth_poll+0x216/0x250 [veth]
<4>[  574.475421]  __napi_poll+0x28/0x1c0
<4>[  574.475430]  net_rx_action+0x32d/0x3a0
<4>[  574.475441]  handle_softirqs+0xcb/0x2c0
<4>[  574.475451]  do_softirq+0x40/0x60
<4>[  574.475458]  </IRQ>
<4>[  574.475461]  <TASK>
<4>[  574.475464]  __local_bh_enable_ip+0x66/0x70
<4>[  574.475471]  __dev_queue_xmit+0x268/0xe40
<4>[  574.475480]  ? selinux_ip_postroute+0x213/0x420
<4>[  574.475491]  ? alloc_skb_with_frags+0x4a/0x1d0
<4>[  574.475502]  ip6_finish_output2+0x2be/0x640
<4>[  574.475512]  ? nf_hook_slow+0x42/0xf0
<4>[  574.475521]  ip6_finish_output+0x194/0x300
<4>[  574.475529]  ? __pfx_ip6_finish_output+0x10/0x10
<4>[  574.475538]  mld_sendpack+0x17c/0x240
<4>[  574.475548]  mld_ifc_work+0x192/0x410
<4>[  574.475557]  process_one_work+0x15d/0x380
<4>[  574.475566]  worker_thread+0x29d/0x3a0
<4>[  574.475573]  ? __pfx_worker_thread+0x10/0x10
<4>[  574.475580]  ? __pfx_worker_thread+0x10/0x10
<4>[  574.475587]  kthread+0xcd/0x100
<4>[  574.475597]  ? __pfx_kthread+0x10/0x10
<4>[  574.475606]  ret_from_fork+0x31/0x50
<4>[  574.475615]  ? __pfx_kthread+0x10/0x10
<4>[  574.475623]  ret_from_fork_asm+0x1a/0x30
<4>[  574.475635]  </TASK>
<4>[  574.475637] Modules linked in: veth br_netfilter bridge stp llc iwlmvm x86_pkg_temp_thermal iwlwifi efivarfs nvme nvme_core
<4>[  574.475662] CR2: 0000000000000000
<4>[  574.475668] ---[ end trace 0000000000000000 ]---

Therefore, provide it to the program by setting rxq properly.

Fixes: cb261b594b41 ("bpf: Run devmap xdp_prog on flush instead of bulk enqueue")
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Florian Kauer <florian.kauer@linutronix.de>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/r/20240911-devel-koalo-fix-ingress-ifindex-v4-1-5c643ae10258@linutronix.de
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 kernel/bpf/devmap.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 9e0e3b0a18e4..7878be18e9d2 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -333,9 +333,11 @@ static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
 
 static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog,
 				struct xdp_frame **frames, int n,
-				struct net_device *dev)
+				struct net_device *tx_dev,
+				struct net_device *rx_dev)
 {
-	struct xdp_txq_info txq = { .dev = dev };
+	struct xdp_txq_info txq = { .dev = tx_dev };
+	struct xdp_rxq_info rxq = { .dev = rx_dev };
 	struct xdp_buff xdp;
 	int i, nframes = 0;
 
@@ -346,6 +348,7 @@ static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog,
 
 		xdp_convert_frame_to_buff(xdpf, &xdp);
 		xdp.txq = &txq;
+		xdp.rxq = &rxq;
 
 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
 		switch (act) {
@@ -360,7 +363,7 @@ static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog,
 			bpf_warn_invalid_xdp_action(NULL, xdp_prog, act);
 			fallthrough;
 		case XDP_ABORTED:
-			trace_xdp_exception(dev, xdp_prog, act);
+			trace_xdp_exception(tx_dev, xdp_prog, act);
 			fallthrough;
 		case XDP_DROP:
 			xdp_return_frame_rx_napi(xdpf);
@@ -388,7 +391,7 @@ static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
 	}
 
 	if (bq->xdp_prog) {
-		to_send = dev_map_bpf_prog_run(bq->xdp_prog, bq->q, cnt, dev);
+		to_send = dev_map_bpf_prog_run(bq->xdp_prog, bq->q, cnt, dev, bq->dev_rx);
 		if (!to_send)
 			goto out;
 	}
-- 
cgit v1.2.3


From 45126b155e3b5201179cdc038504bf93a8ccd921 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Mon, 7 Oct 2024 18:09:58 +0200
Subject: bpf: Fix memory leak in bpf_core_apply

We need to free specs properly.

Fixes: 3d2786d65aaa ("bpf: correctly handle malformed BPF_CORE_TYPE_ID_LOCAL relos")
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/bpf/20241007160958.607434-1-jolsa@kernel.org
---
 kernel/bpf/btf.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel')

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 75e4fe83c509..a05da5f43547 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -8961,6 +8961,7 @@ int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo,
 	if (!type) {
 		bpf_log(ctx->log, "relo #%u: bad type id %u\n",
 			relo_idx, relo->type_id);
+		kfree(specs);
 		return -EINVAL;
 	}
 
-- 
cgit v1.2.3


From b24d7f0da6ef5a23456a301eaf51b170f961d4ae Mon Sep 17 00:00:00 2001
From: Thomas Weißschuh <linux@weissschuh.net>
Date: Sat, 5 Oct 2024 02:06:28 +0200
Subject: bpf, lsm: Remove bpf_lsm_key_free hook
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The key_free LSM hook has been removed.
Remove the corresponding BPF hook.

Avoid warnings during the build:
  BTFIDS  vmlinux
WARN: resolve_btfids: unresolved symbol bpf_lsm_key_free

Fixes: 5f8d28f6d7d5 ("lsm: infrastructure management of the key security blob")
Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Song Liu <song@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/bpf/20241005-lsm-key_free-v1-1-42ea801dbd63@weissschuh.net
---
 kernel/bpf/bpf_lsm.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 6292ac5f9bd1..3bc61628ab25 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -339,10 +339,6 @@ BTF_ID(func, bpf_lsm_path_chmod)
 BTF_ID(func, bpf_lsm_path_chown)
 #endif /* CONFIG_SECURITY_PATH */
 
-#ifdef CONFIG_KEYS
-BTF_ID(func, bpf_lsm_key_free)
-#endif /* CONFIG_KEYS */
-
 BTF_ID(func, bpf_lsm_mmap_file)
 BTF_ID(func, bpf_lsm_netlink_send)
 BTF_ID(func, bpf_lsm_path_notify)
-- 
cgit v1.2.3


From 797d73ee232dd1833dec4824bc53a22032e97c1c Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Tue, 8 Oct 2024 15:11:13 +0800
Subject: bpf: Check the remaining info_cnt before repeating btf fields

When trying to repeat the btf fields for array of nested struct, it
doesn't check the remaining info_cnt. The following splat will be
reported when the value of ret * nelems is greater than BTF_FIELDS_MAX:

  ------------[ cut here ]------------
  UBSAN: array-index-out-of-bounds in ../kernel/bpf/btf.c:3951:49
  index 11 is out of range for type 'btf_field_info [11]'
  CPU: 6 UID: 0 PID: 411 Comm: test_progs ...... 6.11.0-rc4+ #1
  Tainted: [O]=OOT_MODULE
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ...
  Call Trace:
   <TASK>
   dump_stack_lvl+0x57/0x70
   dump_stack+0x10/0x20
   ubsan_epilogue+0x9/0x40
   __ubsan_handle_out_of_bounds+0x6f/0x80
   ? kallsyms_lookup_name+0x48/0xb0
   btf_parse_fields+0x992/0xce0
   map_create+0x591/0x770
   __sys_bpf+0x229/0x2410
   __x64_sys_bpf+0x1f/0x30
   x64_sys_call+0x199/0x9f0
   do_syscall_64+0x3b/0xc0
   entry_SYSCALL_64_after_hwframe+0x4b/0x53
  RIP: 0033:0x7fea56f2cc5d
  ......
   </TASK>
  ---[ end trace ]---

Fix it by checking the remaining info_cnt in btf_repeat_fields() before
repeating the btf fields.

Fixes: 64e8ee814819 ("bpf: look into the types of the fields of a struct type recursively.")
Signed-off-by: Hou Tao <houtao1@huawei.com>
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20241008071114.3718177-2-houtao@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/btf.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index a05da5f43547..5cd1c7a23848 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3523,7 +3523,7 @@ end:
  *   (i + 1) * elem_size
  * where i is the repeat index and elem_size is the size of an element.
  */
-static int btf_repeat_fields(struct btf_field_info *info,
+static int btf_repeat_fields(struct btf_field_info *info, int info_cnt,
 			     u32 field_cnt, u32 repeat_cnt, u32 elem_size)
 {
 	u32 i, j;
@@ -3543,6 +3543,12 @@ static int btf_repeat_fields(struct btf_field_info *info,
 		}
 	}
 
+	/* The type of struct size or variable size is u32,
+	 * so the multiplication will not overflow.
+	 */
+	if (field_cnt * (repeat_cnt + 1) > info_cnt)
+		return -E2BIG;
+
 	cur = field_cnt;
 	for (i = 0; i < repeat_cnt; i++) {
 		memcpy(&info[cur], &info[0], field_cnt * sizeof(info[0]));
@@ -3587,7 +3593,7 @@ static int btf_find_nested_struct(const struct btf *btf, const struct btf_type *
 		info[i].off += off;
 
 	if (nelems > 1) {
-		err = btf_repeat_fields(info, ret, nelems - 1, t->size);
+		err = btf_repeat_fields(info, info_cnt, ret, nelems - 1, t->size);
 		if (err == 0)
 			ret *= nelems;
 		else
@@ -3681,10 +3687,10 @@ static int btf_find_field_one(const struct btf *btf,
 
 	if (ret == BTF_FIELD_IGNORE)
 		return 0;
-	if (nelems > info_cnt)
+	if (!info_cnt)
 		return -E2BIG;
 	if (nelems > 1) {
-		ret = btf_repeat_fields(info, 1, nelems - 1, sz);
+		ret = btf_repeat_fields(info, info_cnt, 1, nelems - 1, sz);
 		if (ret < 0)
 			return ret;
 	}
-- 
cgit v1.2.3


From 434247637c66e1be2bc71a9987d4c3f0d8672387 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Tue, 8 Oct 2024 17:07:35 -0400
Subject: bpf: use kvzmalloc to allocate BPF verifier environment

The kzmalloc call in bpf_check can fail when memory is very fragmented,
which in turn can lead to an OOM kill.

Use kvzmalloc to fall back to vmalloc when memory is too fragmented to
allocate an order 3 sized bpf verifier environment.

Admittedly this is not a very common case, and only happens on systems
where memory has already been squeezed close to the limit, but this does
not seem like much of a hot path, and it's a simple enough fix.

Signed-off-by: Rik van Riel <riel@surriel.com>
Reviewed-by: Shakeel Butt <shakeel.butt@linux.dev>
Link: https://lore.kernel.org/r/20241008170735.16766766@imladris.surriel.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 434de48cd24b..633fd6da40c2 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -22315,7 +22315,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 	/* 'struct bpf_verifier_env' can be global, but since it's not small,
 	 * allocate/free it every time bpf_check() is called
 	 */
-	env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
+	env = kvzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
 	if (!env)
 		return -ENOMEM;
 
@@ -22551,6 +22551,6 @@ err_unlock:
 		mutex_unlock(&bpf_verifier_lock);
 	vfree(env->insn_aux_data);
 err_free_env:
-	kfree(env);
+	kvfree(env);
 	return ret;
 }
-- 
cgit v1.2.3


From 4deecdd29cf29844c7bd164d72dc38d2e672f64e Mon Sep 17 00:00:00 2001
From: Tyrone Wu <wudevelops@gmail.com>
Date: Tue, 8 Oct 2024 16:43:11 +0000
Subject: bpf: fix unpopulated name_len field in perf_event link info

Previously when retrieving `bpf_link_info.perf_event` for
kprobe/uprobe/tracepoint, the `name_len` field was not populated by the
kernel, leaving it to reflect the value initially set by the user. This
behavior was inconsistent with how other input/output string buffer
fields function (e.g. `raw_tracepoint.tp_name_len`).

This patch fills `name_len` with the actual size of the string name.

Fixes: 1b715e1b0ec5 ("bpf: Support ->fill_link_info for perf_event")
Signed-off-by: Tyrone Wu <wudevelops@gmail.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Yafang Shao <laoar.shao@gmail.com>
Link: https://lore.kernel.org/r/20241008164312.46269-1-wudevelops@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/syscall.c | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

(limited to 'kernel')

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index a8f1808a1ca5..8cfa7183d2ef 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3565,15 +3565,16 @@ static void bpf_perf_link_dealloc(struct bpf_link *link)
 }
 
 static int bpf_perf_link_fill_common(const struct perf_event *event,
-				     char __user *uname, u32 ulen,
+				     char __user *uname, u32 *ulenp,
 				     u64 *probe_offset, u64 *probe_addr,
 				     u32 *fd_type, unsigned long *missed)
 {
 	const char *buf;
-	u32 prog_id;
+	u32 prog_id, ulen;
 	size_t len;
 	int err;
 
+	ulen = *ulenp;
 	if (!ulen ^ !uname)
 		return -EINVAL;
 
@@ -3581,10 +3582,17 @@ static int bpf_perf_link_fill_common(const struct perf_event *event,
 				      probe_offset, probe_addr, missed);
 	if (err)
 		return err;
+
+	if (buf) {
+		len = strlen(buf);
+		*ulenp = len + 1;
+	} else {
+		*ulenp = 1;
+	}
 	if (!uname)
 		return 0;
+
 	if (buf) {
-		len = strlen(buf);
 		err = bpf_copy_to_user(uname, buf, ulen, len);
 		if (err)
 			return err;
@@ -3609,7 +3617,7 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event,
 
 	uname = u64_to_user_ptr(info->perf_event.kprobe.func_name);
 	ulen = info->perf_event.kprobe.name_len;
-	err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr,
+	err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &addr,
 					&type, &missed);
 	if (err)
 		return err;
@@ -3617,7 +3625,7 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event,
 		info->perf_event.type = BPF_PERF_EVENT_KRETPROBE;
 	else
 		info->perf_event.type = BPF_PERF_EVENT_KPROBE;
-
+	info->perf_event.kprobe.name_len = ulen;
 	info->perf_event.kprobe.offset = offset;
 	info->perf_event.kprobe.missed = missed;
 	if (!kallsyms_show_value(current_cred()))
@@ -3639,7 +3647,7 @@ static int bpf_perf_link_fill_uprobe(const struct perf_event *event,
 
 	uname = u64_to_user_ptr(info->perf_event.uprobe.file_name);
 	ulen = info->perf_event.uprobe.name_len;
-	err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr,
+	err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &addr,
 					&type, NULL);
 	if (err)
 		return err;
@@ -3648,6 +3656,7 @@ static int bpf_perf_link_fill_uprobe(const struct perf_event *event,
 		info->perf_event.type = BPF_PERF_EVENT_URETPROBE;
 	else
 		info->perf_event.type = BPF_PERF_EVENT_UPROBE;
+	info->perf_event.uprobe.name_len = ulen;
 	info->perf_event.uprobe.offset = offset;
 	info->perf_event.uprobe.cookie = event->bpf_cookie;
 	return 0;
@@ -3673,12 +3682,18 @@ static int bpf_perf_link_fill_tracepoint(const struct perf_event *event,
 {
 	char __user *uname;
 	u32 ulen;
+	int err;
 
 	uname = u64_to_user_ptr(info->perf_event.tracepoint.tp_name);
 	ulen = info->perf_event.tracepoint.name_len;
+	err = bpf_perf_link_fill_common(event, uname, &ulen, NULL, NULL, NULL, NULL);
+	if (err)
+		return err;
+
 	info->perf_event.type = BPF_PERF_EVENT_TRACEPOINT;
+	info->perf_event.tracepoint.name_len = ulen;
 	info->perf_event.tracepoint.cookie = event->bpf_cookie;
-	return bpf_perf_link_fill_common(event, uname, ulen, NULL, NULL, NULL, NULL);
+	return 0;
 }
 
 static int bpf_perf_link_fill_perf_event(const struct perf_event *event,
-- 
cgit v1.2.3


From 6cb86a0fdece87e126323ec1bb19deb16a52aedf Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@redhat.com>
Date: Thu, 10 Oct 2024 15:27:07 +0200
Subject: bpf: fix kfunc btf caching for modules
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The verifier contains a cache for looking up module BTF objects when
calling kfuncs defined in modules. This cache uses a 'struct
bpf_kfunc_btf_tab', which contains a sorted list of BTF objects that
were already seen in the current verifier run, and the BTF objects are
looked up by the offset stored in the relocated call instruction using
bsearch().

The first time a given offset is seen, the module BTF is loaded from the
file descriptor passed in by libbpf, and stored into the cache. However,
there's a bug in the code storing the new entry: it stores a pointer to
the new cache entry, then calls sort() to keep the cache sorted for the
next lookup using bsearch(), and then returns the entry that was just
stored through the stored pointer. However, because sort() modifies the
list of entries in place *by value*, the stored pointer may no longer
point to the right entry, in which case the wrong BTF object will be
returned.

The end result of this is an intermittent bug where, if a BPF program
calls two functions with the same signature in two different modules,
the function from the wrong module may sometimes end up being called.
Whether this happens depends on the order of the calls in the BPF
program (as that affects whether sort() reorders the array of BTF
objects), making it especially hard to track down. Simon, credited as
reporter below, spent significant effort analysing and creating a
reproducer for this issue. The reproducer is added as a selftest in a
subsequent patch.

The fix is straight forward: simply don't use the stored pointer after
calling sort(). Since we already have an on-stack pointer to the BTF
object itself at the point where the function return, just use that, and
populate it from the cache entry in the branch where the lookup
succeeds.

Fixes: 2357672c54c3 ("bpf: Introduce BPF support for kernel module function calls")
Reported-by: Simon Sundberg <simon.sundberg@kau.se>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/r/20241010-fix-kfunc-btf-caching-for-modules-v2-1-745af6c1af98@redhat.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 633fd6da40c2..bf9996ea34fe 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2750,10 +2750,16 @@ static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
 		b->module = mod;
 		b->offset = offset;
 
+		/* sort() reorders entries by value, so b may no longer point
+		 * to the right entry after this
+		 */
 		sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
 		     kfunc_btf_cmp_by_off, NULL);
+	} else {
+		btf = b->btf;
 	}
-	return b->btf;
+
+	return btf;
 }
 
 void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
-- 
cgit v1.2.3


From ad6b5b6ea9b764018249285a4fe0a2226bef4caa Mon Sep 17 00:00:00 2001
From: Tyrone Wu <wudevelops@gmail.com>
Date: Fri, 11 Oct 2024 00:08:02 +0000
Subject: bpf: Fix unpopulated path_size when uprobe_multi fields unset

Previously when retrieving `bpf_link_info.uprobe_multi` with `path` and
`path_size` fields unset, the `path_size` field is not populated
(remains 0). This behavior was inconsistent with how other input/output
string buffer fields work, as the field should be populated in cases
when:
- both buffer and length are set (currently works as expected)
- both buffer and length are unset (not working as expected)

This patch now fills the `path_size` field when `path` and `path_size`
are unset.

Fixes: e56fdbfb06e2 ("bpf: Add link_info support for uprobe multi link")
Signed-off-by: Tyrone Wu <wudevelops@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20241011000803.681190-1-wudevelops@gmail.com
---
 kernel/trace/bpf_trace.c | 36 +++++++++++++++++-------------------
 1 file changed, 17 insertions(+), 19 deletions(-)

(limited to 'kernel')

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index a582cd25ca87..3bd402fa62a4 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -3133,7 +3133,8 @@ static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link,
 	struct bpf_uprobe_multi_link *umulti_link;
 	u32 ucount = info->uprobe_multi.count;
 	int err = 0, i;
-	long left;
+	char *p, *buf;
+	long left = 0;
 
 	if (!upath ^ !upath_size)
 		return -EINVAL;
@@ -3147,26 +3148,23 @@ static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link,
 	info->uprobe_multi.pid = umulti_link->task ?
 				 task_pid_nr_ns(umulti_link->task, task_active_pid_ns(current)) : 0;
 
-	if (upath) {
-		char *p, *buf;
-
-		upath_size = min_t(u32, upath_size, PATH_MAX);
-
-		buf = kmalloc(upath_size, GFP_KERNEL);
-		if (!buf)
-			return -ENOMEM;
-		p = d_path(&umulti_link->path, buf, upath_size);
-		if (IS_ERR(p)) {
-			kfree(buf);
-			return PTR_ERR(p);
-		}
-		upath_size = buf + upath_size - p;
-		left = copy_to_user(upath, p, upath_size);
+	upath_size = upath_size ? min_t(u32, upath_size, PATH_MAX) : PATH_MAX;
+	buf = kmalloc(upath_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+	p = d_path(&umulti_link->path, buf, upath_size);
+	if (IS_ERR(p)) {
 		kfree(buf);
-		if (left)
-			return -EFAULT;
-		info->uprobe_multi.path_size = upath_size;
+		return PTR_ERR(p);
 	}
+	upath_size = buf + upath_size - p;
+
+	if (upath)
+		left = copy_to_user(upath, p, upath_size);
+	kfree(buf);
+	if (left)
+		return -EFAULT;
+	info->uprobe_multi.path_size = upath_size;
 
 	if (!uoffsets && !ucookies && !uref_ctr_offsets)
 		return 0;
-- 
cgit v1.2.3


From ae67b9fb8c4e981e929a665dcaa070f4b05ebdb4 Mon Sep 17 00:00:00 2001
From: Dimitar Kanaliev <dimitar.kanaliev@siteground.com>
Date: Mon, 14 Oct 2024 15:11:53 +0300
Subject: bpf: Fix truncation bug in coerce_reg_to_size_sx()

coerce_reg_to_size_sx() updates the register state after a sign-extension
operation. However, there's a bug in the assignment order of the unsigned
min/max values, leading to incorrect truncation:

  0: (85) call bpf_get_prandom_u32#7    ; R0_w=scalar()
  1: (57) r0 &= 1                       ; R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=1,var_off=(0x0; 0x1))
  2: (07) r0 += 254                     ; R0_w=scalar(smin=umin=smin32=umin32=254,smax=umax=smax32=umax32=255,var_off=(0xfe; 0x1))
  3: (bf) r0 = (s8)r0                   ; R0_w=scalar(smin=smin32=-2,smax=smax32=-1,umin=umin32=0xfffffffe,umax=0xffffffff,var_off=(0xfffffffffffffffe; 0x1))

In the current implementation, the unsigned 32-bit min/max values
(u32_min_value and u32_max_value) are assigned directly from the 64-bit
signed min/max values (s64_min and s64_max):

  reg->umin_value = reg->u32_min_value = s64_min;
  reg->umax_value = reg->u32_max_value = s64_max;

Due to the chain assigmnent, this is equivalent to:

  reg->u32_min_value = s64_min;  // Unintended truncation
  reg->umin_value = reg->u32_min_value;
  reg->u32_max_value = s64_max;  // Unintended truncation
  reg->umax_value = reg->u32_max_value;

Fixes: 1f9a1ea821ff ("bpf: Support new sign-extension load insns")
Reported-by: Shung-Hsi Yu <shung-hsi.yu@suse.com>
Reported-by: Zac Ecob <zacecob@protonmail.com>
Signed-off-by: Dimitar Kanaliev <dimitar.kanaliev@siteground.com>
Acked-by: Yonghong Song <yonghong.song@linux.dev>
Reviewed-by: Shung-Hsi Yu <shung-hsi.yu@suse.com>
Link: https://lore.kernel.org/r/20241014121155.92887-2-dimitar.kanaliev@siteground.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index bf9996ea34fe..a8a0b6e4110e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6339,10 +6339,10 @@ static void coerce_reg_to_size_sx(struct bpf_reg_state *reg, int size)
 
 	/* both of s64_max/s64_min positive or negative */
 	if ((s64_max >= 0) == (s64_min >= 0)) {
-		reg->smin_value = reg->s32_min_value = s64_min;
-		reg->smax_value = reg->s32_max_value = s64_max;
-		reg->umin_value = reg->u32_min_value = s64_min;
-		reg->umax_value = reg->u32_max_value = s64_max;
+		reg->s32_min_value = reg->smin_value = s64_min;
+		reg->s32_max_value = reg->smax_value = s64_max;
+		reg->u32_min_value = reg->umin_value = s64_min;
+		reg->u32_max_value = reg->umax_value = s64_max;
 		reg->var_off = tnum_range(s64_min, s64_max);
 		return;
 	}
-- 
cgit v1.2.3


From 9495a5b731fcaf580448a3438d63601c88367661 Mon Sep 17 00:00:00 2001
From: Jordan Rome <linux@jordanrome.com>
Date: Wed, 16 Oct 2024 14:00:47 -0700
Subject: bpf: Fix iter/task tid filtering

In userspace, you can add a tid filter by setting
the "task.tid" field for "bpf_iter_link_info".
However, `get_pid_task` when called for the
`BPF_TASK_ITER_TID` type should have been using
`PIDTYPE_PID` (tid) instead of `PIDTYPE_TGID` (pid).

Fixes: f0d74c4da1f0 ("bpf: Parameterize task iterators.")
Signed-off-by: Jordan Rome <linux@jordanrome.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20241016210048.1213935-1-linux@jordanrome.com
---
 kernel/bpf/task_iter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 02aa9db8d796..5af9e130e500 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -99,7 +99,7 @@ static struct task_struct *task_seq_get_next(struct bpf_iter_seq_task_common *co
 		rcu_read_lock();
 		pid = find_pid_ns(common->pid, common->ns);
 		if (pid) {
-			task = get_pid_task(pid, PIDTYPE_TGID);
+			task = get_pid_task(pid, PIDTYPE_PID);
 			*tid = common->pid;
 		}
 		rcu_read_unlock();
-- 
cgit v1.2.3


From 3878ae04e9fc24dacb77a1d32bd87e7d8108599e Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 16 Oct 2024 15:49:11 +0200
Subject: bpf: Fix incorrect delta propagation between linked registers

Nathaniel reported a bug in the linked scalar delta tracking, which can lead
to accepting a program with OOB access. The specific code is related to the
sync_linked_regs() function and the BPF_ADD_CONST flag, which signifies a
constant offset between two scalar registers tracked by the same register id.

The verifier attempts to track "similar" scalars in order to propagate bounds
information learned about one scalar to others. For instance, if r1 and r2
are known to contain the same value, then upon encountering 'if (r1 != 0x1234)
goto xyz', not only does it know that r1 is equal to 0x1234 on the path where
that conditional jump is not taken, it also knows that r2 is.

Additionally, with env->bpf_capable set, the verifier will track scalars
which should be a constant delta apart (if r1 is known to be one greater than
r2, then if r1 is known to be equal to 0x1234, r2 must be equal to 0x1233.)
The code path for the latter in adjust_reg_min_max_vals() is reached when
processing both 32 and 64-bit addition operations. While adjust_reg_min_max_vals()
knows whether dst_reg was produced by a 32 or a 64-bit addition (based on the
alu32 bool), the only information saved in dst_reg is the id of the source
register (reg->id, or'ed by BPF_ADD_CONST) and the value of the constant
offset (reg->off).

Later, the function sync_linked_regs() will attempt to use this information
to propagate bounds information from one register (known_reg) to others,
meaning, for all R in linked_regs, it copies known_reg range (and possibly
adjusting delta) into R for the case of R->id == known_reg->id.

For the delta adjustment, meaning, matching reg->id with BPF_ADD_CONST, the
verifier adjusts the register as reg = known_reg; reg += delta where delta
is computed as (s32)reg->off - (s32)known_reg->off and placed as a scalar
into a fake_reg to then simulate the addition of reg += fake_reg. This is
only correct, however, if the value in reg was created by a 64-bit addition.
When reg contains the result of a 32-bit addition operation, its upper 32
bits will always be zero. sync_linked_regs() on the other hand, may cause
the verifier to believe that the addition between fake_reg and reg overflows
into those upper bits. For example, if reg was generated by adding the
constant 1 to known_reg using a 32-bit alu operation, then reg->off is 1
and known_reg->off is 0. If known_reg is known to be the constant 0xFFFFFFFF,
sync_linked_regs() will tell the verifier that reg is equal to the constant
0x100000000. This is incorrect as the actual value of reg will be 0, as the
32-bit addition will wrap around.

Example:

  0: (b7) r0 = 0;             R0_w=0
  1: (18) r1 = 0x80000001;    R1_w=0x80000001
  3: (37) r1 /= 1;            R1_w=scalar()
  4: (bf) r2 = r1;            R1_w=scalar(id=1) R2_w=scalar(id=1)
  5: (bf) r4 = r1;            R1_w=scalar(id=1) R4_w=scalar(id=1)
  6: (04) w2 += 2147483647;   R2_w=scalar(id=1+2147483647,smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))
  7: (04) w4 += 0 ;           R4_w=scalar(id=1+0,smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))
  8: (15) if r2 == 0x0 goto pc+1
 10: R0=0 R1=0xffffffff80000001 R2=0x7fffffff R4=0xffffffff80000001 R10=fp0

What can be seen here is that r1 is copied to r2 and r4, such that {r1,r2,r4}.id
are all the same which later lets sync_linked_regs() to be invoked. Then, in
a next step constants are added with alu32 to r2 and r4, setting their ->off,
as well as id |= BPF_ADD_CONST. Next, the conditional will bind r2 and
propagate ranges to its linked registers. The verifier now believes the upper
32 bits of r4 are r4=0xffffffff80000001, while actually r4=r1=0x80000001.

One approach for a simple fix suitable also for stable is to limit the constant
delta tracking to only 64-bit alu addition. If necessary at some later point,
BPF_ADD_CONST could be split into BPF_ADD_CONST64 and BPF_ADD_CONST32 to avoid
mixing the two under the tradeoff to further complicate sync_linked_regs().
However, none of the added tests from dedf56d775c0 ("selftests/bpf: Add tests
for add_const") make this necessary at this point, meaning, BPF CI also passes
with just limiting tracking to 64-bit alu addition.

Fixes: 98d7ca374ba4 ("bpf: Track delta between "linked" registers.")
Reported-by: Nathaniel Theis <nathaniel.theis@nccgroup.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Reviewed-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/bpf/20241016134913.32249-1-daniel@iogearbox.net
---
 kernel/bpf/verifier.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a8a0b6e4110e..411ab1b57af4 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -14270,12 +14270,13 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
 	 * r1 += 0x1
 	 * if r2 < 1000 goto ...
 	 * use r1 in memory access
-	 * So remember constant delta between r2 and r1 and update r1 after
-	 * 'if' condition.
+	 * So for 64-bit alu remember constant delta between r2 and r1 and
+	 * update r1 after 'if' condition.
 	 */
-	if (env->bpf_capable && BPF_OP(insn->code) == BPF_ADD &&
-	    dst_reg->id && is_reg_const(src_reg, alu32)) {
-		u64 val = reg_const_value(src_reg, alu32);
+	if (env->bpf_capable &&
+	    BPF_OP(insn->code) == BPF_ADD && !alu32 &&
+	    dst_reg->id && is_reg_const(src_reg, false)) {
+		u64 val = reg_const_value(src_reg, false);
 
 		if ((dst_reg->id & BPF_ADD_CONST) ||
 		    /* prevent overflow in sync_linked_regs() later */
-- 
cgit v1.2.3


From 3e9e708757ca3b7eb65a820031d62fea1a265709 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 16 Oct 2024 15:49:12 +0200
Subject: bpf: Fix print_reg_state's constant scalar dump

print_reg_state() should not consider adding reg->off to reg->var_off.value
when dumping scalars. Scalars can be produced with reg->off != 0 through
BPF_ADD_CONST, and thus as-is this can skew the register log dump.

Fixes: 98d7ca374ba4 ("bpf: Track delta between "linked" registers.")
Reported-by: Nathaniel Theis <nathaniel.theis@nccgroup.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20241016134913.32249-2-daniel@iogearbox.net
---
 kernel/bpf/log.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
index 5aebfc3051e3..4a858fdb6476 100644
--- a/kernel/bpf/log.c
+++ b/kernel/bpf/log.c
@@ -688,8 +688,7 @@ static void print_reg_state(struct bpf_verifier_env *env,
 	if (t == SCALAR_VALUE && reg->precise)
 		verbose(env, "P");
 	if (t == SCALAR_VALUE && tnum_is_const(reg->var_off)) {
-		/* reg->off should be 0 for SCALAR_VALUE */
-		verbose_snum(env, reg->var_off.value + reg->off);
+		verbose_snum(env, reg->var_off.value);
 		return;
 	}
 
-- 
cgit v1.2.3