From e509996b16728e37d5a909a5c63c1bd64f23b306 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Mon, 2 Sep 2024 17:07:09 -0700 Subject: xfrm: extract dst lookup parameters into a struct Preparation for adding more fields to dst lookup functions without changing their signatures. Signed-off-by: Eyal Birger Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index b6bfdc6416c7..f3ae50372707 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -349,20 +349,23 @@ struct xfrm_if_cb { void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb); void xfrm_if_unregister_cb(void); +struct xfrm_dst_lookup_params { + struct net *net; + int tos; + int oif; + xfrm_address_t *saddr; + xfrm_address_t *daddr; + u32 mark; +}; + struct net_device; struct xfrm_type; struct xfrm_dst; struct xfrm_policy_afinfo { struct dst_ops *dst_ops; - struct dst_entry *(*dst_lookup)(struct net *net, - int tos, int oif, - const xfrm_address_t *saddr, - const xfrm_address_t *daddr, - u32 mark); - int (*get_saddr)(struct net *net, int oif, - xfrm_address_t *saddr, - xfrm_address_t *daddr, - u32 mark); + struct dst_entry *(*dst_lookup)(const struct xfrm_dst_lookup_params *params); + int (*get_saddr)(xfrm_address_t *saddr, + const struct xfrm_dst_lookup_params *params); int (*fill_dst)(struct xfrm_dst *xdst, struct net_device *dev, const struct flowi *fl); @@ -1764,10 +1767,7 @@ static inline int xfrm_user_policy(struct sock *sk, int optname, } #endif -struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif, - const xfrm_address_t *saddr, - const xfrm_address_t *daddr, - int family, u32 mark); +struct dst_entry *__xfrm_dst_lookup(int family, const struct xfrm_dst_lookup_params *params); struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp); -- cgit v1.2.3 From b8469721034300bbb6dec5b4bf32492c95e16a0c Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Mon, 2 Sep 2024 17:07:10 -0700 Subject: xfrm: respect ip protocols rules criteria when performing dst lookups The series in the "fixes" tag added the ability to consider L4 attributes in routing rules. The dst lookup on the outer packet of encapsulated traffic in the xfrm code was not adapted to this change, thus routing behavior that relies on L4 information is not respected. Pass the ip protocol information when performing dst lookups. Fixes: a25724b05af0 ("Merge branch 'fib_rules-support-sport-dport-and-proto-match'") Signed-off-by: Eyal Birger Tested-by: Antony Antony Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 2 ++ net/ipv4/xfrm4_policy.c | 2 ++ net/ipv6/xfrm6_policy.c | 3 +++ net/xfrm/xfrm_policy.c | 15 +++++++++++++++ 4 files changed, 22 insertions(+) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index f3ae50372707..a0bdd58f401c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -356,6 +356,8 @@ struct xfrm_dst_lookup_params { xfrm_address_t *saddr; xfrm_address_t *daddr; u32 mark; + __u8 ipproto; + union flowi_uli uli; }; struct net_device; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index ac1a28ef0c56..7e1c2faed1ff 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -30,6 +30,8 @@ static struct dst_entry *__xfrm4_dst_lookup(struct flowi4 *fl4, fl4->flowi4_mark = params->mark; if (params->saddr) fl4->saddr = params->saddr->a4; + fl4->flowi4_proto = params->ipproto; + fl4->uli = params->uli; rt = __ip_route_output_key(params->net, fl4); if (!IS_ERR(rt)) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index fc3f5eec6898..1f19b6f14484 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -37,6 +37,9 @@ static struct dst_entry *xfrm6_dst_lookup(const struct xfrm_dst_lookup_params *p if (params->saddr) memcpy(&fl6.saddr, params->saddr, sizeof(fl6.saddr)); + fl6.flowi4_proto = params->ipproto; + fl6.uli = params->uli; + dst = ip6_route_output(params->net, NULL, &fl6); err = dst->error; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ec8f2fe13a51..55cc9f4cb42b 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -315,6 +315,21 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, params.tos = tos; params.oif = oif; params.mark = mark; + params.ipproto = x->id.proto; + if (x->encap) { + switch (x->encap->encap_type) { + case UDP_ENCAP_ESPINUDP: + params.ipproto = IPPROTO_UDP; + params.uli.ports.sport = x->encap->encap_sport; + params.uli.ports.dport = x->encap->encap_dport; + break; + case TCP_ENCAP_ESPINTCP: + params.ipproto = IPPROTO_TCP; + params.uli.ports.sport = x->encap->encap_sport; + params.uli.ports.dport = x->encap->encap_dport; + break; + } + } dst = __xfrm_dst_lookup(family, ¶ms); -- cgit v1.2.3 From 645546a05b0370391c0eac0f14f5b9ddf8d00731 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 18 Sep 2024 11:12:49 +0200 Subject: xfrm: policy: remove last remnants of pernet inexact list xfrm_net still contained the no-longer-used inexact policy list heads, remove them. Fixes: a54ad727f745 ("xfrm: policy: remove remaining use of inexact list") Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 1 - net/xfrm/xfrm_policy.c | 3 --- 2 files changed, 4 deletions(-) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index d489d9250bff..ae60d6664095 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -51,7 +51,6 @@ struct netns_xfrm { struct hlist_head *policy_byidx; unsigned int policy_idx_hmask; unsigned int idx_generator; - struct hlist_head policy_inexact[XFRM_POLICY_MAX]; struct xfrm_policy_hash policy_bydst[XFRM_POLICY_MAX]; unsigned int policy_count[XFRM_POLICY_MAX * 2]; struct work_struct policy_hash_work; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 55cc9f4cb42b..a2ea9dbac90b 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -4206,7 +4206,6 @@ static int __net_init xfrm_policy_init(struct net *net) net->xfrm.policy_count[dir] = 0; net->xfrm.policy_count[XFRM_POLICY_MAX + dir] = 0; - INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]); htab = &net->xfrm.policy_bydst[dir]; htab->table = xfrm_hash_alloc(sz); @@ -4260,8 +4259,6 @@ static void xfrm_policy_fini(struct net *net) for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { struct xfrm_policy_hash *htab; - WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir])); - htab = &net->xfrm.policy_bydst[dir]; sz = (htab->hmask + 1) * sizeof(struct hlist_head); WARN_ON(!hlist_empty(htab->table)); -- cgit v1.2.3 From 95ecba62e2fd201bcdcca636f5d774f1cd4f1458 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Oct 2024 19:41:18 +0000 Subject: net: fix races in netdev_tx_sent_queue()/dev_watchdog() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some workloads hit the infamous dev_watchdog() message: "NETDEV WATCHDOG: eth0 (xxxx): transmit queue XX timed out" It seems possible to hit this even for perfectly normal BQL enabled drivers: 1) Assume a TX queue was idle for more than dev->watchdog_timeo (5 seconds unless changed by the driver) 2) Assume a big packet is sent, exceeding current BQL limit. 3) Driver ndo_start_xmit() puts the packet in TX ring, and netdev_tx_sent_queue() is called. 4) QUEUE_STATE_STACK_XOFF could be set from netdev_tx_sent_queue() before txq->trans_start has been written. 5) txq->trans_start is written later, from netdev_start_xmit() if (rc == NETDEV_TX_OK) txq_trans_update(txq) dev_watchdog() running on another cpu could read the old txq->trans_start, and then see QUEUE_STATE_STACK_XOFF, because 5) did not happen yet. To solve the issue, write txq->trans_start right before one XOFF bit is set : - _QUEUE_STATE_DRV_XOFF from netif_tx_stop_queue() - __QUEUE_STATE_STACK_XOFF from netdev_tx_sent_queue() From dev_watchdog(), we have to read txq->state before txq->trans_start. Add memory barriers to enforce correct ordering. In the future, we could avoid writing over txq->trans_start for normal operations, and rename this field to txq->xoff_start_time. Fixes: bec251bc8b6a ("net: no longer stop all TX queues in dev_watchdog()") Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Reviewed-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20241015194118.3951657-1-edumazet@google.com Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 12 ++++++++++++ net/sched/sch_generic.c | 8 +++++++- 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4d20c776a4ff..8896705ccd63 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3325,6 +3325,12 @@ static inline void netif_tx_wake_all_queues(struct net_device *dev) static __always_inline void netif_tx_stop_queue(struct netdev_queue *dev_queue) { + /* Paired with READ_ONCE() from dev_watchdog() */ + WRITE_ONCE(dev_queue->trans_start, jiffies); + + /* This barrier is paired with smp_mb() from dev_watchdog() */ + smp_mb__before_atomic(); + /* Must be an atomic op see netif_txq_try_stop() */ set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); } @@ -3451,6 +3457,12 @@ static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue, if (likely(dql_avail(&dev_queue->dql) >= 0)) return; + /* Paired with READ_ONCE() from dev_watchdog() */ + WRITE_ONCE(dev_queue->trans_start, jiffies); + + /* This barrier is paired with smp_mb() from dev_watchdog() */ + smp_mb__before_atomic(); + set_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state); /* diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 2af24547a82c..38ec18f73de4 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -512,9 +512,15 @@ static void dev_watchdog(struct timer_list *t) struct netdev_queue *txq; txq = netdev_get_tx_queue(dev, i); - trans_start = READ_ONCE(txq->trans_start); if (!netif_xmit_stopped(txq)) continue; + + /* Paired with WRITE_ONCE() + smp_mb...() in + * netdev_tx_sent_queue() and netif_tx_stop_queue(). + */ + smp_mb(); + trans_start = READ_ONCE(txq->trans_start); + if (time_after(jiffies, trans_start + dev->watchdog_timeo)) { timedout_ms = jiffies_to_msecs(jiffies - trans_start); atomic_long_inc(&txq->trans_timeout); -- cgit v1.2.3 From 1bf4470a3939c678fb822073e9ea77a0560bc6bb Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 22 Oct 2024 12:31:08 -0400 Subject: Bluetooth: SCO: Fix UAF on sco_sock_timeout conn->sk maybe have been unlinked/freed while waiting for sco_conn_lock so this checks if the conn->sk is still valid by checking if it part of sco_sk_list. Reported-by: syzbot+4c0d0c4cde787116d465@syzkaller.appspotmail.com Tested-by: syzbot+4c0d0c4cde787116d465@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=4c0d0c4cde787116d465 Fixes: ba316be1b6a0 ("Bluetooth: schedule SCO timeouts with delayed_work") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/bluetooth.h | 1 + net/bluetooth/af_bluetooth.c | 22 ++++++++++++++++++++++ net/bluetooth/sco.c | 18 ++++++++++++------ 3 files changed, 35 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 5d655e109b2c..f66bc85c6411 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -403,6 +403,7 @@ int bt_sock_register(int proto, const struct net_proto_family *ops); void bt_sock_unregister(int proto); void bt_sock_link(struct bt_sock_list *l, struct sock *s); void bt_sock_unlink(struct bt_sock_list *l, struct sock *s); +bool bt_sock_linked(struct bt_sock_list *l, struct sock *s); struct sock *bt_sock_alloc(struct net *net, struct socket *sock, struct proto *prot, int proto, gfp_t prio, int kern); int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 67604ccec2f4..bdce9270487d 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -185,6 +185,28 @@ void bt_sock_unlink(struct bt_sock_list *l, struct sock *sk) } EXPORT_SYMBOL(bt_sock_unlink); +bool bt_sock_linked(struct bt_sock_list *l, struct sock *s) +{ + struct sock *sk; + + if (!l || !s) + return false; + + read_lock(&l->lock); + + sk_for_each(sk, &l->head) { + if (s == sk) { + read_unlock(&l->lock); + return true; + } + } + + read_unlock(&l->lock); + + return false; +} +EXPORT_SYMBOL(bt_sock_linked); + void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh) { const struct cred *old_cred; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index a5ac160c592e..1c7252a36866 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -76,6 +76,16 @@ struct sco_pinfo { #define SCO_CONN_TIMEOUT (HZ * 40) #define SCO_DISCONN_TIMEOUT (HZ * 2) +static struct sock *sco_sock_hold(struct sco_conn *conn) +{ + if (!conn || !bt_sock_linked(&sco_sk_list, conn->sk)) + return NULL; + + sock_hold(conn->sk); + + return conn->sk; +} + static void sco_sock_timeout(struct work_struct *work) { struct sco_conn *conn = container_of(work, struct sco_conn, @@ -87,9 +97,7 @@ static void sco_sock_timeout(struct work_struct *work) sco_conn_unlock(conn); return; } - sk = conn->sk; - if (sk) - sock_hold(sk); + sk = sco_sock_hold(conn); sco_conn_unlock(conn); if (!sk) @@ -194,9 +202,7 @@ static void sco_conn_del(struct hci_conn *hcon, int err) /* Kill socket */ sco_conn_lock(conn); - sk = conn->sk; - if (sk) - sock_hold(sk); + sk = sco_sock_hold(conn); sco_conn_unlock(conn); if (sk) { -- cgit v1.2.3