summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2026-04-12 13:33:21 -0700
committerJakub Kicinski <kuba@kernel.org>2026-04-12 13:33:21 -0700
commite802cd8cb0ffc4ddd2d6f78fc970d195af33deda (patch)
tree1275e335808d33c752cdd2394af19f36facdddc9
parent2654557112d6ca298d0ef43b56e4cafb5cc0cb10 (diff)
parentebf71dd4aff46e8e421d455db3e231ba43d2fa8a (diff)
downloadlinux-e802cd8cb0ffc4ddd2d6f78fc970d195af33deda.tar.gz
linux-e802cd8cb0ffc4ddd2d6f78fc970d195af33deda.zip
Merge branch 'net-rds-fix-use-after-free-in-rds-ib-for-non-init-namespaces'
Allison Henderson says: ==================== net/rds: Fix use-after-free in RDS/IB for non-init namespaces This series fixes syzbot bug da8e060735ae02c8f3d1 https://syzkaller.appspot.com/bug?extid=da8e060735ae02c8f3d1 The report finds a use-after-free bug where ib connections access an invalid network namespace after it has been freed. The stack is: rds_rdma_cm_event_handler_cmn rds_conn_path_drop rds_destroy_pending check_net() <-- use-after-free This is initially introduced in: d5a8ac28a7ff ("RDS-TCP: Make RDS-TCP work correctly when it is set up in a netns other than init_net"). Here, we made RDS aware of the namespace by storing a net pointer in each connection. But it is not explicitly restricted to init_net in the case of ib. The RDS/TCP transport has its own pernet exit handler (rds_tcp_exit_net) that destroys connections when a namespace is torn down. But RDS/IB does not support more than the initial namespace and has no such handler. The initial namespace is statically allocated, and never torn down, so it always has at least one reference. Allowing non init namespaces that do not have a persistent reference means that when their refcounts drop to zero, they are released through cleanup_net(). Which would call any registered pernet clean up handlers if it had any, but since they don't in this case, the extra rds_connections remain with stale c_net pointers. Which are then accessed later causing the use-after-free bug. So, the simple fix is to disallow more than the initial namespace to be created in the case of ib connections. Fixes are ported from UEK patches found here: https://github.com/oracle/linux-uek/commit/8ed9a82376b7 Patch 1 is a prerequisite optimization to rds_ib_laddr_check() that avoids excessive rdma_bind_addr() calls during transport probing by first checking rds_ib_get_device(). This is needed because patch 2 adds a namespace check at the top of the same function. UEK: 8ed9a82376b7 ("rds: ib: Optimize rds_ib_laddr_check") https://github.com/oracle/linux-uek/commit/bd9489a08004 Patch 2 restricts RDS/IB to the initial network namespace. It adds checks in both rds_ib_laddr_check() and rds_set_transport() to reject IB use from non-init namespaces with -EPROTOTYPE. This prevents the use-after-free by ensuring IB connections cannot exist in namespaces that may be torn down. UEK: bd9489a08004 ("net/rds: Restrict use of RDS/IB to the initial network namespace") Questions, comments and feedback appreciated! ==================== Link: https://patch.msgid.link/20260408080420.540032-1-achender@kernel.org Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--net/rds/af_rds.c10
-rw-r--r--net/rds/ib.c24
-rw-r--r--net/rds/ib.h1
-rw-r--r--net/rds/ib_rdma.c2
4 files changed, 32 insertions, 5 deletions
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index b396c673dfaf..76f625986a7f 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -357,7 +357,8 @@ static int rds_cong_monitor(struct rds_sock *rs, sockptr_t optval, int optlen)
return ret;
}
-static int rds_set_transport(struct rds_sock *rs, sockptr_t optval, int optlen)
+static int rds_set_transport(struct net *net, struct rds_sock *rs,
+ sockptr_t optval, int optlen)
{
int t_type;
@@ -373,6 +374,10 @@ static int rds_set_transport(struct rds_sock *rs, sockptr_t optval, int optlen)
if (t_type < 0 || t_type >= RDS_TRANS_COUNT)
return -EINVAL;
+ /* RDS/IB is restricted to the initial network namespace */
+ if (t_type != RDS_TRANS_TCP && !net_eq(net, &init_net))
+ return -EPROTOTYPE;
+
rs->rs_transport = rds_trans_get(t_type);
return rs->rs_transport ? 0 : -ENOPROTOOPT;
@@ -433,6 +438,7 @@ static int rds_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
struct rds_sock *rs = rds_sk_to_rs(sock->sk);
+ struct net *net = sock_net(sock->sk);
int ret;
if (level != SOL_RDS) {
@@ -461,7 +467,7 @@ static int rds_setsockopt(struct socket *sock, int level, int optname,
break;
case SO_RDS_TRANSPORT:
lock_sock(sock->sk);
- ret = rds_set_transport(rs, optval, optlen);
+ ret = rds_set_transport(net, rs, optval, optlen);
release_sock(sock->sk);
break;
case SO_TIMESTAMP_OLD:
diff --git a/net/rds/ib.c b/net/rds/ib.c
index ac6affa33ce7..39f87272e071 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -401,8 +401,8 @@ static void rds6_ib_ic_info(struct socket *sock, unsigned int len,
* allowed to influence which paths have priority. We could call userspace
* asserting this policy "routing".
*/
-static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr,
- __u32 scope_id)
+static int rds_ib_laddr_check_cm(struct net *net, const struct in6_addr *addr,
+ __u32 scope_id)
{
int ret;
struct rdma_cm_id *cm_id;
@@ -487,6 +487,26 @@ out:
return ret;
}
+static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr,
+ __u32 scope_id)
+{
+ struct rds_ib_device *rds_ibdev = NULL;
+
+ /* RDS/IB is restricted to the initial network namespace */
+ if (!net_eq(net, &init_net))
+ return -EPROTOTYPE;
+
+ if (ipv6_addr_v4mapped(addr)) {
+ rds_ibdev = rds_ib_get_device(addr->s6_addr32[3]);
+ if (rds_ibdev) {
+ rds_ib_dev_put(rds_ibdev);
+ return 0;
+ }
+ }
+
+ return rds_ib_laddr_check_cm(net, addr, scope_id);
+}
+
static void rds_ib_unregister_client(void)
{
ib_unregister_client(&rds_ib_client);
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 8ef3178ed4d6..5ff346a1e8ba 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -381,6 +381,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn,
__rds_ib_conn_error(conn, KERN_WARNING "RDS/IB: " fmt)
/* ib_rdma.c */
+struct rds_ib_device *rds_ib_get_device(__be32 ipaddr);
int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev,
struct in6_addr *ipaddr);
void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 2cfec252eeac..9594ea245f7f 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -43,7 +43,7 @@ struct workqueue_struct *rds_ib_mr_wq;
static void rds_ib_odp_mr_worker(struct work_struct *work);
-static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
+struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
{
struct rds_ib_device *rds_ibdev;
struct rds_ib_ipaddr *i_ipaddr;