From ac5b70198adc25c73fba28de4f78adcee8f6be0b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 12 Feb 2018 21:35:31 -0800 Subject: net: fix race on decreasing number of TX queues netif_set_real_num_tx_queues() can be called when netdev is up. That usually happens when user requests change of number of channels/rings with ethtool -L. The procedure for changing the number of queues involves resetting the qdiscs and setting dev->num_tx_queues to the new value. When the new value is lower than the old one, extra care has to be taken to ensure ordering of accesses to the number of queues vs qdisc reset. Currently the queues are reset before new dev->num_tx_queues is assigned, leaving a window of time where packets can be enqueued onto the queues going down, leading to a likely crash in the drivers, since most drivers don't check if TX skbs are assigned to an active queue. Fixes: e6484930d7c7 ("net: allocate tx queues in register_netdevice") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/core/dev.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index dda9d7b9a840..d4362befe7e2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2382,8 +2382,11 @@ EXPORT_SYMBOL(netdev_set_num_tc); */ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) { + bool disabling; int rc; + disabling = txq < dev->real_num_tx_queues; + if (txq < 1 || txq > dev->num_tx_queues) return -EINVAL; @@ -2399,15 +2402,19 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) if (dev->num_tc) netif_setup_tc(dev, txq); - if (txq < dev->real_num_tx_queues) { + dev->real_num_tx_queues = txq; + + if (disabling) { + synchronize_net(); qdisc_reset_all_tx_gt(dev, txq); #ifdef CONFIG_XPS netif_reset_xps_queues_gt(dev, txq); #endif } + } else { + dev->real_num_tx_queues = txq; } - dev->real_num_tx_queues = txq; return 0; } EXPORT_SYMBOL(netif_set_real_num_tx_queues); -- cgit v1.2.3 From 50d629e7a843d1635ecb1658335279503c4ec9a8 Mon Sep 17 00:00:00 2001 From: Mike Manning Date: Mon, 26 Feb 2018 23:49:30 +0000 Subject: net: allow interface to be set into VRF if VLAN interface in same VRF Setting an interface into a VRF fails with 'RTNETLINK answers: File exists' if one of its VLAN interfaces is already in the same VRF. As the VRF is an upper device of the VLAN interface, it is also showing up as an upper device of the interface itself. The solution is to restrict this check to devices other than master. As only one master device can be linked to a device, the check in this case is that the upper device (VRF) being linked to is not the same as the master device instead of it not being any one of the upper devices. The following example shows an interface ens12 (with a VLAN interface ens12.10) being set into VRF green, which behaves as expected: # ip link add link ens12 ens12.10 type vlan id 10 # ip link set dev ens12 master vrfgreen # ip link show dev ens12 3: ens12: mtu 1500 qdisc fq_codel master vrfgreen state UP mode DEFAULT group default qlen 1000 link/ether 52:54:00:4c:a0:45 brd ff:ff:ff:ff:ff:ff But if the VLAN interface has previously been set into the same VRF, then setting the interface into the VRF fails: # ip link set dev ens12 nomaster # ip link set dev ens12.10 master vrfgreen # ip link show dev ens12.10 39: ens12.10@ens12: mtu 1500 qdisc noqueue master vrfgreen state UP mode DEFAULT group default qlen 1000 link/ether 52:54:00:4c:a0:45 brd ff:ff:ff:ff:ff:ff # ip link set dev ens12 master vrfgreen RTNETLINK answers: File exists The workaround is to move the VLAN interface back into the default VRF beforehand, but it has to be shut first so as to avoid the risk of traffic leaking from the VRF. This fix avoids needing this workaround. Signed-off-by: Mike Manning Signed-off-by: David S. Miller --- net/core/dev.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index d4362befe7e2..2cedf520cb28 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6396,6 +6396,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, .linking = true, .upper_info = upper_info, }; + struct net_device *master_dev; int ret = 0; ASSERT_RTNL(); @@ -6407,11 +6408,14 @@ static int __netdev_upper_dev_link(struct net_device *dev, if (netdev_has_upper_dev(upper_dev, dev)) return -EBUSY; - if (netdev_has_upper_dev(dev, upper_dev)) - return -EEXIST; - - if (master && netdev_master_upper_dev_get(dev)) - return -EBUSY; + if (!master) { + if (netdev_has_upper_dev(dev, upper_dev)) + return -EEXIST; + } else { + master_dev = netdev_master_upper_dev_get(dev); + if (master_dev) + return master_dev == upper_dev ? -EEXIST : -EBUSY; + } ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, &changeupper_info.info); -- cgit v1.2.3 From 4dcb31d4649df36297296b819437709f5407059c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 14 Mar 2018 09:04:16 -0700 Subject: net: use skb_to_full_sk() in skb_update_prio() Andrei Vagin reported a KASAN: slab-out-of-bounds error in skb_update_prio() Since SYNACK might be attached to a request socket, we need to get back to the listener socket. Since this listener is manipulated without locks, add const qualifiers to sock_cgroup_prioidx() so that the const can also be used in skb_update_prio() Also add the const qualifier to sock_cgroup_classid() for consistency. Fixes: ca6fb0651883 ("tcp: attach SYNACK messages to request sockets instead of listener") Signed-off-by: Eric Dumazet Reported-by: Andrei Vagin Signed-off-by: David S. Miller --- include/linux/cgroup-defs.h | 4 ++-- net/core/dev.c | 22 +++++++++++++++------- 2 files changed, 17 insertions(+), 9 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 9f242b876fde..f8e76d01a5ad 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -755,13 +755,13 @@ struct sock_cgroup_data { * updaters and return part of the previous pointer as the prioidx or * classid. Such races are short-lived and the result isn't critical. */ -static inline u16 sock_cgroup_prioidx(struct sock_cgroup_data *skcd) +static inline u16 sock_cgroup_prioidx(const struct sock_cgroup_data *skcd) { /* fallback to 1 which is always the ID of the root cgroup */ return (skcd->is_data & 1) ? skcd->prioidx : 1; } -static inline u32 sock_cgroup_classid(struct sock_cgroup_data *skcd) +static inline u32 sock_cgroup_classid(const struct sock_cgroup_data *skcd) { /* fallback to 0 which is the unconfigured default classid */ return (skcd->is_data & 1) ? skcd->classid : 0; diff --git a/net/core/dev.c b/net/core/dev.c index 2cedf520cb28..12be20535714 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3278,15 +3278,23 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) static void skb_update_prio(struct sk_buff *skb) { - struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap); + const struct netprio_map *map; + const struct sock *sk; + unsigned int prioidx; - if (!skb->priority && skb->sk && map) { - unsigned int prioidx = - sock_cgroup_prioidx(&skb->sk->sk_cgrp_data); + if (skb->priority) + return; + map = rcu_dereference_bh(skb->dev->priomap); + if (!map) + return; + sk = skb_to_full_sk(skb); + if (!sk) + return; - if (prioidx < map->priomap_len) - skb->priority = map->priomap[prioidx]; - } + prioidx = sock_cgroup_prioidx(&sk->sk_cgrp_data); + + if (prioidx < map->priomap_len) + skb->priority = map->priomap[prioidx]; } #else #define skb_update_prio(skb) -- cgit v1.2.3