diff options
Diffstat (limited to 'net/core/dev.c')
| -rw-r--r-- | net/core/dev.c | 1001 |
1 files changed, 755 insertions, 246 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 2d8aceee4284..4906b44af850 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -79,6 +79,7 @@ #include <linux/sched.h> #include <linux/sched/mm.h> #include <linux/mutex.h> +#include <linux/rwsem.h> #include <linux/string.h> #include <linux/mm.h> #include <linux/socket.h> @@ -142,6 +143,7 @@ #include <linux/net_namespace.h> #include <linux/indirect_call_wrapper.h> #include <net/devlink.h> +#include <linux/pm_runtime.h> #include "net-sysfs.h" @@ -194,7 +196,7 @@ static DEFINE_SPINLOCK(napi_hash_lock); static unsigned int napi_gen_id = NR_CPUS; static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8); -static seqcount_t devnet_rename_seq; +static DECLARE_RWSEM(devnet_rename_sem); static inline void dev_base_seq_inc(struct net *net) { @@ -398,6 +400,89 @@ static RAW_NOTIFIER_HEAD(netdev_chain); DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); EXPORT_PER_CPU_SYMBOL(softnet_data); +#ifdef CONFIG_LOCKDEP +/* + * register_netdevice() inits txq->_xmit_lock and sets lockdep class + * according to dev->type + */ +static const unsigned short netdev_lock_type[] = { + ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25, + ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET, + ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM, + ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP, + ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD, + ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25, + ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP, + ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD, + ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI, + ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE, + ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET, + ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, + ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM, + ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE, + ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE}; + +static const char *const netdev_lock_name[] = { + "_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", + "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET", + "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM", + "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP", + "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD", + "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25", + "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP", + "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD", + "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI", + "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE", + "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET", + "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", + "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM", + "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE", + "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"}; + +static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; +static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; + +static inline unsigned short netdev_lock_pos(unsigned short dev_type) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++) + if (netdev_lock_type[i] == dev_type) + return i; + /* the last key is used by default */ + return ARRAY_SIZE(netdev_lock_type) - 1; +} + +static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, + unsigned short dev_type) +{ + int i; + + i = netdev_lock_pos(dev_type); + lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i], + netdev_lock_name[i]); +} + +static inline void netdev_set_addr_lockdep_class(struct net_device *dev) +{ + int i; + + i = netdev_lock_pos(dev->type); + lockdep_set_class_and_name(&dev->addr_list_lock, + &netdev_addr_lock_key[i], + netdev_lock_name[i]); +} +#else +static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, + unsigned short dev_type) +{ +} + +static inline void netdev_set_addr_lockdep_class(struct net_device *dev) +{ +} +#endif + /******************************************************************************* * * Protocol management and registration routines @@ -930,33 +1015,28 @@ EXPORT_SYMBOL(dev_get_by_napi_id); * @net: network namespace * @name: a pointer to the buffer where the name will be stored. * @ifindex: the ifindex of the interface to get the name from. - * - * The use of raw_seqcount_begin() and cond_resched() before - * retrying is required as we want to give the writers a chance - * to complete when CONFIG_PREEMPTION is not set. */ int netdev_get_name(struct net *net, char *name, int ifindex) { struct net_device *dev; - unsigned int seq; + int ret; -retry: - seq = raw_seqcount_begin(&devnet_rename_seq); + down_read(&devnet_rename_sem); rcu_read_lock(); + dev = dev_get_by_index_rcu(net, ifindex); if (!dev) { - rcu_read_unlock(); - return -ENODEV; + ret = -ENODEV; + goto out; } strcpy(name, dev->name); - rcu_read_unlock(); - if (read_seqcount_retry(&devnet_rename_seq, seq)) { - cond_resched(); - goto retry; - } - return 0; + ret = 0; +out: + rcu_read_unlock(); + up_read(&devnet_rename_sem); + return ret; } /** @@ -1228,10 +1308,10 @@ int dev_change_name(struct net_device *dev, const char *newname) likely(!(dev->priv_flags & IFF_LIVE_RENAME_OK))) return -EBUSY; - write_seqcount_begin(&devnet_rename_seq); + down_write(&devnet_rename_sem); if (strncmp(newname, dev->name, IFNAMSIZ) == 0) { - write_seqcount_end(&devnet_rename_seq); + up_write(&devnet_rename_sem); return 0; } @@ -1239,7 +1319,7 @@ int dev_change_name(struct net_device *dev, const char *newname) err = dev_get_valid_name(net, dev, newname); if (err < 0) { - write_seqcount_end(&devnet_rename_seq); + up_write(&devnet_rename_sem); return err; } @@ -1254,11 +1334,11 @@ rollback: if (ret) { memcpy(dev->name, oldname, IFNAMSIZ); dev->name_assign_type = old_assign_type; - write_seqcount_end(&devnet_rename_seq); + up_write(&devnet_rename_sem); return ret; } - write_seqcount_end(&devnet_rename_seq); + up_write(&devnet_rename_sem); netdev_adjacent_rename_links(dev, oldname); @@ -1279,7 +1359,7 @@ rollback: /* err >= 0 after dev_alloc_name() or stores the first errno */ if (err >= 0) { err = ret; - write_seqcount_begin(&devnet_rename_seq); + down_write(&devnet_rename_sem); memcpy(dev->name, oldname, IFNAMSIZ); memcpy(oldname, newname, IFNAMSIZ); dev->name_assign_type = old_assign_type; @@ -1413,8 +1493,13 @@ static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack) ASSERT_RTNL(); - if (!netif_device_present(dev)) - return -ENODEV; + if (!netif_device_present(dev)) { + /* may be detached because parent is runtime-suspended */ + if (dev->dev.parent) + pm_runtime_resume(dev->dev.parent); + if (!netif_device_present(dev)) + return -ENODEV; + } /* Block netpoll from trying to do any rx path servicing. * If we don't do this there is a chance ndo_poll_controller @@ -3369,10 +3454,9 @@ static netdev_features_t net_mpls_features(struct sk_buff *skb, static netdev_features_t harmonize_features(struct sk_buff *skb, netdev_features_t features) { - int tmp; __be16 type; - type = skb_network_protocol(skb, &tmp); + type = skb_network_protocol(skb, NULL); features = net_mpls_features(skb, features, type); if (skb->ip_summed != CHECKSUM_NONE && @@ -4113,10 +4197,12 @@ int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) local_bh_disable(); + dev_xmit_recursion_inc(); HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_drv_stopped(txq)) ret = netdev_start_xmit(skb, dev, txq, false); HARD_TX_UNLOCK(dev, txq); + dev_xmit_recursion_dec(); local_bh_enable(); @@ -4549,6 +4635,11 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, xdp->data_meta = xdp->data; xdp->data_end = xdp->data + hlen; xdp->data_hard_start = skb->data - skb_headroom(skb); + + /* SKB "head" area always have tailroom for skb_shared_info */ + xdp->frame_sz = (void *)skb_end_pointer(skb) - xdp->data_hard_start; + xdp->frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + orig_data_end = xdp->data_end; orig_data = xdp->data; eth = (struct ethhdr *)xdp->data; @@ -4572,14 +4663,11 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, skb_reset_network_header(skb); } - /* check if bpf_xdp_adjust_tail was used. it can only "shrink" - * pckt. - */ - off = orig_data_end - xdp->data_end; + /* check if bpf_xdp_adjust_tail was used */ + off = xdp->data_end - orig_data_end; if (off != 0) { skb_set_tail_pointer(skb, xdp->data_end - xdp->data); - skb->len -= off; - + skb->len += off; /* positive on grow, negative on shrink */ } /* check if XDP changed eth hdr such SKB needs update */ @@ -4602,10 +4690,10 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, break; default: bpf_warn_invalid_xdp_action(act); - /* fall through */ + fallthrough; case XDP_ABORTED: trace_xdp_exception(skb->dev, xdp_prog, act); - /* fall through */ + fallthrough; case XDP_DROP: do_drop: kfree_skb(skb); @@ -5350,6 +5438,20 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp) struct bpf_prog *new = xdp->prog; int ret = 0; + if (new) { + u32 i; + + /* generic XDP does not work with DEVMAPs that can + * have a bpf_prog installed on an entry + */ + for (i = 0; i < new->aux->used_map_cnt; i++) { + if (dev_map_can_have_prog(new->aux->used_maps[i])) + return -EINVAL; + if (cpu_map_prog_allowed(new->aux->used_maps[i])) + return -EINVAL; + } + } + switch (xdp->command) { case XDP_SETUP_PROG: rcu_assign_pointer(dev->xdp_prog, new); @@ -5365,10 +5467,6 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp) } break; - case XDP_QUERY_PROG: - xdp->prog_id = old ? old->aux->id : 0; - break; - default: ret = -EINVAL; break; @@ -5490,7 +5588,7 @@ void netif_receive_skb_list(struct list_head *head) } EXPORT_SYMBOL(netif_receive_skb_list); -DEFINE_PER_CPU(struct work_struct, flush_works); +static DEFINE_PER_CPU(struct work_struct, flush_works); /* Network device is going away, flush any packets still pending */ static void flush_backlog(struct work_struct *work) @@ -5506,7 +5604,7 @@ static void flush_backlog(struct work_struct *work) skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { if (skb->dev->reg_state == NETREG_UNREGISTERING) { __skb_unlink(skb, &sd->input_pkt_queue); - kfree_skb(skb); + dev_kfree_skb_irq(skb); input_queue_head_incr(sd); } } @@ -6237,7 +6335,8 @@ EXPORT_SYMBOL(__napi_schedule_irqoff); bool napi_complete_done(struct napi_struct *n, int work_done) { - unsigned long flags, val, new; + unsigned long flags, val, new, timeout = 0; + bool ret = true; /* * 1) Don't let napi dequeue from the cpu poll list @@ -6249,20 +6348,23 @@ bool napi_complete_done(struct napi_struct *n, int work_done) NAPIF_STATE_IN_BUSY_POLL))) return false; + if (work_done) { + if (n->gro_bitmask) + timeout = READ_ONCE(n->dev->gro_flush_timeout); + n->defer_hard_irqs_count = READ_ONCE(n->dev->napi_defer_hard_irqs); + } + if (n->defer_hard_irqs_count > 0) { + n->defer_hard_irqs_count--; + timeout = READ_ONCE(n->dev->gro_flush_timeout); + if (timeout) + ret = false; + } if (n->gro_bitmask) { - unsigned long timeout = 0; - - if (work_done) - timeout = n->dev->gro_flush_timeout; - /* When the NAPI instance uses a timeout and keeps postponing * it, we need to bound somehow the time packets are kept in * the GRO layer */ napi_gro_flush(n, !!timeout); - if (timeout) - hrtimer_start(&n->timer, ns_to_ktime(timeout), - HRTIMER_MODE_REL_PINNED); } gro_normal_list(n); @@ -6294,7 +6396,10 @@ bool napi_complete_done(struct napi_struct *n, int work_done) return false; } - return true; + if (timeout) + hrtimer_start(&n->timer, ns_to_ktime(timeout), + HRTIMER_MODE_REL_PINNED); + return ret; } EXPORT_SYMBOL(napi_complete_done); @@ -6474,7 +6579,7 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer) /* Note : we use a relaxed variant of napi_schedule_prep() not setting * NAPI_STATE_MISSED, since we do not react to a device IRQ. */ - if (napi->gro_bitmask && !napi_disable_pending(napi) && + if (!napi_disable_pending(napi) && !test_and_set_bit(NAPI_STATE_SCHED, &napi->state)) __napi_schedule_irqoff(napi); @@ -6507,12 +6612,13 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, netdev_err_once(dev, "%s() called with weight %d\n", __func__, weight); napi->weight = weight; - list_add(&napi->dev_list, &dev->napi_list); napi->dev = dev; #ifdef CONFIG_NETPOLL napi->poll_owner = -1; #endif set_bit(NAPI_STATE_SCHED, &napi->state); + set_bit(NAPI_STATE_NPSVC, &napi->state); + list_add_rcu(&napi->dev_list, &dev->napi_list); napi_hash_add(napi); } EXPORT_SYMBOL(netif_napi_add); @@ -6583,7 +6689,9 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) trace_napi_poll(n, work, weight); } - WARN_ON_ONCE(work > weight); + if (unlikely(work > weight)) + pr_err_once("NAPI poll function %pS returned %d, exceeding its budget of %d.\n", + n->poll, work, weight); if (likely(work < weight)) goto out_unlock; @@ -6704,9 +6812,10 @@ static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev, return NULL; } -static int ____netdev_has_upper_dev(struct net_device *upper_dev, void *data) +static int ____netdev_has_upper_dev(struct net_device *upper_dev, + struct netdev_nested_priv *priv) { - struct net_device *dev = data; + struct net_device *dev = (struct net_device *)priv->data; return upper_dev == dev; } @@ -6723,10 +6832,14 @@ static int ____netdev_has_upper_dev(struct net_device *upper_dev, void *data) bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev) { + struct netdev_nested_priv priv = { + .data = (void *)upper_dev, + }; + ASSERT_RTNL(); return netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev, - upper_dev); + &priv); } EXPORT_SYMBOL(netdev_has_upper_dev); @@ -6743,8 +6856,12 @@ EXPORT_SYMBOL(netdev_has_upper_dev); bool netdev_has_upper_dev_all_rcu(struct net_device *dev, struct net_device *upper_dev) { + struct netdev_nested_priv priv = { + .data = (void *)upper_dev, + }; + return !!netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev, - upper_dev); + &priv); } EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu); @@ -6889,8 +7006,8 @@ static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev, static int __netdev_walk_all_upper_dev(struct net_device *dev, int (*fn)(struct net_device *dev, - void *data), - void *data) + struct netdev_nested_priv *priv), + struct netdev_nested_priv *priv) { struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; @@ -6902,7 +7019,7 @@ static int __netdev_walk_all_upper_dev(struct net_device *dev, while (1) { if (now != dev) { - ret = fn(now, data); + ret = fn(now, priv); if (ret) return ret; } @@ -6938,8 +7055,8 @@ static int __netdev_walk_all_upper_dev(struct net_device *dev, int netdev_walk_all_upper_dev_rcu(struct net_device *dev, int (*fn)(struct net_device *dev, - void *data), - void *data) + struct netdev_nested_priv *priv), + struct netdev_nested_priv *priv) { struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; @@ -6950,7 +7067,7 @@ int netdev_walk_all_upper_dev_rcu(struct net_device *dev, while (1) { if (now != dev) { - ret = fn(now, data); + ret = fn(now, priv); if (ret) return ret; } @@ -6986,10 +7103,15 @@ EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu); static bool __netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev) { + struct netdev_nested_priv priv = { + .flags = 0, + .data = (void *)upper_dev, + }; + ASSERT_RTNL(); return __netdev_walk_all_upper_dev(dev, ____netdev_has_upper_dev, - upper_dev); + &priv); } /** @@ -7107,8 +7229,8 @@ static struct net_device *__netdev_next_lower_dev(struct net_device *dev, int netdev_walk_all_lower_dev(struct net_device *dev, int (*fn)(struct net_device *dev, - void *data), - void *data) + struct netdev_nested_priv *priv), + struct netdev_nested_priv *priv) { struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; @@ -7119,7 +7241,7 @@ int netdev_walk_all_lower_dev(struct net_device *dev, while (1) { if (now != dev) { - ret = fn(now, data); + ret = fn(now, priv); if (ret) return ret; } @@ -7154,8 +7276,8 @@ EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev); static int __netdev_walk_all_lower_dev(struct net_device *dev, int (*fn)(struct net_device *dev, - void *data), - void *data) + struct netdev_nested_priv *priv), + struct netdev_nested_priv *priv) { struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; @@ -7167,7 +7289,7 @@ static int __netdev_walk_all_lower_dev(struct net_device *dev, while (1) { if (now != dev) { - ret = fn(now, data); + ret = fn(now, priv); if (ret) return ret; } @@ -7256,22 +7378,34 @@ static u8 __netdev_lower_depth(struct net_device *dev) return max_depth; } -static int __netdev_update_upper_level(struct net_device *dev, void *data) +static int __netdev_update_upper_level(struct net_device *dev, + struct netdev_nested_priv *__unused) { dev->upper_level = __netdev_upper_depth(dev) + 1; return 0; } -static int __netdev_update_lower_level(struct net_device *dev, void *data) +static int __netdev_update_lower_level(struct net_device *dev, + struct netdev_nested_priv *priv) { dev->lower_level = __netdev_lower_depth(dev) + 1; + +#ifdef CONFIG_LOCKDEP + if (!priv) + return 0; + + if (priv->flags & NESTED_SYNC_IMM) + dev->nested_level = dev->lower_level - 1; + if (priv->flags & NESTED_SYNC_TODO) + net_unlink_todo(dev); +#endif return 0; } int netdev_walk_all_lower_dev_rcu(struct net_device *dev, int (*fn)(struct net_device *dev, - void *data), - void *data) + struct netdev_nested_priv *priv), + struct netdev_nested_priv *priv) { struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; @@ -7282,7 +7416,7 @@ int netdev_walk_all_lower_dev_rcu(struct net_device *dev, while (1) { if (now != dev) { - ret = fn(now, data); + ret = fn(now, priv); if (ret) return ret; } @@ -7542,6 +7676,7 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, static int __netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, bool master, void *upper_priv, void *upper_info, + struct netdev_nested_priv *priv, struct netlink_ext_ack *extack) { struct netdev_notifier_changeupper_info changeupper_info = { @@ -7598,9 +7733,9 @@ static int __netdev_upper_dev_link(struct net_device *dev, __netdev_update_upper_level(dev, NULL); __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); - __netdev_update_lower_level(upper_dev, NULL); + __netdev_update_lower_level(upper_dev, priv); __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, - NULL); + priv); return 0; @@ -7625,8 +7760,13 @@ int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, struct netlink_ext_ack *extack) { + struct netdev_nested_priv priv = { + .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO, + .data = NULL, + }; + return __netdev_upper_dev_link(dev, upper_dev, false, - NULL, NULL, extack); + NULL, NULL, &priv, extack); } EXPORT_SYMBOL(netdev_upper_dev_link); @@ -7649,21 +7789,19 @@ int netdev_master_upper_dev_link(struct net_device *dev, void *upper_priv, void *upper_info, struct netlink_ext_ack *extack) { + struct netdev_nested_priv priv = { + .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO, + .data = NULL, + }; + return __netdev_upper_dev_link(dev, upper_dev, true, - upper_priv, upper_info, extack); + upper_priv, upper_info, &priv, extack); } EXPORT_SYMBOL(netdev_master_upper_dev_link); -/** - * netdev_upper_dev_unlink - Removes a link to upper device - * @dev: device - * @upper_dev: new upper device - * - * Removes a link to device which is upper to this one. The caller must hold - * the RTNL lock. - */ -void netdev_upper_dev_unlink(struct net_device *dev, - struct net_device *upper_dev) +static void __netdev_upper_dev_unlink(struct net_device *dev, + struct net_device *upper_dev, + struct netdev_nested_priv *priv) { struct netdev_notifier_changeupper_info changeupper_info = { .info = { @@ -7688,9 +7826,28 @@ void netdev_upper_dev_unlink(struct net_device *dev, __netdev_update_upper_level(dev, NULL); __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); - __netdev_update_lower_level(upper_dev, NULL); + __netdev_update_lower_level(upper_dev, priv); __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, - NULL); + priv); +} + +/** + * netdev_upper_dev_unlink - Removes a link to upper device + * @dev: device + * @upper_dev: new upper device + * + * Removes a link to device which is upper to this one. The caller must hold + * the RTNL lock. + */ +void netdev_upper_dev_unlink(struct net_device *dev, + struct net_device *upper_dev) +{ + struct netdev_nested_priv priv = { + .flags = NESTED_SYNC_TODO, + .data = NULL, + }; + + __netdev_upper_dev_unlink(dev, upper_dev, &priv); } EXPORT_SYMBOL(netdev_upper_dev_unlink); @@ -7726,6 +7883,10 @@ int netdev_adjacent_change_prepare(struct net_device *old_dev, struct net_device *dev, struct netlink_ext_ack *extack) { + struct netdev_nested_priv priv = { + .flags = 0, + .data = NULL, + }; int err; if (!new_dev) @@ -7733,8 +7894,8 @@ int netdev_adjacent_change_prepare(struct net_device *old_dev, if (old_dev && new_dev != old_dev) netdev_adjacent_dev_disable(dev, old_dev); - - err = netdev_upper_dev_link(new_dev, dev, extack); + err = __netdev_upper_dev_link(new_dev, dev, false, NULL, NULL, &priv, + extack); if (err) { if (old_dev && new_dev != old_dev) netdev_adjacent_dev_enable(dev, old_dev); @@ -7749,6 +7910,11 @@ void netdev_adjacent_change_commit(struct net_device *old_dev, struct net_device *new_dev, struct net_device *dev) { + struct netdev_nested_priv priv = { + .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO, + .data = NULL, + }; + if (!new_dev || !old_dev) return; @@ -7756,7 +7922,7 @@ void netdev_adjacent_change_commit(struct net_device *old_dev, return; netdev_adjacent_dev_enable(dev, old_dev); - netdev_upper_dev_unlink(old_dev, dev); + __netdev_upper_dev_unlink(old_dev, dev, &priv); } EXPORT_SYMBOL(netdev_adjacent_change_commit); @@ -7764,13 +7930,18 @@ void netdev_adjacent_change_abort(struct net_device *old_dev, struct net_device *new_dev, struct net_device *dev) { + struct netdev_nested_priv priv = { + .flags = 0, + .data = NULL, + }; + if (!new_dev) return; if (old_dev && new_dev != old_dev) netdev_adjacent_dev_enable(dev, old_dev); - netdev_upper_dev_unlink(new_dev, dev); + __netdev_upper_dev_unlink(new_dev, dev, &priv); } EXPORT_SYMBOL(netdev_adjacent_change_abort); @@ -7796,6 +7967,29 @@ void netdev_bonding_info_change(struct net_device *dev, } EXPORT_SYMBOL(netdev_bonding_info_change); +/** + * netdev_get_xmit_slave - Get the xmit slave of master device + * @dev: device + * @skb: The packet + * @all_slaves: assume all the slaves are active + * + * The reference counters are not incremented so the caller must be + * careful with locks. The caller must hold RCU lock. + * %NULL is returned if no slave is found. + */ + +struct net_device *netdev_get_xmit_slave(struct net_device *dev, + struct sk_buff *skb, + bool all_slaves) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (!ops->ndo_get_xmit_slave) + return NULL; + return ops->ndo_get_xmit_slave(dev, skb, all_slaves); +} +EXPORT_SYMBOL(netdev_get_xmit_slave); + static void netdev_adjacent_add_links(struct net_device *dev) { struct netdev_adjacent *iter; @@ -8516,7 +8710,7 @@ int dev_get_port_parent_id(struct net_device *dev, if (!first.id_len) first = *ppid; else if (memcmp(&first, ppid, sizeof(*ppid))) - return -ENODATA; + return -EOPNOTSUPP; } return err; @@ -8581,84 +8775,446 @@ int dev_change_proto_down_generic(struct net_device *dev, bool proto_down) } EXPORT_SYMBOL(dev_change_proto_down_generic); -u32 __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op, - enum bpf_netdev_command cmd) +/** + * dev_change_proto_down_reason - proto down reason + * + * @dev: device + * @mask: proto down mask + * @value: proto down value + */ +void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask, + u32 value) { - struct netdev_bpf xdp; + int b; - if (!bpf_op) - return 0; + if (!mask) { + dev->proto_down_reason = value; + } else { + for_each_set_bit(b, &mask, 32) { + if (value & (1 << b)) + dev->proto_down_reason |= BIT(b); + else + dev->proto_down_reason &= ~BIT(b); + } + } +} +EXPORT_SYMBOL(dev_change_proto_down_reason); - memset(&xdp, 0, sizeof(xdp)); - xdp.command = cmd; +struct bpf_xdp_link { + struct bpf_link link; + struct net_device *dev; /* protected by rtnl_lock, no refcnt held */ + int flags; +}; + +static enum bpf_xdp_mode dev_xdp_mode(struct net_device *dev, u32 flags) +{ + if (flags & XDP_FLAGS_HW_MODE) + return XDP_MODE_HW; + if (flags & XDP_FLAGS_DRV_MODE) + return XDP_MODE_DRV; + if (flags & XDP_FLAGS_SKB_MODE) + return XDP_MODE_SKB; + return dev->netdev_ops->ndo_bpf ? XDP_MODE_DRV : XDP_MODE_SKB; +} + +static bpf_op_t dev_xdp_bpf_op(struct net_device *dev, enum bpf_xdp_mode mode) +{ + switch (mode) { + case XDP_MODE_SKB: + return generic_xdp_install; + case XDP_MODE_DRV: + case XDP_MODE_HW: + return dev->netdev_ops->ndo_bpf; + default: + return NULL; + }; +} + +static struct bpf_xdp_link *dev_xdp_link(struct net_device *dev, + enum bpf_xdp_mode mode) +{ + return dev->xdp_state[mode].link; +} - /* Query must always succeed. */ - WARN_ON(bpf_op(dev, &xdp) < 0 && cmd == XDP_QUERY_PROG); +static struct bpf_prog *dev_xdp_prog(struct net_device *dev, + enum bpf_xdp_mode mode) +{ + struct bpf_xdp_link *link = dev_xdp_link(dev, mode); + + if (link) + return link->link.prog; + return dev->xdp_state[mode].prog; +} - return xdp.prog_id; +u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode) +{ + struct bpf_prog *prog = dev_xdp_prog(dev, mode); + + return prog ? prog->aux->id : 0; } -static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op, - struct netlink_ext_ack *extack, u32 flags, - struct bpf_prog *prog) +static void dev_xdp_set_link(struct net_device *dev, enum bpf_xdp_mode mode, + struct bpf_xdp_link *link) +{ + dev->xdp_state[mode].link = link; + dev->xdp_state[mode].prog = NULL; +} + +static void dev_xdp_set_prog(struct net_device *dev, enum bpf_xdp_mode mode, + struct bpf_prog *prog) +{ + dev->xdp_state[mode].link = NULL; + dev->xdp_state[mode].prog = prog; +} + +static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode, + bpf_op_t bpf_op, struct netlink_ext_ack *extack, + u32 flags, struct bpf_prog *prog) { - bool non_hw = !(flags & XDP_FLAGS_HW_MODE); - struct bpf_prog *prev_prog = NULL; struct netdev_bpf xdp; int err; - if (non_hw) { - prev_prog = bpf_prog_by_id(__dev_xdp_query(dev, bpf_op, - XDP_QUERY_PROG)); - if (IS_ERR(prev_prog)) - prev_prog = NULL; - } - memset(&xdp, 0, sizeof(xdp)); - if (flags & XDP_FLAGS_HW_MODE) - xdp.command = XDP_SETUP_PROG_HW; - else - xdp.command = XDP_SETUP_PROG; + xdp.command = mode == XDP_MODE_HW ? XDP_SETUP_PROG_HW : XDP_SETUP_PROG; xdp.extack = extack; xdp.flags = flags; xdp.prog = prog; + /* Drivers assume refcnt is already incremented (i.e, prog pointer is + * "moved" into driver), so they don't increment it on their own, but + * they do decrement refcnt when program is detached or replaced. + * Given net_device also owns link/prog, we need to bump refcnt here + * to prevent drivers from underflowing it. + */ + if (prog) + bpf_prog_inc(prog); err = bpf_op(dev, &xdp); - if (!err && non_hw) - bpf_prog_change_xdp(prev_prog, prog); + if (err) { + if (prog) + bpf_prog_put(prog); + return err; + } - if (prev_prog) - bpf_prog_put(prev_prog); + if (mode != XDP_MODE_HW) + bpf_prog_change_xdp(dev_xdp_prog(dev, mode), prog); - return err; + return 0; } static void dev_xdp_uninstall(struct net_device *dev) { - struct netdev_bpf xdp; - bpf_op_t ndo_bpf; + struct bpf_xdp_link *link; + struct bpf_prog *prog; + enum bpf_xdp_mode mode; + bpf_op_t bpf_op; - /* Remove generic XDP */ - WARN_ON(dev_xdp_install(dev, generic_xdp_install, NULL, 0, NULL)); + ASSERT_RTNL(); - /* Remove from the driver */ - ndo_bpf = dev->netdev_ops->ndo_bpf; - if (!ndo_bpf) - return; + for (mode = XDP_MODE_SKB; mode < __MAX_XDP_MODE; mode++) { + prog = dev_xdp_prog(dev, mode); + if (!prog) + continue; - memset(&xdp, 0, sizeof(xdp)); - xdp.command = XDP_QUERY_PROG; - WARN_ON(ndo_bpf(dev, &xdp)); - if (xdp.prog_id) - WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags, - NULL)); + bpf_op = dev_xdp_bpf_op(dev, mode); + if (!bpf_op) + continue; - /* Remove HW offload */ - memset(&xdp, 0, sizeof(xdp)); - xdp.command = XDP_QUERY_PROG_HW; - if (!ndo_bpf(dev, &xdp) && xdp.prog_id) - WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags, - NULL)); + WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL)); + + /* auto-detach link from net device */ + link = dev_xdp_link(dev, mode); + if (link) + link->dev = NULL; + else + bpf_prog_put(prog); + + dev_xdp_set_link(dev, mode, NULL); + } +} + +static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack, + struct bpf_xdp_link *link, struct bpf_prog *new_prog, + struct bpf_prog *old_prog, u32 flags) +{ + struct bpf_prog *cur_prog; + enum bpf_xdp_mode mode; + bpf_op_t bpf_op; + int err; + + ASSERT_RTNL(); + + /* either link or prog attachment, never both */ + if (link && (new_prog || old_prog)) + return -EINVAL; + /* link supports only XDP mode flags */ + if (link && (flags & ~XDP_FLAGS_MODES)) { + NL_SET_ERR_MSG(extack, "Invalid XDP flags for BPF link attachment"); + return -EINVAL; + } + /* just one XDP mode bit should be set, zero defaults to SKB mode */ + if (hweight32(flags & XDP_FLAGS_MODES) > 1) { + NL_SET_ERR_MSG(extack, "Only one XDP mode flag can be set"); + return -EINVAL; + } + /* old_prog != NULL implies XDP_FLAGS_REPLACE is set */ + if (old_prog && !(flags & XDP_FLAGS_REPLACE)) { + NL_SET_ERR_MSG(extack, "XDP_FLAGS_REPLACE is not specified"); + return -EINVAL; + } + + mode = dev_xdp_mode(dev, flags); + /* can't replace attached link */ + if (dev_xdp_link(dev, mode)) { + NL_SET_ERR_MSG(extack, "Can't replace active BPF XDP link"); + return -EBUSY; + } + + cur_prog = dev_xdp_prog(dev, mode); + /* can't replace attached prog with link */ + if (link && cur_prog) { + NL_SET_ERR_MSG(extack, "Can't replace active XDP program with BPF link"); + return -EBUSY; + } + if ((flags & XDP_FLAGS_REPLACE) && cur_prog != old_prog) { + NL_SET_ERR_MSG(extack, "Active program does not match expected"); + return -EEXIST; + } + + /* put effective new program into new_prog */ + if (link) + new_prog = link->link.prog; + + if (new_prog) { + bool offload = mode == XDP_MODE_HW; + enum bpf_xdp_mode other_mode = mode == XDP_MODE_SKB + ? XDP_MODE_DRV : XDP_MODE_SKB; + + if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && cur_prog) { + NL_SET_ERR_MSG(extack, "XDP program already attached"); + return -EBUSY; + } + if (!offload && dev_xdp_prog(dev, other_mode)) { + NL_SET_ERR_MSG(extack, "Native and generic XDP can't be active at the same time"); + return -EEXIST; + } + if (!offload && bpf_prog_is_dev_bound(new_prog->aux)) { + NL_SET_ERR_MSG(extack, "Using device-bound program without HW_MODE flag is not supported"); + return -EINVAL; + } + if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) { + NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device"); + return -EINVAL; + } + if (new_prog->expected_attach_type == BPF_XDP_CPUMAP) { + NL_SET_ERR_MSG(extack, "BPF_XDP_CPUMAP programs can not be attached to a device"); + return -EINVAL; + } + } + + /* don't call drivers if the effective program didn't change */ + if (new_prog != cur_prog) { + bpf_op = dev_xdp_bpf_op(dev, mode); + if (!bpf_op) { + NL_SET_ERR_MSG(extack, "Underlying driver does not support XDP in native mode"); + return -EOPNOTSUPP; + } + + err = dev_xdp_install(dev, mode, bpf_op, extack, flags, new_prog); + if (err) + return err; + } + + if (link) + dev_xdp_set_link(dev, mode, link); + else + dev_xdp_set_prog(dev, mode, new_prog); + if (cur_prog) + bpf_prog_put(cur_prog); + + return 0; +} + +static int dev_xdp_attach_link(struct net_device *dev, + struct netlink_ext_ack *extack, + struct bpf_xdp_link *link) +{ + return dev_xdp_attach(dev, extack, link, NULL, NULL, link->flags); +} + +static int dev_xdp_detach_link(struct net_device *dev, + struct netlink_ext_ack *extack, + struct bpf_xdp_link *link) +{ + enum bpf_xdp_mode mode; + bpf_op_t bpf_op; + + ASSERT_RTNL(); + + mode = dev_xdp_mode(dev, link->flags); + if (dev_xdp_link(dev, mode) != link) + return -EINVAL; + + bpf_op = dev_xdp_bpf_op(dev, mode); + WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL)); + dev_xdp_set_link(dev, mode, NULL); + return 0; +} + +static void bpf_xdp_link_release(struct bpf_link *link) +{ + struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); + + rtnl_lock(); + + /* if racing with net_device's tear down, xdp_link->dev might be + * already NULL, in which case link was already auto-detached + */ + if (xdp_link->dev) { + WARN_ON(dev_xdp_detach_link(xdp_link->dev, NULL, xdp_link)); + xdp_link->dev = NULL; + } + + rtnl_unlock(); +} + +static int bpf_xdp_link_detach(struct bpf_link *link) +{ + bpf_xdp_link_release(link); + return 0; +} + +static void bpf_xdp_link_dealloc(struct bpf_link *link) +{ + struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); + + kfree(xdp_link); +} + +static void bpf_xdp_link_show_fdinfo(const struct bpf_link *link, + struct seq_file *seq) +{ + struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); + u32 ifindex = 0; + + rtnl_lock(); + if (xdp_link->dev) + ifindex = xdp_link->dev->ifindex; + rtnl_unlock(); + + seq_printf(seq, "ifindex:\t%u\n", ifindex); +} + +static int bpf_xdp_link_fill_link_info(const struct bpf_link *link, + struct bpf_link_info *info) +{ + struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); + u32 ifindex = 0; + + rtnl_lock(); + if (xdp_link->dev) + ifindex = xdp_link->dev->ifindex; + rtnl_unlock(); + + info->xdp.ifindex = ifindex; + return 0; +} + +static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog, + struct bpf_prog *old_prog) +{ + struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); + enum bpf_xdp_mode mode; + bpf_op_t bpf_op; + int err = 0; + + rtnl_lock(); + + /* link might have been auto-released already, so fail */ + if (!xdp_link->dev) { + err = -ENOLINK; + goto out_unlock; + } + + if (old_prog && link->prog != old_prog) { + err = -EPERM; + goto out_unlock; + } + old_prog = link->prog; + if (old_prog == new_prog) { + /* no-op, don't disturb drivers */ + bpf_prog_put(new_prog); + goto out_unlock; + } + + mode = dev_xdp_mode(xdp_link->dev, xdp_link->flags); + bpf_op = dev_xdp_bpf_op(xdp_link->dev, mode); + err = dev_xdp_install(xdp_link->dev, mode, bpf_op, NULL, + xdp_link->flags, new_prog); + if (err) + goto out_unlock; + + old_prog = xchg(&link->prog, new_prog); + bpf_prog_put(old_prog); + +out_unlock: + rtnl_unlock(); + return err; +} + +static const struct bpf_link_ops bpf_xdp_link_lops = { + .release = bpf_xdp_link_release, + .dealloc = bpf_xdp_link_dealloc, + .detach = bpf_xdp_link_detach, + .show_fdinfo = bpf_xdp_link_show_fdinfo, + .fill_link_info = bpf_xdp_link_fill_link_info, + .update_prog = bpf_xdp_link_update, +}; + +int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) +{ + struct net *net = current->nsproxy->net_ns; + struct bpf_link_primer link_primer; + struct bpf_xdp_link *link; + struct net_device *dev; + int err, fd; + + dev = dev_get_by_index(net, attr->link_create.target_ifindex); + if (!dev) + return -EINVAL; + + link = kzalloc(sizeof(*link), GFP_USER); + if (!link) { + err = -ENOMEM; + goto out_put_dev; + } + + bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog); + link->dev = dev; + link->flags = attr->link_create.flags; + + err = bpf_link_prime(&link->link, &link_primer); + if (err) { + kfree(link); + goto out_put_dev; + } + + rtnl_lock(); + err = dev_xdp_attach_link(dev, NULL, link); + rtnl_unlock(); + + if (err) { + bpf_link_cleanup(&link_primer); + goto out_put_dev; + } + + fd = bpf_link_settle(&link_primer); + /* link itself doesn't hold dev's refcnt to not complicate shutdown */ + dev_put(dev); + return fd; + +out_put_dev: + dev_put(dev); + return err; } /** @@ -8674,83 +9230,36 @@ static void dev_xdp_uninstall(struct net_device *dev) int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, int expected_fd, u32 flags) { - const struct net_device_ops *ops = dev->netdev_ops; - enum bpf_netdev_command query; - u32 prog_id, expected_id = 0; - bpf_op_t bpf_op, bpf_chk; - struct bpf_prog *prog; - bool offload; + enum bpf_xdp_mode mode = dev_xdp_mode(dev, flags); + struct bpf_prog *new_prog = NULL, *old_prog = NULL; int err; ASSERT_RTNL(); - offload = flags & XDP_FLAGS_HW_MODE; - query = offload ? XDP_QUERY_PROG_HW : XDP_QUERY_PROG; - - bpf_op = bpf_chk = ops->ndo_bpf; - if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) { - NL_SET_ERR_MSG(extack, "underlying driver does not support XDP in native mode"); - return -EOPNOTSUPP; - } - if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE)) - bpf_op = generic_xdp_install; - if (bpf_op == bpf_chk) - bpf_chk = generic_xdp_install; - - prog_id = __dev_xdp_query(dev, bpf_op, query); - if (flags & XDP_FLAGS_REPLACE) { - if (expected_fd >= 0) { - prog = bpf_prog_get_type_dev(expected_fd, - BPF_PROG_TYPE_XDP, - bpf_op == ops->ndo_bpf); - if (IS_ERR(prog)) - return PTR_ERR(prog); - expected_id = prog->aux->id; - bpf_prog_put(prog); - } - - if (prog_id != expected_id) { - NL_SET_ERR_MSG(extack, "Active program does not match expected"); - return -EEXIST; - } - } if (fd >= 0) { - if (!offload && __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG)) { - NL_SET_ERR_MSG(extack, "native and generic XDP can't be active at the same time"); - return -EEXIST; + new_prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, + mode != XDP_MODE_SKB); + if (IS_ERR(new_prog)) + return PTR_ERR(new_prog); + } + + if (expected_fd >= 0) { + old_prog = bpf_prog_get_type_dev(expected_fd, BPF_PROG_TYPE_XDP, + mode != XDP_MODE_SKB); + if (IS_ERR(old_prog)) { + err = PTR_ERR(old_prog); + old_prog = NULL; + goto err_out; } - - if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && prog_id) { - NL_SET_ERR_MSG(extack, "XDP program already attached"); - return -EBUSY; - } - - prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, - bpf_op == ops->ndo_bpf); - if (IS_ERR(prog)) - return PTR_ERR(prog); - - if (!offload && bpf_prog_is_dev_bound(prog->aux)) { - NL_SET_ERR_MSG(extack, "using device-bound program without HW_MODE flag is not supported"); - bpf_prog_put(prog); - return -EINVAL; - } - - /* prog->aux->id may be 0 for orphaned device-bound progs */ - if (prog->aux->id && prog->aux->id == prog_id) { - bpf_prog_put(prog); - return 0; - } - } else { - if (!prog_id) - return 0; - prog = NULL; } - err = dev_xdp_install(dev, bpf_op, extack, flags, prog); - if (err < 0 && prog) - bpf_prog_put(prog); + err = dev_xdp_attach(dev, extack, NULL, new_prog, old_prog, flags); +err_out: + if (err && new_prog) + bpf_prog_put(new_prog); + if (old_prog) + bpf_prog_put(old_prog); return err; } @@ -9148,6 +9657,11 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev, else netif_dormant_off(dev); + if (rootdev->operstate == IF_OPER_TESTING) + netif_testing_on(dev); + else + netif_testing_off(dev); + if (netif_carrier_ok(rootdev)) netif_carrier_on(dev); else @@ -9208,7 +9722,7 @@ static void netdev_init_one_queue(struct net_device *dev, { /* Initialize queue lock */ spin_lock_init(&queue->_xmit_lock); - lockdep_set_class(&queue->_xmit_lock, &dev->qdisc_xmit_lock_key); + netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); queue->xmit_lock_owner = -1; netdev_queue_numa_node_write(queue, NUMA_NO_NODE); queue->dev = dev; @@ -9255,31 +9769,6 @@ void netif_tx_stop_all_queues(struct net_device *dev) } EXPORT_SYMBOL(netif_tx_stop_all_queues); -static void netdev_register_lockdep_key(struct net_device *dev) -{ - lockdep_register_key(&dev->qdisc_tx_busylock_key); - lockdep_register_key(&dev->qdisc_running_key); - lockdep_register_key(&dev->qdisc_xmit_lock_key); - lockdep_register_key(&dev->addr_list_lock_key); -} - -static void netdev_unregister_lockdep_key(struct net_device *dev) -{ - lockdep_unregister_key(&dev->qdisc_tx_busylock_key); - lockdep_unregister_key(&dev->qdisc_running_key); - lockdep_unregister_key(&dev->qdisc_xmit_lock_key); - lockdep_unregister_key(&dev->addr_list_lock_key); -} - -void netdev_update_lockdep_key(struct net_device *dev) -{ - lockdep_unregister_key(&dev->addr_list_lock_key); - lockdep_register_key(&dev->addr_list_lock_key); - - lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key); -} -EXPORT_SYMBOL(netdev_update_lockdep_key); - /** * register_netdevice - register a network device * @dev: device to register @@ -9318,7 +9807,7 @@ int register_netdevice(struct net_device *dev) return ret; spin_lock_init(&dev->addr_list_lock); - lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key); + netdev_set_addr_lockdep_class(dev); ret = dev_get_valid_name(net, dev, dev->name); if (ret < 0) @@ -9439,6 +9928,13 @@ int register_netdevice(struct net_device *dev) rcu_barrier(); dev->reg_state = NETREG_UNREGISTERED; + /* We should put the kobject that hold in + * netdev_unregister_kobject(), otherwise + * the net device cannot be freed when + * driver calls free_netdev(), because the + * kobject is being hold. + */ + kobject_put(&dev->dev.kobj); } /* * Prevent userspace races by waiting until the network @@ -9629,6 +10125,19 @@ static void netdev_wait_allrefs(struct net_device *dev) void netdev_run_todo(void) { struct list_head list; +#ifdef CONFIG_LOCKDEP + struct list_head unlink_list; + + list_replace_init(&net_unlink_list, &unlink_list); + + while (!list_empty(&unlink_list)) { + struct net_device *dev = list_first_entry(&unlink_list, + struct net_device, + unlink_list); + list_del(&dev->unlink_list); + dev->nested_level = dev->lower_level - 1; + } +#endif /* Snapshot list, allow later requests */ list_replace_init(&net_todo_list, &list); @@ -9837,12 +10346,14 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev_net_set(dev, &init_net); - netdev_register_lockdep_key(dev); - dev->gso_max_size = GSO_MAX_SIZE; dev->gso_max_segs = GSO_MAX_SEGS; dev->upper_level = 1; dev->lower_level = 1; +#ifdef CONFIG_LOCKDEP + dev->nested_level = 0; + INIT_LIST_HEAD(&dev->unlink_list); +#endif INIT_LIST_HEAD(&dev->napi_list); INIT_LIST_HEAD(&dev->unreg_list); @@ -9926,8 +10437,6 @@ void free_netdev(struct net_device *dev) free_percpu(dev->xdp_bulkq); dev->xdp_bulkq = NULL; - netdev_unregister_lockdep_key(dev); - /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { netdev_freemem(dev); |
