From 2ea5eabaf04a1829383aefe98ac38a2e5ae2d698 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 2 Jul 2021 16:48:24 +0530 Subject: bpf: devmap: Implement devmap prog execution for generic XDP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This lifts the restriction on running devmap BPF progs in generic redirect mode. To match native XDP behavior, it is invoked right before generic_xdp_tx is called, and only supports XDP_PASS/XDP_ABORTED/ XDP_DROP actions. We also return 0 even if devmap program drops the packet, as semantically redirect has already succeeded and the devmap prog is the last point before TX of the packet to device where it can deliver a verdict on the packet. This also means it must take care of freeing the skb, as xdp_do_generic_redirect callers only do that in case an error is returned. Since devmap entry prog is supported, remove the check in generic_xdp_install entirely. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Reviewed-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20210702111825.491065-5-memxor@gmail.com --- kernel/bpf/devmap.c | 49 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 10 deletions(-) (limited to 'kernel/bpf/devmap.c') diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 2546dafd6672..fa26eac5e4b6 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -322,16 +322,6 @@ static int dev_map_hash_get_next_key(struct bpf_map *map, void *key, return -ENOENT; } -bool dev_map_can_have_prog(struct bpf_map *map) -{ - if ((map->map_type == BPF_MAP_TYPE_DEVMAP || - map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) && - map->value_size != offsetofend(struct bpf_devmap_val, ifindex)) - return true; - - return false; -} - static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog, struct xdp_frame **frames, int n, struct net_device *dev) @@ -499,6 +489,37 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, return 0; } +static u32 dev_map_bpf_prog_run_skb(struct sk_buff *skb, struct bpf_dtab_netdev *dst) +{ + struct xdp_txq_info txq = { .dev = dst->dev }; + struct xdp_buff xdp; + u32 act; + + if (!dst->xdp_prog) + return XDP_PASS; + + __skb_pull(skb, skb->mac_len); + xdp.txq = &txq; + + act = bpf_prog_run_generic_xdp(skb, &xdp, dst->xdp_prog); + switch (act) { + case XDP_PASS: + __skb_push(skb, skb->mac_len); + break; + default: + bpf_warn_invalid_xdp_action(act); + fallthrough; + case XDP_ABORTED: + trace_xdp_exception(dst->dev, dst->xdp_prog, act); + fallthrough; + case XDP_DROP: + kfree_skb(skb); + break; + } + + return act; +} + int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, struct net_device *dev_rx) { @@ -614,6 +635,14 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, err = xdp_ok_fwd_dev(dst->dev, skb->len); if (unlikely(err)) return err; + + /* Redirect has already succeeded semantically at this point, so we just + * return 0 even if packet is dropped. Helper below takes care of + * freeing skb. + */ + if (dev_map_bpf_prog_run_skb(skb, dst) != XDP_PASS) + return 0; + skb->dev = dst->dev; generic_xdp_tx(skb, xdp_prog); -- cgit v1.2.3 From aeea1b86f9363f3feabb496534d886f082a89f21 Mon Sep 17 00:00:00 2001 From: Jussi Maki Date: Sat, 31 Jul 2021 05:57:35 +0000 Subject: bpf, devmap: Exclude XDP broadcast to master device If the ingress device is bond slave, do not broadcast back through it or the bond master. Signed-off-by: Jussi Maki Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210731055738.16820-5-joamaki@gmail.com --- kernel/bpf/devmap.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 60 insertions(+), 9 deletions(-) (limited to 'kernel/bpf/devmap.c') diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 542e94fa30b4..f02d04540c0c 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -534,10 +534,9 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, return __xdp_enqueue(dev, xdp, dev_rx, dst->xdp_prog); } -static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp, - int exclude_ifindex) +static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp) { - if (!obj || obj->dev->ifindex == exclude_ifindex || + if (!obj || !obj->dev->netdev_ops->ndo_xdp_xmit) return false; @@ -562,17 +561,48 @@ static int dev_map_enqueue_clone(struct bpf_dtab_netdev *obj, return 0; } +static inline bool is_ifindex_excluded(int *excluded, int num_excluded, int ifindex) +{ + while (num_excluded--) { + if (ifindex == excluded[num_excluded]) + return true; + } + return false; +} + +/* Get ifindex of each upper device. 'indexes' must be able to hold at + * least MAX_NEST_DEV elements. + * Returns the number of ifindexes added. + */ +static int get_upper_ifindexes(struct net_device *dev, int *indexes) +{ + struct net_device *upper; + struct list_head *iter; + int n = 0; + + netdev_for_each_upper_dev_rcu(dev, upper, iter) { + indexes[n++] = upper->ifindex; + } + return n; +} + int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx, struct bpf_map *map, bool exclude_ingress) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); - int exclude_ifindex = exclude_ingress ? dev_rx->ifindex : 0; struct bpf_dtab_netdev *dst, *last_dst = NULL; + int excluded_devices[1+MAX_NEST_DEV]; struct hlist_head *head; struct xdp_frame *xdpf; + int num_excluded = 0; unsigned int i; int err; + if (exclude_ingress) { + num_excluded = get_upper_ifindexes(dev_rx, excluded_devices); + excluded_devices[num_excluded++] = dev_rx->ifindex; + } + xdpf = xdp_convert_buff_to_frame(xdp); if (unlikely(!xdpf)) return -EOVERFLOW; @@ -581,7 +611,10 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx, for (i = 0; i < map->max_entries; i++) { dst = rcu_dereference_check(dtab->netdev_map[i], rcu_read_lock_bh_held()); - if (!is_valid_dst(dst, xdp, exclude_ifindex)) + if (!is_valid_dst(dst, xdp)) + continue; + + if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex)) continue; /* we only need n-1 clones; last_dst enqueued below */ @@ -601,7 +634,11 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx, head = dev_map_index_hash(dtab, i); hlist_for_each_entry_rcu(dst, head, index_hlist, lockdep_is_held(&dtab->index_lock)) { - if (!is_valid_dst(dst, xdp, exclude_ifindex)) + if (!is_valid_dst(dst, xdp)) + continue; + + if (is_ifindex_excluded(excluded_devices, num_excluded, + dst->dev->ifindex)) continue; /* we only need n-1 clones; last_dst enqueued below */ @@ -675,18 +712,27 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, bool exclude_ingress) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); - int exclude_ifindex = exclude_ingress ? dev->ifindex : 0; struct bpf_dtab_netdev *dst, *last_dst = NULL; + int excluded_devices[1+MAX_NEST_DEV]; struct hlist_head *head; struct hlist_node *next; + int num_excluded = 0; unsigned int i; int err; + if (exclude_ingress) { + num_excluded = get_upper_ifindexes(dev, excluded_devices); + excluded_devices[num_excluded++] = dev->ifindex; + } + if (map->map_type == BPF_MAP_TYPE_DEVMAP) { for (i = 0; i < map->max_entries; i++) { dst = rcu_dereference_check(dtab->netdev_map[i], rcu_read_lock_bh_held()); - if (!dst || dst->dev->ifindex == exclude_ifindex) + if (!dst) + continue; + + if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex)) continue; /* we only need n-1 clones; last_dst enqueued below */ @@ -700,12 +746,17 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, return err; last_dst = dst; + } } else { /* BPF_MAP_TYPE_DEVMAP_HASH */ for (i = 0; i < dtab->n_buckets; i++) { head = dev_map_index_hash(dtab, i); hlist_for_each_entry_safe(dst, next, head, index_hlist) { - if (!dst || dst->dev->ifindex == exclude_ifindex) + if (!dst) + continue; + + if (is_ifindex_excluded(excluded_devices, num_excluded, + dst->dev->ifindex)) continue; /* we only need n-1 clones; last_dst enqueued below */ -- cgit v1.2.3