diff options
| author | David S. Miller <davem@davemloft.net> | 2018-03-04 13:04:24 -0500 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2018-03-04 13:04:24 -0500 |
| commit | b33cc2cec9e2e58ec5d220e26f1111117ea782d9 (patch) | |
| tree | 7c717d159d438db0ac887315bbe55d2fb1d75101 /drivers | |
| parent | Merge branch 'sctp-clean-up-sctp_sendmsg' (diff) | |
| parent | selftests: forwarding: Add multipath test for L4 hashing (diff) | |
| download | linux-b33cc2cec9e2e58ec5d220e26f1111117ea782d9.tar.gz linux-b33cc2cec9e2e58ec5d220e26f1111117ea782d9.zip | |
Merge branch 'net-ipv6-Add-support-for-path-selection-using-hash-of-5-tuple'
David Ahern says:
====================
net/ipv6: Add support for path selection using hash of 5-tuple
Hardware supports multipath selection using the standard L4 5-tuple
instead of just L3 and the flow label. In addition, some network
operators prefer IPv6 path selection to use the 5-tuple. To that end,
add support to IPv6 for multipath hash policy similar to
bf4e0a3db97eb ("net: ipv4: add support for ECMP hash policy choice").
The default is still L3 which covers source and destination addresses
along with flow label and IPv6 protocol. This gives users a choice in
hash algorithms if they believe L3 only and the IPv6 flow label are not
sufficient for their use case.
A separate sysctl is added for IPv6, allowing IPv4 and IPv6 to use
different algorithms if desired.
The first 3 patches modify the IPv4 variant so that at the end of the
patch set the ipv4 and ipv6 implementations are direct parallels.
Patch 4 refactors the existing rt6_multipath_hash in preparation for
adding the policy option.
Patch 5 renames the existing netevent to have IPv4 in the name so ipv4
changes can be distinguished from IPv6 if the netevent handler cares.
Patch 6 adds the skb as an argument through the FIB lookup functions
to the multipath selection. Needed for the forwarding case.
Patch 7 adds the L4 hash support.
Patch 8 adds the hook for the netevent to the spectrum driver to update
the ASIC.
Patch 9 removes no longer used code.
Patch 10 adds a testcase for IPv6 multipath with L4 hash.
v3
- comments from Ido:
- removed fib_info arg in patch 1; left by mistake on rebase to net-next
- removed __get_hash_from_flowi4 declaration
- line wrap change to spectrum_router.c to maintain 80 chars
v2
- rebased to top of tree
- added refactor of fib_multipath_hash following recent change
- plumb skb through lookup functions to multipath selection
- fix sysctl setting; was missing the data set in ipv6_sysctl_net_init
- added test case
RFC to v1:
- rebase to top of net-next
- fix addr_type in hash_keys and removed flow label as noticed by Ido
- added a comment to cover letter about choice in algorithms based on
use case per Or's comments
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers')
| -rw-r--r-- | drivers/infiniband/core/cma.c | 2 | ||||
| -rw-r--r-- | drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 17 | ||||
| -rw-r--r-- | drivers/net/ipvlan/ipvlan_core.c | 3 | ||||
| -rw-r--r-- | drivers/net/vrf.c | 7 |
4 files changed, 22 insertions, 7 deletions
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 3ae32d1ddd27..915bbd867b61 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1334,7 +1334,7 @@ static bool validate_ipv6_net_dev(struct net_device *net_dev, IPV6_ADDR_LINKLOCAL; struct rt6_info *rt = rt6_lookup(dev_net(net_dev), &dst_addr->sin6_addr, &src_addr->sin6_addr, net_dev->ifindex, - strict); + NULL, strict); bool ret; if (!rt) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 69f16c605b9d..a8a578610a7b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2430,7 +2430,8 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *nb, mlxsw_core_schedule_work(&net_work->work); mlxsw_sp_port_dev_put(mlxsw_sp_port); break; - case NETEVENT_MULTIPATH_HASH_UPDATE: + case NETEVENT_IPV4_MPATH_HASH_UPDATE: + case NETEVENT_IPV6_MPATH_HASH_UPDATE: net = ptr; if (!net_eq(net, &init_net)) @@ -7030,13 +7031,25 @@ static void mlxsw_sp_mp4_hash_init(char *recr2_pl) static void mlxsw_sp_mp6_hash_init(char *recr2_pl) { + bool only_l3 = !init_net.ipv6.sysctl.multipath_hash_policy; + mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP); mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP); mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl); mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl); - mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_FLOW_LABEL); mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER); + if (only_l3) { + mlxsw_sp_mp_hash_field_set(recr2_pl, + MLXSW_REG_RECR2_IPV6_FLOW_LABEL); + } else { + mlxsw_sp_mp_hash_header_set(recr2_pl, + MLXSW_REG_RECR2_TCP_UDP_EN_IPV6); + mlxsw_sp_mp_hash_field_set(recr2_pl, + MLXSW_REG_RECR2_TCP_UDP_SPORT); + mlxsw_sp_mp_hash_field_set(recr2_pl, + MLXSW_REG_RECR2_TCP_UDP_DPORT); + } } static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp) diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 17daebd19e65..1a8132eb2a3e 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -817,7 +817,8 @@ struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb, }; skb_dst_drop(skb); - dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, flags); + dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, + skb, flags); skb_dst_set(skb, dst); break; } diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index e459e601c57f..c6be49d3a9eb 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -941,6 +941,7 @@ static struct rt6_info *vrf_ip6_route_lookup(struct net *net, const struct net_device *dev, struct flowi6 *fl6, int ifindex, + const struct sk_buff *skb, int flags) { struct net_vrf *vrf = netdev_priv(dev); @@ -959,7 +960,7 @@ static struct rt6_info *vrf_ip6_route_lookup(struct net *net, if (!table) return NULL; - return ip6_pol_route(net, table, ifindex, fl6, flags); + return ip6_pol_route(net, table, ifindex, fl6, skb, flags); } static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev, @@ -977,7 +978,7 @@ static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev, struct net *net = dev_net(vrf_dev); struct rt6_info *rt6; - rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex, + rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex, skb, RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_IFACE); if (unlikely(!rt6)) return; @@ -1110,7 +1111,7 @@ static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev, if (!ipv6_addr_any(&fl6->saddr)) flags |= RT6_LOOKUP_F_HAS_SADDR; - rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags); + rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, NULL, flags); if (rt) dst = &rt->dst; |
