From 4f2673b3a2b6246729a1ff13b8945a040839dbd3 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 23 Mar 2021 01:51:44 +0200 Subject: net: bridge: add helper to replay port and host-joined mdb entries I have a system with DSA ports, and udhcpcd is configured to bring interfaces up as soon as they are created. I create a bridge as follows: ip link add br0 type bridge As soon as I create the bridge and udhcpcd brings it up, I also have avahi which automatically starts sending IPv6 packets to advertise some local services, and because of that, the br0 bridge joins the following IPv6 groups due to the code path detailed below: 33:33:ff:6d:c1:9c vid 0 33:33:00:00:00:6a vid 0 33:33:00:00:00:fb vid 0 br_dev_xmit -> br_multicast_rcv -> br_ip6_multicast_add_group -> __br_multicast_add_group -> br_multicast_host_join -> br_mdb_notify This is all fine, but inside br_mdb_notify we have br_mdb_switchdev_host hooked up, and switchdev will attempt to offload the host joined groups to an empty list of ports. Of course nobody offloads them. Then when we add a port to br0: ip link set swp0 master br0 the bridge doesn't replay the host-joined MDB entries from br_add_if, and eventually the host joined addresses expire, and a switchdev notification for deleting it is emitted, but surprise, the original addition was already completely missed. The strategy to address this problem is to replay the MDB entries (both the port ones and the host joined ones) when the new port joins the bridge, similar to what vxlan_fdb_replay does (in that case, its FDB can be populated and only then attached to a bridge that you offload). However there are 2 possibilities: the addresses can be 'pushed' by the bridge into the port, or the port can 'pull' them from the bridge. Considering that in the general case, the new port can be really late to the party, and there may have been many other switchdev ports that already received the initial notification, we would like to avoid delivering duplicate events to them, since they might misbehave. And currently, the bridge calls the entire switchdev notifier chain, whereas for replaying it should just call the notifier block of the new guy. But the bridge doesn't know what is the new guy's notifier block, it just knows where the switchdev notifier chain is. So for simplification, we make this a driver-initiated pull for now, and the notifier block is passed as an argument. To emulate the calling context for mdb objects (deferred and put on the blocking notifier chain), we must iterate under RCU protection through the bridge's mdb entries, queue them, and only call them once we're out of the RCU read-side critical section. There was some opportunity for reuse between br_mdb_switchdev_host_port, br_mdb_notify and the newly added br_mdb_queue_one in how the switchdev mdb object is created, so a helper was created. Suggested-by: Ido Schimmel Signed-off-by: Vladimir Oltean Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/net/switchdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net/switchdev.h') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index b7fc7d0f54e2..8c3218177136 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -68,6 +68,7 @@ enum switchdev_obj_id { }; struct switchdev_obj { + struct list_head list; struct net_device *orig_dev; enum switchdev_obj_id id; u32 flags; -- cgit v1.2.3 From 2c4eca3ef7161f6632959c00c8eae182f4398901 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 14 Apr 2021 19:52:56 +0300 Subject: net: bridge: switchdev: include local flag in FDB notifications As explained in bugfix commit 6ab4c3117aec ("net: bridge: don't notify switchdev for local FDB addresses") as well as in this discussion: https://lore.kernel.org/netdev/20210117193009.io3nungdwuzmo5f7@skbuf/ the switchdev notifiers for FDB entries managed to have a zero-day bug, which was that drivers would not know what to do with local FDB entries, because they were not told that they are local. The bug fix was to simply not notify them of those addresses. Let us now add the 'is_local' bit to bridge FDB entries, and make all drivers ignore these entries by their own choice. Co-developed-by: Tobias Waldekranz Signed-off-by: Tobias Waldekranz Signed-off-by: Vladimir Oltean Reviewed-by: Grygorii Strashko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c | 4 ++-- drivers/net/ethernet/marvell/prestera/prestera_switchdev.c | 2 +- drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 5 +++-- drivers/net/ethernet/rocker/rocker_main.c | 4 ++-- drivers/net/ethernet/ti/am65-cpsw-switchdev.c | 4 ++-- drivers/net/ethernet/ti/cpsw_switchdev.c | 4 ++-- include/net/switchdev.h | 1 + net/bridge/br_switchdev.c | 3 +-- net/dsa/slave.c | 2 +- 9 files changed, 15 insertions(+), 14 deletions(-) (limited to 'include/net/switchdev.h') diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index 5250d51d783c..05de37c3b64c 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -2098,7 +2098,7 @@ static void dpaa2_switch_event_work(struct work_struct *work) switch (switchdev_work->event) { case SWITCHDEV_FDB_ADD_TO_DEVICE: - if (!fdb_info->added_by_user) + if (!fdb_info->added_by_user || fdb_info->is_local) break; if (is_unicast_ether_addr(fdb_info->addr)) err = dpaa2_switch_port_fdb_add_uc(netdev_priv(dev), @@ -2113,7 +2113,7 @@ static void dpaa2_switch_event_work(struct work_struct *work) &fdb_info->info, NULL); break; case SWITCHDEV_FDB_DEL_TO_DEVICE: - if (!fdb_info->added_by_user) + if (!fdb_info->added_by_user || fdb_info->is_local) break; if (is_unicast_ether_addr(fdb_info->addr)) dpaa2_switch_port_fdb_del_uc(netdev_priv(dev), fdb_info->addr); diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c index 49e052273f30..cb564890a3dc 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c @@ -798,7 +798,7 @@ static void prestera_fdb_event_work(struct work_struct *work) switch (swdev_work->event) { case SWITCHDEV_FDB_ADD_TO_DEVICE: fdb_info = &swdev_work->fdb_info; - if (!fdb_info->added_by_user) + if (!fdb_info->added_by_user || fdb_info->is_local) break; err = prestera_port_fdb_set(port, fdb_info, true); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index c1f05c17557d..eeccd586e781 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -2916,7 +2916,8 @@ mlxsw_sp_switchdev_bridge_nve_fdb_event(struct mlxsw_sp_switchdev_event_work * return; if (switchdev_work->event == SWITCHDEV_FDB_ADD_TO_DEVICE && - !switchdev_work->fdb_info.added_by_user) + (!switchdev_work->fdb_info.added_by_user || + switchdev_work->fdb_info.is_local)) return; if (!netif_running(dev)) @@ -2971,7 +2972,7 @@ static void mlxsw_sp_switchdev_bridge_fdb_event_work(struct work_struct *work) switch (switchdev_work->event) { case SWITCHDEV_FDB_ADD_TO_DEVICE: fdb_info = &switchdev_work->fdb_info; - if (!fdb_info->added_by_user) + if (!fdb_info->added_by_user || fdb_info->is_local) break; err = mlxsw_sp_port_fdb_set(mlxsw_sp_port, fdb_info, true); if (err) diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 3473d296b2e2..a46633606cae 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2736,7 +2736,7 @@ static void rocker_switchdev_event_work(struct work_struct *work) switch (switchdev_work->event) { case SWITCHDEV_FDB_ADD_TO_DEVICE: fdb_info = &switchdev_work->fdb_info; - if (!fdb_info->added_by_user) + if (!fdb_info->added_by_user || fdb_info->is_local) break; err = rocker_world_port_fdb_add(rocker_port, fdb_info); if (err) { @@ -2747,7 +2747,7 @@ static void rocker_switchdev_event_work(struct work_struct *work) break; case SWITCHDEV_FDB_DEL_TO_DEVICE: fdb_info = &switchdev_work->fdb_info; - if (!fdb_info->added_by_user) + if (!fdb_info->added_by_user || fdb_info->is_local) break; err = rocker_world_port_fdb_del(rocker_port, fdb_info); if (err) diff --git a/drivers/net/ethernet/ti/am65-cpsw-switchdev.c b/drivers/net/ethernet/ti/am65-cpsw-switchdev.c index d93ffd8a08b0..23cfb91e9c4d 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-switchdev.c +++ b/drivers/net/ethernet/ti/am65-cpsw-switchdev.c @@ -385,7 +385,7 @@ static void am65_cpsw_switchdev_event_work(struct work_struct *work) fdb->addr, fdb->vid, fdb->added_by_user, fdb->offloaded, port_id); - if (!fdb->added_by_user) + if (!fdb->added_by_user || fdb->is_local) break; if (memcmp(port->slave.mac_addr, (u8 *)fdb->addr, ETH_ALEN) == 0) port_id = HOST_PORT_NUM; @@ -401,7 +401,7 @@ static void am65_cpsw_switchdev_event_work(struct work_struct *work) fdb->addr, fdb->vid, fdb->added_by_user, fdb->offloaded, port_id); - if (!fdb->added_by_user) + if (!fdb->added_by_user || fdb->is_local) break; if (memcmp(port->slave.mac_addr, (u8 *)fdb->addr, ETH_ALEN) == 0) port_id = HOST_PORT_NUM; diff --git a/drivers/net/ethernet/ti/cpsw_switchdev.c b/drivers/net/ethernet/ti/cpsw_switchdev.c index a72bb570756f..05a64fb7a04f 100644 --- a/drivers/net/ethernet/ti/cpsw_switchdev.c +++ b/drivers/net/ethernet/ti/cpsw_switchdev.c @@ -395,7 +395,7 @@ static void cpsw_switchdev_event_work(struct work_struct *work) fdb->addr, fdb->vid, fdb->added_by_user, fdb->offloaded, port); - if (!fdb->added_by_user) + if (!fdb->added_by_user || fdb->is_local) break; if (memcmp(priv->mac_addr, (u8 *)fdb->addr, ETH_ALEN) == 0) port = HOST_PORT_NUM; @@ -411,7 +411,7 @@ static void cpsw_switchdev_event_work(struct work_struct *work) fdb->addr, fdb->vid, fdb->added_by_user, fdb->offloaded, port); - if (!fdb->added_by_user) + if (!fdb->added_by_user || fdb->is_local) break; if (memcmp(priv->mac_addr, (u8 *)fdb->addr, ETH_ALEN) == 0) port = HOST_PORT_NUM; diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 8c3218177136..f1a5a9a3634d 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -209,6 +209,7 @@ struct switchdev_notifier_fdb_info { const unsigned char *addr; u16 vid; u8 added_by_user:1, + is_local:1, offloaded:1; }; diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index c390f84adea2..a5e601e41cb9 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -114,13 +114,12 @@ br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type) .addr = fdb->key.addr.addr, .vid = fdb->key.vlan_id, .added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags), + .is_local = test_bit(BR_FDB_LOCAL, &fdb->flags), .offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags), }; if (!fdb->dst) return; - if (test_bit(BR_FDB_LOCAL, &fdb->flags)) - return; switch (type) { case RTM_DELNEIGH: diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 9300cb66e500..3ae67202fda2 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -2329,7 +2329,7 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused, fdb_info = ptr; if (dsa_slave_dev_check(dev)) { - if (!fdb_info->added_by_user) + if (!fdb_info->added_by_user || fdb_info->is_local) return NOTIFY_OK; dp = dsa_slave_to_port(dev); -- cgit v1.2.3