From 0ba5b2f2c381dbec9ed9e4ab3ae5d3e667de0dc3 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 4 Sep 2025 15:52:37 +0300 Subject: net: phylink: add lock for serializing concurrent pl->phydev writes with resolver Currently phylink_resolve() protects itself against concurrent phylink_bringup_phy() or phylink_disconnect_phy() calls which modify pl->phydev by relying on pl->state_mutex. The problem is that in phylink_resolve(), pl->state_mutex is in a lock inversion state with pl->phydev->lock. So pl->phydev->lock needs to be acquired prior to pl->state_mutex. But that requires dereferencing pl->phydev in the first place, and without pl->state_mutex, that is racy. Hence the reason for the extra lock. Currently it is redundant, but it will serve a functional purpose once mutex_lock(&phy->lock) will be moved outside of the mutex_lock(&pl->state_mutex) section. Another alternative considered would have been to let phylink_resolve() acquire the rtnl_mutex, which is also held when phylink_bringup_phy() and phylink_disconnect_phy() are called. But since phylink_disconnect_phy() runs under rtnl_lock(), it would deadlock with phylink_resolve() when calling flush_work(&pl->resolve). Additionally, it would have been undesirable because it would have unnecessarily blocked many other call paths as well in the entire kernel, so the smaller-scoped lock was preferred. Link: https://lore.kernel.org/netdev/aLb6puGVzR29GpPx@shell.armlinux.org.uk/ Signed-off-by: Vladimir Oltean Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20250904125238.193990-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/phylink.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'drivers/net/phy') diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index c7cb95aa8007..aa17ad2622fc 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -67,6 +67,8 @@ struct phylink { struct timer_list link_poll; struct mutex state_mutex; + /* Serialize updates to pl->phydev with phylink_resolve() */ + struct mutex phydev_mutex; struct phylink_link_state phy_state; unsigned int phy_ib_mode; struct work_struct resolve; @@ -1591,8 +1593,11 @@ static void phylink_resolve(struct work_struct *w) struct phylink_link_state link_state; bool mac_config = false; bool retrigger = false; + struct phy_device *phy; bool cur_link_state; + mutex_lock(&pl->phydev_mutex); + phy = pl->phydev; mutex_lock(&pl->state_mutex); cur_link_state = phylink_link_is_up(pl); @@ -1626,11 +1631,11 @@ static void phylink_resolve(struct work_struct *w) /* If we have a phy, the "up" state is the union of both the * PHY and the MAC */ - if (pl->phydev) + if (phy) link_state.link &= pl->phy_state.link; /* Only update if the PHY link is up */ - if (pl->phydev && pl->phy_state.link) { + if (phy && pl->phy_state.link) { /* If the interface has changed, force a link down * event if the link isn't already down, and re-resolve. */ @@ -1694,6 +1699,7 @@ static void phylink_resolve(struct work_struct *w) queue_work(system_power_efficient_wq, &pl->resolve); } mutex_unlock(&pl->state_mutex); + mutex_unlock(&pl->phydev_mutex); } static void phylink_run_resolve(struct phylink *pl) @@ -1829,6 +1835,7 @@ struct phylink *phylink_create(struct phylink_config *config, if (!pl) return ERR_PTR(-ENOMEM); + mutex_init(&pl->phydev_mutex); mutex_init(&pl->state_mutex); INIT_WORK(&pl->resolve, phylink_resolve); @@ -2089,6 +2096,7 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy, dev_name(&phy->mdio.dev), phy->drv->name, irq_str); kfree(irq_str); + mutex_lock(&pl->phydev_mutex); mutex_lock(&phy->lock); mutex_lock(&pl->state_mutex); pl->phydev = phy; @@ -2134,6 +2142,7 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy, mutex_unlock(&pl->state_mutex); mutex_unlock(&phy->lock); + mutex_unlock(&pl->phydev_mutex); phylink_dbg(pl, "phy: %s setting supported %*pb advertising %*pb\n", @@ -2312,6 +2321,7 @@ void phylink_disconnect_phy(struct phylink *pl) ASSERT_RTNL(); + mutex_lock(&pl->phydev_mutex); phy = pl->phydev; if (phy) { mutex_lock(&phy->lock); @@ -2321,8 +2331,11 @@ void phylink_disconnect_phy(struct phylink *pl) pl->mac_tx_clk_stop = false; mutex_unlock(&pl->state_mutex); mutex_unlock(&phy->lock); - flush_work(&pl->resolve); + } + mutex_unlock(&pl->phydev_mutex); + if (phy) { + flush_work(&pl->resolve); phy_disconnect(phy); } } -- cgit v1.2.3 From e2a10daba84968f6b5777d150985fd7d6abc9c84 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 4 Sep 2025 15:52:38 +0300 Subject: net: phy: transfer phy_config_inband() locking responsibility to phylink Problem description =================== Lockdep reports a possible circular locking dependency (AB/BA) between &pl->state_mutex and &phy->lock, as follows. phylink_resolve() // acquires &pl->state_mutex -> phylink_major_config() -> phy_config_inband() // acquires &pl->phydev->lock whereas all the other call sites where &pl->state_mutex and &pl->phydev->lock have the locking scheme reversed. Everywhere else, &pl->phydev->lock is acquired at the top level, and &pl->state_mutex at the lower level. A clear example is phylink_bringup_phy(). The outlier is the newly introduced phy_config_inband() and the existing lock order is the correct one. To understand why it cannot be the other way around, it is sufficient to consider phylink_phy_change(), phylink's callback from the PHY device's phy->phy_link_change() virtual method, invoked by the PHY state machine. phy_link_up() and phy_link_down(), the (indirect) callers of phylink_phy_change(), are called with &phydev->lock acquired. Then phylink_phy_change() acquires its own &pl->state_mutex, to serialize changes made to its pl->phy_state and pl->link_config. So all other instances of &pl->state_mutex and &phydev->lock must be consistent with this order. Problem impact ============== I think the kernel runs a serious deadlock risk if an existing phylink_resolve() thread, which results in a phy_config_inband() call, is concurrent with a phy_link_up() or phy_link_down() call, which will deadlock on &pl->state_mutex in phylink_phy_change(). Practically speaking, the impact may be limited by the slow speed of the medium auto-negotiation protocol, which makes it unlikely for the current state to still be unresolved when a new one is detected, but I think the problem is there. Nonetheless, the problem was discovered using lockdep. Proposed solution ================= Practically speaking, the phy_config_inband() requirement of having phydev->lock acquired must transfer to the caller (phylink is the only caller). There, it must bubble up until immediately before &pl->state_mutex is acquired, for the cases where that takes place. Solution details, considerations, notes ======================================= This is the phy_config_inband() call graph: sfp_upstream_ops :: connect_phy() | v phylink_sfp_connect_phy() | v phylink_sfp_config_phy() | | sfp_upstream_ops :: module_insert() | | | v | phylink_sfp_module_insert() | | | | sfp_upstream_ops :: module_start() | | | | | v | | phylink_sfp_module_start() | | | | v v | phylink_sfp_config_optical() phylink_start() | | | phylink_resume() v v | | phylink_sfp_set_config() | | | v v v phylink_mac_initial_config() | phylink_resolve() | | phylink_ethtool_ksettings_set() v v v phylink_major_config() | v phy_config_inband() phylink_major_config() caller #1, phylink_mac_initial_config(), does not acquire &pl->state_mutex nor do its callers. It must acquire &pl->phydev->lock prior to calling phylink_major_config(). phylink_major_config() caller #2, phylink_resolve() acquires &pl->state_mutex, thus also needs to acquire &pl->phydev->lock. phylink_major_config() caller #3, phylink_ethtool_ksettings_set(), is completely uninteresting, because it only calls phylink_major_config() if pl->phydev is NULL (otherwise it calls phy_ethtool_ksettings_set()). We need to change nothing there. Other solutions =============== The lock inversion between &pl->state_mutex and &pl->phydev->lock has occurred at least once before, as seen in commit c718af2d00a3 ("net: phylink: fix ethtool -A with attached PHYs"). The solution there was to simply not call phy_set_asym_pause() under the &pl->state_mutex. That cannot be extended to our case though, where the phy_config_inband() call is much deeper inside the &pl->state_mutex section. Fixes: 5fd0f1a02e75 ("net: phylink: add negotiation of in-band capabilities") Signed-off-by: Vladimir Oltean Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20250904125238.193990-2-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy.c | 12 ++++-------- drivers/net/phy/phylink.c | 9 +++++++++ 2 files changed, 13 insertions(+), 8 deletions(-) (limited to 'drivers/net/phy') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 13df28445f02..c02da57a4da5 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1065,23 +1065,19 @@ EXPORT_SYMBOL_GPL(phy_inband_caps); */ int phy_config_inband(struct phy_device *phydev, unsigned int modes) { - int err; + lockdep_assert_held(&phydev->lock); if (!!(modes & LINK_INBAND_DISABLE) + !!(modes & LINK_INBAND_ENABLE) + !!(modes & LINK_INBAND_BYPASS) != 1) return -EINVAL; - mutex_lock(&phydev->lock); if (!phydev->drv) - err = -EIO; + return -EIO; else if (!phydev->drv->config_inband) - err = -EOPNOTSUPP; - else - err = phydev->drv->config_inband(phydev, modes); - mutex_unlock(&phydev->lock); + return -EOPNOTSUPP; - return err; + return phydev->drv->config_inband(phydev, modes); } EXPORT_SYMBOL(phy_config_inband); diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index aa17ad2622fc..1988b7d2089a 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1434,6 +1434,7 @@ static void phylink_get_fixed_state(struct phylink *pl, static void phylink_mac_initial_config(struct phylink *pl, bool force_restart) { struct phylink_link_state link_state; + struct phy_device *phy = pl->phydev; switch (pl->req_link_an_mode) { case MLO_AN_PHY: @@ -1457,7 +1458,11 @@ static void phylink_mac_initial_config(struct phylink *pl, bool force_restart) link_state.link = false; phylink_apply_manual_flow(pl, &link_state); + if (phy) + mutex_lock(&phy->lock); phylink_major_config(pl, force_restart, &link_state); + if (phy) + mutex_unlock(&phy->lock); } static const char *phylink_pause_to_str(int pause) @@ -1598,6 +1603,8 @@ static void phylink_resolve(struct work_struct *w) mutex_lock(&pl->phydev_mutex); phy = pl->phydev; + if (phy) + mutex_lock(&phy->lock); mutex_lock(&pl->state_mutex); cur_link_state = phylink_link_is_up(pl); @@ -1699,6 +1706,8 @@ static void phylink_resolve(struct work_struct *w) queue_work(system_power_efficient_wq, &pl->resolve); } mutex_unlock(&pl->state_mutex); + if (phy) + mutex_unlock(&phy->lock); mutex_unlock(&pl->phydev_mutex); } -- cgit v1.2.3 From d3b28612bc5500133260aaf36794a0a0c287d61b Mon Sep 17 00:00:00 2001 From: Jonas Rebmann Date: Fri, 5 Sep 2025 14:20:50 +0200 Subject: net: phy: NXP_TJA11XX: Update Kconfig with TJA1102 support Update the Kconfig description to indicate support for the TJA1102. Signed-off-by: Jonas Rebmann Link: https://patch.msgid.link/20250905-tja1102-kconfig-v1-1-a57e6ac4e264@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/phy/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/phy') diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 28acc6392cfc..392749aae54d 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -361,7 +361,7 @@ config NXP_TJA11XX_PHY tristate "NXP TJA11xx PHYs support" depends on HWMON help - Currently supports the NXP TJA1100 and TJA1101 PHY. + Currently supports the NXP TJA1100, TJA1101 and TJA1102 PHYs. config NCN26000_PHY tristate "Onsemi 10BASE-T1S Ethernet PHY" -- cgit v1.2.3 From e0d1c55501d377163eb57feed863777ed1c973ad Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sun, 7 Sep 2025 21:44:01 +0100 Subject: net: phy: fix phy_uses_state_machine() The blamed commit changed the conditions which phylib uses to stop and start the state machine in the suspend and resume paths, and while improving it, has caused two issues. The original code used this test: phydev->attached_dev && phydev->adjust_link and if true, the paths would handle the PHY state machine. This test evaluates true for normal drivers that are using phylib directly while the PHY is attached to the network device, but false in all other cases, which include the following cases: - when the PHY has never been attached to a network device. - when the PHY has been detached from a network device (as phy_detach() sets phydev->attached_dev to NULL, phy_disconnect() calls phy_detach() and additionally sets phydev->adjust_link NULL.) - when phylink is using the driver (as phydev->adjust_link is NULL.) Only the third case was incorrect, and the blamed commit attempted to fix this by changing this test to (simplified for brevity, see phy_uses_state_machine()): phydev->phy_link_change == phy_link_change ? phydev->attached_dev && phydev->adjust_link : true However, this also incorrectly evaluates true in the first two cases. Fix the first case by ensuring that phy_uses_state_machine() returns false when phydev->phy_link_change is NULL. Fix the second case by ensuring that phydev->phy_link_change is set to NULL when phy_detach() is called. Reported-by: Xu Yang Link: https://lore.kernel.org/r/20250806082931.3289134-1-xu.yang_2@nxp.com Fixes: fc75ea20ffb4 ("net: phy: allow MDIO bus PM ops to start/stop state machine for phylink-controlled PHY") Signed-off-by: Russell King (Oracle) Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/E1uvMEz-00000003Aoe-3qWe@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy_device.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/net/phy') diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 7556aa3dd7ee..c82c1997147b 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -287,8 +287,7 @@ static bool phy_uses_state_machine(struct phy_device *phydev) if (phydev->phy_link_change == phy_link_change) return phydev->attached_dev && phydev->adjust_link; - /* phydev->phy_link_change is implicitly phylink_phy_change() */ - return true; + return !!phydev->phy_link_change; } static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) @@ -1864,6 +1863,8 @@ void phy_detach(struct phy_device *phydev) phydev->attached_dev = NULL; phy_link_topo_del_phy(dev, phydev); } + + phydev->phy_link_change = NULL; phydev->phylink = NULL; if (!phydev->is_on_sfp_module) -- cgit v1.2.3