From 6648e613226e18897231ab5e42ffc29e63fa3365 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 4 Apr 2024 10:10:01 +0800 Subject: bpf, skmsg: Fix NULL pointer dereference in sk_psock_skb_ingress_enqueue Fix a NULL pointer data race in sk_psock_skb_ingress_enqueue() reported by syzbot [1]. [1] BUG: KCSAN: data-race in sk_psock_drop / sk_psock_skb_ingress_enqueue write to 0xffff88814b3278b8 of 8 bytes by task 10724 on cpu 1: sk_psock_stop_verdict net/core/skmsg.c:1257 [inline] sk_psock_drop+0x13e/0x1f0 net/core/skmsg.c:843 sk_psock_put include/linux/skmsg.h:459 [inline] sock_map_close+0x1a7/0x260 net/core/sock_map.c:1648 unix_release+0x4b/0x80 net/unix/af_unix.c:1048 __sock_release net/socket.c:659 [inline] sock_close+0x68/0x150 net/socket.c:1421 __fput+0x2c1/0x660 fs/file_table.c:422 __fput_sync+0x44/0x60 fs/file_table.c:507 __do_sys_close fs/open.c:1556 [inline] __se_sys_close+0x101/0x1b0 fs/open.c:1541 __x64_sys_close+0x1f/0x30 fs/open.c:1541 do_syscall_64+0xd3/0x1d0 entry_SYSCALL_64_after_hwframe+0x6d/0x75 read to 0xffff88814b3278b8 of 8 bytes by task 10713 on cpu 0: sk_psock_data_ready include/linux/skmsg.h:464 [inline] sk_psock_skb_ingress_enqueue+0x32d/0x390 net/core/skmsg.c:555 sk_psock_skb_ingress_self+0x185/0x1e0 net/core/skmsg.c:606 sk_psock_verdict_apply net/core/skmsg.c:1008 [inline] sk_psock_verdict_recv+0x3e4/0x4a0 net/core/skmsg.c:1202 unix_read_skb net/unix/af_unix.c:2546 [inline] unix_stream_read_skb+0x9e/0xf0 net/unix/af_unix.c:2682 sk_psock_verdict_data_ready+0x77/0x220 net/core/skmsg.c:1223 unix_stream_sendmsg+0x527/0x860 net/unix/af_unix.c:2339 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x140/0x180 net/socket.c:745 ____sys_sendmsg+0x312/0x410 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmsg+0x1e9/0x280 net/socket.c:2667 __do_sys_sendmsg net/socket.c:2676 [inline] __se_sys_sendmsg net/socket.c:2674 [inline] __x64_sys_sendmsg+0x46/0x50 net/socket.c:2674 do_syscall_64+0xd3/0x1d0 entry_SYSCALL_64_after_hwframe+0x6d/0x75 value changed: 0xffffffff83d7feb0 -> 0x0000000000000000 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 10713 Comm: syz-executor.4 Tainted: G W 6.8.0-syzkaller-08951-gfe46a7dd189e #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/29/2024 Prior to this, commit 4cd12c6065df ("bpf, sockmap: Fix NULL pointer dereference in sk_psock_verdict_data_ready()") fixed a similar NULL pointer dereference caused by the same lack of protection of saved_data_ready. Here, a different caller triggers the same issue for the same reason. So protect the access with the sk_callback_lock read lock, because the writer side in sk_psock_drop() uses "write_lock_bh(&sk->sk_callback_lock);". To avoid similar errors in the future, move both lock/unlock pairs into sk_psock_data_ready(), as suggested by John Fastabend.
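For reference, the pairing the fix relies on can be sketched as follows. This is a condensed illustration with hypothetical "example_" names, not verbatim kernel code: the teardown path (sk_psock_drop() -> sk_psock_stop_verdict(), the write side in the KCSAN report above) clears saved_data_ready under the write lock, so the read side must hold the read lock before dereferencing the pointer, which is exactly what the hunk below adds.

/* Condensed sketch of the writer/reader pairing; bodies are simplified
 * for illustration and do not mirror net/core/skmsg.c exactly.
 */

/* Write side: teardown restores and clears the saved callback under the
 * write lock.
 */
static void example_psock_teardown(struct sock *sk, struct sk_psock *psock)
{
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_data_ready = psock->saved_data_ready;
	psock->saved_data_ready = NULL;		/* the racing store seen by KCSAN */
	write_unlock_bh(&sk->sk_callback_lock);
}

/* Read side: the shape of sk_psock_data_ready() after this patch. */
static inline void example_psock_data_ready(struct sock *sk, struct sk_psock *psock)
{
	read_lock_bh(&sk->sk_callback_lock);
	if (psock->saved_data_ready)
		psock->saved_data_ready(sk);
	else
		sk->sk_data_ready(sk);
	read_unlock_bh(&sk->sk_callback_lock);
}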
Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") Reported-by: syzbot+aa8c8ec2538929f18f2d@syzkaller.appspotmail.com Signed-off-by: Jason Xing Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Closes: https://syzkaller.appspot.com/bug?extid=aa8c8ec2538929f18f2d Link: https://lore.kernel.org/all/20240329134037.92124-1-kerneljasonxing@gmail.com Link: https://lore.kernel.org/bpf/20240404021001.94815-1-kerneljasonxing@gmail.com --- include/linux/skmsg.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index e65ec3fd2799..a509caf823d6 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -461,10 +461,12 @@ static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock) static inline void sk_psock_data_ready(struct sock *sk, struct sk_psock *psock) { + read_lock_bh(&sk->sk_callback_lock); if (psock->saved_data_ready) psock->saved_data_ready(sk); else sk->sk_data_ready(sk); + read_unlock_bh(&sk->sk_callback_lock); } static inline void psock_set_prog(struct bpf_prog **pprog, -- cgit v1.2.3 From 70ee853eec5693fefd8348a2b049d9cb83362e58 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 8 Apr 2024 11:18:00 +0100 Subject: regmap: Add regmap_read_bypassed() Add a regmap_read_bypassed() to allow reads from the hardware registers while the regmap is in cache-only mode. A typical use for this is to keep the cache in cache-only mode until the hardware has reached a valid state, but one or more status registers must be polled to determine when this state is reached. For example, firmware download on the cs35l56 can take several seconds if there are multiple amps sharing limited bus bandwidth. This is too long to block in probe() so it is done as a background task. The device must be soft-reset to reboot the firmware and during this time the registers are not accessible, so the cache should be in cache-only. But the driver must poll a register to detect when reboot has completed. Signed-off-by: Richard Fitzgerald Fixes: 8a731fd37f8b ("ASoC: cs35l56: Move utility functions to shared file") Link: https://msgid.link/r/20240408101803.43183-2-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 37 +++++++++++++++++++++++++++++++++++++ include/linux/regmap.h | 8 ++++++++ 2 files changed, 45 insertions(+) (limited to 'include') diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 5cb425f6f02d..0a34dd3c4f38 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -2838,6 +2838,43 @@ int regmap_read(struct regmap *map, unsigned int reg, unsigned int *val) } EXPORT_SYMBOL_GPL(regmap_read); +/** + * regmap_read_bypassed() - Read a value from a single register direct + * from the device, bypassing the cache + * + * @map: Register map to read from + * @reg: Register to be read from + * @val: Pointer to store read value + * + * A value of zero will be returned on success, a negative errno will + * be returned in error cases. 
+ */ +int regmap_read_bypassed(struct regmap *map, unsigned int reg, unsigned int *val) +{ + int ret; + bool bypass, cache_only; + + if (!IS_ALIGNED(reg, map->reg_stride)) + return -EINVAL; + + map->lock(map->lock_arg); + + bypass = map->cache_bypass; + cache_only = map->cache_only; + map->cache_bypass = true; + map->cache_only = false; + + ret = _regmap_read(map, reg, val); + + map->cache_bypass = bypass; + map->cache_only = cache_only; + + map->unlock(map->lock_arg); + + return ret; +} +EXPORT_SYMBOL_GPL(regmap_read_bypassed); + /** * regmap_raw_read() - Read raw data from the device * diff --git a/include/linux/regmap.h b/include/linux/regmap.h index b743241cfb7c..d470303b1bbb 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1230,6 +1230,7 @@ int regmap_multi_reg_write_bypassed(struct regmap *map, int regmap_raw_write_async(struct regmap *map, unsigned int reg, const void *val, size_t val_len); int regmap_read(struct regmap *map, unsigned int reg, unsigned int *val); +int regmap_read_bypassed(struct regmap *map, unsigned int reg, unsigned int *val); int regmap_raw_read(struct regmap *map, unsigned int reg, void *val, size_t val_len); int regmap_noinc_read(struct regmap *map, unsigned int reg, @@ -1739,6 +1740,13 @@ static inline int regmap_read(struct regmap *map, unsigned int reg, return -EINVAL; } +static inline int regmap_read_bypassed(struct regmap *map, unsigned int reg, + unsigned int *val) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + static inline int regmap_raw_read(struct regmap *map, unsigned int reg, void *val, size_t val_len) { -- cgit v1.2.3 From dfd2ffb373999630a14d7ff614440f1c2fcc704c Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 8 Apr 2024 11:18:03 +0100 Subject: ASoC: cs35l56: Prevent overwriting firmware ASP config Only populate the ASP1 config registers in the regmap cache if the ASP DAI is used. This prevents regcache_sync() from overwriting these registers with their defaults when the firmware owns control of these registers. On a SoundWire system the ASP could be owned by the firmware to share reference audio with the firmware on other cs35l56. Or it can be used as a normal codec-codec interface owned by the driver. The driver must not overwrite the registers if the firmware has control of them. The original implementation for this in commit 07f7d6e7a124 ("ASoC: cs35l56: Fix for initializing ASP1 mixer registers") was to still provide defaults for these registers, assuming that if they were never reconfigured from defaults then regcache_sync() would not write them out because they are not dirty. Unfortunately regcache_sync() is not that smart. If the chip has not reset (so the driver has not called regcache_mark_dirty()) a regcache_sync() could write out registers that are not dirty. To avoid accidental overwriting of the ASP registers, they are removed from the table of defaults and instead are populated with defaults only if one of the ASP DAI configuration functions is called. So if the DAI has never been configured, the firmware is assumed to have ownership of these registers, and the regmap cache will not contain any entries for them. 
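The mechanism in the diff below boils down to a small pattern: leave the firmware-owned registers out of the reg_default table and seed them (hardware and regcache together) only when the driver takes ownership. A minimal sketch of that pattern follows; the struct, flag, register addresses and values are illustrative placeholders, not the actual cs35l56 driver code.

/* Sketch only. Because the registers have no reg_default entries and are
 * never written until here, regcache_sync() has nothing to restore for
 * them while the firmware owns them. The first driver write goes through
 * the cache, so later syncs restore the driver's values instead of stale
 * defaults.
 */
struct example_priv {
	struct regmap *regmap;
	bool fw_owns_asp;
};

static const struct reg_sequence example_asp_defaults[] = {
	REG_SEQ0(0x4800, 0x00000000),	/* placeholder register/value pairs */
	REG_SEQ0(0x4804, 0x00000028),
};

static int example_take_asp_ownership(struct example_priv *priv)
{
	if (!priv->fw_owns_asp)
		return 0;	/* already driver-owned, nothing to do */

	priv->fw_owns_asp = false;

	/* First driver write: seeds both the hardware and the regcache. */
	return regmap_multi_reg_write(priv->regmap, example_asp_defaults,
				      ARRAY_SIZE(example_asp_defaults));
}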
Signed-off-by: Richard Fitzgerald Fixes: 07f7d6e7a124 ("ASoC: cs35l56: Fix for initializing ASP1 mixer registers") Link: https://msgid.link/r/20240408101803.43183-5-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l56.h | 2 ++ sound/soc/codecs/cs35l56-shared.c | 63 ++++++++++++++++++++++++++------------- sound/soc/codecs/cs35l56.c | 24 ++++++++++++++- 3 files changed, 67 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index e0629699b563..1a3c6f66f620 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -267,6 +267,7 @@ struct cs35l56_base { bool fw_patched; bool secured; bool can_hibernate; + bool fw_owns_asp1; bool cal_data_valid; s8 cal_index; struct cirrus_amp_cal_data cal_data; @@ -283,6 +284,7 @@ extern const char * const cs35l56_tx_input_texts[CS35L56_NUM_INPUT_SRC]; extern const unsigned int cs35l56_tx_input_values[CS35L56_NUM_INPUT_SRC]; int cs35l56_set_patch(struct cs35l56_base *cs35l56_base); +int cs35l56_init_asp1_regs_for_driver_control(struct cs35l56_base *cs35l56_base); int cs35l56_force_sync_asp1_registers_from_cache(struct cs35l56_base *cs35l56_base); int cs35l56_mbox_send(struct cs35l56_base *cs35l56_base, unsigned int command); int cs35l56_firmware_shutdown(struct cs35l56_base *cs35l56_base); diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index a83317db75ed..ec1d95e57061 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -40,16 +40,11 @@ EXPORT_SYMBOL_NS_GPL(cs35l56_set_patch, SND_SOC_CS35L56_SHARED); static const struct reg_default cs35l56_reg_defaults[] = { /* no defaults for OTP_MEM - first read populates cache */ - { CS35L56_ASP1_ENABLES1, 0x00000000 }, - { CS35L56_ASP1_CONTROL1, 0x00000028 }, - { CS35L56_ASP1_CONTROL2, 0x18180200 }, - { CS35L56_ASP1_CONTROL3, 0x00000002 }, - { CS35L56_ASP1_FRAME_CONTROL1, 0x03020100 }, - { CS35L56_ASP1_FRAME_CONTROL5, 0x00020100 }, - { CS35L56_ASP1_DATA_CONTROL1, 0x00000018 }, - { CS35L56_ASP1_DATA_CONTROL5, 0x00000018 }, - - /* no defaults for ASP1TX mixer */ + /* + * No defaults for ASP1 control or ASP1TX mixer. See + * cs35l56_populate_asp1_register_defaults() and + * cs35l56_sync_asp1_mixer_widgets_with_firmware(). + */ { CS35L56_SWIRE_DP3_CH1_INPUT, 0x00000018 }, { CS35L56_SWIRE_DP3_CH2_INPUT, 0x00000019 }, @@ -210,6 +205,36 @@ static bool cs35l56_volatile_reg(struct device *dev, unsigned int reg) } } +static const struct reg_sequence cs35l56_asp1_defaults[] = { + REG_SEQ0(CS35L56_ASP1_ENABLES1, 0x00000000), + REG_SEQ0(CS35L56_ASP1_CONTROL1, 0x00000028), + REG_SEQ0(CS35L56_ASP1_CONTROL2, 0x18180200), + REG_SEQ0(CS35L56_ASP1_CONTROL3, 0x00000002), + REG_SEQ0(CS35L56_ASP1_FRAME_CONTROL1, 0x03020100), + REG_SEQ0(CS35L56_ASP1_FRAME_CONTROL5, 0x00020100), + REG_SEQ0(CS35L56_ASP1_DATA_CONTROL1, 0x00000018), + REG_SEQ0(CS35L56_ASP1_DATA_CONTROL5, 0x00000018), +}; + +/* + * The firmware can have control of the ASP so we don't provide regmap + * with defaults for these registers, to prevent a regcache_sync() from + * overwriting the firmware settings. But if the machine driver hooks up + * the ASP it means the driver is taking control of the ASP, so then the + * registers are populated with the defaults. 
+ */ +int cs35l56_init_asp1_regs_for_driver_control(struct cs35l56_base *cs35l56_base) +{ + if (!cs35l56_base->fw_owns_asp1) + return 0; + + cs35l56_base->fw_owns_asp1 = false; + + return regmap_multi_reg_write(cs35l56_base->regmap, cs35l56_asp1_defaults, + ARRAY_SIZE(cs35l56_asp1_defaults)); +} +EXPORT_SYMBOL_NS_GPL(cs35l56_init_asp1_regs_for_driver_control, SND_SOC_CS35L56_SHARED); + /* * The firmware boot sequence can overwrite the ASP1 config registers so that * they don't match regmap's view of their values. Rewrite the values from the @@ -217,19 +242,15 @@ static bool cs35l56_volatile_reg(struct device *dev, unsigned int reg) */ int cs35l56_force_sync_asp1_registers_from_cache(struct cs35l56_base *cs35l56_base) { - struct reg_sequence asp1_regs[] = { - { .reg = CS35L56_ASP1_ENABLES1 }, - { .reg = CS35L56_ASP1_CONTROL1 }, - { .reg = CS35L56_ASP1_CONTROL2 }, - { .reg = CS35L56_ASP1_CONTROL3 }, - { .reg = CS35L56_ASP1_FRAME_CONTROL1 }, - { .reg = CS35L56_ASP1_FRAME_CONTROL5 }, - { .reg = CS35L56_ASP1_DATA_CONTROL1 }, - { .reg = CS35L56_ASP1_DATA_CONTROL5 }, - }; + struct reg_sequence asp1_regs[ARRAY_SIZE(cs35l56_asp1_defaults)]; int i, ret; - /* Read values from regmap cache into a write sequence */ + if (cs35l56_base->fw_owns_asp1) + return 0; + + memcpy(asp1_regs, cs35l56_asp1_defaults, sizeof(asp1_regs)); + + /* Read current values from regmap cache into the write sequence */ for (i = 0; i < ARRAY_SIZE(asp1_regs); ++i) { ret = regmap_read(cs35l56_base->regmap, asp1_regs[i].reg, &asp1_regs[i].def); if (ret) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 5a4e0e479414..6331b8c6136e 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -454,9 +454,14 @@ static int cs35l56_asp_dai_set_fmt(struct snd_soc_dai *codec_dai, unsigned int f { struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(codec_dai->component); unsigned int val; + int ret; dev_dbg(cs35l56->base.dev, "%s: %#x\n", __func__, fmt); + ret = cs35l56_init_asp1_regs_for_driver_control(&cs35l56->base); + if (ret) + return ret; + switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { case SND_SOC_DAIFMT_CBC_CFC: break; @@ -530,6 +535,11 @@ static int cs35l56_asp_dai_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx unsigned int rx_mask, int slots, int slot_width) { struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(dai->component); + int ret; + + ret = cs35l56_init_asp1_regs_for_driver_control(&cs35l56->base); + if (ret) + return ret; if ((slots == 0) || (slot_width == 0)) { dev_dbg(cs35l56->base.dev, "tdm config cleared\n"); @@ -578,6 +588,11 @@ static int cs35l56_asp_dai_hw_params(struct snd_pcm_substream *substream, struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(dai->component); unsigned int rate = params_rate(params); u8 asp_width, asp_wl; + int ret; + + ret = cs35l56_init_asp1_regs_for_driver_control(&cs35l56->base); + if (ret) + return ret; asp_wl = params_width(params); if (cs35l56->asp_slot_width) @@ -634,7 +649,11 @@ static int cs35l56_asp_dai_set_sysclk(struct snd_soc_dai *dai, int clk_id, unsigned int freq, int dir) { struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(dai->component); - int freq_id; + int freq_id, ret; + + ret = cs35l56_init_asp1_regs_for_driver_control(&cs35l56->base); + if (ret) + return ret; if (freq == 0) { cs35l56->sysclk_set = false; @@ -1403,6 +1422,9 @@ int cs35l56_common_probe(struct cs35l56_private *cs35l56) cs35l56->base.cal_index = -1; cs35l56->speaker_id = -ENOENT; + /* Assume that the 
firmware owns ASP1 until we know different */ + cs35l56->base.fw_owns_asp1 = true; + dev_set_drvdata(cs35l56->base.dev, cs35l56); cs35l56_fill_supply_names(cs35l56->supplies); -- cgit v1.2.3 From ab9177d83c040eba58387914077ebca56f14fae6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 26 Mar 2024 22:08:54 +0100 Subject: wifi: mac80211: don't use rate mask for scanning The rate mask is intended for use during operation, and can be set to only have masks for the currently active band. As such, it cannot be used for scanning which can be on other bands as well. Simply ignore the rate masks during scanning to avoid warnings from incorrect settings. Reported-by: syzbot+fdc5123366fb9c3fdc6d@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=fdc5123366fb9c3fdc6d Co-developed-by: Dmitry Antipov Signed-off-by: Dmitry Antipov Tested-by: Dmitry Antipov Link: https://msgid.link/20240326220854.9594cbb418ca.I7f86c0ba1f98cf7e27c2bacf6c2d417200ecea5c@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 3 +++ net/mac80211/rate.c | 6 +++++- net/mac80211/scan.c | 1 + net/mac80211/tx.c | 13 +++++++++---- 4 files changed, 18 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 353488ab94a2..2d7f87bc5324 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -953,6 +953,8 @@ enum mac80211_tx_info_flags { * of their QoS TID or other priority field values. * @IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX: first MLO TX, used mostly internally * for sequence number assignment + * @IEEE80211_TX_CTRL_SCAN_TX: Indicates that this frame is transmitted + * due to scanning, not in normal operation on the interface. * @IEEE80211_TX_CTRL_MLO_LINK: If not @IEEE80211_LINK_UNSPECIFIED, this * frame should be transmitted on the specific link. 
This really is * only relevant for frames that do not have data present, and is @@ -973,6 +975,7 @@ enum mac80211_tx_control_flags { IEEE80211_TX_CTRL_NO_SEQNO = BIT(7), IEEE80211_TX_CTRL_DONT_REORDER = BIT(8), IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX = BIT(9), + IEEE80211_TX_CTRL_SCAN_TX = BIT(10), IEEE80211_TX_CTRL_MLO_LINK = 0xf0000000, }; diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 23404b275457..4dc1def69548 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -877,6 +877,7 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif, struct ieee80211_sub_if_data *sdata; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_supported_band *sband; + u32 mask = ~0; rate_control_fill_sta_table(sta, info, dest, max_rates); @@ -889,9 +890,12 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif, if (ieee80211_is_tx_data(skb)) rate_control_apply_mask(sdata, sta, sband, dest, max_rates); + if (!(info->control.flags & IEEE80211_TX_CTRL_SCAN_TX)) + mask = sdata->rc_rateidx_mask[info->band]; + if (dest[0].idx < 0) __rate_control_send_low(&sdata->local->hw, sband, sta, info, - sdata->rc_rateidx_mask[info->band]); + mask); if (sta) rate_fixup_ratelist(vif, sband, info, dest, max_rates); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 0429e59ba387..73850312580f 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -648,6 +648,7 @@ static void ieee80211_send_scan_probe_req(struct ieee80211_sub_if_data *sdata, cpu_to_le16(IEEE80211_SN_TO_SEQ(sn)); } IEEE80211_SKB_CB(skb)->flags |= tx_flags; + IEEE80211_SKB_CB(skb)->control.flags |= IEEE80211_TX_CTRL_SCAN_TX; ieee80211_tx_skb_tid_band(sdata, skb, 7, channel->band); } } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 6bf223e6cd1a..cfd0a62d0152 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -698,11 +698,16 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) txrc.bss_conf = &tx->sdata->vif.bss_conf; txrc.skb = tx->skb; txrc.reported_rate.idx = -1; - txrc.rate_idx_mask = tx->sdata->rc_rateidx_mask[info->band]; - if (tx->sdata->rc_has_mcs_mask[info->band]) - txrc.rate_idx_mcs_mask = - tx->sdata->rc_rateidx_mcs_mask[info->band]; + if (unlikely(info->control.flags & IEEE80211_TX_CTRL_SCAN_TX)) { + txrc.rate_idx_mask = ~0; + } else { + txrc.rate_idx_mask = tx->sdata->rc_rateidx_mask[info->band]; + + if (tx->sdata->rc_has_mcs_mask[info->band]) + txrc.rate_idx_mcs_mask = + tx->sdata->rc_rateidx_mcs_mask[info->band]; + } txrc.bss = (tx->sdata->vif.type == NL80211_IFTYPE_AP || tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT || -- cgit v1.2.3 From ed09f81eeaa8f9265e1787282cb283f10285c259 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Sat, 6 Apr 2024 15:01:09 +0200 Subject: firmware: qcom: uefisecapp: Fix memory related IO errors and crashes It turns out that while the QSEECOM APP_SEND command has specific fields for request and response buffers, uefisecapp expects them both to be in a single memory region. Failure to adhere to this has (so far) resulted in either no response being written to the response buffer (causing an EIO to be emitted down the line), the SCM call to fail with EINVAL (i.e., directly from TZ/firmware), or the device to be hard-reset. While this issue can be triggered deterministically, in the current form it seems to happen rather sporadically (which is why it has gone unnoticed during earlier testing). This is likely due to the two kzalloc() calls (for request and response) being directly after each other. 
Which means that those likely return consecutive regions most of the time, especially when not much else is going on in the system. Fix this by allocating a single memory region for both request and response buffers, properly aligning both structs inside it. This unfortunately also means that the qcom_scm_qseecom_app_send() interface needs to be restructured, as it should no longer map the DMA regions separately. Therefore, move the responsibility of DMA allocation (or mapping) to the caller. Fixes: 759e7a2b62eb ("firmware: Add support for Qualcomm UEFI Secure Application") Cc: stable@vger.kernel.org # 6.7 Tested-by: Johan Hovold Reviewed-by: Johan Hovold Signed-off-by: Maximilian Luz Tested-by: Konrad Dybcio # X13s Link: https://lore.kernel.org/r/20240406130125.1047436-1-luzmaximilian@gmail.com Signed-off-by: Bjorn Andersson --- drivers/firmware/qcom/qcom_qseecom_uefisecapp.c | 137 ++++++++++++++++-------- drivers/firmware/qcom/qcom_scm.c | 37 ++----- include/linux/firmware/qcom/qcom_qseecom.h | 55 +++++++++- include/linux/firmware/qcom/qcom_scm.h | 10 +- 4 files changed, 153 insertions(+), 86 deletions(-) (limited to 'include') diff --git a/drivers/firmware/qcom/qcom_qseecom_uefisecapp.c b/drivers/firmware/qcom/qcom_qseecom_uefisecapp.c index 32188f098ef3..bc550ad0dbe0 100644 --- a/drivers/firmware/qcom/qcom_qseecom_uefisecapp.c +++ b/drivers/firmware/qcom/qcom_qseecom_uefisecapp.c @@ -221,6 +221,19 @@ struct qsee_rsp_uefi_query_variable_info { * alignment of 8 bytes (64 bits) for GUIDs. Our definition of efi_guid_t, * however, has an alignment of 4 byte (32 bits). So far, this seems to work * fine here. See also the comment on the typedef of efi_guid_t. + * + * Note: It looks like uefisecapp is quite picky about how the memory passed to + * it is structured and aligned. In particular the request/response setup used + * for QSEE_CMD_UEFI_GET_VARIABLE. While qcom_qseecom_app_send(), in theory, + * accepts separate buffers/addresses for the request and response parts, in + * practice, however, it seems to expect them to be both part of a larger + * contiguous block. We initially allocated separate buffers for the request + * and response but this caused the QSEE_CMD_UEFI_GET_VARIABLE command to + * either not write any response to the response buffer or outright crash the + * device. Therefore, we now allocate a single contiguous block of DMA memory + * for both and properly align the data using the macros below. In particular, + * request and response structs are aligned at 8 byte (via __reqdata_offs()), + * following the driver that this has been reverse-engineered from. */ #define qcuefi_buf_align_fields(fields...) 
\ ({ \ @@ -244,6 +257,12 @@ struct qsee_rsp_uefi_query_variable_info { #define __array_offs(type, count, offset) \ __field_impl(sizeof(type) * (count), __alignof__(type), offset) +#define __array_offs_aligned(type, count, align, offset) \ + __field_impl(sizeof(type) * (count), align, offset) + +#define __reqdata_offs(size, offset) \ + __array_offs_aligned(u8, size, 8, offset) + #define __array(type, count) __array_offs(type, count, NULL) #define __field_offs(type, offset) __array_offs(type, 1, offset) #define __field(type) __array_offs(type, 1, NULL) @@ -277,10 +296,15 @@ static efi_status_t qsee_uefi_get_variable(struct qcuefi_client *qcuefi, const e unsigned long buffer_size = *data_size; efi_status_t efi_status = EFI_SUCCESS; unsigned long name_length; + dma_addr_t cmd_buf_dma; + size_t cmd_buf_size; + void *cmd_buf; size_t guid_offs; size_t name_offs; size_t req_size; size_t rsp_size; + size_t req_offs; + size_t rsp_offs; ssize_t status; if (!name || !guid) @@ -304,17 +328,19 @@ static efi_status_t qsee_uefi_get_variable(struct qcuefi_client *qcuefi, const e __array(u8, buffer_size) ); - req_data = kzalloc(req_size, GFP_KERNEL); - if (!req_data) { + cmd_buf_size = qcuefi_buf_align_fields( + __reqdata_offs(req_size, &req_offs) + __reqdata_offs(rsp_size, &rsp_offs) + ); + + cmd_buf = qseecom_dma_alloc(qcuefi->client, cmd_buf_size, &cmd_buf_dma, GFP_KERNEL); + if (!cmd_buf) { efi_status = EFI_OUT_OF_RESOURCES; goto out; } - rsp_data = kzalloc(rsp_size, GFP_KERNEL); - if (!rsp_data) { - efi_status = EFI_OUT_OF_RESOURCES; - goto out_free_req; - } + req_data = cmd_buf + req_offs; + rsp_data = cmd_buf + rsp_offs; req_data->command_id = QSEE_CMD_UEFI_GET_VARIABLE; req_data->data_size = buffer_size; @@ -332,7 +358,9 @@ static efi_status_t qsee_uefi_get_variable(struct qcuefi_client *qcuefi, const e memcpy(((void *)req_data) + req_data->guid_offset, guid, req_data->guid_size); - status = qcom_qseecom_app_send(qcuefi->client, req_data, req_size, rsp_data, rsp_size); + status = qcom_qseecom_app_send(qcuefi->client, + cmd_buf_dma + req_offs, req_size, + cmd_buf_dma + rsp_offs, rsp_size); if (status) { efi_status = EFI_DEVICE_ERROR; goto out_free; @@ -407,9 +435,7 @@ static efi_status_t qsee_uefi_get_variable(struct qcuefi_client *qcuefi, const e memcpy(data, ((void *)rsp_data) + rsp_data->data_offset, rsp_data->data_size); out_free: - kfree(rsp_data); -out_free_req: - kfree(req_data); + qseecom_dma_free(qcuefi->client, cmd_buf_size, cmd_buf, cmd_buf_dma); out: return efi_status; } @@ -422,10 +448,15 @@ static efi_status_t qsee_uefi_set_variable(struct qcuefi_client *qcuefi, const e struct qsee_rsp_uefi_set_variable *rsp_data; efi_status_t efi_status = EFI_SUCCESS; unsigned long name_length; + dma_addr_t cmd_buf_dma; + size_t cmd_buf_size; + void *cmd_buf; size_t name_offs; size_t guid_offs; size_t data_offs; size_t req_size; + size_t req_offs; + size_t rsp_offs; ssize_t status; if (!name || !guid) @@ -450,17 +481,19 @@ static efi_status_t qsee_uefi_set_variable(struct qcuefi_client *qcuefi, const e __array_offs(u8, data_size, &data_offs) ); - req_data = kzalloc(req_size, GFP_KERNEL); - if (!req_data) { + cmd_buf_size = qcuefi_buf_align_fields( + __reqdata_offs(req_size, &req_offs) + __reqdata_offs(sizeof(*rsp_data), &rsp_offs) + ); + + cmd_buf = qseecom_dma_alloc(qcuefi->client, cmd_buf_size, &cmd_buf_dma, GFP_KERNEL); + if (!cmd_buf) { efi_status = EFI_OUT_OF_RESOURCES; goto out; } - rsp_data = kzalloc(sizeof(*rsp_data), GFP_KERNEL); - if (!rsp_data) { - efi_status = EFI_OUT_OF_RESOURCES; - goto 
out_free_req; - } + req_data = cmd_buf + req_offs; + rsp_data = cmd_buf + rsp_offs; req_data->command_id = QSEE_CMD_UEFI_SET_VARIABLE; req_data->attributes = attributes; @@ -483,8 +516,9 @@ static efi_status_t qsee_uefi_set_variable(struct qcuefi_client *qcuefi, const e if (data_size) memcpy(((void *)req_data) + req_data->data_offset, data, req_data->data_size); - status = qcom_qseecom_app_send(qcuefi->client, req_data, req_size, rsp_data, - sizeof(*rsp_data)); + status = qcom_qseecom_app_send(qcuefi->client, + cmd_buf_dma + req_offs, req_size, + cmd_buf_dma + rsp_offs, sizeof(*rsp_data)); if (status) { efi_status = EFI_DEVICE_ERROR; goto out_free; @@ -507,9 +541,7 @@ static efi_status_t qsee_uefi_set_variable(struct qcuefi_client *qcuefi, const e } out_free: - kfree(rsp_data); -out_free_req: - kfree(req_data); + qseecom_dma_free(qcuefi->client, cmd_buf_size, cmd_buf, cmd_buf_dma); out: return efi_status; } @@ -521,10 +553,15 @@ static efi_status_t qsee_uefi_get_next_variable(struct qcuefi_client *qcuefi, struct qsee_req_uefi_get_next_variable *req_data; struct qsee_rsp_uefi_get_next_variable *rsp_data; efi_status_t efi_status = EFI_SUCCESS; + dma_addr_t cmd_buf_dma; + size_t cmd_buf_size; + void *cmd_buf; size_t guid_offs; size_t name_offs; size_t req_size; size_t rsp_size; + size_t req_offs; + size_t rsp_offs; ssize_t status; if (!name_size || !name || !guid) @@ -545,17 +582,19 @@ static efi_status_t qsee_uefi_get_next_variable(struct qcuefi_client *qcuefi, __array(*name, *name_size / sizeof(*name)) ); - req_data = kzalloc(req_size, GFP_KERNEL); - if (!req_data) { + cmd_buf_size = qcuefi_buf_align_fields( + __reqdata_offs(req_size, &req_offs) + __reqdata_offs(rsp_size, &rsp_offs) + ); + + cmd_buf = qseecom_dma_alloc(qcuefi->client, cmd_buf_size, &cmd_buf_dma, GFP_KERNEL); + if (!cmd_buf) { efi_status = EFI_OUT_OF_RESOURCES; goto out; } - rsp_data = kzalloc(rsp_size, GFP_KERNEL); - if (!rsp_data) { - efi_status = EFI_OUT_OF_RESOURCES; - goto out_free_req; - } + req_data = cmd_buf + req_offs; + rsp_data = cmd_buf + rsp_offs; req_data->command_id = QSEE_CMD_UEFI_GET_NEXT_VARIABLE; req_data->guid_offset = guid_offs; @@ -572,7 +611,9 @@ static efi_status_t qsee_uefi_get_next_variable(struct qcuefi_client *qcuefi, goto out_free; } - status = qcom_qseecom_app_send(qcuefi->client, req_data, req_size, rsp_data, rsp_size); + status = qcom_qseecom_app_send(qcuefi->client, + cmd_buf_dma + req_offs, req_size, + cmd_buf_dma + rsp_offs, rsp_size); if (status) { efi_status = EFI_DEVICE_ERROR; goto out_free; @@ -645,9 +686,7 @@ static efi_status_t qsee_uefi_get_next_variable(struct qcuefi_client *qcuefi, } out_free: - kfree(rsp_data); -out_free_req: - kfree(req_data); + qseecom_dma_free(qcuefi->client, cmd_buf_size, cmd_buf, cmd_buf_dma); out: return efi_status; } @@ -659,26 +698,34 @@ static efi_status_t qsee_uefi_query_variable_info(struct qcuefi_client *qcuefi, struct qsee_req_uefi_query_variable_info *req_data; struct qsee_rsp_uefi_query_variable_info *rsp_data; efi_status_t efi_status = EFI_SUCCESS; + dma_addr_t cmd_buf_dma; + size_t cmd_buf_size; + void *cmd_buf; + size_t req_offs; + size_t rsp_offs; int status; - req_data = kzalloc(sizeof(*req_data), GFP_KERNEL); - if (!req_data) { + cmd_buf_size = qcuefi_buf_align_fields( + __reqdata_offs(sizeof(*req_data), &req_offs) + __reqdata_offs(sizeof(*rsp_data), &rsp_offs) + ); + + cmd_buf = qseecom_dma_alloc(qcuefi->client, cmd_buf_size, &cmd_buf_dma, GFP_KERNEL); + if (!cmd_buf) { efi_status = EFI_OUT_OF_RESOURCES; goto out; } - rsp_data = 
kzalloc(sizeof(*rsp_data), GFP_KERNEL); - if (!rsp_data) { - efi_status = EFI_OUT_OF_RESOURCES; - goto out_free_req; - } + req_data = cmd_buf + req_offs; + rsp_data = cmd_buf + rsp_offs; req_data->command_id = QSEE_CMD_UEFI_QUERY_VARIABLE_INFO; req_data->attributes = attr; req_data->length = sizeof(*req_data); - status = qcom_qseecom_app_send(qcuefi->client, req_data, sizeof(*req_data), rsp_data, - sizeof(*rsp_data)); + status = qcom_qseecom_app_send(qcuefi->client, + cmd_buf_dma + req_offs, sizeof(*req_data), + cmd_buf_dma + rsp_offs, sizeof(*rsp_data)); if (status) { efi_status = EFI_DEVICE_ERROR; goto out_free; @@ -711,9 +758,7 @@ static efi_status_t qsee_uefi_query_variable_info(struct qcuefi_client *qcuefi, *max_variable_size = rsp_data->max_variable_size; out_free: - kfree(rsp_data); -out_free_req: - kfree(req_data); + qseecom_dma_free(qcuefi->client, cmd_buf_size, cmd_buf, cmd_buf_dma); out: return efi_status; } diff --git a/drivers/firmware/qcom/qcom_scm.c b/drivers/firmware/qcom/qcom_scm.c index 520de9b5633a..90283f160a22 100644 --- a/drivers/firmware/qcom/qcom_scm.c +++ b/drivers/firmware/qcom/qcom_scm.c @@ -1576,9 +1576,9 @@ EXPORT_SYMBOL_GPL(qcom_scm_qseecom_app_get_id); /** * qcom_scm_qseecom_app_send() - Send to and receive data from a given QSEE app. * @app_id: The ID of the target app. - * @req: Request buffer sent to the app (must be DMA-mappable). + * @req: DMA address of the request buffer sent to the app. * @req_size: Size of the request buffer. - * @rsp: Response buffer, written to by the app (must be DMA-mappable). + * @rsp: DMA address of the response buffer, written to by the app. * @rsp_size: Size of the response buffer. * * Sends a request to the QSEE app associated with the given ID and read back @@ -1589,33 +1589,13 @@ EXPORT_SYMBOL_GPL(qcom_scm_qseecom_app_get_id); * * Return: Zero on success, nonzero on failure. 
*/ -int qcom_scm_qseecom_app_send(u32 app_id, void *req, size_t req_size, void *rsp, - size_t rsp_size) +int qcom_scm_qseecom_app_send(u32 app_id, dma_addr_t req, size_t req_size, + dma_addr_t rsp, size_t rsp_size) { struct qcom_scm_qseecom_resp res = {}; struct qcom_scm_desc desc = {}; - dma_addr_t req_phys; - dma_addr_t rsp_phys; int status; - /* Map request buffer */ - req_phys = dma_map_single(__scm->dev, req, req_size, DMA_TO_DEVICE); - status = dma_mapping_error(__scm->dev, req_phys); - if (status) { - dev_err(__scm->dev, "qseecom: failed to map request buffer\n"); - return status; - } - - /* Map response buffer */ - rsp_phys = dma_map_single(__scm->dev, rsp, rsp_size, DMA_FROM_DEVICE); - status = dma_mapping_error(__scm->dev, rsp_phys); - if (status) { - dma_unmap_single(__scm->dev, req_phys, req_size, DMA_TO_DEVICE); - dev_err(__scm->dev, "qseecom: failed to map response buffer\n"); - return status; - } - - /* Set up SCM call data */ desc.owner = QSEECOM_TZ_OWNER_TZ_APPS; desc.svc = QSEECOM_TZ_SVC_APP_ID_PLACEHOLDER; desc.cmd = QSEECOM_TZ_CMD_APP_SEND; @@ -1623,18 +1603,13 @@ int qcom_scm_qseecom_app_send(u32 app_id, void *req, size_t req_size, void *rsp, QCOM_SCM_RW, QCOM_SCM_VAL, QCOM_SCM_RW, QCOM_SCM_VAL); desc.args[0] = app_id; - desc.args[1] = req_phys; + desc.args[1] = req; desc.args[2] = req_size; - desc.args[3] = rsp_phys; + desc.args[3] = rsp; desc.args[4] = rsp_size; - /* Perform call */ status = qcom_scm_qseecom_call(&desc, &res); - /* Unmap buffers */ - dma_unmap_single(__scm->dev, rsp_phys, rsp_size, DMA_FROM_DEVICE); - dma_unmap_single(__scm->dev, req_phys, req_size, DMA_TO_DEVICE); - if (status) return status; diff --git a/include/linux/firmware/qcom/qcom_qseecom.h b/include/linux/firmware/qcom/qcom_qseecom.h index 5c28298a98be..366243ee9609 100644 --- a/include/linux/firmware/qcom/qcom_qseecom.h +++ b/include/linux/firmware/qcom/qcom_qseecom.h @@ -10,6 +10,7 @@ #define __QCOM_QSEECOM_H #include +#include #include #include @@ -24,12 +25,57 @@ struct qseecom_client { u32 app_id; }; +/** + * qseecom_scm_dev() - Get the SCM device associated with the QSEECOM client. + * @client: The QSEECOM client device. + * + * Returns the SCM device under which the provided QSEECOM client device + * operates. This function is intended to be used for DMA allocations. + */ +static inline struct device *qseecom_scm_dev(struct qseecom_client *client) +{ + return client->aux_dev.dev.parent->parent; +} + +/** + * qseecom_dma_alloc() - Allocate DMA memory for a QSEECOM client. + * @client: The QSEECOM client to allocate the memory for. + * @size: The number of bytes to allocate. + * @dma_handle: Pointer to where the DMA address should be stored. + * @gfp: Allocation flags. + * + * Wrapper function for dma_alloc_coherent(), allocating DMA memory usable for + * TZ/QSEECOM communication. Refer to dma_alloc_coherent() for details. + */ +static inline void *qseecom_dma_alloc(struct qseecom_client *client, size_t size, + dma_addr_t *dma_handle, gfp_t gfp) +{ + return dma_alloc_coherent(qseecom_scm_dev(client), size, dma_handle, gfp); +} + +/** + * dma_free_coherent() - Free QSEECOM DMA memory. + * @client: The QSEECOM client for which the memory has been allocated. + * @size: The number of bytes allocated. + * @cpu_addr: Virtual memory address to free. + * @dma_handle: DMA memory address to free. + * + * Wrapper function for dma_free_coherent(), freeing memory previously + * allocated with qseecom_dma_alloc(). Refer to dma_free_coherent() for + * details. 
+ */ +static inline void qseecom_dma_free(struct qseecom_client *client, size_t size, + void *cpu_addr, dma_addr_t dma_handle) +{ + return dma_free_coherent(qseecom_scm_dev(client), size, cpu_addr, dma_handle); +} + /** * qcom_qseecom_app_send() - Send to and receive data from a given QSEE app. * @client: The QSEECOM client associated with the target app. - * @req: Request buffer sent to the app (must be DMA-mappable). + * @req: DMA address of the request buffer sent to the app. * @req_size: Size of the request buffer. - * @rsp: Response buffer, written to by the app (must be DMA-mappable). + * @rsp: DMA address of the response buffer, written to by the app. * @rsp_size: Size of the response buffer. * * Sends a request to the QSEE app associated with the given client and read @@ -43,8 +89,9 @@ struct qseecom_client { * * Return: Zero on success, nonzero on failure. */ -static inline int qcom_qseecom_app_send(struct qseecom_client *client, void *req, size_t req_size, - void *rsp, size_t rsp_size) +static inline int qcom_qseecom_app_send(struct qseecom_client *client, + dma_addr_t req, size_t req_size, + dma_addr_t rsp, size_t rsp_size) { return qcom_scm_qseecom_app_send(client->app_id, req, req_size, rsp, rsp_size); } diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h index ccaf28846054..aaa19f93ac43 100644 --- a/include/linux/firmware/qcom/qcom_scm.h +++ b/include/linux/firmware/qcom/qcom_scm.h @@ -118,8 +118,8 @@ bool qcom_scm_lmh_dcvsh_available(void); #ifdef CONFIG_QCOM_QSEECOM int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id); -int qcom_scm_qseecom_app_send(u32 app_id, void *req, size_t req_size, void *rsp, - size_t rsp_size); +int qcom_scm_qseecom_app_send(u32 app_id, dma_addr_t req, size_t req_size, + dma_addr_t rsp, size_t rsp_size); #else /* CONFIG_QCOM_QSEECOM */ @@ -128,9 +128,9 @@ static inline int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id) return -EINVAL; } -static inline int qcom_scm_qseecom_app_send(u32 app_id, void *req, - size_t req_size, void *rsp, - size_t rsp_size) +static inline int qcom_scm_qseecom_app_send(u32 app_id, + dma_addr_t req, size_t req_size, + dma_addr_t rsp, size_t rsp_size) { return -EINVAL; } -- cgit v1.2.3 From 32cf5a4eda464d76d553ee3f1b06c4d33d796c52 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 19 Apr 2024 11:51:12 -0400 Subject: Revert "svcrdma: Add Write chunk WRs to the RPC's Send WR chain" Performance regression reported with NFS/RDMA using Omnipath, bisected to commit e084ee673c77 ("svcrdma: Add Write chunk WRs to the RPC's Send WR chain"). Tracing on the server reports: nfsd-7771 [060] 1758.891809: svcrdma_sq_post_err: cq.id=205 cid=226 sc_sq_avail=13643/851 status=-12 sq_post_err reports ENOMEM, and the rdma->sc_sq_avail (13643) is larger than rdma->sc_sq_depth (851). The number of available Send Queue entries is always supposed to be smaller than the Send Queue depth. That seems like a Send Queue accounting bug in svcrdma. As it's getting to be late in the 6.9-rc cycle, revert this commit. It can be revisited in a subsequent kernel release. 
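The invariant the trace above violates (the available Send Queue entry count must never exceed the queue depth) could be asserted directly. A hypothetical debugging helper, not part of this revert, might look like the sketch below; it assumes sc_sq_avail is an atomic_t and sc_sq_depth an unsigned int, as in include/linux/sunrpc/svc_rdma.h at the time of writing.

/* Hypothetical sanity check, not part of this revert: flag the broken
 * accounting seen in the trace (sc_sq_avail=13643 vs sc_sq_depth=851).
 */
static inline void example_check_sq_accounting(struct svcxprt_rdma *rdma)
{
	int avail = atomic_read(&rdma->sc_sq_avail);

	WARN_ONCE(avail > (int)rdma->sc_sq_depth,
		  "svcrdma: sc_sq_avail=%d exceeds sc_sq_depth=%u\n",
		  avail, rdma->sc_sq_depth);
}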
Link: https://bugzilla.kernel.org/show_bug.cgi?id=218743 Fixes: e084ee673c77 ("svcrdma: Add Write chunk WRs to the RPC's Send WR chain") Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 13 ++---- net/sunrpc/xprtrdma/svc_rdma_rw.c | 86 +++++++++-------------------------- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 5 +- 3 files changed, 26 insertions(+), 78 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 24cd199dd6f3..d33bab33099a 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -210,7 +210,6 @@ struct svc_rdma_recv_ctxt { */ struct svc_rdma_write_info { struct svcxprt_rdma *wi_rdma; - struct list_head wi_list; const struct svc_rdma_chunk *wi_chunk; @@ -239,10 +238,7 @@ struct svc_rdma_send_ctxt { struct ib_cqe sc_cqe; struct xdr_buf sc_hdrbuf; struct xdr_stream sc_stream; - - struct list_head sc_write_info_list; struct svc_rdma_write_info sc_reply_info; - void *sc_xprt_buf; int sc_page_count; int sc_cur_sge_no; @@ -274,14 +270,11 @@ extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma, extern void svc_rdma_cc_release(struct svcxprt_rdma *rdma, struct svc_rdma_chunk_ctxt *cc, enum dma_data_direction dir); -extern void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma, - struct svc_rdma_send_ctxt *ctxt); extern void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt); -extern int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma, - const struct svc_rdma_pcl *write_pcl, - struct svc_rdma_send_ctxt *sctxt, - const struct xdr_buf *xdr); +extern int svc_rdma_send_write_list(struct svcxprt_rdma *rdma, + const struct svc_rdma_recv_ctxt *rctxt, + const struct xdr_buf *xdr); extern int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma, const struct svc_rdma_pcl *write_pcl, const struct svc_rdma_pcl *reply_pcl, diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index f2a100c4c81f..40797114d50a 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -230,28 +230,6 @@ static void svc_rdma_write_info_free(struct svc_rdma_write_info *info) queue_work(svcrdma_wq, &info->wi_work); } -/** - * svc_rdma_write_chunk_release - Release Write chunk I/O resources - * @rdma: controlling transport - * @ctxt: Send context that is being released - */ -void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma, - struct svc_rdma_send_ctxt *ctxt) -{ - struct svc_rdma_write_info *info; - struct svc_rdma_chunk_ctxt *cc; - - while (!list_empty(&ctxt->sc_write_info_list)) { - info = list_first_entry(&ctxt->sc_write_info_list, - struct svc_rdma_write_info, wi_list); - list_del(&info->wi_list); - - cc = &info->wi_cc; - svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount); - svc_rdma_write_info_free(info); - } -} - /** * svc_rdma_reply_chunk_release - Release Reply chunk I/O resources * @rdma: controlling transport @@ -308,11 +286,13 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) struct ib_cqe *cqe = wc->wr_cqe; struct svc_rdma_chunk_ctxt *cc = container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe); + struct svc_rdma_write_info *info = + container_of(cc, struct svc_rdma_write_info, wi_cc); switch (wc->status) { case IB_WC_SUCCESS: trace_svcrdma_wc_write(&cc->cc_cid); - return; + break; case IB_WC_WR_FLUSH_ERR: trace_svcrdma_wc_write_flush(wc, &cc->cc_cid); break; @@ -320,11 +300,12 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) 
trace_svcrdma_wc_write_err(wc, &cc->cc_cid); } - /* The RDMA Write has flushed, so the client won't get - * some of the outgoing RPC message. Signal the loss - * to the client by closing the connection. - */ - svc_xprt_deferred_close(&rdma->sc_xprt); + svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount); + + if (unlikely(wc->status != IB_WC_SUCCESS)) + svc_xprt_deferred_close(&rdma->sc_xprt); + + svc_rdma_write_info_free(info); } /** @@ -620,19 +601,13 @@ static int svc_rdma_xb_write(const struct xdr_buf *xdr, void *data) return xdr->len; } -/* Link Write WRs for @chunk onto @sctxt's WR chain. - */ -static int svc_rdma_prepare_write_chunk(struct svcxprt_rdma *rdma, - struct svc_rdma_send_ctxt *sctxt, - const struct svc_rdma_chunk *chunk, - const struct xdr_buf *xdr) +static int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, + const struct svc_rdma_chunk *chunk, + const struct xdr_buf *xdr) { struct svc_rdma_write_info *info; struct svc_rdma_chunk_ctxt *cc; - struct ib_send_wr *first_wr; struct xdr_buf payload; - struct list_head *pos; - struct ib_cqe *cqe; int ret; if (xdr_buf_subsegment(xdr, &payload, chunk->ch_position, @@ -648,25 +623,10 @@ static int svc_rdma_prepare_write_chunk(struct svcxprt_rdma *rdma, if (ret != payload.len) goto out_err; - ret = -EINVAL; - if (unlikely(cc->cc_sqecount > rdma->sc_sq_depth)) - goto out_err; - - first_wr = sctxt->sc_wr_chain; - cqe = &cc->cc_cqe; - list_for_each(pos, &cc->cc_rwctxts) { - struct svc_rdma_rw_ctxt *rwc; - - rwc = list_entry(pos, struct svc_rdma_rw_ctxt, rw_list); - first_wr = rdma_rw_ctx_wrs(&rwc->rw_ctx, rdma->sc_qp, - rdma->sc_port_num, cqe, first_wr); - cqe = NULL; - } - sctxt->sc_wr_chain = first_wr; - sctxt->sc_sqecount += cc->cc_sqecount; - list_add(&info->wi_list, &sctxt->sc_write_info_list); - trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount); + ret = svc_rdma_post_chunk_ctxt(rdma, cc); + if (ret < 0) + goto out_err; return 0; out_err: @@ -675,27 +635,25 @@ out_err: } /** - * svc_rdma_prepare_write_list - Construct WR chain for sending Write list + * svc_rdma_send_write_list - Send all chunks on the Write list * @rdma: controlling RDMA transport - * @write_pcl: Write list provisioned by the client - * @sctxt: Send WR resources + * @rctxt: Write list provisioned by the client * @xdr: xdr_buf containing an RPC Reply message * * Returns zero on success, or a negative errno if one or more * Write chunks could not be sent. 
*/ -int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma, - const struct svc_rdma_pcl *write_pcl, - struct svc_rdma_send_ctxt *sctxt, - const struct xdr_buf *xdr) +int svc_rdma_send_write_list(struct svcxprt_rdma *rdma, + const struct svc_rdma_recv_ctxt *rctxt, + const struct xdr_buf *xdr) { struct svc_rdma_chunk *chunk; int ret; - pcl_for_each_chunk(chunk, write_pcl) { + pcl_for_each_chunk(chunk, &rctxt->rc_write_pcl) { if (!chunk->ch_payload_length) break; - ret = svc_rdma_prepare_write_chunk(rdma, sctxt, chunk, xdr); + ret = svc_rdma_send_write_chunk(rdma, chunk, xdr); if (ret < 0) return ret; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index dfca39abd16c..bb5436b719e0 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -142,7 +142,6 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma) ctxt->sc_send_wr.sg_list = ctxt->sc_sges; ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED; ctxt->sc_cqe.done = svc_rdma_wc_send; - INIT_LIST_HEAD(&ctxt->sc_write_info_list); ctxt->sc_xprt_buf = buffer; xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf, rdma->sc_max_req_size); @@ -228,7 +227,6 @@ static void svc_rdma_send_ctxt_release(struct svcxprt_rdma *rdma, struct ib_device *device = rdma->sc_cm_id->device; unsigned int i; - svc_rdma_write_chunk_release(rdma, ctxt); svc_rdma_reply_chunk_release(rdma, ctxt); if (ctxt->sc_page_count) @@ -1015,8 +1013,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) if (!p) goto put_ctxt; - ret = svc_rdma_prepare_write_list(rdma, &rctxt->rc_write_pcl, sctxt, - &rqstp->rq_res); + ret = svc_rdma_send_write_list(rdma, rctxt, &rqstp->rq_res); if (ret < 0) goto put_ctxt; -- cgit v1.2.3 From 98a821546b3919a10a58faa12ebe5e9a55cd638e Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Mon, 15 Apr 2024 19:10:47 +0800 Subject: vDPA: code clean for vhost_vdpa uapi This commit cleans up the uapi for vhost_vdpa by better naming some of the enums which report blk information to user space, and they are not in any official releases yet. Fixes: 1ac61ddfee93 ("vDPA: report virtio-blk flush info to user space") Fixes: ae1374b7f72c ("vDPA: report virtio-block read-only info to user space") Fixes: 330b8aea6924 ("vDPA: report virtio-block max segment size to user space") Signed-off-by: Zhu Lingshan Message-Id: <20240415111047.1047774-1-lingshan.zhu@intel.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 6 +++--- include/uapi/linux/vdpa.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index b246067e074b..6cb96a1e8b7d 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -967,7 +967,7 @@ vdpa_dev_blk_seg_size_config_fill(struct sk_buff *msg, u64 features, val_u32 = __virtio32_to_cpu(true, config->size_max); - return nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_SEG_SIZE, val_u32); + return nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_SIZE_MAX, val_u32); } /* fill the block size*/ @@ -1089,7 +1089,7 @@ static int vdpa_dev_blk_ro_config_fill(struct sk_buff *msg, u64 features) u8 ro; ro = ((features & BIT_ULL(VIRTIO_BLK_F_RO)) == 0) ? 0 : 1; - if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_CFG_READ_ONLY, ro)) + if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_READ_ONLY, ro)) return -EMSGSIZE; return 0; @@ -1100,7 +1100,7 @@ static int vdpa_dev_blk_flush_config_fill(struct sk_buff *msg, u64 features) u8 flush; flush = ((features & BIT_ULL(VIRTIO_BLK_F_FLUSH)) == 0) ? 
0 : 1; - if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_CFG_FLUSH, flush)) + if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_FLUSH, flush)) return -EMSGSIZE; return 0; diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h index 43c51698195c..842bf1201ac4 100644 --- a/include/uapi/linux/vdpa.h +++ b/include/uapi/linux/vdpa.h @@ -57,7 +57,7 @@ enum vdpa_attr { VDPA_ATTR_DEV_FEATURES, /* u64 */ VDPA_ATTR_DEV_BLK_CFG_CAPACITY, /* u64 */ - VDPA_ATTR_DEV_BLK_CFG_SEG_SIZE, /* u32 */ + VDPA_ATTR_DEV_BLK_CFG_SIZE_MAX, /* u32 */ VDPA_ATTR_DEV_BLK_CFG_BLK_SIZE, /* u32 */ VDPA_ATTR_DEV_BLK_CFG_SEG_MAX, /* u32 */ VDPA_ATTR_DEV_BLK_CFG_NUM_QUEUES, /* u16 */ @@ -70,8 +70,8 @@ enum vdpa_attr { VDPA_ATTR_DEV_BLK_CFG_DISCARD_SEC_ALIGN,/* u32 */ VDPA_ATTR_DEV_BLK_CFG_MAX_WRITE_ZEROES_SEC, /* u32 */ VDPA_ATTR_DEV_BLK_CFG_MAX_WRITE_ZEROES_SEG, /* u32 */ - VDPA_ATTR_DEV_BLK_CFG_READ_ONLY, /* u8 */ - VDPA_ATTR_DEV_BLK_CFG_FLUSH, /* u8 */ + VDPA_ATTR_DEV_BLK_READ_ONLY, /* u8 */ + VDPA_ATTR_DEV_BLK_FLUSH, /* u8 */ /* new attributes must be added above here */ VDPA_ATTR_MAX, -- cgit v1.2.3 From 96e20adc43c4f81e9163a5188cee75a6dd393e09 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 22 Apr 2024 09:38:33 +0300 Subject: regulator: change stubbed devm_regulator_get_enable to return Ok The devm_regulator_get_enable() should be a 'call and forget' API: when it is used to enable regulators, the API does not provide a handle for any further control of those regulators. It gives no real benefit to return an error from the stub if CONFIG_REGULATOR is not set. On the contrary, returning an error causes problems for drivers when the hardware works just fine without regulator control. Returning an error forces drivers to specifically handle the case where CONFIG_REGULATOR is not set, making the mere existence of the stub questionable. Furthermore, the regulator_enable() stub already returns Ok. Change the stub implementation of devm_regulator_get_enable() to return Ok so drivers do not need to separately handle the case where CONFIG_REGULATOR is not set. Signed-off-by: Matti Vaittinen Reported-by: Aleksander Mazur Suggested-by: Guenter Roeck Fixes: da279e6965b3 ("regulator: Add devm helpers for get and enable") Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/ZiYF6d1V1vSPcsJS@drtxq0yyyyyyyyyyyyyby-3.rev.dnainternet.fi Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 4660582a3302..71232fb7dda3 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -320,7 +320,7 @@ devm_regulator_get_exclusive(struct device *dev, const char *id) static inline int devm_regulator_get_enable(struct device *dev, const char *id) { - return -ENODEV; + return 0; } static inline int devm_regulator_get_enable_optional(struct device *dev, -- cgit v1.2.3 From ff33132605c1a0acea59e4c523cb7c6fabe856b2 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Tue, 23 Apr 2024 14:38:28 +0300 Subject: regulator: change devm_regulator_get_enable_optional() stub to return Ok The devm_regulator_get_enable_optional() should be a 'call and forget' API: when it is used to enable regulators, the API does not provide a handle for any further control of those regulators. It gives no real benefit to return an error from the stub if CONFIG_REGULATOR is not set.
On the contrary, returning an error causes problems for drivers when the hardware works just fine without regulator control. Returning an error forces drivers to specifically handle the case where CONFIG_REGULATOR is not set, making the mere existence of the stub questionable. Change the stub implementation of devm_regulator_get_enable_optional() to return Ok so drivers do not need to separately handle the case where CONFIG_REGULATOR is not set. Signed-off-by: Matti Vaittinen Fixes: da279e6965b3 ("regulator: Add devm helpers for get and enable") Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/ZiedtOE00Zozd3XO@fedora Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 71232fb7dda3..ed180ca419da 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -326,7 +326,7 @@ static inline int devm_regulator_get_enable(struct device *dev, const char *id) static inline int devm_regulator_get_enable_optional(struct device *dev, const char *id) { - return -ENODEV; + return 0; } static inline struct regulator *__must_check -- cgit v1.2.3 From 3584718cf2ec7e79b6814f2596dcf398c5fb2eca Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 21 Apr 2024 17:52:48 +0000 Subject: net: fix sk_memory_allocated_{add|sub} vs softirqs Jonathan Heathcote reported a regression on the aarch64 architecture caused by the blamed commit. x86 happens to have irq-safe __this_cpu_add_return() and __this_cpu_sub(), but this is not generic. I think my confusion came from the "struct sock" argument, because these helpers are called with a locked socket. But the memory accounting is per-proto (and per-cpu after the blamed commit). We might clean up these helpers later to directly accept a "struct proto *proto" argument. Switch to this_cpu_add_return() and this_cpu_xchg() operations, and get rid of the preempt_disable()/preempt_enable() pairs. The fast path becomes a bit faster as a result :) Many thanks to Jonathan Heathcote for his awesome report and investigations.
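The architectural point can be shown with a stand-alone per-cpu counter (illustrative only, with made-up "example_" names, not the socket code): __this_cpu_*() operations are only IRQ-safe where the architecture happens to make them so (as on x86), while this_cpu_*() operations are safe even when the same counter is also updated from softirq context, which is what the hunk below switches to.

/* Illustrative per-cpu reserve, updated from both process and softirq
 * context. this_cpu_add_return()/this_cpu_xchg() are single IRQ-safe
 * operations on every architecture; the __this_cpu_*() variants would
 * additionally need preemption/IRQ exclusion around them.
 */
static DEFINE_PER_CPU(int, example_reserve);

static void example_drain(atomic_long_t *global)
{
	int val = this_cpu_xchg(example_reserve, 0);

	if (val)
		atomic_long_add(val, global);
}

static void example_charge(atomic_long_t *global, int amt, int threshold)
{
	if (this_cpu_add_return(example_reserve, amt) >= threshold)
		example_drain(global);
}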
Fixes: 3cd3399dd7a8 ("net: implement per-cpu reserves for memory_allocated") Reported-by: Jonathan Heathcote Closes: https://lore.kernel.org/netdev/VI1PR01MB42407D7947B2EA448F1E04EFD10D2@VI1PR01MB4240.eurprd01.prod.exchangelabs.com/ Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: Shakeel Butt Link: https://lore.kernel.org/r/20240421175248.1692552-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index f57bfd8a2ad2..b4b553df7870 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1410,32 +1410,34 @@ sk_memory_allocated(const struct sock *sk) #define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT)) extern int sysctl_mem_pcpu_rsv; +static inline void proto_memory_pcpu_drain(struct proto *proto) +{ + int val = this_cpu_xchg(*proto->per_cpu_fw_alloc, 0); + + if (val) + atomic_long_add(val, proto->memory_allocated); +} + static inline void -sk_memory_allocated_add(struct sock *sk, int amt) +sk_memory_allocated_add(const struct sock *sk, int val) { - int local_reserve; + struct proto *proto = sk->sk_prot; - preempt_disable(); - local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt); - if (local_reserve >= READ_ONCE(sysctl_mem_pcpu_rsv)) { - __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve); - atomic_long_add(local_reserve, sk->sk_prot->memory_allocated); - } - preempt_enable(); + val = this_cpu_add_return(*proto->per_cpu_fw_alloc, val); + + if (unlikely(val >= READ_ONCE(sysctl_mem_pcpu_rsv))) + proto_memory_pcpu_drain(proto); } static inline void -sk_memory_allocated_sub(struct sock *sk, int amt) +sk_memory_allocated_sub(const struct sock *sk, int val) { - int local_reserve; + struct proto *proto = sk->sk_prot; - preempt_disable(); - local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt); - if (local_reserve <= -READ_ONCE(sysctl_mem_pcpu_rsv)) { - __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve); - atomic_long_add(local_reserve, sk->sk_prot->memory_allocated); - } - preempt_enable(); + val = this_cpu_sub_return(*proto->per_cpu_fw_alloc, val); + + if (unlikely(val <= -READ_ONCE(sysctl_mem_pcpu_rsv))) + proto_memory_pcpu_drain(proto); } #define SK_ALLOC_PERCPU_COUNTER_BATCH 16 -- cgit v1.2.3 From 2e7ed5f5e69b6fe93dd3c6b651d041e0a7a456d1 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 5 Apr 2024 16:40:33 -0400 Subject: Bluetooth: hci_sync: Use advertised PHYs on hci_le_ext_create_conn_sync Extended advertising reports include the PHYs the peer advertises on, so store them in hci_conn so they can later be used in hci_le_ext_create_conn_sync() to narrow the PHYs to be scanned. Since the controller will also perform a scan, having a smaller set of PHYs should reduce the time it takes to find and connect to peers.
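Conceptually, the stored advertising PHYs let the connect path build a narrower scanning-PHY mask for the extended create-connection command. A rough sketch of that idea follows; the constants and the helpers are illustrative placeholders (the real PHY definitions live in include/net/bluetooth/hci.h), not the actual hci_sync.c change.

/* Illustrative only: map the PHY a peer was seen advertising on to a
 * scanning-PHY bit, and fall back to all supported PHYs when nothing
 * was recorded. Constant names are placeholders.
 */
#define EX_PHY_1M	0x01
#define EX_PHY_2M	0x02
#define EX_PHY_CODED	0x03

#define EX_SCAN_1M	BIT(0)
#define EX_SCAN_2M	BIT(1)
#define EX_SCAN_CODED	BIT(2)

static u8 example_phy_to_scan_bit(u8 phy)
{
	switch (phy) {
	case EX_PHY_1M:
		return EX_SCAN_1M;
	case EX_PHY_2M:
		return EX_SCAN_2M;
	case EX_PHY_CODED:
		return EX_SCAN_CODED;
	default:
		return 0;
	}
}

static u8 example_scan_phys(u8 adv_phy, u8 adv_sec_phy, u8 supported)
{
	u8 phys = example_phy_to_scan_bit(adv_phy) |
		  example_phy_to_scan_bit(adv_sec_phy);

	return phys ? (phys & supported) : supported;
}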
Fixes: 288c90224eec ("Bluetooth: Enable all supported LE PHY by default") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 4 +++- net/bluetooth/hci_conn.c | 6 ++++-- net/bluetooth/hci_event.c | 20 ++++++++++++-------- net/bluetooth/hci_sync.c | 9 ++++++--- net/bluetooth/l2cap_core.c | 2 +- 5 files changed, 26 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 56fb42df44a3..02af7d7013da 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -738,6 +738,8 @@ struct hci_conn { __u8 le_per_adv_data[HCI_MAX_PER_AD_TOT_LEN]; __u16 le_per_adv_data_len; __u16 le_per_adv_data_offset; + __u8 le_adv_phy; + __u8 le_adv_sec_phy; __u8 le_tx_phy; __u8 le_rx_phy; __s8 rssi; @@ -1512,7 +1514,7 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, enum conn_reasons conn_reason); struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, bool dst_resolved, u8 sec_level, - u16 conn_timeout, u8 role); + u16 conn_timeout, u8 role, u8 phy, u8 sec_phy); void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status); struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, u8 sec_level, u8 auth_type, diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 3ad74f76983b..05346250f719 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1263,7 +1263,7 @@ u8 hci_conn_set_handle(struct hci_conn *conn, u16 handle) struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, bool dst_resolved, u8 sec_level, - u16 conn_timeout, u8 role) + u16 conn_timeout, u8 role, u8 phy, u8 sec_phy) { struct hci_conn *conn; struct smp_irk *irk; @@ -1326,6 +1326,8 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, conn->dst_type = dst_type; conn->sec_level = BT_SECURITY_LOW; conn->conn_timeout = conn_timeout; + conn->le_adv_phy = phy; + conn->le_adv_sec_phy = sec_phy; err = hci_connect_le_sync(hdev, conn); if (err) { @@ -2273,7 +2275,7 @@ struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst, le = hci_connect_le(hdev, dst, dst_type, false, BT_SECURITY_LOW, HCI_LE_CONN_TIMEOUT, - HCI_ROLE_SLAVE); + HCI_ROLE_SLAVE, 0, 0); else le = hci_connect_le_scan(hdev, dst, dst_type, BT_SECURITY_LOW, diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index a8b8cfebe018..4d70402e295f 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -6038,7 +6038,7 @@ static void hci_le_conn_update_complete_evt(struct hci_dev *hdev, void *data, static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type, bool addr_resolved, - u8 adv_type) + u8 adv_type, u8 phy, u8 sec_phy) { struct hci_conn *conn; struct hci_conn_params *params; @@ -6093,7 +6093,7 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev, conn = hci_connect_le(hdev, addr, addr_type, addr_resolved, BT_SECURITY_LOW, hdev->def_le_autoconnect_timeout, - HCI_ROLE_MASTER); + HCI_ROLE_MASTER, phy, sec_phy); if (!IS_ERR(conn)) { /* If HCI_AUTO_CONN_EXPLICIT is set, conn is already owned * by higher layer that tried to connect, if no then @@ -6128,8 +6128,9 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev, static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, u8 bdaddr_type, bdaddr_t *direct_addr, - u8 direct_addr_type, s8 rssi, u8 *data, u8 len, - bool ext_adv, bool ctl_time, 
u64 instant) + u8 direct_addr_type, u8 phy, u8 sec_phy, s8 rssi, + u8 *data, u8 len, bool ext_adv, bool ctl_time, + u64 instant) { struct discovery_state *d = &hdev->discovery; struct smp_irk *irk; @@ -6217,7 +6218,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, * for advertising reports) and is already verified to be RPA above. */ conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, bdaddr_resolved, - type); + type, phy, sec_phy); if (!ext_adv && conn && type == LE_ADV_IND && len <= max_adv_len(hdev)) { /* Store report for later inclusion by @@ -6363,7 +6364,8 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, void *data, if (info->length <= max_adv_len(hdev)) { rssi = info->data[info->length]; process_adv_report(hdev, info->type, &info->bdaddr, - info->bdaddr_type, NULL, 0, rssi, + info->bdaddr_type, NULL, 0, + HCI_ADV_PHY_1M, 0, rssi, info->data, info->length, false, false, instant); } else { @@ -6448,6 +6450,8 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, void *data, if (legacy_evt_type != LE_ADV_INVALID) { process_adv_report(hdev, legacy_evt_type, &info->bdaddr, info->bdaddr_type, NULL, 0, + info->primary_phy, + info->secondary_phy, info->rssi, info->data, info->length, !(evt_type & LE_EXT_ADV_LEGACY_PDU), false, instant); @@ -6730,8 +6734,8 @@ static void hci_le_direct_adv_report_evt(struct hci_dev *hdev, void *data, process_adv_report(hdev, info->type, &info->bdaddr, info->bdaddr_type, &info->direct_addr, - info->direct_addr_type, info->rssi, NULL, 0, - false, false, instant); + info->direct_addr_type, HCI_ADV_PHY_1M, 0, + info->rssi, NULL, 0, false, false, instant); } hci_dev_unlock(hdev); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index c5d8799046cc..4c707eb64e6f 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -6346,7 +6346,8 @@ static int hci_le_ext_create_conn_sync(struct hci_dev *hdev, plen = sizeof(*cp); - if (scan_1m(hdev)) { + if (scan_1m(hdev) && (conn->le_adv_phy == HCI_ADV_PHY_1M || + conn->le_adv_sec_phy == HCI_ADV_PHY_1M)) { cp->phys |= LE_SCAN_PHY_1M; set_ext_conn_params(conn, p); @@ -6354,7 +6355,8 @@ static int hci_le_ext_create_conn_sync(struct hci_dev *hdev, plen += sizeof(*p); } - if (scan_2m(hdev)) { + if (scan_2m(hdev) && (conn->le_adv_phy == HCI_ADV_PHY_2M || + conn->le_adv_sec_phy == HCI_ADV_PHY_2M)) { cp->phys |= LE_SCAN_PHY_2M; set_ext_conn_params(conn, p); @@ -6362,7 +6364,8 @@ static int hci_le_ext_create_conn_sync(struct hci_dev *hdev, plen += sizeof(*p); } - if (scan_coded(hdev)) { + if (scan_coded(hdev) && (conn->le_adv_phy == HCI_ADV_PHY_CODED || + conn->le_adv_sec_phy == HCI_ADV_PHY_CODED)) { cp->phys |= LE_SCAN_PHY_CODED; set_ext_conn_params(conn, p); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index dc0897408793..84fc70862d78 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7018,7 +7018,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) hcon = hci_connect_le(hdev, dst, dst_type, false, chan->sec_level, timeout, - HCI_ROLE_SLAVE); + HCI_ROLE_SLAVE, 0, 0); else hcon = hci_connect_le_scan(hdev, dst, dst_type, chan->sec_level, timeout, -- cgit v1.2.3 From a9a830a676a9a93c5020f5c61236166931fa4266 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 15 Apr 2024 13:41:01 -0400 Subject: Bluetooth: hci_event: Fix sending HCI_OP_READ_ENC_KEY_SIZE The code shall always check if HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE has 
been set before attempting to use HCI_OP_READ_ENC_KEY_SIZE. Fixes: c569242cd492 ("Bluetooth: hci_event: set the conn encrypted before conn establishes") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 4 ++++ net/bluetooth/hci_event.c | 5 ++--- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 02af7d7013da..e8f581f3f3ce 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1907,6 +1907,10 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define privacy_mode_capable(dev) (use_ll_privacy(dev) && \ (hdev->commands[39] & 0x04)) +#define read_key_size_capable(dev) \ + ((dev)->commands[20] & 0x10 && \ + !test_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &hdev->quirks)) + /* Use enhanced synchronous connection if command is supported and its quirk * has not been set. */ diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 4d70402e295f..4a27e4a17a67 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3218,7 +3218,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, if (key) { set_bit(HCI_CONN_ENCRYPT, &conn->flags); - if (!(hdev->commands[20] & 0x10)) { + if (!read_key_size_capable(hdev)) { conn->enc_key_size = HCI_LINK_KEY_SIZE; } else { cp.handle = cpu_to_le16(conn->handle); @@ -3666,8 +3666,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, void *data, * controller really supports it. If it doesn't, assume * the default size (16). */ - if (!(hdev->commands[20] & 0x10) || - test_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &hdev->quirks)) { + if (!read_key_size_capable(hdev)) { conn->enc_key_size = HCI_LINK_KEY_SIZE; goto notify; } -- cgit v1.2.3 From 12bbaae7635a56049779db3bef6e7140d9aa5f67 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 21 Mar 2024 14:24:40 +0000 Subject: mm: create FOLIO_FLAG_FALSE and FOLIO_TYPE_OPS macros Following the separation of FOLIO_FLAGS from PAGEFLAGS, separate FOLIO_FLAG_FALSE from PAGEFLAG_FALSE and FOLIO_TYPE_OPS from PAGE_TYPE_OPS. 
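For readers unfamiliar with the pattern, this is what the FALSE/NOOP stubs buy, shown as a small standalone sketch (simplified, not the kernel's page-flags.h; "mlocked" is just a placeholder flag name):

#include <stdbool.h>
#include <stdio.h>

struct folio { unsigned long flags; };

/* When a feature is compiled out, generate inline test/set/clear helpers
 * that compile to nothing, so callers need no #ifdef. */
#define FOLIO_TEST_FLAG_FALSE(name) \
static inline bool folio_test_##name(const struct folio *folio) \
{ (void)folio; return false; }

#define FOLIO_SET_FLAG_NOOP(name) \
static inline void folio_set_##name(struct folio *folio) { (void)folio; }

#define FOLIO_CLEAR_FLAG_NOOP(name) \
static inline void folio_clear_##name(struct folio *folio) { (void)folio; }

#define FOLIO_FLAG_FALSE(name) \
FOLIO_TEST_FLAG_FALSE(name) \
FOLIO_SET_FLAG_NOOP(name) \
FOLIO_CLEAR_FLAG_NOOP(name)

/* Pretend "mlocked" support is compiled out in this build. */
FOLIO_FLAG_FALSE(mlocked)

int main(void)
{
        struct folio f = { 0 };

        folio_set_mlocked(&f);                            /* no-op */
        printf("mlocked: %d\n", folio_test_mlocked(&f));  /* always 0 */
        return 0;
}

Callers can then use folio_test_mlocked()/folio_set_mlocked() unconditionally; when the feature is configured in, the same names come from the real flag definitions instead.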
Link: https://lkml.kernel.org/r/20240321142448.1645400-3-willy@infradead.org Fixes: 9c5ccf2db04b ("mm: remove HUGETLB_PAGE_DTOR") Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Acked-by: Vlastimil Babka Cc: Miaohe Lin Cc: Muchun Song Cc: Oscar Salvador Cc: Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 70 +++++++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 652d77805e99..dc1607f1415e 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -458,30 +458,51 @@ static __always_inline int TestClearPage##uname(struct page *page) \ TESTSETFLAG(uname, lname, policy) \ TESTCLEARFLAG(uname, lname, policy) +#define FOLIO_TEST_FLAG_FALSE(name) \ +static inline bool folio_test_##name(const struct folio *folio) \ +{ return false; } +#define FOLIO_SET_FLAG_NOOP(name) \ +static inline void folio_set_##name(struct folio *folio) { } +#define FOLIO_CLEAR_FLAG_NOOP(name) \ +static inline void folio_clear_##name(struct folio *folio) { } +#define __FOLIO_SET_FLAG_NOOP(name) \ +static inline void __folio_set_##name(struct folio *folio) { } +#define __FOLIO_CLEAR_FLAG_NOOP(name) \ +static inline void __folio_clear_##name(struct folio *folio) { } +#define FOLIO_TEST_SET_FLAG_FALSE(name) \ +static inline bool folio_test_set_##name(struct folio *folio) \ +{ return false; } +#define FOLIO_TEST_CLEAR_FLAG_FALSE(name) \ +static inline bool folio_test_clear_##name(struct folio *folio) \ +{ return false; } + +#define FOLIO_FLAG_FALSE(name) \ +FOLIO_TEST_FLAG_FALSE(name) \ +FOLIO_SET_FLAG_NOOP(name) \ +FOLIO_CLEAR_FLAG_NOOP(name) + #define TESTPAGEFLAG_FALSE(uname, lname) \ -static inline bool folio_test_##lname(const struct folio *folio) { return false; } \ +FOLIO_TEST_FLAG_FALSE(lname) \ static inline int Page##uname(const struct page *page) { return 0; } #define SETPAGEFLAG_NOOP(uname, lname) \ -static inline void folio_set_##lname(struct folio *folio) { } \ +FOLIO_SET_FLAG_NOOP(lname) \ static inline void SetPage##uname(struct page *page) { } #define CLEARPAGEFLAG_NOOP(uname, lname) \ -static inline void folio_clear_##lname(struct folio *folio) { } \ +FOLIO_CLEAR_FLAG_NOOP(lname) \ static inline void ClearPage##uname(struct page *page) { } #define __CLEARPAGEFLAG_NOOP(uname, lname) \ -static inline void __folio_clear_##lname(struct folio *folio) { } \ +__FOLIO_CLEAR_FLAG_NOOP(lname) \ static inline void __ClearPage##uname(struct page *page) { } #define TESTSETFLAG_FALSE(uname, lname) \ -static inline bool folio_test_set_##lname(struct folio *folio) \ -{ return 0; } \ +FOLIO_TEST_SET_FLAG_FALSE(lname) \ static inline int TestSetPage##uname(struct page *page) { return 0; } #define TESTCLEARFLAG_FALSE(uname, lname) \ -static inline bool folio_test_clear_##lname(struct folio *folio) \ -{ return 0; } \ +FOLIO_TEST_CLEAR_FLAG_FALSE(lname) \ static inline int TestClearPage##uname(struct page *page) { return 0; } #define PAGEFLAG_FALSE(uname, lname) TESTPAGEFLAG_FALSE(uname, lname) \ @@ -977,35 +998,38 @@ static inline int page_has_type(const struct page *page) return page_type_has_type(page->page_type); } +#define FOLIO_TYPE_OPS(lname, fname) \ +static __always_inline bool folio_test_##fname(const struct folio *folio)\ +{ \ + return folio_test_type(folio, PG_##lname); \ +} \ +static __always_inline void __folio_set_##fname(struct folio *folio) \ +{ \ + VM_BUG_ON_FOLIO(!folio_test_type(folio, 0), folio); \ + folio->page.page_type 
&= ~PG_##lname; \ +} \ +static __always_inline void __folio_clear_##fname(struct folio *folio) \ +{ \ + VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \ + folio->page.page_type |= PG_##lname; \ +} + #define PAGE_TYPE_OPS(uname, lname, fname) \ +FOLIO_TYPE_OPS(lname, fname) \ static __always_inline int Page##uname(const struct page *page) \ { \ return PageType(page, PG_##lname); \ } \ -static __always_inline int folio_test_##fname(const struct folio *folio)\ -{ \ - return folio_test_type(folio, PG_##lname); \ -} \ static __always_inline void __SetPage##uname(struct page *page) \ { \ VM_BUG_ON_PAGE(!PageType(page, 0), page); \ page->page_type &= ~PG_##lname; \ } \ -static __always_inline void __folio_set_##fname(struct folio *folio) \ -{ \ - VM_BUG_ON_FOLIO(!folio_test_type(folio, 0), folio); \ - folio->page.page_type &= ~PG_##lname; \ -} \ static __always_inline void __ClearPage##uname(struct page *page) \ { \ VM_BUG_ON_PAGE(!Page##uname(page), page); \ page->page_type |= PG_##lname; \ -} \ -static __always_inline void __folio_clear_##fname(struct folio *folio) \ -{ \ - VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \ - folio->page.page_type |= PG_##lname; \ -} \ +} /* * PageBuddy() indicates that the page is free and in the buddy system -- cgit v1.2.3 From fd1a745ce03e37945674c14833870a9af0882e2d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 21 Mar 2024 14:24:42 +0000 Subject: mm: support page_mapcount() on page_has_type() pages Return 0 for pages which can't be mapped. This matches how page_mapped() works. It is more convenient for users to not have to filter out these pages. Link: https://lkml.kernel.org/r/20240321142448.1645400-5-willy@infradead.org Fixes: 9c5ccf2db04b ("mm: remove HUGETLB_PAGE_DTOR") Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Acked-by: Vlastimil Babka Cc: Miaohe Lin Cc: Muchun Song Cc: Oscar Salvador Cc: Signed-off-by: Andrew Morton --- fs/proc/page.c | 7 ++----- include/linux/mm.h | 8 +++++--- include/linux/page-flags.h | 4 ++-- 3 files changed, 9 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/fs/proc/page.c b/fs/proc/page.c index 195b077c0fac..9223856c934b 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -67,7 +67,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf, */ ppage = pfn_to_online_page(pfn); - if (!ppage || PageSlab(ppage) || page_has_type(ppage)) + if (!ppage) pcount = 0; else pcount = page_mapcount(ppage); @@ -124,11 +124,8 @@ u64 stable_page_flags(struct page *page) /* * pseudo flags for the well known (anonymous) memory mapped pages - * - * Note that page->_mapcount is overloaded in SLAB, so the - * simple test in page_mapped() is not enough. */ - if (!PageSlab(page) && page_mapped(page)) + if (page_mapped(page)) u |= 1 << KPF_MMAP; if (PageAnon(page)) u |= 1 << KPF_ANON; diff --git a/include/linux/mm.h b/include/linux/mm.h index 7b0ee64225de..b6bdaa18b9e9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1223,14 +1223,16 @@ static inline void page_mapcount_reset(struct page *page) * a large folio, it includes the number of times this page is mapped * as part of that folio. * - * The result is undefined for pages which cannot be mapped into userspace. - * For example SLAB or special types of pages. See function page_has_type(). - * They use this field in struct page differently. + * Will report 0 for pages which cannot be mapped into userspace, eg + * slab, page tables and similar. 
*/ static inline int page_mapcount(struct page *page) { int mapcount = atomic_read(&page->_mapcount) + 1; + /* Handle page_has_type() pages */ + if (mapcount < 0) + mapcount = 0; if (unlikely(PageCompound(page))) mapcount += folio_entire_mapcount(page_folio(page)); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index dc1607f1415e..35a0087d0910 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -971,12 +971,12 @@ static inline bool is_page_hwpoison(struct page *page) * page_type may be used. Because it is initialised to -1, we invert the * sense of the bit, so __SetPageFoo *clears* the bit used for PageFoo, and * __ClearPageFoo *sets* the bit used for PageFoo. We reserve a few high and - * low bits so that an underflow or overflow of page_mapcount() won't be + * low bits so that an underflow or overflow of _mapcount won't be * mistaken for a page type value. */ #define PAGE_TYPE_BASE 0xf0000000 -/* Reserve 0x0000007f to catch underflows of page_mapcount */ +/* Reserve 0x0000007f to catch underflows of _mapcount */ #define PAGE_MAPCOUNT_RESERVE -128 #define PG_buddy 0x00000080 #define PG_offline 0x00000100 -- cgit v1.2.3 From d99e3140a4d33e26066183ff727d8f02f56bec64 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 21 Mar 2024 14:24:43 +0000 Subject: mm: turn folio_test_hugetlb into a PageType The current folio_test_hugetlb() can be fooled by a concurrent folio split into returning true for a folio which has never belonged to hugetlbfs. This can't happen if the caller holds a refcount on it, but we have a few places (memory-failure, compaction, procfs) which do not and should not take a speculative reference. Since hugetlb pages do not use individual page mapcounts (they are always fully mapped and use the entire_mapcount field to record the number of mappings), the PageType field is available now that page_mapcount() ignores the value in this field. In compaction and with CONFIG_DEBUG_VM enabled, the current implementation can result in an oops, as reported by Luis. This happens since 9c5ccf2db04b ("mm: remove HUGETLB_PAGE_DTOR") effectively added some VM_BUG_ON() checks in the PageHuge() testing path. 
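A minimal userspace sketch of the encoding this relies on (the PAGE_TYPE_BASE and PG_hugetlb values are the ones appearing in the hunks below; the helper name and the demo itself are made up):

#include <stdbool.h>
#include <stdio.h>

#define PAGE_TYPE_BASE  0xf0000000u
#define PG_hugetlb      0x00000800u

struct page { unsigned int page_type; };

/* page_type starts at -1 (all ones); a type is "set" by clearing its bit,
 * so a page has a given type when the base bits are intact and the type
 * bit is zero. */
static bool page_has_type_bit(const struct page *page, unsigned int flag)
{
        return (page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE;
}

int main(void)
{
        struct page page = { .page_type = ~0u };        /* freshly initialised */

        printf("hugetlb? %d\n", page_has_type_bit(&page, PG_hugetlb)); /* 0 */
        page.page_type &= ~PG_hugetlb;          /* like __folio_set_hugetlb()   */
        printf("hugetlb? %d\n", page_has_type_bit(&page, PG_hugetlb)); /* 1 */
        page.page_type |= PG_hugetlb;           /* like __folio_clear_hugetlb() */
        printf("hugetlb? %d\n", page_has_type_bit(&page, PG_hugetlb)); /* 0 */
        return 0;
}

Because the type lives in the otherwise unused _mapcount word and is set by clearing a bit from the all-ones pattern, the test needs no reference on the folio and cannot be fooled by a concurrent split the way the old PG_hugetlb page flag could.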
[willy@infradead.org: update vmcoreinfo] Link: https://lkml.kernel.org/r/ZgGZUvsdhaT1Va-T@casper.infradead.org Link: https://lkml.kernel.org/r/20240321142448.1645400-6-willy@infradead.org Fixes: 9c5ccf2db04b ("mm: remove HUGETLB_PAGE_DTOR") Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Acked-by: Vlastimil Babka Reported-by: Luis Chamberlain Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218227 Cc: Miaohe Lin Cc: Muchun Song Cc: Oscar Salvador Cc: Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 70 ++++++++++++++++++++---------------------- include/trace/events/mmflags.h | 1 + kernel/vmcore_info.c | 5 ++- mm/hugetlb.c | 22 ++----------- 4 files changed, 39 insertions(+), 59 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 35a0087d0910..4bf1c25fd1dc 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -190,7 +190,6 @@ enum pageflags { /* At least one page in this folio has the hwpoison flag set */ PG_has_hwpoisoned = PG_error, - PG_hugetlb = PG_active, PG_large_rmappable = PG_workingset, /* anon or file-backed */ }; @@ -876,29 +875,6 @@ TESTPAGEFLAG_FALSE(LargeRmappable, large_rmappable) #define PG_head_mask ((1UL << PG_head)) -#ifdef CONFIG_HUGETLB_PAGE -int PageHuge(const struct page *page); -SETPAGEFLAG(HugeTLB, hugetlb, PF_SECOND) -CLEARPAGEFLAG(HugeTLB, hugetlb, PF_SECOND) - -/** - * folio_test_hugetlb - Determine if the folio belongs to hugetlbfs - * @folio: The folio to test. - * - * Context: Any context. Caller should have a reference on the folio to - * prevent it from being turned into a tail page. - * Return: True for hugetlbfs folios, false for anon folios or folios - * belonging to other filesystems. - */ -static inline bool folio_test_hugetlb(const struct folio *folio) -{ - return folio_test_large(folio) && - test_bit(PG_hugetlb, const_folio_flags(folio, 1)); -} -#else -TESTPAGEFLAG_FALSE(Huge, hugetlb) -#endif - #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * PageHuge() only returns true for hugetlbfs pages, but not for @@ -954,18 +930,6 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) TESTSCFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) #endif -/* - * Check if a page is currently marked HWPoisoned. Note that this check is - * best effort only and inherently racy: there is no way to synchronize with - * failing hardware. - */ -static inline bool is_page_hwpoison(struct page *page) -{ - if (PageHWPoison(page)) - return true; - return PageHuge(page) && PageHWPoison(compound_head(page)); -} - /* * For pages that are never mapped to userspace (and aren't PageSlab), * page_type may be used. Because it is initialised to -1, we invert the @@ -982,6 +946,7 @@ static inline bool is_page_hwpoison(struct page *page) #define PG_offline 0x00000100 #define PG_table 0x00000200 #define PG_guard 0x00000400 +#define PG_hugetlb 0x00000800 #define PageType(page, flag) \ ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) @@ -1076,6 +1041,37 @@ PAGE_TYPE_OPS(Table, table, pgtable) */ PAGE_TYPE_OPS(Guard, guard, guard) +#ifdef CONFIG_HUGETLB_PAGE +FOLIO_TYPE_OPS(hugetlb, hugetlb) +#else +FOLIO_TEST_FLAG_FALSE(hugetlb) +#endif + +/** + * PageHuge - Determine if the page belongs to hugetlbfs + * @page: The page to test. + * + * Context: Any context. + * Return: True for hugetlbfs pages, false for anon pages or pages + * belonging to other filesystems. 
+ */ +static inline bool PageHuge(const struct page *page) +{ + return folio_test_hugetlb(page_folio(page)); +} + +/* + * Check if a page is currently marked HWPoisoned. Note that this check is + * best effort only and inherently racy: there is no way to synchronize with + * failing hardware. + */ +static inline bool is_page_hwpoison(struct page *page) +{ + if (PageHWPoison(page)) + return true; + return PageHuge(page) && PageHWPoison(compound_head(page)); +} + extern bool is_free_buddy_page(struct page *page); PAGEFLAG(Isolated, isolated, PF_ANY); @@ -1142,7 +1138,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) */ #define PAGE_FLAGS_SECOND \ (0xffUL /* order */ | 1UL << PG_has_hwpoisoned | \ - 1UL << PG_hugetlb | 1UL << PG_large_rmappable) + 1UL << PG_large_rmappable) #define PAGE_FLAGS_PRIVATE \ (1UL << PG_private | 1UL << PG_private_2) diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index d801409b33cf..d55e53ac91bd 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -135,6 +135,7 @@ IF_HAVE_PG_ARCH_X(arch_3) #define DEF_PAGETYPE_NAME(_name) { PG_##_name, __stringify(_name) } #define __def_pagetype_names \ + DEF_PAGETYPE_NAME(hugetlb), \ DEF_PAGETYPE_NAME(offline), \ DEF_PAGETYPE_NAME(guard), \ DEF_PAGETYPE_NAME(table), \ diff --git a/kernel/vmcore_info.c b/kernel/vmcore_info.c index f95516cd45bb..23c125c2e243 100644 --- a/kernel/vmcore_info.c +++ b/kernel/vmcore_info.c @@ -205,11 +205,10 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_NUMBER(PG_head_mask); #define PAGE_BUDDY_MAPCOUNT_VALUE (~PG_buddy) VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); -#ifdef CONFIG_HUGETLB_PAGE - VMCOREINFO_NUMBER(PG_hugetlb); +#define PAGE_HUGETLB_MAPCOUNT_VALUE (~PG_hugetlb) + VMCOREINFO_NUMBER(PAGE_HUGETLB_MAPCOUNT_VALUE); #define PAGE_OFFLINE_MAPCOUNT_VALUE (~PG_offline) VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE); -#endif #ifdef CONFIG_KALLSYMS VMCOREINFO_SYMBOL(kallsyms_names); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 53e0ab5c0845..4553241f0fb2 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1624,7 +1624,7 @@ static inline void __clear_hugetlb_destructor(struct hstate *h, { lockdep_assert_held(&hugetlb_lock); - folio_clear_hugetlb(folio); + __folio_clear_hugetlb(folio); } /* @@ -1711,7 +1711,7 @@ static void add_hugetlb_folio(struct hstate *h, struct folio *folio, h->surplus_huge_pages_node[nid]++; } - folio_set_hugetlb(folio); + __folio_set_hugetlb(folio); folio_change_private(folio, NULL); /* * We have to set hugetlb_vmemmap_optimized again as above @@ -2049,7 +2049,7 @@ static void __prep_account_new_huge_page(struct hstate *h, int nid) static void init_new_hugetlb_folio(struct hstate *h, struct folio *folio) { - folio_set_hugetlb(folio); + __folio_set_hugetlb(folio); INIT_LIST_HEAD(&folio->lru); hugetlb_set_folio_subpool(folio, NULL); set_hugetlb_cgroup(folio, NULL); @@ -2159,22 +2159,6 @@ static bool prep_compound_gigantic_folio_for_demote(struct folio *folio, return __prep_compound_gigantic_folio(folio, order, true); } -/* - * PageHuge() only returns true for hugetlbfs pages, but not for normal or - * transparent huge pages. See the PageTransHuge() documentation for more - * details. - */ -int PageHuge(const struct page *page) -{ - const struct folio *folio; - - if (!PageCompound(page)) - return 0; - folio = page_folio(page); - return folio_test_hugetlb(folio); -} -EXPORT_SYMBOL_GPL(PageHuge); - /* * Find and lock address space (mapping) in write mode. 
* -- cgit v1.2.3 From ce0abef6a1d540acef85068e0e82bdf1fbeeb0e9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 19 Apr 2024 17:05:55 -0700 Subject: cpu: Ignore "mitigations" kernel parameter if CPU_MITIGATIONS=n Explicitly disallow enabling mitigations at runtime for kernels that were built with CONFIG_CPU_MITIGATIONS=n, as some architectures may omit code entirely if mitigations are disabled at compile time. E.g. on x86, a large pile of Kconfigs are buried behind CPU_MITIGATIONS, and trying to provide sane behavior for retroactively enabling mitigations is extremely difficult, bordering on impossible. E.g. page table isolation and call depth tracking require build-time support, BHI mitigations will still be off without additional kernel parameters, etc. [ bp: Touchups. ] Signed-off-by: Sean Christopherson Signed-off-by: Borislav Petkov (AMD) Acked-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240420000556.2645001-3-seanjc@google.com --- Documentation/admin-guide/kernel-parameters.txt | 3 +++ arch/x86/Kconfig | 8 ++++++-- include/linux/cpu.h | 11 +++++++++++ kernel/cpu.c | 14 ++++++++++---- 4 files changed, 30 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 902ecd92a29f..213d0719e2b7 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3423,6 +3423,9 @@ arch-independent options, each of which is an aggregation of existing arch-specific options. + Note, "mitigations" is supported if and only if the + kernel was built with CPU_MITIGATIONS=y. + off Disable all optional CPU mitigations. This improves system performance, but it may also diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 619a04d5c131..928820e61cb5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2495,9 +2495,13 @@ menuconfig CPU_MITIGATIONS help Say Y here to enable options which enable mitigations for hardware vulnerabilities (usually related to speculative execution). + Mitigations can be disabled or restricted to SMT systems at runtime + via the "mitigations" kernel parameter. - If you say N, all mitigations will be disabled. You really - should know what you are doing to say so. + If you say N, all mitigations will be disabled. This CANNOT be + overridden at runtime. + + Say 'Y', unless you really know what you are doing. if CPU_MITIGATIONS diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 272e4e79e15c..861c3bfc5f17 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -221,7 +221,18 @@ void cpuhp_report_idle_dead(void); static inline void cpuhp_report_idle_dead(void) { } #endif /* #ifdef CONFIG_HOTPLUG_CPU */ +#ifdef CONFIG_CPU_MITIGATIONS extern bool cpu_mitigations_off(void); extern bool cpu_mitigations_auto_nosmt(void); +#else +static inline bool cpu_mitigations_off(void) +{ + return true; +} +static inline bool cpu_mitigations_auto_nosmt(void) +{ + return false; +} +#endif #endif /* _LINUX_CPU_H_ */ diff --git a/kernel/cpu.c b/kernel/cpu.c index bb0ff275fb46..63447eb85dab 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -3196,6 +3196,7 @@ void __init boot_cpu_hotplug_init(void) this_cpu_write(cpuhp_state.target, CPUHP_ONLINE); } +#ifdef CONFIG_CPU_MITIGATIONS /* * These are used for a global "mitigations=" cmdline option for toggling * optional CPU mitigations. 
@@ -3206,9 +3207,7 @@ enum cpu_mitigations { CPU_MITIGATIONS_AUTO_NOSMT, }; -static enum cpu_mitigations cpu_mitigations __ro_after_init = - IS_ENABLED(CONFIG_CPU_MITIGATIONS) ? CPU_MITIGATIONS_AUTO : - CPU_MITIGATIONS_OFF; +static enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO; static int __init mitigations_parse_cmdline(char *arg) { @@ -3224,7 +3223,6 @@ static int __init mitigations_parse_cmdline(char *arg) return 0; } -early_param("mitigations", mitigations_parse_cmdline); /* mitigations=off */ bool cpu_mitigations_off(void) @@ -3239,3 +3237,11 @@ bool cpu_mitigations_auto_nosmt(void) return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT; } EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt); +#else +static int __init mitigations_parse_cmdline(char *arg) +{ + pr_crit("Kernel compiled without mitigations, ignoring 'mitigations'; system may still be vulnerable\n"); + return 0; +} +#endif +early_param("mitigations", mitigations_parse_cmdline); -- cgit v1.2.3 From e877d705704d7c8fe17b6b5ebdfdb14b84c207a7 Mon Sep 17 00:00:00 2001 From: Christian Gmeiner Date: Sat, 20 Apr 2024 15:41:58 +0200 Subject: Revert "drm/etnaviv: Expose a few more chipspecs to userspace" This reverts commit 1dccdba084897443d116508a8ed71e0ac8a031a4. In userspace a different approach was chosen: hwdb. As a result, there is no need for these values. Signed-off-by: Christian Gmeiner Reviewed-by: Tomeu Vizoso Signed-off-by: Lucas Stach --- drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 20 -------------------- drivers/gpu/drm/etnaviv/etnaviv_gpu.h | 12 ------------ drivers/gpu/drm/etnaviv/etnaviv_hwdb.c | 34 ---------------------------------- include/uapi/drm/etnaviv_drm.h | 5 ----- 4 files changed, 71 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 6f763038c21a..a9bf426f69b3 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -164,26 +164,6 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value) *value = gpu->identity.eco_id; break; - case ETNAVIV_PARAM_GPU_NN_CORE_COUNT: - *value = gpu->identity.nn_core_count; - break; - - case ETNAVIV_PARAM_GPU_NN_MAD_PER_CORE: - *value = gpu->identity.nn_mad_per_core; - break; - - case ETNAVIV_PARAM_GPU_TP_CORE_COUNT: - *value = gpu->identity.tp_core_count; - break; - - case ETNAVIV_PARAM_GPU_ON_CHIP_SRAM_SIZE: - *value = gpu->identity.on_chip_sram_size; - break; - - case ETNAVIV_PARAM_GPU_AXI_SRAM_SIZE: - *value = gpu->identity.axi_sram_size; - break; - default: DBG("%s: invalid param: %u", dev_name(gpu->dev), param); return -EINVAL; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h index 7d5e9158e13c..197e0037732e 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h @@ -54,18 +54,6 @@ struct etnaviv_chip_identity { /* Number of Neural Network cores. */ u32 nn_core_count; - /* Number of MAD units per Neural Network core. */ - u32 nn_mad_per_core; - - /* Number of Tensor Processing cores. */ - u32 tp_core_count; - - /* Size in bytes of the SRAM inside the NPU. */ - u32 on_chip_sram_size; - - /* Size in bytes of the SRAM across the AXI bus. */ - u32 axi_sram_size; - /* Size of the vertex cache.
*/ u32 vertex_cache_size; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c index d8e7334de8ce..8665f2658d51 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c @@ -17,10 +17,6 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = { .thread_count = 128, .shader_core_count = 1, .nn_core_count = 0, - .nn_mad_per_core = 0, - .tp_core_count = 0, - .on_chip_sram_size = 0, - .axi_sram_size = 0, .vertex_cache_size = 8, .vertex_output_buffer_size = 1024, .pixel_pipes = 1, @@ -52,11 +48,6 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = { .register_max = 64, .thread_count = 256, .shader_core_count = 1, - .nn_core_count = 0, - .nn_mad_per_core = 0, - .tp_core_count = 0, - .on_chip_sram_size = 0, - .axi_sram_size = 0, .vertex_cache_size = 8, .vertex_output_buffer_size = 512, .pixel_pipes = 1, @@ -89,10 +80,6 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = { .thread_count = 512, .shader_core_count = 2, .nn_core_count = 0, - .nn_mad_per_core = 0, - .tp_core_count = 0, - .on_chip_sram_size = 0, - .axi_sram_size = 0, .vertex_cache_size = 16, .vertex_output_buffer_size = 1024, .pixel_pipes = 1, @@ -125,10 +112,6 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = { .thread_count = 512, .shader_core_count = 2, .nn_core_count = 0, - .nn_mad_per_core = 0, - .tp_core_count = 0, - .on_chip_sram_size = 0, - .axi_sram_size = 0, .vertex_cache_size = 16, .vertex_output_buffer_size = 1024, .pixel_pipes = 1, @@ -160,11 +143,6 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = { .register_max = 64, .thread_count = 512, .shader_core_count = 2, - .nn_core_count = 0, - .nn_mad_per_core = 0, - .tp_core_count = 0, - .on_chip_sram_size = 0, - .axi_sram_size = 0, .vertex_cache_size = 16, .vertex_output_buffer_size = 1024, .pixel_pipes = 1, @@ -197,10 +175,6 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = { .thread_count = 1024, .shader_core_count = 4, .nn_core_count = 0, - .nn_mad_per_core = 0, - .tp_core_count = 0, - .on_chip_sram_size = 0, - .axi_sram_size = 0, .vertex_cache_size = 16, .vertex_output_buffer_size = 1024, .pixel_pipes = 2, @@ -233,10 +207,6 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = { .thread_count = 256, .shader_core_count = 1, .nn_core_count = 8, - .nn_mad_per_core = 64, - .tp_core_count = 4, - .on_chip_sram_size = 524288, - .axi_sram_size = 1048576, .vertex_cache_size = 16, .vertex_output_buffer_size = 1024, .pixel_pipes = 1, @@ -269,10 +239,6 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = { .thread_count = 256, .shader_core_count = 1, .nn_core_count = 6, - .nn_mad_per_core = 64, - .tp_core_count = 3, - .on_chip_sram_size = 262144, - .axi_sram_size = 0, .vertex_cache_size = 16, .vertex_output_buffer_size = 1024, .pixel_pipes = 1, diff --git a/include/uapi/drm/etnaviv_drm.h b/include/uapi/drm/etnaviv_drm.h index d87410a8443a..af024d90453d 100644 --- a/include/uapi/drm/etnaviv_drm.h +++ b/include/uapi/drm/etnaviv_drm.h @@ -77,11 +77,6 @@ struct drm_etnaviv_timespec { #define ETNAVIV_PARAM_GPU_PRODUCT_ID 0x1c #define ETNAVIV_PARAM_GPU_CUSTOMER_ID 0x1d #define ETNAVIV_PARAM_GPU_ECO_ID 0x1e -#define ETNAVIV_PARAM_GPU_NN_CORE_COUNT 0x1f -#define ETNAVIV_PARAM_GPU_NN_MAD_PER_CORE 0x20 -#define ETNAVIV_PARAM_GPU_TP_CORE_COUNT 0x21 -#define ETNAVIV_PARAM_GPU_ON_CHIP_SRAM_SIZE 0x22 -#define ETNAVIV_PARAM_GPU_AXI_SRAM_SIZE 0x23 #define 
ETNA_MAX_PIPES 4 -- cgit v1.2.3 From 475747a19316b08e856c666a20503e73d7ed67ed Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 23 Apr 2024 11:13:02 -0700 Subject: macsec: Enable devices to advertise whether they update sk_buff md_dst during offloads We cannot know whether an Rx skb missing md_dst is intended for MACsec without knowing whether the device is able to update this field during an offload. Assume that an offload to a MACsec device cannot support updating md_dst by default. Capable devices can advertise that they do use md_dst to indicate that an skb is related to a MACsec-offloaded packet. Cc: Sabrina Dubroca Cc: stable@vger.kernel.org Fixes: 860ead89b851 ("net/macsec: Add MACsec skb_metadata_dst Rx Data path support") Signed-off-by: Rahul Rameshbabu Reviewed-by: Benjamin Poirier Reviewed-by: Cosmin Ratiu Reviewed-by: Sabrina Dubroca Link: https://lore.kernel.org/r/20240423181319.115860-2-rrameshbabu@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/macsec.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/macsec.h b/include/net/macsec.h index dbd22180cc5c..de216cbc6b05 100644 --- a/include/net/macsec.h +++ b/include/net/macsec.h @@ -321,6 +321,7 @@ struct macsec_context { * for the TX tag * @needed_tailroom: number of bytes reserved at the end of the sk_buff for the * TX tag + * @rx_uses_md_dst: whether MACsec device offload supports sk_buff md_dst */ struct macsec_ops { /* Device wide */ @@ -352,6 +353,7 @@ struct macsec_ops { struct sk_buff *skb); unsigned int needed_headroom; unsigned int needed_tailroom; + bool rx_uses_md_dst; }; void macsec_pn_wrapped(struct macsec_secy *secy, struct macsec_tx_sa *tx_sa); -- cgit v1.2.3 From 6e159fd653d7ebf6290358e0330a0cb8a75cf73b Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 23 Apr 2024 11:13:03 -0700 Subject: ethernet: Add helper for assigning packet type when dest address does not match device address Enable reuse of logic in eth_type_trans for determining packet type. Suggested-by: Sabrina Dubroca Cc: stable@vger.kernel.org Signed-off-by: Rahul Rameshbabu Reviewed-by: Sabrina Dubroca Link: https://lore.kernel.org/r/20240423181319.115860-3-rrameshbabu@nvidia.com Signed-off-by: Jakub Kicinski --- include/linux/etherdevice.h | 25 +++++++++++++++++++++++++ net/ethernet/eth.c | 12 +----------- 2 files changed, 26 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 224645f17c33..297231854ada 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -607,6 +607,31 @@ static inline void eth_hw_addr_gen(struct net_device *dev, const u8 *base_addr, eth_hw_addr_set(dev, addr); } +/** + * eth_skb_pkt_type - Assign packet type if destination address does not match + * @skb: Assigned a packet type if address does not match @dev address + * @dev: Network device used to compare packet address against + * + * If the destination MAC address of the packet does not match the network + * device address, assign an appropriate packet type.
+ */ +static inline void eth_skb_pkt_type(struct sk_buff *skb, + const struct net_device *dev) +{ + const struct ethhdr *eth = eth_hdr(skb); + + if (unlikely(!ether_addr_equal_64bits(eth->h_dest, dev->dev_addr))) { + if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) { + if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast)) + skb->pkt_type = PACKET_BROADCAST; + else + skb->pkt_type = PACKET_MULTICAST; + } else { + skb->pkt_type = PACKET_OTHERHOST; + } + } +} + /** * eth_skb_pad - Pad buffer to mininum number of octets for Ethernet frame * @skb: Buffer to pad diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 2edc8b796a4e..049c3adeb850 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -164,17 +164,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) eth = (struct ethhdr *)skb->data; skb_pull_inline(skb, ETH_HLEN); - if (unlikely(!ether_addr_equal_64bits(eth->h_dest, - dev->dev_addr))) { - if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) { - if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast)) - skb->pkt_type = PACKET_BROADCAST; - else - skb->pkt_type = PACKET_MULTICAST; - } else { - skb->pkt_type = PACKET_OTHERHOST; - } - } + eth_skb_pkt_type(skb, dev); /* * Some variants of DSA tagging don't have an ethertype field -- cgit v1.2.3 From 0844370f8945086eb9335739d10205dcea8d707b Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 24 Apr 2024 12:25:47 +0200 Subject: tls: fix lockless read of strp->msg_ready in ->poll tls_sk_poll is called without locking the socket, and needs to read strp->msg_ready (via tls_strp_msg_ready). Convert msg_ready to a bool and use READ_ONCE/WRITE_ONCE where needed. The remaining reads are only performed when the socket is locked. Fixes: 121dca784fc0 ("tls: suppress wakeups unless we have a full record") Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/r/0b7ee062319037cf86af6b317b3d72f7bfcd2e97.1713797701.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- include/net/tls.h | 3 ++- net/tls/tls.h | 2 +- net/tls/tls_strp.c | 6 +++--- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/tls.h b/include/net/tls.h index 340ad43971e4..33f657d3c051 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -111,7 +111,8 @@ struct tls_strparser { u32 stopped : 1; u32 copy_mode : 1; u32 mixed_decrypted : 1; - u32 msg_ready : 1; + + bool msg_ready; struct strp_msg stm; diff --git a/net/tls/tls.h b/net/tls/tls.h index 762f424ff2d5..e5e47452308a 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -215,7 +215,7 @@ static inline struct sk_buff *tls_strp_msg(struct tls_sw_context_rx *ctx) static inline bool tls_strp_msg_ready(struct tls_sw_context_rx *ctx) { - return ctx->strp.msg_ready; + return READ_ONCE(ctx->strp.msg_ready); } static inline bool tls_strp_msg_mixed_decrypted(struct tls_sw_context_rx *ctx) diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c index ca1e0e198ceb..5df08d848b5c 100644 --- a/net/tls/tls_strp.c +++ b/net/tls/tls_strp.c @@ -360,7 +360,7 @@ static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb, if (strp->stm.full_len && strp->stm.full_len == skb->len) { desc->count = 0; - strp->msg_ready = 1; + WRITE_ONCE(strp->msg_ready, 1); tls_rx_msg_ready(strp); } @@ -528,7 +528,7 @@ static int tls_strp_read_sock(struct tls_strparser *strp) if (!tls_strp_check_queue_ok(strp)) return tls_strp_read_copy(strp, false); - strp->msg_ready = 1; + WRITE_ONCE(strp->msg_ready, 1); tls_rx_msg_ready(strp); return 0; @@ -580,7 +580,7 
@@ void tls_strp_msg_done(struct tls_strparser *strp) else tls_strp_flush_anchor_copy(strp); - strp->msg_ready = 0; + WRITE_ONCE(strp->msg_ready, 0); memset(&strp->stm, 0, sizeof(strp->stm)); tls_strp_check_rcv(strp); -- cgit v1.2.3 From 1971d13ffa84a551d29a81fdf5b5ec5be166ac83 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 24 Apr 2024 10:04:43 -0700 Subject: af_unix: Suppress false-positive lockdep splat for spin_lock() in __unix_gc(). syzbot reported a lockdep splat regarding unix_gc_lock and unix_state_lock(). One is called from recvmsg() for a connected socket, and another is called from GC for TCP_LISTEN socket. So, the splat is false-positive. Let's add a dedicated lock class for the latter to suppress the splat. Note that this change is not necessary for net-next.git as the issue is only applied to the old GC impl. [0]: WARNING: possible circular locking dependency detected 6.9.0-rc5-syzkaller-00007-g4d2008430ce8 #0 Not tainted ----------------------------------------------------- kworker/u8:1/11 is trying to acquire lock: ffff88807cea4e70 (&u->lock){+.+.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] ffff88807cea4e70 (&u->lock){+.+.}-{2:2}, at: __unix_gc+0x40e/0xf70 net/unix/garbage.c:302 but task is already holding lock: ffffffff8f6ab638 (unix_gc_lock){+.+.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] ffffffff8f6ab638 (unix_gc_lock){+.+.}-{2:2}, at: __unix_gc+0x117/0xf70 net/unix/garbage.c:261 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (unix_gc_lock){+.+.}-{2:2}: lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5754 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154 spin_lock include/linux/spinlock.h:351 [inline] unix_notinflight+0x13d/0x390 net/unix/garbage.c:140 unix_detach_fds net/unix/af_unix.c:1819 [inline] unix_destruct_scm+0x221/0x350 net/unix/af_unix.c:1876 skb_release_head_state+0x100/0x250 net/core/skbuff.c:1188 skb_release_all net/core/skbuff.c:1200 [inline] __kfree_skb net/core/skbuff.c:1216 [inline] kfree_skb_reason+0x16d/0x3b0 net/core/skbuff.c:1252 kfree_skb include/linux/skbuff.h:1262 [inline] manage_oob net/unix/af_unix.c:2672 [inline] unix_stream_read_generic+0x1125/0x2700 net/unix/af_unix.c:2749 unix_stream_splice_read+0x239/0x320 net/unix/af_unix.c:2981 do_splice_read fs/splice.c:985 [inline] splice_file_to_pipe+0x299/0x500 fs/splice.c:1295 do_splice+0xf2d/0x1880 fs/splice.c:1379 __do_splice fs/splice.c:1436 [inline] __do_sys_splice fs/splice.c:1652 [inline] __se_sys_splice+0x331/0x4a0 fs/splice.c:1634 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf5/0x240 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f -> #0 (&u->lock){+.+.}-{2:2}: check_prev_add kernel/locking/lockdep.c:3134 [inline] check_prevs_add kernel/locking/lockdep.c:3253 [inline] validate_chain+0x18cb/0x58e0 kernel/locking/lockdep.c:3869 __lock_acquire+0x1346/0x1fd0 kernel/locking/lockdep.c:5137 lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5754 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154 spin_lock include/linux/spinlock.h:351 [inline] __unix_gc+0x40e/0xf70 net/unix/garbage.c:302 process_one_work kernel/workqueue.c:3254 [inline] process_scheduled_works+0xa10/0x17c0 kernel/workqueue.c:3335 worker_thread+0x86d/0xd70 kernel/workqueue.c:3416 kthread+0x2f0/0x390 kernel/kthread.c:388 ret_from_fork+0x4b/0x80 
arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(unix_gc_lock); lock(&u->lock); lock(unix_gc_lock); lock(&u->lock); *** DEADLOCK *** 3 locks held by kworker/u8:1/11: #0: ffff888015089148 ((wq_completion)events_unbound){+.+.}-{0:0}, at: process_one_work kernel/workqueue.c:3229 [inline] #0: ffff888015089148 ((wq_completion)events_unbound){+.+.}-{0:0}, at: process_scheduled_works+0x8e0/0x17c0 kernel/workqueue.c:3335 #1: ffffc90000107d00 (unix_gc_work){+.+.}-{0:0}, at: process_one_work kernel/workqueue.c:3230 [inline] #1: ffffc90000107d00 (unix_gc_work){+.+.}-{0:0}, at: process_scheduled_works+0x91b/0x17c0 kernel/workqueue.c:3335 #2: ffffffff8f6ab638 (unix_gc_lock){+.+.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] #2: ffffffff8f6ab638 (unix_gc_lock){+.+.}-{2:2}, at: __unix_gc+0x117/0xf70 net/unix/garbage.c:261 stack backtrace: CPU: 0 PID: 11 Comm: kworker/u8:1 Not tainted 6.9.0-rc5-syzkaller-00007-g4d2008430ce8 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024 Workqueue: events_unbound __unix_gc Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:114 check_noncircular+0x36a/0x4a0 kernel/locking/lockdep.c:2187 check_prev_add kernel/locking/lockdep.c:3134 [inline] check_prevs_add kernel/locking/lockdep.c:3253 [inline] validate_chain+0x18cb/0x58e0 kernel/locking/lockdep.c:3869 __lock_acquire+0x1346/0x1fd0 kernel/locking/lockdep.c:5137 lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5754 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154 spin_lock include/linux/spinlock.h:351 [inline] __unix_gc+0x40e/0xf70 net/unix/garbage.c:302 process_one_work kernel/workqueue.c:3254 [inline] process_scheduled_works+0xa10/0x17c0 kernel/workqueue.c:3335 worker_thread+0x86d/0xd70 kernel/workqueue.c:3416 kthread+0x2f0/0x390 kernel/kthread.c:388 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 Fixes: 47d8ac011fe1 ("af_unix: Fix garbage collector racing against connect()") Reported-and-tested-by: syzbot+fa379358c28cc87cc307@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=fa379358c28cc87cc307 Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240424170443.9832-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/af_unix.h | 3 +++ net/unix/garbage.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 627ea8e2d915..3dee0b2721aa 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -85,6 +85,9 @@ enum unix_socket_lock_class { U_LOCK_NORMAL, U_LOCK_SECOND, /* for double locking, see unix_state_double_lock(). */ U_LOCK_DIAG, /* used while dumping icons, see sk_diag_dump_icons(). */ + U_LOCK_GC_LISTENER, /* used for listening socket while determining gc + * candidates to close a small race window. 
+ */ }; static inline void unix_state_lock_nested(struct sock *sk, diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 6433a414acf8..0104be9d4704 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -299,7 +299,7 @@ static void __unix_gc(struct work_struct *work) __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); if (sk->sk_state == TCP_LISTEN) { - unix_state_lock(sk); + unix_state_lock_nested(sk, U_LOCK_GC_LISTENER); unix_state_unlock(sk); } } -- cgit v1.2.3 From 66e13b615a0ce76b785d780ecc9776ba71983629 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Wed, 24 Apr 2024 10:02:08 +0000 Subject: bpf: verifier: prevent userspace memory access With BPF_PROBE_MEM, BPF allows de-referencing an untrusted pointer. To thwart invalid memory accesses, the JITs add an exception table entry for all such accesses. But in case the src_reg + offset is a userspace address, the BPF program might read that memory if the user has mapped it. Make the verifier add guard instructions around such memory accesses and skip the load if the address falls into the userspace region. The JITs need to implement bpf_arch_uaddress_limit() to define where the userspace addresses end for that architecture or TASK_SIZE is taken as default. The implementation is as follows: REG_AX = SRC_REG if(offset) REG_AX += offset; REG_AX >>= 32; if (REG_AX <= (uaddress_limit >> 32)) DST_REG = 0; else DST_REG = *(size *)(SRC_REG + offset); Comparing just the upper 32 bits of the load address with the upper 32 bits of uaddress_limit implies that the values are being aligned down to a 4GB boundary before comparison. The above means that all loads with address <= uaddress_limit + 4GB are skipped. This is acceptable because there is a large hole (much larger than 4GB) between userspace and kernel space memory, therefore a correctly functioning BPF program should not access this 4GB memory above the userspace. Let's analyze what this patch does to the following fentry program dereferencing an untrusted pointer: SEC("fentry/tcp_v4_connect") int BPF_PROG(fentry_tcp_v4_connect, struct sock *sk) { *(volatile long *)sk; return 0; } BPF Program before | BPF Program after ------------------ | ----------------- 0: (79) r1 = *(u64 *)(r1 +0) 0: (79) r1 = *(u64 *)(r1 +0) ----------------------------------------------------------------------- 1: (79) r1 = *(u64 *)(r1 +0) --\ 1: (bf) r11 = r1 ----------------------------\ \ 2: (77) r11 >>= 32 2: (b7) r0 = 0 \ \ 3: (b5) if r11 <= 0x8000 goto pc+2 3: (95) exit \ \-> 4: (79) r1 = *(u64 *)(r1 +0) \ 5: (05) goto pc+1 \ 6: (b7) r1 = 0 \-------------------------------------- 7: (b7) r0 = 0 8: (95) exit As you can see from above, in the best case (off=0), 5 extra instructions are emitted. Now, we analyze the same program after it has gone through the JITs of ARM64 and RISC-V architectures. We follow the single load instruction that has the untrusted pointer and see what instrumentation has been added around it. x86-64 JIT ========== JIT's Instrumentation (upstream) --------------------- 0: nopl 0x0(%rax,%rax,1) 5: xchg %ax,%ax 7: push %rbp 8: mov %rsp,%rbp b: mov 0x0(%rdi),%rdi --------------------------------- f: movabs $0x800000000000,%r11 19: cmp %r11,%rdi 1c: jb 0x000000000000002a 1e: mov %rdi,%r11 21: add $0x0,%r11 28: jae 0x000000000000002e 2a: xor %edi,%edi 2c: jmp 0x0000000000000032 2e: mov 0x0(%rdi),%rdi --------------------------------- 32: xor %eax,%eax 34: leave 35: ret The x86-64 JIT already emits some instructions to protect against user memory access. 
This patch doesn't make any changes for the x86-64 JIT. ARM64 JIT ========= No Intrumentation Verifier's Instrumentation (upstream) (This patch) ----------------- -------------------------- 0: add x9, x30, #0x0 0: add x9, x30, #0x0 4: nop 4: nop 8: paciasp 8: paciasp c: stp x29, x30, [sp, #-16]! c: stp x29, x30, [sp, #-16]! 10: mov x29, sp 10: mov x29, sp 14: stp x19, x20, [sp, #-16]! 14: stp x19, x20, [sp, #-16]! 18: stp x21, x22, [sp, #-16]! 18: stp x21, x22, [sp, #-16]! 1c: stp x25, x26, [sp, #-16]! 1c: stp x25, x26, [sp, #-16]! 20: stp x27, x28, [sp, #-16]! 20: stp x27, x28, [sp, #-16]! 24: mov x25, sp 24: mov x25, sp 28: mov x26, #0x0 28: mov x26, #0x0 2c: sub x27, x25, #0x0 2c: sub x27, x25, #0x0 30: sub sp, sp, #0x0 30: sub sp, sp, #0x0 34: ldr x0, [x0] 34: ldr x0, [x0] -------------------------------------------------------------------------------- 38: ldr x0, [x0] ----------\ 38: add x9, x0, #0x0 -----------------------------------\\ 3c: lsr x9, x9, #32 3c: mov x7, #0x0 \\ 40: cmp x9, #0x10, lsl #12 40: mov sp, sp \\ 44: b.ls 0x0000000000000050 44: ldp x27, x28, [sp], #16 \\--> 48: ldr x0, [x0] 48: ldp x25, x26, [sp], #16 \ 4c: b 0x0000000000000054 4c: ldp x21, x22, [sp], #16 \ 50: mov x0, #0x0 50: ldp x19, x20, [sp], #16 \--------------------------------------- 54: ldp x29, x30, [sp], #16 54: mov x7, #0x0 58: add x0, x7, #0x0 58: mov sp, sp 5c: autiasp 5c: ldp x27, x28, [sp], #16 60: ret 60: ldp x25, x26, [sp], #16 64: nop 64: ldp x21, x22, [sp], #16 68: ldr x10, 0x0000000000000070 68: ldp x19, x20, [sp], #16 6c: br x10 6c: ldp x29, x30, [sp], #16 70: add x0, x7, #0x0 74: autiasp 78: ret 7c: nop 80: ldr x10, 0x0000000000000088 84: br x10 There are 6 extra instructions added in ARM64 in the best case. This will become 7 in the worst case (off != 0). RISC-V JIT (RISCV_ISA_C Disabled) ========== No Intrumentation Verifier's Instrumentation (upstream) (This patch) ----------------- -------------------------- 0: nop 0: nop 4: nop 4: nop 8: li a6, 33 8: li a6, 33 c: addi sp, sp, -16 c: addi sp, sp, -16 10: sd s0, 8(sp) 10: sd s0, 8(sp) 14: addi s0, sp, 16 14: addi s0, sp, 16 18: ld a0, 0(a0) 18: ld a0, 0(a0) --------------------------------------------------------------- 1c: ld a0, 0(a0) --\ 1c: mv t0, a0 --------------------------\ \ 20: srli t0, t0, 32 20: li a5, 0 \ \ 24: lui t1, 4096 24: ld s0, 8(sp) \ \ 28: sext.w t1, t1 28: addi sp, sp, 16 \ \ 2c: bgeu t1, t0, 12 2c: sext.w a0, a5 \ \--> 30: ld a0, 0(a0) 30: ret \ 34: j 8 \ 38: li a0, 0 \------------------------------ 3c: li a5, 0 40: ld s0, 8(sp) 44: addi sp, sp, 16 48: sext.w a0, a5 4c: ret There are 7 extra instructions added in RISC-V. 
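The comparison being emitted is easy to check in isolation. A userspace sketch (not the verifier code: the helper is invented for the demo, and the limit is just an example value matching the 0x800000000000 / "r11 <= 0x8000" seen in the dumps above):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool guard_skips_load(uint64_t addr, int32_t off, uint64_t uaddress_limit)
{
        uint64_t ax = addr + (int64_t)off;      /* REG_AX = SRC_REG + offset */

        ax >>= 32;                              /* REG_AX >>= 32 */
        return ax <= (uaddress_limit >> 32);    /* skip the load: DST_REG = 0 */
}

int main(void)
{
        const uint64_t limit = 0x0000800000000000ull;   /* example TASK_SIZE-like value */

        printf("%d\n", guard_skips_load(0x00007fffdeadbeefull, 0, limit)); /* user: skipped   */
        printf("%d\n", guard_skips_load(0x0000800000001000ull, 0, limit)); /* hole: skipped   */
        printf("%d\n", guard_skips_load(0xffff888012345678ull, 0, limit)); /* kernel: loaded  */
        return 0;
}

The second case shows the deliberate coarseness: anything within 4GB above the limit is also skipped, which is acceptable because of the large unmapped hole between user and kernel addresses.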
Fixes: 800834285361 ("bpf, arm64: Add BPF exception tables") Reported-by: Breno Leitao Suggested-by: Alexei Starovoitov Acked-by: Ilya Leoshkevich Signed-off-by: Puranjay Mohan Link: https://lore.kernel.org/r/20240424100210.11982-2-puranjay@kernel.org Signed-off-by: Alexei Starovoitov --- arch/x86/net/bpf_jit_comp.c | 6 ++++++ include/linux/filter.h | 1 + kernel/bpf/core.c | 9 +++++++++ kernel/bpf/verifier.c | 30 ++++++++++++++++++++++++++++++ 4 files changed, 46 insertions(+) (limited to 'include') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index df5fac428408..b520b66ad14b 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -3473,3 +3473,9 @@ bool bpf_jit_supports_ptr_xchg(void) { return true; } + +/* x86-64 JIT emits its own code to filter user addresses so return 0 here */ +u64 bpf_arch_uaddress_limit(void) +{ + return 0; +} diff --git a/include/linux/filter.h b/include/linux/filter.h index c99bc3df2d28..219ee7a76874 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -963,6 +963,7 @@ bool bpf_jit_supports_far_kfunc_call(void); bool bpf_jit_supports_exceptions(void); bool bpf_jit_supports_ptr_xchg(void); bool bpf_jit_supports_arena(void); +u64 bpf_arch_uaddress_limit(void); void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie); bool bpf_helper_changes_pkt_data(void *func); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 696bc55de8e8..1ea5ce5bb599 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2942,6 +2942,15 @@ bool __weak bpf_jit_supports_arena(void) return false; } +u64 __weak bpf_arch_uaddress_limit(void) +{ +#if defined(CONFIG_64BIT) && defined(CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE) + return TASK_SIZE; +#else + return 0; +#endif +} + /* Return TRUE if the JIT backend satisfies the following two conditions: * 1) JIT backend supports atomic_xchg() on pointer-sized words. * 2) Under the specific arch, the implementation of xchg() is the same diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index aa24beb65393..cb7ad1f795e1 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -19675,6 +19675,36 @@ static int do_misc_fixups(struct bpf_verifier_env *env) goto next_insn; } + /* Make it impossible to de-reference a userspace address */ + if (BPF_CLASS(insn->code) == BPF_LDX && + (BPF_MODE(insn->code) == BPF_PROBE_MEM || + BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) { + struct bpf_insn *patch = &insn_buf[0]; + u64 uaddress_limit = bpf_arch_uaddress_limit(); + + if (!uaddress_limit) + goto next_insn; + + *patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg); + if (insn->off) + *patch++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_AX, insn->off); + *patch++ = BPF_ALU64_IMM(BPF_RSH, BPF_REG_AX, 32); + *patch++ = BPF_JMP_IMM(BPF_JLE, BPF_REG_AX, uaddress_limit >> 32, 2); + *patch++ = *insn; + *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); + *patch++ = BPF_MOV64_IMM(insn->dst_reg, 0); + + cnt = patch - insn_buf; + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + goto next_insn; + } + /* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. 
*/ if (BPF_CLASS(insn->code) == BPF_LD && (BPF_MODE(insn->code) == BPF_ABS || -- cgit v1.2.3 From 2e5449f4f21a1b0bd9beec4c4b580eb1f9b9ed7f Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 27 Apr 2024 15:27:58 +0900 Subject: profiling: Remove create_prof_cpu_mask(). create_prof_cpu_mask() is no longer used after commit 1f44a225777e ("s390: convert interrupt handling to use generic hardirq"). Signed-off-by: Tetsuo Handa Signed-off-by: Linus Torvalds --- include/linux/profile.h | 5 ----- kernel/profile.c | 43 ------------------------------------------- 2 files changed, 48 deletions(-) (limited to 'include') diff --git a/include/linux/profile.h b/include/linux/profile.h index 11db1ec516e2..04ae5ebcb637 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -18,13 +18,8 @@ struct proc_dir_entry; struct notifier_block; #if defined(CONFIG_PROFILING) && defined(CONFIG_PROC_FS) -void create_prof_cpu_mask(void); int create_proc_profile(void); #else -static inline void create_prof_cpu_mask(void) -{ -} - static inline int create_proc_profile(void) { return 0; diff --git a/kernel/profile.c b/kernel/profile.c index 8a77769bc4b4..2b775cc5c28f 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -344,49 +344,6 @@ void profile_tick(int type) #include #include -static int prof_cpu_mask_proc_show(struct seq_file *m, void *v) -{ - seq_printf(m, "%*pb\n", cpumask_pr_args(prof_cpu_mask)); - return 0; -} - -static int prof_cpu_mask_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, prof_cpu_mask_proc_show, NULL); -} - -static ssize_t prof_cpu_mask_proc_write(struct file *file, - const char __user *buffer, size_t count, loff_t *pos) -{ - cpumask_var_t new_value; - int err; - - if (!zalloc_cpumask_var(&new_value, GFP_KERNEL)) - return -ENOMEM; - - err = cpumask_parse_user(buffer, count, new_value); - if (!err) { - cpumask_copy(prof_cpu_mask, new_value); - err = count; - } - free_cpumask_var(new_value); - return err; -} - -static const struct proc_ops prof_cpu_mask_proc_ops = { - .proc_open = prof_cpu_mask_proc_open, - .proc_read = seq_read, - .proc_lseek = seq_lseek, - .proc_release = single_release, - .proc_write = prof_cpu_mask_proc_write, -}; - -void create_prof_cpu_mask(void) -{ - /* create /proc/irq/prof_cpu_mask */ - proc_create("irq/prof_cpu_mask", 0600, NULL, &prof_cpu_mask_proc_ops); -} - /* * This function accesses profiling information. The returned data is * binary: the sampling step and the actual contents of the profile -- cgit v1.2.3 From f848337cd801c7106a4ec0d61765771dab2a5909 Mon Sep 17 00:00:00 2001 From: Oswald Buddenhagen Date: Sun, 28 Apr 2024 11:37:13 +0200 Subject: ALSA: emu10k1: move the whole GPIO event handling to the workqueue The actual event processing was already done by workqueue items. We can move the event dispatching there as well, rather than doing it already in the interrupt handler callback. This change has a rather profound "side effect" on the reliability of the FPGA programming: once we enter programming mode, we must not issue any snd_emu1010_fpga_{read,write}() calls until we're done, as these would badly mess up the programming protocol. But exactly that would happen when trying to program the dock, as that triggers GPIO interrupts as a side effect. This is mitigated by deferring the actual interrupt handling, as workqueue items are not re-entrant. 
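In condensed form, the flow established by the hunks below looks roughly like
this (a sketch only; the shutdown/suspend checks, locking and the new pin
filtering are omitted for brevity):

  static void emu1010_interrupt(struct snd_emu10k1 *emu)
  {
          /* Hard-IRQ context: do nothing except kick the single work item. */
          schedule_work(&emu->emu1010.work);
  }

  static void emu1010_work(struct work_struct *work)
  {
          struct snd_emu10k1 *emu =
                  container_of(work, struct snd_emu10k1, emu1010.work);
          u32 sts;

          /* All FPGA register traffic now happens in process context. */
          snd_emu1010_fpga_read(emu, EMU_HANA_IRQ_STATUS, &sts);
          if (sts & (EMU_HANA_IRQ_DOCK | EMU_HANA_IRQ_DOCK_LOST))
                  emu1010_dock_event(emu);
          if (sts & EMU_HANA_IRQ_WCLK_CHANGED)
                  emu1010_clock_event(emu);
  }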
To avoid scheduling the dispatcher on non-events, we now explicitly ignore GPIO IRQs triggered by "uninteresting" pins, which happens a lot as a side effect of calling snd_emu1010_fpga_{read,write}(). Fixes: fbb64eedf5a3 ("ALSA: emu10k1: make E-MU dock monitoring interrupt-driven") Link: https://bugzilla.kernel.org/show_bug.cgi?id=218584 Signed-off-by: Oswald Buddenhagen Signed-off-by: Takashi Iwai Message-ID: <20240428093716.3198666-4-oswald.buddenhagen@gmx.de> --- include/sound/emu10k1.h | 3 +-- sound/pci/emu10k1/emu10k1.c | 3 +-- sound/pci/emu10k1/emu10k1_main.c | 56 ++++++++++++++++++++-------------------- 3 files changed, 30 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/sound/emu10k1.h b/include/sound/emu10k1.h index 1af9e6819392..9cc10fab01a8 100644 --- a/include/sound/emu10k1.h +++ b/include/sound/emu10k1.h @@ -1684,8 +1684,7 @@ struct snd_emu1010 { unsigned int clock_fallback; unsigned int optical_in; /* 0:SPDIF, 1:ADAT */ unsigned int optical_out; /* 0:SPDIF, 1:ADAT */ - struct work_struct firmware_work; - struct work_struct clock_work; + struct work_struct work; }; struct snd_emu10k1 { diff --git a/sound/pci/emu10k1/emu10k1.c b/sound/pci/emu10k1/emu10k1.c index fe72e7d77241..dadeda7758ce 100644 --- a/sound/pci/emu10k1/emu10k1.c +++ b/sound/pci/emu10k1/emu10k1.c @@ -189,8 +189,7 @@ static int snd_emu10k1_suspend(struct device *dev) emu->suspend = 1; - cancel_work_sync(&emu->emu1010.firmware_work); - cancel_work_sync(&emu->emu1010.clock_work); + cancel_work_sync(&emu->emu1010.work); snd_ac97_suspend(emu->ac97); diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c index 6265fc9ae260..86eaf5963502 100644 --- a/sound/pci/emu10k1/emu10k1_main.c +++ b/sound/pci/emu10k1/emu10k1_main.c @@ -765,19 +765,10 @@ static void snd_emu1010_load_dock_firmware(struct snd_emu10k1 *emu) msleep(10); } -static void emu1010_firmware_work(struct work_struct *work) +static void emu1010_dock_event(struct snd_emu10k1 *emu) { - struct snd_emu10k1 *emu; u32 reg; - emu = container_of(work, struct snd_emu10k1, - emu1010.firmware_work); - if (emu->card->shutdown) - return; -#ifdef CONFIG_PM_SLEEP - if (emu->suspend) - return; -#endif snd_emu1010_fpga_read(emu, EMU_HANA_OPTION_CARDS, ®); /* OPTIONS: Which cards are attached to the EMU */ if (reg & EMU_HANA_OPTION_DOCK_OFFLINE) { /* Audio Dock attached */ @@ -792,20 +783,10 @@ static void emu1010_firmware_work(struct work_struct *work) } } -static void emu1010_clock_work(struct work_struct *work) +static void emu1010_clock_event(struct snd_emu10k1 *emu) { - struct snd_emu10k1 *emu; struct snd_ctl_elem_id id; - emu = container_of(work, struct snd_emu10k1, - emu1010.clock_work); - if (emu->card->shutdown) - return; -#ifdef CONFIG_PM_SLEEP - if (emu->suspend) - return; -#endif - spin_lock_irq(&emu->reg_lock); // This is the only thing that can actually happen. 
emu->emu1010.clock_source = emu->emu1010.clock_fallback; @@ -816,19 +797,40 @@ static void emu1010_clock_work(struct work_struct *work) snd_ctl_notify(emu->card, SNDRV_CTL_EVENT_MASK_VALUE, &id); } -static void emu1010_interrupt(struct snd_emu10k1 *emu) +static void emu1010_work(struct work_struct *work) { + struct snd_emu10k1 *emu; u32 sts; + emu = container_of(work, struct snd_emu10k1, emu1010.work); + if (emu->card->shutdown) + return; +#ifdef CONFIG_PM_SLEEP + if (emu->suspend) + return; +#endif + snd_emu1010_fpga_read(emu, EMU_HANA_IRQ_STATUS, &sts); // The distinction of the IRQ status bits is unreliable, // so we dispatch later based on option card status. if (sts & (EMU_HANA_IRQ_DOCK | EMU_HANA_IRQ_DOCK_LOST)) - schedule_work(&emu->emu1010.firmware_work); + emu1010_dock_event(emu); if (sts & EMU_HANA_IRQ_WCLK_CHANGED) - schedule_work(&emu->emu1010.clock_work); + emu1010_clock_event(emu); +} + +static void emu1010_interrupt(struct snd_emu10k1 *emu) +{ + // We get an interrupt on each GPIO input pin change, but we + // care only about the ones triggered by the dedicated pin. + u16 sts = inw(emu->port + A_GPIO); + u16 bit = emu->card_capabilities->ca0108_chip ? 0x2000 : 0x8000; + if (!(sts & bit)) + return; + + schedule_work(&emu->emu1010.work); } /* @@ -969,8 +971,7 @@ static void snd_emu10k1_free(struct snd_card *card) /* Disable 48Volt power to Audio Dock */ snd_emu1010_fpga_write(emu, EMU_HANA_DOCK_PWR, 0); } - cancel_work_sync(&emu->emu1010.firmware_work); - cancel_work_sync(&emu->emu1010.clock_work); + cancel_work_sync(&emu->emu1010.work); release_firmware(emu->firmware); release_firmware(emu->dock_fw); snd_util_memhdr_free(emu->memhdr); @@ -1549,8 +1550,7 @@ int snd_emu10k1_create(struct snd_card *card, emu->irq = -1; emu->synth = NULL; emu->get_synth_voice = NULL; - INIT_WORK(&emu->emu1010.firmware_work, emu1010_firmware_work); - INIT_WORK(&emu->emu1010.clock_work, emu1010_clock_work); + INIT_WORK(&emu->emu1010.work, emu1010_work); /* read revision & serial */ emu->revision = pci->revision; pci_read_config_dword(pci, PCI_SUBSYSTEM_VENDOR_ID, &emu->serial); -- cgit v1.2.3 From 2d3f4810886eb7c319cec41b6d725d2953bfa88a Mon Sep 17 00:00:00 2001 From: Oswald Buddenhagen Date: Sun, 28 Apr 2024 11:37:14 +0200 Subject: ALSA: emu10k1: use mutex for E-MU FPGA access locking The FPGA access through the GPIO port does not interfere with other sound processor register access, so there is no need to subject it to emu_lock. And after moving all FPGA access out of the interrupt handler, it does not need to be IRQ-safe, either. What's more, attaching the dock causes a firmware upload, which takes several seconds. We really don't want to disable IRQs for this long, and even less also have someone else spin with IRQs disabled waiting for us. Therefore, use a mutex for FPGA access locking. This makes the code somewhat more noisy, as we need to wrap bigger sections into the mutex, as it needs to enclose the spinlocks. The latter has the "side effect" of fixing dock FPGA programming in a corner case: a really badly timed mixer access right between entering FPGA programming mode and uploading the netlist would mess up the protocol. 
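The resulting locking convention, roughly (a sketch; example_link_read() is an
illustrative name, the helpers are the ones introduced below): callers bracket
a whole group of related FPGA accesses with one lock/unlock pair, while a
single isolated access can use snd_emu1010_fpga_write_lock() instead.

  static u32 example_link_read(struct snd_emu10k1 *emu, u32 dst)
  {
          u32 hi, lo;

          snd_emu1010_fpga_lock(emu);    /* one mutex section for the whole exchange */
          snd_emu1010_fpga_write(emu, EMU_HANA_DESTHI, dst >> 8);
          snd_emu1010_fpga_write(emu, EMU_HANA_DESTLO, dst & 0x1f);
          snd_emu1010_fpga_read(emu, EMU_HANA_SRCHI, &hi);
          snd_emu1010_fpga_read(emu, EMU_HANA_SRCLO, &lo);
          snd_emu1010_fpga_unlock(emu);

          return (hi << 8) | lo;
  }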
Signed-off-by: Oswald Buddenhagen Signed-off-by: Takashi Iwai Message-ID: <20240428093716.3198666-5-oswald.buddenhagen@gmx.de> --- include/sound/emu10k1.h | 4 ++++ sound/pci/emu10k1/emu10k1_main.c | 19 ++++++++++++---- sound/pci/emu10k1/emumixer.c | 18 ++++++++++----- sound/pci/emu10k1/emuproc.c | 9 ++++++++ sound/pci/emu10k1/io.c | 48 +++++++++++++++++----------------------- 5 files changed, 61 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/include/sound/emu10k1.h b/include/sound/emu10k1.h index 9cc10fab01a8..234b5baea69c 100644 --- a/include/sound/emu10k1.h +++ b/include/sound/emu10k1.h @@ -1685,6 +1685,7 @@ struct snd_emu1010 { unsigned int optical_in; /* 0:SPDIF, 1:ADAT */ unsigned int optical_out; /* 0:SPDIF, 1:ADAT */ struct work_struct work; + struct mutex lock; }; struct snd_emu10k1 { @@ -1833,6 +1834,9 @@ unsigned int snd_emu10k1_ptr20_read(struct snd_emu10k1 * emu, unsigned int reg, void snd_emu10k1_ptr20_write(struct snd_emu10k1 *emu, unsigned int reg, unsigned int chn, unsigned int data); int snd_emu10k1_spi_write(struct snd_emu10k1 * emu, unsigned int data); int snd_emu10k1_i2c_write(struct snd_emu10k1 *emu, u32 reg, u32 value); +static inline void snd_emu1010_fpga_lock(struct snd_emu10k1 *emu) { mutex_lock(&emu->emu1010.lock); }; +static inline void snd_emu1010_fpga_unlock(struct snd_emu10k1 *emu) { mutex_unlock(&emu->emu1010.lock); }; +void snd_emu1010_fpga_write_lock(struct snd_emu10k1 *emu, u32 reg, u32 value); void snd_emu1010_fpga_write(struct snd_emu10k1 *emu, u32 reg, u32 value); void snd_emu1010_fpga_read(struct snd_emu10k1 *emu, u32 reg, u32 *value); void snd_emu1010_fpga_link_dst_src_write(struct snd_emu10k1 *emu, u32 dst, u32 src); diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c index 86eaf5963502..1a2905e8672b 100644 --- a/sound/pci/emu10k1/emu10k1_main.c +++ b/sound/pci/emu10k1/emu10k1_main.c @@ -810,6 +810,8 @@ static void emu1010_work(struct work_struct *work) return; #endif + snd_emu1010_fpga_lock(emu); + snd_emu1010_fpga_read(emu, EMU_HANA_IRQ_STATUS, &sts); // The distinction of the IRQ status bits is unreliable, @@ -819,6 +821,8 @@ static void emu1010_work(struct work_struct *work) if (sts & EMU_HANA_IRQ_WCLK_CHANGED) emu1010_clock_event(emu); + + snd_emu1010_fpga_unlock(emu); } static void emu1010_interrupt(struct snd_emu10k1 *emu) @@ -852,6 +856,8 @@ static int snd_emu10k1_emu1010_init(struct snd_emu10k1 *emu) * Proper init follows in snd_emu10k1_init(). */ outl(HCFG_LOCKSOUNDCACHE | HCFG_LOCKTANKCACHE_MASK, emu->port + HCFG); + snd_emu1010_fpga_lock(emu); + /* Disable 48Volt power to Audio Dock */ snd_emu1010_fpga_write(emu, EMU_HANA_DOCK_PWR, 0); @@ -877,7 +883,7 @@ static int snd_emu10k1_emu1010_init(struct snd_emu10k1 *emu) err = snd_emu1010_load_firmware(emu, 0, &emu->firmware); if (err < 0) { dev_info(emu->card->dev, "emu1010: Loading Firmware failed\n"); - return err; + goto fail; } /* ID, should read & 0x7f = 0x55 when FPGA programmed. */ @@ -887,7 +893,8 @@ static int snd_emu10k1_emu1010_init(struct snd_emu10k1 *emu) dev_info(emu->card->dev, "emu1010: Loading Hana Firmware file failed, reg = 0x%x\n", reg); - return -ENODEV; + err = -ENODEV; + goto fail; } dev_info(emu->card->dev, "emu1010: Hana Firmware loaded\n"); @@ -947,7 +954,9 @@ static int snd_emu10k1_emu1010_init(struct snd_emu10k1 *emu) // so it is safe to simply enable the outputs. 
snd_emu1010_fpga_write(emu, EMU_HANA_UNMUTE, EMU_UNMUTE); - return 0; +fail: + snd_emu1010_fpga_unlock(emu); + return err; } /* * Create the EMU10K1 instance @@ -969,9 +978,10 @@ static void snd_emu10k1_free(struct snd_card *card) } if (emu->card_capabilities->emu_model == EMU_MODEL_EMU1010) { /* Disable 48Volt power to Audio Dock */ - snd_emu1010_fpga_write(emu, EMU_HANA_DOCK_PWR, 0); + snd_emu1010_fpga_write_lock(emu, EMU_HANA_DOCK_PWR, 0); } cancel_work_sync(&emu->emu1010.work); + mutex_destroy(&emu->emu1010.lock); release_firmware(emu->firmware); release_firmware(emu->dock_fw); snd_util_memhdr_free(emu->memhdr); @@ -1551,6 +1561,7 @@ int snd_emu10k1_create(struct snd_card *card, emu->synth = NULL; emu->get_synth_voice = NULL; INIT_WORK(&emu->emu1010.work, emu1010_work); + mutex_init(&emu->emu1010.lock); /* read revision & serial */ emu->revision = pci->revision; pci_read_config_dword(pci, PCI_SUBSYSTEM_VENDOR_ID, &emu->serial); diff --git a/sound/pci/emu10k1/emumixer.c b/sound/pci/emu10k1/emumixer.c index 0a32ea53d8c6..05b98d9b547b 100644 --- a/sound/pci/emu10k1/emumixer.c +++ b/sound/pci/emu10k1/emumixer.c @@ -661,7 +661,9 @@ static int snd_emu1010_output_source_put(struct snd_kcontrol *kcontrol, change = (emu->emu1010.output_source[channel] != val); if (change) { emu->emu1010.output_source[channel] = val; + snd_emu1010_fpga_lock(emu); snd_emu1010_output_source_apply(emu, channel, val); + snd_emu1010_fpga_unlock(emu); } return change; } @@ -705,7 +707,9 @@ static int snd_emu1010_input_source_put(struct snd_kcontrol *kcontrol, change = (emu->emu1010.input_source[channel] != val); if (change) { emu->emu1010.input_source[channel] = val; + snd_emu1010_fpga_lock(emu); snd_emu1010_input_source_apply(emu, channel, val); + snd_emu1010_fpga_unlock(emu); } return change; } @@ -774,7 +778,7 @@ static int snd_emu1010_adc_pads_put(struct snd_kcontrol *kcontrol, struct snd_ct cache = cache & ~mask; change = (cache != emu->emu1010.adc_pads); if (change) { - snd_emu1010_fpga_write(emu, EMU_HANA_ADC_PADS, cache ); + snd_emu1010_fpga_write_lock(emu, EMU_HANA_ADC_PADS, cache ); emu->emu1010.adc_pads = cache; } @@ -832,7 +836,7 @@ static int snd_emu1010_dac_pads_put(struct snd_kcontrol *kcontrol, struct snd_ct cache = cache & ~mask; change = (cache != emu->emu1010.dac_pads); if (change) { - snd_emu1010_fpga_write(emu, EMU_HANA_DAC_PADS, cache ); + snd_emu1010_fpga_write_lock(emu, EMU_HANA_DAC_PADS, cache ); emu->emu1010.dac_pads = cache; } @@ -980,6 +984,7 @@ static int snd_emu1010_clock_source_put(struct snd_kcontrol *kcontrol, val = ucontrol->value.enumerated.item[0] ; if (val >= emu_ci->num) return -EINVAL; + snd_emu1010_fpga_lock(emu); spin_lock_irq(&emu->reg_lock); change = (emu->emu1010.clock_source != val); if (change) { @@ -996,6 +1001,7 @@ static int snd_emu1010_clock_source_put(struct snd_kcontrol *kcontrol, } else { spin_unlock_irq(&emu->reg_lock); } + snd_emu1010_fpga_unlock(emu); return change; } @@ -1041,7 +1047,7 @@ static int snd_emu1010_clock_fallback_put(struct snd_kcontrol *kcontrol, change = (emu->emu1010.clock_fallback != val); if (change) { emu->emu1010.clock_fallback = val; - snd_emu1010_fpga_write(emu, EMU_HANA_DEFCLOCK, 1 - val); + snd_emu1010_fpga_write_lock(emu, EMU_HANA_DEFCLOCK, 1 - val); } return change; } @@ -1093,7 +1099,7 @@ static int snd_emu1010_optical_out_put(struct snd_kcontrol *kcontrol, emu->emu1010.optical_out = val; tmp = (emu->emu1010.optical_in ? EMU_HANA_OPTICAL_IN_ADAT : EMU_HANA_OPTICAL_IN_SPDIF) | (emu->emu1010.optical_out ? 
EMU_HANA_OPTICAL_OUT_ADAT : EMU_HANA_OPTICAL_OUT_SPDIF); - snd_emu1010_fpga_write(emu, EMU_HANA_OPTICAL_TYPE, tmp); + snd_emu1010_fpga_write_lock(emu, EMU_HANA_OPTICAL_TYPE, tmp); } return change; } @@ -1144,7 +1150,7 @@ static int snd_emu1010_optical_in_put(struct snd_kcontrol *kcontrol, emu->emu1010.optical_in = val; tmp = (emu->emu1010.optical_in ? EMU_HANA_OPTICAL_IN_ADAT : EMU_HANA_OPTICAL_IN_SPDIF) | (emu->emu1010.optical_out ? EMU_HANA_OPTICAL_OUT_ADAT : EMU_HANA_OPTICAL_OUT_SPDIF); - snd_emu1010_fpga_write(emu, EMU_HANA_OPTICAL_TYPE, tmp); + snd_emu1010_fpga_write_lock(emu, EMU_HANA_OPTICAL_TYPE, tmp); } return change; } @@ -2323,7 +2329,9 @@ int snd_emu10k1_mixer(struct snd_emu10k1 *emu, for (i = 0; i < emu_ri->n_outs; i++) emu->emu1010.output_source[i] = emu1010_map_source(emu_ri, emu_ri->out_dflts[i]); + snd_emu1010_fpga_lock(emu); snd_emu1010_apply_sources(emu); + snd_emu1010_fpga_unlock(emu); kctl = emu->ctl_clock_source = snd_ctl_new1(&snd_emu1010_clock_source, emu); err = snd_ctl_add(card, kctl); diff --git a/sound/pci/emu10k1/emuproc.c b/sound/pci/emu10k1/emuproc.c index 2f80fd91017c..737c28d31b41 100644 --- a/sound/pci/emu10k1/emuproc.c +++ b/sound/pci/emu10k1/emuproc.c @@ -165,6 +165,8 @@ static void snd_emu10k1_proc_spdif_read(struct snd_info_entry *entry, u32 value2; if (emu->card_capabilities->emu_model) { + snd_emu1010_fpga_lock(emu); + // This represents the S/PDIF lock status on 0404b, which is // kinda weird and unhelpful, because monitoring it via IRQ is // impractical (one gets an IRQ flood as long as it is desynced). @@ -197,6 +199,8 @@ static void snd_emu10k1_proc_spdif_read(struct snd_info_entry *entry, snd_iprintf(buffer, "\nS/PDIF mode: %s%s\n", value & EMU_HANA_SPDIF_MODE_RX_PRO ? "professional" : "consumer", value & EMU_HANA_SPDIF_MODE_RX_NOCOPY ? 
", no copy" : ""); + + snd_emu1010_fpga_unlock(emu); } else { snd_emu10k1_proc_spdif_status(emu, buffer, "CD-ROM S/PDIF In", CDCS, CDSRCS); snd_emu10k1_proc_spdif_status(emu, buffer, "Optical or Coax S/PDIF In", GPSCS, GPSRCS); @@ -458,6 +462,9 @@ static void snd_emu_proc_emu1010_reg_read(struct snd_info_entry *entry, struct snd_emu10k1 *emu = entry->private_data; u32 value; int i; + + snd_emu1010_fpga_lock(emu); + snd_iprintf(buffer, "EMU1010 Registers:\n\n"); for(i = 0; i < 0x40; i+=1) { @@ -496,6 +503,8 @@ static void snd_emu_proc_emu1010_reg_read(struct snd_info_entry *entry, snd_emu_proc_emu1010_link_read(emu, buffer, 0x701); } } + + snd_emu1010_fpga_unlock(emu); } static void snd_emu_proc_io_reg_read(struct snd_info_entry *entry, diff --git a/sound/pci/emu10k1/io.c b/sound/pci/emu10k1/io.c index 74df2330015f..f3260a81e47b 100644 --- a/sound/pci/emu10k1/io.c +++ b/sound/pci/emu10k1/io.c @@ -289,20 +289,28 @@ static void snd_emu1010_fpga_write_locked(struct snd_emu10k1 *emu, u32 reg, u32 void snd_emu1010_fpga_write(struct snd_emu10k1 *emu, u32 reg, u32 value) { - unsigned long flags; + if (snd_BUG_ON(!mutex_is_locked(&emu->emu1010.lock))) + return; + snd_emu1010_fpga_write_locked(emu, reg, value); +} - spin_lock_irqsave(&emu->emu_lock, flags); +void snd_emu1010_fpga_write_lock(struct snd_emu10k1 *emu, u32 reg, u32 value) +{ + snd_emu1010_fpga_lock(emu); snd_emu1010_fpga_write_locked(emu, reg, value); - spin_unlock_irqrestore(&emu->emu_lock, flags); + snd_emu1010_fpga_unlock(emu); } -static void snd_emu1010_fpga_read_locked(struct snd_emu10k1 *emu, u32 reg, u32 *value) +void snd_emu1010_fpga_read(struct snd_emu10k1 *emu, u32 reg, u32 *value) { // The higest input pin is used as the designated interrupt trigger, // so it needs to be masked out. // But note that any other input pin change will also cause an IRQ, // so using this function often causes an IRQ as a side effect. u32 mask = emu->card_capabilities->ca0108_chip ? 0x1f : 0x7f; + + if (snd_BUG_ON(!mutex_is_locked(&emu->emu1010.lock))) + return; if (snd_BUG_ON(reg > 0x3f)) return; reg += 0x40; /* 0x40 upwards are registers. */ @@ -313,47 +321,31 @@ static void snd_emu1010_fpga_read_locked(struct snd_emu10k1 *emu, u32 reg, u32 * *value = ((inw(emu->port + A_GPIO) >> 8) & mask); } -void snd_emu1010_fpga_read(struct snd_emu10k1 *emu, u32 reg, u32 *value) -{ - unsigned long flags; - - spin_lock_irqsave(&emu->emu_lock, flags); - snd_emu1010_fpga_read_locked(emu, reg, value); - spin_unlock_irqrestore(&emu->emu_lock, flags); -} - /* Each Destination has one and only one Source, * but one Source can feed any number of Destinations simultaneously. 
*/ void snd_emu1010_fpga_link_dst_src_write(struct snd_emu10k1 *emu, u32 dst, u32 src) { - unsigned long flags; - if (snd_BUG_ON(dst & ~0x71f)) return; if (snd_BUG_ON(src & ~0x71f)) return; - spin_lock_irqsave(&emu->emu_lock, flags); - snd_emu1010_fpga_write_locked(emu, EMU_HANA_DESTHI, dst >> 8); - snd_emu1010_fpga_write_locked(emu, EMU_HANA_DESTLO, dst & 0x1f); - snd_emu1010_fpga_write_locked(emu, EMU_HANA_SRCHI, src >> 8); - snd_emu1010_fpga_write_locked(emu, EMU_HANA_SRCLO, src & 0x1f); - spin_unlock_irqrestore(&emu->emu_lock, flags); + snd_emu1010_fpga_write(emu, EMU_HANA_DESTHI, dst >> 8); + snd_emu1010_fpga_write(emu, EMU_HANA_DESTLO, dst & 0x1f); + snd_emu1010_fpga_write(emu, EMU_HANA_SRCHI, src >> 8); + snd_emu1010_fpga_write(emu, EMU_HANA_SRCLO, src & 0x1f); } u32 snd_emu1010_fpga_link_dst_src_read(struct snd_emu10k1 *emu, u32 dst) { - unsigned long flags; u32 hi, lo; if (snd_BUG_ON(dst & ~0x71f)) return 0; - spin_lock_irqsave(&emu->emu_lock, flags); - snd_emu1010_fpga_write_locked(emu, EMU_HANA_DESTHI, dst >> 8); - snd_emu1010_fpga_write_locked(emu, EMU_HANA_DESTLO, dst & 0x1f); - snd_emu1010_fpga_read_locked(emu, EMU_HANA_SRCHI, &hi); - snd_emu1010_fpga_read_locked(emu, EMU_HANA_SRCLO, &lo); - spin_unlock_irqrestore(&emu->emu_lock, flags); + snd_emu1010_fpga_write(emu, EMU_HANA_DESTHI, dst >> 8); + snd_emu1010_fpga_write(emu, EMU_HANA_DESTLO, dst & 0x1f); + snd_emu1010_fpga_read(emu, EMU_HANA_SRCHI, &hi); + snd_emu1010_fpga_read(emu, EMU_HANA_SRCLO, &lo); return (hi << 8) | lo; } -- cgit v1.2.3 From 5ef31ea5d053a8f493a772ebad3f3ce82c35d845 Mon Sep 17 00:00:00 2001 From: Richard Gobert Date: Tue, 30 Apr 2024 16:35:54 +0200 Subject: net: gro: fix udp bad offset in socket lookup by adding {inner_}network_offset to napi_gro_cb Commits a602456 ("udp: Add GRO functions to UDP socket") and 57c67ff ("udp: additional GRO support") introduce incorrect usage of {ip,ipv6}_hdr in the complete phase of gro. The functions always return skb->network_header, which in the case of encapsulated packets at the gro complete phase, is always set to the innermost L3 of the packet. That means that calling {ip,ipv6}_hdr for skbs which completed the GRO receive phase (both in gro_list and *_gro_complete) when parsing an encapsulated packet's _outer_ L3/L4 may return an unexpected value. This incorrect usage leads to a bug in GRO's UDP socket lookup. udp{4,6}_lib_lookup_skb functions use ip_hdr/ipv6_hdr respectively. These *_hdr functions return network_header which will point to the innermost L3, resulting in the wrong offset being used in __udp{4,6}_lib_lookup with encapsulated packets. This patch adds network_offset and inner_network_offset to napi_gro_cb, and makes sure both are set correctly. To fix the issue, network_offsets union is used inside napi_gro_cb, in which both the outer and the inner network offsets are saved. Reproduction example: Endpoint configuration example (fou + local address bind) # ip fou add port 6666 ipproto 4 # ip link add name tun1 type ipip remote 2.2.2.1 local 2.2.2.2 encap fou encap-dport 5555 encap-sport 6666 mode ipip # ip link set tun1 up # ip a add 1.1.1.2/24 dev tun1 Netperf TCP_STREAM result on net-next before patch is applied: net-next main, GRO enabled: $ netperf -H 1.1.1.2 -t TCP_STREAM -l 5 Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 
10^6bits/sec 131072 16384 16384 5.28 2.37 net-next main, GRO disabled: $ netperf -H 1.1.1.2 -t TCP_STREAM -l 5 Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 131072 16384 16384 5.01 2745.06 patch applied, GRO enabled: $ netperf -H 1.1.1.2 -t TCP_STREAM -l 5 Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 131072 16384 16384 5.01 2877.38 Fixes: a6024562ffd7 ("udp: Add GRO functions to UDP socket") Signed-off-by: Richard Gobert Reviewed-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/net/gro.h | 9 +++++++++ net/8021q/vlan_core.c | 2 ++ net/core/gro.c | 1 + net/ipv4/af_inet.c | 1 + net/ipv4/udp.c | 3 ++- net/ipv4/udp_offload.c | 3 ++- net/ipv6/ip6_offload.c | 1 + net/ipv6/udp.c | 3 ++- net/ipv6/udp_offload.c | 3 ++- 9 files changed, 22 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/gro.h b/include/net/gro.h index 50f1e403dbbb..c1d4ca0463a1 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -87,6 +87,15 @@ struct napi_gro_cb { /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; + + /* L3 offsets */ + union { + struct { + u16 network_offset; + u16 inner_network_offset; + }; + u16 network_offsets[2]; + }; }; #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index f00158234505..9404dd551dfd 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -478,6 +478,8 @@ static struct sk_buff *vlan_gro_receive(struct list_head *head, if (unlikely(!vhdr)) goto out; + NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark] = hlen; + type = vhdr->h_vlan_encapsulated_proto; ptype = gro_find_receive_by_type(type); diff --git a/net/core/gro.c b/net/core/gro.c index 83f35d99a682..c7901253a1a8 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -371,6 +371,7 @@ static inline void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff) const skb_frag_t *frag0; unsigned int headlen; + NAPI_GRO_CB(skb)->network_offset = 0; NAPI_GRO_CB(skb)->data_offset = 0; headlen = skb_headlen(skb); NAPI_GRO_CB(skb)->frag0 = skb->data; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 55bd72997b31..fafb123f798b 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1572,6 +1572,7 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb) /* The above will be needed by the transport layer if there is one * immediately following this IP hdr. 
*/ + NAPI_GRO_CB(skb)->inner_network_offset = off; /* Note : No need to call skb_gro_postpull_rcsum() here, * as we already checked checksum over ipv4 header was 0 diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 420905be5f30..b32cf2eeeb41 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -532,7 +532,8 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport) { - const struct iphdr *iph = ip_hdr(skb); + const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; + const struct iphdr *iph = (struct iphdr *)(skb->data + offset); struct net *net = dev_net(skb->dev); int iif, sdif; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 3498dd1d0694..fd29d21d579c 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -718,7 +718,8 @@ EXPORT_SYMBOL(udp_gro_complete); INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff) { - const struct iphdr *iph = ip_hdr(skb); + const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; + const struct iphdr *iph = (struct iphdr *)(skb->data + offset); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); /* do fraglist only if there is no outer UDP encap (or we already processed it) */ diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index b41e35af69ea..c8b909a9904f 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -237,6 +237,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head, goto out; skb_set_network_header(skb, off); + NAPI_GRO_CB(skb)->inner_network_offset = off; flush += ntohs(iph->payload_len) != skb->len - hlen; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1a4cccdd40c9..8f7aa8bac1e7 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -272,7 +272,8 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport) { - const struct ipv6hdr *iph = ipv6_hdr(skb); + const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; + const struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + offset); struct net *net = dev_net(skb->dev); int iif, sdif; diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index bbd347de00b4..b41152dd4246 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -164,7 +164,8 @@ flush: INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff) { - const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; + const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + offset); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); /* do fraglist only if there is no outer UDP encap (or we already processed it) */ -- cgit v1.2.3 From b63db58e2fa5d6963db9c45df88e60060f0ff35f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 2 May 2024 09:03:15 -0400 Subject: eventfs/tracing: Add callback for release of an eventfs_inode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Synthetic events create and destroy tracefs files when they are created and removed. The tracing subsystem has its own file descriptor representing the state of the events attached to the tracefs files. 
There's a race between the eventfs files and this file descriptor of the tracing system where the following can cause an issue: With two scripts 'A' and 'B' doing: Script 'A': echo "hello int aaa" > /sys/kernel/tracing/synthetic_events while : do echo 0 > /sys/kernel/tracing/events/synthetic/hello/enable done Script 'B': echo > /sys/kernel/tracing/synthetic_events Script 'A' creates a synthetic event "hello" and then just writes zero into its enable file. Script 'B' removes all synthetic events (including the newly created "hello" event). What happens is that the opening of the "enable" file has: { struct trace_event_file *file = inode->i_private; int ret; ret = tracing_check_open_get_tr(file->tr); [..] But deleting the events frees the "file" descriptor, and a "use after free" happens with the dereference at "file->tr". The file descriptor does have a reference counter, but there needs to be a way to decrement it from the eventfs when the eventfs_inode is removed that represents this file descriptor. Add an optional "release" callback to the eventfs_entry array structure, that gets called when the eventfs file is about to be removed. This allows for the creating on the eventfs file to increment the tracing file descriptor ref counter. When the eventfs file is deleted, it can call the release function that will call the put function for the tracing file descriptor. This will protect the tracing file from being freed while a eventfs file that references it is being opened. Link: https://lore.kernel.org/linux-trace-kernel/20240426073410.17154-1-Tze-nan.Wu@mediatek.com/ Link: https://lore.kernel.org/linux-trace-kernel/20240502090315.448cba46@gandalf.local.home Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode") Reported-by: Tze-nan wu Tested-by: Tze-nan Wu (吳澤南) Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 23 +++++++++++++++++++++-- include/linux/tracefs.h | 3 +++ kernel/trace/trace_events.c | 12 ++++++++++++ 3 files changed, 36 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 894c6ca1e500..f5510e26f0f6 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -84,10 +84,17 @@ enum { static void release_ei(struct kref *ref) { struct eventfs_inode *ei = container_of(ref, struct eventfs_inode, kref); + const struct eventfs_entry *entry; struct eventfs_root_inode *rei; WARN_ON_ONCE(!ei->is_freed); + for (int i = 0; i < ei->nr_entries; i++) { + entry = &ei->entries[i]; + if (entry->release) + entry->release(entry->name, ei->data); + } + kfree(ei->entry_attrs); kfree_const(ei->name); if (ei->is_events) { @@ -112,6 +119,18 @@ static inline void free_ei(struct eventfs_inode *ei) } } +/* + * Called when creation of an ei fails, do not call release() functions. + */ +static inline void cleanup_ei(struct eventfs_inode *ei) +{ + if (ei) { + /* Set nr_entries to 0 to prevent release() function being called */ + ei->nr_entries = 0; + free_ei(ei); + } +} + static inline struct eventfs_inode *get_ei(struct eventfs_inode *ei) { if (ei) @@ -734,7 +753,7 @@ struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode /* Was the parent freed? 
*/ if (list_empty(&ei->list)) { - free_ei(ei); + cleanup_ei(ei); ei = NULL; } return ei; @@ -835,7 +854,7 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry return ei; fail: - free_ei(ei); + cleanup_ei(ei); tracefs_failed_creating(dentry); return ERR_PTR(-ENOMEM); } diff --git a/include/linux/tracefs.h b/include/linux/tracefs.h index 7a5fe17b6bf9..d03f74658716 100644 --- a/include/linux/tracefs.h +++ b/include/linux/tracefs.h @@ -62,6 +62,8 @@ struct eventfs_file; typedef int (*eventfs_callback)(const char *name, umode_t *mode, void **data, const struct file_operations **fops); +typedef void (*eventfs_release)(const char *name, void *data); + /** * struct eventfs_entry - dynamically created eventfs file call back handler * @name: Then name of the dynamic file in an eventfs directory @@ -72,6 +74,7 @@ typedef int (*eventfs_callback)(const char *name, umode_t *mode, void **data, struct eventfs_entry { const char *name; eventfs_callback callback; + eventfs_release release; }; struct eventfs_inode; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 52f75c36bbca..6ef29eba90ce 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2552,6 +2552,14 @@ static int event_callback(const char *name, umode_t *mode, void **data, return 0; } +/* The file is incremented on creation and freeing the enable file decrements it */ +static void event_release(const char *name, void *data) +{ + struct trace_event_file *file = data; + + event_file_put(file); +} + static int event_create_dir(struct eventfs_inode *parent, struct trace_event_file *file) { @@ -2566,6 +2574,7 @@ event_create_dir(struct eventfs_inode *parent, struct trace_event_file *file) { .name = "enable", .callback = event_callback, + .release = event_release, }, { .name = "filter", @@ -2634,6 +2643,9 @@ event_create_dir(struct eventfs_inode *parent, struct trace_event_file *file) return ret; } + /* Gets decremented on freeing of the "enable" file */ + event_file_get(file); + return 0; } -- cgit v1.2.3
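A condensed sketch of the pattern this enables (names prefixed with my_ are
illustrative; the rest follows the trace_events.c hunk above): the reference
taken by the creator of the eventfs directory is dropped from the new
->release() hook when the eventfs_inode is torn down.

  static void my_enable_release(const char *name, void *data)
  {
          struct trace_event_file *file = data;

          event_file_put(file);           /* pairs with event_file_get() at creation */
  }

  static const struct eventfs_entry my_entries[] = {
          {
                  .name     = "enable",
                  .callback = event_callback,     /* as in event_create_dir() */
                  .release  = my_enable_release,  /* called from release_ei() */
          },
  };

After the directory is created, the creator takes an extra reference on the
trace_event_file, so it cannot be freed while eventfs may still hand it out
through the "enable" callback.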