From aec7961916f3f9e88766e2688992da6980f11b8d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Feb 2024 17:18:19 -0800 Subject: tls: fix race between async notify and socket close The submitting thread (one which called recvmsg/sendmsg) may exit as soon as the async crypto handler calls complete() so any code past that point risks touching already freed data. Try to avoid the locking and extra flags altogether. Have the main thread hold an extra reference, this way we can depend solely on the atomic ref counter for synchronization. Don't futz with reiniting the completion, either, we are now tightly controlling when completion fires. Reported-by: valis Fixes: 0cada33241d9 ("net/tls: fix race condition causing kernel panic") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Eric Dumazet Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- include/net/tls.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/net/tls.h b/include/net/tls.h index 962f0c501111..340ad43971e4 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -97,9 +97,6 @@ struct tls_sw_context_tx { struct tls_rec *open_rec; struct list_head tx_list; atomic_t encrypt_pending; - /* protect crypto_wait with encrypt_pending */ - spinlock_t encrypt_compl_lock; - int async_notify; u8 async_capable:1; #define BIT_TX_SCHEDULED 0 @@ -136,8 +133,6 @@ struct tls_sw_context_rx { struct tls_strparser strp; atomic_t decrypt_pending; - /* protect crypto_wait with decrypt_pending*/ - spinlock_t decrypt_compl_lock; struct sk_buff_head async_hold; struct wait_queue_head wq; }; -- cgit v1.2.3 From 119ff04864a24470b1e531bb53e5c141aa8fefb0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Feb 2024 14:43:21 +0000 Subject: tcp: move tp->scaling_ratio to tcp_sock_read_txrx group tp->scaling_ratio is a read mostly field, used in rx and tx fast paths. Fixes: d5fed5addb2b ("tcp: reorganize tcp_sock fast path variables") Signed-off-by: Eric Dumazet Cc: Coco Li Cc: Wei Wang Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- Documentation/networking/net_cachelines/tcp_sock.rst | 2 +- include/linux/tcp.h | 2 +- net/ipv4/tcp.c | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/Documentation/networking/net_cachelines/tcp_sock.rst b/Documentation/networking/net_cachelines/tcp_sock.rst index 97d7a5c8e01c..803912291479 100644 --- a/Documentation/networking/net_cachelines/tcp_sock.rst +++ b/Documentation/networking/net_cachelines/tcp_sock.rst @@ -38,7 +38,7 @@ u32 max_window read_mostly - u32 mss_cache read_mostly read_mostly tcp_rate_check_app_limited,tcp_current_mss,tcp_sync_mss,tcp_sndbuf_expand,tcp_tso_should_defer(tx);tcp_update_pacing_rate,tcp_clean_rtx_queue(rx) u32 window_clamp read_mostly read_write tcp_rcv_space_adjust,__tcp_select_window u32 rcv_ssthresh read_mostly - __tcp_select_window -u82 scaling_ratio +u8 scaling_ratio read_mostly read_mostly tcp_win_from_space struct tcp_rack u16 advmss - read_mostly tcp_rcv_space_adjust u8 compressed_ack diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 89b290d8c8dc..168f5dca6609 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -221,6 +221,7 @@ struct tcp_sock { u32 lost_out; /* Lost packets */ u32 sacked_out; /* SACK'd packets */ u16 tcp_header_len; /* Bytes of tcp header to send */ + u8 scaling_ratio; /* see tcp_win_from_space() */ u8 chrono_type : 2, /* current chronograph type */ repair : 1, is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ @@ -352,7 +353,6 @@ struct tcp_sock { u32 compressed_ack_rcv_nxt; struct list_head tsq_node; /* anchor in tsq_tasklet.head list */ - u8 scaling_ratio; /* see tcp_win_from_space() */ /* Information of the most recently (s)acked skb */ struct tcp_rack { u64 mstamp; /* (Re)sent time of the skb */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7e2481b9eae1..c82dc42f57c6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4615,7 +4615,8 @@ static void __init tcp_struct_check(void) CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, prr_out); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, lost_out); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, sacked_out); - CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_txrx, 31); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, scaling_ratio); + CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_txrx, 32); /* RX read-mostly hotpath cache lines */ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, copied_seq); -- cgit v1.2.3 From 666a877deab2bcf8fd11c962d69e687e18168a6f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Feb 2024 14:43:22 +0000 Subject: tcp: move tp->tcp_usec_ts to tcp_sock_read_txrx group tp->tcp_usec_ts is a read mostly field, used in rx and tx fast paths. Fixes: d5fed5addb2b ("tcp: reorganize tcp_sock fast path variables") Signed-off-by: Eric Dumazet Cc: Coco Li Cc: Wei Wang Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- Documentation/networking/net_cachelines/tcp_sock.rst | 2 +- include/linux/tcp.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/Documentation/networking/net_cachelines/tcp_sock.rst b/Documentation/networking/net_cachelines/tcp_sock.rst index 803912291479..1c154cbd1848 100644 --- a/Documentation/networking/net_cachelines/tcp_sock.rst +++ b/Documentation/networking/net_cachelines/tcp_sock.rst @@ -44,7 +44,7 @@ u16 advmss - read_m u8 compressed_ack u8:2 dup_ack_counter u8:1 tlp_retrans -u8:1 tcp_usec_ts +u8:1 tcp_usec_ts read_mostly read_mostly u32 chrono_start read_write - tcp_chrono_start/stop(tcp_write_xmit,tcp_cwnd_validate,tcp_send_syn_data) u32[3] chrono_stat read_write - tcp_chrono_start/stop(tcp_write_xmit,tcp_cwnd_validate,tcp_send_syn_data) u8:2 chrono_type read_write - tcp_chrono_start/stop(tcp_write_xmit,tcp_cwnd_validate,tcp_send_syn_data) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 168f5dca6609..a1c47a6d69b0 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -224,6 +224,7 @@ struct tcp_sock { u8 scaling_ratio; /* see tcp_win_from_space() */ u8 chrono_type : 2, /* current chronograph type */ repair : 1, + tcp_usec_ts : 1, /* TSval values in usec */ is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ __cacheline_group_end(tcp_sock_read_txrx); @@ -368,8 +369,7 @@ struct tcp_sock { u8 compressed_ack; u8 dup_ack_counter:2, tlp_retrans:1, /* TLP is a retransmission */ - tcp_usec_ts:1, /* TSval values in usec */ - unused:4; + unused:5; u8 thin_lto : 1,/* Use linear timeouts for thin streams */ recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */ fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */ -- cgit v1.2.3 From c353c7b7ffb7ae6ed8f3339906fe33c8be6cf344 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Feb 2024 14:43:23 +0000 Subject: net-device: move lstats in net_device_read_txrx dev->lstats is notably used from loopback ndo_start_xmit() and other virtual drivers. Per cpu stats updates are dirtying per-cpu data, but the pointer itself is read-only. Fixes: 43a71cd66b9c ("net-device: reorganize net_device fast path variables") Signed-off-by: Eric Dumazet Cc: Coco Li Cc: Simon Horman Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- Documentation/networking/net_cachelines/net_device.rst | 4 ++-- include/linux/netdevice.h | 10 +++++----- net/core/dev.c | 3 ++- 3 files changed, 9 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst index e75a53593bb9..dceb49d56a91 100644 --- a/Documentation/networking/net_cachelines/net_device.rst +++ b/Documentation/networking/net_cachelines/net_device.rst @@ -136,8 +136,8 @@ struct_netpoll_info* npinfo - possible_net_t nd_net - read_mostly (dev_net)napi_busy_loop,tcp_v(4/6)_rcv,ip(v6)_rcv,ip(6)_input,ip(6)_input_finish void* ml_priv enum_netdev_ml_priv_type ml_priv_type -struct_pcpu_lstats__percpu* lstats -struct_pcpu_sw_netstats__percpu* tstats +struct_pcpu_lstats__percpu* lstats read_mostly dev_lstats_add() +struct_pcpu_sw_netstats__percpu* tstats read_mostly dev_sw_netstats_tx_add() struct_pcpu_dstats__percpu* dstats struct_garp_port* garp_port struct_mrp_port* mrp_port diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 118c40258d07..ef7bfbb98497 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2141,6 +2141,11 @@ struct net_device { /* TXRX read-mostly hotpath */ __cacheline_group_begin(net_device_read_txrx); + union { + struct pcpu_lstats __percpu *lstats; + struct pcpu_sw_netstats __percpu *tstats; + struct pcpu_dstats __percpu *dstats; + }; unsigned int flags; unsigned short hard_header_len; netdev_features_t features; @@ -2395,11 +2400,6 @@ struct net_device { enum netdev_ml_priv_type ml_priv_type; enum netdev_stat_type pcpu_stat_type:8; - union { - struct pcpu_lstats __percpu *lstats; - struct pcpu_sw_netstats __percpu *tstats; - struct pcpu_dstats __percpu *dstats; - }; #if IS_ENABLED(CONFIG_GARP) struct garp_port __rcu *garp_port; diff --git a/net/core/dev.c b/net/core/dev.c index cb2dab0feee0..9bb792cecc16 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11652,11 +11652,12 @@ static void __init net_dev_struct_check(void) CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_tx, 160); /* TXRX read-mostly hotpath */ + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, lstats); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, flags); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, hard_header_len); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, features); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, ip6_ptr); - CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_txrx, 30); + CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_txrx, 38); /* RX read-mostly hotpath */ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ptype_specific); -- cgit v1.2.3