diff options
Diffstat (limited to 'net')
| -rw-r--r-- | net/core/skbuff.c | 6 | ||||
| -rw-r--r-- | net/core/sock.c | 18 | ||||
| -rw-r--r-- | net/ipv4/tcp.c | 91 | ||||
| -rw-r--r-- | net/ipv4/tcp_input.c | 8 | ||||
| -rw-r--r-- | net/ipv4/tcp_ipv4.c | 10 | ||||
| -rw-r--r-- | net/ipv4/tcp_output.c | 2 | ||||
| -rw-r--r-- | net/ipv4/udp.c | 2 | ||||
| -rw-r--r-- | net/ipv6/ip6_output.c | 2 | ||||
| -rw-r--r-- | net/ipv6/tcp_ipv6.c | 10 | ||||
| -rw-r--r-- | net/mptcp/protocol.c | 2 |
10 files changed, 93 insertions, 58 deletions
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ba2f38246f07..d57796f38a0b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4849,8 +4849,7 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb, serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0; if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) { serr->ee.ee_data = skb_shinfo(skb)->tskey; - if (sk->sk_protocol == IPPROTO_TCP && - sk->sk_type == SOCK_STREAM) + if (sk_is_tcp(sk)) serr->ee.ee_data -= sk->sk_tskey; } @@ -4919,8 +4918,7 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, if (tsonly) { #ifdef CONFIG_INET if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) && - sk->sk_protocol == IPPROTO_TCP && - sk->sk_type == SOCK_STREAM) { + sk_is_tcp(sk)) { skb = tcp_get_timestamping_opt_stats(sk, orig_skb, ack_skb); opt_stats = true; diff --git a/net/core/sock.c b/net/core/sock.c index 8f2b2f2c0e7b..c57d9883f62c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -327,7 +327,10 @@ int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) BUG_ON(!sock_flag(sk, SOCK_MEMALLOC)); noreclaim_flag = memalloc_noreclaim_save(); - ret = sk->sk_backlog_rcv(sk, skb); + ret = INDIRECT_CALL_INET(sk->sk_backlog_rcv, + tcp_v6_do_rcv, + tcp_v4_do_rcv, + sk, skb); memalloc_noreclaim_restore(noreclaim_flag); return ret; @@ -872,8 +875,7 @@ int sock_set_timestamping(struct sock *sk, int optname, if (val & SOF_TIMESTAMPING_OPT_ID && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) { - if (sk->sk_protocol == IPPROTO_TCP && - sk->sk_type == SOCK_STREAM) { + if (sk_is_tcp(sk)) { if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) return -EINVAL; @@ -1370,8 +1372,7 @@ set_sndbuf: case SO_ZEROCOPY: if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) { - if (!((sk->sk_type == SOCK_STREAM && - sk->sk_protocol == IPPROTO_TCP) || + if (!(sk_is_tcp(sk) || (sk->sk_type == SOCK_DGRAM && sk->sk_protocol == IPPROTO_UDP))) ret = -ENOTSUPP; @@ -2246,10 +2247,13 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) u32 max_segs = 1; sk_dst_set(sk, dst); - sk->sk_route_caps = dst->dev->features | sk->sk_route_forced_caps; + sk->sk_route_caps = dst->dev->features; + if (sk_is_tcp(sk)) + sk->sk_route_caps |= NETIF_F_GSO; if (sk->sk_route_caps & NETIF_F_GSO) sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; - sk->sk_route_caps &= ~sk->sk_route_nocaps; + if (unlikely(sk->sk_gso_disabled)) + sk->sk_route_caps &= ~NETIF_F_GSO_MASK; if (sk_can_gso(sk)) { if (dst->header_len && !xfrm_dst_offload_ok(dst)) { sk->sk_route_caps &= ~NETIF_F_GSO_MASK; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b7796b4cf0a0..d1949fdb1462 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -292,7 +292,7 @@ EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count); long sysctl_tcp_mem[3] __read_mostly; EXPORT_SYMBOL(sysctl_tcp_mem); -atomic_long_t tcp_memory_allocated; /* Current allocated memory. */ +atomic_long_t tcp_memory_allocated ____cacheline_aligned_in_smp; /* Current allocated memory. */ EXPORT_SYMBOL(tcp_memory_allocated); #if IS_ENABLED(CONFIG_SMC) @@ -303,7 +303,7 @@ EXPORT_SYMBOL(tcp_have_smc); /* * Current number of TCP sockets. */ -struct percpu_counter tcp_sockets_allocated; +struct percpu_counter tcp_sockets_allocated ____cacheline_aligned_in_smp; EXPORT_SYMBOL(tcp_sockets_allocated); /* @@ -456,7 +456,6 @@ void tcp_init_sock(struct sock *sk) WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]); sk_sockets_allocated_inc(sk); - sk->sk_route_forced_caps = NETIF_F_GSO; } EXPORT_SYMBOL(tcp_init_sock); @@ -546,10 +545,11 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (state != TCP_SYN_SENT && (state != TCP_SYN_RECV || rcu_access_pointer(tp->fastopen_rsk))) { int target = sock_rcvlowat(sk, 0, INT_MAX); + u16 urg_data = READ_ONCE(tp->urg_data); - if (READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq) && - !sock_flag(sk, SOCK_URGINLINE) && - tp->urg_data) + if (unlikely(urg_data) && + READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq) && + !sock_flag(sk, SOCK_URGINLINE)) target++; if (tcp_stream_is_readable(sk, target)) @@ -574,7 +574,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) } else mask |= EPOLLOUT | EPOLLWRNORM; - if (tp->urg_data & TCP_URG_VALID) + if (urg_data & TCP_URG_VALID) mask |= EPOLLPRI; } else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) { /* Active TCP fastopen socket with defer_connect @@ -608,7 +608,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) unlock_sock_fast(sk, slow); break; case SIOCATMARK: - answ = tp->urg_data && + answ = READ_ONCE(tp->urg_data) && READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq); break; case SIOCOUTQ: @@ -1466,7 +1466,7 @@ static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags) char c = tp->urg_data; if (!(flags & MSG_PEEK)) - tp->urg_data = TCP_URG_READ; + WRITE_ONCE(tp->urg_data, TCP_URG_READ); /* Read urgent data. */ msg->msg_flags |= MSG_OOB; @@ -1580,6 +1580,36 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) tcp_send_ack(sk); } +void __sk_defer_free_flush(struct sock *sk) +{ + struct llist_node *head; + struct sk_buff *skb, *n; + + head = llist_del_all(&sk->defer_list); + llist_for_each_entry_safe(skb, n, head, ll_node) { + prefetch(n); + skb_mark_not_on_list(skb); + __kfree_skb(skb); + } +} +EXPORT_SYMBOL(__sk_defer_free_flush); + +static void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb) +{ + __skb_unlink(skb, &sk->sk_receive_queue); + if (likely(skb->destructor == sock_rfree)) { + sock_rfree(skb); + skb->destructor = NULL; + skb->sk = NULL; + if (!skb_queue_empty(&sk->sk_receive_queue) || + !llist_empty(&sk->defer_list)) { + llist_add(&skb->ll_node, &sk->defer_list); + return; + } + } + __kfree_skb(skb); +} + static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) { struct sk_buff *skb; @@ -1599,7 +1629,7 @@ static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) * splitted a fat GRO packet, while we released socket lock * in skb_splice_bits() */ - sk_eat_skb(sk, skb); + tcp_eat_recv_skb(sk, skb); } return NULL; } @@ -1633,7 +1663,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, len = skb->len - offset; /* Stop reading if we hit a patch of urgent data */ - if (tp->urg_data) { + if (unlikely(tp->urg_data)) { u32 urg_offset = tp->urg_seq - seq; if (urg_offset < len) len = urg_offset; @@ -1665,11 +1695,11 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, continue; } if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) { - sk_eat_skb(sk, skb); + tcp_eat_recv_skb(sk, skb); ++seq; break; } - sk_eat_skb(sk, skb); + tcp_eat_recv_skb(sk, skb); if (!desc->count) break; WRITE_ONCE(tp->copied_seq, seq); @@ -2326,7 +2356,7 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, u32 offset; /* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */ - if (tp->urg_data && tp->urg_seq == *seq) { + if (unlikely(tp->urg_data) && tp->urg_seq == *seq) { if (copied) break; if (signal_pending(current)) { @@ -2369,10 +2399,10 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, break; if (copied) { - if (sk->sk_err || + if (!timeo || + sk->sk_err || sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN) || - !timeo || signal_pending(current)) break; } else { @@ -2406,13 +2436,12 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, } } - tcp_cleanup_rbuf(sk, copied); - if (copied >= target) { /* Do not sleep, just process backlog. */ - release_sock(sk); - lock_sock(sk); + __sk_flush_backlog(sk); } else { + tcp_cleanup_rbuf(sk, copied); + sk_defer_free_flush(sk); sk_wait_data(sk, &timeo, last); } @@ -2432,7 +2461,7 @@ found_ok_skb: used = len; /* Do we have urgent data here? */ - if (tp->urg_data) { + if (unlikely(tp->urg_data)) { u32 urg_offset = tp->urg_seq - *seq; if (urg_offset < used) { if (!urg_offset) { @@ -2466,8 +2495,8 @@ found_ok_skb: tcp_rcv_space_adjust(sk); skip_copy: - if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) { - tp->urg_data = 0; + if (unlikely(tp->urg_data) && after(tp->copied_seq, tp->urg_seq)) { + WRITE_ONCE(tp->urg_data, 0); tcp_fast_path_check(sk); } @@ -2482,14 +2511,14 @@ skip_copy: if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) goto found_fin_ok; if (!(flags & MSG_PEEK)) - sk_eat_skb(sk, skb); + tcp_eat_recv_skb(sk, skb); continue; found_fin_ok: /* Process the FIN. */ WRITE_ONCE(*seq, *seq + 1); if (!(flags & MSG_PEEK)) - sk_eat_skb(sk, skb); + tcp_eat_recv_skb(sk, skb); break; } while (len > 0); @@ -2531,6 +2560,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, ret = tcp_recvmsg_locked(sk, msg, len, nonblock, flags, &tss, &cmsg_flags); release_sock(sk); + sk_defer_free_flush(sk); if (cmsg_flags && ret >= 0) { if (cmsg_flags & TCP_CMSG_TS) @@ -2961,7 +2991,7 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_clear_xmit_timers(sk); __skb_queue_purge(&sk->sk_receive_queue); WRITE_ONCE(tp->copied_seq, tp->rcv_nxt); - tp->urg_data = 0; + WRITE_ONCE(tp->urg_data, 0); tcp_write_queue_purge(sk); tcp_fastopen_active_disable_ofo_check(sk); skb_rbtree_purge(&tp->out_of_order_queue); @@ -3056,7 +3086,7 @@ int tcp_disconnect(struct sock *sk, int flags) sk->sk_frag.page = NULL; sk->sk_frag.offset = 0; } - + sk_defer_free_flush(sk); sk_error_report(sk); return 0; } @@ -3771,10 +3801,12 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) tcp_get_info_chrono_stats(tp, info); info->tcpi_segs_out = tp->segs_out; - info->tcpi_segs_in = tp->segs_in; + + /* segs_in and data_segs_in can be updated from tcp_segs_in() from BH */ + info->tcpi_segs_in = READ_ONCE(tp->segs_in); + info->tcpi_data_segs_in = READ_ONCE(tp->data_segs_in); info->tcpi_min_rtt = tcp_min_rtt(tp); - info->tcpi_data_segs_in = tp->data_segs_in; info->tcpi_data_segs_out = tp->data_segs_out; info->tcpi_delivery_rate_app_limited = tp->rate_app_limited ? 1 : 0; @@ -4183,6 +4215,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, err = BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sk, level, optname, &zc, &len, err); release_sock(sk); + sk_defer_free_flush(sk); if (len >= offsetofend(struct tcp_zerocopy_receive, msg_flags)) goto zerocopy_rcv_cmsg; switch (len) { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 246ab7b5e857..3658b9c3dd2b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5591,7 +5591,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th) } } - tp->urg_data = TCP_URG_NOTYET; + WRITE_ONCE(tp->urg_data, TCP_URG_NOTYET); WRITE_ONCE(tp->urg_seq, ptr); /* Disable header prediction. */ @@ -5604,11 +5604,11 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *t struct tcp_sock *tp = tcp_sk(sk); /* Check if we get a new urgent pointer - normally not. */ - if (th->urg) + if (unlikely(th->urg)) tcp_check_urg(sk, th); /* Do we wait for any urgent data? - normally not... */ - if (tp->urg_data == TCP_URG_NOTYET) { + if (unlikely(tp->urg_data == TCP_URG_NOTYET)) { u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff * 4) - th->syn; @@ -5617,7 +5617,7 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *t u8 tmp; if (skb_copy_bits(skb, ptr, &tmp, 1)) BUG(); - tp->urg_data = TCP_URG_VALID | tmp; + WRITE_ONCE(tp->urg_data, TCP_URG_VALID | tmp); if (!sock_flag(sk, SOCK_DEAD)) sk->sk_data_ready(sk); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 13d868c43284..3dd19a2bf06c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1182,7 +1182,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, if (!md5sig) return -ENOMEM; - sk_nocaps_add(sk, NETIF_F_GSO_MASK); + sk_gso_disable(sk); INIT_HLIST_HEAD(&md5sig->head); rcu_assign_pointer(tp->md5sig_info, md5sig); } @@ -1620,7 +1620,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, */ tcp_md5_do_add(newsk, addr, AF_INET, 32, l3index, key->flags, key->key, key->keylen, GFP_ATOMIC); - sk_nocaps_add(newsk, NETIF_F_GSO_MASK); + sk_gso_disable(newsk); } #endif @@ -1800,8 +1800,7 @@ int tcp_v4_early_demux(struct sk_buff *skb) bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) { - u32 limit = READ_ONCE(sk->sk_rcvbuf) + READ_ONCE(sk->sk_sndbuf); - u32 tail_gso_size, tail_gso_segs; + u32 limit, tail_gso_size, tail_gso_segs; struct skb_shared_info *shinfo; const struct tcphdr *th; struct tcphdr *thtail; @@ -1909,7 +1908,7 @@ no_coalesce: * to reduce memory overhead, so add a little headroom here. * Few sockets backlog are possibly concurrently non empty. */ - limit += 64*1024; + limit = READ_ONCE(sk->sk_rcvbuf) + READ_ONCE(sk->sk_sndbuf) + 64*1024; if (unlikely(sk_add_backlog(sk, skb, limit))) { bh_unlock_sock(sk); @@ -2103,6 +2102,7 @@ process: sk_incoming_cpu_update(sk); + sk_defer_free_flush(sk); bh_lock_sock_nested(sk); tcp_segs_in(tcp_sk(sk), skb); ret = 0; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2e6e5a70168e..5079832af5c1 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1359,7 +1359,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, #ifdef CONFIG_TCP_MD5SIG /* Calculate the MD5 hash, as we have all we need now */ if (md5) { - sk_nocaps_add(sk, NETIF_F_GSO_MASK); + sk_gso_disable(sk); tp->af_specific->calc_md5_hash(opts.hash_location, md5, sk, skb); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0e2f1c05da28..7101e6d892d6 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -122,7 +122,7 @@ EXPORT_SYMBOL(udp_table); long sysctl_udp_mem[3] __read_mostly; EXPORT_SYMBOL(sysctl_udp_mem); -atomic_long_t udp_memory_allocated; +atomic_long_t udp_memory_allocated ____cacheline_aligned_in_smp; EXPORT_SYMBOL(udp_memory_allocated); #define MAX_UDP_PORTS 65536 diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 2f044a49afa8..007e433d4d4d 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -977,7 +977,7 @@ slow_path: fail_toobig: if (skb->sk && dst_allfrag(skb_dst(skb))) - sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK); + sk_gso_disable(skb->sk); icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); err = -EMSGSIZE; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 551fce49841d..3b7d6ede1364 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -72,7 +72,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb); static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *req); -static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); +INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); static const struct inet_connection_sock_af_ops ipv6_mapped; const struct inet_connection_sock_af_ops ipv6_specific; @@ -1466,7 +1466,8 @@ INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, * This is because we cannot sleep with the original spinlock * held. */ -static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) +INDIRECT_CALLABLE_SCOPE +int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) { struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct sk_buff *opt_skb = NULL; @@ -1757,6 +1758,7 @@ process: sk_incoming_cpu_update(sk); + sk_defer_free_flush(sk); bh_lock_sock_nested(sk); tcp_segs_in(tcp_sk(sk), skb); ret = 0; @@ -1893,9 +1895,7 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = { INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) { - struct ipv6_pinfo *np = inet6_sk(sk); - - __tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr); + __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr); } const struct inet_connection_sock_af_ops ipv6_specific = { diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index b7e32e316738..6db93da59843 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -48,7 +48,7 @@ enum { MPTCP_CMSG_TS = BIT(0), }; -static struct percpu_counter mptcp_sockets_allocated; +static struct percpu_counter mptcp_sockets_allocated ____cacheline_aligned_in_smp; static void __mptcp_destroy_sock(struct sock *sk); static void __mptcp_check_send_data_fin(struct sock *sk); |
