diff options
| author | Chia-Yu Chang <chia-yu.chang@nokia-bell-labs.com> | 2025-09-16 10:24:31 +0200 |
|---|---|---|
| committer | Paolo Abeni <pabeni@redhat.com> | 2025-09-18 08:47:52 +0200 |
| commit | aa55a7dde7ec506bb23448a5005ae3f4f809d022 (patch) | |
| tree | 41675fff92b54c414bbc37a465fc7dd8d4d46e0c /Documentation/networking | |
| parent | tcp: accecn: AccECN option (diff) | |
| download | linux-aa55a7dde7ec506bb23448a5005ae3f4f809d022.tar.gz linux-aa55a7dde7ec506bb23448a5005ae3f4f809d022.zip | |
tcp: accecn: AccECN option send control
Instead of sending the option in every ACK, limit sending to
those ACKs where the option is necessary:
- Handshake
- "Change-triggered ACK" + the ACK following it. The
2nd ACK is necessary to unambiguously indicate which
of the ECN byte counters in increasing. The first
ACK has two counters increasing due to the ecnfield
edge.
- ACKs with CE to allow CEP delta validations to take
advantage of the option.
- Force option to be sent every at least once per 2^22
bytes. The check is done using the bit edges of the
byte counters (avoids need for extra variables).
- AccECN option beacon to send a few times per RTT even if
nothing in the ECN state requires that. The default is 3
times per RTT, and its period can be set via
sysctl_tcp_ecn_option_beacon.
Below are the pahole outcomes before and after this patch,
in which the group size of tcp_sock_write_tx is increased
from 89 to 97 due to the new u64 accecn_opt_tstamp member:
[BEFORE THIS PATCH]
struct tcp_sock {
[...]
u64 tcp_wstamp_ns; /* 2488 8 */
struct list_head tsorted_sent_queue; /* 2496 16 */
[...]
__cacheline_group_end__tcp_sock_write_tx[0]; /* 2521 0 */
__cacheline_group_begin__tcp_sock_write_txrx[0]; /* 2521 0 */
u8 nonagle:4; /* 2521: 0 1 */
u8 rate_app_limited:1; /* 2521: 4 1 */
/* XXX 3 bits hole, try to pack */
/* Force alignment to the next boundary: */
u8 :0;
u8 received_ce_pending:4;/* 2522: 0 1 */
u8 unused2:4; /* 2522: 4 1 */
u8 accecn_minlen:2; /* 2523: 0 1 */
u8 est_ecnfield:2; /* 2523: 2 1 */
u8 unused3:4; /* 2523: 4 1 */
[...]
__cacheline_group_end__tcp_sock_write_txrx[0]; /* 2628 0 */
[...]
/* size: 3200, cachelines: 50, members: 171 */
}
[AFTER THIS PATCH]
struct tcp_sock {
[...]
u64 tcp_wstamp_ns; /* 2488 8 */
u64 accecn_opt_tstamp; /* 2596 8 */
struct list_head tsorted_sent_queue; /* 2504 16 */
[...]
__cacheline_group_end__tcp_sock_write_tx[0]; /* 2529 0 */
__cacheline_group_begin__tcp_sock_write_txrx[0]; /* 2529 0 */
u8 nonagle:4; /* 2529: 0 1 */
u8 rate_app_limited:1; /* 2529: 4 1 */
/* XXX 3 bits hole, try to pack */
/* Force alignment to the next boundary: */
u8 :0;
u8 received_ce_pending:4;/* 2530: 0 1 */
u8 unused2:4; /* 2530: 4 1 */
u8 accecn_minlen:2; /* 2531: 0 1 */
u8 est_ecnfield:2; /* 2531: 2 1 */
u8 accecn_opt_demand:2; /* 2531: 4 1 */
u8 prev_ecnfield:2; /* 2531: 6 1 */
[...]
__cacheline_group_end__tcp_sock_write_txrx[0]; /* 2636 0 */
[...]
/* size: 3200, cachelines: 50, members: 173 */
}
Signed-off-by: Chia-Yu Chang <chia-yu.chang@nokia-bell-labs.com>
Co-developed-by: Ilpo Järvinen <ij@kernel.org>
Signed-off-by: Ilpo Järvinen <ij@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20250916082434.100722-8-chia-yu.chang@nokia-bell-labs.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Diffstat (limited to 'Documentation/networking')
| -rw-r--r-- | Documentation/networking/ip-sysctl.rst | 6 | ||||
| -rw-r--r-- | Documentation/networking/net_cachelines/tcp_sock.rst | 3 |
2 files changed, 9 insertions, 0 deletions
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 1c206501b973..a06cb99d66dc 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -487,6 +487,12 @@ tcp_ecn_option - INTEGER Default: 2 +tcp_ecn_option_beacon - INTEGER + Control Accurate ECN (AccECN) option sending frequency per RTT and it + takes effect only when tcp_ecn_option is set to 2. + + Default: 3 (AccECN will be send at least 3 times per RTT) + tcp_ecn_fallback - BOOLEAN If the kernel detects that ECN connection misbehaves, enable fall back to non-ECN. Currently, this knob implements the fallback diff --git a/Documentation/networking/net_cachelines/tcp_sock.rst b/Documentation/networking/net_cachelines/tcp_sock.rst index b941151f8c0a..d4dc01800945 100644 --- a/Documentation/networking/net_cachelines/tcp_sock.rst +++ b/Documentation/networking/net_cachelines/tcp_sock.rst @@ -109,6 +109,9 @@ u8:2 syn_ect_snt write_mostly read_w u8:2 syn_ect_rcv read_mostly read_write u8:2 accecn_minlen write_mostly read_write u8:2 est_ecnfield read_write +u8:2 accecn_opt_demand read_mostly read_write +u8:2 prev_ecnfield read_write +u64 accecn_opt_tstamp read_write u8:4 accecn_fail_mode u32 lost read_mostly tcp_ack u32 app_limited read_write read_mostly tcp_rate_check_app_limited,tcp_rate_skb_sent(tx);tcp_rate_gen(rx) |
