aboutsummaryrefslogtreecommitdiffstats
path: root/include/net
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2025-06-17 18:34:15 -0700
committerJakub Kicinski <kuba@kernel.org>2025-06-17 18:34:15 -0700
commit8152c4028cb8b69933ebb93e7b0cc58bc47dfb2e (patch)
tree43cc3e4bde87752271cae18022c04cf3f67687d0 /include/net
parentnet: tcp: tsq: Convert from tasklet to BH workqueue (diff)
parentnet/mlx5e: Add TX support for netmems (diff)
downloadlinux-8152c4028cb8b69933ebb93e7b0cc58bc47dfb2e.tar.gz
linux-8152c4028cb8b69933ebb93e7b0cc58bc47dfb2e.zip
Merge branch 'net-mlx5e-add-support-for-devmem-and-io_uring-tcp-zero-copy'
Mark Bloch says: ==================== net/mlx5e: Add support for devmem and io_uring TCP zero-copy This series adds support for zerocopy rx TCP with devmem and io_uring for ConnectX7 NICs and above. For performance reasons and simplicity HW-GRO will also be turned on when header-data split mode is on. Performance =========== Test setup: * CPU: Intel(R) Xeon(R) Platinum 8380 CPU @ 2.30GHz (single NUMA) * NIC: ConnectX7 * Benchmarking tool: kperf [0] * Single TCP flow * Test duration: 60s With application thread and interrupts pinned to the *same* core: |------+-----------+----------| | MTU | epoll | io_uring | |------+-----------+----------| | 1500 | 61.6 Gbps | 114 Gbps | | 4096 | 69.3 Gbps | 151 Gbps | | 9000 | 67.8 Gbps | 187 Gbps | |------+-----------+----------| The CPU usage for io_uring is 95%. Reproduction steps for io_uring: server --no-daemon -a 2001:db8::1 --no-memcmp --iou --iou_sendzc \ --iou_zcrx --iou_dev_name eth2 --iou_zcrx_queue_id 2 server --no-daemon -a 2001:db8::2 --no-memcmp --iou --iou_sendzc client --src 2001:db8::2 --dst 2001:db8::1 \ --msg-zerocopy -t 60 --cpu-min=2 --cpu-max=2 Patch overview: ================ First, a netmem API for skb_can_coalesce is added to the core to be able to do skb fragment coalescing on netmems. The next patches introduce some cleanups in the internal SHAMPO code and improvements to hw gro capability checks in FW. A separate page_pool is introduced for headers, to be used only when the rxq has a memory provider. Then the driver is converted to use the netmem API and to allow support for unreadable netmem page pool. The queue management ops are implemented. Finally, the tcp-data-split ring parameter is exposed. References ========== [0] kperf: git://git.kernel.dk/kperf.git v1: https://lore.kernel.org/20250116215530.158886-1-saeed@kernel.org v2: https://lore.kernel.org/1747950086-1246773-1-git-send-email-tariqt@nvidia.com v3: https://lore.kernel.org/20250609145833.990793-1-mbloch@nvidia.com v4: https://lore.kernel.org/20250610150950.1094376-1-mbloch@nvidia.com v5: https://lore.kernel.org/20250612154648.1161201-1-mbloch@nvidia.com ==================== Link: https://patch.msgid.link/20250616141441.1243044-1-mbloch@nvidia.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include/net')
-rw-r--r--include/net/netmem.h2
-rw-r--r--include/net/page_pool/helpers.h7
2 files changed, 8 insertions, 1 deletions
diff --git a/include/net/netmem.h b/include/net/netmem.h
index 850869b45b45..7a1dafa3f080 100644
--- a/include/net/netmem.h
+++ b/include/net/netmem.h
@@ -139,7 +139,7 @@ static inline netmem_ref net_iov_to_netmem(struct net_iov *niov)
return (__force netmem_ref)((unsigned long)niov | NET_IOV);
}
-static inline netmem_ref page_to_netmem(struct page *page)
+static inline netmem_ref page_to_netmem(const struct page *page)
{
return (__force netmem_ref)page;
}
diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h
index 93f2c31baf9b..773fc65780b5 100644
--- a/include/net/page_pool/helpers.h
+++ b/include/net/page_pool/helpers.h
@@ -153,6 +153,13 @@ static inline netmem_ref page_pool_dev_alloc_netmem(struct page_pool *pool,
return page_pool_alloc_netmem(pool, offset, size, gfp);
}
+static inline netmem_ref page_pool_dev_alloc_netmems(struct page_pool *pool)
+{
+ gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
+
+ return page_pool_alloc_netmems(pool, gfp);
+}
+
static inline struct page *page_pool_alloc(struct page_pool *pool,
unsigned int *offset,
unsigned int *size, gfp_t gfp)