aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2025-09-24 17:45:14 -0700
committerJakub Kicinski <kuba@kernel.org>2025-09-24 17:45:15 -0700
commitc7ab8024ca124afa8eab9a07a470a34676efe123 (patch)
treed3456d6ba5593a700da2c121d81b222feb03fc58
parentMerge branch 'net-stmmac-yet-more-cleanups' (diff)
parentnetfilter: nf_conntrack: do not skip entries in /proc/net/nf_conntrack (diff)
downloadlinux-c7ab8024ca124afa8eab9a07a470a34676efe123.tar.gz
linux-c7ab8024ca124afa8eab9a07a470a34676efe123.zip
Merge tag 'nf-next-25-09-24' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next
Florian Westphal says: ==================== netfilter: fixes for net-next These fixes target next because the bug is either not severe or has existed for so long that there is no reason to cram them in at the last minute. 1) Fix IPVS ftp unregistering during netns cleanup, broken since netns support was introduced in 2011 in the 2.6.39 kernel. From Slavin Liu. 2) nfnetlink must reset the 'nlh' pointer back to the original address when a batch is replayed, else we emit bogus ACK messages and conceal real errno from userspace. From Fernando Fernandez Mancera. This was broken since 6.10. 3) Recent fix for nftables 'pipapo' set type was incomplete, it only made things work for the AVX2 version of the algorithm. 4) Testing revealed another problem with avx2 version that results in out-of-bounds read access, this bug always existed since feature was added in 5.7 kernel. This also comes with a selftest update. Last fix resolves a long-standing bug (since 4.9) in conntrack /proc interface: Decrease skip count when we reap an expired entry during dump. As-is we erronously elide one conntrack entry from dump for every expired entry seen. From Eric Dumazet. * tag 'nf-next-25-09-24' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next: netfilter: nf_conntrack: do not skip entries in /proc/net/nf_conntrack selftests: netfilter: nft_concat_range.sh: add check for double-create bug netfilter: nft_set_pipapo_avx2: fix skip of expired entries netfilter: nft_set_pipapo: use 0 genmask for packetpath lookups netfilter: nfnetlink: reset nlh pointer during batch replay ipvs: Defer ip_vs_ftp unregister during netns cleanup ==================== Link: https://patch.msgid.link/20250924140654.10210-1-fw@strlen.de Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c4
-rw-r--r--net/netfilter/nf_conntrack_standalone.c3
-rw-r--r--net/netfilter/nfnetlink.c2
-rw-r--r--net/netfilter/nft_set_pipapo.c9
-rw-r--r--net/netfilter/nft_set_pipapo_avx2.c9
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_concat_range.sh56
6 files changed, 73 insertions, 10 deletions
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index d8a284999544..206c6700e200 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -53,6 +53,7 @@ enum {
IP_VS_FTP_EPSV,
};
+static bool exiting_module;
/*
* List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper
* First port is set to the default port.
@@ -605,7 +606,7 @@ static void __ip_vs_ftp_exit(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
- if (!ipvs)
+ if (!ipvs || !exiting_module)
return;
unregister_ip_vs_app(ipvs, &ip_vs_ftp);
@@ -627,6 +628,7 @@ static int __init ip_vs_ftp_init(void)
*/
static void __exit ip_vs_ftp_exit(void)
{
+ exiting_module = true;
unregister_pernet_subsys(&ip_vs_ftp_ops);
/* rcu_barrier() is called by netns */
}
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 1f14ef0436c6..708b79380f04 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -317,6 +317,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
smp_acquire__after_ctrl_dep();
if (nf_ct_should_gc(ct)) {
+ struct ct_iter_state *st = s->private;
+
+ st->skip_elems--;
nf_ct_kill(ct);
goto release;
}
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index e598a2a252b0..811d02b4c4f7 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -376,6 +376,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
const struct nfnetlink_subsystem *ss;
const struct nfnl_callback *nc;
struct netlink_ext_ack extack;
+ struct nlmsghdr *onlh = nlh;
LIST_HEAD(err_list);
u32 status;
int err;
@@ -386,6 +387,7 @@ replay:
status = 0;
replay_abort:
skb = netlink_skb_clone(oskb, GFP_KERNEL);
+ nlh = onlh;
if (!skb)
return netlink_ack(oskb, nlh, -ENOMEM, NULL);
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index a7b8fa8cab7c..112fe46788b6 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -549,8 +549,7 @@ static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m,
*
* This function is called from the data path. It will search for
* an element matching the given key in the current active copy.
- * Unlike other set types, this uses NFT_GENMASK_ANY instead of
- * nft_genmask_cur().
+ * Unlike other set types, this uses 0 instead of nft_genmask_cur().
*
* This is because new (future) elements are not reachable from
* priv->match, they get added to priv->clone instead.
@@ -560,8 +559,8 @@ static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m,
* inconsistent state: matching old entries get skipped but thew
* newly matching entries are unreachable.
*
- * GENMASK will still find the 'now old' entries which ensures consistent
- * priv->match view.
+ * GENMASK_ANY doesn't work for the same reason: old-gen entries get
+ * skipped, new-gen entries are only reachable from priv->clone.
*
* nft_pipapo_commit swaps ->clone and ->match shortly after the
* genbit flip. As ->clone doesn't contain the old entries in the first
@@ -578,7 +577,7 @@ nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
const struct nft_pipapo_elem *e;
m = rcu_dereference(priv->match);
- e = pipapo_get_slow(m, (const u8 *)key, NFT_GENMASK_ANY, get_jiffies_64());
+ e = pipapo_get_slow(m, (const u8 *)key, 0, get_jiffies_64());
return e ? &e->ext : NULL;
}
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index 27dab3667548..7ff90325c97f 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -1179,7 +1179,6 @@ struct nft_pipapo_elem *pipapo_get_avx2(const struct nft_pipapo_match *m,
nft_pipapo_avx2_prepare();
-next_match:
nft_pipapo_for_each_field(f, i, m) {
bool last = i == m->field_count - 1, first = !i;
int ret = 0;
@@ -1226,6 +1225,7 @@ next_match:
#undef NFT_SET_PIPAPO_AVX2_LOOKUP
+next_match:
if (ret < 0) {
scratch->map_index = map_index;
kernel_fpu_end();
@@ -1238,8 +1238,11 @@ next_match:
e = f->mt[ret].e;
if (unlikely(__nft_set_elem_expired(&e->ext, tstamp) ||
- !nft_set_elem_active(&e->ext, genmask)))
+ !nft_set_elem_active(&e->ext, genmask))) {
+ ret = pipapo_refill(res, f->bsize, f->rules,
+ fill, f->mt, last);
goto next_match;
+ }
scratch->map_index = map_index;
kernel_fpu_end();
@@ -1292,7 +1295,7 @@ nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
m = rcu_dereference(priv->match);
- e = pipapo_get_avx2(m, rp, NFT_GENMASK_ANY, get_jiffies_64());
+ e = pipapo_get_avx2(m, rp, 0, get_jiffies_64());
local_bh_enable();
return e ? &e->ext : NULL;
diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
index 20e76b395c85..ad97c6227f35 100755
--- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
@@ -29,7 +29,7 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
net6_port_net6_port net_port_mac_proto_net"
# Reported bugs, also described by TYPE_ variables below
-BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch"
+BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate"
# List of possible paths to pktgen script from kernel tree for performance tests
PKTGEN_SCRIPT_PATHS="
@@ -408,6 +408,18 @@ perf_duration 0
"
+TYPE_doublecreate="
+display cannot create same element twice
+type_spec ipv4_addr . ipv4_addr
+chain_spec ip saddr . ip daddr
+dst addr4
+proto icmp
+
+race_repeat 0
+
+perf_duration 0
+"
+
# Set template for all tests, types and rules are filled in depending on test
set_template='
flush ruleset
@@ -1900,6 +1912,48 @@ test_bug_avx2_mismatch()
fi
}
+test_bug_doublecreate()
+{
+ local elements="1.2.3.4 . 1.2.4.1, 1.2.4.1 . 1.2.3.4"
+ local ret=1
+ local i
+
+ setup veth send_"${proto}" set || return ${ksft_skip}
+
+ add "{ $elements }" || return 1
+ # expected to work: 'add' on existing should be no-op.
+ add "{ $elements }" || return 1
+
+ # 'create' should return an error.
+ if nft create element inet filter test "{ $elements }" 2>/dev/null; then
+ err "Could create an existing element"
+ return 1
+ fi
+nft -f - <<EOF 2>/dev/null
+flush set inet filter test
+create element inet filter test { $elements }
+create element inet filter test { $elements }
+EOF
+ ret=$?
+ if [ $ret -eq 0 ]; then
+ err "Could create element twice in one transaction"
+ err "$(nft -a list ruleset)"
+ return 1
+ fi
+
+nft -f - <<EOF 2>/dev/null
+flush set inet filter test
+create element inet filter test { $elements }
+EOF
+ ret=$?
+ if [ $ret -ne 0 ]; then
+ err "Could not flush and re-create element in one transaction"
+ return 1
+ fi
+
+ return 0
+}
+
test_reported_issues() {
eval test_bug_"${subtest}"
}