From 1a9dcf69c7a97e733aa2fc026db22f22928ca7b7 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 28 May 2025 10:55:19 +0200 Subject: selftests/futex: getopt() requires int as return value. Mark reported that futex_priv_hash fails on ARM64. It turns out that the command line parsing does not terminate properly and ends in the default case assuming an invalid option was passed. Use an int as the return type for getopt(). Closes: https://lore.kernel.org/all/31869a69-063f-44a3-a079-ba71b2506cce@sirena.org.uk/ Fixes: 3163369407baf ("selftests/futex: Add futex_numa_mpol") Fixes: cda95faef7bcf ("selftests/futex: Add futex_priv_hash") Reported-by: Mark Brown Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20250528085521.1938355-2-bigeasy@linutronix.de --- tools/testing/selftests/futex/functional/futex_numa_mpol.c | 2 +- tools/testing/selftests/futex/functional/futex_priv_hash.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/futex/functional/futex_numa_mpol.c b/tools/testing/selftests/futex/functional/futex_numa_mpol.c index 20a9d3ecf743..564dbd02d2f4 100644 --- a/tools/testing/selftests/futex/functional/futex_numa_mpol.c +++ b/tools/testing/selftests/futex/functional/futex_numa_mpol.c @@ -144,7 +144,7 @@ int main(int argc, char *argv[]) struct futex32_numa *futex_numa; int mem_size, i; void *futex_ptr; - char c; + int c; while ((c = getopt(argc, argv, "chv:")) != -1) { switch (c) { diff --git a/tools/testing/selftests/futex/functional/futex_priv_hash.c b/tools/testing/selftests/futex/functional/futex_priv_hash.c index 2dca18fefedc..24a92dc94eb8 100644 --- a/tools/testing/selftests/futex/functional/futex_priv_hash.c +++ b/tools/testing/selftests/futex/functional/futex_priv_hash.c @@ -130,7 +130,7 @@ int main(int argc, char *argv[]) pthread_mutexattr_t mutex_attr_pi; int use_global_hash = 0; int ret; - char c; + int c; while ((c = getopt(argc, argv, "cghv:")) != -1) { switch (c) { -- cgit v1.2.3 From 0ecb4232fc65e659ca7020f8bb2e0fc347acfb7d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 28 May 2025 10:55:20 +0200 Subject: selftests/futex: Set the home_node in futex_numa_mpol The test fails at the MPOL step if multiple nodes are available. The reason is that mbind() sets the policy but the home_node, which is retrieved by the futex code, is not set. This causes to retrieve the current node and with multiple nodes it fails on one of the iterations. Use numa_set_mempolicy_home_node() to set the expected node. Use ksft_exit_fail_msg() to fail and exit in order not to confuse ktap. Fixes: 3163369407baf ("selftests/futex: Add futex_numa_mpol") Suggested-by: Vlastimil Babka Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20250528085521.1938355-3-bigeasy@linutronix.de --- tools/testing/selftests/futex/functional/futex_numa_mpol.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/futex/functional/futex_numa_mpol.c b/tools/testing/selftests/futex/functional/futex_numa_mpol.c index 564dbd02d2f4..a9ecfb2d3932 100644 --- a/tools/testing/selftests/futex/functional/futex_numa_mpol.c +++ b/tools/testing/selftests/futex/functional/futex_numa_mpol.c @@ -210,6 +210,10 @@ int main(int argc, char *argv[]) ret = mbind(futex_ptr, mem_size, MPOL_BIND, &nodemask, sizeof(nodemask) * 8, 0); if (ret == 0) { + ret = numa_set_mempolicy_home_node(futex_ptr, mem_size, i, 0); + if (ret != 0) + ksft_exit_fail_msg("Failed to set home node: %m, %d\n", errno); + ksft_print_msg("Node %d test\n", i); futex_numa->futex = 0; futex_numa->numa = FUTEX_NO_NODE; @@ -220,8 +224,8 @@ int main(int argc, char *argv[]) if (0) test_futex_mpol(futex_numa, 0); if (futex_numa->numa != i) { - ksft_test_result_fail("Returned NUMA node is %d expected %d\n", - futex_numa->numa, i); + ksft_exit_fail_msg("Returned NUMA node is %d expected %d\n", + futex_numa->numa, i); } } } -- cgit v1.2.3 From 25a0d04d3883888ccd59918aede44b109abdb099 Mon Sep 17 00:00:00 2001 From: Tao Chen Date: Tue, 3 Jun 2025 23:43:08 +0800 Subject: selftests/bpf: Add cookies check for raw_tp fill_link_info test Adding tests for getting cookie with fill_link_info for raw_tp. Signed-off-by: Tao Chen Signed-off-by: Andrii Nakryiko Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20250603154309.3063644-2-chen.dylane@linux.dev --- .../testing/selftests/bpf/prog_tests/bpf_cookie.c | 26 +++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c index 6befa870434b..0774ae6c1bec 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c @@ -635,10 +635,29 @@ cleanup: bpf_link__destroy(link); } +static int verify_raw_tp_link_info(int fd, u64 cookie) +{ + struct bpf_link_info info; + int err; + u32 len = sizeof(info); + + memset(&info, 0, sizeof(info)); + err = bpf_link_get_info_by_fd(fd, &info, &len); + if (!ASSERT_OK(err, "get_link_info")) + return -1; + + if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_RAW_TRACEPOINT, "link_type")) + return -1; + + ASSERT_EQ(info.raw_tracepoint.cookie, cookie, "raw_tp_cookie"); + + return 0; +} + static void raw_tp_subtest(struct test_bpf_cookie *skel) { __u64 cookie; - int prog_fd, link_fd = -1; + int err, prog_fd, link_fd = -1; struct bpf_link *link = NULL; LIBBPF_OPTS(bpf_raw_tp_opts, raw_tp_opts); LIBBPF_OPTS(bpf_raw_tracepoint_opts, opts); @@ -656,6 +675,11 @@ static void raw_tp_subtest(struct test_bpf_cookie *skel) goto cleanup; usleep(1); /* trigger */ + + err = verify_raw_tp_link_info(link_fd, cookie); + if (!ASSERT_OK(err, "verify_raw_tp_link_info")) + goto cleanup; + close(link_fd); /* detach */ link_fd = -1; -- cgit v1.2.3 From a570f386f3d19aa64fb3764504ac80cddb42e579 Mon Sep 17 00:00:00 2001 From: Blake Jones Date: Tue, 3 Jun 2025 13:37:01 -0700 Subject: Tests for the ".emit_strings" functionality in the BTF dumper. When this mode is turned on, "emit_zeroes" and "compact" have no effect, and embedded NUL characters always terminate printing of an array. Signed-off-by: Blake Jones Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250603203701.520541-2-blakejones@google.com --- tools/testing/selftests/bpf/prog_tests/btf_dump.c | 118 ++++++++++++++++++++++ 1 file changed, 118 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index c0a776feec23..82903585c870 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -879,6 +879,122 @@ static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d, "static int bpf_cgrp_storage_busy = (int)2", 2); } +struct btf_dump_string_ctx { + struct btf *btf; + struct btf_dump *d; + char *str; + struct btf_dump_type_data_opts *opts; + int array_id; +}; + +static int btf_dump_one_string(struct btf_dump_string_ctx *ctx, + char *ptr, size_t ptr_sz, + const char *expected_val) +{ + size_t type_sz; + int ret; + + ctx->str[0] = '\0'; + type_sz = btf__resolve_size(ctx->btf, ctx->array_id); + ret = btf_dump__dump_type_data(ctx->d, ctx->array_id, ptr, ptr_sz, ctx->opts); + if (type_sz <= ptr_sz) { + if (!ASSERT_EQ(ret, type_sz, "failed/unexpected type_sz")) + return -EINVAL; + } + if (!ASSERT_STREQ(ctx->str, expected_val, "ensure expected/actual match")) + return -EFAULT; + return 0; +} + +static void btf_dump_strings(struct btf_dump_string_ctx *ctx) +{ + struct btf_dump_type_data_opts *opts = ctx->opts; + + opts->emit_strings = true; + + opts->compact = true; + opts->emit_zeroes = false; + + opts->skip_names = false; + btf_dump_one_string(ctx, "foo", 4, "(char[4])\"foo\""); + + opts->skip_names = true; + btf_dump_one_string(ctx, "foo", 4, "\"foo\""); + + /* This should have no effect. */ + opts->emit_zeroes = false; + btf_dump_one_string(ctx, "foo", 4, "\"foo\""); + + /* This should have no effect. */ + opts->compact = false; + btf_dump_one_string(ctx, "foo", 4, "\"foo\""); + + /* Non-printable characters come out as hex. */ + btf_dump_one_string(ctx, "fo\xff", 4, "\"fo\\xff\""); + btf_dump_one_string(ctx, "fo\x7", 4, "\"fo\\x07\""); + + /* + * Strings that are too long for the specified type ("char[4]") + * should fall back to the current behavior. + */ + opts->compact = true; + btf_dump_one_string(ctx, "abcde", 6, "['a','b','c','d',]"); + + /* + * Strings that are too short for the specified type ("char[4]") + * should work normally. + */ + btf_dump_one_string(ctx, "ab", 3, "\"ab\""); + + /* Non-NUL-terminated arrays don't get printed as strings. */ + char food[4] = { 'f', 'o', 'o', 'd' }; + char bye[3] = { 'b', 'y', 'e' }; + + btf_dump_one_string(ctx, food, 4, "['f','o','o','d',]"); + btf_dump_one_string(ctx, bye, 3, "['b','y','e',]"); + + /* The embedded NUL should terminate the string. */ + char embed[4] = { 'f', 'o', '\0', 'd' }; + + btf_dump_one_string(ctx, embed, 4, "\"fo\""); +} + +static void test_btf_dump_string_data(void) +{ + struct test_ctx t = {}; + char str[STRSIZE]; + struct btf_dump *d; + DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts); + struct btf_dump_string_ctx ctx; + int char_id, int_id, array_id; + + if (test_ctx__init(&t)) + return; + + d = btf_dump__new(t.btf, btf_dump_snprintf, str, NULL); + if (!ASSERT_OK_PTR(d, "could not create BTF dump")) + return; + + /* Generate BTF for a four-element char array. */ + char_id = btf__add_int(t.btf, "char", 1, BTF_INT_CHAR); + ASSERT_EQ(char_id, 1, "char_id"); + int_id = btf__add_int(t.btf, "int", 4, BTF_INT_SIGNED); + ASSERT_EQ(int_id, 2, "int_id"); + array_id = btf__add_array(t.btf, int_id, char_id, 4); + ASSERT_EQ(array_id, 3, "array_id"); + + ctx.btf = t.btf; + ctx.d = d; + ctx.str = str; + ctx.opts = &opts; + ctx.array_id = array_id; + + btf_dump_strings(&ctx); + + btf_dump__free(d); + test_ctx__free(&t); +} + static void test_btf_datasec(struct btf *btf, struct btf_dump *d, char *str, const char *name, const char *expected_val, void *data, size_t data_sz) @@ -970,6 +1086,8 @@ void test_btf_dump() { test_btf_dump_struct_data(btf, d, str); if (test__start_subtest("btf_dump: var_data")) test_btf_dump_var_data(btf, d, str); + if (test__start_subtest("btf_dump: string_data")) + test_btf_dump_string_data(); btf_dump__free(d); btf__free(btf); -- cgit v1.2.3 From 64a064ce33b193a681f1eb52ac1ad09aadda9944 Mon Sep 17 00:00:00 2001 From: Rong Tao Date: Thu, 5 Jun 2025 16:45:14 +0800 Subject: selftests/bpf: rbtree: Fix incorrect global variable usage Within __add_three() function, should use function parameters instead of global variables. So that the variables groot_nested.inner.root and groot_nested.inner.glock in rbtree_add_nodes_nested() are tested correctly. Signed-off-by: Rong Tao Link: https://lore.kernel.org/r/tencent_3DD7405C0839EBE2724AC5FA357B5402B105@qq.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/rbtree.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/progs/rbtree.c b/tools/testing/selftests/bpf/progs/rbtree.c index a3620c15c136..49fe93d7e059 100644 --- a/tools/testing/selftests/bpf/progs/rbtree.c +++ b/tools/testing/selftests/bpf/progs/rbtree.c @@ -61,19 +61,19 @@ static long __add_three(struct bpf_rb_root *root, struct bpf_spin_lock *lock) } m->key = 1; - bpf_spin_lock(&glock); - bpf_rbtree_add(&groot, &n->node, less); - bpf_rbtree_add(&groot, &m->node, less); - bpf_spin_unlock(&glock); + bpf_spin_lock(lock); + bpf_rbtree_add(root, &n->node, less); + bpf_rbtree_add(root, &m->node, less); + bpf_spin_unlock(lock); n = bpf_obj_new(typeof(*n)); if (!n) return 3; n->key = 3; - bpf_spin_lock(&glock); - bpf_rbtree_add(&groot, &n->node, less); - bpf_spin_unlock(&glock); + bpf_spin_lock(lock); + bpf_rbtree_add(root, &n->node, less); + bpf_spin_unlock(lock); return 0; } -- cgit v1.2.3 From ae8824037a0ac550745b223aa3680819ed65e8db Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 6 Jun 2025 18:36:10 -0700 Subject: selftests/bpf: Reduce test_xdp_adjust_frags_tail_grow logs For selftest xdp_adjust_tail/xdp_adjust_frags_tail_grow, if tested failure, I see a long list of log output like ... test_xdp_adjust_frags_tail_grow:PASS:9Kb+10b-untouched 0 nsec test_xdp_adjust_frags_tail_grow:PASS:9Kb+10b-untouched 0 nsec test_xdp_adjust_frags_tail_grow:PASS:9Kb+10b-untouched 0 nsec test_xdp_adjust_frags_tail_grow:PASS:9Kb+10b-untouched 0 nsec ... There are total 7374 lines of the above which is too much. Let us only issue such logs when it is an assert failure. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20250607013610.1551399-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/xdp_adjust_tail.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c index b2b2d85dbb1b..e361129402a1 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c @@ -246,14 +246,20 @@ static void test_xdp_adjust_frags_tail_grow(void) ASSERT_EQ(topts.retval, XDP_TX, "9Kb+10b retval"); ASSERT_EQ(topts.data_size_out, exp_size, "9Kb+10b size"); - for (i = 0; i < 9000; i++) - ASSERT_EQ(buf[i], 1, "9Kb+10b-old"); + for (i = 0; i < 9000; i++) { + if (buf[i] != 1) + ASSERT_EQ(buf[i], 1, "9Kb+10b-old"); + } - for (i = 9000; i < 9010; i++) - ASSERT_EQ(buf[i], 0, "9Kb+10b-new"); + for (i = 9000; i < 9010; i++) { + if (buf[i] != 0) + ASSERT_EQ(buf[i], 0, "9Kb+10b-new"); + } - for (i = 9010; i < 16384; i++) - ASSERT_EQ(buf[i], 1, "9Kb+10b-untouched"); + for (i = 9010; i < 16384; i++) { + if (buf[i] != 1) + ASSERT_EQ(buf[i], 1, "9Kb+10b-untouched"); + } /* Test a too large grow */ memset(buf, 1, 16384); -- cgit v1.2.3 From 377d3715900c1f465a5ea8a6a11bcbdc42b18668 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 6 Jun 2025 18:36:15 -0700 Subject: selftests/bpf: Fix bpf_mod_race test failure with arm64 64KB page size Currently, uffd_register.range.len is set to 4096 for command 'ioctl(uffd, UFFDIO_REGISTER, &uffd_register)'. For arm64 64KB page size, the len must be 64KB size aligned as page size alignment is required. See fs/userfaultfd.c:validate_unaligned_range(). Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20250607013615.1551783-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c index fe2c502e5089..ecc3d47919ad 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c @@ -78,7 +78,7 @@ static int test_setup_uffd(void *fault_addr) } uffd_register.range.start = (unsigned long)fault_addr; - uffd_register.range.len = 4096; + uffd_register.range.len = getpagesize(); uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING; if (ioctl(uffd, UFFDIO_REGISTER, &uffd_register)) { close(uffd); -- cgit v1.2.3 From 8c8c5e3c854a2593ec90dacd868f3066b67de1c4 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 6 Jun 2025 18:36:21 -0700 Subject: selftests/bpf: Fix ringbuf/ringbuf_write test failure with arm64 64KB page size The ringbuf max_entries must be PAGE_ALIGNED. See kernel function ringbuf_map_alloc(). So for arm64 64KB page size, adjust max_entries and other related metrics properly. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20250607013621.1552332-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/ringbuf.c | 4 ++-- tools/testing/selftests/bpf/progs/test_ringbuf_write.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index da430df45aa4..d1e4cb28a72c 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -97,7 +97,7 @@ static void ringbuf_write_subtest(void) if (!ASSERT_OK_PTR(skel, "skel_open")) return; - skel->maps.ringbuf.max_entries = 0x4000; + skel->maps.ringbuf.max_entries = 0x40000; err = test_ringbuf_write_lskel__load(skel); if (!ASSERT_OK(err, "skel_load")) @@ -108,7 +108,7 @@ static void ringbuf_write_subtest(void) mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0); if (!ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos")) goto cleanup; - *mmap_ptr = 0x3000; + *mmap_ptr = 0x30000; ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw"); skel->bss->pid = getpid(); diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_write.c b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c index 350513c0e4c9..f063a0013f85 100644 --- a/tools/testing/selftests/bpf/progs/test_ringbuf_write.c +++ b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c @@ -26,11 +26,11 @@ int test_ringbuf_write(void *ctx) if (cur_pid != pid) return 0; - sample1 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0); + sample1 = bpf_ringbuf_reserve(&ringbuf, 0x30000, 0); if (!sample1) return 0; /* first one can pass */ - sample2 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0); + sample2 = bpf_ringbuf_reserve(&ringbuf, 0x30000, 0); if (!sample2) { bpf_ringbuf_discard(sample1, 0); __sync_fetch_and_add(&discarded, 1); -- cgit v1.2.3 From bbc7bd658ddc662083639b9e9a280b90225ecd9a Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 6 Jun 2025 18:36:26 -0700 Subject: selftests/bpf: Fix a user_ringbuf failure with arm64 64KB page size The ringbuf max_entries must be PAGE_ALIGNED. See kernel function ringbuf_map_alloc(). So for arm64 64KB page size, adjust max_entries properly. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20250607013626.1553001-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/user_ringbuf.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c index d424e7ecbd12..9fd3ae987321 100644 --- a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c @@ -21,8 +21,7 @@ #include "../progs/test_user_ringbuf.h" static const long c_sample_size = sizeof(struct sample) + BPF_RINGBUF_HDR_SZ; -static const long c_ringbuf_size = 1 << 12; /* 1 small page */ -static const long c_max_entries = c_ringbuf_size / c_sample_size; +static long c_ringbuf_size, c_max_entries; static void drain_current_samples(void) { @@ -424,7 +423,9 @@ static void test_user_ringbuf_loop(void) uint32_t remaining_samples = total_samples; int err; - BUILD_BUG_ON(total_samples <= c_max_entries); + if (!ASSERT_LT(c_max_entries, total_samples, "compare_c_max_entries")) + return; + err = load_skel_create_user_ringbuf(&skel, &ringbuf); if (err) return; @@ -686,6 +687,9 @@ void test_user_ringbuf(void) { int i; + c_ringbuf_size = getpagesize(); /* 1 page */ + c_max_entries = c_ringbuf_size / c_sample_size; + for (i = 0; i < ARRAY_SIZE(success_tests); i++) { if (!test__start_subtest(success_tests[i].test_name)) continue; -- cgit v1.2.3 From f287822688eeb44ae1cf6ac45701d965efc33218 Mon Sep 17 00:00:00 2001 From: "Xin Li (Intel)" Date: Mon, 9 Jun 2025 01:40:54 -0700 Subject: selftests/x86: Add a test to detect infinite SIGTRAP handler loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When FRED is enabled, if the Trap Flag (TF) is set without an external debugger attached, it can lead to an infinite loop in the SIGTRAP handler. To avoid this, the software event flag in the augmented SS must be cleared, ensuring that no single-step trap remains pending when ERETU completes. This test checks for that specific scenario—verifying whether the kernel correctly prevents an infinite SIGTRAP loop in this edge case when FRED is enabled. The test should _always_ pass with IDT event delivery, thus no need to disable the test even when FRED is not enabled. Signed-off-by: Xin Li (Intel) Signed-off-by: Dave Hansen Tested-by: Sohil Mehta Cc:stable@vger.kernel.org Link: https://lore.kernel.org/all/20250609084054.2083189-3-xin%40zytor.com --- tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/sigtrap_loop.c | 101 +++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/x86/sigtrap_loop.c (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index f703fcfe9f7c..83148875a12c 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -12,7 +12,7 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh "$(CC)" trivial_program.c -no-pie) TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ check_initial_reg_state sigreturn iopl ioperm \ - test_vsyscall mov_ss_trap \ + test_vsyscall mov_ss_trap sigtrap_loop \ syscall_arg_fault fsgsbase_restore sigaltstack TARGETS_C_BOTHBITS += nx_stack TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ diff --git a/tools/testing/selftests/x86/sigtrap_loop.c b/tools/testing/selftests/x86/sigtrap_loop.c new file mode 100644 index 000000000000..9d065479e89f --- /dev/null +++ b/tools/testing/selftests/x86/sigtrap_loop.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025 Intel Corporation + */ +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include + +#ifdef __x86_64__ +# define REG_IP REG_RIP +#else +# define REG_IP REG_EIP +#endif + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), int flags) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); + + return; +} + +static void sigtrap(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t *)ctx_void; + static unsigned int loop_count_on_same_ip; + static unsigned long last_trap_ip; + + if (last_trap_ip == ctx->uc_mcontext.gregs[REG_IP]) { + printf("\tTrapped at %016lx\n", last_trap_ip); + + /* + * If the same IP is hit more than 10 times in a row, it is + * _considered_ an infinite loop. + */ + if (++loop_count_on_same_ip > 10) { + printf("[FAIL]\tDetected SIGTRAP infinite loop\n"); + exit(1); + } + + return; + } + + loop_count_on_same_ip = 0; + last_trap_ip = ctx->uc_mcontext.gregs[REG_IP]; + printf("\tTrapped at %016lx\n", last_trap_ip); +} + +int main(int argc, char *argv[]) +{ + sethandler(SIGTRAP, sigtrap, 0); + + /* + * Set the Trap Flag (TF) to single-step the test code, therefore to + * trigger a SIGTRAP signal after each instruction until the TF is + * cleared. + * + * Because the arithmetic flags are not significant here, the TF is + * set by pushing 0x302 onto the stack and then popping it into the + * flags register. + * + * Four instructions in the following asm code are executed with the + * TF set, thus the SIGTRAP handler is expected to run four times. + */ + printf("[RUN]\tSIGTRAP infinite loop detection\n"); + asm volatile( +#ifdef __x86_64__ + /* + * Avoid clobbering the redzone + * + * Equivalent to "sub $128, %rsp", however -128 can be encoded + * in a single byte immediate while 128 uses 4 bytes. + */ + "add $-128, %rsp\n\t" +#endif + "push $0x302\n\t" + "popf\n\t" + "nop\n\t" + "nop\n\t" + "push $0x202\n\t" + "popf\n\t" +#ifdef __x86_64__ + "sub $-128, %rsp\n\t" +#endif + ); + + printf("[OK]\tNo SIGTRAP infinite loop detected\n"); + return 0; +} -- cgit v1.2.3 From c1bb68656bc18fd59bbd630fd1b5a86875b464b3 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 6 Jun 2025 09:31:51 -0700 Subject: selftests/bpf: Move some tc_helpers.h functions to test_progs.h Move static inline functions id_from_prog_fd() and id_from_link_fd() from prog_tests/tc_helpers.h to test_progs.h so these two functions can be reused for later cgroup mprog selftests. Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250606163151.2429325-1-yonghong.song@linux.dev --- .../testing/selftests/bpf/prog_tests/tc_helpers.h | 28 ---------------------- tools/testing/selftests/bpf/test_progs.h | 28 ++++++++++++++++++++++ 2 files changed, 28 insertions(+), 28 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h index 924d0e25320c..d52a62af77bf 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h @@ -8,34 +8,6 @@ # define loopback 1 #endif -static inline __u32 id_from_prog_fd(int fd) -{ - struct bpf_prog_info prog_info = {}; - __u32 prog_info_len = sizeof(prog_info); - int err; - - err = bpf_obj_get_info_by_fd(fd, &prog_info, &prog_info_len); - if (!ASSERT_OK(err, "id_from_prog_fd")) - return 0; - - ASSERT_NEQ(prog_info.id, 0, "prog_info.id"); - return prog_info.id; -} - -static inline __u32 id_from_link_fd(int fd) -{ - struct bpf_link_info link_info = {}; - __u32 link_info_len = sizeof(link_info); - int err; - - err = bpf_link_get_info_by_fd(fd, &link_info, &link_info_len); - if (!ASSERT_OK(err, "id_from_link_fd")) - return 0; - - ASSERT_NEQ(link_info.id, 0, "link_info.id"); - return link_info.id; -} - static inline __u32 ifindex_from_link_fd(int fd) { struct bpf_link_info link_info = {}; diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 870694f2a359..df2222a1806f 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -460,6 +460,34 @@ static inline void *u64_to_ptr(__u64 ptr) return (void *) (unsigned long) ptr; } +static inline __u32 id_from_prog_fd(int fd) +{ + struct bpf_prog_info prog_info = {}; + __u32 prog_info_len = sizeof(prog_info); + int err; + + err = bpf_obj_get_info_by_fd(fd, &prog_info, &prog_info_len); + if (!ASSERT_OK(err, "id_from_prog_fd")) + return 0; + + ASSERT_NEQ(prog_info.id, 0, "prog_info.id"); + return prog_info.id; +} + +static inline __u32 id_from_link_fd(int fd) +{ + struct bpf_link_info link_info = {}; + __u32 link_info_len = sizeof(link_info); + int err; + + err = bpf_link_get_info_by_fd(fd, &link_info, &link_info_len); + if (!ASSERT_OK(err, "id_from_link_fd")) + return 0; + + ASSERT_NEQ(link_info.id, 0, "link_info.id"); + return link_info.id; +} + int bpf_find_map(const char *test, struct bpf_object *obj, const char *name); int compare_map_keys(int map1_fd, int map2_fd); int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len); -- cgit v1.2.3 From e422d5f118e4da5d15b57c5721ee68ae39b512ec Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 6 Jun 2025 09:31:56 -0700 Subject: selftests/bpf: Add two selftests for mprog API based cgroup progs Two tests are added: - cgroup_mprog_opts, which mimics tc_opts.c ([1]). Both prog and link attach are tested. Some negative tests are also included. - cgroup_mprog_ordering, which actually runs the program with some mprog API flags. [1] https://github.com/torvalds/linux/blob/master/tools/testing/selftests/bpf/prog_tests/tc_opts.c Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250606163156.2429955-1-yonghong.song@linux.dev --- .../selftests/bpf/prog_tests/cgroup_mprog_opts.c | 617 +++++++++++++++++++++ .../bpf/prog_tests/cgroup_mprog_ordering.c | 77 +++ tools/testing/selftests/bpf/progs/cgroup_mprog.c | 30 + 3 files changed, 724 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c create mode 100644 tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c create mode 100644 tools/testing/selftests/bpf/progs/cgroup_mprog.c (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c new file mode 100644 index 000000000000..bb60704a3ef9 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c @@ -0,0 +1,617 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include +#include "cgroup_helpers.h" +#include "cgroup_mprog.skel.h" + +static void assert_mprog_count(int cg, int atype, int expected) +{ + __u32 count = 0, attach_flags = 0; + int err; + + err = bpf_prog_query(cg, atype, 0, &attach_flags, + NULL, &count); + ASSERT_EQ(count, expected, "count"); + ASSERT_EQ(err, 0, "prog_query"); +} + +static void test_prog_attach_detach(int atype) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4; + struct cgroup_mprog *skel; + __u32 prog_ids[10]; + int cg, err; + + cg = test__join_cgroup("/prog_attach_detach"); + if (!ASSERT_GE(cg, 0, "join_cgroup /prog_attach_detach")) + return; + + skel = cgroup_mprog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.getsockopt_1); + fd2 = bpf_program__fd(skel->progs.getsockopt_2); + fd3 = bpf_program__fd(skel->progs.getsockopt_3); + fd4 = bpf_program__fd(skel->progs.getsockopt_4); + + id1 = id_from_prog_fd(fd1); + id2 = id_from_prog_fd(fd2); + id3 = id_from_prog_fd(fd3); + id4 = id_from_prog_fd(fd4); + + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_AFTER, + .expected_revision = 1, + ); + + /* ordering: [fd1] */ + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + assert_mprog_count(cg, atype, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE, + .expected_revision = 2, + ); + + /* ordering: [fd2, fd1] */ + err = bpf_prog_attach_opts(fd2, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup1; + + assert_mprog_count(cg, atype, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER, + .relative_fd = fd2, + .expected_revision = 3, + ); + + /* ordering: [fd2, fd3, fd1] */ + err = bpf_prog_attach_opts(fd3, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup2; + + assert_mprog_count(cg, atype, 3); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI, + .expected_revision = 4, + ); + + /* ordering: [fd2, fd3, fd1, fd4] */ + err = bpf_prog_attach_opts(fd4, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup3; + + assert_mprog_count(cg, atype, 4); + + /* retrieve optq.prog_cnt */ + err = bpf_prog_query_opts(cg, atype, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + /* optq.prog_cnt will be used in below query */ + memset(prog_ids, 0, sizeof(prog_ids)); + optq.prog_ids = prog_ids; + err = bpf_prog_query_opts(cg, atype, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(optq.count, 4, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], id1, "prog_ids[2]"); + ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]"); + ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + ASSERT_EQ(optq.link_ids, NULL, "link_ids"); + +cleanup4: + optd.expected_revision = 5; + err = bpf_prog_detach_opts(fd4, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 3); + +cleanup3: + LIBBPF_OPTS_RESET(optd); + err = bpf_prog_detach_opts(fd3, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 2); + + /* Check revision after two detach operations */ + err = bpf_prog_query_opts(cg, atype, &optq); + ASSERT_OK(err, "prog_query"); + ASSERT_EQ(optq.revision, 7, "revision"); + +cleanup2: + err = bpf_prog_detach_opts(fd2, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 1); + +cleanup1: + err = bpf_prog_detach_opts(fd1, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 0); + +cleanup: + cgroup_mprog__destroy(skel); + close(cg); +} + +static void test_link_attach_detach(int atype) +{ + LIBBPF_OPTS(bpf_cgroup_opts, opta); + LIBBPF_OPTS(bpf_cgroup_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + struct bpf_link *link1, *link2, *link3, *link4; + __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4; + struct cgroup_mprog *skel; + __u32 prog_ids[10]; + int cg, err; + + cg = test__join_cgroup("/link_attach_detach"); + if (!ASSERT_GE(cg, 0, "join_cgroup /link_attach_detach")) + return; + + skel = cgroup_mprog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.getsockopt_1); + fd2 = bpf_program__fd(skel->progs.getsockopt_2); + fd3 = bpf_program__fd(skel->progs.getsockopt_3); + fd4 = bpf_program__fd(skel->progs.getsockopt_4); + + id1 = id_from_prog_fd(fd1); + id2 = id_from_prog_fd(fd2); + id3 = id_from_prog_fd(fd3); + id4 = id_from_prog_fd(fd4); + + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 1, + ); + + /* ordering: [fd1] */ + link1 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_1, cg, &opta); + if (!ASSERT_OK_PTR(link1, "link_attach")) + goto cleanup; + + assert_mprog_count(cg, atype, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_BEFORE | BPF_F_LINK, + .relative_id = id_from_link_fd(bpf_link__fd(link1)), + .expected_revision = 2, + ); + + /* ordering: [fd2, fd1] */ + link2 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_2, cg, &opta); + if (!ASSERT_OK_PTR(link2, "link_attach")) + goto cleanup1; + + assert_mprog_count(cg, atype, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_AFTER | BPF_F_LINK, + .relative_fd = bpf_link__fd(link2), + .expected_revision = 3, + ); + + /* ordering: [fd2, fd3, fd1] */ + link3 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_3, cg, &opta); + if (!ASSERT_OK_PTR(link3, "link_attach")) + goto cleanup2; + + assert_mprog_count(cg, atype, 3); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 4, + ); + + /* ordering: [fd2, fd3, fd1, fd4] */ + link4 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_4, cg, &opta); + if (!ASSERT_OK_PTR(link4, "link_attach")) + goto cleanup3; + + assert_mprog_count(cg, atype, 4); + + /* retrieve optq.prog_cnt */ + err = bpf_prog_query_opts(cg, atype, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + /* optq.prog_cnt will be used in below query */ + memset(prog_ids, 0, sizeof(prog_ids)); + optq.prog_ids = prog_ids; + err = bpf_prog_query_opts(cg, atype, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(optq.count, 4, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], id1, "prog_ids[2]"); + ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]"); + ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + ASSERT_EQ(optq.link_ids, NULL, "link_ids"); + +cleanup4: + bpf_link__destroy(link4); + assert_mprog_count(cg, atype, 3); + +cleanup3: + bpf_link__destroy(link3); + assert_mprog_count(cg, atype, 2); + + /* Check revision after two detach operations */ + err = bpf_prog_query_opts(cg, atype, &optq); + ASSERT_OK(err, "prog_query"); + ASSERT_EQ(optq.revision, 7, "revision"); + +cleanup2: + bpf_link__destroy(link2); + assert_mprog_count(cg, atype, 1); + +cleanup1: + bpf_link__destroy(link1); + assert_mprog_count(cg, atype, 0); + +cleanup: + cgroup_mprog__destroy(skel); + close(cg); +} + +static void test_preorder_prog_attach_detach(int atype) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + __u32 fd1, fd2, fd3, fd4; + struct cgroup_mprog *skel; + int cg, err; + + cg = test__join_cgroup("/preorder_prog_attach_detach"); + if (!ASSERT_GE(cg, 0, "join_cgroup /preorder_prog_attach_detach")) + return; + + skel = cgroup_mprog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.getsockopt_1); + fd2 = bpf_program__fd(skel->progs.getsockopt_2); + fd3 = bpf_program__fd(skel->progs.getsockopt_3); + fd4 = bpf_program__fd(skel->progs.getsockopt_4); + + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI, + .expected_revision = 1, + ); + + /* ordering: [fd1] */ + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + assert_mprog_count(cg, atype, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_PREORDER, + .expected_revision = 2, + ); + + /* ordering: [fd1, fd2] */ + err = bpf_prog_attach_opts(fd2, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup1; + + assert_mprog_count(cg, atype, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER, + .relative_fd = fd2, + .expected_revision = 3, + ); + + err = bpf_prog_attach_opts(fd3, cg, atype, &opta); + if (!ASSERT_EQ(err, -EINVAL, "prog_attach")) + goto cleanup2; + + assert_mprog_count(cg, atype, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER | BPF_F_PREORDER, + .relative_fd = fd2, + .expected_revision = 3, + ); + + /* ordering: [fd1, fd2, fd3] */ + err = bpf_prog_attach_opts(fd3, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup2; + + assert_mprog_count(cg, atype, 3); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI, + .expected_revision = 4, + ); + + /* ordering: [fd2, fd3, fd1, fd4] */ + err = bpf_prog_attach_opts(fd4, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup3; + + assert_mprog_count(cg, atype, 4); + + err = bpf_prog_detach_opts(fd4, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 3); + +cleanup3: + err = bpf_prog_detach_opts(fd3, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 2); + +cleanup2: + err = bpf_prog_detach_opts(fd2, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 1); + +cleanup1: + err = bpf_prog_detach_opts(fd1, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 0); + +cleanup: + cgroup_mprog__destroy(skel); + close(cg); +} + +static void test_preorder_link_attach_detach(int atype) +{ + LIBBPF_OPTS(bpf_cgroup_opts, opta); + struct bpf_link *link1, *link2, *link3, *link4; + struct cgroup_mprog *skel; + __u32 fd2; + int cg; + + cg = test__join_cgroup("/preorder_link_attach_detach"); + if (!ASSERT_GE(cg, 0, "join_cgroup /preorder_link_attach_detach")) + return; + + skel = cgroup_mprog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd2 = bpf_program__fd(skel->progs.getsockopt_2); + + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 1, + ); + + /* ordering: [fd1] */ + link1 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_1, cg, &opta); + if (!ASSERT_OK_PTR(link1, "link_attach")) + goto cleanup; + + assert_mprog_count(cg, atype, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_PREORDER, + .expected_revision = 2, + ); + + /* ordering: [fd1, fd2] */ + link2 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_2, cg, &opta); + if (!ASSERT_OK_PTR(link2, "link_attach")) + goto cleanup1; + + assert_mprog_count(cg, atype, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_AFTER, + .relative_fd = fd2, + .expected_revision = 3, + ); + + link3 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_3, cg, &opta); + if (!ASSERT_ERR_PTR(link3, "link_attach")) + goto cleanup2; + + assert_mprog_count(cg, atype, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_AFTER | BPF_F_PREORDER | BPF_F_LINK, + .relative_fd = bpf_link__fd(link2), + .expected_revision = 3, + ); + + /* ordering: [fd1, fd2, fd3] */ + link3 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_3, cg, &opta); + if (!ASSERT_OK_PTR(link3, "link_attach")) + goto cleanup2; + + assert_mprog_count(cg, atype, 3); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 4, + ); + + /* ordering: [fd2, fd3, fd1, fd4] */ + link4 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_4, cg, &opta); + if (!ASSERT_OK_PTR(link4, "prog_attach")) + goto cleanup3; + + assert_mprog_count(cg, atype, 4); + + bpf_link__destroy(link4); + assert_mprog_count(cg, atype, 3); + +cleanup3: + bpf_link__destroy(link3); + assert_mprog_count(cg, atype, 2); + +cleanup2: + bpf_link__destroy(link2); + assert_mprog_count(cg, atype, 1); + +cleanup1: + bpf_link__destroy(link1); + assert_mprog_count(cg, atype, 0); + +cleanup: + cgroup_mprog__destroy(skel); + close(cg); +} + +static void test_invalid_attach_detach(int atype) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + __u32 fd1, fd2, id2; + struct cgroup_mprog *skel; + int cg, err; + + cg = test__join_cgroup("/invalid_attach_detach"); + if (!ASSERT_GE(cg, 0, "join_cgroup /invalid_attach_detach")) + return; + + skel = cgroup_mprog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.getsockopt_1); + fd2 = bpf_program__fd(skel->progs.getsockopt_2); + + id2 = id_from_prog_fd(fd2); + + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_AFTER, + .relative_id = id2, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_ID, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -ENOENT, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER | BPF_F_ID, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -ENOENT, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_AFTER, + .relative_id = id2, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_LINK, + .relative_id = id2, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI, + .relative_id = id2, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE, + .relative_fd = fd1, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -ENOENT, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER, + .relative_fd = fd1, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -ENOENT, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + assert_mprog_count(cg, atype, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach"); + assert_mprog_count(cg, atype, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE | BPF_F_AFTER, + .replace_prog_fd = fd1, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach"); + assert_mprog_count(cg, atype, 1); +cleanup: + cgroup_mprog__destroy(skel); + close(cg); +} + +void test_cgroup_mprog_opts(void) +{ + if (test__start_subtest("prog_attach_detach")) + test_prog_attach_detach(BPF_CGROUP_GETSOCKOPT); + if (test__start_subtest("link_attach_detach")) + test_link_attach_detach(BPF_CGROUP_GETSOCKOPT); + if (test__start_subtest("preorder_prog_attach_detach")) + test_preorder_prog_attach_detach(BPF_CGROUP_GETSOCKOPT); + if (test__start_subtest("preorder_link_attach_detach")) + test_preorder_link_attach_detach(BPF_CGROUP_GETSOCKOPT); + if (test__start_subtest("invalid_attach_detach")) + test_invalid_attach_detach(BPF_CGROUP_GETSOCKOPT); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c new file mode 100644 index 000000000000..4a4e9710b474 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include +#include "cgroup_helpers.h" +#include "cgroup_preorder.skel.h" + +static int run_getsockopt_test(int cg_parent, int sock_fd, bool has_relative_fd) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opts); + enum bpf_attach_type prog_p_atype, prog_p2_atype; + int prog_p_fd, prog_p2_fd; + struct cgroup_preorder *skel = NULL; + struct bpf_program *prog; + __u8 *result, buf; + socklen_t optlen; + int err = 0; + + skel = cgroup_preorder__open_and_load(); + if (!ASSERT_OK_PTR(skel, "cgroup_preorder__open_and_load")) + return 0; + + LIBBPF_OPTS_RESET(opts); + opts.flags = BPF_F_ALLOW_MULTI; + prog = skel->progs.parent; + prog_p_fd = bpf_program__fd(prog); + prog_p_atype = bpf_program__expected_attach_type(prog); + err = bpf_prog_attach_opts(prog_p_fd, cg_parent, prog_p_atype, &opts); + if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent")) + goto close_skel; + + opts.flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE; + if (has_relative_fd) + opts.relative_fd = prog_p_fd; + prog = skel->progs.parent_2; + prog_p2_fd = bpf_program__fd(prog); + prog_p2_atype = bpf_program__expected_attach_type(prog); + err = bpf_prog_attach_opts(prog_p2_fd, cg_parent, prog_p2_atype, &opts); + if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent_2")) + goto detach_parent; + + err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen); + if (!ASSERT_OK(err, "getsockopt")) + goto detach_parent_2; + + result = skel->bss->result; + ASSERT_TRUE(result[0] == 4 && result[1] == 3, "result values"); + +detach_parent_2: + ASSERT_OK(bpf_prog_detach2(prog_p2_fd, cg_parent, prog_p2_atype), + "bpf_prog_detach2-parent_2"); +detach_parent: + ASSERT_OK(bpf_prog_detach2(prog_p_fd, cg_parent, prog_p_atype), + "bpf_prog_detach2-parent"); +close_skel: + cgroup_preorder__destroy(skel); + return err; +} + +void test_cgroup_mprog_ordering(void) +{ + int cg_parent = -1, sock_fd = -1; + + cg_parent = test__join_cgroup("/parent"); + if (!ASSERT_GE(cg_parent, 0, "join_cgroup /parent")) + goto out; + + sock_fd = socket(AF_INET, SOCK_STREAM, 0); + if (!ASSERT_GE(sock_fd, 0, "socket")) + goto out; + + ASSERT_OK(run_getsockopt_test(cg_parent, sock_fd, false), "getsockopt_test_1"); + ASSERT_OK(run_getsockopt_test(cg_parent, sock_fd, true), "getsockopt_test_2"); + +out: + close(sock_fd); + close(cg_parent); +} diff --git a/tools/testing/selftests/bpf/progs/cgroup_mprog.c b/tools/testing/selftests/bpf/progs/cgroup_mprog.c new file mode 100644 index 000000000000..6a0ea02c4de2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgroup_mprog.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include +#include + +char _license[] SEC("license") = "GPL"; + +SEC("cgroup/getsockopt") +int getsockopt_1(struct bpf_sockopt *ctx) +{ + return 1; +} + +SEC("cgroup/getsockopt") +int getsockopt_2(struct bpf_sockopt *ctx) +{ + return 1; +} + +SEC("cgroup/getsockopt") +int getsockopt_3(struct bpf_sockopt *ctx) +{ + return 1; +} + +SEC("cgroup/getsockopt") +int getsockopt_4(struct bpf_sockopt *ctx) +{ + return 1; +} -- cgit v1.2.3 From 5534e58f2e9bd72b253d033ee0af6e68eb8ac96b Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Mon, 9 Jun 2025 11:30:22 -0700 Subject: bpf: Make reg_not_null() true for CONST_PTR_TO_MAP When reg->type is CONST_PTR_TO_MAP, it can not be null. However the verifier explores the branches under rX == 0 in check_cond_jmp_op() even if reg->type is CONST_PTR_TO_MAP, because it was not checked for in reg_not_null(). Fix this by adding CONST_PTR_TO_MAP to the set of types that are considered non nullable in reg_not_null(). An old "unpriv: cmp map pointer with zero" selftest fails with this change, because now early out correctly triggers in check_cond_jmp_op(), making the verification to pass. In practice verifier may allow pointer to null comparison in unpriv, since in many cases the relevant branch and comparison op are removed as dead code. So change the expected test result to __success_unpriv. Signed-off-by: Ihor Solodrai Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250609183024.359974-2-isolodrai@meta.com --- kernel/bpf/verifier.c | 3 ++- tools/testing/selftests/bpf/progs/verifier_unpriv.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e31f6b0ccb30..48a3241cda0f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -405,7 +405,8 @@ static bool reg_not_null(const struct bpf_reg_state *reg) type == PTR_TO_MAP_KEY || type == PTR_TO_SOCK_COMMON || (type == PTR_TO_BTF_ID && is_trusted_reg(reg)) || - type == PTR_TO_MEM; + type == PTR_TO_MEM || + type == CONST_PTR_TO_MAP; } static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg) diff --git a/tools/testing/selftests/bpf/progs/verifier_unpriv.c b/tools/testing/selftests/bpf/progs/verifier_unpriv.c index a4a5e2071604..28200f068ce5 100644 --- a/tools/testing/selftests/bpf/progs/verifier_unpriv.c +++ b/tools/testing/selftests/bpf/progs/verifier_unpriv.c @@ -619,7 +619,7 @@ __naked void pass_pointer_to_tail_call(void) SEC("socket") __description("unpriv: cmp map pointer with zero") -__success __failure_unpriv __msg_unpriv("R1 pointer comparison") +__success __success_unpriv __retval(0) __naked void cmp_map_pointer_with_zero(void) { -- cgit v1.2.3 From eb6c99278490a9045662cbc2df9e2a99489df37a Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Mon, 9 Jun 2025 11:30:23 -0700 Subject: selftests/bpf: Add cmp_map_pointer_with_const test Add a test for CONST_PTR_TO_MAP comparison with a non-0 constant. A BPF program with this code must not pass verification in unpriv. Signed-off-by: Ihor Solodrai Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250609183024.359974-3-isolodrai@meta.com --- tools/testing/selftests/bpf/progs/verifier_unpriv.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/progs/verifier_unpriv.c b/tools/testing/selftests/bpf/progs/verifier_unpriv.c index 28200f068ce5..db52ba66e880 100644 --- a/tools/testing/selftests/bpf/progs/verifier_unpriv.c +++ b/tools/testing/selftests/bpf/progs/verifier_unpriv.c @@ -624,7 +624,6 @@ __retval(0) __naked void cmp_map_pointer_with_zero(void) { asm volatile (" \ - r1 = 0; \ r1 = %[map_hash_8b] ll; \ if r1 == 0 goto l0_%=; \ l0_%=: r0 = 0; \ @@ -634,6 +633,22 @@ l0_%=: r0 = 0; \ : __clobber_all); } +SEC("socket") +__description("unpriv: cmp map pointer with const") +__success __failure_unpriv __msg_unpriv("R1 pointer comparison prohibited") +__retval(0) +__naked void cmp_map_pointer_with_const(void) +{ + asm volatile (" \ + r1 = %[map_hash_8b] ll; \ + if r1 == 0x0000beef goto l0_%=; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_addr(map_hash_8b) + : __clobber_all); +} + SEC("socket") __description("unpriv: write into frame pointer") __failure __msg("frame pointer is read only") -- cgit v1.2.3 From 260b8629189611ab5c4894205955b371bed9b75d Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Mon, 9 Jun 2025 11:30:24 -0700 Subject: selftests/bpf: Add test cases with CONST_PTR_TO_MAP null checks A test requires the following to happen: * CONST_PTR_TO_MAP value is checked for null * the code in the null branch fails verification Add test cases: * direct global map_ptr comparison to null * lookup inner map, then two checks (the first transforms map_value_or_null into map_ptr) * lookup inner map, spill-fill it, then check for null * use an array of ringbufs to recreate a common coding pattern [1] [1] https://lore.kernel.org/bpf/CAEf4BzZNU0gX_sQ8k8JaLe1e+Veth3Rk=4x7MDhv=hQxvO8EDw@mail.gmail.com/ Suggested-by: Andrii Nakryiko Signed-off-by: Ihor Solodrai Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250609183024.359974-4-isolodrai@meta.com --- .../selftests/bpf/progs/verifier_map_in_map.c | 118 +++++++++++++++++++++ 1 file changed, 118 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/progs/verifier_map_in_map.c b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c index 7d088ba99ea5..16b761e510f0 100644 --- a/tools/testing/selftests/bpf/progs/verifier_map_in_map.c +++ b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c @@ -139,4 +139,122 @@ __naked void on_the_inner_map_pointer(void) : __clobber_all); } +SEC("socket") +__description("map_ptr is never null") +__success +__naked void map_ptr_is_never_null(void) +{ + asm volatile (" \ + r0 = 0; \ + r1 = %[map_in_map] ll; \ + if r1 != 0 goto l0_%=; \ + r10 = 42; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_in_map) + : __clobber_all); +} + +SEC("socket") +__description("map_ptr is never null inner") +__success +__naked void map_ptr_is_never_null_inner(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u32*)(r10 - 4) = r1; \ + r2 = r10; \ + r2 += -4; \ + r1 = %[map_in_map] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + if r0 != 0 goto l0_%=; \ + r10 = 42; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_in_map) + : __clobber_all); +} + +SEC("socket") +__description("map_ptr is never null inner spill fill") +__success +__naked void map_ptr_is_never_null_inner_spill_fill(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u32*)(r10 - 4) = r1; \ + r2 = r10; \ + r2 += -4; \ + r1 = %[map_in_map] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 != 0 goto l0_%=; \ + exit; \ +l0_%=: *(u64 *)(r10 -16) = r0; \ + r1 = *(u64 *)(r10 -16); \ + if r1 == 0 goto l1_%=; \ + exit; \ +l1_%=: r10 = 42; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_in_map) + : __clobber_all); +} + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); + __array(values, struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 64 * 1024); + }); +} rb_in_map SEC(".maps"); + +struct rb_ctx { + void *rb; + struct bpf_dynptr dptr; +}; + +static __always_inline struct rb_ctx __rb_event_reserve(__u32 sz) +{ + struct rb_ctx rb_ctx = {}; + void *rb; + __u32 cpu = bpf_get_smp_processor_id(); + __u32 rb_slot = cpu & 1; + + rb = bpf_map_lookup_elem(&rb_in_map, &rb_slot); + if (!rb) + return rb_ctx; + + rb_ctx.rb = rb; + bpf_ringbuf_reserve_dynptr(rb, sz, 0, &rb_ctx.dptr); + + return rb_ctx; +} + +static __noinline void __rb_event_submit(struct rb_ctx *ctx) +{ + if (!ctx->rb) + return; + + /* If the verifier (incorrectly) concludes that ctx->rb can be + * NULL at this point, we'll get "BPF_EXIT instruction in main + * prog would lead to reference leak" error + */ + bpf_ringbuf_submit_dynptr(&ctx->dptr, 0); +} + +SEC("socket") +int map_ptr_is_never_null_rb(void *ctx) +{ + struct rb_ctx event_ctx = __rb_event_reserve(256); + __rb_event_submit(&event_ctx); + return 0; +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From d77efc0ef5b0adb0de3c15f5c9d33ea6d60bd449 Mon Sep 17 00:00:00 2001 From: Tao Chen Date: Sat, 7 Jun 2025 00:58:15 +0800 Subject: selftests/bpf: Add cookies check for tracing fill_link_info test Adding tests for getting cookie with fill_link_info for tracing. Signed-off-by: Tao Chen Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250606165818.3394397-2-chen.dylane@linux.dev --- .../testing/selftests/bpf/prog_tests/bpf_cookie.c | 24 +++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c index 0774ae6c1bec..4a0670c056ba 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c @@ -489,10 +489,28 @@ cleanup: bpf_link__destroy(link); } +static int verify_tracing_link_info(int fd, u64 cookie) +{ + struct bpf_link_info info; + int err; + u32 len = sizeof(info); + + err = bpf_link_get_info_by_fd(fd, &info, &len); + if (!ASSERT_OK(err, "get_link_info")) + return -1; + + if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_TRACING, "link_type")) + return -1; + + ASSERT_EQ(info.tracing.cookie, cookie, "tracing_cookie"); + + return 0; +} + static void tracing_subtest(struct test_bpf_cookie *skel) { __u64 cookie; - int prog_fd; + int prog_fd, err; int fentry_fd = -1, fexit_fd = -1, fmod_ret_fd = -1; LIBBPF_OPTS(bpf_test_run_opts, opts); LIBBPF_OPTS(bpf_link_create_opts, link_opts); @@ -507,6 +525,10 @@ static void tracing_subtest(struct test_bpf_cookie *skel) if (!ASSERT_GE(fentry_fd, 0, "fentry.link_create")) goto cleanup; + err = verify_tracing_link_info(fentry_fd, cookie); + if (!ASSERT_OK(err, "verify_tracing_link_info")) + goto cleanup; + cookie = 0x20000000000000L; prog_fd = bpf_program__fd(skel->progs.fexit_test1); link_opts.tracing.cookie = cookie; -- cgit v1.2.3 From d6f1c85f22534d2d9fea9b32645da19c91ebe7d2 Mon Sep 17 00:00:00 2001 From: Luis Gerhorst Date: Tue, 3 Jun 2025 23:24:28 +0200 Subject: bpf: Fall back to nospec for Spectre v1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This implements the core of the series and causes the verifier to fall back to mitigating Spectre v1 using speculation barriers. The approach was presented at LPC'24 [1] and RAID'24 [2]. If we find any forbidden behavior on a speculative path, we insert a nospec (e.g., lfence speculation barrier on x86) before the instruction and stop verifying the path. While verifying a speculative path, we can furthermore stop verification of that path whenever we encounter a nospec instruction. A minimal example program would look as follows: A = true B = true if A goto e f() if B goto e unsafe() e: exit There are the following speculative and non-speculative paths (`cur->speculative` and `speculative` referring to the value of the push_stack() parameters): - A = true - B = true - if A goto e - A && !cur->speculative && !speculative - exit - !A && !cur->speculative && speculative - f() - if B goto e - B && cur->speculative && !speculative - exit - !B && cur->speculative && speculative - unsafe() If f() contains any unsafe behavior under Spectre v1 and the unsafe behavior matches `state->speculative && error_recoverable_with_nospec(err)`, do_check() will now add a nospec before f() instead of rejecting the program: A = true B = true if A goto e nospec f() if B goto e unsafe() e: exit Alternatively, the algorithm also takes advantage of nospec instructions inserted for other reasons (e.g., Spectre v4). Taking the program above as an example, speculative path exploration can stop before f() if a nospec was inserted there because of Spectre v4 sanitization. In this example, all instructions after the nospec are dead code (and with the nospec they are also dead code speculatively). For this, it relies on the fact that speculation barriers generally prevent all later instructions from executing if the speculation was not correct: * On Intel x86_64, lfence acts as full speculation barrier, not only as a load fence [3]: An LFENCE instruction or a serializing instruction will ensure that no later instructions execute, even speculatively, until all prior instructions complete locally. [...] Inserting an LFENCE instruction after a bounds check prevents later operations from executing before the bound check completes. This was experimentally confirmed in [4]. * On AMD x86_64, lfence is dispatch-serializing [5] (requires MSR C001_1029[1] to be set if the MSR is supported, this happens in init_amd()). AMD further specifies "A dispatch serializing instruction forces the processor to retire the serializing instruction and all previous instructions before the next instruction is executed" [8]. As dispatch is not specific to memory loads or branches, lfence therefore also affects all instructions there. Also, if retiring a branch means it's PC change becomes architectural (should be), this means any "wrong" speculation is aborted as required for this series. * ARM's SB speculation barrier instruction also affects "any instruction that appears later in the program order than the barrier" [6]. * PowerPC's barrier also affects all subsequent instructions [7]: [...] executing an ori R31,R31,0 instruction ensures that all instructions preceding the ori R31,R31,0 instruction have completed before the ori R31,R31,0 instruction completes, and that no subsequent instructions are initiated, even out-of-order, until after the ori R31,R31,0 instruction completes. The ori R31,R31,0 instruction may complete before storage accesses associated with instructions preceding the ori R31,R31,0 instruction have been performed Regarding the example, this implies that `if B goto e` will not execute before `if A goto e` completes. Once `if A goto e` completes, the CPU should find that the speculation was wrong and continue with `exit`. If there is any other path that leads to `if B goto e` (and therefore `unsafe()`) without going through `if A goto e`, then a nospec will still be needed there. However, this patch assumes this other path will be explored separately and therefore be discovered by the verifier even if the exploration discussed here stops at the nospec. This patch furthermore has the unfortunate consequence that Spectre v1 mitigations now only support architectures which implement BPF_NOSPEC. Before this commit, Spectre v1 mitigations prevented exploits by rejecting the programs on all architectures. Because some JITs do not implement BPF_NOSPEC, this patch therefore may regress unpriv BPF's security to a limited extent: * The regression is limited to systems vulnerable to Spectre v1, have unprivileged BPF enabled, and do NOT emit insns for BPF_NOSPEC. The latter is not the case for x86 64- and 32-bit, arm64, and powerpc 64-bit and they are therefore not affected by the regression. According to commit a6f6a95f2580 ("LoongArch, bpf: Fix jit to skip speculation barrier opcode"), LoongArch is not vulnerable to Spectre v1 and therefore also not affected by the regression. * To the best of my knowledge this regression may therefore only affect MIPS. This is deemed acceptable because unpriv BPF is still disabled there by default. As stated in a previous commit, BPF_NOSPEC could be implemented for MIPS based on GCC's speculation_barrier implementation. * It is unclear which other architectures (besides x86 64- and 32-bit, ARM64, PowerPC 64-bit, LoongArch, and MIPS) supported by the kernel are vulnerable to Spectre v1. Also, it is not clear if barriers are available on these architectures. Implementing BPF_NOSPEC on these architectures therefore is non-trivial. Searching GCC and the kernel for speculation barrier implementations for these architectures yielded no result. * If any of those regressed systems is also vulnerable to Spectre v4, the system was already vulnerable to Spectre v4 attacks based on unpriv BPF before this patch and the impact is therefore further limited. As an alternative to regressing security, one could still reject programs if the architecture does not emit BPF_NOSPEC (e.g., by removing the empty BPF_NOSPEC-case from all JITs except for LoongArch where it appears justified). However, this will cause rejections on these archs that are likely unfounded in the vast majority of cases. In the tests, some are now successful where we previously had a false-positive (i.e., rejection). Change them to reflect where the nospec should be inserted (using __xlated_unpriv) and modify the error message if the nospec is able to mitigate a problem that previously shadowed another problem (in that case __xlated_unpriv does not work, therefore just add a comment). Define SPEC_V1 to avoid duplicating this ifdef whenever we check for nospec insns using __xlated_unpriv, define it here once. This also improves readability. PowerPC can probably also be added here. However, omit it for now because the BPF CI currently does not include a test. Limit it to EPERM, EACCES, and EINVAL (and not everything except for EFAULT and ENOMEM) as it already has the desired effect for most real-world programs. Briefly went through all the occurrences of EPERM, EINVAL, and EACCESS in verifier.c to validate that catching them like this makes sense. Thanks to Dustin for their help in checking the vendor documentation. [1] https://lpc.events/event/18/contributions/1954/ ("Mitigating Spectre-PHT using Speculation Barriers in Linux eBPF") [2] https://arxiv.org/pdf/2405.00078 ("VeriFence: Lightweight and Precise Spectre Defenses for Untrusted Linux Kernel Extensions") [3] https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/runtime-speculative-side-channel-mitigations.html ("Managed Runtime Speculative Execution Side Channel Mitigations") [4] https://dl.acm.org/doi/pdf/10.1145/3359789.3359837 ("Speculator: a tool to analyze speculative execution attacks and mitigations" - Section 4.6 "Stopping Speculative Execution") [5] https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/software-techniques-for-managing-speculation.pdf ("White Paper - SOFTWARE TECHNIQUES FOR MANAGING SPECULATION ON AMD PROCESSORS - REVISION 5.09.23") [6] https://developer.arm.com/documentation/ddi0597/2020-12/Base-Instructions/SB--Speculation-Barrier- ("SB - Speculation Barrier - Arm Armv8-A A32/T32 Instruction Set Architecture (2020-12)") [7] https://wiki.raptorcs.com/w/images/5/5f/OPF_PowerISA_v3.1C.pdf ("Power ISA™ - Version 3.1C - May 26, 2024 - Section 9.2.1 of Book III") [8] https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/40332.pdf ("AMD64 Architecture Programmer’s Manual Volumes 1–5 - Revision 4.08 - April 2024 - 7.6.4 Serializing Instructions") Signed-off-by: Luis Gerhorst Acked-by: Kumar Kartikeya Dwivedi Acked-by: Henriette Herzog Cc: Dustin Nguyen Cc: Maximilian Ott Cc: Milan Stephan Link: https://lore.kernel.org/r/20250603212428.338473-1-luis.gerhorst@fau.de Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 1 + kernel/bpf/verifier.c | 78 ++++++++++++++++++++-- tools/testing/selftests/bpf/progs/bpf_misc.h | 4 ++ tools/testing/selftests/bpf/progs/verifier_and.c | 8 ++- .../testing/selftests/bpf/progs/verifier_bounds.c | 61 +++++++++++++---- tools/testing/selftests/bpf/progs/verifier_movsx.c | 16 ++++- .../testing/selftests/bpf/progs/verifier_unpriv.c | 8 ++- .../selftests/bpf/progs/verifier_value_ptr_arith.c | 16 +++-- tools/testing/selftests/bpf/verifier/dead_code.c | 3 +- tools/testing/selftests/bpf/verifier/jmp32.c | 33 +++------ tools/testing/selftests/bpf/verifier/jset.c | 10 ++- 11 files changed, 184 insertions(+), 54 deletions(-) (limited to 'tools/testing/selftests') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 2b0954202226..e6c26393c029 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -580,6 +580,7 @@ struct bpf_insn_aux_data { u64 map_key_state; /* constant (32 bit) key tracking for maps */ int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ u32 seen; /* this insn was processed by the verifier at env->pass_cnt */ + bool nospec; /* do not execute this instruction speculatively */ bool nospec_result; /* result is unsafe under speculation, nospec must follow */ bool zext_dst; /* this insn zero extends dst reg */ bool needs_zext; /* alu op needs to clear upper bits */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 79ae0ee395b0..b1f797616f20 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2013,6 +2013,18 @@ static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, return 0; } +static bool error_recoverable_with_nospec(int err) +{ + /* Should only return true for non-fatal errors that are allowed to + * occur during speculative verification. For these we can insert a + * nospec and the program might still be accepted. Do not include + * something like ENOMEM because it is likely to re-occur for the next + * architectural path once it has been recovered-from in all speculative + * paths. + */ + return err == -EPERM || err == -EACCES || err == -EINVAL; +} + static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx, bool speculative) @@ -11147,7 +11159,7 @@ static int check_get_func_ip(struct bpf_verifier_env *env) return -ENOTSUPP; } -static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env) +static struct bpf_insn_aux_data *cur_aux(const struct bpf_verifier_env *env) { return &env->insn_aux_data[env->insn_idx]; } @@ -14015,7 +14027,9 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env, const struct bpf_insn *insn) { - return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K; + return env->bypass_spec_v1 || + BPF_SRC(insn->code) == BPF_K || + cur_aux(env)->nospec; } static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux, @@ -19732,10 +19746,41 @@ static int do_check(struct bpf_verifier_env *env) sanitize_mark_insn_seen(env); prev_insn_idx = env->insn_idx; + /* Reduce verification complexity by stopping speculative path + * verification when a nospec is encountered. + */ + if (state->speculative && cur_aux(env)->nospec) + goto process_bpf_exit; + err = do_check_insn(env, &do_print_state); - if (err < 0) { + if (state->speculative && error_recoverable_with_nospec(err)) { + /* Prevent this speculative path from ever reaching the + * insn that would have been unsafe to execute. + */ + cur_aux(env)->nospec = true; + /* If it was an ADD/SUB insn, potentially remove any + * markings for alu sanitization. + */ + cur_aux(env)->alu_state = 0; + goto process_bpf_exit; + } else if (err < 0) { return err; } else if (err == PROCESS_BPF_EXIT) { + goto process_bpf_exit; + } + WARN_ON_ONCE(err); + + if (state->speculative && cur_aux(env)->nospec_result) { + /* If we are on a path that performed a jump-op, this + * may skip a nospec patched-in after the jump. This can + * currently never happen because nospec_result is only + * used for the write-ops + * `*(size*)(dst_reg+off)=src_reg|imm32` which must + * never skip the following insn. Still, add a warning + * to document this in case nospec_result is used + * elsewhere in the future. + */ + WARN_ON_ONCE(env->insn_idx != prev_insn_idx + 1); process_bpf_exit: mark_verifier_state_scratched(env); update_branch_counts(env, env->cur_state); @@ -19753,7 +19798,6 @@ process_bpf_exit: continue; } } - WARN_ON_ONCE(err); } return 0; @@ -20881,6 +20925,29 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) bpf_convert_ctx_access_t convert_ctx_access; u8 mode; + if (env->insn_aux_data[i + delta].nospec) { + WARN_ON_ONCE(env->insn_aux_data[i + delta].alu_state); + struct bpf_insn patch[] = { + BPF_ST_NOSPEC(), + *insn, + }; + + cnt = ARRAY_SIZE(patch); + new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = new_prog; + insn = new_prog->insnsi + i + delta; + /* This can not be easily merged with the + * nospec_result-case, because an insn may require a + * nospec before and after itself. Therefore also do not + * 'continue' here but potentially apply further + * patching to insn. *insn should equal patch[1] now. + */ + } + if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) || insn->code == (BPF_LDX | BPF_MEM | BPF_H) || insn->code == (BPF_LDX | BPF_MEM | BPF_W) || @@ -20931,6 +20998,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) if (type == BPF_WRITE && env->insn_aux_data[i + delta].nospec_result) { + /* nospec_result is only used to mitigate Spectre v4 and + * to limit verification-time for Spectre v1. + */ struct bpf_insn patch[] = { *insn, BPF_ST_NOSPEC(), diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 6e208e24ba3b..a678463e972c 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -231,4 +231,8 @@ #define CAN_USE_LOAD_ACQ_STORE_REL #endif +#if defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) +#define SPEC_V1 +#endif + #endif diff --git a/tools/testing/selftests/bpf/progs/verifier_and.c b/tools/testing/selftests/bpf/progs/verifier_and.c index e97e518516b6..2b4fdca162be 100644 --- a/tools/testing/selftests/bpf/progs/verifier_and.c +++ b/tools/testing/selftests/bpf/progs/verifier_and.c @@ -85,8 +85,14 @@ l0_%=: r0 = r0; \ SEC("socket") __description("check known subreg with unknown reg") -__success __failure_unpriv __msg_unpriv("R1 !read_ok") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if w0 < 0x1 goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R1 !read_ok'` */ +__xlated_unpriv("goto pc-1") /* `r1 = *(u32*)(r1 + 512)`, sanitized dead code */ +__xlated_unpriv("r0 = 0") +#endif __naked void known_subreg_with_unknown_reg(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index 0eb33bb801b5..30e16153fdf1 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -620,8 +620,14 @@ l1_%=: exit; \ SEC("socket") __description("bounds check mixed 32bit and 64bit arithmetic. test1") -__success __failure_unpriv __msg_unpriv("R0 invalid mem access 'scalar'") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R0 invalid mem access 'scalar'` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ +__xlated_unpriv("exit") +#endif __naked void _32bit_and_64bit_arithmetic_test1(void) { asm volatile (" \ @@ -643,8 +649,14 @@ l1_%=: exit; \ SEC("socket") __description("bounds check mixed 32bit and 64bit arithmetic. test2") -__success __failure_unpriv __msg_unpriv("R0 invalid mem access 'scalar'") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R0 invalid mem access 'scalar'` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ +__xlated_unpriv("exit") +#endif __naked void _32bit_and_64bit_arithmetic_test2(void) { asm volatile (" \ @@ -691,9 +703,14 @@ l0_%=: r0 = 0; \ SEC("socket") __description("bounds check for reg = 0, reg xor 1") -__success __failure_unpriv -__msg_unpriv("R0 min value is outside of the allowed memory range") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if r1 != 0x0 goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ +__xlated_unpriv("r0 = 0") +#endif __naked void reg_0_reg_xor_1(void) { asm volatile (" \ @@ -719,9 +736,14 @@ l1_%=: r0 = 0; \ SEC("socket") __description("bounds check for reg32 = 0, reg32 xor 1") -__success __failure_unpriv -__msg_unpriv("R0 min value is outside of the allowed memory range") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if w1 != 0x0 goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ +__xlated_unpriv("r0 = 0") +#endif __naked void reg32_0_reg32_xor_1(void) { asm volatile (" \ @@ -747,9 +769,14 @@ l1_%=: r0 = 0; \ SEC("socket") __description("bounds check for reg = 2, reg xor 3") -__success __failure_unpriv -__msg_unpriv("R0 min value is outside of the allowed memory range") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if r1 > 0x0 goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ +__xlated_unpriv("r0 = 0") +#endif __naked void reg_2_reg_xor_3(void) { asm volatile (" \ @@ -829,9 +856,14 @@ l1_%=: r0 = 0; \ SEC("socket") __description("bounds check for reg > 0, reg xor 3") -__success __failure_unpriv -__msg_unpriv("R0 min value is outside of the allowed memory range") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if r1 >= 0x0 goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ +__xlated_unpriv("r0 = 0") +#endif __naked void reg_0_reg_xor_3(void) { asm volatile (" \ @@ -858,9 +890,14 @@ l1_%=: r0 = 0; \ SEC("socket") __description("bounds check for reg32 > 0, reg32 xor 3") -__success __failure_unpriv -__msg_unpriv("R0 min value is outside of the allowed memory range") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if w1 >= 0x0 goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ +__xlated_unpriv("r0 = 0") +#endif __naked void reg32_0_reg32_xor_3(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c index 994bbc346d25..a4d8814eb5ed 100644 --- a/tools/testing/selftests/bpf/progs/verifier_movsx.c +++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c @@ -245,7 +245,13 @@ l0_%=: \ SEC("socket") __description("MOV32SX, S8, var_off not u32_max, positive after s8 extension") __success __retval(0) -__failure_unpriv __msg_unpriv("frame pointer is read only") +__success_unpriv +#ifdef SPEC_V1 +__xlated_unpriv("w0 = 0") +__xlated_unpriv("exit") +__xlated_unpriv("nospec") /* inserted to prevent `frame pointer is read only` */ +__xlated_unpriv("goto pc-1") +#endif __naked void mov64sx_s32_varoff_2(void) { asm volatile (" \ @@ -267,7 +273,13 @@ l0_%=: \ SEC("socket") __description("MOV32SX, S8, var_off not u32_max, negative after s8 extension") __success __retval(0) -__failure_unpriv __msg_unpriv("frame pointer is read only") +__success_unpriv +#ifdef SPEC_V1 +__xlated_unpriv("w0 = 0") +__xlated_unpriv("exit") +__xlated_unpriv("nospec") /* inserted to prevent `frame pointer is read only` */ +__xlated_unpriv("goto pc-1") +#endif __naked void mov64sx_s32_varoff_3(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_unpriv.c b/tools/testing/selftests/bpf/progs/verifier_unpriv.c index db52ba66e880..9bd4aef72140 100644 --- a/tools/testing/selftests/bpf/progs/verifier_unpriv.c +++ b/tools/testing/selftests/bpf/progs/verifier_unpriv.c @@ -572,8 +572,14 @@ l0_%=: exit; \ SEC("socket") __description("alu32: mov u32 const") -__success __failure_unpriv __msg_unpriv("R7 invalid mem access 'scalar'") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if r0 == 0x0 goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R7 invalid mem access 'scalar'` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ +__xlated_unpriv("exit") +#endif __naked void alu32_mov_u32_const(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c b/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c index 5ba6e53571c8..fcea9819e359 100644 --- a/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c +++ b/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c @@ -398,7 +398,8 @@ l2_%=: r0 = 1; \ SEC("socket") __description("map access: mixing value pointer and scalar, 1") -__success __failure_unpriv __msg_unpriv("R2 pointer comparison prohibited") +__success __failure_unpriv +__msg_unpriv("R2 tried to add from different maps, paths or scalars, pointer arithmetic with it prohibited for !root") __retval(0) __naked void value_pointer_and_scalar_1(void) { @@ -433,6 +434,7 @@ l2_%=: /* common instruction */ \ l3_%=: /* branch B */ \ r0 = 0x13371337; \ /* verifier follows fall-through */ \ + /* unpriv: nospec (inserted to prevent `R2 pointer comparison prohibited`) */\ if r2 != 0x100000 goto l4_%=; \ r0 = 0; \ exit; \ @@ -450,7 +452,8 @@ l4_%=: /* fake-dead code; targeted from branch A to \ SEC("socket") __description("map access: mixing value pointer and scalar, 2") -__success __failure_unpriv __msg_unpriv("R0 invalid mem access 'scalar'") +__success __failure_unpriv +__msg_unpriv("R2 tried to add from different maps, paths or scalars, pointer arithmetic with it prohibited for !root") __retval(0) __naked void value_pointer_and_scalar_2(void) { @@ -492,6 +495,7 @@ l4_%=: /* fake-dead code; targeted from branch A to \ * prevent dead code sanitization, rejected \ * via branch B however \ */ \ + /* unpriv: nospec (inserted to prevent `R0 invalid mem access 'scalar'`) */\ r0 = *(u8*)(r0 + 0); \ r0 = 0; \ exit; \ @@ -1296,9 +1300,13 @@ l0_%=: r0 = 1; \ SEC("socket") __description("map access: value_ptr -= unknown scalar, 2") -__success __failure_unpriv -__msg_unpriv("R0 pointer arithmetic of map value goes out of range") +__success __success_unpriv __retval(1) +#ifdef SPEC_V1 +__xlated_unpriv("r1 &= 7") +__xlated_unpriv("nospec") /* inserted to prevent `R0 pointer arithmetic of map value goes out of range` */ +__xlated_unpriv("r0 -= r1") +#endif __naked void value_ptr_unknown_scalar_2_2(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c index ee454327e5c6..77207b498c6f 100644 --- a/tools/testing/selftests/bpf/verifier/dead_code.c +++ b/tools/testing/selftests/bpf/verifier/dead_code.c @@ -2,14 +2,13 @@ "dead code: start", .insns = { BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_JMP_IMM(BPF_JA, 0, 0, 2), BPF_MOV64_IMM(BPF_REG_0, 7), BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 10, -4), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R9 !read_ok", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 7, }, diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c index 43776f6f92f4..91d83e9cb148 100644 --- a/tools/testing/selftests/bpf/verifier/jmp32.c +++ b/tools/testing/selftests/bpf/verifier/jmp32.c @@ -84,11 +84,10 @@ BPF_JMP32_IMM(BPF_JSET, BPF_REG_7, 0x10, 1), BPF_EXIT_INSN(), BPF_JMP32_IMM(BPF_JGE, BPF_REG_7, 0x10, 1), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R9 !read_ok", - .result_unpriv = REJECT, .result = ACCEPT, }, { @@ -149,11 +148,10 @@ BPF_JMP32_IMM(BPF_JEQ, BPF_REG_7, 0x10, 1), BPF_EXIT_INSN(), BPF_JMP32_IMM(BPF_JSGE, BPF_REG_7, 0xf, 1), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R9 !read_ok", - .result_unpriv = REJECT, .result = ACCEPT, }, { @@ -214,11 +212,10 @@ BPF_JMP32_IMM(BPF_JNE, BPF_REG_7, 0x10, 1), BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x10, 1), BPF_EXIT_INSN(), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R9 !read_ok", - .result_unpriv = REJECT, .result = ACCEPT, }, { @@ -283,11 +280,10 @@ BPF_JMP32_REG(BPF_JGE, BPF_REG_7, BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP32_IMM(BPF_JGE, BPF_REG_7, 0x7ffffff0, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -354,11 +350,10 @@ BPF_JMP32_REG(BPF_JGT, BPF_REG_7, BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 0x7ffffff0, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -425,11 +420,10 @@ BPF_JMP32_REG(BPF_JLE, BPF_REG_7, BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP32_IMM(BPF_JLE, BPF_REG_7, 0x7ffffff0, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -496,11 +490,10 @@ BPF_JMP32_REG(BPF_JLT, BPF_REG_7, BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0x7ffffff0, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -567,11 +560,10 @@ BPF_JMP32_REG(BPF_JSGE, BPF_REG_7, BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0x7ffffff0, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -638,11 +630,10 @@ BPF_JMP32_REG(BPF_JSGT, BPF_REG_7, BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, -2, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -709,11 +700,10 @@ BPF_JMP32_REG(BPF_JSLE, BPF_REG_7, BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JSLE, BPF_REG_7, 0x7ffffff0, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -780,11 +770,10 @@ BPF_JMP32_REG(BPF_JSLT, BPF_REG_7, BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP32_IMM(BPF_JSLT, BPF_REG_7, -1, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, diff --git a/tools/testing/selftests/bpf/verifier/jset.c b/tools/testing/selftests/bpf/verifier/jset.c index 11fc68da735e..e901eefd774a 100644 --- a/tools/testing/selftests/bpf/verifier/jset.c +++ b/tools/testing/selftests/bpf/verifier/jset.c @@ -78,12 +78,11 @@ .insns = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 1), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .errstr_unpriv = "R9 !read_ok", - .result_unpriv = REJECT, .retval = 1, .result = ACCEPT, }, @@ -136,13 +135,12 @@ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), BPF_ALU64_IMM(BPF_OR, BPF_REG_0, 2), BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 3, 1), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .errstr_unpriv = "R9 !read_ok", - .result_unpriv = REJECT, .result = ACCEPT, }, { @@ -154,16 +152,16 @@ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xff), BPF_JMP_IMM(BPF_JSET, BPF_REG_1, 0xf0, 3), BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 0x10, 1), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JSET, BPF_REG_1, 0x10, 1), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0x10, 1), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .errstr_unpriv = "R9 !read_ok", - .result_unpriv = REJECT, .result = ACCEPT, }, -- cgit v1.2.3 From 4a8765d9a5278a9ff9341678ae60eb4f8d0f5d8c Mon Sep 17 00:00:00 2001 From: Luis Gerhorst Date: Tue, 3 Jun 2025 23:28:14 +0200 Subject: selftests/bpf: Add test for Spectre v1 mitigation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is based on the gadget from the description of commit 9183671af6db ("bpf: Fix leakage under speculation on mispredicted branches"). Signed-off-by: Luis Gerhorst Acked-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20250603212814.338867-1-luis.gerhorst@fau.de Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/progs/verifier_unpriv.c | 57 ++++++++++++++++++++++ 1 file changed, 57 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/progs/verifier_unpriv.c b/tools/testing/selftests/bpf/progs/verifier_unpriv.c index 9bd4aef72140..4470541b5e71 100644 --- a/tools/testing/selftests/bpf/progs/verifier_unpriv.c +++ b/tools/testing/selftests/bpf/progs/verifier_unpriv.c @@ -744,4 +744,61 @@ l0_%=: r0 = 0; \ " ::: __clobber_all); } +SEC("socket") +__description("unpriv: Spectre v1 path-based type confusion of scalar as stack-ptr") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if r0 != 0x1 goto pc+2") +/* This nospec prevents the exploit because it forces the mispredicted (not + * taken) `if r0 != 0x0 goto l0_%=` to resolve before using r6 as a pointer. + * This causes the CPU to realize that `r6 = r9` should have never executed. It + * ensures that r6 always contains a readable stack slot ptr when the insn after + * the nospec executes. + */ +__xlated_unpriv("nospec") +__xlated_unpriv("r9 = *(u8 *)(r6 +0)") +#endif +__naked void unpriv_spec_v1_type_confusion(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l2_%=; \ + /* r0: pointer to a map array entry */ \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + /* r1, r2: prepared call args */ \ + r6 = r10; \ + r6 += -8; \ + /* r6: pointer to readable stack slot */ \ + r9 = 0xffffc900; \ + r9 <<= 32; \ + /* r9: scalar controlled by attacker */ \ + r0 = *(u64 *)(r0 + 0); /* cache miss */ \ + if r0 != 0x0 goto l0_%=; \ + r6 = r9; \ +l0_%=: if r0 != 0x1 goto l1_%=; \ + r9 = *(u8 *)(r6 + 0); \ +l1_%=: /* leak r9 */ \ + r9 &= 1; \ + r9 <<= 9; \ + *(u64*)(r10 - 8) = r9; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l2_%=; \ + /* leak secret into is_cached(map[0|512]): */ \ + r0 = *(u64 *)(r0 + 0); \ +l2_%=: \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From b2c765778a2ea953a448816ba7c034f02a2fd6c6 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 9 Jun 2025 09:39:16 +0200 Subject: selftests/nolibc: make stackprotector probing more robust MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit nolibc only supports symbol-based stackprotectors, based on the global variable __stack_chk_guard. Support for this differs between architectures and toolchains. Some use the symbol mode by default, some require a flag to enable it and some don't support it at all. Before the nolibc test Makefile required the availability of "-mstack-protector-guard=global" to enable stackprotectors. While this flag makes sure that the correct mode is available it doesn't work where the correct mode is the only supported one and therefore the flag is not implemented. Switch to a more dynamic probing mechanism. This correctly enables stack protectors for mips, loongarch and m68k. Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250609-nolibc-stackprotector-robust-v1-1-a1cfc92a568a@weissschuh.net Signed-off-by: Thomas Weißschuh --- tools/testing/selftests/nolibc/Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index 94176ffe4646..147ce411b46a 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -195,7 +195,10 @@ CFLAGS_sparc32 = $(call cc-option,-m32) ifeq ($(origin XARCH),command line) CFLAGS_XARCH = $(CFLAGS_$(XARCH)) endif -CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call cc-option,-fstack-protector-all)) +_CFLAGS_STACKPROTECTOR = $(call cc-option,-fstack-protector-all) $(call cc-option,-mstack-protector-guard=global) +CFLAGS_STACKPROTECTOR ?= $(call try-run, \ + echo 'void foo(void) {}' | $(CC) -x c - -o - -S $(CLANG_CROSS_FLAGS) $(_CFLAGS_STACKPROTECTOR) | grep -q __stack_chk_guard, \ + $(_CFLAGS_STACKPROTECTOR)) CFLAGS_SANITIZER ?= $(call cc-option,-fsanitize=undefined -fsanitize-trap=all) CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \ $(call cc-option,-fno-stack-protector) $(call cc-option,-Wmissing-prototypes) \ -- cgit v1.2.3 From 69b25dd20c8368750d1e8b12cfe31387c545bdd1 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 9 Jun 2025 02:46:28 -0700 Subject: selftests: netconsole: Do not exit from inside the validation function Remove the exit call from validate_result() function and move the test exit logic to the main script. This allows the function to be reused in scenarios where the test needs to continue execution after validation, rather than terminating immediately. The validate_result() function should focus on validation logic only, while the calling script maintains control over program flow and exit conditions. This change improves code modularity and prepares for potential future enhancements where multiple validations might be needed in a single test run. Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20250609-netcons_ext-v3-3-5336fa670326@debian.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh | 1 - tools/testing/selftests/drivers/net/netcons_basic.sh | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index 29b01b8e2215..2d5dd3297693 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -185,7 +185,6 @@ function validate_result() { # Delete the file once it is validated, otherwise keep it # for debugging purposes rm "${TMPFILENAME}" - exit "${ksft_pass}" } function check_for_dependencies() { diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh index fe765da498e8..d2f0685d24ba 100755 --- a/tools/testing/selftests/drivers/net/netcons_basic.sh +++ b/tools/testing/selftests/drivers/net/netcons_basic.sh @@ -50,3 +50,5 @@ busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" # Make sure the message was received in the dst part # and exit validate_result "${OUTPUT_FILE}" + +exit "${ksft_pass}" -- cgit v1.2.3 From 224a6e602fb371b42ba5f854f32191d23b7e140a Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 9 Jun 2025 02:46:29 -0700 Subject: selftests: netconsole: Add support for basic netconsole target format Extend the netconsole selftest to validate both basic and extended target formats. The basic format is a simpler variant that doesn't support userdata or release functionality. The test now validates that netconsole works correctly in both configurations, improving test coverage for different netconsole deployment scenarios. Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20250609-netcons_ext-v3-4-5336fa670326@debian.org Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/lib/sh/lib_netcons.sh | 26 ++++++++++-- .../testing/selftests/drivers/net/netcons_basic.sh | 48 ++++++++++++++-------- 2 files changed, 52 insertions(+), 22 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index 2d5dd3297693..71a5a8b1712c 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -95,6 +95,8 @@ function set_network() { } function create_dynamic_target() { + local FORMAT=${1:-"extended"} + DSTMAC=$(ip netns exec "${NAMESPACE}" \ ip link show "${DSTIF}" | awk '/ether/ {print $2}') @@ -106,6 +108,16 @@ function create_dynamic_target() { echo "${DSTMAC}" > "${NETCONS_PATH}"/remote_mac echo "${SRCIF}" > "${NETCONS_PATH}"/dev_name + if [ "${FORMAT}" == "basic" ] + then + # Basic target does not support release + echo 0 > "${NETCONS_PATH}"/release + echo 0 > "${NETCONS_PATH}"/extended + elif [ "${FORMAT}" == "extended" ] + then + echo 1 > "${NETCONS_PATH}"/extended + fi + echo 1 > "${NETCONS_PATH}"/enabled } @@ -159,6 +171,7 @@ function listen_port_and_save_to() { function validate_result() { local TMPFILENAME="$1" + local FORMAT=${2:-"extended"} # TMPFILENAME will contain something like: # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM @@ -176,10 +189,15 @@ function validate_result() { exit "${ksft_fail}" fi - if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then - echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2 - cat "${TMPFILENAME}" >&2 - exit "${ksft_fail}" + # userdata is not supported on basic format target, + # thus, do not validate it. + if [ "${FORMAT}" != "basic" ]; + then + if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then + echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2 + cat "${TMPFILENAME}" >&2 + exit "${ksft_fail}" + fi fi # Delete the file once it is validated, otherwise keep it diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh index d2f0685d24ba..40a6ac6191b8 100755 --- a/tools/testing/selftests/drivers/net/netcons_basic.sh +++ b/tools/testing/selftests/drivers/net/netcons_basic.sh @@ -32,23 +32,35 @@ check_for_dependencies echo "6 5" > /proc/sys/kernel/printk # Remove the namespace, interfaces and netconsole target on exit trap cleanup EXIT -# Create one namespace and two interfaces -set_network -# Create a dynamic target for netconsole -create_dynamic_target -# Set userdata "key" with the "value" value -set_user_data -# Listed for netconsole port inside the namespace and destination interface -listen_port_and_save_to "${OUTPUT_FILE}" & -# Wait for socat to start and listen to the port. -wait_local_port_listen "${NAMESPACE}" "${PORT}" udp -# Send the message -echo "${MSG}: ${TARGET}" > /dev/kmsg -# Wait until socat saves the file to disk -busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" - -# Make sure the message was received in the dst part -# and exit -validate_result "${OUTPUT_FILE}" +# Run the test twice, with different format modes +for FORMAT in "basic" "extended" +do + echo "Running with target mode: ${FORMAT}" + # Create one namespace and two interfaces + set_network + # Create a dynamic target for netconsole + create_dynamic_target "${FORMAT}" + # Only set userdata for extended format + if [ "$FORMAT" == "extended" ] + then + # Set userdata "key" with the "value" value + set_user_data + fi + # Listed for netconsole port inside the namespace and destination interface + listen_port_and_save_to "${OUTPUT_FILE}" & + # Wait for socat to start and listen to the port. + wait_local_port_listen "${NAMESPACE}" "${PORT}" udp + # Send the message + echo "${MSG}: ${TARGET}" > /dev/kmsg + # Wait until socat saves the file to disk + busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" + + # Make sure the message was received in the dst part + # and exit + validate_result "${OUTPUT_FILE}" "${FORMAT}" + cleanup +done + +trap - EXIT exit "${ksft_pass}" -- cgit v1.2.3 From 689883de94dd8a43a0ea77311327effab240afda Mon Sep 17 00:00:00 2001 From: Samiullah Khawaja Date: Mon, 9 Jun 2025 17:30:15 +0000 Subject: net: stop napi kthreads when THREADED napi is disabled Once the THREADED napi is disabled, the napi kthread should also be stopped. Keeping the kthread intact after disabling THREADED napi makes the PID of this kthread show up in the output of netlink 'napi-get' and ps -ef output. The is discussed in the patch below: https://lore.kernel.org/all/20250502191548.559cc416@kernel.org NAPI kthread should stop only if, - There are no pending napi poll scheduled for this thread. - There are no new napi poll scheduled for this thread while it has stopped. - The ____napi_schedule can correctly fallback to the softirq for napi polling. Since napi_schedule_prep provides mutual exclusion over STATE_SCHED bit, it is safe to unset the STATE_THREADED when SCHED_THREADED is set or the SCHED bit is not set. SCHED_THREADED being set means that SCHED is already set and the kthread owns this napi. To disable threaded napi, unset STATE_THREADED bit safely if SCHED_THREADED is set or SCHED is unset. Once STATE_THREADED is unset safely then wait for the kthread to unset the SCHED_THREADED bit so it safe to stop the kthread. Add a new test in nl_netdev to verify this behaviour. Tested: ./tools/testing/selftests/net/nl_netdev.py TAP version 13 1..6 ok 1 nl_netdev.empty_check ok 2 nl_netdev.lo_check ok 3 nl_netdev.page_pool_check ok 4 nl_netdev.napi_list_check ok 5 nl_netdev.dev_set_threaded ok 6 nl_netdev.nsim_rxq_reset_down # Totals: pass:6 fail:0 xfail:0 xpass:0 skip:0 error:0 Ran neper for 300 seconds and did enable/disable of thread napi in a loop continuously. Signed-off-by: Samiullah Khawaja Link: https://patch.msgid.link/20250609173015.3851695-1-skhawaja@google.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 45 ++++++++++++++++++++++++++++++-- tools/testing/selftests/net/nl_netdev.py | 38 +++++++++++++++++++++++++-- 2 files changed, 79 insertions(+), 4 deletions(-) (limited to 'tools/testing/selftests') diff --git a/net/core/dev.c b/net/core/dev.c index be97c440ecd5..5baa4691074f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6926,6 +6926,43 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer) return HRTIMER_NORESTART; } +static void napi_stop_kthread(struct napi_struct *napi) +{ + unsigned long val, new; + + /* Wait until the napi STATE_THREADED is unset. */ + while (true) { + val = READ_ONCE(napi->state); + + /* If napi kthread own this napi or the napi is idle, + * STATE_THREADED can be unset here. + */ + if ((val & NAPIF_STATE_SCHED_THREADED) || + !(val & NAPIF_STATE_SCHED)) { + new = val & (~NAPIF_STATE_THREADED); + } else { + msleep(20); + continue; + } + + if (try_cmpxchg(&napi->state, &val, new)) + break; + } + + /* Once STATE_THREADED is unset, wait for SCHED_THREADED to be unset by + * the kthread. + */ + while (true) { + if (!test_bit(NAPIF_STATE_SCHED_THREADED, &napi->state)) + break; + + msleep(20); + } + + kthread_stop(napi->thread); + napi->thread = NULL; +} + int dev_set_threaded(struct net_device *dev, bool threaded) { struct napi_struct *napi; @@ -6961,8 +6998,12 @@ int dev_set_threaded(struct net_device *dev, bool threaded) * softirq mode will happen in the next round of napi_schedule(). * This should not cause hiccups/stalls to the live traffic. */ - list_for_each_entry(napi, &dev->napi_list, dev_list) - assign_bit(NAPI_STATE_THREADED, &napi->state, threaded); + list_for_each_entry(napi, &dev->napi_list, dev_list) { + if (!threaded && napi->thread) + napi_stop_kthread(napi); + else + assign_bit(NAPI_STATE_THREADED, &napi->state, threaded); + } return err; } diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py index beaee5e4e2aa..c9109627a741 100755 --- a/tools/testing/selftests/net/nl_netdev.py +++ b/tools/testing/selftests/net/nl_netdev.py @@ -2,8 +2,9 @@ # SPDX-License-Identifier: GPL-2.0 import time +from os import system from lib.py import ksft_run, ksft_exit, ksft_pr -from lib.py import ksft_eq, ksft_ge, ksft_busy_wait +from lib.py import ksft_eq, ksft_ge, ksft_ne, ksft_busy_wait from lib.py import NetdevFamily, NetdevSimDev, ip @@ -34,6 +35,39 @@ def napi_list_check(nf) -> None: ksft_eq(len(napis), 100, comment=f"queue count after reset queue {q} mode {i}") +def dev_set_threaded(nf) -> None: + """ + Test that verifies various cases of napi threaded + set and unset at device level using sysfs. + """ + with NetdevSimDev(queue_count=2) as nsimdev: + nsim = nsimdev.nsims[0] + + ip(f"link set dev {nsim.ifname} up") + + napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True) + ksft_eq(len(napis), 2) + + napi0_id = napis[0]['id'] + napi1_id = napis[1]['id'] + + # set threaded + system(f"echo 1 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is set for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_ne(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_ne(napi1.get('pid'), None) + + # unset threaded + system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is unset for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1.get('pid'), None) def nsim_rxq_reset_down(nf) -> None: """ @@ -122,7 +156,7 @@ def page_pool_check(nf) -> None: def main() -> None: nf = NetdevFamily() ksft_run([empty_check, lo_check, page_pool_check, napi_list_check, - nsim_rxq_reset_down], + dev_set_threaded, nsim_rxq_reset_down], args=(nf, )) ksft_exit() -- cgit v1.2.3 From 265c6ff0f8c2feef5981e9a1aedf6b5b476d7492 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 9 Jun 2025 17:00:01 -0700 Subject: selftests/net: packetdrill: more xfail changes Most of the packetdrill tests have not flaked once last week. Add the few which did to the XFAIL list. Acked-by: Matthieu Baerts (NGI0) Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250610000001.1970934-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/packetdrill/ksft_runner.sh | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh index ef8b25a606d8..c5b01e1bd4c7 100755 --- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh +++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh @@ -39,11 +39,15 @@ if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then # xfail tests that are known flaky with dbg config, not fixable. # still run them for coverage (and expect 100% pass without dbg). declare -ar xfail_list=( + "tcp_blocking_blocking-connect.pkt" + "tcp_blocking_blocking-read.pkt" "tcp_eor_no-coalesce-retrans.pkt" "tcp_fast_recovery_prr-ss.*.pkt" + "tcp_sack_sack-route-refresh-ip-tos.pkt" "tcp_slow_start_slow-start-after-win-update.pkt" "tcp_timestamping.*.pkt" "tcp_user_timeout_user-timeout-probe.pkt" + "tcp_zerocopy_cl.*.pkt" "tcp_zerocopy_epoll_.*.pkt" "tcp_tcp_info_tcp-info-.*-limited.pkt" ) -- cgit v1.2.3 From f1c025773f257b534f6fa77dca29b0967dfed459 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Mon, 9 Jun 2025 10:08:53 +0800 Subject: selftests/bpf: Add test to cover ktls with bpf_msg_pop_data The selftest can reproduce an issue where using bpf_msg_pop_data() in ktls causes errors on the receiving end. Signed-off-by: Jiayuan Chen Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Link: https://lore.kernel.org/bpf/20250609020910.397930-3-jiayuan.chen@linux.dev --- .../selftests/bpf/prog_tests/sockmap_ktls.c | 91 ++++++++++++++++++++++ .../selftests/bpf/progs/test_sockmap_ktls.c | 4 + 2 files changed, 95 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c index b6c471da5c28..b87e7f39e15a 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c @@ -314,6 +314,95 @@ out: test_sockmap_ktls__destroy(skel); } +static void test_sockmap_ktls_tx_pop(int family, int sotype) +{ + char msg[37] = "0123456789abcdefghijklmnopqrstuvwxyz\0"; + int c = 0, p = 0, one = 1, sent, recvd; + struct test_sockmap_ktls *skel; + int prog_fd, map_fd; + char rcv[50] = {0}; + int err; + int i, m, r; + + skel = test_sockmap_ktls__open_and_load(); + if (!ASSERT_TRUE(skel, "open ktls skel")) + return; + + err = create_pair(family, sotype, &c, &p); + if (!ASSERT_OK(err, "create_pair()")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.prog_sk_policy); + map_fd = bpf_map__fd(skel->maps.sock_map); + + err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach sk msg")) + goto out; + + err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(c)")) + goto out; + + err = init_ktls_pairs(c, p); + if (!ASSERT_OK(err, "init_ktls_pairs(c, p)")) + goto out; + + struct { + int pop_start; + int pop_len; + } pop_policy[] = { + /* trim the start */ + {0, 2}, + {0, 10}, + {1, 2}, + {1, 10}, + /* trim the end */ + {35, 2}, + /* New entries should be added before this line */ + {-1, -1}, + }; + + i = 0; + while (pop_policy[i].pop_start >= 0) { + skel->bss->pop_start = pop_policy[i].pop_start; + skel->bss->pop_end = pop_policy[i].pop_len; + + sent = send(c, msg, sizeof(msg), 0); + if (!ASSERT_EQ(sent, sizeof(msg), "send(msg)")) + goto out; + + recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1); + if (!ASSERT_EQ(recvd, sizeof(msg) - pop_policy[i].pop_len, "pop len mismatch")) + goto out; + + /* verify the data + * msg: 0123456789a bcdefghij klmnopqrstuvwxyz + * | | + * popped data + */ + for (m = 0, r = 0; m < sizeof(msg);) { + /* skip checking the data that has been popped */ + if (m >= pop_policy[i].pop_start && + m <= pop_policy[i].pop_start + pop_policy[i].pop_len - 1) { + m++; + continue; + } + + if (!ASSERT_EQ(msg[m], rcv[r], "data mismatch")) + goto out; + m++; + r++; + } + i++; + } +out: + if (c) + close(c); + if (p) + close(p); + test_sockmap_ktls__destroy(skel); +} + static void run_tests(int family, enum bpf_map_type map_type) { int map; @@ -338,6 +427,8 @@ static void run_ktls_test(int family, int sotype) test_sockmap_ktls_tx_cork(family, sotype, true); if (test__start_subtest("tls tx egress with no buf")) test_sockmap_ktls_tx_no_buf(family, sotype, true); + if (test__start_subtest("tls tx with pop")) + test_sockmap_ktls_tx_pop(family, sotype); } void test_sockmap_ktls(void) diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c index 8bdb9987c0c7..83df4919c224 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c +++ b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c @@ -7,6 +7,8 @@ int cork_byte; int push_start; int push_end; int apply_bytes; +int pop_start; +int pop_end; struct { __uint(type, BPF_MAP_TYPE_SOCKMAP); @@ -22,6 +24,8 @@ int prog_sk_policy(struct sk_msg_md *msg) bpf_msg_cork_bytes(msg, cork_byte); if (push_start > 0 && push_end > 0) bpf_msg_push_data(msg, push_start, push_end, 0); + if (pop_start >= 0 && pop_end > 0) + bpf_msg_pop_data(msg, pop_start, pop_end, 0); return SK_PASS; } -- cgit v1.2.3 From 517b088a846b3ce56b3ff07cdf24cd68c89b3a9e Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 11 Jun 2025 09:21:03 -0700 Subject: selftests/bpf: Fix cgroup_mprog_ordering failure due to uninitialized variable On arm64, the cgroup_mprog_ordering selftest failed with test_progs run when building with clang compiler. The reason is due to socklen_t optlen not initialized. In kernel function do_ip_getsockopt(), we have if (copy_from_sockptr(&len, optlen, sizeof(int))) return -EFAULT; if (len < 0) return -EINVAL; The above 'len' variable is a negative value and hence the test failed. But the test is okay on x86_64. I checked the x86_64 asm code and I didn't see explicit initialization of 'optlen' but its value is 0 so kernel didn't return error. This should be a pure luck. Fix the bug by initializing 'oplen' var properly. Fixes: e422d5f118e4 ("selftests/bpf: Add two selftests for mprog API based cgroup progs") Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20250611162103.1623692-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c index 4a4e9710b474..a36d2e968bc5 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c @@ -12,7 +12,7 @@ static int run_getsockopt_test(int cg_parent, int sock_fd, bool has_relative_fd) struct cgroup_preorder *skel = NULL; struct bpf_program *prog; __u8 *result, buf; - socklen_t optlen; + socklen_t optlen = 1; int err = 0; skel = cgroup_preorder__open_and_load(); -- cgit v1.2.3 From a4a65c6fe08bcf48ce404037a9e20a2d14b16855 Mon Sep 17 00:00:00 2001 From: Bobby Eshleman Date: Mon, 9 Jun 2025 09:39:24 -0700 Subject: selftests/vsock: add initial vmtest.sh for vsock This commit introduces a new vmtest.sh runner for vsock. It uses virtme-ng/qemu to run tests in a VM. The tests validate G2H, H2G, and loopback. The testing tools from tools/testing/vsock/ are reused. Currently, only vsock_test is used. VMCI and hyperv support is included in the config file to be built with the -b option, though not used in the tests. Only tested on x86. To run: $ make -C tools/testing/selftests TARGETS=vsock $ tools/testing/selftests/vsock/vmtest.sh or $ make -C tools/testing/selftests TARGETS=vsock run_tests Example runs (after make -C tools/testing/selftests TARGETS=vsock): $ ./tools/testing/selftests/vsock/vmtest.sh 1..3 ok 0 vm_server_host_client ok 1 vm_client_host_server ok 2 vm_loopback SUMMARY: PASS=3 SKIP=0 FAIL=0 Log: /tmp/vsock_vmtest_m7DI.log $ ./tools/testing/selftests/vsock/vmtest.sh vm_loopback 1..1 ok 0 vm_loopback SUMMARY: PASS=1 SKIP=0 FAIL=0 Log: /tmp/vsock_vmtest_a1IO.log $ mkdir -p ~/scratch $ make -C tools/testing/selftests install TARGETS=vsock INSTALL_PATH=~/scratch [... omitted ...] $ cd ~/scratch $ ./run_kselftest.sh TAP version 13 1..1 # timeout set to 300 # selftests: vsock: vmtest.sh # 1..3 # ok 0 vm_server_host_client # ok 1 vm_client_host_server # ok 2 vm_loopback # SUMMARY: PASS=3 SKIP=0 FAIL=0 # Log: /tmp/vsock_vmtest_svEl.log ok 1 selftests: vsock: vmtest.sh Future work can include vsock_diag_test. Because vsock requires a VM to test anything other than loopback, this patch adds vmtest.sh as a kselftest itself. This is different than other systems that have a "vmtest.sh", where it is used as a utility script to spin up a VM to run the selftests as a guest (but isn't hooked into kselftest). Signed-off-by: Bobby Eshleman Reviewed-by: Stefano Garzarella Link: https://patch.msgid.link/20250609-vsock-vmtest-v10-1-7f37198e1cd4@gmail.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + tools/testing/selftests/vsock/.gitignore | 2 + tools/testing/selftests/vsock/Makefile | 17 ++ tools/testing/selftests/vsock/config | 111 +++++++ tools/testing/selftests/vsock/settings | 1 + tools/testing/selftests/vsock/vmtest.sh | 487 +++++++++++++++++++++++++++++++ 6 files changed, 619 insertions(+) create mode 100644 tools/testing/selftests/vsock/.gitignore create mode 100644 tools/testing/selftests/vsock/Makefile create mode 100644 tools/testing/selftests/vsock/config create mode 100644 tools/testing/selftests/vsock/settings create mode 100755 tools/testing/selftests/vsock/vmtest.sh (limited to 'tools/testing/selftests') diff --git a/MAINTAINERS b/MAINTAINERS index f2668b81115c..dd7366aafe64 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -26323,6 +26323,7 @@ F: include/uapi/linux/vm_sockets.h F: include/uapi/linux/vm_sockets_diag.h F: include/uapi/linux/vsockmon.h F: net/vmw_vsock/ +F: tools/testing/selftests/vsock/ F: tools/testing/vsock/ VMALLOC diff --git a/tools/testing/selftests/vsock/.gitignore b/tools/testing/selftests/vsock/.gitignore new file mode 100644 index 000000000000..9c5bf379480f --- /dev/null +++ b/tools/testing/selftests/vsock/.gitignore @@ -0,0 +1,2 @@ +vmtest.log +vsock_test diff --git a/tools/testing/selftests/vsock/Makefile b/tools/testing/selftests/vsock/Makefile new file mode 100644 index 000000000000..c407c0afd938 --- /dev/null +++ b/tools/testing/selftests/vsock/Makefile @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0 + +CURDIR := $(abspath .) +TOOLSDIR := $(abspath ../../..) +VSOCK_TEST_DIR := $(TOOLSDIR)/testing/vsock +VSOCK_TEST_SRCS := $(wildcard $(VSOCK_TEST_DIR)/*.c $(VSOCK_TEST_DIR)/*.h) + +$(OUTPUT)/vsock_test: $(VSOCK_TEST_DIR)/vsock_test + install -m 755 $< $@ + +$(VSOCK_TEST_DIR)/vsock_test: $(VSOCK_TEST_SRCS) + $(MAKE) -C $(VSOCK_TEST_DIR) vsock_test +TEST_PROGS += vmtest.sh +TEST_GEN_FILES := vsock_test + +include ../lib.mk + diff --git a/tools/testing/selftests/vsock/config b/tools/testing/selftests/vsock/config new file mode 100644 index 000000000000..5f0a4f17dfc9 --- /dev/null +++ b/tools/testing/selftests/vsock/config @@ -0,0 +1,111 @@ +CONFIG_BLK_DEV_INITRD=y +CONFIG_BPF=y +CONFIG_BPF_SYSCALL=y +CONFIG_BPF_JIT=y +CONFIG_HAVE_EBPF_JIT=y +CONFIG_BPF_EVENTS=y +CONFIG_FTRACE_SYSCALLS=y +CONFIG_FUNCTION_TRACER=y +CONFIG_HAVE_DYNAMIC_FTRACE=y +CONFIG_DYNAMIC_FTRACE=y +CONFIG_HAVE_KPROBES=y +CONFIG_KPROBES=y +CONFIG_KPROBE_EVENTS=y +CONFIG_ARCH_SUPPORTS_UPROBES=y +CONFIG_UPROBES=y +CONFIG_UPROBE_EVENTS=y +CONFIG_DEBUG_FS=y +CONFIG_FW_CFG_SYSFS=y +CONFIG_FW_CFG_SYSFS_CMDLINE=y +CONFIG_DRM=y +CONFIG_DRM_VIRTIO_GPU=y +CONFIG_DRM_VIRTIO_GPU_KMS=y +CONFIG_DRM_BOCHS=y +CONFIG_VIRTIO_IOMMU=y +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_SND_SEQUENCER=y +CONFIG_SND_PCI=y +CONFIG_SND_INTEL8X0=y +CONFIG_SND_HDA_CODEC_REALTEK=y +CONFIG_SECURITYFS=y +CONFIG_CGROUP_BPF=y +CONFIG_SQUASHFS=y +CONFIG_SQUASHFS_XZ=y +CONFIG_SQUASHFS_ZSTD=y +CONFIG_FUSE_FS=y +CONFIG_VIRTIO_FS=y +CONFIG_SERIO=y +CONFIG_PCI=y +CONFIG_INPUT=y +CONFIG_INPUT_KEYBOARD=y +CONFIG_KEYBOARD_ATKBD=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_X86_VERBOSE_BOOTUP=y +CONFIG_VGA_CONSOLE=y +CONFIG_FB=y +CONFIG_FB_VESA=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_HCTOSYS=y +CONFIG_RTC_DRV_CMOS=y +CONFIG_HYPERVISOR_GUEST=y +CONFIG_PARAVIRT=y +CONFIG_KVM_GUEST=y +CONFIG_KVM=y +CONFIG_KVM_INTEL=y +CONFIG_KVM_AMD=y +CONFIG_VSOCKETS=y +CONFIG_VSOCKETS_DIAG=y +CONFIG_VSOCKETS_LOOPBACK=y +CONFIG_VMWARE_VMCI_VSOCKETS=y +CONFIG_VIRTIO_VSOCKETS=y +CONFIG_VIRTIO_VSOCKETS_COMMON=y +CONFIG_HYPERV_VSOCKETS=y +CONFIG_VMWARE_VMCI=y +CONFIG_VHOST_VSOCK=y +CONFIG_HYPERV=y +CONFIG_UEVENT_HELPER=n +CONFIG_VIRTIO=y +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_NET=y +CONFIG_NET_CORE=y +CONFIG_NETDEVICES=y +CONFIG_NETWORK_FILESYSTEMS=y +CONFIG_INET=y +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_9P_FS=y +CONFIG_VIRTIO_NET=y +CONFIG_CMDLINE_OVERRIDE=n +CONFIG_BINFMT_SCRIPT=y +CONFIG_SHMEM=y +CONFIG_TMPFS=y +CONFIG_UNIX=y +CONFIG_MODULE_SIG_FORCE=n +CONFIG_DEVTMPFS=y +CONFIG_TTY=y +CONFIG_VT=y +CONFIG_UNIX98_PTYS=y +CONFIG_EARLY_PRINTK=y +CONFIG_INOTIFY_USER=y +CONFIG_BLOCK=y +CONFIG_SCSI_LOWLEVEL=y +CONFIG_SCSI=y +CONFIG_SCSI_VIRTIO=y +CONFIG_BLK_DEV_SD=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_WATCHDOG=y +CONFIG_WATCHDOG_CORE=y +CONFIG_I6300ESB_WDT=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y +CONFIG_OVERLAY_FS=y +CONFIG_DAX=y +CONFIG_DAX_DRIVER=y +CONFIG_FS_DAX=y +CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTREMOVE=y +CONFIG_ZONE_DEVICE=y diff --git a/tools/testing/selftests/vsock/settings b/tools/testing/selftests/vsock/settings new file mode 100644 index 000000000000..694d70710ff0 --- /dev/null +++ b/tools/testing/selftests/vsock/settings @@ -0,0 +1 @@ +timeout=300 diff --git a/tools/testing/selftests/vsock/vmtest.sh b/tools/testing/selftests/vsock/vmtest.sh new file mode 100755 index 000000000000..edacebfc1632 --- /dev/null +++ b/tools/testing/selftests/vsock/vmtest.sh @@ -0,0 +1,487 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Meta Platforms, Inc. and affiliates +# +# Dependencies: +# * virtme-ng +# * busybox-static (used by virtme-ng) +# * qemu (used by virtme-ng) + +readonly SCRIPT_DIR="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)" +readonly KERNEL_CHECKOUT=$(realpath "${SCRIPT_DIR}"/../../../../) + +source "${SCRIPT_DIR}"/../kselftest/ktap_helpers.sh + +readonly VSOCK_TEST="${SCRIPT_DIR}"/vsock_test +readonly TEST_GUEST_PORT=51000 +readonly TEST_HOST_PORT=50000 +readonly TEST_HOST_PORT_LISTENER=50001 +readonly SSH_GUEST_PORT=22 +readonly SSH_HOST_PORT=2222 +readonly VSOCK_CID=1234 +readonly WAIT_PERIOD=3 +readonly WAIT_PERIOD_MAX=60 +readonly WAIT_TOTAL=$(( WAIT_PERIOD * WAIT_PERIOD_MAX )) +readonly QEMU_PIDFILE=$(mktemp /tmp/qemu_vsock_vmtest_XXXX.pid) + +# virtme-ng offers a netdev for ssh when using "--ssh", but we also need a +# control port forwarded for vsock_test. Because virtme-ng doesn't support +# adding an additional port to forward to the device created from "--ssh" and +# virtme-init mistakenly sets identical IPs to the ssh device and additional +# devices, we instead opt out of using --ssh, add the device manually, and also +# add the kernel cmdline options that virtme-init uses to setup the interface. +readonly QEMU_TEST_PORT_FWD="hostfwd=tcp::${TEST_HOST_PORT}-:${TEST_GUEST_PORT}" +readonly QEMU_SSH_PORT_FWD="hostfwd=tcp::${SSH_HOST_PORT}-:${SSH_GUEST_PORT}" +readonly QEMU_OPTS="\ + -netdev user,id=n0,${QEMU_TEST_PORT_FWD},${QEMU_SSH_PORT_FWD} \ + -device virtio-net-pci,netdev=n0 \ + -device vhost-vsock-pci,guest-cid=${VSOCK_CID} \ + --pidfile ${QEMU_PIDFILE} \ +" +readonly KERNEL_CMDLINE="\ + virtme.dhcp net.ifnames=0 biosdevname=0 \ + virtme.ssh virtme_ssh_channel=tcp virtme_ssh_user=$USER \ +" +readonly LOG=$(mktemp /tmp/vsock_vmtest_XXXX.log) +readonly TEST_NAMES=(vm_server_host_client vm_client_host_server vm_loopback) +readonly TEST_DESCS=( + "Run vsock_test in server mode on the VM and in client mode on the host." + "Run vsock_test in client mode on the VM and in server mode on the host." + "Run vsock_test using the loopback transport in the VM." +) + +VERBOSE=0 + +usage() { + local name + local desc + local i + + echo + echo "$0 [OPTIONS] [TEST]..." + echo "If no TEST argument is given, all tests will be run." + echo + echo "Options" + echo " -b: build the kernel from the current source tree and use it for guest VMs" + echo " -q: set the path to or name of qemu binary" + echo " -v: verbose output" + echo + echo "Available tests" + + for ((i = 0; i < ${#TEST_NAMES[@]}; i++)); do + name=${TEST_NAMES[${i}]} + desc=${TEST_DESCS[${i}]} + printf "\t%-35s%-35s\n" "${name}" "${desc}" + done + echo + + exit 1 +} + +die() { + echo "$*" >&2 + exit "${KSFT_FAIL}" +} + +vm_ssh() { + ssh -q -o UserKnownHostsFile=/dev/null -p ${SSH_HOST_PORT} localhost "$@" + return $? +} + +cleanup() { + if [[ -s "${QEMU_PIDFILE}" ]]; then + pkill -SIGTERM -F "${QEMU_PIDFILE}" > /dev/null 2>&1 + fi + + # If failure occurred during or before qemu start up, then we need + # to clean this up ourselves. + if [[ -e "${QEMU_PIDFILE}" ]]; then + rm "${QEMU_PIDFILE}" + fi +} + +check_args() { + local found + + for arg in "$@"; do + found=0 + for name in "${TEST_NAMES[@]}"; do + if [[ "${name}" = "${arg}" ]]; then + found=1 + break + fi + done + + if [[ "${found}" -eq 0 ]]; then + echo "${arg} is not an available test" >&2 + usage + fi + done + + for arg in "$@"; do + if ! command -v > /dev/null "test_${arg}"; then + echo "Test ${arg} not found" >&2 + usage + fi + done +} + +check_deps() { + for dep in vng ${QEMU} busybox pkill ssh; do + if [[ ! -x $(command -v "${dep}") ]]; then + echo -e "skip: dependency ${dep} not found!\n" + exit "${KSFT_SKIP}" + fi + done + + if [[ ! -x $(command -v "${VSOCK_TEST}") ]]; then + printf "skip: %s not found!" "${VSOCK_TEST}" + printf " Please build the kselftest vsock target.\n" + exit "${KSFT_SKIP}" + fi +} + +check_vng() { + local tested_versions + local version + local ok + + tested_versions=("1.33" "1.36") + version="$(vng --version)" + + ok=0 + for tv in "${tested_versions[@]}"; do + if [[ "${version}" == *"${tv}"* ]]; then + ok=1 + break + fi + done + + if [[ ! "${ok}" -eq 1 ]]; then + printf "warning: vng version '%s' has not been tested and may " "${version}" >&2 + printf "not function properly.\n\tThe following versions have been tested: " >&2 + echo "${tested_versions[@]}" >&2 + fi +} + +handle_build() { + if [[ ! "${BUILD}" -eq 1 ]]; then + return + fi + + if [[ ! -d "${KERNEL_CHECKOUT}" ]]; then + echo "-b requires vmtest.sh called from the kernel source tree" >&2 + exit 1 + fi + + pushd "${KERNEL_CHECKOUT}" &>/dev/null + + if ! vng --kconfig --config "${SCRIPT_DIR}"/config; then + die "failed to generate .config for kernel source tree (${KERNEL_CHECKOUT})" + fi + + if ! make -j$(nproc); then + die "failed to build kernel from source tree (${KERNEL_CHECKOUT})" + fi + + popd &>/dev/null +} + +vm_start() { + local logfile=/dev/null + local verbose_opt="" + local kernel_opt="" + local qemu + + qemu=$(command -v "${QEMU}") + + if [[ "${VERBOSE}" -eq 1 ]]; then + verbose_opt="--verbose" + logfile=/dev/stdout + fi + + if [[ "${BUILD}" -eq 1 ]]; then + kernel_opt="${KERNEL_CHECKOUT}" + fi + + vng \ + --run \ + ${kernel_opt} \ + ${verbose_opt} \ + --qemu-opts="${QEMU_OPTS}" \ + --qemu="${qemu}" \ + --user root \ + --append "${KERNEL_CMDLINE}" \ + --rw &> ${logfile} & + + if ! timeout ${WAIT_TOTAL} \ + bash -c 'while [[ ! -s '"${QEMU_PIDFILE}"' ]]; do sleep 1; done; exit 0'; then + die "failed to boot VM" + fi +} + +vm_wait_for_ssh() { + local i + + i=0 + while true; do + if [[ ${i} -gt ${WAIT_PERIOD_MAX} ]]; then + die "Timed out waiting for guest ssh" + fi + if vm_ssh -- true; then + break + fi + i=$(( i + 1 )) + sleep ${WAIT_PERIOD} + done +} + +# derived from selftests/net/net_helper.sh +wait_for_listener() +{ + local port=$1 + local interval=$2 + local max_intervals=$3 + local protocol=tcp + local pattern + local i + + pattern=":$(printf "%04X" "${port}") " + + # for tcp protocol additionally check the socket state + [ "${protocol}" = "tcp" ] && pattern="${pattern}0A" + for i in $(seq "${max_intervals}"); do + if awk '{print $2" "$4}' /proc/net/"${protocol}"* | \ + grep -q "${pattern}"; then + break + fi + sleep "${interval}" + done +} + +vm_wait_for_listener() { + local port=$1 + + vm_ssh < ${redirect} + else + __log_args "$@" | tee -a "${LOG}" > ${redirect} + fi +} + +log_setup() { + log "setup" "$@" +} + +log_host() { + local testname=$1 + + shift + log "test:${testname}:host" "$@" +} + +log_guest() { + local testname=$1 + + shift + log "test:${testname}:guest" "$@" +} + +test_vm_server_host_client() { + local testname="${FUNCNAME[0]#test_}" + + vm_ssh -- "${VSOCK_TEST}" \ + --mode=server \ + --control-port="${TEST_GUEST_PORT}" \ + --peer-cid=2 \ + 2>&1 | log_guest "${testname}" & + + vm_wait_for_listener "${TEST_GUEST_PORT}" + + ${VSOCK_TEST} \ + --mode=client \ + --control-host=127.0.0.1 \ + --peer-cid="${VSOCK_CID}" \ + --control-port="${TEST_HOST_PORT}" 2>&1 | log_host "${testname}" + + return $? +} + +test_vm_client_host_server() { + local testname="${FUNCNAME[0]#test_}" + + ${VSOCK_TEST} \ + --mode "server" \ + --control-port "${TEST_HOST_PORT_LISTENER}" \ + --peer-cid "${VSOCK_CID}" 2>&1 | log_host "${testname}" & + + host_wait_for_listener + + vm_ssh -- "${VSOCK_TEST}" \ + --mode=client \ + --control-host=10.0.2.2 \ + --peer-cid=2 \ + --control-port="${TEST_HOST_PORT_LISTENER}" 2>&1 | log_guest "${testname}" + + return $? +} + +test_vm_loopback() { + local testname="${FUNCNAME[0]#test_}" + local port=60000 # non-forwarded local port + + vm_ssh -- "${VSOCK_TEST}" \ + --mode=server \ + --control-port="${port}" \ + --peer-cid=1 2>&1 | log_guest "${testname}" & + + vm_wait_for_listener "${port}" + + vm_ssh -- "${VSOCK_TEST}" \ + --mode=client \ + --control-host="127.0.0.1" \ + --control-port="${port}" \ + --peer-cid=1 2>&1 | log_guest "${testname}" + + return $? +} + +run_test() { + local host_oops_cnt_before + local host_warn_cnt_before + local vm_oops_cnt_before + local vm_warn_cnt_before + local host_oops_cnt_after + local host_warn_cnt_after + local vm_oops_cnt_after + local vm_warn_cnt_after + local name + local rc + + host_oops_cnt_before=$(dmesg | grep -c -i 'Oops') + host_warn_cnt_before=$(dmesg --level=warn | wc -l) + vm_oops_cnt_before=$(vm_ssh -- dmesg | grep -c -i 'Oops') + vm_warn_cnt_before=$(vm_ssh -- dmesg --level=warn | wc -l) + + name=$(echo "${1}" | awk '{ print $1 }') + eval test_"${name}" + rc=$? + + host_oops_cnt_after=$(dmesg | grep -i 'Oops' | wc -l) + if [[ ${host_oops_cnt_after} -gt ${host_oops_cnt_before} ]]; then + echo "FAIL: kernel oops detected on host" | log_host "${name}" + rc=$KSFT_FAIL + fi + + host_warn_cnt_after=$(dmesg --level=warn | wc -l) + if [[ ${host_warn_cnt_after} -gt ${host_warn_cnt_before} ]]; then + echo "FAIL: kernel warning detected on host" | log_host "${name}" + rc=$KSFT_FAIL + fi + + vm_oops_cnt_after=$(vm_ssh -- dmesg | grep -i 'Oops' | wc -l) + if [[ ${vm_oops_cnt_after} -gt ${vm_oops_cnt_before} ]]; then + echo "FAIL: kernel oops detected on vm" | log_host "${name}" + rc=$KSFT_FAIL + fi + + vm_warn_cnt_after=$(vm_ssh -- dmesg --level=warn | wc -l) + if [[ ${vm_warn_cnt_after} -gt ${vm_warn_cnt_before} ]]; then + echo "FAIL: kernel warning detected on vm" | log_host "${name}" + rc=$KSFT_FAIL + fi + + return "${rc}" +} + +QEMU="qemu-system-$(uname -m)" + +while getopts :hvsq:b o +do + case $o in + v) VERBOSE=1;; + b) BUILD=1;; + q) QEMU=$OPTARG;; + h|*) usage;; + esac +done +shift $((OPTIND-1)) + +trap cleanup EXIT + +if [[ ${#} -eq 0 ]]; then + ARGS=("${TEST_NAMES[@]}") +else + ARGS=("$@") +fi + +check_args "${ARGS[@]}" +check_deps +check_vng +handle_build + +echo "1..${#ARGS[@]}" + +log_setup "Booting up VM" +vm_start +vm_wait_for_ssh +log_setup "VM booted up" + +cnt_pass=0 +cnt_fail=0 +cnt_skip=0 +cnt_total=0 +for arg in "${ARGS[@]}"; do + run_test "${arg}" + rc=$? + if [[ ${rc} -eq $KSFT_PASS ]]; then + cnt_pass=$(( cnt_pass + 1 )) + echo "ok ${cnt_total} ${arg}" + elif [[ ${rc} -eq $KSFT_SKIP ]]; then + cnt_skip=$(( cnt_skip + 1 )) + echo "ok ${cnt_total} ${arg} # SKIP" + elif [[ ${rc} -eq $KSFT_FAIL ]]; then + cnt_fail=$(( cnt_fail + 1 )) + echo "not ok ${cnt_total} ${arg} # exit=$rc" + fi + cnt_total=$(( cnt_total + 1 )) +done + +echo "SUMMARY: PASS=${cnt_pass} SKIP=${cnt_skip} FAIL=${cnt_fail}" +echo "Log: ${LOG}" + +if [ $((cnt_pass + cnt_skip)) -eq ${cnt_total} ]; then + exit "$KSFT_PASS" +else + exit "$KSFT_FAIL" +fi -- cgit v1.2.3 From 994dc26302ed744960ad74932eb206b49c0ebb44 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 3 Jun 2025 15:31:56 +0200 Subject: selftests/coredump: fix build Fix various warnings in the selftest build. Link: https://lore.kernel.org/20250603-work-coredump-socket-protocol-v2-2-05a5f0c18ecc@kernel.org Acked-by: Lennart Poettering Reviewed-by: Alexander Mikhalitsyn Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- tools/testing/selftests/coredump/Makefile | 2 +- tools/testing/selftests/coredump/stackdump_test.c | 17 +++++------------ 2 files changed, 6 insertions(+), 13 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/coredump/Makefile b/tools/testing/selftests/coredump/Makefile index ed210037b29d..bc287a85b825 100644 --- a/tools/testing/selftests/coredump/Makefile +++ b/tools/testing/selftests/coredump/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -CFLAGS = $(KHDR_INCLUDES) +CFLAGS = -Wall -O0 $(KHDR_INCLUDES) TEST_GEN_PROGS := stackdump_test TEST_FILES := stackdump diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c index 9984413be9f0..aa366e6f13a7 100644 --- a/tools/testing/selftests/coredump/stackdump_test.c +++ b/tools/testing/selftests/coredump/stackdump_test.c @@ -24,6 +24,8 @@ static void *do_nothing(void *) { while (1) pause(); + + return NULL; } static void crashing_child(void) @@ -46,9 +48,7 @@ FIXTURE(coredump) FIXTURE_SETUP(coredump) { - char buf[PATH_MAX]; FILE *file; - char *dir; int ret; self->pid_coredump_server = -ESRCH; @@ -106,7 +106,6 @@ fail: TEST_F_TIMEOUT(coredump, stackdump, 120) { - struct sigaction action = {}; unsigned long long stack; char *test_dir, *line; size_t line_length; @@ -171,11 +170,10 @@ TEST_F_TIMEOUT(coredump, stackdump, 120) TEST_F(coredump, socket) { - int fd, pidfd, ret, status; + int pidfd, ret, status; FILE *file; pid_t pid, pid_coredump_server; struct stat st; - char core_file[PATH_MAX]; struct pidfd_info info = {}; int ipc_sockets[2]; char c; @@ -356,11 +354,10 @@ TEST_F(coredump, socket) TEST_F(coredump, socket_detect_userspace_client) { - int fd, pidfd, ret, status; + int pidfd, ret, status; FILE *file; pid_t pid, pid_coredump_server; struct stat st; - char core_file[PATH_MAX]; struct pidfd_info info = {}; int ipc_sockets[2]; char c; @@ -384,7 +381,7 @@ TEST_F(coredump, socket_detect_userspace_client) pid_coredump_server = fork(); ASSERT_GE(pid_coredump_server, 0); if (pid_coredump_server == 0) { - int fd_server, fd_coredump, fd_peer_pidfd, fd_core_file; + int fd_server, fd_coredump, fd_peer_pidfd; socklen_t fd_peer_pidfd_len; close(ipc_sockets[0]); @@ -464,7 +461,6 @@ TEST_F(coredump, socket_detect_userspace_client) close(fd_coredump); close(fd_server); close(fd_peer_pidfd); - close(fd_core_file); _exit(EXIT_SUCCESS); } self->pid_coredump_server = pid_coredump_server; @@ -488,7 +484,6 @@ TEST_F(coredump, socket_detect_userspace_client) if (ret < 0) _exit(EXIT_FAILURE); - (void *)write(fd_socket, &(char){ 0 }, 1); close(fd_socket); _exit(EXIT_SUCCESS); } @@ -519,7 +514,6 @@ TEST_F(coredump, socket_enoent) int pidfd, ret, status; FILE *file; pid_t pid; - char core_file[PATH_MAX]; file = fopen("/proc/sys/kernel/core_pattern", "w"); ASSERT_NE(file, NULL); @@ -569,7 +563,6 @@ TEST_F(coredump, socket_no_listener) ASSERT_GE(pid_coredump_server, 0); if (pid_coredump_server == 0) { int fd_server; - socklen_t fd_peer_pidfd_len; close(ipc_sockets[0]); -- cgit v1.2.3 From 474dd09d22df1d3bae9211078185aab9b6f1635e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 3 Jun 2025 15:31:57 +0200 Subject: selftests/coredump: cleanup coredump tests Make the selftests we added this cycle easier to read. Link: https://lore.kernel.org/20250603-work-coredump-socket-protocol-v2-3-05a5f0c18ecc@kernel.org Acked-by: Lennart Poettering Reviewed-by: Alexander Mikhalitsyn Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- tools/testing/selftests/coredump/stackdump_test.c | 409 +++++++++------------- 1 file changed, 174 insertions(+), 235 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c index aa366e6f13a7..4d922e5f89fe 100644 --- a/tools/testing/selftests/coredump/stackdump_test.c +++ b/tools/testing/selftests/coredump/stackdump_test.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include #include @@ -20,6 +21,10 @@ #define STACKDUMP_SCRIPT "stackdump" #define NUM_THREAD_SPAWN 128 +#ifndef PAGE_SIZE +#define PAGE_SIZE 4096 +#endif + static void *do_nothing(void *) { while (1) @@ -109,7 +114,7 @@ TEST_F_TIMEOUT(coredump, stackdump, 120) unsigned long long stack; char *test_dir, *line; size_t line_length; - char buf[PATH_MAX]; + char buf[PAGE_SIZE]; int ret, i, status; FILE *file; pid_t pid; @@ -168,152 +173,163 @@ TEST_F_TIMEOUT(coredump, stackdump, 120) fclose(file); } +static int create_and_listen_unix_socket(const char *path) +{ + struct sockaddr_un addr = { + .sun_family = AF_UNIX, + }; + assert(strlen(path) < sizeof(addr.sun_path) - 1); + strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1); + size_t addr_len = + offsetof(struct sockaddr_un, sun_path) + strlen(path) + 1; + int fd, ret; + + fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + if (fd < 0) + goto out; + + ret = bind(fd, (const struct sockaddr *)&addr, addr_len); + if (ret < 0) + goto out; + + ret = listen(fd, 1); + if (ret < 0) + goto out; + + return fd; + +out: + if (fd >= 0) + close(fd); + return -1; +} + +static bool set_core_pattern(const char *pattern) +{ + FILE *file; + int ret; + + file = fopen("/proc/sys/kernel/core_pattern", "w"); + if (!file) + return false; + + ret = fprintf(file, "%s", pattern); + fclose(file); + + return ret == strlen(pattern); +} + +static int get_peer_pidfd(int fd) +{ + int fd_peer_pidfd; + socklen_t fd_peer_pidfd_len = sizeof(fd_peer_pidfd); + int ret = getsockopt(fd, SOL_SOCKET, SO_PEERPIDFD, &fd_peer_pidfd, + &fd_peer_pidfd_len); + if (ret < 0) { + fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n"); + return -1; + } + return fd_peer_pidfd; +} + +static bool get_pidfd_info(int fd_peer_pidfd, struct pidfd_info *info) +{ + memset(info, 0, sizeof(*info)); + info->mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; + return ioctl(fd_peer_pidfd, PIDFD_GET_INFO, info) == 0; +} + +static void +wait_and_check_coredump_server(pid_t pid_coredump_server, + struct __test_metadata *const _metadata, + FIXTURE_DATA(coredump)* self) +{ + int status; + waitpid(pid_coredump_server, &status, 0); + self->pid_coredump_server = -ESRCH; + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); +} + TEST_F(coredump, socket) { int pidfd, ret, status; - FILE *file; pid_t pid, pid_coredump_server; struct stat st; struct pidfd_info info = {}; int ipc_sockets[2]; char c; - const struct sockaddr_un coredump_sk = { - .sun_family = AF_UNIX, - .sun_path = "/tmp/coredump.socket", - }; - size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) + - sizeof("/tmp/coredump.socket"); + + ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket")); ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); ASSERT_EQ(ret, 0); - file = fopen("/proc/sys/kernel/core_pattern", "w"); - ASSERT_NE(file, NULL); - - ret = fprintf(file, "@/tmp/coredump.socket"); - ASSERT_EQ(ret, strlen("@/tmp/coredump.socket")); - ASSERT_EQ(fclose(file), 0); - pid_coredump_server = fork(); ASSERT_GE(pid_coredump_server, 0); if (pid_coredump_server == 0) { - int fd_server, fd_coredump, fd_peer_pidfd, fd_core_file; - socklen_t fd_peer_pidfd_len; + int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1; + int exit_code = EXIT_FAILURE; close(ipc_sockets[0]); - fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); if (fd_server < 0) - _exit(EXIT_FAILURE); - - ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len); - if (ret < 0) { - fprintf(stderr, "Failed to bind coredump socket\n"); - close(fd_server); - close(ipc_sockets[1]); - _exit(EXIT_FAILURE); - } - - ret = listen(fd_server, 1); - if (ret < 0) { - fprintf(stderr, "Failed to listen on coredump socket\n"); - close(fd_server); - close(ipc_sockets[1]); - _exit(EXIT_FAILURE); - } + goto out; - if (write_nointr(ipc_sockets[1], "1", 1) < 0) { - close(fd_server); - close(ipc_sockets[1]); - _exit(EXIT_FAILURE); - } + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; close(ipc_sockets[1]); fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); - if (fd_coredump < 0) { - fprintf(stderr, "Failed to accept coredump socket connection\n"); - close(fd_server); - _exit(EXIT_FAILURE); - } + if (fd_coredump < 0) + goto out; - fd_peer_pidfd_len = sizeof(fd_peer_pidfd); - ret = getsockopt(fd_coredump, SOL_SOCKET, SO_PEERPIDFD, - &fd_peer_pidfd, &fd_peer_pidfd_len); - if (ret < 0) { - fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n"); - close(fd_coredump); - close(fd_server); - _exit(EXIT_FAILURE); - } + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) + goto out; - memset(&info, 0, sizeof(info)); - info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; - ret = ioctl(fd_peer_pidfd, PIDFD_GET_INFO, &info); - if (ret < 0) { - fprintf(stderr, "Failed to retrieve pidfd info from peer pidfd for coredump socket connection\n"); - close(fd_coredump); - close(fd_server); - close(fd_peer_pidfd); - _exit(EXIT_FAILURE); - } + if (!get_pidfd_info(fd_peer_pidfd, &info)) + goto out; - if (!(info.mask & PIDFD_INFO_COREDUMP)) { - fprintf(stderr, "Missing coredump information from coredumping task\n"); - close(fd_coredump); - close(fd_server); - close(fd_peer_pidfd); - _exit(EXIT_FAILURE); - } + if (!(info.mask & PIDFD_INFO_COREDUMP)) + goto out; - if (!(info.coredump_mask & PIDFD_COREDUMPED)) { - fprintf(stderr, "Received connection from non-coredumping task\n"); - close(fd_coredump); - close(fd_server); - close(fd_peer_pidfd); - _exit(EXIT_FAILURE); - } + if (!(info.coredump_mask & PIDFD_COREDUMPED)) + goto out; fd_core_file = creat("/tmp/coredump.file", 0644); - if (fd_core_file < 0) { - fprintf(stderr, "Failed to create coredump file\n"); - close(fd_coredump); - close(fd_server); - close(fd_peer_pidfd); - _exit(EXIT_FAILURE); - } + if (fd_core_file < 0) + goto out; for (;;) { char buffer[4096]; ssize_t bytes_read, bytes_write; bytes_read = read(fd_coredump, buffer, sizeof(buffer)); - if (bytes_read < 0) { - close(fd_coredump); - close(fd_server); - close(fd_peer_pidfd); - close(fd_core_file); - _exit(EXIT_FAILURE); - } + if (bytes_read < 0) + goto out; if (bytes_read == 0) break; bytes_write = write(fd_core_file, buffer, bytes_read); - if (bytes_read != bytes_write) { - close(fd_coredump); - close(fd_server); - close(fd_peer_pidfd); - close(fd_core_file); - _exit(EXIT_FAILURE); - } + if (bytes_read != bytes_write) + goto out; } - close(fd_coredump); - close(fd_server); - close(fd_peer_pidfd); - close(fd_core_file); - _exit(EXIT_SUCCESS); + exit_code = EXIT_SUCCESS; +out: + if (fd_core_file >= 0) + close(fd_core_file); + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) + close(fd_coredump); + if (fd_server >= 0) + close(fd_server); + _exit(exit_code); } self->pid_coredump_server = pid_coredump_server; @@ -333,47 +349,27 @@ TEST_F(coredump, socket) ASSERT_TRUE(WIFSIGNALED(status)); ASSERT_TRUE(WCOREDUMP(status)); - info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; - ASSERT_EQ(ioctl(pidfd, PIDFD_GET_INFO, &info), 0); + ASSERT_TRUE(get_pidfd_info(pidfd, &info)); ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); - waitpid(pid_coredump_server, &status, 0); - self->pid_coredump_server = -ESRCH; - ASSERT_TRUE(WIFEXITED(status)); - ASSERT_EQ(WEXITSTATUS(status), 0); + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); ASSERT_EQ(stat("/tmp/coredump.file", &st), 0); ASSERT_GT(st.st_size, 0); - /* - * We should somehow validate the produced core file. - * For now just allow for visual inspection - */ system("file /tmp/coredump.file"); } TEST_F(coredump, socket_detect_userspace_client) { int pidfd, ret, status; - FILE *file; pid_t pid, pid_coredump_server; struct stat st; struct pidfd_info info = {}; int ipc_sockets[2]; char c; - const struct sockaddr_un coredump_sk = { - .sun_family = AF_UNIX, - .sun_path = "/tmp/coredump.socket", - }; - size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) + - sizeof("/tmp/coredump.socket"); - file = fopen("/proc/sys/kernel/core_pattern", "w"); - ASSERT_NE(file, NULL); - - ret = fprintf(file, "@/tmp/coredump.socket"); - ASSERT_EQ(ret, strlen("@/tmp/coredump.socket")); - ASSERT_EQ(fclose(file), 0); + ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket")); ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); ASSERT_EQ(ret, 0); @@ -381,87 +377,46 @@ TEST_F(coredump, socket_detect_userspace_client) pid_coredump_server = fork(); ASSERT_GE(pid_coredump_server, 0); if (pid_coredump_server == 0) { - int fd_server, fd_coredump, fd_peer_pidfd; - socklen_t fd_peer_pidfd_len; + int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1; + int exit_code = EXIT_FAILURE; close(ipc_sockets[0]); - fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); if (fd_server < 0) - _exit(EXIT_FAILURE); + goto out; - ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len); - if (ret < 0) { - fprintf(stderr, "Failed to bind coredump socket\n"); - close(fd_server); - close(ipc_sockets[1]); - _exit(EXIT_FAILURE); - } - - ret = listen(fd_server, 1); - if (ret < 0) { - fprintf(stderr, "Failed to listen on coredump socket\n"); - close(fd_server); - close(ipc_sockets[1]); - _exit(EXIT_FAILURE); - } - - if (write_nointr(ipc_sockets[1], "1", 1) < 0) { - close(fd_server); - close(ipc_sockets[1]); - _exit(EXIT_FAILURE); - } + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; close(ipc_sockets[1]); fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); - if (fd_coredump < 0) { - fprintf(stderr, "Failed to accept coredump socket connection\n"); - close(fd_server); - _exit(EXIT_FAILURE); - } + if (fd_coredump < 0) + goto out; - fd_peer_pidfd_len = sizeof(fd_peer_pidfd); - ret = getsockopt(fd_coredump, SOL_SOCKET, SO_PEERPIDFD, - &fd_peer_pidfd, &fd_peer_pidfd_len); - if (ret < 0) { - fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n"); - close(fd_coredump); - close(fd_server); - _exit(EXIT_FAILURE); - } + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) + goto out; - memset(&info, 0, sizeof(info)); - info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; - ret = ioctl(fd_peer_pidfd, PIDFD_GET_INFO, &info); - if (ret < 0) { - fprintf(stderr, "Failed to retrieve pidfd info from peer pidfd for coredump socket connection\n"); - close(fd_coredump); - close(fd_server); - close(fd_peer_pidfd); - _exit(EXIT_FAILURE); - } + if (!get_pidfd_info(fd_peer_pidfd, &info)) + goto out; - if (!(info.mask & PIDFD_INFO_COREDUMP)) { - fprintf(stderr, "Missing coredump information from coredumping task\n"); - close(fd_coredump); - close(fd_server); - close(fd_peer_pidfd); - _exit(EXIT_FAILURE); - } + if (!(info.mask & PIDFD_INFO_COREDUMP)) + goto out; - if (info.coredump_mask & PIDFD_COREDUMPED) { - fprintf(stderr, "Received unexpected connection from coredumping task\n"); + if (info.coredump_mask & PIDFD_COREDUMPED) + goto out; + + exit_code = EXIT_SUCCESS; +out: + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) close(fd_coredump); + if (fd_server >= 0) close(fd_server); - close(fd_peer_pidfd); - _exit(EXIT_FAILURE); - } - - close(fd_coredump); - close(fd_server); - close(fd_peer_pidfd); - _exit(EXIT_SUCCESS); + _exit(exit_code); } self->pid_coredump_server = pid_coredump_server; @@ -474,12 +429,18 @@ TEST_F(coredump, socket_detect_userspace_client) if (pid == 0) { int fd_socket; ssize_t ret; + const struct sockaddr_un coredump_sk = { + .sun_family = AF_UNIX, + .sun_path = "/tmp/coredump.socket", + }; + size_t coredump_sk_len = + offsetof(struct sockaddr_un, sun_path) + + sizeof("/tmp/coredump.socket"); fd_socket = socket(AF_UNIX, SOCK_STREAM, 0); if (fd_socket < 0) _exit(EXIT_FAILURE); - ret = connect(fd_socket, (const struct sockaddr *)&coredump_sk, coredump_sk_len); if (ret < 0) _exit(EXIT_FAILURE); @@ -495,15 +456,11 @@ TEST_F(coredump, socket_detect_userspace_client) ASSERT_TRUE(WIFEXITED(status)); ASSERT_EQ(WEXITSTATUS(status), 0); - info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; - ASSERT_EQ(ioctl(pidfd, PIDFD_GET_INFO, &info), 0); + ASSERT_TRUE(get_pidfd_info(pidfd, &info)); ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); ASSERT_EQ((info.coredump_mask & PIDFD_COREDUMPED), 0); - waitpid(pid_coredump_server, &status, 0); - self->pid_coredump_server = -ESRCH; - ASSERT_TRUE(WIFEXITED(status)); - ASSERT_EQ(WEXITSTATUS(status), 0); + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); ASSERT_NE(stat("/tmp/coredump.file", &st), 0); ASSERT_EQ(errno, ENOENT); @@ -511,16 +468,10 @@ TEST_F(coredump, socket_detect_userspace_client) TEST_F(coredump, socket_enoent) { - int pidfd, ret, status; - FILE *file; + int pidfd, status; pid_t pid; - file = fopen("/proc/sys/kernel/core_pattern", "w"); - ASSERT_NE(file, NULL); - - ret = fprintf(file, "@/tmp/coredump.socket"); - ASSERT_EQ(ret, strlen("@/tmp/coredump.socket")); - ASSERT_EQ(fclose(file), 0); + ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket")); pid = fork(); ASSERT_GE(pid, 0); @@ -538,7 +489,6 @@ TEST_F(coredump, socket_enoent) TEST_F(coredump, socket_no_listener) { int pidfd, ret, status; - FILE *file; pid_t pid, pid_coredump_server; int ipc_sockets[2]; char c; @@ -549,44 +499,36 @@ TEST_F(coredump, socket_no_listener) size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) + sizeof("/tmp/coredump.socket"); + ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket")); + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); ASSERT_EQ(ret, 0); - file = fopen("/proc/sys/kernel/core_pattern", "w"); - ASSERT_NE(file, NULL); - - ret = fprintf(file, "@/tmp/coredump.socket"); - ASSERT_EQ(ret, strlen("@/tmp/coredump.socket")); - ASSERT_EQ(fclose(file), 0); - pid_coredump_server = fork(); ASSERT_GE(pid_coredump_server, 0); if (pid_coredump_server == 0) { - int fd_server; + int fd_server = -1; + int exit_code = EXIT_FAILURE; close(ipc_sockets[0]); fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); if (fd_server < 0) - _exit(EXIT_FAILURE); + goto out; ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len); - if (ret < 0) { - fprintf(stderr, "Failed to bind coredump socket\n"); - close(fd_server); - close(ipc_sockets[1]); - _exit(EXIT_FAILURE); - } + if (ret < 0) + goto out; - if (write_nointr(ipc_sockets[1], "1", 1) < 0) { - close(fd_server); - close(ipc_sockets[1]); - _exit(EXIT_FAILURE); - } + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; - close(fd_server); + exit_code = EXIT_SUCCESS; +out: + if (fd_server >= 0) + close(fd_server); close(ipc_sockets[1]); - _exit(EXIT_SUCCESS); + _exit(exit_code); } self->pid_coredump_server = pid_coredump_server; @@ -606,10 +548,7 @@ TEST_F(coredump, socket_no_listener) ASSERT_TRUE(WIFSIGNALED(status)); ASSERT_FALSE(WCOREDUMP(status)); - waitpid(pid_coredump_server, &status, 0); - self->pid_coredump_server = -ESRCH; - ASSERT_TRUE(WIFEXITED(status)); - ASSERT_EQ(WEXITSTATUS(status), 0); + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); } TEST_HARNESS_MAIN -- cgit v1.2.3 From 59cd658eaf404e3634624b25afc3233066bea34c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 3 Jun 2025 15:31:59 +0200 Subject: selftests/coredump: add coredump server selftests This adds extensive tests for the coredump server. Link: https://lore.kernel.org/20250603-work-coredump-socket-protocol-v2-5-05a5f0c18ecc@kernel.org Acked-by: Lennart Poettering Reviewed-by: Alexander Mikhalitsyn Signed-off-by: Christian Brauner --- tools/testing/selftests/coredump/Makefile | 2 +- tools/testing/selftests/coredump/config | 3 + tools/testing/selftests/coredump/stackdump_test.c | 1255 ++++++++++++++++++++- 3 files changed, 1258 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/coredump/config (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/coredump/Makefile b/tools/testing/selftests/coredump/Makefile index bc287a85b825..77b3665c73c7 100644 --- a/tools/testing/selftests/coredump/Makefile +++ b/tools/testing/selftests/coredump/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -CFLAGS = -Wall -O0 $(KHDR_INCLUDES) +CFLAGS += -Wall -O0 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES) TEST_GEN_PROGS := stackdump_test TEST_FILES := stackdump diff --git a/tools/testing/selftests/coredump/config b/tools/testing/selftests/coredump/config new file mode 100644 index 000000000000..a05ef112b4f9 --- /dev/null +++ b/tools/testing/selftests/coredump/config @@ -0,0 +1,3 @@ +CONFIG_COREDUMP=y +CONFIG_NET=y +CONFIG_UNIX=y diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c index 4d922e5f89fe..9a789156f27e 100644 --- a/tools/testing/selftests/coredump/stackdump_test.c +++ b/tools/testing/selftests/coredump/stackdump_test.c @@ -4,10 +4,15 @@ #include #include #include +#include +#include +#include #include #include #include #include +#include +#include #include #include #include @@ -15,6 +20,7 @@ #include #include "../kselftest_harness.h" +#include "../filesystems/wrappers.h" #include "../pidfd/pidfd.h" #define STACKDUMP_FILE "stack_values" @@ -49,14 +55,32 @@ FIXTURE(coredump) { char original_core_pattern[256]; pid_t pid_coredump_server; + int fd_tmpfs_detached; }; +static int create_detached_tmpfs(void) +{ + int fd_context, fd_tmpfs; + + fd_context = sys_fsopen("tmpfs", 0); + if (fd_context < 0) + return -1; + + if (sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0) + return -1; + + fd_tmpfs = sys_fsmount(fd_context, 0, 0); + close(fd_context); + return fd_tmpfs; +} + FIXTURE_SETUP(coredump) { FILE *file; int ret; self->pid_coredump_server = -ESRCH; + self->fd_tmpfs_detached = -1; file = fopen("/proc/sys/kernel/core_pattern", "r"); ASSERT_NE(NULL, file); @@ -65,6 +89,8 @@ FIXTURE_SETUP(coredump) ASSERT_LT(ret, sizeof(self->original_core_pattern)); self->original_core_pattern[ret] = '\0'; + self->fd_tmpfs_detached = create_detached_tmpfs(); + ASSERT_GE(self->fd_tmpfs_detached, 0); ret = fclose(file); ASSERT_EQ(0, ret); @@ -103,6 +129,15 @@ FIXTURE_TEARDOWN(coredump) goto fail; } + if (self->fd_tmpfs_detached >= 0) { + ret = close(self->fd_tmpfs_detached); + if (ret < 0) { + reason = "Unable to close detached tmpfs"; + goto fail; + } + self->fd_tmpfs_detached = -1; + } + return; fail: /* This should never happen */ @@ -192,7 +227,7 @@ static int create_and_listen_unix_socket(const char *path) if (ret < 0) goto out; - ret = listen(fd, 1); + ret = listen(fd, 128); if (ret < 0) goto out; @@ -551,4 +586,1222 @@ out: wait_and_check_coredump_server(pid_coredump_server, _metadata, self); } +static ssize_t recv_marker(int fd) +{ + enum coredump_mark mark = COREDUMP_MARK_REQACK; + ssize_t ret; + + ret = recv(fd, &mark, sizeof(mark), MSG_WAITALL); + if (ret != sizeof(mark)) + return -1; + + switch (mark) { + case COREDUMP_MARK_REQACK: + fprintf(stderr, "Received marker: ReqAck\n"); + return COREDUMP_MARK_REQACK; + case COREDUMP_MARK_MINSIZE: + fprintf(stderr, "Received marker: MinSize\n"); + return COREDUMP_MARK_MINSIZE; + case COREDUMP_MARK_MAXSIZE: + fprintf(stderr, "Received marker: MaxSize\n"); + return COREDUMP_MARK_MAXSIZE; + case COREDUMP_MARK_UNSUPPORTED: + fprintf(stderr, "Received marker: Unsupported\n"); + return COREDUMP_MARK_UNSUPPORTED; + case COREDUMP_MARK_CONFLICTING: + fprintf(stderr, "Received marker: Conflicting\n"); + return COREDUMP_MARK_CONFLICTING; + default: + fprintf(stderr, "Received unknown marker: %u\n", mark); + break; + } + return -1; +} + +static bool read_marker(int fd, enum coredump_mark mark) +{ + ssize_t ret; + + ret = recv_marker(fd); + if (ret < 0) + return false; + return ret == mark; +} + +static bool read_coredump_req(int fd, struct coredump_req *req) +{ + ssize_t ret; + size_t field_size, user_size, ack_size, kernel_size, remaining_size; + + memset(req, 0, sizeof(*req)); + field_size = sizeof(req->size); + + /* Peek the size of the coredump request. */ + ret = recv(fd, req, field_size, MSG_PEEK | MSG_WAITALL); + if (ret != field_size) + return false; + kernel_size = req->size; + + if (kernel_size < COREDUMP_ACK_SIZE_VER0) + return false; + if (kernel_size >= PAGE_SIZE) + return false; + + /* Use the minimum of user and kernel size to read the full request. */ + user_size = sizeof(struct coredump_req); + ack_size = user_size < kernel_size ? user_size : kernel_size; + ret = recv(fd, req, ack_size, MSG_WAITALL); + if (ret != ack_size) + return false; + + fprintf(stderr, "Read coredump request with size %u and mask 0x%llx\n", + req->size, (unsigned long long)req->mask); + + if (user_size > kernel_size) + remaining_size = user_size - kernel_size; + else + remaining_size = kernel_size - user_size; + + if (PAGE_SIZE <= remaining_size) + return false; + + /* + * Discard any additional data if the kernel's request was larger than + * what we knew about or cared about. + */ + if (remaining_size) { + char buffer[PAGE_SIZE]; + + ret = recv(fd, buffer, sizeof(buffer), MSG_WAITALL); + if (ret != remaining_size) + return false; + fprintf(stderr, "Discarded %zu bytes of data after coredump request\n", remaining_size); + } + + return true; +} + +static bool send_coredump_ack(int fd, const struct coredump_req *req, + __u64 mask, size_t size_ack) +{ + ssize_t ret; + /* + * Wrap struct coredump_ack in a larger struct so we can + * simulate sending to much data to the kernel. + */ + struct large_ack_for_size_testing { + struct coredump_ack ack; + char buffer[PAGE_SIZE]; + } large_ack = {}; + + if (!size_ack) + size_ack = sizeof(struct coredump_ack) < req->size_ack ? + sizeof(struct coredump_ack) : + req->size_ack; + large_ack.ack.mask = mask; + large_ack.ack.size = size_ack; + ret = send(fd, &large_ack, size_ack, MSG_NOSIGNAL); + if (ret != size_ack) + return false; + + fprintf(stderr, "Sent coredump ack with size %zu and mask 0x%llx\n", + size_ack, (unsigned long long)mask); + return true; +} + +static bool check_coredump_req(const struct coredump_req *req, size_t min_size, + __u64 required_mask) +{ + if (req->size < min_size) + return false; + if ((req->mask & required_mask) != required_mask) + return false; + if (req->mask & ~required_mask) + return false; + return true; +} + +TEST_F(coredump, socket_request_kernel) +{ + int pidfd, ret, status; + pid_t pid, pid_coredump_server; + struct stat st; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + + ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket")); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + struct coredump_req req = {}; + int fd_server = -1, fd_coredump = -1, fd_core_file = -1, fd_peer_pidfd = -1; + int exit_code = EXIT_FAILURE; + + close(ipc_sockets[0]); + + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); + if (fd_server < 0) + goto out; + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; + + close(ipc_sockets[1]); + + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) + goto out; + + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) + goto out; + + if (!get_pidfd_info(fd_peer_pidfd, &info)) + goto out; + + if (!(info.mask & PIDFD_INFO_COREDUMP)) + goto out; + + if (!(info.coredump_mask & PIDFD_COREDUMPED)) + goto out; + + fd_core_file = creat("/tmp/coredump.file", 0644); + if (fd_core_file < 0) + goto out; + + if (!read_coredump_req(fd_coredump, &req)) + goto out; + + if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0, + COREDUMP_KERNEL | COREDUMP_USERSPACE | + COREDUMP_REJECT | COREDUMP_WAIT)) + goto out; + + if (!send_coredump_ack(fd_coredump, &req, + COREDUMP_KERNEL | COREDUMP_WAIT, 0)) + goto out; + + if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) + goto out; + + for (;;) { + char buffer[4096]; + ssize_t bytes_read, bytes_write; + + bytes_read = read(fd_coredump, buffer, sizeof(buffer)); + if (bytes_read < 0) + goto out; + + if (bytes_read == 0) + break; + + bytes_write = write(fd_core_file, buffer, bytes_read); + if (bytes_read != bytes_write) + goto out; + } + + exit_code = EXIT_SUCCESS; +out: + if (fd_core_file >= 0) + close(fd_core_file); + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) + close(fd_coredump); + if (fd_server >= 0) + close(fd_server); + _exit(exit_code); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_TRUE(WCOREDUMP(status)); + + ASSERT_TRUE(get_pidfd_info(pidfd, &info)); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); + + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); + + ASSERT_EQ(stat("/tmp/coredump.file", &st), 0); + ASSERT_GT(st.st_size, 0); + system("file /tmp/coredump.file"); +} + +TEST_F(coredump, socket_request_userspace) +{ + int pidfd, ret, status; + pid_t pid, pid_coredump_server; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + + ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket")); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + struct coredump_req req = {}; + int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1; + int exit_code = EXIT_FAILURE; + + close(ipc_sockets[0]); + + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); + if (fd_server < 0) + goto out; + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; + + close(ipc_sockets[1]); + + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) + goto out; + + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) + goto out; + + if (!get_pidfd_info(fd_peer_pidfd, &info)) + goto out; + + if (!(info.mask & PIDFD_INFO_COREDUMP)) + goto out; + + if (!(info.coredump_mask & PIDFD_COREDUMPED)) + goto out; + + if (!read_coredump_req(fd_coredump, &req)) + goto out; + + if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0, + COREDUMP_KERNEL | COREDUMP_USERSPACE | + COREDUMP_REJECT | COREDUMP_WAIT)) + goto out; + + if (!send_coredump_ack(fd_coredump, &req, + COREDUMP_USERSPACE | COREDUMP_WAIT, 0)) + goto out; + + if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) + goto out; + + for (;;) { + char buffer[4096]; + ssize_t bytes_read; + + bytes_read = read(fd_coredump, buffer, sizeof(buffer)); + if (bytes_read > 0) + goto out; + + if (bytes_read < 0) + goto out; + + if (bytes_read == 0) + break; + } + + exit_code = EXIT_SUCCESS; +out: + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) + close(fd_coredump); + if (fd_server >= 0) + close(fd_server); + _exit(exit_code); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_TRUE(WCOREDUMP(status)); + + ASSERT_TRUE(get_pidfd_info(pidfd, &info)); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); + + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); +} + +TEST_F(coredump, socket_request_reject) +{ + int pidfd, ret, status; + pid_t pid, pid_coredump_server; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + + ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket")); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + struct coredump_req req = {}; + int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1; + int exit_code = EXIT_FAILURE; + + close(ipc_sockets[0]); + + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); + if (fd_server < 0) + goto out; + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; + + close(ipc_sockets[1]); + + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) + goto out; + + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) + goto out; + + if (!get_pidfd_info(fd_peer_pidfd, &info)) + goto out; + + if (!(info.mask & PIDFD_INFO_COREDUMP)) + goto out; + + if (!(info.coredump_mask & PIDFD_COREDUMPED)) + goto out; + + if (!read_coredump_req(fd_coredump, &req)) + goto out; + + if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0, + COREDUMP_KERNEL | COREDUMP_USERSPACE | + COREDUMP_REJECT | COREDUMP_WAIT)) + goto out; + + if (!send_coredump_ack(fd_coredump, &req, + COREDUMP_REJECT | COREDUMP_WAIT, 0)) + goto out; + + if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) + goto out; + + for (;;) { + char buffer[4096]; + ssize_t bytes_read; + + bytes_read = read(fd_coredump, buffer, sizeof(buffer)); + if (bytes_read > 0) + goto out; + + if (bytes_read < 0) + goto out; + + if (bytes_read == 0) + break; + } + + exit_code = EXIT_SUCCESS; +out: + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) + close(fd_coredump); + if (fd_server >= 0) + close(fd_server); + _exit(exit_code); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_FALSE(WCOREDUMP(status)); + + ASSERT_TRUE(get_pidfd_info(pidfd, &info)); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); + + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); +} + +TEST_F(coredump, socket_request_invalid_flag_combination) +{ + int pidfd, ret, status; + pid_t pid, pid_coredump_server; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + + ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket")); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + struct coredump_req req = {}; + int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1; + int exit_code = EXIT_FAILURE; + + close(ipc_sockets[0]); + + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); + if (fd_server < 0) + goto out; + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; + + close(ipc_sockets[1]); + + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) + goto out; + + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) + goto out; + + if (!get_pidfd_info(fd_peer_pidfd, &info)) + goto out; + + if (!(info.mask & PIDFD_INFO_COREDUMP)) + goto out; + + if (!(info.coredump_mask & PIDFD_COREDUMPED)) + goto out; + + if (!read_coredump_req(fd_coredump, &req)) + goto out; + + if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0, + COREDUMP_KERNEL | COREDUMP_USERSPACE | + COREDUMP_REJECT | COREDUMP_WAIT)) + goto out; + + if (!send_coredump_ack(fd_coredump, &req, + COREDUMP_KERNEL | COREDUMP_REJECT | COREDUMP_WAIT, 0)) + goto out; + + if (!read_marker(fd_coredump, COREDUMP_MARK_CONFLICTING)) + goto out; + + exit_code = EXIT_SUCCESS; +out: + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) + close(fd_coredump); + if (fd_server >= 0) + close(fd_server); + _exit(exit_code); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_FALSE(WCOREDUMP(status)); + + ASSERT_TRUE(get_pidfd_info(pidfd, &info)); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); + + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); +} + +TEST_F(coredump, socket_request_unknown_flag) +{ + int pidfd, ret, status; + pid_t pid, pid_coredump_server; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + + ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket")); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + struct coredump_req req = {}; + int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1; + int exit_code = EXIT_FAILURE; + + close(ipc_sockets[0]); + + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); + if (fd_server < 0) + goto out; + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; + + close(ipc_sockets[1]); + + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) + goto out; + + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) + goto out; + + if (!get_pidfd_info(fd_peer_pidfd, &info)) + goto out; + + if (!(info.mask & PIDFD_INFO_COREDUMP)) + goto out; + + if (!(info.coredump_mask & PIDFD_COREDUMPED)) + goto out; + + if (!read_coredump_req(fd_coredump, &req)) + goto out; + + if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0, + COREDUMP_KERNEL | COREDUMP_USERSPACE | + COREDUMP_REJECT | COREDUMP_WAIT)) + goto out; + + if (!send_coredump_ack(fd_coredump, &req, (1ULL << 63), 0)) + goto out; + + if (!read_marker(fd_coredump, COREDUMP_MARK_UNSUPPORTED)) + goto out; + + exit_code = EXIT_SUCCESS; +out: + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) + close(fd_coredump); + if (fd_server >= 0) + close(fd_server); + _exit(exit_code); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_FALSE(WCOREDUMP(status)); + + ASSERT_TRUE(get_pidfd_info(pidfd, &info)); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); + + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); +} + +TEST_F(coredump, socket_request_invalid_size_small) +{ + int pidfd, ret, status; + pid_t pid, pid_coredump_server; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + + ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket")); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + struct coredump_req req = {}; + int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1; + int exit_code = EXIT_FAILURE; + + close(ipc_sockets[0]); + + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); + if (fd_server < 0) + goto out; + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; + + close(ipc_sockets[1]); + + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) + goto out; + + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) + goto out; + + if (!get_pidfd_info(fd_peer_pidfd, &info)) + goto out; + + if (!(info.mask & PIDFD_INFO_COREDUMP)) + goto out; + + if (!(info.coredump_mask & PIDFD_COREDUMPED)) + goto out; + + if (!read_coredump_req(fd_coredump, &req)) + goto out; + + if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0, + COREDUMP_KERNEL | COREDUMP_USERSPACE | + COREDUMP_REJECT | COREDUMP_WAIT)) + goto out; + + if (!send_coredump_ack(fd_coredump, &req, + COREDUMP_REJECT | COREDUMP_WAIT, + COREDUMP_ACK_SIZE_VER0 / 2)) + goto out; + + if (!read_marker(fd_coredump, COREDUMP_MARK_MINSIZE)) + goto out; + + exit_code = EXIT_SUCCESS; +out: + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) + close(fd_coredump); + if (fd_server >= 0) + close(fd_server); + _exit(exit_code); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_FALSE(WCOREDUMP(status)); + + ASSERT_TRUE(get_pidfd_info(pidfd, &info)); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); + + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); +} + +TEST_F(coredump, socket_request_invalid_size_large) +{ + int pidfd, ret, status; + pid_t pid, pid_coredump_server; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + + ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket")); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + struct coredump_req req = {}; + int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1; + int exit_code = EXIT_FAILURE; + + close(ipc_sockets[0]); + + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); + if (fd_server < 0) + goto out; + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; + + close(ipc_sockets[1]); + + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) + goto out; + + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) + goto out; + + if (!get_pidfd_info(fd_peer_pidfd, &info)) + goto out; + + if (!(info.mask & PIDFD_INFO_COREDUMP)) + goto out; + + if (!(info.coredump_mask & PIDFD_COREDUMPED)) + goto out; + + if (!read_coredump_req(fd_coredump, &req)) + goto out; + + if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0, + COREDUMP_KERNEL | COREDUMP_USERSPACE | + COREDUMP_REJECT | COREDUMP_WAIT)) + goto out; + + if (!send_coredump_ack(fd_coredump, &req, + COREDUMP_REJECT | COREDUMP_WAIT, + COREDUMP_ACK_SIZE_VER0 + PAGE_SIZE)) + goto out; + + if (!read_marker(fd_coredump, COREDUMP_MARK_MAXSIZE)) + goto out; + + exit_code = EXIT_SUCCESS; +out: + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) + close(fd_coredump); + if (fd_server >= 0) + close(fd_server); + _exit(exit_code); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_FALSE(WCOREDUMP(status)); + + ASSERT_TRUE(get_pidfd_info(pidfd, &info)); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); + + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); +} + +static int open_coredump_tmpfile(int fd_tmpfs_detached) +{ + return openat(fd_tmpfs_detached, ".", O_TMPFILE | O_RDWR | O_EXCL, 0600); +} + +#define NUM_CRASHING_COREDUMPS 5 + +TEST_F_TIMEOUT(coredump, socket_multiple_crashing_coredumps, 500) +{ + int pidfd[NUM_CRASHING_COREDUMPS], status[NUM_CRASHING_COREDUMPS]; + pid_t pid[NUM_CRASHING_COREDUMPS], pid_coredump_server; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + + ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket")); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1; + int exit_code = EXIT_FAILURE; + struct coredump_req req = {}; + + close(ipc_sockets[0]); + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); + if (fd_server < 0) { + fprintf(stderr, "Failed to create and listen on unix socket\n"); + goto out; + } + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) { + fprintf(stderr, "Failed to notify parent via ipc socket\n"); + goto out; + } + close(ipc_sockets[1]); + + for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) { + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) { + fprintf(stderr, "accept4 failed: %m\n"); + goto out; + } + + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) { + fprintf(stderr, "get_peer_pidfd failed for fd %d: %m\n", fd_coredump); + goto out; + } + + if (!get_pidfd_info(fd_peer_pidfd, &info)) { + fprintf(stderr, "get_pidfd_info failed for fd %d\n", fd_peer_pidfd); + goto out; + } + + if (!(info.mask & PIDFD_INFO_COREDUMP)) { + fprintf(stderr, "pidfd info missing PIDFD_INFO_COREDUMP for fd %d\n", fd_peer_pidfd); + goto out; + } + if (!(info.coredump_mask & PIDFD_COREDUMPED)) { + fprintf(stderr, "pidfd info missing PIDFD_COREDUMPED for fd %d\n", fd_peer_pidfd); + goto out; + } + + if (!read_coredump_req(fd_coredump, &req)) { + fprintf(stderr, "read_coredump_req failed for fd %d\n", fd_coredump); + goto out; + } + + if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0, + COREDUMP_KERNEL | COREDUMP_USERSPACE | + COREDUMP_REJECT | COREDUMP_WAIT)) { + fprintf(stderr, "check_coredump_req failed for fd %d\n", fd_coredump); + goto out; + } + + if (!send_coredump_ack(fd_coredump, &req, + COREDUMP_KERNEL | COREDUMP_WAIT, 0)) { + fprintf(stderr, "send_coredump_ack failed for fd %d\n", fd_coredump); + goto out; + } + + if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) { + fprintf(stderr, "read_marker failed for fd %d\n", fd_coredump); + goto out; + } + + fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached); + if (fd_core_file < 0) { + fprintf(stderr, "%m - open_coredump_tmpfile failed for fd %d\n", fd_coredump); + goto out; + } + + for (;;) { + char buffer[4096]; + ssize_t bytes_read, bytes_write; + + bytes_read = read(fd_coredump, buffer, sizeof(buffer)); + if (bytes_read < 0) { + fprintf(stderr, "read failed for fd %d: %m\n", fd_coredump); + goto out; + } + + if (bytes_read == 0) + break; + + bytes_write = write(fd_core_file, buffer, bytes_read); + if (bytes_read != bytes_write) { + fprintf(stderr, "write failed for fd %d: %m\n", fd_core_file); + goto out; + } + } + + close(fd_core_file); + close(fd_peer_pidfd); + close(fd_coredump); + fd_peer_pidfd = -1; + fd_coredump = -1; + } + + exit_code = EXIT_SUCCESS; +out: + if (fd_core_file >= 0) + close(fd_core_file); + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) + close(fd_coredump); + if (fd_server >= 0) + close(fd_server); + _exit(exit_code); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) { + pid[i] = fork(); + ASSERT_GE(pid[i], 0); + if (pid[i] == 0) + crashing_child(); + pidfd[i] = sys_pidfd_open(pid[i], 0); + ASSERT_GE(pidfd[i], 0); + } + + for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) { + waitpid(pid[i], &status[i], 0); + ASSERT_TRUE(WIFSIGNALED(status[i])); + ASSERT_TRUE(WCOREDUMP(status[i])); + } + + for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) { + info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; + ASSERT_EQ(ioctl(pidfd[i], PIDFD_GET_INFO, &info), 0); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); + } + + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); +} + +#define MAX_EVENTS 128 + +static void process_coredump_worker(int fd_coredump, int fd_peer_pidfd, int fd_core_file) +{ + int epfd = -1; + int exit_code = EXIT_FAILURE; + + epfd = epoll_create1(0); + if (epfd < 0) + goto out; + + struct epoll_event ev; + ev.events = EPOLLIN | EPOLLRDHUP | EPOLLET; + ev.data.fd = fd_coredump; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd_coredump, &ev) < 0) + goto out; + + for (;;) { + struct epoll_event events[1]; + int n = epoll_wait(epfd, events, 1, -1); + if (n < 0) + break; + + if (events[0].events & (EPOLLIN | EPOLLRDHUP)) { + for (;;) { + char buffer[4096]; + ssize_t bytes_read = read(fd_coredump, buffer, sizeof(buffer)); + if (bytes_read < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) + break; + goto out; + } + if (bytes_read == 0) + goto done; + ssize_t bytes_write = write(fd_core_file, buffer, bytes_read); + if (bytes_write != bytes_read) + goto out; + } + } + } + +done: + exit_code = EXIT_SUCCESS; +out: + if (epfd >= 0) + close(epfd); + if (fd_core_file >= 0) + close(fd_core_file); + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) + close(fd_coredump); + _exit(exit_code); +} + +TEST_F_TIMEOUT(coredump, socket_multiple_crashing_coredumps_epoll_workers, 500) +{ + int pidfd[NUM_CRASHING_COREDUMPS], status[NUM_CRASHING_COREDUMPS]; + pid_t pid[NUM_CRASHING_COREDUMPS], pid_coredump_server, worker_pids[NUM_CRASHING_COREDUMPS]; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + + ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket")); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + int fd_server = -1, exit_code = EXIT_FAILURE, n_conns = 0; + fd_server = -1; + exit_code = EXIT_FAILURE; + n_conns = 0; + close(ipc_sockets[0]); + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); + if (fd_server < 0) + goto out; + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; + close(ipc_sockets[1]); + + while (n_conns < NUM_CRASHING_COREDUMPS) { + int fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1; + struct coredump_req req = {}; + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) + continue; + goto out; + } + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) + goto out; + if (!get_pidfd_info(fd_peer_pidfd, &info)) + goto out; + if (!(info.mask & PIDFD_INFO_COREDUMP) || !(info.coredump_mask & PIDFD_COREDUMPED)) + goto out; + if (!read_coredump_req(fd_coredump, &req)) + goto out; + if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0, + COREDUMP_KERNEL | COREDUMP_USERSPACE | + COREDUMP_REJECT | COREDUMP_WAIT)) + goto out; + if (!send_coredump_ack(fd_coredump, &req, COREDUMP_KERNEL | COREDUMP_WAIT, 0)) + goto out; + if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) + goto out; + fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached); + if (fd_core_file < 0) + goto out; + pid_t worker = fork(); + if (worker == 0) { + close(fd_server); + process_coredump_worker(fd_coredump, fd_peer_pidfd, fd_core_file); + } + worker_pids[n_conns] = worker; + if (fd_coredump >= 0) + close(fd_coredump); + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_core_file >= 0) + close(fd_core_file); + n_conns++; + } + exit_code = EXIT_SUCCESS; +out: + if (fd_server >= 0) + close(fd_server); + + // Reap all worker processes + for (int i = 0; i < n_conns; i++) { + int wstatus; + if (waitpid(worker_pids[i], &wstatus, 0) < 0) { + fprintf(stderr, "Failed to wait for worker %d: %m\n", worker_pids[i]); + } else if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != EXIT_SUCCESS) { + fprintf(stderr, "Worker %d exited with error code %d\n", worker_pids[i], WEXITSTATUS(wstatus)); + exit_code = EXIT_FAILURE; + } + } + + _exit(exit_code); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) { + pid[i] = fork(); + ASSERT_GE(pid[i], 0); + if (pid[i] == 0) + crashing_child(); + pidfd[i] = sys_pidfd_open(pid[i], 0); + ASSERT_GE(pidfd[i], 0); + } + + for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) { + ASSERT_GE(waitpid(pid[i], &status[i], 0), 0); + ASSERT_TRUE(WIFSIGNALED(status[i])); + ASSERT_TRUE(WCOREDUMP(status[i])); + } + + for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) { + info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; + ASSERT_EQ(ioctl(pidfd[i], PIDFD_GET_INFO, &info), 0); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); + } + + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); +} + TEST_HARNESS_MAIN -- cgit v1.2.3 From 5159482fdb2b4c15cb0a087e41d8bc5d730bb697 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 11 Jun 2025 13:08:36 -0700 Subject: selftests/bpf: tests with a loop state missing read/precision mark The test case absent_mark_in_the_middle_state is equivalent of the following C program: 1: r8 = bpf_get_prandom_u32(); 2: r6 = -32; 3: bpf_iter_num_new(&fp[-8], 0, 10); 4: if (unlikely(bpf_get_prandom_u32())) 5: r6 = -31; 6: for (;;) { 7: if (!bpf_iter_num_next(&fp[-8])) 8: break; 9: if (unlikely(bpf_get_prandom_u32())) 10: *(u64 *)(fp + r6) = 7; 11: } 12: bpf_iter_num_destroy(&fp[-8]); 13: return 0; W/o a fix that instructs verifier to ignore branches count for loop entries verification proceeds as follows: - 1-4, state is {r6=-32,fp-8=active}; - 6, checkpoint A is created with {r6=-32,fp-8=active}; - 7, checkpoint B is created with {r6=-32,fp-8=active}, push state {r6=-32,fp-8=active} from 7 to 9; - 8,12,13, {r6=-32,fp-8=drained}, exit; - pop state with {r6=-32,fp-8=active} from 7 to 9; - 9, push state {r6=-32,fp-8=active} from 9 to 10; - 6, checkpoint C is created with {r6=-32,fp-8=active}; - 7, checkpoint A is hit, no precision propagated for r6 to C; - pop state {r6=-32,fp-8=active} from 9 to 10; - 10, state is {r6=-31,fp-8=active}, r6 is marked as read and precise, these marks are propagated to checkpoints A and B (but not C, as it is not the parent of current state; - 6, {r6=-31,fp-8=active} checkpoint C is hit, because r6 is not marked precise for this checkpoint; - the program is accepted, despite a possibility of unaligned u64 stack access at offset -31. The test case absent_mark_in_the_middle_state2 is similar except the following change: r8 = bpf_get_prandom_u32(); r6 = -32; bpf_iter_num_new(&fp[-8], 0, 10); if (unlikely(bpf_get_prandom_u32())) { r6 = -31; + jump_into_loop: + goto +0; + goto loop; + } + if (unlikely(bpf_get_prandom_u32())) + goto jump_into_loop; + loop: for (;;) { if (!bpf_iter_num_next(&fp[-8])) break; if (unlikely(bpf_get_prandom_u32())) *(u64 *)(fp + r6) = 7; } bpf_iter_num_destroy(&fp[-8]) return 0 The goal is to check that read/precision marks are propagated to checkpoint created at 'goto +0' that resides outside of the loop. The test case absent_mark_in_the_middle_state3 is a bit different and is equivalent to the C program below: int absent_mark_in_the_middle_state3(void) { bpf_iter_num_new(&fp[-8], 0, 10) loop1(-32, &fp[-8]) loop1_wrapper(&fp[-8]) bpf_iter_num_destroy(&fp[-8]) } int loop1(num, iter) { while (bpf_iter_num_next(iter)) { if (unlikely(bpf_get_prandom_u32())) *(fp + num) = 7; } return 0 } int loop1_wrapper(iter) { r6 = -32; if (unlikely(bpf_get_prandom_u32())) r6 = -31; loop1(r6, iter); return 0; } The unsafe state is reached in a similar manner, but the loop is located inside a subprogram that is called from two locations in the main subprogram. This detail is important for exercising bpf_scc_visit->backedges memory management. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250611200836.4135542-11-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/iters.c | 277 ++++++++++++++++++++++++++++++ 1 file changed, 277 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index 76adf4a8f2da..7dd92a303bf6 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -1649,4 +1649,281 @@ int clean_live_states(const void *ctx) return 0; } +SEC("?raw_tp") +__flag(BPF_F_TEST_STATE_FREQ) +__failure __msg("misaligned stack access off 0+-31+0 size 8") +__naked int absent_mark_in_the_middle_state(void) +{ + /* This is equivalent to C program below. + * + * r8 = bpf_get_prandom_u32(); + * r6 = -32; + * bpf_iter_num_new(&fp[-8], 0, 10); + * if (unlikely(bpf_get_prandom_u32())) + * r6 = -31; + * while (bpf_iter_num_next(&fp[-8])) { + * if (unlikely(bpf_get_prandom_u32())) + * *(fp + r6) = 7; + * } + * bpf_iter_num_destroy(&fp[-8]) + * return 0 + */ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r8 = r0;" + "r7 = 0;" + "r6 = -32;" + "r0 = 0;" + "*(u64 *)(r10 - 16) = r0;" + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto change_r6_%=;" + "loop_%=:" + "call noop;" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto loop_end_%=;" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto use_r6_%=;" + "goto loop_%=;" + "loop_end_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + "use_r6_%=:" + "r0 = r10;" + "r0 += r6;" + "r1 = 7;" + "*(u64 *)(r0 + 0) = r1;" + "goto loop_%=;" + "change_r6_%=:" + "r6 = -31;" + "goto loop_%=;" + : + : __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy), + __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + +__used __naked +static int noop(void) +{ + asm volatile ( + "r0 = 0;" + "exit;" + ); +} + +SEC("?raw_tp") +__flag(BPF_F_TEST_STATE_FREQ) +__failure __msg("misaligned stack access off 0+-31+0 size 8") +__naked int absent_mark_in_the_middle_state2(void) +{ + /* This is equivalent to C program below. + * + * r8 = bpf_get_prandom_u32(); + * r6 = -32; + * bpf_iter_num_new(&fp[-8], 0, 10); + * if (unlikely(bpf_get_prandom_u32())) { + * r6 = -31; + * jump_into_loop: + * goto +0; + * goto loop; + * } + * if (unlikely(bpf_get_prandom_u32())) + * goto jump_into_loop; + * loop: + * while (bpf_iter_num_next(&fp[-8])) { + * if (unlikely(bpf_get_prandom_u32())) + * *(fp + r6) = 7; + * } + * bpf_iter_num_destroy(&fp[-8]) + * return 0 + */ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r8 = r0;" + "r7 = 0;" + "r6 = -32;" + "r0 = 0;" + "*(u64 *)(r10 - 16) = r0;" + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto change_r6_%=;" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto jump_into_loop_%=;" + "loop_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto loop_end_%=;" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto use_r6_%=;" + "goto loop_%=;" + "loop_end_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + "use_r6_%=:" + "r0 = r10;" + "r0 += r6;" + "r1 = 7;" + "*(u64 *)(r0 + 0) = r1;" + "goto loop_%=;" + "change_r6_%=:" + "r6 = -31;" + "jump_into_loop_%=: " + "goto +0;" + "goto loop_%=;" + : + : __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy), + __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + +SEC("?raw_tp") +__flag(BPF_F_TEST_STATE_FREQ) +__failure __msg("misaligned stack access off 0+-31+0 size 8") +__naked int absent_mark_in_the_middle_state3(void) +{ + /* + * bpf_iter_num_new(&fp[-8], 0, 10) + * loop1(-32, &fp[-8]) + * loop1_wrapper(&fp[-8]) + * bpf_iter_num_destroy(&fp[-8]) + */ + asm volatile ( + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + /* call #1 */ + "r1 = -32;" + "r2 = r10;" + "r2 += -8;" + "call loop1;" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + /* call #2 */ + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "r1 = r10;" + "r1 += -8;" + "call loop1_wrapper;" + /* return */ + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + : + : __imm(bpf_iter_num_new), + __imm(bpf_iter_num_destroy), + __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + +__used __naked +static int loop1(void) +{ + /* + * int loop1(num, iter) { + * r6 = num; + * r7 = iter; + * while (bpf_iter_num_next(r7)) { + * if (unlikely(bpf_get_prandom_u32())) + * *(fp + r6) = 7; + * } + * return 0 + * } + */ + asm volatile ( + "r6 = r1;" + "r7 = r2;" + "call %[bpf_get_prandom_u32];" + "r8 = r0;" + "loop_%=:" + "r1 = r7;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto loop_end_%=;" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto use_r6_%=;" + "goto loop_%=;" + "loop_end_%=:" + "r0 = 0;" + "exit;" + "use_r6_%=:" + "r0 = r10;" + "r0 += r6;" + "r1 = 7;" + "*(u64 *)(r0 + 0) = r1;" + "goto loop_%=;" + : + : __imm(bpf_iter_num_next), + __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + +__used __naked +static int loop1_wrapper(void) +{ + /* + * int loop1_wrapper(iter) { + * r6 = -32; + * r7 = iter; + * if (unlikely(bpf_get_prandom_u32())) + * r6 = -31; + * loop1(r6, r7); + * return 0; + * } + */ + asm volatile ( + "r6 = -32;" + "r7 = r1;" + "call %[bpf_get_prandom_u32];" + "r8 = r0;" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto change_r6_%=;" + "loop_%=:" + "r1 = r6;" + "r2 = r7;" + "call loop1;" + "r0 = 0;" + "exit;" + "change_r6_%=:" + "r6 = -31;" + "goto loop_%=;" + : + : __imm(bpf_iter_num_next), + __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 6a4bd31f680a1d1cf06492fe6dc4f08da09769e6 Mon Sep 17 00:00:00 2001 From: Fushuai Wang Date: Thu, 12 Jun 2025 16:42:08 +0800 Subject: selftests/bpf: fix signedness bug in redir_partial() When xsend() returns -1 (error), the check 'n < sizeof(buf)' incorrectly treats it as success due to unsigned promotion. Explicitly check for -1 first. Fixes: a4b7193d8efd ("selftests/bpf: Add sockmap test for redirecting partial skb data") Signed-off-by: Fushuai Wang Link: https://lore.kernel.org/r/20250612084208.27722-1-wangfushuai@baidu.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/sockmap_listen.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 1d98eee7a2c3..f1bdccc7e4e7 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -924,6 +924,8 @@ static void redir_partial(int family, int sotype, int sock_map, int parser_map) goto close; n = xsend(c1, buf, sizeof(buf), 0); + if (n == -1) + goto close; if (n < sizeof(buf)) FAIL("incomplete write"); -- cgit v1.2.3 From 221dfdb2df90b0e4df12371e6b549ca8ebba719d Mon Sep 17 00:00:00 2001 From: Ankit Chauhan Date: Tue, 10 Jun 2025 12:49:03 +0530 Subject: selftests: tcp_ao: fix spelling in seq-ext.c comment Spelling fix: conneciton --> connection This is a non-functional change aimed at improving code clarity. Signed-off-by: Ankit Chauhan Link: https://patch.msgid.link/20250610071903.67180-1-ankitchauhan2065@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tcp_ao/seq-ext.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/tcp_ao/seq-ext.c b/tools/testing/selftests/net/tcp_ao/seq-ext.c index f00245263b20..6478da6a71c3 100644 --- a/tools/testing/selftests/net/tcp_ao/seq-ext.c +++ b/tools/testing/selftests/net/tcp_ao/seq-ext.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Check that after SEQ number wrap-around: * 1. SEQ-extension has upper bytes set - * 2. TCP conneciton is alive and no TCPAOBad segments + * 2. TCP connection is alive and no TCPAOBad segments * In order to test (2), the test doesn't just adjust seq number for a queue * on a connected socket, but migrates it to another sk+port number, so * that there won't be any delayed packets that will fail to verify -- cgit v1.2.3 From 4fc012daf9c074772421c904357abf586336b1ca Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 11 Jun 2025 20:50:32 -0700 Subject: bpf: Fix an issue in bpf_prog_test_run_xdp when page size greater than 4K The bpf selftest xdp_adjust_tail/xdp_adjust_frags_tail_grow failed on arm64 with 64KB page: xdp_adjust_tail/xdp_adjust_frags_tail_grow:FAIL In bpf_prog_test_run_xdp(), the xdp->frame_sz is set to 4K, but later on when constructing frags, with 64K page size, the frag data_len could be more than 4K. This will cause problems in bpf_xdp_frags_increase_tail(). To fix the failure, the xdp->frame_sz is set to be PAGE_SIZE so kernel can test different page size properly. With the kernel change, the user space and bpf prog needs adjustment. Currently, the MAX_SKB_FRAGS default value is 17, so for 4K page, the maximum packet size will be less than 68K. To test 64K page, a bigger maximum packet size than 68K is desired. So two different functions are implemented for subtest xdp_adjust_frags_tail_grow. Depending on different page size, different data input/output sizes are used to adapt with different page size. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20250612035032.2207498-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- net/bpf/test_run.c | 2 +- .../selftests/bpf/prog_tests/xdp_adjust_tail.c | 96 ++++++++++++++++++++-- .../bpf/progs/test_xdp_adjust_tail_grow.c | 8 +- 3 files changed, 97 insertions(+), 9 deletions(-) (limited to 'tools/testing/selftests') diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index aaf13a7d58ed..9728dbd4c66c 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -1255,7 +1255,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, headroom -= ctx->data; } - max_data_sz = 4096 - headroom - tailroom; + max_data_sz = PAGE_SIZE - headroom - tailroom; if (size > max_data_sz) { /* disallow live data mode for jumbo frames */ if (do_live) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c index e361129402a1..43264347e7d7 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c @@ -37,21 +37,26 @@ static void test_xdp_adjust_tail_shrink(void) bpf_object__close(obj); } -static void test_xdp_adjust_tail_grow(void) +static void test_xdp_adjust_tail_grow(bool is_64k_pagesize) { const char *file = "./test_xdp_adjust_tail_grow.bpf.o"; struct bpf_object *obj; - char buf[4096]; /* avoid segfault: large buf to hold grow results */ + char buf[8192]; /* avoid segfault: large buf to hold grow results */ __u32 expect_sz; int err, prog_fd; LIBBPF_OPTS(bpf_test_run_opts, topts, .data_in = &pkt_v4, - .data_size_in = sizeof(pkt_v4), .data_out = buf, .data_size_out = sizeof(buf), .repeat = 1, ); + /* topts.data_size_in as a special signal to bpf prog */ + if (is_64k_pagesize) + topts.data_size_in = sizeof(pkt_v4) - 1; + else + topts.data_size_in = sizeof(pkt_v4); + err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); if (!ASSERT_OK(err, "test_xdp_adjust_tail_grow")) return; @@ -208,7 +213,7 @@ out: bpf_object__close(obj); } -static void test_xdp_adjust_frags_tail_grow(void) +static void test_xdp_adjust_frags_tail_grow_4k(void) { const char *file = "./test_xdp_adjust_tail_grow.bpf.o"; __u32 exp_size; @@ -279,16 +284,93 @@ out: bpf_object__close(obj); } +static void test_xdp_adjust_frags_tail_grow_64k(void) +{ + const char *file = "./test_xdp_adjust_tail_grow.bpf.o"; + __u32 exp_size; + struct bpf_program *prog; + struct bpf_object *obj; + int err, i, prog_fd; + __u8 *buf; + LIBBPF_OPTS(bpf_test_run_opts, topts); + + obj = bpf_object__open(file); + if (libbpf_get_error(obj)) + return; + + prog = bpf_object__next_program(obj, NULL); + if (bpf_object__load(obj)) + goto out; + + prog_fd = bpf_program__fd(prog); + + buf = malloc(262144); + if (!ASSERT_OK_PTR(buf, "alloc buf 256Kb")) + goto out; + + /* Test case add 10 bytes to last frag */ + memset(buf, 1, 262144); + exp_size = 90000 + 10; + + topts.data_in = buf; + topts.data_out = buf; + topts.data_size_in = 90000; + topts.data_size_out = 262144; + err = bpf_prog_test_run_opts(prog_fd, &topts); + + ASSERT_OK(err, "90Kb+10b"); + ASSERT_EQ(topts.retval, XDP_TX, "90Kb+10b retval"); + ASSERT_EQ(topts.data_size_out, exp_size, "90Kb+10b size"); + + for (i = 0; i < 90000; i++) { + if (buf[i] != 1) + ASSERT_EQ(buf[i], 1, "90Kb+10b-old"); + } + + for (i = 90000; i < 90010; i++) { + if (buf[i] != 0) + ASSERT_EQ(buf[i], 0, "90Kb+10b-new"); + } + + for (i = 90010; i < 262144; i++) { + if (buf[i] != 1) + ASSERT_EQ(buf[i], 1, "90Kb+10b-untouched"); + } + + /* Test a too large grow */ + memset(buf, 1, 262144); + exp_size = 90001; + + topts.data_in = topts.data_out = buf; + topts.data_size_in = 90001; + topts.data_size_out = 262144; + err = bpf_prog_test_run_opts(prog_fd, &topts); + + ASSERT_OK(err, "90Kb+10b"); + ASSERT_EQ(topts.retval, XDP_DROP, "90Kb+10b retval"); + ASSERT_EQ(topts.data_size_out, exp_size, "90Kb+10b size"); + + free(buf); +out: + bpf_object__close(obj); +} + void test_xdp_adjust_tail(void) { + int page_size = getpagesize(); + if (test__start_subtest("xdp_adjust_tail_shrink")) test_xdp_adjust_tail_shrink(); if (test__start_subtest("xdp_adjust_tail_grow")) - test_xdp_adjust_tail_grow(); + test_xdp_adjust_tail_grow(page_size == 65536); if (test__start_subtest("xdp_adjust_tail_grow2")) test_xdp_adjust_tail_grow2(); if (test__start_subtest("xdp_adjust_frags_tail_shrink")) test_xdp_adjust_frags_tail_shrink(); - if (test__start_subtest("xdp_adjust_frags_tail_grow")) - test_xdp_adjust_frags_tail_grow(); + if (test__start_subtest("xdp_adjust_frags_tail_grow")) { + if (page_size == 65536) + test_xdp_adjust_frags_tail_grow_64k(); + else + test_xdp_adjust_frags_tail_grow_4k(); + } } diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c index dc74d8cf9e3f..5904f45cfbc4 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c @@ -19,7 +19,9 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp) /* Data length determine test case */ if (data_len == 54) { /* sizeof(pkt_v4) */ - offset = 4096; /* test too large offset */ + offset = 4096; /* test too large offset, 4k page size */ + } else if (data_len == 53) { /* sizeof(pkt_v4) - 1 */ + offset = 65536; /* test too large offset, 64k page size */ } else if (data_len == 74) { /* sizeof(pkt_v6) */ offset = 40; } else if (data_len == 64) { @@ -31,6 +33,10 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp) offset = 10; } else if (data_len == 9001) { offset = 4096; + } else if (data_len == 90000) { + offset = 10; /* test a small offset, 64k page size */ + } else if (data_len == 90001) { + offset = 65536; /* test too large offset, 64k page size */ } else { return XDP_ABORTED; /* No matching test */ } -- cgit v1.2.3 From 96fcf7e7a71c2b21c002e9ba9362d88f8beac09a Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 11 Jun 2025 20:50:37 -0700 Subject: selftests/bpf: Fix two net related test failures with 64K page size When running BPF selftests on arm64 with a 64K page size, I encountered the following two test failures: sockmap_basic/sockmap skb_verdict change tail:FAIL tc_change_tail:FAIL With further debugging, I identified the root cause in the following kernel code within __bpf_skb_change_tail(): u32 max_len = BPF_SKB_MAX_LEN; u32 min_len = __bpf_skb_min_len(skb); int ret; if (unlikely(flags || new_len > max_len || new_len < min_len)) return -EINVAL; With a 4K page size, new_len = 65535 and max_len = 16064, the function returns -EINVAL. However, With a 64K page size, max_len increases to 261824, allowing execution to proceed further in the function. This is because BPF_SKB_MAX_LEN scales with the page size and larger page sizes result in higher max_len values. Updating the new_len parameter in both tests based on actual kernel page size resolved both failures. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20250612035037.2207911-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/progs/test_sockmap_change_tail.c | 9 +++++++-- tools/testing/selftests/bpf/progs/test_tc_change_tail.c | 14 +++++++------- 2 files changed, 14 insertions(+), 9 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c b/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c index 2796dd8545eb..1c7941a4ad00 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c +++ b/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c @@ -1,8 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2024 ByteDance */ -#include +#include "vmlinux.h" #include +#ifndef PAGE_SIZE +#define PAGE_SIZE __PAGE_SIZE +#endif +#define BPF_SKB_MAX_LEN (PAGE_SIZE << 2) + struct { __uint(type, BPF_MAP_TYPE_SOCKMAP); __uint(max_entries, 1); @@ -31,7 +36,7 @@ int prog_skb_verdict(struct __sk_buff *skb) change_tail_ret = bpf_skb_change_tail(skb, skb->len + 1, 0); return SK_PASS; } else if (data[0] == 'E') { /* Error */ - change_tail_ret = bpf_skb_change_tail(skb, 65535, 0); + change_tail_ret = bpf_skb_change_tail(skb, BPF_SKB_MAX_LEN, 0); return SK_PASS; } return SK_PASS; diff --git a/tools/testing/selftests/bpf/progs/test_tc_change_tail.c b/tools/testing/selftests/bpf/progs/test_tc_change_tail.c index 28edafe803f0..fcba8299f0bc 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_change_tail.c +++ b/tools/testing/selftests/bpf/progs/test_tc_change_tail.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 -#include +#include "vmlinux.h" #include -#include -#include -#include -#include -#include + +#ifndef PAGE_SIZE +#define PAGE_SIZE __PAGE_SIZE +#endif +#define BPF_SKB_MAX_LEN (PAGE_SIZE << 2) long change_tail_ret = 1; @@ -94,7 +94,7 @@ int change_tail(struct __sk_buff *skb) bpf_skb_change_tail(skb, len, 0); return TCX_PASS; } else if (payload[0] == 'E') { /* Error */ - change_tail_ret = bpf_skb_change_tail(skb, 65535, 0); + change_tail_ret = bpf_skb_change_tail(skb, BPF_SKB_MAX_LEN, 0); return TCX_PASS; } else if (payload[0] == 'Z') { /* Zero */ change_tail_ret = bpf_skb_change_tail(skb, 0, 0); -- cgit v1.2.3 From 44df9e0d4eec45060ccee72217f56fe3178074a5 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 11 Jun 2025 20:50:42 -0700 Subject: selftests/bpf: Fix xdp_do_redirect failure with 64KB page size On arm64 with 64KB page size, the selftest xdp_do_redirect failed like below: ... test_xdp_do_redirect:PASS:pkt_count_tc 0 nsec test_max_pkt_size:PASS:prog_run_max_size 0 nsec test_max_pkt_size:FAIL:prog_run_too_big unexpected prog_run_too_big: actual -28 != expected -22 With 64KB page size, the xdp frame size will be much bigger so the existing test will fail. Adjust various parameters so the test can also work on 64K page size. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20250612035042.2208630-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c index 7dac044664ac..dd34b0cc4b4e 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c @@ -66,16 +66,25 @@ static int attach_tc_prog(struct bpf_tc_hook *hook, int fd) #else #define MAX_PKT_SIZE 3408 #endif + +#define PAGE_SIZE_4K 4096 +#define PAGE_SIZE_64K 65536 + static void test_max_pkt_size(int fd) { - char data[MAX_PKT_SIZE + 1] = {}; + char data[PAGE_SIZE_64K + 1] = {}; int err; DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = &data, - .data_size_in = MAX_PKT_SIZE, .flags = BPF_F_TEST_XDP_LIVE_FRAMES, .repeat = 1, ); + + if (getpagesize() == PAGE_SIZE_64K) + opts.data_size_in = MAX_PKT_SIZE + PAGE_SIZE_64K - PAGE_SIZE_4K; + else + opts.data_size_in = MAX_PKT_SIZE; + err = bpf_prog_test_run_opts(fd, &opts); ASSERT_OK(err, "prog_run_max_size"); -- cgit v1.2.3 From b89732c8c8357487185f260a723a060b3476144e Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Wed, 21 May 2025 17:04:28 +0200 Subject: selftests: Fix errno checking in syscall_user_dispatch test Successful syscalls don't change errno, so checking errno is wrong to ensure that a syscall has failed. For example for the following sequence: prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0xff, 0); EXPECT_EQ(EINVAL, errno); prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0x0, &sel); EXPECT_EQ(EINVAL, errno); only the first syscall may fail and set errno, but the second may succeed and keep errno intact, and the check will falsely pass. Or if errno happened to be EINVAL before, even the first check may falsely pass. Also use EXPECT/ASSERT consistently. Currently there is an inconsistent mix without obvious reasons for usage of one or another. Fixes: 179ef035992e ("selftests: Add kselftest for syscall user dispatch") Signed-off-by: Dmitry Vyukov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/af6a04dbfef9af8570f5bab43e3ef1416b62699a.1747839857.git.dvyukov@google.com --- .../selftests/syscall_user_dispatch/sud_test.c | 50 +++++++++++----------- 1 file changed, 25 insertions(+), 25 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/syscall_user_dispatch/sud_test.c b/tools/testing/selftests/syscall_user_dispatch/sud_test.c index d975a6767329..48cf01aeec3e 100644 --- a/tools/testing/selftests/syscall_user_dispatch/sud_test.c +++ b/tools/testing/selftests/syscall_user_dispatch/sud_test.c @@ -79,6 +79,21 @@ TEST_SIGNAL(dispatch_trigger_sigsys, SIGSYS) } } +static void prctl_valid(struct __test_metadata *_metadata, + unsigned long op, unsigned long off, + unsigned long size, void *sel) +{ + EXPECT_EQ(0, prctl(PR_SET_SYSCALL_USER_DISPATCH, op, off, size, sel)); +} + +static void prctl_invalid(struct __test_metadata *_metadata, + unsigned long op, unsigned long off, + unsigned long size, void *sel, int err) +{ + EXPECT_EQ(-1, prctl(PR_SET_SYSCALL_USER_DISPATCH, op, off, size, sel)); + EXPECT_EQ(err, errno); +} + TEST(bad_prctl_param) { char sel = SYSCALL_DISPATCH_FILTER_ALLOW; @@ -86,57 +101,42 @@ TEST(bad_prctl_param) /* Invalid op */ op = -1; - prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0, 0, &sel); - ASSERT_EQ(EINVAL, errno); + prctl_invalid(_metadata, op, 0, 0, &sel, EINVAL); /* PR_SYS_DISPATCH_OFF */ op = PR_SYS_DISPATCH_OFF; /* offset != 0 */ - prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x1, 0x0, 0); - EXPECT_EQ(EINVAL, errno); + prctl_invalid(_metadata, op, 0x1, 0x0, 0, EINVAL); /* len != 0 */ - prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0xff, 0); - EXPECT_EQ(EINVAL, errno); + prctl_invalid(_metadata, op, 0x0, 0xff, 0, EINVAL); /* sel != NULL */ - prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0x0, &sel); - EXPECT_EQ(EINVAL, errno); + prctl_invalid(_metadata, op, 0x0, 0x0, &sel, EINVAL); /* Valid parameter */ - errno = 0; - prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0x0, 0x0); - EXPECT_EQ(0, errno); + prctl_valid(_metadata, op, 0x0, 0x0, 0x0); /* PR_SYS_DISPATCH_ON */ op = PR_SYS_DISPATCH_ON; /* Dispatcher region is bad (offset > 0 && len == 0) */ - prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x1, 0x0, &sel); - EXPECT_EQ(EINVAL, errno); - prctl(PR_SET_SYSCALL_USER_DISPATCH, op, -1L, 0x0, &sel); - EXPECT_EQ(EINVAL, errno); + prctl_invalid(_metadata, op, 0x1, 0x0, &sel, EINVAL); + prctl_invalid(_metadata, op, -1L, 0x0, &sel, EINVAL); /* Invalid selector */ - prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0x1, (void *) -1); - ASSERT_EQ(EFAULT, errno); + prctl_invalid(_metadata, op, 0x0, 0x1, (void *) -1, EFAULT); /* * Dispatcher range overflows unsigned long */ - prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 1, -1L, &sel); - ASSERT_EQ(EINVAL, errno) { - TH_LOG("Should reject bad syscall range"); - } + prctl_invalid(_metadata, PR_SYS_DISPATCH_ON, 1, -1L, &sel, EINVAL); /* * Allowed range overflows usigned long */ - prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, -1L, 0x1, &sel); - ASSERT_EQ(EINVAL, errno) { - TH_LOG("Should reject bad syscall range"); - } + prctl_invalid(_metadata, PR_SYS_DISPATCH_ON, -1L, 0x1, &sel, EINVAL); } /* -- cgit v1.2.3 From b6a5a16b8b59476156dc6d6f73bffaf3a1707adb Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Wed, 21 May 2025 17:04:30 +0200 Subject: selftests: Add tests for PR_SYS_DISPATCH_INCLUSIVE_ON Add tests for PR_SYS_DISPATCH_INCLUSIVE_ON correct/incorrect args, and a test that ensures that the specified range is respected by both PR_SYS_DISPATCH_EXCLUSIVE_ON and PR_SYS_DISPATCH_INCLUSIVE_ON. Signed-off-by: Dmitry Vyukov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/8df4b50b176b073550bab5b3f3faff752c5f8e17.1747839857.git.dvyukov@google.com --- .../selftests/syscall_user_dispatch/sud_test.c | 94 +++++++++++++++++----- 1 file changed, 74 insertions(+), 20 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/syscall_user_dispatch/sud_test.c b/tools/testing/selftests/syscall_user_dispatch/sud_test.c index 48cf01aeec3e..2eb2c06303f2 100644 --- a/tools/testing/selftests/syscall_user_dispatch/sud_test.c +++ b/tools/testing/selftests/syscall_user_dispatch/sud_test.c @@ -10,6 +10,8 @@ #include #include #include +#include +#include #include #include "../kselftest_harness.h" @@ -17,11 +19,15 @@ #ifndef PR_SET_SYSCALL_USER_DISPATCH # define PR_SET_SYSCALL_USER_DISPATCH 59 # define PR_SYS_DISPATCH_OFF 0 -# define PR_SYS_DISPATCH_ON 1 # define SYSCALL_DISPATCH_FILTER_ALLOW 0 # define SYSCALL_DISPATCH_FILTER_BLOCK 1 #endif +#ifndef PR_SYS_DISPATCH_EXCLUSIVE_ON +# define PR_SYS_DISPATCH_EXCLUSIVE_ON 1 +# define PR_SYS_DISPATCH_INCLUSIVE_ON 2 +#endif + #ifndef SYS_USER_DISPATCH # define SYS_USER_DISPATCH 2 #endif @@ -65,7 +71,7 @@ TEST_SIGNAL(dispatch_trigger_sigsys, SIGSYS) ret = sysinfo(&info); ASSERT_EQ(0, ret); - ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &sel); + ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_EXCLUSIVE_ON, 0, 0, &sel); ASSERT_EQ(0, ret) { TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH"); } @@ -118,8 +124,8 @@ TEST(bad_prctl_param) /* Valid parameter */ prctl_valid(_metadata, op, 0x0, 0x0, 0x0); - /* PR_SYS_DISPATCH_ON */ - op = PR_SYS_DISPATCH_ON; + /* PR_SYS_DISPATCH_EXCLUSIVE_ON */ + op = PR_SYS_DISPATCH_EXCLUSIVE_ON; /* Dispatcher region is bad (offset > 0 && len == 0) */ prctl_invalid(_metadata, op, 0x1, 0x0, &sel, EINVAL); @@ -131,12 +137,24 @@ TEST(bad_prctl_param) /* * Dispatcher range overflows unsigned long */ - prctl_invalid(_metadata, PR_SYS_DISPATCH_ON, 1, -1L, &sel, EINVAL); + prctl_invalid(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, 1, -1L, &sel, EINVAL); /* * Allowed range overflows usigned long */ - prctl_invalid(_metadata, PR_SYS_DISPATCH_ON, -1L, 0x1, &sel, EINVAL); + prctl_invalid(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, -1L, 0x1, &sel, EINVAL); + + /* 0 len should fail for PR_SYS_DISPATCH_INCLUSIVE_ON */ + prctl_invalid(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, 1, 0, 0, EINVAL); + + /* Range wrap-around should fail */ + prctl_invalid(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, -1L, 2, 0, EINVAL); + + /* Normal range shouldn't fail */ + prctl_valid(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, 2, 3, 0); + + /* Invalid selector */ + prctl_invalid(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, 2, 3, (void *) -1, EFAULT); } /* @@ -147,11 +165,13 @@ char glob_sel; int nr_syscalls_emulated; int si_code; int si_errno; +unsigned long syscall_addr; static void handle_sigsys(int sig, siginfo_t *info, void *ucontext) { si_code = info->si_code; si_errno = info->si_errno; + syscall_addr = (unsigned long)info->si_call_addr; if (info->si_syscall == MAGIC_SYSCALL_1) nr_syscalls_emulated++; @@ -174,31 +194,34 @@ static void handle_sigsys(int sig, siginfo_t *info, void *ucontext) #endif } -TEST(dispatch_and_return) +int setup_sigsys_handler(void) { - long ret; struct sigaction act; sigset_t mask; - glob_sel = 0; - nr_syscalls_emulated = 0; - si_code = 0; - si_errno = 0; - memset(&act, 0, sizeof(act)); sigemptyset(&mask); - act.sa_sigaction = handle_sigsys; act.sa_flags = SA_SIGINFO; act.sa_mask = mask; + return sigaction(SIGSYS, &act, NULL); +} - ret = sigaction(SIGSYS, &act, NULL); - ASSERT_EQ(0, ret); +TEST(dispatch_and_return) +{ + long ret; + + glob_sel = 0; + nr_syscalls_emulated = 0; + si_code = 0; + si_errno = 0; + + ASSERT_EQ(0, setup_sigsys_handler()); /* Make sure selector is good prior to prctl. */ SYSCALL_DISPATCH_OFF(glob_sel); - ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &glob_sel); + ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_EXCLUSIVE_ON, 0, 0, &glob_sel); ASSERT_EQ(0, ret) { TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH"); } @@ -254,7 +277,7 @@ TEST_SIGNAL(bad_selector, SIGSYS) /* Make sure selector is good prior to prctl. */ SYSCALL_DISPATCH_OFF(glob_sel); - ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &glob_sel); + ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_EXCLUSIVE_ON, 0, 0, &glob_sel); ASSERT_EQ(0, ret) { TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH"); } @@ -278,7 +301,7 @@ TEST(disable_dispatch) struct sysinfo info; char sel = 0; - ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &sel); + ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_EXCLUSIVE_ON, 0, 0, &sel); ASSERT_EQ(0, ret) { TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH"); } @@ -310,7 +333,7 @@ TEST(direct_dispatch_range) * Instead of calculating libc addresses; allow the entire * memory map and lock the selector. */ - ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, -1L, &sel); + ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_EXCLUSIVE_ON, 0, -1L, &sel); ASSERT_EQ(0, ret) { TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH"); } @@ -323,4 +346,35 @@ TEST(direct_dispatch_range) } } +static void test_range(struct __test_metadata *_metadata, + unsigned long op, unsigned long off, + unsigned long size, bool dispatch) +{ + nr_syscalls_emulated = 0; + SYSCALL_DISPATCH_OFF(glob_sel); + EXPECT_EQ(0, prctl(PR_SET_SYSCALL_USER_DISPATCH, op, off, size, &glob_sel)); + SYSCALL_DISPATCH_ON(glob_sel); + if (dispatch) { + EXPECT_EQ(syscall(MAGIC_SYSCALL_1), MAGIC_SYSCALL_1); + EXPECT_EQ(nr_syscalls_emulated, 1); + } else { + EXPECT_EQ(syscall(MAGIC_SYSCALL_1), -1); + EXPECT_EQ(nr_syscalls_emulated, 0); + } +} + +TEST(dispatch_range) +{ + ASSERT_EQ(0, setup_sigsys_handler()); + test_range(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, 0, 0, true); + test_range(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, syscall_addr, 1, false); + test_range(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, syscall_addr-100, 200, false); + test_range(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, syscall_addr+1, 100, true); + test_range(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, syscall_addr-100, 100, true); + test_range(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, syscall_addr, 1, true); + test_range(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, syscall_addr-1, 1, false); + test_range(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, syscall_addr+1, 1, false); + SYSCALL_DISPATCH_OFF(glob_sel); +} + TEST_HARNESS_MAIN -- cgit v1.2.3 From ccefa19335a0b81f11b0856e951ca909445b3783 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 12 Jun 2025 22:00:01 -0700 Subject: bpf/veristat: Fix veristat for map type BPF_MAP_TYPE_CGRP_STORAGE BPF_MAP_TYPE_CGRP_STORAGE doesn't allow non-zero max_entries. So veristat should not set it to 1. Signed-off-by: Song Liu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250613050001.1058733-1-song@kernel.org --- tools/testing/selftests/bpf/veristat.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index b2bb20b00952..70cdabd88c50 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -1182,6 +1182,7 @@ static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const ch case BPF_MAP_TYPE_TASK_STORAGE: case BPF_MAP_TYPE_INODE_STORAGE: case BPF_MAP_TYPE_CGROUP_STORAGE: + case BPF_MAP_TYPE_CGRP_STORAGE: break; case BPF_MAP_TYPE_STRUCT_OPS: mask_unrelated_struct_ops_progs(obj, map, prog); -- cgit v1.2.3 From 67cdcc405b46c13446d6d220a108daa2f8de3436 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Fri, 13 Jun 2025 00:21:47 -0700 Subject: veristat: Memory accounting for bpf programs This commit adds a new field mem_peak / "Peak memory (MiB)" field to a set of gathered statistics. The field is intended as an estimate for peak verifier memory consumption for processing of a given program. Mechanically stat is collected as follows: - At the beginning of handle_verif_mode() a new cgroup is created and veristat process is moved into this cgroup. - At each program load: - bpf_object__load() is split into bpf_object__prepare() and bpf_object__load() to avoid accounting for memory allocated for maps; - before bpf_object__load(): - a write to "memory.peak" file of the new cgroup is used to reset cgroup statistics; - updated value is read from "memory.peak" file and stashed; - after bpf_object__load() "memory.peak" is read again and difference between new and stashed values is used as a metric. If any of the above steps fails veristat proceeds w/o collecting mem_peak information for a program, reporting mem_peak as -1. While memcg provides data in bytes (converted from pages), veristat converts it to megabytes to avoid jitter when comparing results of different executions. The change has no measurable impact on veristat running time. A correlation between "Peak states" and "Peak memory" fields provides a sanity check for gathered statistics, e.g. a sample of data for sched_ext programs: Program Peak states Peak memory (MiB) ------------------------ ----------- ----------------- lavd_select_cpu 2153 44 lavd_enqueue 1982 41 lavd_dispatch 3480 28 layered_dispatch 1417 17 layered_enqueue 760 11 lavd_cpu_offline 349 6 lavd_cpu_online 349 6 lavd_init 394 6 rusty_init 350 5 layered_select_cpu 391 4 ... rusty_stopping 134 1 arena_topology_node_init 170 0 Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250613072147.3938139-3-eddyz87@gmail.com --- tools/testing/selftests/bpf/Makefile | 5 + tools/testing/selftests/bpf/veristat.c | 248 ++++++++++++++++++++++++++++++++- 2 files changed, 246 insertions(+), 7 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index cf5ed3bee573..97013c49920b 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -841,6 +841,11 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ +# This works around GCC warning about snprintf truncating strings like: +# +# char a[PATH_MAX], b[PATH_MAX]; +# snprintf(a, "%s/foo", b); // triggers -Wformat-truncation +$(OUTPUT)/veristat.o: CFLAGS += -Wno-format-truncation $(OUTPUT)/veristat.o: $(BPFOBJ) $(OUTPUT)/veristat: $(OUTPUT)/veristat.o $(call msg,BINARY,,$@) diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 70cdabd88c50..56771650ee8c 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -49,6 +49,7 @@ enum stat_id { STACK, PROG_TYPE, ATTACH_TYPE, + MEMORY_PEAK, FILE_NAME, PROG_NAME, @@ -208,6 +209,9 @@ static struct env { int top_src_lines; struct var_preset *presets; int npresets; + char orig_cgroup[PATH_MAX]; + char stat_cgroup[PATH_MAX]; + int memory_peak_fd; } env; static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) @@ -219,6 +223,22 @@ static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va return vfprintf(stderr, format, args); } +#define log_errno(fmt, ...) log_errno_aux(__FILE__, __LINE__, fmt, ##__VA_ARGS__) + +__printf(3, 4) +static int log_errno_aux(const char *file, int line, const char *fmt, ...) +{ + int err = -errno; + va_list ap; + + va_start(ap, fmt); + fprintf(stderr, "%s:%d: ", file, line); + vfprintf(stderr, fmt, ap); + fprintf(stderr, " failed with error '%s'.\n", strerror(errno)); + va_end(ap); + return err; +} + #ifndef VERISTAT_VERSION #define VERISTAT_VERSION "" #endif @@ -734,13 +754,13 @@ cleanup: } static const struct stat_specs default_csv_output_spec = { - .spec_cnt = 14, + .spec_cnt = 15, .ids = { FILE_NAME, PROG_NAME, VERDICT, DURATION, TOTAL_INSNS, TOTAL_STATES, PEAK_STATES, MAX_STATES_PER_INSN, MARK_READ_MAX_LEN, SIZE, JITED_SIZE, PROG_TYPE, ATTACH_TYPE, - STACK, + STACK, MEMORY_PEAK, }, }; @@ -781,6 +801,7 @@ static struct stat_def { [STACK] = {"Stack depth", {"stack_depth", "stack"}, }, [PROG_TYPE] = { "Program type", {"prog_type"}, }, [ATTACH_TYPE] = { "Attach type", {"attach_type", }, }, + [MEMORY_PEAK] = { "Peak memory (MiB)", {"mem_peak", }, }, }; static bool parse_stat_id_var(const char *name, size_t len, int *id, @@ -1279,16 +1300,214 @@ static int max_verifier_log_size(void) return log_size; } +static bool output_stat_enabled(int id) +{ + int i; + + for (i = 0; i < env.output_spec.spec_cnt; i++) + if (env.output_spec.ids[i] == id) + return true; + return false; +} + +__printf(2, 3) +static int write_one_line(const char *file, const char *fmt, ...) +{ + int err, saved_errno; + va_list ap; + FILE *f; + + f = fopen(file, "w"); + if (!f) + return -1; + + va_start(ap, fmt); + errno = 0; + err = vfprintf(f, fmt, ap); + saved_errno = errno; + va_end(ap); + fclose(f); + errno = saved_errno; + return err < 0 ? -1 : 0; +} + +__scanf(3, 4) +static int scanf_one_line(const char *file, int fields_expected, const char *fmt, ...) +{ + int res = 0, saved_errno = 0; + char *line = NULL; + size_t line_len; + va_list ap; + FILE *f; + + f = fopen(file, "r"); + if (!f) + return -1; + + va_start(ap, fmt); + while (getline(&line, &line_len, f) > 0) { + res = vsscanf(line, fmt, ap); + if (res == fields_expected) + goto out; + } + if (ferror(f)) { + saved_errno = errno; + res = -1; + } + +out: + va_end(ap); + free(line); + fclose(f); + errno = saved_errno; + return res; +} + +static void destroy_stat_cgroup(void) +{ + char buf[PATH_MAX]; + int err; + + close(env.memory_peak_fd); + + if (env.orig_cgroup[0]) { + snprintf(buf, sizeof(buf), "%s/cgroup.procs", env.orig_cgroup); + err = write_one_line(buf, "%d\n", getpid()); + if (err < 0) + log_errno("moving self to original cgroup %s\n", env.orig_cgroup); + } + + if (env.stat_cgroup[0]) { + err = rmdir(env.stat_cgroup); + if (err < 0) + log_errno("deletion of cgroup %s", env.stat_cgroup); + } + + env.memory_peak_fd = -1; + env.orig_cgroup[0] = 0; + env.stat_cgroup[0] = 0; +} + +/* + * Creates a cgroup at /sys/fs/cgroup/veristat-accounting-, + * moves current process to this cgroup. + */ +static void create_stat_cgroup(void) +{ + char cgroup_fs_mount[4096]; + char buf[4096]; + int err; + + env.memory_peak_fd = -1; + + if (!output_stat_enabled(MEMORY_PEAK)) + return; + + err = scanf_one_line("/proc/self/mounts", 2, "%*s %4095s cgroup2 %s", + cgroup_fs_mount, buf); + if (err != 2) { + if (err < 0) + log_errno("reading /proc/self/mounts"); + else if (!env.quiet) + fprintf(stderr, "Can't find cgroupfs v2 mount point.\n"); + goto err_out; + } + + /* cgroup-v2.rst promises the line "0::" for cgroups v2 */ + err = scanf_one_line("/proc/self/cgroup", 1, "0::%4095s", buf); + if (err != 1) { + if (err < 0) + log_errno("reading /proc/self/cgroup"); + else if (!env.quiet) + fprintf(stderr, "Can't infer veristat process cgroup."); + goto err_out; + } + + snprintf(env.orig_cgroup, sizeof(env.orig_cgroup), "%s/%s", cgroup_fs_mount, buf); + + snprintf(buf, sizeof(buf), "%s/veristat-accounting-%d", cgroup_fs_mount, getpid()); + err = mkdir(buf, 0777); + if (err < 0) { + log_errno("creation of cgroup %s", buf); + goto err_out; + } + strcpy(env.stat_cgroup, buf); + + snprintf(buf, sizeof(buf), "%s/cgroup.procs", env.stat_cgroup); + err = write_one_line(buf, "%d\n", getpid()); + if (err < 0) { + log_errno("entering cgroup %s", buf); + goto err_out; + } + + snprintf(buf, sizeof(buf), "%s/memory.peak", env.stat_cgroup); + env.memory_peak_fd = open(buf, O_RDWR | O_APPEND); + if (env.memory_peak_fd < 0) { + log_errno("opening %s", buf); + goto err_out; + } + + return; + +err_out: + if (!env.quiet) + fprintf(stderr, "Memory usage metric unavailable.\n"); + destroy_stat_cgroup(); +} + +/* Current value of /sys/fs/cgroup/veristat-accounting-/memory.peak */ +static long cgroup_memory_peak(void) +{ + long err, memory_peak; + char buf[32]; + + if (env.memory_peak_fd < 0) + return -1; + + err = pread(env.memory_peak_fd, buf, sizeof(buf) - 1, 0); + if (err <= 0) { + log_errno("pread(%s/memory.peak)", env.stat_cgroup); + return -1; + } + + buf[err] = 0; + errno = 0; + memory_peak = strtoll(buf, NULL, 10); + if (errno) { + log_errno("%s/memory.peak:strtoll(%s)", env.stat_cgroup, buf); + return -1; + } + + return memory_peak; +} + +static int reset_stat_cgroup(void) +{ + char buf[] = "r\n"; + int err; + + if (env.memory_peak_fd < 0) + return -1; + + err = pwrite(env.memory_peak_fd, buf, sizeof(buf), 0); + if (err <= 0) { + log_errno("pwrite(%s/memory.peak)", env.stat_cgroup); + return -1; + } + return 0; +} + static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog) { const char *base_filename = basename(strdupa(filename)); const char *prog_name = bpf_program__name(prog); + long mem_peak_a, mem_peak_b, mem_peak = -1; char *buf; int buf_sz, log_level; struct verif_stats *stats; struct bpf_prog_info info; __u32 info_len = sizeof(info); - int err = 0; + int err = 0, cgroup_err; void *tmp; int fd; @@ -1333,7 +1552,15 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf if (env.force_reg_invariants) bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS); - err = bpf_object__load(obj); + err = bpf_object__prepare(obj); + if (!err) { + cgroup_err = reset_stat_cgroup(); + mem_peak_a = cgroup_memory_peak(); + err = bpf_object__load(obj); + mem_peak_b = cgroup_memory_peak(); + if (!cgroup_err && mem_peak_a >= 0 && mem_peak_b >= 0) + mem_peak = mem_peak_b - mem_peak_a; + } env.progs_processed++; stats->file_name = strdup(base_filename); @@ -1342,6 +1569,7 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf stats->stats[SIZE] = bpf_program__insn_cnt(prog); stats->stats[PROG_TYPE] = bpf_program__type(prog); stats->stats[ATTACH_TYPE] = bpf_program__expected_attach_type(prog); + stats->stats[MEMORY_PEAK] = mem_peak < 0 ? -1 : mem_peak / (1024 * 1024); memset(&info, 0, info_len); fd = bpf_program__fd(prog); @@ -1825,6 +2053,7 @@ static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2, case TOTAL_STATES: case PEAK_STATES: case MAX_STATES_PER_INSN: + case MEMORY_PEAK: case MARK_READ_MAX_LEN: { long v1 = s1->stats[id]; long v2 = s2->stats[id]; @@ -2054,6 +2283,7 @@ static void prepare_value(const struct verif_stats *s, enum stat_id id, case STACK: case SIZE: case JITED_SIZE: + case MEMORY_PEAK: *val = s ? s->stats[id] : 0; break; default: @@ -2140,6 +2370,7 @@ static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats case MARK_READ_MAX_LEN: case SIZE: case JITED_SIZE: + case MEMORY_PEAK: case STACK: { long val; int err, n; @@ -2777,7 +3008,7 @@ static void output_prog_stats(void) static int handle_verif_mode(void) { - int i, err; + int i, err = 0; if (env.filename_cnt == 0) { fprintf(stderr, "Please provide path to BPF object file!\n\n"); @@ -2785,11 +3016,12 @@ static int handle_verif_mode(void) return -EINVAL; } + create_stat_cgroup(); for (i = 0; i < env.filename_cnt; i++) { err = process_obj(env.filenames[i]); if (err) { fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err); - return err; + goto out; } } @@ -2797,7 +3029,9 @@ static int handle_verif_mode(void) output_prog_stats(); - return 0; +out: + destroy_stat_cgroup(); + return err; } static int handle_replay_mode(void) -- cgit v1.2.3 From 4a4b84ba9e453295c746d81cb245c0c5d80050f0 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Fri, 13 Jun 2025 10:53:31 -0700 Subject: selftests/bpf: verify jset handling in CFG computation A test case to check if both branches of jset are explored when computing program CFG. At 'if r1 & 0x7 ...': - register 'r2' is computed alive only if jump branch of jset instruction is followed; - register 'r0' is computed alive only if fallthrough branch of jset instruction is followed. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250613175331.3238739-2-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/progs/compute_live_registers.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/progs/compute_live_registers.c b/tools/testing/selftests/bpf/progs/compute_live_registers.c index f3d79aecbf93..6884ab99a421 100644 --- a/tools/testing/selftests/bpf/progs/compute_live_registers.c +++ b/tools/testing/selftests/bpf/progs/compute_live_registers.c @@ -240,6 +240,22 @@ __naked void if2(void) ::: __clobber_all); } +/* Verifier misses that r2 is alive if jset is not handled properly */ +SEC("socket") +__log_level(2) +__msg("2: 012....... (45) if r1 & 0x7 goto pc+1") +__naked void if3_jset_bug(void) +{ + asm volatile ( + "r0 = 1;" + "r2 = 2;" + "if r1 & 0x7 goto +1;" + "exit;" + "r0 = r2;" + "exit;" + ::: __clobber_all); +} + SEC("socket") __log_level(2) __msg("0: .......... (b7) r1 = 0") -- cgit v1.2.3 From 8a25350fa430a28d0595a6d14af661d4f151b123 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:22 +0200 Subject: selftests/coredump: make sure invalid paths are rejected Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-8-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- tools/testing/selftests/coredump/stackdump_test.c | 32 ++++++++++++++++++----- 1 file changed, 26 insertions(+), 6 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c index 9a789156f27e..a4ac80bb1003 100644 --- a/tools/testing/selftests/coredump/stackdump_test.c +++ b/tools/testing/selftests/coredump/stackdump_test.c @@ -241,16 +241,19 @@ out: static bool set_core_pattern(const char *pattern) { - FILE *file; - int ret; + int fd; + ssize_t ret; - file = fopen("/proc/sys/kernel/core_pattern", "w"); - if (!file) + fd = open("/proc/sys/kernel/core_pattern", O_WRONLY | O_CLOEXEC); + if (fd < 0) return false; - ret = fprintf(file, "%s", pattern); - fclose(file); + ret = write(fd, pattern, strlen(pattern)); + close(fd); + if (ret < 0) + return false; + fprintf(stderr, "Set core_pattern to '%s' | %zu == %zu\n", pattern, ret, strlen(pattern)); return ret == strlen(pattern); } @@ -1804,4 +1807,21 @@ out: wait_and_check_coredump_server(pid_coredump_server, _metadata, self); } +TEST_F(coredump, socket_invalid_paths) +{ + ASSERT_FALSE(set_core_pattern("@ /tmp/coredump.socket")); + ASSERT_FALSE(set_core_pattern("@/tmp/../coredump.socket")); + ASSERT_FALSE(set_core_pattern("@../coredump.socket")); + ASSERT_FALSE(set_core_pattern("@/tmp/coredump.socket/..")); + ASSERT_FALSE(set_core_pattern("@..")); + + ASSERT_FALSE(set_core_pattern("@@ /tmp/coredump.socket")); + ASSERT_FALSE(set_core_pattern("@@/tmp/../coredump.socket")); + ASSERT_FALSE(set_core_pattern("@@../coredump.socket")); + ASSERT_FALSE(set_core_pattern("@@/tmp/coredump.socket/..")); + ASSERT_FALSE(set_core_pattern("@@..")); + + ASSERT_FALSE(set_core_pattern("@@@/tmp/coredump.socket")); +} + TEST_HARNESS_MAIN -- cgit v1.2.3 From de74998c3008a2e0af97a918dfbb7560ecb79ee4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 13 Jun 2025 06:41:36 +0000 Subject: selftests/tc-testing: sfq: check perturb timer values Add one test to check that the kernel rejects a negative perturb timer. Add a second test checking that the kernel rejects a too big perturb timer. All test results: 1..2 ok 1 cdc1 - Check that a negative perturb timer is rejected ok 2 a9f0 - Check that a too big perturb timer is rejected Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Reviewed-by: Cong Wang Link: https://patch.msgid.link/20250613064136.3911944-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- .../selftests/tc-testing/tc-tests/qdiscs/sfq.json | 36 ++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json index 28c6ce6da7db..531a2f6e4900 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json @@ -264,5 +264,41 @@ "matchPattern": "sfq", "matchCount": "0", "teardown": [] + }, + { + "id": "cdc1", + "name": "Check that a negative perturb timer is rejected", + "category": [ + "qdisc", + "sfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq perturb -10", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "sfq", + "matchCount": "0", + "teardown": [] + }, + { + "id": "a9f0", + "name": "Check that a too big perturb timer is rejected", + "category": [ + "qdisc", + "sfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq perturb 1000000000", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "sfq", + "matchCount": "0", + "teardown": [] } ] -- cgit v1.2.3 From bed365ca56cadeabb4f0484dc9e12b3c78ac0ab7 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 13 Jun 2025 04:31:36 -0700 Subject: selftests: net: Refactor cleanup logic in lib_netcons.sh Extract the network device and namespace cleanup logic from the cleanup() function into a new do_cleanup() helper in lib_netcons.sh. The do_cleanup() function only unconfigure the network and printk, while cleanup() cleans the netconsole targets plus the network and printk. This refactoring let this code to be reused in cases netconsole dynamic is not being used, as in the upcoming patch. Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20250613-rework-v3-7-0752bf2e6912@debian.org Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/lib/sh/lib_netcons.sh | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index 71a5a8b1712c..598279139a6e 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -128,16 +128,9 @@ function disable_release_append() { echo 1 > "${NETCONS_PATH}"/enabled } -function cleanup() { +function do_cleanup() { local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device" - # delete netconsole dynamic reconfiguration - echo 0 > "${NETCONS_PATH}"/enabled - # Remove all the keys that got created during the selftest - find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete - # Remove the configfs entry - rmdir "${NETCONS_PATH}" - # Delete netdevsim devices echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_DEL" echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_DEL" @@ -149,6 +142,17 @@ function cleanup() { echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk } +function cleanup() { + # delete netconsole dynamic reconfiguration + echo 0 > "${NETCONS_PATH}"/enabled + # Remove all the keys that got created during the selftest + find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete + # Remove the configfs entry + rmdir "${NETCONS_PATH}" + + do_cleanup +} + function set_user_data() { if [[ ! -d "${NETCONS_PATH}""/userdata" ]] then -- cgit v1.2.3 From 69d094ef69b9548c89b8e32d24e3d9fc19731a26 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 13 Jun 2025 04:31:37 -0700 Subject: selftests: net: add netconsole test for cmdline configuration Add a new selftest to verify netconsole module loading with command line arguments. This test exercises the init_netconsole() path and validates proper parsing of the netconsole= parameter format. The test: - Loads netconsole module with cmdline configuration instead of dynamic reconfiguration - Validates message transmission through the configured target - Adds helper functions for cmdline string generation and module validation This complements existing netconsole selftests by covering the module initialization code path that processes boot-time parameters. This test is useful to test issues like the one described in [1]. Link: https://lore.kernel.org/netdev/Z36TlACdNMwFD7wv@dev-ushankar.dev.purestorage.com/ [1] Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20250613-rework-v3-8-0752bf2e6912@debian.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/Makefile | 1 + .../selftests/drivers/net/lib/sh/lib_netcons.sh | 39 +++++++++++++--- .../selftests/drivers/net/netcons_cmdline.sh | 52 ++++++++++++++++++++++ 3 files changed, 86 insertions(+), 6 deletions(-) create mode 100755 tools/testing/selftests/drivers/net/netcons_cmdline.sh (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index be780bcb73a3..bd309b2d3909 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -12,6 +12,7 @@ TEST_GEN_FILES := \ TEST_PROGS := \ napi_id.py \ netcons_basic.sh \ + netcons_cmdline.sh \ netcons_fragmented_msg.sh \ netcons_overflow.sh \ netcons_sysdata.sh \ diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index 598279139a6e..3fcf85a34596 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -121,6 +121,17 @@ function create_dynamic_target() { echo 1 > "${NETCONS_PATH}"/enabled } +# Generate the command line argument for netconsole following: +# netconsole=[+][src-port]@[src-ip]/[],[tgt-port]@/[tgt-macaddr] +function create_cmdline_str() { + DSTMAC=$(ip netns exec "${NAMESPACE}" \ + ip link show "${DSTIF}" | awk '/ether/ {print $2}') + SRCPORT="1514" + TGTPORT="6666" + + echo "netconsole=\"+${SRCPORT}@${SRCIP}/${SRCIF},${TGTPORT}@${DSTIP}/${DSTMAC}\"" +} + # Do not append the release to the header of the message function disable_release_append() { echo 0 > "${NETCONS_PATH}"/enabled @@ -173,13 +184,9 @@ function listen_port_and_save_to() { socat UDP-LISTEN:"${PORT}",fork "${OUTPUT}" } -function validate_result() { +# Only validate that the message arrived properly +function validate_msg() { local TMPFILENAME="$1" - local FORMAT=${2:-"extended"} - - # TMPFILENAME will contain something like: - # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM - # key=value # Check if the file exists if [ ! -f "$TMPFILENAME" ]; then @@ -192,6 +199,17 @@ function validate_result() { cat "${TMPFILENAME}" >&2 exit "${ksft_fail}" fi +} + +# Validate the message and userdata +function validate_result() { + local TMPFILENAME="$1" + + # TMPFILENAME will contain something like: + # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM + # key=value + + validate_msg "${TMPFILENAME}" # userdata is not supported on basic format target, # thus, do not validate it. @@ -267,3 +285,12 @@ function pkill_socat() { pkill -f "${PROCESS_NAME}" set -e } + +# Check if netconsole was compiled as a module, otherwise exit +function check_netconsole_module() { + if modinfo netconsole | grep filename: | grep -q builtin + then + echo "SKIP: netconsole should be compiled as a module" >&2 + exit "${ksft_skip}" + fi +} diff --git a/tools/testing/selftests/drivers/net/netcons_cmdline.sh b/tools/testing/selftests/drivers/net/netcons_cmdline.sh new file mode 100755 index 000000000000..ad2fb8b1c463 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netcons_cmdline.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# This is a selftest to test cmdline arguments on netconsole. +# It exercises loading of netconsole from cmdline instead of the dynamic +# reconfiguration. This includes parsing the long netconsole= line and all the +# flow through init_netconsole(). +# +# Author: Breno Leitao + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh + +check_netconsole_module + +modprobe netdevsim 2> /dev/null || true +rmmod netconsole 2> /dev/null || true + +# The content of kmsg will be save to the following file +OUTPUT_FILE="/tmp/${TARGET}" + +# Check for basic system dependency and exit if not found +# check_for_dependencies +# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) +echo "6 5" > /proc/sys/kernel/printk +# Remove the namespace and network interfaces +trap do_cleanup EXIT +# Create one namespace and two interfaces +set_network +# Create the command line for netconsole, with the configuration from the +# function above +CMDLINE="$(create_cmdline_str)" + +# Load the module, with the cmdline set +modprobe netconsole "${CMDLINE}" + +# Listed for netconsole port inside the namespace and destination interface +listen_port_and_save_to "${OUTPUT_FILE}" & +# Wait for socat to start and listen to the port. +wait_local_port_listen "${NAMESPACE}" "${PORT}" udp +# Send the message +echo "${MSG}: ${TARGET}" > /dev/kmsg +# Wait until socat saves the file to disk +busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" +# Make sure the message was received in the dst part +# and exit +validate_msg "${OUTPUT_FILE}" + +exit "${ksft_pass}" -- cgit v1.2.3 From 04d752d60c190e754cfe4f95a7451afeb752adbd Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 12 Jun 2025 15:23:23 +0300 Subject: selftests: seg6: Add test cases for End.X with link-local nexthop In the current test topology, all the routers are connected to each other via dedicated links with addresses of the form fcf0:0:x:y::/64. The test configures rt-3 with an adjacency with rt-4 and rt-4 with an adjacency with rt-1: # ip -n rt_3-IgWSBJ -6 route show tab 90 fcbb:0:300::/48 fcbb:0:300::/48 encap seg6local action End.X nh6 fcf0:0:3:4::4 flavors next-csid lblen 32 nflen 16 dev dum0 metric 1024 pref medium # ip -n rt_4-JdCunK -6 route show tab 90 fcbb:0:400::/48 fcbb:0:400::/48 encap seg6local action End.X nh6 fcf0:0:1:4::1 flavors next-csid lblen 32 nflen 16 dev dum0 metric 1024 pref medium The routes are used when pinging hs-2 from hs-1 and vice-versa. Extend the test to also cover End.X behavior with an IPv6 link-local nexthop address and an output interface. Configure every router interface with an IPv6 link-local address of the form fe80::x:y/64 and before re-running the ping tests, replace the previous End.X routes with routes that use the new IPv6 link-local addresses: # ip -n rt_3-IgWSBJ -6 route show tab 90 fcbb:0:300::/48 fcbb:0:300::/48 encap seg6local action End.X nh6 fe80::4:3 oif veth-rt-3-4 flavors next-csid lblen 32 nflen 16 dev dum0 metric 1024 pref medium # ip -n rt_4-JdCunK -6 route show tab 90 fcbb:0:400::/48 fcbb:0:400::/48 encap seg6local action End.X nh6 fe80::1:4 oif veth-rt-4-1 flavors next-csid lblen 32 nflen 16 dev dum0 metric 1024 pref medium The new test cases fail without the previous patch ("seg6: Allow End.X behavior to accept an oif"): # ./srv6_end_x_next_csid_l3vpn_test.sh [...] ################################################################################ TEST SECTION: SRv6 VPN connectivity test hosts (h1 <-> h2, IPv6), link-local ################################################################################ TEST: IPv6 Hosts connectivity: hs-1 -> hs-2 [FAIL] TEST: IPv6 Hosts connectivity: hs-2 -> hs-1 [FAIL] ################################################################################ TEST SECTION: SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4), link-local ################################################################################ TEST: IPv4 Hosts connectivity: hs-1 -> hs-2 [FAIL] TEST: IPv4 Hosts connectivity: hs-2 -> hs-1 [FAIL] Tests passed: 40 Tests failed: 4 And pass with it: # ./srv6_end_x_next_csid_l3vpn_test.sh [...] ################################################################################ TEST SECTION: SRv6 VPN connectivity test hosts (h1 <-> h2, IPv6), link-local ################################################################################ TEST: IPv6 Hosts connectivity: hs-1 -> hs-2 [ OK ] TEST: IPv6 Hosts connectivity: hs-2 -> hs-1 [ OK ] ################################################################################ TEST SECTION: SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4), link-local ################################################################################ TEST: IPv4 Hosts connectivity: hs-1 -> hs-2 [ OK ] TEST: IPv4 Hosts connectivity: hs-2 -> hs-1 [ OK ] Tests passed: 44 Tests failed: 0 Without the previous patch, rt-3 and rt-4 resolve the wrong routes for the link-local nexthops, with the output interface being the input interface: # perf script [...] ping 1067 [001] 37.554486: fib6:fib6_table_lookup: table 254 oif 0 iif 11 proto 41 cafe::254/0 -> fe80::4:3/0 flowlabel 0xb7973 tos 0 scope 0 flags 2 ==> dev veth-rt-3-1 gw :: err 0 [...] ping 1069 [002] 41.573360: fib6:fib6_table_lookup: table 254 oif 0 iif 12 proto 41 cafe::254/0 -> fe80::1:4/0 flowlabel 0xb7973 tos 0 scope 0 flags 2 ==> dev veth-rt-4-2 gw :: err 0 But the correct routes are resolved with the patch: # perf script [...] ping 1066 [006] 30.672355: fib6:fib6_table_lookup: table 254 oif 13 iif 1 proto 41 cafe::254/0 -> fe80::4:3/0 flowlabel 0x85941 tos 0 scope 0 flags 6 ==> dev veth-rt-3-4 gw :: err 0 [...] ping 1066 [006] 30.672411: fib6:fib6_table_lookup: table 254 oif 11 iif 1 proto 41 cafe::254/0 -> fe80::1:4/0 flowlabel 0x91de0 tos 0 scope 0 flags 6 ==> dev veth-rt-4-1 gw :: err 0 Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Reviewed-by: Andrea Mayer Link: https://patch.msgid.link/20250612122323.584113-5-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/srv6_end_x_next_csid_l3vpn_test.sh | 48 ++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh index 4b86040c58c6..bedf0ce885c2 100755 --- a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh @@ -72,6 +72,9 @@ # Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y in # the selftest network. # +# In addition, every router interface connecting rt-x to rt-y is assigned an +# IPv6 link-local address fe80::x:y/64. +# # Local SID/C-SID table # ===================== # @@ -521,6 +524,9 @@ setup_rt_networking() ip -netns "${nsname}" addr \ add "${net_prefix}::${rt}/64" dev "${devname}" nodad + ip -netns "${nsname}" addr \ + add "fe80::${rt}:${neigh}/64" dev "${devname}" nodad + ip -netns "${nsname}" link set "${devname}" up done @@ -609,6 +615,27 @@ set_end_x_nextcsid() nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}" } +set_end_x_ll_nextcsid() +{ + local rt="$1" + local adj="$2" + + eval nsname=\${$(get_rtname "${rt}")} + lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")" + nh6_ll_addr="fe80::${adj}:${rt}" + oifname="veth-rt-${rt}-${adj}" + + # enabled NEXT-C-SID SRv6 End.X behavior via an IPv6 link-local nexthop + # address (note that "dev" is the dummy dum0 device chosen for the sake + # of simplicity). + ip -netns "${nsname}" -6 route \ + replace "${lcnode_func_prefix}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action End.X nh6 "${nh6_ll_addr}" \ + oif "${oifname}" flavors next-csid lblen "${LCBLOCK_BLEN}" \ + nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}" +} + set_underlay_sids_reachability() { local rt="$1" @@ -1016,6 +1043,27 @@ host_vpn_tests() check_and_log_hs_ipv4_connectivity 1 2 check_and_log_hs_ipv4_connectivity 2 1 + + # Setup the adjacencies in the SRv6 aware routers using IPv6 link-local + # addresses. + # - rt-3 SRv6 End.X adjacency with rt-4 + # - rt-4 SRv6 End.X adjacency with rt-1 + set_end_x_ll_nextcsid 3 4 + set_end_x_ll_nextcsid 4 1 + + log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv6), link-local" + + check_and_log_hs_ipv6_connectivity 1 2 + check_and_log_hs_ipv6_connectivity 2 1 + + log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4), link-local" + + check_and_log_hs_ipv4_connectivity 1 2 + check_and_log_hs_ipv4_connectivity 2 1 + + # Restore the previous adjacencies. + set_end_x_nextcsid 3 4 + set_end_x_nextcsid 4 1 } __nextcsid_end_x_behavior_test() -- cgit v1.2.3 From a766cfbbeb3a74397965a8fa2e9a402026d3e1d8 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 12 Jun 2025 22:28:56 -0700 Subject: bpf: Mark dentry->d_inode as trusted_or_null LSM hooks such as security_path_mknod() and security_inode_rename() have access to newly allocated negative dentry, which has NULL d_inode. Therefore, it is necessary to do the NULL pointer check for d_inode. Also add selftests that checks the verifier enforces the NULL pointer check. Signed-off-by: Song Liu Reviewed-by: Matt Bobrowski Link: https://lore.kernel.org/r/20250613052857.1992233-1-song@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 5 ++--- .../testing/selftests/bpf/progs/verifier_vfs_accept.c | 18 ++++++++++++++++++ .../testing/selftests/bpf/progs/verifier_vfs_reject.c | 15 +++++++++++++++ 3 files changed, 35 insertions(+), 3 deletions(-) (limited to 'tools/testing/selftests') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a7d6e0c5928b..169845710c7e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7027,8 +7027,7 @@ BTF_TYPE_SAFE_TRUSTED(struct file) { struct inode *f_inode; }; -BTF_TYPE_SAFE_TRUSTED(struct dentry) { - /* no negative dentry-s in places where bpf can see it */ +BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry) { struct inode *d_inode; }; @@ -7066,7 +7065,6 @@ static bool type_is_trusted(struct bpf_verifier_env *env, BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task)); BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm)); BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file)); - BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct dentry)); return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted"); } @@ -7076,6 +7074,7 @@ static bool type_is_trusted_or_null(struct bpf_verifier_env *env, const char *field_name, u32 btf_id) { BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket)); + BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry)); return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted_or_null"); diff --git a/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c b/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c index a7c0a553aa50..3e2d76ee8050 100644 --- a/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c +++ b/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c @@ -2,6 +2,7 @@ /* Copyright (c) 2024 Google LLC. */ #include +#include #include #include @@ -82,4 +83,21 @@ int BPF_PROG(path_d_path_from_file_argument, struct file *file) return 0; } +SEC("lsm.s/inode_rename") +__success +int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + struct inode *inode = new_dentry->d_inode; + ino_t ino; + + if (!inode) + return 0; + ino = inode->i_ino; + if (ino == 0) + return -EACCES; + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c b/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c index d6d3f4fcb24c..4b392c6c8fc4 100644 --- a/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c +++ b/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c @@ -2,6 +2,7 @@ /* Copyright (c) 2024 Google LLC. */ #include +#include #include #include #include @@ -158,4 +159,18 @@ int BPF_PROG(path_d_path_kfunc_non_lsm, struct path *path, struct file *f) return 0; } +SEC("lsm.s/inode_rename") +__failure __msg("invalid mem access 'trusted_ptr_or_null_'") +int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + struct inode *inode = new_dentry->d_inode; + ino_t ino; + + ino = inode->i_ino; + if (ino == 0) + return -EACCES; + return 0; +} char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From a633dab4b4d2f06c0fcb3caa6f110efabdf889f9 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 16 Jun 2025 21:49:56 -0700 Subject: selftests/bpf: Fix RELEASE build failure with gcc14 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With gcc14, when building with RELEASE=1, I hit four below compilation failure: Error 1: In file included from test_loader.c:6: test_loader.c: In function ‘run_subtest’: test_progs.h:194:17: error: ‘retval’ may be used uninitialized in this function [-Werror=maybe-uninitialized] 194 | fprintf(stdout, ##format); \ | ^~~~~~~ test_loader.c:958:13: note: ‘retval’ was declared here 958 | int retval, err, i; | ^~~~~~ The uninitialized var 'retval' actually could cause incorrect result. Error 2: In function ‘test_fd_array_cnt’: prog_tests/fd_array.c:71:14: error: ‘btf_id’ may be used uninitialized in this function [-Werror=maybe-uninitialized] 71 | fd = bpf_btf_get_fd_by_id(id); | ^~~~~~~~~~~~~~~~~~~~~~~~ prog_tests/fd_array.c:302:15: note: ‘btf_id’ was declared here 302 | __u32 btf_id; | ^~~~~~ Changing ASSERT_GE to ASSERT_EQ can fix the compilation error. Otherwise, there is no functionality change. Error 3: prog_tests/tailcalls.c: In function ‘test_tailcall_hierarchy_count’: prog_tests/tailcalls.c:1402:23: error: ‘fentry_data_fd’ may be used uninitialized in this function [-Werror=maybe-uninitialized] 1402 | err = bpf_map_lookup_elem(fentry_data_fd, &i, &val); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The code is correct. The change intends to silence gcc errors. Error 4: (this error only happens on arm64) In file included from prog_tests/log_buf.c:4: prog_tests/log_buf.c: In function ‘bpf_prog_load_log_buf’: ./test_progs.h:390:22: error: ‘log_buf’ may be used uninitialized [-Werror=maybe-uninitialized] 390 | int ___err = libbpf_get_error(___res); \ | ^~~~~~~~~~~~~~~~~~~~~~~~ prog_tests/log_buf.c:158:14: note: in expansion of macro ‘ASSERT_OK_PTR’ 158 | if (!ASSERT_OK_PTR(log_buf, "log_buf_alloc")) | ^~~~~~~~~~~~~ In file included from selftests/bpf/tools/include/bpf/bpf.h:32, from ./test_progs.h:36: selftests/bpf/tools/include/bpf/libbpf_legacy.h:113:17: note: by argument 1 of type ‘const void *’ to ‘libbpf_get_error’ declared here 113 | LIBBPF_API long libbpf_get_error(const void *ptr); | ^~~~~~~~~~~~~~~~ Adding a pragma to disable maybe-uninitialized fixed the issue. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20250617044956.2686668-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/fd_array.c | 2 +- tools/testing/selftests/bpf/prog_tests/log_buf.c | 4 ++++ tools/testing/selftests/bpf/prog_tests/tailcalls.c | 2 +- tools/testing/selftests/bpf/test_loader.c | 6 +++--- 4 files changed, 9 insertions(+), 5 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/fd_array.c b/tools/testing/selftests/bpf/prog_tests/fd_array.c index 9add890c2d37..241b2c8c6e0f 100644 --- a/tools/testing/selftests/bpf/prog_tests/fd_array.c +++ b/tools/testing/selftests/bpf/prog_tests/fd_array.c @@ -312,7 +312,7 @@ static void check_fd_array_cnt__referenced_btfs(void) /* btf should still exist when original file descriptor is closed */ err = get_btf_id_by_fd(extra_fds[0], &btf_id); - if (!ASSERT_GE(err, 0, "get_btf_id_by_fd")) + if (!ASSERT_EQ(err, 0, "get_btf_id_by_fd")) goto cleanup; Close(extra_fds[0]); diff --git a/tools/testing/selftests/bpf/prog_tests/log_buf.c b/tools/testing/selftests/bpf/prog_tests/log_buf.c index 169ce689b97c..d6f14a232002 100644 --- a/tools/testing/selftests/bpf/prog_tests/log_buf.c +++ b/tools/testing/selftests/bpf/prog_tests/log_buf.c @@ -7,6 +7,10 @@ #include "test_log_buf.skel.h" #include "bpf_util.h" +#if !defined(__clang__) +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#endif + static size_t libbpf_log_pos; static char libbpf_log_buf[1024 * 1024]; static bool libbpf_log_error; diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index 66a900327f91..0ab36503c3b2 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -1195,7 +1195,7 @@ static void test_tailcall_hierarchy_count(const char *which, bool test_fentry, bool test_fexit, bool test_fentry_entry) { - int err, map_fd, prog_fd, main_data_fd, fentry_data_fd, fexit_data_fd, i, val; + int err, map_fd, prog_fd, main_data_fd, fentry_data_fd = 0, fexit_data_fd = 0, i, val; struct bpf_object *obj = NULL, *fentry_obj = NULL, *fexit_obj = NULL; struct bpf_link *fentry_link = NULL, *fexit_link = NULL; struct bpf_program *prog, *fentry_prog; diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 9551d8d5f8f9..2c7e9729d5fe 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -1103,9 +1103,9 @@ void run_subtest(struct test_loader *tester, } } - do_prog_test_run(bpf_program__fd(tprog), &retval, - bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false); - if (retval != subspec->retval && subspec->retval != POINTER_VALUE) { + err = do_prog_test_run(bpf_program__fd(tprog), &retval, + bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false); + if (!err && retval != subspec->retval && subspec->retval != POINTER_VALUE) { PRINT_FAIL("Unexpected retval: %d != %d\n", retval, subspec->retval); goto tobj_cleanup; } -- cgit v1.2.3 From 0925275a173d07786bfddf453f629f78d7fc4278 Mon Sep 17 00:00:00 2001 From: Michal Koutný Date: Tue, 17 Jun 2025 15:36:53 +0200 Subject: selftests: cgroup_util: Add helpers for testing named v1 hierarchies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Non-functional change, the control variable will be wired in a separate commit. Signed-off-by: Michal Koutný Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/lib/cgroup_util.c | 4 +++- tools/testing/selftests/cgroup/lib/include/cgroup_util.h | 5 +++++ tools/testing/selftests/cgroup/test_core.c | 6 +++--- 3 files changed, 11 insertions(+), 4 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/cgroup/lib/cgroup_util.c b/tools/testing/selftests/cgroup/lib/cgroup_util.c index 8832f3d1cb61..0e89fcff4d05 100644 --- a/tools/testing/selftests/cgroup/lib/cgroup_util.c +++ b/tools/testing/selftests/cgroup/lib/cgroup_util.c @@ -19,6 +19,8 @@ #include "cgroup_util.h" #include "../../clone3/clone3_selftests.h" +bool cg_test_v1_named; + /* Returns read len on success, or -errno on failure. */ ssize_t read_text(const char *path, char *buf, size_t max_len) { @@ -361,7 +363,7 @@ int cg_enter_current(const char *cgroup) int cg_enter_current_thread(const char *cgroup) { - return cg_write(cgroup, "cgroup.threads", "0"); + return cg_write(cgroup, CG_THREADS_FILE, "0"); } int cg_run(const char *cgroup, diff --git a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h index adb2bc193183..c69cab66254b 100644 --- a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h +++ b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h @@ -13,6 +13,10 @@ #define TEST_UID 65534 /* usually nobody, any !root is fine */ +#define CG_THREADS_FILE (!cg_test_v1_named ? "cgroup.threads" : "tasks") +#define CG_NAMED_NAME "selftest" +#define CG_PATH_FORMAT (!cg_test_v1_named ? "0::%s" : (":name=" CG_NAMED_NAME ":%s")) + /* * Checks if two given values differ by less than err% of their sum. */ @@ -65,3 +69,4 @@ extern int dirfd_open_opath(const char *dir); extern int cg_prepare_for_wait(const char *cgroup); extern int memcg_prepare_for_wait(const char *cgroup); extern int cg_wait_for(int fd); +extern bool cg_test_v1_named; diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index a5672a91d273..0c4cc4e5fc8c 100644 --- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c @@ -573,7 +573,7 @@ static int test_cgcore_proc_migration(const char *root) } cg_enter_current(dst); - if (cg_read_lc(dst, "cgroup.threads") != n_threads + 1) + if (cg_read_lc(dst, CG_THREADS_FILE) != n_threads + 1) goto cleanup; ret = KSFT_PASS; @@ -605,7 +605,7 @@ static void *migrating_thread_fn(void *arg) char lines[3][PATH_MAX]; for (g = 1; g < 3; ++g) - snprintf(lines[g], sizeof(lines[g]), "0::%s", grps[g] + strlen(grps[0])); + snprintf(lines[g], sizeof(lines[g]), CG_PATH_FORMAT, grps[g] + strlen(grps[0])); for (i = 0; i < n_iterations; ++i) { cg_enter_current_thread(grps[(i % 2) + 1]); @@ -659,7 +659,7 @@ static int test_cgcore_thread_migration(const char *root) if (retval) goto cleanup; - snprintf(line, sizeof(line), "0::%s", grps[1] + strlen(grps[0])); + snprintf(line, sizeof(line), CG_PATH_FORMAT, grps[1] + strlen(grps[0])); if (proc_read_strstr(0, 1, "cgroup", line)) goto cleanup; -- cgit v1.2.3 From dd7588e455f847d3b0108d9981b1fcff4441f77b Mon Sep 17 00:00:00 2001 From: Michal Koutný Date: Tue, 17 Jun 2025 15:36:54 +0200 Subject: selftests: cgroup: Add support for named v1 hierarchies in test_core MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This comes useful when using selftests from mainline on older kernels/setups that still rely on cgroup v1. The core tests that rely on v2 specific features are skipped, the remaining ones are adjusted to work with a v1 hierarchy. The expected output on v1 system: ok 1 # SKIP test_cgcore_internal_process_constraint ok 2 # SKIP test_cgcore_top_down_constraint_enable ok 3 # SKIP test_cgcore_top_down_constraint_disable ok 4 # SKIP test_cgcore_no_internal_process_constraint_on_threads ok 5 # SKIP test_cgcore_parent_becomes_threaded ok 6 # SKIP test_cgcore_invalid_domain ok 7 # SKIP test_cgcore_populated ok 8 test_cgcore_proc_migration ok 9 test_cgcore_thread_migration ok 10 test_cgcore_destroy ok 11 test_cgcore_lesser_euid_open ok 12 # SKIP test_cgcore_lesser_ns_open Signed-off-by: Michal Koutný Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/test_core.c | 31 +++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index 0c4cc4e5fc8c..338e276aae5d 100644 --- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c @@ -148,6 +148,9 @@ static int test_cgcore_populated(const char *root) int cgroup_fd = -EBADF; pid_t pid; + if (cg_test_v1_named) + return KSFT_SKIP; + cg_test_a = cg_name(root, "cg_test_a"); cg_test_b = cg_name(root, "cg_test_a/cg_test_b"); cg_test_c = cg_name(root, "cg_test_a/cg_test_b/cg_test_c"); @@ -277,6 +280,9 @@ static int test_cgcore_invalid_domain(const char *root) int ret = KSFT_FAIL; char *grandparent = NULL, *parent = NULL, *child = NULL; + if (cg_test_v1_named) + return KSFT_SKIP; + grandparent = cg_name(root, "cg_test_grandparent"); parent = cg_name(root, "cg_test_grandparent/cg_test_parent"); child = cg_name(root, "cg_test_grandparent/cg_test_parent/cg_test_child"); @@ -339,6 +345,9 @@ static int test_cgcore_parent_becomes_threaded(const char *root) int ret = KSFT_FAIL; char *parent = NULL, *child = NULL; + if (cg_test_v1_named) + return KSFT_SKIP; + parent = cg_name(root, "cg_test_parent"); child = cg_name(root, "cg_test_parent/cg_test_child"); if (!parent || !child) @@ -378,7 +387,8 @@ static int test_cgcore_no_internal_process_constraint_on_threads(const char *roo int ret = KSFT_FAIL; char *parent = NULL, *child = NULL; - if (cg_read_strstr(root, "cgroup.controllers", "cpu") || + if (cg_test_v1_named || + cg_read_strstr(root, "cgroup.controllers", "cpu") || cg_write(root, "cgroup.subtree_control", "+cpu")) { ret = KSFT_SKIP; goto cleanup; @@ -430,6 +440,9 @@ static int test_cgcore_top_down_constraint_enable(const char *root) int ret = KSFT_FAIL; char *parent = NULL, *child = NULL; + if (cg_test_v1_named) + return KSFT_SKIP; + parent = cg_name(root, "cg_test_parent"); child = cg_name(root, "cg_test_parent/cg_test_child"); if (!parent || !child) @@ -465,6 +478,9 @@ static int test_cgcore_top_down_constraint_disable(const char *root) int ret = KSFT_FAIL; char *parent = NULL, *child = NULL; + if (cg_test_v1_named) + return KSFT_SKIP; + parent = cg_name(root, "cg_test_parent"); child = cg_name(root, "cg_test_parent/cg_test_child"); if (!parent || !child) @@ -506,6 +522,9 @@ static int test_cgcore_internal_process_constraint(const char *root) int ret = KSFT_FAIL; char *parent = NULL, *child = NULL; + if (cg_test_v1_named) + return KSFT_SKIP; + parent = cg_name(root, "cg_test_parent"); child = cg_name(root, "cg_test_parent/cg_test_child"); if (!parent || !child) @@ -642,10 +661,12 @@ static int test_cgcore_thread_migration(const char *root) if (cg_create(grps[2])) goto cleanup; - if (cg_write(grps[1], "cgroup.type", "threaded")) - goto cleanup; - if (cg_write(grps[2], "cgroup.type", "threaded")) - goto cleanup; + if (!cg_test_v1_named) { + if (cg_write(grps[1], "cgroup.type", "threaded")) + goto cleanup; + if (cg_write(grps[2], "cgroup.type", "threaded")) + goto cleanup; + } if (cg_enter_current(grps[1])) goto cleanup; -- cgit v1.2.3 From d74cd7864ffa6913f1d70f80858bd3fd19101cdf Mon Sep 17 00:00:00 2001 From: Michal Koutný Date: Tue, 17 Jun 2025 15:36:55 +0200 Subject: selftests: cgroup: Optionally set up v1 environment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the missing mount of the unifier hierarchy as a hint of legacy system and prepare our own named v1 hierarchy for tests. The code is only in test_core.c and not cgroup_util.c because other selftests are related to controllers which will be exposed on v2 hierarchy but named hierarchies are only v1 thing. Signed-off-by: Michal Koutný Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/test_core.c | 44 ++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index 338e276aae5d..452c2abf9794 100644 --- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c @@ -5,6 +5,8 @@ #include #include #include +#include +#include #include #include #include @@ -863,6 +865,38 @@ cleanup: return ret; } +static int setup_named_v1_root(char *root, size_t len, const char *name) +{ + char options[PATH_MAX]; + int r; + + r = snprintf(root, len, "/mnt/cg_selftest"); + if (r < 0) + return r; + + r = snprintf(options, sizeof(options), "none,name=%s", name); + if (r < 0) + return r; + + r = mkdir(root, 0755); + if (r < 0 && errno != EEXIST) + return r; + + r = mount("none", root, "cgroup", 0, options); + if (r < 0) + return r; + + return 0; +} + +static void cleanup_named_v1_root(char *root) +{ + if (!cg_test_v1_named) + return; + umount(root); + rmdir(root); +} + #define T(x) { x, #x } struct corecg_test { int (*fn)(const char *root); @@ -888,13 +922,18 @@ int main(int argc, char *argv[]) char root[PATH_MAX]; int i, ret = EXIT_SUCCESS; - if (cg_find_unified_root(root, sizeof(root), &nsdelegate)) - ksft_exit_skip("cgroup v2 isn't mounted\n"); + if (cg_find_unified_root(root, sizeof(root), &nsdelegate)) { + if (setup_named_v1_root(root, sizeof(root), CG_NAMED_NAME)) + ksft_exit_skip("cgroup v2 isn't mounted and could not setup named v1 hierarchy\n"); + cg_test_v1_named = true; + goto post_v2_setup; + } if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) if (cg_write(root, "cgroup.subtree_control", "+memory")) ksft_exit_skip("Failed to set memory controller\n"); +post_v2_setup: for (i = 0; i < ARRAY_SIZE(tests); i++) { switch (tests[i].fn(root)) { case KSFT_PASS: @@ -910,5 +949,6 @@ int main(int argc, char *argv[]) } } + cleanup_named_v1_root(root); return ret; } -- cgit v1.2.3 From 5da4f9db980cc475bb6f027153cce75eaa3026ec Mon Sep 17 00:00:00 2001 From: Michal Koutný Date: Tue, 17 Jun 2025 15:36:56 +0200 Subject: selftests: cgroup: Fix compilation on pre-cgroupns kernels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The test would be skipped because of nsdelegate, so the defined value is not used (0 is always acceptable). Signed-off-by: Michal Koutný Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/test_core.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index 452c2abf9794..a360e2eb2eef 100644 --- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c @@ -21,6 +21,9 @@ #include "cgroup_util.h" static bool nsdelegate; +#ifndef CLONE_NEWCGROUP +#define CLONE_NEWCGROUP 0 +#endif static int touch_anon(char *buf, size_t size) { -- cgit v1.2.3 From 44c71c16f37d5c40418801e7868b62acdcc6b701 Mon Sep 17 00:00:00 2001 From: Jihed Chaibi Date: Fri, 23 May 2025 22:22:39 +0200 Subject: selftests/cpu-hotplug: fix typo in hotplaggable_offline_cpus function name Fix spelling error replacing "hotplaggable" by "hotpluggable" Fixed change log: Shuah Khan Link: https://lore.kernel.org/r/20250523202239.276760-1-jihed.chaibi.dev@gmail.com Signed-off-by: Jihed Chaibi Signed-off-by: Shuah Khan --- tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh index d5dc7e0dc726..6232a46ca6e1 100755 --- a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh +++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh @@ -67,7 +67,7 @@ hotpluggable_cpus() done } -hotplaggable_offline_cpus() +hotpluggable_offline_cpus() { hotpluggable_cpus 0 } @@ -151,7 +151,7 @@ offline_cpu_expect_fail() online_all_hot_pluggable_cpus() { - for cpu in `hotplaggable_offline_cpus`; do + for cpu in `hotpluggable_offline_cpus`; do online_cpu_expect_success $cpu done } -- cgit v1.2.3 From cd9f02adca658f74cd7b28334ce163170bd3c19c Mon Sep 17 00:00:00 2001 From: Tianyi Cui <1997cui@gmail.com> Date: Tue, 10 Jun 2025 15:12:07 -0700 Subject: selftests: Add version file to kselftest installation dir As titled, adding version file to kselftest installation dir, so the user of the tarball can know which kernel version the tarball belongs to. Link: https://lore.kernel.org/r/20250610221248.819519-1-1997cui@gmail.com Signed-off-by: Tianyi Cui <1997cui@gmail.com> Signed-off-by: Shuah Khan --- tools/testing/selftests/Makefile | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 339b31e6a6b5..9dae84a74e7f 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -293,6 +293,13 @@ ifdef INSTALL_PATH $(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET COLLECTION=$$TARGET \ -C $$TARGET emit_tests >> $(TEST_LIST); \ done; + @VERSION=$$(git describe HEAD 2>/dev/null); \ + if [ -n "$$VERSION" ]; then \ + echo "$$VERSION" > $(INSTALL_PATH)/VERSION; \ + printf "Version saved to $(INSTALL_PATH)/VERSION\n"; \ + else \ + printf "Unable to get version from git describe\n"; \ + fi else $(error Error: set INSTALL_PATH to use install) endif -- cgit v1.2.3 From fc2915bb8bfcfdc6e5ea2cfc8d13fbaefe77c2c8 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Mon, 16 Jun 2025 17:57:10 -0700 Subject: selftests/bpf: More precise cpu_mitigations state detection test_progs and test_verifier binaries execute unpriv tests under the following conditions: - unpriv BPF is enabled; - CPU mitigations are enabled (see [1] for details). The detection of the "mitigations enabled" state is performed by unpriv_helpers.c:get_mitigations_off() via inspecting kernel boot command line, looking for a parameter "mitigations=off". Such detection scheme won't work for certain configurations, e.g. when CONFIG_CPU_MITIGATIONS is disabled and boot parameter is not supplied. Miss-detection leads to test_progs executing tests meant to be run only with mitigations enabled, e.g. verifier_and.c:known_subreg_with_unknown_reg(), and reporting false failures. Internally, verifier sets bpf_verifier_env->bypass_spec_{v1,v4} basing on the value returned by kernel/cpu.c:cpu_mitigations_off(). This function is backed by a variable kernel/cpu.c:cpu_mitigations. This state is not fully introspect-able via sysfs. The closest proxy is /sys/devices/system/cpu/vulnerabilities/spectre_v1, but it reports "vulnerable" state only if mitigations are disabled *and* current cpu is vulnerable, while verifier does not check cpu state. There are only two ways the kernel/cpu.c:cpu_mitigations can be set: - via boot parameter; - via CONFIG_CPU_MITIGATIONS option. This commit updates unpriv_helpers.c:get_mitigations_off() to scan /boot/config-$(uname -r) and /proc/config.gz for CONFIG_CPU_MITIGATIONS value in addition to boot command line check. Tested using the following configurations: - mitigations enabled (unpriv tests are enabled) - mitigations disabled via boot cmdline (unpriv tests skipped) - mitigations disabled via CONFIG_CPU_MITIGATIONS (unpriv tests skipped) [1] https://lore.kernel.org/bpf/20231025031144.5508-1-laoar.shao@gmail.com/ Reported-by: Mykyta Yatsenko Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250617005710.1066165-2-eddyz87@gmail.com --- tools/testing/selftests/bpf/unpriv_helpers.c | 93 +++++++++++++++++++++++++++- 1 file changed, 90 insertions(+), 3 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/unpriv_helpers.c b/tools/testing/selftests/bpf/unpriv_helpers.c index 220f6a963813..3aa9ee80a55e 100644 --- a/tools/testing/selftests/bpf/unpriv_helpers.c +++ b/tools/testing/selftests/bpf/unpriv_helpers.c @@ -1,15 +1,75 @@ // SPDX-License-Identifier: GPL-2.0-only +#include #include #include #include #include +#include #include #include +#include #include "unpriv_helpers.h" -static bool get_mitigations_off(void) +static gzFile open_config(void) +{ + struct utsname uts; + char buf[PATH_MAX]; + gzFile config; + + if (uname(&uts)) { + perror("uname"); + goto config_gz; + } + + snprintf(buf, sizeof(buf), "/boot/config-%s", uts.release); + config = gzopen(buf, "rb"); + if (config) + return config; + fprintf(stderr, "gzopen %s: %s\n", buf, strerror(errno)); + +config_gz: + config = gzopen("/proc/config.gz", "rb"); + if (!config) + perror("gzopen /proc/config.gz"); + return config; +} + +static int config_contains(const char *pat) +{ + const char *msg; + char buf[1024]; + gzFile config; + int n, err; + + config = open_config(); + if (!config) + return -1; + + for (;;) { + if (!gzgets(config, buf, sizeof(buf))) { + msg = gzerror(config, &err); + if (err == Z_ERRNO) + perror("gzgets /proc/config.gz"); + else if (err != Z_OK) + fprintf(stderr, "gzgets /proc/config.gz: %s", msg); + gzclose(config); + return -1; + } + n = strlen(buf); + if (buf[n - 1] == '\n') + buf[n - 1] = 0; + if (strcmp(buf, pat) == 0) { + gzclose(config); + return 1; + } + } + gzclose(config); + return 0; +} + +static bool cmdline_contains(const char *pat) { char cmdline[4096], *c; int fd, ret = false; @@ -27,7 +87,7 @@ static bool get_mitigations_off(void) cmdline[sizeof(cmdline) - 1] = '\0'; for (c = strtok(cmdline, " \n"); c; c = strtok(NULL, " \n")) { - if (strncmp(c, "mitigations=off", strlen(c))) + if (strncmp(c, pat, strlen(c))) continue; ret = true; break; @@ -37,8 +97,21 @@ out: return ret; } +static int get_mitigations_off(void) +{ + int enabled_in_config; + + if (cmdline_contains("mitigations=off")) + return 1; + enabled_in_config = config_contains("CONFIG_CPU_MITIGATIONS=y"); + if (enabled_in_config < 0) + return -1; + return !enabled_in_config; +} + bool get_unpriv_disabled(void) { + int mitigations_off; bool disabled; char buf[2]; FILE *fd; @@ -52,5 +125,19 @@ bool get_unpriv_disabled(void) disabled = true; } - return disabled ? true : get_mitigations_off(); + if (disabled) + return true; + + /* + * Some unpriv tests rely on spectre mitigations being on. + * If mitigations are off or status can't be determined + * assume that unpriv tests are disabled. + */ + mitigations_off = get_mitigations_off(); + if (mitigations_off < 0) { + fprintf(stderr, + "Can't determine if mitigations are enabled, disabling unpriv tests."); + return true; + } + return mitigations_off; } -- cgit v1.2.3 From 66ab68c9de89672366fdc474f4f185bb58cecf2d Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Tue, 17 Jun 2025 13:15:36 +0100 Subject: selftests/bpf: Fix unintentional switch case fall through Break from switch expression after parsing -n CLI argument in veristat, instead of falling through and enabling comparison mode. Fixes: a5c57f81eb2b ("veristat: add ability to set BPF_F_TEST_SANITY_STRICT flag with -r flag") Signed-off-by: Mykyta Yatsenko Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20250617121536.1320074-1-mykyta.yatsenko5@gmail.com --- tools/testing/selftests/bpf/veristat.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 56771650ee8c..4da627ca5749 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -364,6 +364,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) fprintf(stderr, "invalid top N specifier: %s\n", arg); argp_usage(state); } + break; case 'C': env.comparison_mode = true; break; -- cgit v1.2.3 From da9ba4132057cc31ac2ab9dfa9c98b7baaac3ca8 Mon Sep 17 00:00:00 2001 From: Nick Huang Date: Sat, 31 May 2025 15:01:40 +0800 Subject: selftests: ipc: Replace fail print statements with ksft_test_result_fail Use the standard kselftest failure report function to ensure consistent test output formatting. This improves readability and integration with automated test frameworks. Link: https://lore.kernel.org/r/20250531070140.24287-1-sef1548@gmail.com Signed-off-by: Nick Huang Signed-off-by: Shuah Khan --- tools/testing/selftests/ipc/msgque.c | 47 ++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 24 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c index e9dbb84c100a..5e36aeeb9901 100644 --- a/tools/testing/selftests/ipc/msgque.c +++ b/tools/testing/selftests/ipc/msgque.c @@ -39,26 +39,26 @@ int restore_queue(struct msgque_data *msgque) fd = open("/proc/sys/kernel/msg_next_id", O_WRONLY); if (fd == -1) { - printf("Failed to open /proc/sys/kernel/msg_next_id\n"); + ksft_test_result_fail("Failed to open /proc/sys/kernel/msg_next_id\n"); return -errno; } sprintf(buf, "%d", msgque->msq_id); ret = write(fd, buf, strlen(buf)); if (ret != strlen(buf)) { - printf("Failed to write to /proc/sys/kernel/msg_next_id\n"); + ksft_test_result_fail("Failed to write to /proc/sys/kernel/msg_next_id\n"); return -errno; } id = msgget(msgque->key, msgque->mode | IPC_CREAT | IPC_EXCL); if (id == -1) { - printf("Failed to create queue\n"); + ksft_test_result_fail("Failed to create queue\n"); return -errno; } if (id != msgque->msq_id) { - printf("Restored queue has wrong id (%d instead of %d)\n", - id, msgque->msq_id); + ksft_test_result_fail("Restored queue has wrong id (%d instead of %d)\n" + , id, msgque->msq_id); ret = -EFAULT; goto destroy; } @@ -66,7 +66,7 @@ int restore_queue(struct msgque_data *msgque) for (i = 0; i < msgque->qnum; i++) { if (msgsnd(msgque->msq_id, &msgque->messages[i].mtype, msgque->messages[i].msize, IPC_NOWAIT) != 0) { - printf("msgsnd failed (%m)\n"); + ksft_test_result_fail("msgsnd failed (%m)\n"); ret = -errno; goto destroy; } @@ -90,23 +90,22 @@ int check_and_destroy_queue(struct msgque_data *msgque) if (ret < 0) { if (errno == ENOMSG) break; - printf("Failed to read IPC message: %m\n"); + ksft_test_result_fail("Failed to read IPC message: %m\n"); ret = -errno; goto err; } if (ret != msgque->messages[cnt].msize) { - printf("Wrong message size: %d (expected %d)\n", ret, - msgque->messages[cnt].msize); + ksft_test_result_fail("Wrong message size: %d (expected %d)\n", ret, msgque->messages[cnt].msize); ret = -EINVAL; goto err; } if (message.mtype != msgque->messages[cnt].mtype) { - printf("Wrong message type\n"); + ksft_test_result_fail("Wrong message type\n"); ret = -EINVAL; goto err; } if (memcmp(message.mtext, msgque->messages[cnt].mtext, ret)) { - printf("Wrong message content\n"); + ksft_test_result_fail("Wrong message content\n"); ret = -EINVAL; goto err; } @@ -114,7 +113,7 @@ int check_and_destroy_queue(struct msgque_data *msgque) } if (cnt != msgque->qnum) { - printf("Wrong message number\n"); + ksft_test_result_fail("Wrong message number\n"); ret = -EINVAL; goto err; } @@ -139,7 +138,7 @@ int dump_queue(struct msgque_data *msgque) if (ret < 0) { if (errno == EINVAL) continue; - printf("Failed to get stats for IPC queue with id %d\n", + ksft_test_result_fail("Failed to get stats for IPC queue with id %d\n", kern_id); return -errno; } @@ -150,7 +149,7 @@ int dump_queue(struct msgque_data *msgque) msgque->messages = malloc(sizeof(struct msg1) * ds.msg_qnum); if (msgque->messages == NULL) { - printf("Failed to get stats for IPC queue\n"); + ksft_test_result_fail("Failed to get stats for IPC queue\n"); return -ENOMEM; } @@ -162,7 +161,7 @@ int dump_queue(struct msgque_data *msgque) ret = msgrcv(msgque->msq_id, &msgque->messages[i].mtype, MAX_MSG_SIZE, i, IPC_NOWAIT | MSG_COPY); if (ret < 0) { - printf("Failed to copy IPC message: %m (%d)\n", errno); + ksft_test_result_fail("Failed to copy IPC message: %m (%d)\n", errno); return -errno; } msgque->messages[i].msize = ret; @@ -178,7 +177,7 @@ int fill_msgque(struct msgque_data *msgque) memcpy(msgbuf.mtext, TEST_STRING, sizeof(TEST_STRING)); if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(TEST_STRING), IPC_NOWAIT) != 0) { - printf("First message send failed (%m)\n"); + ksft_test_result_fail("First message send failed (%m)\n"); return -errno; } @@ -186,7 +185,7 @@ int fill_msgque(struct msgque_data *msgque) memcpy(msgbuf.mtext, ANOTHER_TEST_STRING, sizeof(ANOTHER_TEST_STRING)); if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(ANOTHER_TEST_STRING), IPC_NOWAIT) != 0) { - printf("Second message send failed (%m)\n"); + ksft_test_result_fail("Second message send failed (%m)\n"); return -errno; } return 0; @@ -202,44 +201,44 @@ int main(int argc, char **argv) msgque.key = ftok(argv[0], 822155650); if (msgque.key == -1) { - printf("Can't make key: %d\n", -errno); + ksft_test_result_fail("Can't make key: %d\n", -errno); ksft_exit_fail(); } msgque.msq_id = msgget(msgque.key, IPC_CREAT | IPC_EXCL | 0666); if (msgque.msq_id == -1) { err = -errno; - printf("Can't create queue: %d\n", err); + ksft_test_result_fail("Can't create queue: %d\n", err); goto err_out; } err = fill_msgque(&msgque); if (err) { - printf("Failed to fill queue: %d\n", err); + ksft_test_result_fail("Failed to fill queue: %d\n", err); goto err_destroy; } err = dump_queue(&msgque); if (err) { - printf("Failed to dump queue: %d\n", err); + ksft_test_result_fail("Failed to dump queue: %d\n", err); goto err_destroy; } err = check_and_destroy_queue(&msgque); if (err) { - printf("Failed to check and destroy queue: %d\n", err); + ksft_test_result_fail("Failed to check and destroy queue: %d\n", err); goto err_out; } err = restore_queue(&msgque); if (err) { - printf("Failed to restore queue: %d\n", err); + ksft_test_result_fail("Failed to restore queue: %d\n", err); goto err_destroy; } err = check_and_destroy_queue(&msgque); if (err) { - printf("Failed to test queue: %d\n", err); + ksft_test_result_fail("Failed to test queue: %d\n", err); goto err_out; } ksft_exit_pass(); -- cgit v1.2.3 From 416b6030e39e29f82e1a39fd9f321e5b69d935c1 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Sun, 15 Jun 2025 01:48:12 -0700 Subject: selftests: nettest: Fix typo in log and error messages for clarity This patch corrects several logging and error message in nettest.c: - Corrects function name in log messages "setsockopt" -> "getsockopt". - Closes missing parentheses in "setsockopt(IPV6_FREEBIND)". - Replaces misleading error text ("Invalid port") with the correct description ("Invalid prefix length"). - remove Redundant wording like "status from status" and clarifies context in IPC error messages. These changes improve readability and aid in debugging test output. Signed-off-by: Alok Tiwari Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250615084822.1344759-1-alok.a.tiwari@oracle.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/nettest.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index cd8a58097448..1f5227f3d64d 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -385,7 +385,7 @@ static int get_bind_to_device(int sd, char *name, size_t len) name[0] = '\0'; rc = getsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, name, &optlen); if (rc < 0) - log_err_errno("setsockopt(SO_BINDTODEVICE)"); + log_err_errno("getsockopt(SO_BINDTODEVICE)"); return rc; } @@ -535,7 +535,7 @@ static int set_freebind(int sd, int version) break; case AF_INET6: if (setsockopt(sd, SOL_IPV6, IPV6_FREEBIND, &one, sizeof(one))) { - log_err_errno("setsockopt(IPV6_FREEBIND"); + log_err_errno("setsockopt(IPV6_FREEBIND)"); rc = -1; } break; @@ -812,7 +812,7 @@ static int convert_addr(struct sock_args *args, const char *_str, sep++; if (str_to_uint(sep, 1, pfx_len_max, &args->prefix_len) != 0) { - fprintf(stderr, "Invalid port\n"); + fprintf(stderr, "Invalid prefix length\n"); return 1; } } else { @@ -1272,7 +1272,7 @@ static int msg_loop(int client, int sd, void *addr, socklen_t alen, } } - nfds = interactive ? MAX(fileno(stdin), sd) + 1 : sd + 1; + nfds = interactive ? MAX(fileno(stdin), sd) + 1 : sd + 1; while (1) { FD_ZERO(&rfds); FD_SET(sd, &rfds); @@ -1492,7 +1492,7 @@ static int lsock_init(struct sock_args *args) sd = socket(args->version, args->type, args->protocol); if (sd < 0) { log_err_errno("Error opening socket"); - return -1; + return -1; } if (set_reuseaddr(sd) != 0) @@ -1912,7 +1912,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args) * waiting to be told when to continue */ if (read(fd, &buf, sizeof(buf)) <= 0) { - log_err_errno("Failed to read IPC status from status"); + log_err_errno("Failed to read IPC status from pipe"); return 1; } if (!buf) { -- cgit v1.2.3 From e74058f5619f17a4ee2ffa2b426d989a9b9c6293 Mon Sep 17 00:00:00 2001 From: Yuyang Huang Date: Sat, 14 Jun 2025 14:35:22 +0900 Subject: selftest: Add selftest for multicast address notifications MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds a new kernel selftest to verify RTNLGRP_IPV4_MCADDR and RTNLGRP_IPV6_MCADDR notifications. The test works by adding and removing a dummy interface and then confirming that the system correctly receives join and removal notifications for the 224.0.0.1 and ff02::1 multicast addresses. The test relies on the iproute2 version to be 6.13+. Tested by the following command: $ vng -v --user root --cpus 16 -- \ make -C tools/testing/selftests TARGETS=net TEST_PROGS=rtnetlink_notification.sh \ TEST_GEN_PROGS="" run_tests Cc: Maciej Żenczykowski Cc: Lorenzo Colitti Signed-off-by: Yuyang Huang Link: https://patch.msgid.link/20250614053522.623820-1-yuyanghuang@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + .../selftests/net/rtnetlink_notification.sh | 70 ++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100755 tools/testing/selftests/net/rtnetlink_notification.sh (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index ab996bd22a5f..3abb74d563a7 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -41,6 +41,7 @@ TEST_PROGS += netns-name.sh TEST_PROGS += link_netns.py TEST_PROGS += nl_netdev.py TEST_PROGS += rtnetlink.py +TEST_PROGS += rtnetlink_notification.sh TEST_PROGS += srv6_end_dt46_l3vpn_test.sh TEST_PROGS += srv6_end_dt4_l3vpn_test.sh TEST_PROGS += srv6_end_dt6_l3vpn_test.sh diff --git a/tools/testing/selftests/net/rtnetlink_notification.sh b/tools/testing/selftests/net/rtnetlink_notification.sh new file mode 100755 index 000000000000..39c1b815bbe4 --- /dev/null +++ b/tools/testing/selftests/net/rtnetlink_notification.sh @@ -0,0 +1,70 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking rtnetlink notification callpaths, and get as much +# coverage as possible. +# +# set -e + +ALL_TESTS=" + kci_test_mcast_addr_notification +" + +source lib.sh + +kci_test_mcast_addr_notification() +{ + RET=0 + local tmpfile + local monitor_pid + local match_result + local test_dev="test-dummy1" + + tmpfile=$(mktemp) + defer rm "$tmpfile" + + ip monitor maddr > $tmpfile & + monitor_pid=$! + defer kill_process "$monitor_pid" + + sleep 1 + + if [ ! -e "/proc/$monitor_pid" ]; then + RET=$ksft_skip + log_test "mcast addr notification: iproute2 too old" + return $RET + fi + + ip link add name "$test_dev" type dummy + check_err $? "failed to add dummy interface" + ip link set "$test_dev" up + check_err $? "failed to set dummy interface up" + ip link del dev "$test_dev" + check_err $? "Failed to delete dummy interface" + sleep 1 + + # There should be 4 line matches as follows. + # 13: test-dummy1    inet6 mcast ff02::1 scope global  + # 13: test-dummy1    inet mcast 224.0.0.1 scope global  + # Deleted 13: test-dummy1    inet mcast 224.0.0.1 scope global  + # Deleted 13: test-dummy1    inet6 mcast ff02::1 scope global  + match_result=$(grep -cE "$test_dev.*(224.0.0.1|ff02::1)" "$tmpfile") + if [ "$match_result" -ne 4 ]; then + RET=$ksft_fail + fi + log_test "mcast addr notification: Expected 4 matches, got $match_result" + return $RET +} + +#check for needed privileges +if [ "$(id -u)" -ne 0 ];then + RET=$ksft_skip + log_test "need root privileges" + exit $RET +fi + +require_command ip + +tests_run + +exit $EXIT_STATUS -- cgit v1.2.3 From 46cbaef5d8162de8b0b8faf562b69313de6638ef Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Sun, 15 Jun 2025 20:35:10 +0000 Subject: selftests: devmem: remove unused variable Trivial fix to unused variable. Signed-off-by: Mina Almasry Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250615203511.591438-2-almasrymina@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/ncdevmem.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index 02e4d3d7ded2..cc9b40d9c5d5 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -852,7 +852,6 @@ static int do_client(struct memory_buffer *mem) ssize_t line_size = 0; struct cmsghdr *cmsg; char *line = NULL; - unsigned long mid; size_t len = 0; int socket_fd; __u32 ddmabuf; -- cgit v1.2.3 From fb7612b6c44b12d46d56eab92f5c9ceb7057dc40 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Sun, 15 Jun 2025 20:35:11 +0000 Subject: selftests: devmem: add ipv4 support to chunks test Add ipv4 support to the recently added chunks tests, which was added as ipv6 only. Signed-off-by: Mina Almasry Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250615203511.591438-3-almasrymina@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/devmem.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py index 7947650210a0..baa2f24240ba 100755 --- a/tools/testing/selftests/drivers/net/hw/devmem.py +++ b/tools/testing/selftests/drivers/net/hw/devmem.py @@ -51,15 +51,14 @@ def check_tx(cfg) -> None: @ksft_disruptive def check_tx_chunks(cfg) -> None: - cfg.require_ipver("6") require_devmem(cfg) port = rand_port() - listen_cmd = f"socat -U - TCP6-LISTEN:{port}" + listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}" with bkg(listen_cmd, exit_wait=True) as socat: wait_port_listen(port) - cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr_v['6']} -p {port} -z 3", host=cfg.remote, shell=True) + cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr} -p {port} -z 3", host=cfg.remote, shell=True) ksft_eq(socat.stdout.strip(), "hello\nworld") -- cgit v1.2.3 From bd07bd12f2c17d3e13d58c09f5eac5d021ec14ea Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Tue, 17 Jun 2025 10:57:36 -0400 Subject: bpf: Fix key serial argument of bpf_lookup_user_key() The underlying lookup_user_key() function uses a signed 32 bit integer for key serial numbers because legitimate serial numbers are positive (and > 3) and keyrings are negative. Using a u32 for the keyring in the bpf function doesn't currently cause any conversion problems but will start to trip the signed to unsigned conversion warnings when the kernel enables them, so convert the argument to signed (and update the tests accordingly) before it acquires more users. Signed-off-by: James Bottomley Reviewed-by: Roberto Sassu Link: https://lore.kernel.org/r/84cdb0775254d297d75e21f577089f64abdfbd28.camel@HansenPartnership.com Signed-off-by: Alexei Starovoitov --- kernel/trace/bpf_trace.c | 2 +- tools/testing/selftests/bpf/bpf_kfuncs.h | 2 +- tools/testing/selftests/bpf/progs/rcu_read_lock.c | 5 +++-- tools/testing/selftests/bpf/progs/test_lookup_key.c | 4 ++-- tools/testing/selftests/bpf/progs/test_sig_in_xattr.c | 2 +- tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c | 2 +- tools/testing/selftests/bpf/progs/verifier_ref_tracking.c | 2 +- 7 files changed, 10 insertions(+), 9 deletions(-) (limited to 'tools/testing/selftests') diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 132c8be6f635..0a06ea6638fe 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1270,7 +1270,7 @@ __bpf_kfunc_start_defs(); * Return: a bpf_key pointer with a valid key pointer if the key is found, a * NULL pointer otherwise. */ -__bpf_kfunc struct bpf_key *bpf_lookup_user_key(u32 serial, u64 flags) +__bpf_kfunc struct bpf_key *bpf_lookup_user_key(s32 serial, u64 flags) { key_ref_t key_ref; struct bpf_key *bkey; diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index 8215c9b3115e..9386dfe8b884 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -69,7 +69,7 @@ extern int bpf_get_file_xattr(struct file *file, const char *name, struct bpf_dynptr *value_ptr) __ksym; extern int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr *digest_ptr) __ksym; -extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; +extern struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym; extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym; extern void bpf_key_put(struct bpf_key *key) __ksym; extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr, diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c index 43637ee2cdcd..3a868a199349 100644 --- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c @@ -16,10 +16,11 @@ struct { __type(value, long); } map_a SEC(".maps"); -__u32 user_data, key_serial, target_pid; +__u32 user_data, target_pid; +__s32 key_serial; __u64 flags, task_storage_val, cgroup_id; -struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; +struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym; void bpf_key_put(struct bpf_key *key) __ksym; void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym; diff --git a/tools/testing/selftests/bpf/progs/test_lookup_key.c b/tools/testing/selftests/bpf/progs/test_lookup_key.c index cdbbb12f1491..1f7e1e59b073 100644 --- a/tools/testing/selftests/bpf/progs/test_lookup_key.c +++ b/tools/testing/selftests/bpf/progs/test_lookup_key.c @@ -14,11 +14,11 @@ char _license[] SEC("license") = "GPL"; __u32 monitored_pid; -__u32 key_serial; +__s32 key_serial; __u32 key_id; __u64 flags; -extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; +extern struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym; extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym; extern void bpf_key_put(struct bpf_key *key) __ksym; diff --git a/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c index 8ef6b39335b6..34b30e2603f0 100644 --- a/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c +++ b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c @@ -40,7 +40,7 @@ char digest[MAGIC_SIZE + SIZEOF_STRUCT_FSVERITY_DIGEST + SHA256_DIGEST_SIZE]; __u32 monitored_pid; char sig[MAX_SIG_SIZE]; __u32 sig_size; -__u32 user_keyring_serial; +__s32 user_keyring_serial; SEC("lsm.s/file_open") int BPF_PROG(test_file_open, struct file *f) diff --git a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c index e96d09e11115..ff8d755548b9 100644 --- a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c +++ b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c @@ -17,7 +17,7 @@ #define MAX_SIG_SIZE 1024 __u32 monitored_pid; -__u32 user_keyring_serial; +__s32 user_keyring_serial; __u64 system_keyring_id; struct data { diff --git a/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c b/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c index 683a882b3e6d..910365201f68 100644 --- a/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c +++ b/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c @@ -27,7 +27,7 @@ struct bpf_key {} __attribute__((preserve_access_index)); extern void bpf_key_put(struct bpf_key *key) __ksym; extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym; -extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; +extern struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym; /* BTF FUNC records are not generated for kfuncs referenced * from inline assembly. These records are necessary for -- cgit v1.2.3 From 2a719b7bacc74be9f04147be01262b6289ad8dc5 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 17 Jun 2025 00:44:20 +0200 Subject: selftests: forwarding: lib: Move smcrouted helpers here router_multicast.sh has several helpers for work with smcrouted. Extract them to lib.sh so that other selftests can use them as well. Convert the helpers to defer in the process, because that simplifies the interface quite a bit. Therefore have router_multicast.sh invoke defer_scopes_cleanup() in its cleanup() function. Signed-off-by: Petr Machata Link: https://patch.msgid.link/410411c1a81225ce6e44542289b9c3ec21e5786c.1750113335.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/lib.sh | 33 ++++++++++++++++++++ .../selftests/net/forwarding/router_multicast.sh | 35 ++++------------------ 2 files changed, 39 insertions(+), 29 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 508f3c700d71..253847372062 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -37,6 +37,7 @@ declare -A NETIFS=( : "${TEAMD:=teamd}" : "${MCD:=smcrouted}" : "${MC_CLI:=smcroutectl}" +: "${MCD_TABLE_NAME:=selftests}" # Constants for netdevice bring-up: # Default time in seconds to wait for an interface to come up before giving up @@ -1757,6 +1758,38 @@ mc_send() msend -g $groups -I $if_name -c 1 > /dev/null 2>&1 } +adf_mcd_start() +{ + local table_name="$MCD_TABLE_NAME" + local smcroutedir + local pid + local i + + check_command "$MCD" || return 1 + check_command "$MC_CLI" || return 1 + + smcroutedir=$(mktemp -d) + defer rm -rf "$smcroutedir" + + for ((i = 1; i <= NUM_NETIFS; ++i)); do + echo "phyint ${NETIFS[p$i]} enable" >> \ + "$smcroutedir/$table_name.conf" + done + + "$MCD" -N -I "$table_name" -f "$smcroutedir/$table_name.conf" \ + -P "$smcroutedir/$table_name.pid" + busywait "$BUSYWAIT_TIMEOUT" test -e "$smcroutedir/$table_name.pid" + pid=$(cat "$smcroutedir/$table_name.pid") + defer kill_process "$pid" +} + +mc_cli() +{ + local table_name="$MCD_TABLE_NAME" + + "$MC_CLI" -I "$table_name" "$@" +} + start_ip_monitor() { local mtype=$1; shift diff --git a/tools/testing/selftests/net/forwarding/router_multicast.sh b/tools/testing/selftests/net/forwarding/router_multicast.sh index 5a58b1ec8aef..83e52abdbc2e 100755 --- a/tools/testing/selftests/net/forwarding/router_multicast.sh +++ b/tools/testing/selftests/net/forwarding/router_multicast.sh @@ -33,10 +33,6 @@ NUM_NETIFS=6 source lib.sh source tc_common.sh -require_command $MCD -require_command $MC_CLI -table_name=selftests - h1_create() { simple_if_init $h1 198.51.100.2/28 2001:db8:1::2/64 @@ -149,25 +145,6 @@ router_destroy() ip link set dev $rp1 down } -start_mcd() -{ - SMCROUTEDIR="$(mktemp -d)" - - for ((i = 1; i <= $NUM_NETIFS; ++i)); do - echo "phyint ${NETIFS[p$i]} enable" >> \ - $SMCROUTEDIR/$table_name.conf - done - - $MCD -N -I $table_name -f $SMCROUTEDIR/$table_name.conf \ - -P $SMCROUTEDIR/$table_name.pid -} - -kill_mcd() -{ - pkill $MCD - rm -rf $SMCROUTEDIR -} - setup_prepare() { h1=${NETIFS[p1]} @@ -179,7 +156,7 @@ setup_prepare() rp3=${NETIFS[p5]} h3=${NETIFS[p6]} - start_mcd + adf_mcd_start || exit "$EXIT_STATUS" vrf_prepare @@ -206,7 +183,7 @@ cleanup() vrf_cleanup - kill_mcd + defer_scopes_cleanup } create_mcast_sg() @@ -214,9 +191,9 @@ create_mcast_sg() local if_name=$1; shift local s_addr=$1; shift local mcast=$1; shift - local dest_ifs=${@} + local dest_ifs=("${@}") - $MC_CLI -I $table_name add $if_name $s_addr $mcast $dest_ifs + mc_cli add "$if_name" "$s_addr" "$mcast" "${dest_ifs[@]}" } delete_mcast_sg() @@ -224,9 +201,9 @@ delete_mcast_sg() local if_name=$1; shift local s_addr=$1; shift local mcast=$1; shift - local dest_ifs=${@} + local dest_ifs=("${@}") - $MC_CLI -I $table_name remove $if_name $s_addr $mcast $dest_ifs + mc_cli remove "$if_name" "$s_addr" "$mcast" "${dest_ifs[@]}" } mcast_v4() -- cgit v1.2.3 From 4baa1d3a5080c18a079d3688fa9726973959ee20 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 17 Jun 2025 00:44:21 +0200 Subject: selftests: net: lib: Add ip_link_has_flag() Add a helper to determine whether a given netdevice has a given flag. Rewrite ip_link_is_up() in terms of the new helper. Signed-off-by: Petr Machata Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/e1eb174a411f9d24735d095984c731d1d4a5a592.1750113335.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib.sh | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 006fdadcc4b9..ff0dbe23e8e0 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -547,13 +547,19 @@ ip_link_set_addr() defer ip link set dev "$name" address "$old_addr" } -ip_link_is_up() +ip_link_has_flag() { local name=$1; shift + local flag=$1; shift local state=$(ip -j link show "$name" | - jq -r '(.[].flags[] | select(. == "UP")) // "DOWN"') - [[ $state == "UP" ]] + jq --arg flag "$flag" 'any(.[].flags.[]; . == $flag)') + [[ $state == true ]] +} + +ip_link_is_up() +{ + ip_link_has_flag "$1" UP } ip_link_set_up() -- cgit v1.2.3 From 237f84a6d24a413b3a0795079afeb903ab1dec8a Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 17 Jun 2025 00:44:22 +0200 Subject: selftests: forwarding: adf_mcd_start(): Allow configuring custom interfaces Tests may wish to add other interfaces to listen on. Notably locally generated traffic uses dummy interfaces. The multicast daemon needs to know about these so that it allows forming rules that involve these interfaces, and so that net.ipv4.conf.X.mc_forwarding is set for the interfaces. To that end, allow passing in a list of interfaces to configure in addition to all the physical ones. Signed-off-by: Petr Machata Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/2e8d83297985933be4850f2b9f296b3c27110388.1750113335.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/lib.sh | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 253847372062..83ee6a07e072 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -1760,9 +1760,12 @@ mc_send() adf_mcd_start() { + local ifs=("$@") + local table_name="$MCD_TABLE_NAME" local smcroutedir local pid + local if local i check_command "$MCD" || return 1 @@ -1776,6 +1779,16 @@ adf_mcd_start() "$smcroutedir/$table_name.conf" done + for if in "${ifs[@]}"; do + if ! ip_link_has_flag "$if" MULTICAST; then + ip link set dev "$if" multicast on + defer ip link set dev "$if" multicast off + fi + + echo "phyint $if enable" >> \ + "$smcroutedir/$table_name.conf" + done + "$MCD" -N -I "$table_name" -f "$smcroutedir/$table_name.conf" \ -P "$smcroutedir/$table_name.pid" busywait "$BUSYWAIT_TIMEOUT" test -e "$smcroutedir/$table_name.pid" -- cgit v1.2.3 From e3180379e2df535ac492c24ecc9719bf83b7a0f9 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 17 Jun 2025 00:44:23 +0200 Subject: selftests: forwarding: Add a test for verifying VXLAN MC underlay Add tests for MC-routing underlay VXLAN traffic. Signed-off-by: Petr Machata Link: https://patch.msgid.link/eecd2c0fefc754182e74be8e8e65751bf5749c21.1750113335.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/Makefile | 1 + .../net/forwarding/vxlan_bridge_1q_mc_ul.sh | 771 +++++++++++++++++++++ 2 files changed, 772 insertions(+) create mode 100755 tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 00bde7b6f39e..d7bb2e80e88c 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -102,6 +102,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ vxlan_bridge_1d_port_8472.sh \ vxlan_bridge_1d.sh \ vxlan_bridge_1q_ipv6.sh \ + vxlan_bridge_1q_mc_ul.sh \ vxlan_bridge_1q_port_8472_ipv6.sh \ vxlan_bridge_1q_port_8472.sh \ vxlan_bridge_1q.sh \ diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh new file mode 100755 index 000000000000..7ec58b6b1128 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh @@ -0,0 +1,771 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------------------------+ +# | + $h1.10 + $h1.20 | +# | | 192.0.2.1/28 | 2001:db8:1::1/64 | +# | \________ ________/ | +# | \ / | +# | + $h1 H1 (vrf) | +# +-----------|-----------------------------+ +# | +# +-----------|----------------------------------------------------------------+ +# | +---------|--------------------------------------+ SWITCH (main vrf) | +# | | + $swp1 BR1 (802.1q) | | +# | | vid 10 20 | | +# | | | | +# | | + vx10 (vxlan) + vx20 (vxlan) | + lo10 (dummy) | +# | | local 192.0.2.100 local 2001:db8:4::1 | 192.0.2.100/28 | +# | | group 233.252.0.1 group ff0e::1:2:3 | 2001:db8:4::1/64 | +# | | id 1000 id 2000 | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | +------------------------------------------------+ | +# | | +# | + $swp2 $swp3 + | +# | | 192.0.2.33/28 192.0.2.65/28 | | +# | | 2001:db8:2::1/64 2001:db8:3::1/64 | | +# | | | | +# +---|--------------------------------------------------------------------|---+ +# | | +# +---|--------------------------------+ +--------------------------------|---+ +# | | H2 (vrf) | | H3 (vrf) | | +# | +-|----------------------------+ | | +-----------------------------|-+ | +# | | + $h2 BR2 (802.1d) | | | | BR3 (802.1d) $h3 + | | +# | | | | | | | | +# | | + v1$h2 (veth) | | | | v1$h3 (veth) + | | +# | +-|----------------------------+ | | +-----------------------------|-+ | +# | | | | | | +# +---|--------------------------------+ +--------------------------------|---+ +# | | +# +---|--------------------------------+ +--------------------------------|---+ +# | + v2$h2 (veth) NS2 (netns) | | NS3 (netns) v2$h3 (veth) + | +# | 192.0.2.34/28 | | 192.0.2.66/28 | +# | 2001:db8:2::2/64 | | 2001:db8:3::2/64 | +# | | | | +# | +--------------------------------+ | | +--------------------------------+ | +# | | BR1 (802.1q) | | | | BR1 (802.1q) | | +# | | + vx10 (vxlan) | | | | + vx10 (vxlan) | | +# | | local 192.0.2.34 | | | | local 192.0.2.50 | | +# | | group 233.252.0.1 dev v2$h2 | | | | group 233.252.0.1 dev v2$h3 | | +# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | | +# | | vid 10 pvid untagged | | | | vid 10 pvid untagged | | +# | | | | | | | | +# | | + vx20 (vxlan) | | | | + vx20 (vxlan) | | +# | | local 2001:db8:2::2 | | | | local 2001:db8:3::2 | | +# | | group ff0e::1:2:3 dev v2$h2 | | | | group ff0e::1:2:3 dev v2$h3 | | +# | | id 2000 dstport $VXPORT | | | | id 2000 dstport $VXPORT | | +# | | vid 20 pvid untagged | | | | vid 20 pvid untagged | | +# | | | | | | | | +# | | + w1 (veth) | | | | + w1 (veth) | | +# | | | vid 10 20 | | | | | vid 10 20 | | +# | +--|-----------------------------+ | | +--|-----------------------------+ | +# | | | | | | +# | +--|-----------------------------+ | | +--|-----------------------------+ | +# | | + w2 (veth) VW2 (vrf) | | | | + w2 (veth) VW2 (vrf) | | +# | | |\ | | | | |\ | | +# | | | + w2.10 | | | | | + w2.10 | | +# | | | 192.0.2.3/28 | | | | | 192.0.2.4/28 | | +# | | | | | | | | | | +# | | + w2.20 | | | | + w2.20 | | +# | | 2001:db8:1::3/64 | | | | 2001:db8:1::4/64 | | +# | +--------------------------------+ | | +--------------------------------+ | +# +------------------------------------+ +------------------------------------+ +# +#shellcheck disable=SC2317 # SC doesn't see our uses of functions. + +: "${VXPORT:=4789}" +export VXPORT + +: "${GROUP4:=233.252.0.1}" +export GROUP4 + +: "${GROUP6:=ff0e::1:2:3}" +export GROUP6 + +: "${IPMR:=lo10}" + +ALL_TESTS=" + ipv4_nomcroute + ipv4_mcroute + ipv4_mcroute_changelink + ipv4_mcroute_starg + ipv4_mcroute_noroute + ipv4_mcroute_fdb + ipv4_mcroute_fdb_oif0 + ipv4_mcroute_fdb_oif0_sep + + ipv6_nomcroute + ipv6_mcroute + ipv6_mcroute_changelink + ipv6_mcroute_starg + ipv6_mcroute_noroute + ipv6_mcroute_fdb + ipv6_mcroute_fdb_oif0 + + ipv4_nomcroute_rx + ipv4_mcroute_rx + ipv4_mcroute_starg_rx + ipv4_mcroute_fdb_oif0_sep_rx + ipv4_mcroute_fdb_sep_rx + + ipv6_nomcroute_rx + ipv6_mcroute_rx + ipv6_mcroute_starg_rx + ipv6_mcroute_fdb_sep_rx +" + +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + simple_if_init "$h1" + defer simple_if_fini "$h1" + + ip_link_add "$h1.10" master "v$h1" link "$h1" type vlan id 10 + ip_link_set_up "$h1.10" + ip_addr_add "$h1.10" 192.0.2.1/28 + + ip_link_add "$h1.20" master "v$h1" link "$h1" type vlan id 20 + ip_link_set_up "$h1.20" + ip_addr_add "$h1.20" 2001:db8:1::1/64 +} + +install_capture() +{ + local dev=$1; shift + + tc qdisc add dev "$dev" clsact + defer tc qdisc del dev "$dev" clsact + + tc filter add dev "$dev" ingress proto ip pref 104 \ + flower skip_hw ip_proto udp dst_port "$VXPORT" \ + action pass + defer tc filter del dev "$dev" ingress proto ip pref 104 + + tc filter add dev "$dev" ingress proto ipv6 pref 106 \ + flower skip_hw ip_proto udp dst_port "$VXPORT" \ + action pass + defer tc filter del dev "$dev" ingress proto ipv6 pref 106 +} + +h2_create() +{ + # $h2 + ip_link_set_up "$h2" + + # H2 + vrf_create "v$h2" + defer vrf_destroy "v$h2" + + ip_link_set_up "v$h2" + + # br2 + ip_link_add br2 type bridge vlan_filtering 0 mcast_snooping 0 + ip_link_set_master br2 "v$h2" + ip_link_set_up br2 + + # $h2 + ip_link_set_master "$h2" br2 + install_capture "$h2" + + # v1$h2 + ip_link_set_up "v1$h2" + ip_link_set_master "v1$h2" br2 +} + +h3_create() +{ + # $h3 + ip_link_set_up "$h3" + + # H3 + vrf_create "v$h3" + defer vrf_destroy "v$h3" + + ip_link_set_up "v$h3" + + # br3 + ip_link_add br3 type bridge vlan_filtering 0 mcast_snooping 0 + ip_link_set_master br3 "v$h3" + ip_link_set_up br3 + + # $h3 + ip_link_set_master "$h3" br3 + install_capture "$h3" + + # v1$h3 + ip_link_set_up "v1$h3" + ip_link_set_master "v1$h3" br3 +} + +switch_create() +{ + local swp1_mac + + # br1 + swp1_mac=$(mac_get "$swp1") + ip_link_add br1 type bridge vlan_filtering 1 \ + vlan_default_pvid 0 mcast_snooping 0 + ip_link_set_addr br1 "$swp1_mac" + ip_link_set_up br1 + + # A dummy to force the IPv6 OIF=0 test to install a suitable MC route on + # $IPMR to be deterministic. Also used for the IPv6 RX!=TX ping test. + ip_link_add "X$IPMR" up type dummy + + # IPMR + ip_link_add "$IPMR" up type dummy + ip_addr_add "$IPMR" 192.0.2.100/28 + ip_addr_add "$IPMR" 2001:db8:4::1/64 + + # $swp1 + ip_link_set_up "$swp1" + ip_link_set_master "$swp1" br1 + bridge_vlan_add vid 10 dev "$swp1" + bridge_vlan_add vid 20 dev "$swp1" + + # $swp2 + ip_link_set_up "$swp2" + ip_addr_add "$swp2" 192.0.2.33/28 + ip_addr_add "$swp2" 2001:db8:2::1/64 + + # $swp3 + ip_link_set_up "$swp3" + ip_addr_add "$swp3" 192.0.2.65/28 + ip_addr_add "$swp3" 2001:db8:3::1/64 +} + +vx_create() +{ + local name=$1; shift + local vid=$1; shift + + ip_link_add "$name" up type vxlan dstport "$VXPORT" \ + nolearning noudpcsum tos inherit ttl 16 \ + "$@" + ip_link_set_master "$name" br1 + bridge_vlan_add vid "$vid" dev "$name" pvid untagged +} +export -f vx_create + +vx_wait() +{ + # Wait for all the ARP, IGMP etc. noise to settle down so that the + # tunnel is clear for measurements. + sleep 10 +} + +vx10_create() +{ + vx_create vx10 10 id 1000 "$@" +} +export -f vx10_create + +vx20_create() +{ + vx_create vx20 20 id 2000 "$@" +} +export -f vx20_create + +vx10_create_wait() +{ + vx10_create "$@" + vx_wait +} + +vx20_create_wait() +{ + vx20_create "$@" + vx_wait +} + +ns_init_common() +{ + local ns=$1; shift + local if_in=$1; shift + local ipv4_in=$1; shift + local ipv6_in=$1; shift + local ipv4_host=$1; shift + local ipv6_host=$1; shift + + # v2$h2 / v2$h3 + ip_link_set_up "$if_in" + ip_addr_add "$if_in" "$ipv4_in" + ip_addr_add "$if_in" "$ipv6_in" + + # br1 + ip_link_add br1 type bridge vlan_filtering 1 \ + vlan_default_pvid 0 mcast_snooping 0 + ip_link_set_up br1 + + # vx10, vx20 + vx10_create local "${ipv4_in%/*}" group "$GROUP4" dev "$if_in" + vx20_create local "${ipv6_in%/*}" group "$GROUP6" dev "$if_in" + + # w1 + ip_link_add w1 type veth peer name w2 + ip_link_set_master w1 br1 + ip_link_set_up w1 + bridge_vlan_add vid 10 dev w1 + bridge_vlan_add vid 20 dev w1 + + # w2 + simple_if_init w2 + defer simple_if_fini w2 + + # w2.10 + ip_link_add w2.10 master vw2 link w2 type vlan id 10 + ip_link_set_up w2.10 + ip_addr_add w2.10 "$ipv4_host" + + # w2.20 + ip_link_add w2.20 master vw2 link w2 type vlan id 20 + ip_link_set_up w2.20 + ip_addr_add w2.20 "$ipv6_host" +} +export -f ns_init_common + +ns2_create() +{ + # NS2 + ip netns add ns2 + defer ip netns del ns2 + + # v2$h2 + ip link set dev "v2$h2" netns ns2 + defer ip -n ns2 link set dev "v2$h2" netns 1 + + in_ns ns2 \ + ns_init_common ns2 "v2$h2" \ + 192.0.2.34/28 2001:db8:2::2/64 \ + 192.0.2.3/28 2001:db8:1::3/64 +} + +ns3_create() +{ + # NS3 + ip netns add ns3 + defer ip netns del ns3 + + # v2$h3 + ip link set dev "v2$h3" netns ns3 + defer ip -n ns3 link set dev "v2$h3" netns 1 + + ip -n ns3 link set dev "v2$h3" up + + in_ns ns3 \ + ns_init_common ns3 "v2$h3" \ + 192.0.2.66/28 2001:db8:3::2/64 \ + 192.0.2.4/28 2001:db8:1::4/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + defer vrf_cleanup + + forwarding_enable + defer forwarding_restore + + ip_link_add "v1$h2" type veth peer name "v2$h2" + ip_link_add "v1$h3" type veth peer name "v2$h3" + + h1_create + h2_create + h3_create + switch_create + ns2_create + ns3_create +} + +adf_install_broken_sg() +{ + adf_mcd_start "$IPMR" || exit "$EXIT_STATUS" + + mc_cli add "$swp2" 192.0.2.100 "$GROUP4" "$swp1" "$swp3" + defer mc_cli remove "$swp2" 192.0.2.100 "$GROUP4" "$swp1" "$swp3" + + mc_cli add "$swp2" 2001:db8:4::1 "$GROUP6" "$swp1" "$swp3" + defer mc_cli remove "$swp2" 2001:db8:4::1 "$GROUP6" "$swp1" "$swp3" +} + +adf_install_rx() +{ + mc_cli add "$swp2" 0.0.0.0 "$GROUP4" "$IPMR" + defer mc_cli remove "$swp2" 0.0.0.0 "$GROUP4" lo10 + + mc_cli add "$swp3" 0.0.0.0 "$GROUP4" "$IPMR" + defer mc_cli remove "$swp3" 0.0.0.0 "$GROUP4" lo10 + + mc_cli add "$swp2" :: "$GROUP6" "$IPMR" + defer mc_cli remove "$swp2" :: "$GROUP6" lo10 + + mc_cli add "$swp3" :: "$GROUP6" "$IPMR" + defer mc_cli remove "$swp3" :: "$GROUP6" lo10 +} + +adf_install_sg() +{ + adf_mcd_start "$IPMR" || exit "$EXIT_STATUS" + + mc_cli add "$IPMR" 192.0.2.100 "$GROUP4" "$swp2" "$swp3" + defer mc_cli remove "$IPMR" 192.0.2.33 "$GROUP4" "$swp2" "$swp3" + + mc_cli add "$IPMR" 2001:db8:4::1 "$GROUP6" "$swp2" "$swp3" + defer mc_cli remove "$IPMR" 2001:db8:4::1 "$GROUP6" "$swp2" "$swp3" + + adf_install_rx +} + +adf_install_sg_sep() +{ + adf_mcd_start lo || exit "$EXIT_STATUS" + + mc_cli add lo 192.0.2.120 "$GROUP4" "$swp2" "$swp3" + defer mc_cli remove lo 192.0.2.120 "$GROUP4" "$swp2" "$swp3" + + mc_cli add lo 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3" + defer mc_cli remove lo 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3" +} + +adf_install_sg_sep_rx() +{ + local lo=$1; shift + + adf_mcd_start "$IPMR" "$lo" || exit "$EXIT_STATUS" + + mc_cli add "$lo" 192.0.2.120 "$GROUP4" "$swp2" "$swp3" + defer mc_cli remove "$lo" 192.0.2.120 "$GROUP4" "$swp2" "$swp3" + + mc_cli add "$lo" 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3" + defer mc_cli remove "$lo" 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3" + + adf_install_rx +} + +adf_install_starg() +{ + adf_mcd_start "$IPMR" || exit "$EXIT_STATUS" + + mc_cli add "$IPMR" 0.0.0.0 "$GROUP4" "$swp2" "$swp3" + defer mc_cli remove "$IPMR" 0.0.0.0 "$GROUP4" "$swp2" "$swp3" + + mc_cli add "$IPMR" :: "$GROUP6" "$swp2" "$swp3" + defer mc_cli remove "$IPMR" :: "$GROUP6" "$swp2" "$swp3" + + adf_install_rx +} + +do_packets_v4() +{ + local mac + + mac=$(mac_get "$h2") + "$MZ" "$h1" -Q 10 -c 10 -d 100msec -p 64 -a own -b "$mac" \ + -A 192.0.2.1 -B 192.0.2.2 -t udp sp=1234,dp=2345 -q +} + +do_packets_v6() +{ + local mac + + mac=$(mac_get "$h2") + "$MZ" -6 "$h1" -Q 20 -c 10 -d 100msec -p 64 -a own -b "$mac" \ + -A 2001:db8:1::1 -B 2001:db8:1::2 -t udp sp=1234,dp=2345 -q +} + +do_test() +{ + local ipv=$1; shift + local expect_h2=$1; shift + local expect_h3=$1; shift + local what=$1; shift + + local pref=$((100 + ipv)) + local t0_h2 + local t0_h3 + local t1_h2 + local t1_h3 + local d_h2 + local d_h3 + + RET=0 + + t0_h2=$(tc_rule_stats_get "$h2" "$pref" ingress) + t0_h3=$(tc_rule_stats_get "$h3" "$pref" ingress) + + "do_packets_v$ipv" + sleep 1 + + t1_h2=$(tc_rule_stats_get "$h2" "$pref" ingress) + t1_h3=$(tc_rule_stats_get "$h3" "$pref" ingress) + + d_h2=$((t1_h2 - t0_h2)) + d_h3=$((t1_h3 - t0_h3)) + + ((d_h2 == expect_h2)) + check_err $? "Expected $expect_h2 packets on H2, got $d_h2" + + ((d_h3 == expect_h3)) + check_err $? "Expected $expect_h3 packets on H3, got $d_h3" + + log_test "VXLAN MC flood $what" +} + +ipv4_do_test_rx() +{ + local h3_should_fail=$1; shift + local what=$1; shift + + RET=0 + + ping_do "$h1.10" 192.0.2.3 + check_err $? "H2 should respond" + + ping_do "$h1.10" 192.0.2.4 + check_err_fail "$h3_should_fail" $? "H3 responds" + + log_test "VXLAN MC flood $what" +} + +ipv6_do_test_rx() +{ + local h3_should_fail=$1; shift + local what=$1; shift + + RET=0 + + ping6_do "$h1.20" 2001:db8:1::3 + check_err $? "H2 should respond" + + ping6_do "$h1.20" 2001:db8:1::4 + check_err_fail "$h3_should_fail" $? "H3 responds" + + log_test "VXLAN MC flood $what" +} + +ipv4_nomcroute() +{ + # Install a misleading (S,G) rule to attempt to trick the system into + # pushing the packets elsewhere. + adf_install_broken_sg + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$swp2" + do_test 4 10 0 "IPv4 nomcroute" +} + +ipv6_nomcroute() +{ + # Like for IPv4, install a misleading (S,G). + adf_install_broken_sg + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$swp2" + do_test 6 10 0 "IPv6 nomcroute" +} + +ipv4_nomcroute_rx() +{ + vx10_create local 192.0.2.100 group "$GROUP4" dev "$swp2" + ipv4_do_test_rx 1 "IPv4 nomcroute ping" +} + +ipv6_nomcroute_rx() +{ + vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$swp2" + ipv6_do_test_rx 1 "IPv6 nomcroute ping" +} + +ipv4_mcroute() +{ + adf_install_sg + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + do_test 4 10 10 "IPv4 mcroute" +} + +ipv6_mcroute() +{ + adf_install_sg + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + do_test 6 10 10 "IPv6 mcroute" +} + +ipv4_mcroute_rx() +{ + adf_install_sg + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + ipv4_do_test_rx 0 "IPv4 mcroute ping" +} + +ipv6_mcroute_rx() +{ + adf_install_sg + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + ipv6_do_test_rx 0 "IPv6 mcroute ping" +} + +ipv4_mcroute_changelink() +{ + adf_install_sg + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" + ip link set dev vx10 type vxlan mcroute + sleep 1 + do_test 4 10 10 "IPv4 mcroute changelink" +} + +ipv6_mcroute_changelink() +{ + adf_install_sg + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + ip link set dev vx20 type vxlan mcroute + sleep 1 + do_test 6 10 10 "IPv6 mcroute changelink" +} + +ipv4_mcroute_starg() +{ + adf_install_starg + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + do_test 4 10 10 "IPv4 mcroute (*,G)" +} + +ipv6_mcroute_starg() +{ + adf_install_starg + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + do_test 6 10 10 "IPv6 mcroute (*,G)" +} + +ipv4_mcroute_starg_rx() +{ + adf_install_starg + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + ipv4_do_test_rx 0 "IPv4 mcroute (*,G) ping" +} + +ipv6_mcroute_starg_rx() +{ + adf_install_starg + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + ipv6_do_test_rx 0 "IPv6 mcroute (*,G) ping" +} + +ipv4_mcroute_noroute() +{ + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + do_test 4 0 0 "IPv4 mcroute, no route" +} + +ipv6_mcroute_noroute() +{ + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + do_test 6 0 0 "IPv6 mcroute, no route" +} + +ipv4_mcroute_fdb() +{ + adf_install_sg + vx10_create_wait local 192.0.2.100 dev "$IPMR" mcroute + bridge fdb add dev vx10 \ + 00:00:00:00:00:00 self static dst "$GROUP4" via "$IPMR" + do_test 4 10 10 "IPv4 mcroute FDB" +} + +ipv6_mcroute_fdb() +{ + adf_install_sg + vx20_create_wait local 2001:db8:4::1 dev "$IPMR" mcroute + bridge -6 fdb add dev vx20 \ + 00:00:00:00:00:00 self static dst "$GROUP6" via "$IPMR" + do_test 6 10 10 "IPv6 mcroute FDB" +} + +# Use FDB to configure VXLAN in a way where oif=0 for purposes of FIB lookup. +ipv4_mcroute_fdb_oif0() +{ + adf_install_sg + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + bridge fdb del dev vx10 00:00:00:00:00:00 + bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" + do_test 4 10 10 "IPv4 mcroute oif=0" +} + +ipv6_mcroute_fdb_oif0() +{ + # The IPv6 tunnel lookup does not fall back to selection by source + # address. Instead it just does a FIB match, and that would find one of + # the several ff00::/8 multicast routes -- each device has one. In order + # to reliably force the $IPMR device, add a /128 route for the + # destination group address. + ip -6 route add table local multicast "$GROUP6/128" dev "$IPMR" + defer ip -6 route del table local multicast "$GROUP6/128" dev "$IPMR" + + adf_install_sg + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + bridge -6 fdb del dev vx20 00:00:00:00:00:00 + bridge -6 fdb add dev vx20 00:00:00:00:00:00 self static dst "$GROUP6" + do_test 6 10 10 "IPv6 mcroute oif=0" +} + +# In oif=0 test as above, have FIB lookup resolve to loopback instead of IPMR. +# This doesn't work with IPv6 -- a MC route on lo would be marked as RTF_REJECT. +ipv4_mcroute_fdb_oif0_sep() +{ + adf_install_sg_sep + + ip_addr_add lo 192.0.2.120/28 + vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute + bridge fdb del dev vx10 00:00:00:00:00:00 + bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" + do_test 4 10 10 "IPv4 mcroute TX!=RX oif=0" +} + +ipv4_mcroute_fdb_oif0_sep_rx() +{ + adf_install_sg_sep_rx lo + + ip_addr_add lo 192.0.2.120/28 + vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute + bridge fdb del dev vx10 00:00:00:00:00:00 + bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" + ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX oif=0 ping" +} + +ipv4_mcroute_fdb_sep_rx() +{ + adf_install_sg_sep_rx lo + + ip_addr_add lo 192.0.2.120/28 + vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute + bridge fdb del dev vx10 00:00:00:00:00:00 + bridge fdb add \ + dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" via lo + ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX ping" +} + +ipv6_mcroute_fdb_sep_rx() +{ + adf_install_sg_sep_rx "X$IPMR" + + ip_addr_add "X$IPMR" 2001:db8:5::1/64 + vx20_create_wait local 2001:db8:5::1 group "$GROUP6" dev "$IPMR" mcroute + bridge -6 fdb del dev vx20 00:00:00:00:00:00 + bridge -6 fdb add dev vx20 00:00:00:00:00:00 \ + self static dst "$GROUP6" via "X$IPMR" + ipv6_do_test_rx 0 "IPv6 mcroute TX!=RX ping" +} + +trap cleanup EXIT + +setup_prepare +setup_wait "$NUM_NETIFS" +tests_run + +exit "$EXIT_STATUS" -- cgit v1.2.3 From 68707c079e584f11b3f768f6ac1066a501c650b4 Mon Sep 17 00:00:00 2001 From: Gustavo Luiz Duarte Date: Mon, 16 Jun 2025 10:08:38 -0700 Subject: selftests: netconsole: Add tests for 'msgid' feature in sysdata Extend the self-tests to cover the 'msgid' feature in sysdata. Verify that msgid is appended to the message when the feature is enabled and that it is not appended when the feature is disabled. Signed-off-by: Gustavo Luiz Duarte Reviewed-by: Breno Leitao Signed-off-by: David S. Miller --- .../selftests/drivers/net/netcons_sysdata.sh | 30 ++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/netcons_sysdata.sh b/tools/testing/selftests/drivers/net/netcons_sysdata.sh index a737e377bf08..baf69031089e 100755 --- a/tools/testing/selftests/drivers/net/netcons_sysdata.sh +++ b/tools/testing/selftests/drivers/net/netcons_sysdata.sh @@ -53,6 +53,17 @@ function set_release() { echo 1 > "${NETCONS_PATH}/userdata/release_enabled" } +# Enable the msgid to be appended to sysdata +function set_msgid() { + if [[ ! -f "${NETCONS_PATH}/userdata/msgid_enabled" ]] + then + echo "Not able to enable msgid sysdata append. Configfs not available in ${NETCONS_PATH}/userdata/msgid_enabled" >&2 + exit "${ksft_skip}" + fi + + echo 1 > "${NETCONS_PATH}/userdata/msgid_enabled" +} + # Disable the sysdata cpu_nr feature function unset_cpu_nr() { echo 0 > "${NETCONS_PATH}/userdata/cpu_nr_enabled" @@ -67,6 +78,10 @@ function unset_release() { echo 0 > "${NETCONS_PATH}/userdata/release_enabled" } +function unset_msgid() { + echo 0 > "${NETCONS_PATH}/userdata/msgid_enabled" +} + # Test if MSG contains sysdata function validate_sysdata() { # OUTPUT_FILE will contain something like: @@ -74,6 +89,7 @@ function validate_sysdata() { # userdatakey=userdatavalue # cpu=X # taskname= + # msgid= # Echo is what this test uses to create the message. See runtest() # function @@ -104,6 +120,12 @@ function validate_sysdata() { exit "${ksft_fail}" fi + if ! grep -q "msgid=[0-9]\+$" "${OUTPUT_FILE}"; then + echo "FAIL: 'msgid=' not found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + rm "${OUTPUT_FILE}" pkill_socat } @@ -155,6 +177,12 @@ function validate_no_sysdata() { exit "${ksft_fail}" fi + if grep -q "msgid=" "${OUTPUT_FILE}"; then + echo "FAIL: 'msgid= found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + rm "${OUTPUT_FILE}" } @@ -206,6 +234,7 @@ set_cpu_nr # Enable taskname to be appended to sysdata set_taskname set_release +set_msgid runtest # Make sure the message was received in the dst part # and exit @@ -235,6 +264,7 @@ MSG="Test #3 from CPU${CPU}" unset_cpu_nr unset_taskname unset_release +unset_msgid runtest # At this time, cpu= shouldn't be present in the msg validate_no_sysdata -- cgit v1.2.3 From cd7312a78f36e981939abe1cd1f21d355e083dfe Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 18 Jun 2025 02:31:34 -0700 Subject: selftests/bpf: include limits.h needed for PATH_MAX directly Constant PATH_MAX is used in function unpriv_helpers.c:open_config(). This constant is provided by include file . The dependency was added by commit [1], which does not include directly, relying instead on being included from zlib.h -> zconf.h. As it turns out, this is not the case for all systems, e.g. on Fedora 41 zlib 1.3.1 is used, and there is not included from zconf.h. Hence, there is a compilation error on Fedora 41. [1] commit fc2915bb8bfc ("selftests/bpf: More precise cpu_mitigations state detection") Fixes: fc2915bb8bfc ("selftests/bpf: More precise cpu_mitigations state detection") Signed-off-by: Eduard Zingerman Acked-by: Viktor Malik Link: https://lore.kernel.org/r/20250618093134.3078870-1-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/unpriv_helpers.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/unpriv_helpers.c b/tools/testing/selftests/bpf/unpriv_helpers.c index 3aa9ee80a55e..f997d7ec8fd0 100644 --- a/tools/testing/selftests/bpf/unpriv_helpers.c +++ b/tools/testing/selftests/bpf/unpriv_helpers.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only #include +#include #include #include #include -- cgit v1.2.3 From e72fe8cbd8a0c111542ccaae932ad323c7edaeb3 Mon Sep 17 00:00:00 2001 From: Ankit Chauhan Date: Thu, 1 May 2025 11:33:29 +0530 Subject: selftests/ptrace: Fix spelling mistake "multible" -> "multiple" Fix the spelling error from "multible" to "multiple". Link: https://lore.kernel.org/r/20250501060329.126117-1-ankitchauhan2065@gmail.com Signed-off-by: Ankit Chauhan Reviewed-by: Brigham Campbell Signed-off-by: Shuah Khan --- tools/testing/selftests/ptrace/peeksiginfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/ptrace/peeksiginfo.c b/tools/testing/selftests/ptrace/peeksiginfo.c index a6884f66dc01..2f345d11e4b8 100644 --- a/tools/testing/selftests/ptrace/peeksiginfo.c +++ b/tools/testing/selftests/ptrace/peeksiginfo.c @@ -199,7 +199,7 @@ int main(int argc, char *argv[]) /* * Dump signal from the process-wide queue. - * The number of signals is not multible to the buffer size + * The number of signals is not multiple to the buffer size */ if (check_direct_path(child, 1, 3)) goto out; -- cgit v1.2.3 From 3168276591210d147ed9e6dc267f8a04007593c7 Mon Sep 17 00:00:00 2001 From: David Wei Date: Tue, 17 Jun 2025 14:20:59 -0700 Subject: selftests: netdevsim: improve lib.sh include in peer.sh Fix the peer.sh test to run from INSTALL_PATH. Signed-off-by: David Wei Link: https://patch.msgid.link/20250617212102.175711-2-dw@davidwei.uk Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/netdevsim/peer.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/netdevsim/peer.sh b/tools/testing/selftests/drivers/net/netdevsim/peer.sh index 1bb46ec435d4..7f32b5600925 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/peer.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/peer.sh @@ -1,7 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0-only -source ../../../net/lib.sh +lib_dir=$(dirname $0)/../../../net +source $lib_dir/lib.sh NSIM_DEV_1_ID=$((256 + RANDOM % 256)) NSIM_DEV_1_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_1_ID -- cgit v1.2.3 From c65b5bb2329e36340eba76f05752f8f1eb15bee0 Mon Sep 17 00:00:00 2001 From: David Wei Date: Tue, 17 Jun 2025 14:21:00 -0700 Subject: selftests: net: add passive TFO test binary Add a simple passive TFO server and client test binary. This will be used to test the SO_INCOMING_NAPI_ID of passive TFO accepted sockets. Signed-off-by: David Wei Link: https://patch.msgid.link/20250617212102.175711-3-dw@davidwei.uk Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/.gitignore | 1 + tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/tfo.c | 171 +++++++++++++++++++++++++++++++++ 3 files changed, 173 insertions(+) create mode 100644 tools/testing/selftests/net/tfo.c (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 532bb732bc6d..c6dd2a335cf4 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -50,6 +50,7 @@ tap tcp_fastopen_backup_key tcp_inq tcp_mmap +tfo timestamping tls toeplitz diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index ab996bd22a5f..ab8da438fd78 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -110,6 +110,7 @@ TEST_GEN_PROGS += proc_net_pktgen TEST_PROGS += lwt_dst_cache_ref_loop.sh TEST_PROGS += skf_net_off.sh TEST_GEN_FILES += skf_net_off +TEST_GEN_FILES += tfo # YNL files, must be before "include ..lib.mk" YNL_GEN_FILES := busy_poller netlink-dumps diff --git a/tools/testing/selftests/net/tfo.c b/tools/testing/selftests/net/tfo.c new file mode 100644 index 000000000000..eb3cac5e583c --- /dev/null +++ b/tools/testing/selftests/net/tfo.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int cfg_server; +static int cfg_client; +static int cfg_port = 8000; +static struct sockaddr_in6 cfg_addr; +static char *cfg_outfile; + +static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6) +{ + int ret; + + sin6->sin6_family = AF_INET6; + sin6->sin6_port = htons(port); + + ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr); + if (ret != 1) { + /* fallback to plain IPv4 */ + ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]); + if (ret != 1) + return -1; + + /* add ::ffff prefix */ + sin6->sin6_addr.s6_addr32[0] = 0; + sin6->sin6_addr.s6_addr32[1] = 0; + sin6->sin6_addr.s6_addr16[4] = 0; + sin6->sin6_addr.s6_addr16[5] = 0xffff; + } + + return 0; +} + +static void run_server(void) +{ + unsigned long qlen = 32; + int fd, opt, connfd; + socklen_t len; + char buf[64]; + FILE *outfile; + + outfile = fopen(cfg_outfile, "w"); + if (!outfile) + error(1, errno, "fopen() outfile"); + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd == -1) + error(1, errno, "socket()"); + + opt = 1; + if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) + error(1, errno, "setsockopt(SO_REUSEADDR)"); + + if (setsockopt(fd, SOL_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) < 0) + error(1, errno, "setsockopt(TCP_FASTOPEN)"); + + if (bind(fd, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr)) < 0) + error(1, errno, "bind()"); + + if (listen(fd, 5) < 0) + error(1, errno, "listen()"); + + len = sizeof(cfg_addr); + connfd = accept(fd, (struct sockaddr *)&cfg_addr, &len); + if (connfd < 0) + error(1, errno, "accept()"); + + len = sizeof(opt); + if (getsockopt(connfd, SOL_SOCKET, SO_INCOMING_NAPI_ID, &opt, &len) < 0) + error(1, errno, "getsockopt(SO_INCOMING_NAPI_ID)"); + + read(connfd, buf, 64); + fprintf(outfile, "%d\n", opt); + + fclose(outfile); + close(connfd); + close(fd); +} + +static void run_client(void) +{ + int fd; + char *msg = "Hello, world!"; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd == -1) + error(1, errno, "socket()"); + + sendto(fd, msg, strlen(msg), MSG_FASTOPEN, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr)); + + close(fd); +} + +static void usage(const char *filepath) +{ + error(1, 0, "Usage: %s (-s|-c) -h -p -o ", filepath); +} + +static void parse_opts(int argc, char **argv) +{ + struct sockaddr_in6 *addr6 = (void *) &cfg_addr; + char *addr = NULL; + int ret; + int c; + + if (argc <= 1) + usage(argv[0]); + + while ((c = getopt(argc, argv, "sch:p:o:")) != -1) { + switch (c) { + case 's': + if (cfg_client) + error(1, 0, "Pass one of -s or -c"); + cfg_server = 1; + break; + case 'c': + if (cfg_server) + error(1, 0, "Pass one of -s or -c"); + cfg_client = 1; + break; + case 'h': + addr = optarg; + break; + case 'p': + cfg_port = strtoul(optarg, NULL, 0); + break; + case 'o': + cfg_outfile = strdup(optarg); + if (!cfg_outfile) + error(1, 0, "outfile invalid"); + break; + } + } + + if (cfg_server && addr) + error(1, 0, "Server cannot have -h specified"); + + memset(addr6, 0, sizeof(*addr6)); + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(cfg_port); + addr6->sin6_addr = in6addr_any; + if (addr) { + ret = parse_address(addr, cfg_port, addr6); + if (ret) + error(1, 0, "Client address parse error: %s", addr); + } +} + +int main(int argc, char **argv) +{ + parse_opts(argc, argv); + + if (cfg_server) + run_server(); + else if (cfg_client) + run_client(); + + return 0; +} -- cgit v1.2.3 From 137e7b5cceda2fd634ff47fde6c4226092d09442 Mon Sep 17 00:00:00 2001 From: David Wei Date: Tue, 17 Jun 2025 14:21:01 -0700 Subject: selftests: net: add test for passive TFO socket NAPI ID Add a test that checks that the NAPI ID of a passive TFO socket is valid i.e. not zero. Signed-off-by: David Wei Link: https://patch.msgid.link/20250617212102.175711-4-dw@davidwei.uk Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/tfo_passive.sh | 112 +++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+) create mode 100755 tools/testing/selftests/net/tfo_passive.sh (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index ab8da438fd78..332f387615d7 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -111,6 +111,7 @@ TEST_PROGS += lwt_dst_cache_ref_loop.sh TEST_PROGS += skf_net_off.sh TEST_GEN_FILES += skf_net_off TEST_GEN_FILES += tfo +TEST_PROGS += tfo_passive.sh # YNL files, must be before "include ..lib.mk" YNL_GEN_FILES := busy_poller netlink-dumps diff --git a/tools/testing/selftests/net/tfo_passive.sh b/tools/testing/selftests/net/tfo_passive.sh new file mode 100755 index 000000000000..80bf11fdc046 --- /dev/null +++ b/tools/testing/selftests/net/tfo_passive.sh @@ -0,0 +1,112 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +source lib.sh + +NSIM_SV_ID=$((256 + RANDOM % 256)) +NSIM_SV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_SV_ID +NSIM_CL_ID=$((512 + RANDOM % 256)) +NSIM_CL_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_CL_ID + +NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device +NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device +NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device +NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device + +SERVER_IP=192.168.1.1 +CLIENT_IP=192.168.1.2 +SERVER_PORT=48675 + +setup_ns() +{ + set -e + ip netns add nssv + ip netns add nscl + + NSIM_SV_NAME=$(find $NSIM_SV_SYS/net -maxdepth 1 -type d ! \ + -path $NSIM_SV_SYS/net -exec basename {} \;) + NSIM_CL_NAME=$(find $NSIM_CL_SYS/net -maxdepth 1 -type d ! \ + -path $NSIM_CL_SYS/net -exec basename {} \;) + + ip link set $NSIM_SV_NAME netns nssv + ip link set $NSIM_CL_NAME netns nscl + + ip netns exec nssv ip addr add "${SERVER_IP}/24" dev $NSIM_SV_NAME + ip netns exec nscl ip addr add "${CLIENT_IP}/24" dev $NSIM_CL_NAME + + ip netns exec nssv ip link set dev $NSIM_SV_NAME up + ip netns exec nscl ip link set dev $NSIM_CL_NAME up + + # Enable passive TFO + ip netns exec nssv sysctl -w net.ipv4.tcp_fastopen=519 > /dev/null + + set +e +} + +cleanup_ns() +{ + ip netns del nscl + ip netns del nssv +} + +### +### Code start +### + +modprobe netdevsim + +# linking + +echo $NSIM_SV_ID > $NSIM_DEV_SYS_NEW +echo $NSIM_CL_ID > $NSIM_DEV_SYS_NEW +udevadm settle + +setup_ns + +NSIM_SV_FD=$((256 + RANDOM % 256)) +exec {NSIM_SV_FD} \ + $NSIM_DEV_SYS_LINK + +if [ $? -ne 0 ]; then + echo "linking netdevsim1 with netdevsim2 should succeed" + cleanup_ns + exit 1 +fi + +out_file=$(mktemp) + +timeout -k 1s 30s ip netns exec nssv ./tfo \ + -s \ + -p ${SERVER_PORT} \ + -o ${out_file}& + +wait_local_port_listen nssv ${SERVER_PORT} tcp + +ip netns exec nscl ./tfo -c -h ${SERVER_IP} -p ${SERVER_PORT} + +wait + +res=$(cat $out_file) +rm $out_file + +if [ $res -eq 0 ]; then + echo "got invalid NAPI ID from passive TFO socket" + cleanup_ns + exit 1 +fi + +echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK + +echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL + +cleanup_ns + +modprobe -r netdevsim + +exit 0 -- cgit v1.2.3 From d4adf1c9ee7722545450608bcb095fb31512f0c6 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 18 Jun 2025 17:57:40 -0400 Subject: bpf: Adjust free target to avoid global starvation of LRU map BPF_MAP_TYPE_LRU_HASH can recycle most recent elements well before the map is full, due to percpu reservations and force shrink before neighbor stealing. Once a CPU is unable to borrow from the global map, it will once steal one elem from a neighbor and after that each time flush this one element to the global list and immediately recycle it. Batch value LOCAL_FREE_TARGET (128) will exhaust a 10K element map with 79 CPUs. CPU 79 will observe this behavior even while its neighbors hold 78 * 127 + 1 * 15 == 9921 free elements (99%). CPUs need not be active concurrently. The issue can appear with affinity migration, e.g., irqbalance. Each CPU can reserve and then hold onto its 128 elements indefinitely. Avoid global list exhaustion by limiting aggregate percpu caches to half of map size, by adjusting LOCAL_FREE_TARGET based on cpu count. This change has no effect on sufficiently large tables. Similar to LOCAL_NR_SCANS and lru->nr_scans, introduce a map variable lru->free_target. The extra field fits in a hole in struct bpf_lru. The cacheline is already warm where read in the hot path. The field is only accessed with the lru lock held. Tested-by: Anton Protopopov Signed-off-by: Willem de Bruijn Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20250618215803.3587312-1-willemdebruijn.kernel@gmail.com Signed-off-by: Alexei Starovoitov --- Documentation/bpf/map_hash.rst | 8 +++- Documentation/bpf/map_lru_hash_update.dot | 6 +-- kernel/bpf/bpf_lru_list.c | 9 ++-- kernel/bpf/bpf_lru_list.h | 1 + tools/testing/selftests/bpf/test_lru_map.c | 72 +++++++++++++++--------------- 5 files changed, 52 insertions(+), 44 deletions(-) (limited to 'tools/testing/selftests') diff --git a/Documentation/bpf/map_hash.rst b/Documentation/bpf/map_hash.rst index d2343952f2cb..8606bf958a8c 100644 --- a/Documentation/bpf/map_hash.rst +++ b/Documentation/bpf/map_hash.rst @@ -233,10 +233,16 @@ attempts in order to enforce the LRU property which have increasing impacts on other CPUs involved in the following operation attempts: - Attempt to use CPU-local state to batch operations -- Attempt to fetch free nodes from global lists +- Attempt to fetch ``target_free`` free nodes from global lists - Attempt to pull any node from a global list and remove it from the hashmap - Attempt to pull any node from any CPU's list and remove it from the hashmap +The number of nodes to borrow from the global list in a batch, ``target_free``, +depends on the size of the map. Larger batch size reduces lock contention, but +may also exhaust the global structure. The value is computed at map init to +avoid exhaustion, by limiting aggregate reservation by all CPUs to half the map +size. With a minimum of a single element and maximum budget of 128 at a time. + This algorithm is described visually in the following diagram. See the description in commit 3a08c2fd7634 ("bpf: LRU List") for a full explanation of the corresponding operations: diff --git a/Documentation/bpf/map_lru_hash_update.dot b/Documentation/bpf/map_lru_hash_update.dot index a0fee349d29c..ab10058f5b79 100644 --- a/Documentation/bpf/map_lru_hash_update.dot +++ b/Documentation/bpf/map_lru_hash_update.dot @@ -35,18 +35,18 @@ digraph { fn_bpf_lru_list_pop_free_to_local [shape=rectangle,fillcolor=2, label="Flush local pending, Rotate Global list, move - LOCAL_FREE_TARGET + target_free from global -> local"] // Also corresponds to: // fn__local_list_flush() // fn_bpf_lru_list_rotate() fn___bpf_lru_node_move_to_free[shape=diamond,fillcolor=2, - label="Able to free\nLOCAL_FREE_TARGET\nnodes?"] + label="Able to free\ntarget_free\nnodes?"] fn___bpf_lru_list_shrink_inactive [shape=rectangle,fillcolor=3, label="Shrink inactive list up to remaining - LOCAL_FREE_TARGET + target_free (global LRU -> local)"] fn___bpf_lru_list_shrink [shape=diamond,fillcolor=2, label="> 0 entries in\nlocal free list?"] diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c index 3dabdd137d10..2d6e1c98d8ad 100644 --- a/kernel/bpf/bpf_lru_list.c +++ b/kernel/bpf/bpf_lru_list.c @@ -337,12 +337,12 @@ static void bpf_lru_list_pop_free_to_local(struct bpf_lru *lru, list) { __bpf_lru_node_move_to_free(l, node, local_free_list(loc_l), BPF_LRU_LOCAL_LIST_T_FREE); - if (++nfree == LOCAL_FREE_TARGET) + if (++nfree == lru->target_free) break; } - if (nfree < LOCAL_FREE_TARGET) - __bpf_lru_list_shrink(lru, l, LOCAL_FREE_TARGET - nfree, + if (nfree < lru->target_free) + __bpf_lru_list_shrink(lru, l, lru->target_free - nfree, local_free_list(loc_l), BPF_LRU_LOCAL_LIST_T_FREE); @@ -577,6 +577,9 @@ static void bpf_common_lru_populate(struct bpf_lru *lru, void *buf, list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]); buf += elem_size; } + + lru->target_free = clamp((nr_elems / num_possible_cpus()) / 2, + 1, LOCAL_FREE_TARGET); } static void bpf_percpu_lru_populate(struct bpf_lru *lru, void *buf, diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h index cbd8d3720c2b..fe2661a58ea9 100644 --- a/kernel/bpf/bpf_lru_list.h +++ b/kernel/bpf/bpf_lru_list.h @@ -58,6 +58,7 @@ struct bpf_lru { del_from_htab_func del_from_htab; void *del_arg; unsigned int hash_offset; + unsigned int target_free; unsigned int nr_scans; bool percpu; }; diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c index fda7589c5023..4ae83f4b7fc7 100644 --- a/tools/testing/selftests/bpf/test_lru_map.c +++ b/tools/testing/selftests/bpf/test_lru_map.c @@ -138,6 +138,12 @@ static int sched_next_online(int pid, int *next_to_try) return ret; } +/* Inverse of how bpf_common_lru_populate derives target_free from map_size. */ +static unsigned int __map_size(unsigned int tgt_free) +{ + return tgt_free * nr_cpus * 2; +} + /* Size of the LRU map is 2 * Add key=1 (+1 key) * Add key=2 (+1 key) @@ -231,11 +237,11 @@ static void test_lru_sanity0(int map_type, int map_flags) printf("Pass\n"); } -/* Size of the LRU map is 1.5*tgt_free - * Insert 1 to tgt_free (+tgt_free keys) - * Lookup 1 to tgt_free/2 - * Insert 1+tgt_free to 2*tgt_free (+tgt_free keys) - * => 1+tgt_free/2 to LOCALFREE_TARGET will be removed by LRU +/* Verify that unreferenced elements are recycled before referenced ones. + * Insert elements. + * Reference a subset of these. + * Insert more, enough to trigger recycling. + * Verify that unreferenced are recycled. */ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) { @@ -257,7 +263,7 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) batch_size = tgt_free / 2; assert(batch_size * 2 == tgt_free); - map_size = tgt_free + batch_size; + map_size = __map_size(tgt_free) + batch_size; lru_map_fd = create_map(map_type, map_flags, map_size); assert(lru_map_fd != -1); @@ -266,13 +272,13 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) value[0] = 1234; - /* Insert 1 to tgt_free (+tgt_free keys) */ - end_key = 1 + tgt_free; + /* Insert map_size - batch_size keys */ + end_key = 1 + __map_size(tgt_free); for (key = 1; key < end_key; key++) assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); - /* Lookup 1 to tgt_free/2 */ + /* Lookup 1 to batch_size */ end_key = 1 + batch_size; for (key = 1; key < end_key; key++) { assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value)); @@ -280,12 +286,13 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) BPF_NOEXIST)); } - /* Insert 1+tgt_free to 2*tgt_free - * => 1+tgt_free/2 to LOCALFREE_TARGET will be + /* Insert another map_size - batch_size keys + * Map will contain 1 to batch_size plus these latest, i.e., + * => previous 1+batch_size to map_size - batch_size will have been * removed by LRU */ - key = 1 + tgt_free; - end_key = key + tgt_free; + key = 1 + __map_size(tgt_free); + end_key = key + __map_size(tgt_free); for (; key < end_key; key++) { assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); @@ -301,17 +308,8 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) printf("Pass\n"); } -/* Size of the LRU map 1.5 * tgt_free - * Insert 1 to tgt_free (+tgt_free keys) - * Update 1 to tgt_free/2 - * => The original 1 to tgt_free/2 will be removed due to - * the LRU shrink process - * Re-insert 1 to tgt_free/2 again and do a lookup immeidately - * Insert 1+tgt_free to tgt_free*3/2 - * Insert 1+tgt_free*3/2 to tgt_free*5/2 - * => Key 1+tgt_free to tgt_free*3/2 - * will be removed from LRU because it has never - * been lookup and ref bit is not set +/* Verify that insertions exceeding map size will recycle the oldest. + * Verify that unreferenced elements are recycled before referenced. */ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) { @@ -334,7 +332,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) batch_size = tgt_free / 2; assert(batch_size * 2 == tgt_free); - map_size = tgt_free + batch_size; + map_size = __map_size(tgt_free) + batch_size; lru_map_fd = create_map(map_type, map_flags, map_size); assert(lru_map_fd != -1); @@ -343,8 +341,8 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) value[0] = 1234; - /* Insert 1 to tgt_free (+tgt_free keys) */ - end_key = 1 + tgt_free; + /* Insert map_size - batch_size keys */ + end_key = 1 + __map_size(tgt_free); for (key = 1; key < end_key; key++) assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); @@ -357,8 +355,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) * shrink the inactive list to get tgt_free * number of free nodes. * - * Hence, the oldest key 1 to tgt_free/2 - * are removed from the LRU list. + * Hence, the oldest key is removed from the LRU list. */ key = 1; if (map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { @@ -370,8 +367,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) BPF_EXIST)); } - /* Re-insert 1 to tgt_free/2 again and do a lookup - * immeidately. + /* Re-insert 1 to batch_size again and do a lookup immediately. */ end_key = 1 + batch_size; value[0] = 4321; @@ -387,17 +383,18 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) value[0] = 1234; - /* Insert 1+tgt_free to tgt_free*3/2 */ - end_key = 1 + tgt_free + batch_size; - for (key = 1 + tgt_free; key < end_key; key++) + /* Insert batch_size new elements */ + key = 1 + __map_size(tgt_free); + end_key = key + batch_size; + for (; key < end_key; key++) /* These newly added but not referenced keys will be * gone during the next LRU shrink. */ assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); - /* Insert 1+tgt_free*3/2 to tgt_free*5/2 */ - end_key = key + tgt_free; + /* Insert map_size - batch_size elements */ + end_key += __map_size(tgt_free); for (; key < end_key; key++) { assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); @@ -500,7 +497,8 @@ static void test_lru_sanity4(int map_type, int map_flags, unsigned int tgt_free) lru_map_fd = create_map(map_type, map_flags, 3 * tgt_free * nr_cpus); else - lru_map_fd = create_map(map_type, map_flags, 3 * tgt_free); + lru_map_fd = create_map(map_type, map_flags, + 3 * __map_size(tgt_free)); assert(lru_map_fd != -1); expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, -- cgit v1.2.3 From e054c8ba3bce6b65aa81a894ad70a68fa34636a5 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 16 Jun 2025 09:21:16 -0700 Subject: netdevsim: remove udp_ports_sleep Now that there is only one path in udp_tunnel, there is no need to have udp_ports_sleep knob. Remove it and adjust the test. Cc: Michael Chan Reviewed-by: Aleksandr Loktionov Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250616162117.287806-6-stfomichev@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/netdevsim.h | 2 -- drivers/net/netdevsim/udp_tunnels.c | 8 -------- .../drivers/net/netdevsim/udp_tunnel_nic.sh | 23 +--------------------- 3 files changed, 1 insertion(+), 32 deletions(-) (limited to 'tools/testing/selftests') diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index d04401f0bdf7..511ed72a93ce 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -131,7 +131,6 @@ struct netdevsim { struct nsim_macsec macsec; struct { u32 inject_error; - u32 sleep; u32 __ports[2][NSIM_UDP_TUNNEL_N_PORTS]; u32 (*ports)[NSIM_UDP_TUNNEL_N_PORTS]; struct dentry *ddir; @@ -342,7 +341,6 @@ struct nsim_dev { bool ipv4_only; bool shared; bool static_iana_vxlan; - u32 sleep; } udp_ports; struct nsim_dev_psample *psample; u16 esw_mode; diff --git a/drivers/net/netdevsim/udp_tunnels.c b/drivers/net/netdevsim/udp_tunnels.c index 10cbbf1c584b..89fff76e51cf 100644 --- a/drivers/net/netdevsim/udp_tunnels.c +++ b/drivers/net/netdevsim/udp_tunnels.c @@ -18,9 +18,6 @@ nsim_udp_tunnel_set_port(struct net_device *dev, unsigned int table, ret = -ns->udp_ports.inject_error; ns->udp_ports.inject_error = 0; - if (ns->udp_ports.sleep) - msleep(ns->udp_ports.sleep); - if (!ret) { if (ns->udp_ports.ports[table][entry]) { WARN(1, "entry already in use\n"); @@ -47,8 +44,6 @@ nsim_udp_tunnel_unset_port(struct net_device *dev, unsigned int table, ret = -ns->udp_ports.inject_error; ns->udp_ports.inject_error = 0; - if (ns->udp_ports.sleep) - msleep(ns->udp_ports.sleep); if (!ret) { u32 val = be16_to_cpu(ti->port) << 16 | ti->type; @@ -170,7 +165,6 @@ int nsim_udp_tunnels_info_create(struct nsim_dev *nsim_dev, GFP_KERNEL); if (!info) return -ENOMEM; - ns->udp_ports.sleep = nsim_dev->udp_ports.sleep; if (nsim_dev->udp_ports.sync_all) { info->set_port = NULL; @@ -213,6 +207,4 @@ void nsim_udp_tunnels_debugfs_create(struct nsim_dev *nsim_dev) &nsim_dev->udp_ports.shared); debugfs_create_bool("udp_ports_static_iana_vxlan", 0600, nsim_dev->ddir, &nsim_dev->udp_ports.static_iana_vxlan); - debugfs_create_u32("udp_ports_sleep", 0600, nsim_dev->ddir, - &nsim_dev->udp_ports.sleep); } diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh index 92c2f0376c08..4c859ecdad94 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh @@ -266,7 +266,6 @@ for port in 0 1; do echo $NSIM_ID > /sys/bus/netdevsim/new_device else echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > $NSIM_DEV_DFS/udp_ports_sleep echo 1 > $NSIM_DEV_SYS/new_port fi NSIM_NETDEV=`get_netdev_name old_netdevs` @@ -350,23 +349,11 @@ old_netdevs=$(ls /sys/class/net) port=0 echo $NSIM_ID > /sys/bus/netdevsim/new_device echo 0 > $NSIM_DEV_SYS/del_port -echo 1000 > $NSIM_DEV_DFS/udp_ports_sleep echo 0 > $NSIM_DEV_SYS/new_port NSIM_NETDEV=`get_netdev_name old_netdevs` msg="create VxLANs" -exp0=( 0 0 0 0 ) # sleep is longer than out wait -new_vxlan vxlan0 10000 $NSIM_NETDEV - -modprobe -r vxlan -modprobe -r udp_tunnel - -msg="remove tunnels" -exp0=( 0 0 0 0 ) -check_tables - -msg="create VxLANs" -exp0=( 0 0 0 0 ) # sleep is longer than out wait +exp0=( `mke 10000 1` 0 0 0 ) new_vxlan vxlan0 10000 $NSIM_NETDEV exp0=( 0 0 0 0 ) @@ -428,7 +415,6 @@ echo 0 > $NSIM_DEV_SYS/del_port for port in 0 1; do if [ $port -ne 0 ]; then echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > $NSIM_DEV_DFS/udp_ports_sleep fi echo $port > $NSIM_DEV_SYS/new_port @@ -486,7 +472,6 @@ echo 1 > $NSIM_DEV_DFS/udp_ports_sync_all for port in 0 1; do if [ $port -ne 0 ]; then echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > $NSIM_DEV_DFS/udp_ports_sleep fi echo $port > $NSIM_DEV_SYS/new_port @@ -543,7 +528,6 @@ echo 0 > $NSIM_DEV_SYS/del_port for port in 0 1; do if [ $port -ne 0 ]; then echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > $NSIM_DEV_DFS/udp_ports_sleep fi echo $port > $NSIM_DEV_SYS/new_port @@ -573,7 +557,6 @@ echo 1 > $NSIM_DEV_DFS/udp_ports_ipv4_only for port in 0 1; do if [ $port -ne 0 ]; then echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > $NSIM_DEV_DFS/udp_ports_sleep fi echo $port > $NSIM_DEV_SYS/new_port @@ -634,7 +617,6 @@ echo 0 > $NSIM_DEV_SYS/del_port for port in 0 1; do if [ $port -ne 0 ]; then echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > $NSIM_DEV_DFS/udp_ports_sleep fi echo $port > $NSIM_DEV_SYS/new_port @@ -690,7 +672,6 @@ echo 0 > $NSIM_DEV_SYS/del_port for port in 0 1; do if [ $port -ne 0 ]; then echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > $NSIM_DEV_DFS/udp_ports_sleep fi echo $port > $NSIM_DEV_SYS/new_port @@ -750,7 +731,6 @@ echo 0 > $NSIM_DEV_SYS/del_port for port in 0 1; do if [ $port -ne 0 ]; then echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > $NSIM_DEV_DFS/udp_ports_sleep fi echo $port > $NSIM_DEV_SYS/new_port @@ -809,7 +789,6 @@ echo $NSIM_ID > /sys/bus/netdevsim/new_device echo 0 > $NSIM_DEV_SYS/del_port echo 0 > $NSIM_DEV_DFS/udp_ports_open_only -echo 1 > $NSIM_DEV_DFS/udp_ports_sleep echo 1 > $NSIM_DEV_DFS/udp_ports_shared old_netdevs=$(ls /sys/class/net) -- cgit v1.2.3 From 56a14984505b11674df5e01407748236bc4bc8f8 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Sun, 8 Jun 2025 17:54:02 +0800 Subject: KVM: arm64: selftests: Close the GIC FD in arch_timer_edge_cases Close the GIC FD to free the reference it holds to the VM so that we can correctly clean up the VM. This also gets rid of the "KVM: debugfs: duplicate directory 395722-4" warning when running arch_timer_edge_cases. Signed-off-by: Zenghui Yu Reviewed-by: Miguel Luis Reviewed-by: Sebastian Ott Link: https://lore.kernel.org/r/20250608095402.1131-1-yuzenghui@huawei.com Signed-off-by: Marc Zyngier --- .../testing/selftests/kvm/arm64/arch_timer_edge_cases.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c index b4d22b3ab7cc..4e71740a098b 100644 --- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c @@ -954,6 +954,8 @@ static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu) pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq); } +static int gic_fd; + static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu, enum arch_timer timer) { @@ -968,12 +970,20 @@ static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu, vcpu_args_set(*vcpu, 1, timer); test_init_timer_irq(*vm, *vcpu); - vgic_v3_setup(*vm, 1, 64); + gic_fd = vgic_v3_setup(*vm, 1, 64); + __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3"); + sync_global_to_guest(*vm, test_args); sync_global_to_guest(*vm, CVAL_MAX); sync_global_to_guest(*vm, DEF_CNT); } +static void test_vm_cleanup(struct kvm_vm *vm) +{ + close(gic_fd); + kvm_vm_free(vm); +} + static void test_print_help(char *name) { pr_info("Usage: %s [-h] [-b] [-i iterations] [-l long_wait_ms] [-p] [-v]\n" @@ -1060,13 +1070,13 @@ int main(int argc, char *argv[]) if (test_args.test_virtual) { test_vm_create(&vm, &vcpu, VIRTUAL); test_run(vm, vcpu); - kvm_vm_free(vm); + test_vm_cleanup(vm); } if (test_args.test_physical) { test_vm_create(&vm, &vcpu, PHYSICAL); test_run(vm, vcpu); - kvm_vm_free(vm); + test_vm_cleanup(vm); } return 0; -- cgit v1.2.3 From 94a7ce26428d3a7ceb46c503ed726160578b9fcc Mon Sep 17 00:00:00 2001 From: Mickaël Salaün Date: Wed, 28 May 2025 16:44:25 +0200 Subject: selftests/landlock: Fix readlink check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The audit_init_filter_exe() helper incorrectly checks the readlink(2) error because an unsigned integer is used to store the result. Use a signed integer for this check. Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/aDbFwyZ_fM-IO7sC@stanley.mountain Fixes: 6a500b22971c ("selftests/landlock: Add tests for audit flags and domain IDs") Reviewed-by: Günther Noack Link: https://lore.kernel.org/r/20250528144426.1709063-1-mic@digikod.net Signed-off-by: Mickaël Salaün --- tools/testing/selftests/landlock/audit.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/landlock/audit.h b/tools/testing/selftests/landlock/audit.h index 18a6014920b5..b16986aa6442 100644 --- a/tools/testing/selftests/landlock/audit.h +++ b/tools/testing/selftests/landlock/audit.h @@ -403,11 +403,12 @@ static int audit_init_filter_exe(struct audit_filter *filter, const char *path) /* It is assume that there is not already filtering rules. */ filter->record_type = AUDIT_EXE; if (!path) { - filter->exe_len = readlink("/proc/self/exe", filter->exe, - sizeof(filter->exe) - 1); - if (filter->exe_len < 0) + int ret = readlink("/proc/self/exe", filter->exe, + sizeof(filter->exe) - 1); + if (ret < 0) return -errno; + filter->exe_len = ret; return 0; } -- cgit v1.2.3 From dc58130bc38f09b162aa3b216f8b8f1e0a56127b Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 5 Jun 2025 14:44:16 -0700 Subject: selftests/landlock: Fix build of audit_test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are hitting build error on CentOS 9: audit_test.c:232:40: error: ‘O_CLOEXEC’ undeclared (...) Fix this by including fcntl.h. Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20250605214416.1885878-1-song@kernel.org Fixes: 6b4566400a29 ("selftests/landlock: Add PID tests for audit records") Signed-off-by: Mickaël Salaün --- tools/testing/selftests/landlock/audit_test.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c index cfc571afd0eb..46d02d49835a 100644 --- a/tools/testing/selftests/landlock/audit_test.c +++ b/tools/testing/selftests/landlock/audit_test.c @@ -7,6 +7,7 @@ #define _GNU_SOURCE #include +#include #include #include #include -- cgit v1.2.3 From dae01387e6a9bbce31f6c62839733ab8b63e16f3 Mon Sep 17 00:00:00 2001 From: Mickaël Salaün Date: Fri, 6 Jun 2025 13:08:09 +0200 Subject: selftests/landlock: Add test to check rule tied to covered mount point MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This test checks that a rule on a directory used as a mount point does not grant access to the mount covering it. It is a generalization of the bind mount case in layout3_fs.hostfs.release_inodes [1] that tests hidden mount points. Cc: Günther Noack Cc: Song Liu Cc: Tingmao Wang Link: https://lore.kernel.org/r/20250606.zo5aekae6Da6@digikod.net [1] Link: https://lore.kernel.org/r/20250606110811.211297-1-mic@digikod.net Signed-off-by: Mickaël Salaün --- tools/testing/selftests/landlock/fs_test.c | 40 ++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c index 73729382d40f..fa0f18ec62c4 100644 --- a/tools/testing/selftests/landlock/fs_test.c +++ b/tools/testing/selftests/landlock/fs_test.c @@ -1832,6 +1832,46 @@ TEST_F_FORK(layout1, release_inodes) ASSERT_EQ(ENOENT, test_open(dir_s3d3, O_RDONLY)); } +/* + * This test checks that a rule on a directory used as a mount point does not + * grant access to the mount covering it. It is a generalization of the bind + * mount case in layout3_fs.hostfs.release_inodes that tests hidden mount points. + */ +TEST_F_FORK(layout1, covered_rule) +{ + const struct rule layer1[] = { + { + .path = dir_s3d2, + .access = LANDLOCK_ACCESS_FS_READ_DIR, + }, + {}, + }; + int ruleset_fd; + + /* Unmount to simplify FIXTURE_TEARDOWN. */ + set_cap(_metadata, CAP_SYS_ADMIN); + ASSERT_EQ(0, umount(dir_s3d2)); + clear_cap(_metadata, CAP_SYS_ADMIN); + + /* Creates a ruleset with the future hidden directory. */ + ruleset_fd = + create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_DIR, layer1); + ASSERT_LE(0, ruleset_fd); + + /* Covers with a new mount point. */ + set_cap(_metadata, CAP_SYS_ADMIN); + ASSERT_EQ(0, mount_opt(&mnt_tmp, dir_s3d2)); + clear_cap(_metadata, CAP_SYS_ADMIN); + + ASSERT_EQ(0, test_open(dir_s3d2, O_RDONLY)); + + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks that access to the new mount point is denied. */ + ASSERT_EQ(EACCES, test_open(dir_s3d2, O_RDONLY)); +} + enum relative_access { REL_OPEN, REL_CHDIR, -- cgit v1.2.3 From d83a5806759278c4898cd7c2116432e5b08df1df Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 17 Jun 2025 10:50:59 +0000 Subject: selftests: net: use slowwait to stabilize vrf_route_leaking test The vrf_route_leaking test occasionally fails due to connectivity issues in our testing environment. A sample failure message shows that the ping check fails intermittently PING 2001:db8:16:2::2 (2001:db8:16:2::2) 56 data bytes --- 2001:db8:16:2::2 ping statistics --- 1 packets transmitted, 0 received, 100% packet loss, time 0ms TEST: Basic IPv6 connectivity [FAIL] This is likely due to insufficient wait time on slower machines. To address this, switch to using slowwait, which provides a longer and more reliable wait for setup completion. Before this change, the test failed 3 out of 10 times. After applying this fix, the test was run 30 times without any failure. Signed-off-by: Hangbin Liu Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250617105101.433718-2-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/vrf_route_leaking.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh index e9c2f71da207..ce34cb2e6e0b 100755 --- a/tools/testing/selftests/net/vrf_route_leaking.sh +++ b/tools/testing/selftests/net/vrf_route_leaking.sh @@ -275,7 +275,7 @@ setup_sym() # Wait for ip config to settle - sleep 2 + slowwait 5 ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1 } setup_asym() @@ -370,7 +370,7 @@ setup_asym() ip -netns $r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad # Wait for ip config to settle - sleep 2 + slowwait 5 ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1 } check_connectivity() -- cgit v1.2.3 From 948670361c0c189556f3d97c5b2e6ed7ae198da9 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 17 Jun 2025 10:51:00 +0000 Subject: selftests: net: use slowwait to make sure IPv6 setup finished Sometimes the vxlan vnifiltering test failed on slow machines due to network setup not finished. e.g. TEST: VM connectivity over vnifiltering vxlan (ipv4 default rdst) [ OK ] TEST: VM connectivity over vnifiltering vxlan (ipv6 default rdst) [FAIL] Let's use slowwait to make sure the connection is finished. Signed-off-by: Hangbin Liu Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250617105101.433718-3-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/test_vxlan_vnifiltering.sh | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh index 6127a78ee988..8deacc565afa 100755 --- a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh +++ b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh @@ -146,18 +146,17 @@ run_cmd() } check_hv_connectivity() { - ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null - sleep 1 - ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null + slowwait 5 ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null + slowwait 5 ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null return $? } check_vm_connectivity() { - run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12" + slowwait 5 run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12" log_test $? 0 "VM connectivity over $1 (ipv4 default rdst)" - run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22" + slowwait 5 run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22" log_test $? 0 "VM connectivity over $1 (ipv6 default rdst)" } -- cgit v1.2.3 From 965f87700adbcc6d72430f524d88135027f5bba3 Mon Sep 17 00:00:00 2001 From: Shivank Garg Date: Mon, 9 Jun 2025 12:06:07 +0000 Subject: selftests/mm: increase timeout from 180 to 900 seconds The mm selftests are timing out with the current 180-second limit. Testing shows that run_vmtests.sh takes approximately 11 minutes (664 seconds) to complete. Increase the timeout to 900 seconds (15 minutes) to provide sufficient buffer for the tests to complete successfully. Link: https://lkml.kernel.org/r/20250609120606.73145-2-shivankg@amd.com Signed-off-by: Shivank Garg Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/settings | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/mm/settings b/tools/testing/selftests/mm/settings index a953c96aa16e..e2206265f67c 100644 --- a/tools/testing/selftests/mm/settings +++ b/tools/testing/selftests/mm/settings @@ -1 +1 @@ -timeout=180 +timeout=900 -- cgit v1.2.3 From 223731cd63004cb07edfc6257d53565995c00cbb Mon Sep 17 00:00:00 2001 From: Dev Jain Date: Fri, 13 Jun 2025 09:19:12 +0530 Subject: selftests/mm: add configs to fix testcase failure If CONFIG_UPROBES is not set, a merge subtest fails: Failure log: 7151 12:46:54.627936 # # # RUN merge.handle_uprobe_upon_merged_vma ... 7152 12:46:54.639014 # # f /sys/bus/event_source/devices/uprobe/type 7153 12:46:54.639306 # # fopen: No such file or directory 7154 12:46:54.650451 # # # merge.c:473:handle_uprobe_upon_merged_vma:Expected read_sysfs("/sys/bus/event_source/devices/uprobe/type", &type) (1) == 0 (0) 7155 12:46:54.650730 # # # handle_uprobe_upon_merged_vma: Test terminated by assertion 7156 12:46:54.661750 # # # FAIL merge.handle_uprobe_upon_merged_vma 7157 12:46:54.662030 # # not ok 8 merge.handle_uprobe_upon_merged_vma CONFIG_UPROBES is enabled by CONFIG_UPROBE_EVENTS, which gets enabled by CONFIG_FTRACE. Therefore add these configs to selftests/mm/config so that CI systems can include this config in the kernel build. To be completely safe, add CONFIG_PROFILING too, to enable the dependency chain PROFILING -> PERF_EVENTS -> UPROBE_EVENTS -> UPROBES. Link: https://lkml.kernel.org/r/20250613034912.53791-1-dev.jain@arm.com Fixes: efe99fabeb11 ("selftests/mm: add test about uprobe pte be orphan during vma merge") Signed-off-by: Dev Jain Reported-by: Aishwarya Closes: https://lore.kernel.org/all/20250610103729.72440-1-aishwarya.tcv@arm.com/ Tested-by: Aishwarya TCV Tested-by : Donet Tom Reviewed-by: Lorenzo Stoakes Reviewed-by: Anshuman Khandual Reviewed-by: Mark Brown Reviewed-by: Donet Tom Reviewed-by: Pedro Falcato Cc: Jann Horn Cc: Liam Howlett Cc: Pu Lehui Cc: Ryan Roberts Cc: Shuah Khan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/config | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/mm/config b/tools/testing/selftests/mm/config index a28baa536332..deba93379c80 100644 --- a/tools/testing/selftests/mm/config +++ b/tools/testing/selftests/mm/config @@ -8,3 +8,6 @@ CONFIG_GUP_TEST=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_MEM_SOFT_DIRTY=y CONFIG_ANON_VMA_NAME=y +CONFIG_FTRACE=y +CONFIG_PROFILING=y +CONFIG_UPROBES=y -- cgit v1.2.3 From 3333871296efd52ef6f6d5cce5a92dc7b06ba879 Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Tue, 10 Jun 2025 13:22:09 +0100 Subject: selftests/mm: skip uprobe vma merge test if uprobes are not enabled If uprobes are not enabled, the test currently fails with: 7151 12:46:54.627936 # # # RUN merge.handle_uprobe_upon_merged_vma ... 7152 12:46:54.639014 # # f /sys/bus/event_source/devices/uprobe/type 7153 12:46:54.639306 # # fopen: No such file or directory 7154 12:46:54.650451 # # # merge.c:473:handle_uprobe_upon_merged_vma:Expected read_sysfs("/sys/bus/event_source/devices/uprobe/type", &type) (1) == 0 (0) 7155 12:46:54.650730 # # # handle_uprobe_upon_merged_vma: Test terminated by assertion 7156 12:46:54.661750 # # # FAIL merge.handle_uprobe_upon_merged_vma 7157 12:46:54.662030 # # not ok 8 merge.handle_uprobe_upon_merged_vma Skipping is a more sane and friendly behavior here. Link: https://lkml.kernel.org/r/20250610122209.3177587-1-pfalcato@suse.de Fixes: efe99fabeb11 ("selftests/mm: add test about uprobe pte be orphan during vma merge") Signed-off-by: Pedro Falcato Reported-by: Aishwarya Closes: https://lore.kernel.org/linux-mm/20250610103729.72440-1-aishwarya.tcv@arm.com/ Reviewed-by: Lorenzo Stoakes Tested-by : Donet Tom Reviewed-by : Donet Tom Reviewed-by: Dev Jain Reviewed-by: Pu Lehui Cc: Jann Horn Cc: Liam Howlett Cc: Mark Brown Cc: Shuah Khan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/merge.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/mm/merge.c b/tools/testing/selftests/mm/merge.c index bbae66fc5038..cc26480098ae 100644 --- a/tools/testing/selftests/mm/merge.c +++ b/tools/testing/selftests/mm/merge.c @@ -470,7 +470,9 @@ TEST_F(merge, handle_uprobe_upon_merged_vma) ASSERT_GE(fd, 0); ASSERT_EQ(ftruncate(fd, page_size), 0); - ASSERT_EQ(read_sysfs("/sys/bus/event_source/devices/uprobe/type", &type), 0); + if (read_sysfs("/sys/bus/event_source/devices/uprobe/type", &type) != 0) { + SKIP(goto out, "Failed to read uprobe sysfs file, skipping"); + } memset(&attr, 0, attr_sz); attr.size = attr_sz; @@ -491,6 +493,7 @@ TEST_F(merge, handle_uprobe_upon_merged_vma) ASSERT_NE(mremap(ptr2, page_size, page_size, MREMAP_MAYMOVE | MREMAP_FIXED, ptr1), MAP_FAILED); +out: close(fd); remove(probe_file); } -- cgit v1.2.3 From f8b19aeca1652fcadefce8529cd85e5fd475dd69 Mon Sep 17 00:00:00 2001 From: Slava Imameev Date: Sat, 21 Jun 2025 01:18:12 +1000 Subject: selftests/bpf: Add test for bpftool access to read-only protected maps Add selftest cases that validate bpftool's expected behavior when accessing maps protected from modification via security_bpf_map. The test includes a BPF program attached to security_bpf_map with two maps: - A protected map that only allows read-only access - An unprotected map that allows full access The test script attaches the BPF program to security_bpf_map and verifies that for the bpftool map command: - Read access works on both maps - Write access fails on the protected map - Write access succeeds on the unprotected map - These behaviors remain consistent when the maps are pinned Signed-off-by: Slava Imameev Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/r/20250620151812.13952-2-slava.imameev@crowdstrike.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 1 + .../selftests/bpf/progs/bpf_iter_map_elem.c | 22 ++ .../testing/selftests/bpf/progs/security_bpf_map.c | 69 ++++ tools/testing/selftests/bpf/test_bpftool_map.sh | 398 +++++++++++++++++++++ 4 files changed, 490 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c create mode 100644 tools/testing/selftests/bpf/progs/security_bpf_map.c create mode 100755 tools/testing/selftests/bpf/test_bpftool_map.sh (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 97013c49920b..da868bb8c421 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -109,6 +109,7 @@ TEST_PROGS := test_kmod.sh \ test_xdping.sh \ test_bpftool_build.sh \ test_bpftool.sh \ + test_bpftool_map.sh \ test_bpftool_metadata.sh \ test_doc_build.sh \ test_xsk.sh \ diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c b/tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c new file mode 100644 index 000000000000..2f20485e0de3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "vmlinux.h" +#include +#include + +char _license[] SEC("license") = "GPL"; + +__u32 value_sum = 0; + +SEC("iter/bpf_map_elem") +int dump_bpf_map_values(struct bpf_iter__bpf_map_elem *ctx) +{ + __u32 value = 0; + + if (ctx->value == (void *)0) + return 0; + + bpf_probe_read_kernel(&value, sizeof(value), ctx->value); + value_sum += value; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/security_bpf_map.c b/tools/testing/selftests/bpf/progs/security_bpf_map.c new file mode 100644 index 000000000000..7216b3450e96 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/security_bpf_map.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "vmlinux.h" +#include +#include + +char _license[] SEC("license") = "GPL"; + +#define EPERM 1 /* Operation not permitted */ + +/* From include/linux/mm.h. */ +#define FMODE_WRITE 0x2 + +struct map; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 1); +} prot_status_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 3); +} prot_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 3); +} not_prot_map SEC(".maps"); + +SEC("fmod_ret/security_bpf_map") +int BPF_PROG(fmod_bpf_map, struct bpf_map *map, int fmode) +{ + __u32 key = 0; + __u32 *status_ptr = bpf_map_lookup_elem(&prot_status_map, &key); + + if (!status_ptr || !*status_ptr) + return 0; + + if (map == &prot_map) { + /* Allow read-only access */ + if (fmode & FMODE_WRITE) + return -EPERM; + } + + return 0; +} + +/* + * This program keeps references to maps. This is needed to prevent + * optimizing them out. + */ +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(fentry_dummy1, int a) +{ + __u32 key = 0; + __u32 val1 = a; + __u32 val2 = a + 1; + + bpf_map_update_elem(&prot_map, &key, &val1, BPF_ANY); + bpf_map_update_elem(¬_prot_map, &key, &val2, BPF_ANY); + return 0; +} diff --git a/tools/testing/selftests/bpf/test_bpftool_map.sh b/tools/testing/selftests/bpf/test_bpftool_map.sh new file mode 100755 index 000000000000..515b1df0501e --- /dev/null +++ b/tools/testing/selftests/bpf/test_bpftool_map.sh @@ -0,0 +1,398 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +TESTNAME="bpftool_map" +BPF_FILE="security_bpf_map.bpf.o" +BPF_ITER_FILE="bpf_iter_map_elem.bpf.o" +PROTECTED_MAP_NAME="prot_map" +NOT_PROTECTED_MAP_NAME="not_prot_map" +BPF_FS_TMP_PARENT="/tmp" +BPF_FS_PARENT=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts) +BPF_FS_PARENT=${BPF_FS_PARENT:-$BPF_FS_TMP_PARENT} +# bpftool will mount bpf file system under BPF_DIR if it is not mounted +# under BPF_FS_PARENT. +BPF_DIR="$BPF_FS_PARENT/test_$TESTNAME" +SCRIPT_DIR=$(dirname $(realpath "$0")) +BPF_FILE_PATH="$SCRIPT_DIR/$BPF_FILE" +BPF_ITER_FILE_PATH="$SCRIPT_DIR/$BPF_ITER_FILE" +BPFTOOL_PATH="bpftool" +# Assume the script is located under tools/testing/selftests/bpf/ +KDIR_ROOT_DIR=$(realpath "$SCRIPT_DIR"/../../../../) + +_cleanup() +{ + set +eu + + # If BPF_DIR is a mount point this will not remove the mount point itself. + [ -d "$BPF_DIR" ] && rm -rf "$BPF_DIR" 2> /dev/null + + # Unmount if BPF filesystem was temporarily created. + if [ "$BPF_FS_PARENT" = "$BPF_FS_TMP_PARENT" ]; then + # A loop and recursive unmount are required as bpftool might + # create multiple mounts. For example, a bind mount of the directory + # to itself. The bind mount is created to change mount propagation + # flags on an actual mount point. + max_attempts=3 + attempt=0 + while mountpoint -q "$BPF_DIR" && [ $attempt -lt $max_attempts ]; do + umount -R "$BPF_DIR" 2>/dev/null + attempt=$((attempt+1)) + done + + # The directory still exists. Remove it now. + [ -d "$BPF_DIR" ] && rm -rf "$BPF_DIR" 2>/dev/null + fi +} + +cleanup_skip() +{ + echo "selftests: $TESTNAME [SKIP]" + _cleanup + + exit $ksft_skip +} + +cleanup() +{ + if [ "$?" = 0 ]; then + echo "selftests: $TESTNAME [PASS]" + else + echo "selftests: $TESTNAME [FAILED]" + fi + _cleanup +} + +check_root_privileges() { + if [ $(id -u) -ne 0 ]; then + echo "Need root privileges" + exit $ksft_skip + fi +} + +# Function to verify bpftool path. +# Parameters: +# $1: bpftool path +verify_bpftool_path() { + local bpftool_path="$1" + if ! "$bpftool_path" version > /dev/null 2>&1; then + echo "Could not run test without bpftool" + exit $ksft_skip + fi +} + +# Function to verify BTF support. +# The test requires BTF support for fmod_ret programs. +verify_btf_support() { + if [ ! -f /sys/kernel/btf/vmlinux ]; then + echo "Could not run test without BTF support" + exit $ksft_skip + fi +} + +# Function to initialize map entries with keys [0..2] and values set to 0. +# Parameters: +# $1: Map name +# $2: bpftool path +initialize_map_entries() { + local map_name="$1" + local bpftool_path="$2" + + for key in 0 1 2; do + "$bpftool_path" map update name "$map_name" key $key 0 0 0 value 0 0 0 $key + done +} + +# Test read access to the map. +# Parameters: +# $1: Name command (name/pinned) +# $2: Map name +# $3: bpftool path +# $4: key +access_for_read() { + local name_cmd="$1" + local map_name="$2" + local bpftool_path="$3" + local key="$4" + + # Test read access to the map. + if ! "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then + echo " Read access to $key in $map_name failed" + exit 1 + fi + + # Test read access to map's BTF data. + if ! "$bpftool_path" btf dump map "$name_cmd" "$map_name" 1>/dev/null; then + echo " Read access to $map_name for BTF data failed" + exit 1 + fi +} + +# Test write access to the map. +# Parameters: +# $1: Name command (name/pinned) +# $2: Map name +# $3: bpftool path +# $4: key +# $5: Whether write should succeed (true/false) +access_for_write() { + local name_cmd="$1" + local map_name="$2" + local bpftool_path="$3" + local key="$4" + local write_should_succeed="$5" + local value="1 1 1 1" + + if "$bpftool_path" map update "$name_cmd" "$map_name" key $key value \ + $value 2>/dev/null; then + if [ "$write_should_succeed" = "false" ]; then + echo " Write access to $key in $map_name succeeded but should have failed" + exit 1 + fi + else + if [ "$write_should_succeed" = "true" ]; then + echo " Write access to $key in $map_name failed but should have succeeded" + exit 1 + fi + fi +} + +# Test entry deletion for the map. +# Parameters: +# $1: Name command (name/pinned) +# $2: Map name +# $3: bpftool path +# $4: key +# $5: Whether write should succeed (true/false) +access_for_deletion() { + local name_cmd="$1" + local map_name="$2" + local bpftool_path="$3" + local key="$4" + local write_should_succeed="$5" + local value="1 1 1 1" + + # Test deletion by key for the map. + # Before deleting, check the key exists. + if ! "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then + echo " Key $key does not exist in $map_name" + exit 1 + fi + + # Delete by key. + if "$bpftool_path" map delete "$name_cmd" "$map_name" key $key 2>/dev/null; then + if [ "$write_should_succeed" = "false" ]; then + echo " Deletion for $key in $map_name succeeded but should have failed" + exit 1 + fi + else + if [ "$write_should_succeed" = "true" ]; then + echo " Deletion for $key in $map_name failed but should have succeeded" + exit 1 + fi + fi + + # After deleting, check the entry existence according to the expected status. + if "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then + if [ "$write_should_succeed" = "true" ]; then + echo " Key $key for $map_name was not deleted but should have been deleted" + exit 1 + fi + else + if [ "$write_should_succeed" = "false" ]; then + echo "Key $key for $map_name was deleted but should have not been deleted" + exit 1 + fi + fi + + # Test creation of map's deleted entry, if deletion was successful. + # Otherwise, the entry exists. + if "$bpftool_path" map update "$name_cmd" "$map_name" key $key value \ + $value 2>/dev/null; then + if [ "$write_should_succeed" = "false" ]; then + echo " Write access to $key in $map_name succeeded after deletion attempt but should have failed" + exit 1 + fi + else + if [ "$write_should_succeed" = "true" ]; then + echo " Write access to $key in $map_name failed after deletion attempt but should have succeeded" + exit 1 + fi + fi +} + +# Test map elements iterator. +# Parameters: +# $1: Name command (name/pinned) +# $2: Map name +# $3: bpftool path +# $4: BPF_DIR +# $5: bpf iterator object file path +iterate_map_elem() { + local name_cmd="$1" + local map_name="$2" + local bpftool_path="$3" + local bpf_dir="$4" + local bpf_file="$5" + local pin_path="$bpf_dir/map_iterator" + + "$bpftool_path" iter pin "$bpf_file" "$pin_path" map "$name_cmd" "$map_name" + if [ ! -f "$pin_path" ]; then + echo " Failed to pin iterator to $pin_path" + exit 1 + fi + + cat "$pin_path" 1>/dev/null + rm "$pin_path" 2>/dev/null +} + +# Function to test map access with configurable write expectations +# Parameters: +# $1: Name command (name/pinned) +# $2: Map name +# $3: bpftool path +# $4: key for rw +# $5: key to delete +# $6: Whether write should succeed (true/false) +# $7: BPF_DIR +# $8: bpf iterator object file path +access_map() { + local name_cmd="$1" + local map_name="$2" + local bpftool_path="$3" + local key_for_rw="$4" + local key_to_del="$5" + local write_should_succeed="$6" + local bpf_dir="$7" + local bpf_iter_file_path="$8" + + access_for_read "$name_cmd" "$map_name" "$bpftool_path" "$key_for_rw" + access_for_write "$name_cmd" "$map_name" "$bpftool_path" "$key_for_rw" \ + "$write_should_succeed" + access_for_deletion "$name_cmd" "$map_name" "$bpftool_path" "$key_to_del" \ + "$write_should_succeed" + iterate_map_elem "$name_cmd" "$map_name" "$bpftool_path" "$bpf_dir" \ + "$bpf_iter_file_path" +} + +# Function to test map access with configurable write expectations +# Parameters: +# $1: Map name +# $2: bpftool path +# $3: BPF_DIR +# $4: Whether write should succeed (true/false) +# $5: bpf iterator object file path +test_map_access() { + local map_name="$1" + local bpftool_path="$2" + local bpf_dir="$3" + local pin_path="$bpf_dir/${map_name}_pinned" + local write_should_succeed="$4" + local bpf_iter_file_path="$5" + + # Test access to the map by name. + access_map "name" "$map_name" "$bpftool_path" "0 0 0 0" "1 0 0 0" \ + "$write_should_succeed" "$bpf_dir" "$bpf_iter_file_path" + + # Pin the map to the BPF filesystem + "$bpftool_path" map pin name "$map_name" "$pin_path" + if [ ! -e "$pin_path" ]; then + echo " Failed to pin $map_name" + exit 1 + fi + + # Test access to the pinned map. + access_map "pinned" "$pin_path" "$bpftool_path" "0 0 0 0" "2 0 0 0" \ + "$write_should_succeed" "$bpf_dir" "$bpf_iter_file_path" +} + +# Function to test map creation and map-of-maps +# Parameters: +# $1: bpftool path +# $2: BPF_DIR +test_map_creation_and_map_of_maps() { + local bpftool_path="$1" + local bpf_dir="$2" + local outer_map_name="outer_map_tt" + local inner_map_name="inner_map_tt" + + "$bpftool_path" map create "$bpf_dir/$inner_map_name" type array key 4 \ + value 4 entries 4 name "$inner_map_name" + if [ ! -f "$bpf_dir/$inner_map_name" ]; then + echo " Failed to create inner map file at $bpf_dir/$outer_map_name" + return 1 + fi + + "$bpftool_path" map create "$bpf_dir/$outer_map_name" type hash_of_maps \ + key 4 value 4 entries 2 name "$outer_map_name" inner_map name "$inner_map_name" + if [ ! -f "$bpf_dir/$outer_map_name" ]; then + echo " Failed to create outer map file at $bpf_dir/$outer_map_name" + return 1 + fi + + # Add entries to the outer map by name and by pinned path. + "$bpftool_path" map update pinned "$bpf_dir/$outer_map_name" key 0 0 0 0 \ + value pinned "$bpf_dir/$inner_map_name" + "$bpftool_path" map update name "$outer_map_name" key 1 0 0 0 value \ + name "$inner_map_name" + + # The outer map should be full by now. + # The following map update command is expected to fail. + if "$bpftool_path" map update name "$outer_map_name" key 2 0 0 0 value name \ + "$inner_map_name" 2>/dev/null; then + echo " Update for $outer_map_name succeeded but should have failed" + exit 1 + fi +} + +# Function to test map access with the btf list command +# Parameters: +# $1: bpftool path +test_map_access_with_btf_list() { + local bpftool_path="$1" + + # The btf list command iterates over maps for + # loaded BPF programs. + if ! "$bpftool_path" btf list 1>/dev/null; then + echo " Failed to access btf data" + exit 1 + fi +} + +set -eu + +trap cleanup_skip EXIT + +check_root_privileges + +verify_bpftool_path "$BPFTOOL_PATH" + +verify_btf_support + +trap cleanup EXIT + +# Load and attach the BPF programs to control maps access. +"$BPFTOOL_PATH" prog loadall "$BPF_FILE_PATH" "$BPF_DIR" autoattach + +initialize_map_entries "$PROTECTED_MAP_NAME" "$BPFTOOL_PATH" +initialize_map_entries "$NOT_PROTECTED_MAP_NAME" "$BPFTOOL_PATH" + +# Activate the map protection mechanism. Protection status is controlled +# by a value stored in the prot_status_map at index 0. +"$BPFTOOL_PATH" map update name prot_status_map key 0 0 0 0 value 1 0 0 0 + +# Test protected map (write should fail). +test_map_access "$PROTECTED_MAP_NAME" "$BPFTOOL_PATH" "$BPF_DIR" "false" \ + "$BPF_ITER_FILE_PATH" + +# Test not protected map (write should succeed). +test_map_access "$NOT_PROTECTED_MAP_NAME" "$BPFTOOL_PATH" "$BPF_DIR" "true" \ + "$BPF_ITER_FILE_PATH" + +test_map_creation_and_map_of_maps "$BPFTOOL_PATH" "$BPF_DIR" + +test_map_access_with_btf_list "$BPFTOOL_PATH" + +exit 0 -- cgit v1.2.3 From 0792c71c1c94964952339f3251818b6dcf66c19b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 10 Jun 2025 15:57:37 -0700 Subject: KVM: selftests: Verify KVM disable interception (for userspace) on filter change Re-read MSR_{FS,GS}_BASE after restoring the "allow everything" userspace MSR filter to verify that KVM stops forwarding exits to userspace. This can also be used in conjunction with manual verification (e.g. printk) to ensure KVM is correctly updating the MSR bitmaps consumed by hardware. Tested-by: Dapeng Mi Tested-by: Manali Shukla Link: https://lore.kernel.org/r/20250610225737.156318-33-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c index 32b2794b78fe..8463a9956410 100644 --- a/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c +++ b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c @@ -343,6 +343,12 @@ static void guest_code_permission_bitmap(void) data = test_rdmsr(MSR_GS_BASE); GUEST_ASSERT(data == MSR_GS_BASE); + /* Access the MSRs again to ensure KVM has disabled interception.*/ + data = test_rdmsr(MSR_FS_BASE); + GUEST_ASSERT(data != MSR_FS_BASE); + data = test_rdmsr(MSR_GS_BASE); + GUEST_ASSERT(data != MSR_GS_BASE); + GUEST_DONE(); } @@ -682,6 +688,8 @@ KVM_ONE_VCPU_TEST(user_msr, msr_permission_bitmap, guest_code_permission_bitmap) "Expected ucall state to be UCALL_SYNC."); vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs); run_guest_then_process_rdmsr(vcpu, MSR_GS_BASE); + + vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow); run_guest_then_process_ucall_done(vcpu); } -- cgit v1.2.3 From 30142a93b1641b19bf4ba2cd6beb5814adc5b746 Mon Sep 17 00:00:00 2001 From: Rahul Kumar Date: Fri, 23 May 2025 23:43:52 +0530 Subject: KVM: selftests: Fix spelling of 'occurrences' in sparsebit.c comments Correct two instances of the misspelled word 'occurences' to 'occurrences' in comments explaining node invariants in sparsebit.c. Signed-off-by: Rahul Kumar Link: https://lore.kernel.org/r/20250523181606.568320-1-rk0006818@gmail.com [sean: massage changelog] Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/lib/sparsebit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c index cfed9d26cc71..a99188f87a38 100644 --- a/tools/testing/selftests/kvm/lib/sparsebit.c +++ b/tools/testing/selftests/kvm/lib/sparsebit.c @@ -116,7 +116,7 @@ * * + A node with all mask bits set only occurs when the last bit * described by the previous node is not equal to this nodes - * starting index - 1. All such occurences of this condition are + * starting index - 1. All such occurrences of this condition are * avoided by moving the setting of the nodes mask bits into * the previous nodes num_after setting. * @@ -592,7 +592,7 @@ static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx) * * + A node with all mask bits set only occurs when the last bit * described by the previous node is not equal to this nodes - * starting index - 1. All such occurences of this condition are + * starting index - 1. All such occurrences of this condition are * avoided by moving the setting of the nodes mask bits into * the previous nodes num_after setting. */ -- cgit v1.2.3 From fcab107abe1ab5be9dbe874baa722372da8f4f73 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 16 May 2025 14:59:06 -0700 Subject: KVM: selftests: Verify KVM is loaded when getting a KVM module param Probe /dev/kvm when getting a KVM module param so that attempting to load a module param super early in a selftest generates a SKIP message about KVM not being loaded/enabled, versus some random parameter not existing. E.g. KVM x86's unconditional retrieval of force_emulation_prefix during kvm_selftest_arch_init() generates a rather confusing error message that takes far too much triage to understand. Link: https://lore.kernel.org/r/20250516215909.2551628-2-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/include/x86/processor.h | 6 +++++- tools/testing/selftests/kvm/lib/kvm_util.c | 3 +++ tools/testing/selftests/kvm/lib/x86/processor.c | 10 ---------- .../selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c | 2 +- 4 files changed, 9 insertions(+), 12 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index b11b5a53ebd5..2efb05c2f2fb 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -1150,7 +1150,6 @@ do { \ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); void kvm_init_vm_address_properties(struct kvm_vm *vm); -bool vm_is_unrestricted_guest(struct kvm_vm *vm); struct ex_regs { uint64_t rax, rcx, rdx, rbx; @@ -1325,6 +1324,11 @@ static inline bool kvm_is_forced_emulation_enabled(void) return !!get_kvm_param_integer("force_emulation_prefix"); } +static inline bool kvm_is_unrestricted_guest_enabled(void) +{ + return get_kvm_intel_param_bool("unrestricted_guest"); +} + uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, int *level); uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index a055343a7bf7..7573771a6146 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -64,6 +64,9 @@ static ssize_t get_module_param(const char *module_name, const char *param, ssize_t bytes_read; int fd, r; + /* Verify KVM is loaded, to provide a more helpful SKIP message. */ + close(open_kvm_dev_path_or_exit()); + r = snprintf(path, path_size, "/sys/module/%s/parameters/%s", module_name, param); TEST_ASSERT(r < path_size, diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index a92dc1dad085..d4c19ac885a9 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -1264,16 +1264,6 @@ done: return min(max_gfn, ht_gfn - 1); } -/* Returns true if kvm_intel was loaded with unrestricted_guest=1. */ -bool vm_is_unrestricted_guest(struct kvm_vm *vm) -{ - /* Ensure that a KVM vendor-specific module is loaded. */ - if (vm == NULL) - close(open_kvm_dev_path_or_exit()); - - return get_kvm_intel_param_bool("unrestricted_guest"); -} - void kvm_selftest_arch_init(void) { host_cpu_is_intel = this_cpu_is_intel(); diff --git a/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c index 3fd6eceab46f..2cae86d9d5e2 100644 --- a/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c +++ b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c @@ -110,7 +110,7 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; TEST_REQUIRE(host_cpu_is_intel); - TEST_REQUIRE(!vm_is_unrestricted_guest(NULL)); + TEST_REQUIRE(!kvm_is_unrestricted_guest_enabled()); vm = vm_create_with_one_vcpu(&vcpu, guest_code); get_set_sigalrm_vcpu(vcpu); -- cgit v1.2.3 From 6e1cce7cda1b69e455f89dbc0eeec128506fe969 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 16 May 2025 14:59:07 -0700 Subject: KVM: selftests: Add __open_path_or_exit() variant to provide extra help info Add an inner __open_path_or_exit() API to let the caller provide additional information on ENOENT to try and help the user figure out why the test is being skipped, e.g. for files like the page_idle bitmap needed by the access tracking perf, which is dependent on a Kconfig. Immediately convert /dev/kvm to the new API, both as an example and because users might not know that some architectures/setups require loading KVM. Link: https://lore.kernel.org/r/20250516215909.2551628-3-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/include/kvm_util.h | 1 + tools/testing/selftests/kvm/lib/kvm_util.c | 19 +++++++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index bee65ca08721..e82af690edc5 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -253,6 +253,7 @@ struct vm_guest_mode_params { }; extern const struct vm_guest_mode_params vm_guest_mode_params[]; +int __open_path_or_exit(const char *path, int flags, const char *enoent_help); int open_path_or_exit(const char *path, int flags); int open_kvm_dev_path_or_exit(void); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 7573771a6146..41c7d04d32bd 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -26,15 +26,26 @@ static uint32_t last_guest_seed; static int vcpu_mmap_sz(void); -int open_path_or_exit(const char *path, int flags) +int __open_path_or_exit(const char *path, int flags, const char *enoent_help) { int fd; fd = open(path, flags); - __TEST_REQUIRE(fd >= 0 || errno != ENOENT, "Cannot open %s: %s", path, strerror(errno)); - TEST_ASSERT(fd >= 0, "Failed to open '%s'", path); + if (fd < 0) + goto error; return fd; + +error: + if (errno == ENOENT) + ksft_exit_skip("- Cannot open '%s': %s. %s\n", + path, strerror(errno), enoent_help); + TEST_FAIL("Failed to open '%s'", path); +} + +int open_path_or_exit(const char *path, int flags) +{ + return __open_path_or_exit(path, flags, ""); } /* @@ -48,7 +59,7 @@ int open_path_or_exit(const char *path, int flags) */ static int _open_kvm_dev_path_or_exit(int flags) { - return open_path_or_exit(KVM_DEV_PATH, flags); + return __open_path_or_exit(KVM_DEV_PATH, flags, "Is KVM loaded and enabled?"); } int open_kvm_dev_path_or_exit(void) -- cgit v1.2.3 From ba300a728f6f764828d735f01f314900fb6a8315 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 16 May 2025 14:59:08 -0700 Subject: KVM: selftests: Play nice with EACCES errors in open_path_or_exit() Expand the SKIP conditions of the open_path_or_exit() helper to skip on EACCES as well as ENOENT. Most often, lack of permissions to a file needed by a KVM selftests is due to a file being root-only by default, not because of any bug/misconfiguration that warrants failing a test. Link: https://lore.kernel.org/r/20250516215909.2551628-4-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/lib/kvm_util.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 41c7d04d32bd..f3a80057816e 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -37,9 +37,10 @@ int __open_path_or_exit(const char *path, int flags, const char *enoent_help) return fd; error: - if (errno == ENOENT) + if (errno == EACCES || errno == ENOENT) ksft_exit_skip("- Cannot open '%s': %s. %s\n", - path, strerror(errno), enoent_help); + path, strerror(errno), + errno == EACCES ? "Root required?" : enoent_help); TEST_FAIL("Failed to open '%s'", path); } -- cgit v1.2.3 From 71443210e26de3b35aea8dced894ad3c420d55d5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 16 May 2025 14:59:09 -0700 Subject: KVM: selftests: Print a more helpful message for EACCESS in access tracking test Use open_path_or_exit() helper to probe /sys/kernel/mm/page_idle/bitmap in the access tracking perf test so that a helpful/pertinent SKIP message is printed if the file exists but is inaccessible, e.g. because the file has the kernel's default 0600 permissions. Cc: James Houghton Link: https://lore.kernel.org/r/20250516215909.2551628-5-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/access_tracking_perf_test.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index da7196fd1b23..c9de66537ec3 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -596,11 +596,8 @@ int main(int argc, char *argv[]) if (ret) return ret; } else { - page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR); - __TEST_REQUIRE(page_idle_fd >= 0, - "Couldn't open /sys/kernel/mm/page_idle/bitmap. " - "Is CONFIG_IDLE_PAGE_TRACKING enabled?"); - + page_idle_fd = __open_path_or_exit("/sys/kernel/mm/page_idle/bitmap", O_RDWR, + "Is CONFIG_IDLE_PAGE_TRACKING enabled?"); close(page_idle_fd); puts("Using page_idle for aging"); -- cgit v1.2.3 From 8fd2a6d43a10184f8edcc7adc0547e1871df6705 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 11 Jun 2025 14:35:55 -0700 Subject: KVM: selftests: Fall back to split IRQ chip if full in-kernel chip is unsupported Now that KVM x86 allows compiling out support for in-kernel I/O APIC (and PIC and PIT) emulation, i.e. allows disabling KVM_CREATE_IRQCHIP for all intents and purposes, fall back to a split IRQ chip for x86 if creating the full in-kernel version fails with ENOTTY. Acked-by: Kai Huang Link: https://lore.kernel.org/r/20250611213557.294358-17-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/lib/kvm_util.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index a055343a7bf7..93a921deeb99 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1716,7 +1716,18 @@ void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa) /* Create an interrupt controller chip for the specified VM. */ void vm_create_irqchip(struct kvm_vm *vm) { - vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL); + int r; + + /* + * Allocate a fully in-kernel IRQ chip by default, but fall back to a + * split model (x86 only) if that fails (KVM x86 allows compiling out + * support for KVM_CREATE_IRQCHIP). + */ + r = __vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL); + if (r && errno == ENOTTY && kvm_has_cap(KVM_CAP_SPLIT_IRQCHIP)) + vm_enable_cap(vm, KVM_CAP_SPLIT_IRQCHIP, 24); + else + TEST_ASSERT_VM_VCPU_IOCTL(!r, KVM_CREATE_IRQCHIP, r, vm); vm->has_irqchip = true; } -- cgit v1.2.3 From ddceadce63d9cb752c2472e220ded05cabaf7971 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Jun 2025 15:34:22 -1000 Subject: sched_ext: Add support for cgroup bandwidth control interface From 077814f57f8acce13f91dc34bbd2b7e4911fbf25 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Jun 2025 15:06:47 -1000 - Add CONFIG_GROUP_SCHED_BANDWIDTH which is selected by both CONFIG_CFS_BANDWIDTH and EXT_GROUP_SCHED. - Put bandwidth control interface files for both cgroup v1 and v2 under CONFIG_GROUP_SCHED_BANDWIDTH. - Update tg_bandwidth() to fetch configuration parameters from fair if CONFIG_CFS_BANDWIDTH, SCX otherwise. - Update tg_set_bandwidth() to update the parameters for both fair and SCX. - Add bandwidth control parameters to struct scx_cgroup_init_args. - Add sched_ext_ops.cgroup_set_bandwidth() which is invoked on bandwidth control parameter updates. - Update scx_qmap and maximal selftest to test the new feature. Signed-off-by: Tejun Heo --- include/linux/sched/ext.h | 3 ++ init/Kconfig | 5 ++ kernel/sched/core.c | 29 +++++++++-- kernel/sched/ext.c | 66 +++++++++++++++++++++++-- kernel/sched/ext.h | 2 + kernel/sched/sched.h | 4 +- tools/sched_ext/scx_qmap.bpf.c | 23 +++++++++ tools/testing/selftests/sched_ext/maximal.bpf.c | 5 ++ 8 files changed, 127 insertions(+), 10 deletions(-) (limited to 'tools/testing/selftests') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index eda89acdb7ab..8b92842776cb 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -219,6 +219,9 @@ struct scx_task_group { #ifdef CONFIG_EXT_GROUP_SCHED u32 flags; /* SCX_TG_* */ u32 weight; + u64 bw_period_us; + u64 bw_quota_us; + u64 bw_burst_us; #endif }; diff --git a/init/Kconfig b/init/Kconfig index af4c2f085455..baf59d2a20a2 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1065,6 +1065,9 @@ if CGROUP_SCHED config GROUP_SCHED_WEIGHT def_bool n +config GROUP_SCHED_BANDWIDTH + def_bool n + config FAIR_GROUP_SCHED bool "Group scheduling for SCHED_OTHER" depends on CGROUP_SCHED @@ -1074,6 +1077,7 @@ config FAIR_GROUP_SCHED config CFS_BANDWIDTH bool "CPU bandwidth provisioning for FAIR_GROUP_SCHED" depends on FAIR_GROUP_SCHED + select GROUP_SCHED_BANDWIDTH default n help This option allows users to define CPU bandwidth rates (limits) for @@ -1108,6 +1112,7 @@ config EXT_GROUP_SCHED bool depends on SCHED_CLASS_EXT && CGROUP_SCHED select GROUP_SCHED_WEIGHT + select GROUP_SCHED_BANDWIDTH default y endif #CGROUP_SCHED diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0e3a00e2a2cc..91845d00a1cd 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -9545,7 +9545,9 @@ static int cpu_cfs_local_stat_show(struct seq_file *sf, void *v) return 0; } +#endif /* CONFIG_CFS_BANDWIDTH */ +#ifdef CONFIG_GROUP_SCHED_BANDWIDTH const u64 max_bw_quota_period_us = 1 * USEC_PER_SEC; /* 1s */ static const u64 min_bw_quota_period_us = 1 * USEC_PER_MSEC; /* 1ms */ /* More than 203 days if BW_SHIFT equals 20. */ @@ -9554,12 +9556,21 @@ static const u64 max_bw_runtime_us = MAX_BW; static void tg_bandwidth(struct task_group *tg, u64 *period_us_p, u64 *quota_us_p, u64 *burst_us_p) { +#ifdef CONFIG_CFS_BANDWIDTH if (period_us_p) *period_us_p = tg_get_cfs_period(tg); if (quota_us_p) *quota_us_p = tg_get_cfs_quota(tg); if (burst_us_p) *burst_us_p = tg_get_cfs_burst(tg); +#else /* !CONFIG_CFS_BANDWIDTH */ + if (period_us_p) + *period_us_p = tg->scx.bw_period_us; + if (quota_us_p) + *quota_us_p = tg->scx.bw_quota_us; + if (burst_us_p) + *burst_us_p = tg->scx.bw_burst_us; +#endif /* CONFIG_CFS_BANDWIDTH */ } static u64 cpu_period_read_u64(struct cgroup_subsys_state *css, @@ -9575,6 +9586,7 @@ static int tg_set_bandwidth(struct task_group *tg, u64 period_us, u64 quota_us, u64 burst_us) { const u64 max_usec = U64_MAX / NSEC_PER_USEC; + int ret = 0; if (tg == &root_task_group) return -EINVAL; @@ -9612,7 +9624,12 @@ static int tg_set_bandwidth(struct task_group *tg, burst_us + quota_us > max_bw_runtime_us)) return -EINVAL; - return tg_set_cfs_bandwidth(tg, period_us, quota_us, burst_us); +#ifdef CONFIG_CFS_BANDWIDTH + ret = tg_set_cfs_bandwidth(tg, period_us, quota_us, burst_us); +#endif /* CONFIG_CFS_BANDWIDTH */ + if (!ret) + scx_group_set_bandwidth(tg, period_us, quota_us, burst_us); + return ret; } static s64 cpu_quota_read_s64(struct cgroup_subsys_state *css, @@ -9665,7 +9682,7 @@ static int cpu_burst_write_u64(struct cgroup_subsys_state *css, tg_bandwidth(tg, &period_us, "a_us, NULL); return tg_set_bandwidth(tg, period_us, quota_us, burst_us); } -#endif /* CONFIG_CFS_BANDWIDTH */ +#endif /* CONFIG_GROUP_SCHED_BANDWIDTH */ #ifdef CONFIG_RT_GROUP_SCHED static int cpu_rt_runtime_write(struct cgroup_subsys_state *css, @@ -9725,7 +9742,7 @@ static struct cftype cpu_legacy_files[] = { .write_s64 = cpu_idle_write_s64, }, #endif -#ifdef CONFIG_CFS_BANDWIDTH +#ifdef CONFIG_GROUP_SCHED_BANDWIDTH { .name = "cfs_period_us", .read_u64 = cpu_period_read_u64, @@ -9741,6 +9758,8 @@ static struct cftype cpu_legacy_files[] = { .read_u64 = cpu_burst_read_u64, .write_u64 = cpu_burst_write_u64, }, +#endif +#ifdef CONFIG_CFS_BANDWIDTH { .name = "stat", .seq_show = cpu_cfs_stat_show, @@ -9954,7 +9973,7 @@ static int __maybe_unused cpu_period_quota_parse(char *buf, u64 *period_us_p, return 0; } -#ifdef CONFIG_CFS_BANDWIDTH +#ifdef CONFIG_GROUP_SCHED_BANDWIDTH static int cpu_max_show(struct seq_file *sf, void *v) { struct task_group *tg = css_tg(seq_css(sf)); @@ -10001,7 +10020,7 @@ static struct cftype cpu_files[] = { .write_s64 = cpu_idle_write_s64, }, #endif -#ifdef CONFIG_CFS_BANDWIDTH +#ifdef CONFIG_GROUP_SCHED_BANDWIDTH { .name = "max", .flags = CFTYPE_NOT_ON_ROOT, diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 6732e50e0679..39cba11688a9 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -203,6 +203,11 @@ struct scx_exit_task_args { struct scx_cgroup_init_args { /* the weight of the cgroup [1..10000] */ u32 weight; + + /* bandwidth control parameters from cpu.max and cpu.max.burst */ + u64 bw_period_us; + u64 bw_quota_us; + u64 bw_burst_us; }; enum scx_cpu_preempt_reason { @@ -664,9 +669,31 @@ struct sched_ext_ops { * @cgrp: cgroup whose weight is being updated * @weight: new weight [1..10000] * - * Update @tg's weight to @weight. + * Update @cgrp's weight to @weight. */ void (*cgroup_set_weight)(struct cgroup *cgrp, u32 weight); + + /** + * @cgroup_set_bandwidth: A cgroup's bandwidth is being changed + * @cgrp: cgroup whose bandwidth is being updated + * @period_us: bandwidth control period + * @quota_us: bandwidth control quota + * @burst_us: bandwidth control burst + * + * Update @cgrp's bandwidth control parameters. This is from the cpu.max + * cgroup interface. + * + * @quota_us / @period_us determines the CPU bandwidth @cgrp is entitled + * to. For example, if @period_us is 1_000_000 and @quota_us is + * 2_500_000. @cgrp is entitled to 2.5 CPUs. @burst_us can be + * interpreted in the same fashion and specifies how much @cgrp can + * burst temporarily. The specific control mechanism and thus the + * interpretation of @period_us and burstiness is upto to the BPF + * scheduler. + */ + void (*cgroup_set_bandwidth)(struct cgroup *cgrp, + u64 period_us, u64 quota_us, u64 burst_us); + #endif /* CONFIG_EXT_GROUP_SCHED */ /* @@ -4059,6 +4086,8 @@ static bool scx_cgroup_enabled; void scx_tg_init(struct task_group *tg) { tg->scx.weight = CGROUP_WEIGHT_DFL; + tg->scx.bw_period_us = default_bw_period_us(); + tg->scx.bw_quota_us = RUNTIME_INF; } int scx_tg_online(struct task_group *tg) @@ -4073,7 +4102,10 @@ int scx_tg_online(struct task_group *tg) if (scx_cgroup_enabled) { if (SCX_HAS_OP(sch, cgroup_init)) { struct scx_cgroup_init_args args = - { .weight = tg->scx.weight }; + { .weight = tg->scx.weight, + .bw_period_us = tg->scx.bw_period_us, + .bw_quota_us = tg->scx.bw_quota_us, + .bw_burst_us = tg->scx.bw_burst_us }; ret = SCX_CALL_OP_RET(sch, SCX_KF_UNLOCKED, cgroup_init, NULL, tg->css.cgroup, &args); @@ -4225,6 +4257,27 @@ void scx_group_set_idle(struct task_group *tg, bool idle) /* TODO: Implement ops->cgroup_set_idle() */ } +void scx_group_set_bandwidth(struct task_group *tg, + u64 period_us, u64 quota_us, u64 burst_us) +{ + struct scx_sched *sch = scx_root; + + percpu_down_read(&scx_cgroup_rwsem); + + if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_set_bandwidth) && + (tg->scx.bw_period_us != period_us || + tg->scx.bw_quota_us != quota_us || + tg->scx.bw_burst_us != burst_us)) + SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_set_bandwidth, NULL, + tg_cgrp(tg), period_us, quota_us, burst_us); + + tg->scx.bw_period_us = period_us; + tg->scx.bw_quota_us = quota_us; + tg->scx.bw_burst_us = burst_us; + + percpu_up_read(&scx_cgroup_rwsem); +} + static void scx_cgroup_lock(void) { percpu_down_write(&scx_cgroup_rwsem); @@ -4400,7 +4453,12 @@ static int scx_cgroup_init(struct scx_sched *sch) rcu_read_lock(); css_for_each_descendant_pre(css, &root_task_group.css) { struct task_group *tg = css_tg(css); - struct scx_cgroup_init_args args = { .weight = tg->scx.weight }; + struct scx_cgroup_init_args args = { + .weight = tg->scx.weight, + .bw_period_us = tg->scx.bw_period_us, + .bw_quota_us = tg->scx.bw_quota_us, + .bw_burst_us = tg->scx.bw_burst_us, + }; if ((tg->scx.flags & (SCX_TG_ONLINE | SCX_TG_INITED)) != SCX_TG_ONLINE) @@ -5902,6 +5960,7 @@ static s32 sched_ext_ops__cgroup_prep_move(struct task_struct *p, struct cgroup static void sched_ext_ops__cgroup_move(struct task_struct *p, struct cgroup *from, struct cgroup *to) {} static void sched_ext_ops__cgroup_cancel_move(struct task_struct *p, struct cgroup *from, struct cgroup *to) {} static void sched_ext_ops__cgroup_set_weight(struct cgroup *cgrp, u32 weight) {} +static void sched_ext_ops__cgroup_set_bandwidth(struct cgroup *cgrp, u64 period_us, u64 quota_us, u64 burst_us) {} #endif static void sched_ext_ops__cpu_online(s32 cpu) {} static void sched_ext_ops__cpu_offline(s32 cpu) {} @@ -5939,6 +5998,7 @@ static struct sched_ext_ops __bpf_ops_sched_ext_ops = { .cgroup_move = sched_ext_ops__cgroup_move, .cgroup_cancel_move = sched_ext_ops__cgroup_cancel_move, .cgroup_set_weight = sched_ext_ops__cgroup_set_weight, + .cgroup_set_bandwidth = sched_ext_ops__cgroup_set_bandwidth, #endif .cpu_online = sched_ext_ops__cpu_online, .cpu_offline = sched_ext_ops__cpu_offline, diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h index e7bcaa02ea56..292bb41a242e 100644 --- a/kernel/sched/ext.h +++ b/kernel/sched/ext.h @@ -104,6 +104,7 @@ void scx_cgroup_finish_attach(void); void scx_cgroup_cancel_attach(struct cgroup_taskset *tset); void scx_group_set_weight(struct task_group *tg, unsigned long cgrp_weight); void scx_group_set_idle(struct task_group *tg, bool idle); +void scx_group_set_bandwidth(struct task_group *tg, u64 period_us, u64 quota_us, u64 burst_us); #else /* CONFIG_EXT_GROUP_SCHED */ static inline void scx_tg_init(struct task_group *tg) {} static inline int scx_tg_online(struct task_group *tg) { return 0; } @@ -114,5 +115,6 @@ static inline void scx_cgroup_finish_attach(void) {} static inline void scx_cgroup_cancel_attach(struct cgroup_taskset *tset) {} static inline void scx_group_set_weight(struct task_group *tg, unsigned long cgrp_weight) {} static inline void scx_group_set_idle(struct task_group *tg, bool idle) {} +static inline void scx_group_set_bandwidth(struct task_group *tg, u64 period_us, u64 quota_us, u64 burst_us) {} #endif /* CONFIG_EXT_GROUP_SCHED */ #endif /* CONFIG_CGROUP_SCHED */ diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index fdf5f52b54a3..06767a210717 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -402,7 +402,7 @@ static inline bool dl_server_active(struct sched_dl_entity *dl_se) extern struct list_head task_groups; -#ifdef CONFIG_CFS_BANDWIDTH +#ifdef CONFIG_GROUP_SCHED_BANDWIDTH extern const u64 max_bw_quota_period_us; /* @@ -413,7 +413,7 @@ static inline u64 default_bw_period_us(void) { return 100000ULL; } -#endif /* CONFIG_CFS_BANDWIDTH */ +#endif /* CONFIG_GROUP_SCHED_BANDWIDTH */ struct cfs_bandwidth { #ifdef CONFIG_CFS_BANDWIDTH diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c index c3cd9a17d48e..69d877501cb7 100644 --- a/tools/sched_ext/scx_qmap.bpf.c +++ b/tools/sched_ext/scx_qmap.bpf.c @@ -615,6 +615,26 @@ void BPF_STRUCT_OPS(qmap_dump_task, struct scx_dump_ctx *dctx, struct task_struc taskc->force_local, taskc->core_sched_seq); } +s32 BPF_STRUCT_OPS(qmap_cgroup_init, struct cgroup *cgrp, struct scx_cgroup_init_args *args) +{ + bpf_printk("CGRP INIT %llu weight=%u period=%lu quota=%ld burst=%lu", + cgrp->kn->id, args->weight, args->bw_period_us, + args->bw_quota_us, args->bw_burst_us); + return 0; +} + +void BPF_STRUCT_OPS(qmap_cgroup_set_weight, struct cgroup *cgrp, u32 weight) +{ + bpf_printk("CGRP SET %llu weight=%u", cgrp->kn->id, weight); +} + +void BPF_STRUCT_OPS(qmap_cgroup_set_bandwidth, struct cgroup *cgrp, + u64 period_us, u64 quota_us, u64 burst_us) +{ + bpf_printk("CGRP SET %llu period=%lu quota=%ld burst=%lu", cgrp->kn->id, + period_us, quota_us, burst_us); +} + /* * Print out the online and possible CPU map using bpf_printk() as a * demonstration of using the cpumask kfuncs and ops.cpu_on/offline(). @@ -840,6 +860,9 @@ SCX_OPS_DEFINE(qmap_ops, .dump = (void *)qmap_dump, .dump_cpu = (void *)qmap_dump_cpu, .dump_task = (void *)qmap_dump_task, + .cgroup_init = (void *)qmap_cgroup_init, + .cgroup_set_weight = (void *)qmap_cgroup_set_weight, + .cgroup_set_bandwidth = (void *)qmap_cgroup_set_bandwidth, .cpu_online = (void *)qmap_cpu_online, .cpu_offline = (void *)qmap_cpu_offline, .init = (void *)qmap_init, diff --git a/tools/testing/selftests/sched_ext/maximal.bpf.c b/tools/testing/selftests/sched_ext/maximal.bpf.c index 430f5e13bf55..01cf4f3da4e0 100644 --- a/tools/testing/selftests/sched_ext/maximal.bpf.c +++ b/tools/testing/selftests/sched_ext/maximal.bpf.c @@ -123,6 +123,10 @@ void BPF_STRUCT_OPS(maximal_cgroup_cancel_move, struct task_struct *p, void BPF_STRUCT_OPS(maximal_cgroup_set_weight, struct cgroup *cgrp, u32 weight) {} +void BPF_STRUCT_OPS(maximal_cgroup_set_bandwidth, struct cgroup *cgrp, + u64 period_us, u64 quota_us, u64 burst_us) +{} + s32 BPF_STRUCT_OPS_SLEEPABLE(maximal_init) { return scx_bpf_create_dsq(DSQ_ID, -1); @@ -160,6 +164,7 @@ struct sched_ext_ops maximal_ops = { .cgroup_move = (void *) maximal_cgroup_move, .cgroup_cancel_move = (void *) maximal_cgroup_cancel_move, .cgroup_set_weight = (void *) maximal_cgroup_set_weight, + .cgroup_set_bandwidth = (void *) maximal_cgroup_set_bandwidth, .init = (void *) maximal_init, .exit = (void *) maximal_exit, .name = "maximal", -- cgit v1.2.3 From 14966a8df77e2dffdf765b56abe194aa6b0da325 Mon Sep 17 00:00:00 2001 From: Yuyang Huang Date: Thu, 19 Jun 2025 12:51:16 +0900 Subject: selftest: add selftest for anycast notifications MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds a new kernel selftest to verify RTNLGRP_IPV6_ACADDR notifications. The test works by adding/removing a dummy interface, enabling packet forwarding, and then confirming that user space can correctly receive anycast notifications. The test relies on the iproute2 version to be 6.13+. Tested by the following command: $ vng -v --user root --cpus 16 -- \ make -C tools/testing/selftests TARGETS=net TEST_PROGS=rtnetlink_notification.sh \ TEST_GEN_PROGS="" run_tests Cc: Maciej Żenczykowski Cc: Lorenzo Colitti Signed-off-by: Yuyang Huang Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- .../selftests/net/rtnetlink_notification.sh | 44 +++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/rtnetlink_notification.sh b/tools/testing/selftests/net/rtnetlink_notification.sh index 39c1b815bbe4..3f9780232bd6 100755 --- a/tools/testing/selftests/net/rtnetlink_notification.sh +++ b/tools/testing/selftests/net/rtnetlink_notification.sh @@ -8,9 +8,11 @@ ALL_TESTS=" kci_test_mcast_addr_notification + kci_test_anycast_addr_notification " source lib.sh +test_dev="test-dummy1" kci_test_mcast_addr_notification() { @@ -18,7 +20,6 @@ kci_test_mcast_addr_notification() local tmpfile local monitor_pid local match_result - local test_dev="test-dummy1" tmpfile=$(mktemp) defer rm "$tmpfile" @@ -56,6 +57,47 @@ kci_test_mcast_addr_notification() return $RET } +kci_test_anycast_addr_notification() +{ + RET=0 + local tmpfile + local monitor_pid + local match_result + + tmpfile=$(mktemp) + defer rm "$tmpfile" + + ip monitor acaddress > "$tmpfile" & + monitor_pid=$! + defer kill_process "$monitor_pid" + sleep 1 + + if [ ! -e "/proc/$monitor_pid" ]; then + RET=$ksft_skip + log_test "anycast addr notification: iproute2 too old" + return "$RET" + fi + + ip link add name "$test_dev" type dummy + check_err $? "failed to add dummy interface" + ip link set "$test_dev" up + check_err $? "failed to set dummy interface up" + sysctl -qw net.ipv6.conf."$test_dev".forwarding=1 + ip link del dev "$test_dev" + check_err $? "Failed to delete dummy interface" + sleep 1 + + # There should be 2 line matches as follows. + # 9: dummy2 inet6 any fe80:: scope global + # Deleted 9: dummy2 inet6 any fe80:: scope global + match_result=$(grep -cE "$test_dev.*(fe80::)" "$tmpfile") + if [ "$match_result" -ne 2 ]; then + RET=$ksft_fail + fi + log_test "anycast addr notification: Expected 2 matches, got $match_result" + return "$RET" +} + #check for needed privileges if [ "$(id -u)" -ne 0 ];then RET=$ksft_skip -- cgit v1.2.3 From 49fba3725910c54878212ca08b968b9e1285866c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 18 Jun 2025 22:53:47 +0200 Subject: selftests/pidfd: test extended attribute support Add tests for extended attribute support on pidfds. Link: https://lore.kernel.org/20250618-work-pidfs-persistent-v2-13-98f3456fd552@kernel.org Reviewed-by: Alexander Mikhalitsyn Signed-off-by: Christian Brauner --- tools/testing/selftests/pidfd/.gitignore | 1 + tools/testing/selftests/pidfd/Makefile | 3 +- tools/testing/selftests/pidfd/pidfd_xattr_test.c | 97 ++++++++++++++++++++++++ 3 files changed, 100 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/pidfd/pidfd_xattr_test.c (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore index 0406a065deb4..bc4130506eda 100644 --- a/tools/testing/selftests/pidfd/.gitignore +++ b/tools/testing/selftests/pidfd/.gitignore @@ -10,3 +10,4 @@ pidfd_file_handle_test pidfd_bind_mount pidfd_info_test pidfd_exec_helper +pidfd_xattr_test diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile index fcbefc0d77f6..c9fd5023ef15 100644 --- a/tools/testing/selftests/pidfd/Makefile +++ b/tools/testing/selftests/pidfd/Makefile @@ -3,7 +3,8 @@ CFLAGS += -g $(KHDR_INCLUDES) -pthread -Wall TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \ pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test \ - pidfd_file_handle_test pidfd_bind_mount pidfd_info_test + pidfd_file_handle_test pidfd_bind_mount pidfd_info_test \ + pidfd_xattr_test TEST_GEN_PROGS_EXTENDED := pidfd_exec_helper diff --git a/tools/testing/selftests/pidfd/pidfd_xattr_test.c b/tools/testing/selftests/pidfd/pidfd_xattr_test.c new file mode 100644 index 000000000000..00d400ac515b --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd_xattr_test.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pidfd.h" +#include "../kselftest_harness.h" + +FIXTURE(pidfs_xattr) +{ + pid_t child_pid; + int child_pidfd; +}; + +FIXTURE_SETUP(pidfs_xattr) +{ + self->child_pid = create_child(&self->child_pidfd, CLONE_NEWUSER | CLONE_NEWPID); + EXPECT_GE(self->child_pid, 0); + + if (self->child_pid == 0) + _exit(EXIT_SUCCESS); +} + +FIXTURE_TEARDOWN(pidfs_xattr) +{ + sys_waitid(P_PID, self->child_pid, NULL, WEXITED); +} + +TEST_F(pidfs_xattr, set_get_list_xattr_multiple) +{ + int ret, i; + char xattr_name[32]; + char xattr_value[32]; + char buf[32]; + const int num_xattrs = 10; + char list[PATH_MAX] = {}; + + for (i = 0; i < num_xattrs; i++) { + snprintf(xattr_name, sizeof(xattr_name), "trusted.testattr%d", i); + snprintf(xattr_value, sizeof(xattr_value), "testvalue%d", i); + ret = fsetxattr(self->child_pidfd, xattr_name, xattr_value, strlen(xattr_value), 0); + ASSERT_EQ(ret, 0); + } + + for (i = 0; i < num_xattrs; i++) { + snprintf(xattr_name, sizeof(xattr_name), "trusted.testattr%d", i); + snprintf(xattr_value, sizeof(xattr_value), "testvalue%d", i); + memset(buf, 0, sizeof(buf)); + ret = fgetxattr(self->child_pidfd, xattr_name, buf, sizeof(buf)); + ASSERT_EQ(ret, strlen(xattr_value)); + ASSERT_EQ(strcmp(buf, xattr_value), 0); + } + + ret = flistxattr(self->child_pidfd, list, sizeof(list)); + ASSERT_GT(ret, 0); + for (i = 0; i < num_xattrs; i++) { + snprintf(xattr_name, sizeof(xattr_name), "trusted.testattr%d", i); + bool found = false; + for (char *it = list; it < list + ret; it += strlen(it) + 1) { + if (strcmp(it, xattr_name)) + continue; + found = true; + break; + } + ASSERT_TRUE(found); + } + + for (i = 0; i < num_xattrs; i++) { + snprintf(xattr_name, sizeof(xattr_name), "trusted.testattr%d", i); + ret = fremovexattr(self->child_pidfd, xattr_name); + ASSERT_EQ(ret, 0); + + ret = fgetxattr(self->child_pidfd, xattr_name, buf, sizeof(buf)); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENODATA); + } +} + +TEST_HARNESS_MAIN -- cgit v1.2.3 From 7442d093dfae0c9482eeeb8bebfd682459244be0 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 18 Jun 2025 22:53:48 +0200 Subject: selftests/pidfd: test extended attribute support Test that extended attributes are permanent. Link: https://lore.kernel.org/20250618-work-pidfs-persistent-v2-14-98f3456fd552@kernel.org Reviewed-by: Alexander Mikhalitsyn Signed-off-by: Christian Brauner --- tools/testing/selftests/pidfd/pidfd_xattr_test.c | 35 ++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/pidfd/pidfd_xattr_test.c b/tools/testing/selftests/pidfd/pidfd_xattr_test.c index 00d400ac515b..5cf7bb0e4bf2 100644 --- a/tools/testing/selftests/pidfd/pidfd_xattr_test.c +++ b/tools/testing/selftests/pidfd/pidfd_xattr_test.c @@ -94,4 +94,39 @@ TEST_F(pidfs_xattr, set_get_list_xattr_multiple) } } +TEST_F(pidfs_xattr, set_get_list_xattr_persistent) +{ + int ret; + char buf[32]; + char list[PATH_MAX] = {}; + + ret = fsetxattr(self->child_pidfd, "trusted.persistent", "persistent value", strlen("persistent value"), 0); + ASSERT_EQ(ret, 0); + + memset(buf, 0, sizeof(buf)); + ret = fgetxattr(self->child_pidfd, "trusted.persistent", buf, sizeof(buf)); + ASSERT_EQ(ret, strlen("persistent value")); + ASSERT_EQ(strcmp(buf, "persistent value"), 0); + + ret = flistxattr(self->child_pidfd, list, sizeof(list)); + ASSERT_GT(ret, 0); + ASSERT_EQ(strcmp(list, "trusted.persistent"), 0) + + ASSERT_EQ(close(self->child_pidfd), 0); + self->child_pidfd = -EBADF; + sleep(2); + + self->child_pidfd = sys_pidfd_open(self->child_pid, 0); + ASSERT_GE(self->child_pidfd, 0); + + memset(buf, 0, sizeof(buf)); + ret = fgetxattr(self->child_pidfd, "trusted.persistent", buf, sizeof(buf)); + ASSERT_EQ(ret, strlen("persistent value")); + ASSERT_EQ(strcmp(buf, "persistent value"), 0); + + ret = flistxattr(self->child_pidfd, list, sizeof(list)); + ASSERT_GT(ret, 0); + ASSERT_EQ(strcmp(list, "trusted.persistent"), 0); +} + TEST_HARNESS_MAIN -- cgit v1.2.3 From 8c2ab04135682d0f5b1eb1c74ac5f328b65015ea Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 18 Jun 2025 22:53:49 +0200 Subject: selftests/pidfd: test setattr support Verify that ->setattr() on a pidfd doens't work. Link: https://lore.kernel.org/20250618-work-pidfs-persistent-v2-15-98f3456fd552@kernel.org Reviewed-by: Alexander Mikhalitsyn Signed-off-by: Christian Brauner --- tools/testing/selftests/pidfd/.gitignore | 1 + tools/testing/selftests/pidfd/Makefile | 2 +- tools/testing/selftests/pidfd/pidfd_setattr_test.c | 69 ++++++++++++++++++++++ 3 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/pidfd/pidfd_setattr_test.c (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore index bc4130506eda..144e7ff65d6a 100644 --- a/tools/testing/selftests/pidfd/.gitignore +++ b/tools/testing/selftests/pidfd/.gitignore @@ -11,3 +11,4 @@ pidfd_bind_mount pidfd_info_test pidfd_exec_helper pidfd_xattr_test +pidfd_setattr_test diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile index c9fd5023ef15..03a6eede9c9e 100644 --- a/tools/testing/selftests/pidfd/Makefile +++ b/tools/testing/selftests/pidfd/Makefile @@ -4,7 +4,7 @@ CFLAGS += -g $(KHDR_INCLUDES) -pthread -Wall TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \ pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test \ pidfd_file_handle_test pidfd_bind_mount pidfd_info_test \ - pidfd_xattr_test + pidfd_xattr_test pidfd_setattr_test TEST_GEN_PROGS_EXTENDED := pidfd_exec_helper diff --git a/tools/testing/selftests/pidfd/pidfd_setattr_test.c b/tools/testing/selftests/pidfd/pidfd_setattr_test.c new file mode 100644 index 000000000000..d7de05edc4b3 --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd_setattr_test.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pidfd.h" +#include "../kselftest_harness.h" + +FIXTURE(pidfs_setattr) +{ + pid_t child_pid; + int child_pidfd; +}; + +FIXTURE_SETUP(pidfs_setattr) +{ + self->child_pid = create_child(&self->child_pidfd, CLONE_NEWUSER | CLONE_NEWPID); + EXPECT_GE(self->child_pid, 0); + + if (self->child_pid == 0) + _exit(EXIT_SUCCESS); +} + +FIXTURE_TEARDOWN(pidfs_setattr) +{ + sys_waitid(P_PID, self->child_pid, NULL, WEXITED); + EXPECT_EQ(close(self->child_pidfd), 0); +} + +TEST_F(pidfs_setattr, no_chown) +{ + ASSERT_LT(fchown(self->child_pidfd, 1234, 5678), 0); + ASSERT_EQ(errno, EOPNOTSUPP); +} + +TEST_F(pidfs_setattr, no_chmod) +{ + ASSERT_LT(fchmod(self->child_pidfd, 0777), 0); + ASSERT_EQ(errno, EOPNOTSUPP); +} + +TEST_F(pidfs_setattr, no_exec) +{ + char *const argv[] = { NULL }; + char *const envp[] = { NULL }; + + ASSERT_LT(execveat(self->child_pidfd, "", argv, envp, AT_EMPTY_PATH), 0); + ASSERT_EQ(errno, EACCES); +} + +TEST_HARNESS_MAIN -- cgit v1.2.3 From 6a68d28066b6257b8d09b1daa91db43d56dbb6ad Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Fri, 20 Jun 2025 13:02:52 +0200 Subject: selftests/coredump: Fix "socket_detect_userspace_client" test failure The coredump.socket_detect_userspace_client test occasionally fails: # RUN coredump.socket_detect_userspace_client ... # stackdump_test.c:500:socket_detect_userspace_client:Expected 0 (0) != WIFEXITED(status) (0) # socket_detect_userspace_client: Test terminated by assertion # FAIL coredump.socket_detect_userspace_client not ok 3 coredump.socket_detect_userspace_client because there is no guarantee that client's write() happens before server's close(). The client gets terminated SIGPIPE, and thus the test fails. Add a read() to server to make sure server's close() doesn't happen before client's write(). Fixes: 7b6724fe9a6b ("selftests/coredump: add tests for AF_UNIX coredumps") Signed-off-by: Nam Cao Link: https://lore.kernel.org/20250620110252.1640391-1-namcao@linutronix.de Signed-off-by: Christian Brauner --- tools/testing/selftests/coredump/stackdump_test.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c index 9984413be9f0..68f8e479ac36 100644 --- a/tools/testing/selftests/coredump/stackdump_test.c +++ b/tools/testing/selftests/coredump/stackdump_test.c @@ -461,10 +461,15 @@ TEST_F(coredump, socket_detect_userspace_client) _exit(EXIT_FAILURE); } + ret = read(fd_coredump, &c, 1); + close(fd_coredump); close(fd_server); close(fd_peer_pidfd); close(fd_core_file); + + if (ret < 1) + _exit(EXIT_FAILURE); _exit(EXIT_SUCCESS); } self->pid_coredump_server = pid_coredump_server; -- cgit v1.2.3 From f4fba2d6d2822efd2733949c0831435dcd96cbd3 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Sun, 22 Jun 2025 23:38:54 -0700 Subject: selftests/bpf: Add tests for bpf_cgroup_read_xattr Add tests for different scenarios with bpf_cgroup_read_xattr: 1. Read cgroup xattr from bpf_cgroup_from_id; 2. Read cgroup xattr from bpf_cgroup_ancestor; 3. Read cgroup xattr from css_iter; 4. Use bpf_cgroup_read_xattr in LSM hook security_socket_connect. 5. Use bpf_cgroup_read_xattr in cgroup program. Signed-off-by: Song Liu Link: https://lore.kernel.org/20250623063854.1896364-5-song@kernel.org Signed-off-by: Christian Brauner --- tools/testing/selftests/bpf/bpf_experimental.h | 3 + .../selftests/bpf/prog_tests/cgroup_xattr.c | 145 +++++++++++++++++++ .../selftests/bpf/progs/cgroup_read_xattr.c | 158 +++++++++++++++++++++ .../selftests/bpf/progs/read_cgroupfs_xattr.c | 60 ++++++++ 4 files changed, 366 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c create mode 100644 tools/testing/selftests/bpf/progs/cgroup_read_xattr.c create mode 100644 tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index 5e512a1d09d1..da7e230f2781 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -596,4 +596,7 @@ extern int bpf_iter_dmabuf_new(struct bpf_iter_dmabuf *it) __weak __ksym; extern struct dma_buf *bpf_iter_dmabuf_next(struct bpf_iter_dmabuf *it) __weak __ksym; extern void bpf_iter_dmabuf_destroy(struct bpf_iter_dmabuf *it) __weak __ksym; +extern int bpf_cgroup_read_xattr(struct cgroup *cgroup, const char *name__str, + struct bpf_dynptr *value_p) __weak __ksym; + #endif diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c new file mode 100644 index 000000000000..87978a0f7eb7 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "read_cgroupfs_xattr.skel.h" +#include "cgroup_read_xattr.skel.h" + +#define CGROUP_FS_ROOT "/sys/fs/cgroup/" +#define CGROUP_FS_PARENT CGROUP_FS_ROOT "foo/" +#define CGROUP_FS_CHILD CGROUP_FS_PARENT "bar/" + +static int move_pid_to_cgroup(const char *cgroup_folder, pid_t pid) +{ + char filename[128]; + char pid_str[64]; + int procs_fd; + int ret; + + snprintf(filename, sizeof(filename), "%scgroup.procs", cgroup_folder); + snprintf(pid_str, sizeof(pid_str), "%d", pid); + + procs_fd = open(filename, O_WRONLY | O_APPEND); + if (!ASSERT_OK_FD(procs_fd, "open")) + return -1; + + ret = write(procs_fd, pid_str, strlen(pid_str)); + close(procs_fd); + if (!ASSERT_GT(ret, 0, "write cgroup.procs")) + return -1; + return 0; +} + +static void reset_cgroups_and_lo(void) +{ + rmdir(CGROUP_FS_CHILD); + rmdir(CGROUP_FS_PARENT); + system("ip addr del 1.1.1.1/32 dev lo"); + system("ip link set dev lo down"); +} + +static const char xattr_value_a[] = "bpf_selftest_value_a"; +static const char xattr_value_b[] = "bpf_selftest_value_b"; +static const char xattr_name[] = "user.bpf_test"; + +static int setup_cgroups_and_lo(void) +{ + int err; + + err = mkdir(CGROUP_FS_PARENT, 0755); + if (!ASSERT_OK(err, "mkdir 1")) + goto error; + err = mkdir(CGROUP_FS_CHILD, 0755); + if (!ASSERT_OK(err, "mkdir 2")) + goto error; + + err = setxattr(CGROUP_FS_PARENT, xattr_name, xattr_value_a, + strlen(xattr_value_a) + 1, 0); + if (!ASSERT_OK(err, "setxattr 1")) + goto error; + + err = setxattr(CGROUP_FS_CHILD, xattr_name, xattr_value_b, + strlen(xattr_value_b) + 1, 0); + if (!ASSERT_OK(err, "setxattr 2")) + goto error; + + err = system("ip link set dev lo up"); + if (!ASSERT_OK(err, "lo up")) + goto error; + + err = system("ip addr add 1.1.1.1 dev lo"); + if (!ASSERT_OK(err, "lo addr v4")) + goto error; + + err = write_sysctl("/proc/sys/net/ipv4/ping_group_range", "0 0"); + if (!ASSERT_OK(err, "write_sysctl")) + goto error; + + return 0; +error: + reset_cgroups_and_lo(); + return err; +} + +static void test_read_cgroup_xattr(void) +{ + struct sockaddr_in sa4 = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_LOOPBACK), + }; + struct read_cgroupfs_xattr *skel = NULL; + pid_t pid = gettid(); + int sock_fd = -1; + int connect_fd = -1; + + if (!ASSERT_OK(setup_cgroups_and_lo(), "setup_cgroups_and_lo")) + return; + if (!ASSERT_OK(move_pid_to_cgroup(CGROUP_FS_CHILD, pid), + "move_pid_to_cgroup")) + goto out; + + skel = read_cgroupfs_xattr__open_and_load(); + if (!ASSERT_OK_PTR(skel, "read_cgroupfs_xattr__open_and_load")) + goto out; + + skel->bss->target_pid = pid; + + if (!ASSERT_OK(read_cgroupfs_xattr__attach(skel), "read_cgroupfs_xattr__attach")) + goto out; + + sock_fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP); + if (!ASSERT_OK_FD(sock_fd, "sock create")) + goto out; + + connect_fd = connect(sock_fd, &sa4, sizeof(sa4)); + if (!ASSERT_OK_FD(connect_fd, "connect 1")) + goto out; + close(connect_fd); + + ASSERT_TRUE(skel->bss->found_value_a, "found_value_a"); + ASSERT_TRUE(skel->bss->found_value_b, "found_value_b"); + +out: + close(connect_fd); + close(sock_fd); + read_cgroupfs_xattr__destroy(skel); + move_pid_to_cgroup(CGROUP_FS_ROOT, pid); + reset_cgroups_and_lo(); +} + +void test_cgroup_xattr(void) +{ + RUN_TESTS(cgroup_read_xattr); + + if (test__start_subtest("read_cgroupfs_xattr")) + test_read_cgroup_xattr(); +} diff --git a/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c b/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c new file mode 100644 index 000000000000..092db1d0435e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include +#include +#include "bpf_experimental.h" +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +char value[16]; + +static __always_inline void read_xattr(struct cgroup *cgroup) +{ + struct bpf_dynptr value_ptr; + + bpf_dynptr_from_mem(value, sizeof(value), 0, &value_ptr); + bpf_cgroup_read_xattr(cgroup, "user.bpf_test", + &value_ptr); +} + +SEC("lsm.s/socket_connect") +__success +int BPF_PROG(trusted_cgroup_ptr_sleepable) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup *cgrp; + + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) + return 0; + + read_xattr(cgrp); + bpf_cgroup_release(cgrp); + return 0; +} + +SEC("lsm/socket_connect") +__success +int BPF_PROG(trusted_cgroup_ptr_non_sleepable) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup *cgrp; + + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) + return 0; + + read_xattr(cgrp); + bpf_cgroup_release(cgrp); + return 0; +} + +SEC("lsm/socket_connect") +__success +int BPF_PROG(use_css_iter_non_sleepable) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup_subsys_state *css; + struct cgroup *cgrp; + + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) + return 0; + + bpf_for_each(css, css, &cgrp->self, BPF_CGROUP_ITER_ANCESTORS_UP) + read_xattr(css->cgroup); + + bpf_cgroup_release(cgrp); + return 0; +} + +SEC("lsm.s/socket_connect") +__failure __msg("expected an RCU CS") +int BPF_PROG(use_css_iter_sleepable_missing_rcu_lock) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup_subsys_state *css; + struct cgroup *cgrp; + + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) + return 0; + + bpf_for_each(css, css, &cgrp->self, BPF_CGROUP_ITER_ANCESTORS_UP) + read_xattr(css->cgroup); + + bpf_cgroup_release(cgrp); + return 0; +} + +SEC("lsm.s/socket_connect") +__success +int BPF_PROG(use_css_iter_sleepable_with_rcu_lock) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup_subsys_state *css; + struct cgroup *cgrp; + + bpf_rcu_read_lock(); + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) + goto out; + + bpf_for_each(css, css, &cgrp->self, BPF_CGROUP_ITER_ANCESTORS_UP) + read_xattr(css->cgroup); + + bpf_cgroup_release(cgrp); +out: + bpf_rcu_read_unlock(); + return 0; +} + +SEC("lsm/socket_connect") +__success +int BPF_PROG(use_bpf_cgroup_ancestor) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup *cgrp, *ancestor; + + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) + return 0; + + ancestor = bpf_cgroup_ancestor(cgrp, 1); + if (!ancestor) + goto out; + + read_xattr(cgrp); + bpf_cgroup_release(ancestor); +out: + bpf_cgroup_release(cgrp); + return 0; +} + +SEC("cgroup/sendmsg4") +__success +int BPF_PROG(cgroup_skb) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup *cgrp, *ancestor; + + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) + return 0; + + ancestor = bpf_cgroup_ancestor(cgrp, 1); + if (!ancestor) + goto out; + + read_xattr(cgrp); + bpf_cgroup_release(ancestor); +out: + bpf_cgroup_release(cgrp); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c b/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c new file mode 100644 index 000000000000..855f85fc5522 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include +#include +#include "bpf_experimental.h" + +char _license[] SEC("license") = "GPL"; + +pid_t target_pid = 0; + +char xattr_value[64]; +static const char expected_value_a[] = "bpf_selftest_value_a"; +static const char expected_value_b[] = "bpf_selftest_value_b"; +bool found_value_a; +bool found_value_b; + +SEC("lsm.s/socket_connect") +int BPF_PROG(test_socket_connect) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup_subsys_state *css, *tmp; + struct bpf_dynptr value_ptr; + struct cgroup *cgrp; + + if ((bpf_get_current_pid_tgid() >> 32) != target_pid) + return 0; + + bpf_rcu_read_lock(); + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) { + bpf_rcu_read_unlock(); + return 0; + } + + css = &cgrp->self; + bpf_dynptr_from_mem(xattr_value, sizeof(xattr_value), 0, &value_ptr); + bpf_for_each(css, tmp, css, BPF_CGROUP_ITER_ANCESTORS_UP) { + int ret; + + ret = bpf_cgroup_read_xattr(tmp->cgroup, "user.bpf_test", + &value_ptr); + if (ret < 0) + continue; + + if (ret == sizeof(expected_value_a) && + !bpf_strncmp(xattr_value, sizeof(expected_value_a), expected_value_a)) + found_value_a = true; + if (ret == sizeof(expected_value_b) && + !bpf_strncmp(xattr_value, sizeof(expected_value_b), expected_value_b)) + found_value_b = true; + } + + bpf_rcu_read_unlock(); + bpf_cgroup_release(cgrp); + + return 0; +} -- cgit v1.2.3 From 27390db9592d828b6d3c2764305a5037a9aa969d Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Thu, 19 Jun 2025 15:53:42 +0200 Subject: testptp: add option to enable external timestamping edges Some drivers (e.g. ice) don't enable any edges by default when external timestamping is requested by the PTP_EXTTS_REQUEST ioctl, which makes testptp -e unusable for testing hardware supported by these drivers. Add -E option to specify if the rising, falling, or both edges should be enabled by the ioctl. Signed-off-by: Miroslav Lichvar Cc: Richard Cochran Cc: Jacob Keller Reviewed-by: Vadim Fedorenko Signed-off-by: David S. Miller --- tools/testing/selftests/ptp/testptp.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c index edc08a4433fd..ed1e2886ba3c 100644 --- a/tools/testing/selftests/ptp/testptp.c +++ b/tools/testing/selftests/ptp/testptp.c @@ -120,6 +120,7 @@ static void usage(char *progname) " -c query the ptp clock's capabilities\n" " -d name device to open\n" " -e val read 'val' external time stamp events\n" + " -E val enable rising (1), falling (2), or both (3) edges\n" " -f val adjust the ptp clock frequency by 'val' ppb\n" " -F chan Enable single channel mask and keep device open for debugfs verification.\n" " -g get the ptp clock time\n" @@ -178,6 +179,7 @@ int main(int argc, char *argv[]) int adjphase = 0; int capabilities = 0; int extts = 0; + int edge = 0; int flagtest = 0; int gettime = 0; int index = 0; @@ -202,7 +204,7 @@ int main(int argc, char *argv[]) progname = strrchr(argv[0], '/'); progname = progname ? 1+progname : argv[0]; - while (EOF != (c = getopt(argc, argv, "cd:e:f:F:ghH:i:k:lL:n:o:p:P:rsSt:T:w:x:Xy:z"))) { + while (EOF != (c = getopt(argc, argv, "cd:e:E:f:F:ghH:i:k:lL:n:o:p:P:rsSt:T:w:x:Xy:z"))) { switch (c) { case 'c': capabilities = 1; @@ -213,6 +215,11 @@ int main(int argc, char *argv[]) case 'e': extts = atoi(optarg); break; + case 'E': + edge = atoi(optarg); + edge = (edge & 1 ? PTP_RISING_EDGE : 0) | + (edge & 2 ? PTP_FALLING_EDGE : 0); + break; case 'f': adjfreq = atoi(optarg); break; @@ -444,7 +451,7 @@ int main(int argc, char *argv[]) if (!readonly) { memset(&extts_request, 0, sizeof(extts_request)); extts_request.index = index; - extts_request.flags = PTP_ENABLE_FEATURE; + extts_request.flags = PTP_ENABLE_FEATURE | edge; if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) { perror("PTP_EXTTS_REQUEST"); extts = 0; -- cgit v1.2.3 From 3ce7cdde66e65a400b2d1b2a7f72c499e1db26b6 Mon Sep 17 00:00:00 2001 From: Luis Gerhorst Date: Thu, 19 Jun 2025 16:08:53 +0200 Subject: selftests/bpf: Support ppc64el in vmtest With a rootfs built using libbpf's BPF CI [1], we can run specific tests as follows: $ ../libbpf-ci/rootfs/mkrootfs_debian.sh --arch ppc64el --distro noble $ PLATFORM=ppc64el CROSS_COMPILE=powerpc64le-linux-gnu- \ tools/testing/selftests/bpf/vmtest.sh \ -l libbpf-vmtest-rootfs-*-noble-ppc64el.tar.zst \ -- ./test_progs -t verifier_array_access Does not include a DENYLIST or support for KVM for now. [1] https://github.com/libbpf/ci Signed-off-by: Luis Gerhorst Link: https://lore.kernel.org/r/20250619140854.2135283-1-luis.gerhorst@fau.de Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/config.ppc64el | 93 ++++++++++++++++++++++++++++++ tools/testing/selftests/bpf/vmtest.sh | 9 +++ 2 files changed, 102 insertions(+) create mode 100644 tools/testing/selftests/bpf/config.ppc64el (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/config.ppc64el b/tools/testing/selftests/bpf/config.ppc64el new file mode 100644 index 000000000000..9acf389dc4ce --- /dev/null +++ b/tools/testing/selftests/bpf/config.ppc64el @@ -0,0 +1,93 @@ +CONFIG_ALTIVEC=y +CONFIG_AUDIT=y +CONFIG_BLK_CGROUP=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BONDING=y +CONFIG_BPF_JIT_ALWAYS_ON=y +CONFIG_BPF_PRELOAD_UMD=y +CONFIG_BPF_PRELOAD=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CGROUP_NET_CLASSID=y +CONFIG_CGROUP_PERF=y +CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_SCHED=y +CONFIG_CGROUPS=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=hvc0 wg.success=hvc1 panic_on_warn=1" +CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_CPUSETS=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_DEBUG_FS=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_DEVTMPFS_MOUNT=y +CONFIG_DEVTMPFS=y +CONFIG_EXPERT=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_FRAME_POINTER=y +CONFIG_FRAME_WARN=1280 +CONFIG_HARDLOCKUP_DETECTOR=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_HUGETLBFS=y +CONFIG_HVC_CONSOLE=y +CONFIG_INET=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IPV6_SEG6_LWTUNNEL=y +CONFIG_JUMP_LABEL=y +CONFIG_KALLSYMS_ALL=y +CONFIG_KPROBES=y +CONFIG_MEMCG=y +CONFIG_NAMESPACES=y +CONFIG_NET_ACT_BPF=y +CONFIG_NETDEVICES=y +CONFIG_NETFILTER_XT_MATCH_BPF=y +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_VRF=y +CONFIG_NET=y +CONFIG_NO_HZ_IDLE=y +CONFIG_NONPORTABLE=y +CONFIG_NR_CPUS=256 +CONFIG_PACKET=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_PCI_HOST_GENERIC=y +CONFIG_PCI=y +CONFIG_POSIX_MQUEUE=y +CONFIG_PPC64=y +CONFIG_PPC_OF_BOOT_TRAMPOLINE=y +CONFIG_PPC_PSERIES=y +CONFIG_PPC_RADIX_MMU=y +CONFIG_PRINTK_TIME=y +CONFIG_PROC_KCORE=y +CONFIG_PROFILING=y +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +CONFIG_RT_GROUP_SCHED=y +CONFIG_SECTION_MISMATCH_WARN_ONLY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_OF_PLATFORM=y +CONFIG_SMP=y +CONFIG_SOC_VIRT=y +CONFIG_SYSVIPC=y +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_THREAD_SHIFT=14 +CONFIG_TLS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_TMPFS=y +CONFIG_TUN=y +CONFIG_UNIX=y +CONFIG_UPROBES=y +CONFIG_USER_NS=y +CONFIG_VETH=y +CONFIG_VLAN_8021Q=y +CONFIG_VSOCKETS_LOOPBACK=y +CONFIG_VSX=y +CONFIG_XFRM_USER=y diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh index 79505d294c44..2f869daf8a06 100755 --- a/tools/testing/selftests/bpf/vmtest.sh +++ b/tools/testing/selftests/bpf/vmtest.sh @@ -43,6 +43,15 @@ riscv64) BZIMAGE="arch/riscv/boot/Image" ARCH="riscv" ;; +ppc64el) + QEMU_BINARY=qemu-system-ppc64 + QEMU_CONSOLE="hvc0" + # KVM could not be tested for powerpc, therefore not enabled for now. + HOST_FLAGS=(-machine pseries -cpu POWER9) + CROSS_FLAGS=(-machine pseries -cpu POWER9) + BZIMAGE="vmlinux" + ARCH="powerpc" + ;; *) echo "Unsupported architecture" exit 1 -- cgit v1.2.3 From 033b76bc7f066b3e2e024e5ced0de0768d7d57b5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 22 May 2025 16:52:21 -0700 Subject: KVM: selftests: Assert that eventfd() succeeds in Xen shinfo test Assert that eventfd() succeeds in the Xen shinfo test instead of skipping the associated testcase. While eventfd() is outside the scope of KVM, KVM unconditionally selects EVENTFD, i.e. the syscall should always succeed. Tested-by: K Prateek Nayak Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250522235223.3178519-12-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/x86/xen_shinfo_test.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c index 287829f850f7..34d180cf4eed 100644 --- a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c @@ -548,14 +548,11 @@ int main(int argc, char *argv[]) if (do_eventfd_tests) { irq_fd[0] = eventfd(0, 0); - irq_fd[1] = eventfd(0, 0); + TEST_ASSERT(irq_fd[0] >= 0, __KVM_SYSCALL_ERROR("eventfd()", irq_fd[0])); - /* Unexpected, but not a KVM failure */ - if (irq_fd[0] == -1 || irq_fd[1] == -1) - do_evtchn_tests = do_eventfd_tests = false; - } + irq_fd[1] = eventfd(0, 0); + TEST_ASSERT(irq_fd[1] >= 0, __KVM_SYSCALL_ERROR("eventfd()", irq_fd[1])); - if (do_eventfd_tests) { irq_routes.info.nr = 2; irq_routes.entries[0].gsi = 32; -- cgit v1.2.3 From 74e5e3fb0dd71790a1974d63420d43bc5743a56b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 22 May 2025 16:52:22 -0700 Subject: KVM: selftests: Add utilities to create eventfds and do KVM_IRQFD Add helpers to create eventfds and to (de)assign eventfds via KVM_IRQFD. Tested-by: K Prateek Nayak Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250522235223.3178519-13-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/arm64/vgic_irq.c | 12 ++----- tools/testing/selftests/kvm/include/kvm_util.h | 40 +++++++++++++++++++++++ tools/testing/selftests/kvm/x86/xen_shinfo_test.c | 18 +++------- 3 files changed, 47 insertions(+), 23 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/kvm/arm64/vgic_irq.c b/tools/testing/selftests/kvm/arm64/vgic_irq.c index f4ac28d53747..a09dd423c2d7 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_irq.c +++ b/tools/testing/selftests/kvm/arm64/vgic_irq.c @@ -620,18 +620,12 @@ static void kvm_routing_and_irqfd_check(struct kvm_vm *vm, * that no actual interrupt was injected for those cases. */ - for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { - fd[f] = eventfd(0, 0); - TEST_ASSERT(fd[f] != -1, __KVM_SYSCALL_ERROR("eventfd()", fd[f])); - } + for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) + fd[f] = kvm_new_eventfd(); for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { - struct kvm_irqfd irqfd = { - .fd = fd[f], - .gsi = i - MIN_SPI, - }; assert(i <= (uint64_t)UINT_MAX); - vm_ioctl(vm, KVM_IRQFD, &irqfd); + kvm_assign_irqfd(vm, i - MIN_SPI, fd[f]); } for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index bee65ca08721..1ddf85eebd1d 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -18,6 +18,7 @@ #include #include +#include #include #include "kvm_util_arch.h" @@ -502,6 +503,45 @@ static inline int vm_get_stats_fd(struct kvm_vm *vm) return fd; } +static inline int __kvm_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd, + uint32_t flags) +{ + struct kvm_irqfd irqfd = { + .fd = eventfd, + .gsi = gsi, + .flags = flags, + .resamplefd = -1, + }; + + return __vm_ioctl(vm, KVM_IRQFD, &irqfd); +} + +static inline void kvm_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd, + uint32_t flags) +{ + int ret = __kvm_irqfd(vm, gsi, eventfd, flags); + + TEST_ASSERT_VM_VCPU_IOCTL(!ret, KVM_IRQFD, ret, vm); +} + +static inline void kvm_assign_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd) +{ + kvm_irqfd(vm, gsi, eventfd, 0); +} + +static inline void kvm_deassign_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd) +{ + kvm_irqfd(vm, gsi, eventfd, KVM_IRQFD_FLAG_DEASSIGN); +} + +static inline int kvm_new_eventfd(void) +{ + int fd = eventfd(0, 0); + + TEST_ASSERT(fd >= 0, __KVM_SYSCALL_ERROR("eventfd()", fd)); + return fd; +} + static inline void read_stats_header(int stats_fd, struct kvm_stats_header *header) { ssize_t ret; diff --git a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c index 34d180cf4eed..23909b501ac2 100644 --- a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c @@ -547,11 +547,8 @@ int main(int argc, char *argv[]) int irq_fd[2] = { -1, -1 }; if (do_eventfd_tests) { - irq_fd[0] = eventfd(0, 0); - TEST_ASSERT(irq_fd[0] >= 0, __KVM_SYSCALL_ERROR("eventfd()", irq_fd[0])); - - irq_fd[1] = eventfd(0, 0); - TEST_ASSERT(irq_fd[1] >= 0, __KVM_SYSCALL_ERROR("eventfd()", irq_fd[1])); + irq_fd[0] = kvm_new_eventfd(); + irq_fd[1] = kvm_new_eventfd(); irq_routes.info.nr = 2; @@ -569,15 +566,8 @@ int main(int argc, char *argv[]) vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info); - struct kvm_irqfd ifd = { }; - - ifd.fd = irq_fd[0]; - ifd.gsi = 32; - vm_ioctl(vm, KVM_IRQFD, &ifd); - - ifd.fd = irq_fd[1]; - ifd.gsi = 33; - vm_ioctl(vm, KVM_IRQFD, &ifd); + kvm_assign_irqfd(vm, 32, irq_fd[0]); + kvm_assign_irqfd(vm, 33, irq_fd[1]); struct sigaction sa = { }; sa.sa_handler = handle_alrm; -- cgit v1.2.3 From 7e9b231c402a297251b3e6e0f5cc16cef7dd3ce5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 22 May 2025 16:52:23 -0700 Subject: KVM: selftests: Add a KVM_IRQFD test to verify uniqueness requirements Add a selftest to verify that eventfd+irqfd bindings are globally unique, i.e. that KVM doesn't allow multiple irqfds to bind to a single eventfd, even across VMs. Tested-by: K Prateek Nayak Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250522235223.3178519-14-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/Makefile.kvm | 1 + tools/testing/selftests/kvm/irqfd_test.c | 135 +++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+) create mode 100644 tools/testing/selftests/kvm/irqfd_test.c (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index 38b95998e1e6..028456f1aae1 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -59,6 +59,7 @@ TEST_PROGS_x86 += x86/nx_huge_pages_test.sh TEST_GEN_PROGS_COMMON = demand_paging_test TEST_GEN_PROGS_COMMON += dirty_log_test TEST_GEN_PROGS_COMMON += guest_print_test +TEST_GEN_PROGS_COMMON += irqfd_test TEST_GEN_PROGS_COMMON += kvm_binary_stats_test TEST_GEN_PROGS_COMMON += kvm_create_max_vcpus TEST_GEN_PROGS_COMMON += kvm_page_table_test diff --git a/tools/testing/selftests/kvm/irqfd_test.c b/tools/testing/selftests/kvm/irqfd_test.c new file mode 100644 index 000000000000..7c301b4c7005 --- /dev/null +++ b/tools/testing/selftests/kvm/irqfd_test.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kvm_util.h" + +static struct kvm_vm *vm1; +static struct kvm_vm *vm2; +static int __eventfd; +static bool done; + +/* + * KVM de-assigns based on eventfd *and* GSI, but requires unique eventfds when + * assigning (the API isn't symmetrical). Abuse the oddity and use a per-task + * GSI base to avoid false failures due to cross-task de-assign, i.e. so that + * the secondary doesn't de-assign the primary's eventfd and cause assign to + * unexpectedly succeed on the primary. + */ +#define GSI_BASE_PRIMARY 0x20 +#define GSI_BASE_SECONDARY 0x30 + +static void juggle_eventfd_secondary(struct kvm_vm *vm, int eventfd) +{ + int r, i; + + /* + * The secondary task can encounter EBADF since the primary can close + * the eventfd at any time. And because the primary can recreate the + * eventfd, at the safe fd in the file table, the secondary can also + * encounter "unexpected" success, e.g. if the close+recreate happens + * between the first and second assignments. The secondary's role is + * mostly to antagonize KVM, not to detect bugs. + */ + for (i = 0; i < 2; i++) { + r = __kvm_irqfd(vm, GSI_BASE_SECONDARY, eventfd, 0); + TEST_ASSERT(!r || errno == EBUSY || errno == EBADF, + "Wanted success, EBUSY, or EBADF, r = %d, errno = %d", + r, errno); + + /* De-assign should succeed unless the eventfd was closed. */ + r = __kvm_irqfd(vm, GSI_BASE_SECONDARY + i, eventfd, KVM_IRQFD_FLAG_DEASSIGN); + TEST_ASSERT(!r || errno == EBADF, + "De-assign should succeed unless the fd was closed"); + } +} + +static void *secondary_irqfd_juggler(void *ign) +{ + while (!READ_ONCE(done)) { + juggle_eventfd_secondary(vm1, READ_ONCE(__eventfd)); + juggle_eventfd_secondary(vm2, READ_ONCE(__eventfd)); + } + + return NULL; +} + +static void juggle_eventfd_primary(struct kvm_vm *vm, int eventfd) +{ + int r1, r2; + + /* + * At least one of the assigns should fail. KVM disallows assigning a + * single eventfd to multiple GSIs (or VMs), so it's possible that both + * assignments can fail, too. + */ + r1 = __kvm_irqfd(vm, GSI_BASE_PRIMARY, eventfd, 0); + TEST_ASSERT(!r1 || errno == EBUSY, + "Wanted success or EBUSY, r = %d, errno = %d", r1, errno); + + r2 = __kvm_irqfd(vm, GSI_BASE_PRIMARY + 1, eventfd, 0); + TEST_ASSERT(r1 || (r2 && errno == EBUSY), + "Wanted failure (EBUSY), r1 = %d, r2 = %d, errno = %d", + r1, r2, errno); + + /* + * De-assign should always succeed, even if the corresponding assign + * failed. + */ + kvm_irqfd(vm, GSI_BASE_PRIMARY, eventfd, KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm, GSI_BASE_PRIMARY + 1, eventfd, KVM_IRQFD_FLAG_DEASSIGN); +} + +int main(int argc, char *argv[]) +{ + pthread_t racing_thread; + int r, i; + + /* Create "full" VMs, as KVM_IRQFD requires an in-kernel IRQ chip. */ + vm1 = vm_create(1); + vm2 = vm_create(1); + + WRITE_ONCE(__eventfd, kvm_new_eventfd()); + + kvm_irqfd(vm1, 10, __eventfd, 0); + + r = __kvm_irqfd(vm1, 11, __eventfd, 0); + TEST_ASSERT(r && errno == EBUSY, + "Wanted EBUSY, r = %d, errno = %d", r, errno); + + r = __kvm_irqfd(vm2, 12, __eventfd, 0); + TEST_ASSERT(r && errno == EBUSY, + "Wanted EBUSY, r = %d, errno = %d", r, errno); + + /* + * De-assign all eventfds, along with multiple eventfds that were never + * assigned. KVM's ABI is that de-assign is allowed so long as the + * eventfd itself is valid. + */ + kvm_irqfd(vm1, 11, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm1, 12, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm1, 13, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm1, 14, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm1, 10, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + + close(__eventfd); + + pthread_create(&racing_thread, NULL, secondary_irqfd_juggler, vm2); + + for (i = 0; i < 10000; i++) { + WRITE_ONCE(__eventfd, kvm_new_eventfd()); + + juggle_eventfd_primary(vm1, __eventfd); + juggle_eventfd_primary(vm2, __eventfd); + close(__eventfd); + } + + WRITE_ONCE(done, true); + pthread_join(racing_thread, NULL); +} -- cgit v1.2.3 From bfb4a6c721517a11b277e8841f8a7a64b1b14b72 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 21 Jun 2025 10:19:43 -0700 Subject: selftests: drv-net: import things in lib one by one pylint doesn't understand our path hacks, and it generates a lot of warnings for driver tests. Import what we use one by one, this is hopefully not too tedious and it makes pylint happy. Link: https://patch.msgid.link/20250621171944.2619249-9-kuba@kernel.org Signed-off-by: Jakub Kicinski --- .../testing/selftests/drivers/net/hw/lib/py/__init__.py | 17 +++++++++++++++++ tools/testing/selftests/drivers/net/lib/py/__init__.py | 14 ++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py index b582885786f5..56ff11074b55 100644 --- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py @@ -7,8 +7,25 @@ KSFT_DIR = (Path(__file__).parent / "../../../../..").resolve() try: sys.path.append(KSFT_DIR.as_posix()) + from net.lib.py import * from drivers.net.lib.py import * + + # Import one by one to avoid pylint false positives + from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \ + NlError, RtnlFamily + from net.lib.py import CmdExitFailure + from net.lib.py import bkg, cmd, defer, ethtool, fd_read_timeout, ip, \ + rand_port, tool, wait_port_listen + from net.lib.py import fd_read_timeout + from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx + from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ + ksft_setup + from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \ + ksft_ne, ksft_not_in, ksft_raises, ksft_true + from net.lib.py import NetNSEnter + from drivers.net.lib.py import GenerateTraffic + from drivers.net.lib.py import NetDrvEnv, NetDrvEpEnv except ModuleNotFoundError as e: ksft_pr("Failed importing `net` library from kernel sources") ksft_pr(str(e)) diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py index 401e70f7f136..9ed1d8f70524 100644 --- a/tools/testing/selftests/drivers/net/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py @@ -7,7 +7,21 @@ KSFT_DIR = (Path(__file__).parent / "../../../..").resolve() try: sys.path.append(KSFT_DIR.as_posix()) + from net.lib.py import * + + # Import one by one to avoid pylint false positives + from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \ + NlError, RtnlFamily + from net.lib.py import CmdExitFailure + from net.lib.py import bkg, cmd, defer, ethtool, fd_read_timeout, ip, \ + rand_port, tool, wait_port_listen + from net.lib.py import fd_read_timeout + from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx + from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ + ksft_setup + from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \ + ksft_ne, ksft_not_in, ksft_raises, ksft_true except ModuleNotFoundError as e: ksft_pr("Failed importing `net` library from kernel sources") ksft_pr(str(e)) -- cgit v1.2.3 From 54c18a8f07aa9dbb66b8b194dd38d2bc55121bfe Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Fri, 20 Jun 2025 23:39:30 +0200 Subject: selftests/nolibc: drop implicit defconfig executions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit d7d271ec30dd ("selftests/nolibc: execute defconfig before other targets") accidentally introduced implicit executions of the defconfig target. These executions were unintentional and come from a misunderstanding of ordering dependencies. Drop the dependencies again. Reported-by: Mark Brown Closes: https://lore.kernel.org/all/3d5128b9-b4b6-4a8e-94ce-ea5ff4ea655b@sirena.org.uk/ Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250620-nolibc-selftests-v1-1-f6b2ce7c5071@weissschuh.net Signed-off-by: Thomas Weißschuh --- tools/testing/selftests/nolibc/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index 147ce411b46a..41b97dfd02bf 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -307,10 +307,10 @@ defconfig: $(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) olddefconfig < /dev/null; \ fi -kernel: | defconfig +kernel: $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) < /dev/null -kernel-standalone: initramfs | defconfig +kernel-standalone: initramfs $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) CONFIG_INITRAMFS_SOURCE=$(CURDIR)/initramfs < /dev/null # run the tests after building the kernel -- cgit v1.2.3 From a09db6afe292cba6ccb0966d4266b78b1a8a8055 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Fri, 20 Jun 2025 23:39:31 +0200 Subject: selftests/nolibc: split out CFLAGS logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some upcoming changes will reuse the CFLAGS. Split the computation into a reusable Makefile. Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250620-nolibc-selftests-v1-2-f6b2ce7c5071@weissschuh.net Signed-off-by: Thomas Weißschuh --- tools/testing/selftests/nolibc/Makefile | 12 ++++-------- tools/testing/selftests/nolibc/Makefile.include | 10 ++++++++++ 2 files changed, 14 insertions(+), 8 deletions(-) create mode 100644 tools/testing/selftests/nolibc/Makefile.include (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index 41b97dfd02bf..6d62f350d0c1 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -195,14 +195,10 @@ CFLAGS_sparc32 = $(call cc-option,-m32) ifeq ($(origin XARCH),command line) CFLAGS_XARCH = $(CFLAGS_$(XARCH)) endif -_CFLAGS_STACKPROTECTOR = $(call cc-option,-fstack-protector-all) $(call cc-option,-mstack-protector-guard=global) -CFLAGS_STACKPROTECTOR ?= $(call try-run, \ - echo 'void foo(void) {}' | $(CC) -x c - -o - -S $(CLANG_CROSS_FLAGS) $(_CFLAGS_STACKPROTECTOR) | grep -q __stack_chk_guard, \ - $(_CFLAGS_STACKPROTECTOR)) -CFLAGS_SANITIZER ?= $(call cc-option,-fsanitize=undefined -fsanitize-trap=all) -CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \ - $(call cc-option,-fno-stack-protector) $(call cc-option,-Wmissing-prototypes) \ - $(CFLAGS_XARCH) $(CFLAGS_STACKPROTECTOR) $(CFLAGS_SANITIZER) $(CFLAGS_EXTRA) + +include Makefile.include + +CFLAGS ?= $(CFLAGS_NOLIBC_TEST) $(CFLAGS_XARCH) $(CFLAGS_EXTRA) LDFLAGS := LIBGCC := -lgcc diff --git a/tools/testing/selftests/nolibc/Makefile.include b/tools/testing/selftests/nolibc/Makefile.include new file mode 100644 index 000000000000..66287fafbbe0 --- /dev/null +++ b/tools/testing/selftests/nolibc/Makefile.include @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 + +__CFLAGS_STACKPROTECTOR = $(call cc-option,-fstack-protector-all) $(call cc-option,-mstack-protector-guard=global) +_CFLAGS_STACKPROTECTOR ?= $(call try-run, \ + echo 'void foo(void) {}' | $(CC) -x c - -o - -S $(CLANG_CROSS_FLAGS) $(__CFLAGS_STACKPROTECTOR) | grep -q __stack_chk_guard, \ + $(__CFLAGS_STACKPROTECTOR)) +_CFLAGS_SANITIZER ?= $(call cc-option,-fsanitize=undefined -fsanitize-trap=all) +CFLAGS_NOLIBC_TEST ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \ + $(call cc-option,-fno-stack-protector) $(call cc-option,-Wmissing-prototypes) \ + $(_CFLAGS_STACKPROTECTOR) $(_CFLAGS_SANITIZER) -- cgit v1.2.3 From 3adf4f90c9bda3d875ce05721ee4bb91c552faa5 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Fri, 20 Jun 2025 23:39:32 +0200 Subject: selftests/nolibc: rename Makefile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The nolibc tests are not real kselftests, they work differently and provide a different interface. Users trying to use them like real selftests may be confused and the tests are not executed by CI systems. To make space for an integration with the kselftest framework, move the custom tests out of the way. The custom tests are still useful to keep as they provide functionality not provided by kselftests. Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250620-nolibc-selftests-v1-3-f6b2ce7c5071@weissschuh.net Signed-off-by: Thomas Weißschuh --- tools/testing/selftests/nolibc/Makefile | 340 ------------------------- tools/testing/selftests/nolibc/Makefile.nolibc | 340 +++++++++++++++++++++++++ tools/testing/selftests/nolibc/run-tests.sh | 2 +- 3 files changed, 341 insertions(+), 341 deletions(-) delete mode 100644 tools/testing/selftests/nolibc/Makefile create mode 100644 tools/testing/selftests/nolibc/Makefile.nolibc (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile deleted file mode 100644 index 6d62f350d0c1..000000000000 --- a/tools/testing/selftests/nolibc/Makefile +++ /dev/null @@ -1,340 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# Makefile for nolibc tests -# we're in ".../tools/testing/selftests/nolibc" -ifeq ($(srctree),) -srctree := $(patsubst %/tools/testing/selftests/,%,$(dir $(CURDIR))) -endif - -include $(srctree)/tools/scripts/utilities.mak -# We need this for the "__cc-option" macro. -include $(srctree)/scripts/Makefile.compiler - -ifneq ($(O),) -ifneq ($(call is-absolute,$(O)),y) -$(error Only absolute O= parameters are supported) -endif -objtree := $(O) -else -objtree ?= $(srctree) -endif - -ifeq ($(ARCH),) -include $(srctree)/scripts/subarch.include -ARCH = $(SUBARCH) -endif - -cc-option = $(call __cc-option, $(CC),$(CLANG_CROSS_FLAGS),$(1),$(2)) - -# XARCH extends the kernel's ARCH with a few variants of the same -# architecture that only differ by the configuration, the toolchain -# and the Qemu program used. It is copied as-is into ARCH except for -# a few specific values which are mapped like this: -# -# XARCH | ARCH | config -# -------------|-----------|------------------------- -# ppc | powerpc | 32 bits -# ppc64 | powerpc | 64 bits big endian -# ppc64le | powerpc | 64 bits little endian -# -# It is recommended to only use XARCH, though it does not harm if -# ARCH is already set. For simplicity, ARCH is sufficient for all -# architectures where both are equal. - -# configure default variants for target kernel supported architectures -XARCH_powerpc = ppc -XARCH_mips = mips32le -XARCH_riscv = riscv64 -XARCH = $(or $(XARCH_$(ARCH)),$(ARCH)) - -# map from user input variants to their kernel supported architectures -ARCH_armthumb = arm -ARCH_ppc = powerpc -ARCH_ppc64 = powerpc -ARCH_ppc64le = powerpc -ARCH_mips32le = mips -ARCH_mips32be = mips -ARCH_riscv32 = riscv -ARCH_riscv64 = riscv -ARCH_s390x = s390 -ARCH_sparc32 = sparc -ARCH_sparc64 = sparc -ARCH := $(or $(ARCH_$(XARCH)),$(XARCH)) - -# kernel image names by architecture -IMAGE_i386 = arch/x86/boot/bzImage -IMAGE_x86_64 = arch/x86/boot/bzImage -IMAGE_x86 = arch/x86/boot/bzImage -IMAGE_arm64 = arch/arm64/boot/Image -IMAGE_arm = arch/arm/boot/zImage -IMAGE_armthumb = arch/arm/boot/zImage -IMAGE_mips32le = vmlinuz -IMAGE_mips32be = vmlinuz -IMAGE_ppc = vmlinux -IMAGE_ppc64 = vmlinux -IMAGE_ppc64le = arch/powerpc/boot/zImage -IMAGE_riscv = arch/riscv/boot/Image -IMAGE_riscv32 = arch/riscv/boot/Image -IMAGE_riscv64 = arch/riscv/boot/Image -IMAGE_s390x = arch/s390/boot/bzImage -IMAGE_s390 = arch/s390/boot/bzImage -IMAGE_loongarch = arch/loongarch/boot/vmlinuz.efi -IMAGE_sparc32 = arch/sparc/boot/image -IMAGE_sparc64 = arch/sparc/boot/image -IMAGE_m68k = vmlinux -IMAGE = $(objtree)/$(IMAGE_$(XARCH)) -IMAGE_NAME = $(notdir $(IMAGE)) - -# default kernel configurations that appear to be usable -DEFCONFIG_i386 = defconfig -DEFCONFIG_x86_64 = defconfig -DEFCONFIG_x86 = defconfig -DEFCONFIG_arm64 = defconfig -DEFCONFIG_arm = multi_v7_defconfig -DEFCONFIG_armthumb = multi_v7_defconfig -DEFCONFIG_mips32le = malta_defconfig -DEFCONFIG_mips32be = malta_defconfig generic/eb.config -DEFCONFIG_ppc = pmac32_defconfig -DEFCONFIG_ppc64 = powernv_be_defconfig -DEFCONFIG_ppc64le = powernv_defconfig -DEFCONFIG_riscv = defconfig -DEFCONFIG_riscv32 = rv32_defconfig -DEFCONFIG_riscv64 = defconfig -DEFCONFIG_s390x = defconfig -DEFCONFIG_s390 = defconfig compat.config -DEFCONFIG_loongarch = defconfig -DEFCONFIG_sparc32 = sparc32_defconfig -DEFCONFIG_sparc64 = sparc64_defconfig -DEFCONFIG_m68k = virt_defconfig -DEFCONFIG = $(DEFCONFIG_$(XARCH)) - -EXTRACONFIG_m68k = -e CONFIG_BLK_DEV_INITRD -EXTRACONFIG = $(EXTRACONFIG_$(XARCH)) -EXTRACONFIG_arm = -e CONFIG_NAMESPACES -EXTRACONFIG_armthumb = -e CONFIG_NAMESPACES - -# optional tests to run (default = all) -TEST = - -# QEMU_ARCH: arch names used by qemu -QEMU_ARCH_i386 = i386 -QEMU_ARCH_x86_64 = x86_64 -QEMU_ARCH_x86 = x86_64 -QEMU_ARCH_arm64 = aarch64 -QEMU_ARCH_arm = arm -QEMU_ARCH_armthumb = arm -QEMU_ARCH_mips32le = mipsel # works with malta_defconfig -QEMU_ARCH_mips32be = mips -QEMU_ARCH_ppc = ppc -QEMU_ARCH_ppc64 = ppc64 -QEMU_ARCH_ppc64le = ppc64 -QEMU_ARCH_riscv = riscv64 -QEMU_ARCH_riscv32 = riscv32 -QEMU_ARCH_riscv64 = riscv64 -QEMU_ARCH_s390x = s390x -QEMU_ARCH_s390 = s390x -QEMU_ARCH_loongarch = loongarch64 -QEMU_ARCH_sparc32 = sparc -QEMU_ARCH_sparc64 = sparc64 -QEMU_ARCH_m68k = m68k -QEMU_ARCH = $(QEMU_ARCH_$(XARCH)) - -QEMU_ARCH_USER_ppc64le = ppc64le -QEMU_ARCH_USER = $(or $(QEMU_ARCH_USER_$(XARCH)),$(QEMU_ARCH_$(XARCH))) - -QEMU_BIOS_DIR = /usr/share/edk2/ -QEMU_BIOS_loongarch = $(QEMU_BIOS_DIR)/loongarch64/OVMF_CODE.fd - -ifneq ($(QEMU_BIOS_$(XARCH)),) -QEMU_ARGS_BIOS = -bios $(QEMU_BIOS_$(XARCH)) -endif - -# QEMU_ARGS : some arch-specific args to pass to qemu -QEMU_ARGS_i386 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_x86_64 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_x86 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_arm64 = -M virt -cpu cortex-a53 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_arm = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_armthumb = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_mips32le = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_mips32be = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_ppc = -M g3beige -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_ppc64 = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_ppc64le = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_riscv = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_riscv32 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_riscv64 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_s390x = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_s390 = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_loongarch = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_sparc32 = -M SS-5 -m 256M -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_sparc64 = -M sun4u -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_m68k = -M virt -append "console=ttyGF0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS = -m 1G $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_BIOS) $(QEMU_ARGS_EXTRA) - -# OUTPUT is only set when run from the main makefile, otherwise -# it defaults to this nolibc directory. -OUTPUT ?= $(CURDIR)/ - -ifeq ($(V),1) -Q= -else -Q=@ -endif - -CFLAGS_i386 = $(call cc-option,-m32) -CFLAGS_arm = -marm -CFLAGS_armthumb = -mthumb -march=armv6t2 -CFLAGS_ppc = -m32 -mbig-endian -mno-vsx $(call cc-option,-mmultiple) -CFLAGS_ppc64 = -m64 -mbig-endian -mno-vsx $(call cc-option,-mmultiple) -CFLAGS_ppc64le = -m64 -mlittle-endian -mno-vsx $(call cc-option,-mabi=elfv2) -CFLAGS_s390x = -m64 -CFLAGS_s390 = -m31 -CFLAGS_mips32le = -EL -mabi=32 -fPIC -CFLAGS_mips32be = -EB -mabi=32 -CFLAGS_sparc32 = $(call cc-option,-m32) -ifeq ($(origin XARCH),command line) -CFLAGS_XARCH = $(CFLAGS_$(XARCH)) -endif - -include Makefile.include - -CFLAGS ?= $(CFLAGS_NOLIBC_TEST) $(CFLAGS_XARCH) $(CFLAGS_EXTRA) -LDFLAGS := - -LIBGCC := -lgcc - -ifneq ($(LLVM),) -# Not needed for clang -LIBGCC := -endif - -# Modify CFLAGS based on LLVM= -include $(srctree)/tools/scripts/Makefile.include - -# GCC uses "s390", clang "systemz" -CLANG_CROSS_FLAGS := $(subst --target=s390-linux,--target=systemz-linux,$(CLANG_CROSS_FLAGS)) - -REPORT ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{if (!f) printf("\n"); f++; print;} /\[SKIPPED\][\r]*$$/{s++} \ - END{ printf("\n%3d test(s): %3d passed, %3d skipped, %3d failed => status: ", p+s+f, p, s, f); \ - if (f || !p) printf("failure\n"); else if (s) printf("warning\n"); else printf("success\n");; \ - printf("\nSee all results in %s\n", ARGV[1]); }' - -help: - @echo "Supported targets under selftests/nolibc:" - @echo " all call the \"run\" target below" - @echo " help this help" - @echo " sysroot create the nolibc sysroot here (uses \$$ARCH)" - @echo " nolibc-test build the executable (uses \$$CC and \$$CROSS_COMPILE)" - @echo " libc-test build an executable using the compiler's default libc instead" - @echo " run-user runs the executable under QEMU (uses \$$XARCH, \$$TEST)" - @echo " initramfs.cpio prepare the initramfs archive with nolibc-test" - @echo " initramfs prepare the initramfs tree with nolibc-test" - @echo " defconfig create a fresh new default config (uses \$$XARCH)" - @echo " kernel (re)build the kernel (uses \$$XARCH)" - @echo " kernel-standalone (re)build the kernel with the initramfs (uses \$$XARCH)" - @echo " run runs the kernel in QEMU after building it (uses \$$XARCH, \$$TEST)" - @echo " rerun runs a previously prebuilt kernel in QEMU (uses \$$XARCH, \$$TEST)" - @echo " clean clean the sysroot, initramfs, build and output files" - @echo "" - @echo "The output file is \"run.out\". Test ranges may be passed using \$$TEST." - @echo "" - @echo "Currently using the following variables:" - @echo " ARCH = $(ARCH)" - @echo " XARCH = $(XARCH)" - @echo " CROSS_COMPILE = $(CROSS_COMPILE)" - @echo " CC = $(CC)" - @echo " OUTPUT = $(OUTPUT)" - @echo " TEST = $(TEST)" - @echo " QEMU_ARCH = $(if $(QEMU_ARCH),$(QEMU_ARCH),UNKNOWN_ARCH) [determined from \$$XARCH]" - @echo " IMAGE_NAME = $(if $(IMAGE_NAME),$(IMAGE_NAME),UNKNOWN_ARCH) [determined from \$$XARCH]" - @echo "" - -all: run - -sysroot: sysroot/$(ARCH)/include - -sysroot/$(ARCH)/include: - $(Q)rm -rf sysroot/$(ARCH) sysroot/sysroot - $(QUIET_MKDIR)mkdir -p sysroot - $(Q)$(MAKE) -C $(srctree) outputmakefile - $(Q)$(MAKE) -C $(srctree)/tools/include/nolibc ARCH=$(ARCH) OUTPUT=$(CURDIR)/sysroot/ headers_standalone headers_check - $(Q)mv sysroot/sysroot sysroot/$(ARCH) - -ifneq ($(NOLIBC_SYSROOT),0) -nolibc-test: nolibc-test.c nolibc-test-linkage.c sysroot/$(ARCH)/include - $(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \ - -nostdlib -nostdinc -static -Isysroot/$(ARCH)/include nolibc-test.c nolibc-test-linkage.c $(LIBGCC) -else -nolibc-test: nolibc-test.c nolibc-test-linkage.c - $(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \ - -nostdlib -static -include $(srctree)/tools/include/nolibc/nolibc.h nolibc-test.c nolibc-test-linkage.c $(LIBGCC) -endif - -libc-test: nolibc-test.c nolibc-test-linkage.c - $(QUIET_CC)$(HOSTCC) -o $@ nolibc-test.c nolibc-test-linkage.c - -# local libc-test -run-libc-test: libc-test - $(Q)./libc-test > "$(CURDIR)/run.out" || : - $(Q)$(REPORT) $(CURDIR)/run.out - -# local nolibc-test -run-nolibc-test: nolibc-test - $(Q)./nolibc-test > "$(CURDIR)/run.out" || : - $(Q)$(REPORT) $(CURDIR)/run.out - -# qemu user-land test -run-user: nolibc-test - $(Q)qemu-$(QEMU_ARCH_USER) ./nolibc-test > "$(CURDIR)/run.out" || : - $(Q)$(REPORT) $(CURDIR)/run.out - -initramfs.cpio: kernel nolibc-test - $(QUIET_GEN)echo 'file /init nolibc-test 755 0 0' | $(objtree)/usr/gen_init_cpio - > initramfs.cpio - -initramfs: nolibc-test - $(QUIET_MKDIR)mkdir -p initramfs - $(call QUIET_INSTALL, initramfs/init) - $(Q)cp nolibc-test initramfs/init - -defconfig: - $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(DEFCONFIG) - $(Q)if [ -n "$(EXTRACONFIG)" ]; then \ - $(srctree)/scripts/config --file $(objtree)/.config $(EXTRACONFIG); \ - $(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) olddefconfig < /dev/null; \ - fi - -kernel: - $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) < /dev/null - -kernel-standalone: initramfs - $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) CONFIG_INITRAMFS_SOURCE=$(CURDIR)/initramfs < /dev/null - -# run the tests after building the kernel -run: kernel initramfs.cpio - $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(IMAGE)" -initrd initramfs.cpio -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out" - $(Q)$(REPORT) $(CURDIR)/run.out - -# re-run the tests from an existing kernel -rerun: - $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(IMAGE)" -initrd initramfs.cpio -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out" - $(Q)$(REPORT) $(CURDIR)/run.out - -# report with existing test log -report: - $(Q)$(REPORT) $(CURDIR)/run.out - -clean: - $(call QUIET_CLEAN, sysroot) - $(Q)rm -rf sysroot - $(call QUIET_CLEAN, nolibc-test) - $(Q)rm -f nolibc-test - $(call QUIET_CLEAN, libc-test) - $(Q)rm -f libc-test - $(call QUIET_CLEAN, initramfs.cpio) - $(Q)rm -rf initramfs.cpio - $(call QUIET_CLEAN, initramfs) - $(Q)rm -rf initramfs - $(call QUIET_CLEAN, run.out) - $(Q)rm -rf run.out - -.PHONY: sysroot/$(ARCH)/include diff --git a/tools/testing/selftests/nolibc/Makefile.nolibc b/tools/testing/selftests/nolibc/Makefile.nolibc new file mode 100644 index 000000000000..6d62f350d0c1 --- /dev/null +++ b/tools/testing/selftests/nolibc/Makefile.nolibc @@ -0,0 +1,340 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for nolibc tests +# we're in ".../tools/testing/selftests/nolibc" +ifeq ($(srctree),) +srctree := $(patsubst %/tools/testing/selftests/,%,$(dir $(CURDIR))) +endif + +include $(srctree)/tools/scripts/utilities.mak +# We need this for the "__cc-option" macro. +include $(srctree)/scripts/Makefile.compiler + +ifneq ($(O),) +ifneq ($(call is-absolute,$(O)),y) +$(error Only absolute O= parameters are supported) +endif +objtree := $(O) +else +objtree ?= $(srctree) +endif + +ifeq ($(ARCH),) +include $(srctree)/scripts/subarch.include +ARCH = $(SUBARCH) +endif + +cc-option = $(call __cc-option, $(CC),$(CLANG_CROSS_FLAGS),$(1),$(2)) + +# XARCH extends the kernel's ARCH with a few variants of the same +# architecture that only differ by the configuration, the toolchain +# and the Qemu program used. It is copied as-is into ARCH except for +# a few specific values which are mapped like this: +# +# XARCH | ARCH | config +# -------------|-----------|------------------------- +# ppc | powerpc | 32 bits +# ppc64 | powerpc | 64 bits big endian +# ppc64le | powerpc | 64 bits little endian +# +# It is recommended to only use XARCH, though it does not harm if +# ARCH is already set. For simplicity, ARCH is sufficient for all +# architectures where both are equal. + +# configure default variants for target kernel supported architectures +XARCH_powerpc = ppc +XARCH_mips = mips32le +XARCH_riscv = riscv64 +XARCH = $(or $(XARCH_$(ARCH)),$(ARCH)) + +# map from user input variants to their kernel supported architectures +ARCH_armthumb = arm +ARCH_ppc = powerpc +ARCH_ppc64 = powerpc +ARCH_ppc64le = powerpc +ARCH_mips32le = mips +ARCH_mips32be = mips +ARCH_riscv32 = riscv +ARCH_riscv64 = riscv +ARCH_s390x = s390 +ARCH_sparc32 = sparc +ARCH_sparc64 = sparc +ARCH := $(or $(ARCH_$(XARCH)),$(XARCH)) + +# kernel image names by architecture +IMAGE_i386 = arch/x86/boot/bzImage +IMAGE_x86_64 = arch/x86/boot/bzImage +IMAGE_x86 = arch/x86/boot/bzImage +IMAGE_arm64 = arch/arm64/boot/Image +IMAGE_arm = arch/arm/boot/zImage +IMAGE_armthumb = arch/arm/boot/zImage +IMAGE_mips32le = vmlinuz +IMAGE_mips32be = vmlinuz +IMAGE_ppc = vmlinux +IMAGE_ppc64 = vmlinux +IMAGE_ppc64le = arch/powerpc/boot/zImage +IMAGE_riscv = arch/riscv/boot/Image +IMAGE_riscv32 = arch/riscv/boot/Image +IMAGE_riscv64 = arch/riscv/boot/Image +IMAGE_s390x = arch/s390/boot/bzImage +IMAGE_s390 = arch/s390/boot/bzImage +IMAGE_loongarch = arch/loongarch/boot/vmlinuz.efi +IMAGE_sparc32 = arch/sparc/boot/image +IMAGE_sparc64 = arch/sparc/boot/image +IMAGE_m68k = vmlinux +IMAGE = $(objtree)/$(IMAGE_$(XARCH)) +IMAGE_NAME = $(notdir $(IMAGE)) + +# default kernel configurations that appear to be usable +DEFCONFIG_i386 = defconfig +DEFCONFIG_x86_64 = defconfig +DEFCONFIG_x86 = defconfig +DEFCONFIG_arm64 = defconfig +DEFCONFIG_arm = multi_v7_defconfig +DEFCONFIG_armthumb = multi_v7_defconfig +DEFCONFIG_mips32le = malta_defconfig +DEFCONFIG_mips32be = malta_defconfig generic/eb.config +DEFCONFIG_ppc = pmac32_defconfig +DEFCONFIG_ppc64 = powernv_be_defconfig +DEFCONFIG_ppc64le = powernv_defconfig +DEFCONFIG_riscv = defconfig +DEFCONFIG_riscv32 = rv32_defconfig +DEFCONFIG_riscv64 = defconfig +DEFCONFIG_s390x = defconfig +DEFCONFIG_s390 = defconfig compat.config +DEFCONFIG_loongarch = defconfig +DEFCONFIG_sparc32 = sparc32_defconfig +DEFCONFIG_sparc64 = sparc64_defconfig +DEFCONFIG_m68k = virt_defconfig +DEFCONFIG = $(DEFCONFIG_$(XARCH)) + +EXTRACONFIG_m68k = -e CONFIG_BLK_DEV_INITRD +EXTRACONFIG = $(EXTRACONFIG_$(XARCH)) +EXTRACONFIG_arm = -e CONFIG_NAMESPACES +EXTRACONFIG_armthumb = -e CONFIG_NAMESPACES + +# optional tests to run (default = all) +TEST = + +# QEMU_ARCH: arch names used by qemu +QEMU_ARCH_i386 = i386 +QEMU_ARCH_x86_64 = x86_64 +QEMU_ARCH_x86 = x86_64 +QEMU_ARCH_arm64 = aarch64 +QEMU_ARCH_arm = arm +QEMU_ARCH_armthumb = arm +QEMU_ARCH_mips32le = mipsel # works with malta_defconfig +QEMU_ARCH_mips32be = mips +QEMU_ARCH_ppc = ppc +QEMU_ARCH_ppc64 = ppc64 +QEMU_ARCH_ppc64le = ppc64 +QEMU_ARCH_riscv = riscv64 +QEMU_ARCH_riscv32 = riscv32 +QEMU_ARCH_riscv64 = riscv64 +QEMU_ARCH_s390x = s390x +QEMU_ARCH_s390 = s390x +QEMU_ARCH_loongarch = loongarch64 +QEMU_ARCH_sparc32 = sparc +QEMU_ARCH_sparc64 = sparc64 +QEMU_ARCH_m68k = m68k +QEMU_ARCH = $(QEMU_ARCH_$(XARCH)) + +QEMU_ARCH_USER_ppc64le = ppc64le +QEMU_ARCH_USER = $(or $(QEMU_ARCH_USER_$(XARCH)),$(QEMU_ARCH_$(XARCH))) + +QEMU_BIOS_DIR = /usr/share/edk2/ +QEMU_BIOS_loongarch = $(QEMU_BIOS_DIR)/loongarch64/OVMF_CODE.fd + +ifneq ($(QEMU_BIOS_$(XARCH)),) +QEMU_ARGS_BIOS = -bios $(QEMU_BIOS_$(XARCH)) +endif + +# QEMU_ARGS : some arch-specific args to pass to qemu +QEMU_ARGS_i386 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_x86_64 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_x86 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_arm64 = -M virt -cpu cortex-a53 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_arm = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_armthumb = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_mips32le = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_mips32be = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_ppc = -M g3beige -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_ppc64 = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_ppc64le = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_riscv = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_riscv32 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_riscv64 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_s390x = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_s390 = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_loongarch = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_sparc32 = -M SS-5 -m 256M -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_sparc64 = -M sun4u -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_m68k = -M virt -append "console=ttyGF0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS = -m 1G $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_BIOS) $(QEMU_ARGS_EXTRA) + +# OUTPUT is only set when run from the main makefile, otherwise +# it defaults to this nolibc directory. +OUTPUT ?= $(CURDIR)/ + +ifeq ($(V),1) +Q= +else +Q=@ +endif + +CFLAGS_i386 = $(call cc-option,-m32) +CFLAGS_arm = -marm +CFLAGS_armthumb = -mthumb -march=armv6t2 +CFLAGS_ppc = -m32 -mbig-endian -mno-vsx $(call cc-option,-mmultiple) +CFLAGS_ppc64 = -m64 -mbig-endian -mno-vsx $(call cc-option,-mmultiple) +CFLAGS_ppc64le = -m64 -mlittle-endian -mno-vsx $(call cc-option,-mabi=elfv2) +CFLAGS_s390x = -m64 +CFLAGS_s390 = -m31 +CFLAGS_mips32le = -EL -mabi=32 -fPIC +CFLAGS_mips32be = -EB -mabi=32 +CFLAGS_sparc32 = $(call cc-option,-m32) +ifeq ($(origin XARCH),command line) +CFLAGS_XARCH = $(CFLAGS_$(XARCH)) +endif + +include Makefile.include + +CFLAGS ?= $(CFLAGS_NOLIBC_TEST) $(CFLAGS_XARCH) $(CFLAGS_EXTRA) +LDFLAGS := + +LIBGCC := -lgcc + +ifneq ($(LLVM),) +# Not needed for clang +LIBGCC := +endif + +# Modify CFLAGS based on LLVM= +include $(srctree)/tools/scripts/Makefile.include + +# GCC uses "s390", clang "systemz" +CLANG_CROSS_FLAGS := $(subst --target=s390-linux,--target=systemz-linux,$(CLANG_CROSS_FLAGS)) + +REPORT ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{if (!f) printf("\n"); f++; print;} /\[SKIPPED\][\r]*$$/{s++} \ + END{ printf("\n%3d test(s): %3d passed, %3d skipped, %3d failed => status: ", p+s+f, p, s, f); \ + if (f || !p) printf("failure\n"); else if (s) printf("warning\n"); else printf("success\n");; \ + printf("\nSee all results in %s\n", ARGV[1]); }' + +help: + @echo "Supported targets under selftests/nolibc:" + @echo " all call the \"run\" target below" + @echo " help this help" + @echo " sysroot create the nolibc sysroot here (uses \$$ARCH)" + @echo " nolibc-test build the executable (uses \$$CC and \$$CROSS_COMPILE)" + @echo " libc-test build an executable using the compiler's default libc instead" + @echo " run-user runs the executable under QEMU (uses \$$XARCH, \$$TEST)" + @echo " initramfs.cpio prepare the initramfs archive with nolibc-test" + @echo " initramfs prepare the initramfs tree with nolibc-test" + @echo " defconfig create a fresh new default config (uses \$$XARCH)" + @echo " kernel (re)build the kernel (uses \$$XARCH)" + @echo " kernel-standalone (re)build the kernel with the initramfs (uses \$$XARCH)" + @echo " run runs the kernel in QEMU after building it (uses \$$XARCH, \$$TEST)" + @echo " rerun runs a previously prebuilt kernel in QEMU (uses \$$XARCH, \$$TEST)" + @echo " clean clean the sysroot, initramfs, build and output files" + @echo "" + @echo "The output file is \"run.out\". Test ranges may be passed using \$$TEST." + @echo "" + @echo "Currently using the following variables:" + @echo " ARCH = $(ARCH)" + @echo " XARCH = $(XARCH)" + @echo " CROSS_COMPILE = $(CROSS_COMPILE)" + @echo " CC = $(CC)" + @echo " OUTPUT = $(OUTPUT)" + @echo " TEST = $(TEST)" + @echo " QEMU_ARCH = $(if $(QEMU_ARCH),$(QEMU_ARCH),UNKNOWN_ARCH) [determined from \$$XARCH]" + @echo " IMAGE_NAME = $(if $(IMAGE_NAME),$(IMAGE_NAME),UNKNOWN_ARCH) [determined from \$$XARCH]" + @echo "" + +all: run + +sysroot: sysroot/$(ARCH)/include + +sysroot/$(ARCH)/include: + $(Q)rm -rf sysroot/$(ARCH) sysroot/sysroot + $(QUIET_MKDIR)mkdir -p sysroot + $(Q)$(MAKE) -C $(srctree) outputmakefile + $(Q)$(MAKE) -C $(srctree)/tools/include/nolibc ARCH=$(ARCH) OUTPUT=$(CURDIR)/sysroot/ headers_standalone headers_check + $(Q)mv sysroot/sysroot sysroot/$(ARCH) + +ifneq ($(NOLIBC_SYSROOT),0) +nolibc-test: nolibc-test.c nolibc-test-linkage.c sysroot/$(ARCH)/include + $(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \ + -nostdlib -nostdinc -static -Isysroot/$(ARCH)/include nolibc-test.c nolibc-test-linkage.c $(LIBGCC) +else +nolibc-test: nolibc-test.c nolibc-test-linkage.c + $(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \ + -nostdlib -static -include $(srctree)/tools/include/nolibc/nolibc.h nolibc-test.c nolibc-test-linkage.c $(LIBGCC) +endif + +libc-test: nolibc-test.c nolibc-test-linkage.c + $(QUIET_CC)$(HOSTCC) -o $@ nolibc-test.c nolibc-test-linkage.c + +# local libc-test +run-libc-test: libc-test + $(Q)./libc-test > "$(CURDIR)/run.out" || : + $(Q)$(REPORT) $(CURDIR)/run.out + +# local nolibc-test +run-nolibc-test: nolibc-test + $(Q)./nolibc-test > "$(CURDIR)/run.out" || : + $(Q)$(REPORT) $(CURDIR)/run.out + +# qemu user-land test +run-user: nolibc-test + $(Q)qemu-$(QEMU_ARCH_USER) ./nolibc-test > "$(CURDIR)/run.out" || : + $(Q)$(REPORT) $(CURDIR)/run.out + +initramfs.cpio: kernel nolibc-test + $(QUIET_GEN)echo 'file /init nolibc-test 755 0 0' | $(objtree)/usr/gen_init_cpio - > initramfs.cpio + +initramfs: nolibc-test + $(QUIET_MKDIR)mkdir -p initramfs + $(call QUIET_INSTALL, initramfs/init) + $(Q)cp nolibc-test initramfs/init + +defconfig: + $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(DEFCONFIG) + $(Q)if [ -n "$(EXTRACONFIG)" ]; then \ + $(srctree)/scripts/config --file $(objtree)/.config $(EXTRACONFIG); \ + $(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) olddefconfig < /dev/null; \ + fi + +kernel: + $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) < /dev/null + +kernel-standalone: initramfs + $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) CONFIG_INITRAMFS_SOURCE=$(CURDIR)/initramfs < /dev/null + +# run the tests after building the kernel +run: kernel initramfs.cpio + $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(IMAGE)" -initrd initramfs.cpio -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out" + $(Q)$(REPORT) $(CURDIR)/run.out + +# re-run the tests from an existing kernel +rerun: + $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(IMAGE)" -initrd initramfs.cpio -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out" + $(Q)$(REPORT) $(CURDIR)/run.out + +# report with existing test log +report: + $(Q)$(REPORT) $(CURDIR)/run.out + +clean: + $(call QUIET_CLEAN, sysroot) + $(Q)rm -rf sysroot + $(call QUIET_CLEAN, nolibc-test) + $(Q)rm -f nolibc-test + $(call QUIET_CLEAN, libc-test) + $(Q)rm -f libc-test + $(call QUIET_CLEAN, initramfs.cpio) + $(Q)rm -rf initramfs.cpio + $(call QUIET_CLEAN, initramfs) + $(Q)rm -rf initramfs + $(call QUIET_CLEAN, run.out) + $(Q)rm -rf run.out + +.PHONY: sysroot/$(ARCH)/include diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh index 8277599e6441..279fbd93ef70 100755 --- a/tools/testing/selftests/nolibc/run-tests.sh +++ b/tools/testing/selftests/nolibc/run-tests.sh @@ -169,7 +169,7 @@ test_arch() { if [ "$werror" -ne 0 ]; then CFLAGS_EXTRA="$CFLAGS_EXTRA -Werror" fi - MAKE=(make -j"${nproc}" XARCH="${arch}" CROSS_COMPILE="${cross_compile}" LLVM="${llvm}" O="${build_dir}") + MAKE=(make -f Makefile.nolibc -j"${nproc}" XARCH="${arch}" CROSS_COMPILE="${cross_compile}" LLVM="${llvm}" O="${build_dir}") case "$test_mode" in 'system') -- cgit v1.2.3 From eb135311083100b6590a7545618cd9760d896a86 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Fri, 20 Jun 2025 23:39:33 +0200 Subject: selftests/nolibc: integrate with kselftests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hook up nolibc-test with the kselftests framework. This enables CI systems and developers to easily execute the tests. While nolibc-test does not emit KTAP output itself that is not a problem, as the kselftest executor will wrap the output in KTAP. Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250620-nolibc-selftests-v1-4-f6b2ce7c5071@weissschuh.net Signed-off-by: Thomas Weißschuh --- tools/testing/selftests/Makefile | 1 + tools/testing/selftests/nolibc/Makefile | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 tools/testing/selftests/nolibc/Makefile (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 339b31e6a6b5..3a4c98102f69 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -77,6 +77,7 @@ TARGETS += net/ovpn TARGETS += net/packetdrill TARGETS += net/rds TARGETS += net/tcp_ao +TARGETS += nolibc TARGETS += nsfs TARGETS += pci_endpoint TARGETS += pcie_bwctrl diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile new file mode 100644 index 000000000000..40f5c2908dda --- /dev/null +++ b/tools/testing/selftests/nolibc/Makefile @@ -0,0 +1,26 @@ +# SPDX-License-Identifier: GPL-2.0 + +TEST_GEN_PROGS := nolibc-test + +include ../lib.mk +include $(top_srcdir)/scripts/Makefile.compiler + +cc-option = $(call __cc-option, $(CC),,$(1),$(2)) + +include Makefile.include + +CFLAGS = -nostdlib -nostdinc -static \ + -isystem $(top_srcdir)/tools/include/nolibc -isystem $(top_srcdir)/usr/include \ + $(CFLAGS_NOLIBC_TEST) + +ifeq ($(LLVM),) +LDLIBS := -lgcc +endif + +$(OUTPUT)/nolibc-test: nolibc-test.c nolibc-test-linkage.c | headers + +help: + @echo "For the custom nolibc testsuite use '$(MAKE) -f Makefile.nolibc'; available targets:" + @$(MAKE) -f Makefile.nolibc help + +.PHONY: help -- cgit v1.2.3 From fb1cacdbacd905d2099656458082482c6d1bc0c0 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 23 Jun 2025 23:34:32 +0200 Subject: selftests/nolibc: avoid GCC 15 -Wunterminated-string-initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On GCC 15 the following warnings is emitted: nolibc-test.c: In function ‘run_stdlib’: nolibc-test.c:1416:32: warning: initializer-string for array of ‘char’ truncates NUL terminator but destination lacks ‘nonstring’ attribute (11 chars into 10 available) [-Wunterminated-string-initialization] 1416 | char buf[10] = "test123456"; | ^~~~~~~~~~~~ Increase the size of buf to avoid the warning. It would also be possible to use __attribute__((nonstring)) but that would require some ifdeffery to work with older compilers. Fixes: 1063649cf531 ("selftests/nolibc: Add tests for strlcat() and strlcpy()") Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250623-nolibc-nonstring-v1-1-11282204766a@weissschuh.net Signed-off-by: Thomas Weißschuh --- tools/testing/selftests/nolibc/nolibc-test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index dbe13000fb1a..52640d8ae402 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -1413,7 +1413,7 @@ int run_stdlib(int min, int max) * Add some more chars after the \0, to test functions that overwrite the buffer set * the \0 at the exact right position. */ - char buf[10] = "test123456"; + char buf[11] = "test123456"; buf[4] = '\0'; -- cgit v1.2.3 From ca6a3faee66e6dc84f3d24fa1b1fa8b0628871e9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 20 Jun 2025 09:11:08 -0700 Subject: selftests: drv-net: stats: fix pylint issues Small adjustments to make pylint happy. One warning about unused argument remains because the test uses global variables rather than attaching netlink sockets to cfg. Fixing this would be too much of a change for a linter fix commit like this one. Link: https://patch.msgid.link/20250620161109.2146242-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/stats.py | 45 ++++++++++++++++++---------- 1 file changed, 30 insertions(+), 15 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py index efcc1e10575b..00a85d6e54f9 100755 --- a/tools/testing/selftests/drivers/net/stats.py +++ b/tools/testing/selftests/drivers/net/stats.py @@ -1,12 +1,16 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 +""" +Tests related to standard netdevice statistics. +""" + import errno import subprocess import time from lib.py import ksft_run, ksft_exit, ksft_pr from lib.py import ksft_ge, ksft_eq, ksft_is, ksft_in, ksft_lt, ksft_true, ksft_raises -from lib.py import KsftSkipEx, KsftXfailEx +from lib.py import KsftSkipEx, KsftXfailEx, KsftFailEx from lib.py import ksft_disruptive from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError from lib.py import NetDrvEnv @@ -18,13 +22,16 @@ rtnl = RtnlFamily() def check_pause(cfg) -> None: - global ethnl + """ + Check that drivers which support Pause config also report standard + pause stats. + """ try: ethnl.pause_get({"header": {"dev-index": cfg.ifindex}}) except NlError as e: if e.error == errno.EOPNOTSUPP: - raise KsftXfailEx("pause not supported by the device") + raise KsftXfailEx("pause not supported by the device") from e raise data = ethnl.pause_get({"header": {"dev-index": cfg.ifindex, @@ -33,13 +40,16 @@ def check_pause(cfg) -> None: def check_fec(cfg) -> None: - global ethnl + """ + Check that drivers which support FEC config also report standard + FEC stats. + """ try: ethnl.fec_get({"header": {"dev-index": cfg.ifindex}}) except NlError as e: if e.error == errno.EOPNOTSUPP: - raise KsftXfailEx("FEC not supported by the device") + raise KsftXfailEx("FEC not supported by the device") from e raise data = ethnl.fec_get({"header": {"dev-index": cfg.ifindex, @@ -48,15 +58,17 @@ def check_fec(cfg) -> None: def pkt_byte_sum(cfg) -> None: - global netfam, rtnl + """ + Check that qstat and interface stats match in value. + """ def get_qstat(test): - global netfam stats = netfam.qstats_get({}, dump=True) if stats: for qs in stats: if qs["ifindex"]== test.ifindex: return qs + return None qstat = get_qstat(cfg) if qstat is None: @@ -77,15 +89,14 @@ def pkt_byte_sum(cfg) -> None: for _ in range(10): rtstat = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64'] if stat_cmp(rtstat, qstat) < 0: - raise Exception("RTNL stats are lower, fetched later") + raise KsftFailEx("RTNL stats are lower, fetched later") qstat = get_qstat(cfg) if stat_cmp(rtstat, qstat) > 0: - raise Exception("Qstats are lower, fetched later") + raise KsftFailEx("Qstats are lower, fetched later") def qstat_by_ifindex(cfg) -> None: - global netfam - global rtnl + """ Qstats Netlink API tests - querying by ifindex. """ # Construct a map ifindex -> [dump, by-index, dump] ifindexes = {} @@ -93,7 +104,7 @@ def qstat_by_ifindex(cfg) -> None: for entry in stats: ifindexes[entry['ifindex']] = [entry, None, None] - for ifindex in ifindexes.keys(): + for ifindex in ifindexes: entry = netfam.qstats_get({"ifindex": ifindex}, dump=True) ksft_eq(len(entry), 1) ifindexes[entry[0]['ifindex']][1] = entry[0] @@ -145,7 +156,7 @@ def qstat_by_ifindex(cfg) -> None: # Try to get stats for lowest unused ifindex but not 0 devs = rtnl.getlink({}, dump=True) - all_ifindexes = set([dev["ifi-index"] for dev in devs]) + all_ifindexes = set(dev["ifi-index"] for dev in devs) lowest = 2 while lowest in all_ifindexes: lowest += 1 @@ -158,18 +169,20 @@ def qstat_by_ifindex(cfg) -> None: @ksft_disruptive def check_down(cfg) -> None: + """ Test statistics (interface and qstat) are not impacted by ifdown """ + try: qstat = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] except NlError as e: if e.error == errno.EOPNOTSUPP: - raise KsftSkipEx("qstats not supported by the device") + raise KsftSkipEx("qstats not supported by the device") from e raise ip(f"link set dev {cfg.dev['ifname']} down") defer(ip, f"link set dev {cfg.dev['ifname']} up") qstat2 = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] - for k, v in qstat.items(): + for k in qstat: ksft_ge(qstat2[k], qstat[k], comment=f"{k} went backwards on device down") # exercise per-queue API to make sure that "device down" state @@ -263,6 +276,8 @@ def procfs_downup_hammer(cfg) -> None: def main() -> None: + """ Ksft boiler plate main """ + with NetDrvEnv(__file__, queue_count=100) as cfg: ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex, check_down, procfs_hammer, procfs_downup_hammer], -- cgit v1.2.3 From 2baa45432d9a84d69f6ae247f5d6eb6525572bbe Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 20 Jun 2025 09:11:09 -0700 Subject: selftests: drv-net: stats: use skip instead of xfail for unsupported features XFAIL is considered a form of a pass by our CI. For HW devices returning XFAIL for unsupported features is counter-productive because our CI knows not to expect any HW test to pass until it sees 10 passes in a row. If we return xfail the test shows up as pass even if the device doesn't support the feature. netdevsim supports all features necessary for the stats test so there is no concern about running the test in SW mode. Make the test skip rather than xfail if driver doesn't support FEC or pause. Link: https://patch.msgid.link/20250620161109.2146242-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/stats.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py index 00a85d6e54f9..c2bb5d3f1ca1 100755 --- a/tools/testing/selftests/drivers/net/stats.py +++ b/tools/testing/selftests/drivers/net/stats.py @@ -10,7 +10,7 @@ import subprocess import time from lib.py import ksft_run, ksft_exit, ksft_pr from lib.py import ksft_ge, ksft_eq, ksft_is, ksft_in, ksft_lt, ksft_true, ksft_raises -from lib.py import KsftSkipEx, KsftXfailEx, KsftFailEx +from lib.py import KsftSkipEx, KsftFailEx from lib.py import ksft_disruptive from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError from lib.py import NetDrvEnv @@ -31,7 +31,7 @@ def check_pause(cfg) -> None: ethnl.pause_get({"header": {"dev-index": cfg.ifindex}}) except NlError as e: if e.error == errno.EOPNOTSUPP: - raise KsftXfailEx("pause not supported by the device") from e + raise KsftSkipEx("pause not supported by the device") from e raise data = ethnl.pause_get({"header": {"dev-index": cfg.ifindex, @@ -49,7 +49,7 @@ def check_fec(cfg) -> None: ethnl.fec_get({"header": {"dev-index": cfg.ifindex}}) except NlError as e: if e.error == errno.EOPNOTSUPP: - raise KsftXfailEx("FEC not supported by the device") from e + raise KsftSkipEx("FEC not supported by the device") from e raise data = ethnl.fec_get({"header": {"dev-index": cfg.ifindex, -- cgit v1.2.3 From cccfe0982208c72e116462e5431919c88505e890 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 19 Jun 2025 18:15:18 +0000 Subject: page_pool: import Jesper's page_pool benchmark MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We frequently consult with Jesper's out-of-tree page_pool benchmark to evaluate page_pool changes. Import the benchmark into the upstream linux kernel tree so that (a) we're all running the same version, (b) pave the way for shared improvements, and (c) maybe one day integrate it with nipa, if possible. Import bench_page_pool_simple from commit 35b1716d0c30 ("Add page_bench06_walk_all"), from this repository: https://github.com/netoptimizer/prototype-kernel.git Changes done during upstreaming: - Fix checkpatch issues. - Remove the tasklet logic not needed. - Move under tools/testing - Create ksft for the benchmark. - Changed slightly how the benchmark gets build. Out of tree, time_bench is built as an independent .ko. Here it is included in bench_page_pool.ko Steps to run: ``` mkdir -p /tmp/run-pp-bench make -C ./tools/testing/selftests/net/bench make -C ./tools/testing/selftests/net/bench install INSTALL_PATH=/tmp/run-pp-bench rsync --delete -avz --progress /tmp/run-pp-bench mina@$SERVER:~/ ssh mina@$SERVER << EOF cd ~/run-pp-bench && sudo ./test_bench_page_pool.sh EOF ``` Note that by default, the Makefile will build the benchmark for the currently installed kernel in /lib/modules/$(shell uname -r)/build. To build against the current tree, do: make KDIR=$(pwd) -C ./tools/testing/selftests/net/bench Output (from Jesper): ``` sudo ./test_bench_page_pool.sh (benchmark dmesg logs snipped) Fast path results: no-softirq-page_pool01 Per elem: 23 cycles(tsc) 6.571 ns ptr_ring results: no-softirq-page_pool02 Per elem: 60 cycles(tsc) 16.862 ns slow path results: no-softirq-page_pool03 Per elem: 265 cycles(tsc) 73.739 ns ``` Output (from me): ``` sudo ./test_bench_page_pool.sh (benchmark dmesg logs snipped) Fast path results: no-softirq-page_pool01 Per elem: 11 cycles(tsc) 4.177 ns ptr_ring results: no-softirq-page_pool02 Per elem: 51 cycles(tsc) 19.117 ns slow path results: no-softirq-page_pool03 Per elem: 168 cycles(tsc) 62.469 ns ``` Results of course will vary based on hardware/kernel/configs, and some variance may be there from run to run due to some noise. Signed-off-by: Mina Almasry Acked-by: Ilias Apalodimas Signed-off-by: Jesper Dangaard Brouer Acked-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20250619181519.3102426-1-almasrymina@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/bench/Makefile | 7 + .../testing/selftests/net/bench/page_pool/Makefile | 17 + .../net/bench/page_pool/bench_page_pool_simple.c | 276 +++++++++++++++ .../selftests/net/bench/page_pool/time_bench.c | 394 +++++++++++++++++++++ .../selftests/net/bench/page_pool/time_bench.h | 238 +++++++++++++ .../selftests/net/bench/test_bench_page_pool.sh | 32 ++ 6 files changed, 964 insertions(+) create mode 100644 tools/testing/selftests/net/bench/Makefile create mode 100644 tools/testing/selftests/net/bench/page_pool/Makefile create mode 100644 tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c create mode 100644 tools/testing/selftests/net/bench/page_pool/time_bench.c create mode 100644 tools/testing/selftests/net/bench/page_pool/time_bench.h create mode 100755 tools/testing/selftests/net/bench/test_bench_page_pool.sh (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/bench/Makefile b/tools/testing/selftests/net/bench/Makefile new file mode 100644 index 000000000000..2546c45e42f7 --- /dev/null +++ b/tools/testing/selftests/net/bench/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 + +TEST_GEN_MODS_DIR := page_pool + +TEST_PROGS += test_bench_page_pool.sh + +include ../../lib.mk diff --git a/tools/testing/selftests/net/bench/page_pool/Makefile b/tools/testing/selftests/net/bench/page_pool/Makefile new file mode 100644 index 000000000000..0549a16ba275 --- /dev/null +++ b/tools/testing/selftests/net/bench/page_pool/Makefile @@ -0,0 +1,17 @@ +BENCH_PAGE_POOL_SIMPLE_TEST_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST))))) +KDIR ?= /lib/modules/$(shell uname -r)/build + +ifeq ($(V),1) +Q = +else +Q = @ +endif + +obj-m += bench_page_pool.o +bench_page_pool-y += bench_page_pool_simple.o time_bench.o + +all: + +$(Q)make -C $(KDIR) M=$(BENCH_PAGE_POOL_SIMPLE_TEST_DIR) modules + +clean: + +$(Q)make -C $(KDIR) M=$(BENCH_PAGE_POOL_SIMPLE_TEST_DIR) clean diff --git a/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c new file mode 100644 index 000000000000..f183d5e30dc6 --- /dev/null +++ b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c @@ -0,0 +1,276 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Benchmark module for page_pool. + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include + +#include +#include + +#include +#include + +#include "time_bench.h" + +static int verbose = 1; +#define MY_POOL_SIZE 1024 + +static void _page_pool_put_page(struct page_pool *pool, struct page *page, + bool allow_direct) +{ + page_pool_put_page(pool, page, -1, allow_direct); +} + +/* Makes tests selectable. Useful for perf-record to analyze a single test. + * Hint: Bash shells support writing binary number like: $((2#101010) + * + * # modprobe bench_page_pool_simple run_flags=$((2#100)) + */ +static unsigned long run_flags = 0xFFFFFFFF; +module_param(run_flags, ulong, 0); +MODULE_PARM_DESC(run_flags, "Limit which bench test that runs"); + +/* Count the bit number from the enum */ +enum benchmark_bit { + bit_run_bench_baseline, + bit_run_bench_no_softirq01, + bit_run_bench_no_softirq02, + bit_run_bench_no_softirq03, +}; + +#define bit(b) (1 << (b)) +#define enabled(b) ((run_flags & (bit(b)))) + +/* notice time_bench is limited to U32_MAX nr loops */ +static unsigned long loops = 10000000; +module_param(loops, ulong, 0); +MODULE_PARM_DESC(loops, "Specify loops bench will run"); + +/* Timing at the nanosec level, we need to know the overhead + * introduced by the for loop itself + */ +static int time_bench_for_loop(struct time_bench_record *rec, void *data) +{ + uint64_t loops_cnt = 0; + int i; + + time_bench_start(rec); + /** Loop to measure **/ + for (i = 0; i < rec->loops; i++) { + loops_cnt++; + barrier(); /* avoid compiler to optimize this loop */ + } + time_bench_stop(rec, loops_cnt); + return loops_cnt; +} + +static int time_bench_atomic_inc(struct time_bench_record *rec, void *data) +{ + uint64_t loops_cnt = 0; + atomic_t cnt; + int i; + + atomic_set(&cnt, 0); + + time_bench_start(rec); + /** Loop to measure **/ + for (i = 0; i < rec->loops; i++) { + atomic_inc(&cnt); + barrier(); /* avoid compiler to optimize this loop */ + } + loops_cnt = atomic_read(&cnt); + time_bench_stop(rec, loops_cnt); + return loops_cnt; +} + +/* The ptr_ping in page_pool uses a spinlock. We need to know the minimum + * overhead of taking+releasing a spinlock, to know the cycles that can be saved + * by e.g. amortizing this via bulking. + */ +static int time_bench_lock(struct time_bench_record *rec, void *data) +{ + uint64_t loops_cnt = 0; + spinlock_t lock; + int i; + + spin_lock_init(&lock); + + time_bench_start(rec); + /** Loop to measure **/ + for (i = 0; i < rec->loops; i++) { + spin_lock(&lock); + loops_cnt++; + barrier(); /* avoid compiler to optimize this loop */ + spin_unlock(&lock); + } + time_bench_stop(rec, loops_cnt); + return loops_cnt; +} + +/* Helper for filling some page's into ptr_ring */ +static void pp_fill_ptr_ring(struct page_pool *pp, int elems) +{ + /* GFP_ATOMIC needed when under run softirq */ + gfp_t gfp_mask = GFP_ATOMIC; + struct page **array; + int i; + + array = kcalloc(elems, sizeof(struct page *), gfp_mask); + + for (i = 0; i < elems; i++) + array[i] = page_pool_alloc_pages(pp, gfp_mask); + for (i = 0; i < elems; i++) + _page_pool_put_page(pp, array[i], false); + + kfree(array); +} + +enum test_type { type_fast_path, type_ptr_ring, type_page_allocator }; + +/* Depends on compile optimizing this function */ +static int time_bench_page_pool(struct time_bench_record *rec, void *data, + enum test_type type, const char *func) +{ + uint64_t loops_cnt = 0; + gfp_t gfp_mask = GFP_ATOMIC; /* GFP_ATOMIC is not really needed */ + int i, err; + + struct page_pool *pp; + struct page *page; + + struct page_pool_params pp_params = { + .order = 0, + .flags = 0, + .pool_size = MY_POOL_SIZE, + .nid = NUMA_NO_NODE, + .dev = NULL, /* Only use for DMA mapping */ + .dma_dir = DMA_BIDIRECTIONAL, + }; + + pp = page_pool_create(&pp_params); + if (IS_ERR(pp)) { + err = PTR_ERR(pp); + pr_warn("%s: Error(%d) creating page_pool\n", func, err); + goto out; + } + pp_fill_ptr_ring(pp, 64); + + if (in_serving_softirq()) + pr_warn("%s(): in_serving_softirq fast-path\n", func); + else + pr_warn("%s(): Cannot use page_pool fast-path\n", func); + + time_bench_start(rec); + /** Loop to measure **/ + for (i = 0; i < rec->loops; i++) { + /* Common fast-path alloc that depend on in_serving_softirq() */ + page = page_pool_alloc_pages(pp, gfp_mask); + if (!page) + break; + loops_cnt++; + barrier(); /* avoid compiler to optimize this loop */ + + /* The benchmarks purpose it to test different return paths. + * Compiler should inline optimize other function calls out + */ + if (type == type_fast_path) { + /* Fast-path recycling e.g. XDP_DROP use-case */ + page_pool_recycle_direct(pp, page); + + } else if (type == type_ptr_ring) { + /* Normal return path */ + _page_pool_put_page(pp, page, false); + + } else if (type == type_page_allocator) { + /* Test if not pages are recycled, but instead + * returned back into systems page allocator + */ + get_page(page); /* cause no-recycling */ + _page_pool_put_page(pp, page, false); + put_page(page); + } else { + BUILD_BUG(); + } + } + time_bench_stop(rec, loops_cnt); +out: + page_pool_destroy(pp); + return loops_cnt; +} + +static int time_bench_page_pool01_fast_path(struct time_bench_record *rec, + void *data) +{ + return time_bench_page_pool(rec, data, type_fast_path, __func__); +} + +static int time_bench_page_pool02_ptr_ring(struct time_bench_record *rec, + void *data) +{ + return time_bench_page_pool(rec, data, type_ptr_ring, __func__); +} + +static int time_bench_page_pool03_slow(struct time_bench_record *rec, + void *data) +{ + return time_bench_page_pool(rec, data, type_page_allocator, __func__); +} + +static int run_benchmark_tests(void) +{ + uint32_t nr_loops = loops; + + /* Baseline tests */ + if (enabled(bit_run_bench_baseline)) { + time_bench_loop(nr_loops * 10, 0, "for_loop", NULL, + time_bench_for_loop); + time_bench_loop(nr_loops * 10, 0, "atomic_inc", NULL, + time_bench_atomic_inc); + time_bench_loop(nr_loops, 0, "lock", NULL, time_bench_lock); + } + + /* This test cannot activate correct code path, due to no-softirq ctx */ + if (enabled(bit_run_bench_no_softirq01)) + time_bench_loop(nr_loops, 0, "no-softirq-page_pool01", NULL, + time_bench_page_pool01_fast_path); + if (enabled(bit_run_bench_no_softirq02)) + time_bench_loop(nr_loops, 0, "no-softirq-page_pool02", NULL, + time_bench_page_pool02_ptr_ring); + if (enabled(bit_run_bench_no_softirq03)) + time_bench_loop(nr_loops, 0, "no-softirq-page_pool03", NULL, + time_bench_page_pool03_slow); + + return 0; +} + +static int __init bench_page_pool_simple_module_init(void) +{ + if (verbose) + pr_info("Loaded\n"); + + if (loops > U32_MAX) { + pr_err("Module param loops(%lu) exceeded U32_MAX(%u)\n", loops, + U32_MAX); + return -ECHRNG; + } + + run_benchmark_tests(); + + return 0; +} +module_init(bench_page_pool_simple_module_init); + +static void __exit bench_page_pool_simple_module_exit(void) +{ + if (verbose) + pr_info("Unloaded\n"); +} +module_exit(bench_page_pool_simple_module_exit); + +MODULE_DESCRIPTION("Benchmark of page_pool simple cases"); +MODULE_AUTHOR("Jesper Dangaard Brouer "); +MODULE_LICENSE("GPL"); diff --git a/tools/testing/selftests/net/bench/page_pool/time_bench.c b/tools/testing/selftests/net/bench/page_pool/time_bench.c new file mode 100644 index 000000000000..073bb36ec5f2 --- /dev/null +++ b/tools/testing/selftests/net/bench/page_pool/time_bench.c @@ -0,0 +1,394 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Benchmarking code execution time inside the kernel + * + * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include + +#include /* perf_event_create_kernel_counter() */ + +/* For concurrency testing */ +#include +#include +#include +#include + +#include "time_bench.h" + +static int verbose = 1; + +/** TSC (Time-Stamp Counter) based ** + * See: linux/time_bench.h + * tsc_start_clock() and tsc_stop_clock() + */ + +/** Wall-clock based ** + */ + +/** PMU (Performance Monitor Unit) based ** + */ +#define PERF_FORMAT \ + (PERF_FORMAT_GROUP | PERF_FORMAT_ID | PERF_FORMAT_TOTAL_TIME_ENABLED | \ + PERF_FORMAT_TOTAL_TIME_RUNNING) + +struct raw_perf_event { + uint64_t config; /* event */ + uint64_t config1; /* umask */ + struct perf_event *save; + char *desc; +}; + +/* if HT is enable a maximum of 4 events (5 if one is instructions + * retired can be specified, if HT is disabled a maximum of 8 (9 if + * one is instructions retired) can be specified. + * + * From Table 19-1. Architectural Performance Events + * Architectures Software Developer’s Manual Volume 3: System Programming + * Guide + */ +struct raw_perf_event perf_events[] = { + { 0x3c, 0x00, NULL, "Unhalted CPU Cycles" }, + { 0xc0, 0x00, NULL, "Instruction Retired" } +}; + +#define NUM_EVTS (ARRAY_SIZE(perf_events)) + +/* WARNING: PMU config is currently broken! + */ +bool time_bench_PMU_config(bool enable) +{ + int i; + struct perf_event_attr perf_conf; + struct perf_event *perf_event; + int cpu; + + preempt_disable(); + cpu = smp_processor_id(); + pr_info("DEBUG: cpu:%d\n", cpu); + preempt_enable(); + + memset(&perf_conf, 0, sizeof(struct perf_event_attr)); + perf_conf.type = PERF_TYPE_RAW; + perf_conf.size = sizeof(struct perf_event_attr); + perf_conf.read_format = PERF_FORMAT; + perf_conf.pinned = 1; + perf_conf.exclude_user = 1; /* No userspace events */ + perf_conf.exclude_kernel = 0; /* Only kernel events */ + + for (i = 0; i < NUM_EVTS; i++) { + perf_conf.disabled = enable; + //perf_conf.disabled = (i == 0) ? 1 : 0; + perf_conf.config = perf_events[i].config; + perf_conf.config1 = perf_events[i].config1; + if (verbose) + pr_info("%s() enable PMU counter: %s\n", + __func__, perf_events[i].desc); + perf_event = perf_event_create_kernel_counter(&perf_conf, cpu, + NULL /* task */, + NULL /* overflow_handler*/, + NULL /* context */); + if (perf_event) { + perf_events[i].save = perf_event; + pr_info("%s():DEBUG perf_event success\n", __func__); + + perf_event_enable(perf_event); + } else { + pr_info("%s():DEBUG perf_event is NULL\n", __func__); + } + } + + return true; +} + +/** Generic functions ** + */ + +/* Calculate stats, store results in record */ +bool time_bench_calc_stats(struct time_bench_record *rec) +{ +#define NANOSEC_PER_SEC 1000000000 /* 10^9 */ + uint64_t ns_per_call_tmp_rem = 0; + uint32_t ns_per_call_remainder = 0; + uint64_t pmc_ipc_tmp_rem = 0; + uint32_t pmc_ipc_remainder = 0; + uint32_t pmc_ipc_div = 0; + uint32_t invoked_cnt_precision = 0; + uint32_t invoked_cnt = 0; /* 32-bit due to div_u64_rem() */ + + if (rec->flags & TIME_BENCH_LOOP) { + if (rec->invoked_cnt < 1000) { + pr_err("ERR: need more(>1000) loops(%llu) for timing\n", + rec->invoked_cnt); + return false; + } + if (rec->invoked_cnt > ((1ULL << 32) - 1)) { + /* div_u64_rem() can only support div with 32bit*/ + pr_err("ERR: Invoke cnt(%llu) too big overflow 32bit\n", + rec->invoked_cnt); + return false; + } + invoked_cnt = (uint32_t)rec->invoked_cnt; + } + + /* TSC (Time-Stamp Counter) records */ + if (rec->flags & TIME_BENCH_TSC) { + rec->tsc_interval = rec->tsc_stop - rec->tsc_start; + if (rec->tsc_interval == 0) { + pr_err("ABORT: timing took ZERO TSC time\n"); + return false; + } + /* Calculate stats */ + if (rec->flags & TIME_BENCH_LOOP) + rec->tsc_cycles = rec->tsc_interval / invoked_cnt; + else + rec->tsc_cycles = rec->tsc_interval; + } + + /* Wall-clock time calc */ + if (rec->flags & TIME_BENCH_WALLCLOCK) { + rec->time_start = rec->ts_start.tv_nsec + + (NANOSEC_PER_SEC * rec->ts_start.tv_sec); + rec->time_stop = rec->ts_stop.tv_nsec + + (NANOSEC_PER_SEC * rec->ts_stop.tv_sec); + rec->time_interval = rec->time_stop - rec->time_start; + if (rec->time_interval == 0) { + pr_err("ABORT: timing took ZERO wallclock time\n"); + return false; + } + /* Calculate stats */ + /*** Division in kernel it tricky ***/ + /* Orig: time_sec = (time_interval / NANOSEC_PER_SEC); */ + /* remainder only correct because NANOSEC_PER_SEC is 10^9 */ + rec->time_sec = div_u64_rem(rec->time_interval, NANOSEC_PER_SEC, + &rec->time_sec_remainder); + //TODO: use existing struct timespec records instead of div? + + if (rec->flags & TIME_BENCH_LOOP) { + /*** Division in kernel it tricky ***/ + /* Orig: ns = ((double)time_interval / invoked_cnt); */ + /* First get quotient */ + rec->ns_per_call_quotient = + div_u64_rem(rec->time_interval, invoked_cnt, + &ns_per_call_remainder); + /* Now get decimals .xxx precision (incorrect roundup)*/ + ns_per_call_tmp_rem = ns_per_call_remainder; + invoked_cnt_precision = invoked_cnt / 1000; + if (invoked_cnt_precision > 0) { + rec->ns_per_call_decimal = + div_u64_rem(ns_per_call_tmp_rem, + invoked_cnt_precision, + &ns_per_call_remainder); + } + } + } + + /* Performance Monitor Unit (PMU) counters */ + if (rec->flags & TIME_BENCH_PMU) { + //FIXME: Overflow handling??? + rec->pmc_inst = rec->pmc_inst_stop - rec->pmc_inst_start; + rec->pmc_clk = rec->pmc_clk_stop - rec->pmc_clk_start; + + /* Calc Instruction Per Cycle (IPC) */ + /* First get quotient */ + rec->pmc_ipc_quotient = div_u64_rem(rec->pmc_inst, rec->pmc_clk, + &pmc_ipc_remainder); + /* Now get decimals .xxx precision (incorrect roundup)*/ + pmc_ipc_tmp_rem = pmc_ipc_remainder; + pmc_ipc_div = rec->pmc_clk / 1000; + if (pmc_ipc_div > 0) { + rec->pmc_ipc_decimal = div_u64_rem(pmc_ipc_tmp_rem, + pmc_ipc_div, + &pmc_ipc_remainder); + } + } + + return true; +} + +/* Generic function for invoking a loop function and calculating + * execution time stats. The function being called/timed is assumed + * to perform a tight loop, and update the timing record struct. + */ +bool time_bench_loop(uint32_t loops, int step, char *txt, void *data, + int (*func)(struct time_bench_record *record, void *data)) +{ + struct time_bench_record rec; + + /* Setup record */ + memset(&rec, 0, sizeof(rec)); /* zero func might not update all */ + rec.version_abi = 1; + rec.loops = loops; + rec.step = step; + rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC | TIME_BENCH_WALLCLOCK); + + /*** Loop function being timed ***/ + if (!func(&rec, data)) { + pr_err("ABORT: function being timed failed\n"); + return false; + } + + if (rec.invoked_cnt < loops) + pr_warn("WARNING: Invoke count(%llu) smaller than loops(%d)\n", + rec.invoked_cnt, loops); + + /* Calculate stats */ + time_bench_calc_stats(&rec); + + pr_info("Type:%s Per elem: %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n", + txt, rec.tsc_cycles, rec.ns_per_call_quotient, + rec.ns_per_call_decimal, rec.step, rec.time_sec, + rec.time_sec_remainder, rec.time_interval, rec.invoked_cnt, + rec.tsc_interval); + if (rec.flags & TIME_BENCH_PMU) + pr_info("Type:%s PMU inst/clock%llu/%llu = %llu.%03llu IPC (inst per cycle)\n", + txt, rec.pmc_inst, rec.pmc_clk, rec.pmc_ipc_quotient, + rec.pmc_ipc_decimal); + return true; +} + +/* Function getting invoked by kthread */ +static int invoke_test_on_cpu_func(void *private) +{ + struct time_bench_cpu *cpu = private; + struct time_bench_sync *sync = cpu->sync; + cpumask_t newmask = CPU_MASK_NONE; + void *data = cpu->data; + + /* Restrict CPU */ + cpumask_set_cpu(cpu->rec.cpu, &newmask); + set_cpus_allowed_ptr(current, &newmask); + + /* Synchronize start of concurrency test */ + atomic_inc(&sync->nr_tests_running); + wait_for_completion(&sync->start_event); + + /* Start benchmark function */ + if (!cpu->bench_func(&cpu->rec, data)) { + pr_err("ERROR: function being timed failed on CPU:%d(%d)\n", + cpu->rec.cpu, smp_processor_id()); + } else { + if (verbose) + pr_info("SUCCESS: ran on CPU:%d(%d)\n", cpu->rec.cpu, + smp_processor_id()); + } + cpu->did_bench_run = true; + + /* End test */ + atomic_dec(&sync->nr_tests_running); + /* Wait for kthread_stop() telling us to stop */ + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + } + __set_current_state(TASK_RUNNING); + return 0; +} + +void time_bench_print_stats_cpumask(const char *desc, + struct time_bench_cpu *cpu_tasks, + const struct cpumask *mask) +{ + uint64_t average = 0; + int cpu; + int step = 0; + struct sum { + uint64_t tsc_cycles; + int records; + } sum = { 0 }; + + /* Get stats */ + for_each_cpu(cpu, mask) { + struct time_bench_cpu *c = &cpu_tasks[cpu]; + struct time_bench_record *rec = &c->rec; + + /* Calculate stats */ + time_bench_calc_stats(rec); + + pr_info("Type:%s CPU(%d) %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n", + desc, cpu, rec->tsc_cycles, rec->ns_per_call_quotient, + rec->ns_per_call_decimal, rec->step, rec->time_sec, + rec->time_sec_remainder, rec->time_interval, + rec->invoked_cnt, rec->tsc_interval); + + /* Collect average */ + sum.records++; + sum.tsc_cycles += rec->tsc_cycles; + step = rec->step; + } + + if (sum.records) /* avoid div-by-zero */ + average = sum.tsc_cycles / sum.records; + pr_info("Sum Type:%s Average: %llu cycles(tsc) CPUs:%d step:%d\n", desc, + average, sum.records, step); +} + +void time_bench_run_concurrent(uint32_t loops, int step, void *data, + const struct cpumask *mask, /* Support masking outsome CPUs*/ + struct time_bench_sync *sync, + struct time_bench_cpu *cpu_tasks, + int (*func)(struct time_bench_record *record, void *data)) +{ + int cpu, running = 0; + + if (verbose) // DEBUG + pr_warn("%s() Started on CPU:%d\n", __func__, + smp_processor_id()); + + /* Reset sync conditions */ + atomic_set(&sync->nr_tests_running, 0); + init_completion(&sync->start_event); + + /* Spawn off jobs on all CPUs */ + for_each_cpu(cpu, mask) { + struct time_bench_cpu *c = &cpu_tasks[cpu]; + + running++; + c->sync = sync; /* Send sync variable along */ + c->data = data; /* Send opaque along */ + + /* Init benchmark record */ + memset(&c->rec, 0, sizeof(struct time_bench_record)); + c->rec.version_abi = 1; + c->rec.loops = loops; + c->rec.step = step; + c->rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC | + TIME_BENCH_WALLCLOCK); + c->rec.cpu = cpu; + c->bench_func = func; + c->task = kthread_run(invoke_test_on_cpu_func, c, + "time_bench%d", cpu); + if (IS_ERR(c->task)) { + pr_err("%s(): Failed to start test func\n", __func__); + return; /* Argh, what about cleanup?! */ + } + } + + /* Wait until all processes are running */ + while (atomic_read(&sync->nr_tests_running) < running) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(10); + } + /* Kick off all CPU concurrently on completion event */ + complete_all(&sync->start_event); + + /* Wait for CPUs to finish */ + while (atomic_read(&sync->nr_tests_running)) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(10); + } + + /* Stop the kthreads */ + for_each_cpu(cpu, mask) { + struct time_bench_cpu *c = &cpu_tasks[cpu]; + + kthread_stop(c->task); + } + + if (verbose) // DEBUG - happens often, finish on another CPU + pr_warn("%s() Finished on CPU:%d\n", __func__, + smp_processor_id()); +} diff --git a/tools/testing/selftests/net/bench/page_pool/time_bench.h b/tools/testing/selftests/net/bench/page_pool/time_bench.h new file mode 100644 index 000000000000..e113fcf341dc --- /dev/null +++ b/tools/testing/selftests/net/bench/page_pool/time_bench.h @@ -0,0 +1,238 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Benchmarking code execution time inside the kernel + * + * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer + * for licensing details see kernel-base/COPYING + */ +#ifndef _LINUX_TIME_BENCH_H +#define _LINUX_TIME_BENCH_H + +/* Main structure used for recording a benchmark run */ +struct time_bench_record { + uint32_t version_abi; + uint32_t loops; /* Requested loop invocations */ + uint32_t step; /* option for e.g. bulk invocations */ + + uint32_t flags; /* Measurements types enabled */ +#define TIME_BENCH_LOOP BIT(0) +#define TIME_BENCH_TSC BIT(1) +#define TIME_BENCH_WALLCLOCK BIT(2) +#define TIME_BENCH_PMU BIT(3) + + uint32_t cpu; /* Used when embedded in time_bench_cpu */ + + /* Records */ + uint64_t invoked_cnt; /* Returned actual invocations */ + uint64_t tsc_start; + uint64_t tsc_stop; + struct timespec64 ts_start; + struct timespec64 ts_stop; + /* PMU counters for instruction and cycles + * instructions counter including pipelined instructions + */ + uint64_t pmc_inst_start; + uint64_t pmc_inst_stop; + /* CPU unhalted clock counter */ + uint64_t pmc_clk_start; + uint64_t pmc_clk_stop; + + /* Result records */ + uint64_t tsc_interval; + uint64_t time_start, time_stop, time_interval; /* in nanosec */ + uint64_t pmc_inst, pmc_clk; + + /* Derived result records */ + uint64_t tsc_cycles; // +decimal? + uint64_t ns_per_call_quotient, ns_per_call_decimal; + uint64_t time_sec; + uint32_t time_sec_remainder; + uint64_t pmc_ipc_quotient, pmc_ipc_decimal; /* inst per cycle */ +}; + +/* For synchronizing parallel CPUs to run concurrently */ +struct time_bench_sync { + atomic_t nr_tests_running; + struct completion start_event; +}; + +/* Keep track of CPUs executing our bench function. + * + * Embed a time_bench_record for storing info per cpu + */ +struct time_bench_cpu { + struct time_bench_record rec; + struct time_bench_sync *sync; /* back ptr */ + struct task_struct *task; + /* "data" opaque could have been placed in time_bench_sync, + * but to avoid any false sharing, place it per CPU + */ + void *data; + /* Support masking outsome CPUs, mark if it ran */ + bool did_bench_run; + /* int cpu; // note CPU stored in time_bench_record */ + int (*bench_func)(struct time_bench_record *record, void *data); +}; + +/* + * Below TSC assembler code is not compatible with other archs, and + * can also fail on guests if cpu-flags are not correct. + * + * The way TSC reading is used, many iterations, does not require as + * high accuracy as described below (in Intel Doc #324264). + * + * Considering changing to use get_cycles() (#include ). + */ + +/** TSC (Time-Stamp Counter) based ** + * Recommend reading, to understand details of reading TSC accurately: + * Intel Doc #324264, "How to Benchmark Code Execution Times on Intel" + * + * Consider getting exclusive ownership of CPU by using: + * unsigned long flags; + * preempt_disable(); + * raw_local_irq_save(flags); + * _your_code_ + * raw_local_irq_restore(flags); + * preempt_enable(); + * + * Clobbered registers: "%rax", "%rbx", "%rcx", "%rdx" + * RDTSC only change "%rax" and "%rdx" but + * CPUID clears the high 32-bits of all (rax/rbx/rcx/rdx) + */ +static __always_inline uint64_t tsc_start_clock(void) +{ + /* See: Intel Doc #324264 */ + unsigned int hi, lo; + + asm volatile("CPUID\n\t" + "RDTSC\n\t" + "mov %%edx, %0\n\t" + "mov %%eax, %1\n\t" + : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx"); + //FIXME: on 32bit use clobbered %eax + %edx + return ((uint64_t)lo) | (((uint64_t)hi) << 32); +} + +static __always_inline uint64_t tsc_stop_clock(void) +{ + /* See: Intel Doc #324264 */ + unsigned int hi, lo; + + asm volatile("RDTSCP\n\t" + "mov %%edx, %0\n\t" + "mov %%eax, %1\n\t" + "CPUID\n\t" + : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx"); + return ((uint64_t)lo) | (((uint64_t)hi) << 32); +} + +/** Wall-clock based ** + * + * use: getnstimeofday() + * getnstimeofday(&rec->ts_start); + * getnstimeofday(&rec->ts_stop); + * + * API changed see: Documentation/core-api/timekeeping.rst + * https://www.kernel.org/doc/html/latest/core-api/timekeeping.html#c.getnstimeofday + * + * We should instead use: ktime_get_real_ts64() is a direct + * replacement, but consider using monotonic time (ktime_get_ts64()) + * and/or a ktime_t based interface (ktime_get()/ktime_get_real()). + */ + +/** PMU (Performance Monitor Unit) based ** + * + * Needed for calculating: Instructions Per Cycle (IPC) + * - The IPC number tell how efficient the CPU pipelining were + */ +//lookup: perf_event_create_kernel_counter() + +bool time_bench_PMU_config(bool enable); + +/* Raw reading via rdpmc() using fixed counters + * + * From: https://github.com/andikleen/simple-pmu + */ +enum { + FIXED_SELECT = (1U << 30), /* == 0x40000000 */ + FIXED_INST_RETIRED_ANY = 0, + FIXED_CPU_CLK_UNHALTED_CORE = 1, + FIXED_CPU_CLK_UNHALTED_REF = 2, +}; + +static __always_inline unsigned int long long p_rdpmc(unsigned int in) +{ + unsigned int d, a; + + asm volatile("rdpmc" : "=d"(d), "=a"(a) : "c"(in) : "memory"); + return ((unsigned long long)d << 32) | a; +} + +/* These PMU counter needs to be enabled, but I don't have the + * configure code implemented. My current hack is running: + * sudo perf stat -e cycles:k -e instructions:k insmod lib/ring_queue_test.ko + */ +/* Reading all pipelined instruction */ +static __always_inline unsigned long long pmc_inst(void) +{ + return p_rdpmc(FIXED_SELECT | FIXED_INST_RETIRED_ANY); +} + +/* Reading CPU clock cycles */ +static __always_inline unsigned long long pmc_clk(void) +{ + return p_rdpmc(FIXED_SELECT | FIXED_CPU_CLK_UNHALTED_CORE); +} + +/* Raw reading via MSR rdmsr() is likely wrong + * FIXME: How can I know which raw MSR registers are conf for what? + */ +#define MSR_IA32_PCM0 0x400000C1 /* PERFCTR0 */ +#define MSR_IA32_PCM1 0x400000C2 /* PERFCTR1 */ +#define MSR_IA32_PCM2 0x400000C3 +static inline uint64_t msr_inst(unsigned long long *msr_result) +{ + return rdmsrq_safe(MSR_IA32_PCM0, msr_result); +} + +/** Generic functions ** + */ +bool time_bench_loop(uint32_t loops, int step, char *txt, void *data, + int (*func)(struct time_bench_record *rec, void *data)); +bool time_bench_calc_stats(struct time_bench_record *rec); + +void time_bench_run_concurrent(uint32_t loops, int step, void *data, + const struct cpumask *mask, /* Support masking outsome CPUs*/ + struct time_bench_sync *sync, struct time_bench_cpu *cpu_tasks, + int (*func)(struct time_bench_record *record, void *data)); +void time_bench_print_stats_cpumask(const char *desc, + struct time_bench_cpu *cpu_tasks, + const struct cpumask *mask); + +//FIXME: use rec->flags to select measurement, should be MACRO +static __always_inline void time_bench_start(struct time_bench_record *rec) +{ + //getnstimeofday(&rec->ts_start); + ktime_get_real_ts64(&rec->ts_start); + if (rec->flags & TIME_BENCH_PMU) { + rec->pmc_inst_start = pmc_inst(); + rec->pmc_clk_start = pmc_clk(); + } + rec->tsc_start = tsc_start_clock(); +} + +static __always_inline void time_bench_stop(struct time_bench_record *rec, + uint64_t invoked_cnt) +{ + rec->tsc_stop = tsc_stop_clock(); + if (rec->flags & TIME_BENCH_PMU) { + rec->pmc_inst_stop = pmc_inst(); + rec->pmc_clk_stop = pmc_clk(); + } + //getnstimeofday(&rec->ts_stop); + ktime_get_real_ts64(&rec->ts_stop); + rec->invoked_cnt = invoked_cnt; +} + +#endif /* _LINUX_TIME_BENCH_H */ diff --git a/tools/testing/selftests/net/bench/test_bench_page_pool.sh b/tools/testing/selftests/net/bench/test_bench_page_pool.sh new file mode 100755 index 000000000000..7b8b18cfedce --- /dev/null +++ b/tools/testing/selftests/net/bench/test_bench_page_pool.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# + +set -e + +DRIVER="./page_pool/bench_page_pool.ko" +result="" + +function run_test() +{ + rmmod "bench_page_pool.ko" || true + insmod $DRIVER > /dev/null 2>&1 + result=$(dmesg | tail -10) + echo "$result" + + echo + echo "Fast path results:" + echo "${result}" | grep -o -E "no-softirq-page_pool01 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns" + + echo + echo "ptr_ring results:" + echo "${result}" | grep -o -E "no-softirq-page_pool02 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns" + + echo + echo "slow path results:" + echo "${result}" | grep -o -E "no-softirq-page_pool03 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns" +} + +run_test + +exit 0 -- cgit v1.2.3 From b8a205486ed5c0c5c0386e472157a81ce686af25 Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Thu, 19 Jun 2025 16:06:03 +0200 Subject: selftests/bpf: Convert test_sysctl to prog_tests Convert test_sysctl test to prog_tests with minimal change to the tests themselves. Signed-off-by: Jerome Marchand Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20250619140603.148942-3-jmarchan@redhat.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/.gitignore | 1 - tools/testing/selftests/bpf/Makefile | 5 +- .../testing/selftests/bpf/prog_tests/test_sysctl.c | 1612 +++++++++++++++++++ tools/testing/selftests/bpf/test_sysctl.c | 1633 -------------------- 4 files changed, 1614 insertions(+), 1637 deletions(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/test_sysctl.c delete mode 100644 tools/testing/selftests/bpf/test_sysctl.c (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index e2a2c46c008b..3d8378972d26 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -21,7 +21,6 @@ test_lirc_mode2_user flow_dissector_load test_tcpnotify_user test_libbpf -test_sysctl xdping test_cpp *.d diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index cf5ed3bee573..910d8d6402ef 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -73,7 +73,7 @@ endif # Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_progs \ test_sockmap \ - test_tcpnotify_user test_sysctl \ + test_tcpnotify_user \ test_progs-no_alu32 TEST_INST_SUBDIRS := no_alu32 @@ -220,7 +220,7 @@ ifeq ($(VMLINUX_BTF),) $(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)") endif -# Define simple and short `make test_progs`, `make test_sysctl`, etc targets +# Define simple and short `make test_progs`, `make test_maps`, etc targets # to build individual tests. # NOTE: Semicolon at the end is critical to override lib.mk's default static # rule for binaries. @@ -329,7 +329,6 @@ NETWORK_HELPERS := $(OUTPUT)/network_helpers.o $(OUTPUT)/test_sockmap: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(OUTPUT)/test_tcpnotify_user: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(TRACE_HELPERS) $(OUTPUT)/test_sock_fields: $(CGROUP_HELPERS) $(TESTING_HELPERS) -$(OUTPUT)/test_sysctl: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(OUTPUT)/test_tag: $(TESTING_HELPERS) $(OUTPUT)/test_lirc_mode2_user: $(TESTING_HELPERS) $(OUTPUT)/xdping: $(TESTING_HELPERS) diff --git a/tools/testing/selftests/bpf/prog_tests/test_sysctl.c b/tools/testing/selftests/bpf/prog_tests/test_sysctl.c new file mode 100644 index 000000000000..273dd41ca09e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_sysctl.c @@ -0,0 +1,1612 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include "test_progs.h" +#include "cgroup_helpers.h" + +#define CG_PATH "/foo" +#define MAX_INSNS 512 +#define FIXUP_SYSCTL_VALUE 0 + +char bpf_log_buf[BPF_LOG_BUF_SIZE]; + +struct sysctl_test { + const char *descr; + size_t fixup_value_insn; + struct bpf_insn insns[MAX_INSNS]; + const char *prog_file; + enum bpf_attach_type attach_type; + const char *sysctl; + int open_flags; + int seek; + const char *newval; + const char *oldval; + enum { + LOAD_REJECT, + ATTACH_REJECT, + OP_EPERM, + SUCCESS, + } result; +}; + +static struct sysctl_test tests[] = { + { + .descr = "sysctl wrong attach_type", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = 0, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .result = ATTACH_REJECT, + }, + { + .descr = "sysctl:read allow all", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl:read deny all", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .result = OP_EPERM, + }, + { + .descr = "ctx:write sysctl:read read ok", + .insns = { + /* If (write) */ + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct bpf_sysctl, write)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 1, 2), + + /* return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_A(1), + + /* else return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "ctx:write sysctl:write read ok", + .insns = { + /* If (write) */ + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct bpf_sysctl, write)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 1, 2), + + /* return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_A(1), + + /* else return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/domainname", + .open_flags = O_WRONLY, + .newval = "(none)", /* same as default, should fail anyway */ + .result = OP_EPERM, + }, + { + .descr = "ctx:write sysctl:write read ok narrow", + .insns = { + /* u64 w = (u16)write & 1; */ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_1, + offsetof(struct bpf_sysctl, write)), +#else + BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_1, + offsetof(struct bpf_sysctl, write) + 2), +#endif + BPF_ALU64_IMM(BPF_AND, BPF_REG_7, 1), + /* return 1 - w; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/domainname", + .open_flags = O_WRONLY, + .newval = "(none)", /* same as default, should fail anyway */ + .result = OP_EPERM, + }, + { + .descr = "ctx:write sysctl:read write reject", + .insns = { + /* write = X */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct bpf_sysctl, write)), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .result = LOAD_REJECT, + }, + { + .descr = "ctx:file_pos sysctl:read read ok", + .insns = { + /* If (file_pos == X) */ + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct bpf_sysctl, file_pos)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 3, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .seek = 3, + .result = SUCCESS, + }, + { + .descr = "ctx:file_pos sysctl:read read ok narrow", + .insns = { + /* If (file_pos == X) */ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1, + offsetof(struct bpf_sysctl, file_pos)), +#else + BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1, + offsetof(struct bpf_sysctl, file_pos) + 3), +#endif + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 4, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .seek = 4, + .result = SUCCESS, + }, + { + .descr = "ctx:file_pos sysctl:read write ok", + .insns = { + /* file_pos = X */ + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct bpf_sysctl, file_pos)), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .oldval = "nux\n", + .result = SUCCESS, + }, + { + .descr = "sysctl_get_name sysctl_value:base ok", + .insns = { + /* sysctl_get_name arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_name arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 8), + + /* sysctl_get_name arg4 (flags) */ + BPF_MOV64_IMM(BPF_REG_4, BPF_F_SYSCTL_BASE_NAME), + + /* sysctl_get_name(ctx, buf, buf_len, flags) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, sizeof("tcp_mem") - 1, 6), + /* buf == "tcp_mem\0") */ + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x7463705f6d656d00ULL)), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/tcp_mem", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl_get_name sysctl_value:base E2BIG truncated", + .insns = { + /* sysctl_get_name arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_name arg3 (buf_len) too small */ + BPF_MOV64_IMM(BPF_REG_3, 7), + + /* sysctl_get_name arg4 (flags) */ + BPF_MOV64_IMM(BPF_REG_4, BPF_F_SYSCTL_BASE_NAME), + + /* sysctl_get_name(ctx, buf, buf_len, flags) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6), + + /* buf[0:7] == "tcp_me\0") */ + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x7463705f6d650000ULL)), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/tcp_mem", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl_get_name sysctl:full ok", + .insns = { + /* sysctl_get_name arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_name arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 17), + + /* sysctl_get_name arg4 (flags) */ + BPF_MOV64_IMM(BPF_REG_4, 0), + + /* sysctl_get_name(ctx, buf, buf_len, flags) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 16, 14), + + /* buf[0:8] == "net/ipv4" && */ + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x6e65742f69707634ULL)), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 10), + + /* buf[8:16] == "/tcp_mem" && */ + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x2f7463705f6d656dULL)), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6), + + /* buf[16:24] == "\0") */ + BPF_LD_IMM64(BPF_REG_8, 0x0ULL), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 16), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/tcp_mem", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl_get_name sysctl:full E2BIG truncated", + .insns = { + /* sysctl_get_name arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -16), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_name arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 16), + + /* sysctl_get_name arg4 (flags) */ + BPF_MOV64_IMM(BPF_REG_4, 0), + + /* sysctl_get_name(ctx, buf, buf_len, flags) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 10), + + /* buf[0:8] == "net/ipv4" && */ + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x6e65742f69707634ULL)), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6), + + /* buf[8:16] == "/tcp_me\0") */ + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x2f7463705f6d6500ULL)), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/tcp_mem", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl_get_name sysctl:full E2BIG truncated small", + .insns = { + /* sysctl_get_name arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_name arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 7), + + /* sysctl_get_name arg4 (flags) */ + BPF_MOV64_IMM(BPF_REG_4, 0), + + /* sysctl_get_name(ctx, buf, buf_len, flags) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6), + + /* buf[0:8] == "net/ip\0") */ + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x6e65742f69700000ULL)), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/tcp_mem", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl_get_current_value sysctl:read ok, gt", + .insns = { + /* sysctl_get_current_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_current_value arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 8), + + /* sysctl_get_current_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 6, 6), + + /* buf[0:6] == "Linux\n\0") */ + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x4c696e75780a0000ULL)), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl_get_current_value sysctl:read ok, eq", + .insns = { + /* sysctl_get_current_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_B, BPF_REG_7, BPF_REG_0, 7), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_current_value arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 7), + + /* sysctl_get_current_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 6, 6), + + /* buf[0:6] == "Linux\n\0") */ + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x4c696e75780a0000ULL)), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl_get_current_value sysctl:read E2BIG truncated", + .insns = { + /* sysctl_get_current_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_H, BPF_REG_7, BPF_REG_0, 6), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_current_value arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 6), + + /* sysctl_get_current_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6), + + /* buf[0:6] == "Linux\0") */ + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x4c696e7578000000ULL)), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl_get_current_value sysctl:read EINVAL", + .insns = { + /* sysctl_get_current_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_current_value arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 8), + + /* sysctl_get_current_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 4), + + /* buf[0:8] is NUL-filled) */ + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2), + + /* return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_A(1), + + /* else return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv6/conf/lo/stable_secret", /* -EIO */ + .open_flags = O_RDONLY, + .result = OP_EPERM, + }, + { + .descr = "sysctl_get_current_value sysctl:write ok", + .fixup_value_insn = 6, + .insns = { + /* sysctl_get_current_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_current_value arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 8), + + /* sysctl_get_current_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 4, 6), + + /* buf[0:4] == expected) */ + BPF_LD_IMM64(BPF_REG_8, FIXUP_SYSCTL_VALUE), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_A(1), + + /* else return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_WRONLY, + .newval = "600", /* same as default, should fail anyway */ + .result = OP_EPERM, + }, + { + .descr = "sysctl_get_new_value sysctl:read EINVAL", + .insns = { + /* sysctl_get_new_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_new_value arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 8), + + /* sysctl_get_new_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value), + + /* if (ret == expected) */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/tcp_mem", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl_get_new_value sysctl:write ok", + .insns = { + /* sysctl_get_new_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_new_value arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 4), + + /* sysctl_get_new_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 4), + + /* buf[0:4] == "606\0") */ + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_9, + bpf_ntohl(0x36303600), 2), + + /* return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_A(1), + + /* else return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_WRONLY, + .newval = "606", + .result = OP_EPERM, + }, + { + .descr = "sysctl_get_new_value sysctl:write ok long", + .insns = { + /* sysctl_get_new_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_new_value arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 24), + + /* sysctl_get_new_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 23, 14), + + /* buf[0:8] == "3000000 " && */ + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x3330303030303020ULL)), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 10), + + /* buf[8:16] == "4000000 " && */ + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x3430303030303020ULL)), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6), + + /* buf[16:24] == "6000000\0") */ + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x3630303030303000ULL)), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 16), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_A(1), + + /* else return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/tcp_mem", + .open_flags = O_WRONLY, + .newval = "3000000 4000000 6000000", + .result = OP_EPERM, + }, + { + .descr = "sysctl_get_new_value sysctl:write E2BIG", + .insns = { + /* sysctl_get_new_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_B, BPF_REG_7, BPF_REG_0, 3), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_new_value arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 3), + + /* sysctl_get_new_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 4), + + /* buf[0:3] == "60\0") */ + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_9, + bpf_ntohl(0x36300000), 2), + + /* return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_A(1), + + /* else return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_WRONLY, + .newval = "606", + .result = OP_EPERM, + }, + { + .descr = "sysctl_set_new_value sysctl:read EINVAL", + .insns = { + /* sysctl_set_new_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x36303000)), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_set_new_value arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 3), + + /* sysctl_set_new_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_set_new_value), + + /* if (ret == expected) */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl_set_new_value sysctl:write ok", + .fixup_value_insn = 2, + .insns = { + /* sysctl_set_new_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_LD_IMM64(BPF_REG_0, FIXUP_SYSCTL_VALUE), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_set_new_value arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 3), + + /* sysctl_set_new_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_set_new_value), + + /* if (ret == expected) */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_WRONLY, + .newval = "606", + .result = SUCCESS, + }, + { + "bpf_strtoul one number string", + .insns = { + /* arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x36303000)), + BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 4), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 0), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtoul), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 4), + /* res == expected) */ + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 600, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + "bpf_strtoul multi number string", + .insns = { + /* arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + /* "600 602\0" */ + BPF_LD_IMM64(BPF_REG_0, + bpf_be64_to_cpu(0x3630302036303200ULL)), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 8), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 0), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtoul), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 18), + /* res == expected) */ + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 600, 16), + + /* arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_0), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 0), + + /* arg4 (res) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -16), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtoul), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 4, 4), + /* res == expected) */ + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 602, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/tcp_mem", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + "bpf_strtoul buf_len = 0, reject", + .insns = { + /* arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x36303000)), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 0), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 0), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtoul), + + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = LOAD_REJECT, + }, + { + "bpf_strtoul supported base, ok", + .insns = { + /* arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x30373700)), + BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 4), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 8), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtoul), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 4), + /* res == expected) */ + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 63, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + "bpf_strtoul unsupported base, EINVAL", + .insns = { + /* arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x36303000)), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 4), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 3), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtoul), + + /* if (ret == expected) */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + "bpf_strtoul buf with spaces only, EINVAL", + .insns = { + /* arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x0d0c0a09)), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 4), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 0), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtoul), + + /* if (ret == expected) */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + "bpf_strtoul negative number, EINVAL", + .insns = { + /* arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + /* " -6\0" */ + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x0a2d3600)), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 4), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 0), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtoul), + + /* if (ret == expected) */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + "bpf_strtol negative number, ok", + .insns = { + /* arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + /* " -6\0" */ + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x0a2d3600)), + BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 4), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 10), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtol), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 4), + /* res == expected) */ + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_9, -6, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + "bpf_strtol hex number, ok", + .insns = { + /* arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + /* "0xfe" */ + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x30786665)), + BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 4), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 0), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtol), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 4, 4), + /* res == expected) */ + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 254, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + "bpf_strtol max long", + .insns = { + /* arg1 (buf) 9223372036854775807 */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24), + BPF_LD_IMM64(BPF_REG_0, + bpf_be64_to_cpu(0x3932323333373230ULL)), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_LD_IMM64(BPF_REG_0, + bpf_be64_to_cpu(0x3336383534373735ULL)), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8), + BPF_LD_IMM64(BPF_REG_0, + bpf_be64_to_cpu(0x3830370000000000ULL)), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 19), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 0), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtol), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 19, 6), + /* res == expected) */ + BPF_LD_IMM64(BPF_REG_8, 0x7fffffffffffffffULL), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + "bpf_strtol overflow, ERANGE", + .insns = { + /* arg1 (buf) 9223372036854775808 */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24), + BPF_LD_IMM64(BPF_REG_0, + bpf_be64_to_cpu(0x3932323333373230ULL)), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_LD_IMM64(BPF_REG_0, + bpf_be64_to_cpu(0x3336383534373735ULL)), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8), + BPF_LD_IMM64(BPF_REG_0, + bpf_be64_to_cpu(0x3830380000000000ULL)), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 19), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 0), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtol), + + /* if (ret == expected) */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -ERANGE, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + "C prog: deny all writes", + .prog_file = "./test_sysctl_prog.bpf.o", + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/tcp_mem", + .open_flags = O_WRONLY, + .newval = "123 456 789", + .result = OP_EPERM, + }, + { + "C prog: deny access by name", + .prog_file = "./test_sysctl_prog.bpf.o", + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = OP_EPERM, + }, + { + "C prog: read tcp_mem", + .prog_file = "./test_sysctl_prog.bpf.o", + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/tcp_mem", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, +}; + +static size_t probe_prog_length(const struct bpf_insn *fp) +{ + size_t len; + + for (len = MAX_INSNS - 1; len > 0; --len) + if (fp[len].code != 0 || fp[len].imm != 0) + break; + return len + 1; +} + +static int fixup_sysctl_value(const char *buf, size_t buf_len, + struct bpf_insn *prog, size_t insn_num) +{ + union { + uint8_t raw[sizeof(uint64_t)]; + uint64_t num; + } value = {}; + + if (buf_len > sizeof(value)) { + log_err("Value is too big (%zd) to use in fixup", buf_len); + return -1; + } + if (prog[insn_num].code != (BPF_LD | BPF_DW | BPF_IMM)) { + log_err("Can fixup only BPF_LD_IMM64 insns"); + return -1; + } + + memcpy(value.raw, buf, buf_len); + prog[insn_num].imm = (uint32_t)value.num; + prog[insn_num + 1].imm = (uint32_t)(value.num >> 32); + + return 0; +} + +static int load_sysctl_prog_insns(struct sysctl_test *test, + const char *sysctl_path) +{ + struct bpf_insn *prog = test->insns; + LIBBPF_OPTS(bpf_prog_load_opts, opts); + int ret, insn_cnt; + + insn_cnt = probe_prog_length(prog); + + if (test->fixup_value_insn) { + char buf[128]; + ssize_t len; + int fd; + + fd = open(sysctl_path, O_RDONLY | O_CLOEXEC); + if (fd < 0) { + log_err("open(%s) failed", sysctl_path); + return -1; + } + len = read(fd, buf, sizeof(buf)); + if (len == -1) { + log_err("read(%s) failed", sysctl_path); + close(fd); + return -1; + } + close(fd); + if (fixup_sysctl_value(buf, len, prog, test->fixup_value_insn)) + return -1; + } + + opts.log_buf = bpf_log_buf; + opts.log_size = BPF_LOG_BUF_SIZE; + + ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SYSCTL, NULL, "GPL", prog, insn_cnt, &opts); + if (ret < 0 && test->result != LOAD_REJECT) { + log_err(">>> Loading program error.\n" + ">>> Verifier output:\n%s\n-------\n", bpf_log_buf); + } + + return ret; +} + +static int load_sysctl_prog_file(struct sysctl_test *test) +{ + struct bpf_object *obj; + int prog_fd; + + if (bpf_prog_test_load(test->prog_file, BPF_PROG_TYPE_CGROUP_SYSCTL, &obj, &prog_fd)) { + if (test->result != LOAD_REJECT) + log_err(">>> Loading program (%s) error.\n", + test->prog_file); + return -1; + } + + return prog_fd; +} + +static int load_sysctl_prog(struct sysctl_test *test, const char *sysctl_path) +{ + return test->prog_file + ? load_sysctl_prog_file(test) + : load_sysctl_prog_insns(test, sysctl_path); +} + +static int access_sysctl(const char *sysctl_path, + const struct sysctl_test *test) +{ + int err = 0; + int fd; + + fd = open(sysctl_path, test->open_flags | O_CLOEXEC); + if (fd < 0) + return fd; + + if (test->seek && lseek(fd, test->seek, SEEK_SET) == -1) { + log_err("lseek(%d) failed", test->seek); + goto err; + } + + if (test->open_flags == O_RDONLY) { + char buf[128]; + + if (read(fd, buf, sizeof(buf)) == -1) + goto err; + if (test->oldval && + strncmp(buf, test->oldval, strlen(test->oldval))) { + log_err("Read value %s != %s", buf, test->oldval); + goto err; + } + } else if (test->open_flags == O_WRONLY) { + if (!test->newval) { + log_err("New value for sysctl is not set"); + goto err; + } + if (write(fd, test->newval, strlen(test->newval)) == -1) + goto err; + } else { + log_err("Unexpected sysctl access: neither read nor write"); + goto err; + } + + goto out; +err: + err = -1; +out: + close(fd); + return err; +} + +static int run_test_case(int cgfd, struct sysctl_test *test) +{ + enum bpf_attach_type atype = test->attach_type; + char sysctl_path[128]; + int progfd = -1; + int err = 0; + + printf("Test case: %s .. ", test->descr); + + snprintf(sysctl_path, sizeof(sysctl_path), "/proc/sys/%s", + test->sysctl); + + progfd = load_sysctl_prog(test, sysctl_path); + if (progfd < 0) { + if (test->result == LOAD_REJECT) + goto out; + else + goto err; + } + + if (bpf_prog_attach(progfd, cgfd, atype, BPF_F_ALLOW_OVERRIDE) < 0) { + if (test->result == ATTACH_REJECT) + goto out; + else + goto err; + } + + errno = 0; + if (access_sysctl(sysctl_path, test) == -1) { + if (test->result == OP_EPERM && errno == EPERM) + goto out; + else + goto err; + } + + if (test->result != SUCCESS) { + log_err("Unexpected success"); + goto err; + } + + goto out; +err: + err = -1; +out: + /* Detaching w/o checking return code: best effort attempt. */ + if (progfd != -1) + bpf_prog_detach(cgfd, atype); + close(progfd); + printf("[%s]\n", err ? "FAIL" : "PASS"); + return err; +} + +static int run_tests(int cgfd) +{ + int passes = 0; + int fails = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(tests); ++i) { + if (run_test_case(cgfd, &tests[i])) + ++fails; + else + ++passes; + } + printf("Summary: %d PASSED, %d FAILED\n", passes, fails); + return fails ? -1 : 0; +} + +void test_sysctl(void) +{ + int cgfd; + + cgfd = cgroup_setup_and_join(CG_PATH); + if (!ASSERT_OK_FD(cgfd < 0, "create_cgroup")) + goto out; + + if (!ASSERT_OK(run_tests(cgfd), "run_tests")) + goto out; + +out: + close(cgfd); + cleanup_cgroup_environment(); + return; +} diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c deleted file mode 100644 index bcdbd27f22f0..000000000000 --- a/tools/testing/selftests/bpf/test_sysctl.c +++ /dev/null @@ -1,1633 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright (c) 2019 Facebook - -#include -#include -#include -#include -#include -#include - -#include - -#include -#include - -#include -#include "bpf_util.h" -#include "cgroup_helpers.h" -#include "testing_helpers.h" - -#define CG_PATH "/foo" -#define MAX_INSNS 512 -#define FIXUP_SYSCTL_VALUE 0 - -char bpf_log_buf[BPF_LOG_BUF_SIZE]; - -struct sysctl_test { - const char *descr; - size_t fixup_value_insn; - struct bpf_insn insns[MAX_INSNS]; - const char *prog_file; - enum bpf_attach_type attach_type; - const char *sysctl; - int open_flags; - int seek; - const char *newval; - const char *oldval; - enum { - LOAD_REJECT, - ATTACH_REJECT, - OP_EPERM, - SUCCESS, - } result; -}; - -static struct sysctl_test tests[] = { - { - .descr = "sysctl wrong attach_type", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .attach_type = 0, - .sysctl = "kernel/ostype", - .open_flags = O_RDONLY, - .result = ATTACH_REJECT, - }, - { - .descr = "sysctl:read allow all", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "kernel/ostype", - .open_flags = O_RDONLY, - .result = SUCCESS, - }, - { - .descr = "sysctl:read deny all", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "kernel/ostype", - .open_flags = O_RDONLY, - .result = OP_EPERM, - }, - { - .descr = "ctx:write sysctl:read read ok", - .insns = { - /* If (write) */ - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct bpf_sysctl, write)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 1, 2), - - /* return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_A(1), - - /* else return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "kernel/ostype", - .open_flags = O_RDONLY, - .result = SUCCESS, - }, - { - .descr = "ctx:write sysctl:write read ok", - .insns = { - /* If (write) */ - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct bpf_sysctl, write)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 1, 2), - - /* return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_A(1), - - /* else return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "kernel/domainname", - .open_flags = O_WRONLY, - .newval = "(none)", /* same as default, should fail anyway */ - .result = OP_EPERM, - }, - { - .descr = "ctx:write sysctl:write read ok narrow", - .insns = { - /* u64 w = (u16)write & 1; */ -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_1, - offsetof(struct bpf_sysctl, write)), -#else - BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_1, - offsetof(struct bpf_sysctl, write) + 2), -#endif - BPF_ALU64_IMM(BPF_AND, BPF_REG_7, 1), - /* return 1 - w; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_7), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "kernel/domainname", - .open_flags = O_WRONLY, - .newval = "(none)", /* same as default, should fail anyway */ - .result = OP_EPERM, - }, - { - .descr = "ctx:write sysctl:read write reject", - .insns = { - /* write = X */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, - offsetof(struct bpf_sysctl, write)), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "kernel/ostype", - .open_flags = O_RDONLY, - .result = LOAD_REJECT, - }, - { - .descr = "ctx:file_pos sysctl:read read ok", - .insns = { - /* If (file_pos == X) */ - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct bpf_sysctl, file_pos)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 3, 2), - - /* return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_A(1), - - /* else return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "kernel/ostype", - .open_flags = O_RDONLY, - .seek = 3, - .result = SUCCESS, - }, - { - .descr = "ctx:file_pos sysctl:read read ok narrow", - .insns = { - /* If (file_pos == X) */ -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1, - offsetof(struct bpf_sysctl, file_pos)), -#else - BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1, - offsetof(struct bpf_sysctl, file_pos) + 3), -#endif - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 4, 2), - - /* return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_A(1), - - /* else return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "kernel/ostype", - .open_flags = O_RDONLY, - .seek = 4, - .result = SUCCESS, - }, - { - .descr = "ctx:file_pos sysctl:read write ok", - .insns = { - /* file_pos = X */ - BPF_MOV64_IMM(BPF_REG_0, 2), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, - offsetof(struct bpf_sysctl, file_pos)), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "kernel/ostype", - .open_flags = O_RDONLY, - .oldval = "nux\n", - .result = SUCCESS, - }, - { - .descr = "sysctl_get_name sysctl_value:base ok", - .insns = { - /* sysctl_get_name arg2 (buf) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - - BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), - - /* sysctl_get_name arg3 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_3, 8), - - /* sysctl_get_name arg4 (flags) */ - BPF_MOV64_IMM(BPF_REG_4, BPF_F_SYSCTL_BASE_NAME), - - /* sysctl_get_name(ctx, buf, buf_len, flags) */ - BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name), - - /* if (ret == expected && */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, sizeof("tcp_mem") - 1, 6), - /* buf == "tcp_mem\0") */ - BPF_LD_IMM64(BPF_REG_8, - bpf_be64_to_cpu(0x7463705f6d656d00ULL)), - BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), - BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), - - /* return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_A(1), - - /* else return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "net/ipv4/tcp_mem", - .open_flags = O_RDONLY, - .result = SUCCESS, - }, - { - .descr = "sysctl_get_name sysctl_value:base E2BIG truncated", - .insns = { - /* sysctl_get_name arg2 (buf) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - - BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), - - /* sysctl_get_name arg3 (buf_len) too small */ - BPF_MOV64_IMM(BPF_REG_3, 7), - - /* sysctl_get_name arg4 (flags) */ - BPF_MOV64_IMM(BPF_REG_4, BPF_F_SYSCTL_BASE_NAME), - - /* sysctl_get_name(ctx, buf, buf_len, flags) */ - BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name), - - /* if (ret == expected && */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6), - - /* buf[0:7] == "tcp_me\0") */ - BPF_LD_IMM64(BPF_REG_8, - bpf_be64_to_cpu(0x7463705f6d650000ULL)), - BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), - BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), - - /* return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_A(1), - - /* else return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "net/ipv4/tcp_mem", - .open_flags = O_RDONLY, - .result = SUCCESS, - }, - { - .descr = "sysctl_get_name sysctl:full ok", - .insns = { - /* sysctl_get_name arg2 (buf) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16), - - BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), - - /* sysctl_get_name arg3 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_3, 17), - - /* sysctl_get_name arg4 (flags) */ - BPF_MOV64_IMM(BPF_REG_4, 0), - - /* sysctl_get_name(ctx, buf, buf_len, flags) */ - BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name), - - /* if (ret == expected && */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 16, 14), - - /* buf[0:8] == "net/ipv4" && */ - BPF_LD_IMM64(BPF_REG_8, - bpf_be64_to_cpu(0x6e65742f69707634ULL)), - BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), - BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 10), - - /* buf[8:16] == "/tcp_mem" && */ - BPF_LD_IMM64(BPF_REG_8, - bpf_be64_to_cpu(0x2f7463705f6d656dULL)), - BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8), - BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6), - - /* buf[16:24] == "\0") */ - BPF_LD_IMM64(BPF_REG_8, 0x0ULL), - BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 16), - BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), - - /* return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_A(1), - - /* else return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "net/ipv4/tcp_mem", - .open_flags = O_RDONLY, - .result = SUCCESS, - }, - { - .descr = "sysctl_get_name sysctl:full E2BIG truncated", - .insns = { - /* sysctl_get_name arg2 (buf) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -16), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8), - - BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), - - /* sysctl_get_name arg3 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_3, 16), - - /* sysctl_get_name arg4 (flags) */ - BPF_MOV64_IMM(BPF_REG_4, 0), - - /* sysctl_get_name(ctx, buf, buf_len, flags) */ - BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name), - - /* if (ret == expected && */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 10), - - /* buf[0:8] == "net/ipv4" && */ - BPF_LD_IMM64(BPF_REG_8, - bpf_be64_to_cpu(0x6e65742f69707634ULL)), - BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), - BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6), - - /* buf[8:16] == "/tcp_me\0") */ - BPF_LD_IMM64(BPF_REG_8, - bpf_be64_to_cpu(0x2f7463705f6d6500ULL)), - BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8), - BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), - - /* return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_A(1), - - /* else return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "net/ipv4/tcp_mem", - .open_flags = O_RDONLY, - .result = SUCCESS, - }, - { - .descr = "sysctl_get_name sysctl:full E2BIG truncated small", - .insns = { - /* sysctl_get_name arg2 (buf) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - - BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), - - /* sysctl_get_name arg3 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_3, 7), - - /* sysctl_get_name arg4 (flags) */ - BPF_MOV64_IMM(BPF_REG_4, 0), - - /* sysctl_get_name(ctx, buf, buf_len, flags) */ - BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name), - - /* if (ret == expected && */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6), - - /* buf[0:8] == "net/ip\0") */ - BPF_LD_IMM64(BPF_REG_8, - bpf_be64_to_cpu(0x6e65742f69700000ULL)), - BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), - BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), - - /* return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_A(1), - - /* else return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "net/ipv4/tcp_mem", - .open_flags = O_RDONLY, - .result = SUCCESS, - }, - { - .descr = "sysctl_get_current_value sysctl:read ok, gt", - .insns = { - /* sysctl_get_current_value arg2 (buf) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), - - /* sysctl_get_current_value arg3 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_3, 8), - - /* sysctl_get_current_value(ctx, buf, buf_len) */ - BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value), - - /* if (ret == expected && */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 6, 6), - - /* buf[0:6] == "Linux\n\0") */ - BPF_LD_IMM64(BPF_REG_8, - bpf_be64_to_cpu(0x4c696e75780a0000ULL)), - BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), - BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), - - /* return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_A(1), - - /* else return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "kernel/ostype", - .open_flags = O_RDONLY, - .result = SUCCESS, - }, - { - .descr = "sysctl_get_current_value sysctl:read ok, eq", - .insns = { - /* sysctl_get_current_value arg2 (buf) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_B, BPF_REG_7, BPF_REG_0, 7), - - BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), - - /* sysctl_get_current_value arg3 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_3, 7), - - /* sysctl_get_current_value(ctx, buf, buf_len) */ - BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value), - - /* if (ret == expected && */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 6, 6), - - /* buf[0:6] == "Linux\n\0") */ - BPF_LD_IMM64(BPF_REG_8, - bpf_be64_to_cpu(0x4c696e75780a0000ULL)), - BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), - BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), - - /* return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_A(1), - - /* else return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "kernel/ostype", - .open_flags = O_RDONLY, - .result = SUCCESS, - }, - { - .descr = "sysctl_get_current_value sysctl:read E2BIG truncated", - .insns = { - /* sysctl_get_current_value arg2 (buf) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_H, BPF_REG_7, BPF_REG_0, 6), - - BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), - - /* sysctl_get_current_value arg3 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_3, 6), - - /* sysctl_get_current_value(ctx, buf, buf_len) */ - BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value), - - /* if (ret == expected && */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6), - - /* buf[0:6] == "Linux\0") */ - BPF_LD_IMM64(BPF_REG_8, - bpf_be64_to_cpu(0x4c696e7578000000ULL)), - BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), - BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), - - /* return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_A(1), - - /* else return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "kernel/ostype", - .open_flags = O_RDONLY, - .result = SUCCESS, - }, - { - .descr = "sysctl_get_current_value sysctl:read EINVAL", - .insns = { - /* sysctl_get_current_value arg2 (buf) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - - BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), - - /* sysctl_get_current_value arg3 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_3, 8), - - /* sysctl_get_current_value(ctx, buf, buf_len) */ - BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value), - - /* if (ret == expected && */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 4), - - /* buf[0:8] is NUL-filled) */ - BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), - BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2), - - /* return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_A(1), - - /* else return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "net/ipv6/conf/lo/stable_secret", /* -EIO */ - .open_flags = O_RDONLY, - .result = OP_EPERM, - }, - { - .descr = "sysctl_get_current_value sysctl:write ok", - .fixup_value_insn = 6, - .insns = { - /* sysctl_get_current_value arg2 (buf) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - - BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), - - /* sysctl_get_current_value arg3 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_3, 8), - - /* sysctl_get_current_value(ctx, buf, buf_len) */ - BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value), - - /* if (ret == expected && */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 4, 6), - - /* buf[0:4] == expected) */ - BPF_LD_IMM64(BPF_REG_8, FIXUP_SYSCTL_VALUE), - BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), - BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), - - /* return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_A(1), - - /* else return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "net/ipv4/route/mtu_expires", - .open_flags = O_WRONLY, - .newval = "600", /* same as default, should fail anyway */ - .result = OP_EPERM, - }, - { - .descr = "sysctl_get_new_value sysctl:read EINVAL", - .insns = { - /* sysctl_get_new_value arg2 (buf) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - - BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), - - /* sysctl_get_new_value arg3 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_3, 8), - - /* sysctl_get_new_value(ctx, buf, buf_len) */ - BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value), - - /* if (ret == expected) */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2), - - /* return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_A(1), - - /* else return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "net/ipv4/tcp_mem", - .open_flags = O_RDONLY, - .result = SUCCESS, - }, - { - .descr = "sysctl_get_new_value sysctl:write ok", - .insns = { - /* sysctl_get_new_value arg2 (buf) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - - BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), - - /* sysctl_get_new_value arg3 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_3, 4), - - /* sysctl_get_new_value(ctx, buf, buf_len) */ - BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value), - - /* if (ret == expected && */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 4), - - /* buf[0:4] == "606\0") */ - BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_7, 0), - BPF_JMP_IMM(BPF_JNE, BPF_REG_9, - bpf_ntohl(0x36303600), 2), - - /* return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_A(1), - - /* else return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "net/ipv4/route/mtu_expires", - .open_flags = O_WRONLY, - .newval = "606", - .result = OP_EPERM, - }, - { - .descr = "sysctl_get_new_value sysctl:write ok long", - .insns = { - /* sysctl_get_new_value arg2 (buf) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24), - - BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), - - /* sysctl_get_new_value arg3 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_3, 24), - - /* sysctl_get_new_value(ctx, buf, buf_len) */ - BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value), - - /* if (ret == expected && */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 23, 14), - - /* buf[0:8] == "3000000 " && */ - BPF_LD_IMM64(BPF_REG_8, - bpf_be64_to_cpu(0x3330303030303020ULL)), - BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), - BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 10), - - /* buf[8:16] == "4000000 " && */ - BPF_LD_IMM64(BPF_REG_8, - bpf_be64_to_cpu(0x3430303030303020ULL)), - BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8), - BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6), - - /* buf[16:24] == "6000000\0") */ - BPF_LD_IMM64(BPF_REG_8, - bpf_be64_to_cpu(0x3630303030303000ULL)), - BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 16), - BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), - - /* return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_A(1), - - /* else return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "net/ipv4/tcp_mem", - .open_flags = O_WRONLY, - .newval = "3000000 4000000 6000000", - .result = OP_EPERM, - }, - { - .descr = "sysctl_get_new_value sysctl:write E2BIG", - .insns = { - /* sysctl_get_new_value arg2 (buf) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_B, BPF_REG_7, BPF_REG_0, 3), - - BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), - - /* sysctl_get_new_value arg3 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_3, 3), - - /* sysctl_get_new_value(ctx, buf, buf_len) */ - BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value), - - /* if (ret == expected && */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 4), - - /* buf[0:3] == "60\0") */ - BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_7, 0), - BPF_JMP_IMM(BPF_JNE, BPF_REG_9, - bpf_ntohl(0x36300000), 2), - - /* return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_A(1), - - /* else return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "net/ipv4/route/mtu_expires", - .open_flags = O_WRONLY, - .newval = "606", - .result = OP_EPERM, - }, - { - .descr = "sysctl_set_new_value sysctl:read EINVAL", - .insns = { - /* sysctl_set_new_value arg2 (buf) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, - bpf_ntohl(0x36303000)), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - - BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), - - /* sysctl_set_new_value arg3 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_3, 3), - - /* sysctl_set_new_value(ctx, buf, buf_len) */ - BPF_EMIT_CALL(BPF_FUNC_sysctl_set_new_value), - - /* if (ret == expected) */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2), - - /* return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_A(1), - - /* else return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "net/ipv4/route/mtu_expires", - .open_flags = O_RDONLY, - .result = SUCCESS, - }, - { - .descr = "sysctl_set_new_value sysctl:write ok", - .fixup_value_insn = 2, - .insns = { - /* sysctl_set_new_value arg2 (buf) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_LD_IMM64(BPF_REG_0, FIXUP_SYSCTL_VALUE), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - - BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), - - /* sysctl_set_new_value arg3 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_3, 3), - - /* sysctl_set_new_value(ctx, buf, buf_len) */ - BPF_EMIT_CALL(BPF_FUNC_sysctl_set_new_value), - - /* if (ret == expected) */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), - - /* return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_A(1), - - /* else return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "net/ipv4/route/mtu_expires", - .open_flags = O_WRONLY, - .newval = "606", - .result = SUCCESS, - }, - { - "bpf_strtoul one number string", - .insns = { - /* arg1 (buf) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, - bpf_ntohl(0x36303000)), - BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0), - - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - - /* arg2 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_2, 4), - - /* arg3 (flags) */ - BPF_MOV64_IMM(BPF_REG_3, 0), - - /* arg4 (res) */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), - - BPF_EMIT_CALL(BPF_FUNC_strtoul), - - /* if (ret == expected && */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 4), - /* res == expected) */ - BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), - BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 600, 2), - - /* return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_A(1), - - /* else return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "net/ipv4/route/mtu_expires", - .open_flags = O_RDONLY, - .result = SUCCESS, - }, - { - "bpf_strtoul multi number string", - .insns = { - /* arg1 (buf) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - /* "600 602\0" */ - BPF_LD_IMM64(BPF_REG_0, - bpf_be64_to_cpu(0x3630302036303200ULL)), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - - /* arg2 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_2, 8), - - /* arg3 (flags) */ - BPF_MOV64_IMM(BPF_REG_3, 0), - - /* arg4 (res) */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), - - BPF_EMIT_CALL(BPF_FUNC_strtoul), - - /* if (ret == expected && */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 18), - /* res == expected) */ - BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), - BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 600, 16), - - /* arg1 (buf) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - - /* arg2 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_2, 8), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_0), - - /* arg3 (flags) */ - BPF_MOV64_IMM(BPF_REG_3, 0), - - /* arg4 (res) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -16), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), - - BPF_EMIT_CALL(BPF_FUNC_strtoul), - - /* if (ret == expected && */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 4, 4), - /* res == expected) */ - BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), - BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 602, 2), - - /* return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_A(1), - - /* else return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "net/ipv4/tcp_mem", - .open_flags = O_RDONLY, - .result = SUCCESS, - }, - { - "bpf_strtoul buf_len = 0, reject", - .insns = { - /* arg1 (buf) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, - bpf_ntohl(0x36303000)), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - - /* arg2 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_2, 0), - - /* arg3 (flags) */ - BPF_MOV64_IMM(BPF_REG_3, 0), - - /* arg4 (res) */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), - - BPF_EMIT_CALL(BPF_FUNC_strtoul), - - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "net/ipv4/route/mtu_expires", - .open_flags = O_RDONLY, - .result = LOAD_REJECT, - }, - { - "bpf_strtoul supported base, ok", - .insns = { - /* arg1 (buf) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, - bpf_ntohl(0x30373700)), - BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0), - - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - - /* arg2 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_2, 4), - - /* arg3 (flags) */ - BPF_MOV64_IMM(BPF_REG_3, 8), - - /* arg4 (res) */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), - - BPF_EMIT_CALL(BPF_FUNC_strtoul), - - /* if (ret == expected && */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 4), - /* res == expected) */ - BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), - BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 63, 2), - - /* return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_A(1), - - /* else return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "net/ipv4/route/mtu_expires", - .open_flags = O_RDONLY, - .result = SUCCESS, - }, - { - "bpf_strtoul unsupported base, EINVAL", - .insns = { - /* arg1 (buf) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, - bpf_ntohl(0x36303000)), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - - /* arg2 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_2, 4), - - /* arg3 (flags) */ - BPF_MOV64_IMM(BPF_REG_3, 3), - - /* arg4 (res) */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), - - BPF_EMIT_CALL(BPF_FUNC_strtoul), - - /* if (ret == expected) */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2), - - /* return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_A(1), - - /* else return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "net/ipv4/route/mtu_expires", - .open_flags = O_RDONLY, - .result = SUCCESS, - }, - { - "bpf_strtoul buf with spaces only, EINVAL", - .insns = { - /* arg1 (buf) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, - bpf_ntohl(0x0d0c0a09)), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - - /* arg2 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_2, 4), - - /* arg3 (flags) */ - BPF_MOV64_IMM(BPF_REG_3, 0), - - /* arg4 (res) */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), - - BPF_EMIT_CALL(BPF_FUNC_strtoul), - - /* if (ret == expected) */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2), - - /* return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_A(1), - - /* else return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "net/ipv4/route/mtu_expires", - .open_flags = O_RDONLY, - .result = SUCCESS, - }, - { - "bpf_strtoul negative number, EINVAL", - .insns = { - /* arg1 (buf) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - /* " -6\0" */ - BPF_MOV64_IMM(BPF_REG_0, - bpf_ntohl(0x0a2d3600)), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - - /* arg2 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_2, 4), - - /* arg3 (flags) */ - BPF_MOV64_IMM(BPF_REG_3, 0), - - /* arg4 (res) */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), - - BPF_EMIT_CALL(BPF_FUNC_strtoul), - - /* if (ret == expected) */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2), - - /* return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_A(1), - - /* else return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "net/ipv4/route/mtu_expires", - .open_flags = O_RDONLY, - .result = SUCCESS, - }, - { - "bpf_strtol negative number, ok", - .insns = { - /* arg1 (buf) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - /* " -6\0" */ - BPF_MOV64_IMM(BPF_REG_0, - bpf_ntohl(0x0a2d3600)), - BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0), - - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - - /* arg2 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_2, 4), - - /* arg3 (flags) */ - BPF_MOV64_IMM(BPF_REG_3, 10), - - /* arg4 (res) */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), - - BPF_EMIT_CALL(BPF_FUNC_strtol), - - /* if (ret == expected && */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 4), - /* res == expected) */ - BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), - BPF_JMP_IMM(BPF_JNE, BPF_REG_9, -6, 2), - - /* return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_A(1), - - /* else return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "net/ipv4/route/mtu_expires", - .open_flags = O_RDONLY, - .result = SUCCESS, - }, - { - "bpf_strtol hex number, ok", - .insns = { - /* arg1 (buf) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - /* "0xfe" */ - BPF_MOV64_IMM(BPF_REG_0, - bpf_ntohl(0x30786665)), - BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0), - - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - - /* arg2 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_2, 4), - - /* arg3 (flags) */ - BPF_MOV64_IMM(BPF_REG_3, 0), - - /* arg4 (res) */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), - - BPF_EMIT_CALL(BPF_FUNC_strtol), - - /* if (ret == expected && */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 4, 4), - /* res == expected) */ - BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), - BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 254, 2), - - /* return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_A(1), - - /* else return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "net/ipv4/route/mtu_expires", - .open_flags = O_RDONLY, - .result = SUCCESS, - }, - { - "bpf_strtol max long", - .insns = { - /* arg1 (buf) 9223372036854775807 */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24), - BPF_LD_IMM64(BPF_REG_0, - bpf_be64_to_cpu(0x3932323333373230ULL)), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - BPF_LD_IMM64(BPF_REG_0, - bpf_be64_to_cpu(0x3336383534373735ULL)), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8), - BPF_LD_IMM64(BPF_REG_0, - bpf_be64_to_cpu(0x3830370000000000ULL)), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16), - - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - - /* arg2 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_2, 19), - - /* arg3 (flags) */ - BPF_MOV64_IMM(BPF_REG_3, 0), - - /* arg4 (res) */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), - - BPF_EMIT_CALL(BPF_FUNC_strtol), - - /* if (ret == expected && */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 19, 6), - /* res == expected) */ - BPF_LD_IMM64(BPF_REG_8, 0x7fffffffffffffffULL), - BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), - BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), - - /* return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_A(1), - - /* else return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "net/ipv4/route/mtu_expires", - .open_flags = O_RDONLY, - .result = SUCCESS, - }, - { - "bpf_strtol overflow, ERANGE", - .insns = { - /* arg1 (buf) 9223372036854775808 */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24), - BPF_LD_IMM64(BPF_REG_0, - bpf_be64_to_cpu(0x3932323333373230ULL)), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - BPF_LD_IMM64(BPF_REG_0, - bpf_be64_to_cpu(0x3336383534373735ULL)), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8), - BPF_LD_IMM64(BPF_REG_0, - bpf_be64_to_cpu(0x3830380000000000ULL)), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16), - - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - - /* arg2 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_2, 19), - - /* arg3 (flags) */ - BPF_MOV64_IMM(BPF_REG_3, 0), - - /* arg4 (res) */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), - - BPF_EMIT_CALL(BPF_FUNC_strtol), - - /* if (ret == expected) */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -ERANGE, 2), - - /* return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_A(1), - - /* else return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "net/ipv4/route/mtu_expires", - .open_flags = O_RDONLY, - .result = SUCCESS, - }, - { - "C prog: deny all writes", - .prog_file = "./test_sysctl_prog.bpf.o", - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "net/ipv4/tcp_mem", - .open_flags = O_WRONLY, - .newval = "123 456 789", - .result = OP_EPERM, - }, - { - "C prog: deny access by name", - .prog_file = "./test_sysctl_prog.bpf.o", - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "net/ipv4/route/mtu_expires", - .open_flags = O_RDONLY, - .result = OP_EPERM, - }, - { - "C prog: read tcp_mem", - .prog_file = "./test_sysctl_prog.bpf.o", - .attach_type = BPF_CGROUP_SYSCTL, - .sysctl = "net/ipv4/tcp_mem", - .open_flags = O_RDONLY, - .result = SUCCESS, - }, -}; - -static size_t probe_prog_length(const struct bpf_insn *fp) -{ - size_t len; - - for (len = MAX_INSNS - 1; len > 0; --len) - if (fp[len].code != 0 || fp[len].imm != 0) - break; - return len + 1; -} - -static int fixup_sysctl_value(const char *buf, size_t buf_len, - struct bpf_insn *prog, size_t insn_num) -{ - union { - uint8_t raw[sizeof(uint64_t)]; - uint64_t num; - } value = {}; - - if (buf_len > sizeof(value)) { - log_err("Value is too big (%zd) to use in fixup", buf_len); - return -1; - } - if (prog[insn_num].code != (BPF_LD | BPF_DW | BPF_IMM)) { - log_err("Can fixup only BPF_LD_IMM64 insns"); - return -1; - } - - memcpy(value.raw, buf, buf_len); - prog[insn_num].imm = (uint32_t)value.num; - prog[insn_num + 1].imm = (uint32_t)(value.num >> 32); - - return 0; -} - -static int load_sysctl_prog_insns(struct sysctl_test *test, - const char *sysctl_path) -{ - struct bpf_insn *prog = test->insns; - LIBBPF_OPTS(bpf_prog_load_opts, opts); - int ret, insn_cnt; - - insn_cnt = probe_prog_length(prog); - - if (test->fixup_value_insn) { - char buf[128]; - ssize_t len; - int fd; - - fd = open(sysctl_path, O_RDONLY | O_CLOEXEC); - if (fd < 0) { - log_err("open(%s) failed", sysctl_path); - return -1; - } - len = read(fd, buf, sizeof(buf)); - if (len == -1) { - log_err("read(%s) failed", sysctl_path); - close(fd); - return -1; - } - close(fd); - if (fixup_sysctl_value(buf, len, prog, test->fixup_value_insn)) - return -1; - } - - opts.log_buf = bpf_log_buf; - opts.log_size = BPF_LOG_BUF_SIZE; - - ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SYSCTL, NULL, "GPL", prog, insn_cnt, &opts); - if (ret < 0 && test->result != LOAD_REJECT) { - log_err(">>> Loading program error.\n" - ">>> Verifier output:\n%s\n-------\n", bpf_log_buf); - } - - return ret; -} - -static int load_sysctl_prog_file(struct sysctl_test *test) -{ - struct bpf_object *obj; - int prog_fd; - - if (bpf_prog_test_load(test->prog_file, BPF_PROG_TYPE_CGROUP_SYSCTL, &obj, &prog_fd)) { - if (test->result != LOAD_REJECT) - log_err(">>> Loading program (%s) error.\n", - test->prog_file); - return -1; - } - - return prog_fd; -} - -static int load_sysctl_prog(struct sysctl_test *test, const char *sysctl_path) -{ - return test->prog_file - ? load_sysctl_prog_file(test) - : load_sysctl_prog_insns(test, sysctl_path); -} - -static int access_sysctl(const char *sysctl_path, - const struct sysctl_test *test) -{ - int err = 0; - int fd; - - fd = open(sysctl_path, test->open_flags | O_CLOEXEC); - if (fd < 0) - return fd; - - if (test->seek && lseek(fd, test->seek, SEEK_SET) == -1) { - log_err("lseek(%d) failed", test->seek); - goto err; - } - - if (test->open_flags == O_RDONLY) { - char buf[128]; - - if (read(fd, buf, sizeof(buf)) == -1) - goto err; - if (test->oldval && - strncmp(buf, test->oldval, strlen(test->oldval))) { - log_err("Read value %s != %s", buf, test->oldval); - goto err; - } - } else if (test->open_flags == O_WRONLY) { - if (!test->newval) { - log_err("New value for sysctl is not set"); - goto err; - } - if (write(fd, test->newval, strlen(test->newval)) == -1) - goto err; - } else { - log_err("Unexpected sysctl access: neither read nor write"); - goto err; - } - - goto out; -err: - err = -1; -out: - close(fd); - return err; -} - -static int run_test_case(int cgfd, struct sysctl_test *test) -{ - enum bpf_attach_type atype = test->attach_type; - char sysctl_path[128]; - int progfd = -1; - int err = 0; - - printf("Test case: %s .. ", test->descr); - - snprintf(sysctl_path, sizeof(sysctl_path), "/proc/sys/%s", - test->sysctl); - - progfd = load_sysctl_prog(test, sysctl_path); - if (progfd < 0) { - if (test->result == LOAD_REJECT) - goto out; - else - goto err; - } - - if (bpf_prog_attach(progfd, cgfd, atype, BPF_F_ALLOW_OVERRIDE) < 0) { - if (test->result == ATTACH_REJECT) - goto out; - else - goto err; - } - - errno = 0; - if (access_sysctl(sysctl_path, test) == -1) { - if (test->result == OP_EPERM && errno == EPERM) - goto out; - else - goto err; - } - - if (test->result != SUCCESS) { - log_err("Unexpected success"); - goto err; - } - - goto out; -err: - err = -1; -out: - /* Detaching w/o checking return code: best effort attempt. */ - if (progfd != -1) - bpf_prog_detach(cgfd, atype); - close(progfd); - printf("[%s]\n", err ? "FAIL" : "PASS"); - return err; -} - -static int run_tests(int cgfd) -{ - int passes = 0; - int fails = 0; - int i; - - for (i = 0; i < ARRAY_SIZE(tests); ++i) { - if (run_test_case(cgfd, &tests[i])) - ++fails; - else - ++passes; - } - printf("Summary: %d PASSED, %d FAILED\n", passes, fails); - return fails ? -1 : 0; -} - -int main(int argc, char **argv) -{ - int cgfd = -1; - int err = 0; - - cgfd = cgroup_setup_and_join(CG_PATH); - if (cgfd < 0) - goto err; - - /* Use libbpf 1.0 API mode */ - libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - - if (run_tests(cgfd)) - goto err; - - goto out; -err: - err = -1; -out: - close(cgfd); - cleanup_cgroup_environment(); - return err; -} -- cgit v1.2.3 From e1ca44e85f652a6ebd657c67c394894c1fdfb403 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 18 Jun 2025 21:13:56 -0700 Subject: af_unix: Add test for consecutive consumed OOB. Let's add a test case where consecutive concumed OOB skbs stay at the head of the queue. Without the previous patch, ioctl(SIOCATMARK) assertion fails. Before: # RUN msg_oob.no_peek.ex_oob_ex_oob_oob ... # msg_oob.c:305:ex_oob_ex_oob_oob:Expected answ[0] (0) == oob_head (1) # ex_oob_ex_oob_oob: Test terminated by assertion # FAIL msg_oob.no_peek.ex_oob_ex_oob_oob not ok 12 msg_oob.no_peek.ex_oob_ex_oob_oob After: # RUN msg_oob.no_peek.ex_oob_ex_oob_oob ... # OK msg_oob.no_peek.ex_oob_ex_oob_oob ok 12 msg_oob.no_peek.ex_oob_ex_oob_oob Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250619041457.1132791-3-kuni1840@gmail.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/af_unix/msg_oob.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c index 3ed3882a93b8..918509a3f040 100644 --- a/tools/testing/selftests/net/af_unix/msg_oob.c +++ b/tools/testing/selftests/net/af_unix/msg_oob.c @@ -548,6 +548,29 @@ TEST_F(msg_oob, ex_oob_oob) siocatmarkpair(false); } +TEST_F(msg_oob, ex_oob_ex_oob_oob) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("x", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("y", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + sendpair("z", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); +} + TEST_F(msg_oob, ex_oob_ahead_break) { sendpair("hello", 5, MSG_OOB); -- cgit v1.2.3 From 632f55fa60c481035297739ecd374d945c9b32c7 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 18 Jun 2025 21:13:58 -0700 Subject: selftest: af_unix: Add tests for -ECONNRESET. A new function resetpair() calls close() for the receiver and checks the return value from recv() on the initial sender side. Now resetpair() is added to each test case and some additional test cases. Note that TCP sets -ECONNRESET to the consumed OOB, but we have decided not to touch TCP MSG_OOB code in the past. Before: # RUN msg_oob.no_peek.ex_oob_ex_oob ... # msg_oob.c:236:ex_oob_ex_oob:AF_UNIX :Connection reset by peer # msg_oob.c:237:ex_oob_ex_oob:Expected: # msg_oob.c:239:ex_oob_ex_oob:Expected ret[0] (-1) == expected_len (0) # ex_oob_ex_oob: Test terminated by assertion # FAIL msg_oob.no_peek.ex_oob_ex_oob not ok 14 msg_oob.no_peek.ex_oob_ex_oob ... # FAILED: 36 / 48 tests passed. # Totals: pass:36 fail:12 xfail:0 xpass:0 skip:0 error:0 After: # RUN msg_oob.no_peek.ex_oob_ex_oob ... # msg_oob.c:244:ex_oob_ex_oob:AF_UNIX : # msg_oob.c:245:ex_oob_ex_oob:TCP :Connection reset by peer # OK msg_oob.no_peek.ex_oob_ex_oob ok 14 msg_oob.no_peek.ex_oob_ex_oob ... # PASSED: 48 / 48 tests passed. # Totals: pass:48 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250619041457.1132791-5-kuni1840@gmail.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/af_unix/msg_oob.c | 119 +++++++++++++++++++++++++- 1 file changed, 115 insertions(+), 4 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c index 918509a3f040..b5f474969917 100644 --- a/tools/testing/selftests/net/af_unix/msg_oob.c +++ b/tools/testing/selftests/net/af_unix/msg_oob.c @@ -210,7 +210,7 @@ static void __sendpair(struct __test_metadata *_metadata, static void __recvpair(struct __test_metadata *_metadata, FIXTURE_DATA(msg_oob) *self, const char *expected_buf, int expected_len, - int buf_len, int flags) + int buf_len, int flags, bool is_sender) { int i, ret[2], recv_errno[2], expected_errno = 0; char recv_buf[2][BUF_SZ] = {}; @@ -221,7 +221,9 @@ static void __recvpair(struct __test_metadata *_metadata, errno = 0; for (i = 0; i < 2; i++) { - ret[i] = recv(self->fd[i * 2 + 1], recv_buf[i], buf_len, flags); + int index = is_sender ? i * 2 : i * 2 + 1; + + ret[i] = recv(self->fd[index], recv_buf[i], buf_len, flags); recv_errno[i] = errno; } @@ -308,6 +310,20 @@ static void __siocatmarkpair(struct __test_metadata *_metadata, ASSERT_EQ(answ[0], answ[1]); } +static void __resetpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self, + const FIXTURE_VARIANT(msg_oob) *variant, + bool reset) +{ + int i; + + for (i = 0; i < 2; i++) + close(self->fd[i * 2 + 1]); + + __recvpair(_metadata, self, "", reset ? -ECONNRESET : 0, 1, + variant->peek ? MSG_PEEK : 0, true); +} + #define sendpair(buf, len, flags) \ __sendpair(_metadata, self, buf, len, flags) @@ -316,9 +332,10 @@ static void __siocatmarkpair(struct __test_metadata *_metadata, if (variant->peek) \ __recvpair(_metadata, self, \ expected_buf, expected_len, \ - buf_len, (flags) | MSG_PEEK); \ + buf_len, (flags) | MSG_PEEK, false); \ __recvpair(_metadata, self, \ - expected_buf, expected_len, buf_len, flags); \ + expected_buf, expected_len, \ + buf_len, flags, false); \ } while (0) #define epollpair(oob_remaining) \ @@ -330,6 +347,9 @@ static void __siocatmarkpair(struct __test_metadata *_metadata, #define setinlinepair() \ __setinlinepair(_metadata, self) +#define resetpair(reset) \ + __resetpair(_metadata, self, variant, reset) + #define tcp_incompliant \ for (self->tcp_compliant = false; \ self->tcp_compliant == false; \ @@ -344,6 +364,21 @@ TEST_F(msg_oob, non_oob) recvpair("", -EINVAL, 1, MSG_OOB); epollpair(false); siocatmarkpair(false); + + resetpair(true); +} + +TEST_F(msg_oob, non_oob_no_reset) +{ + sendpair("x", 1, 0); + epollpair(false); + siocatmarkpair(false); + + recvpair("x", 1, 1, 0); + epollpair(false); + siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, oob) @@ -355,6 +390,19 @@ TEST_F(msg_oob, oob) recvpair("x", 1, 1, MSG_OOB); epollpair(false); siocatmarkpair(true); + + tcp_incompliant { + resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */ + } +} + +TEST_F(msg_oob, oob_reset) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + resetpair(true); } TEST_F(msg_oob, oob_drop) @@ -370,6 +418,8 @@ TEST_F(msg_oob, oob_drop) recvpair("", -EINVAL, 1, MSG_OOB); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, oob_ahead) @@ -385,6 +435,10 @@ TEST_F(msg_oob, oob_ahead) recvpair("hell", 4, 4, 0); epollpair(false); siocatmarkpair(true); + + tcp_incompliant { + resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */ + } } TEST_F(msg_oob, oob_break) @@ -403,6 +457,8 @@ TEST_F(msg_oob, oob_break) recvpair("", -EAGAIN, 1, 0); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, oob_ahead_break) @@ -426,6 +482,8 @@ TEST_F(msg_oob, oob_ahead_break) recvpair("world", 5, 5, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, oob_break_drop) @@ -449,6 +507,8 @@ TEST_F(msg_oob, oob_break_drop) recvpair("", -EINVAL, 1, MSG_OOB); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, ex_oob_break) @@ -476,6 +536,8 @@ TEST_F(msg_oob, ex_oob_break) recvpair("ld", 2, 2, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, ex_oob_drop) @@ -498,6 +560,8 @@ TEST_F(msg_oob, ex_oob_drop) epollpair(false); siocatmarkpair(true); } + + resetpair(false); } TEST_F(msg_oob, ex_oob_drop_2) @@ -523,6 +587,8 @@ TEST_F(msg_oob, ex_oob_drop_2) epollpair(false); siocatmarkpair(true); } + + resetpair(false); } TEST_F(msg_oob, ex_oob_oob) @@ -546,6 +612,31 @@ TEST_F(msg_oob, ex_oob_oob) recvpair("", -EINVAL, 1, MSG_OOB); epollpair(false); siocatmarkpair(false); + + resetpair(false); +} + +TEST_F(msg_oob, ex_oob_ex_oob) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("x", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("y", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + tcp_incompliant { + resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */ + } } TEST_F(msg_oob, ex_oob_ex_oob_oob) @@ -599,6 +690,10 @@ TEST_F(msg_oob, ex_oob_ahead_break) recvpair("d", 1, 1, MSG_OOB); epollpair(false); siocatmarkpair(true); + + tcp_incompliant { + resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */ + } } TEST_F(msg_oob, ex_oob_siocatmark) @@ -618,6 +713,8 @@ TEST_F(msg_oob, ex_oob_siocatmark) recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */ epollpair(true); siocatmarkpair(false); + + resetpair(true); } TEST_F(msg_oob, inline_oob) @@ -635,6 +732,8 @@ TEST_F(msg_oob, inline_oob) recvpair("x", 1, 1, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_oob_break) @@ -656,6 +755,8 @@ TEST_F(msg_oob, inline_oob_break) recvpair("o", 1, 1, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_oob_ahead_break) @@ -684,6 +785,8 @@ TEST_F(msg_oob, inline_oob_ahead_break) epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_ex_oob_break) @@ -709,6 +812,8 @@ TEST_F(msg_oob, inline_ex_oob_break) recvpair("rld", 3, 3, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_ex_oob_no_drop) @@ -730,6 +835,8 @@ TEST_F(msg_oob, inline_ex_oob_no_drop) recvpair("y", 1, 1, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_ex_oob_drop) @@ -754,6 +861,8 @@ TEST_F(msg_oob, inline_ex_oob_drop) epollpair(false); siocatmarkpair(false); } + + resetpair(false); } TEST_F(msg_oob, inline_ex_oob_siocatmark) @@ -775,6 +884,8 @@ TEST_F(msg_oob, inline_ex_oob_siocatmark) recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */ epollpair(true); siocatmarkpair(false); + + resetpair(true); } TEST_HARNESS_MAIN -- cgit v1.2.3 From 67fcec2919e4ed31ab845eb456ad7d6f1e85505c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 24 Jun 2025 15:48:49 +0200 Subject: fcntl/pidfd: redefine PIDFD_SELF_THREAD_GROUP Don't jump somewhere into the middle of the reserved range. We're still able to change that value it won't be that widely used yet. If not, we can revert. Signed-off-by: Christian Brauner --- include/uapi/linux/fcntl.h | 2 +- tools/testing/selftests/pidfd/pidfd.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index ed02d8ae0667..ba4a698d2f33 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -108,7 +108,7 @@ * group leader... */ #define PIDFD_SELF_THREAD -10000 /* Current thread. */ -#define PIDFD_SELF_THREAD_GROUP -20000 /* Current thread group leader. */ +#define PIDFD_SELF_THREAD_GROUP -10001 /* Current thread group leader. */ /* Generic flags for the *at(2) family of syscalls. */ diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index efd74063126e..5dfeb1bdf399 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -56,7 +56,7 @@ #endif #ifndef PIDFD_SELF_THREAD_GROUP -#define PIDFD_SELF_THREAD_GROUP -20000 /* Current thread group leader. */ +#define PIDFD_SELF_THREAD_GROUP -10001 /* Current thread group leader. */ #endif #ifndef PIDFD_SELF -- cgit v1.2.3 From 914e6b1e85c5715ca2e7ec6293c05c71e9a98e86 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 24 Jun 2025 10:29:14 +0200 Subject: selftests/pidfd: decode pidfd file handles withou having to specify an fd Link: https://lore.kernel.org/20250624-work-pidfs-fhandle-v2-11-d02a04858fe3@kernel.org Reviewed-by: Jan Kara Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- tools/testing/selftests/pidfd/Makefile | 2 +- tools/testing/selftests/pidfd/pidfd.h | 4 ++ .../selftests/pidfd/pidfd_file_handle_test.c | 60 ++++++++++++++++++++++ 3 files changed, 65 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile index 03a6eede9c9e..764a8f9ecefa 100644 --- a/tools/testing/selftests/pidfd/Makefile +++ b/tools/testing/selftests/pidfd/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -CFLAGS += -g $(KHDR_INCLUDES) -pthread -Wall +CFLAGS += -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES) -pthread -Wall TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \ pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test \ diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index 5dfeb1bdf399..cd244d0860ff 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -19,6 +19,10 @@ #include "../kselftest.h" #include "../clone3/clone3_selftests.h" +#ifndef FD_PIDFS_ROOT +#define FD_PIDFS_ROOT -10002 +#endif + #ifndef P_PIDFD #define P_PIDFD 3 #endif diff --git a/tools/testing/selftests/pidfd/pidfd_file_handle_test.c b/tools/testing/selftests/pidfd/pidfd_file_handle_test.c index 439b9c6c0457..6bd2e9c9565b 100644 --- a/tools/testing/selftests/pidfd/pidfd_file_handle_test.c +++ b/tools/testing/selftests/pidfd/pidfd_file_handle_test.c @@ -500,4 +500,64 @@ TEST_F(file_handle, valid_name_to_handle_at_flags) ASSERT_EQ(close(pidfd), 0); } +/* + * That we decode a file handle without having to pass a pidfd. + */ +TEST_F(file_handle, decode_purely_based_on_file_handle) +{ + int mnt_id; + struct file_handle *fh; + int pidfd = -EBADF; + struct stat st1, st2; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd1, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + ASSERT_EQ(fstat(self->child_pidfd1, &st1), 0); + + pidfd = open_by_handle_at(FD_PIDFS_ROOT, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(FD_PIDFS_ROOT, fh, O_CLOEXEC); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(FD_PIDFS_ROOT, fh, O_NONBLOCK); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(-EBADF, fh, 0); + ASSERT_LT(pidfd, 0); + + pidfd = open_by_handle_at(AT_FDCWD, fh, 0); + ASSERT_LT(pidfd, 0); + + free(fh); +} + TEST_HARNESS_MAIN -- cgit v1.2.3 From 818625570558cd91082c9bafd6f2b59b73241a69 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 24 Jun 2025 11:00:45 -0700 Subject: iommufd/selftest: Fix iommufd_dirty_tracking with large hugepage sizes The hugepage test cases of iommufd_dirty_tracking have the 64MB and 128MB coverages. Both of them are smaller than the default hugepage size 512MB, when CONFIG_PAGE_SIZE_64KB=y. However, these test cases have a variant of using huge pages, which would mmap(MAP_HUGETLB) using these smaller sizes than the system hugepag size. This results in the kernel aligning up the smaller size to 512MB. If a memory was located between the upper 64/128MB size boundary and the hugepage 512MB boundary, it would get wiped out: https://lore.kernel.org/all/aEoUhPYIAizTLADq@nvidia.com/ Given that this aligning up behavior is well documented, we have no choice but to allocate a hugepage aligned size to avoid this unintended wipe out. Instead of relying on the kernel's internal force alignment, pass the same size to posix_memalign() and map(). Also, fix the FIXTURE_TEARDOWN() misusing munmap() to free the memory from posix_memalign(), as munmap() doesn't destroy the allocator meta data. So, call free() instead. Fixes: a9af47e382a4 ("iommufd/selftest: Test IOMMU_HWPT_GET_DIRTY_BITMAP") Link: https://patch.msgid.link/r/1ea8609ae6d523fdd4d8efb179ddee79c8582cb6.1750787928.git.nicolinc@nvidia.com Cc: stable@vger.kernel.org Suggested-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- tools/testing/selftests/iommu/iommufd.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 1a8e85afe9aa..e7eb11a94034 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -2008,6 +2008,7 @@ FIXTURE_VARIANT(iommufd_dirty_tracking) FIXTURE_SETUP(iommufd_dirty_tracking) { + size_t mmap_buffer_size; unsigned long size; int mmap_flags; void *vrc; @@ -2022,22 +2023,33 @@ FIXTURE_SETUP(iommufd_dirty_tracking) self->fd = open("/dev/iommu", O_RDWR); ASSERT_NE(-1, self->fd); - rc = posix_memalign(&self->buffer, HUGEPAGE_SIZE, variant->buffer_size); - if (rc || !self->buffer) { - SKIP(return, "Skipping buffer_size=%lu due to errno=%d", - variant->buffer_size, rc); - } - mmap_flags = MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED; + mmap_buffer_size = variant->buffer_size; if (variant->hugepages) { /* * MAP_POPULATE will cause the kernel to fail mmap if THPs are * not available. */ mmap_flags |= MAP_HUGETLB | MAP_POPULATE; + + /* + * Allocation must be aligned to the HUGEPAGE_SIZE, because the + * following mmap() will automatically align the length to be a + * multiple of the underlying huge page size. Failing to do the + * same at this allocation will result in a memory overwrite by + * the mmap(). + */ + if (mmap_buffer_size < HUGEPAGE_SIZE) + mmap_buffer_size = HUGEPAGE_SIZE; + } + + rc = posix_memalign(&self->buffer, HUGEPAGE_SIZE, mmap_buffer_size); + if (rc || !self->buffer) { + SKIP(return, "Skipping buffer_size=%lu due to errno=%d", + mmap_buffer_size, rc); } assert((uintptr_t)self->buffer % HUGEPAGE_SIZE == 0); - vrc = mmap(self->buffer, variant->buffer_size, PROT_READ | PROT_WRITE, + vrc = mmap(self->buffer, mmap_buffer_size, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); assert(vrc == self->buffer); @@ -2066,8 +2078,8 @@ FIXTURE_SETUP(iommufd_dirty_tracking) FIXTURE_TEARDOWN(iommufd_dirty_tracking) { - munmap(self->buffer, variant->buffer_size); - munmap(self->bitmap, DIV_ROUND_UP(self->bitmap_size, BITS_PER_BYTE)); + free(self->buffer); + free(self->bitmap); teardown_iommufd(self->fd, _metadata); } -- cgit v1.2.3 From 4b75e3babb85238912f50bbe0647bae08242a9b6 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 24 Jun 2025 11:00:46 -0700 Subject: iommufd/selftest: Add missing close(mfd) in memfd_mmap() Do not forget to close mfd in the error paths, since none of the callers would close it when ASSERT_NE(MAP_FAILED, buf) fails. Fixes: 0bcceb1f51c7 ("iommufd: Selftest coverage for IOMMU_IOAS_MAP_FILE") Link: https://patch.msgid.link/r/a363a69dbf453d4bc1bde276f3b16778620488e1.1750787928.git.nicolinc@nvidia.com Cc: stable@vger.kernel.org Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- tools/testing/selftests/iommu/iommufd_utils.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index 72f6636e5d90..6e967b58acfd 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -60,13 +60,18 @@ static inline void *memfd_mmap(size_t length, int prot, int flags, int *mfd_p) { int mfd_flags = (flags & MAP_HUGETLB) ? MFD_HUGETLB : 0; int mfd = memfd_create("buffer", mfd_flags); + void *buf = MAP_FAILED; if (mfd <= 0) return MAP_FAILED; if (ftruncate(mfd, length)) - return MAP_FAILED; + goto out; *mfd_p = mfd; - return mmap(0, length, prot, flags, mfd, 0); + buf = mmap(0, length, prot, flags, mfd, 0); +out: + if (buf == MAP_FAILED) + close(mfd); + return buf; } /* -- cgit v1.2.3 From a9bf67ee170514b17541038c60bb94cb2cf5732f Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 24 Jun 2025 11:00:47 -0700 Subject: iommufd/selftest: Add asserts testing global mfd The mfd and mfd_buffer will be used in the tests directly without an extra check. Test them in setup_sizes() to ensure they are safe to use. Fixes: 0bcceb1f51c7 ("iommufd: Selftest coverage for IOMMU_IOAS_MAP_FILE") Link: https://patch.msgid.link/r/94bdc11d2b6d5db337b1361c5e5fce0ed494bb40.1750787928.git.nicolinc@nvidia.com Cc: stable@vger.kernel.org Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- tools/testing/selftests/iommu/iommufd.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index e7eb11a94034..e61218c0537f 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -54,6 +54,8 @@ static __attribute__((constructor)) void setup_sizes(void) mfd_buffer = memfd_mmap(BUFFER_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, &mfd); + assert(mfd_buffer != MAP_FAILED); + assert(mfd > 0); } FIXTURE(iommufd) -- cgit v1.2.3 From 9a96876e3c6578031fa5dc5dde7759d383b2fb75 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 24 Jun 2025 11:00:48 -0700 Subject: iommufd/selftest: Fix build warnings due to uninitialized mfd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 869c788909b9 ("selftests: harness: Stop using setjmp()/longjmp()") changed the harness structure. For some unknown reason, two build warnings occur to the iommufd selftest: iommufd.c: In function ‘wrapper_iommufd_mock_domain_all_aligns’: iommufd.c:1807:17: warning: ‘mfd’ may be used uninitialized in this function 1807 | close(mfd); | ^~~~~~~~~~ iommufd.c:1767:13: note: ‘mfd’ was declared here 1767 | int mfd; | ^~~ iommufd.c: In function ‘wrapper_iommufd_mock_domain_all_aligns_copy’: iommufd.c:1870:17: warning: ‘mfd’ may be used uninitialized in this function 1870 | close(mfd); | ^~~~~~~~~~ iommufd.c:1819:13: note: ‘mfd’ was declared here 1819 | int mfd; | ^~~ All the mfd have been used in the variant->file path only, so it's likely a false alarm. FWIW, the commit mentioned above does not cause this, yet it might affect gcc in a certain way that resulted in the warnings. It is also found that ading a dummy setjmp (which doesn't make sense) could mute the warnings: https://lore.kernel.org/all/aEi8DV+ReF3v3Rlf@nvidia.com/ The job of this selftest is to catch kernel bug, while such warnings will unlikely disrupt its role. Mute the warning by force initializing the mfd and add an ASSERT_GT(). Link: https://patch.msgid.link/r/6951d85d5cd34cbf22abab7714542654e63ecc44.1750787928.git.nicolinc@nvidia.com Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- tools/testing/selftests/iommu/iommufd.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index e61218c0537f..1926ef6b40ab 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -1748,13 +1748,15 @@ TEST_F(iommufd_mock_domain, all_aligns) unsigned int end; uint8_t *buf; int prot = PROT_READ | PROT_WRITE; - int mfd; + int mfd = -1; if (variant->file) buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd); else buf = mmap(0, buf_size, prot, self->mmap_flags, -1, 0); ASSERT_NE(MAP_FAILED, buf); + if (variant->file) + ASSERT_GT(mfd, 0); check_refs(buf, buf_size, 0); /* @@ -1800,13 +1802,15 @@ TEST_F(iommufd_mock_domain, all_aligns_copy) unsigned int end; uint8_t *buf; int prot = PROT_READ | PROT_WRITE; - int mfd; + int mfd = -1; if (variant->file) buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd); else buf = mmap(0, buf_size, prot, self->mmap_flags, -1, 0); ASSERT_NE(MAP_FAILED, buf); + if (variant->file) + ASSERT_GT(mfd, 0); check_refs(buf, buf_size, 0); /* -- cgit v1.2.3 From 0048ca5e9945f487fc055dad987ee4c7fdc1ed18 Mon Sep 17 00:00:00 2001 From: Chenyi Qiang Date: Fri, 20 Jun 2025 14:22:18 +0800 Subject: KVM: selftests: Add back the missing check of MONITOR/MWAIT availability The revamp of monitor/mwait test missed the original check of feature availability [*]. If MONITOR/MWAIT is not supported or is disabled by IA32_MISC_ENABLE on the host, executing MONITOR or MWAIT instruction from guest doesn't cause monitor/mwait VM exits, but a #UD. [*] https://lore.kernel.org/all/20240411210237.34646-1-zide.chen@intel.com/ Reported-by: Xuelian Guo Fixes: 80fd663590cf ("selftests: kvm: revamp MONITOR/MWAIT tests") Signed-off-by: Chenyi Qiang Link: https://lore.kernel.org/r/20250620062219.342930-1-chenyi.qiang@intel.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/x86/monitor_mwait_test.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c index 390ae2d87493..0eb371c62ab8 100644 --- a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c +++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c @@ -74,6 +74,7 @@ int main(int argc, char *argv[]) int testcase; char test[80]; + TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2)); ksft_print_header(); -- cgit v1.2.3 From e1d794541b3f17c37f66d4134ee7c044fac4dc87 Mon Sep 17 00:00:00 2001 From: Harishankar Vishwanathan Date: Mon, 23 Jun 2025 00:03:57 -0400 Subject: selftests/bpf: Add testcases for BPF_ADD and BPF_SUB The previous commit improves the precision in scalar(32)_min_max_add, and scalar(32)_min_max_sub. The improvement in precision occurs in cases when all outcomes overflow or underflow, respectively. This commit adds selftests that exercise those cases. This commit also adds selftests for cases where the output register state bounds for u(32)_min/u(32)_max are conservatively set to unbounded (when there is partial overflow or underflow). Signed-off-by: Harishankar Vishwanathan Co-developed-by: Matan Shachnai Signed-off-by: Matan Shachnai Suggested-by: Eduard Zingerman Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250623040359.343235-3-harishankar.vishwanathan@gmail.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/progs/verifier_bounds.c | 161 +++++++++++++++++++++ 1 file changed, 161 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index 30e16153fdf1..e52a24e15b34 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -1371,4 +1371,165 @@ __naked void mult_sign_ovf(void) __imm(bpf_skb_store_bytes) : __clobber_all); } + +SEC("socket") +__description("64-bit addition, all outcomes overflow") +__success __log_level(2) +__msg("5: (0f) r3 += r3 {{.*}} R3_w=scalar(umin=0x4000000000000000,umax=0xfffffffffffffffe)") +__retval(0) +__naked void add64_full_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r4 = r0;" + "r3 = 0xa000000000000000 ll;" + "r3 |= r4;" + "r3 += r3;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("64-bit addition, partial overflow, result in unbounded reg") +__success __log_level(2) +__msg("4: (0f) r3 += r3 {{.*}} R3_w=scalar()") +__retval(0) +__naked void add64_partial_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r4 = r0;" + "r3 = 2;" + "r3 |= r4;" + "r3 += r3;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("32-bit addition overflow, all outcomes overflow") +__success __log_level(2) +__msg("4: (0c) w3 += w3 {{.*}} R3_w=scalar(smin=umin=umin32=0x40000000,smax=umax=umax32=0xfffffffe,var_off=(0x0; 0xffffffff))") +__retval(0) +__naked void add32_full_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "w4 = w0;" + "w3 = 0xa0000000;" + "w3 |= w4;" + "w3 += w3;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("32-bit addition, partial overflow, result in unbounded u32 bounds") +__success __log_level(2) +__msg("4: (0c) w3 += w3 {{.*}} R3_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))") +__retval(0) +__naked void add32_partial_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "w4 = w0;" + "w3 = 2;" + "w3 |= w4;" + "w3 += w3;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("64-bit subtraction, all outcomes underflow") +__success __log_level(2) +__msg("6: (1f) r3 -= r1 {{.*}} R3_w=scalar(umin=1,umax=0x8000000000000000)") +__retval(0) +__naked void sub64_full_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r1 = r0;" + "r2 = 0x8000000000000000 ll;" + "r1 |= r2;" + "r3 = 0;" + "r3 -= r1;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("64-bit subtration, partial overflow, result in unbounded reg") +__success __log_level(2) +__msg("3: (1f) r3 -= r2 {{.*}} R3_w=scalar()") +__retval(0) +__naked void sub64_partial_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r3 = r0;" + "r2 = 1;" + "r3 -= r2;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("32-bit subtraction overflow, all outcomes underflow") +__success __log_level(2) +__msg("5: (1c) w3 -= w1 {{.*}} R3_w=scalar(smin=umin=umin32=1,smax=umax=umax32=0x80000000,var_off=(0x0; 0xffffffff))") +__retval(0) +__naked void sub32_full_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "w1 = w0;" + "w2 = 0x80000000;" + "w1 |= w2;" + "w3 = 0;" + "w3 -= w1;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("32-bit subtration, partial overflow, result in unbounded u32 bounds") +__success __log_level(2) +__msg("3: (1c) w3 -= w2 {{.*}} R3_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))") +__retval(0) +__naked void sub32_partial_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "w3 = w0;" + "w2 = 1;" + "w3 -= w2;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 5223372e672eaa576c892984b438875426d8ff2b Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 23 Jun 2025 09:19:27 +0800 Subject: selftests: ublk: don't take same backing file for more than one ublk devices Don't use same backing file for more than one ublk devices, and avoid concurrent write on same file from more ublk disks. Fixes: 8ccebc19ee3d ("selftests: ublk: support UBLK_F_AUTO_BUF_REG") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250623011934.741788-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/test_stress_03.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/ublk/test_stress_03.sh b/tools/testing/selftests/ublk/test_stress_03.sh index 6eef282d569f..3ed4c9b2d8c0 100755 --- a/tools/testing/selftests/ublk/test_stress_03.sh +++ b/tools/testing/selftests/ublk/test_stress_03.sh @@ -32,22 +32,23 @@ _create_backfile 2 128M ublk_io_and_remove 8G -t null -q 4 -z & ublk_io_and_remove 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" & ublk_io_and_remove 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait if _have_feature "AUTO_BUF_REG"; then ublk_io_and_remove 8G -t null -q 4 --auto_zc & ublk_io_and_remove 256M -t loop -q 4 --auto_zc "${UBLK_BACKFILES[0]}" & ublk_io_and_remove 256M -t stripe -q 4 --auto_zc "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & ublk_io_and_remove 8G -t null -q 4 -z --auto_zc --auto_zc_fallback & + wait fi -wait if _have_feature "PER_IO_DAEMON"; then ublk_io_and_remove 8G -t null -q 4 --auto_zc --nthreads 8 --per_io_tasks & ublk_io_and_remove 256M -t loop -q 4 --auto_zc --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[0]}" & ublk_io_and_remove 256M -t stripe -q 4 --auto_zc --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & ublk_io_and_remove 8G -t null -q 4 -z --auto_zc --auto_zc_fallback --nthreads 8 --per_io_tasks & + wait fi -wait _cleanup_test "stress" _show_result $TID $ERR_CODE -- cgit v1.2.3 From 3b16e77e0706eb6cdfc850956ee1ed5e5c36c6ca Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 6 Jun 2025 06:29:19 -0700 Subject: rcutorture: Make BUSTED scenario check and log readers Because the BUSTED scenario intentionally executes too-short readers, this commit enables the RCU_TORTURE_TEST_CHK_RDR_STATE, RCU_TORTURE_TEST_LOG_CPU, and RCU_TORTURE_TEST_LOG_GP Kconfig options to test the resulting reader-segment dump. Signed-off-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay (AMD) --- tools/testing/selftests/rcutorture/configs/rcu/BUSTED | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/rcutorture/configs/rcu/BUSTED b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED index 48d8a245c7fa..7d75f4b94943 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/BUSTED +++ b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED @@ -5,3 +5,6 @@ CONFIG_HOTPLUG_CPU=y CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=y +CONFIG_RCU_TORTURE_TEST_CHK_RDR_STATE=y +CONFIG_RCU_TORTURE_TEST_LOG_CPU=y +CONFIG_RCU_TORTURE_TEST_LOG_GP=y -- cgit v1.2.3 From e40e2391388d8db623fd2ac92d7750648155e9d3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 8 May 2025 16:44:58 -0700 Subject: torture: Suppress torture.sh "Zero time" messages for disabled tests The torture.sh script prints " --- Zero time for locktorture, disabling" when the --duration parameter is too short to allow the test to run even when locktorture has been disabled, for example, via --do-none. The same is true for scftorture and rcutorture. This commit therefore suppresses this message when the corresponding test has been disabled. Signed-off-by: Paul E. McKenney Signed-off-by: Joel Fernandes Signed-off-by: Neeraj Upadhyay (AMD) --- tools/testing/selftests/rcutorture/bin/torture.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index e03fdaca89b3..c518de296871 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -274,7 +274,7 @@ then configs_rcutorture=CFLIST fi duration_rcutorture=$((duration_base*duration_rcutorture_frac/10)) -if test "$duration_rcutorture" -eq 0 +if test "$duration_rcutorture" -eq 0 && test "$do_locktorture" = "yes" then echo " --- Zero time for rcutorture, disabling" | tee -a $T/log do_rcutorture=no @@ -286,7 +286,7 @@ then configs_locktorture=CFLIST fi duration_locktorture=$((duration_base*duration_locktorture_frac/10)) -if test "$duration_locktorture" -eq 0 +if test "$duration_locktorture" -eq 0 && test "$do_locktorture" = "yes" then echo " --- Zero time for locktorture, disabling" | tee -a $T/log do_locktorture=no @@ -298,7 +298,7 @@ then configs_scftorture=CFLIST fi duration_scftorture=$((duration_base*duration_scftorture_frac/10)) -if test "$duration_scftorture" -eq 0 +if test "$duration_scftorture" -eq 0 && test "$do_scftorture" = "yes" then echo " --- Zero time for scftorture, disabling" | tee -a $T/log do_scftorture=no -- cgit v1.2.3 From 4176ebdf97d1ad08e205ec97a8f5b66f9efa70bf Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 15 May 2025 15:25:42 -0700 Subject: torture: Permit multiple space characters in kvm.sh --kconfig argument The straightforward way of doing bash substitution for optional strings leaves a pair of space characters, which the kvm.sh --kconfig option rejects as ill-formed. This commit therefore changes the corresponding regular expression to accommodate more than one space character between successive Kconfig options. Signed-off-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay (AMD) --- tools/testing/selftests/rcutorture/bin/kvm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 42e5e8597a1a..9c1b850b3227 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -199,7 +199,7 @@ do fi ;; --kconfig|--kconfigs) - checkarg --kconfig "(Kconfig options)" $# "$2" '^\(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\( \(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\)*$' '^error$' + checkarg --kconfig "(Kconfig options)" $# "$2" '^\(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\( \+\(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\)* *$' '^error$' TORTURE_KCONFIG_ARG="`echo "$TORTURE_KCONFIG_ARG $2" | sed -e 's/^ *//' -e 's/ *$//'`" shift ;; -- cgit v1.2.3 From 955a83469cb46a1bc4339425e5c9cb97fc6303a0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 15 May 2025 10:20:08 -0700 Subject: torture: Make torture.sh KCSAN runs set CONFIG_RCU_TORTURE_TEST_CHK_RDR_STATE=y The RCU_TORTURE_TEST_CHK_RDR_STATE Kconfig option is used for low-level debugging of rcutorture's generation of overlapping and nested RCU readers. It incurs significant overhead, and is thus not to be used lightly. But if it is not tested regularly, it won't be there when it is needed, for example, it would have found an rcutorture bug in the testing of srcu_up_read(). This commit therefore uses CONFIG_RCU_TORTURE_TEST_CHK_RDR_STATE=y when building KCSAN kernels, but only for the --do-rcutorture case. Reported-by: kernel test robot Signed-off-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay (AMD) --- tools/testing/selftests/rcutorture/bin/torture.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index c518de296871..53f61f278fd7 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -378,7 +378,12 @@ function torture_set { kcsan_kmake_tag="--kmake-args" cur_kcsan_kmake_args="$kcsan_kmake_args" fi - torture_one "$@" --kconfig "CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y" $kcsan_kmake_tag $cur_kcsan_kmake_args --kcsan + chk_rdr_state= + if test "${flavor}" = rcutorture + then + chk_rdr_state="CONFIG_RCU_TORTURE_TEST_CHK_RDR_STATE=y" + fi + torture_one "$@" --kconfig "CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y ${chk_rdr_state}" $kcsan_kmake_tag $cur_kcsan_kmake_args --kcsan mv $T/last-resdir $T/last-resdir-kcsan || : fi } -- cgit v1.2.3 From 1524f2032aad74f604a54d5a5cbb8b214f4da41c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 15 May 2025 13:38:56 -0700 Subject: torture: Default --no-rcutasksflavors on arm64 Because arm64 does not support CONFIG_SMP=n kernels, --do-rcutasksflavors gets Kconfig errors when running the TINY01 rcutorture scenario. This commit therefore makes --no-rcutasksflavors be the default on arm64. Once kvm.sh automatically deselects CONFIG_SMP=n rcutorture scenarios on arm64, the two lines marked "FIXME" can be changed back from "${ifnotaarch64}" to "yes". Note that arm64 users can still specify --do-rcutasksflavors in order to override this default. Signed-off-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay (AMD) --- tools/testing/selftests/rcutorture/bin/torture.sh | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index 53f61f278fd7..584d57b48036 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -30,6 +30,15 @@ then VERBOSE_BATCH_CPUS=0 fi +# Machine architecture? ("uname -p" is said to be less portable.)1 +thisarch="`uname -m`" +if test "${thisarch}" = aarch64 +then + ifnotaarch64=no +else + ifnotaarch64=yes +fi + # Configurations/scenarios. configs_rcutorture= configs_locktorture= @@ -57,7 +66,7 @@ do_kasan=yes do_kcsan=no do_clocksourcewd=yes do_rt=yes -do_rcutasksflavors=yes +do_rcutasksflavors="${ifnotaarch64}" # FIXME: Back to "yes" when SMP=n auto-avoided do_srcu_lockdep=yes do_rcu_rust=no @@ -124,7 +133,7 @@ do ;; --do-all|--doall) do_allmodconfig=yes - do_rcutasksflavor=yes + do_rcutasksflavors="${ifnotaarch64}" # FIXME: Back to "yes" when SMP=n auto-avoided do_rcutorture=yes do_locktorture=yes do_scftorture=yes -- cgit v1.2.3 From 103d567f51e0eaee441bb26769e443a719299412 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 15 May 2025 13:54:56 -0700 Subject: torture: Default --no-clocksourcewd on arm64 Because arm64 does not support CONFIG_CLOCKSOURCE_WATCHDOG=n kernels, --do-clocksourcewd gets Kconfig errors. This commit therefore makes --do-no-clocksourcewd be the default on arm64. Note that arm64 users can still specify --do-clocksourcewd in order to override this default. Signed-off-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay (AMD) --- tools/testing/selftests/rcutorture/bin/torture.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index 584d57b48036..25847042e30e 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -64,7 +64,7 @@ do_normal=yes explicit_normal=no do_kasan=yes do_kcsan=no -do_clocksourcewd=yes +do_clocksourcewd="${ifnotaarch64}" do_rt=yes do_rcutasksflavors="${ifnotaarch64}" # FIXME: Back to "yes" when SMP=n auto-avoided do_srcu_lockdep=yes @@ -145,7 +145,7 @@ do explicit_normal=no do_kasan=yes do_kcsan=yes - do_clocksourcewd=yes + do_clocksourcewd="${ifnotaarch64}" do_srcu_lockdep=yes ;; --do-allmodconfig|--do-no-allmodconfig|--no-allmodconfig) -- cgit v1.2.3 From fa6f092cc0a02d0fcee37e9e8172eda372a03d33 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Tue, 24 Jun 2025 22:02:15 -0700 Subject: libbpf: Fix possible use-after-free for externs The `name` field in `obj->externs` points into the BTF data at initial open time. However, some functions may invalidate this after opening and before loading (e.g. `bpf_map__set_value_size`), which results in pointers into freed memory and undefined behavior. The simplest solution is to simply `strdup` these strings, similar to the `essent_name`, and free them at the same time. In order to test this path, the `global_map_resize` BPF selftest is modified slightly to ensure the presence of an extern, which causes this test to fail prior to the fix. Given there isn't an obvious API or error to test against, I opted to add this to the existing test as an aspect of the resizing feature rather than duplicate the test. Fixes: 9d0a23313b1a ("libbpf: Add capability for resizing datasec maps") Signed-off-by: Adin Scannell Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250625050215.2777374-1-amscanne@meta.com --- tools/lib/bpf/libbpf.c | 10 +++++++--- .../testing/selftests/bpf/progs/test_global_map_resize.c | 16 ++++++++++++++++ 2 files changed, 23 insertions(+), 3 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e9c641a2fb20..52e353368f58 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -597,7 +597,7 @@ struct extern_desc { int sym_idx; int btf_id; int sec_btf_id; - const char *name; + char *name; char *essent_name; bool is_set; bool is_weak; @@ -4259,7 +4259,9 @@ static int bpf_object__collect_externs(struct bpf_object *obj) return ext->btf_id; } t = btf__type_by_id(obj->btf, ext->btf_id); - ext->name = btf__name_by_offset(obj->btf, t->name_off); + ext->name = strdup(btf__name_by_offset(obj->btf, t->name_off)); + if (!ext->name) + return -ENOMEM; ext->sym_idx = i; ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK; @@ -9138,8 +9140,10 @@ void bpf_object__close(struct bpf_object *obj) zfree(&obj->btf_custom_path); zfree(&obj->kconfig); - for (i = 0; i < obj->nr_extern; i++) + for (i = 0; i < obj->nr_extern; i++) { + zfree(&obj->externs[i].name); zfree(&obj->externs[i].essent_name); + } zfree(&obj->externs); obj->nr_extern = 0; diff --git a/tools/testing/selftests/bpf/progs/test_global_map_resize.c b/tools/testing/selftests/bpf/progs/test_global_map_resize.c index a3f220ba7025..ee65bad0436d 100644 --- a/tools/testing/selftests/bpf/progs/test_global_map_resize.c +++ b/tools/testing/selftests/bpf/progs/test_global_map_resize.c @@ -32,6 +32,16 @@ int my_int_last SEC(".data.array_not_last"); int percpu_arr[1] SEC(".data.percpu_arr"); +/* at least one extern is included, to ensure that a specific + * regression is tested whereby resizing resulted in a free-after-use + * bug after type information is invalidated by the resize operation. + * + * There isn't a particularly good API to test for this specific condition, + * but by having externs for the resizing tests it will cover this path. + */ +extern int LINUX_KERNEL_VERSION __kconfig; +long version_sink; + SEC("tp/syscalls/sys_enter_getpid") int bss_array_sum(void *ctx) { @@ -44,6 +54,9 @@ int bss_array_sum(void *ctx) for (size_t i = 0; i < bss_array_len; ++i) sum += array[i]; + /* see above; ensure this is not optimized out */ + version_sink = LINUX_KERNEL_VERSION; + return 0; } @@ -59,6 +72,9 @@ int data_array_sum(void *ctx) for (size_t i = 0; i < data_array_len; ++i) sum += my_array[i]; + /* see above; ensure this is not optimized out */ + version_sink = LINUX_KERNEL_VERSION; + return 0; } -- cgit v1.2.3 From d69bafe6ee2b5eff6099fa26626ecc2963f0f363 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 24 Jun 2025 14:18:02 -0700 Subject: selftests/bpf: Fix usdt multispec failure with arm64/clang20 selftest build When building the selftest with arm64/clang20, the following test failed: ... ubtest_multispec_usdt:PASS:usdt_100_called 0 nsec subtest_multispec_usdt:PASS:usdt_100_sum 0 nsec subtest_multispec_usdt:FAIL:usdt_300_bad_attach unexpected pointer: 0xaaaad82a2a80 #471/2 usdt/multispec:FAIL #471 usdt:FAIL But arm64/gcc11 built kernel selftests succeeded. Further debug found arm64/clang generated code has much less argument pattern after dedup, but gcc generated code has a lot more. Check usdt probes with usdt.test.o on arm64 platform: with gcc11 build binary: stapsdt 0x0000002e NT_STAPSDT (SystemTap probe descriptors) Provider: test Name: usdt_300 Location: 0x00000000000054f8, Base: 0x0000000000000000, Semaphore: 0x0000000000000008 Arguments: -4@[sp] stapsdt 0x00000031 NT_STAPSDT (SystemTap probe descriptors) Provider: test Name: usdt_300 Location: 0x0000000000005510, Base: 0x0000000000000000, Semaphore: 0x0000000000000008 Arguments: -4@[sp, 4] ... stapsdt 0x00000032 NT_STAPSDT (SystemTap probe descriptors) Provider: test Name: usdt_300 Location: 0x0000000000005660, Base: 0x0000000000000000, Semaphore: 0x0000000000000008 Arguments: -4@[sp, 60] ... stapsdt 0x00000034 NT_STAPSDT (SystemTap probe descriptors) Provider: test Name: usdt_300 Location: 0x00000000000070e8, Base: 0x0000000000000000, Semaphore: 0x0000000000000008 Arguments: -4@[sp, 1192] stapsdt 0x00000034 NT_STAPSDT (SystemTap probe descriptors) Provider: test Name: usdt_300 Location: 0x0000000000007100, Base: 0x0000000000000000, Semaphore: 0x0000000000000008 Arguments: -4@[sp, 1196] ... stapsdt 0x00000032 NT_STAPSDT (SystemTap probe descriptors) Provider: test Name: usdt_300 Location: 0x0000000000009ec4, Base: 0x0000000000000000, Semaphore: 0x0000000000000008 Arguments: -4@[sp, 60] with clang20 build binary: stapsdt 0x0000002e NT_STAPSDT (SystemTap probe descriptors) Provider: test Name: usdt_300 Location: 0x00000000000009a0, Base: 0x0000000000000000, Semaphore: 0x0000000000000008 Arguments: -4@[x9] stapsdt 0x0000002e NT_STAPSDT (SystemTap probe descriptors) Provider: test Name: usdt_300 Location: 0x00000000000009b8, Base: 0x0000000000000000, Semaphore: 0x0000000000000008 Arguments: -4@[x9] ... stapsdt 0x0000002e NT_STAPSDT (SystemTap probe descriptors) Provider: test Name: usdt_300 Location: 0x0000000000002590, Base: 0x0000000000000000, Semaphore: 0x0000000000000008 Arguments: -4@[x9] stapsdt 0x0000002e NT_STAPSDT (SystemTap probe descriptors) Provider: test Name: usdt_300 Location: 0x00000000000025a8, Base: 0x0000000000000000, Semaphore: 0x0000000000000008 Arguments: -4@[x8] ... stapsdt 0x0000002f NT_STAPSDT (SystemTap probe descriptors) Provider: test Name: usdt_300 Location: 0x0000000000007fdc, Base: 0x0000000000000000, Semaphore: 0x0000000000000008 Arguments: -4@[x10] There are total 300 locations for usdt_300. For gcc11 built binary, there are 300 spec's. But for clang20 built binary, there are 3 spec's. The default BPF_USDT_MAX_SPEC_CNT is 256, so bpf_program__attach_usdt() will fail for gcc but it will succeed with clang. To fix the problem, do not do bpf_program__attach_usdt() for usdt_300 with arm64/clang setup. Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250624211802.2198821-1-yonghong.song@linux.dev --- tools/testing/selftests/bpf/prog_tests/usdt.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c index 495d66414b57..9057e983cc54 100644 --- a/tools/testing/selftests/bpf/prog_tests/usdt.c +++ b/tools/testing/selftests/bpf/prog_tests/usdt.c @@ -270,8 +270,16 @@ static void subtest_multispec_usdt(void) */ trigger_300_usdts(); - /* we'll reuse usdt_100 BPF program for usdt_300 test */ bpf_link__destroy(skel->links.usdt_100); + + bss->usdt_100_called = 0; + bss->usdt_100_sum = 0; + + /* If built with arm64/clang, there will be much less number of specs + * for usdt_300 call sites. + */ +#if !defined(__aarch64__) || !defined(__clang__) + /* we'll reuse usdt_100 BPF program for usdt_300 test */ skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1, "/proc/self/exe", "test", "usdt_300", NULL); err = -errno; @@ -282,13 +290,11 @@ static void subtest_multispec_usdt(void) /* let's check that there are no "dangling" BPF programs attached due * to partial success of the above test:usdt_300 attachment */ - bss->usdt_100_called = 0; - bss->usdt_100_sum = 0; - f300(777); /* this is 301st instance of usdt_300 */ ASSERT_EQ(bss->usdt_100_called, 0, "usdt_301_called"); ASSERT_EQ(bss->usdt_100_sum, 0, "usdt_301_sum"); +#endif /* This time we have USDT with 400 inlined invocations, but arg specs * should be the same across all sites, so libbpf will only need to -- cgit v1.2.3 From aced132599b3c8884c050218d4c48eef203678f6 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 25 Jun 2025 09:40:24 -0700 Subject: bpf: Add range tracking for BPF_NEG Add range tracking for instruction BPF_NEG. Without this logic, a trivial program like the following will fail volatile bool found_value_b; SEC("lsm.s/socket_connect") int BPF_PROG(test_socket_connect) { if (!found_value_b) return -1; return 0; } with verifier log: "At program exit the register R0 has smin=0 smax=4294967295 should have been in [-4095, 0]". This is because range information is lost in BPF_NEG: 0: R1=ctx() R10=fp0 ; if (!found_value_b) @ xxxx.c:24 0: (18) r1 = 0xffa00000011e7048 ; R1_w=map_value(...) 2: (71) r0 = *(u8 *)(r1 +0) ; R0_w=scalar(smin32=0,smax=255) 3: (a4) w0 ^= 1 ; R0_w=scalar(smin32=0,smax=255) 4: (84) w0 = -w0 ; R0_w=scalar(range info lost) Note that, the log above is manually modified to highlight relevant bits. Fix this by maintaining proper range information with BPF_NEG, so that the verifier will know: 4: (84) w0 = -w0 ; R0_w=scalar(smin32=-255,smax=0) Also updated selftests based on the expected behavior. Signed-off-by: Song Liu Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250625164025.3310203-2-song@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/tnum.h | 2 ++ kernel/bpf/tnum.c | 5 +++++ kernel/bpf/verifier.c | 17 ++++++++++++++++- .../bpf/progs/verifier_bounds_deduction.c | 11 +++++++---- .../selftests/bpf/progs/verifier_value_ptr_arith.c | 22 ++++++++++++++++------ 5 files changed, 46 insertions(+), 11 deletions(-) (limited to 'tools/testing/selftests') diff --git a/include/linux/tnum.h b/include/linux/tnum.h index 3c13240077b8..57ed3035cc30 100644 --- a/include/linux/tnum.h +++ b/include/linux/tnum.h @@ -40,6 +40,8 @@ struct tnum tnum_arshift(struct tnum a, u8 min_shift, u8 insn_bitness); struct tnum tnum_add(struct tnum a, struct tnum b); /* Subtract two tnums, return @a - @b */ struct tnum tnum_sub(struct tnum a, struct tnum b); +/* Neg of a tnum, return 0 - @a */ +struct tnum tnum_neg(struct tnum a); /* Bitwise-AND, return @a & @b */ struct tnum tnum_and(struct tnum a, struct tnum b); /* Bitwise-OR, return @a | @b */ diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c index 9dbc31b25e3d..fa353c5d550f 100644 --- a/kernel/bpf/tnum.c +++ b/kernel/bpf/tnum.c @@ -83,6 +83,11 @@ struct tnum tnum_sub(struct tnum a, struct tnum b) return TNUM(dv & ~mu, mu); } +struct tnum tnum_neg(struct tnum a) +{ + return tnum_sub(TNUM(0, 0), a); +} + struct tnum tnum_and(struct tnum a, struct tnum b) { u64 alpha, beta, v; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f403524bd215..2ff22ef42348 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -15182,6 +15182,7 @@ static bool is_safe_to_compute_dst_reg_range(struct bpf_insn *insn, switch (BPF_OP(insn->code)) { case BPF_ADD: case BPF_SUB: + case BPF_NEG: case BPF_AND: case BPF_XOR: case BPF_OR: @@ -15250,6 +15251,13 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, scalar_min_max_sub(dst_reg, &src_reg); dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off); break; + case BPF_NEG: + env->fake_reg[0] = *dst_reg; + __mark_reg_known(dst_reg, 0); + scalar32_min_max_sub(dst_reg, &env->fake_reg[0]); + scalar_min_max_sub(dst_reg, &env->fake_reg[0]); + dst_reg->var_off = tnum_neg(env->fake_reg[0].var_off); + break; case BPF_MUL: dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off); scalar32_min_max_mul(dst_reg, &src_reg); @@ -15473,7 +15481,14 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) } /* check dest operand */ - err = check_reg_arg(env, insn->dst_reg, DST_OP); + if (opcode == BPF_NEG) { + err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); + err = err ?: adjust_scalar_min_max_vals(env, insn, + ®s[insn->dst_reg], + regs[insn->dst_reg]); + } else { + err = check_reg_arg(env, insn->dst_reg, DST_OP); + } if (err) return err; diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c index c506afbdd936..260a6df264e3 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c @@ -159,13 +159,16 @@ __failure_unpriv __naked void deducing_bounds_from_const_10(void) { asm volatile (" \ + r6 = r1; \ r0 = 0; \ if r0 s<= 0 goto l0_%=; \ -l0_%=: /* Marks reg as unknown. */ \ - r0 = -r0; \ - r0 -= r1; \ +l0_%=: /* Marks r0 as unknown. */ \ + call %[bpf_get_prandom_u32]; \ + r0 -= r6; \ exit; \ -" ::: __clobber_all); +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); } char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c b/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c index fcea9819e359..af7938ce56cb 100644 --- a/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c +++ b/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c @@ -231,6 +231,10 @@ __retval(1) __naked void ptr_unknown_vs_unknown_lt(void) { asm volatile (" \ + r8 = r1; \ + call %[bpf_get_prandom_u32]; \ + r9 = r0; \ + r1 = r8; \ r0 = *(u32*)(r1 + %[__sk_buff_len]); \ r1 = 0; \ *(u64*)(r10 - 8) = r1; \ @@ -245,11 +249,11 @@ l1_%=: call %[bpf_map_lookup_elem]; \ r4 = *(u8*)(r0 + 0); \ if r4 == 1 goto l3_%=; \ r1 = 6; \ - r1 = -r1; \ + r1 = r9; \ r1 &= 0x3; \ goto l4_%=; \ l3_%=: r1 = 6; \ - r1 = -r1; \ + r1 = r9; \ r1 &= 0x7; \ l4_%=: r1 += r0; \ r0 = *(u8*)(r1 + 0); \ @@ -259,7 +263,8 @@ l2_%=: r0 = 1; \ : __imm(bpf_map_lookup_elem), __imm_addr(map_array_48b), __imm_addr(map_hash_16b), - __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len)) + __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len)), + __imm(bpf_get_prandom_u32) : __clobber_all); } @@ -271,6 +276,10 @@ __retval(1) __naked void ptr_unknown_vs_unknown_gt(void) { asm volatile (" \ + r8 = r1; \ + call %[bpf_get_prandom_u32]; \ + r9 = r0; \ + r1 = r8; \ r0 = *(u32*)(r1 + %[__sk_buff_len]); \ r1 = 0; \ *(u64*)(r10 - 8) = r1; \ @@ -285,11 +294,11 @@ l1_%=: call %[bpf_map_lookup_elem]; \ r4 = *(u8*)(r0 + 0); \ if r4 == 1 goto l3_%=; \ r1 = 6; \ - r1 = -r1; \ + r1 = r9; \ r1 &= 0x7; \ goto l4_%=; \ l3_%=: r1 = 6; \ - r1 = -r1; \ + r1 = r9; \ r1 &= 0x3; \ l4_%=: r1 += r0; \ r0 = *(u8*)(r1 + 0); \ @@ -299,7 +308,8 @@ l2_%=: r0 = 1; \ : __imm(bpf_map_lookup_elem), __imm_addr(map_array_48b), __imm_addr(map_hash_16b), - __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len)) + __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len)), + __imm(bpf_get_prandom_u32) : __clobber_all); } -- cgit v1.2.3 From 2945434e248fc66e0b44b054fa18b16b25c1c1f5 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 25 Jun 2025 09:40:25 -0700 Subject: selftests/bpf: Add tests for BPF_NEG range tracking logic BPF_REG now has range tracking logic. Add selftests for BPF_NEG. Specifically, return value of LSM hook lsm.s/socket_connect is used to show that the verifer tracks BPF_NEG(1) falls in the [-4095, 0] range; while BPF_NEG(100000) does not fall in that range. Signed-off-by: Song Liu Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250625164025.3310203-3-song@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/verifier_precision.c | 70 ++++++++++++++++++++++ 1 file changed, 70 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c index 9fe5d255ee37..73fee2aec698 100644 --- a/tools/testing/selftests/bpf/progs/verifier_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_precision.c @@ -231,4 +231,74 @@ __naked void bpf_cond_op_not_r10(void) ::: __clobber_all); } +SEC("lsm.s/socket_connect") +__success __log_level(2) +__msg("0: (b7) r0 = 1 ; R0_w=1") +__msg("1: (84) w0 = -w0 ; R0_w=0xffffffff") +__msg("mark_precise: frame0: last_idx 2 first_idx 0 subseq_idx -1") +__msg("mark_precise: frame0: regs=r0 stack= before 1: (84) w0 = -w0") +__msg("mark_precise: frame0: regs=r0 stack= before 0: (b7) r0 = 1") +__naked int bpf_neg_2(void) +{ + /* + * lsm.s/socket_connect requires a return value within [-4095, 0]. + * Returning -1 is allowed + */ + asm volatile ( + "r0 = 1;" + "w0 = -w0;" + "exit;" + ::: __clobber_all); +} + +SEC("lsm.s/socket_connect") +__failure __msg("At program exit the register R0 has") +__naked int bpf_neg_3(void) +{ + /* + * lsm.s/socket_connect requires a return value within [-4095, 0]. + * Returning -10000 is not allowed. + */ + asm volatile ( + "r0 = 10000;" + "w0 = -w0;" + "exit;" + ::: __clobber_all); +} + +SEC("lsm.s/socket_connect") +__success __log_level(2) +__msg("0: (b7) r0 = 1 ; R0_w=1") +__msg("1: (87) r0 = -r0 ; R0_w=-1") +__msg("mark_precise: frame0: last_idx 2 first_idx 0 subseq_idx -1") +__msg("mark_precise: frame0: regs=r0 stack= before 1: (87) r0 = -r0") +__msg("mark_precise: frame0: regs=r0 stack= before 0: (b7) r0 = 1") +__naked int bpf_neg_4(void) +{ + /* + * lsm.s/socket_connect requires a return value within [-4095, 0]. + * Returning -1 is allowed + */ + asm volatile ( + "r0 = 1;" + "r0 = -r0;" + "exit;" + ::: __clobber_all); +} + +SEC("lsm.s/socket_connect") +__failure __msg("At program exit the register R0 has") +__naked int bpf_neg_5(void) +{ + /* + * lsm.s/socket_connect requires a return value within [-4095, 0]. + * Returning -10000 is not allowed. + */ + asm volatile ( + "r0 = 10000;" + "r0 = -r0;" + "exit;" + ::: __clobber_all); +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 12ed81f82391d073982cf0cd0b2d14e374e5112a Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 25 Jun 2025 11:24:14 -0700 Subject: selftests/bpf: check operations on untrusted ro pointers to mem The following cases are tested: - it is ok to load memory at any offset from rdonly_untrusted_mem; - rdonly_untrusted_mem offset/bounds are not tracked; - writes into rdonly_untrusted_mem are forbidden; - atomic operations on rdonly_untrusted_mem are forbidden; - rdonly_untrusted_mem can't be passed as a memory argument of a helper of kfunc; - it is ok to use PTR_TO_MEM and PTR_TO_BTF_ID in a same load instruction. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250625182414.30659-4-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../bpf/prog_tests/mem_rdonly_untrusted.c | 9 ++ .../selftests/bpf/progs/mem_rdonly_untrusted.c | 136 +++++++++++++++++++++ 2 files changed, 145 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c create mode 100644 tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c b/tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c new file mode 100644 index 000000000000..40d4f687bd9c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include "mem_rdonly_untrusted.skel.h" + +void test_mem_rdonly_untrusted(void) +{ + RUN_TESTS(mem_rdonly_untrusted); +} diff --git a/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c new file mode 100644 index 000000000000..00604755e698 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include "bpf_misc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" + +SEC("socket") +__success +__retval(0) +int ldx_is_ok_bad_addr(void *ctx) +{ + char *p; + + if (!bpf_core_enum_value_exists(enum bpf_features, BPF_FEAT_RDONLY_CAST_TO_VOID)) + return 42; + + p = bpf_rdonly_cast(0, 0); + return p[0x7fff]; +} + +SEC("socket") +__success +__retval(1) +int ldx_is_ok_good_addr(void *ctx) +{ + int v, *p; + + v = 1; + p = bpf_rdonly_cast(&v, 0); + return *p; +} + +SEC("socket") +__success +int offset_not_tracked(void *ctx) +{ + int *p, i, s; + + p = bpf_rdonly_cast(0, 0); + s = 0; + bpf_for(i, 0, 1000 * 1000 * 1000) { + p++; + s += *p; + } + return s; +} + +SEC("socket") +__failure +__msg("cannot write into rdonly_untrusted_mem") +int stx_not_ok(void *ctx) +{ + int v, *p; + + v = 1; + p = bpf_rdonly_cast(&v, 0); + *p = 1; + return 0; +} + +SEC("socket") +__failure +__msg("cannot write into rdonly_untrusted_mem") +int atomic_not_ok(void *ctx) +{ + int v, *p; + + v = 1; + p = bpf_rdonly_cast(&v, 0); + __sync_fetch_and_add(p, 1); + return 0; +} + +SEC("socket") +__failure +__msg("cannot write into rdonly_untrusted_mem") +int atomic_rmw_not_ok(void *ctx) +{ + long v, *p; + + v = 1; + p = bpf_rdonly_cast(&v, 0); + return __sync_val_compare_and_swap(p, 0, 42); +} + +SEC("socket") +__failure +__msg("invalid access to memory, mem_size=0 off=0 size=4") +__msg("R1 min value is outside of the allowed memory range") +int kfunc_param_not_ok(void *ctx) +{ + int *p; + + p = bpf_rdonly_cast(0, 0); + bpf_kfunc_trusted_num_test(p); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +__failure +__msg("R1 type=rdonly_untrusted_mem expected=") +int helper_param_not_ok(void *ctx) +{ + char *p; + + p = bpf_rdonly_cast(0, 0); + /* + * Any helper with ARG_CONST_SIZE_OR_ZERO constraint will do, + * the most permissive constraint + */ + bpf_copy_from_user(p, 0, (void *)42); + return 0; +} + +static __noinline u64 *get_some_addr(void) +{ + if (bpf_get_prandom_u32()) + return bpf_rdonly_cast(0, bpf_core_type_id_kernel(struct sock)); + else + return bpf_rdonly_cast(0, 0); +} + +SEC("socket") +__success +__retval(0) +int mixed_mem_type(void *ctx) +{ + u64 *p; + + /* Try to avoid compiler hoisting load to if branches by using __noinline func. */ + p = get_some_addr(); + return *p; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 5e9388f7984a9cc7e659a105113f6ccf0aebedd0 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 25 Jun 2025 17:03:55 -0400 Subject: selftests/bpf: adapt one more case in test_lru_map to the new target_free The below commit that updated BPF_MAP_TYPE_LRU_HASH free target, also updated tools/testing/selftests/bpf/test_lru_map to match. But that missed one case that passes with 4 cores, but fails at higher cpu counts. Update test_lru_sanity3 to also adjust its expectation of target_free. This time tested with 1, 4, 16, 64 and 384 cpu count. Fixes: d4adf1c9ee77 ("bpf: Adjust free target to avoid global starvation of LRU map") Signed-off-by: Willem de Bruijn Link: https://lore.kernel.org/r/20250625210412.2732970-1-willemdebruijn.kernel@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_lru_map.c | 33 ++++++++++++++++-------------- 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c index 4ae83f4b7fc7..0921939532c6 100644 --- a/tools/testing/selftests/bpf/test_lru_map.c +++ b/tools/testing/selftests/bpf/test_lru_map.c @@ -138,6 +138,12 @@ static int sched_next_online(int pid, int *next_to_try) return ret; } +/* Derive target_free from map_size, same as bpf_common_lru_populate */ +static unsigned int __tgt_size(unsigned int map_size) +{ + return (map_size / nr_cpus) / 2; +} + /* Inverse of how bpf_common_lru_populate derives target_free from map_size. */ static unsigned int __map_size(unsigned int tgt_free) { @@ -410,12 +416,12 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) printf("Pass\n"); } -/* Size of the LRU map is 2*tgt_free - * It is to test the active/inactive list rotation - * Insert 1 to 2*tgt_free (+2*tgt_free keys) - * Lookup key 1 to tgt_free*3/2 - * Add 1+2*tgt_free to tgt_free*5/2 (+tgt_free/2 keys) - * => key 1+tgt_free*3/2 to 2*tgt_free are removed from LRU +/* Test the active/inactive list rotation + * + * Fill the whole map, deplete the free list. + * Reference all except the last lru->target_free elements. + * Insert lru->target_free new elements. This triggers one shrink. + * Verify that the non-referenced elements are replaced. */ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free) { @@ -434,8 +440,7 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free) assert(sched_next_online(0, &next_cpu) != -1); - batch_size = tgt_free / 2; - assert(batch_size * 2 == tgt_free); + batch_size = __tgt_size(tgt_free); map_size = tgt_free * 2; lru_map_fd = create_map(map_type, map_flags, map_size); @@ -446,23 +451,21 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free) value[0] = 1234; - /* Insert 1 to 2*tgt_free (+2*tgt_free keys) */ - end_key = 1 + (2 * tgt_free); + /* Fill the map */ + end_key = 1 + map_size; for (key = 1; key < end_key; key++) assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); - /* Lookup key 1 to tgt_free*3/2 */ - end_key = tgt_free + batch_size; + /* Reference all but the last batch_size */ + end_key = 1 + map_size - batch_size; for (key = 1; key < end_key; key++) { assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value)); assert(!bpf_map_update_elem(expected_map_fd, &key, value, BPF_NOEXIST)); } - /* Add 1+2*tgt_free to tgt_free*5/2 - * (+tgt_free/2 keys) - */ + /* Insert new batch_size: replaces the non-referenced elements */ key = 2 * tgt_free + 1; end_key = key + batch_size; for (; key < end_key; key++) { -- cgit v1.2.3 From 4d13c6c449af374fbcd0580764a216668d970d26 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Jun 2025 16:17:20 -0700 Subject: selftests: drv-net: test RSS Netlink notifications Test that changing the RSS config generates Netlink notifications. # ./tools/testing/selftests/drivers/net/hw/rss_api.py TAP version 13 1..2 ok 1 rss_api.test_rxfh_indir_ntf ok 2 rss_api.test_rxfh_indir_ctx_ntf # Totals: pass:2 fail:0 xfail:0 xpass:0 skip:0 error:0 Link: https://patch.msgid.link/20250623231720.3124717-9-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/rss_api.py | 89 +++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/hw/rss_api.py (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/hw/rss_api.py b/tools/testing/selftests/drivers/net/hw/rss_api.py new file mode 100755 index 000000000000..db0f723a674b --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/rss_api.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +API level tests for RSS (mostly Netlink vs IOCTL). +""" + +import glob +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_is, ksft_ne +from lib.py import KsftSkipEx, KsftFailEx +from lib.py import defer, ethtool +from lib.py import EthtoolFamily +from lib.py import NetDrvEnv + + +def _ethtool_create(cfg, act, opts): + output = ethtool(f"{act} {cfg.ifname} {opts}").stdout + # Output will be something like: "New RSS context is 1" or + # "Added rule with ID 7", we want the integer from the end + return int(output.split()[-1]) + + +def test_rxfh_indir_ntf(cfg): + """ + Check that Netlink notifications are generated when RSS indirection + table was modified. + """ + + qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*")) + if qcnt < 2: + raise KsftSkipEx(f"Local has only {qcnt} queues") + + ethnl = EthtoolFamily() + ethnl.ntf_subscribe("monitor") + + ethtool(f"--disable-netlink -X {cfg.ifname} weight 0 1") + reset = defer(ethtool, f"-X {cfg.ifname} default") + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("No notification received") + ksft_eq(ntf["name"], "rss-ntf") + ksft_eq(set(ntf["msg"]["indir"]), {1}) + + reset.exec() + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("No notification received after reset") + ksft_eq(ntf["name"], "rss-ntf") + ksft_is(ntf["msg"].get("context"), None) + ksft_ne(set(ntf["msg"]["indir"]), {1}) + + +def test_rxfh_indir_ctx_ntf(cfg): + """ + Check that Netlink notifications are generated when RSS indirection + table was modified on an additional RSS context. + """ + + qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*")) + if qcnt < 2: + raise KsftSkipEx(f"Local has only {qcnt} queues") + + ctx_id = _ethtool_create(cfg, "-X", "context new") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + ethnl = EthtoolFamily() + ethnl.ntf_subscribe("monitor") + + ethtool(f"--disable-netlink -X {cfg.ifname} context {ctx_id} weight 0 1") + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("No notification received") + ksft_eq(ntf["name"], "rss-ntf") + ksft_eq(ntf["msg"].get("context"), ctx_id) + ksft_eq(set(ntf["msg"]["indir"]), {1}) + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEnv(__file__, nsim_test=False) as cfg: + ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() -- cgit v1.2.3 From 07caaf875c937e5f0a262a1dbad307d08ecc8673 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 24 Jun 2025 14:09:55 -0700 Subject: netlink: specs: ethtool: replace underscores with dashes in names We're trying to add a strict regexp for the name format in the spec. Underscores will not be allowed, dashes should be used instead. This makes no difference to C (codegen replaces special chars in names) but gives more uniform naming in Python. Fixes: 13e59344fb9d ("net: ethtool: add support for symmetric-xor RSS hash") Fixes: 46fb3ba95b93 ("ethtool: Add an interface for flashing transceiver modules' firmware") Reviewed-by: Kory Maincent Reviewed-by: Donald Hunter Link: https://patch.msgid.link/20250624211002.3475021-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/ethtool.yaml | 6 +++--- tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'tools/testing/selftests') diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 72a076b0e1b5..348c6ad548f5 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -48,7 +48,7 @@ definitions: name: started doc: The firmware flashing process has started. - - name: in_progress + name: in-progress doc: The firmware flashing process is in progress. - name: completed @@ -1422,7 +1422,7 @@ attribute-sets: name: hkey type: binary - - name: input_xfrm + name: input-xfrm type: u32 - name: start-context @@ -2238,7 +2238,7 @@ operations: - hfunc - indir - hkey - - input_xfrm + - input-xfrm dump: request: attributes: diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py index f439c434ba36..648ff50bc1c3 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py +++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py @@ -38,7 +38,7 @@ def test_rss_input_xfrm(cfg, ipver): raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11") input_xfrm = cfg.ethnl.rss_get( - {'header': {'dev-name': cfg.ifname}}).get('input_xfrm') + {'header': {'dev-name': cfg.ifname}}).get('input-xfrm') # Check for symmetric xor/or-xor if not input_xfrm or (input_xfrm != 1 and input_xfrm != 2): -- cgit v1.2.3 From 7c942f87cc0be5699b1ce434d369eccd8b5321d4 Mon Sep 17 00:00:00 2001 From: Dev Jain Date: Fri, 20 Jun 2025 16:41:50 +0530 Subject: selftests/mm: fix validate_addr() helper validate_addr() checks whether the address returned by mmap() lies in the low or high VA space, according to whether a high addr hint was passed or not. The fix commit mentioned below changed the code in such a way that this function will always return failure when passed high_addr == 1; addr will be >= HIGH_ADDR_MARK always, we will fall down to "if (addr > HIGH_ADDR_MARK)" and return failure. Fix this. Link: https://lkml.kernel.org/r/20250620111150.50344-1-dev.jain@arm.com Fixes: d1d86ce28d0f ("selftests/mm: virtual_address_range: conform to TAP format output") Signed-off-by: Dev Jain Reviewed-by: Donet Tom Acked-by: David Hildenbrand Cc: Anshuman Khandual Cc: Lorenzo Stoakes Cc: Ryan Roberts Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/virtual_address_range.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/mm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c index b380e102b22f..169dbd692bf5 100644 --- a/tools/testing/selftests/mm/virtual_address_range.c +++ b/tools/testing/selftests/mm/virtual_address_range.c @@ -77,8 +77,11 @@ static void validate_addr(char *ptr, int high_addr) { unsigned long addr = (unsigned long) ptr; - if (high_addr && addr < HIGH_ADDR_MARK) - ksft_exit_fail_msg("Bad address %lx\n", addr); + if (high_addr) { + if (addr < HIGH_ADDR_MARK) + ksft_exit_fail_msg("Bad address %lx\n", addr); + return; + } if (addr > HIGH_ADDR_MARK) ksft_exit_fail_msg("Bad address %lx\n", addr); -- cgit v1.2.3 From a55b7d39328bc6de1131cb5496816129cab2728b Mon Sep 17 00:00:00 2001 From: Viktor Malik Date: Thu, 26 Jun 2025 08:08:30 +0200 Subject: selftests/bpf: Allow macros in __retval Allow macro expansion for values passed to the `__retval` and `__retval_unpriv` attributes. This is especially useful for testing programs which return various error codes. With this change, the code for parsing special literals can be made simpler, as the literals are defined via macros. The only exception is INT_MIN which expands to (-INT_MAX -1), which is not single number and cannot be parsed by strtol. So, we instead use a prefixed literal _INT_MIN in __retval and handle it separately (assign the expected return to INT_MIN). Also, strtol cannot handle the "ll" suffix so change the value of POINTER_VALUE from 0xcafe4all to 0xbadcafe. Signed-off-by: Viktor Malik Link: https://lore.kernel.org/r/a6c6b551ae0575351faa7b7a1df52f9341a5cbe8.1750917800.git.vmalik@redhat.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/bpf_misc.h | 14 +++++++------ .../selftests/bpf/progs/verifier_div_overflow.c | 4 ++-- tools/testing/selftests/bpf/test_loader.c | 24 ++++++++-------------- 3 files changed, 19 insertions(+), 23 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index a678463e972c..20dce508d8e0 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -83,9 +83,11 @@ * expect return value to match passed parameter: * - a decimal number * - a hexadecimal number, when starts from 0x - * - literal INT_MIN - * - literal POINTER_VALUE (see definition below) - * - literal TEST_DATA_LEN (see definition below) + * - a macro which expands to one of the above + * - literal _INT_MIN (expands to INT_MIN) + * In addition, two special macros are defined below: + * - POINTER_VALUE + * - TEST_DATA_LEN * __retval_unpriv Same, but load program in unprivileged mode. * * __description Text to be used instead of a program name for display @@ -125,8 +127,8 @@ #define __success_unpriv __attribute__((btf_decl_tag("comment:test_expect_success_unpriv"))) #define __log_level(lvl) __attribute__((btf_decl_tag("comment:test_log_level="#lvl))) #define __flag(flag) __attribute__((btf_decl_tag("comment:test_prog_flags="#flag))) -#define __retval(val) __attribute__((btf_decl_tag("comment:test_retval="#val))) -#define __retval_unpriv(val) __attribute__((btf_decl_tag("comment:test_retval_unpriv="#val))) +#define __retval(val) __attribute__((btf_decl_tag("comment:test_retval="XSTR(val)))) +#define __retval_unpriv(val) __attribute__((btf_decl_tag("comment:test_retval_unpriv="XSTR(val)))) #define __auxiliary __attribute__((btf_decl_tag("comment:test_auxiliary"))) #define __auxiliary_unpriv __attribute__((btf_decl_tag("comment:test_auxiliary_unpriv"))) #define __btf_path(path) __attribute__((btf_decl_tag("comment:test_btf_path=" path))) @@ -155,7 +157,7 @@ #define __imm_insn(name, expr) [name]"i"(*(long *)&(expr)) /* Magic constants used with __retval() */ -#define POINTER_VALUE 0xcafe4all +#define POINTER_VALUE 0xbadcafe #define TEST_DATA_LEN 64 #ifndef __used diff --git a/tools/testing/selftests/bpf/progs/verifier_div_overflow.c b/tools/testing/selftests/bpf/progs/verifier_div_overflow.c index 458984da804c..34e0c012ee76 100644 --- a/tools/testing/selftests/bpf/progs/verifier_div_overflow.c +++ b/tools/testing/selftests/bpf/progs/verifier_div_overflow.c @@ -77,7 +77,7 @@ l0_%=: exit; \ SEC("tc") __description("MOD32 overflow, check 1") -__success __retval(INT_MIN) +__success __retval(_INT_MIN) __naked void mod32_overflow_check_1(void) { asm volatile (" \ @@ -92,7 +92,7 @@ __naked void mod32_overflow_check_1(void) SEC("tc") __description("MOD32 overflow, check 2") -__success __retval(INT_MIN) +__success __retval(_INT_MIN) __naked void mod32_overflow_check_2(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 2c7e9729d5fe..78423cf89e01 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -40,7 +40,7 @@ #define TEST_TAG_LOAD_MODE_PFX "comment:load_mode=" /* Warning: duplicated in bpf_misc.h */ -#define POINTER_VALUE 0xcafe4all +#define POINTER_VALUE 0xbadcafe #define TEST_DATA_LEN 64 #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS @@ -318,20 +318,14 @@ static int parse_caps(const char *str, __u64 *val, const char *name) static int parse_retval(const char *str, int *val, const char *name) { - struct { - char *name; - int val; - } named_values[] = { - { "INT_MIN" , INT_MIN }, - { "POINTER_VALUE", POINTER_VALUE }, - { "TEST_DATA_LEN", TEST_DATA_LEN }, - }; - int i; - - for (i = 0; i < ARRAY_SIZE(named_values); ++i) { - if (strcmp(str, named_values[i].name) != 0) - continue; - *val = named_values[i].val; + /* + * INT_MIN is defined as (-INT_MAX -1), i.e. it doesn't expand to a + * single int and cannot be parsed with strtol, so we handle it + * separately here. In addition, it expands to different expressions in + * different compilers so we use a prefixed _INT_MIN instead. + */ + if (strcmp(str, "_INT_MIN") == 0) { + *val = INT_MIN; return 0; } -- cgit v1.2.3 From e8763fb66a386843f091925dab757c4f55633d0b Mon Sep 17 00:00:00 2001 From: Viktor Malik Date: Thu, 26 Jun 2025 08:08:31 +0200 Subject: selftests/bpf: Add tests for string kfuncs Add both positive and negative tests cases using string kfuncs added in the previous patches. Positive tests check that the functions work as expected. Negative tests pass various incorrect strings to the kfuncs and check for the expected error codes: -E2BIG when passing too long strings -EFAULT when trying to read inaccessible kernel memory -ERANGE when passing userspace pointers on arches with non-overlapping address spaces A majority of the tests use the RUN_TESTS helper which executes BPF programs with BPF_PROG_TEST_RUN and check for the expected return value. An exception to this are tests for long strings as we need to memset the long string from userspace (at least I haven't found an ergonomic way to memset it from a BPF program), which cannot be done using the RUN_TESTS infrastructure. Suggested-by: Eduard Zingerman Signed-off-by: Viktor Malik Link: https://lore.kernel.org/r/090451a2e60c9ae1dceb4d1bfafa3479db5c7481.1750917800.git.vmalik@redhat.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/string_kfuncs.c | 65 ++++++++++++++++ .../selftests/bpf/progs/string_kfuncs_failure1.c | 87 ++++++++++++++++++++++ .../selftests/bpf/progs/string_kfuncs_failure2.c | 23 ++++++ .../selftests/bpf/progs/string_kfuncs_success.c | 37 +++++++++ 4 files changed, 212 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/string_kfuncs.c create mode 100644 tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c create mode 100644 tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c create mode 100644 tools/testing/selftests/bpf/progs/string_kfuncs_success.c (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c new file mode 100644 index 000000000000..35af8044d059 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025 Red Hat, Inc.*/ +#include +#include "string_kfuncs_success.skel.h" +#include "string_kfuncs_failure1.skel.h" +#include "string_kfuncs_failure2.skel.h" +#include + +static const char * const test_cases[] = { + "strcmp", + "strchr", + "strchrnul", + "strnchr", + "strrchr", + "strlen", + "strnlen", + "strspn_str", + "strspn_accept", + "strcspn_str", + "strcspn_reject", + "strstr", + "strnstr", +}; + +void run_too_long_tests(void) +{ + struct string_kfuncs_failure2 *skel; + struct bpf_program *prog; + char test_name[256]; + int err, i; + + skel = string_kfuncs_failure2__open_and_load(); + if (!ASSERT_OK_PTR(skel, "string_kfuncs_failure2__open_and_load")) + return; + + memset(skel->bss->long_str, 'a', sizeof(skel->bss->long_str)); + + for (i = 0; i < ARRAY_SIZE(test_cases); i++) { + sprintf(test_name, "test_%s_too_long", test_cases[i]); + if (!test__start_subtest(test_name)) + continue; + + prog = bpf_object__find_program_by_name(skel->obj, test_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + + LIBBPF_OPTS(bpf_test_run_opts, topts); + err = bpf_prog_test_run_opts(bpf_program__fd(prog), &topts); + if (!ASSERT_OK(err, "bpf_prog_test_run")) + goto cleanup; + + ASSERT_EQ(topts.retval, -E2BIG, "reading too long string fails with -E2BIG"); + } + +cleanup: + string_kfuncs_failure2__destroy(skel); +} + +void test_string_kfuncs(void) +{ + RUN_TESTS(string_kfuncs_success); + RUN_TESTS(string_kfuncs_failure1); + + run_too_long_tests(); +} diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c new file mode 100644 index 000000000000..53af438bd998 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025 Red Hat, Inc.*/ +#include "vmlinux.h" +#include +#include +#include "bpf_misc.h" +#include "errno.h" + +char *user_ptr = (char *)1; +char *invalid_kern_ptr = (char *)-1; + +/* + * When passing userspace pointers, the error code differs based on arch: + * -ERANGE on arches with non-overlapping address spaces + * -EFAULT on other arches + */ +#if defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_loongarch) || \ + defined(__TARGET_ARCH_powerpc) || defined(__TARGET_ARCH_x86) +#define USER_PTR_ERR -ERANGE +#else +#define USER_PTR_ERR -EFAULT +#endif + +/* + * On s390, __get_kernel_nofault (used in string kfuncs) returns 0 for NULL and + * user_ptr (instead of causing an exception) so the below two groups of tests + * are not applicable. + */ +#ifndef __TARGET_ARCH_s390 + +/* Passing NULL to string kfuncs (treated as a userspace ptr) */ +SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_null1(void *ctx) { return bpf_strcmp(NULL, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strcmp_null2(void *ctx) { return bpf_strcmp("hello", NULL); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strchr_null(void *ctx) { return bpf_strchr(NULL, 'a'); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strchrnul_null(void *ctx) { return bpf_strchrnul(NULL, 'a'); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strnchr_null(void *ctx) { return bpf_strnchr(NULL, 1, 'a'); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strrchr_null(void *ctx) { return bpf_strrchr(NULL, 'a'); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strlen_null(void *ctx) { return bpf_strlen(NULL); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strnlen_null(void *ctx) { return bpf_strnlen(NULL, 1); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strspn_null1(void *ctx) { return bpf_strspn(NULL, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strspn_null2(void *ctx) { return bpf_strspn("hello", NULL); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strcspn_null1(void *ctx) { return bpf_strcspn(NULL, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strcspn_null2(void *ctx) { return bpf_strcspn("hello", NULL); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strstr_null1(void *ctx) { return bpf_strstr(NULL, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strstr_null2(void *ctx) { return bpf_strstr("hello", NULL); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strnstr_null1(void *ctx) { return bpf_strnstr(NULL, "hello", 1); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strnstr_null2(void *ctx) { return bpf_strnstr("hello", NULL, 1); } + +/* Passing userspace ptr to string kfuncs */ +SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr1(void *ctx) { return bpf_strcmp(user_ptr, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr2(void *ctx) { return bpf_strcmp("hello", user_ptr); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strchr_user_ptr(void *ctx) { return bpf_strchr(user_ptr, 'a'); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strchrnul_user_ptr(void *ctx) { return bpf_strchrnul(user_ptr, 'a'); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strnchr_user_ptr(void *ctx) { return bpf_strnchr(user_ptr, 1, 'a'); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strrchr_user_ptr(void *ctx) { return bpf_strrchr(user_ptr, 'a'); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strlen_user_ptr(void *ctx) { return bpf_strlen(user_ptr); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strnlen_user_ptr(void *ctx) { return bpf_strnlen(user_ptr, 1); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strspn_user_ptr1(void *ctx) { return bpf_strspn(user_ptr, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strspn_user_ptr2(void *ctx) { return bpf_strspn("hello", user_ptr); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strcspn_user_ptr1(void *ctx) { return bpf_strcspn(user_ptr, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strcspn_user_ptr2(void *ctx) { return bpf_strcspn("hello", user_ptr); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strstr_user_ptr1(void *ctx) { return bpf_strstr(user_ptr, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strstr_user_ptr2(void *ctx) { return bpf_strstr("hello", user_ptr); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strnstr_user_ptr1(void *ctx) { return bpf_strnstr(user_ptr, "hello", 1); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strnstr_user_ptr2(void *ctx) { return bpf_strnstr("hello", user_ptr, 1); } + +#endif /* __TARGET_ARCH_s390 */ + +/* Passing invalid kernel ptr to string kfuncs should always return -EFAULT */ +SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault1(void *ctx) { return bpf_strcmp(invalid_kern_ptr, "hello"); } +SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault2(void *ctx) { return bpf_strcmp("hello", invalid_kern_ptr); } +SEC("syscall") __retval(-EFAULT) int test_strchr_pagefault(void *ctx) { return bpf_strchr(invalid_kern_ptr, 'a'); } +SEC("syscall") __retval(-EFAULT) int test_strchrnul_pagefault(void *ctx) { return bpf_strchrnul(invalid_kern_ptr, 'a'); } +SEC("syscall") __retval(-EFAULT) int test_strnchr_pagefault(void *ctx) { return bpf_strnchr(invalid_kern_ptr, 1, 'a'); } +SEC("syscall") __retval(-EFAULT) int test_strrchr_pagefault(void *ctx) { return bpf_strrchr(invalid_kern_ptr, 'a'); } +SEC("syscall") __retval(-EFAULT) int test_strlen_pagefault(void *ctx) { return bpf_strlen(invalid_kern_ptr); } +SEC("syscall") __retval(-EFAULT) int test_strnlen_pagefault(void *ctx) { return bpf_strnlen(invalid_kern_ptr, 1); } +SEC("syscall") __retval(-EFAULT) int test_strspn_pagefault1(void *ctx) { return bpf_strspn(invalid_kern_ptr, "hello"); } +SEC("syscall") __retval(-EFAULT) int test_strspn_pagefault2(void *ctx) { return bpf_strspn("hello", invalid_kern_ptr); } +SEC("syscall") __retval(-EFAULT) int test_strcspn_pagefault1(void *ctx) { return bpf_strcspn(invalid_kern_ptr, "hello"); } +SEC("syscall") __retval(-EFAULT) int test_strcspn_pagefault2(void *ctx) { return bpf_strcspn("hello", invalid_kern_ptr); } +SEC("syscall") __retval(-EFAULT) int test_strstr_pagefault1(void *ctx) { return bpf_strstr(invalid_kern_ptr, "hello"); } +SEC("syscall") __retval(-EFAULT) int test_strstr_pagefault2(void *ctx) { return bpf_strstr("hello", invalid_kern_ptr); } +SEC("syscall") __retval(-EFAULT) int test_strnstr_pagefault1(void *ctx) { return bpf_strnstr(invalid_kern_ptr, "hello", 1); } +SEC("syscall") __retval(-EFAULT) int test_strnstr_pagefault2(void *ctx) { return bpf_strnstr("hello", invalid_kern_ptr, 1); } + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c new file mode 100644 index 000000000000..89fb4669b0e9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025 Red Hat, Inc.*/ +#include "vmlinux.h" +#include +#include + +char long_str[XATTR_SIZE_MAX + 1]; + +SEC("syscall") int test_strcmp_too_long(void *ctx) { return bpf_strcmp(long_str, long_str); } +SEC("syscall") int test_strchr_too_long(void *ctx) { return bpf_strchr(long_str, 'b'); } +SEC("syscall") int test_strchrnul_too_long(void *ctx) { return bpf_strchrnul(long_str, 'b'); } +SEC("syscall") int test_strnchr_too_long(void *ctx) { return bpf_strnchr(long_str, sizeof(long_str), 'b'); } +SEC("syscall") int test_strrchr_too_long(void *ctx) { return bpf_strrchr(long_str, 'b'); } +SEC("syscall") int test_strlen_too_long(void *ctx) { return bpf_strlen(long_str); } +SEC("syscall") int test_strnlen_too_long(void *ctx) { return bpf_strnlen(long_str, sizeof(long_str)); } +SEC("syscall") int test_strspn_str_too_long(void *ctx) { return bpf_strspn(long_str, "a"); } +SEC("syscall") int test_strspn_accept_too_long(void *ctx) { return bpf_strspn("b", long_str); } +SEC("syscall") int test_strcspn_str_too_long(void *ctx) { return bpf_strcspn(long_str, "b"); } +SEC("syscall") int test_strcspn_reject_too_long(void *ctx) { return bpf_strcspn("b", long_str); } +SEC("syscall") int test_strstr_too_long(void *ctx) { return bpf_strstr(long_str, "hello"); } +SEC("syscall") int test_strnstr_too_long(void *ctx) { return bpf_strnstr(long_str, "hello", sizeof(long_str)); } + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c new file mode 100644 index 000000000000..46697f381878 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025 Red Hat, Inc.*/ +#include "vmlinux.h" +#include +#include "bpf_misc.h" +#include "errno.h" + +char str[] = "hello world"; + +#define __test(retval) SEC("syscall") __success __retval(retval) + +/* Functional tests */ +__test(0) int test_strcmp_eq(void *ctx) { return bpf_strcmp(str, "hello world"); } +__test(1) int test_strcmp_neq(void *ctx) { return bpf_strcmp(str, "hello"); } +__test(1) int test_strchr_found(void *ctx) { return bpf_strchr(str, 'e'); } +__test(11) int test_strchr_null(void *ctx) { return bpf_strchr(str, '\0'); } +__test(-ENOENT) int test_strchr_notfound(void *ctx) { return bpf_strchr(str, 'x'); } +__test(1) int test_strchrnul_found(void *ctx) { return bpf_strchrnul(str, 'e'); } +__test(11) int test_strchrnul_notfound(void *ctx) { return bpf_strchrnul(str, 'x'); } +__test(1) int test_strnchr_found(void *ctx) { return bpf_strnchr(str, 5, 'e'); } +__test(11) int test_strnchr_null(void *ctx) { return bpf_strnchr(str, 12, '\0'); } +__test(-ENOENT) int test_strnchr_notfound(void *ctx) { return bpf_strnchr(str, 5, 'w'); } +__test(9) int test_strrchr_found(void *ctx) { return bpf_strrchr(str, 'l'); } +__test(11) int test_strrchr_null(void *ctx) { return bpf_strrchr(str, '\0'); } +__test(-ENOENT) int test_strrchr_notfound(void *ctx) { return bpf_strrchr(str, 'x'); } +__test(11) int test_strlen(void *ctx) { return bpf_strlen(str); } +__test(11) int test_strnlen(void *ctx) { return bpf_strnlen(str, 12); } +__test(5) int test_strspn(void *ctx) { return bpf_strspn(str, "ehlo"); } +__test(2) int test_strcspn(void *ctx) { return bpf_strcspn(str, "lo"); } +__test(6) int test_strstr_found(void *ctx) { return bpf_strstr(str, "world"); } +__test(-ENOENT) int test_strstr_notfound(void *ctx) { return bpf_strstr(str, "hi"); } +__test(0) int test_strstr_empty(void *ctx) { return bpf_strstr(str, ""); } +__test(0) int test_strnstr_found(void *ctx) { return bpf_strnstr(str, "hello", 6); } +__test(-ENOENT) int test_strnstr_notfound(void *ctx) { return bpf_strnstr(str, "hi", 10); } +__test(0) int test_strnstr_empty(void *ctx) { return bpf_strnstr(str, "", 1); } + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From be898cb5cbf4dede2f760303d73f5a427107282a Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Wed, 25 Jun 2025 17:59:02 +0100 Subject: selftests/bpf: Separate var preset parsing in veristat Refactor var preset parsing in veristat to simplify implementation. Prepare parsed variable beforehand so that parsing logic is separated from functionality of calculating offsets and searching fields. Introduce rvalue struct, storing either int or enum (string value), will be reused in the next patch, extract parsing rvalue into a separate function. Signed-off-by: Mykyta Yatsenko Signed-off-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20250625165904.87820-2-mykyta.yatsenko5@gmail.com --- tools/testing/selftests/bpf/veristat.c | 152 +++++++++++++++++++++------------ 1 file changed, 99 insertions(+), 53 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 4da627ca5749..4049d4dda97a 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -156,13 +156,23 @@ struct filter { bool abs; }; -struct var_preset { - char *name; +struct rvalue { enum { INTEGRAL, ENUMERATOR } type; union { long long ivalue; char *svalue; }; +}; + +struct field_access { + char *name; +}; + +struct var_preset { + struct field_access *atoms; + int atom_count; + char *full_name; + struct rvalue value; bool applied; }; @@ -1498,6 +1508,35 @@ static int reset_stat_cgroup(void) return 0; } +static int parse_rvalue(const char *val, struct rvalue *rvalue) +{ + long long value; + char *val_end; + + if (val[0] == '-' || isdigit(val[0])) { + /* must be a number */ + errno = 0; + value = strtoll(val, &val_end, 0); + if (errno == ERANGE) { + errno = 0; + value = strtoull(val, &val_end, 0); + } + if (errno || *val_end != '\0') { + fprintf(stderr, "Failed to parse value '%s'\n", val); + return -EINVAL; + } + rvalue->ivalue = value; + rvalue->type = INTEGRAL; + } else { + /* if not a number, consider it enum value */ + rvalue->svalue = strdup(val); + if (!rvalue->svalue) + return -ENOMEM; + rvalue->type = ENUMERATOR; + } + return 0; +} + static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog) { const char *base_filename = basename(strdupa(filename)); @@ -1593,13 +1632,36 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf return 0; }; +static int parse_var_atoms(const char *full_var, struct var_preset *preset) +{ + char expr[256], *name, *saveptr; + + snprintf(expr, sizeof(expr), "%s", full_var); + preset->atom_count = 0; + while ((name = strtok_r(preset->atom_count ? NULL : expr, ".", &saveptr))) { + struct field_access *tmp; + int i = preset->atom_count; + + tmp = reallocarray(preset->atoms, i + 1, sizeof(*preset->atoms)); + if (!tmp) + return -ENOMEM; + + preset->atoms = tmp; + preset->atom_count++; + + preset->atoms[i].name = strdup(name); + if (!preset->atoms[i].name) + return -ENOMEM; + } + return 0; +} + static int append_var_preset(struct var_preset **presets, int *cnt, const char *expr) { void *tmp; struct var_preset *cur; - char var[256], val[256], *val_end; - long long value; - int n; + char var[256], val[256]; + int n, err; tmp = realloc(*presets, (*cnt + 1) * sizeof(**presets)); if (!tmp) @@ -1614,32 +1676,18 @@ static int append_var_preset(struct var_preset **presets, int *cnt, const char * return -EINVAL; } - if (val[0] == '-' || isdigit(val[0])) { - /* must be a number */ - errno = 0; - value = strtoll(val, &val_end, 0); - if (errno == ERANGE) { - errno = 0; - value = strtoull(val, &val_end, 0); - } - if (errno || *val_end != '\0') { - fprintf(stderr, "Failed to parse value '%s'\n", val); - return -EINVAL; - } - cur->ivalue = value; - cur->type = INTEGRAL; - } else { - /* if not a number, consider it enum value */ - cur->svalue = strdup(val); - if (!cur->svalue) - return -ENOMEM; - cur->type = ENUMERATOR; - } + err = parse_rvalue(val, &cur->value); + if (err) + return err; - cur->name = strdup(var); - if (!cur->name) + cur->full_name = strdup(var); + if (!cur->full_name) return -ENOMEM; + err = parse_var_atoms(var, cur); + if (err) + return err; + return 0; } @@ -1766,22 +1814,20 @@ const int btf_find_member(const struct btf *btf, } static int adjust_var_secinfo(struct btf *btf, const struct btf_type *t, - struct btf_var_secinfo *sinfo, const char *var) + struct btf_var_secinfo *sinfo, struct var_preset *preset) { - char expr[256], *saveptr; const struct btf_type *base_type, *member_type; - int err, member_tid; - char *name; + int err, member_tid, i; __u32 member_offset = 0; base_type = btf__type_by_id(btf, btf__resolve_type(btf, t->type)); - snprintf(expr, sizeof(expr), "%s", var); - strtok_r(expr, ".", &saveptr); - while ((name = strtok_r(NULL, ".", &saveptr))) { - err = btf_find_member(btf, base_type, 0, name, &member_tid, &member_offset); + for (i = 1; i < preset->atom_count; ++i) { + err = btf_find_member(btf, base_type, 0, preset->atoms[i].name, + &member_tid, &member_offset); if (err) { - fprintf(stderr, "Could not find member %s for variable %s\n", name, var); + fprintf(stderr, "Could not find member %s for variable %s\n", + preset->atoms[i].name, preset->atoms[i - 1].name); return err; } member_type = btf__type_by_id(btf, member_tid); @@ -1799,7 +1845,7 @@ static int set_global_var(struct bpf_object *obj, struct btf *btf, { const struct btf_type *base_type; void *ptr; - long long value = preset->ivalue; + long long value = preset->value.ivalue; size_t size; base_type = btf__type_by_id(btf, btf__resolve_type(btf, sinfo->type)); @@ -1813,17 +1859,18 @@ static int set_global_var(struct bpf_object *obj, struct btf *btf, return -EINVAL; } - if (preset->type == ENUMERATOR) { + if (preset->value.type == ENUMERATOR) { if (btf_is_any_enum(base_type)) { - if (enum_value_from_name(btf, base_type, preset->svalue, &value)) { + if (enum_value_from_name(btf, base_type, preset->value.svalue, &value)) { fprintf(stderr, "Failed to find integer value for enum element %s\n", - preset->svalue); + preset->value.svalue); return -EINVAL; } } else { fprintf(stderr, "Value %s is not supported for type %s\n", - preset->svalue, btf__name_by_offset(btf, base_type->name_off)); + preset->value.svalue, + btf__name_by_offset(btf, base_type->name_off)); return -EINVAL; } } @@ -1890,20 +1937,16 @@ static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, i for (j = 0; j < n; ++j, ++sinfo) { const struct btf_type *var_type = btf__type_by_id(btf, sinfo->type); const char *var_name; - int var_len; if (!btf_is_var(var_type)) continue; var_name = btf__name_by_offset(btf, var_type->name_off); - var_len = strlen(var_name); for (k = 0; k < npresets; ++k) { struct btf_var_secinfo tmp_sinfo; - if (strncmp(var_name, presets[k].name, var_len) != 0 || - (presets[k].name[var_len] != '\0' && - presets[k].name[var_len] != '.')) + if (strcmp(var_name, presets[k].atoms[0].name) != 0) continue; if (presets[k].applied) { @@ -1913,7 +1956,7 @@ static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, i } tmp_sinfo = *sinfo; err = adjust_var_secinfo(btf, var_type, - &tmp_sinfo, presets[k].name); + &tmp_sinfo, presets + k); if (err) return err; @@ -1928,7 +1971,7 @@ static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, i for (i = 0; i < npresets; ++i) { if (!presets[i].applied) { fprintf(stderr, "Global variable preset %s has not been applied\n", - presets[i].name); + presets[i].full_name); } presets[i].applied = false; } @@ -3062,7 +3105,7 @@ static int handle_replay_mode(void) int main(int argc, char **argv) { - int err = 0, i; + int err = 0, i, j; if (argp_parse(&argp, argc, argv, 0, NULL, NULL)) return 1; @@ -3121,9 +3164,12 @@ int main(int argc, char **argv) } free(env.deny_filters); for (i = 0; i < env.npresets; ++i) { - free(env.presets[i].name); - if (env.presets[i].type == ENUMERATOR) - free(env.presets[i].svalue); + free(env.presets[i].full_name); + for (j = 0; j < env.presets[i].atom_count; ++j) + free(env.presets[i].atoms[j].name); + free(env.presets[i].atoms); + if (env.presets[i].value.type == ENUMERATOR) + free(env.presets[i].value.svalue); } free(env.presets); return -err; -- cgit v1.2.3 From edc99d0b021c1b345af3a27150d5d23799c1bc25 Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Wed, 25 Jun 2025 17:59:03 +0100 Subject: selftests/bpf: Support array presets in veristat Implement support for presetting values for array elements in veristat. For example: ``` sudo ./veristat set_global_vars.bpf.o -G "arr[3] = 1" ``` Arrays of structures and structure of arrays work, but each individual scalar value has to be set separately: `foo[1].bar[2] = value`. Signed-off-by: Mykyta Yatsenko Signed-off-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20250625165904.87820-3-mykyta.yatsenko5@gmail.com --- tools/testing/selftests/bpf/veristat.c | 239 ++++++++++++++++++++++++++------- 1 file changed, 189 insertions(+), 50 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 4049d4dda97a..1e9f61f9fd0a 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -165,7 +165,11 @@ struct rvalue { }; struct field_access { - char *name; + enum { FIELD_NAME, ARRAY_INDEX } type; + union { + char *name; + struct rvalue index; + }; }; struct var_preset { @@ -1630,28 +1634,60 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf free(buf); return 0; -}; +} + +static int append_preset_atom(struct var_preset *preset, char *value, bool is_index) +{ + struct field_access *tmp; + int i = preset->atom_count; + int err; + + tmp = reallocarray(preset->atoms, i + 1, sizeof(*preset->atoms)); + if (!tmp) + return -ENOMEM; + + preset->atoms = tmp; + preset->atom_count++; + + if (is_index) { + preset->atoms[i].type = ARRAY_INDEX; + err = parse_rvalue(value, &preset->atoms[i].index); + if (err) + return err; + } else { + preset->atoms[i].type = FIELD_NAME; + preset->atoms[i].name = strdup(value); + if (!preset->atoms[i].name) + return -ENOMEM; + } + return 0; +} static int parse_var_atoms(const char *full_var, struct var_preset *preset) { - char expr[256], *name, *saveptr; + char expr[256], var[256], *name, *saveptr; + int n, len, off; snprintf(expr, sizeof(expr), "%s", full_var); preset->atom_count = 0; while ((name = strtok_r(preset->atom_count ? NULL : expr, ".", &saveptr))) { - struct field_access *tmp; - int i = preset->atom_count; - - tmp = reallocarray(preset->atoms, i + 1, sizeof(*preset->atoms)); - if (!tmp) - return -ENOMEM; - - preset->atoms = tmp; - preset->atom_count++; + len = strlen(name); + /* parse variable name */ + if (sscanf(name, "%[a-zA-Z0-9_] %n", var, &off) != 1) { + fprintf(stderr, "Can't parse %s\n", name); + return -EINVAL; + } + append_preset_atom(preset, var, false); - preset->atoms[i].name = strdup(name); - if (!preset->atoms[i].name) - return -ENOMEM; + /* parse optional array indexes */ + while (off < len) { + if (sscanf(name + off, " [ %[a-zA-Z0-9_] ] %n", var, &n) != 1) { + fprintf(stderr, "Can't parse %s as index\n", name + off); + return -EINVAL; + } + append_preset_atom(preset, var, true); + off += n; + } } return 0; } @@ -1661,7 +1697,7 @@ static int append_var_preset(struct var_preset **presets, int *cnt, const char * void *tmp; struct var_preset *cur; char var[256], val[256]; - int n, err; + int n, err, i; tmp = realloc(*presets, (*cnt + 1) * sizeof(**presets)); if (!tmp) @@ -1671,10 +1707,16 @@ static int append_var_preset(struct var_preset **presets, int *cnt, const char * memset(cur, 0, sizeof(*cur)); (*cnt)++; - if (sscanf(expr, "%s = %s %n", var, val, &n) != 2 || n != strlen(expr)) { + if (sscanf(expr, " %[][a-zA-Z0-9_. ] = %s %n", var, val, &n) != 2 || n != strlen(expr)) { fprintf(stderr, "Failed to parse expression '%s'\n", expr); return -EINVAL; } + /* Remove trailing spaces from var, as scanf may add those */ + for (i = strlen(var) - 1; i > 0; --i) { + if (!isspace(var[i])) + break; + var[i] = '\0'; + } err = parse_rvalue(val, &cur->value); if (err) @@ -1764,22 +1806,96 @@ static bool is_preset_supported(const struct btf_type *t) return btf_is_int(t) || btf_is_enum(t) || btf_is_enum64(t); } -const int btf_find_member(const struct btf *btf, - const struct btf_type *parent_type, - __u32 parent_offset, - const char *member_name, - int *member_tid, - __u32 *member_offset) +static int find_enum_value(const struct btf *btf, const char *name, long long *value) +{ + const struct btf_type *t; + int cnt, i; + long long lvalue; + + cnt = btf__type_cnt(btf); + for (i = 1; i != cnt; ++i) { + t = btf__type_by_id(btf, i); + + if (!btf_is_any_enum(t)) + continue; + + if (enum_value_from_name(btf, t, name, &lvalue) == 0) { + *value = lvalue; + return 0; + } + } + return -ESRCH; +} + +static int resolve_rvalue(struct btf *btf, const struct rvalue *rvalue, long long *result) +{ + int err = 0; + + switch (rvalue->type) { + case INTEGRAL: + *result = rvalue->ivalue; + return 0; + case ENUMERATOR: + err = find_enum_value(btf, rvalue->svalue, result); + if (err) { + fprintf(stderr, "Can't resolve enum value %s\n", rvalue->svalue); + return err; + } + return 0; + default: + fprintf(stderr, "Unknown rvalue type\n"); + return -EOPNOTSUPP; + } + return 0; +} + +static int adjust_var_secinfo_array(struct btf *btf, int tid, struct field_access *atom, + const char *array_name, struct btf_var_secinfo *sinfo) +{ + const struct btf_type *t; + struct btf_array *barr; + long long idx; + int err; + + tid = btf__resolve_type(btf, tid); + t = btf__type_by_id(btf, tid); + if (!btf_is_array(t)) { + fprintf(stderr, "Array index is not expected for %s\n", + array_name); + return -EINVAL; + } + barr = btf_array(t); + err = resolve_rvalue(btf, &atom->index, &idx); + if (err) + return err; + if (idx < 0 || idx >= barr->nelems) { + fprintf(stderr, "Array index %lld is out of bounds [0, %u]: %s\n", + idx, barr->nelems, array_name); + return -EINVAL; + } + sinfo->size = btf__resolve_size(btf, barr->type); + sinfo->offset += sinfo->size * idx; + sinfo->type = btf__resolve_type(btf, barr->type); + return 0; +} + +static int adjust_var_secinfo_member(const struct btf *btf, + const struct btf_type *parent_type, + __u32 parent_offset, + const char *member_name, + struct btf_var_secinfo *sinfo) { int i; - if (!btf_is_composite(parent_type)) + if (!btf_is_composite(parent_type)) { + fprintf(stderr, "Can't resolve field %s for non-composite type\n", member_name); return -EINVAL; + } for (i = 0; i < btf_vlen(parent_type); ++i) { const struct btf_member *member; const struct btf_type *member_type; - int tid; + int tid, off; member = btf_members(parent_type) + i; tid = btf__resolve_type(btf, member->type); @@ -1787,6 +1903,7 @@ const int btf_find_member(const struct btf *btf, return -EINVAL; member_type = btf__type_by_id(btf, tid); + off = parent_offset + member->offset; if (member->name_off) { const char *name = btf__name_by_offset(btf, member->name_off); @@ -1796,15 +1913,16 @@ const int btf_find_member(const struct btf *btf, name); return -EINVAL; } - *member_offset = parent_offset + member->offset; - *member_tid = tid; + sinfo->offset += off / 8; + sinfo->type = tid; + sinfo->size = member_type->size; return 0; } } else if (btf_is_composite(member_type)) { int err; - err = btf_find_member(btf, member_type, parent_offset + member->offset, - member_name, member_tid, member_offset); + err = adjust_var_secinfo_member(btf, member_type, off, + member_name, sinfo); if (!err) return 0; } @@ -1816,26 +1934,39 @@ const int btf_find_member(const struct btf *btf, static int adjust_var_secinfo(struct btf *btf, const struct btf_type *t, struct btf_var_secinfo *sinfo, struct var_preset *preset) { - const struct btf_type *base_type, *member_type; - int err, member_tid, i; - __u32 member_offset = 0; + const struct btf_type *base_type; + const char *prev_name; + int err, i; + int tid; + + assert(preset->atom_count > 0); + assert(preset->atoms[0].type == FIELD_NAME); - base_type = btf__type_by_id(btf, btf__resolve_type(btf, t->type)); + tid = btf__resolve_type(btf, t->type); + base_type = btf__type_by_id(btf, tid); + prev_name = preset->atoms[0].name; for (i = 1; i < preset->atom_count; ++i) { - err = btf_find_member(btf, base_type, 0, preset->atoms[i].name, - &member_tid, &member_offset); - if (err) { - fprintf(stderr, "Could not find member %s for variable %s\n", - preset->atoms[i].name, preset->atoms[i - 1].name); - return err; + struct field_access *atom = preset->atoms + i; + + switch (atom->type) { + case ARRAY_INDEX: + err = adjust_var_secinfo_array(btf, tid, atom, prev_name, sinfo); + break; + case FIELD_NAME: + err = adjust_var_secinfo_member(btf, base_type, 0, atom->name, sinfo); + prev_name = atom->name; + break; + default: + fprintf(stderr, "Unknown field_access type\n"); + return -EOPNOTSUPP; } - member_type = btf__type_by_id(btf, member_tid); - sinfo->offset += member_offset / 8; - sinfo->size = member_type->size; - sinfo->type = member_tid; - base_type = member_type; + if (err) + return err; + base_type = btf__type_by_id(btf, sinfo->type); + tid = sinfo->type; } + return 0; } @@ -1854,8 +1985,8 @@ static int set_global_var(struct bpf_object *obj, struct btf *btf, return -EINVAL; } if (!is_preset_supported(base_type)) { - fprintf(stderr, "Setting value for type %s is not supported\n", - btf__name_by_offset(btf, base_type->name_off)); + fprintf(stderr, "Can't set %s. Only ints and enums are supported\n", + preset->full_name); return -EINVAL; } @@ -1972,6 +2103,7 @@ static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, i if (!presets[i].applied) { fprintf(stderr, "Global variable preset %s has not been applied\n", presets[i].full_name); + err = -EINVAL; } presets[i].applied = false; } @@ -3165,11 +3297,18 @@ int main(int argc, char **argv) free(env.deny_filters); for (i = 0; i < env.npresets; ++i) { free(env.presets[i].full_name); - for (j = 0; j < env.presets[i].atom_count; ++j) - free(env.presets[i].atoms[j].name); + for (j = 0; j < env.presets[i].atom_count; ++j) { + switch (env.presets[i].atoms[j].type) { + case FIELD_NAME: + free(env.presets[i].atoms[j].name); + break; + case ARRAY_INDEX: + if (env.presets[i].atoms[j].index.type == ENUMERATOR) + free(env.presets[i].atoms[j].index.svalue); + break; + } + } free(env.presets[i].atoms); - if (env.presets[i].value.type == ENUMERATOR) - free(env.presets[i].value.svalue); } free(env.presets); return -err; -- cgit v1.2.3 From 583588594b249e0ad3a64c68846d17018295ebe4 Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Wed, 25 Jun 2025 17:59:04 +0100 Subject: selftests/bpf: Test array presets in veristat Modify existing veristat tests to verify that array presets are applied as expected. Introduce few negative tests as well to check that common error modes are handled. Signed-off-by: Mykyta Yatsenko Signed-off-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20250625165904.87820-4-mykyta.yatsenko5@gmail.com --- .../selftests/bpf/prog_tests/test_veristat.c | 127 ++++++++++++++++++++- .../testing/selftests/bpf/progs/set_global_vars.c | 56 ++++++--- 2 files changed, 159 insertions(+), 24 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/test_veristat.c b/tools/testing/selftests/bpf/prog_tests/test_veristat.c index 47b56c258f3f..367f47e4a936 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_veristat.c +++ b/tools/testing/selftests/bpf/prog_tests/test_veristat.c @@ -60,13 +60,19 @@ static void test_set_global_vars_succeeds(void) " -G \"var_s8 = -128\" "\ " -G \"var_u8 = 255\" "\ " -G \"var_ea = EA2\" "\ - " -G \"var_eb = EB2\" "\ - " -G \"var_ec = EC2\" "\ + " -G \"var_eb = EB2\" "\ + " -G \"var_ec=EC2\" "\ " -G \"var_b = 1\" "\ - " -G \"struct1.struct2.u.var_u8 = 170\" "\ + " -G \"struct1[2].struct2[1][2].u.var_u8[2]=170\" "\ " -G \"union1.struct3.var_u8_l = 0xaa\" "\ " -G \"union1.struct3.var_u8_h = 0xaa\" "\ - "-vl2 > %s", fix->veristat, fix->tmpfile); + " -G \"arr[3]= 171\" " \ + " -G \"arr[EA2] =172\" " \ + " -G \"enum_arr[EC2]=EA3\" " \ + " -G \"three_d[31][7][EA2]=173\"" \ + " -G \"struct1[2].struct2[1][2].u.mat[5][3]=174\" " \ + " -G \"struct11 [ 7 ] [ 5 ] .struct2[0][1].u.mat[3][0] = 175\" " \ + " -vl2 > %s", fix->veristat, fix->tmpfile); read(fix->fd, fix->output, fix->sz); __CHECK_STR("_w=0xf000000000000001 ", "var_s64 = 0xf000000000000001"); @@ -81,8 +87,14 @@ static void test_set_global_vars_succeeds(void) __CHECK_STR("_w=12 ", "var_eb = EB2"); __CHECK_STR("_w=13 ", "var_ec = EC2"); __CHECK_STR("_w=1 ", "var_b = 1"); - __CHECK_STR("_w=170 ", "struct1.struct2.u.var_u8 = 170"); + __CHECK_STR("_w=170 ", "struct1[2].struct2[1][2].u.var_u8[2]=170"); __CHECK_STR("_w=0xaaaa ", "union1.var_u16 = 0xaaaa"); + __CHECK_STR("_w=171 ", "arr[3]= 171"); + __CHECK_STR("_w=172 ", "arr[EA2] =172"); + __CHECK_STR("_w=10 ", "enum_arr[EC2]=EA3"); + __CHECK_STR("_w=173 ", "matrix[31][7][11]=173"); + __CHECK_STR("_w=174 ", "struct1[2].struct2[1][2].u.mat[5][3]=174"); + __CHECK_STR("_w=175 ", "struct11[7][5].struct2[0][1].u.mat[3][0]=175"); out: teardown_fixture(fix); @@ -129,6 +141,95 @@ out: teardown_fixture(fix); } +static void test_unsupported_ptr_array_type(void) +{ + struct fixture *fix = init_fixture(); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"ptr_arr[0] = 0\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + read(fix->fd, fix->output, fix->sz); + __CHECK_STR("Can't set ptr_arr[0]. Only ints and enums are supported", "ptr_arr"); + +out: + teardown_fixture(fix); +} + +static void test_array_out_of_bounds(void) +{ + struct fixture *fix = init_fixture(); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"arr[99] = 0\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + read(fix->fd, fix->output, fix->sz); + __CHECK_STR("Array index 99 is out of bounds", "arr[99]"); + +out: + teardown_fixture(fix); +} + +static void test_array_index_not_found(void) +{ + struct fixture *fix = init_fixture(); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"arr[EG2] = 0\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + read(fix->fd, fix->output, fix->sz); + __CHECK_STR("Can't resolve enum value EG2", "arr[EG2]"); + +out: + teardown_fixture(fix); +} + +static void test_array_index_for_non_array(void) +{ + struct fixture *fix = init_fixture(); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"var_b[0] = 1\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + pread(fix->fd, fix->output, fix->sz, 0); + __CHECK_STR("Array index is not expected for var_b", "var_b[0] = 1"); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"union1.struct3[0].var_u8_l=1\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + pread(fix->fd, fix->output, fix->sz, 0); + __CHECK_STR("Array index is not expected for struct3", "union1.struct3[0].var_u8_l=1"); + +out: + teardown_fixture(fix); +} + +static void test_no_array_index_for_array(void) +{ + struct fixture *fix = init_fixture(); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"arr = 1\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + pread(fix->fd, fix->output, fix->sz, 0); + __CHECK_STR("Can't set arr. Only ints and enums are supported", "arr = 1"); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"struct1[0].struct2.u.var_u8[2]=1\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + pread(fix->fd, fix->output, fix->sz, 0); + __CHECK_STR("Can't resolve field u for non-composite type", "struct1[0].struct2.u.var_u8[2]=1"); + +out: + teardown_fixture(fix); +} + void test_veristat(void) { if (test__start_subtest("set_global_vars_succeeds")) @@ -139,6 +240,22 @@ void test_veristat(void) if (test__start_subtest("set_global_vars_from_file_succeeds")) test_set_global_vars_from_file_succeeds(); + + if (test__start_subtest("test_unsupported_ptr_array_type")) + test_unsupported_ptr_array_type(); + + if (test__start_subtest("test_array_out_of_bounds")) + test_array_out_of_bounds(); + + if (test__start_subtest("test_array_index_not_found")) + test_array_index_not_found(); + + if (test__start_subtest("test_array_index_for_non_array")) + test_array_index_for_non_array(); + + if (test__start_subtest("test_no_array_index_for_array")) + test_no_array_index_for_array(); + } #undef __CHECK_STR diff --git a/tools/testing/selftests/bpf/progs/set_global_vars.c b/tools/testing/selftests/bpf/progs/set_global_vars.c index 90f5656c3991..ebaef28b2cb3 100644 --- a/tools/testing/selftests/bpf/progs/set_global_vars.c +++ b/tools/testing/selftests/bpf/progs/set_global_vars.c @@ -7,22 +7,30 @@ char _license[] SEC("license") = "GPL"; -enum Enum { EA1 = 0, EA2 = 11 }; +typedef __s32 s32; +typedef s32 i32; +typedef __u8 u8; + +enum Enum { EA1 = 0, EA2 = 11, EA3 = 10 }; enum Enumu64 {EB1 = 0llu, EB2 = 12llu }; enum Enums64 { EC1 = 0ll, EC2 = 13ll }; const volatile __s64 var_s64 = -1; const volatile __u64 var_u64 = 0; -const volatile __s32 var_s32 = -1; +const volatile i32 var_s32 = -1; const volatile __u32 var_u32 = 0; const volatile __s16 var_s16 = -1; const volatile __u16 var_u16 = 0; const volatile __s8 var_s8 = -1; -const volatile __u8 var_u8 = 0; +const volatile u8 var_u8 = 0; const volatile enum Enum var_ea = EA1; const volatile enum Enumu64 var_eb = EB1; const volatile enum Enums64 var_ec = EC1; const volatile bool var_b = false; +const volatile i32 arr[32]; +const volatile enum Enum enum_arr[32]; +const volatile i32 three_d[47][19][17]; +const volatile i32 *ptr_arr[32]; struct Struct { int:16; @@ -35,34 +43,38 @@ struct Struct { volatile struct { const int:1; union { - const volatile __u8 var_u8; + const volatile u8 var_u8[3]; const volatile __s16 filler3; const int:1; + s32 mat[7][5]; } u; }; - } struct2; + } struct2[2][4]; }; const volatile __u32 stru = 0; /* same prefix as below */ -const volatile struct Struct struct1 = {.struct2 = {.u = {.var_u8 = 1}}}; +const volatile struct Struct struct1[3]; +const volatile struct Struct struct11[11][7]; -union Union { - __u16 var_u16; - struct Struct3 { - struct { - __u8 var_u8_l; - }; +struct Struct3 { + struct { + u8 var_u8_l; + }; + struct { struct { - struct { - __u8 var_u8_h; - }; + u8 var_u8_h; }; - } struct3; + }; }; -const volatile union Union union1 = {.var_u16 = -1}; +typedef struct Struct3 Struct3_t; -char arr[4] = {0}; +union Union { + __u16 var_u16; + Struct3_t struct3; +}; + +const volatile union Union union1 = {.var_u16 = -1}; SEC("socket") int test_set_globals(void *ctx) @@ -81,8 +93,14 @@ int test_set_globals(void *ctx) a = var_eb; a = var_ec; a = var_b; - a = struct1.struct2.u.var_u8; + a = struct1[2].struct2[1][2].u.var_u8[2]; a = union1.var_u16; + a = arr[3]; + a = arr[EA2]; + a = enum_arr[EC2]; + a = three_d[31][7][EA2]; + a = struct1[2].struct2[1][2].u.mat[5][3]; + a = struct11[7][5].struct2[0][1].u.mat[3][0]; return a; } -- cgit v1.2.3 From b7863babce0a7aacb252e50d75b192476aa81ea1 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 25 Jun 2025 12:41:23 +0200 Subject: selftests: forwarding: lib: Split setup_wait() setup_wait() takes an optional argument and then is called from the top level of the test script. That confuses shellcheck, which thinks that maybe the intention is to pass $1 of the script to the function, which is never the case. To avoid having to annotate every single new test with a SC disable, split the function in two: one that takes a mandatory argument, and one that takes no argument at all. Convert the two existing users of that optional argument, both in Spectrum resource selftest, to use the new form. Clean up vxlan_bridge_1q_mc_ul.sh to not pass a now-unused argument. Signed-off-by: Petr Machata Link: https://patch.msgid.link/8e13123236fe3912ae29bc04a1528bdd8551da1f.1750847794.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh | 2 +- .../selftests/drivers/net/mlxsw/spectrum/resource_scale.sh | 2 +- tools/testing/selftests/net/forwarding/lib.sh | 9 +++++++-- tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh | 2 +- 4 files changed, 10 insertions(+), 5 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh index 899b6892603f..d7505b933aef 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh @@ -51,7 +51,7 @@ for current_test in ${TESTS:-$ALL_TESTS}; do fi ${current_test}_setup_prepare - setup_wait $num_netifs + setup_wait_n $num_netifs # Update target in case occupancy of a certain resource changed # following the test setup. target=$(${current_test}_get_target "$should_fail") diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh index 482ebb744eba..7b98cdd0580d 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh @@ -55,7 +55,7 @@ for current_test in ${TESTS:-$ALL_TESTS}; do continue fi ${current_test}_setup_prepare - setup_wait $num_netifs + setup_wait_n $num_netifs # Update target in case occupancy of a certain resource # changed following the test setup. target=$(${current_test}_get_target "$should_fail") diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 83ee6a07e072..9308b2f77fed 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -526,9 +526,9 @@ setup_wait_dev_with_timeout() return 1 } -setup_wait() +setup_wait_n() { - local num_netifs=${1:-$NUM_NETIFS} + local num_netifs=$1; shift local i for ((i = 1; i <= num_netifs; ++i)); do @@ -539,6 +539,11 @@ setup_wait() sleep $WAIT_TIME } +setup_wait() +{ + setup_wait_n "$NUM_NETIFS" +} + wait_for_dev() { local dev=$1; shift diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh index 7ec58b6b1128..462db0b603e7 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh @@ -765,7 +765,7 @@ ipv6_mcroute_fdb_sep_rx() trap cleanup EXIT setup_prepare -setup_wait "$NUM_NETIFS" +setup_wait tests_run exit "$EXIT_STATUS" -- cgit v1.2.3 From bacdf5a0e69d343e2a380be768c1f277088e35d8 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 27 Jun 2025 12:12:21 -0700 Subject: selftests/bpf: Fix cgroup_xattr/read_cgroupfs_xattr cgroup_xattr/read_cgroupfs_xattr has two issues: 1. cgroup_xattr/read_cgroupfs_xattr messes up lo without creating a netns first. This causes issue with other tests. Fix this by using a different hook (lsm.s/file_open) and not messing with lo. 2. cgroup_xattr/read_cgroupfs_xattr sets up cgroups without proper mount namespaces. Fix this by using the existing cgroup helpers. A new helper set_cgroup_xattr() is added to set xattr on cgroup files. Fixes: f4fba2d6d282 ("selftests/bpf: Add tests for bpf_cgroup_read_xattr") Reported-by: Alexei Starovoitov Closes: https://lore.kernel.org/bpf/CAADnVQ+iqMi2HEj_iH7hsx+XJAsqaMWqSDe4tzcGAnehFWA9Sw@mail.gmail.com/ Signed-off-by: Song Liu Tested-by: Eduard Zingerman Tested-by: Ihor Solodrai Link: https://lore.kernel.org/r/20250627191221.765921-1-song@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/cgroup_helpers.c | 21 ++++ tools/testing/selftests/bpf/cgroup_helpers.h | 4 + .../selftests/bpf/prog_tests/cgroup_xattr.c | 117 ++++----------------- .../selftests/bpf/progs/read_cgroupfs_xattr.c | 4 +- 4 files changed, 49 insertions(+), 97 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index e4535451322e..15f626014872 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -318,6 +319,26 @@ int join_parent_cgroup(const char *relative_path) return join_cgroup_from_top(cgroup_path); } +/** + * set_cgroup_xattr() - Set xattr on a cgroup dir + * @relative_path: The cgroup path, relative to the workdir, to set xattr + * @name: xattr name + * @value: xattr value + * + * This function set xattr on cgroup dir. + * + * On success, it returns 0, otherwise on failure it returns -1. + */ +int set_cgroup_xattr(const char *relative_path, + const char *name, + const char *value) +{ + char cgroup_path[PATH_MAX + 1]; + + format_cgroup_path(cgroup_path, relative_path); + return setxattr(cgroup_path, name, value, strlen(value) + 1, 0); +} + /** * __cleanup_cgroup_environment() - Delete temporary cgroups * diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index 502845160d88..182e1ac36c95 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -26,6 +26,10 @@ int join_cgroup(const char *relative_path); int join_root_cgroup(void); int join_parent_cgroup(const char *relative_path); +int set_cgroup_xattr(const char *relative_path, + const char *name, + const char *value); + int setup_cgroup_environment(void); void cleanup_cgroup_environment(void); diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c index 87978a0f7eb7..e0dd966e4a3e 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c @@ -7,133 +7,60 @@ #include #include #include -#include - #include +#include "cgroup_helpers.h" #include "read_cgroupfs_xattr.skel.h" #include "cgroup_read_xattr.skel.h" -#define CGROUP_FS_ROOT "/sys/fs/cgroup/" -#define CGROUP_FS_PARENT CGROUP_FS_ROOT "foo/" +#define CGROUP_FS_PARENT "foo/" #define CGROUP_FS_CHILD CGROUP_FS_PARENT "bar/" - -static int move_pid_to_cgroup(const char *cgroup_folder, pid_t pid) -{ - char filename[128]; - char pid_str[64]; - int procs_fd; - int ret; - - snprintf(filename, sizeof(filename), "%scgroup.procs", cgroup_folder); - snprintf(pid_str, sizeof(pid_str), "%d", pid); - - procs_fd = open(filename, O_WRONLY | O_APPEND); - if (!ASSERT_OK_FD(procs_fd, "open")) - return -1; - - ret = write(procs_fd, pid_str, strlen(pid_str)); - close(procs_fd); - if (!ASSERT_GT(ret, 0, "write cgroup.procs")) - return -1; - return 0; -} - -static void reset_cgroups_and_lo(void) -{ - rmdir(CGROUP_FS_CHILD); - rmdir(CGROUP_FS_PARENT); - system("ip addr del 1.1.1.1/32 dev lo"); - system("ip link set dev lo down"); -} +#define TMP_FILE "/tmp/selftests_cgroup_xattr" static const char xattr_value_a[] = "bpf_selftest_value_a"; static const char xattr_value_b[] = "bpf_selftest_value_b"; static const char xattr_name[] = "user.bpf_test"; -static int setup_cgroups_and_lo(void) -{ - int err; - - err = mkdir(CGROUP_FS_PARENT, 0755); - if (!ASSERT_OK(err, "mkdir 1")) - goto error; - err = mkdir(CGROUP_FS_CHILD, 0755); - if (!ASSERT_OK(err, "mkdir 2")) - goto error; - - err = setxattr(CGROUP_FS_PARENT, xattr_name, xattr_value_a, - strlen(xattr_value_a) + 1, 0); - if (!ASSERT_OK(err, "setxattr 1")) - goto error; - - err = setxattr(CGROUP_FS_CHILD, xattr_name, xattr_value_b, - strlen(xattr_value_b) + 1, 0); - if (!ASSERT_OK(err, "setxattr 2")) - goto error; - - err = system("ip link set dev lo up"); - if (!ASSERT_OK(err, "lo up")) - goto error; - - err = system("ip addr add 1.1.1.1 dev lo"); - if (!ASSERT_OK(err, "lo addr v4")) - goto error; - - err = write_sysctl("/proc/sys/net/ipv4/ping_group_range", "0 0"); - if (!ASSERT_OK(err, "write_sysctl")) - goto error; - - return 0; -error: - reset_cgroups_and_lo(); - return err; -} - static void test_read_cgroup_xattr(void) { - struct sockaddr_in sa4 = { - .sin_family = AF_INET, - .sin_addr.s_addr = htonl(INADDR_LOOPBACK), - }; + int tmp_fd, parent_cgroup_fd = -1, child_cgroup_fd = -1; struct read_cgroupfs_xattr *skel = NULL; - pid_t pid = gettid(); - int sock_fd = -1; - int connect_fd = -1; - if (!ASSERT_OK(setup_cgroups_and_lo(), "setup_cgroups_and_lo")) + parent_cgroup_fd = test__join_cgroup(CGROUP_FS_PARENT); + if (!ASSERT_OK_FD(parent_cgroup_fd, "create parent cgroup")) return; - if (!ASSERT_OK(move_pid_to_cgroup(CGROUP_FS_CHILD, pid), - "move_pid_to_cgroup")) + if (!ASSERT_OK(set_cgroup_xattr(CGROUP_FS_PARENT, xattr_name, xattr_value_a), + "set parent xattr")) + goto out; + + child_cgroup_fd = test__join_cgroup(CGROUP_FS_CHILD); + if (!ASSERT_OK_FD(child_cgroup_fd, "create child cgroup")) + goto out; + if (!ASSERT_OK(set_cgroup_xattr(CGROUP_FS_CHILD, xattr_name, xattr_value_b), + "set child xattr")) goto out; skel = read_cgroupfs_xattr__open_and_load(); if (!ASSERT_OK_PTR(skel, "read_cgroupfs_xattr__open_and_load")) goto out; - skel->bss->target_pid = pid; + skel->bss->target_pid = gettid(); if (!ASSERT_OK(read_cgroupfs_xattr__attach(skel), "read_cgroupfs_xattr__attach")) goto out; - sock_fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP); - if (!ASSERT_OK_FD(sock_fd, "sock create")) - goto out; - - connect_fd = connect(sock_fd, &sa4, sizeof(sa4)); - if (!ASSERT_OK_FD(connect_fd, "connect 1")) - goto out; - close(connect_fd); + tmp_fd = open(TMP_FILE, O_RDONLY | O_CREAT); + ASSERT_OK_FD(tmp_fd, "open tmp file"); + close(tmp_fd); ASSERT_TRUE(skel->bss->found_value_a, "found_value_a"); ASSERT_TRUE(skel->bss->found_value_b, "found_value_b"); out: - close(connect_fd); - close(sock_fd); + close(child_cgroup_fd); + close(parent_cgroup_fd); read_cgroupfs_xattr__destroy(skel); - move_pid_to_cgroup(CGROUP_FS_ROOT, pid); - reset_cgroups_and_lo(); + unlink(TMP_FILE); } void test_cgroup_xattr(void) diff --git a/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c b/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c index 855f85fc5522..405adbe5e8b0 100644 --- a/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c +++ b/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c @@ -17,8 +17,8 @@ static const char expected_value_b[] = "bpf_selftest_value_b"; bool found_value_a; bool found_value_b; -SEC("lsm.s/socket_connect") -int BPF_PROG(test_socket_connect) +SEC("lsm.s/file_open") +int BPF_PROG(test_file_open) { u64 cgrp_id = bpf_get_current_cgroup_id(); struct cgroup_subsys_state *css, *tmp; -- cgit v1.2.3 From 8cc8d749dc7eac9ee8355aa91042aac6c7afa639 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Jun 2025 12:34:20 +0000 Subject: selftests/net: packetdrill: add tcp_dsack_mult.pkt Test DSACK behavior with non contiguous ranges. Without prior fix (tcp: fix tcp_ofo_queue() to avoid including too much DUP SACK range) this would fail with: tcp_dsack_mult.pkt:37: error handling packet: bad value outbound TCP option 5 script packet: 0.100682 . 1:1(0) ack 6001 actual packet: 0.100679 . 1:1(0) ack 6001 win 1097 Signed-off-by: Eric Dumazet Cc: xin.guo Link: https://patch.msgid.link/20250626123420.1933835-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- .../selftests/net/packetdrill/tcp_dsack_mult.pkt | 45 ++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt b/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt new file mode 100644 index 000000000000..c790d0af635e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test various DSACK (RFC 2883) behaviors. + +--mss=1000 + +`./defaults.sh` + + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.1 < . 1:1(0) ack 1 win 1024 + +0 accept(3, ..., ...) = 4 + +// First SACK range. + +0 < P. 1001:2001(1000) ack 1 win 1024 + +0 > . 1:1(0) ack 1 + +// Check SACK coalescing (contiguous sequence). + +0 < P. 2001:3001(1000) ack 1 win 1024 + +0 > . 1:1(0) ack 1 + +// Check we have two SACK ranges for non contiguous sequences. + +0 < P. 4001:5001(1000) ack 1 win 1024 + +0 > . 1:1(0) ack 1 + +// Three ranges. + +0 < P. 7001:8001(1000) ack 1 win 1024 + +0 > . 1:1(0) ack 1 + +// DSACK (1001:3001) + SACK (6001:7001) + +0 < P. 1:6001(6000) ack 1 win 1024 + +0 > . 1:1(0) ack 6001 + +// DSACK (7001:8001) + +0 < P. 6001:8001(2000) ack 1 win 1024 + +0 > . 1:1(0) ack 8001 + +// DSACK for an older segment. + +0 < P. 1:1001(1000) ack 1 win 1024 + +0 > . 1:1(0) ack 8001 -- cgit v1.2.3 From ffaff1804e2ca1e88caa9d2e13ac6b4b7ecf011c Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Fri, 27 Jun 2025 15:43:42 +0100 Subject: selftests/bpf: improve error messages in veristat Return error if preset parsing fails. Avoid proceeding with veristat run if preset does not parse. Before: ``` ./veristat set_global_vars.bpf.o -G "arr[999999999999999999999] = 1" Failed to parse value '999999999999999999999' Processing 'set_global_vars.bpf.o'... File Program Verdict Duration (us) Insns States Program size Jited size --------------------- ---------------- ------- ------------- ----- ------ ------------ ---------- set_global_vars.bpf.o test_set_globals success 27 64 0 82 0 --------------------- ---------------- ------- ------------- ----- ------ ------------ ---------- Done. Processed 1 files, 0 programs. Skipped 1 files, 0 programs. ``` After: ``` ./veristat set_global_vars.bpf.o -G "arr[999999999999999999999] = 1" Failed to parse value '999999999999999999999' Failed to parse global variable presets: arr[999999999999999999999] = 1 ``` Improve error messages: * If preset struct member can't be found. * Array index out of bounds Extract rtrim function. Signed-off-by: Mykyta Yatsenko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250627144342.686896-1-mykyta.yatsenko5@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 36 +++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 1e9f61f9fd0a..09cfbd486f92 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -890,6 +890,18 @@ static bool is_desc_sym(char c) return c == 'v' || c == 'V' || c == '.' || c == '!' || c == '_'; } +static char *rtrim(char *str) +{ + int i; + + for (i = strlen(str) - 1; i > 0; --i) { + if (!isspace(str[i])) + break; + str[i] = '\0'; + } + return str; +} + static int parse_stat(const char *stat_name, struct stat_specs *specs) { int id; @@ -1666,7 +1678,7 @@ static int append_preset_atom(struct var_preset *preset, char *value, bool is_in static int parse_var_atoms(const char *full_var, struct var_preset *preset) { char expr[256], var[256], *name, *saveptr; - int n, len, off; + int n, len, off, err; snprintf(expr, sizeof(expr), "%s", full_var); preset->atom_count = 0; @@ -1677,7 +1689,9 @@ static int parse_var_atoms(const char *full_var, struct var_preset *preset) fprintf(stderr, "Can't parse %s\n", name); return -EINVAL; } - append_preset_atom(preset, var, false); + err = append_preset_atom(preset, var, false); + if (err) + return err; /* parse optional array indexes */ while (off < len) { @@ -1685,7 +1699,9 @@ static int parse_var_atoms(const char *full_var, struct var_preset *preset) fprintf(stderr, "Can't parse %s as index\n", name + off); return -EINVAL; } - append_preset_atom(preset, var, true); + err = append_preset_atom(preset, var, true); + if (err) + return err; off += n; } } @@ -1697,7 +1713,7 @@ static int append_var_preset(struct var_preset **presets, int *cnt, const char * void *tmp; struct var_preset *cur; char var[256], val[256]; - int n, err, i; + int n, err; tmp = realloc(*presets, (*cnt + 1) * sizeof(**presets)); if (!tmp) @@ -1712,11 +1728,7 @@ static int append_var_preset(struct var_preset **presets, int *cnt, const char * return -EINVAL; } /* Remove trailing spaces from var, as scanf may add those */ - for (i = strlen(var) - 1; i > 0; --i) { - if (!isspace(var[i])) - break; - var[i] = '\0'; - } + rtrim(var); err = parse_rvalue(val, &cur->value); if (err) @@ -1869,7 +1881,7 @@ static int adjust_var_secinfo_array(struct btf *btf, int tid, struct field_acces if (err) return err; if (idx < 0 || idx >= barr->nelems) { - fprintf(stderr, "Array index %lld is out of bounds [0, %u]: %s\n", + fprintf(stderr, "Array index %lld is out of bounds [0, %u): %s\n", idx, barr->nelems, array_name); return -EINVAL; } @@ -1928,7 +1940,7 @@ static int adjust_var_secinfo_member(const struct btf *btf, } } - return -EINVAL; + return -ESRCH; } static int adjust_var_secinfo(struct btf *btf, const struct btf_type *t, @@ -1955,6 +1967,8 @@ static int adjust_var_secinfo(struct btf *btf, const struct btf_type *t, break; case FIELD_NAME: err = adjust_var_secinfo_member(btf, base_type, 0, atom->name, sinfo); + if (err == -ESRCH) + fprintf(stderr, "Can't find '%s'\n", atom->name); prev_name = atom->name; break; default: -- cgit v1.2.3 From c4b1be928ea05add0e0f9334e5bc44932395782e Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Thu, 26 Jun 2025 18:55:39 -0700 Subject: selftests/bpf: bpf_rdonly_cast u{8,16,32,64} access tests Tests with aligned and misaligned memory access of different sizes via pointer returned by bpf_rdonly_cast(). Suggested-by: Andrii Nakryiko Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250627015539.1439656-1-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/mem_rdonly_untrusted.c | 41 ++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c index 00604755e698..b0486af36f55 100644 --- a/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c +++ b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c @@ -133,4 +133,45 @@ int mixed_mem_type(void *ctx) return *p; } +__attribute__((__aligned__(8))) +u8 global[] = { + 0x11, 0x22, 0x33, 0x44, + 0x55, 0x66, 0x77, 0x88, + 0x99 +}; + +__always_inline +static u64 combine(void *p) +{ + u64 acc; + + acc = 0; +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + acc |= (*(u64 *)p >> 56) << 24; + acc |= (*(u32 *)p >> 24) << 16; + acc |= (*(u16 *)p >> 8) << 8; + acc |= *(u8 *)p; +#else + acc |= (*(u64 *)p & 0xff) << 24; + acc |= (*(u32 *)p & 0xff) << 16; + acc |= (*(u16 *)p & 0xff) << 8; + acc |= *(u8 *)p; +#endif + return acc; +} + +SEC("socket") +__retval(0x88442211) +int diff_size_access(void *ctx) +{ + return combine(bpf_rdonly_cast(&global, 0)); +} + +SEC("socket") +__retval(0x99553322) +int misaligned_access(void *ctx) +{ + return combine(bpf_rdonly_cast(&global, 0) + 1); +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From a6a2a8a42972476ecff61d2ffabaa1e8ae162f34 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 23 Jun 2025 23:25:39 +0200 Subject: tools/nolibc: MIPS: add support for N64 and N32 ABIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for the MIPS 64bit N64 and ILP32 N32 ABIs. In addition to different byte orders and ABIs there are also different releases of the MIPS architecture. To avoid blowing up the test matrix, only add a subset of all possible test combinations. Signed-off-by: Thomas Weißschuh Tested-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20250623-nolibc-mips-n32-v3-4-6ae2d89f4259@weissschuh.net --- tools/include/nolibc/arch-mips.h | 105 +++++++++++++++++++++---- tools/testing/selftests/nolibc/Makefile.nolibc | 26 ++++++ tools/testing/selftests/nolibc/run-tests.sh | 2 +- 3 files changed, 117 insertions(+), 16 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h index 4f0b969f66af..0cbac63b249a 100644 --- a/tools/include/nolibc/arch-mips.h +++ b/tools/include/nolibc/arch-mips.h @@ -10,7 +10,7 @@ #include "compiler.h" #include "crt.h" -#if !defined(_ABIO32) +#if !defined(_ABIO32) && !defined(_ABIN32) && !defined(_ABI64) #error Unsupported MIPS ABI #endif @@ -32,11 +32,32 @@ * - the arguments are cast to long and assigned into the target registers * which are then simply passed as registers to the asm code, so that we * don't have to experience issues with register constraints. + * + * Syscalls for MIPS ABI N32, same as ABI O32 with the following differences : + * - arguments are in a0, a1, a2, a3, t0, t1, t2, t3. + * t0..t3 are also known as a4..a7. + * - stack is 16-byte aligned */ +#if defined(_ABIO32) + #define _NOLIBC_SYSCALL_CLOBBERLIST \ "memory", "cc", "at", "v1", "hi", "lo", \ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" +#define _NOLIBC_SYSCALL_STACK_RESERVE "addiu $sp, $sp, -32\n" +#define _NOLIBC_SYSCALL_STACK_UNRESERVE "addiu $sp, $sp, 32\n" + +#else /* _ABIN32 || _ABI64 */ + +/* binutils, GCC and clang disagree about register aliases, use numbers instead. */ +#define _NOLIBC_SYSCALL_CLOBBERLIST \ + "memory", "cc", "at", "v1", \ + "10", "11", "12", "13", "14", "15", "24", "25" + +#define _NOLIBC_SYSCALL_STACK_RESERVE +#define _NOLIBC_SYSCALL_STACK_UNRESERVE + +#endif /* _ABIO32 */ #define my_syscall0(num) \ ({ \ @@ -44,9 +65,9 @@ register long _arg4 __asm__ ("a3"); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r"(_num), "=r"(_arg4) \ : "r"(_num) \ : _NOLIBC_SYSCALL_CLOBBERLIST \ @@ -61,9 +82,9 @@ register long _arg4 __asm__ ("a3"); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r"(_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1) \ @@ -80,9 +101,9 @@ register long _arg4 __asm__ ("a3"); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r"(_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2) \ @@ -100,9 +121,9 @@ register long _arg4 __asm__ ("a3"); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r"(_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2), "r"(_arg3) \ @@ -120,9 +141,9 @@ register long _arg4 __asm__ ("a3") = (long)(arg4); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r" (_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \ @@ -131,6 +152,8 @@ _arg4 ? -_num : _num; \ }) +#if defined(_ABIO32) + #define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ ({ \ register long _num __asm__ ("v0") = (num); \ @@ -141,10 +164,10 @@ register long _arg5 = (long)(arg5); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "sw %7, 16($sp)\n" \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r" (_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \ @@ -164,11 +187,53 @@ register long _arg6 = (long)(arg6); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "sw %7, 16($sp)\n" \ "sw %8, 20($sp)\n" \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ + : "=r" (_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_arg6) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#else /* _ABIN32 || _ABI64 */ + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("v0") = (num); \ + register long _arg1 __asm__ ("$4") = (long)(arg1); \ + register long _arg2 __asm__ ("$5") = (long)(arg2); \ + register long _arg3 __asm__ ("$6") = (long)(arg3); \ + register long _arg4 __asm__ ("$7") = (long)(arg4); \ + register long _arg5 __asm__ ("$8") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "syscall\n" \ + : "=r" (_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__ ("v0") = (num); \ + register long _arg1 __asm__ ("$4") = (long)(arg1); \ + register long _arg2 __asm__ ("$5") = (long)(arg2); \ + register long _arg3 __asm__ ("$6") = (long)(arg3); \ + register long _arg4 __asm__ ("$7") = (long)(arg4); \ + register long _arg5 __asm__ ("$8") = (long)(arg5); \ + register long _arg6 __asm__ ("$9") = (long)(arg6); \ + \ + __asm__ volatile ( \ + "syscall\n" \ : "=r" (_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ @@ -178,15 +243,25 @@ _arg4 ? -_num : _num; \ }) +#endif /* _ABIO32 */ + /* startup code, note that it's called __start on MIPS */ void __start(void); void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector __start(void) { __asm__ volatile ( "move $a0, $sp\n" /* save stack pointer to $a0, as arg1 of _start_c */ +#if defined(_ABIO32) "addiu $sp, $sp, -16\n" /* the callee expects to save a0..a3 there */ +#endif /* _ABIO32 */ "lui $t9, %hi(_start_c)\n" /* ABI requires current function address in $t9 */ "ori $t9, %lo(_start_c)\n" +#if defined(_ABI64) + "lui $t0, %highest(_start_c)\n" + "ori $t0, %higher(_start_c)\n" + "dsll $t0, 0x20\n" + "or $t9, $t0\n" +#endif /* _ABI64 */ "jalr $t9\n" /* transfer to c runtime */ ); __nolibc_entrypoint_epilogue(); diff --git a/tools/testing/selftests/nolibc/Makefile.nolibc b/tools/testing/selftests/nolibc/Makefile.nolibc index 6d62f350d0c1..9b56191b10b3 100644 --- a/tools/testing/selftests/nolibc/Makefile.nolibc +++ b/tools/testing/selftests/nolibc/Makefile.nolibc @@ -53,6 +53,10 @@ ARCH_ppc64 = powerpc ARCH_ppc64le = powerpc ARCH_mips32le = mips ARCH_mips32be = mips +ARCH_mipsn32le = mips +ARCH_mipsn32be = mips +ARCH_mips64le = mips +ARCH_mips64be = mips ARCH_riscv32 = riscv ARCH_riscv64 = riscv ARCH_s390x = s390 @@ -69,6 +73,10 @@ IMAGE_arm = arch/arm/boot/zImage IMAGE_armthumb = arch/arm/boot/zImage IMAGE_mips32le = vmlinuz IMAGE_mips32be = vmlinuz +IMAGE_mipsn32le = vmlinuz +IMAGE_mipsn32be = vmlinuz +IMAGE_mips64le = vmlinuz +IMAGE_mips64be = vmlinuz IMAGE_ppc = vmlinux IMAGE_ppc64 = vmlinux IMAGE_ppc64le = arch/powerpc/boot/zImage @@ -93,6 +101,10 @@ DEFCONFIG_arm = multi_v7_defconfig DEFCONFIG_armthumb = multi_v7_defconfig DEFCONFIG_mips32le = malta_defconfig DEFCONFIG_mips32be = malta_defconfig generic/eb.config +DEFCONFIG_mipsn32le = malta_defconfig generic/64r2.config +DEFCONFIG_mipsn32be = malta_defconfig generic/64r6.config generic/eb.config +DEFCONFIG_mips64le = malta_defconfig generic/64r6.config +DEFCONFIG_mips64be = malta_defconfig generic/64r2.config generic/eb.config DEFCONFIG_ppc = pmac32_defconfig DEFCONFIG_ppc64 = powernv_be_defconfig DEFCONFIG_ppc64le = powernv_defconfig @@ -124,6 +136,10 @@ QEMU_ARCH_arm = arm QEMU_ARCH_armthumb = arm QEMU_ARCH_mips32le = mipsel # works with malta_defconfig QEMU_ARCH_mips32be = mips +QEMU_ARCH_mipsn32le = mips64el +QEMU_ARCH_mipsn32be = mips64 +QEMU_ARCH_mips64le = mips64el +QEMU_ARCH_mips64be = mips64 QEMU_ARCH_ppc = ppc QEMU_ARCH_ppc64 = ppc64 QEMU_ARCH_ppc64le = ppc64 @@ -139,6 +155,8 @@ QEMU_ARCH_m68k = m68k QEMU_ARCH = $(QEMU_ARCH_$(XARCH)) QEMU_ARCH_USER_ppc64le = ppc64le +QEMU_ARCH_USER_mipsn32le = mipsn32el +QEMU_ARCH_USER_mipsn32be = mipsn32 QEMU_ARCH_USER = $(or $(QEMU_ARCH_USER_$(XARCH)),$(QEMU_ARCH_$(XARCH))) QEMU_BIOS_DIR = /usr/share/edk2/ @@ -157,6 +175,10 @@ QEMU_ARGS_arm = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_armthumb = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_mips32le = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_mips32be = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_mipsn32le = -M malta -cpu 5KEc -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_mipsn32be = -M malta -cpu I6400 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_mips64le = -M malta -cpu I6400 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_mips64be = -M malta -cpu 5KEc -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_ppc = -M g3beige -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_ppc64 = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_ppc64le = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" @@ -191,6 +213,10 @@ CFLAGS_s390x = -m64 CFLAGS_s390 = -m31 CFLAGS_mips32le = -EL -mabi=32 -fPIC CFLAGS_mips32be = -EB -mabi=32 +CFLAGS_mipsn32le = -EL -mabi=n32 -fPIC -march=mips64r2 +CFLAGS_mipsn32be = -EB -mabi=n32 -march=mips64r6 +CFLAGS_mips64le = -EL -mabi=64 -march=mips64r6 +CFLAGS_mips64be = -EB -mabi=64 -march=mips64r2 CFLAGS_sparc32 = $(call cc-option,-m32) ifeq ($(origin XARCH),command line) CFLAGS_XARCH = $(CFLAGS_$(XARCH)) diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh index 279fbd93ef70..fc8351994566 100755 --- a/tools/testing/selftests/nolibc/run-tests.sh +++ b/tools/testing/selftests/nolibc/run-tests.sh @@ -20,7 +20,7 @@ llvm= all_archs=( i386 x86_64 arm64 arm armthumb - mips32le mips32be + mips32le mips32be mipsn32le mipsn32be mips64le mips64be ppc ppc64 ppc64le riscv32 riscv64 s390x s390 -- cgit v1.2.3 From 171f2ee31a42f1802299862686c2521eda77dc61 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 26 Jun 2025 10:31:11 +0300 Subject: selftests: net: Add a selftest for externally validated neighbor entries Add test cases for externally validated neighbor entries, testing both IPv4 and IPv6. Name the file "test_neigh.sh" so that it could be possibly extended in the future with more neighbor test cases. Example output: # ./test_neigh.sh TEST: IPv4 "extern_valid" flag: Add entry [ OK ] TEST: IPv4 "extern_valid" flag: Add with an invalid state [ OK ] TEST: IPv4 "extern_valid" flag: Add with "use" flag [ OK ] TEST: IPv4 "extern_valid" flag: Replace entry [ OK ] TEST: IPv4 "extern_valid" flag: Replace entry with "managed" flag [ OK ] TEST: IPv4 "extern_valid" flag: Replace with an invalid state [ OK ] TEST: IPv4 "extern_valid" flag: Interface down [ OK ] TEST: IPv4 "extern_valid" flag: Carrier down [ OK ] TEST: IPv4 "extern_valid" flag: Transition to "reachable" state [ OK ] TEST: IPv4 "extern_valid" flag: Transition back to "stale" state [ OK ] TEST: IPv4 "extern_valid" flag: Forced garbage collection [ OK ] TEST: IPv4 "extern_valid" flag: Periodic garbage collection [ OK ] TEST: IPv6 "extern_valid" flag: Add entry [ OK ] TEST: IPv6 "extern_valid" flag: Add with an invalid state [ OK ] TEST: IPv6 "extern_valid" flag: Add with "use" flag [ OK ] TEST: IPv6 "extern_valid" flag: Replace entry [ OK ] TEST: IPv6 "extern_valid" flag: Replace entry with "managed" flag [ OK ] TEST: IPv6 "extern_valid" flag: Replace with an invalid state [ OK ] TEST: IPv6 "extern_valid" flag: Interface down [ OK ] TEST: IPv6 "extern_valid" flag: Carrier down [ OK ] TEST: IPv6 "extern_valid" flag: Transition to "reachable" state [ OK ] TEST: IPv6 "extern_valid" flag: Transition back to "stale" state [ OK ] TEST: IPv6 "extern_valid" flag: Forced garbage collection [ OK ] TEST: IPv6 "extern_valid" flag: Periodic garbage collection [ OK ] Signed-off-by: Ido Schimmel Link: https://patch.msgid.link/20250626073111.244534-3-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/test_neigh.sh | 366 ++++++++++++++++++++++++++++++ 2 files changed, 367 insertions(+) create mode 100755 tools/testing/selftests/net/test_neigh.sh (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 227f9e067d25..543776596529 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -100,6 +100,7 @@ TEST_PROGS += test_vxlan_mdb.sh TEST_PROGS += test_bridge_neigh_suppress.sh TEST_PROGS += test_vxlan_nolocalbypass.sh TEST_PROGS += test_bridge_backup_port.sh +TEST_PROGS += test_neigh.sh TEST_PROGS += fdb_flush.sh fdb_notify.sh TEST_PROGS += fq_band_pktlimit.sh TEST_PROGS += vlan_hw_filter.sh diff --git a/tools/testing/selftests/net/test_neigh.sh b/tools/testing/selftests/net/test_neigh.sh new file mode 100755 index 000000000000..388056472b5b --- /dev/null +++ b/tools/testing/selftests/net/test_neigh.sh @@ -0,0 +1,366 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh +TESTS=" + extern_valid_ipv4 + extern_valid_ipv6 +" +VERBOSE=0 + +################################################################################ +# Utilities + +run_cmd() +{ + local cmd="$1" + local out + local stderr="2>/dev/null" + + if [ "$VERBOSE" = "1" ]; then + echo "COMMAND: $cmd" + stderr= + fi + + out=$(eval "$cmd" "$stderr") + rc=$? + if [ "$VERBOSE" -eq 1 ] && [ -n "$out" ]; then + echo " $out" + fi + + return $rc +} + +################################################################################ +# Setup + +setup() +{ + set -e + + setup_ns ns1 ns2 + + ip -n "$ns1" link add veth0 type veth peer name veth1 netns "$ns2" + ip -n "$ns1" link set dev veth0 up + ip -n "$ns2" link set dev veth1 up + + ip -n "$ns1" address add 192.0.2.1/24 dev veth0 + ip -n "$ns1" address add 2001:db8:1::1/64 dev veth0 nodad + ip -n "$ns2" address add 192.0.2.2/24 dev veth1 + ip -n "$ns2" address add 2001:db8:1::2/64 dev veth1 nodad + + ip netns exec "$ns1" sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 + ip netns exec "$ns2" sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 + + sleep 5 + + set +e +} + +exit_cleanup_all() +{ + cleanup_all_ns + exit "${EXIT_STATUS}" +} + +################################################################################ +# Tests + +extern_valid_common() +{ + local af_str=$1; shift + local ip_addr=$1; shift + local tbl_name=$1; shift + local subnet=$1; shift + local mac + + mac=$(ip -n "$ns2" -j link show dev veth1 | jq -r '.[]["address"]') + + RET=0 + + # Check that simple addition works. + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_err $? "No \"extern_valid\" flag after addition" + + log_test "$af_str \"extern_valid\" flag: Add entry" + + RET=0 + + # Check that an entry cannot be added with "extern_valid" flag and an + # invalid state. + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr nud none dev veth0 extern_valid" + check_fail $? "Managed to add an entry with \"extern_valid\" flag and an invalid state" + + log_test "$af_str \"extern_valid\" flag: Add with an invalid state" + + RET=0 + + # Check that entry cannot be added with both "extern_valid" flag and + # "use" / "managed" flag. + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid use" + check_fail $? "Managed to add an entry with \"extern_valid\" flag and \"use\" flag" + + log_test "$af_str \"extern_valid\" flag: Add with \"use\" flag" + + RET=0 + + # Check that "extern_valid" flag can be toggled using replace. + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0" + run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_err $? "Did not manage to set \"extern_valid\" flag with replace" + run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_fail $? "Did not manage to clear \"extern_valid\" flag with replace" + + log_test "$af_str \"extern_valid\" flag: Replace entry" + + RET=0 + + # Check that an existing "extern_valid" entry can be marked as + # "managed". + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid managed" + check_err $? "Did not manage to add \"managed\" flag to an existing \"extern_valid\" entry" + + log_test "$af_str \"extern_valid\" flag: Replace entry with \"managed\" flag" + + RET=0 + + # Check that entry cannot be replaced with "extern_valid" flag and an + # invalid state. + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh replace $ip_addr nud none dev veth0 extern_valid" + check_fail $? "Managed to replace an entry with \"extern_valid\" flag and an invalid state" + + log_test "$af_str \"extern_valid\" flag: Replace with an invalid state" + + RET=0 + + # Check that an "extern_valid" entry is flushed when the interface is + # put administratively down. + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 link set dev veth0 down" + run_cmd "ip -n $ns1 link set dev veth0 up" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0" + check_fail $? "\"extern_valid\" entry not flushed upon interface down" + + log_test "$af_str \"extern_valid\" flag: Interface down" + + RET=0 + + # Check that an "extern_valid" entry is not flushed when the interface + # loses its carrier. + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns2 link set dev veth1 down" + run_cmd "ip -n $ns2 link set dev veth1 up" + run_cmd "sleep 2" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0" + check_err $? "\"extern_valid\" entry flushed upon carrier down" + + log_test "$af_str \"extern_valid\" flag: Carrier down" + + RET=0 + + # Check that when entry transitions to "reachable" state it maintains + # the "extern_valid" flag. Wait "delay_probe" seconds for ARP request / + # NS to be sent. + local delay_probe + + delay_probe=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["delay_probe"]') + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid use" + run_cmd "sleep $((delay_probe / 1000 + 2))" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"REACHABLE\"" + check_err $? "Entry did not transition to \"reachable\" state" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_err $? "Entry did not maintain \"extern_valid\" flag after transition to \"reachable\" state" + + log_test "$af_str \"extern_valid\" flag: Transition to \"reachable\" state" + + RET=0 + + # Drop all packets, trigger resolution and check that entry goes back + # to "stale" state instead of "failed". + local mcast_reprobes + local retrans_time + local ucast_probes + local app_probes + local probes + local delay + + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "tc -n $ns2 qdisc add dev veth1 clsact" + run_cmd "tc -n $ns2 filter add dev veth1 ingress proto all matchall action drop" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid use" + retrans_time=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["retrans"]') + ucast_probes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["ucast_probes"]') + app_probes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["app_probes"]') + mcast_reprobes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["mcast_reprobes"]') + delay=$((delay_probe + (ucast_probes + app_probes + mcast_reprobes) * retrans_time)) + run_cmd "sleep $((delay / 1000 + 2))" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"STALE\"" + check_err $? "Entry did not return to \"stale\" state" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_err $? "Entry did not maintain \"extern_valid\" flag after returning to \"stale\" state" + probes=$(ip -n "$ns1" -j -s neigh get "$ip_addr" dev veth0 | jq '.[]["probes"]') + if [[ $probes -eq 0 ]]; then + check_err 1 "No probes were sent" + fi + + log_test "$af_str \"extern_valid\" flag: Transition back to \"stale\" state" + + run_cmd "tc -n $ns2 qdisc del dev veth1 clsact" + + RET=0 + + # Forced garbage collection runs whenever the number of entries is + # larger than "thresh3" and deletes stale entries that have not been + # updated in the last 5 seconds. + # + # Check that an "extern_valid" entry survives a forced garbage + # collection. Add an entry, wait 5 seconds and add more entries than + # "thresh3" so that forced garbage collection will run. + # + # Note that the garbage collection thresholds are global resources and + # that changes in the initial namespace affect all the namespaces. + local forced_gc_runs_t0 + local forced_gc_runs_t1 + local orig_thresh1 + local orig_thresh2 + local orig_thresh3 + + run_cmd "ip -n $ns1 neigh flush dev veth0" + orig_thresh1=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["thresh1"]') + orig_thresh2=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh2")) | .["thresh2"]') + orig_thresh3=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh3")) | .["thresh3"]') + run_cmd "ip ntable change name $tbl_name thresh3 10 thresh2 9 thresh1 8" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh add ${subnet}3 lladdr $mac nud stale dev veth0" + run_cmd "sleep 5" + forced_gc_runs_t0=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("forced_gc_runs")) | .["forced_gc_runs"]') + for i in {1..20}; do + run_cmd "ip -n $ns1 neigh add ${subnet}$((i + 4)) nud none dev veth0" + done + forced_gc_runs_t1=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("forced_gc_runs")) | .["forced_gc_runs"]') + if [[ $forced_gc_runs_t1 -eq $forced_gc_runs_t0 ]]; then + check_err 1 "Forced garbage collection did not run" + fi + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_err $? "Entry with \"extern_valid\" flag did not survive forced garbage collection" + run_cmd "ip -n $ns1 neigh get ${subnet}3 dev veth0" + check_fail $? "Entry without \"extern_valid\" flag survived forced garbage collection" + + log_test "$af_str \"extern_valid\" flag: Forced garbage collection" + + run_cmd "ip ntable change name $tbl_name thresh3 $orig_thresh3 thresh2 $orig_thresh2 thresh1 $orig_thresh1" + + RET=0 + + # Periodic garbage collection runs every "base_reachable"/2 seconds and + # if the number of entries is larger than "thresh1", then it deletes + # stale entries that have not been used in the last "gc_stale" seconds. + # + # Check that an "extern_valid" entry survives a periodic garbage + # collection. Add an "extern_valid" entry, add more than "thresh1" + # regular entries, wait "base_reachable" (longer than "gc_stale") + # seconds and check that the "extern_valid" entry was not deleted. + # + # Note that the garbage collection thresholds and "base_reachable" are + # global resources and that changes in the initial namespace affect all + # the namespaces. + local periodic_gc_runs_t0 + local periodic_gc_runs_t1 + local orig_base_reachable + local orig_gc_stale + + run_cmd "ip -n $ns1 neigh flush dev veth0" + orig_thresh1=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["thresh1"]') + orig_base_reachable=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["base_reachable"]') + run_cmd "ip ntable change name $tbl_name thresh1 10 base_reachable 10000" + orig_gc_stale=$(ip -n "$ns1" -j ntable show name "$tbl_name" dev veth0 | jq '.[]["gc_stale"]') + run_cmd "ip -n $ns1 ntable change name $tbl_name dev veth0 gc_stale 5000" + # Wait orig_base_reachable/2 for the new interval to take effect. + run_cmd "sleep $(((orig_base_reachable / 1000) / 2 + 2))" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh add ${subnet}3 lladdr $mac nud stale dev veth0" + for i in {1..20}; do + run_cmd "ip -n $ns1 neigh add ${subnet}$((i + 4)) nud none dev veth0" + done + periodic_gc_runs_t0=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("periodic_gc_runs")) | .["periodic_gc_runs"]') + run_cmd "sleep 10" + periodic_gc_runs_t1=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("periodic_gc_runs")) | .["periodic_gc_runs"]') + [[ $periodic_gc_runs_t1 -ne $periodic_gc_runs_t0 ]] + check_err $? "Periodic garbage collection did not run" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_err $? "Entry with \"extern_valid\" flag did not survive periodic garbage collection" + run_cmd "ip -n $ns1 neigh get ${subnet}3 dev veth0" + check_fail $? "Entry without \"extern_valid\" flag survived periodic garbage collection" + + log_test "$af_str \"extern_valid\" flag: Periodic garbage collection" + + run_cmd "ip -n $ns1 ntable change name $tbl_name dev veth0 gc_stale $orig_gc_stale" + run_cmd "ip ntable change name $tbl_name thresh1 $orig_thresh1 base_reachable $orig_base_reachable" +} + +extern_valid_ipv4() +{ + extern_valid_common "IPv4" 192.0.2.2 "arp_cache" 192.0.2. +} + +extern_valid_ipv6() +{ + extern_valid_common "IPv6" 2001:db8:1::2 "ndisc_cache" 2001:db8:1:: +} + +################################################################################ +# Usage + +usage() +{ + cat < Test(s) to run (default: all) + (options: $TESTS) + -p Pause on fail + -v Verbose mode (show commands and output) +EOF +} + +################################################################################ +# Main + +while getopts ":t:pvh" opt; do + case $opt in + t) TESTS=$OPTARG;; + p) PAUSE_ON_FAIL=yes;; + v) VERBOSE=$((VERBOSE + 1));; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +require_command jq + +if ! ip neigh help 2>&1 | grep -q "extern_valid"; then + echo "SKIP: iproute2 ip too old, missing \"extern_valid\" support" + exit "$ksft_skip" +fi + +trap exit_cleanup_all EXIT + +for t in $TESTS +do + setup; $t; cleanup_all_ns; +done -- cgit v1.2.3 From 2c0a4428f5d6005ff0db12057cc35273593fc040 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 11 Jun 2025 12:33:51 +0200 Subject: selftests: vDSO: chacha: Correctly skip test if necessary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to kselftest.h ksft_exit_skip() is not meant to be called when a plan has already been printed. Use the recommended function ksft_test_result_skip(). This fixes a bug, where the TAP output would be invalid when skipping: TAP version 13 1..1 ok 2 # SKIP Not implemented on architecture The SKIP line should start with "ok 1" as the plan only contains one test. Fixes: 3b5992eaf730 ("selftests: vDSO: unconditionally build chacha test") Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Reviewed-by: Muhammad Usama Anjum Link: https://lore.kernel.org/all/20250611-selftests-vdso-fixes-v3-1-e62e37a6bcf5@linutronix.de --- tools/testing/selftests/vDSO/vdso_test_chacha.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/vDSO/vdso_test_chacha.c b/tools/testing/selftests/vDSO/vdso_test_chacha.c index 8757f738b0b1..0aad682b12c8 100644 --- a/tools/testing/selftests/vDSO/vdso_test_chacha.c +++ b/tools/testing/selftests/vDSO/vdso_test_chacha.c @@ -76,7 +76,8 @@ static void reference_chacha20_blocks(uint8_t *dst_bytes, const uint32_t *key, u void __weak __arch_chacha20_blocks_nostack(uint8_t *dst_bytes, const uint32_t *key, uint32_t *counter, size_t nblocks) { - ksft_exit_skip("Not implemented on architecture\n"); + ksft_test_result_skip("Not implemented on architecture\n"); + ksft_finished(); } int main(int argc, char *argv[]) -- cgit v1.2.3 From 82669e157bd8abeb08d19c6d597620585ece576a Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 11 Jun 2025 12:33:52 +0200 Subject: selftests: vDSO: clock_getres: Drop unused include of err.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Nothing from err.h is used. Drop the include. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Reviewed-by: Muhammad Usama Anjum Link: https://lore.kernel.org/all/20250611-selftests-vdso-fixes-v3-2-e62e37a6bcf5@linutronix.de --- tools/testing/selftests/vDSO/vdso_test_clock_getres.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/vDSO/vdso_test_clock_getres.c b/tools/testing/selftests/vDSO/vdso_test_clock_getres.c index 38d46a8bf7cb..b5d5f59f725a 100644 --- a/tools/testing/selftests/vDSO/vdso_test_clock_getres.c +++ b/tools/testing/selftests/vDSO/vdso_test_clock_getres.c @@ -13,7 +13,6 @@ #define _GNU_SOURCE #include -#include #include #include #include -- cgit v1.2.3 From 1c0fe1c7674121bcb233565bba08f7fcc2a8d5a8 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 11 Jun 2025 12:33:53 +0200 Subject: selftests: vDSO: vdso_test_getrandom: Drop unused include of linux/compiler.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The header is unused. Furthermore this is not a real UAPI header, but only exists in tools/include/. This prevents building the selftest against real UAPI headers. Drop the include. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250611-selftests-vdso-fixes-v3-3-e62e37a6bcf5@linutronix.de --- tools/testing/selftests/vDSO/vdso_test_getrandom.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/vDSO/vdso_test_getrandom.c b/tools/testing/selftests/vDSO/vdso_test_getrandom.c index 95057f7567db..f36e50f372f9 100644 --- a/tools/testing/selftests/vDSO/vdso_test_getrandom.c +++ b/tools/testing/selftests/vDSO/vdso_test_getrandom.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include "../kselftest.h" -- cgit v1.2.3 From b8ae430871254f95dd81dcff01828be3f6b24a57 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 11 Jun 2025 12:33:54 +0200 Subject: selftests: vDSO: vdso_test_getrandom: Avoid -Wunused MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vgetrandom_put_state() and the variable "ret" in kselftest() are unused. Drop the variable "ret". Suppress the warning for vgetrandom_put_state() as it is meant as an example for libc implementors. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250611-selftests-vdso-fixes-v3-4-e62e37a6bcf5@linutronix.de --- tools/testing/selftests/vDSO/vdso_test_getrandom.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/vDSO/vdso_test_getrandom.c b/tools/testing/selftests/vDSO/vdso_test_getrandom.c index f36e50f372f9..389ead4e1fe3 100644 --- a/tools/testing/selftests/vDSO/vdso_test_getrandom.c +++ b/tools/testing/selftests/vDSO/vdso_test_getrandom.c @@ -100,6 +100,7 @@ out: return state; } +__attribute__((unused)) /* Example for libc implementors */ static void vgetrandom_put_state(void *state) { if (!state) @@ -264,7 +265,7 @@ static void kselftest(void) } for (;;) { struct ptrace_syscall_info info = { 0 }; - int status, ret; + int status; ksft_assert(waitpid(child, &status, 0) >= 0); if (WIFEXITED(status)) { ksft_assert(WEXITSTATUS(status) == 0); -- cgit v1.2.3 From ecabe99a0354fcfa237b75d72a2707b3ace3d5e9 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 11 Jun 2025 12:33:55 +0200 Subject: selftests: vDSO: vdso_config: Avoid -Wunused-variables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not all users of this header make use of all its variables. For example vdso_test_correctness.c does not use "versions": In file included from vdso_test_correctness.c:22: vdso_config.h:61:20: warning: ‘versions’ defined but not used [-Wunused-variable] 61 | static const char *versions[7] = { | ^~~~~~~~ Avoid those warnings through attribute((unused)). Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Reviewed-by: Muhammad Usama Anjum Link: https://lore.kernel.org/all/20250611-selftests-vdso-fixes-v3-5-e62e37a6bcf5@linutronix.de --- tools/testing/selftests/vDSO/vdso_config.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/vDSO/vdso_config.h b/tools/testing/selftests/vDSO/vdso_config.h index 722260f97561..5fdd0f362337 100644 --- a/tools/testing/selftests/vDSO/vdso_config.h +++ b/tools/testing/selftests/vDSO/vdso_config.h @@ -58,6 +58,7 @@ #define VDSO_NAMES 1 #endif +__attribute__((unused)) static const char *versions[7] = { "LINUX_2.6", "LINUX_2.6.15", @@ -68,6 +69,7 @@ static const char *versions[7] = { "LINUX_5.10" }; +__attribute__((unused)) static const char *names[2][7] = { { "__kernel_gettimeofday", -- cgit v1.2.3 From 8863cd78a0f10b5b81ca91a558b7e70bcb66e859 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 11 Jun 2025 12:33:56 +0200 Subject: selftests: vDSO: Enable -Wall MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Protect against common programming errors through compiler warnings. These warnings are also used for the kernel itself. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250611-selftests-vdso-fixes-v3-6-e62e37a6bcf5@linutronix.de --- tools/testing/selftests/vDSO/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index 12a0614b9fd4..06d72254ec75 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -12,7 +12,7 @@ TEST_GEN_PROGS += vdso_test_correctness TEST_GEN_PROGS += vdso_test_getrandom TEST_GEN_PROGS += vdso_test_chacha -CFLAGS := -std=gnu99 -O2 +CFLAGS := -std=gnu99 -O2 -Wall ifeq ($(CONFIG_X86_32),y) LDLIBS += -lgcc_s -- cgit v1.2.3 From 58265d6424c69daf32ed96288709b23f7538c3e2 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 11 Jun 2025 12:33:57 +0200 Subject: selftests: vDSO: vdso_test_correctness: Fix -Wstrict-prototypes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Functions definitions without any argument list produce a warning with -Wstrict-prototypes: vdso_test_correctness.c:111:13: warning: function declaration isn’t a prototype [-Wstrict-prototypes] 111 | static void fill_function_pointers() | ^~~~~~~~~~~~~~~~~~~~~~ Explicitly use an empty argument list. Now that all selftests a free of this warning, enable it in the Makefile. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250611-selftests-vdso-fixes-v3-7-e62e37a6bcf5@linutronix.de --- tools/testing/selftests/vDSO/Makefile | 2 +- tools/testing/selftests/vDSO/vdso_test_correctness.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index 06d72254ec75..918a2caa070e 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -12,7 +12,7 @@ TEST_GEN_PROGS += vdso_test_correctness TEST_GEN_PROGS += vdso_test_getrandom TEST_GEN_PROGS += vdso_test_chacha -CFLAGS := -std=gnu99 -O2 -Wall +CFLAGS := -std=gnu99 -O2 -Wall -Wstrict-prototypes ifeq ($(CONFIG_X86_32),y) LDLIBS += -lgcc_s diff --git a/tools/testing/selftests/vDSO/vdso_test_correctness.c b/tools/testing/selftests/vDSO/vdso_test_correctness.c index 5fb97ad67eea..da651cf53c6c 100644 --- a/tools/testing/selftests/vDSO/vdso_test_correctness.c +++ b/tools/testing/selftests/vDSO/vdso_test_correctness.c @@ -108,7 +108,7 @@ static void *vsyscall_getcpu(void) } -static void fill_function_pointers() +static void fill_function_pointers(void) { void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); -- cgit v1.2.3 From 1158220b24674edaf885433153deb4f0e5c7d331 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 11 Jun 2025 12:33:58 +0200 Subject: selftests: vDSO: vdso_test_getrandom: Always print TAP header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The TAP specification requires that the output begins with a header line. If vgetrandom_init() fails and skips the test, that header line is missing. Call vgetrandom_init() after ksft_print_header(). Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Reviewed-by: Muhammad Usama Anjum Link: https://lore.kernel.org/all/20250611-selftests-vdso-fixes-v3-8-e62e37a6bcf5@linutronix.de --- tools/testing/selftests/vDSO/vdso_test_getrandom.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/vDSO/vdso_test_getrandom.c b/tools/testing/selftests/vDSO/vdso_test_getrandom.c index 389ead4e1fe3..dd1132508a0d 100644 --- a/tools/testing/selftests/vDSO/vdso_test_getrandom.c +++ b/tools/testing/selftests/vDSO/vdso_test_getrandom.c @@ -242,6 +242,7 @@ static void kselftest(void) pid_t child; ksft_print_header(); + vgetrandom_init(); ksft_set_plan(2); for (size_t i = 0; i < 1000; ++i) { @@ -295,8 +296,6 @@ static void usage(const char *argv0) int main(int argc, char *argv[]) { - vgetrandom_init(); - if (argc == 1) { kselftest(); return 0; @@ -306,6 +305,9 @@ int main(int argc, char *argv[]) usage(argv[0]); return 1; } + + vgetrandom_init(); + if (!strcmp(argv[1], "bench-single")) bench_single(); else if (!strcmp(argv[1], "bench-multi")) -- cgit v1.2.3 From 437079605c26dc7c98586580a8c01b5f7f746a79 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 11 Jun 2025 12:33:59 +0200 Subject: selftests: vDSO: vdso_standalone_test_x86: Replace source file with symlink MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the switch over to nolibc the source file vdso_standalone_test_x86.c was intended to be replaced with a symlink to vdso_test_gettimeofday.c. This was the patch that was submitted to LKML, but during application the symlink was replaced by a textual copy of the linked-to file. Having two copies introduces the possibility of divergence and increases maintenance burden, switch back to a symlink. Fixes: 8770a9183fe1 ("selftests: vDSO: vdso_standalone_test_x86: Switch to nolibc") Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/lkml/20250226-parse_vdso-nolibc-v2-16-28e14e031ed8@linutronix.de/ Link: https://lore.kernel.org/all/20250611-selftests-vdso-fixes-v3-9-e62e37a6bcf5@linutronix.de --- .../selftests/vDSO/vdso_standalone_test_x86.c | 59 +--------------------- 1 file changed, 1 insertion(+), 58 deletions(-) mode change 100644 => 120000 tools/testing/selftests/vDSO/vdso_standalone_test_x86.c (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c deleted file mode 100644 index 9ce795b806f0..000000000000 --- a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c +++ /dev/null @@ -1,58 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * vdso_test_gettimeofday.c: Sample code to test parse_vdso.c and - * vDSO gettimeofday() - * Copyright (c) 2014 Andy Lutomirski - * - * Compile with: - * gcc -std=gnu99 vdso_test_gettimeofday.c parse_vdso_gettimeofday.c - * - * Tested on x86, 32-bit and 64-bit. It may work on other architectures, too. - */ - -#include -#ifndef NOLIBC -#include -#include -#endif - -#include "../kselftest.h" -#include "parse_vdso.h" -#include "vdso_config.h" -#include "vdso_call.h" - -int main(int argc, char **argv) -{ - const char *version = versions[VDSO_VERSION]; - const char **name = (const char **)&names[VDSO_NAMES]; - - unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR); - if (!sysinfo_ehdr) { - printf("AT_SYSINFO_EHDR is not present!\n"); - return KSFT_SKIP; - } - - vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR)); - - /* Find gettimeofday. */ - typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz); - gtod_t gtod = (gtod_t)vdso_sym(version, name[0]); - - if (!gtod) { - printf("Could not find %s\n", name[0]); - return KSFT_SKIP; - } - - struct timeval tv; - long ret = VDSO_CALL(gtod, 2, &tv, 0); - - if (ret == 0) { - printf("The time is %lld.%06lld\n", - (long long)tv.tv_sec, (long long)tv.tv_usec); - } else { - printf("%s failed\n", name[0]); - return KSFT_FAIL; - } - - return 0; -} diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c new file mode 120000 index 000000000000..4d3d96f1e440 --- /dev/null +++ b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c @@ -0,0 +1 @@ +vdso_test_gettimeofday.c \ No newline at end of file -- cgit v1.2.3 From cce3fee729ee18caa03bade7f4c360343e863bb1 Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Mon, 30 Jun 2025 14:35:14 +0100 Subject: selftests/bpf: Enable dynptr/test_probe_read_user_str_dynptr Enable previously disabled dynptr/test_probe_read_user_str_dynptr test, after the fix it depended on was merged into bpf-next. Signed-off-by: Mykyta Yatsenko Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20250630133515.1108325-1-mykyta.yatsenko5@gmail.com --- tools/testing/selftests/bpf/DENYLIST | 1 - 1 file changed, 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/DENYLIST b/tools/testing/selftests/bpf/DENYLIST index 1789a61d0a9b..f748f2c33b22 100644 --- a/tools/testing/selftests/bpf/DENYLIST +++ b/tools/testing/selftests/bpf/DENYLIST @@ -1,6 +1,5 @@ # TEMPORARY # Alphabetical order -dynptr/test_probe_read_user_str_dynptr # disabled until https://patchwork.kernel.org/project/linux-mm/patch/20250422131449.57177-1-mykyta.yatsenko5@gmail.com/ makes it into the bpf-next get_stack_raw_tp # spams with kernel warnings until next bpf -> bpf-next merge stacktrace_build_id stacktrace_build_id_nmi -- cgit v1.2.3 From 1230be820981a3ef0fb9abc011f1b3d093f3b1e4 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 30 Jun 2025 13:55:28 +0100 Subject: selftests/bpf: Fix spelling mistake "subtration" -> "subtraction" There are spelling mistakes in description text. Fix these. Signed-off-by: Colin Ian King Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250630125528.563077-1-colin.i.king@gmail.com --- tools/testing/selftests/bpf/progs/verifier_bounds.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index e52a24e15b34..6f986ae5085e 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -1474,7 +1474,7 @@ __naked void sub64_full_overflow(void) } SEC("socket") -__description("64-bit subtration, partial overflow, result in unbounded reg") +__description("64-bit subtraction, partial overflow, result in unbounded reg") __success __log_level(2) __msg("3: (1f) r3 -= r2 {{.*}} R3_w=scalar()") __retval(0) @@ -1514,7 +1514,7 @@ __naked void sub32_full_overflow(void) } SEC("socket") -__description("32-bit subtration, partial overflow, result in unbounded u32 bounds") +__description("32-bit subtraction, partial overflow, result in unbounded u32 bounds") __success __log_level(2) __msg("3: (1c) w3 -= w2 {{.*}} R3_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))") __retval(0) -- cgit v1.2.3 From bf4807c89d8f92c47404b1e4eeeefb42259d1b50 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Tue, 1 Jul 2025 21:48:03 +0200 Subject: selftests/bpf: Add negative test cases for snprintf This patch adds a couple negative test cases with a trailing % at the end of the format string. The %p% case was fixed by the previous commit, whereas the %s% case was already successfully rejected before. Acked-by: Yonghong Song Signed-off-by: Paul Chaignon Link: https://lore.kernel.org/r/0669bf6eb4f9e5bb10e949d60311c06e2d942447.1751395489.git.paul.chaignon@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/snprintf.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c index 4be6fdb78c6a..594441acb707 100644 --- a/tools/testing/selftests/bpf/prog_tests/snprintf.c +++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c @@ -116,6 +116,8 @@ static void test_snprintf_negative(void) ASSERT_ERR(load_single_snprintf("%llc"), "invalid specifier 7"); ASSERT_ERR(load_single_snprintf("\x80"), "non ascii character"); ASSERT_ERR(load_single_snprintf("\x1"), "non printable character"); + ASSERT_ERR(load_single_snprintf("%p%"), "invalid specifier 8"); + ASSERT_ERR(load_single_snprintf("%s%"), "invalid specifier 9"); } void test_snprintf(void) -- cgit v1.2.3 From 131e0a1123e709b72a3ce7ce09a30e903e3515f0 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 30 Jun 2025 17:33:41 +0200 Subject: selftests/tc-testing: Enable CONFIG_IP_SET The config snippet specifies CONFIG_NET_EMATCH_IPSET. This option depends on CONFIG_IP_SET. Set CONFIG_IP_SET to be enabled at part for tc-testing. Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20250630153341.Wgh3SzGi@linutronix.de Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/config | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config index db176fe7d0c3..8e902f7f1a18 100644 --- a/tools/testing/selftests/tc-testing/config +++ b/tools/testing/selftests/tc-testing/config @@ -21,6 +21,7 @@ CONFIG_NF_NAT=m CONFIG_NETFILTER_XT_TARGET_LOG=m CONFIG_NET_SCHED=y +CONFIG_IP_SET=m # # Queueing/Scheduling -- cgit v1.2.3 From 8d3e0982f7c2548851f27635ff907a8099d63ba9 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Fri, 27 Jun 2025 20:04:51 +0000 Subject: selftests: pp-bench: remove unneeded linux/version.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit linux/version.h was used by the out-of-tree version, but not needed in the upstream one anymore. While I'm at it, sort the includes. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202506271434.Gk0epC9H-lkp@intel.com/ Signed-off-by: Mina Almasry Reviewed-by: Simon Horman Reviewed-by: Toke Høiland-Jørgensen Reviewed-by: Ilias Apalodimas Acked-by: Jesper Dangaard Brouer Link: https://patch.msgid.link/20250627200501.1712389-1-almasrymina@google.com Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/bench/page_pool/bench_page_pool_simple.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c index f183d5e30dc6..1cd3157fb6a9 100644 --- a/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c +++ b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c @@ -5,15 +5,12 @@ */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include +#include #include #include - -#include #include -#include -#include - #include "time_bench.h" static int verbose = 1; -- cgit v1.2.3 From be75d319d1b3e9429bbb61681f14f88d59f32eb2 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Fri, 27 Jun 2025 20:04:52 +0000 Subject: selftests: pp-bench: remove page_pool_put_page wrapper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Minor cleanup: remove the pointless looking _ wrapper around page_pool_put_page, and just do the call directly. Signed-off-by: Mina Almasry Reviewed-by: Simon Horman Reviewed-by: Toke Høiland-Jørgensen Reviewed-by: Ilias Apalodimas Acked-by: Jesper Dangaard Brouer Link: https://patch.msgid.link/20250627200501.1712389-2-almasrymina@google.com Signed-off-by: Jakub Kicinski --- .../selftests/net/bench/page_pool/bench_page_pool_simple.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c index 1cd3157fb6a9..cb6468adbda4 100644 --- a/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c +++ b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c @@ -16,12 +16,6 @@ static int verbose = 1; #define MY_POOL_SIZE 1024 -static void _page_pool_put_page(struct page_pool *pool, struct page *page, - bool allow_direct) -{ - page_pool_put_page(pool, page, -1, allow_direct); -} - /* Makes tests selectable. Useful for perf-record to analyze a single test. * Hint: Bash shells support writing binary number like: $((2#101010) * @@ -121,7 +115,7 @@ static void pp_fill_ptr_ring(struct page_pool *pp, int elems) for (i = 0; i < elems; i++) array[i] = page_pool_alloc_pages(pp, gfp_mask); for (i = 0; i < elems; i++) - _page_pool_put_page(pp, array[i], false); + page_pool_put_page(pp, array[i], -1, false); kfree(array); } @@ -180,14 +174,14 @@ static int time_bench_page_pool(struct time_bench_record *rec, void *data, } else if (type == type_ptr_ring) { /* Normal return path */ - _page_pool_put_page(pp, page, false); + page_pool_put_page(pp, page, -1, false); } else if (type == type_page_allocator) { /* Test if not pages are recycled, but instead * returned back into systems page allocator */ get_page(page); /* cause no-recycling */ - _page_pool_put_page(pp, page, false); + page_pool_put_page(pp, page, -1, false); put_page(page); } else { BUILD_BUG(); -- cgit v1.2.3 From 434f6703ce268470796496e67c703dfd85a3653c Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 1 Apr 2025 00:36:20 +0900 Subject: selftests: tracing: Enable fprobe events before checking enable_functions Since the fprobe is not registered before enabling the fprobe events, enable_functions is also empty before enabling it. Thus the tests which checking enable_functions must ensure the event is enabled before testing the enable_functions. Link: https://lore.kernel.org/all/174343538009.843280.6583146613234713007.stgit@devnote2/ Signed-off-by: Masami Hiramatsu (Google) --- .../ftrace/test.d/dynevent/add_remove_fprobe.tc | 30 ++++++++++++++-------- 1 file changed, 19 insertions(+), 11 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc index 73f6c6fcecab..2506f464811b 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc @@ -16,6 +16,18 @@ ocnt=`cat enabled_functions | wc -l` echo "f:myevent1 $PLACE" >> dynamic_events +echo "f:myevent2 $PLACE%return" >> dynamic_events + +# add another event +echo "f:myevent3 $PLACE2" >> dynamic_events + +grep -q myevent1 dynamic_events +grep -q myevent2 dynamic_events +grep -q myevent3 dynamic_events +test -d events/fprobes/myevent1 +test -d events/fprobes/myevent2 + +echo 1 > events/fprobes/myevent1/enable # Make sure the event is attached and is the only one grep -q $PLACE enabled_functions cnt=`cat enabled_functions | wc -l` @@ -23,29 +35,22 @@ if [ $cnt -ne $((ocnt + 1)) ]; then exit_fail fi -echo "f:myevent2 $PLACE%return" >> dynamic_events - +echo 1 > events/fprobes/myevent2/enable # It should till be the only attached function cnt=`cat enabled_functions | wc -l` if [ $cnt -ne $((ocnt + 1)) ]; then exit_fail fi -# add another event -echo "f:myevent3 $PLACE2" >> dynamic_events - +echo 1 > events/fprobes/myevent3/enable +# If the function is different, the attached function should be increased grep -q $PLACE2 enabled_functions cnt=`cat enabled_functions | wc -l` if [ $cnt -ne $((ocnt + 2)) ]; then exit_fail fi -grep -q myevent1 dynamic_events -grep -q myevent2 dynamic_events -grep -q myevent3 dynamic_events -test -d events/fprobes/myevent1 -test -d events/fprobes/myevent2 - +echo 0 > events/fprobes/myevent2/enable echo "-:myevent2" >> dynamic_events grep -q myevent1 dynamic_events @@ -57,6 +62,7 @@ if [ $cnt -ne $((ocnt + 2)) ]; then exit_fail fi +echo 0 > events/fprobes/enable echo > dynamic_events # Should have none left @@ -67,12 +73,14 @@ fi echo "f:myevent4 $PLACE" >> dynamic_events +echo 1 > events/fprobes/myevent4/enable # Should only have one enabled cnt=`cat enabled_functions | wc -l` if [ $cnt -ne $((ocnt + 1)) ]; then exit_fail fi +echo 0 > events/fprobes/enable echo > dynamic_events # Should have none left -- cgit v1.2.3 From 3bedaff19bd8b3fa8a86191c9979e19c6d061010 Mon Sep 17 00:00:00 2001 From: Andrea Mayer Date: Sun, 29 Jun 2025 19:12:26 +0200 Subject: selftests: seg6: fix instaces typo in comments Fix a typo: instaces -> instances The typo has been identified using codespell, and the tool does not report any additional issues in the selftests considered. Signed-off-by: Andrea Mayer Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250629171226.4988-3-andrea.mayer@uniroma2.it Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh | 2 +- tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh | 2 +- tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh | 2 +- tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh index ba730655a7bf..4bc135e5c22c 100755 --- a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh @@ -594,7 +594,7 @@ setup_rt_local_sids() dev "${DUMMY_DEVNAME}" # all SIDs for VPNs start with a common locator. Routes and SRv6 - # Endpoint behavior instaces are grouped together in the 'localsid' + # Endpoint behavior instances are grouped together in the 'localsid' # table. ip -netns "${nsname}" -6 rule \ add to "${VPN_LOCATOR_SERVICE}::/16" \ diff --git a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh index bedf0ce885c2..34b781a2ae74 100755 --- a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh @@ -681,7 +681,7 @@ setup_rt_local_sids() set_underlay_sids_reachability "${rt}" "${rt_neighs}" # all SIDs for VPNs start with a common locator. Routes and SRv6 - # Endpoint behavior instaces are grouped together in the 'localsid' + # Endpoint behavior instances are grouped together in the 'localsid' # table. ip -netns "${nsname}" -6 rule \ add to "${VPN_LOCATOR_SERVICE}::/16" \ diff --git a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh index 3efce1718c5f..6a68c7eff1dc 100755 --- a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh @@ -395,7 +395,7 @@ setup_rt_local_sids() dev "${VRF_DEVNAME}" # all SIDs for VPNs start with a common locator. Routes and SRv6 - # Endpoint behavior instaces are grouped together in the 'localsid' + # Endpoint behavior instances are grouped together in the 'localsid' # table. ip -netns "${nsname}" -6 rule \ add to "${VPN_LOCATOR_SERVICE}::/16" \ diff --git a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh index cabc70538ffe..0979b5316fdf 100755 --- a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh +++ b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh @@ -343,7 +343,7 @@ setup_rt_local_sids() encap seg6local action End dev "${DUMMY_DEVNAME}" # all SIDs for VPNs start with a common locator. Routes and SRv6 - # Endpoint behaviors instaces are grouped together in the 'localsid' + # Endpoint behaviors instances are grouped together in the 'localsid' # table. ip -netns "${nsname}" -6 rule add \ to "${VPN_LOCATOR_SERVICE}::/16" \ -- cgit v1.2.3 From 21eebc655b0f8e84fcac2cf5e20e9eb31d17a982 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Sun, 22 Jun 2025 23:38:54 -0700 Subject: selftests/bpf: Add tests for bpf_cgroup_read_xattr Add tests for different scenarios with bpf_cgroup_read_xattr: 1. Read cgroup xattr from bpf_cgroup_from_id; 2. Read cgroup xattr from bpf_cgroup_ancestor; 3. Read cgroup xattr from css_iter; 4. Use bpf_cgroup_read_xattr in LSM hook security_socket_connect. 5. Use bpf_cgroup_read_xattr in cgroup program. Signed-off-by: Song Liu Link: https://lore.kernel.org/20250623063854.1896364-5-song@kernel.org Signed-off-by: Christian Brauner --- tools/testing/selftests/bpf/bpf_experimental.h | 3 + .../selftests/bpf/prog_tests/cgroup_xattr.c | 145 +++++++++++++++++++ .../selftests/bpf/progs/cgroup_read_xattr.c | 158 +++++++++++++++++++++ .../selftests/bpf/progs/read_cgroupfs_xattr.c | 60 ++++++++ 4 files changed, 366 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c create mode 100644 tools/testing/selftests/bpf/progs/cgroup_read_xattr.c create mode 100644 tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index 5e512a1d09d1..da7e230f2781 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -596,4 +596,7 @@ extern int bpf_iter_dmabuf_new(struct bpf_iter_dmabuf *it) __weak __ksym; extern struct dma_buf *bpf_iter_dmabuf_next(struct bpf_iter_dmabuf *it) __weak __ksym; extern void bpf_iter_dmabuf_destroy(struct bpf_iter_dmabuf *it) __weak __ksym; +extern int bpf_cgroup_read_xattr(struct cgroup *cgroup, const char *name__str, + struct bpf_dynptr *value_p) __weak __ksym; + #endif diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c new file mode 100644 index 000000000000..87978a0f7eb7 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "read_cgroupfs_xattr.skel.h" +#include "cgroup_read_xattr.skel.h" + +#define CGROUP_FS_ROOT "/sys/fs/cgroup/" +#define CGROUP_FS_PARENT CGROUP_FS_ROOT "foo/" +#define CGROUP_FS_CHILD CGROUP_FS_PARENT "bar/" + +static int move_pid_to_cgroup(const char *cgroup_folder, pid_t pid) +{ + char filename[128]; + char pid_str[64]; + int procs_fd; + int ret; + + snprintf(filename, sizeof(filename), "%scgroup.procs", cgroup_folder); + snprintf(pid_str, sizeof(pid_str), "%d", pid); + + procs_fd = open(filename, O_WRONLY | O_APPEND); + if (!ASSERT_OK_FD(procs_fd, "open")) + return -1; + + ret = write(procs_fd, pid_str, strlen(pid_str)); + close(procs_fd); + if (!ASSERT_GT(ret, 0, "write cgroup.procs")) + return -1; + return 0; +} + +static void reset_cgroups_and_lo(void) +{ + rmdir(CGROUP_FS_CHILD); + rmdir(CGROUP_FS_PARENT); + system("ip addr del 1.1.1.1/32 dev lo"); + system("ip link set dev lo down"); +} + +static const char xattr_value_a[] = "bpf_selftest_value_a"; +static const char xattr_value_b[] = "bpf_selftest_value_b"; +static const char xattr_name[] = "user.bpf_test"; + +static int setup_cgroups_and_lo(void) +{ + int err; + + err = mkdir(CGROUP_FS_PARENT, 0755); + if (!ASSERT_OK(err, "mkdir 1")) + goto error; + err = mkdir(CGROUP_FS_CHILD, 0755); + if (!ASSERT_OK(err, "mkdir 2")) + goto error; + + err = setxattr(CGROUP_FS_PARENT, xattr_name, xattr_value_a, + strlen(xattr_value_a) + 1, 0); + if (!ASSERT_OK(err, "setxattr 1")) + goto error; + + err = setxattr(CGROUP_FS_CHILD, xattr_name, xattr_value_b, + strlen(xattr_value_b) + 1, 0); + if (!ASSERT_OK(err, "setxattr 2")) + goto error; + + err = system("ip link set dev lo up"); + if (!ASSERT_OK(err, "lo up")) + goto error; + + err = system("ip addr add 1.1.1.1 dev lo"); + if (!ASSERT_OK(err, "lo addr v4")) + goto error; + + err = write_sysctl("/proc/sys/net/ipv4/ping_group_range", "0 0"); + if (!ASSERT_OK(err, "write_sysctl")) + goto error; + + return 0; +error: + reset_cgroups_and_lo(); + return err; +} + +static void test_read_cgroup_xattr(void) +{ + struct sockaddr_in sa4 = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_LOOPBACK), + }; + struct read_cgroupfs_xattr *skel = NULL; + pid_t pid = gettid(); + int sock_fd = -1; + int connect_fd = -1; + + if (!ASSERT_OK(setup_cgroups_and_lo(), "setup_cgroups_and_lo")) + return; + if (!ASSERT_OK(move_pid_to_cgroup(CGROUP_FS_CHILD, pid), + "move_pid_to_cgroup")) + goto out; + + skel = read_cgroupfs_xattr__open_and_load(); + if (!ASSERT_OK_PTR(skel, "read_cgroupfs_xattr__open_and_load")) + goto out; + + skel->bss->target_pid = pid; + + if (!ASSERT_OK(read_cgroupfs_xattr__attach(skel), "read_cgroupfs_xattr__attach")) + goto out; + + sock_fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP); + if (!ASSERT_OK_FD(sock_fd, "sock create")) + goto out; + + connect_fd = connect(sock_fd, &sa4, sizeof(sa4)); + if (!ASSERT_OK_FD(connect_fd, "connect 1")) + goto out; + close(connect_fd); + + ASSERT_TRUE(skel->bss->found_value_a, "found_value_a"); + ASSERT_TRUE(skel->bss->found_value_b, "found_value_b"); + +out: + close(connect_fd); + close(sock_fd); + read_cgroupfs_xattr__destroy(skel); + move_pid_to_cgroup(CGROUP_FS_ROOT, pid); + reset_cgroups_and_lo(); +} + +void test_cgroup_xattr(void) +{ + RUN_TESTS(cgroup_read_xattr); + + if (test__start_subtest("read_cgroupfs_xattr")) + test_read_cgroup_xattr(); +} diff --git a/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c b/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c new file mode 100644 index 000000000000..092db1d0435e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include +#include +#include "bpf_experimental.h" +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +char value[16]; + +static __always_inline void read_xattr(struct cgroup *cgroup) +{ + struct bpf_dynptr value_ptr; + + bpf_dynptr_from_mem(value, sizeof(value), 0, &value_ptr); + bpf_cgroup_read_xattr(cgroup, "user.bpf_test", + &value_ptr); +} + +SEC("lsm.s/socket_connect") +__success +int BPF_PROG(trusted_cgroup_ptr_sleepable) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup *cgrp; + + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) + return 0; + + read_xattr(cgrp); + bpf_cgroup_release(cgrp); + return 0; +} + +SEC("lsm/socket_connect") +__success +int BPF_PROG(trusted_cgroup_ptr_non_sleepable) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup *cgrp; + + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) + return 0; + + read_xattr(cgrp); + bpf_cgroup_release(cgrp); + return 0; +} + +SEC("lsm/socket_connect") +__success +int BPF_PROG(use_css_iter_non_sleepable) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup_subsys_state *css; + struct cgroup *cgrp; + + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) + return 0; + + bpf_for_each(css, css, &cgrp->self, BPF_CGROUP_ITER_ANCESTORS_UP) + read_xattr(css->cgroup); + + bpf_cgroup_release(cgrp); + return 0; +} + +SEC("lsm.s/socket_connect") +__failure __msg("expected an RCU CS") +int BPF_PROG(use_css_iter_sleepable_missing_rcu_lock) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup_subsys_state *css; + struct cgroup *cgrp; + + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) + return 0; + + bpf_for_each(css, css, &cgrp->self, BPF_CGROUP_ITER_ANCESTORS_UP) + read_xattr(css->cgroup); + + bpf_cgroup_release(cgrp); + return 0; +} + +SEC("lsm.s/socket_connect") +__success +int BPF_PROG(use_css_iter_sleepable_with_rcu_lock) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup_subsys_state *css; + struct cgroup *cgrp; + + bpf_rcu_read_lock(); + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) + goto out; + + bpf_for_each(css, css, &cgrp->self, BPF_CGROUP_ITER_ANCESTORS_UP) + read_xattr(css->cgroup); + + bpf_cgroup_release(cgrp); +out: + bpf_rcu_read_unlock(); + return 0; +} + +SEC("lsm/socket_connect") +__success +int BPF_PROG(use_bpf_cgroup_ancestor) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup *cgrp, *ancestor; + + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) + return 0; + + ancestor = bpf_cgroup_ancestor(cgrp, 1); + if (!ancestor) + goto out; + + read_xattr(cgrp); + bpf_cgroup_release(ancestor); +out: + bpf_cgroup_release(cgrp); + return 0; +} + +SEC("cgroup/sendmsg4") +__success +int BPF_PROG(cgroup_skb) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup *cgrp, *ancestor; + + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) + return 0; + + ancestor = bpf_cgroup_ancestor(cgrp, 1); + if (!ancestor) + goto out; + + read_xattr(cgrp); + bpf_cgroup_release(ancestor); +out: + bpf_cgroup_release(cgrp); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c b/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c new file mode 100644 index 000000000000..855f85fc5522 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include +#include +#include "bpf_experimental.h" + +char _license[] SEC("license") = "GPL"; + +pid_t target_pid = 0; + +char xattr_value[64]; +static const char expected_value_a[] = "bpf_selftest_value_a"; +static const char expected_value_b[] = "bpf_selftest_value_b"; +bool found_value_a; +bool found_value_b; + +SEC("lsm.s/socket_connect") +int BPF_PROG(test_socket_connect) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup_subsys_state *css, *tmp; + struct bpf_dynptr value_ptr; + struct cgroup *cgrp; + + if ((bpf_get_current_pid_tgid() >> 32) != target_pid) + return 0; + + bpf_rcu_read_lock(); + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) { + bpf_rcu_read_unlock(); + return 0; + } + + css = &cgrp->self; + bpf_dynptr_from_mem(xattr_value, sizeof(xattr_value), 0, &value_ptr); + bpf_for_each(css, tmp, css, BPF_CGROUP_ITER_ANCESTORS_UP) { + int ret; + + ret = bpf_cgroup_read_xattr(tmp->cgroup, "user.bpf_test", + &value_ptr); + if (ret < 0) + continue; + + if (ret == sizeof(expected_value_a) && + !bpf_strncmp(xattr_value, sizeof(expected_value_a), expected_value_a)) + found_value_a = true; + if (ret == sizeof(expected_value_b) && + !bpf_strncmp(xattr_value, sizeof(expected_value_b), expected_value_b)) + found_value_b = true; + } + + bpf_rcu_read_unlock(); + bpf_cgroup_release(cgrp); + + return 0; +} -- cgit v1.2.3 From 70619d40e8307b4b2ce1d08405e7b827c61ba4a8 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 2 Jul 2025 13:53:21 +0200 Subject: selftests/kernfs: test xattr retrieval Make sure that listxattr() returns zero and that getxattr() returns ENODATA when no extended attributs are set. Use /sys/kernel/warn_count as that always exists and is a read-only file. Link: https://lore.kernel.org/20250702-hochmoderne-abklatsch-af9c605b57b2@brauner Signed-off-by: Christian Brauner --- tools/testing/selftests/filesystems/.gitignore | 1 + tools/testing/selftests/filesystems/Makefile | 2 +- tools/testing/selftests/filesystems/kernfs_test.c | 38 +++++++++++++++++++++++ 3 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/filesystems/kernfs_test.c (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/filesystems/.gitignore b/tools/testing/selftests/filesystems/.gitignore index 7afa58e2bb20..fcbdb1297e24 100644 --- a/tools/testing/selftests/filesystems/.gitignore +++ b/tools/testing/selftests/filesystems/.gitignore @@ -3,3 +3,4 @@ dnotify_test devpts_pts file_stressor anon_inode_test +kernfs_test diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile index b02326193fee..73d4650af1a5 100644 --- a/tools/testing/selftests/filesystems/Makefile +++ b/tools/testing/selftests/filesystems/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += $(KHDR_INCLUDES) -TEST_GEN_PROGS := devpts_pts file_stressor anon_inode_test +TEST_GEN_PROGS := devpts_pts file_stressor anon_inode_test kernfs_test TEST_GEN_PROGS_EXTENDED := dnotify_test include ../lib.mk diff --git a/tools/testing/selftests/filesystems/kernfs_test.c b/tools/testing/selftests/filesystems/kernfs_test.c new file mode 100644 index 000000000000..16538b3b318e --- /dev/null +++ b/tools/testing/selftests/filesystems/kernfs_test.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#define __SANE_USERSPACE_TYPES__ + +#include +#include +#include +#include + +#include "../kselftest_harness.h" +#include "wrappers.h" + +TEST(kernfs_listxattr) +{ + int fd; + + /* Read-only file that can never have any extended attributes set. */ + fd = open("/sys/kernel/warn_count", O_RDONLY | O_CLOEXEC); + ASSERT_GE(fd, 0); + ASSERT_EQ(flistxattr(fd, NULL, 0), 0); + EXPECT_EQ(close(fd), 0); +} + +TEST(kernfs_getxattr) +{ + int fd; + char buf[1]; + + /* Read-only file that can never have any extended attributes set. */ + fd = open("/sys/kernel/warn_count", O_RDONLY | O_CLOEXEC); + ASSERT_GE(fd, 0); + ASSERT_LT(fgetxattr(fd, "user.foo", buf, sizeof(buf)), 0); + ASSERT_EQ(errno, ENODATA); + EXPECT_EQ(close(fd), 0); +} + +TEST_HARNESS_MAIN + -- cgit v1.2.3 From 49a9942ff80c99dabdc4a76011d755524e72fd9d Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Wed, 18 Jun 2025 09:45:07 +0100 Subject: kselftest/arm64: Add MTE_FAR hwcap test add MTE_FAR hwcap test on kselftest. Signed-off-by: Yeoreum Yun Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250618084513.1761345-5-yeoreum.yun@arm.com Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/abi/hwcap.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index 35f521e5f41c..e60bfb798ba2 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -1098,6 +1098,12 @@ static const struct hwcap_data { .sigill_fn = hbc_sigill, .sigill_reliable = true, }, + { + .name = "MTE_FAR", + .at_hwcap = AT_HWCAP3, + .hwcap_bit = HWCAP3_MTE_FAR, + .cpuinfo = "mtefar", + }, }; typedef void (*sighandler_fn)(int, siginfo_t *, void *); -- cgit v1.2.3 From cfafa517c9e658756511e210e7288efe21554bcf Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Wed, 18 Jun 2025 09:45:08 +0100 Subject: kselftest/arm64/mte: Register mte signal handler with SA_EXPOSE_TAGBITS To test address tag[63:60] and memory tag[59:56] is preserved when memory tag fault happen, Let mte_register_signal() to register signal handler with SA_EXPOSE_TAGBITS. Signed-off-by: Yeoreum Yun Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250618084513.1761345-6-yeoreum.yun@arm.com Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/mte/check_buffer_fill.c | 2 +- tools/testing/selftests/arm64/mte/check_child_memory.c | 4 ++-- tools/testing/selftests/arm64/mte/check_hugetlb_options.c | 4 ++-- tools/testing/selftests/arm64/mte/check_ksm_options.c | 4 ++-- tools/testing/selftests/arm64/mte/check_mmap_options.c | 4 ++-- tools/testing/selftests/arm64/mte/check_tags_inclusion.c | 2 +- tools/testing/selftests/arm64/mte/check_user_mem.c | 2 +- tools/testing/selftests/arm64/mte/mte_common_util.c | 11 ++++++++++- tools/testing/selftests/arm64/mte/mte_common_util.h | 3 ++- 9 files changed, 23 insertions(+), 13 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c index 2ee7f114d7fa..5248b5265aa4 100644 --- a/tools/testing/selftests/arm64/mte/check_buffer_fill.c +++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c @@ -415,7 +415,7 @@ int main(int argc, char *argv[]) return err; /* Register SIGSEGV handler */ - mte_register_signal(SIGSEGV, mte_default_handler); + mte_register_signal(SIGSEGV, mte_default_handler, false); /* Set test plan */ ksft_set_plan(20); diff --git a/tools/testing/selftests/arm64/mte/check_child_memory.c b/tools/testing/selftests/arm64/mte/check_child_memory.c index 7597fc632cad..b97ea3981c21 100644 --- a/tools/testing/selftests/arm64/mte/check_child_memory.c +++ b/tools/testing/selftests/arm64/mte/check_child_memory.c @@ -160,8 +160,8 @@ int main(int argc, char *argv[]) return err; /* Register SIGSEGV handler */ - mte_register_signal(SIGSEGV, mte_default_handler); - mte_register_signal(SIGBUS, mte_default_handler); + mte_register_signal(SIGSEGV, mte_default_handler, false); + mte_register_signal(SIGBUS, mte_default_handler, false); /* Set test plan */ ksft_set_plan(12); diff --git a/tools/testing/selftests/arm64/mte/check_hugetlb_options.c b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c index 3bfcd3848432..4e644a606394 100644 --- a/tools/testing/selftests/arm64/mte/check_hugetlb_options.c +++ b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c @@ -235,8 +235,8 @@ int main(int argc, char *argv[]) return err; /* Register signal handlers */ - mte_register_signal(SIGBUS, mte_default_handler); - mte_register_signal(SIGSEGV, mte_default_handler); + mte_register_signal(SIGBUS, mte_default_handler, false); + mte_register_signal(SIGSEGV, mte_default_handler, false); allocate_hugetlb(); diff --git a/tools/testing/selftests/arm64/mte/check_ksm_options.c b/tools/testing/selftests/arm64/mte/check_ksm_options.c index 88c74bc46d4f..afea4e381862 100644 --- a/tools/testing/selftests/arm64/mte/check_ksm_options.c +++ b/tools/testing/selftests/arm64/mte/check_ksm_options.c @@ -141,8 +141,8 @@ int main(int argc, char *argv[]) return KSFT_FAIL; } /* Register signal handlers */ - mte_register_signal(SIGBUS, mte_default_handler); - mte_register_signal(SIGSEGV, mte_default_handler); + mte_register_signal(SIGBUS, mte_default_handler, false); + mte_register_signal(SIGSEGV, mte_default_handler, false); /* Set test plan */ ksft_set_plan(4); diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c index 17694caaff53..b37bf481c9f9 100644 --- a/tools/testing/selftests/arm64/mte/check_mmap_options.c +++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c @@ -201,8 +201,8 @@ int main(int argc, char *argv[]) sizes[item - 1] = page_size + 1; /* Register signal handlers */ - mte_register_signal(SIGBUS, mte_default_handler); - mte_register_signal(SIGSEGV, mte_default_handler); + mte_register_signal(SIGBUS, mte_default_handler, false); + mte_register_signal(SIGSEGV, mte_default_handler, false); /* Set test plan */ ksft_set_plan(22); diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c index a3d1e23fe02a..b96296ab9870 100644 --- a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c +++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c @@ -180,7 +180,7 @@ int main(int argc, char *argv[]) return err; /* Register SIGSEGV handler */ - mte_register_signal(SIGSEGV, mte_default_handler); + mte_register_signal(SIGSEGV, mte_default_handler, false); /* Set test plan */ ksft_set_plan(4); diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c index f4ae5f87a3b7..d1d14aaaba16 100644 --- a/tools/testing/selftests/arm64/mte/check_user_mem.c +++ b/tools/testing/selftests/arm64/mte/check_user_mem.c @@ -211,7 +211,7 @@ int main(int argc, char *argv[]) return err; /* Register signal handlers */ - mte_register_signal(SIGSEGV, mte_default_handler); + mte_register_signal(SIGSEGV, mte_default_handler, false); /* Set test plan */ ksft_set_plan(64); diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c index a1dc2fe5285b..83240b980f9c 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.c +++ b/tools/testing/selftests/arm64/mte/mte_common_util.c @@ -19,6 +19,10 @@ #include "mte_common_util.h" #include "mte_def.h" +#ifndef SA_EXPOSE_TAGBITS +#define SA_EXPOSE_TAGBITS 0x00000800 +#endif + #define INIT_BUFFER_SIZE 256 struct mte_fault_cxt cur_mte_cxt; @@ -79,12 +83,17 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc) } } -void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *)) +void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *), + bool export_tags) { struct sigaction sa; sa.sa_sigaction = handler; sa.sa_flags = SA_SIGINFO; + + if (export_tags && signal == SIGSEGV) + sa.sa_flags |= SA_EXPOSE_TAGBITS; + sigemptyset(&sa.sa_mask); sigaction(signal, &sa, NULL); } diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h index a0017a303beb..6b109e84fa39 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.h +++ b/tools/testing/selftests/arm64/mte/mte_common_util.h @@ -40,7 +40,8 @@ extern struct mte_fault_cxt cur_mte_cxt; /* MTE utility functions */ void mte_default_handler(int signum, siginfo_t *si, void *uc); -void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *)); +void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *), + bool export_tags); void mte_wait_after_trig(void); void *mte_allocate_memory(size_t size, int mem_type, int mapping, bool tags); void *mte_allocate_memory_tag_range(size_t size, int mem_type, int mapping, -- cgit v1.2.3 From 2e3e356560ef54605a1c779ae8dcd7889ff2875d Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Wed, 18 Jun 2025 09:45:09 +0100 Subject: kselftest/arm64/mte: Check MTE_FAR feature is supported To run the MTE_FAR test when cpu supports MTE_FAR feature, check the MTE_FAR feature is supported in mte test. Signed-off-by: Yeoreum Yun Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250618084513.1761345-7-yeoreum.yun@arm.com Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/mte/mte_common_util.c | 4 ++++ tools/testing/selftests/arm64/mte/mte_common_util.h | 1 + 2 files changed, 5 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c index 83240b980f9c..5c5680a87498 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.c +++ b/tools/testing/selftests/arm64/mte/mte_common_util.c @@ -26,6 +26,7 @@ #define INIT_BUFFER_SIZE 256 struct mte_fault_cxt cur_mte_cxt; +bool mtefar_support; static unsigned int mte_cur_mode; static unsigned int mte_cur_pstate_tco; @@ -325,12 +326,15 @@ int mte_switch_mode(int mte_option, unsigned long incl_mask) int mte_default_setup(void) { unsigned long hwcaps2 = getauxval(AT_HWCAP2); + unsigned long hwcaps3 = getauxval(AT_HWCAP3); unsigned long en = 0; int ret; if (!(hwcaps2 & HWCAP2_MTE)) ksft_exit_skip("MTE features unavailable\n"); + mtefar_support = !!(hwcaps3 & HWCAP3_MTE_FAR); + /* Get current mte mode */ ret = prctl(PR_GET_TAGGED_ADDR_CTRL, en, 0, 0, 0); if (ret < 0) { diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h index 6b109e84fa39..4e1dd959df9b 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.h +++ b/tools/testing/selftests/arm64/mte/mte_common_util.h @@ -37,6 +37,7 @@ struct mte_fault_cxt { }; extern struct mte_fault_cxt cur_mte_cxt; +extern bool mtefar_support; /* MTE utility functions */ void mte_default_handler(int signum, siginfo_t *si, void *uc); -- cgit v1.2.3 From ed434c6e081327e9d0685d4b5e4cd067246f8be6 Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Wed, 18 Jun 2025 09:45:10 +0100 Subject: kselftest/arm64/mte: Add address tag related macro and function Add address tag related macro and function to test MTE_FAR feature. Signed-off-by: Yeoreum Yun Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250618084513.1761345-8-yeoreum.yun@arm.com Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/mte/mte_common_util.c | 17 +++++++++++++++++ tools/testing/selftests/arm64/mte/mte_common_util.h | 2 ++ tools/testing/selftests/arm64/mte/mte_def.h | 8 ++++++++ 3 files changed, 27 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c index 5c5680a87498..d9702a542cb6 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.c +++ b/tools/testing/selftests/arm64/mte/mte_common_util.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -130,6 +131,19 @@ void mte_clear_tags(void *ptr, size_t size) mte_clear_tag_address_range(ptr, size); } +void *mte_insert_atag(void *ptr) +{ + unsigned char atag; + + atag = mtefar_support ? (random() % MT_ATAG_MASK) + 1 : 0; + return (void *)MT_SET_ATAG((unsigned long)ptr, atag); +} + +void *mte_clear_atag(void *ptr) +{ + return (void *)MT_CLEAR_ATAG((unsigned long)ptr); +} + static void *__mte_allocate_memory_range(size_t size, int mem_type, int mapping, size_t range_before, size_t range_after, bool tags, int fd) @@ -330,6 +344,9 @@ int mte_default_setup(void) unsigned long en = 0; int ret; + /* To generate random address tag */ + srandom(time(NULL)); + if (!(hwcaps2 & HWCAP2_MTE)) ksft_exit_skip("MTE features unavailable\n"); diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h index 4e1dd959df9b..045e4ad2f018 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.h +++ b/tools/testing/selftests/arm64/mte/mte_common_util.h @@ -56,6 +56,8 @@ void mte_free_memory_tag_range(void *ptr, size_t size, int mem_type, size_t range_before, size_t range_after); void *mte_insert_tags(void *ptr, size_t size); void mte_clear_tags(void *ptr, size_t size); +void *mte_insert_atag(void *ptr); +void *mte_clear_atag(void *ptr); int mte_default_setup(void); void mte_restore_setup(void); int mte_switch_mode(int mte_option, unsigned long incl_mask); diff --git a/tools/testing/selftests/arm64/mte/mte_def.h b/tools/testing/selftests/arm64/mte/mte_def.h index 9b188254b61a..6ad22f07c9b8 100644 --- a/tools/testing/selftests/arm64/mte/mte_def.h +++ b/tools/testing/selftests/arm64/mte/mte_def.h @@ -42,6 +42,8 @@ #define MT_TAG_COUNT 16 #define MT_INCLUDE_TAG_MASK 0xFFFF #define MT_EXCLUDE_TAG_MASK 0x0 +#define MT_ATAG_SHIFT 60 +#define MT_ATAG_MASK 0xFUL #define MT_ALIGN_GRANULE (MT_GRANULE_SIZE - 1) #define MT_CLEAR_TAG(x) ((x) & ~(MT_TAG_MASK << MT_TAG_SHIFT)) @@ -49,6 +51,12 @@ #define MT_FETCH_TAG(x) ((x >> MT_TAG_SHIFT) & (MT_TAG_MASK)) #define MT_ALIGN_UP(x) ((x + MT_ALIGN_GRANULE) & ~(MT_ALIGN_GRANULE)) +#define MT_CLEAR_ATAG(x) ((x) & ~(MT_TAG_MASK << MT_ATAG_SHIFT)) +#define MT_SET_ATAG(x, y) ((x) | (((y) & MT_ATAG_MASK) << MT_ATAG_SHIFT)) +#define MT_FETCH_ATAG(x) ((x >> MT_ATAG_SHIFT) & (MT_ATAG_MASK)) + +#define MT_CLEAR_TAGS(x) (MT_CLEAR_ATAG(MT_CLEAR_TAG(x))) + #define MT_PSTATE_TCO_SHIFT 25 #define MT_PSTATE_TCO_MASK ~(0x1 << MT_PSTATE_TCO_SHIFT) #define MT_PSTATE_TCO_EN 1 -- cgit v1.2.3 From 49cee364c8665c5951162a0e193d10a86e3e6011 Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Wed, 18 Jun 2025 09:45:11 +0100 Subject: kselftest/arm64/mte: Add verification for address tag in signal handler Add the address tag [63:60] verification when synchronous mte fault is happen. when signal handler is registered with SA_EXPOSE_TAGBITS, address includes not only memory tag [59:56] but also address tag. Therefore, when verify fault address location, remove both tags Signed-off-by: Yeoreum Yun Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250618084513.1761345-9-yeoreum.yun@arm.com Signed-off-by: Catalin Marinas --- .../testing/selftests/arm64/mte/mte_common_util.c | 38 ++++++++++++++++------ 1 file changed, 28 insertions(+), 10 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c index d9702a542cb6..10dcbc37e379 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.c +++ b/tools/testing/selftests/arm64/mte/mte_common_util.c @@ -33,12 +33,25 @@ static unsigned int mte_cur_pstate_tco; void mte_default_handler(int signum, siginfo_t *si, void *uc) { + struct sigaction sa; unsigned long addr = (unsigned long)si->si_addr; + unsigned char si_tag, si_atag; + + sigaction(signum, NULL, &sa); + + if (sa.sa_flags & SA_EXPOSE_TAGBITS) { + si_tag = MT_FETCH_TAG(addr); + si_atag = MT_FETCH_ATAG(addr); + addr = MT_CLEAR_TAGS(addr); + } else { + si_tag = 0; + si_atag = 0; + } if (signum == SIGSEGV) { #ifdef DEBUG - ksft_print_msg("INFO: SIGSEGV signal at pc=%lx, fault addr=%lx, si_code=%lx\n", - ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code); + ksft_print_msg("INFO: SIGSEGV signal at pc=%lx, fault addr=%lx, si_code=%lx, si_tag=%x, si_atag=%x\n", + ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code, si_tag, si_atag); #endif if (si->si_code == SEGV_MTEAERR) { if (cur_mte_cxt.trig_si_code == si->si_code) @@ -51,13 +64,18 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc) } /* Compare the context for precise error */ else if (si->si_code == SEGV_MTESERR) { + if ((!mtefar_support && si_atag) || (si_atag != MT_FETCH_ATAG(cur_mte_cxt.trig_addr))) { + ksft_print_msg("Invalid MTE synchronous exception caught for address tag! si_tag=%x, si_atag: %x\n", si_tag, si_atag); + exit(KSFT_FAIL); + } + if (cur_mte_cxt.trig_si_code == si->si_code && ((cur_mte_cxt.trig_range >= 0 && - addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && - addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) || + addr >= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) && + addr <= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) || (cur_mte_cxt.trig_range < 0 && - addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && - addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)))) { + addr <= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) && + addr >= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)))) { cur_mte_cxt.fault_valid = true; /* Adjust the pc by 4 */ ((ucontext_t *)uc)->uc_mcontext.pc += 4; @@ -73,11 +91,11 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc) ksft_print_msg("INFO: SIGBUS signal at pc=%llx, fault addr=%lx, si_code=%x\n", ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code); if ((cur_mte_cxt.trig_range >= 0 && - addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && - addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) || + addr >= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) && + addr <= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) || (cur_mte_cxt.trig_range < 0 && - addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && - addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range))) { + addr <= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) && + addr >= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range))) { cur_mte_cxt.fault_valid = true; /* Adjust the pc by 4 */ ((ucontext_t *)uc)->uc_mcontext.pc += 4; -- cgit v1.2.3 From 64a64e5d12f070405800745c674916fbbf2b9283 Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Wed, 18 Jun 2025 09:45:12 +0100 Subject: kselftest/arm64/mte: Refactor check_mmap_option test Before add mtefar testcase on check_mmap_option.c, refactor check_mmap_option: - make testcase suite array with test options (mem_type, mte_sync type and etc) to use general testcase pattern - generate each test case name acoording to test options. Signed-off-by: Yeoreum Yun Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250618084513.1761345-10-yeoreum.yun@arm.com Signed-off-by: Catalin Marinas --- .../selftests/arm64/mte/check_mmap_options.c | 367 ++++++++++++++++++--- 1 file changed, 313 insertions(+), 54 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c index b37bf481c9f9..0df7ce532465 100644 --- a/tools/testing/selftests/arm64/mte/check_mmap_options.c +++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c @@ -3,6 +3,7 @@ #define _GNU_SOURCE +#include #include #include #include @@ -24,6 +25,23 @@ #define TAG_CHECK_ON 0 #define TAG_CHECK_OFF 1 +#define TEST_NAME_MAX 256 + +enum mte_mem_check_type { + CHECK_ANON_MEM = 0, + CHECK_FILE_MEM = 1, + CHECK_CLEAR_PROT_MTE = 2, +}; + +struct check_mmap_testcase { + int check_type; + int mem_type; + int mte_sync; + int mapping; + int tag_check; + bool enable_tco; +}; + static size_t page_size; static int sizes[] = { 1, 537, 989, 1269, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE, @@ -183,10 +201,271 @@ static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping) return KSFT_PASS; } +const char *format_test_name(struct check_mmap_testcase *tc) +{ + static char test_name[TEST_NAME_MAX]; + const char *check_type_str; + const char *mem_type_str; + const char *sync_str; + const char *mapping_str; + const char *tag_check_str; + + switch (tc->check_type) { + case CHECK_ANON_MEM: + check_type_str = "anonymous memory"; + break; + case CHECK_FILE_MEM: + check_type_str = "file memory"; + break; + case CHECK_CLEAR_PROT_MTE: + check_type_str = "clear PROT_MTE flags"; + break; + default: + assert(0); + break; + } + + switch (tc->mem_type) { + case USE_MMAP: + mem_type_str = "mmap"; + break; + case USE_MPROTECT: + mem_type_str = "mmap/mprotect"; + break; + default: + assert(0); + break; + } + + switch (tc->mte_sync) { + case MTE_NONE_ERR: + sync_str = "no error"; + break; + case MTE_SYNC_ERR: + sync_str = "sync error"; + break; + case MTE_ASYNC_ERR: + sync_str = "async error"; + break; + default: + assert(0); + break; + } + + switch (tc->mapping) { + case MAP_SHARED: + mapping_str = "shared"; + break; + case MAP_PRIVATE: + mapping_str = "private"; + break; + default: + assert(0); + break; + } + + switch (tc->tag_check) { + case TAG_CHECK_ON: + tag_check_str = "tag check on"; + break; + case TAG_CHECK_OFF: + tag_check_str = "tag check off"; + break; + default: + assert(0); + break; + } + + snprintf(test_name, sizeof(test_name), + "Check %s with %s mapping, %s mode, %s memory and %s\n", + check_type_str, mapping_str, sync_str, mem_type_str, + tag_check_str); + + return test_name; +} + int main(int argc, char *argv[]) { - int err; + int err, i; int item = ARRAY_SIZE(sizes); + struct check_mmap_testcase test_cases[]= { + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_OFF, + .enable_tco = true, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_OFF, + .enable_tco = true, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_NONE_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_OFF, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_NONE_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_OFF, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_CLEAR_PROT_MTE, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_CLEAR_PROT_MTE, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + }; err = mte_default_setup(); if (err) @@ -205,59 +484,39 @@ int main(int argc, char *argv[]) mte_register_signal(SIGSEGV, mte_default_handler, false); /* Set test plan */ - ksft_set_plan(22); - - mte_enable_pstate_tco(); - - evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF), - "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check off\n"); - evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF), - "Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check off\n"); - - mte_disable_pstate_tco(); - evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF), - "Check anonymous memory with private mapping, no error mode, mmap memory and tag check off\n"); - evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF), - "Check file memory with private mapping, no error mode, mmap/mprotect memory and tag check off\n"); - - evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check on\n"); - evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check anonymous memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n"); - evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check anonymous memory with shared mapping, sync error mode, mmap memory and tag check on\n"); - evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check anonymous memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n"); - evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check anonymous memory with private mapping, async error mode, mmap memory and tag check on\n"); - evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check anonymous memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n"); - evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check anonymous memory with shared mapping, async error mode, mmap memory and tag check on\n"); - evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check anonymous memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n"); - - evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check file memory with private mapping, sync error mode, mmap memory and tag check on\n"); - evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n"); - evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check file memory with shared mapping, sync error mode, mmap memory and tag check on\n"); - evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check file memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n"); - evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check file memory with private mapping, async error mode, mmap memory and tag check on\n"); - evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check file memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n"); - evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check file memory with shared mapping, async error mode, mmap memory and tag check on\n"); - evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check file memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n"); - - evaluate_test(check_clear_prot_mte_flag(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), - "Check clear PROT_MTE flags with private mapping, sync error mode and mmap memory\n"); - evaluate_test(check_clear_prot_mte_flag(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE), - "Check clear PROT_MTE flags with private mapping and sync error mode and mmap/mprotect memory\n"); + ksft_set_plan(ARRAY_SIZE(test_cases)); + + for (i = 0 ; i < ARRAY_SIZE(test_cases); i++) { + if (test_cases[i].enable_tco) + mte_enable_pstate_tco(); + else + mte_disable_pstate_tco(); + + switch (test_cases[i].check_type) { + case CHECK_ANON_MEM: + evaluate_test(check_anonymous_memory_mapping(test_cases[i].mem_type, + test_cases[i].mte_sync, + test_cases[i].mapping, + test_cases[i].tag_check), + format_test_name(&test_cases[i])); + break; + case CHECK_FILE_MEM: + evaluate_test(check_file_memory_mapping(test_cases[i].mem_type, + test_cases[i].mte_sync, + test_cases[i].mapping, + test_cases[i].tag_check), + format_test_name(&test_cases[i])); + break; + case CHECK_CLEAR_PROT_MTE: + evaluate_test(check_clear_prot_mte_flag(test_cases[i].mem_type, + test_cases[i].mte_sync, + test_cases[i].mapping), + format_test_name(&test_cases[i])); + break; + default: + exit(KSFT_FAIL); + } + } mte_restore_setup(); ksft_print_cnts(); -- cgit v1.2.3 From d09674f98cdbf5dc2288cd5e2d0e63b6e5f723b2 Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Wed, 18 Jun 2025 09:45:13 +0100 Subject: kselftest/arm64/mte: Add mtefar tests on check_mmap_options If FEAT_MTE_TAGGED_FAR (Armv8.9) is supported, bits 63:60 of the fault address are preserved in response to synchronous tag check faults (SEGV_MTESERR). This patch adds new test cases using address tags (bits 63:60), corresponding to each existing test in check_mmap_option. Signed-off-by: Yeoreum Yun Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250618084513.1761345-11-yeoreum.yun@arm.com Signed-off-by: Catalin Marinas --- .../selftests/arm64/mte/check_mmap_options.c | 194 ++++++++++++++++++--- 1 file changed, 171 insertions(+), 23 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c index 0df7ce532465..91a81b4a9bfa 100644 --- a/tools/testing/selftests/arm64/mte/check_mmap_options.c +++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c @@ -24,6 +24,8 @@ #define OVERFLOW MT_GRANULE_SIZE #define TAG_CHECK_ON 0 #define TAG_CHECK_OFF 1 +#define ATAG_CHECK_ON 1 +#define ATAG_CHECK_OFF 0 #define TEST_NAME_MAX 256 @@ -39,6 +41,7 @@ struct check_mmap_testcase { int mte_sync; int mapping; int tag_check; + int atag_check; bool enable_tco; }; @@ -48,8 +51,14 @@ static int sizes[] = { /* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0 }; -static int check_mte_memory(char *ptr, int size, int mode, int tag_check) +static int check_mte_memory(char *ptr, int size, int mode, int tag_check, int atag_check) { + if (!mtefar_support && atag_check == ATAG_CHECK_ON) + return KSFT_SKIP; + + if (atag_check == ATAG_CHECK_ON) + ptr = mte_insert_atag(ptr); + mte_initialize_current_context(mode, (uintptr_t)ptr, size); memset(ptr, '1', size); mte_wait_after_trig(); @@ -75,7 +84,7 @@ static int check_mte_memory(char *ptr, int size, int mode, int tag_check) return KSFT_PASS; } -static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, int tag_check) +static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, int tag_check, int atag_check) { char *ptr, *map_ptr; int run, result, map_size; @@ -97,16 +106,16 @@ static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, i munmap((void *)map_ptr, map_size); return KSFT_FAIL; } - result = check_mte_memory(ptr, sizes[run], mode, tag_check); + result = check_mte_memory(ptr, sizes[run], mode, tag_check, atag_check); mte_clear_tags((void *)ptr, sizes[run]); mte_free_memory((void *)map_ptr, map_size, mem_type, false); - if (result == KSFT_FAIL) - return KSFT_FAIL; + if (result != KSFT_PASS) + return result; } return KSFT_PASS; } -static int check_file_memory_mapping(int mem_type, int mode, int mapping, int tag_check) +static int check_file_memory_mapping(int mem_type, int mode, int mapping, int tag_check, int atag_check) { char *ptr, *map_ptr; int run, fd, map_size; @@ -135,17 +144,17 @@ static int check_file_memory_mapping(int mem_type, int mode, int mapping, int ta close(fd); return KSFT_FAIL; } - result = check_mte_memory(ptr, sizes[run], mode, tag_check); + result = check_mte_memory(ptr, sizes[run], mode, tag_check, atag_check); mte_clear_tags((void *)ptr, sizes[run]); munmap((void *)map_ptr, map_size); close(fd); - if (result == KSFT_FAIL) - break; + if (result != KSFT_PASS) + return result; } - return result; + return KSFT_PASS; } -static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping) +static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping, int atag_check) { char *ptr, *map_ptr; int run, prot_flag, result, fd, map_size; @@ -168,7 +177,7 @@ static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping) ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n"); return KSFT_FAIL; } - result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON); + result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON, atag_check); mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW); if (result != KSFT_PASS) return KSFT_FAIL; @@ -192,11 +201,11 @@ static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping) close(fd); return KSFT_FAIL; } - result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON); + result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON, atag_check); mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW); close(fd); if (result != KSFT_PASS) - return KSFT_FAIL; + return result; } return KSFT_PASS; } @@ -209,6 +218,7 @@ const char *format_test_name(struct check_mmap_testcase *tc) const char *sync_str; const char *mapping_str; const char *tag_check_str; + const char *atag_check_str; switch (tc->check_type) { case CHECK_ANON_MEM: @@ -276,10 +286,22 @@ const char *format_test_name(struct check_mmap_testcase *tc) break; } + switch (tc->atag_check) { + case ATAG_CHECK_ON: + atag_check_str = "with address tag [63:60]"; + break; + case ATAG_CHECK_OFF: + atag_check_str = "without address tag [63:60]"; + break; + default: + assert(0); + break; + } + snprintf(test_name, sizeof(test_name), - "Check %s with %s mapping, %s mode, %s memory and %s\n", + "Check %s with %s mapping, %s mode, %s memory and %s (%s)\n", check_type_str, mapping_str, sync_str, mem_type_str, - tag_check_str); + tag_check_str, atag_check_str); return test_name; } @@ -295,6 +317,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_SYNC_ERR, .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_OFF, + .atag_check = ATAG_CHECK_OFF, .enable_tco = true, }, { @@ -303,6 +326,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_SYNC_ERR, .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_OFF, + .atag_check = ATAG_CHECK_OFF, .enable_tco = true, }, { @@ -311,6 +335,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_NONE_ERR, .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_OFF, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -319,6 +344,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_NONE_ERR, .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_OFF, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -327,6 +353,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_SYNC_ERR, .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -335,6 +362,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_SYNC_ERR, .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -343,6 +371,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_SYNC_ERR, .mapping = MAP_SHARED, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -351,6 +380,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_SYNC_ERR, .mapping = MAP_SHARED, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -359,6 +389,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_ASYNC_ERR, .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -367,6 +398,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_ASYNC_ERR, .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -375,6 +407,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_ASYNC_ERR, .mapping = MAP_SHARED, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -383,6 +416,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_ASYNC_ERR, .mapping = MAP_SHARED, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -391,6 +425,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_SYNC_ERR, .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -399,6 +434,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_SYNC_ERR, .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -407,6 +443,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_SYNC_ERR, .mapping = MAP_SHARED, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -415,6 +452,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_SYNC_ERR, .mapping = MAP_SHARED, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -423,6 +461,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_ASYNC_ERR, .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -431,6 +470,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_ASYNC_ERR, .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -439,6 +479,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_ASYNC_ERR, .mapping = MAP_SHARED, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -447,6 +488,106 @@ int main(int argc, char *argv[]) .mte_sync = MTE_ASYNC_ERR, .mapping = MAP_SHARED, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .enable_tco = false, + }, + { + .check_type = CHECK_CLEAR_PROT_MTE, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .enable_tco = false, + }, + { + .check_type = CHECK_CLEAR_PROT_MTE, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, .enable_tco = false, }, { @@ -455,6 +596,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_SYNC_ERR, .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, .enable_tco = false, }, { @@ -463,6 +605,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_SYNC_ERR, .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, .enable_tco = false, }, }; @@ -479,14 +622,16 @@ int main(int argc, char *argv[]) sizes[item - 2] = page_size; sizes[item - 1] = page_size + 1; - /* Register signal handlers */ - mte_register_signal(SIGBUS, mte_default_handler, false); - mte_register_signal(SIGSEGV, mte_default_handler, false); - /* Set test plan */ ksft_set_plan(ARRAY_SIZE(test_cases)); for (i = 0 ; i < ARRAY_SIZE(test_cases); i++) { + /* Register signal handlers */ + mte_register_signal(SIGBUS, mte_default_handler, + test_cases[i].atag_check == ATAG_CHECK_ON); + mte_register_signal(SIGSEGV, mte_default_handler, + test_cases[i].atag_check == ATAG_CHECK_ON); + if (test_cases[i].enable_tco) mte_enable_pstate_tco(); else @@ -497,20 +642,23 @@ int main(int argc, char *argv[]) evaluate_test(check_anonymous_memory_mapping(test_cases[i].mem_type, test_cases[i].mte_sync, test_cases[i].mapping, - test_cases[i].tag_check), + test_cases[i].tag_check, + test_cases[i].atag_check), format_test_name(&test_cases[i])); break; case CHECK_FILE_MEM: evaluate_test(check_file_memory_mapping(test_cases[i].mem_type, test_cases[i].mte_sync, test_cases[i].mapping, - test_cases[i].tag_check), + test_cases[i].tag_check, + test_cases[i].atag_check), format_test_name(&test_cases[i])); break; case CHECK_CLEAR_PROT_MTE: evaluate_test(check_clear_prot_mte_flag(test_cases[i].mem_type, test_cases[i].mte_sync, - test_cases[i].mapping), + test_cases[i].mapping, + test_cases[i].atag_check), format_test_name(&test_cases[i])); break; default: -- cgit v1.2.3 From 07ee18a0bc946b6b407942c88faed089e20f47d1 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Tue, 1 Jul 2025 20:31:23 +0200 Subject: selftests/bpf: Don't call fsopen() as privileged user In the BPF token example, the fsopen() syscall is called as privileged user. This is unneeded because fsopen() can be called also as unprivileged user from the user namespace. As the `fs_fd` file descriptor which was sent back and forth is still the same, keep it open instead of cloning and closing it twice via SCM_RIGHTS. cfr. https://github.com/systemd/systemd/pull/36134 Signed-off-by: Matteo Croce Signed-off-by: Andrii Nakryiko Acked-by: Christian Brauner Link: https://lore.kernel.org/bpf/20250701183123.31781-1-technoboy85@gmail.com --- tools/testing/selftests/bpf/prog_tests/token.c | 41 +++++++++++++------------- 1 file changed, 21 insertions(+), 20 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c index f9392df23f8a..cfc032b910c4 100644 --- a/tools/testing/selftests/bpf/prog_tests/token.c +++ b/tools/testing/selftests/bpf/prog_tests/token.c @@ -115,7 +115,7 @@ static int create_bpffs_fd(void) static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts) { - int mnt_fd, err; + int err; /* set up token delegation mount options */ err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds, opts->cmds_str); @@ -136,12 +136,7 @@ static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts) if (err < 0) return -errno; - /* create O_PATH fd for detached mount */ - mnt_fd = sys_fsmount(fs_fd, 0, 0); - if (err < 0) - return -errno; - - return mnt_fd; + return 0; } /* send FD over Unix domain (AF_UNIX) socket */ @@ -287,6 +282,7 @@ static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callba { int mnt_fd = -1, fs_fd = -1, err = 0, bpffs_fd = -1, token_fd = -1; struct token_lsm *lsm_skel = NULL; + char one; /* load and attach LSM "policy" before we go into unpriv userns */ lsm_skel = token_lsm__open_and_load(); @@ -333,13 +329,19 @@ static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callba err = sendfd(sock_fd, fs_fd); if (!ASSERT_OK(err, "send_fs_fd")) goto cleanup; - zclose(fs_fd); + + /* wait that the parent reads the fd, does the fsconfig() calls + * and send us a signal that it is done + */ + err = read(sock_fd, &one, sizeof(one)); + if (!ASSERT_GE(err, 0, "read_one")) + goto cleanup; /* avoid mucking around with mount namespaces and mounting at - * well-known path, just get detach-mounted BPF FS fd back from parent + * well-known path, just create O_PATH fd for detached mount */ - err = recvfd(sock_fd, &mnt_fd); - if (!ASSERT_OK(err, "recv_mnt_fd")) + mnt_fd = sys_fsmount(fs_fd, 0, 0); + if (!ASSERT_OK_FD(mnt_fd, "mnt_fd")) goto cleanup; /* try to fspick() BPF FS and try to add some delegation options */ @@ -429,24 +431,24 @@ again: static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd) { - int fs_fd = -1, mnt_fd = -1, token_fd = -1, err; + int fs_fd = -1, token_fd = -1, err; + char one = 1; err = recvfd(sock_fd, &fs_fd); if (!ASSERT_OK(err, "recv_bpffs_fd")) goto cleanup; - mnt_fd = materialize_bpffs_fd(fs_fd, bpffs_opts); - if (!ASSERT_GE(mnt_fd, 0, "materialize_bpffs_fd")) { + err = materialize_bpffs_fd(fs_fd, bpffs_opts); + if (!ASSERT_GE(err, 0, "materialize_bpffs_fd")) { err = -EINVAL; goto cleanup; } - zclose(fs_fd); - /* pass BPF FS context object to parent */ - err = sendfd(sock_fd, mnt_fd); - if (!ASSERT_OK(err, "send_mnt_fd")) + /* notify the child that we did the fsconfig() calls and it can proceed. */ + err = write(sock_fd, &one, sizeof(one)); + if (!ASSERT_EQ(err, sizeof(one), "send_one")) goto cleanup; - zclose(mnt_fd); + zclose(fs_fd); /* receive BPF token FD back from child for some extra tests */ err = recvfd(sock_fd, &token_fd); @@ -459,7 +461,6 @@ static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd) cleanup: zclose(sock_fd); zclose(fs_fd); - zclose(mnt_fd); zclose(token_fd); if (child_pid > 0) -- cgit v1.2.3 From a90f5f7370c283863d3872f231debc9b2227b27f Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 2 Jul 2025 00:36:20 -0700 Subject: selftests/bpf: null checks for rdonly_untrusted_mem should be preserved Test case checking that verifier does not assume rdonly_untrusted_mem values as not null. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250702073620.897517-2-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko --- .../selftests/bpf/progs/mem_rdonly_untrusted.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c index b0486af36f55..8185130ede95 100644 --- a/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c +++ b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c @@ -174,4 +174,25 @@ int misaligned_access(void *ctx) return combine(bpf_rdonly_cast(&global, 0) + 1); } +__weak int return_one(void) +{ + return 1; +} + +SEC("socket") +__success +__retval(1) +int null_check(void *ctx) +{ + int *p; + + p = bpf_rdonly_cast(0, 0); + if (p == 0) + /* make this a function call to avoid compiler + * moving r0 assignment before check. + */ + return return_one(); + return 0; +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 7ec899ac90a205090275cd4a5c96c9606a93f3a1 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Wed, 2 Jul 2025 15:53:23 +0200 Subject: selftests/bpf: Negative test case for ref_obj_id in args This patch adds a test case, as shown below, for the verifier error "more than one arg with ref_obj_id". 0: (b7) r2 = 20 1: (b7) r3 = 0 2: (18) r1 = 0xffff92cee3cbc600 4: (85) call bpf_ringbuf_reserve#131 5: (55) if r0 == 0x0 goto pc+3 6: (bf) r1 = r0 7: (bf) r2 = r0 8: (85) call bpf_tcp_raw_gen_syncookie_ipv4#204 9: (95) exit This error is currently incorrectly reported as a verifier bug, with a warning. The next patch in this series will address that. Signed-off-by: Paul Chaignon Link: https://lore.kernel.org/r/3ba78e6cda47ccafd6ea70dadbc718d020154664.1751463262.git.paul.chaignon@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko --- tools/testing/selftests/bpf/verifier/calls.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 18596ae0b0c1..f3492efc8834 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -2409,3 +2409,27 @@ .errstr_unpriv = "", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, }, +{ + "calls: several args with ref_obj_id", + .insns = { + /* Reserve at least sizeof(struct iphdr) bytes in the ring buffer. + * With a smaller size, the verifier would reject the call to + * bpf_tcp_raw_gen_syncookie_ipv4 before we can reach the + * ref_obj_id error. + */ + BPF_MOV64_IMM(BPF_REG_2, 20), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve), + /* if r0 == 0 goto */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tcp_raw_gen_syncookie_ipv4), + BPF_EXIT_INSN(), + }, + .fixup_map_ringbuf = { 2 }, + .result = REJECT, + .errstr = "more than one arg with ref_obj_id", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, -- cgit v1.2.3 From 964a07426eb8bfc3a6aed8d07eeeca77f44f6d91 Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Wed, 18 Jun 2025 10:29:55 +0100 Subject: kselftest/arm64/abi: Add MTE_STORE_ONLY feature hwcap test add MTE_STORE_ONLY feature hwcap test. Signed-off-by: Yeoreum Yun Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250618092957.2069907-7-yeoreum.yun@arm.com Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/abi/hwcap.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index e60bfb798ba2..42b59a994bd0 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -1104,6 +1104,12 @@ static const struct hwcap_data { .hwcap_bit = HWCAP3_MTE_FAR, .cpuinfo = "mtefar", }, + { + .name = "MTE_STOREONLY", + .at_hwcap = AT_HWCAP3, + .hwcap_bit = HWCAP3_MTE_STORE_ONLY, + .cpuinfo = "mtestoreonly", + }, }; typedef void (*sighandler_fn)(int, siginfo_t *, void *); -- cgit v1.2.3 From 391ca7c81b85cc10a0a202c6ed6a4c65e374604b Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Wed, 18 Jun 2025 10:29:56 +0100 Subject: kselftest/arm64/mte: Preparation for mte store only test Since ARMv8.9, FEAT_MTE_STORE_ONLY can be used to restrict raise of tag check fault on store operation only. This patch is preparation for testing FEAT_MTE_STORE_ONLY It shouldn't change test result. Signed-off-by: Yeoreum Yun Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250618092957.2069907-8-yeoreum.yun@arm.com Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/mte/check_buffer_fill.c | 10 +++++----- tools/testing/selftests/arm64/mte/check_child_memory.c | 4 ++-- tools/testing/selftests/arm64/mte/check_hugetlb_options.c | 6 +++--- tools/testing/selftests/arm64/mte/check_ksm_options.c | 2 +- tools/testing/selftests/arm64/mte/check_mmap_options.c | 6 +++--- tools/testing/selftests/arm64/mte/check_tags_inclusion.c | 8 ++++---- tools/testing/selftests/arm64/mte/check_user_mem.c | 2 +- tools/testing/selftests/arm64/mte/mte_common_util.c | 14 ++++++++++++-- tools/testing/selftests/arm64/mte/mte_common_util.h | 3 ++- 9 files changed, 33 insertions(+), 22 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c index 5248b5265aa4..ff4e07503349 100644 --- a/tools/testing/selftests/arm64/mte/check_buffer_fill.c +++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c @@ -31,7 +31,7 @@ static int check_buffer_by_byte(int mem_type, int mode) int i, j, item; bool err; - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); item = ARRAY_SIZE(sizes); for (i = 0; i < item; i++) { @@ -68,7 +68,7 @@ static int check_buffer_underflow_by_byte(int mem_type, int mode, bool err; char *und_ptr = NULL; - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); item = ARRAY_SIZE(sizes); for (i = 0; i < item; i++) { ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0, @@ -164,7 +164,7 @@ static int check_buffer_overflow_by_byte(int mem_type, int mode, size_t tagged_size, overflow_size; char *over_ptr = NULL; - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); item = ARRAY_SIZE(sizes); for (i = 0; i < item; i++) { ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0, @@ -337,7 +337,7 @@ static int check_buffer_by_block(int mem_type, int mode) { int i, item, result = KSFT_PASS; - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); item = ARRAY_SIZE(sizes); cur_mte_cxt.fault_valid = false; for (i = 0; i < item; i++) { @@ -368,7 +368,7 @@ static int check_memory_initial_tags(int mem_type, int mode, int mapping) int run, fd; int total = ARRAY_SIZE(sizes); - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); for (run = 0; run < total; run++) { /* check initial tags for anonymous mmap */ ptr = (char *)mte_allocate_memory(sizes[run], mem_type, mapping, false); diff --git a/tools/testing/selftests/arm64/mte/check_child_memory.c b/tools/testing/selftests/arm64/mte/check_child_memory.c index b97ea3981c21..5e97ee792e4d 100644 --- a/tools/testing/selftests/arm64/mte/check_child_memory.c +++ b/tools/testing/selftests/arm64/mte/check_child_memory.c @@ -88,7 +88,7 @@ static int check_child_memory_mapping(int mem_type, int mode, int mapping) int item = ARRAY_SIZE(sizes); item = ARRAY_SIZE(sizes); - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); for (run = 0; run < item; run++) { ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping, UNDERFLOW, OVERFLOW); @@ -109,7 +109,7 @@ static int check_child_file_mapping(int mem_type, int mode, int mapping) int run, fd, map_size, result = KSFT_PASS; int total = ARRAY_SIZE(sizes); - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); for (run = 0; run < total; run++) { fd = create_temp_file(); if (fd == -1) diff --git a/tools/testing/selftests/arm64/mte/check_hugetlb_options.c b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c index 4e644a606394..aad1234c7e0f 100644 --- a/tools/testing/selftests/arm64/mte/check_hugetlb_options.c +++ b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c @@ -151,7 +151,7 @@ static int check_hugetlb_memory_mapping(int mem_type, int mode, int mapping, int map_size = default_huge_page_size(); - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); map_ptr = (char *)mte_allocate_memory(map_size, mem_type, mapping, false); if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) return KSFT_FAIL; @@ -180,7 +180,7 @@ static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping) unsigned long map_size; prot_flag = PROT_READ | PROT_WRITE; - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); map_size = default_huge_page_size(); map_ptr = (char *)mte_allocate_memory_tag_range(map_size, mem_type, mapping, 0, 0); @@ -210,7 +210,7 @@ static int check_child_hugetlb_memory_mapping(int mem_type, int mode, int mappin map_size = default_huge_page_size(); - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); ptr = (char *)mte_allocate_memory_tag_range(map_size, mem_type, mapping, 0, 0); if (check_allocated_memory_range(ptr, map_size, mem_type, diff --git a/tools/testing/selftests/arm64/mte/check_ksm_options.c b/tools/testing/selftests/arm64/mte/check_ksm_options.c index afea4e381862..0cf5faef1724 100644 --- a/tools/testing/selftests/arm64/mte/check_ksm_options.c +++ b/tools/testing/selftests/arm64/mte/check_ksm_options.c @@ -106,7 +106,7 @@ static int check_madvise_options(int mem_type, int mode, int mapping) return err; } - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); ptr = mte_allocate_memory(TEST_UNIT * page_sz, mem_type, mapping, true); if (check_allocated_memory(ptr, TEST_UNIT * page_sz, mem_type, false) != KSFT_PASS) return KSFT_FAIL; diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c index 91a81b4a9bfa..447c0ef25f71 100644 --- a/tools/testing/selftests/arm64/mte/check_mmap_options.c +++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c @@ -90,7 +90,7 @@ static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, i int run, result, map_size; int item = ARRAY_SIZE(sizes); - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); for (run = 0; run < item; run++) { map_size = sizes[run] + OVERFLOW + UNDERFLOW; map_ptr = (char *)mte_allocate_memory(map_size, mem_type, mapping, false); @@ -122,7 +122,7 @@ static int check_file_memory_mapping(int mem_type, int mode, int mapping, int ta int total = ARRAY_SIZE(sizes); int result = KSFT_PASS; - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); for (run = 0; run < total; run++) { fd = create_temp_file(); if (fd == -1) @@ -161,7 +161,7 @@ static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping, int at int total = ARRAY_SIZE(sizes); prot_flag = PROT_READ | PROT_WRITE; - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); for (run = 0; run < total; run++) { map_size = sizes[run] + OVERFLOW + UNDERFLOW; ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping, diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c index b96296ab9870..4b764f2a8185 100644 --- a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c +++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c @@ -57,7 +57,7 @@ static int check_single_included_tags(int mem_type, int mode) return KSFT_FAIL; for (tag = 0; (tag < MT_TAG_COUNT) && (result == KSFT_PASS); tag++) { - ret = mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag)); + ret = mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag), false); if (ret != 0) result = KSFT_FAIL; /* Try to catch a excluded tag by a number of tries. */ @@ -91,7 +91,7 @@ static int check_multiple_included_tags(int mem_type, int mode) for (tag = 0; (tag < MT_TAG_COUNT - 1) && (result == KSFT_PASS); tag++) { excl_mask |= 1 << tag; - mte_switch_mode(mode, MT_INCLUDE_VALID_TAGS(excl_mask)); + mte_switch_mode(mode, MT_INCLUDE_VALID_TAGS(excl_mask), false); /* Try to catch a excluded tag by a number of tries. */ for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) { ptr = mte_insert_tags(ptr, BUFFER_SIZE); @@ -120,7 +120,7 @@ static int check_all_included_tags(int mem_type, int mode) mem_type, false) != KSFT_PASS) return KSFT_FAIL; - ret = mte_switch_mode(mode, MT_INCLUDE_TAG_MASK); + ret = mte_switch_mode(mode, MT_INCLUDE_TAG_MASK, false); if (ret != 0) return KSFT_FAIL; /* Try to catch a excluded tag by a number of tries. */ @@ -145,7 +145,7 @@ static int check_none_included_tags(int mem_type, int mode) if (check_allocated_memory(ptr, BUFFER_SIZE, mem_type, false) != KSFT_PASS) return KSFT_FAIL; - ret = mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK); + ret = mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK, false); if (ret != 0) return KSFT_FAIL; /* Try to catch a excluded tag by a number of tries. */ diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c index d1d14aaaba16..fb7936c4e097 100644 --- a/tools/testing/selftests/arm64/mte/check_user_mem.c +++ b/tools/testing/selftests/arm64/mte/check_user_mem.c @@ -44,7 +44,7 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping, err = KSFT_PASS; len = 2 * page_sz; - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); fd = create_temp_file(); if (fd == -1) return KSFT_FAIL; diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c index 10dcbc37e379..397e57dd946a 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.c +++ b/tools/testing/selftests/arm64/mte/mte_common_util.c @@ -28,8 +28,10 @@ struct mte_fault_cxt cur_mte_cxt; bool mtefar_support; +bool mtestonly_support; static unsigned int mte_cur_mode; static unsigned int mte_cur_pstate_tco; +static bool mte_cur_stonly; void mte_default_handler(int signum, siginfo_t *si, void *uc) { @@ -314,7 +316,7 @@ void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range) cur_mte_cxt.trig_si_code = 0; } -int mte_switch_mode(int mte_option, unsigned long incl_mask) +int mte_switch_mode(int mte_option, unsigned long incl_mask, bool stonly) { unsigned long en = 0; @@ -346,6 +348,9 @@ int mte_switch_mode(int mte_option, unsigned long incl_mask) break; } + if (mtestonly_support && stonly) + en |= PR_MTE_STORE_ONLY; + en |= (incl_mask << PR_MTE_TAG_SHIFT); /* Enable address tagging ABI, mte error reporting mode and tag inclusion mask. */ if (prctl(PR_SET_TAGGED_ADDR_CTRL, en, 0, 0, 0) != 0) { @@ -370,6 +375,9 @@ int mte_default_setup(void) mtefar_support = !!(hwcaps3 & HWCAP3_MTE_FAR); + if (hwcaps3 & HWCAP3_MTE_STORE_ONLY) + mtestonly_support = true; + /* Get current mte mode */ ret = prctl(PR_GET_TAGGED_ADDR_CTRL, en, 0, 0, 0); if (ret < 0) { @@ -383,6 +391,8 @@ int mte_default_setup(void) else if (ret & PR_MTE_TCF_NONE) mte_cur_mode = MTE_NONE_ERR; + mte_cur_stonly = (ret & PR_MTE_STORE_ONLY) ? true : false; + mte_cur_pstate_tco = mte_get_pstate_tco(); /* Disable PSTATE.TCO */ mte_disable_pstate_tco(); @@ -391,7 +401,7 @@ int mte_default_setup(void) void mte_restore_setup(void) { - mte_switch_mode(mte_cur_mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mte_cur_mode, MTE_ALLOW_NON_ZERO_TAG, mte_cur_stonly); if (mte_cur_pstate_tco == MT_PSTATE_TCO_EN) mte_enable_pstate_tco(); else if (mte_cur_pstate_tco == MT_PSTATE_TCO_DIS) diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h index 045e4ad2f018..250d671329a5 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.h +++ b/tools/testing/selftests/arm64/mte/mte_common_util.h @@ -38,6 +38,7 @@ struct mte_fault_cxt { extern struct mte_fault_cxt cur_mte_cxt; extern bool mtefar_support; +extern bool mtestonly_support; /* MTE utility functions */ void mte_default_handler(int signum, siginfo_t *si, void *uc); @@ -60,7 +61,7 @@ void *mte_insert_atag(void *ptr); void *mte_clear_atag(void *ptr); int mte_default_setup(void); void mte_restore_setup(void); -int mte_switch_mode(int mte_option, unsigned long incl_mask); +int mte_switch_mode(int mte_option, unsigned long incl_mask, bool stonly); void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range); /* Common utility functions */ -- cgit v1.2.3 From 1f488fb91378e923dd67870292dcc36df85a814e Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Wed, 18 Jun 2025 10:29:57 +0100 Subject: kselftest/arm64/mte: Add MTE_STORE_ONLY testcases Since ARMv8.9, FEAT_MTE_STORE_ONLY can be used to restrict raise of tag check fault on store operation only. Adds new test cases using MTE_STORE_ONLY feature. Signed-off-by: Yeoreum Yun Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250618092957.2069907-9-yeoreum.yun@arm.com Signed-off-by: Catalin Marinas --- .../selftests/arm64/mte/check_mmap_options.c | 361 ++++++++++++++++++++- tools/testing/selftests/arm64/mte/check_prctl.c | 25 +- 2 files changed, 366 insertions(+), 20 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c index 447c0ef25f71..c100af3012cb 100644 --- a/tools/testing/selftests/arm64/mte/check_mmap_options.c +++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c @@ -35,6 +35,11 @@ enum mte_mem_check_type { CHECK_CLEAR_PROT_MTE = 2, }; +enum mte_tag_op_type { + TAG_OP_ALL = 0, + TAG_OP_STONLY = 1, +}; + struct check_mmap_testcase { int check_type; int mem_type; @@ -42,17 +47,24 @@ struct check_mmap_testcase { int mapping; int tag_check; int atag_check; + int tag_op; bool enable_tco; }; +#define TAG_OP_ALL 0 +#define TAG_OP_STONLY 1 + static size_t page_size; static int sizes[] = { 1, 537, 989, 1269, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE, /* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0 }; -static int check_mte_memory(char *ptr, int size, int mode, int tag_check, int atag_check) +static int check_mte_memory(char *ptr, int size, int mode, + int tag_check,int atag_check, int tag_op) { + char buf[MT_GRANULE_SIZE]; + if (!mtefar_support && atag_check == ATAG_CHECK_ON) return KSFT_SKIP; @@ -81,16 +93,34 @@ static int check_mte_memory(char *ptr, int size, int mode, int tag_check, int at if (cur_mte_cxt.fault_valid == true && tag_check == TAG_CHECK_OFF) return KSFT_FAIL; + if (tag_op == TAG_OP_STONLY) { + mte_initialize_current_context(mode, (uintptr_t)ptr, -UNDERFLOW); + memcpy(buf, ptr - UNDERFLOW, MT_GRANULE_SIZE); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == true) + return KSFT_FAIL; + + mte_initialize_current_context(mode, (uintptr_t)ptr, size + OVERFLOW); + memcpy(buf, ptr + size, MT_GRANULE_SIZE); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == true) + return KSFT_FAIL; + } + return KSFT_PASS; } -static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, int tag_check, int atag_check) +static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, + int tag_check, int atag_check, int tag_op) { char *ptr, *map_ptr; int run, result, map_size; int item = ARRAY_SIZE(sizes); - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); + if (tag_op == TAG_OP_STONLY && !mtestonly_support) + return KSFT_SKIP; + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, tag_op); for (run = 0; run < item; run++) { map_size = sizes[run] + OVERFLOW + UNDERFLOW; map_ptr = (char *)mte_allocate_memory(map_size, mem_type, mapping, false); @@ -106,7 +136,7 @@ static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, i munmap((void *)map_ptr, map_size); return KSFT_FAIL; } - result = check_mte_memory(ptr, sizes[run], mode, tag_check, atag_check); + result = check_mte_memory(ptr, sizes[run], mode, tag_check, atag_check, tag_op); mte_clear_tags((void *)ptr, sizes[run]); mte_free_memory((void *)map_ptr, map_size, mem_type, false); if (result != KSFT_PASS) @@ -115,14 +145,18 @@ static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, i return KSFT_PASS; } -static int check_file_memory_mapping(int mem_type, int mode, int mapping, int tag_check, int atag_check) +static int check_file_memory_mapping(int mem_type, int mode, int mapping, + int tag_check, int atag_check, int tag_op) { char *ptr, *map_ptr; int run, fd, map_size; int total = ARRAY_SIZE(sizes); int result = KSFT_PASS; - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); + if (tag_op == TAG_OP_STONLY && !mtestonly_support) + return KSFT_SKIP; + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, tag_op); for (run = 0; run < total; run++) { fd = create_temp_file(); if (fd == -1) @@ -144,7 +178,7 @@ static int check_file_memory_mapping(int mem_type, int mode, int mapping, int ta close(fd); return KSFT_FAIL; } - result = check_mte_memory(ptr, sizes[run], mode, tag_check, atag_check); + result = check_mte_memory(ptr, sizes[run], mode, tag_check, atag_check, tag_op); mte_clear_tags((void *)ptr, sizes[run]); munmap((void *)map_ptr, map_size); close(fd); @@ -177,10 +211,10 @@ static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping, int at ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n"); return KSFT_FAIL; } - result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON, atag_check); + result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON, atag_check, TAG_OP_ALL); mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW); if (result != KSFT_PASS) - return KSFT_FAIL; + return result; fd = create_temp_file(); if (fd == -1) @@ -201,7 +235,7 @@ static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping, int at close(fd); return KSFT_FAIL; } - result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON, atag_check); + result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON, atag_check, TAG_OP_ALL); mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW); close(fd); if (result != KSFT_PASS) @@ -219,6 +253,7 @@ const char *format_test_name(struct check_mmap_testcase *tc) const char *mapping_str; const char *tag_check_str; const char *atag_check_str; + const char *tag_op_str; switch (tc->check_type) { case CHECK_ANON_MEM: @@ -303,6 +338,23 @@ const char *format_test_name(struct check_mmap_testcase *tc) check_type_str, mapping_str, sync_str, mem_type_str, tag_check_str, atag_check_str); + switch (tc->tag_op) { + case TAG_OP_ALL: + tag_op_str = ""; + break; + case TAG_OP_STONLY: + tag_op_str = " / store-only"; + break; + default: + assert(0); + break; + } + + snprintf(test_name, TEST_NAME_MAX, + "Check %s with %s mapping, %s mode, %s memory and %s (%s%s)\n", + check_type_str, mapping_str, sync_str, mem_type_str, + tag_check_str, atag_check_str, tag_op_str); + return test_name; } @@ -318,6 +370,7 @@ int main(int argc, char *argv[]) .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_OFF, .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, .enable_tco = true, }, { @@ -327,6 +380,7 @@ int main(int argc, char *argv[]) .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_OFF, .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, .enable_tco = true, }, { @@ -336,6 +390,7 @@ int main(int argc, char *argv[]) .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_OFF, .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, .enable_tco = false, }, { @@ -345,6 +400,7 @@ int main(int argc, char *argv[]) .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_OFF, .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, .enable_tco = false, }, { @@ -354,6 +410,7 @@ int main(int argc, char *argv[]) .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, .enable_tco = false, }, { @@ -363,6 +420,7 @@ int main(int argc, char *argv[]) .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, .enable_tco = false, }, { @@ -372,6 +430,7 @@ int main(int argc, char *argv[]) .mapping = MAP_SHARED, .tag_check = TAG_CHECK_ON, .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, .enable_tco = false, }, { @@ -381,6 +440,7 @@ int main(int argc, char *argv[]) .mapping = MAP_SHARED, .tag_check = TAG_CHECK_ON, .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, .enable_tco = false, }, { @@ -390,6 +450,7 @@ int main(int argc, char *argv[]) .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, .enable_tco = false, }, { @@ -399,6 +460,7 @@ int main(int argc, char *argv[]) .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, .enable_tco = false, }, { @@ -408,6 +470,7 @@ int main(int argc, char *argv[]) .mapping = MAP_SHARED, .tag_check = TAG_CHECK_ON, .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, .enable_tco = false, }, { @@ -417,6 +480,7 @@ int main(int argc, char *argv[]) .mapping = MAP_SHARED, .tag_check = TAG_CHECK_ON, .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, .enable_tco = false, }, { @@ -426,6 +490,7 @@ int main(int argc, char *argv[]) .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, .enable_tco = false, }, { @@ -435,6 +500,7 @@ int main(int argc, char *argv[]) .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, .enable_tco = false, }, { @@ -444,6 +510,7 @@ int main(int argc, char *argv[]) .mapping = MAP_SHARED, .tag_check = TAG_CHECK_ON, .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, .enable_tco = false, }, { @@ -453,6 +520,7 @@ int main(int argc, char *argv[]) .mapping = MAP_SHARED, .tag_check = TAG_CHECK_ON, .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, .enable_tco = false, }, { @@ -462,6 +530,7 @@ int main(int argc, char *argv[]) .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, .enable_tco = false, }, { @@ -471,6 +540,7 @@ int main(int argc, char *argv[]) .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, .enable_tco = false, }, { @@ -480,6 +550,7 @@ int main(int argc, char *argv[]) .mapping = MAP_SHARED, .tag_check = TAG_CHECK_ON, .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, .enable_tco = false, }, { @@ -489,6 +560,7 @@ int main(int argc, char *argv[]) .mapping = MAP_SHARED, .tag_check = TAG_CHECK_ON, .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, .enable_tco = false, }, { @@ -498,6 +570,7 @@ int main(int argc, char *argv[]) .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, .enable_tco = false, }, { @@ -507,6 +580,257 @@ int main(int argc, char *argv[]) .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, .enable_tco = false, }, { @@ -516,6 +840,7 @@ int main(int argc, char *argv[]) .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_STONLY, .enable_tco = false, }, { @@ -525,6 +850,7 @@ int main(int argc, char *argv[]) .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_STONLY, .enable_tco = false, }, { @@ -534,6 +860,7 @@ int main(int argc, char *argv[]) .mapping = MAP_SHARED, .tag_check = TAG_CHECK_ON, .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_STONLY, .enable_tco = false, }, { @@ -543,6 +870,7 @@ int main(int argc, char *argv[]) .mapping = MAP_SHARED, .tag_check = TAG_CHECK_ON, .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_STONLY, .enable_tco = false, }, { @@ -552,6 +880,7 @@ int main(int argc, char *argv[]) .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_STONLY, .enable_tco = false, }, { @@ -561,6 +890,7 @@ int main(int argc, char *argv[]) .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_STONLY, .enable_tco = false, }, { @@ -570,6 +900,7 @@ int main(int argc, char *argv[]) .mapping = MAP_SHARED, .tag_check = TAG_CHECK_ON, .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_STONLY, .enable_tco = false, }, { @@ -579,6 +910,7 @@ int main(int argc, char *argv[]) .mapping = MAP_SHARED, .tag_check = TAG_CHECK_ON, .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_STONLY, .enable_tco = false, }, { @@ -588,6 +920,7 @@ int main(int argc, char *argv[]) .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_STONLY, .enable_tco = false, }, { @@ -597,6 +930,7 @@ int main(int argc, char *argv[]) .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, .enable_tco = false, }, { @@ -606,6 +940,7 @@ int main(int argc, char *argv[]) .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, .enable_tco = false, }, }; @@ -643,7 +978,8 @@ int main(int argc, char *argv[]) test_cases[i].mte_sync, test_cases[i].mapping, test_cases[i].tag_check, - test_cases[i].atag_check), + test_cases[i].atag_check, + test_cases[i].tag_op), format_test_name(&test_cases[i])); break; case CHECK_FILE_MEM: @@ -651,7 +987,8 @@ int main(int argc, char *argv[]) test_cases[i].mte_sync, test_cases[i].mapping, test_cases[i].tag_check, - test_cases[i].atag_check), + test_cases[i].atag_check, + test_cases[i].tag_op), format_test_name(&test_cases[i])); break; case CHECK_CLEAR_PROT_MTE: diff --git a/tools/testing/selftests/arm64/mte/check_prctl.c b/tools/testing/selftests/arm64/mte/check_prctl.c index 4c89e9538ca0..e9ad8761b3fb 100644 --- a/tools/testing/selftests/arm64/mte/check_prctl.c +++ b/tools/testing/selftests/arm64/mte/check_prctl.c @@ -60,7 +60,7 @@ void check_basic_read(void) /* * Attempt to set a specified combination of modes. */ -void set_mode_test(const char *name, int hwcap2, int mask) +void set_mode_test(const char *name, int hwcap2, int hwcap3, int mask) { int ret; @@ -69,6 +69,11 @@ void set_mode_test(const char *name, int hwcap2, int mask) return; } + if ((getauxval(AT_HWCAP3) & hwcap3) != hwcap3) { + ksft_test_result_skip("%s\n", name); + return; + } + ret = set_tagged_addr_ctrl(mask); if (ret < 0) { ksft_test_result_fail("%s\n", name); @@ -81,7 +86,7 @@ void set_mode_test(const char *name, int hwcap2, int mask) return; } - if ((ret & PR_MTE_TCF_MASK) == mask) { + if ((ret & (PR_MTE_TCF_MASK | PR_MTE_STORE_ONLY)) == mask) { ksft_test_result_pass("%s\n", name); } else { ksft_print_msg("Got %x, expected %x\n", @@ -93,12 +98,16 @@ void set_mode_test(const char *name, int hwcap2, int mask) struct mte_mode { int mask; int hwcap2; + int hwcap3; const char *name; } mte_modes[] = { - { PR_MTE_TCF_NONE, 0, "NONE" }, - { PR_MTE_TCF_SYNC, HWCAP2_MTE, "SYNC" }, - { PR_MTE_TCF_ASYNC, HWCAP2_MTE, "ASYNC" }, - { PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC, HWCAP2_MTE, "SYNC+ASYNC" }, + { PR_MTE_TCF_NONE, 0, 0, "NONE" }, + { PR_MTE_TCF_SYNC, HWCAP2_MTE, 0, "SYNC" }, + { PR_MTE_TCF_ASYNC, HWCAP2_MTE, 0, "ASYNC" }, + { PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC, HWCAP2_MTE, 0, "SYNC+ASYNC" }, + { PR_MTE_TCF_SYNC | PR_MTE_STORE_ONLY, HWCAP2_MTE, HWCAP3_MTE_STORE_ONLY, "SYNC+STONLY" }, + { PR_MTE_TCF_ASYNC | PR_MTE_STORE_ONLY, HWCAP2_MTE, HWCAP3_MTE_STORE_ONLY, "ASYNC+STONLY" }, + { PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC | PR_MTE_STORE_ONLY, HWCAP2_MTE, HWCAP3_MTE_STORE_ONLY, "SYNC+ASYNC+STONLY" }, }; int main(void) @@ -106,11 +115,11 @@ int main(void) int i; ksft_print_header(); - ksft_set_plan(5); + ksft_set_plan(ARRAY_SIZE(mte_modes)); check_basic_read(); for (i = 0; i < ARRAY_SIZE(mte_modes); i++) - set_mode_test(mte_modes[i].name, mte_modes[i].hwcap2, + set_mode_test(mte_modes[i].name, mte_modes[i].hwcap2, mte_modes[i].hwcap3, mte_modes[i].mask); ksft_print_cnts(); -- cgit v1.2.3 From 38d95beb4b24301362f8bdae7fbdb82d74b803ca Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Wed, 2 Jul 2025 18:56:22 +0100 Subject: selftests/bpf: Allow veristat compile standalone Veristat is synced into the standalone repo, where it compiles without kernel private dependencies. This patch fixes compilation errors in standalone veristat. Signed-off-by: Mykyta Yatsenko Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250702175622.358405-1-mykyta.yatsenko5@gmail.com --- tools/testing/selftests/bpf/veristat.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 09cfbd486f92..d532dd82a3a8 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -23,6 +23,7 @@ #include #include #include +#include #ifndef ARRAY_SIZE #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) @@ -239,7 +240,7 @@ static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va #define log_errno(fmt, ...) log_errno_aux(__FILE__, __LINE__, fmt, ##__VA_ARGS__) -__printf(3, 4) +__attribute__((format(printf, 3, 4))) static int log_errno_aux(const char *file, int line, const char *fmt, ...) { int err = -errno; @@ -1337,7 +1338,7 @@ static bool output_stat_enabled(int id) return false; } -__printf(2, 3) +__attribute__((format(printf, 2, 3))) static int write_one_line(const char *file, const char *fmt, ...) { int err, saved_errno; @@ -1358,7 +1359,7 @@ static int write_one_line(const char *file, const char *fmt, ...) return err < 0 ? -1 : 0; } -__scanf(3, 4) +__attribute__((format(scanf, 3, 4))) static int scanf_one_line(const char *file, int fields_expected, const char *fmt, ...) { int res = 0, saved_errno = 0; -- cgit v1.2.3 From 81d572a551f43c01380e4aa39a6b9f331c931fbc Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 30 Jun 2025 15:42:12 -0400 Subject: selftest: net: extend msg_zerocopy test with forwarding Zerocopy skbs are converted to regular copy skbs when data is queued to a local socket. This happens in the existing test with a sender and receiver communicating over a veth device. Zerocopy skbs are sent without copying if egressing a device. Verify that this behavior is maintained even in the common container setup where data is forwarded over a veth to the physical device. Update msg_zerocopy.sh to 1. Have a dummy network device to simulate a physical device. 2. Have forwarding enabled between veth and dummy. 3. Add a tx-only test that sends out dummy via the forwarding path. 4. Verify the exitcode of the sender, which signals zerocopy success. As dummy drops all packets, this cannot be a TCP connection. Test the new case with unconnected UDP only. Update msg_zerocopy.c to - Accept an argument whether send with zerocopy is expected. - Return an exitcode whether behavior matched that expectation. Signed-off-by: Willem de Bruijn Link: https://patch.msgid.link/20250630194312.1571410-3-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/msg_zerocopy.c | 24 +++++---- tools/testing/selftests/net/msg_zerocopy.sh | 84 +++++++++++++++++++++-------- 2 files changed, 77 insertions(+), 31 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c index 7ea5fb28c93d..1d5d3c4e7e87 100644 --- a/tools/testing/selftests/net/msg_zerocopy.c +++ b/tools/testing/selftests/net/msg_zerocopy.c @@ -77,6 +77,7 @@ static int cfg_cork; static bool cfg_cork_mixed; static int cfg_cpu = -1; /* default: pin to last cpu */ +static int cfg_expect_zerocopy = -1; static int cfg_family = PF_UNSPEC; static int cfg_ifindex = 1; static int cfg_payload_len; @@ -92,9 +93,9 @@ static socklen_t cfg_alen; static struct sockaddr_storage cfg_dst_addr; static struct sockaddr_storage cfg_src_addr; +static int exitcode; static char payload[IP_MAXPACKET]; static long packets, bytes, completions, expected_completions; -static int zerocopied = -1; static uint32_t next_completion; static uint32_t sends_since_notify; @@ -444,11 +445,13 @@ static bool do_recv_completion(int fd, int domain) next_completion = hi + 1; zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED); - if (zerocopied == -1) - zerocopied = zerocopy; - else if (zerocopied != zerocopy) { - fprintf(stderr, "serr: inconsistent\n"); - zerocopied = zerocopy; + if (cfg_expect_zerocopy != -1 && + cfg_expect_zerocopy != zerocopy) { + fprintf(stderr, "serr: ee_code: %u != expected %u\n", + zerocopy, cfg_expect_zerocopy); + exitcode = 1; + /* suppress repeated messages */ + cfg_expect_zerocopy = zerocopy; } if (cfg_verbose >= 2) @@ -571,7 +574,7 @@ static void do_tx(int domain, int type, int protocol) fprintf(stderr, "tx=%lu (%lu MB) txc=%lu zc=%c\n", packets, bytes >> 20, completions, - zerocopied == 1 ? 'y' : 'n'); + cfg_zerocopy && cfg_expect_zerocopy == 1 ? 'y' : 'n'); } static int do_setup_rx(int domain, int type, int protocol) @@ -715,7 +718,7 @@ static void parse_opts(int argc, char **argv) cfg_payload_len = max_payload_len; - while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vz")) != -1) { + while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vzZ:")) != -1) { switch (c) { case '4': if (cfg_family != PF_UNSPEC) @@ -770,6 +773,9 @@ static void parse_opts(int argc, char **argv) case 'z': cfg_zerocopy = true; break; + case 'Z': + cfg_expect_zerocopy = !!atoi(optarg); + break; } } @@ -817,5 +823,5 @@ int main(int argc, char **argv) else error(1, 0, "unknown cfg_test %s", cfg_test); - return 0; + return exitcode; } diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh index 89c22f5320e0..28178a38a4e7 100755 --- a/tools/testing/selftests/net/msg_zerocopy.sh +++ b/tools/testing/selftests/net/msg_zerocopy.sh @@ -6,6 +6,7 @@ set -e readonly DEV="veth0" +readonly DUMMY_DEV="dummy0" readonly DEV_MTU=65535 readonly BIN="./msg_zerocopy" @@ -14,21 +15,25 @@ readonly NSPREFIX="ns-${RAND}" readonly NS1="${NSPREFIX}1" readonly NS2="${NSPREFIX}2" -readonly SADDR4='192.168.1.1' -readonly DADDR4='192.168.1.2' -readonly SADDR6='fd::1' -readonly DADDR6='fd::2' +readonly LPREFIX4='192.168.1' +readonly RPREFIX4='192.168.2' +readonly LPREFIX6='fd' +readonly RPREFIX6='fc' + readonly path_sysctl_mem="net.core.optmem_max" # No arguments: automated test if [[ "$#" -eq "0" ]]; then - $0 4 tcp -t 1 - $0 6 tcp -t 1 - $0 4 udp -t 1 - $0 6 udp -t 1 - echo "OK. All tests passed" - exit 0 + ret=0 + + $0 4 tcp -t 1 || ret=1 + $0 6 tcp -t 1 || ret=1 + $0 4 udp -t 1 || ret=1 + $0 6 udp -t 1 || ret=1 + + [[ "$ret" == "0" ]] && echo "OK. All tests passed" + exit $ret fi # Argument parsing @@ -45,11 +50,18 @@ readonly EXTRA_ARGS="$@" # Argument parsing: configure addresses if [[ "${IP}" == "4" ]]; then - readonly SADDR="${SADDR4}" - readonly DADDR="${DADDR4}" + readonly SADDR="${LPREFIX4}.1" + readonly DADDR="${LPREFIX4}.2" + readonly DUMMY_ADDR="${RPREFIX4}.1" + readonly DADDR_TXONLY="${RPREFIX4}.2" + readonly MASK="24" elif [[ "${IP}" == "6" ]]; then - readonly SADDR="${SADDR6}" - readonly DADDR="${DADDR6}" + readonly SADDR="${LPREFIX6}::1" + readonly DADDR="${LPREFIX6}::2" + readonly DUMMY_ADDR="${RPREFIX6}::1" + readonly DADDR_TXONLY="${RPREFIX6}::2" + readonly MASK="64" + readonly NODAD="nodad" else echo "Invalid IP version ${IP}" exit 1 @@ -89,33 +101,61 @@ ip netns exec "${NS2}" sysctl -w -q "${path_sysctl_mem}=1000000" ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \ peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}" +ip link add "${DUMMY_DEV}" mtu "${DEV_MTU}" netns "${NS2}" type dummy + # Bring the devices up ip -netns "${NS1}" link set "${DEV}" up ip -netns "${NS2}" link set "${DEV}" up +ip -netns "${NS2}" link set "${DUMMY_DEV}" up # Set fixed MAC addresses on the devices ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02 ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06 # Add fixed IP addresses to the devices -ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}" -ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}" -ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad -ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad +ip -netns "${NS1}" addr add "${SADDR}/${MASK}" dev "${DEV}" ${NODAD} +ip -netns "${NS2}" addr add "${DADDR}/${MASK}" dev "${DEV}" ${NODAD} +ip -netns "${NS2}" addr add "${DUMMY_ADDR}/${MASK}" dev "${DUMMY_DEV}" ${NODAD} + +ip -netns "${NS1}" route add default via "${DADDR}" dev "${DEV}" +ip -netns "${NS2}" route add default via "${DADDR_TXONLY}" dev "${DUMMY_DEV}" + +ip netns exec "${NS2}" sysctl -wq net.ipv4.ip_forward=1 +ip netns exec "${NS2}" sysctl -wq net.ipv6.conf.all.forwarding=1 # Optionally disable sg or csum offload to test edge cases # ip netns exec "${NS1}" ethtool -K "${DEV}" sg off +ret=0 + do_test() { local readonly ARGS="$1" - echo "ipv${IP} ${TXMODE} ${ARGS}" - ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" & + # tx-rx test + # packets queued to a local socket are copied, + # sender notification has SO_EE_CODE_ZEROCOPY_COPIED. + + echo -e "\nipv${IP} ${TXMODE} ${ARGS} tx-rx\n" + ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 \ + -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" & sleep 0.2 - ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}" + ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 \ + -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}" -Z 0 || ret=1 wait + + # next test is unconnected tx to dummy0, cannot exercise with tcp + [[ "${TXMODE}" == "tcp" ]] && return + + # tx-only test: send out dummy0 + # packets leaving the host are not copied, + # sender notification does not have SO_EE_CODE_ZEROCOPY_COPIED. + + echo -e "\nipv${IP} ${TXMODE} ${ARGS} tx-only\n" + ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 \ + -S "${SADDR}" -D "${DADDR_TXONLY}" ${ARGS} "${TXMODE}" -Z 1 || ret=1 } do_test "${EXTRA_ARGS}" do_test "-z ${EXTRA_ARGS}" -echo ok + +[[ "$ret" == "0" ]] && echo "OK" -- cgit v1.2.3 From 236156d80d5efd942fc395a078d6ec6d810c2c40 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Sun, 29 Jun 2025 17:21:33 +0300 Subject: selftest: netdevsim: Add devlink rate tc-bw test Test verifies that netdevsim correctly implements devlink ops callbacks that set tc-bw on leaf or node rate object. Signed-off-by: Carolina Jubran Reviewed-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250629142138.361537-4-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/dev.c | 43 ++++++++++++++++++ drivers/net/netdevsim/netdevsim.h | 1 + .../selftests/drivers/net/netdevsim/devlink.sh | 53 ++++++++++++++++++++++ 3 files changed, 97 insertions(+) (limited to 'tools/testing/selftests') diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 3e0b61202f0c..b3647691060c 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -388,6 +388,17 @@ static const struct file_operations nsim_dev_rate_parent_fops = { .owner = THIS_MODULE, }; +static void nsim_dev_tc_bw_debugfs_init(struct dentry *ddir, u32 *tc_bw) +{ + int i; + + for (i = 0; i < DEVLINK_RATE_TCS_MAX; i++) { + char name[16]; + + snprintf(name, sizeof(name), "tc%d_bw", i); + debugfs_create_u32(name, 0400, ddir, &tc_bw[i]); + } +} static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) { @@ -415,6 +426,8 @@ static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev, nsim_dev_port->ddir, &nsim_dev_port->parent_name, &nsim_dev_rate_parent_fops); + nsim_dev_tc_bw_debugfs_init(nsim_dev_port->ddir, + nsim_dev_port->tc_bw); } debugfs_create_symlink("dev", nsim_dev_port->ddir, dev_link_name); @@ -1172,6 +1185,19 @@ static int nsim_rate_bytes_to_units(char *name, u64 *rate, struct netlink_ext_ac return 0; } +static int nsim_leaf_tc_bw_set(struct devlink_rate *devlink_rate, + void *priv, u32 *tc_bw, + struct netlink_ext_ack *extack) +{ + struct nsim_dev_port *nsim_dev_port = priv; + int i; + + for (i = 0; i < DEVLINK_RATE_TCS_MAX; i++) + nsim_dev_port->tc_bw[i] = tc_bw[i]; + + return 0; +} + static int nsim_leaf_tx_share_set(struct devlink_rate *devlink_rate, void *priv, u64 tx_share, struct netlink_ext_ack *extack) { @@ -1210,8 +1236,21 @@ struct nsim_rate_node { char *parent_name; u16 tx_share; u16 tx_max; + u32 tc_bw[DEVLINK_RATE_TCS_MAX]; }; +static int nsim_node_tc_bw_set(struct devlink_rate *devlink_rate, void *priv, + u32 *tc_bw, struct netlink_ext_ack *extack) +{ + struct nsim_rate_node *nsim_node = priv; + int i; + + for (i = 0; i < DEVLINK_RATE_TCS_MAX; i++) + nsim_node->tc_bw[i] = tc_bw[i]; + + return 0; +} + static int nsim_node_tx_share_set(struct devlink_rate *devlink_rate, void *priv, u64 tx_share, struct netlink_ext_ack *extack) { @@ -1264,6 +1303,8 @@ static int nsim_rate_node_new(struct devlink_rate *node, void **priv, &nsim_node->parent_name, &nsim_dev_rate_parent_fops); + nsim_dev_tc_bw_debugfs_init(nsim_node->ddir, nsim_node->tc_bw); + *priv = nsim_node; return 0; } @@ -1340,8 +1381,10 @@ static const struct devlink_ops nsim_dev_devlink_ops = { .trap_policer_counter_get = nsim_dev_devlink_trap_policer_counter_get, .rate_leaf_tx_share_set = nsim_leaf_tx_share_set, .rate_leaf_tx_max_set = nsim_leaf_tx_max_set, + .rate_leaf_tc_bw_set = nsim_leaf_tc_bw_set, .rate_node_tx_share_set = nsim_node_tx_share_set, .rate_node_tx_max_set = nsim_node_tx_max_set, + .rate_node_tc_bw_set = nsim_node_tc_bw_set, .rate_node_new = nsim_rate_node_new, .rate_node_del = nsim_rate_node_del, .rate_leaf_parent_set = nsim_rate_leaf_parent_set, diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 4a0c48c7a384..809dd29fc5fe 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -276,6 +276,7 @@ struct nsim_dev_port { struct dentry *ddir; struct dentry *rate_parent; char *parent_name; + u32 tc_bw[DEVLINK_RATE_TCS_MAX]; struct netdevsim *ns; }; diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh index b5ea2526f23c..a102803ff74f 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh @@ -608,6 +608,46 @@ rate_attr_parent_check() check_err $? "Unexpected parent attr value $api_value != $parent" } +rate_attr_tc_bw_check() +{ + local handle=$1 + local tc_bw=$2 + local debug_file=$3 + + local tc_bw_str="" + for bw in $tc_bw; do + local tc=${bw%%:*} + local value=${bw##*:} + tc_bw_str="$tc_bw_str $tc:$value" + done + tc_bw_str=${tc_bw_str# } + + rate_attr_set "$handle" tc-bw "$tc_bw_str" + check_err $? "Failed to set tc-bw values" + + for bw in $tc_bw; do + local tc=${bw%%:*} + local value=${bw##*:} + local debug_value + debug_value=$(cat "$debug_file"/tc"${tc}"_bw) + check_err $? "Failed to read tc-bw value from debugfs for tc$tc" + [ "$debug_value" == "$value" ] + check_err $? "Unexpected tc-bw debug value for tc$tc: $debug_value != $value" + done + + for bw in $tc_bw; do + local tc=${bw%%:*} + local expected_value=${bw##*:} + local api_value + api_value=$(rate_attr_get "$handle" tc_"$tc") + if [ "$api_value" = "null" ]; then + api_value=0 + fi + [ "$api_value" == "$expected_value" ] + check_err $? "Unexpected tc-bw value for tc$tc: $api_value != $expected_value" + done +} + rate_node_add() { local handle=$1 @@ -649,6 +689,13 @@ rate_test() rate=$(($rate+100)) done + local tc_bw="0:0 1:40 2:0 3:0 4:0 5:0 6:60 7:0" + for r_obj in $leafs + do + rate_attr_tc_bw_check "$r_obj" "$tc_bw" \ + "$DEBUGFS_DIR"/ports/"${r_obj##*/}" + done + local node1_name='group1' local node1="$DL_HANDLE/$node1_name" rate_node_add "$node1" @@ -666,6 +713,12 @@ rate_test() rate_attr_tx_rate_check $node1 tx_max $node_tx_max \ $DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_max + + local tc_bw="0:20 1:0 2:0 3:0 4:0 5:20 6:60 7:0" + rate_attr_tc_bw_check $node1 "$tc_bw" \ + "$DEBUGFS_DIR"/rate_nodes/"${node1##*/}" + + rate_node_del "$node1" check_err $? "Failed to delete node $node1" local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w` -- cgit v1.2.3 From 23ca32e4ead48f68e37000f2552b973ef1439acb Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Sun, 29 Jun 2025 17:21:38 +0300 Subject: selftests: drv-net: Add test for devlink-rate traffic class bandwidth distribution This test suite validates the functionality of the devlink-rate API for traffic class (TC) bandwidth allocation. It ensures that bandwidth can be distributed between different traffic classes as configured, and verifies that explicit TC-to-queue mapping is required for the allocation to be effective. The first test (test_no_tc_mapping_bandwidth) is marked as expected failure on mlx5, since the hardware automatically enforces traffic class separation by dynamically moving queues to the correct TC scheduler, even without explicit TC-to-queue mapping configuration. Test output on mlx5: 1..2 # Created VF interface: eth5 # Created VLAN eth5.101 on eth5 with tc 3 and IP 198.51.100.2 # Created VLAN eth5.102 on eth5 with tc 4 and IP 198.51.100.10 # Set representor eth4 up and added to bridge # Bandwidth check results without TC mapping: # TC 3: 0.19 Gbits/sec # TC 4: 0.76 Gbits/sec # Total bandwidth: 0.95 Gbits/sec # TC 3 percentage: 20.0% # TC 4 percentage: 80.0% ok 1 devlink_rate_tc_bw.test_no_tc_mapping_bandwidth # XFAIL Bandwidth matched 80/20 split without TC mapping # Created VF interface: eth5 # Created VLAN eth5.101 on eth5 with tc 3 and IP 198.51.100.2 # Created VLAN eth5.102 on eth5 with tc 4 and IP 198.51.100.10 # Set representor eth4 up and added to bridge # Bandwidth check results with TC mapping: # TC 3: 0.21 Gbits/sec # TC 4: 0.78 Gbits/sec # Total bandwidth: 0.98 Gbits/sec # TC 3 percentage: 21.1% # TC 4 percentage: 78.9% # Bandwidth is distributed as 80/20 with TC mapping ok 2 devlink_rate_tc_bw.test_tc_mapping_bandwidth # Totals: pass:1 fail:0 xfail:1 xpass:0 skip:0 error:0 Signed-off-by: Carolina Jubran Reviewed-by: Cosmin Ratiu Reviewed-by: Nimrod Oren Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250629142138.361537-9-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/hw/devlink_rate_tc_bw.py | 466 +++++++++++++++++++++ .../selftests/drivers/net/hw/lib/py/__init__.py | 2 +- .../selftests/drivers/net/lib/py/__init__.py | 2 +- tools/testing/selftests/net/lib/py/__init__.py | 2 +- tools/testing/selftests/net/lib/py/ynl.py | 5 + 5 files changed, 474 insertions(+), 3 deletions(-) create mode 100755 tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py new file mode 100755 index 000000000000..820d8a03becc --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py @@ -0,0 +1,466 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Devlink Rate TC Bandwidth Test Suite +=================================== + +This test suite verifies the functionality of devlink-rate traffic class (TC) +bandwidth distribution in a virtualized environment. The tests validate that +bandwidth can be properly allocated between different traffic classes and +that TC mapping works as expected. + +Test Environment: +---------------- +- Creates 1 VF +- Establishes a bridge connecting the VF representor and the uplink representor +- Sets up 2 VLAN interfaces on the VF with different VLAN IDs (101, 102) +- Configures different traffic classes (TC3 and TC4) for each VLAN + +Test Cases: +---------- +1. test_no_tc_mapping_bandwidth: + - Verifies that without TC mapping, bandwidth is NOT distributed according to + the configured 80/20 split between TC4 and TC3 + - This test should fail if bandwidth matches the 80/20 split without TC + mapping + - Expected: Bandwidth should NOT be distributed as 80/20 + +2. test_tc_mapping_bandwidth: + - Configures TC mapping using mqprio qdisc + - Verifies that with TC mapping, bandwidth IS distributed according to the + configured 80/20 split between TC3 and TC4 + - Expected: Bandwidth should be distributed as 80/20 + +Bandwidth Distribution: +---------------------- +- TC3 (VLAN 101): Configured for 80% of total bandwidth +- TC4 (VLAN 102): Configured for 20% of total bandwidth +- Total bandwidth: 1Gbps +- Tolerance: +-12% + +Hardware-Specific Behavior (mlx5): +-------------------------- +mlx5 hardware enforces traffic class separation by ensuring that each transmit +queue (SQ) is associated with a single TC. If a packet is sent on a queue that +doesn't match the expected TC (based on DSCP or VLAN priority and hypervisor-set +mapping), the hardware moves the queue to the correct TC scheduler to preserve +traffic isolation. + +This behavior means that even without explicit TC-to-queue mapping, bandwidth +enforcement may still appear to work—because the hardware dynamically adjusts +the scheduling context. However, this can lead to performance issues in high +rates and HOL blocking if traffic from different TCs is mixed on the same queue. +""" + +import json +import os +import subprocess +import threading +import time + +from lib.py import ksft_pr, ksft_run, ksft_exit +from lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx +from lib.py import NetDrvEpEnv, DevlinkFamily +from lib.py import NlError +from lib.py import cmd, defer, ethtool, ip + + +class BandwidthValidator: + """ + Validates bandwidth totals and per-TC shares against expected values + with a tolerance. + """ + + def __init__(self): + self.tolerance_percent = 12 + self.expected_total_gbps = 1.0 + self.total_min_expected = self.min_expected(self.expected_total_gbps) + self.total_max_expected = self.max_expected(self.expected_total_gbps) + self.tc_expected_percent = { + 3: 20.0, + 4: 80.0, + } + + def min_expected(self, value): + """Calculates the minimum acceptable value based on tolerance.""" + return value - (value * self.tolerance_percent / 100) + + def max_expected(self, value): + """Calculates the maximum acceptable value based on tolerance.""" + return value + (value * self.tolerance_percent / 100) + + def bound(self, expected, value): + """Returns True if value is within expected tolerance.""" + return self.min_expected(expected) <= value <= self.max_expected(expected) + + def tc_bandwidth_bound(self, value, tc_ix): + """ + Returns True if the given bandwidth value is within tolerance + for the TC's expected bandwidth. + """ + expected = self.tc_expected_percent[tc_ix] + return self.bound(expected, value) + + +def setup_vf(cfg, set_tc_mapping=True): + """ + Sets up a VF on the given network interface. + + Enables SR-IOV and switchdev mode, brings the VF interface up, + and optionally configures TC mapping using mqprio. + """ + try: + cmd(f"devlink dev eswitch set pci/{cfg.pci} mode switchdev") + defer(cmd, f"devlink dev eswitch set pci/{cfg.pci} mode legacy") + except Exception as exc: + raise KsftSkipEx(f"Failed to enable switchdev mode on {cfg.pci}") from exc + try: + cmd(f"echo 1 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs") + defer(cmd, f"echo 0 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs") + except Exception as exc: + raise KsftSkipEx(f"Failed to enable SR-IOV on {cfg.ifname}") from exc + + time.sleep(2) + vf_ifc = (os.listdir( + f"/sys/class/net/{cfg.ifname}/device/virtfn0/net") or [None])[0] + if vf_ifc: + ip(f"link set dev {vf_ifc} up") + else: + raise KsftSkipEx("VF interface not found") + if set_tc_mapping: + cmd(f"tc qdisc add dev {vf_ifc} root handle 5 mqprio mode dcb hw 1 num_tc 8") + + return vf_ifc + + +def setup_vlans_on_vf(vf_ifc): + """ + Sets up two VLAN interfaces on the given VF, each mapped to a different TC. + """ + vlan_configs = [ + {"vlan_id": 101, "tc": 3, "ip": "198.51.100.2"}, + {"vlan_id": 102, "tc": 4, "ip": "198.51.100.10"}, + ] + + for config in vlan_configs: + vlan_dev = f"{vf_ifc}.{config['vlan_id']}" + ip(f"link add link {vf_ifc} name {vlan_dev} type vlan id {config['vlan_id']}") + ip(f"addr add {config['ip']}/29 dev {vlan_dev}") + ip(f"link set dev {vlan_dev} up") + ip(f"link set dev {vlan_dev} type vlan egress-qos-map 0:{config['tc']}") + ksft_pr(f"Created VLAN {vlan_dev} on {vf_ifc} with tc {config['tc']} and IP {config['ip']}") + + +def get_vf_info(cfg): + """ + Finds the VF representor interface and devlink port index + for the given PCI device used in the test environment. + """ + cfg.vf_representor = None + cfg.vf_port_index = None + out = subprocess.check_output(["devlink", "-j", "port", "show"], encoding="utf-8") + ports = json.loads(out)["port"] + + for port_name, props in ports.items(): + netdev = props.get("netdev") + + if (port_name.startswith(f"pci/{cfg.pci}/") and + props.get("vfnum") == 0): + cfg.vf_representor = netdev + cfg.vf_port_index = int(port_name.split("/")[-1]) + break + + +def setup_bridge(cfg): + """ + Creates and configures a Linux bridge, with both the uplink + and VF representor interfaces attached to it. + """ + bridge_name = f"br_{os.getpid()}" + ip(f"link add name {bridge_name} type bridge") + defer(cmd, f"ip link del name {bridge_name} type bridge") + + ip(f"link set dev {cfg.ifname} master {bridge_name}") + + rep_name = cfg.vf_representor + if rep_name: + ip(f"link set dev {rep_name} master {bridge_name}") + ip(f"link set dev {rep_name} up") + ksft_pr(f"Set representor {rep_name} up and added to bridge") + else: + raise KsftSkipEx("Could not find representor for the VF") + + ip(f"link set dev {bridge_name} up") + + +def setup_devlink_rate(cfg): + """ + Configures devlink rate tx_max and traffic class bandwidth for the VF. + """ + port_index = cfg.vf_port_index + if port_index is None: + raise KsftSkipEx("Could not find VF port index") + try: + cfg.devnl.rate_set({ + "bus-name": "pci", + "dev-name": cfg.pci, + "port-index": port_index, + "rate-tx-max": 125000000, + "rate-tc-bws": [ + {"rate-tc-index": 0, "rate-tc-bw": 0}, + {"rate-tc-index": 1, "rate-tc-bw": 0}, + {"rate-tc-index": 2, "rate-tc-bw": 0}, + {"rate-tc-index": 3, "rate-tc-bw": 20}, + {"rate-tc-index": 4, "rate-tc-bw": 80}, + {"rate-tc-index": 5, "rate-tc-bw": 0}, + {"rate-tc-index": 6, "rate-tc-bw": 0}, + {"rate-tc-index": 7, "rate-tc-bw": 0}, + ] + }) + except NlError as exc: + if exc.error == 95: # EOPNOTSUPP + raise KsftSkipEx("devlink rate configuration is not supported on the VF") from exc + raise KsftFailEx(f"rate_set failed on VF port {port_index}") from exc + + +def setup_remote_server(cfg): + """ + Sets up VLAN interfaces and starts iperf3 servers on the remote side. + """ + remote_dev = cfg.remote_ifname + vlan_ids = [101, 102] + remote_ips = ["198.51.100.1", "198.51.100.9"] + + for vlan_id, ip_addr in zip(vlan_ids, remote_ips): + vlan_dev = f"{remote_dev}.{vlan_id}" + cmd(f"ip link add link {remote_dev} name {vlan_dev} " + f"type vlan id {vlan_id}", host=cfg.remote) + cmd(f"ip addr add {ip_addr}/29 dev {vlan_dev}", host=cfg.remote) + cmd(f"ip link set dev {vlan_dev} up", host=cfg.remote) + cmd(f"iperf3 -s -1 -B {ip_addr}",background=True, host=cfg.remote) + defer(cmd, f"ip link del {vlan_dev}", host=cfg.remote) + + +def setup_test_environment(cfg, set_tc_mapping=True): + """ + Sets up the complete test environment including VF creation, VLANs, + bridge configuration, devlink rate setup, and the remote server. + """ + vf_ifc = setup_vf(cfg, set_tc_mapping) + ksft_pr(f"Created VF interface: {vf_ifc}") + + setup_vlans_on_vf(vf_ifc) + + get_vf_info(cfg) + setup_bridge(cfg) + + setup_devlink_rate(cfg) + setup_remote_server(cfg) + time.sleep(2) + + +def run_iperf_client(server_ip, local_ip, barrier, min_expected_gbps=0.1): + """ + Runs a single iperf3 client instance, binding to the given local IP. + Waits on a barrier to synchronize with other threads. + """ + try: + barrier.wait(timeout=10) + except Exception as exc: + raise KsftFailEx("iperf3 barrier wait timed") from exc + + iperf_cmd = ["iperf3", "-c", server_ip, "-B", local_ip, "-J"] + result = subprocess.run(iperf_cmd, capture_output=True, text=True, + check=True) + + try: + output = json.loads(result.stdout) + bits_per_second = output["end"]["sum_received"]["bits_per_second"] + gbps = bits_per_second / 1e9 + if gbps < min_expected_gbps: + ksft_pr( + f"iperf3 bandwidth too low: {gbps:.2f} Gbps " + f"(expected ≥ {min_expected_gbps} Gbps)" + ) + return None + return gbps + except json.JSONDecodeError as exc: + ksft_pr(f"Failed to parse iperf3 JSON output: {exc}") + return None + + +def run_bandwidth_test(): + """ + Launches iperf3 client threads for each VLAN/TC pair and collects results. + """ + def _run_iperf_client_thread(server_ip, local_ip, results, barrier, tc_ix): + results[tc_ix] = run_iperf_client(server_ip, local_ip, barrier) + + vf_vlan_data = [ + # (local_ip, remote_ip, TC) + ("198.51.100.2", "198.51.100.1", 3), + ("198.51.100.10", "198.51.100.9", 4), + ] + + results = {} + threads = [] + start_barrier = threading.Barrier(len(vf_vlan_data)) + + for local_ip, remote_ip, tc_ix in vf_vlan_data: + thread = threading.Thread( + target=_run_iperf_client_thread, + args=(remote_ip, local_ip, results, start_barrier, tc_ix) + ) + thread.start() + threads.append(thread) + + for thread in threads: + thread.join() + + for tc_ix, tc_bw in results.items(): + if tc_bw is None: + raise KsftFailEx("iperf3 client failed; cannot evaluate bandwidth") + + return results + +def calculate_bandwidth_percentages(results): + """ + Calculates the percentage of total bandwidth received by TC3 and TC4. + """ + if 3 not in results or 4 not in results: + raise KsftFailEx(f"Missing expected TC results in {results}") + + tc3_bw = results[3] + tc4_bw = results[4] + total_bw = tc3_bw + tc4_bw + tc3_percentage = (tc3_bw / total_bw) * 100 + tc4_percentage = (tc4_bw / total_bw) * 100 + + return { + 'tc3_bw': tc3_bw, + 'tc4_bw': tc4_bw, + 'tc3_percentage': tc3_percentage, + 'tc4_percentage': tc4_percentage, + 'total_bw': total_bw + } + + +def print_bandwidth_results(bw_data, test_name): + """ + Prints bandwidth measurements and TC usage summary for a given test. + """ + ksft_pr(f"Bandwidth check results {test_name}:") + ksft_pr(f"TC 3: {bw_data['tc3_bw']:.2f} Gbits/sec") + ksft_pr(f"TC 4: {bw_data['tc4_bw']:.2f} Gbits/sec") + ksft_pr(f"Total bandwidth: {bw_data['total_bw']:.2f} Gbits/sec") + ksft_pr(f"TC 3 percentage: {bw_data['tc3_percentage']:.1f}%") + ksft_pr(f"TC 4 percentage: {bw_data['tc4_percentage']:.1f}%") + + +def verify_total_bandwidth(bw_data, validator): + """ + Ensures the total measured bandwidth falls within the acceptable tolerance. + """ + total = bw_data['total_bw'] + + if validator.bound(validator.expected_total_gbps, total): + return + + if total < validator.total_min_expected: + raise KsftSkipEx( + f"Total bandwidth {total:.2f} Gbps < minimum " + f"{validator.total_min_expected:.2f} Gbps; " + f"parent tx_max ({validator.expected_total_gbps:.1f} G) " + f"not reached, cannot validate share" + ) + + raise KsftFailEx( + f"Total bandwidth {total:.2f} Gbps exceeds allowed ceiling " + f"{validator.total_max_expected:.2f} Gbps " + f"(VF tx_max set to {validator.expected_total_gbps:.1f} G)" + ) + + +def check_bandwidth_distribution(bw_data, validator): + """ + Checks whether the measured TC3 and TC4 bandwidth percentages + fall within their expected tolerance ranges. + + Returns: + bool: True if both TC3 and TC4 percentages are within bounds. + """ + tc3_valid = validator.tc_bandwidth_bound(bw_data['tc3_percentage'], 3) + tc4_valid = validator.tc_bandwidth_bound(bw_data['tc4_percentage'], 4) + + return tc3_valid and tc4_valid + + +def run_bandwidth_distribution_test(cfg, set_tc_mapping): + """ + Runs parallel iperf3 tests for both TCs and collects results. + """ + setup_test_environment(cfg, set_tc_mapping) + bandwidths = run_bandwidth_test() + bw_data = calculate_bandwidth_percentages(bandwidths) + test_name = "with TC mapping" if set_tc_mapping else "without TC mapping" + print_bandwidth_results(bw_data, test_name) + + verify_total_bandwidth(bw_data, cfg.bw_validator) + + return check_bandwidth_distribution(bw_data, cfg.bw_validator) + + +def test_no_tc_mapping_bandwidth(cfg): + """ + Verifies that bandwidth is not split 80/20 without traffic class mapping. + """ + pass_bw_msg = "Bandwidth is NOT distributed as 80/20 without TC mapping" + fail_bw_msg = "Bandwidth matched 80/20 split without TC mapping" + is_mlx5 = "driver: mlx5" in ethtool(f"-i {cfg.ifname}").stdout + + if run_bandwidth_distribution_test(cfg, set_tc_mapping=False): + if is_mlx5: + raise KsftXfailEx(fail_bw_msg) + raise KsftFailEx(fail_bw_msg) + if is_mlx5: + raise KsftFailEx("mlx5 behavior changed:" + pass_bw_msg) + ksft_pr(pass_bw_msg) + + +def test_tc_mapping_bandwidth(cfg): + """ + Verifies that bandwidth is correctly split 80/20 between TC3 and TC4 + when traffic class mapping is set. + """ + if run_bandwidth_distribution_test(cfg, set_tc_mapping=True): + ksft_pr("Bandwidth is distributed as 80/20 with TC mapping") + else: + raise KsftFailEx("Bandwidth did not match 80/20 split with TC mapping") + + +def main() -> None: + """ + Main entry point for running the test cases. + """ + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + cfg.devnl = DevlinkFamily() + + cfg.pci = os.path.basename( + os.path.realpath(f"/sys/class/net/{cfg.ifname}/device") + ) + if not cfg.pci: + raise KsftSkipEx("Could not get PCI address of the interface") + cfg.require_cmd("iperf3") + cfg.require_cmd("iperf3", remote=True) + + cfg.bw_validator = BandwidthValidator() + + cases = [test_no_tc_mapping_bandwidth, test_tc_mapping_bandwidth] + + ksft_run(cases=cases, args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py index 56ff11074b55..1462a339a74b 100644 --- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py @@ -13,7 +13,7 @@ try: # Import one by one to avoid pylint false positives from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \ - NlError, RtnlFamily + NlError, RtnlFamily, DevlinkFamily from net.lib.py import CmdExitFailure from net.lib.py import bkg, cmd, defer, ethtool, fd_read_timeout, ip, \ rand_port, tool, wait_port_listen diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py index 9ed1d8f70524..fce5d9218f1d 100644 --- a/tools/testing/selftests/drivers/net/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py @@ -12,7 +12,7 @@ try: # Import one by one to avoid pylint false positives from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \ - NlError, RtnlFamily + NlError, RtnlFamily, DevlinkFamily from net.lib.py import CmdExitFailure from net.lib.py import bkg, cmd, defer, ethtool, fd_read_timeout, ip, \ rand_port, tool, wait_port_listen diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py index 8697bd27dc30..02be28dcc089 100644 --- a/tools/testing/selftests/net/lib/py/__init__.py +++ b/tools/testing/selftests/net/lib/py/__init__.py @@ -6,4 +6,4 @@ from .netns import NetNS, NetNSEnter from .nsim import * from .utils import * from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily -from .ynl import NetshaperFamily +from .ynl import NetshaperFamily, DevlinkFamily diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py index 6329ae805abf..2b3a61ea3bfa 100644 --- a/tools/testing/selftests/net/lib/py/ynl.py +++ b/tools/testing/selftests/net/lib/py/ynl.py @@ -56,3 +56,8 @@ class NetshaperFamily(YnlFamily): def __init__(self, recv_size=0): super().__init__((SPEC_PATH / Path('net_shaper.yaml')).as_posix(), schema='', recv_size=recv_size) + +class DevlinkFamily(YnlFamily): + def __init__(self, recv_size=0): + super().__init__((SPEC_PATH / Path('devlink.yaml')).as_posix(), + schema='', recv_size=recv_size) -- cgit v1.2.3 From 6d80cb73131db19a9d625dad93d5dbc53513c6cb Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 9 Jun 2025 14:29:10 +0100 Subject: kselftest/arm64: Convert tpidr2 test to use kselftest.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recent work by Thomas Weißschuh means that it is now possible to use kselftest.h with nolibc. Convert the tpidr2 test which is nolibc specific to use kselftest.h, making it look more standard and ensuring it gets the benefit of any work done on kselftest.h. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20250609-kselftest-arm64-nolibc-header-v1-1-16ee1c6fbfed@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/abi/Makefile | 2 +- tools/testing/selftests/arm64/abi/tpidr2.c | 140 ++++++++--------------------- 2 files changed, 38 insertions(+), 104 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/arm64/abi/Makefile b/tools/testing/selftests/arm64/abi/Makefile index a6d30c620908..483488f8c2ad 100644 --- a/tools/testing/selftests/arm64/abi/Makefile +++ b/tools/testing/selftests/arm64/abi/Makefile @@ -12,4 +12,4 @@ $(OUTPUT)/syscall-abi: syscall-abi.c syscall-abi-asm.S $(OUTPUT)/tpidr2: tpidr2.c $(CC) -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \ -static -include ../../../../include/nolibc/nolibc.h \ - -ffreestanding -Wall $^ -o $@ -lgcc + -I../.. -ffreestanding -Wall $^ -o $@ -lgcc diff --git a/tools/testing/selftests/arm64/abi/tpidr2.c b/tools/testing/selftests/arm64/abi/tpidr2.c index eb19dcc37a75..f58a9f89b952 100644 --- a/tools/testing/selftests/arm64/abi/tpidr2.c +++ b/tools/testing/selftests/arm64/abi/tpidr2.c @@ -3,31 +3,12 @@ #include #include +#include "kselftest.h" + #define SYS_TPIDR2 "S3_3_C13_C0_5" #define EXPECTED_TESTS 5 -static void putstr(const char *str) -{ - write(1, str, strlen(str)); -} - -static void putnum(unsigned int num) -{ - char c; - - if (num / 10) - putnum(num / 10); - - c = '0' + (num % 10); - write(1, &c, 1); -} - -static int tests_run; -static int tests_passed; -static int tests_failed; -static int tests_skipped; - static void set_tpidr2(uint64_t val) { asm volatile ( @@ -50,20 +31,6 @@ static uint64_t get_tpidr2(void) return val; } -static void print_summary(void) -{ - if (tests_passed + tests_failed + tests_skipped != EXPECTED_TESTS) - putstr("# UNEXPECTED TEST COUNT: "); - - putstr("# Totals: pass:"); - putnum(tests_passed); - putstr(" fail:"); - putnum(tests_failed); - putstr(" xfail:0 xpass:0 skip:"); - putnum(tests_skipped); - putstr(" error:0\n"); -} - /* Processes should start with TPIDR2 == 0 */ static int default_value(void) { @@ -105,9 +72,8 @@ static int write_fork_read(void) if (newpid == 0) { /* In child */ if (get_tpidr2() != oldpid) { - putstr("# TPIDR2 changed in child: "); - putnum(get_tpidr2()); - putstr("\n"); + ksft_print_msg("TPIDR2 changed in child: %llx\n", + get_tpidr2()); exit(0); } @@ -115,14 +81,12 @@ static int write_fork_read(void) if (get_tpidr2() == getpid()) { exit(1); } else { - putstr("# Failed to set TPIDR2 in child\n"); + ksft_print_msg("Failed to set TPIDR2 in child\n"); exit(0); } } if (newpid < 0) { - putstr("# fork() failed: -"); - putnum(-newpid); - putstr("\n"); + ksft_print_msg("fork() failed: %d\n", newpid); return 0; } @@ -132,23 +96,22 @@ static int write_fork_read(void) if (waiting < 0) { if (errno == EINTR) continue; - putstr("# waitpid() failed: "); - putnum(errno); - putstr("\n"); + ksft_print_msg("waitpid() failed: %d\n", errno); return 0; } if (waiting != newpid) { - putstr("# waitpid() returned wrong PID\n"); + ksft_print_msg("waitpid() returned wrong PID: %d != %d\n", + waiting, newpid); return 0; } if (!WIFEXITED(status)) { - putstr("# child did not exit\n"); + ksft_print_msg("child did not exit\n"); return 0; } if (getpid() != get_tpidr2()) { - putstr("# TPIDR2 corrupted in parent\n"); + ksft_print_msg("TPIDR2 corrupted in parent\n"); return 0; } @@ -188,35 +151,32 @@ static int write_clone_read(void) stack = malloc(__STACK_SIZE); if (!stack) { - putstr("# malloc() failed\n"); + ksft_print_msg("malloc() failed\n"); return 0; } ret = sys_clone(CLONE_VM, (unsigned long)stack + __STACK_SIZE, &parent_tid, 0, &child_tid); if (ret == -1) { - putstr("# clone() failed\n"); - putnum(errno); - putstr("\n"); + ksft_print_msg("clone() failed: %d\n", errno); return 0; } if (ret == 0) { /* In child */ if (get_tpidr2() != 0) { - putstr("# TPIDR2 non-zero in child: "); - putnum(get_tpidr2()); - putstr("\n"); + ksft_print_msg("TPIDR2 non-zero in child: %llx\n", + get_tpidr2()); exit(0); } if (gettid() == 0) - putstr("# Child TID==0\n"); + ksft_print_msg("Child TID==0\n"); set_tpidr2(gettid()); if (get_tpidr2() == gettid()) { exit(1); } else { - putstr("# Failed to set TPIDR2 in child\n"); + ksft_print_msg("Failed to set TPIDR2 in child\n"); exit(0); } } @@ -227,25 +187,22 @@ static int write_clone_read(void) if (waiting < 0) { if (errno == EINTR) continue; - putstr("# wait4() failed: "); - putnum(errno); - putstr("\n"); + ksft_print_msg("wait4() failed: %d\n", errno); return 0; } if (waiting != ret) { - putstr("# wait4() returned wrong PID "); - putnum(waiting); - putstr("\n"); + ksft_print_msg("wait4() returned wrong PID %d\n", + waiting); return 0; } if (!WIFEXITED(status)) { - putstr("# child did not exit\n"); + ksft_print_msg("child did not exit\n"); return 0; } if (parent != get_tpidr2()) { - putstr("# TPIDR2 corrupted in parent\n"); + ksft_print_msg("TPIDR2 corrupted in parent\n"); return 0; } @@ -253,35 +210,14 @@ static int write_clone_read(void) } } -#define run_test(name) \ - if (name()) { \ - tests_passed++; \ - } else { \ - tests_failed++; \ - putstr("not "); \ - } \ - putstr("ok "); \ - putnum(++tests_run); \ - putstr(" " #name "\n"); - -#define skip_test(name) \ - tests_skipped++; \ - putstr("ok "); \ - putnum(++tests_run); \ - putstr(" # SKIP " #name "\n"); - int main(int argc, char **argv) { int ret; - putstr("TAP version 13\n"); - putstr("1.."); - putnum(EXPECTED_TESTS); - putstr("\n"); + ksft_print_header(); + ksft_set_plan(5); - putstr("# PID: "); - putnum(getpid()); - putstr("\n"); + ksft_print_msg("PID: %d\n", getpid()); /* * This test is run with nolibc which doesn't support hwcap and @@ -290,23 +226,21 @@ int main(int argc, char **argv) */ ret = open("/proc/sys/abi/sme_default_vector_length", O_RDONLY, 0); if (ret >= 0) { - run_test(default_value); - run_test(write_read); - run_test(write_sleep_read); - run_test(write_fork_read); - run_test(write_clone_read); + ksft_test_result(default_value(), "default_value\n"); + ksft_test_result(write_read, "write_read\n"); + ksft_test_result(write_sleep_read, "write_sleep_read\n"); + ksft_test_result(write_fork_read, "write_fork_read\n"); + ksft_test_result(write_clone_read, "write_clone_read\n"); } else { - putstr("# SME support not present\n"); + ksft_print_msg("SME support not present\n"); - skip_test(default_value); - skip_test(write_read); - skip_test(write_sleep_read); - skip_test(write_fork_read); - skip_test(write_clone_read); + ksft_test_result_skip("default_value\n"); + ksft_test_result_skip("write_read\n"); + ksft_test_result_skip("write_sleep_read\n"); + ksft_test_result_skip("write_fork_read\n"); + ksft_test_result_skip("write_clone_read\n"); } - print_summary(); - - return 0; + ksft_finished(); } -- cgit v1.2.3 From 867446f090589626497638f70b10be5e61a0b925 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 9 Jun 2025 16:25:31 +0100 Subject: kselftest/arm64: Fix check for setting new VLs in sve-ptrace The check that the new vector length we set was the expected one was typoed to an assignment statement which for some reason the compilers didn't spot, most likely due to the macros involved. Fixes: a1d7111257cd ("selftests: arm64: More comprehensively test the SVE ptrace interface") Acked-by: Mark Rutland Acked-by: Dev Jain Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20250609-kselftest-arm64-ssve-fixups-v2-1-998fcfa6f240@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/sve-ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index 577b6e05e860..c499d5789dd5 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -253,7 +253,7 @@ static void ptrace_set_get_vl(pid_t child, const struct vec_type *type, return; } - ksft_test_result(new_sve->vl = prctl_vl, "Set %s VL %u\n", + ksft_test_result(new_sve->vl == prctl_vl, "Set %s VL %u\n", type->name, vl); free(new_sve); -- cgit v1.2.3 From 94ab150010f430a36c72e2fe5b7da44a625a6ad5 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 9 Jun 2025 16:25:32 +0100 Subject: kselftest/arm64: Fix test for streaming FPSIMD write in sve-ptrace Since f916dd32a943 ("arm64/fpsimd: ptrace: Mandate SVE payload for streaming-mode state") we do not support writing FPSIMD payload data when writing NT_ARM_SSVE but the sve-ptrace test has an explicit test for this being supported which was not updated to reflect the new behaviour. Fix the test to expect a failure when writing FPSIMD data to the streaming mode register set. Acked-by: Mark Rutland Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20250609-kselftest-arm64-ssve-fixups-v2-2-998fcfa6f240@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/sve-ptrace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index c499d5789dd5..7e259907805b 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -301,8 +301,10 @@ static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type) p[j] = j; } + /* This should only succeed for SVE */ ret = set_sve(child, type, sve); - ksft_test_result(ret == 0, "%s FPSIMD set via SVE: %d\n", + ksft_test_result((type->regset == NT_ARM_SVE) == (ret == 0), + "%s FPSIMD set via SVE: %d\n", type->name, ret); if (ret) goto out; -- cgit v1.2.3 From 9e8ebfe677f9101bbfe1f75d548a5aec581e8213 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 9 Jun 2025 16:25:33 +0100 Subject: kselftest/arm64: Specify SVE data when testing VL set in sve-ptrace Since f916dd32a943 ("arm64/fpsimd: ptrace: Mandate SVE payload for streaming-mode state") we reject attempts to write to the streaming mode regset even if there is no register data supplied, causing the tests for setting vector lengths and setting SVE_VL_INHERIT in sve-ptrace to spuriously fail. Set the flag to avoid the issue, we still support not supplying register data. Acked-by: Mark Rutland Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20250609-kselftest-arm64-ssve-fixups-v2-3-998fcfa6f240@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/sve-ptrace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index 7e259907805b..7f9b6a61d369 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -170,7 +170,7 @@ static void ptrace_set_get_inherit(pid_t child, const struct vec_type *type) memset(&sve, 0, sizeof(sve)); sve.size = sizeof(sve); sve.vl = sve_vl_from_vq(SVE_VQ_MIN); - sve.flags = SVE_PT_VL_INHERIT; + sve.flags = SVE_PT_VL_INHERIT | SVE_PT_REGS_SVE; ret = set_sve(child, type, &sve); if (ret != 0) { ksft_test_result_fail("Failed to set %s SVE_PT_VL_INHERIT\n", @@ -235,6 +235,7 @@ static void ptrace_set_get_vl(pid_t child, const struct vec_type *type, /* Set the VL by doing a set with no register payload */ memset(&sve, 0, sizeof(sve)); sve.size = sizeof(sve); + sve.flags = SVE_PT_REGS_SVE; sve.vl = vl; ret = set_sve(child, type, &sve); if (ret != 0) { -- cgit v1.2.3 From 7980ad7e4ca80f6c255f4473fba82a475342035a Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Tue, 1 Jul 2025 08:08:45 +0200 Subject: selftests/sched_ext: Fix exit selftest hang on UP On single-CPU systems, ops.select_cpu() is never called, causing the EXIT_SELECT_CPU test case to wait indefinitely. Avoid the stall by skipping this specific sub-test when only one CPU is available. Reported-by: Phil Auld Fixes: a5db7817af780 ("sched_ext: Add selftests") Signed-off-by: Andrea Righi Reviewed-by: Phil Auld Tested-by: Phil Auld Signed-off-by: Tejun Heo --- tools/testing/selftests/sched_ext/exit.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/sched_ext/exit.c b/tools/testing/selftests/sched_ext/exit.c index 9451782689de..ee25824b1cbe 100644 --- a/tools/testing/selftests/sched_ext/exit.c +++ b/tools/testing/selftests/sched_ext/exit.c @@ -22,6 +22,14 @@ static enum scx_test_status run(void *ctx) struct bpf_link *link; char buf[16]; + /* + * On single-CPU systems, ops.select_cpu() is never + * invoked, so skip this test to avoid getting stuck + * indefinitely. + */ + if (tc == EXIT_SELECT_CPU && libbpf_num_possible_cpus() == 1) + continue; + skel = exit__open(); SCX_ENUM_INIT(skel); skel->rodata->exit_point = tc; -- cgit v1.2.3 From 30ff3c981e48b37a93249a96675b450469ac13a6 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 13 Jun 2025 08:06:46 +0530 Subject: KVM: selftests: Change MDSCR_EL1 register holding variables as uint64_t Change MDSCR_EL1 register holding local variables as uint64_t that reflects its true register width as well. Cc: Oliver Upton Cc: Joey Gouly Cc: kvm@vger.kernel.org Cc: kvmarm@lists.linux.dev Cc: linux-kernel@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Acked-by: Marc Zyngier Reviewed-by: Ada Couprie Diaz Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/20250613023646.1215700-3-anshuman.khandual@arm.com Signed-off-by: Catalin Marinas --- tools/testing/selftests/kvm/arm64/debug-exceptions.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/kvm/arm64/debug-exceptions.c b/tools/testing/selftests/kvm/arm64/debug-exceptions.c index c7fb55c9135b..e34963956fbc 100644 --- a/tools/testing/selftests/kvm/arm64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/arm64/debug-exceptions.c @@ -140,7 +140,7 @@ static void enable_os_lock(void) static void enable_monitor_debug_exceptions(void) { - uint32_t mdscr; + uint64_t mdscr; asm volatile("msr daifclr, #8"); @@ -223,7 +223,7 @@ void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr, static void install_ss(void) { - uint32_t mdscr; + uint64_t mdscr; asm volatile("msr daifclr, #8"); -- cgit v1.2.3 From 439fa8756a107043b54555096bd1da22e0d5cffd Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 23 Jun 2025 23:15:52 +0200 Subject: selftests/nolibc: fix EXTRACONFIG variables ordering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The variable block got disordered at some point. Use the correct ordering. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Tested-by: John Paul Adrian Glaubitz Link: https://lore.kernel.org/r/20250623-nolibc-sh-v2-1-0f5b4b303025@weissschuh.net --- tools/testing/selftests/nolibc/Makefile.nolibc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/nolibc/Makefile.nolibc b/tools/testing/selftests/nolibc/Makefile.nolibc index 9b56191b10b3..a28b46c5b9e7 100644 --- a/tools/testing/selftests/nolibc/Makefile.nolibc +++ b/tools/testing/selftests/nolibc/Makefile.nolibc @@ -119,10 +119,10 @@ DEFCONFIG_sparc64 = sparc64_defconfig DEFCONFIG_m68k = virt_defconfig DEFCONFIG = $(DEFCONFIG_$(XARCH)) -EXTRACONFIG_m68k = -e CONFIG_BLK_DEV_INITRD -EXTRACONFIG = $(EXTRACONFIG_$(XARCH)) EXTRACONFIG_arm = -e CONFIG_NAMESPACES EXTRACONFIG_armthumb = -e CONFIG_NAMESPACES +EXTRACONFIG_m68k = -e CONFIG_BLK_DEV_INITRD +EXTRACONFIG = $(EXTRACONFIG_$(XARCH)) # optional tests to run (default = all) TEST = -- cgit v1.2.3 From 358b2511d7e687a243383fbbba9cb21b242f48b8 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 23 Jun 2025 23:15:53 +0200 Subject: selftests/nolibc: use file driver for QEMU serial MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For the test implementation of the SuperH architecture a second serial serial port needs to be used. Unfortunately the currently used 'stdio' driver does not support multiple serial ports at the same time. Switch to the 'file' driver which does support multiple ports and is sufficient for the nolibc-test usecase. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Tested-by: John Paul Adrian Glaubitz Link: https://lore.kernel.org/r/20250623-nolibc-sh-v2-2-0f5b4b303025@weissschuh.net --- tools/testing/selftests/nolibc/Makefile.nolibc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/nolibc/Makefile.nolibc b/tools/testing/selftests/nolibc/Makefile.nolibc index a28b46c5b9e7..450d6add864c 100644 --- a/tools/testing/selftests/nolibc/Makefile.nolibc +++ b/tools/testing/selftests/nolibc/Makefile.nolibc @@ -337,12 +337,12 @@ kernel-standalone: initramfs # run the tests after building the kernel run: kernel initramfs.cpio - $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(IMAGE)" -initrd initramfs.cpio -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out" + $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(IMAGE)" -initrd initramfs.cpio -serial file:/dev/stdout $(QEMU_ARGS) > "$(CURDIR)/run.out" $(Q)$(REPORT) $(CURDIR)/run.out # re-run the tests from an existing kernel rerun: - $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(IMAGE)" -initrd initramfs.cpio -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out" + $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(IMAGE)" -initrd initramfs.cpio -serial file:/dev/stdout $(QEMU_ARGS) > "$(CURDIR)/run.out" $(Q)$(REPORT) $(CURDIR)/run.out # report with existing test log -- cgit v1.2.3 From 7b29689263fb21cd0394a664d7d2d1cbc9d1fff1 Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Wed, 2 Jul 2025 14:03:09 -0700 Subject: selftests/bpf: Add test cases for bpf_dynptr_memset() Add tests to verify the behavior of bpf_dynptr_memset(): * normal memset 0 * normal memset non-0 * memset with an offset * memset in dynptr that was adjusted * error: size overflow * error: offset+size overflow * error: readonly dynptr * memset into non-linear xdp dynptr Signed-off-by: Ihor Solodrai Signed-off-by: Andrii Nakryiko Acked-by: Mykyta Yatsenko Link: https://lore.kernel.org/bpf/20250702210309.3115903-3-isolodrai@meta.com --- tools/testing/selftests/bpf/prog_tests/dynptr.c | 8 ++ tools/testing/selftests/bpf/progs/dynptr_success.c | 158 +++++++++++++++++++++ 2 files changed, 166 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c index 62e7ec775f24..f2b65398afce 100644 --- a/tools/testing/selftests/bpf/prog_tests/dynptr.c +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -21,6 +21,14 @@ static struct { {"test_dynptr_data", SETUP_SYSCALL_SLEEP}, {"test_dynptr_copy", SETUP_SYSCALL_SLEEP}, {"test_dynptr_copy_xdp", SETUP_XDP_PROG}, + {"test_dynptr_memset_zero", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_memset_notzero", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_memset_zero_offset", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_memset_zero_adjusted", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_memset_overflow", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_memset_overflow_offset", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_memset_readonly", SETUP_SKB_PROG}, + {"test_dynptr_memset_xdp_chunks", SETUP_XDP_PROG}, {"test_ringbuf", SETUP_SYSCALL_SLEEP}, {"test_skb_readonly", SETUP_SKB_PROG}, {"test_dynptr_skb_data", SETUP_SKB_PROG}, diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index a0391f9da2d4..7d7081d05d47 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -681,6 +681,164 @@ out: return XDP_DROP; } +char memset_zero_data[] = "data to be zeroed"; + +SEC("?tp/syscalls/sys_enter_nanosleep") +int test_dynptr_memset_zero(void *ctx) +{ + __u32 data_sz = sizeof(memset_zero_data); + char zeroes[32] = {'\0'}; + struct bpf_dynptr ptr; + + err = bpf_dynptr_from_mem(memset_zero_data, data_sz, 0, &ptr); + err = err ?: bpf_dynptr_memset(&ptr, 0, data_sz, 0); + err = err ?: bpf_memcmp(zeroes, memset_zero_data, data_sz); + + return 0; +} + +#define DYNPTR_MEMSET_VAL 42 + +char memset_notzero_data[] = "data to be overwritten"; + +SEC("?tp/syscalls/sys_enter_nanosleep") +int test_dynptr_memset_notzero(void *ctx) +{ + u32 data_sz = sizeof(memset_notzero_data); + struct bpf_dynptr ptr; + char expected[32]; + + __builtin_memset(expected, DYNPTR_MEMSET_VAL, data_sz); + + err = bpf_dynptr_from_mem(memset_notzero_data, data_sz, 0, &ptr); + err = err ?: bpf_dynptr_memset(&ptr, 0, data_sz, DYNPTR_MEMSET_VAL); + err = err ?: bpf_memcmp(expected, memset_notzero_data, data_sz); + + return 0; +} + +char memset_zero_offset_data[] = "data to be zeroed partially"; + +SEC("?tp/syscalls/sys_enter_nanosleep") +int test_dynptr_memset_zero_offset(void *ctx) +{ + char expected[] = "data to \0\0\0\0eroed partially"; + __u32 data_sz = sizeof(memset_zero_offset_data); + struct bpf_dynptr ptr; + + err = bpf_dynptr_from_mem(memset_zero_offset_data, data_sz, 0, &ptr); + err = err ?: bpf_dynptr_memset(&ptr, 8, 4, 0); + err = err ?: bpf_memcmp(expected, memset_zero_offset_data, data_sz); + + return 0; +} + +char memset_zero_adjusted_data[] = "data to be zeroed partially"; + +SEC("?tp/syscalls/sys_enter_nanosleep") +int test_dynptr_memset_zero_adjusted(void *ctx) +{ + char expected[] = "data\0\0\0\0be zeroed partially"; + __u32 data_sz = sizeof(memset_zero_adjusted_data); + struct bpf_dynptr ptr; + + err = bpf_dynptr_from_mem(memset_zero_adjusted_data, data_sz, 0, &ptr); + err = err ?: bpf_dynptr_adjust(&ptr, 4, 8); + err = err ?: bpf_dynptr_memset(&ptr, 0, bpf_dynptr_size(&ptr), 0); + err = err ?: bpf_memcmp(expected, memset_zero_adjusted_data, data_sz); + + return 0; +} + +char memset_overflow_data[] = "memset overflow data"; + +SEC("?tp/syscalls/sys_enter_nanosleep") +int test_dynptr_memset_overflow(void *ctx) +{ + __u32 data_sz = sizeof(memset_overflow_data); + struct bpf_dynptr ptr; + int ret; + + err = bpf_dynptr_from_mem(memset_overflow_data, data_sz, 0, &ptr); + ret = bpf_dynptr_memset(&ptr, 0, data_sz + 1, 0); + if (ret != -E2BIG) + err = 1; + + return 0; +} + +SEC("?tp/syscalls/sys_enter_nanosleep") +int test_dynptr_memset_overflow_offset(void *ctx) +{ + __u32 data_sz = sizeof(memset_overflow_data); + struct bpf_dynptr ptr; + int ret; + + err = bpf_dynptr_from_mem(memset_overflow_data, data_sz, 0, &ptr); + ret = bpf_dynptr_memset(&ptr, 1, data_sz, 0); + if (ret != -E2BIG) + err = 1; + + return 0; +} + +SEC("?cgroup_skb/egress") +int test_dynptr_memset_readonly(struct __sk_buff *skb) +{ + struct bpf_dynptr ptr; + int ret; + + err = bpf_dynptr_from_skb(skb, 0, &ptr); + + /* cgroup skbs are read only, memset should fail */ + ret = bpf_dynptr_memset(&ptr, 0, bpf_dynptr_size(&ptr), 0); + if (ret != -EINVAL) + err = 1; + + return 0; +} + +#define min_t(type, x, y) ({ \ + type __x = (x); \ + type __y = (y); \ + __x < __y ? __x : __y; }) + +SEC("xdp") +int test_dynptr_memset_xdp_chunks(struct xdp_md *xdp) +{ + u32 data_sz, chunk_sz, offset = 0; + const int max_chunks = 200; + struct bpf_dynptr ptr_xdp; + char expected_buf[32]; + char buf[32]; + int i; + + __builtin_memset(expected_buf, DYNPTR_MEMSET_VAL, sizeof(expected_buf)); + + /* ptr_xdp is backed by non-contiguous memory */ + bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp); + data_sz = bpf_dynptr_size(&ptr_xdp); + + err = bpf_dynptr_memset(&ptr_xdp, 0, data_sz, DYNPTR_MEMSET_VAL); + if (err) + goto out; + + bpf_for(i, 0, max_chunks) { + offset = i * sizeof(buf); + if (offset >= data_sz) + goto out; + chunk_sz = min_t(u32, sizeof(buf), data_sz - offset); + err = bpf_dynptr_read(&buf, chunk_sz, &ptr_xdp, offset, 0); + if (err) + goto out; + err = bpf_memcmp(buf, expected_buf, sizeof(buf)); + if (err) + goto out; + } +out: + return XDP_DROP; +} + void *user_ptr; /* Contains the copy of the data pointed by user_ptr. * Size 384 to make it not fit into a single kernel chunk when copying -- cgit v1.2.3 From 5697683e133d05e6333ee1f12a7e8dd402593258 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 3 Jul 2025 13:48:18 -0700 Subject: selftests/bpf: Add tests for prog streams Add selftests to stress test the various facets of the stream API, memory allocation pattern, and ensuring dumping support is tested and functional. Reviewed-by: Emil Tsalapatis Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20250703204818.925464-13-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/stream.c | 141 ++++++++++++++++++++++++ tools/testing/selftests/bpf/progs/stream.c | 79 +++++++++++++ tools/testing/selftests/bpf/progs/stream_fail.c | 33 ++++++ 3 files changed, 253 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/stream.c create mode 100644 tools/testing/selftests/bpf/progs/stream.c create mode 100644 tools/testing/selftests/bpf/progs/stream_fail.c (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/stream.c b/tools/testing/selftests/bpf/prog_tests/stream.c new file mode 100644 index 000000000000..d9f0185dca61 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/stream.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include +#include +#include + +#include "stream.skel.h" +#include "stream_fail.skel.h" + +void test_stream_failure(void) +{ + RUN_TESTS(stream_fail); +} + +void test_stream_success(void) +{ + RUN_TESTS(stream); + return; +} + +struct { + int prog_off; + const char *errstr; +} stream_error_arr[] = { + { + offsetof(struct stream, progs.stream_cond_break), + "ERROR: Timeout detected for may_goto instruction\n" + "CPU: [0-9]+ UID: 0 PID: [0-9]+ Comm: .*\n" + "Call trace:\n" + "([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n" + "|[ \t]+[^\n]+\n)*", + }, + { + offsetof(struct stream, progs.stream_deadlock), + "ERROR: AA or ABBA deadlock detected for bpf_res_spin_lock\n" + "Attempted lock = (0x[0-9a-fA-F]+)\n" + "Total held locks = 1\n" + "Held lock\\[ 0\\] = \\1\n" // Lock address must match + "CPU: [0-9]+ UID: 0 PID: [0-9]+ Comm: .*\n" + "Call trace:\n" + "([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n" + "|[ \t]+[^\n]+\n)*", + }, +}; + +static int match_regex(const char *pattern, const char *string) +{ + int err, rc; + regex_t re; + + err = regcomp(&re, pattern, REG_EXTENDED | REG_NEWLINE); + if (err) + return -1; + rc = regexec(&re, string, 0, NULL, 0); + regfree(&re); + return rc == 0 ? 1 : 0; +} + +void test_stream_errors(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts); + struct stream *skel; + int ret, prog_fd; + char buf[1024]; + + skel = stream__open_and_load(); + if (!ASSERT_OK_PTR(skel, "stream__open_and_load")) + return; + + for (int i = 0; i < ARRAY_SIZE(stream_error_arr); i++) { + struct bpf_program **prog; + + prog = (struct bpf_program **)(((char *)skel) + stream_error_arr[i].prog_off); + prog_fd = bpf_program__fd(*prog); + ret = bpf_prog_test_run_opts(prog_fd, &opts); + ASSERT_OK(ret, "ret"); + ASSERT_OK(opts.retval, "retval"); + +#if !defined(__x86_64__) + ASSERT_TRUE(1, "Timed may_goto unsupported, skip."); + if (i == 0) { + ret = bpf_prog_stream_read(prog_fd, 2, buf, sizeof(buf), &ropts); + ASSERT_EQ(ret, 0, "stream read"); + continue; + } +#endif + + ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, sizeof(buf), &ropts); + ASSERT_GT(ret, 0, "stream read"); + ASSERT_LE(ret, 1023, "len for buf"); + buf[ret] = '\0'; + + ret = match_regex(stream_error_arr[i].errstr, buf); + if (!ASSERT_TRUE(ret == 1, "regex match")) + fprintf(stderr, "Output from stream:\n%s\n", buf); + } + + stream__destroy(skel); +} + +void test_stream_syscall(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts); + struct stream *skel; + int ret, prog_fd; + char buf[64]; + + skel = stream__open_and_load(); + if (!ASSERT_OK_PTR(skel, "stream__open_and_load")) + return; + + prog_fd = bpf_program__fd(skel->progs.stream_syscall); + ret = bpf_prog_test_run_opts(prog_fd, &opts); + ASSERT_OK(ret, "ret"); + ASSERT_OK(opts.retval, "retval"); + + ASSERT_LT(bpf_prog_stream_read(0, BPF_STREAM_STDOUT, buf, sizeof(buf), &ropts), 0, "error"); + ret = -errno; + ASSERT_EQ(ret, -EINVAL, "bad prog_fd"); + + ASSERT_LT(bpf_prog_stream_read(prog_fd, 0, buf, sizeof(buf), &ropts), 0, "error"); + ret = -errno; + ASSERT_EQ(ret, -ENOENT, "bad stream id"); + + ASSERT_LT(bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, NULL, sizeof(buf), NULL), 0, "error"); + ret = -errno; + ASSERT_EQ(ret, -EFAULT, "bad stream buf"); + + ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, buf, 2, NULL); + ASSERT_EQ(ret, 2, "bytes"); + ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, buf, 2, NULL); + ASSERT_EQ(ret, 1, "bytes"); + ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, buf, 1, &ropts); + ASSERT_EQ(ret, 0, "no bytes stdout"); + ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, 1, &ropts); + ASSERT_EQ(ret, 0, "no bytes stderr"); + + stream__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/stream.c b/tools/testing/selftests/bpf/progs/stream.c new file mode 100644 index 000000000000..35790897dc87 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/stream.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include +#include +#include +#include "bpf_misc.h" +#include "bpf_experimental.h" + +struct arr_elem { + struct bpf_res_spin_lock lock; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct arr_elem); +} arrmap SEC(".maps"); + +#define ENOSPC 28 +#define _STR "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + +int size; + +SEC("syscall") +__success __retval(0) +int stream_exhaust(void *ctx) +{ + /* Use global variable for loop convergence. */ + size = 0; + bpf_repeat(BPF_MAX_LOOPS) { + if (bpf_stream_printk(BPF_STDOUT, _STR) == -ENOSPC && size == 99954) + return 0; + size += sizeof(_STR) - 1; + } + return 1; +} + +SEC("syscall") +__success __retval(0) +int stream_cond_break(void *ctx) +{ + while (can_loop) + ; + return 0; +} + +SEC("syscall") +__success __retval(0) +int stream_deadlock(void *ctx) +{ + struct bpf_res_spin_lock *lock, *nlock; + + lock = bpf_map_lookup_elem(&arrmap, &(int){0}); + if (!lock) + return 1; + nlock = bpf_map_lookup_elem(&arrmap, &(int){0}); + if (!nlock) + return 1; + if (bpf_res_spin_lock(lock)) + return 1; + if (bpf_res_spin_lock(nlock)) { + bpf_res_spin_unlock(lock); + return 0; + } + bpf_res_spin_unlock(nlock); + bpf_res_spin_unlock(lock); + return 1; +} + +SEC("syscall") +__success __retval(0) +int stream_syscall(void *ctx) +{ + bpf_stream_printk(BPF_STDOUT, "foo"); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/stream_fail.c b/tools/testing/selftests/bpf/progs/stream_fail.c new file mode 100644 index 000000000000..b4a0d0cc8ec8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/stream_fail.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include +#include +#include +#include +#include "bpf_misc.h" + +SEC("syscall") +__failure __msg("Possibly NULL pointer passed") +int stream_vprintk_null_arg(void *ctx) +{ + bpf_stream_vprintk(BPF_STDOUT, "", NULL, 0, NULL); + return 0; +} + +SEC("syscall") +__failure __msg("R3 type=scalar expected=") +int stream_vprintk_scalar_arg(void *ctx) +{ + bpf_stream_vprintk(BPF_STDOUT, "", (void *)46, 0, NULL); + return 0; +} + +SEC("syscall") +__failure __msg("arg#1 doesn't point to a const string") +int stream_vprintk_string_arg(void *ctx) +{ + bpf_stream_vprintk(BPF_STDOUT, ctx, NULL, 0, NULL); + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 861bdc6314a49520d9b3778b763461517c1321c0 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Fri, 4 Jul 2025 00:23:11 +0200 Subject: selftests: net: extend SCM_PIDFD test to cover stale pidfds Extend SCM_PIDFD test scenarios to also cover dead task's pidfd retrieval and reading its exit info. Cc: linux-kselftest@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: netdev@vger.kernel.org Cc: Shuah Khan Cc: David S. Miller Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Simon Horman Cc: Christian Brauner Cc: Kuniyuki Iwashima Cc: Lennart Poettering Cc: Luca Boccassi Cc: David Rheinsberg Signed-off-by: Alexander Mikhalitsyn Link: https://lore.kernel.org/20250703222314.309967-8-aleksandr.mikhalitsyn@canonical.com Reviewed-by: Christian Brauner Signed-off-by: Christian Brauner --- tools/testing/selftests/net/af_unix/scm_pidfd.c | 217 +++++++++++++++++++----- 1 file changed, 173 insertions(+), 44 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/af_unix/scm_pidfd.c b/tools/testing/selftests/net/af_unix/scm_pidfd.c index 7e534594167e..37e034874034 100644 --- a/tools/testing/selftests/net/af_unix/scm_pidfd.c +++ b/tools/testing/selftests/net/af_unix/scm_pidfd.c @@ -15,6 +15,7 @@ #include #include +#include "../../pidfd/pidfd.h" #include "../../kselftest_harness.h" #define clean_errno() (errno == 0 ? "None" : strerror(errno)) @@ -26,6 +27,8 @@ #define SCM_PIDFD 0x04 #endif +#define CHILD_EXIT_CODE_OK 123 + static void child_die() { exit(1); @@ -126,16 +129,65 @@ out: return result; } +struct cmsg_data { + struct ucred *ucred; + int *pidfd; +}; + +static int parse_cmsg(struct msghdr *msg, struct cmsg_data *res) +{ + struct cmsghdr *cmsg; + int data = 0; + + if (msg->msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { + log_err("recvmsg: truncated"); + return 1; + } + + for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL; + cmsg = CMSG_NXTHDR(msg, cmsg)) { + if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_PIDFD) { + if (cmsg->cmsg_len < sizeof(*res->pidfd)) { + log_err("CMSG parse: SCM_PIDFD wrong len"); + return 1; + } + + res->pidfd = (void *)CMSG_DATA(cmsg); + } + + if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_CREDENTIALS) { + if (cmsg->cmsg_len < sizeof(*res->ucred)) { + log_err("CMSG parse: SCM_CREDENTIALS wrong len"); + return 1; + } + + res->ucred = (void *)CMSG_DATA(cmsg); + } + } + + if (!res->pidfd) { + log_err("CMSG parse: SCM_PIDFD not found"); + return 1; + } + + if (!res->ucred) { + log_err("CMSG parse: SCM_CREDENTIALS not found"); + return 1; + } + + return 0; +} + static int cmsg_check(int fd) { struct msghdr msg = { 0 }; - struct cmsghdr *cmsg; + struct cmsg_data res; struct iovec iov; - struct ucred *ucred = NULL; int data = 0; char control[CMSG_SPACE(sizeof(struct ucred)) + CMSG_SPACE(sizeof(int))] = { 0 }; - int *pidfd = NULL; pid_t parent_pid; int err; @@ -158,53 +210,99 @@ static int cmsg_check(int fd) return 1; } - for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; - cmsg = CMSG_NXTHDR(&msg, cmsg)) { - if (cmsg->cmsg_level == SOL_SOCKET && - cmsg->cmsg_type == SCM_PIDFD) { - if (cmsg->cmsg_len < sizeof(*pidfd)) { - log_err("CMSG parse: SCM_PIDFD wrong len"); - return 1; - } + /* send(pfd, "x", sizeof(char), 0) */ + if (data != 'x') { + log_err("recvmsg: data corruption"); + return 1; + } - pidfd = (void *)CMSG_DATA(cmsg); - } + if (parse_cmsg(&msg, &res)) { + log_err("CMSG parse: parse_cmsg() failed"); + return 1; + } - if (cmsg->cmsg_level == SOL_SOCKET && - cmsg->cmsg_type == SCM_CREDENTIALS) { - if (cmsg->cmsg_len < sizeof(*ucred)) { - log_err("CMSG parse: SCM_CREDENTIALS wrong len"); - return 1; - } + /* pidfd from SCM_PIDFD should point to the parent process PID */ + parent_pid = + get_pid_from_fdinfo_file(*res.pidfd, "Pid:", sizeof("Pid:") - 1); + if (parent_pid != getppid()) { + log_err("wrong SCM_PIDFD %d != %d", parent_pid, getppid()); + close(*res.pidfd); + return 1; + } - ucred = (void *)CMSG_DATA(cmsg); - } + close(*res.pidfd); + return 0; +} + +static int cmsg_check_dead(int fd, int expected_pid) +{ + int err; + struct msghdr msg = { 0 }; + struct cmsg_data res; + struct iovec iov; + int data = 0; + char control[CMSG_SPACE(sizeof(struct ucred)) + + CMSG_SPACE(sizeof(int))] = { 0 }; + pid_t client_pid; + struct pidfd_info info = { + .mask = PIDFD_INFO_EXIT, + }; + + iov.iov_base = &data; + iov.iov_len = sizeof(data); + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + err = recvmsg(fd, &msg, 0); + if (err < 0) { + log_err("recvmsg"); + return 1; } - /* send(pfd, "x", sizeof(char), 0) */ - if (data != 'x') { + if (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { + log_err("recvmsg: truncated"); + return 1; + } + + /* send(cfd, "y", sizeof(char), 0) */ + if (data != 'y') { log_err("recvmsg: data corruption"); return 1; } - if (!pidfd) { - log_err("CMSG parse: SCM_PIDFD not found"); + if (parse_cmsg(&msg, &res)) { + log_err("CMSG parse: parse_cmsg() failed"); return 1; } - if (!ucred) { - log_err("CMSG parse: SCM_CREDENTIALS not found"); + /* + * pidfd from SCM_PIDFD should point to the client_pid. + * Let's read exit information and check if it's what + * we expect to see. + */ + if (ioctl(*res.pidfd, PIDFD_GET_INFO, &info)) { + log_err("%s: ioctl(PIDFD_GET_INFO) failed", __func__); + close(*res.pidfd); return 1; } - /* pidfd from SCM_PIDFD should point to the parent process PID */ - parent_pid = - get_pid_from_fdinfo_file(*pidfd, "Pid:", sizeof("Pid:") - 1); - if (parent_pid != getppid()) { - log_err("wrong SCM_PIDFD %d != %d", parent_pid, getppid()); + if (!(info.mask & PIDFD_INFO_EXIT)) { + log_err("%s: No exit information from ioctl(PIDFD_GET_INFO)", __func__); + close(*res.pidfd); return 1; } + err = WIFEXITED(info.exit_code) ? WEXITSTATUS(info.exit_code) : 1; + if (err != CHILD_EXIT_CODE_OK) { + log_err("%s: wrong exit_code %d != %d", __func__, err, CHILD_EXIT_CODE_OK); + close(*res.pidfd); + return 1; + } + + close(*res.pidfd); return 0; } @@ -291,6 +389,24 @@ static void fill_sockaddr(struct sock_addr *addr, bool abstract) memcpy(sun_path_buf, addr->sock_name, strlen(addr->sock_name)); } +static int sk_enable_cred_pass(int sk) +{ + int on = 0; + + on = 1; + if (setsockopt(sk, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on))) { + log_err("Failed to set SO_PASSCRED"); + return 1; + } + + if (setsockopt(sk, SOL_SOCKET, SO_PASSPIDFD, &on, sizeof(on))) { + log_err("Failed to set SO_PASSPIDFD"); + return 1; + } + + return 0; +} + static void client(FIXTURE_DATA(scm_pidfd) *self, const FIXTURE_VARIANT(scm_pidfd) *variant) { @@ -299,7 +415,6 @@ static void client(FIXTURE_DATA(scm_pidfd) *self, struct ucred peer_cred; int peer_pidfd; pid_t peer_pid; - int on = 0; cfd = socket(AF_UNIX, variant->type, 0); if (cfd < 0) { @@ -322,14 +437,8 @@ static void client(FIXTURE_DATA(scm_pidfd) *self, child_die(); } - on = 1; - if (setsockopt(cfd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on))) { - log_err("Failed to set SO_PASSCRED"); - child_die(); - } - - if (setsockopt(cfd, SOL_SOCKET, SO_PASSPIDFD, &on, sizeof(on))) { - log_err("Failed to set SO_PASSPIDFD"); + if (sk_enable_cred_pass(cfd)) { + log_err("sk_enable_cred_pass() failed"); child_die(); } @@ -340,6 +449,12 @@ static void client(FIXTURE_DATA(scm_pidfd) *self, child_die(); } + /* send something to the parent so it can receive SCM_PIDFD too and validate it */ + if (send(cfd, "y", sizeof(char), 0) == -1) { + log_err("Failed to send(cfd, \"y\", sizeof(char), 0)"); + child_die(); + } + /* skip further for SOCK_DGRAM as it's not applicable */ if (variant->type == SOCK_DGRAM) return; @@ -398,7 +513,13 @@ TEST_F(scm_pidfd, test) close(self->server); close(self->startup_pipe[0]); client(self, variant); - exit(0); + + /* + * It's a bit unusual, but in case of success we return non-zero + * exit code (CHILD_EXIT_CODE_OK) and then we expect to read it + * from ioctl(PIDFD_GET_INFO) in cmsg_check_dead(). + */ + exit(CHILD_EXIT_CODE_OK); } close(self->startup_pipe[1]); @@ -421,9 +542,17 @@ TEST_F(scm_pidfd, test) ASSERT_NE(-1, err); } - close(pfd); waitpid(self->client_pid, &child_status, 0); - ASSERT_EQ(0, WIFEXITED(child_status) ? WEXITSTATUS(child_status) : 1); + /* see comment before exit(CHILD_EXIT_CODE_OK) */ + ASSERT_EQ(CHILD_EXIT_CODE_OK, WIFEXITED(child_status) ? WEXITSTATUS(child_status) : 1); + + err = sk_enable_cred_pass(pfd); + ASSERT_EQ(0, err); + + err = cmsg_check_dead(pfd, self->client_pid); + ASSERT_EQ(0, err); + + close(pfd); } TEST_HARNESS_MAIN -- cgit v1.2.3 From 02217ad447d7b1dd592cfc8253a07375d0b32aa7 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 23 Jun 2025 23:15:54 +0200 Subject: tools/nolibc: add support for SuperH MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for SuperH/"sh" to nolibc. Only sh4 is tested for now. The startup code is special: __nolibc_entrypoint_epilogue() calls __builtin_unreachable() which emits a call to abort(). To make this work a function prologue is generated to set up a GOT pointer which corrupts "sp". __builtin_unreachable() is necessary for __attribute__((noreturn)). Also depending on compiler flags (for example -fPIC) even more prologue is generated. Work around this by defining a nested function in asm. Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70216 Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Acked-by: Rob Landley Acked-by: D. Jeff Dionne Tested-by: John Paul Adrian Glaubitz Link: https://lore.kernel.org/r/20250623-nolibc-sh-v2-3-0f5b4b303025@weissschuh.net --- tools/include/nolibc/arch-sh.h | 162 +++++++++++++++++++++++++ tools/include/nolibc/arch.h | 2 + tools/testing/selftests/nolibc/Makefile.nolibc | 7 ++ tools/testing/selftests/nolibc/run-tests.sh | 3 +- 4 files changed, 173 insertions(+), 1 deletion(-) create mode 100644 tools/include/nolibc/arch-sh.h (limited to 'tools/testing/selftests') diff --git a/tools/include/nolibc/arch-sh.h b/tools/include/nolibc/arch-sh.h new file mode 100644 index 000000000000..a96b8914607e --- /dev/null +++ b/tools/include/nolibc/arch-sh.h @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * SuperH specific definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh + */ + +#ifndef _NOLIBC_ARCH_SH_H +#define _NOLIBC_ARCH_SH_H + +#include "compiler.h" +#include "crt.h" + +/* + * Syscalls for SuperH: + * - registers are 32bit wide + * - syscall number is passed in r3 + * - arguments are in r4, r5, r6, r7, r0, r1, r2 + * - the system call is performed by calling trapa #31 + * - syscall return value is in r0 + */ + +#define my_syscall0(num) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + register long _arg1 __asm__ ("r4") = (long)(arg1); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num), "r"(_arg1) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + register long _arg1 __asm__ ("r4") = (long)(arg1); \ + register long _arg2 __asm__ ("r5") = (long)(arg2); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num), "r"(_arg1), "r"(_arg2) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + register long _arg1 __asm__ ("r4") = (long)(arg1); \ + register long _arg2 __asm__ ("r5") = (long)(arg2); \ + register long _arg3 __asm__ ("r6") = (long)(arg3); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num), "r"(_arg1), "r"(_arg2), "r"(_arg3) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + register long _arg1 __asm__ ("r4") = (long)(arg1); \ + register long _arg2 __asm__ ("r5") = (long)(arg2); \ + register long _arg3 __asm__ ("r6") = (long)(arg3); \ + register long _arg4 __asm__ ("r7") = (long)(arg4); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num), "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + register long _arg1 __asm__ ("r4") = (long)(arg1); \ + register long _arg2 __asm__ ("r5") = (long)(arg2); \ + register long _arg3 __asm__ ("r6") = (long)(arg3); \ + register long _arg4 __asm__ ("r7") = (long)(arg4); \ + register long _arg5 __asm__ ("r0") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num), "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "r"(_arg5) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + register long _arg1 __asm__ ("r4") = (long)(arg1); \ + register long _arg2 __asm__ ("r5") = (long)(arg2); \ + register long _arg3 __asm__ ("r6") = (long)(arg3); \ + register long _arg4 __asm__ ("r7") = (long)(arg4); \ + register long _arg5 __asm__ ("r0") = (long)(arg5); \ + register long _arg6 __asm__ ("r1") = (long)(arg6); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num), "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "r"(_arg5), "r"(_arg6) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +/* startup code */ +void _start_wrapper(void); +void __attribute__((weak,noreturn)) __nolibc_entrypoint __no_stack_protector _start_wrapper(void) +{ + __asm__ volatile ( + ".global _start\n" /* The C function will have a prologue, */ + ".type _start, @function\n" /* corrupting "sp" */ + ".weak _start\n" + "_start:\n" + + "mov sp, r4\n" /* save argc pointer to r4, as arg1 of _start_c */ + "bsr _start_c\n" /* transfer to c runtime */ + "nop\n" /* delay slot */ + + ".size _start, .-_start\n" + ); + __nolibc_entrypoint_epilogue(); +} + +#endif /* _NOLIBC_ARCH_SH_H */ diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h index db69537c71d8..426c89198135 100644 --- a/tools/include/nolibc/arch.h +++ b/tools/include/nolibc/arch.h @@ -35,6 +35,8 @@ #include "arch-sparc.h" #elif defined(__m68k__) #include "arch-m68k.h" +#elif defined(__sh__) +#include "arch-sh.h" #else #error Unsupported Architecture #endif diff --git a/tools/testing/selftests/nolibc/Makefile.nolibc b/tools/testing/selftests/nolibc/Makefile.nolibc index 450d6add864c..cc5ee6c67586 100644 --- a/tools/testing/selftests/nolibc/Makefile.nolibc +++ b/tools/testing/selftests/nolibc/Makefile.nolibc @@ -62,6 +62,7 @@ ARCH_riscv64 = riscv ARCH_s390x = s390 ARCH_sparc32 = sparc ARCH_sparc64 = sparc +ARCH_sh4 = sh ARCH := $(or $(ARCH_$(XARCH)),$(XARCH)) # kernel image names by architecture @@ -89,6 +90,7 @@ IMAGE_loongarch = arch/loongarch/boot/vmlinuz.efi IMAGE_sparc32 = arch/sparc/boot/image IMAGE_sparc64 = arch/sparc/boot/image IMAGE_m68k = vmlinux +IMAGE_sh4 = arch/sh/boot/zImage IMAGE = $(objtree)/$(IMAGE_$(XARCH)) IMAGE_NAME = $(notdir $(IMAGE)) @@ -117,11 +119,13 @@ DEFCONFIG_loongarch = defconfig DEFCONFIG_sparc32 = sparc32_defconfig DEFCONFIG_sparc64 = sparc64_defconfig DEFCONFIG_m68k = virt_defconfig +DEFCONFIG_sh4 = rts7751r2dplus_defconfig DEFCONFIG = $(DEFCONFIG_$(XARCH)) EXTRACONFIG_arm = -e CONFIG_NAMESPACES EXTRACONFIG_armthumb = -e CONFIG_NAMESPACES EXTRACONFIG_m68k = -e CONFIG_BLK_DEV_INITRD +EXTRACONFIG_sh4 = -e CONFIG_BLK_DEV_INITRD -e CONFIG_CMDLINE_FROM_BOOTLOADER EXTRACONFIG = $(EXTRACONFIG_$(XARCH)) # optional tests to run (default = all) @@ -152,6 +156,7 @@ QEMU_ARCH_loongarch = loongarch64 QEMU_ARCH_sparc32 = sparc QEMU_ARCH_sparc64 = sparc64 QEMU_ARCH_m68k = m68k +QEMU_ARCH_sh4 = sh4 QEMU_ARCH = $(QEMU_ARCH_$(XARCH)) QEMU_ARCH_USER_ppc64le = ppc64le @@ -191,6 +196,7 @@ QEMU_ARGS_loongarch = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=N QEMU_ARGS_sparc32 = -M SS-5 -m 256M -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_sparc64 = -M sun4u -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_m68k = -M virt -append "console=ttyGF0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_sh4 = -M r2d -serial file:/dev/stdout -append "console=ttySC1,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS = -m 1G $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_BIOS) $(QEMU_ARGS_EXTRA) # OUTPUT is only set when run from the main makefile, otherwise @@ -218,6 +224,7 @@ CFLAGS_mipsn32be = -EB -mabi=n32 -march=mips64r6 CFLAGS_mips64le = -EL -mabi=64 -march=mips64r6 CFLAGS_mips64be = -EB -mabi=64 -march=mips64r2 CFLAGS_sparc32 = $(call cc-option,-m32) +CFLAGS_sh4 = -ml -m4 ifeq ($(origin XARCH),command line) CFLAGS_XARCH = $(CFLAGS_$(XARCH)) endif diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh index fc8351994566..43eb30be316a 100755 --- a/tools/testing/selftests/nolibc/run-tests.sh +++ b/tools/testing/selftests/nolibc/run-tests.sh @@ -27,6 +27,7 @@ all_archs=( loongarch sparc32 sparc64 m68k + sh4 ) archs="${all_archs[@]}" @@ -187,7 +188,7 @@ test_arch() { echo "Unsupported configuration" return fi - if [ "$arch" = "m68k" ] && [ "$llvm" = "1" ]; then + if [ "$arch" = "m68k" -o "$arch" = "sh4" ] && [ "$llvm" = "1" ]; then echo "Unsupported configuration" return fi -- cgit v1.2.3 From 696bf15792524aa869ebdbee2643881a77491a83 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 3 Jul 2025 17:00:18 +0100 Subject: selftests/nolibc: Add coverage of vfork() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Generalise the existing fork() test to also cover the newly added vfork() implementation. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20250703-arm64-gcs-vfork-exit-v3-4-1e9a9d2ddbbe@kernel.org Signed-off-by: Thomas Weißschuh --- tools/testing/selftests/nolibc/nolibc-test.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 52640d8ae402..b5bca1dcf36e 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -877,7 +877,12 @@ int test_file_stream(void) return 0; } -int test_fork(void) +enum fork_type { + FORK_STANDARD, + FORK_VFORK, +}; + +int test_fork(enum fork_type type) { int status; pid_t pid; @@ -886,14 +891,23 @@ int test_fork(void) fflush(stdout); fflush(stderr); - pid = fork(); + switch (type) { + case FORK_STANDARD: + pid = fork(); + break; + case FORK_VFORK: + pid = vfork(); + break; + default: + return 1; + } switch (pid) { case -1: return 1; case 0: - exit(123); + _exit(123); default: pid = waitpid(pid, &status, 0); @@ -1330,7 +1344,7 @@ int run_syscall(int min, int max) CASE_TEST(dup3_m1); tmp = dup3(-1, 100, 0); EXPECT_SYSER(1, tmp, -1, EBADF); if (tmp != -1) close(tmp); break; CASE_TEST(execve_root); EXPECT_SYSER(1, execve("/", (char*[]){ [0] = "/", [1] = NULL }, NULL), -1, EACCES); break; CASE_TEST(file_stream); EXPECT_SYSZR(1, test_file_stream()); break; - CASE_TEST(fork); EXPECT_SYSZR(1, test_fork()); break; + CASE_TEST(fork); EXPECT_SYSZR(1, test_fork(FORK_STANDARD)); break; CASE_TEST(getdents64_root); EXPECT_SYSNE(1, test_getdents64("/"), -1); break; CASE_TEST(getdents64_null); EXPECT_SYSER(1, test_getdents64("/dev/null"), -1, ENOTDIR); break; CASE_TEST(directories); EXPECT_SYSZR(proc, test_dirent()); break; @@ -1374,6 +1388,7 @@ int run_syscall(int min, int max) CASE_TEST(uname_fault); EXPECT_SYSER(1, uname(NULL), -1, EFAULT); break; CASE_TEST(unlink_root); EXPECT_SYSER(1, unlink("/"), -1, EISDIR); break; CASE_TEST(unlink_blah); EXPECT_SYSER(1, unlink("/proc/self/blah"), -1, ENOENT); break; + CASE_TEST(vfork); EXPECT_SYSZR(1, test_fork(FORK_VFORK)); break; CASE_TEST(wait_child); EXPECT_SYSER(1, wait(&tmp), -1, ECHILD); break; CASE_TEST(waitpid_min); EXPECT_SYSER(1, waitpid(INT_MIN, &tmp, WNOHANG), -1, ESRCH); break; CASE_TEST(waitpid_child); EXPECT_SYSER(1, waitpid(getpid(), &tmp, WNOHANG), -1, ECHILD); break; -- cgit v1.2.3 From 1536aa0fb1e09cb50f401ec4852c60f38173d751 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 3 Jul 2025 17:00:17 +0100 Subject: kselftest/arm64: Add a test for vfork() with GCS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ensure that we've got at least some coverage of the special cases around vfork() by adding a test case in basic-gcs doing the same thing as the plain fork() one - vfork(), do a few checks and then return to the parent. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20250703-arm64-gcs-vfork-exit-v3-3-1e9a9d2ddbbe@kernel.org Signed-off-by: Thomas Weißschuh --- tools/testing/selftests/arm64/gcs/basic-gcs.c | 63 +++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/arm64/gcs/basic-gcs.c b/tools/testing/selftests/arm64/gcs/basic-gcs.c index 3fb9742342a3..54f9c888249d 100644 --- a/tools/testing/selftests/arm64/gcs/basic-gcs.c +++ b/tools/testing/selftests/arm64/gcs/basic-gcs.c @@ -298,6 +298,68 @@ out: return pass; } +/* A vfork()ed process can run and exit */ +static bool test_vfork(void) +{ + unsigned long child_mode; + int ret, status; + pid_t pid; + bool pass = true; + + pid = vfork(); + if (pid == -1) { + ksft_print_msg("vfork() failed: %d\n", errno); + pass = false; + goto out; + } + if (pid == 0) { + /* + * In child, make sure we can call a function, read + * the GCS pointer and status and then exit. + */ + valid_gcs_function(); + get_gcspr(); + + ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, + &child_mode, 0, 0, 0); + if (ret == 0 && !(child_mode & PR_SHADOW_STACK_ENABLE)) { + ksft_print_msg("GCS not enabled in child\n"); + ret = EXIT_FAILURE; + } + + _exit(ret); + } + + /* + * In parent, check we can still do function calls then check + * on the child. + */ + valid_gcs_function(); + + ksft_print_msg("Waiting for child %d\n", pid); + + ret = waitpid(pid, &status, 0); + if (ret == -1) { + ksft_print_msg("Failed to wait for child: %d\n", + errno); + return false; + } + + if (!WIFEXITED(status)) { + ksft_print_msg("Child exited due to signal %d\n", + WTERMSIG(status)); + pass = false; + } else if (WEXITSTATUS(status)) { + ksft_print_msg("Child exited with status %d\n", + WEXITSTATUS(status)); + pass = false; + } + +out: + + return pass; +} + typedef bool (*gcs_test)(void); static struct { @@ -314,6 +376,7 @@ static struct { { "enable_invalid", enable_invalid, true }, { "map_guarded_stack", map_guarded_stack }, { "fork", test_fork }, + { "vfork", test_vfork }, }; int main(void) -- cgit v1.2.3 From 46b0a67e8f22d2dbc679b37b26c5ff0f50424847 Mon Sep 17 00:00:00 2001 From: Terry Tritton Date: Fri, 4 Jul 2025 11:37:49 +0100 Subject: selftests/futex: Add futex_numa to .gitignore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit futex_numa was never added to the .gitignore file. Add it. Fixes: 9140f57c1c13 ("futex,selftests: Add another FUTEX2_NUMA selftest") Signed-off-by: Terry Tritton Signed-off-by: Thomas Gleixner Reviewed-by: André Almeida Link: https://lore.kernel.org/all/20250704103749.10341-1-terry.tritton@linaro.org --- tools/testing/selftests/futex/functional/.gitignore | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore index 7b24ae89594a..776ad658f75e 100644 --- a/tools/testing/selftests/futex/functional/.gitignore +++ b/tools/testing/selftests/futex/functional/.gitignore @@ -11,3 +11,4 @@ futex_wait_timeout futex_wait_uninitialized_heap futex_wait_wouldblock futex_waitv +futex_numa -- cgit v1.2.3 From 7c02bc4088af55195f16bfa565127b4864c7e248 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Fri, 4 Jul 2025 16:19:48 +0200 Subject: tools/nolibc: add support for clock_nanosleep() and nanosleep() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also add some tests. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250704-nolibc-nanosleep-v1-1-d79c19701952@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/time.h | 34 ++++++++++++++++++++++++++++ tools/testing/selftests/nolibc/nolibc-test.c | 1 + 2 files changed, 35 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h index fc387940d51f..d02bc44d2643 100644 --- a/tools/include/nolibc/time.h +++ b/tools/include/nolibc/time.h @@ -36,6 +36,8 @@ void __nolibc_timespec_kernel_to_user(const struct __kernel_timespec *kts, struc * int clock_getres(clockid_t clockid, struct timespec *res); * int clock_gettime(clockid_t clockid, struct timespec *tp); * int clock_settime(clockid_t clockid, const struct timespec *tp); + * int clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqtp, + * struct timespec *rmtp) */ static __attribute__((unused)) @@ -107,6 +109,32 @@ int clock_settime(clockid_t clockid, struct timespec *tp) return __sysret(sys_clock_settime(clockid, tp)); } +static __attribute__((unused)) +int sys_clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqtp, + struct timespec *rmtp) +{ +#if defined(__NR_clock_nanosleep) + return my_syscall4(__NR_clock_nanosleep, clockid, flags, rqtp, rmtp); +#elif defined(__NR_clock_nanosleep_time64) + struct __kernel_timespec krqtp, krmtp; + int ret; + + __nolibc_timespec_user_to_kernel(rqtp, &krqtp); + ret = my_syscall4(__NR_clock_nanosleep_time64, clockid, flags, &krqtp, &krmtp); + if (rmtp) + __nolibc_timespec_kernel_to_user(&krmtp, rmtp); + return ret; +#else + return __nolibc_enosys(__func__, clockid, flags, rqtp, rmtp); +#endif +} + +static __attribute__((unused)) +int clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqtp, + struct timespec *rmtp) +{ + return __sysret(sys_clock_nanosleep(clockid, flags, rqtp, rmtp)); +} static __inline__ double difftime(time_t time1, time_t time2) @@ -114,6 +142,12 @@ double difftime(time_t time1, time_t time2) return time1 - time2; } +static __inline__ +int nanosleep(const struct timespec *rqtp, struct timespec *rmtp) +{ + return clock_nanosleep(CLOCK_REALTIME, 0, rqtp, rmtp); +} + static __attribute__((unused)) time_t time(time_t *tptr) diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index b5bca1dcf36e..315229233930 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -1363,6 +1363,7 @@ int run_syscall(int min, int max) CASE_TEST(mmap_bad); EXPECT_PTRER(1, mmap(NULL, 0, PROT_READ, MAP_PRIVATE, 0, 0), MAP_FAILED, EINVAL); break; CASE_TEST(munmap_bad); EXPECT_SYSER(1, munmap(NULL, 0), -1, EINVAL); break; CASE_TEST(mmap_munmap_good); EXPECT_SYSZR(1, test_mmap_munmap()); break; + CASE_TEST(nanosleep); ts.tv_nsec = -1; EXPECT_SYSER(1, nanosleep(&ts, NULL), -1, EINVAL); break; CASE_TEST(open_tty); EXPECT_SYSNE(1, tmp = open("/dev/null", O_RDONLY), -1); if (tmp != -1) close(tmp); break; CASE_TEST(open_blah); EXPECT_SYSER(1, tmp = open("/proc/self/blah", O_RDONLY), -1, ENOENT); if (tmp != -1) close(tmp); break; CASE_TEST(openat_dir); EXPECT_SYSZR(1, test_openat()); break; -- cgit v1.2.3 From 2b1ed5f7f8ab82597abfff56a39f056f8592df43 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Fri, 4 Jul 2025 15:43:12 +0200 Subject: selftests/nolibc: create /dev/full when running as PID 1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An upcoming testcase will use /dev/full. Make sure it is always present. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250704-nolibc-printf-error-v1-1-74b7a092433b@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/testing/selftests/nolibc/nolibc-test.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 315229233930..ef80861105f5 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -1778,12 +1778,14 @@ int prepare(void) if (stat("/dev/.", &stat_buf) == 0 || mkdir("/dev", 0755) == 0) { if (stat("/dev/console", &stat_buf) != 0 || stat("/dev/null", &stat_buf) != 0 || - stat("/dev/zero", &stat_buf) != 0) { + stat("/dev/zero", &stat_buf) != 0 || + stat("/dev/full", &stat_buf) != 0) { /* try devtmpfs first, otherwise fall back to manual creation */ if (mount("/dev", "/dev", "devtmpfs", 0, 0) != 0) { mknod("/dev/console", 0600 | S_IFCHR, makedev(5, 1)); mknod("/dev/null", 0666 | S_IFCHR, makedev(1, 3)); mknod("/dev/zero", 0666 | S_IFCHR, makedev(1, 5)); + mknod("/dev/full", 0666 | S_IFCHR, makedev(1, 7)); } } } -- cgit v1.2.3 From 4a40129087a4c32135bb1177a57bbbe6ee646f1a Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Fri, 4 Jul 2025 15:43:13 +0200 Subject: selftests/nolibc: correctly report errors from printf() and friends MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When an error is encountered by printf() it needs to be reported. errno() is already set by the callback. sprintf() is different, but that keeps working and is already tested. Also add a new test. Fixes: 7e4346f4a3a6 ("tools/nolibc/stdio: add a minimal [vf]printf() implementation") Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250704-nolibc-printf-error-v1-2-74b7a092433b@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/stdio.h | 4 ++-- tools/testing/selftests/nolibc/nolibc-test.c | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h index c470d334ef3f..7630234408c5 100644 --- a/tools/include/nolibc/stdio.h +++ b/tools/include/nolibc/stdio.h @@ -358,11 +358,11 @@ int __nolibc_printf(__nolibc_printf_cb cb, intptr_t state, size_t n, const char n -= w; while (width-- > w) { if (cb(state, " ", 1) != 0) - break; + return -1; written += 1; } if (cb(state, outstr, w) != 0) - break; + return -1; } written += len; diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index ef80861105f5..a297ee0d6d07 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -1662,6 +1662,28 @@ int test_strerror(void) return 0; } +static int test_printf_error(void) +{ + int fd, ret, saved_errno; + + fd = open("/dev/full", O_RDWR); + if (fd == -1) + return 1; + + errno = 0; + ret = dprintf(fd, "foo"); + saved_errno = errno; + close(fd); + + if (ret != -1) + return 2; + + if (saved_errno != ENOSPC) + return 3; + + return 0; +} + static int run_printf(int min, int max) { int test; @@ -1691,6 +1713,7 @@ static int run_printf(int min, int max) CASE_TEST(width_trunc); EXPECT_VFPRINTF(25, " ", "%25d", 1); break; CASE_TEST(scanf); EXPECT_ZR(1, test_scanf()); break; CASE_TEST(strerror); EXPECT_ZR(1, test_strerror()); break; + CASE_TEST(printf_error); EXPECT_ZR(1, test_printf_error()); break; case __LINE__: return ret; /* must be last */ /* note: do not set any defaults so as to permit holes above */ -- cgit v1.2.3 From d0a48dc4df5c986bf8c3caf4d8fc15c480273052 Mon Sep 17 00:00:00 2001 From: Terry Tritton Date: Fri, 4 Jul 2025 20:02:34 +0100 Subject: selftests/futex: Convert 32-bit timespec to 64-bit version for 32-bit compatibility mode sys_futex_wait() expects a struct __kernel_timespec pointer for the timeout, but the provided struct timespec pointer is of type struct old_timespec32 when compiled for 32-bit architectures, unless they use 64-bit timespecs already. Make it work for all variants by converting the provided timespec value into a local struct __kernel_timespec and provide a pointer to it to the syscall. This is a pointless operation for 64-bit, but this is not a hotpath operation, so keep it simple. This fix is based off [1] Originally-by: Wei Gao Signed-off-by: Terry Tritton Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250704190234.14230-1-terry.tritton@linaro.org Link: https://lore.kernel.org/all/20231203235117.29677-1-wegao@suse.com/ [1] --- tools/testing/selftests/futex/include/futex2test.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/futex/include/futex2test.h b/tools/testing/selftests/futex/include/futex2test.h index ea79662405bc..1f625b39948a 100644 --- a/tools/testing/selftests/futex/include/futex2test.h +++ b/tools/testing/selftests/futex/include/futex2test.h @@ -4,6 +4,7 @@ * * Copyright 2021 Collabora Ltd. */ +#include #include #define u64_to_ptr(x) ((void *)(uintptr_t)(x)) @@ -65,7 +66,12 @@ struct futex32_numa { static inline int futex_waitv(volatile struct futex_waitv *waiters, unsigned long nr_waiters, unsigned long flags, struct timespec *timo, clockid_t clockid) { - return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, timo, clockid); + struct __kernel_timespec ts = { + .tv_sec = timo->tv_sec, + .tv_nsec = timo->tv_nsec, + }; + + return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, &ts, clockid); } /* -- cgit v1.2.3 From 2d5c91e1cc14c3511d163ac36ee869ecf3a29cc2 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Fri, 4 Jul 2025 16:03:48 -0700 Subject: bpf: rdonly_untrusted_mem for btf id walk pointer leafs When processing a load from a PTR_TO_BTF_ID, the verifier calculates the type of the loaded structure field based on the load offset. For example, given the following types: struct foo { struct foo *a; int *b; } *p; The verifier would calculate the type of `p->a` as a pointer to `struct foo`. However, the type of `p->b` is currently calculated as a SCALAR_VALUE. This commit updates the logic for processing PTR_TO_BTF_ID to instead calculate the type of p->b as PTR_TO_MEM|MEM_RDONLY|PTR_UNTRUSTED. This change allows further dereferencing of such pointers (using probe memory instructions). Suggested-by: Alexei Starovoitov Acked-by: Kumar Kartikeya Dwivedi Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250704230354.1323244-3-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/btf.c | 6 ++++++ kernel/bpf/verifier.c | 5 +++++ tools/testing/selftests/bpf/prog_tests/linked_list.c | 2 +- 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 05fd64a371af..b3c8a95d38fb 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6915,6 +6915,7 @@ enum bpf_struct_walk_result { /* < 0 error */ WALK_SCALAR = 0, WALK_PTR, + WALK_PTR_UNTRUSTED, WALK_STRUCT, }; @@ -7156,6 +7157,8 @@ error: *field_name = mname; return WALK_PTR; } + + return WALK_PTR_UNTRUSTED; } /* Allow more flexible access within an int as long as @@ -7228,6 +7231,9 @@ int btf_struct_access(struct bpf_verifier_log *log, *next_btf_id = id; *flag = tmp_flag; return PTR_TO_BTF_ID; + case WALK_PTR_UNTRUSTED: + *flag = MEM_RDONLY | PTR_UNTRUSTED; + return PTR_TO_MEM; case WALK_SCALAR: return SCALAR_VALUE; case WALK_STRUCT: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9e8328f40b88..87ab00b40d9f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2814,6 +2814,11 @@ static int mark_btf_ld_reg(struct bpf_verifier_env *env, if (type_may_be_null(flag)) regs[regno].id = ++env->id_gen; return 0; + case PTR_TO_MEM: + mark_reg_known_zero(env, regs, regno); + regs[regno].type = PTR_TO_MEM | flag; + regs[regno].mem_size = 0; + return 0; default: verifier_bug(env, "unexpected reg_type %d in %s\n", reg_type, __func__); return -EFAULT; diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c index 5266c7022863..14c5a7ef0e87 100644 --- a/tools/testing/selftests/bpf/prog_tests/linked_list.c +++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c @@ -72,7 +72,7 @@ static struct { { "new_null_ret", "R0 invalid mem access 'ptr_or_null_'" }, { "obj_new_acq", "Unreleased reference id=" }, { "use_after_drop", "invalid mem access 'scalar'" }, - { "ptr_walk_scalar", "type=scalar expected=percpu_ptr_" }, + { "ptr_walk_scalar", "type=rdonly_untrusted_mem expected=percpu_ptr_" }, { "direct_read_lock", "direct access to bpf_spin_lock is disallowed" }, { "direct_write_lock", "direct access to bpf_spin_lock is disallowed" }, { "direct_read_head", "direct access to bpf_list_head is disallowed" }, -- cgit v1.2.3 From f1f5d6f25d098e21b0a53237cfd055fc28787c46 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Fri, 4 Jul 2025 16:03:49 -0700 Subject: selftests/bpf: ptr_to_btf_id struct walk ending with primitive pointer Validate that reading a PTR_TO_BTF_ID field produces a value of type PTR_TO_MEM|MEM_RDONLY|PTR_UNTRUSTED, if field is a pointer to a primitive type. Acked-by: Kumar Kartikeya Dwivedi Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250704230354.1323244-4-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/mem_rdonly_untrusted.c | 31 ++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c index 8185130ede95..4f94c971ae86 100644 --- a/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c +++ b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c @@ -5,6 +5,37 @@ #include "bpf_misc.h" #include "../test_kmods/bpf_testmod_kfunc.h" +SEC("tp_btf/sys_enter") +__success +__log_level(2) +__msg("r8 = *(u64 *)(r7 +0) ; R7_w=ptr_nameidata(off={{[0-9]+}}) R8_w=rdonly_untrusted_mem(sz=0)") +__msg("r9 = *(u8 *)(r8 +0) ; R8_w=rdonly_untrusted_mem(sz=0) R9_w=scalar") +int btf_id_to_ptr_mem(void *ctx) +{ + struct task_struct *task; + struct nameidata *idata; + u64 ret, off; + + task = bpf_get_current_task_btf(); + idata = task->nameidata; + off = bpf_core_field_offset(struct nameidata, pathname); + /* + * asm block to have reliable match target for __msg, equivalent of: + * ret = task->nameidata->pathname[0]; + */ + asm volatile ( + "r7 = %[idata];" + "r7 += %[off];" + "r8 = *(u64 *)(r7 + 0);" + "r9 = *(u8 *)(r8 + 0);" + "%[ret] = r9;" + : [ret]"=r"(ret) + : [idata]"r"(idata), + [off]"r"(off) + : "r7", "r8", "r9"); + return ret; +} + SEC("socket") __success __retval(0) -- cgit v1.2.3 From 54ac2c9418af2f115ca42419e6c6633b88f3d49d Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Fri, 4 Jul 2025 16:03:52 -0700 Subject: selftests/bpf: test cases for __arg_untrusted Check usage of __arg_untrusted parameters with PTR_TO_BTF_ID: - combining __arg_untrusted with other tags is forbidden; - non-kernel (program local) types for __arg_untrusted are forbidden; - passing of {trusted, untrusted, map value, scalar value, values with variable offset} to untrusted is ok; - passing of PTR_TO_BTF_ID with a different type to untrusted is ok; - passing of untrusted to trusted is forbidden. Acked-by: Kumar Kartikeya Dwivedi Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250704230354.1323244-7-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/verifier_global_ptr_args.c | 81 ++++++++++++++++++++++ 1 file changed, 81 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c index 4ab0ef18d7eb..4bd436a35826 100644 --- a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c +++ b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c @@ -179,4 +179,85 @@ int BPF_PROG(trusted_acq_rel, struct task_struct *task, u64 clone_flags) return subprog_trusted_acq_rel(task); } +__weak int subprog_untrusted_bad_tags(struct task_struct *task __arg_untrusted __arg_nullable) +{ + return task->pid; +} + +SEC("tp_btf/sys_enter") +__failure +__msg("arg#0 untrusted cannot be combined with any other tags") +int untrusted_bad_tags(void *ctx) +{ + return subprog_untrusted_bad_tags(0); +} + +struct local_type_wont_be_accepted {}; + +__weak int subprog_untrusted_bad_type(struct local_type_wont_be_accepted *p __arg_untrusted) +{ + return 0; +} + +SEC("tp_btf/sys_enter") +__failure +__msg("arg#0 reference type('STRUCT local_type_wont_be_accepted') has no matches") +int untrusted_bad_type(void *ctx) +{ + return subprog_untrusted_bad_type(bpf_rdonly_cast(0, 0)); +} + +__weak int subprog_untrusted(const volatile struct task_struct *restrict task __arg_untrusted) +{ + return task->pid; +} + +SEC("tp_btf/sys_enter") +__success +__log_level(2) +__msg("r1 = {{.*}}; {{.*}}R1_w=trusted_ptr_task_struct()") +__msg("Func#1 ('subprog_untrusted') is global and assumed valid.") +__msg("Validating subprog_untrusted() func#1...") +__msg(": R1=untrusted_ptr_task_struct") +int trusted_to_untrusted(void *ctx) +{ + return subprog_untrusted(bpf_get_current_task_btf()); +} + +char mem[16]; +u32 off; + +SEC("tp_btf/sys_enter") +__success +int anything_to_untrusted(void *ctx) +{ + /* untrusted to untrusted */ + subprog_untrusted(bpf_core_cast(0, struct task_struct)); + /* wrong type to untrusted */ + subprog_untrusted((void *)bpf_core_cast(0, struct bpf_verifier_env)); + /* map value to untrusted */ + subprog_untrusted((void *)mem); + /* scalar to untrusted */ + subprog_untrusted(0); + /* variable offset to untrusted (map) */ + subprog_untrusted((void *)mem + off); + /* variable offset to untrusted (trusted) */ + subprog_untrusted((void *)bpf_get_current_task_btf() + off); + return 0; +} + +__weak int subprog_untrusted2(struct task_struct *task __arg_untrusted) +{ + return subprog_trusted_task_nullable(task); +} + +SEC("tp_btf/sys_enter") +__failure +__msg("R1 type=untrusted_ptr_ expected=ptr_, trusted_ptr_, rcu_ptr_") +__msg("Caller passes invalid args into func#{{.*}} ('subprog_trusted_task_nullable')") +int untrusted_to_trusted(void *ctx) +{ + return subprog_untrusted2(bpf_get_current_task_btf()); +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 68cca81fd57fd9f5b8fd8da0dccd1d00522592f9 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Fri, 4 Jul 2025 16:03:54 -0700 Subject: selftests/bpf: tests for __arg_untrusted void * global func params Check usage of __arg_untrusted parameters of primitive type: - passing of {trusted, untrusted, map value, scalar value, values with variable offset} to untrusted `void *`, `char *` or enum is ok; - varifier represents such parameters as rdonly_untrusted_mem(sz=0). Acked-by: Kumar Kartikeya Dwivedi Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250704230354.1323244-9-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/verifier_global_ptr_args.c | 53 ++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c index 4bd436a35826..b346f669d159 100644 --- a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c +++ b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c @@ -260,4 +260,57 @@ int untrusted_to_trusted(void *ctx) return subprog_untrusted2(bpf_get_current_task_btf()); } +__weak int subprog_void_untrusted(void *p __arg_untrusted) +{ + return *(int *)p; +} + +__weak int subprog_char_untrusted(char *p __arg_untrusted) +{ + return *(int *)p; +} + +__weak int subprog_enum_untrusted(enum bpf_attach_type *p __arg_untrusted) +{ + return *(int *)p; +} + +__weak int subprog_enum64_untrusted(enum scx_public_consts *p __arg_untrusted) +{ + return *(int *)p; +} + +SEC("tp_btf/sys_enter") +__success +__log_level(2) +__msg("r1 = {{.*}}; {{.*}}R1_w=trusted_ptr_task_struct()") +__msg("Func#1 ('subprog_void_untrusted') is global and assumed valid.") +__msg("Validating subprog_void_untrusted() func#1...") +__msg(": R1=rdonly_untrusted_mem(sz=0)") +int trusted_to_untrusted_mem(void *ctx) +{ + return subprog_void_untrusted(bpf_get_current_task_btf()); +} + +SEC("tp_btf/sys_enter") +__success +int anything_to_untrusted_mem(void *ctx) +{ + /* untrusted to untrusted mem */ + subprog_void_untrusted(bpf_core_cast(0, struct task_struct)); + /* map value to untrusted mem */ + subprog_void_untrusted(mem); + /* scalar to untrusted mem */ + subprog_void_untrusted(0); + /* variable offset to untrusted mem (map) */ + subprog_void_untrusted((void *)mem + off); + /* variable offset to untrusted mem (trusted) */ + subprog_void_untrusted(bpf_get_current_task_btf() + off); + /* variable offset to untrusted char/enum/enum64 (map) */ + subprog_char_untrusted(mem + off); + subprog_enum_untrusted((void *)mem + off); + subprog_enum64_untrusted((void *)mem + off); + return 0; +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 0f626c98fd10330da0420029a1c4fa35e39fb873 Mon Sep 17 00:00:00 2001 From: Saket Kumar Bhaskar Date: Mon, 7 Jul 2025 12:47:35 +0530 Subject: selftests/bpf: Set CONFIG_PACKET=y for selftests BPF selftest fails to build with below error: CLNG-BPF [test_progs] lsm_cgroup.bpf.o progs/lsm_cgroup.c:105:21: error: variable has incomplete type 'struct sockaddr_ll' 105 | struct sockaddr_ll sa = {}; | ^ progs/lsm_cgroup.c:105:9: note: forward declaration of 'struct sockaddr_ll' 105 | struct sockaddr_ll sa = {}; | ^ 1 error generated. lsm_cgroup selftest requires sockaddr_ll structure which is not there in vmlinux.h when the kernel is built with CONFIG_PACKET=m. Enabling CONFIG_PACKET=y ensures that sockaddr_ll is available in vmlinux, allowing it to be captured in the generated vmlinux.h for bpf selftests. Reported-by: Sachin P Bappalige Signed-off-by: Saket Kumar Bhaskar Acked-by: Daniel Borkmann Link: https://lore.kernel.org/r/20250707071735.705137-1-skb99@linux.ibm.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/config | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index f74e1ea0ad3b..7247833fe623 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -105,6 +105,7 @@ CONFIG_IP_NF_IPTABLES=y CONFIG_IP6_NF_IPTABLES=y CONFIG_IP6_NF_FILTER=y CONFIG_NF_NAT=y +CONFIG_PACKET=y CONFIG_RC_CORE=y CONFIG_SECURITY=y CONFIG_SECURITYFS=y -- cgit v1.2.3 From 92974cef83b560175fc52acb53aa833cb1e93306 Mon Sep 17 00:00:00 2001 From: Luis Gerhorst Date: Sat, 5 Jul 2025 21:09:08 +0200 Subject: selftests/bpf: Add Spectre v4 tests Add the following tests: 1. A test with an (unimportant) ldimm64 (16 byte insn) and a Spectre-v4--induced nospec that clarifies and serves as a basic Spectre v4 test. 2. Make sure a Spectre v4 nospec_result does not prevent a Spectre v1 nospec from being added before the dangerous instruction (tests that [1] is fixed). 3. Combine the two, which is the combination that triggers the warning in [2]. This is because the unanalyzed stack write has nospec_result set, but the ldimm64 (which was just analyzed) had incremented insn_idx by 2. That violates the assertion that nospec_result is only used after insns that increment insn_idx by 1 (i.e., stack writes). [1] https://lore.kernel.org/bpf/4266fd5de04092aa4971cbef14f1b4b96961f432.camel@gmail.com/ [2] https://lore.kernel.org/bpf/685b3c1b.050a0220.2303ee.0010.GAE@google.com/ Signed-off-by: Luis Gerhorst Link: https://lore.kernel.org/r/20250705190908.1756862-3-luis.gerhorst@fau.de Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/bpf_misc.h | 4 + .../testing/selftests/bpf/progs/verifier_unpriv.c | 149 +++++++++++++++++++++ 2 files changed, 153 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 20dce508d8e0..530752ddde8e 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -237,4 +237,8 @@ #define SPEC_V1 #endif +#if defined(__TARGET_ARCH_x86) +#define SPEC_V4 +#endif + #endif diff --git a/tools/testing/selftests/bpf/progs/verifier_unpriv.c b/tools/testing/selftests/bpf/progs/verifier_unpriv.c index 4470541b5e71..28b4f7035ceb 100644 --- a/tools/testing/selftests/bpf/progs/verifier_unpriv.c +++ b/tools/testing/selftests/bpf/progs/verifier_unpriv.c @@ -801,4 +801,153 @@ l2_%=: \ : __clobber_all); } +SEC("socket") +__description("unpriv: ldimm64 before Spectre v4 barrier") +__success __success_unpriv +__retval(0) +#ifdef SPEC_V4 +__xlated_unpriv("r1 = 0x2020200005642020") /* should not matter */ +__xlated_unpriv("*(u64 *)(r10 -8) = r1") +__xlated_unpriv("nospec") +#endif +__naked void unpriv_ldimm64_spectre_v4(void) +{ + asm volatile (" \ + r1 = 0x2020200005642020 ll; \ + *(u64 *)(r10 -8) = r1; \ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("unpriv: Spectre v1 and v4 barrier") +__success __success_unpriv +__retval(0) +#ifdef SPEC_V1 +#ifdef SPEC_V4 +/* starts with r0 == r8 == r9 == 0 */ +__xlated_unpriv("if r8 != 0x0 goto pc+1") +__xlated_unpriv("goto pc+2") +__xlated_unpriv("if r9 == 0x0 goto pc+4") +__xlated_unpriv("r2 = r0") +/* Following nospec required to prevent following dangerous `*(u64 *)(NOT_FP -64) + * = r1` iff `if r9 == 0 goto pc+4` was mispredicted because of Spectre v1. The + * test therefore ensures the Spectre-v4--induced nospec does not prevent the + * Spectre-v1--induced speculative path from being fully analyzed. + */ +__xlated_unpriv("nospec") /* Spectre v1 */ +__xlated_unpriv("*(u64 *)(r2 -64) = r1") /* could be used to leak r2 */ +__xlated_unpriv("nospec") /* Spectre v4 */ +#endif +#endif +__naked void unpriv_spectre_v1_and_v4(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + r8 = r0; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + r9 = r0; \ + r0 = r10; \ + r1 = 0; \ + r2 = r10; \ + if r8 != 0 goto l0_%=; \ + if r9 != 0 goto l0_%=; \ + r0 = 0; \ +l0_%=: if r8 != 0 goto l1_%=; \ + goto l2_%=; \ +l1_%=: if r9 == 0 goto l3_%=; \ + r2 = r0; \ +l2_%=: *(u64 *)(r2 -64) = r1; \ +l3_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("socket") +__description("unpriv: Spectre v1 and v4 barrier (simple)") +__success __success_unpriv +__retval(0) +#ifdef SPEC_V1 +#ifdef SPEC_V4 +__xlated_unpriv("if r8 != 0x0 goto pc+1") +__xlated_unpriv("goto pc+2") +__xlated_unpriv("goto pc-1") /* if r9 == 0 goto l3_%= */ +__xlated_unpriv("goto pc-1") /* r2 = r0 */ +__xlated_unpriv("nospec") +__xlated_unpriv("*(u64 *)(r2 -64) = r1") +__xlated_unpriv("nospec") +#endif +#endif +__naked void unpriv_spectre_v1_and_v4_simple(void) +{ + asm volatile (" \ + r8 = 0; \ + r9 = 0; \ + r0 = r10; \ + r1 = 0; \ + r2 = r10; \ + if r8 != 0 goto l0_%=; \ + if r9 != 0 goto l0_%=; \ + r0 = 0; \ +l0_%=: if r8 != 0 goto l1_%=; \ + goto l2_%=; \ +l1_%=: if r9 == 0 goto l3_%=; \ + r2 = r0; \ +l2_%=: *(u64 *)(r2 -64) = r1; \ +l3_%=: r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("unpriv: ldimm64 before Spectre v1 and v4 barrier (simple)") +__success __success_unpriv +__retval(0) +#ifdef SPEC_V1 +#ifdef SPEC_V4 +__xlated_unpriv("if r8 != 0x0 goto pc+1") +__xlated_unpriv("goto pc+4") +__xlated_unpriv("goto pc-1") /* if r9 == 0 goto l3_%= */ +__xlated_unpriv("goto pc-1") /* r2 = r0 */ +__xlated_unpriv("goto pc-1") /* r1 = 0x2020200005642020 ll */ +__xlated_unpriv("goto pc-1") /* second part of ldimm64 */ +__xlated_unpriv("nospec") +__xlated_unpriv("*(u64 *)(r2 -64) = r1") +__xlated_unpriv("nospec") +#endif +#endif +__naked void unpriv_ldimm64_spectre_v1_and_v4_simple(void) +{ + asm volatile (" \ + r8 = 0; \ + r9 = 0; \ + r0 = r10; \ + r1 = 0; \ + r2 = r10; \ + if r8 != 0 goto l0_%=; \ + if r9 != 0 goto l0_%=; \ + r0 = 0; \ +l0_%=: if r8 != 0 goto l1_%=; \ + goto l2_%=; \ +l1_%=: if r9 == 0 goto l3_%=; \ + r2 = r0; \ + r1 = 0x2020200005642020 ll; \ +l2_%=: *(u64 *)(r2 -64) = r1; \ +l3_%=: r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 192e3aa145292bad69f1d702ab21755ae07dda40 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Mon, 7 Jul 2025 13:50:35 +0200 Subject: selftests/bpf: Negative test case for tail call map This patch adds a negative test case for the following verifier error. expected prog array map for tail call Acked-by: Yonghong Song Signed-off-by: Paul Chaignon Acked-by: Daniel Borkmann Link: https://lore.kernel.org/r/aGu0i1X_jII-3aFa@mail.gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/verifier.c | 2 ++ .../selftests/bpf/progs/verifier_tailcall.c | 31 ++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/verifier_tailcall.c (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index c9da06741104..77ec95d4ffaa 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -85,6 +85,7 @@ #include "verifier_store_release.skel.h" #include "verifier_subprog_precision.skel.h" #include "verifier_subreg.skel.h" +#include "verifier_tailcall.skel.h" #include "verifier_tailcall_jit.skel.h" #include "verifier_typedef.skel.h" #include "verifier_uninit.skel.h" @@ -219,6 +220,7 @@ void test_verifier_stack_ptr(void) { RUN(verifier_stack_ptr); } void test_verifier_store_release(void) { RUN(verifier_store_release); } void test_verifier_subprog_precision(void) { RUN(verifier_subprog_precision); } void test_verifier_subreg(void) { RUN(verifier_subreg); } +void test_verifier_tailcall(void) { RUN(verifier_tailcall); } void test_verifier_tailcall_jit(void) { RUN(verifier_tailcall_jit); } void test_verifier_typedef(void) { RUN(verifier_typedef); } void test_verifier_uninit(void) { RUN(verifier_uninit); } diff --git a/tools/testing/selftests/bpf/progs/verifier_tailcall.c b/tools/testing/selftests/bpf/progs/verifier_tailcall.c new file mode 100644 index 000000000000..b4acce60fb9b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_tailcall.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include "bpf_misc.h" + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} map_array SEC(".maps"); + +SEC("socket") +__description("invalid map type for tail call") +__failure __msg("expected prog array map for tail call") +__failure_unpriv +__naked void invalid_map_for_tail_call(void) +{ + asm volatile (" \ + r2 = %[map_array] ll; \ + r3 = 0; \ + call %[bpf_tail_call]; \ + exit; \ +" : + : __imm(bpf_tail_call), + __imm_addr(map_array) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 95d32c7ee05001ea6ecd0d9c415dd00599a7dc56 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Thu, 3 Jul 2025 17:17:46 -0600 Subject: selftests: print installation complete message Add installation complete message to Makefile install logic. Link: https://lore.kernel.org/r/20250703231747.37544-1-skhan@linuxfoundation.org Signed-off-by: Shuah Khan --- tools/testing/selftests/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 9dae84a74e7f..b95de208265a 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -300,6 +300,7 @@ ifdef INSTALL_PATH else \ printf "Unable to get version from git describe\n"; \ fi + @echo "**Kselftest Installation is complete: $(INSTALL_PATH)**" else $(error Error: set INSTALL_PATH to use install) endif -- cgit v1.2.3 From a089bb2822a49b0c5777a8936f82c1f8629231fb Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Thu, 3 Jul 2025 13:26:43 +0900 Subject: selftests: tracing: Use mutex_unlock for testing glob filter Since commit c5b6ababd21a ("locking/mutex: implement mutex_trylock_nested") makes mutex_trylock() as an inlined function if CONFIG_DEBUG_LOCK_ALLOC=y, we can not use mutex_trylock() for testing the glob filter of ftrace. Use mutex_unlock instead. Link: https://lore.kernel.org/r/175151680309.2149615.9795104805153538717.stgit@mhiramat.tok.corp.google.com Signed-off-by: Masami Hiramatsu (Google) Acked-by: Steven Rostedt (Google) Signed-off-by: Shuah Khan --- tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc index 4b994b6df5ac..ed81eaf2afd6 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc @@ -29,7 +29,7 @@ ftrace_filter_check 'schedule*' '^schedule.*$' ftrace_filter_check '*pin*lock' '.*pin.*lock$' # filter by start*mid* -ftrace_filter_check 'mutex*try*' '^mutex.*try.*' +ftrace_filter_check 'mutex*unl*' '^mutex.*unl.*' # Advanced full-glob matching feature is recently supported. # Skip the tests if we are sure the kernel does not support it. -- cgit v1.2.3 From 3dc6c76391cbe5ec7f87531d992beaf7dccc6ff4 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 2 Jul 2025 03:06:39 -0700 Subject: selftests: net: Add IPv6 support to netconsole basic tests Add IPv6 support to the netconsole basic functionality tests by: - Introducing separate IPv4 and IPv6 address variables (SRCIP4/SRCIP6, DSTIP4/DSTIP6) to replace the single SRCIP/DSTIP variables - Adding select_ipv4_or_ipv6() function to choose protocol version - Updating socat configuration to use UDP6-LISTEN for IPv6 tests - Adding wait_for_port() wrapper to handle protocol-specific port waiting - Expanding test matrix to run both basic and extended formats against both IPv4 and IPv6 protocols - Improving cleanup to kill any remaining socat processes - Adding sleep delays for better IPv6 packet handling reliability The test now validates netconsole functionality across both IP versions, improving test coverage for dual-stack network environments. This test would avoid the regression fixed by commit f59902070269 ("net: netpoll: Initialize UDP checksum field before checksumming") Signed-off-by: Breno Leitao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250702-netpoll_untagle_ip-v2-7-13cf3db24e2b@debian.org Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/lib/sh/lib_netcons.sh | 76 ++++++++++++++++++++-- .../testing/selftests/drivers/net/netcons_basic.sh | 53 ++++++++------- 2 files changed, 99 insertions(+), 30 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index 3fcf85a34596..258af805497b 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -11,9 +11,11 @@ set -euo pipefail LIBDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") SRCIF="" # to be populated later -SRCIP=192.0.2.1 +SRCIP4="192.0.2.1" +SRCIP6="fc00::1" DSTIF="" # to be populated later -DSTIP=192.0.2.2 +DSTIP4="192.0.2.2" +DSTIP6="fc00::2" PORT="6666" MSG="netconsole selftest" @@ -80,7 +82,23 @@ function configure_ip() { ip link set "${SRCIF}" up } +function select_ipv4_or_ipv6() +{ + local VERSION=${1} + + if [[ "$VERSION" == "ipv6" ]] + then + DSTIP="${DSTIP6}" + SRCIP="${SRCIP6}" + else + DSTIP="${DSTIP4}" + SRCIP="${SRCIP4}" + fi +} + function set_network() { + local IP_VERSION=${1:-"ipv4"} + # setup_ns function is coming from lib.sh setup_ns NAMESPACE @@ -91,6 +109,7 @@ function set_network() { # Link both interfaces back to back link_ifaces + select_ipv4_or_ipv6 "${IP_VERSION}" configure_ip } @@ -119,6 +138,11 @@ function create_dynamic_target() { fi echo 1 > "${NETCONS_PATH}"/enabled + + # This will make sure that the kernel was able to + # load the netconsole driver configuration. The console message + # gets more organized/sequential as well. + sleep 1 } # Generate the command line argument for netconsole following: @@ -179,9 +203,18 @@ function set_user_data() { function listen_port_and_save_to() { local OUTPUT=${1} + local IPVERSION=${2:-"ipv4"} + + if [ "${IPVERSION}" == "ipv4" ] + then + SOCAT_MODE="UDP-LISTEN" + else + SOCAT_MODE="UDP6-LISTEN" + fi + # Just wait for 2 seconds timeout 2 ip netns exec "${NAMESPACE}" \ - socat UDP-LISTEN:"${PORT}",fork "${OUTPUT}" + socat "${SOCAT_MODE}":"${PORT}",fork "${OUTPUT}" } # Only validate that the message arrived properly @@ -263,8 +296,15 @@ function check_for_dependencies() { exit "${ksft_skip}" fi - if ip addr list | grep -E "inet.*(${SRCIP}|${DSTIP})" 2> /dev/null; then - echo "SKIP: IPs already in use. Skipping it" >&2 + REGEXP4="inet.*(${SRCIP4}|${DSTIP4})" + REGEXP6="inet.*(${SRCIP6}|${DSTIP6})" + if ip addr list | grep -E "${REGEXP4}" 2> /dev/null; then + echo "SKIP: IPv4s already in use. Skipping it" >&2 + exit "${ksft_skip}" + fi + + if ip addr list | grep -E "${REGEXP6}" 2> /dev/null; then + echo "SKIP: IPv6s already in use. Skipping it" >&2 exit "${ksft_skip}" fi } @@ -278,11 +318,13 @@ function check_for_taskset() { # This is necessary if running multiple tests in a row function pkill_socat() { - PROCESS_NAME="socat UDP-LISTEN:6666,fork ${OUTPUT_FILE}" + PROCESS_NAME4="socat UDP-LISTEN:6666,fork ${OUTPUT_FILE}" + PROCESS_NAME6="socat UDP6-LISTEN:6666,fork ${OUTPUT_FILE}" # socat runs under timeout(1), kill it if it is still alive # do not fail if socat doesn't exist anymore set +e - pkill -f "${PROCESS_NAME}" + pkill -f "${PROCESS_NAME4}" + pkill -f "${PROCESS_NAME6}" set -e } @@ -294,3 +336,23 @@ function check_netconsole_module() { exit "${ksft_skip}" fi } + +# A wrapper to translate protocol version to udp version +function wait_for_port() { + local NAMESPACE=${1} + local PORT=${2} + IP_VERSION=${3} + + if [ "${IP_VERSION}" == "ipv6" ] + then + PROTOCOL="udp6" + else + PROTOCOL="udp" + fi + + wait_local_port_listen "${NAMESPACE}" "${PORT}" "${PROTOCOL}" + # even after the port is open, let's wait 1 second before writing + # otherwise the packet could be missed, and the test will fail. Happens + # more frequently on IPv6 + sleep 1 +} diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh index 40a6ac6191b8..a3446b569976 100755 --- a/tools/testing/selftests/drivers/net/netcons_basic.sh +++ b/tools/testing/selftests/drivers/net/netcons_basic.sh @@ -36,30 +36,37 @@ trap cleanup EXIT # Run the test twice, with different format modes for FORMAT in "basic" "extended" do - echo "Running with target mode: ${FORMAT}" - # Create one namespace and two interfaces - set_network - # Create a dynamic target for netconsole - create_dynamic_target "${FORMAT}" - # Only set userdata for extended format - if [ "$FORMAT" == "extended" ] - then - # Set userdata "key" with the "value" value - set_user_data - fi - # Listed for netconsole port inside the namespace and destination interface - listen_port_and_save_to "${OUTPUT_FILE}" & - # Wait for socat to start and listen to the port. - wait_local_port_listen "${NAMESPACE}" "${PORT}" udp - # Send the message - echo "${MSG}: ${TARGET}" > /dev/kmsg - # Wait until socat saves the file to disk - busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" + for IP_VERSION in "ipv6" "ipv4" + do + echo "Running with target mode: ${FORMAT} (${IP_VERSION})" + # Create one namespace and two interfaces + set_network "${IP_VERSION}" + # Create a dynamic target for netconsole + create_dynamic_target "${FORMAT}" + # Only set userdata for extended format + if [ "$FORMAT" == "extended" ] + then + # Set userdata "key" with the "value" value + set_user_data + fi + # Listed for netconsole port inside the namespace and + # destination interface + listen_port_and_save_to "${OUTPUT_FILE}" "${IP_VERSION}" & + # Wait for socat to start and listen to the port. + wait_for_port "${NAMESPACE}" "${PORT}" "${IP_VERSION}" + # Send the message + echo "${MSG}: ${TARGET}" > /dev/kmsg + # Wait until socat saves the file to disk + busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" - # Make sure the message was received in the dst part - # and exit - validate_result "${OUTPUT_FILE}" "${FORMAT}" - cleanup + # Make sure the message was received in the dst part + # and exit + validate_result "${OUTPUT_FILE}" "${FORMAT}" + # kill socat in case it is still running + pkill_socat + cleanup + echo "${FORMAT} : ${IP_VERSION} : Test passed" >&2 + done done trap - EXIT -- cgit v1.2.3 From f6f6be0c4faf0e0faa3ed5253458186c2d598226 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Fri, 20 Jun 2025 13:00:28 +0200 Subject: tools/nolibc: drop s390 clang target override MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit tools/scripts/Makefile.include now has the same override, removing the need for the one in the nolibc Makefile. Drop the superfluous custom override. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20250620-tools-cross-s390-v2-2-ecda886e00e5@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/Makefile | 3 --- tools/testing/selftests/nolibc/Makefile.nolibc | 3 --- 2 files changed, 6 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index 3fcee9fe4ece..143c2d2c2ba6 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -107,9 +107,6 @@ headers_standalone: headers $(Q)$(MAKE) -C $(srctree) headers $(Q)$(MAKE) -C $(srctree) headers_install INSTALL_HDR_PATH=$(OUTPUT)sysroot -# GCC uses "s390", clang "systemz" -CLANG_CROSS_FLAGS := $(subst --target=s390-linux,--target=systemz-linux,$(CLANG_CROSS_FLAGS)) - headers_check: headers_standalone $(Q)for header in $(filter-out crt.h std.h,$(all_files)); do \ $(CC) $(CLANG_CROSS_FLAGS) -Wall -Werror -nostdinc -fsyntax-only -x c /dev/null \ diff --git a/tools/testing/selftests/nolibc/Makefile.nolibc b/tools/testing/selftests/nolibc/Makefile.nolibc index cc5ee6c67586..8cb65392fa10 100644 --- a/tools/testing/selftests/nolibc/Makefile.nolibc +++ b/tools/testing/selftests/nolibc/Makefile.nolibc @@ -244,9 +244,6 @@ endif # Modify CFLAGS based on LLVM= include $(srctree)/tools/scripts/Makefile.include -# GCC uses "s390", clang "systemz" -CLANG_CROSS_FLAGS := $(subst --target=s390-linux,--target=systemz-linux,$(CLANG_CROSS_FLAGS)) - REPORT ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{if (!f) printf("\n"); f++; print;} /\[SKIPPED\][\r]*$$/{s++} \ END{ printf("\n%3d test(s): %3d passed, %3d skipped, %3d failed => status: ", p+s+f, p, s, f); \ if (f || !p) printf("failure\n"); else if (s) printf("warning\n"); else printf("success\n");; \ -- cgit v1.2.3 From 2858ea3083f088a76b439f5b88b6d4f032dabc0c Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 8 Jul 2025 10:25:29 -0700 Subject: KVM: arm64: selftests: Add basic SError injection test Add tests for SError injection considering KVM is more directly involved in delivery: - Pending SErrors are taken at the first CSE after SErrors are unmasked - Pending SErrors aren't taken and remain pending if SErrors are masked - Unmasked SErrors are taken immediately when injected (implementation detail) Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20250708172532.1699409-25-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- tools/testing/selftests/kvm/Makefile.kvm | 2 +- .../testing/selftests/kvm/arm64/external_aborts.c | 262 +++++++++++++++++++++ tools/testing/selftests/kvm/arm64/mmio_abort.c | 159 ------------- .../selftests/kvm/include/arm64/processor.h | 10 + 4 files changed, 273 insertions(+), 160 deletions(-) create mode 100644 tools/testing/selftests/kvm/arm64/external_aborts.c delete mode 100644 tools/testing/selftests/kvm/arm64/mmio_abort.c (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index 38b95998e1e6..ce817a975e50 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -156,7 +156,7 @@ TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases TEST_GEN_PROGS_arm64 += arm64/debug-exceptions TEST_GEN_PROGS_arm64 += arm64/host_sve TEST_GEN_PROGS_arm64 += arm64/hypercalls -TEST_GEN_PROGS_arm64 += arm64/mmio_abort +TEST_GEN_PROGS_arm64 += arm64/external_aborts TEST_GEN_PROGS_arm64 += arm64/page_fault_test TEST_GEN_PROGS_arm64 += arm64/psci_test TEST_GEN_PROGS_arm64 += arm64/set_id_regs diff --git a/tools/testing/selftests/kvm/arm64/external_aborts.c b/tools/testing/selftests/kvm/arm64/external_aborts.c new file mode 100644 index 000000000000..f49c98bda60e --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/external_aborts.c @@ -0,0 +1,262 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * external_abort - Tests for userspace external abort injection + * + * Copyright (c) 2024 Google LLC + */ +#include "processor.h" +#include "test_util.h" + +#define MMIO_ADDR 0x8000000ULL + +static u64 expected_abort_pc; + +static void expect_sea_handler(struct ex_regs *regs) +{ + u64 esr = read_sysreg(esr_el1); + + GUEST_ASSERT_EQ(regs->pc, expected_abort_pc); + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR); + GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT); + + GUEST_DONE(); +} + +static void unexpected_dabt_handler(struct ex_regs *regs) +{ + GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc); +} + +static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code, + handler_fn dabt_handler) +{ + struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(*vcpu); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler); + + virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1); + + return vm; +} + +static void vcpu_inject_sea(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_events events = {}; + + events.exception.ext_dabt_pending = true; + vcpu_events_set(vcpu, &events); +} + +static void vcpu_inject_serror(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_events events = {}; + + events.exception.serror_pending = true; + vcpu_events_set(vcpu, &events); +} + +static void __vcpu_run_expect(struct kvm_vcpu *vcpu, unsigned int cmd) +{ + struct ucall uc; + + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + default: + if (uc.cmd == cmd) + return; + + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } +} + +static void vcpu_run_expect_done(struct kvm_vcpu *vcpu) +{ + __vcpu_run_expect(vcpu, UCALL_DONE); +} + +static void vcpu_run_expect_sync(struct kvm_vcpu *vcpu) +{ + __vcpu_run_expect(vcpu, UCALL_SYNC); +} + +extern char test_mmio_abort_insn; + +static void test_mmio_abort_guest(void) +{ + WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn); + + asm volatile("test_mmio_abort_insn:\n\t" + "ldr x0, [%0]\n\t" + : : "r" (MMIO_ADDR) : "x0", "memory"); + + GUEST_FAIL("MMIO instruction should not retire"); +} + +/* + * Test that KVM doesn't complete MMIO emulation when userspace has made an + * external abort pending for the instruction. + */ +static void test_mmio_abort(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest, + expect_sea_handler); + struct kvm_run *run = vcpu->run; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO); + TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR); + TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long)); + TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read"); + + vcpu_inject_sea(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +extern char test_mmio_nisv_insn; + +static void test_mmio_nisv_guest(void) +{ + WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn); + + asm volatile("test_mmio_nisv_insn:\n\t" + "ldr x0, [%0], #8\n\t" + : : "r" (MMIO_ADDR) : "x0", "memory"); + + GUEST_FAIL("MMIO instruction should not retire"); +} + +/* + * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace + * hasn't enabled KVM_CAP_ARM_NISV_TO_USER. + */ +static void test_mmio_nisv(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, + unexpected_dabt_handler); + + TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN"); + TEST_ASSERT_EQ(errno, ENOSYS); + + kvm_vm_free(vm); +} + +/* + * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA + * reaches the guest. + */ +static void test_mmio_nisv_abort(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, + expect_sea_handler); + struct kvm_run *run = vcpu->run; + + vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV); + TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR); + + vcpu_inject_sea(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void unexpected_serror_handler(struct ex_regs *regs) +{ + GUEST_FAIL("Took unexpected SError exception"); +} + +static void test_serror_masked_guest(void) +{ + GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A); + + isb(); + + GUEST_DONE(); +} + +static void test_serror_masked(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_masked_guest, + unexpected_dabt_handler); + + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, unexpected_serror_handler); + + vcpu_inject_serror(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void expect_serror_handler(struct ex_regs *regs) +{ + GUEST_DONE(); +} + +static void test_serror_guest(void) +{ + GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A); + + local_serror_enable(); + isb(); + local_serror_disable(); + + GUEST_FAIL("Should've taken pending SError exception"); +} + +static void test_serror(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_guest, + unexpected_dabt_handler); + + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler); + + vcpu_inject_serror(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void test_serror_emulated_guest(void) +{ + GUEST_ASSERT(!(read_sysreg(isr_el1) & ISR_EL1_A)); + + local_serror_enable(); + GUEST_SYNC(0); + local_serror_disable(); + + GUEST_FAIL("Should've taken unmasked SError exception"); +} + +static void test_serror_emulated(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_emulated_guest, + unexpected_dabt_handler); + + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler); + + vcpu_run_expect_sync(vcpu); + vcpu_inject_serror(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +int main(void) +{ + test_mmio_abort(); + test_mmio_nisv(); + test_mmio_nisv_abort(); + test_serror(); + test_serror_masked(); + test_serror_emulated(); +} diff --git a/tools/testing/selftests/kvm/arm64/mmio_abort.c b/tools/testing/selftests/kvm/arm64/mmio_abort.c deleted file mode 100644 index 8b7a80a51b1c..000000000000 --- a/tools/testing/selftests/kvm/arm64/mmio_abort.c +++ /dev/null @@ -1,159 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * mmio_abort - Tests for userspace MMIO abort injection - * - * Copyright (c) 2024 Google LLC - */ -#include "processor.h" -#include "test_util.h" - -#define MMIO_ADDR 0x8000000ULL - -static u64 expected_abort_pc; - -static void expect_sea_handler(struct ex_regs *regs) -{ - u64 esr = read_sysreg(esr_el1); - - GUEST_ASSERT_EQ(regs->pc, expected_abort_pc); - GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR); - GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT); - - GUEST_DONE(); -} - -static void unexpected_dabt_handler(struct ex_regs *regs) -{ - GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc); -} - -static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code, - handler_fn dabt_handler) -{ - struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code); - - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(*vcpu); - vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler); - - virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1); - - return vm; -} - -static void vcpu_inject_extabt(struct kvm_vcpu *vcpu) -{ - struct kvm_vcpu_events events = {}; - - events.exception.ext_dabt_pending = true; - vcpu_events_set(vcpu, &events); -} - -static void vcpu_run_expect_done(struct kvm_vcpu *vcpu) -{ - struct ucall uc; - - vcpu_run(vcpu); - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - break; - case UCALL_DONE: - break; - default: - TEST_FAIL("Unexpected ucall: %lu", uc.cmd); - } -} - -extern char test_mmio_abort_insn; - -static void test_mmio_abort_guest(void) -{ - WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn); - - asm volatile("test_mmio_abort_insn:\n\t" - "ldr x0, [%0]\n\t" - : : "r" (MMIO_ADDR) : "x0", "memory"); - - GUEST_FAIL("MMIO instruction should not retire"); -} - -/* - * Test that KVM doesn't complete MMIO emulation when userspace has made an - * external abort pending for the instruction. - */ -static void test_mmio_abort(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest, - expect_sea_handler); - struct kvm_run *run = vcpu->run; - - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO); - TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR); - TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long)); - TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read"); - - vcpu_inject_extabt(vcpu); - vcpu_run_expect_done(vcpu); - kvm_vm_free(vm); -} - -extern char test_mmio_nisv_insn; - -static void test_mmio_nisv_guest(void) -{ - WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn); - - asm volatile("test_mmio_nisv_insn:\n\t" - "ldr x0, [%0], #8\n\t" - : : "r" (MMIO_ADDR) : "x0", "memory"); - - GUEST_FAIL("MMIO instruction should not retire"); -} - -/* - * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace - * hasn't enabled KVM_CAP_ARM_NISV_TO_USER. - */ -static void test_mmio_nisv(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, - unexpected_dabt_handler); - - TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN"); - TEST_ASSERT_EQ(errno, ENOSYS); - - kvm_vm_free(vm); -} - -/* - * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA - * reaches the guest. - */ -static void test_mmio_nisv_abort(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, - expect_sea_handler); - struct kvm_run *run = vcpu->run; - - vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1); - - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV); - TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR); - - vcpu_inject_extabt(vcpu); - vcpu_run_expect_done(vcpu); - kvm_vm_free(vm); -} - -int main(void) -{ - test_mmio_abort(); - test_mmio_nisv(); - test_mmio_nisv_abort(); -} diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h index b0fc0f945766..255fed769a8a 100644 --- a/tools/testing/selftests/kvm/include/arm64/processor.h +++ b/tools/testing/selftests/kvm/include/arm64/processor.h @@ -254,6 +254,16 @@ static inline void local_irq_disable(void) asm volatile("msr daifset, #3" : : : "memory"); } +static inline void local_serror_enable(void) +{ + asm volatile("msr daifclr, #4" : : : "memory"); +} + +static inline void local_serror_disable(void) +{ + asm volatile("msr daifset, #4" : : : "memory"); +} + /** * struct arm_smccc_res - Result from SMC/HVC call * @a0-a3 result values from registers 0 to 3 -- cgit v1.2.3 From a90aac55324920de988cc447f4227c04ad769351 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 8 Jul 2025 10:25:30 -0700 Subject: KVM: arm64: selftests: Test SEAs are taken to SError vector when EASE=1 Ensure KVM routes SEAs to the correct vector depending on SCTLR2_EL1.EASE. Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20250708172532.1699409-26-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- .../testing/selftests/kvm/arm64/external_aborts.c | 42 +++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/kvm/arm64/external_aborts.c b/tools/testing/selftests/kvm/arm64/external_aborts.c index f49c98bda60e..ec7e30776a0e 100644 --- a/tools/testing/selftests/kvm/arm64/external_aborts.c +++ b/tools/testing/selftests/kvm/arm64/external_aborts.c @@ -86,7 +86,7 @@ static void vcpu_run_expect_sync(struct kvm_vcpu *vcpu) extern char test_mmio_abort_insn; -static void test_mmio_abort_guest(void) +static noinline void test_mmio_abort_guest(void) { WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn); @@ -251,6 +251,45 @@ static void test_serror_emulated(void) kvm_vm_free(vm); } +static void test_mmio_ease_guest(void) +{ + sysreg_clear_set_s(SYS_SCTLR2_EL1, 0, SCTLR2_EL1_EASE); + isb(); + + test_mmio_abort_guest(); +} + +/* + * Test that KVM doesn't complete MMIO emulation when userspace has made an + * external abort pending for the instruction. + */ +static void test_mmio_ease(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_ease_guest, + unexpected_dabt_handler); + struct kvm_run *run = vcpu->run; + u64 pfr1; + + pfr1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); + if (!SYS_FIELD_GET(ID_AA64PFR1_EL1, DF2, pfr1)) { + pr_debug("Skipping %s\n", __func__); + return; + } + + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO); + TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR); + TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long)); + TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read"); + + vcpu_inject_sea(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + int main(void) { test_mmio_abort(); @@ -259,4 +298,5 @@ int main(void) test_serror(); test_serror_masked(); test_serror_emulated(); + test_mmio_ease(); } -- cgit v1.2.3 From 55ea75f5b27381d4b23e5617de80860a6e7c0477 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 8 Jul 2025 10:25:31 -0700 Subject: KVM: arm64: selftests: Add SCTLR2_EL1 to get-reg-list Handle SCTLR2_EL1 specially as it is only visible to userspace when FEAT_SCTLR2 is implemented for the VM. Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20250708172532.1699409-27-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- tools/testing/selftests/kvm/arm64/get-reg-list.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c index d01798b6b3b4..684c52b1b881 100644 --- a/tools/testing/selftests/kvm/arm64/get-reg-list.c +++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c @@ -52,6 +52,12 @@ static struct feature_id_reg feat_id_regs[] = { ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ 16, 1 + }, + { + KVM_ARM64_SYS_REG(SYS_SCTLR2_EL1), + KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR3_EL1), + ID_AA64MMFR3_EL1_SCTLRX_SHIFT, + ID_AA64MMFR3_EL1_SCTLRX_IMP } }; @@ -469,6 +475,7 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 0, 1, 0, 0), /* SCTLR_EL1 */ ARM64_SYS_REG(3, 0, 1, 0, 1), /* ACTLR_EL1 */ ARM64_SYS_REG(3, 0, 1, 0, 2), /* CPACR_EL1 */ + KVM_ARM64_SYS_REG(SYS_SCTLR2_EL1), ARM64_SYS_REG(3, 0, 2, 0, 0), /* TTBR0_EL1 */ ARM64_SYS_REG(3, 0, 2, 0, 1), /* TTBR1_EL1 */ ARM64_SYS_REG(3, 0, 2, 0, 2), /* TCR_EL1 */ -- cgit v1.2.3 From 0b593ef12afce0124612d90cd1768a64d3f27a65 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 8 Jul 2025 10:25:32 -0700 Subject: KVM: arm64: selftests: Catch up set_id_regs with the kernel Add test coverage for ID_AA64MMFR3_EL1 and the recently added FEAT_DoubleFault2. Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20250708172532.1699409-28-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- tools/testing/selftests/kvm/arm64/set_id_regs.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index 8f422bfdfcb9..d3bf9204409c 100644 --- a/tools/testing/selftests/kvm/arm64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -139,6 +139,7 @@ static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = { }; static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, DF2, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, CSV2_frac, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, SSBS, ID_AA64PFR1_EL1_SSBS_NI), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, BT, 0), @@ -187,6 +188,14 @@ static const struct reg_ftr_bits ftr_id_aa64mmfr2_el1[] = { REG_FTR_END, }; +static const struct reg_ftr_bits ftr_id_aa64mmfr3_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, S1POE, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, S1PIE, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, SCTLRX, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, TCRX, 0), + REG_FTR_END, +}; + static const struct reg_ftr_bits ftr_id_aa64zfr0_el1[] = { REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F64MM, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F32MM, 0), @@ -217,6 +226,7 @@ static struct test_feature_reg test_regs[] = { TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1), TEST_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1_el1), TEST_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2_el1), + TEST_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3_el1), TEST_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0_el1), }; @@ -774,8 +784,8 @@ int main(void) ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) + ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + - ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 3 + - MPAM_IDREG_TEST + MTE_IDREG_TEST; + ARRAY_SIZE(ftr_id_aa64mmfr3_el1) + ARRAY_SIZE(ftr_id_aa64zfr0_el1) - + ARRAY_SIZE(test_regs) + 3 + MPAM_IDREG_TEST + MTE_IDREG_TEST; ksft_set_plan(test_cnt); -- cgit v1.2.3 From e0f60ba041a0088a48a5064583e8c36306a8f7e3 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 2 Jul 2025 22:35:19 +0000 Subject: selftest: af_unix: Add test for SO_INQ. Let's add a simple test to check the basic functionality of SO_INQ. The test does the following: 1. Create socketpair in self->fd[] 2. Enable SO_INQ 3. Send data via self->fd[0] 4. Receive data from self->fd[1] 5. Compare the SCM_INQ cmsg with ioctl(SIOCINQ) Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250702223606.1054680-8-kuniyu@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/.gitignore | 1 + tools/testing/selftests/net/af_unix/Makefile | 2 +- tools/testing/selftests/net/af_unix/scm_inq.c | 125 ++++++++++++++++++++++++++ 3 files changed, 127 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/net/af_unix/scm_inq.c (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index c6dd2a335cf4..47c293c2962f 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -34,6 +34,7 @@ reuseport_bpf_numa reuseport_dualstack rxtimestamp sctp_hello +scm_inq scm_pidfd scm_rights sk_bind_sendto_listen diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index 50584479540b..a4b61c6d0290 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,4 +1,4 @@ CFLAGS += $(KHDR_INCLUDES) -TEST_GEN_PROGS := diag_uid msg_oob scm_pidfd scm_rights unix_connect +TEST_GEN_PROGS := diag_uid msg_oob scm_inq scm_pidfd scm_rights unix_connect include ../../lib.mk diff --git a/tools/testing/selftests/net/af_unix/scm_inq.c b/tools/testing/selftests/net/af_unix/scm_inq.c new file mode 100644 index 000000000000..9d22561e7b8f --- /dev/null +++ b/tools/testing/selftests/net/af_unix/scm_inq.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2025 Google LLC */ + +#include +#include +#include +#include + +#include "../../kselftest_harness.h" + +#define NR_CHUNKS 100 +#define MSG_LEN 256 + +struct scm_inq { + struct cmsghdr cmsghdr; + int inq; +}; + +FIXTURE(scm_inq) +{ + int fd[2]; +}; + +FIXTURE_VARIANT(scm_inq) +{ + int type; +}; + +FIXTURE_VARIANT_ADD(scm_inq, stream) +{ + .type = SOCK_STREAM, +}; + +FIXTURE_VARIANT_ADD(scm_inq, dgram) +{ + .type = SOCK_DGRAM, +}; + +FIXTURE_VARIANT_ADD(scm_inq, seqpacket) +{ + .type = SOCK_SEQPACKET, +}; + +FIXTURE_SETUP(scm_inq) +{ + int err; + + err = socketpair(AF_UNIX, variant->type | SOCK_NONBLOCK, 0, self->fd); + ASSERT_EQ(0, err); +} + +FIXTURE_TEARDOWN(scm_inq) +{ + close(self->fd[0]); + close(self->fd[1]); +} + +static void send_chunks(struct __test_metadata *_metadata, + FIXTURE_DATA(scm_inq) *self) +{ + char buf[MSG_LEN] = {}; + int i, ret; + + for (i = 0; i < NR_CHUNKS; i++) { + ret = send(self->fd[0], buf, sizeof(buf), 0); + ASSERT_EQ(sizeof(buf), ret); + } +} + +static void recv_chunks(struct __test_metadata *_metadata, + FIXTURE_DATA(scm_inq) *self) +{ + struct msghdr msg = {}; + struct iovec iov = {}; + struct scm_inq cmsg; + char buf[MSG_LEN]; + int i, ret; + int inq; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = &cmsg; + msg.msg_controllen = CMSG_SPACE(sizeof(cmsg.inq)); + + iov.iov_base = buf; + iov.iov_len = sizeof(buf); + + for (i = 0; i < NR_CHUNKS; i++) { + memset(buf, 0, sizeof(buf)); + memset(&cmsg, 0, sizeof(cmsg)); + + ret = recvmsg(self->fd[1], &msg, 0); + ASSERT_EQ(MSG_LEN, ret); + ASSERT_NE(NULL, CMSG_FIRSTHDR(&msg)); + ASSERT_EQ(CMSG_LEN(sizeof(cmsg.inq)), cmsg.cmsghdr.cmsg_len); + ASSERT_EQ(SOL_SOCKET, cmsg.cmsghdr.cmsg_level); + ASSERT_EQ(SCM_INQ, cmsg.cmsghdr.cmsg_type); + + ret = ioctl(self->fd[1], SIOCINQ, &inq); + ASSERT_EQ(0, ret); + ASSERT_EQ(cmsg.inq, inq); + } +} + +TEST_F(scm_inq, basic) +{ + int err, inq; + + err = setsockopt(self->fd[1], SOL_SOCKET, SO_INQ, &(int){1}, sizeof(int)); + if (variant->type != SOCK_STREAM) { + ASSERT_EQ(-ENOPROTOOPT, -errno); + return; + } + + ASSERT_EQ(0, err); + + err = ioctl(self->fd[1], SIOCINQ, &inq); + ASSERT_EQ(0, err); + ASSERT_EQ(0, inq); + + send_chunks(_metadata, self); + recv_chunks(_metadata, self); +} + +TEST_HARNESS_MAIN -- cgit v1.2.3 From 680acde13ffd10ae4b95ed7d8f1633df38180462 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 3 Jul 2025 22:17:12 +0800 Subject: selftests/bpf: add a new test to check the consumer update case The subtest sends 33 packets at one time on purpose to see if xsk exitting __xsk_generic_xmit() updates the global consumer of tx queue when reaching the max loop (max_tx_budget, 32 by default). The number 33 can avoid xskq_cons_peek_desc() updates the consumer when it's about to quit sending, to accurately check if the issue that the first patch resolves remains. The new case will not check this issue in zero copy mode. Signed-off-by: Jason Xing Acked-by: Maciej Fijalkowski Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250703141712.33190-3-kerneljasonxing@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/bpf/xskxceiver.c | 56 +++++++++++++++++++++++++++++++- tools/testing/selftests/bpf/xskxceiver.h | 1 + 2 files changed, 56 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 0ced4026ee44..a29de0713f19 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -109,6 +109,8 @@ #include +#define MAX_TX_BUDGET_DEFAULT 32 + static bool opt_verbose; static bool opt_print_tests; static enum test_mode opt_mode = TEST_MODE_ALL; @@ -1091,11 +1093,45 @@ static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len) return true; } +static u32 load_value(u32 *counter) +{ + return __atomic_load_n(counter, __ATOMIC_ACQUIRE); +} + +static bool kick_tx_with_check(struct xsk_socket_info *xsk, int *ret) +{ + u32 max_budget = MAX_TX_BUDGET_DEFAULT; + u32 cons, ready_to_send; + int delta; + + cons = load_value(xsk->tx.consumer); + ready_to_send = load_value(xsk->tx.producer) - cons; + *ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); + + delta = load_value(xsk->tx.consumer) - cons; + /* By default, xsk should consume exact @max_budget descs at one + * send in this case where hitting the max budget limit in while + * loop is triggered in __xsk_generic_xmit(). Please make sure that + * the number of descs to be sent is larger than @max_budget, or + * else the tx.consumer will be updated in xskq_cons_peek_desc() + * in time which hides the issue we try to verify. + */ + if (ready_to_send > max_budget && delta != max_budget) + return false; + + return true; +} + static int kick_tx(struct xsk_socket_info *xsk) { int ret; - ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); + if (xsk->check_consumer) { + if (!kick_tx_with_check(xsk, &ret)) + return TEST_FAILURE; + } else { + ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); + } if (ret >= 0) return TEST_PASS; if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) { @@ -2613,6 +2649,23 @@ static int testapp_adjust_tail_grow_mb(struct test_spec *test) XSK_UMEM__LARGE_FRAME_SIZE * 2); } +static int testapp_tx_queue_consumer(struct test_spec *test) +{ + int nr_packets; + + if (test->mode == TEST_MODE_ZC) { + ksft_test_result_skip("Can not run TX_QUEUE_CONSUMER test for ZC mode\n"); + return TEST_SKIP; + } + + nr_packets = MAX_TX_BUDGET_DEFAULT + 1; + pkt_stream_replace(test, nr_packets, MIN_PKT_SIZE); + test->ifobj_tx->xsk->batch_size = nr_packets; + test->ifobj_tx->xsk->check_consumer = true; + + return testapp_validate_traffic(test); +} + static void run_pkt_test(struct test_spec *test) { int ret; @@ -2723,6 +2776,7 @@ static const struct test_spec tests[] = { {.name = "XDP_ADJUST_TAIL_SHRINK_MULTI_BUFF", .test_func = testapp_adjust_tail_shrink_mb}, {.name = "XDP_ADJUST_TAIL_GROW", .test_func = testapp_adjust_tail_grow}, {.name = "XDP_ADJUST_TAIL_GROW_MULTI_BUFF", .test_func = testapp_adjust_tail_grow_mb}, + {.name = "TX_QUEUE_CONSUMER", .test_func = testapp_tx_queue_consumer}, }; static void print_tests(void) diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h index 67fc44b2813b..4df3a5d329ac 100644 --- a/tools/testing/selftests/bpf/xskxceiver.h +++ b/tools/testing/selftests/bpf/xskxceiver.h @@ -95,6 +95,7 @@ struct xsk_socket_info { u32 batch_size; u8 dst_mac[ETH_ALEN]; u8 src_mac[ETH_ALEN]; + bool check_consumer; }; struct pkt { -- cgit v1.2.3 From ad97cb2ed06a6ba9025fd8bd14fa24369550cbb5 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 8 Jul 2025 15:08:56 -0700 Subject: selftests/bpf: Remove enum64 case from __arg_untrusted test suite The enum64 type used by verifier_global_ptr_args test case requires CONFIG_SCHED_CLASS_EXT. At the moment selftets do not depend on this option. There are just a few enum64 types in the kernel. Instead of tying selftests to implementation details of unrelated sub-systems, just remove enum64 test case. Simple enums are covered and that should be sufficient. Fixes: 68cca81fd57f ("selftests/bpf: tests for __arg_untrusted void * global func params") Reported-by: Amery Hung Signed-off-by: Eduard Zingerman Tested-by: Amery Hung Link: https://lore.kernel.org/r/20250708220856.3059578-1-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c index b346f669d159..181da86ba5f0 100644 --- a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c +++ b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c @@ -275,11 +275,6 @@ __weak int subprog_enum_untrusted(enum bpf_attach_type *p __arg_untrusted) return *(int *)p; } -__weak int subprog_enum64_untrusted(enum scx_public_consts *p __arg_untrusted) -{ - return *(int *)p; -} - SEC("tp_btf/sys_enter") __success __log_level(2) @@ -306,10 +301,9 @@ int anything_to_untrusted_mem(void *ctx) subprog_void_untrusted((void *)mem + off); /* variable offset to untrusted mem (trusted) */ subprog_void_untrusted(bpf_get_current_task_btf() + off); - /* variable offset to untrusted char/enum/enum64 (map) */ + /* variable offset to untrusted char/enum (map) */ subprog_char_untrusted(mem + off); subprog_enum_untrusted((void *)mem + off); - subprog_enum64_untrusted((void *)mem + off); return 0; } -- cgit v1.2.3 From 750aef513c610a37a13732ec64902428b839715e Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Wed, 9 Jul 2025 17:55:10 +0200 Subject: selftests/nolibc: show failed run if test process crashes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The logic would not catch if the test process crashes and would incorrectly report a "success" state. Fix this by looking for the final "Total number of errors:" message and printing "failure" if it was not seen. Signed-off-by: Benjamin Berg Link: https://lore.kernel.org/r/20250709155512.971080-2-benjamin@sipsolutions.net [Thomas: fix patch prefix] Signed-off-by: Thomas Weißschuh --- tools/testing/selftests/nolibc/Makefile.nolibc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/nolibc/Makefile.nolibc b/tools/testing/selftests/nolibc/Makefile.nolibc index 8cb65392fa10..51ba853dd97e 100644 --- a/tools/testing/selftests/nolibc/Makefile.nolibc +++ b/tools/testing/selftests/nolibc/Makefile.nolibc @@ -245,8 +245,9 @@ endif include $(srctree)/tools/scripts/Makefile.include REPORT ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{if (!f) printf("\n"); f++; print;} /\[SKIPPED\][\r]*$$/{s++} \ + /^Total number of errors:/{done++} \ END{ printf("\n%3d test(s): %3d passed, %3d skipped, %3d failed => status: ", p+s+f, p, s, f); \ - if (f || !p) printf("failure\n"); else if (s) printf("warning\n"); else printf("success\n");; \ + if (f || !p || !done) printf("failure\n"); else if (s) printf("warning\n"); else printf("success\n");; \ printf("\nSee all results in %s\n", ARGV[1]); }' help: -- cgit v1.2.3 From e83ee6f76c33de80d5fe4cec523e2b95bfc5e3ea Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 25 Jun 2025 17:12:23 -0700 Subject: KVM: selftests: Expand set of APIs for pinning tasks to a single CPU Expand kvm_pin_this_task_to_pcpu() into a set of APIs to allow pinning a task (or self) to a CPU (any or specific). This will allow deduplicating code throughout a variety of selftests. Opportunistically use "self" instead of "this_task" as it is both more concise and less ambiguous. Link: https://lore.kernel.org/r/20250626001225.744268-4-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/include/kvm_util.h | 31 +++++++++++++++++++++++++- tools/testing/selftests/kvm/lib/kvm_util.c | 15 ++++++------- tools/testing/selftests/kvm/lib/memstress.c | 2 +- 3 files changed, 38 insertions(+), 10 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index bee65ca08721..492f0edc7955 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -20,6 +20,8 @@ #include +#include + #include "kvm_util_arch.h" #include "kvm_util_types.h" #include "sparsebit.h" @@ -1013,7 +1015,34 @@ struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm); void kvm_set_files_rlimit(uint32_t nr_vcpus); -void kvm_pin_this_task_to_pcpu(uint32_t pcpu); +int __pin_task_to_cpu(pthread_t task, int cpu); + +static inline void pin_task_to_cpu(pthread_t task, int cpu) +{ + int r; + + r = __pin_task_to_cpu(task, cpu); + TEST_ASSERT(!r, "Failed to set thread affinity to pCPU '%u'", cpu); +} + +static inline int pin_task_to_any_cpu(pthread_t task) +{ + int cpu = sched_getcpu(); + + pin_task_to_cpu(task, cpu); + return cpu; +} + +static inline void pin_self_to_cpu(int cpu) +{ + pin_task_to_cpu(pthread_self(), cpu); +} + +static inline int pin_self_to_any_cpu(void) +{ + return pin_task_to_any_cpu(pthread_self()); +} + void kvm_print_vcpu_pinning_help(void); void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], int nr_vcpus); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index a055343a7bf7..1a59d69edc6e 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -605,15 +605,14 @@ struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm) return vm_vcpu_recreate(vm, 0); } -void kvm_pin_this_task_to_pcpu(uint32_t pcpu) +int __pin_task_to_cpu(pthread_t task, int cpu) { - cpu_set_t mask; - int r; + cpu_set_t cpuset; - CPU_ZERO(&mask); - CPU_SET(pcpu, &mask); - r = sched_setaffinity(0, sizeof(mask), &mask); - TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.", pcpu); + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); + + return pthread_setaffinity_np(task, sizeof(cpuset), &cpuset); } static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask) @@ -667,7 +666,7 @@ void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], /* 2. Check if the main worker needs to be pinned. */ if (cpu) { - kvm_pin_this_task_to_pcpu(parse_pcpu(cpu, &allowed_mask)); + pin_self_to_cpu(parse_pcpu(cpu, &allowed_mask)); cpu = strtok(NULL, delim); } diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c index 313277486a1d..557c0a0a5658 100644 --- a/tools/testing/selftests/kvm/lib/memstress.c +++ b/tools/testing/selftests/kvm/lib/memstress.c @@ -265,7 +265,7 @@ static void *vcpu_thread_main(void *data) int vcpu_idx = vcpu->vcpu_idx; if (memstress_args.pin_vcpus) - kvm_pin_this_task_to_pcpu(memstress_args.vcpu_to_pcpu[vcpu_idx]); + pin_self_to_cpu(memstress_args.vcpu_to_pcpu[vcpu_idx]); WRITE_ONCE(vcpu->running, true); -- cgit v1.2.3 From df98ce784aebdf4b1448ec2abfbbcd8f48b2e295 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 25 Jun 2025 17:12:24 -0700 Subject: KVM: selftests: Test behavior of KVM_X86_DISABLE_EXITS_APERFMPERF For a VCPU thread pinned to a single LPU, verify that interleaved host and guest reads of IA32_[AM]PERF return strictly increasing values when APERFMPERF exiting is disabled. Run the test in both L1 and L2 to verify that KVM passes through the APERF and MPERF MSRs when L1 doesn't want to intercept them (or any MSRs). Signed-off-by: Jim Mattson Link: https://lore.kernel.org/r/20250530185239.2335185-4-jmattson@google.com Co-developed-by: Sean Christopherson Link: https://lore.kernel.org/r/20250626001225.744268-5-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/Makefile.kvm | 1 + tools/testing/selftests/kvm/x86/aperfmperf_test.c | 213 ++++++++++++++++++++++ 2 files changed, 214 insertions(+) create mode 100644 tools/testing/selftests/kvm/x86/aperfmperf_test.c (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index 38b95998e1e6..0322444a42ad 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -134,6 +134,7 @@ TEST_GEN_PROGS_x86 += x86/amx_test TEST_GEN_PROGS_x86 += x86/max_vcpuid_cap_test TEST_GEN_PROGS_x86 += x86/triple_fault_event_test TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test +TEST_GEN_PROGS_x86 += x86/aperfmperf_test TEST_GEN_PROGS_x86 += access_tracking_perf_test TEST_GEN_PROGS_x86 += coalesced_io_test TEST_GEN_PROGS_x86 += dirty_log_perf_test diff --git a/tools/testing/selftests/kvm/x86/aperfmperf_test.c b/tools/testing/selftests/kvm/x86/aperfmperf_test.c new file mode 100644 index 000000000000..8b15a13df939 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/aperfmperf_test.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Test for KVM_X86_DISABLE_EXITS_APERFMPERF + * + * Copyright (C) 2025, Google LLC. + * + * Test the ability to disable VM-exits for rdmsr of IA32_APERF and + * IA32_MPERF. When these VM-exits are disabled, reads of these MSRs + * return the host's values. + * + * Note: Requires read access to /dev/cpu//msr to read host MSRs. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" +#include "test_util.h" +#include "vmx.h" + +#define NUM_ITERATIONS 10000 + +static int open_dev_msr(int cpu) +{ + char path[PATH_MAX]; + + snprintf(path, sizeof(path), "/dev/cpu/%d/msr", cpu); + return open_path_or_exit(path, O_RDONLY); +} + +static uint64_t read_dev_msr(int msr_fd, uint32_t msr) +{ + uint64_t data; + ssize_t rc; + + rc = pread(msr_fd, &data, sizeof(data), msr); + TEST_ASSERT(rc == sizeof(data), "Read of MSR 0x%x failed", msr); + + return data; +} + +static void guest_read_aperf_mperf(void) +{ + int i; + + for (i = 0; i < NUM_ITERATIONS; i++) + GUEST_SYNC2(rdmsr(MSR_IA32_APERF), rdmsr(MSR_IA32_MPERF)); +} + +#define L2_GUEST_STACK_SIZE 64 + +static void l2_guest_code(void) +{ + guest_read_aperf_mperf(); + GUEST_DONE(); +} + +static void l1_svm_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + struct vmcb *vmcb = svm->vmcb; + + generic_svm_setup(svm, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + run_guest(vmcb, svm->vmcb_gpa); +} + +static void l1_vmx_code(struct vmx_pages *vmx) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true); + GUEST_ASSERT_EQ(load_vmcs(vmx), true); + + prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* + * Enable MSR bitmaps (the bitmap itself is allocated, zeroed, and set + * in the VMCS by prepare_vmcs()), as MSR exiting mandatory on Intel. + */ + vmwrite(CPU_BASED_VM_EXEC_CONTROL, + vmreadz(CPU_BASED_VM_EXEC_CONTROL) | CPU_BASED_USE_MSR_BITMAPS); + + GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code)); + GUEST_ASSERT(!vmlaunch()); +} + +static void guest_code(void *nested_test_data) +{ + guest_read_aperf_mperf(); + + if (this_cpu_has(X86_FEATURE_SVM)) + l1_svm_code(nested_test_data); + else if (this_cpu_has(X86_FEATURE_VMX)) + l1_vmx_code(nested_test_data); + else + GUEST_DONE(); + + TEST_FAIL("L2 should have signaled 'done'"); +} + +static void guest_no_aperfmperf(void) +{ + uint64_t msr_val; + uint8_t vector; + + vector = rdmsr_safe(MSR_IA32_APERF, &msr_val); + GUEST_ASSERT(vector == GP_VECTOR); + + vector = rdmsr_safe(MSR_IA32_APERF, &msr_val); + GUEST_ASSERT(vector == GP_VECTOR); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + const bool has_nested = kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX); + uint64_t host_aperf_before, host_mperf_before; + vm_vaddr_t nested_test_data_gva; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int msr_fd, cpu, i; + + /* Sanity check that APERF/MPERF are unsupported by default. */ + vm = vm_create_with_one_vcpu(&vcpu, guest_no_aperfmperf); + vcpu_run(vcpu); + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + kvm_vm_free(vm); + + cpu = pin_self_to_any_cpu(); + + msr_fd = open_dev_msr(cpu); + + /* + * This test requires a non-standard VM initialization, because + * KVM_ENABLE_CAP cannot be used on a VM file descriptor after + * a VCPU has been created. + */ + vm = vm_create(1); + + TEST_REQUIRE(vm_check_cap(vm, KVM_CAP_X86_DISABLE_EXITS) & + KVM_X86_DISABLE_EXITS_APERFMPERF); + + vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, + KVM_X86_DISABLE_EXITS_APERFMPERF); + + vcpu = vm_vcpu_add(vm, 0, guest_code); + + if (!has_nested) + nested_test_data_gva = NONCANONICAL; + else if (kvm_cpu_has(X86_FEATURE_SVM)) + vcpu_alloc_svm(vm, &nested_test_data_gva); + else + vcpu_alloc_vmx(vm, &nested_test_data_gva); + + vcpu_args_set(vcpu, 1, nested_test_data_gva); + + host_aperf_before = read_dev_msr(msr_fd, MSR_IA32_APERF); + host_mperf_before = read_dev_msr(msr_fd, MSR_IA32_MPERF); + + for (i = 0; i <= NUM_ITERATIONS * (1 + has_nested); i++) { + uint64_t host_aperf_after, host_mperf_after; + uint64_t guest_aperf, guest_mperf; + struct ucall uc; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_DONE: + goto done; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + case UCALL_SYNC: + guest_aperf = uc.args[0]; + guest_mperf = uc.args[1]; + + host_aperf_after = read_dev_msr(msr_fd, MSR_IA32_APERF); + host_mperf_after = read_dev_msr(msr_fd, MSR_IA32_MPERF); + + TEST_ASSERT(host_aperf_before < guest_aperf, + "APERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")", + host_aperf_before, guest_aperf); + TEST_ASSERT(guest_aperf < host_aperf_after, + "APERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")", + guest_aperf, host_aperf_after); + TEST_ASSERT(host_mperf_before < guest_mperf, + "MPERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")", + host_mperf_before, guest_mperf); + TEST_ASSERT(guest_mperf < host_mperf_after, + "MPERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")", + guest_mperf, host_mperf_after); + + host_aperf_before = host_aperf_after; + host_mperf_before = host_mperf_after; + + break; + } + } + TEST_FAIL("Didn't receive UCALL_DONE\n"); +done: + kvm_vm_free(vm); + close(msr_fd); + + return 0; +} -- cgit v1.2.3 From 95826e1ed3592cb81262c7e533ddea60751095c9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 25 Jun 2025 17:12:25 -0700 Subject: KVM: selftests: Convert arch_timer tests to common helpers to pin task Convert the arch timer tests to use __pin_task_to_cpu() and pin_self_to_cpu(). No functional change intended. Link: https://lore.kernel.org/r/20250626001225.744268-6-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/arch_timer.c | 7 +------ .../selftests/kvm/arm64/arch_timer_edge_cases.c | 23 ++-------------------- 2 files changed, 3 insertions(+), 27 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/kvm/arch_timer.c b/tools/testing/selftests/kvm/arch_timer.c index acb2cb596332..cf8fb67104f1 100644 --- a/tools/testing/selftests/kvm/arch_timer.c +++ b/tools/testing/selftests/kvm/arch_timer.c @@ -98,16 +98,11 @@ static uint32_t test_get_pcpu(void) static int test_migrate_vcpu(unsigned int vcpu_idx) { int ret; - cpu_set_t cpuset; uint32_t new_pcpu = test_get_pcpu(); - CPU_ZERO(&cpuset); - CPU_SET(new_pcpu, &cpuset); - pr_debug("Migrating vCPU: %u to pCPU: %u\n", vcpu_idx, new_pcpu); - ret = pthread_setaffinity_np(pt_vcpu_run[vcpu_idx], - sizeof(cpuset), &cpuset); + ret = __pin_task_to_cpu(pt_vcpu_run[vcpu_idx], new_pcpu); /* Allow the error where the vCPU thread is already finished */ TEST_ASSERT(ret == 0 || ret == ESRCH, diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c index 4e71740a098b..ce74d069cb7b 100644 --- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c @@ -862,25 +862,6 @@ static uint32_t next_pcpu(void) return next; } -static void migrate_self(uint32_t new_pcpu) -{ - int ret; - cpu_set_t cpuset; - pthread_t thread; - - thread = pthread_self(); - - CPU_ZERO(&cpuset); - CPU_SET(new_pcpu, &cpuset); - - pr_debug("Migrating from %u to %u\n", sched_getcpu(), new_pcpu); - - ret = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset); - - TEST_ASSERT(ret == 0, "Failed to migrate to pCPU: %u; ret: %d\n", - new_pcpu, ret); -} - static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt, enum arch_timer timer) { @@ -907,7 +888,7 @@ static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc) sched_yield(); break; case USERSPACE_MIGRATE_SELF: - migrate_self(next_pcpu()); + pin_self_to_cpu(next_pcpu()); break; default: break; @@ -919,7 +900,7 @@ static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu) struct ucall uc; /* Start on CPU 0 */ - migrate_self(0); + pin_self_to_cpu(0); while (true) { vcpu_run(vcpu); -- cgit v1.2.3 From f9e4e0a663d239f944649c5201879c7471615dd0 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 8 Jul 2025 16:06:32 -0700 Subject: KVM: arm64: selftests: Test ESR propagation for vSError injection Ensure that vSErrors taken in the guest have an appropriate ESR_ELx value for the expected exception. Additionally, switch the EASE test to install the SEA handler at the SError offset, as the ESR is still expected to match an SEA in that case. Link: https://lore.kernel.org/r/20250708230632.1954240-3-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- .../testing/selftests/kvm/arm64/external_aborts.c | 32 ++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/kvm/arm64/external_aborts.c b/tools/testing/selftests/kvm/arm64/external_aborts.c index ec7e30776a0e..062bf84cced1 100644 --- a/tools/testing/selftests/kvm/arm64/external_aborts.c +++ b/tools/testing/selftests/kvm/arm64/external_aborts.c @@ -7,7 +7,8 @@ #include "processor.h" #include "test_util.h" -#define MMIO_ADDR 0x8000000ULL +#define MMIO_ADDR 0x8000000ULL +#define EXPECTED_SERROR_ISS (ESR_ELx_ISV | 0x1d1ed) static u64 expected_abort_pc; @@ -49,11 +50,28 @@ static void vcpu_inject_sea(struct kvm_vcpu *vcpu) vcpu_events_set(vcpu, &events); } +static bool vcpu_has_ras(struct kvm_vcpu *vcpu) +{ + u64 pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); + + return SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, pfr0); +} + +static bool guest_has_ras(void) +{ + return SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, read_sysreg(id_aa64pfr0_el1)); +} + static void vcpu_inject_serror(struct kvm_vcpu *vcpu) { struct kvm_vcpu_events events = {}; events.exception.serror_pending = true; + if (vcpu_has_ras(vcpu)) { + events.exception.serror_has_esr = true; + events.exception.serror_esr = EXPECTED_SERROR_ISS; + } + vcpu_events_set(vcpu, &events); } @@ -199,6 +217,12 @@ static void test_serror_masked(void) static void expect_serror_handler(struct ex_regs *regs) { + u64 esr = read_sysreg(esr_el1); + + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_SERROR); + if (guest_has_ras()) + GUEST_ASSERT_EQ(ESR_ELx_ISS(esr), EXPECTED_SERROR_ISS); + GUEST_DONE(); } @@ -277,7 +301,11 @@ static void test_mmio_ease(void) return; } - vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler); + /* + * SCTLR2_ELx.EASE changes the exception vector to the SError vector but + * doesn't further modify the exception context (e.g. ESR_ELx, FAR_ELx). + */ + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_sea_handler); vcpu_run(vcpu); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO); -- cgit v1.2.3 From d55683866c79a3af1e334126f32841d05e7e4143 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Sat, 5 Jul 2025 17:36:38 -0300 Subject: selftests/tc-testing: Create test case for UAF scenario with DRR/NETEM/BLACKHOLE chain Create a tdc test for the UAF scenario with DRR/NETEM/BLACKHOLE chain shared by Lion on his report [1]. [1] https://lore.kernel.org/netdev/45876f14-cf28-4177-8ead-bb769fd9e57a@gmail.com/ Signed-off-by: Victor Nogueira Acked-by: Cong Wang Link: https://patch.msgid.link/20250705203638.246350-1-victor@mojatatu.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/infra/qdiscs.json | 37 ++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 9aa44d8176d9..5c6851e8d311 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -635,5 +635,42 @@ "$TC qdisc del dev $DUMMY handle 1:0 root", "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] + }, + { + "id": "d74b", + "name": "Test use-after-free with DRR/NETEM/BLACKHOLE chain", + "category": [ + "qdisc", + "hfsc", + "drr", + "netem", + "blackhole" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: drr", + "$TC filter add dev $DUMMY parent 1: basic classid 1:1", + "$TC class add dev $DUMMY parent 1: classid 1:1 drr", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2: hfsc def 1", + "$TC class add dev $DUMMY parent 2: classid 2:1 hfsc rt m1 8 d 1 m2 0", + "$TC qdisc add dev $DUMMY parent 2:1 handle 3: netem", + "$TC qdisc add dev $DUMMY parent 3:1 handle 4: blackhole", + "ping -c1 -W0.01 -I $DUMMY 10.10.11.11 || true", + "$TC class del dev $DUMMY classid 1:1" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.11.11", + "expExitCode": "1", + "verifyCmd": "$TC -j class ls dev $DUMMY classid 1:1", + "matchJSON": [], + "teardown": [ + "$TC qdisc del dev $DUMMY root handle 1: drr" + ] } ] -- cgit v1.2.3 From b939c074efc160648de884a41aeb5e857f2c5c68 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Jul 2025 21:39:00 +0000 Subject: selftests/net: packetdrill: add tcp_ooo-before-and-after-accept.pkt Test how new passive flows react to ooo incoming packets. Their sk_rcvbuf can increase only after accept(). Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250707213900.1543248-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- .../tcp_ooo-before-and-after-accept.pkt | 53 ++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt new file mode 100644 index 000000000000..09aabc775e80 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_rmem="4096 131072 $((32*1024*1024))"` + +// Test that a not-yet-accepted socket does not change +// its initial sk_rcvbuf (tcp_rmem[1]) when receiving ooo packets. + + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 + +0 > S. 0:0(0) ack 1 + +.1 < . 1:1(0) ack 1 win 257 + +0 < . 2001:41001(39000) ack 1 win 257 + +0 > . 1:1(0) ack 1 + +0 < . 41001:101001(60000) ack 1 win 257 + +0 > . 1:1(0) ack 1 + +0 < . 1:1001(1000) ack 1 win 257 + +0 > . 1:1(0) ack 1001 + +0 < . 1001:2001(1000) ack 1 win 257 + +0 > . 1:1(0) ack 101001 + + +0 accept(3, ..., ...) = 4 + + +0 %{ assert SK_MEMINFO_RCVBUF == 131072, SK_MEMINFO_RCVBUF }% + + +0 close(4) = 0 + +0 close(3) = 0 + +// Test that ooo packets for accepted sockets do increase sk_rcvbuf + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 + +0 > S. 0:0(0) ack 1 + +.1 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < . 2001:41001(39000) ack 1 win 257 + +0 > . 1:1(0) ack 1 + +0 < . 41001:101001(60000) ack 1 win 257 + +0 > . 1:1(0) ack 1 + + +0 %{ assert SK_MEMINFO_RCVBUF > 131072, SK_MEMINFO_RCVBUF }% + -- cgit v1.2.3 From 4a1ff347e44ce45e8a5c15f466574583b019e4f9 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 29 May 2025 18:15:48 +0100 Subject: tools/testing/selftests: add VMA merge tests for KSM merge Add test to assert that we have now allowed merging of VMAs when KSM merging-by-default has been set by prctl(PR_SET_MEMORY_MERGE, ...). We simply perform a trivial mapping of adjacent VMAs expecting a merge, however prior to recent changes implementing this mode earlier than before, these merges would not have succeeded. Assert that we have fixed this! Link: https://lkml.kernel.org/r/6dec7aabf062c6b121cfac992c9c716cefdda00c.1748537921.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Chengming Zhou Tested-by: Chengming Zhou Reviewed-by: Liam R. Howlett Acked-by: Vlastimil Babka Cc: Al Viro Cc: Christian Brauner Cc: David Hildenbrand Cc: Jan Kara Cc: Jann Horn Cc: Stefan Roesch Cc: Xu Xin Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/merge.c | 78 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/mm/merge.c b/tools/testing/selftests/mm/merge.c index cc26480098ae..150dd5baed2b 100644 --- a/tools/testing/selftests/mm/merge.c +++ b/tools/testing/selftests/mm/merge.c @@ -2,11 +2,13 @@ #define _GNU_SOURCE #include "../kselftest_harness.h" +#include #include #include #include #include #include +#include #include #include #include @@ -34,6 +36,11 @@ FIXTURE_TEARDOWN(merge) { ASSERT_EQ(munmap(self->carveout, 12 * self->page_size), 0); ASSERT_EQ(close_procmap(&self->procmap), 0); + /* + * Clear unconditionally, as some tests set this. It is no issue if this + * fails (KSM may be disabled for instance). + */ + prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0); } TEST_F(merge, mprotect_unfaulted_left) @@ -498,4 +505,75 @@ out: remove(probe_file); } +TEST_F(merge, ksm_merge) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr, *ptr2; + int err; + + /* + * Map two R/W immediately adjacent to one another, they should + * trivially merge: + * + * |-----------|-----------| + * | R/W | R/W | + * |-----------|-----------| + * ptr ptr2 + */ + + ptr = mmap(&carveout[page_size], page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + ptr2 = mmap(&carveout[2 * page_size], page_size, + PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 2 * page_size); + + /* Unmap the second half of this merged VMA. */ + ASSERT_EQ(munmap(ptr2, page_size), 0); + + /* OK, now enable global KSM merge. We clear this on test teardown. */ + err = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0); + if (err == -1) { + int errnum = errno; + + /* Only non-failure case... */ + ASSERT_EQ(errnum, EINVAL); + /* ...but indicates we should skip. */ + SKIP(return, "KSM memory merging not supported, skipping."); + } + + /* + * Now map a VMA adjacent to the existing that was just made + * VM_MERGEABLE, this should merge as well. + */ + ptr2 = mmap(&carveout[2 * page_size], page_size, + PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 2 * page_size); + + /* Now this VMA altogether. */ + ASSERT_EQ(munmap(ptr, 2 * page_size), 0); + + /* Try the same operation as before, asserting this also merges fine. */ + ptr = mmap(&carveout[page_size], page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + ptr2 = mmap(&carveout[2 * page_size], page_size, + PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 2 * page_size); +} + TEST_HARNESS_MAIN -- cgit v1.2.3 From be3d3343d467905b14fe62cb1dafa68906e4d5ed Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 10 Jun 2025 15:13:54 +0100 Subject: kselftest/mm: clarify errors for pipe() Patch series "selftests/mm: Tweaks to the cow test". A collection of non-functional updates from David Hildenbrand's review. This patch (of 4): Specify that errors reported from pipe() failures are the result of failures. Link: https://lkml.kernel.org/r/20250610-selftest-mm-cow-tweaks-v1-0-43cd7457500f@kernel.org Link: https://lkml.kernel.org/r/20250610-selftest-mm-cow-tweaks-v1-1-43cd7457500f@kernel.org Signed-off-by: Mark Brown Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Cc: Mark Brown Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/cow.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index dbbcc5eb3dce..0adc0ab142c1 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -113,11 +113,11 @@ struct comm_pipes { static int setup_comm_pipes(struct comm_pipes *comm_pipes) { if (pipe(comm_pipes->child_ready) < 0) { - ksft_perror("pipe()"); + ksft_perror("pipe() failed"); return -errno; } if (pipe(comm_pipes->parent_ready) < 0) { - ksft_perror("pipe()"); + ksft_perror("pipe() failed"); close(comm_pipes->child_ready[0]); close(comm_pipes->child_ready[1]); return -errno; -- cgit v1.2.3 From 5fbfb1f39da0a5cbfe66e9ab67790e607b8f6e58 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 10 Jun 2025 15:13:55 +0100 Subject: selftests/mm: convert some cow error reports to ksft_perror() This prints the errno and a string decode of it. Link: https://lkml.kernel.org/r/20250610-selftest-mm-cow-tweaks-v1-2-43cd7457500f@kernel.org Signed-off-by: Mark Brown Acked-by: David Hildenbrand Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/cow.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index 0adc0ab142c1..ff80329313e4 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -332,7 +332,7 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, if (before_fork) { transferred = vmsplice(fds[1], &iov, 1, 0); if (transferred <= 0) { - ksft_print_msg("vmsplice() failed\n"); + ksft_perror("vmsplice() failed\n"); log_test_result(KSFT_FAIL); goto close_pipe; } @@ -562,7 +562,7 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) while (total < size) { cur = pread(fd, tmp + total, size - total, total); if (cur < 0) { - ksft_print_msg("pread() failed\n"); + ksft_perror("pread() failed\n"); log_test_result(KSFT_FAIL); goto quit_child; } @@ -628,7 +628,7 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, tmp = malloc(size); if (!tmp) { - ksft_print_msg("malloc() failed\n"); + ksft_perror("malloc() failed\n"); log_test_result(KSFT_FAIL); return; } -- cgit v1.2.3 From 32dc2d5e3f0de7a62dd85b2ec2be2964449c4796 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 10 Jun 2025 15:13:56 +0100 Subject: selftests/mm: don't compare return values to in cow Tweak the coding style for checking for non-zero return values. While we're at it also remove a now redundant oring of the madvise() return code. Link: https://lkml.kernel.org/r/20250610-selftest-mm-cow-tweaks-v1-3-43cd7457500f@kernel.org Signed-off-by: Mark Brown Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/cow.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index ff80329313e4..48fcf03aa4cd 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -1613,13 +1613,13 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) smem = (char *)(((uintptr_t)mmap_smem + pmdsize) & ~(pmdsize - 1)); ret = madvise(mem, pmdsize, MADV_HUGEPAGE); - if (ret != 0) { + if (ret) { ksft_perror("madvise()"); log_test_result(KSFT_FAIL); goto munmap; } - ret |= madvise(smem, pmdsize, MADV_HUGEPAGE); - if (ret != 0) { + ret = madvise(smem, pmdsize, MADV_HUGEPAGE); + if (ret) { ksft_perror("madvise()"); log_test_result(KSFT_FAIL); goto munmap; -- cgit v1.2.3 From 4ff52d4a2de1c2adc77b2cca82b542cd916b42a9 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 10 Jun 2025 15:13:57 +0100 Subject: selftests/mm: add messages about test errors to the cow tests It is not sufficiently clear what the individual tests in the cow test program are checking so add messages for the failure cases. Link: https://lkml.kernel.org/r/20250610-selftest-mm-cow-tweaks-v1-4-43cd7457500f@kernel.org Signed-off-by: Mark Brown Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/cow.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index 48fcf03aa4cd..29767e2afdd0 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -268,8 +268,10 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect, * fail because (a) harder to fix and (b) nobody really cares. * Flag them as expected failure for now. */ + ksft_print_msg("Leak from parent into child\n"); log_test_result(KSFT_XFAIL); } else { + ksft_print_msg("Leak from parent into child\n"); log_test_result(KSFT_FAIL); } close_comm_pipes: @@ -397,8 +399,10 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, * fail because (a) harder to fix and (b) nobody really cares. * Flag them as expected failure for now. */ + ksft_print_msg("Leak from child into parent\n"); log_test_result(KSFT_XFAIL); } else { + ksft_print_msg("Leak from child into parent\n"); log_test_result(KSFT_FAIL); } close_pipe: @@ -570,10 +574,12 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) } /* Finally, check if we read what we expected. */ - if (!memcmp(mem, tmp, size)) + if (!memcmp(mem, tmp, size)) { log_test_result(KSFT_PASS); - else + } else { + ksft_print_msg("Longtom R/W pin is not reliable\n"); log_test_result(KSFT_FAIL); + } quit_child: if (use_fork) { @@ -725,10 +731,12 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, ksft_perror("PIN_LONGTERM_TEST_READ failed"); log_test_result(KSFT_FAIL); } else { - if (!memcmp(mem, tmp, size)) + if (!memcmp(mem, tmp, size)) { log_test_result(KSFT_PASS); - else + } else { + ksft_print_msg("Longterm R/O pin is not reliable\n"); log_test_result(KSFT_FAIL); + } } ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP); @@ -1417,10 +1425,12 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, else ret = -EINVAL; - if (!ret) + if (!ret) { log_test_result(KSFT_PASS); - else + } else { + ksft_print_msg("Leak from parent into child\n"); log_test_result(KSFT_FAIL); + } close_comm_pipes: close_comm_pipes(&comm_pipes); } @@ -1528,10 +1538,12 @@ static void test_cow(char *mem, const char *smem, size_t size) memset(mem, 0xff, size); /* See if we still read the old values via the other mapping. */ - if (!memcmp(smem, old, size)) + if (!memcmp(smem, old, size)) { log_test_result(KSFT_PASS); - else + } else { + ksft_print_msg("Other mapping modified\n"); log_test_result(KSFT_FAIL); + } free(old); } -- cgit v1.2.3 From ba78585585d9f5ec4dd32de7b3027a86fe33cee0 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 10 Jun 2025 15:07:44 +0100 Subject: selftests/mm: check for YAMA ptrace_scope configuraiton before modifying it When running the memfd_secret test run_vmtests.sh unconditionally tries to confgiure the YAMA LSM's ptrace_scope configuration, leading to an error if YAMA is not in the running kernel: # ./run_vmtests.sh: line 432: /proc/sys/kernel/yama/ptrace_scope: No such file or directory # # ---------------------- # # running ./memfd_secret # # ---------------------- Check that this file is present before trying to write to it. The indentation here is a bit odd, and it doesn't seem great that we configure but don't restore ptrace_scope. Link: https://lkml.kernel.org/r/20250610-selftest-mm-enable-yama-v1-1-0097b6713116@kernel.org Signed-off-by: Mark Brown Acked-by: Mike Rapoport (Microsoft) Acked-by: David Hildenbrand Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/run_vmtests.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index dddd1dd8af14..33fc7fafa8f9 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -429,7 +429,9 @@ CATEGORY="vma_merge" run_test ./merge if [ -x ./memfd_secret ] then -(echo 0 > /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix +if [ -f /proc/sys/kernel/yama/ptrace_scope ]; then + (echo 0 > /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix +fi CATEGORY="memfd_secret" run_test ./memfd_secret fi -- cgit v1.2.3 From a788b6e571f3cb1aceb1611e97010be7334cbd63 Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Wed, 11 Jun 2025 10:01:06 +0000 Subject: selftests/mm: use generic read_sysfs in thuge-gen test As generic read_sysfs is available in vm_utils, let's use is in thuge-gen test. Link: https://lkml.kernel.org/r/20250611100106.1331197-1-pulehui@huaweicloud.com Signed-off-by: Pu Lehui Reviewed-by: Lorenzo Stoakes Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/thuge-gen.c | 38 +++++++++------------------------- 1 file changed, 10 insertions(+), 28 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c index 95b6f043a3cb..8e2b08dc5762 100644 --- a/tools/testing/selftests/mm/thuge-gen.c +++ b/tools/testing/selftests/mm/thuge-gen.c @@ -77,40 +77,20 @@ void show(unsigned long ps) system(buf); } -unsigned long thuge_read_sysfs(int warn, char *fmt, ...) +unsigned long read_free(unsigned long ps) { - char *line = NULL; - size_t linelen = 0; - char buf[100]; - FILE *f; - va_list ap; unsigned long val = 0; + char buf[100]; - va_start(ap, fmt); - vsnprintf(buf, sizeof buf, fmt, ap); - va_end(ap); + snprintf(buf, sizeof(buf), + "/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages", + ps >> 10); + if (read_sysfs(buf, &val) && ps != getpagesize()) + ksft_print_msg("missing %s\n", buf); - f = fopen(buf, "r"); - if (!f) { - if (warn) - ksft_print_msg("missing %s\n", buf); - return 0; - } - if (getline(&line, &linelen, f) > 0) { - sscanf(line, "%lu", &val); - } - fclose(f); - free(line); return val; } -unsigned long read_free(unsigned long ps) -{ - return thuge_read_sysfs(ps != getpagesize(), - "/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages", - ps >> 10); -} - void test_mmap(unsigned long size, unsigned flags) { char *map; @@ -173,6 +153,7 @@ void test_shmget(unsigned long size, unsigned flags) void find_pagesizes(void) { unsigned long largest = getpagesize(); + unsigned long shmmax_val = 0; int i; glob_t g; @@ -195,7 +176,8 @@ void find_pagesizes(void) } globfree(&g); - if (thuge_read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest) + read_sysfs("/proc/sys/kernel/shmmax", &shmmax_val); + if (shmmax_val < NUM_PAGES * largest) ksft_exit_fail_msg("Please do echo %lu > /proc/sys/kernel/shmmax", largest * NUM_PAGES); -- cgit v1.2.3 From 7962e05a835f5a40fed4f13f415d6a48b00a93c3 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Fri, 13 Jun 2025 09:49:19 +0800 Subject: selftests: khugepaged: fix the shmem collapse failure When running the khugepaged selftest for shmem (./khugepaged all:shmem), I encountered the following test failures: : Run test: collapse_full (khugepaged:shmem) : Collapse multiple fully populated PTE table.... Fail : ... : Run test: collapse_single_pte_entry (khugepaged:shmem) : Collapse PTE table with single PTE entry present.... Fail : ... : Run test: collapse_full_of_compound (khugepaged:shmem) : Allocate huge page... OK : Split huge page leaving single PTE page table full of compound pages... OK : Collapse PTE table full of compound pages.... Fail The reason for the failure is that it will set MADV_NOHUGEPAGE to prevent khugepaged from continuing to scan shmem VMA after khugepaged finishes scanning in the wait_for_scan() function. Moreover, shmem requires a refault to establish PMD mappings. However, after commit 2b0f922323cc ("mm: don't install PMD mappings when THPs are disabled by the hw/process/vma"), PMD mappings are prevented if the VMA is set with MADV_NOHUGEPAGE flag, so shmem cannot establish PMD mappings during refault. One way to fix this issue is to move the MADV_NOHUGEPAGE setting after the shmem refault. After shmem refault and check huge, the test case will unmap the shmem immediately. So it seems unnecessary to set the MADV_NOHUGEPAGE. Then we can simply drop the MADV_NOHUGEPAGE setting, and all khugepaged test cases passed. Link: https://lkml.kernel.org/r/d8502fc50d0304c2afd27ced062b1d636b7a872e.1749779183.git.baolin.wang@linux.alibaba.com Fixes: 2b0f922323cc ("mm: don't install PMD mappings when THPs are disabled by the hw/process/vma") Signed-off-by: Baolin Wang Reviewed-by: Zi Yan Acked-by: David Hildenbrand Reviewed-by: Dev Jain Tested-by: Dev Jain Tested-by: Mario Casquero Cc: Barry Song Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Mariano Pache Cc: Ryan Roberts Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/khugepaged.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c index 8a4d34cce36b..4341ce6b3b38 100644 --- a/tools/testing/selftests/mm/khugepaged.c +++ b/tools/testing/selftests/mm/khugepaged.c @@ -561,8 +561,6 @@ static bool wait_for_scan(const char *msg, char *p, int nr_hpages, usleep(TICK); } - madvise(p, nr_hpages * hpage_pmd_size, MADV_NOHUGEPAGE); - return timeout == -1; } -- cgit v1.2.3 From f081a460bbac54c5ec460aac42079974ee413bd6 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Fri, 13 Jun 2025 09:49:20 +0800 Subject: selftests: mm: add shmem collapse as a default test item Currently, we only test anonymous memory collapse by default. We should also add shmem collapse as a default test item to catch issues that could break the test cases. Link: https://lkml.kernel.org/r/a30b1529b399f2e649b5a05c3d352f41a68faeae.1749779183.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Reviewed-by: Dev Jain Tested-by: Dev Jain Acked-by: David Hildenbrand Acked-by: Zi Yan Cc: Barry Song Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Mariano Pache Cc: Ryan Roberts Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/run_vmtests.sh | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 33fc7fafa8f9..a38c984103ce 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -485,6 +485,10 @@ CATEGORY="thp" run_test ./khugepaged CATEGORY="thp" run_test ./khugepaged -s 2 +CATEGORY="thp" run_test ./khugepaged all:shmem + +CATEGORY="thp" run_test ./khugepaged -s 4 all:shmem + CATEGORY="thp" run_test ./transhuge-stress -d 20 # Try to create XFS if not provided -- cgit v1.2.3 From 3e49aa8e6510be458c1120d6d2a9deac9bc40253 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 13 Jun 2025 12:44:07 +0100 Subject: selftest/mm: skip if fallocate() is unsupported in gup_longterm Currently gup_longterm assumes that filesystems support fallocate() and uses that to allocate space in files, however this is an optional feature and is in particular not implemented by NFSv3 which is commonly used in CI systems leading to spurious failures. Check for lack of support and report a skip instead for that case. Link: https://lkml.kernel.org/r/20250613-selftest-mm-gup-longterm-fallocate-nfs-v1-1-758a104c175f@kernel.org Signed-off-by: Mark Brown Cc: David Hildenbrand Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/gup_longterm.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c index 29047d2e0c49..268dadb8ce43 100644 --- a/tools/testing/selftests/mm/gup_longterm.c +++ b/tools/testing/selftests/mm/gup_longterm.c @@ -114,7 +114,15 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) } if (fallocate(fd, 0, 0, size)) { - if (size == pagesize) { + /* + * Some filesystems (eg, NFSv3) don't support + * fallocate(), report this as a skip rather than a + * test failure. + */ + if (errno == EOPNOTSUPP) { + ksft_print_msg("fallocate() not supported by filesystem\n"); + result = KSFT_SKIP; + } else if (size == pagesize) { ksft_print_msg("fallocate() failed (%s)\n", strerror(errno)); result = KSFT_FAIL; } else { -- cgit v1.2.3 From cf34cfbf1784be7ce9f22789aa157535eff2195f Mon Sep 17 00:00:00 2001 From: Vivek Kasireddy Date: Tue, 17 Jun 2025 22:30:55 -0700 Subject: selftests/udmabuf: add a test to pin first before writing to memfd Unlike the existing tests, this new test will create a memfd (backed by hugetlb) and pin the folios in it (a small subset) before writing/ populating it with data. This is a valid use-case that invokes the memfd_alloc_folio() kernel API and is expected to work unless there aren't enough hugetlb folios to satisfy the allocation needs. Link: https://lkml.kernel.org/r/20250618053415.1036185-4-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy Cc: Gerd Hoffmann Cc: Steve Sistare Cc: Muchun Song Cc: David Hildenbrand Cc: Oscar Salvador Signed-off-by: Andrew Morton --- tools/testing/selftests/drivers/dma-buf/udmabuf.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/dma-buf/udmabuf.c b/tools/testing/selftests/drivers/dma-buf/udmabuf.c index 6062723a172e..77aa2897e79f 100644 --- a/tools/testing/selftests/drivers/dma-buf/udmabuf.c +++ b/tools/testing/selftests/drivers/dma-buf/udmabuf.c @@ -138,7 +138,7 @@ int main(int argc, char *argv[]) void *addr1, *addr2; ksft_print_header(); - ksft_set_plan(6); + ksft_set_plan(7); devfd = open("/dev/udmabuf", O_RDWR); if (devfd < 0) { @@ -248,6 +248,24 @@ int main(int argc, char *argv[]) else ksft_test_result_pass("%s: [PASS,test-6]\n", TEST_PREFIX); + close(buf); + close(memfd); + + /* same test as above but we pin first before writing to memfd */ + page_size = getpagesize() * 512; /* 2 MB */ + size = MEMFD_SIZE * page_size; + memfd = create_memfd_with_seals(size, true); + buf = create_udmabuf_list(devfd, memfd, size); + addr2 = mmap_fd(buf, NUM_PAGES * NUM_ENTRIES * getpagesize()); + addr1 = mmap_fd(memfd, size); + write_to_memfd(addr1, size, 'a'); + write_to_memfd(addr1, size, 'b'); + ret = compare_chunks(addr1, addr2, size); + if (ret < 0) + ksft_test_result_fail("%s: [FAIL,test-7]\n", TEST_PREFIX); + else + ksft_test_result_pass("%s: [PASS,test-7]\n", TEST_PREFIX); + close(buf); close(memfd); close(devfd); -- cgit v1.2.3 From dfa3cf0bc018ff15e02ce19906be00a287ff2395 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 19 Jun 2025 11:36:08 -0700 Subject: selftets/damon: add a test for memcg_path leak There was a memory leak bug in DAMOS sysfs memcg_path file. Add a selftest to ensure the bug never comes back. Link: https://lkml.kernel.org/r/20250619183608.6647-3-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/Makefile | 1 + .../selftests/damon/sysfs_memcg_path_leak.sh | 43 ++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100755 tools/testing/selftests/damon/sysfs_memcg_path_leak.sh (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index ff21524be458..e888455e3cf8 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -15,6 +15,7 @@ TEST_PROGS += reclaim.sh lru_sort.sh # regression tests (reproducers of previously found bugs) TEST_PROGS += sysfs_update_removed_scheme_dir.sh TEST_PROGS += sysfs_update_schemes_tried_regions_hang.py +TEST_PROGS += sysfs_memcg_path_leak.sh EXTRA_CLEAN = __pycache__ diff --git a/tools/testing/selftests/damon/sysfs_memcg_path_leak.sh b/tools/testing/selftests/damon/sysfs_memcg_path_leak.sh new file mode 100755 index 000000000000..64c5d8c518a4 --- /dev/null +++ b/tools/testing/selftests/damon/sysfs_memcg_path_leak.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +if [ $EUID -ne 0 ] +then + echo "Run as root" + exit $ksft_skip +fi + +damon_sysfs="/sys/kernel/mm/damon/admin" +if [ ! -d "$damon_sysfs" ] +then + echo "damon sysfs not found" + exit $ksft_skip +fi + +# ensure filter directory +echo 1 > "$damon_sysfs/kdamonds/nr_kdamonds" +echo 1 > "$damon_sysfs/kdamonds/0/contexts/nr_contexts" +echo 1 > "$damon_sysfs/kdamonds/0/contexts/0/schemes/nr_schemes" +echo 1 > "$damon_sysfs/kdamonds/0/contexts/0/schemes/0/filters/nr_filters" + +filter_dir="$damon_sysfs/kdamonds/0/contexts/0/schemes/0/filters/0" + +before_kb=$(grep Slab /proc/meminfo | awk '{print $2}') + +# try to leak 3000 KiB +for i in {1..102400}; +do + echo "012345678901234567890123456789" > "$filter_dir/memcg_path" +done + +after_kb=$(grep Slab /proc/meminfo | awk '{print $2}') +# expect up to 1500 KiB free from other tasks memory +expected_after_kb_max=$((before_kb + 1500)) + +if [ "$after_kb" -gt "$expected_after_kb_max" ] +then + echo "maybe memcg_path are leaking: $before_kb -> $after_kb" + exit 1 +else + exit 0 +fi -- cgit v1.2.3 From ab7ed56a03ce0196260bc29ab844eb4e54afee66 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 20 Jun 2025 11:00:58 -0400 Subject: selftests/mm: reduce uffd-unit-test poison test to minimum The test will still generate quite some unwanted MCE error messages to syslog. There was old proposal ratelimiting the MCE messages from kernel, but that has risk of hiding real useful information on production systems. We can at least reduce the test to minimum to not over-pollute dmesg, however trying to not lose its coverage too much. [peterx@redhat.com: reduce uffd-unit-test poison test to minimum] Link: https://lkml.kernel.org/r/aF2RSsjuEOtzXcUa@x1.local Link: https://lkml.kernel.org/r/20250620150058.1729489-1-peterx@redhat.com Signed-off-by: Peter Xu Cc: Axel Rasmussen Cc: Brendan Jackman Cc: David Hildenbrand Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/uffd-unit-tests.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index c73fd5d455c8..50501b38e34e 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -1027,6 +1027,9 @@ static void uffd_poison_handle_fault( do_uffdio_poison(uffd, offset); } +/* Make sure to cover odd/even, and minimum duplications */ +#define UFFD_POISON_TEST_NPAGES 4 + static void uffd_poison_test(uffd_test_args_t *targs) { pthread_t uffd_mon; @@ -1034,12 +1037,17 @@ static void uffd_poison_test(uffd_test_args_t *targs) struct uffd_args args = { 0 }; struct sigaction act = { 0 }; unsigned long nr_sigbus = 0; - unsigned long nr; + unsigned long nr, poison_pages = UFFD_POISON_TEST_NPAGES; + + if (nr_pages < poison_pages) { + uffd_test_skip("Too few pages for POISON test"); + return; + } fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); - uffd_register_poison(uffd, area_dst, nr_pages * page_size); - memset(area_src, 0, nr_pages * page_size); + uffd_register_poison(uffd, area_dst, poison_pages * page_size); + memset(area_src, 0, poison_pages * page_size); args.handle_fault = uffd_poison_handle_fault; if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) @@ -1051,7 +1059,7 @@ static void uffd_poison_test(uffd_test_args_t *targs) if (sigaction(SIGBUS, &act, 0)) err("sigaction"); - for (nr = 0; nr < nr_pages; ++nr) { + for (nr = 0; nr < poison_pages; ++nr) { unsigned long offset = nr * page_size; const char *bytes = (const char *) area_dst + offset; const char *i; @@ -1078,9 +1086,9 @@ static void uffd_poison_test(uffd_test_args_t *targs) if (pthread_join(uffd_mon, NULL)) err("pthread_join()"); - if (nr_sigbus != nr_pages / 2) + if (nr_sigbus != poison_pages / 2) err("expected to receive %lu SIGBUS, actually received %lu", - nr_pages / 2, nr_sigbus); + poison_pages / 2, nr_sigbus); uffd_test_pass(); } -- cgit v1.2.3 From 67c94320571f37efd6d88b8c23a3a69ece63fb7f Mon Sep 17 00:00:00 2001 From: Moon Hee Lee Date: Wed, 25 Jun 2025 19:07:58 -0700 Subject: selftests/mm: remove duplicate .gitignore entries Remove redundant entries in .gitignore confirmed by: $ sort tools/testing/selftests/mm/.gitignore | uniq -d hugetlb_dio pkey_sighandler_tests_32 pkey_sighandler_tests_64 These entries were originally added by [1], and later duplicated by [2]. [1] https://lore.kernel.org/all/20240924185911.117937-1-lorenzo.stoakes@oracle.com/ [2] https://lore.kernel.org/all/20241125064036.413536-1-lizhijian@fujitsu.com/ Link: https://lkml.kernel.org/r/20250626020758.163243-1-moonhee.lee.ca@gmail.com Signed-off-by: Moon Hee Lee Reviewed-by: Dev Jain Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/.gitignore | 3 --- 1 file changed, 3 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index 824266982aa3..f2dafa0b700b 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -38,9 +38,6 @@ map_fixed_noreplace write_to_hugetlbfs hmm-tests memfd_secret -hugetlb_dio -pkey_sighandler_tests_32 -pkey_sighandler_tests_64 soft-dirty split_huge_page_test ksm_tests -- cgit v1.2.3 From 98db4b5472ed8a64e418b895054c5fb123c038bf Mon Sep 17 00:00:00 2001 From: Li Wang Date: Tue, 24 Jun 2025 12:24:11 +0800 Subject: selftests/mm: fix UFFDIO_API usage with proper two-step feature negotiation The current implementation of test_unmerge_uffd_wp() explicitly sets `uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP` before calling UFFDIO_API. This can cause the ioctl() call to fail with EINVAL on kernels that do not support UFFD-WP, leading the test to fail unnecessarily: # ------------------------------ # running ./ksm_functional_tests # ------------------------------ # TAP version 13 # 1..9 # # [RUN] test_unmerge # ok 1 Pages were unmerged # # [RUN] test_unmerge_zero_pages # ok 2 KSM zero pages were unmerged # # [RUN] test_unmerge_discarded # ok 3 Pages were unmerged # # [RUN] test_unmerge_uffd_wp # not ok 4 UFFDIO_API failed <----- # # [RUN] test_prot_none # ok 5 Pages were unmerged # # [RUN] test_prctl # ok 6 Setting/clearing PR_SET_MEMORY_MERGE works # # [RUN] test_prctl_fork # # No pages got merged # # [RUN] test_prctl_fork_exec # ok 7 PR_SET_MEMORY_MERGE value is inherited # # [RUN] test_prctl_unmerge # ok 8 Pages were unmerged # Bail out! 1 out of 8 tests failed # # Planned tests != run tests (9 != 8) # # Totals: pass:7 fail:1 xfail:0 xpass:0 skip:0 error:0 # [FAIL] This patch improves compatibility and robustness of the UFFD-WP test (test_unmerge_uffd_wp) by correctly implementing the UFFDIO_API two-step handshake as recommended by the userfaultfd(2) man page. Key changes: 1. Use features=0 in the initial UFFDIO_API call to query supported feature bits, rather than immediately requesting WP support. 2. Skip the test gracefully if: - UFFDIO_API fails with EINVAL (e.g. unsupported API version), or - UFFD_FEATURE_PAGEFAULT_FLAG_WP is not advertised by the kernel. 3. Close the initial userfaultfd and create a new one before enabling the required feature, since UFFDIO_API can only be called once per fd. 4. Improve diagnostics by distinguishing between expected and unexpected failures, using strerror() to report errors. This ensures the test behaves correctly across a wider range of kernel versions and configurations, while preserving the intended behavior on kernels that support UFFD-WP. [liwang@redhat.com: fail the test if sys_userfaultfd() fails, per David] Link: https://lkml.kernel.org/r/20250625004645.400520-1-liwang@redhat.com Link: https://lkml.kernel.org/r/20250624042411.395285-1-liwang@redhat.com Signed-off-by: Li Wang Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Cc: Aruna Ramakrishna Cc: Bagas Sanjaya Cc: Catalin Marinas Cc: Dave Hansen Cc: Joey Gouly Cc: Johannes Weiner Cc: Keith Lucas Cc: Ryan Roberts Cc: Shuah Khan Cc: Axel Rasmussen Cc: Li Wang Cc: Mike Rapoport Cc: Nadav Amit Cc: Peter Xu Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/ksm_functional_tests.c | 28 +++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c index b61803e36d1c..d8bd1911dfc0 100644 --- a/tools/testing/selftests/mm/ksm_functional_tests.c +++ b/tools/testing/selftests/mm/ksm_functional_tests.c @@ -393,9 +393,13 @@ static void test_unmerge_uffd_wp(void) /* See if UFFD-WP is around. */ uffdio_api.api = UFFD_API; - uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP; + uffdio_api.features = 0; if (ioctl(uffd, UFFDIO_API, &uffdio_api) < 0) { - ksft_test_result_fail("UFFDIO_API failed\n"); + if (errno == EINVAL) + ksft_test_result_skip("The API version requested is not supported\n"); + else + ksft_test_result_fail("UFFDIO_API failed: %s\n", strerror(errno)); + goto close_uffd; } if (!(uffdio_api.features & UFFD_FEATURE_PAGEFAULT_FLAG_WP)) { @@ -403,6 +407,26 @@ static void test_unmerge_uffd_wp(void) goto close_uffd; } + /* + * UFFDIO_API must only be called once to enable features. + * So we close the old userfaultfd and create a new one to + * actually enable UFFD_FEATURE_PAGEFAULT_FLAG_WP. + */ + close(uffd); + uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); + if (uffd < 0) { + ksft_test_result_fail("__NR_userfaultfd failed\n"); + goto unmap; + } + + /* Now, enable it ("two-step handshake") */ + uffdio_api.api = UFFD_API; + uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP; + if (ioctl(uffd, UFFDIO_API, &uffdio_api) < 0) { + ksft_test_result_fail("UFFDIO_API failed: %s\n", strerror(errno)); + goto close_uffd; + } + /* Register UFFD-WP, no need for an actual handler. */ if (uffd_register(uffd, map, size, false, true, false)) { ksft_test_result_fail("UFFDIO_REGISTER_MODE_WP failed\n"); -- cgit v1.2.3 From fdc3bc3497946c6b416a17628907581657102e60 Mon Sep 17 00:00:00 2001 From: Li Wang Date: Tue, 24 Jun 2025 11:27:48 +0800 Subject: ksm_tests: skip hugepage test when Transparent Hugepages are disabled Some systems (e.g. minimal or real-time kernels) may not enable Transparent Hugepages (THP), causing MADV_HUGEPAGE to return EINVAL. This patch introduces a runtime check using the existing THP sysfs interface and skips the hugepage merging test (`-H`) when THP is not available. To avoid those failures: # ----------------------------- # running ./ksm_tests -H -s 100 # ----------------------------- # ksm_tests: MADV_HUGEPAGE: Invalid argument # [FAIL] not ok 1 ksm_tests -H -s 100 # exit=2 # -------------------- # running ./khugepaged # -------------------- # Reading PMD pagesize failed# [FAIL] not ok 1 khugepaged # exit=1 # -------------------- # running ./soft-dirty # -------------------- # TAP version 13 # 1..15 # ok 1 Test test_simple # ok 2 Test test_vma_reuse dirty bit of allocated page # ok 3 Test test_vma_reuse dirty bit of reused address page # Bail out! Reading PMD pagesize failed# Planned tests != run tests (15 != 3) # # Totals: pass:3 fail:0 xfail:0 xpass:0 skip:0 error:0 # [FAIL] not ok 1 soft-dirty # exit=1 # SUMMARY: PASS=0 SKIP=0 FAIL=1 # ------------------- # running ./migration # ------------------- # TAP version 13 # 1..3 # # Starting 3 tests from 1 test cases. # # RUN migration.private_anon ... # # OK migration.private_anon # ok 1 migration.private_anon # # RUN migration.shared_anon ... # # OK migration.shared_anon # ok 2 migration.shared_anon # # RUN migration.private_anon_thp ... # # migration.c:196:private_anon_thp:Expected madvise(ptr, TWOMEG, MADV_HUGEPAGE) (-1) == 0 (0) # # private_anon_thp: Test terminated by assertion # # FAIL migration.private_anon_thp # not ok 3 migration.private_anon_thp # # FAILED: 2 / 3 tests passed. # # Totals: pass:2 fail:1 xfail:0 xpass:0 skip:0 error:0 # [FAIL] not ok 1 migration # exit=1 It's true that CONFIG_TRANSPARENT_HUGEPAGE=y is explicitly enabled in tools/testing/selftests/mm/config, so ideally the runtime environment should also support THP. However, in practice, we've found that on some systems: - THP is disabled at boot time (transparent_hugepage=never) - Or manually disabled via sysfs - Or unavailable in RT kernels, containers, or minimal CI environments In these cases, the test will fail with EINVAL on madvise(MADV_HUGEPAGE), even though the kernel config is correct. To make the test suite more robust and avoid false negatives, this patch adds a runtime check for /sys/kernel/mm/transparent_hugepage/enabled. If THP is not available, the hugepage test (-H) is skipped with a clear message. Link: https://lkml.kernel.org/r/20250624032748.393836-1-liwang@redhat.com Signed-off-by: Li Wang Cc: Aruna Ramakrishna Cc: Bagas Sanjaya Cc: Catalin Marinas Cc: Dave Hansen Cc: David Hildenbrand Cc: Joey Gouly Cc: Johannes Weiner Cc: Keith Lucas Cc: Ryan Roberts Cc: Shuah Khan Cc: Lorenzo Stoakes Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/khugepaged.c | 5 +++++ tools/testing/selftests/mm/ksm_tests.c | 6 ++++++ tools/testing/selftests/mm/migration.c | 8 ++++++++ tools/testing/selftests/mm/soft-dirty.c | 9 ++++++++- tools/testing/selftests/mm/thp_settings.c | 11 +++++++++++ tools/testing/selftests/mm/thp_settings.h | 2 ++ 6 files changed, 40 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c index 4341ce6b3b38..a18c50d51141 100644 --- a/tools/testing/selftests/mm/khugepaged.c +++ b/tools/testing/selftests/mm/khugepaged.c @@ -1188,6 +1188,11 @@ int main(int argc, char **argv) .read_ahead_kb = 0, }; + if (!thp_is_enabled()) { + printf("Transparent Hugepages not available\n"); + return KSFT_SKIP; + } + parse_test_type(argc, argv); setbuf(stdout, NULL); diff --git a/tools/testing/selftests/mm/ksm_tests.c b/tools/testing/selftests/mm/ksm_tests.c index e80deac1436b..b77462b5c240 100644 --- a/tools/testing/selftests/mm/ksm_tests.c +++ b/tools/testing/selftests/mm/ksm_tests.c @@ -15,6 +15,7 @@ #include "../kselftest.h" #include #include "vm_util.h" +#include "thp_settings.h" #define KSM_SYSFS_PATH "/sys/kernel/mm/ksm/" #define KSM_FP(s) (KSM_SYSFS_PATH s) @@ -527,6 +528,11 @@ static int ksm_merge_hugepages_time(int merge_type, int mapping, int prot, unsigned long scan_time_ns; int pagemap_fd, n_normal_pages, n_huge_pages; + if (!thp_is_enabled()) { + printf("Transparent Hugepages not available\n"); + return KSFT_SKIP; + } + map_size *= MB; size_t len = map_size; diff --git a/tools/testing/selftests/mm/migration.c b/tools/testing/selftests/mm/migration.c index 1e3a595fbf01..a306f8bab087 100644 --- a/tools/testing/selftests/mm/migration.c +++ b/tools/testing/selftests/mm/migration.c @@ -5,6 +5,8 @@ */ #include "../kselftest_harness.h" +#include "thp_settings.h" + #include #include #include @@ -185,6 +187,9 @@ TEST_F_TIMEOUT(migration, private_anon_thp, 2*RUNTIME) uint64_t *ptr; int i; + if (!thp_is_enabled()) + SKIP(return, "Transparent Hugepages not available"); + if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0) SKIP(return, "Not enough threads or NUMA nodes available"); @@ -214,6 +219,9 @@ TEST_F_TIMEOUT(migration, shared_anon_thp, 2*RUNTIME) uint64_t *ptr; int i; + if (!thp_is_enabled()) + SKIP(return, "Transparent Hugepages not available"); + if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0) SKIP(return, "Not enough threads or NUMA nodes available"); diff --git a/tools/testing/selftests/mm/soft-dirty.c b/tools/testing/selftests/mm/soft-dirty.c index 8e1462ce0532..8a3f2b4b2186 100644 --- a/tools/testing/selftests/mm/soft-dirty.c +++ b/tools/testing/selftests/mm/soft-dirty.c @@ -6,8 +6,10 @@ #include #include #include + #include "../kselftest.h" #include "vm_util.h" +#include "thp_settings.h" #define PAGEMAP_FILE_PATH "/proc/self/pagemap" #define TEST_ITERATIONS 10000 @@ -78,8 +80,13 @@ static void test_hugepage(int pagemap_fd, int pagesize) { char *map; int i, ret; - size_t hpage_len = read_pmd_pagesize(); + if (!thp_is_enabled()) { + ksft_test_result_skip("Transparent Hugepages not available\n"); + return; + } + + size_t hpage_len = read_pmd_pagesize(); if (!hpage_len) ksft_exit_fail_msg("Reading PMD pagesize failed"); diff --git a/tools/testing/selftests/mm/thp_settings.c b/tools/testing/selftests/mm/thp_settings.c index ad872af1c81a..bad60ac52874 100644 --- a/tools/testing/selftests/mm/thp_settings.c +++ b/tools/testing/selftests/mm/thp_settings.c @@ -381,3 +381,14 @@ unsigned long thp_shmem_supported_orders(void) { return __thp_supported_orders(true); } + +bool thp_is_enabled(void) +{ + if (access(THP_SYSFS, F_OK) != 0) + return false; + + int mode = thp_read_string("enabled", thp_enabled_strings); + + /* THP is considered enabled if it's either "always" or "madvise" */ + return mode == 1 || mode == 3; +} diff --git a/tools/testing/selftests/mm/thp_settings.h b/tools/testing/selftests/mm/thp_settings.h index fc131d23d593..6c07f70beee9 100644 --- a/tools/testing/selftests/mm/thp_settings.h +++ b/tools/testing/selftests/mm/thp_settings.h @@ -84,4 +84,6 @@ void thp_set_read_ahead_path(char *path); unsigned long thp_supported_orders(void); unsigned long thp_shmem_supported_orders(void); +bool thp_is_enabled(void); + #endif /* __THP_SETTINGS_H__ */ -- cgit v1.2.3 From 1f04e0e65209be3148a20b4b370e01d02b7ac445 Mon Sep 17 00:00:00 2001 From: Moon Hee Lee Date: Mon, 23 Jun 2025 11:34:06 -0700 Subject: selftests: ptrace: add set_syscall_info to .gitignore Add the set_syscall_info test binary to .gitignore to avoid tracking build artifacts in the ptrace selftests directory. Link: https://lkml.kernel.org/r/20250623183405.133434-2-moonhee.lee.ca@gmail.com Signed-off-by: Moon Hee Lee Cc: "Dmitry V. Levin" Cc: Oleg Nesterov Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/ptrace/.gitignore | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/ptrace/.gitignore b/tools/testing/selftests/ptrace/.gitignore index b7dde152e75a..f6be8efd57ea 100644 --- a/tools/testing/selftests/ptrace/.gitignore +++ b/tools/testing/selftests/ptrace/.gitignore @@ -3,3 +3,4 @@ get_syscall_info get_set_sud peeksiginfo vmaccess +set_syscall_info -- cgit v1.2.3 From 81bf24f1ac77029bf858c0da081088eb62b1b230 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 10 Jul 2025 12:12:20 +0100 Subject: KVM: selftests: Add CONFIG_EVENTFD for irqfd selftest In 7e9b231c402a ("KVM: selftests: Add a KVM_IRQFD test to verify uniqueness requirements") we added a test for the newly added irqfd support but since this feature works with eventfds it won't work unless the kernel has been built wth eventfd support. Add CONFIG_EVENTFD to the list of required options for the KVM selftests. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20250710-kvm-selftests-eventfd-config-v1-1-78c276e4b80f@kernel.org Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/config | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/kvm/config b/tools/testing/selftests/kvm/config index 8835fed09e9f..96d874b239eb 100644 --- a/tools/testing/selftests/kvm/config +++ b/tools/testing/selftests/kvm/config @@ -1,5 +1,6 @@ CONFIG_KVM=y CONFIG_KVM_INTEL=y CONFIG_KVM_AMD=y +CONFIG_EVENTFD=y CONFIG_USERFAULTFD=y CONFIG_IDLE_PAGE_TRACKING=y -- cgit v1.2.3 From c85a8cb9b8d3a3dd9bb12879b28fc377dd24272b Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 9 Jul 2025 11:08:49 +0200 Subject: selftests/hid: run ruff format on the python part We aim at syncing with the hid-tools repo on gitlab.freedesktop.org/libevdev/hid-tools. One of the commits is this mechanical formatting, so pull it over here so changes are not hidden by those. Reviewed-by: Peter Hutterer Link: https://patch.msgid.link/20250709-wip-fix-ci-v1-1-b7df4c271cf8@kernel.org Signed-off-by: Benjamin Tissoires --- tools/testing/selftests/hid/tests/test_tablet.py | 4 +- .../selftests/hid/tests/test_wacom_generic.py | 436 +++++++++++++++------ 2 files changed, 325 insertions(+), 115 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/hid/tests/test_tablet.py b/tools/testing/selftests/hid/tests/test_tablet.py index a9e2de1e8861..52fb22cac91e 100644 --- a/tools/testing/selftests/hid/tests/test_tablet.py +++ b/tools/testing/selftests/hid/tests/test_tablet.py @@ -1228,9 +1228,9 @@ class Huion_Kamvas_Pro_19_256c_006b(PenDigitizer): pen.current_state = state def call_input_event(self, report): - if report[0] == 0x0a: + if report[0] == 0x0A: # ensures the original second Eraser usage is null - report[1] &= 0xdf + report[1] &= 0xDF # ensures the original last bit is equal to bit 6 (In Range) if report[1] & 0x40: diff --git a/tools/testing/selftests/hid/tests/test_wacom_generic.py b/tools/testing/selftests/hid/tests/test_wacom_generic.py index b62c7dba6777..5cbc0cc9308f 100644 --- a/tools/testing/selftests/hid/tests/test_wacom_generic.py +++ b/tools/testing/selftests/hid/tests/test_wacom_generic.py @@ -892,7 +892,7 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest locations. The value of `t` may be incremented over time to move the points along a linear path. """ - return [ self.make_contact(id, t) for id in range(0, n) ] + return [self.make_contact(id, t) for id in range(0, n)] def assert_contact(self, uhdev, evdev, contact_ids, t=0): """ @@ -997,12 +997,17 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1) in events - self.assert_contacts(uhdev, evdev, - [ self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = None), - self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), - self.ContactIds(contact_id = 2, tracking_id = -1, slot_num = None), - self.ContactIds(contact_id = 3, tracking_id = 1, slot_num = 1), - self.ContactIds(contact_id = 4, tracking_id = -1, slot_num = None) ]) + self.assert_contacts( + uhdev, + evdev, + [ + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=None), + self.ContactIds(contact_id=1, tracking_id=0, slot_num=0), + self.ContactIds(contact_id=2, tracking_id=-1, slot_num=None), + self.ContactIds(contact_id=3, tracking_id=1, slot_num=1), + self.ContactIds(contact_id=4, tracking_id=-1, slot_num=None), + ], + ) def confidence_change_assert_playback(self, uhdev, evdev, timeline): """ @@ -1026,7 +1031,7 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest events = uhdev.next_sync_events() self.debug_reports(r, uhdev, events) - ids = [ x[0] for x in state ] + ids = [x[0] for x in state] self.assert_contacts(uhdev, evdev, ids, t) t += 1 @@ -1044,27 +1049,68 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest uhdev = self.uhdev evdev = uhdev.get_evdev() - self.confidence_change_assert_playback(uhdev, evdev, [ - # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident - # Both fingers confidently in contact - [(self.ContactIds(contact_id = 0, tracking_id = 0, slot_num = 0), True, True), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=1: Contact 0 == !Down + confident; Contact 1 == Down + confident - # First finger looses confidence and clears only the tipswitch flag - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, True), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident - # First finger has lost confidence and has both flags cleared - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident - # First finger has lost confidence and has both flags cleared - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)] - ]) + self.confidence_change_assert_playback( + uhdev, + evdev, + [ + # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident + # Both fingers confidently in contact + [ + ( + self.ContactIds(contact_id=0, tracking_id=0, slot_num=0), + True, + True, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=1: Contact 0 == !Down + confident; Contact 1 == Down + confident + # First finger looses confidence and clears only the tipswitch flag + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + False, + True, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger has lost confidence and has both flags cleared + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + False, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger has lost confidence and has both flags cleared + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + False, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + ], + ) def test_confidence_loss_b(self): """ @@ -1079,27 +1125,68 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest uhdev = self.uhdev evdev = uhdev.get_evdev() - self.confidence_change_assert_playback(uhdev, evdev, [ - # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident - # Both fingers confidently in contact - [(self.ContactIds(contact_id = 0, tracking_id = 0, slot_num = 0), True, True), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=1: Contact 0 == !Down + !confident; Contact 1 == Down + confident - # First finger looses confidence and has both flags cleared simultaneously - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident - # First finger has lost confidence and has both flags cleared - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident - # First finger has lost confidence and has both flags cleared - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)] - ]) + self.confidence_change_assert_playback( + uhdev, + evdev, + [ + # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident + # Both fingers confidently in contact + [ + ( + self.ContactIds(contact_id=0, tracking_id=0, slot_num=0), + True, + True, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=1: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger looses confidence and has both flags cleared simultaneously + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + False, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger has lost confidence and has both flags cleared + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + False, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger has lost confidence and has both flags cleared + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + False, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + ], + ) def test_confidence_loss_c(self): """ @@ -1113,27 +1200,68 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest uhdev = self.uhdev evdev = uhdev.get_evdev() - self.confidence_change_assert_playback(uhdev, evdev, [ - # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident - # Both fingers confidently in contact - [(self.ContactIds(contact_id = 0, tracking_id = 0, slot_num = 0), True, True), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident - # First finger looses confidence and clears only the confidence flag - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), True, False), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident - # First finger has lost confidence and has both flags cleared - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident - # First finger has lost confidence and has both flags cleared - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)] - ]) + self.confidence_change_assert_playback( + uhdev, + evdev, + [ + # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident + # Both fingers confidently in contact + [ + ( + self.ContactIds(contact_id=0, tracking_id=0, slot_num=0), + True, + True, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident + # First finger looses confidence and clears only the confidence flag + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + True, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger has lost confidence and has both flags cleared + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + False, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger has lost confidence and has both flags cleared + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + False, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + ], + ) def test_confidence_gain_a(self): """ @@ -1144,27 +1272,68 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest uhdev = self.uhdev evdev = uhdev.get_evdev() - self.confidence_change_assert_playback(uhdev, evdev, [ - # t=0: Contact 0 == Down + !confident; Contact 1 == Down + confident - # Only second finger is confidently in contact - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = None), True, False), - (self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), True, True)], - - # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident - # First finger gains confidence - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = None), True, False), - (self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), True, True)], - - # t=2: Contact 0 == Down + confident; Contact 1 == Down + confident - # First finger remains confident - [(self.ContactIds(contact_id = 0, tracking_id = 1, slot_num = 1), True, True), - (self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), True, True)], - - # t=3: Contact 0 == Down + confident; Contact 1 == Down + confident - # First finger remains confident - [(self.ContactIds(contact_id = 0, tracking_id = 1, slot_num = 1), True, True), - (self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), True, True)] - ]) + self.confidence_change_assert_playback( + uhdev, + evdev, + [ + # t=0: Contact 0 == Down + !confident; Contact 1 == Down + confident + # Only second finger is confidently in contact + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=None), + True, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=0, slot_num=0), + True, + True, + ), + ], + # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident + # First finger gains confidence + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=None), + True, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=0, slot_num=0), + True, + True, + ), + ], + # t=2: Contact 0 == Down + confident; Contact 1 == Down + confident + # First finger remains confident + [ + ( + self.ContactIds(contact_id=0, tracking_id=1, slot_num=1), + True, + True, + ), + ( + self.ContactIds(contact_id=1, tracking_id=0, slot_num=0), + True, + True, + ), + ], + # t=3: Contact 0 == Down + confident; Contact 1 == Down + confident + # First finger remains confident + [ + ( + self.ContactIds(contact_id=0, tracking_id=1, slot_num=1), + True, + True, + ), + ( + self.ContactIds(contact_id=1, tracking_id=0, slot_num=0), + True, + True, + ), + ], + ], + ) def test_confidence_gain_b(self): """ @@ -1175,24 +1344,65 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest uhdev = self.uhdev evdev = uhdev.get_evdev() - self.confidence_change_assert_playback(uhdev, evdev, [ - # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident - # First and second finger confidently in contact - [(self.ContactIds(contact_id = 0, tracking_id = 0, slot_num = 0), True, True), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident - # Firtst finger looses confidence - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), True, False), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=2: Contact 0 == Down + confident; Contact 1 == Down + confident - # First finger gains confidence - [(self.ContactIds(contact_id = 0, tracking_id = 2, slot_num = 0), True, True), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=3: Contact 0 == !Down + confident; Contact 1 == Down + confident - # First finger goes up - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, True), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)] - ]) + self.confidence_change_assert_playback( + uhdev, + evdev, + [ + # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident + # First and second finger confidently in contact + [ + ( + self.ContactIds(contact_id=0, tracking_id=0, slot_num=0), + True, + True, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident + # Firtst finger looses confidence + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + True, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=2: Contact 0 == Down + confident; Contact 1 == Down + confident + # First finger gains confidence + [ + ( + self.ContactIds(contact_id=0, tracking_id=2, slot_num=0), + True, + True, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=3: Contact 0 == !Down + confident; Contact 1 == Down + confident + # First finger goes up + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + False, + True, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + ], + ) -- cgit v1.2.3 From 642f9b2d608cc2239d22957ca1dc557d07470b50 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 9 Jul 2025 11:08:50 +0200 Subject: selftests/hid: sync the python tests to hid-tools 0.8 Instead of backporting one by one each commits, let's pull them in bulk and refer to the hid-tools project for a detailed history. The short summary is: - make use of dataclass when possible, to avoid tuples - wacom: remove unused uhdev parameter - various small fixes not worth mentioning Reviewed-by: Peter Hutterer Link: https://patch.msgid.link/20250709-wip-fix-ci-v1-2-b7df4c271cf8@kernel.org Signed-off-by: Benjamin Tissoires --- tools/testing/selftests/hid/tests/base.py | 46 ++++++++++++++-------- tools/testing/selftests/hid/tests/base_device.py | 30 ++++++++------ .../selftests/hid/tests/test_apple_keyboard.py | 3 +- tools/testing/selftests/hid/tests/test_gamepad.py | 3 +- .../selftests/hid/tests/test_ite_keyboard.py | 3 +- .../testing/selftests/hid/tests/test_multitouch.py | 2 +- tools/testing/selftests/hid/tests/test_sony.py | 7 ++-- tools/testing/selftests/hid/tests/test_tablet.py | 7 ++-- .../selftests/hid/tests/test_wacom_generic.py | 11 +++--- 9 files changed, 69 insertions(+), 43 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/hid/tests/base.py b/tools/testing/selftests/hid/tests/base.py index 3a465768e507..5175cf235b2f 100644 --- a/tools/testing/selftests/hid/tests/base.py +++ b/tools/testing/selftests/hid/tests/base.py @@ -5,6 +5,7 @@ # Copyright (c) 2017 Benjamin Tissoires # Copyright (c) 2017 Red Hat, Inc. +import dataclasses import libevdev import os import pytest @@ -145,6 +146,18 @@ class UHIDTestDevice(BaseDevice): self.name = name +@dataclasses.dataclass +class HidBpf: + object_name: str + has_rdesc_fixup: bool + + +@dataclasses.dataclass +class KernelModule: + driver_name: str + module_name: str + + class BaseTestCase: class TestUhid(object): syn_event = libevdev.InputEvent(libevdev.EV_SYN.SYN_REPORT) # type: ignore @@ -155,20 +168,20 @@ class BaseTestCase: # List of kernel modules to load before starting the test # if any module is not available (not compiled), the test will skip. - # Each element is a tuple '(kernel driver name, kernel module)', - # for example ("playstation", "hid-playstation") - kernel_modules: List[Tuple[str, str]] = [] + # Each element is a KernelModule object, for example + # KernelModule("playstation", "hid-playstation") + kernel_modules: List[KernelModule] = [] # List of in kernel HID-BPF object files to load # before starting the test # Any existing pre-loaded HID-BPF module will be removed # before the ones in this list will be manually loaded. - # Each Element is a tuple '(hid_bpf_object, rdesc_fixup_present)', - # for example '("xppen-ArtistPro16Gen2.bpf.o", True)' - # If 'rdesc_fixup_present' is True, the test needs to wait + # Each Element is a HidBpf object, for example + # 'HidBpf("xppen-ArtistPro16Gen2.bpf.o", True)' + # If 'has_rdesc_fixup' is True, the test needs to wait # for one unbind and rebind before it can be sure the kernel is # ready - hid_bpfs: List[Tuple[str, bool]] = [] + hid_bpfs: List[HidBpf] = [] def assertInputEventsIn(self, expected_events, effective_events): effective_events = effective_events.copy() @@ -232,25 +245,26 @@ class BaseTestCase: @pytest.fixture() def load_kernel_module(self): - for kernel_driver, kernel_module in self.kernel_modules: - self._load_kernel_module(kernel_driver, kernel_module) + for k in self.kernel_modules: + self._load_kernel_module(k.driver_name, k.module_name) yield def load_hid_bpfs(self): + # this function will only work when run in the kernel tree script_dir = Path(os.path.dirname(os.path.realpath(__file__))) root_dir = (script_dir / "../../../../..").resolve() bpf_dir = root_dir / "drivers/hid/bpf/progs" + if not bpf_dir.exists(): + pytest.skip("looks like we are not in the kernel tree, skipping") + udev_hid_bpf = shutil.which("udev-hid-bpf") if not udev_hid_bpf: pytest.skip("udev-hid-bpf not found in $PATH, skipping") - wait = False - for _, rdesc_fixup in self.hid_bpfs: - if rdesc_fixup: - wait = True + wait = any(b.has_rdesc_fixup for b in self.hid_bpfs) - for hid_bpf, _ in self.hid_bpfs: + for hid_bpf in self.hid_bpfs: # We need to start `udev-hid-bpf` in the background # and dispatch uhid events in case the kernel needs # to fetch features on the device @@ -260,13 +274,13 @@ class BaseTestCase: "--verbose", "add", str(self.uhdev.sys_path), - str(bpf_dir / hid_bpf), + str(bpf_dir / hid_bpf.object_name), ], ) while process.poll() is None: self.uhdev.dispatch(1) - if process.poll() != 0: + if process.returncode != 0: pytest.fail( f"Couldn't insert hid-bpf program '{hid_bpf}', marking the test as failed" ) diff --git a/tools/testing/selftests/hid/tests/base_device.py b/tools/testing/selftests/hid/tests/base_device.py index e0515be97f83..e13035fe1deb 100644 --- a/tools/testing/selftests/hid/tests/base_device.py +++ b/tools/testing/selftests/hid/tests/base_device.py @@ -18,6 +18,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . +import dataclasses import fcntl import functools import libevdev @@ -104,6 +105,12 @@ class PowerSupply(object): return self._type.str_value +@dataclasses.dataclass +class HidReadiness: + is_ready: bool = False + count: int = 0 + + class HIDIsReady(object): """ Companion class that binds to a kernel mechanism @@ -115,18 +122,18 @@ class HIDIsReady(object): def __init__(self: "HIDIsReady", uhid: UHIDDevice) -> None: self.uhid = uhid - def is_ready(self: "HIDIsReady") -> bool: + def is_ready(self: "HIDIsReady") -> HidReadiness: """ Overwrite in subclasses: should return True or False whether the attached uhid device is ready or not. """ - return False + return HidReadiness() class UdevHIDIsReady(HIDIsReady): _pyudev_context: ClassVar[Optional[pyudev.Context]] = None _pyudev_monitor: ClassVar[Optional[pyudev.Monitor]] = None - _uhid_devices: ClassVar[Dict[int, Tuple[bool, int]]] = {} + _uhid_devices: ClassVar[Dict[int, HidReadiness]] = {} def __init__(self: "UdevHIDIsReady", uhid: UHIDDevice) -> None: super().__init__(uhid) @@ -157,18 +164,19 @@ class UdevHIDIsReady(HIDIsReady): id = int(event.sys_path.strip().split(".")[-1], 16) - device_ready, count = cls._uhid_devices.get(id, (False, 0)) + readiness = cls._uhid_devices.setdefault(id, HidReadiness()) ready = event.action == "bind" - if not device_ready and ready: - count += 1 - cls._uhid_devices[id] = (ready, count) + if not readiness.is_ready and ready: + readiness.count += 1 + + readiness.is_ready = ready - def is_ready(self: "UdevHIDIsReady") -> Tuple[bool, int]: + def is_ready(self: "UdevHIDIsReady") -> HidReadiness: try: return self._uhid_devices[self.uhid.hid_id] except KeyError: - return (False, 0) + return HidReadiness() class EvdevMatch(object): @@ -322,11 +330,11 @@ class BaseDevice(UHIDDevice): @property def kernel_is_ready(self: "BaseDevice") -> bool: - return self._kernel_is_ready.is_ready()[0] and self.started + return self._kernel_is_ready.is_ready().is_ready and self.started @property def kernel_ready_count(self: "BaseDevice") -> int: - return self._kernel_is_ready.is_ready()[1] + return self._kernel_is_ready.is_ready().count @property def input_nodes(self: "BaseDevice") -> List[EvdevDevice]: diff --git a/tools/testing/selftests/hid/tests/test_apple_keyboard.py b/tools/testing/selftests/hid/tests/test_apple_keyboard.py index f81071d46166..0e17588b945c 100644 --- a/tools/testing/selftests/hid/tests/test_apple_keyboard.py +++ b/tools/testing/selftests/hid/tests/test_apple_keyboard.py @@ -8,13 +8,14 @@ from .test_keyboard import ArrayKeyboard, TestArrayKeyboard from hidtools.util import BusType +from . import base import libevdev import logging logger = logging.getLogger("hidtools.test.apple-keyboard") -KERNEL_MODULE = ("apple", "hid-apple") +KERNEL_MODULE = base.KernelModule("apple", "hid-apple") class KbdData(object): diff --git a/tools/testing/selftests/hid/tests/test_gamepad.py b/tools/testing/selftests/hid/tests/test_gamepad.py index 8d5b5ffdae49..612197805931 100644 --- a/tools/testing/selftests/hid/tests/test_gamepad.py +++ b/tools/testing/selftests/hid/tests/test_gamepad.py @@ -12,6 +12,7 @@ import pytest from .base_gamepad import BaseGamepad, JoystickGamepad, AxisMapping from hidtools.util import BusType +from .base import HidBpf import logging @@ -654,7 +655,7 @@ class TestAsusGamepad(BaseTest.TestGamepad): class TestRaptorMach2Joystick(BaseTest.TestGamepad): - hid_bpfs = [("FR-TEC__Raptor-Mach-2.bpf.o", True)] + hid_bpfs = [HidBpf("FR-TEC__Raptor-Mach-2.bpf.o", True)] def create_device(self): return RaptorMach2Joystick( diff --git a/tools/testing/selftests/hid/tests/test_ite_keyboard.py b/tools/testing/selftests/hid/tests/test_ite_keyboard.py index 38550c167bae..f695eaad1648 100644 --- a/tools/testing/selftests/hid/tests/test_ite_keyboard.py +++ b/tools/testing/selftests/hid/tests/test_ite_keyboard.py @@ -11,10 +11,11 @@ from hidtools.util import BusType import libevdev import logging +from . import base logger = logging.getLogger("hidtools.test.ite-keyboard") -KERNEL_MODULE = ("itetech", "hid_ite") +KERNEL_MODULE = base.KernelModule("itetech", "hid_ite") class KbdData(object): diff --git a/tools/testing/selftests/hid/tests/test_multitouch.py b/tools/testing/selftests/hid/tests/test_multitouch.py index 4265012231c6..5d2ffa3d5977 100644 --- a/tools/testing/selftests/hid/tests/test_multitouch.py +++ b/tools/testing/selftests/hid/tests/test_multitouch.py @@ -17,7 +17,7 @@ import time logger = logging.getLogger("hidtools.test.multitouch") -KERNEL_MODULE = ("hid-multitouch", "hid_multitouch") +KERNEL_MODULE = base.KernelModule("hid-multitouch", "hid_multitouch") def BIT(x): diff --git a/tools/testing/selftests/hid/tests/test_sony.py b/tools/testing/selftests/hid/tests/test_sony.py index 7e52c28e59c5..7fd3a8e6137d 100644 --- a/tools/testing/selftests/hid/tests/test_sony.py +++ b/tools/testing/selftests/hid/tests/test_sony.py @@ -7,6 +7,7 @@ # from .base import application_matches +from .base import KernelModule from .test_gamepad import BaseTest from hidtools.device.sony_gamepad import ( PS3Controller, @@ -24,9 +25,9 @@ import pytest logger = logging.getLogger("hidtools.test.sony") -PS3_MODULE = ("sony", "hid_sony") -PS4_MODULE = ("playstation", "hid_playstation") -PS5_MODULE = ("playstation", "hid_playstation") +PS3_MODULE = KernelModule("sony", "hid_sony") +PS4_MODULE = KernelModule("playstation", "hid_playstation") +PS5_MODULE = KernelModule("playstation", "hid_playstation") class SonyBaseTest: diff --git a/tools/testing/selftests/hid/tests/test_tablet.py b/tools/testing/selftests/hid/tests/test_tablet.py index 52fb22cac91e..50d5699812bb 100644 --- a/tools/testing/selftests/hid/tests/test_tablet.py +++ b/tools/testing/selftests/hid/tests/test_tablet.py @@ -10,6 +10,7 @@ from . import base import copy from enum import Enum from hidtools.util import BusType +from .base import HidBpf import libevdev import logging import pytest @@ -1472,7 +1473,7 @@ class TestGoodix_27c6_0e00(BaseTest.TestTablet): class TestXPPen_ArtistPro16Gen2_28bd_095b(BaseTest.TestTablet): - hid_bpfs = [("XPPen__ArtistPro16Gen2.bpf.o", True)] + hid_bpfs = [HidBpf("XPPen__ArtistPro16Gen2.bpf.o", True)] def create_device(self): dev = XPPen_ArtistPro16Gen2_28bd_095b( @@ -1484,7 +1485,7 @@ class TestXPPen_ArtistPro16Gen2_28bd_095b(BaseTest.TestTablet): class TestXPPen_Artist24_28bd_093a(BaseTest.TestTablet): - hid_bpfs = [("XPPen__Artist24.bpf.o", True)] + hid_bpfs = [HidBpf("XPPen__Artist24.bpf.o", True)] def create_device(self): return XPPen_Artist24_28bd_093a( @@ -1495,7 +1496,7 @@ class TestXPPen_Artist24_28bd_093a(BaseTest.TestTablet): class TestHuion_Kamvas_Pro_19_256c_006b(BaseTest.TestTablet): - hid_bpfs = [("Huion__Kamvas-Pro-19.bpf.o", True)] + hid_bpfs = [HidBpf("Huion__Kamvas-Pro-19.bpf.o", True)] def create_device(self): return Huion_Kamvas_Pro_19_256c_006b( diff --git a/tools/testing/selftests/hid/tests/test_wacom_generic.py b/tools/testing/selftests/hid/tests/test_wacom_generic.py index 5cbc0cc9308f..2d6d04f0ff80 100644 --- a/tools/testing/selftests/hid/tests/test_wacom_generic.py +++ b/tools/testing/selftests/hid/tests/test_wacom_generic.py @@ -40,7 +40,7 @@ import logging logger = logging.getLogger("hidtools.test.wacom") -KERNEL_MODULE = ("wacom", "wacom") +KERNEL_MODULE = base.KernelModule("wacom", "wacom") class ProximityState(Enum): @@ -894,7 +894,7 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest """ return [self.make_contact(id, t) for id in range(0, n)] - def assert_contact(self, uhdev, evdev, contact_ids, t=0): + def assert_contact(self, evdev, contact_ids, t=0): """ Assert properties of a contact generated by make_contact. """ @@ -916,12 +916,12 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest assert evdev.slots[slot_num][libevdev.EV_ABS.ABS_MT_POSITION_X] == x assert evdev.slots[slot_num][libevdev.EV_ABS.ABS_MT_POSITION_Y] == y - def assert_contacts(self, uhdev, evdev, data, t=0): + def assert_contacts(self, evdev, data, t=0): """ Assert properties of a list of contacts generated by make_contacts. """ for contact_ids in data: - self.assert_contact(uhdev, evdev, contact_ids, t) + self.assert_contact(evdev, contact_ids, t) def test_contact_id_0(self): """ @@ -998,7 +998,6 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1) in events self.assert_contacts( - uhdev, evdev, [ self.ContactIds(contact_id=0, tracking_id=-1, slot_num=None), @@ -1032,7 +1031,7 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest self.debug_reports(r, uhdev, events) ids = [x[0] for x in state] - self.assert_contacts(uhdev, evdev, ids, t) + self.assert_contacts(evdev, ids, t) t += 1 -- cgit v1.2.3 From 1aee3a44fad2adeaa8f1a3aafd7c0154924af666 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 9 Jul 2025 11:08:51 +0200 Subject: selftests/hid: sync python tests to hid-tools 0.10 hid-tools 0.10 fixes one inconvenience introduced by commit 6a9e76f75c1a ("HID: multitouch: Disable touchpad on firmware level while not in use") This change added a new callback when a hid-nultitouch device is opened or closed to put the underlying device into a given operating mode. However, in the test cases, that means that while the single threaded test is run, it opens the device but has to react to the device while the open() is still running. hid-tools now implements a minimal thread to circumvent this. This makes the HID kernel tests in sync with hid-tools 0.10. This has the net effect of running the full HID python testsuite in 6 minutes instead of 1 hour. Reviewed-by: Peter Hutterer Link: https://patch.msgid.link/20250709-wip-fix-ci-v1-3-b7df4c271cf8@kernel.org Signed-off-by: Benjamin Tissoires --- tools/testing/selftests/hid/tests/base_device.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/hid/tests/base_device.py b/tools/testing/selftests/hid/tests/base_device.py index e13035fe1deb..59465c58d94d 100644 --- a/tools/testing/selftests/hid/tests/base_device.py +++ b/tools/testing/selftests/hid/tests/base_device.py @@ -23,6 +23,7 @@ import fcntl import functools import libevdev import os +import threading try: import pyudev @@ -344,10 +345,28 @@ class BaseDevice(UHIDDevice): if not self.kernel_is_ready or not self.started: return [] + # Starting with kernel v6.16, an event is emitted when + # userspace opens a kernel device, and for some devices + # this translates into a SET_REPORT. + # Because EvdevDevice(path) opens every single evdev node + # we need to have a separate thread to process the incoming + # SET_REPORT or we end up having to wait for the kernel + # timeout of 5 seconds. + done = False + + def dispatch(): + while not done: + self.dispatch(1) + + t = threading.Thread(target=dispatch) + t.start() + self._input_nodes = [ EvdevDevice(path) for path in self.walk_sysfs("input", "input/input*/event*") ] + done = True + t.join() return self._input_nodes def match_evdev_rule(self, application, evdev): -- cgit v1.2.3 From 0e3e0b0c08e388cd9e05bb4d17534bd36bedc9fe Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 9 Jul 2025 22:59:01 -0700 Subject: iommufd/selftest: Add coverage for viommu data Extend the existing test_cmd/err_viommu_alloc helpers to accept optional user data. And add a TEST_F for a loopback test. Link: https://patch.msgid.link/r/8ceb64d30e9953f29270a7d341032ca439317271.1752126748.git.nicolinc@nvidia.com Reviewed-by: Pranjal Shrivastava Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- tools/testing/selftests/iommu/iommufd.c | 31 ++++++++++++++++++------ tools/testing/selftests/iommu/iommufd_fail_nth.c | 5 ++-- tools/testing/selftests/iommu/iommufd_utils.h | 21 ++++++++++------ 3 files changed, 40 insertions(+), 17 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 1a8e85afe9aa..a9dfcce5e1b2 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -2688,7 +2688,7 @@ FIXTURE_SETUP(iommufd_viommu) /* Allocate a vIOMMU taking refcount of the parent hwpt */ test_cmd_viommu_alloc(self->device_id, self->hwpt_id, - IOMMU_VIOMMU_TYPE_SELFTEST, + IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0, &self->viommu_id); /* Allocate a regular nested hwpt */ @@ -2727,24 +2727,27 @@ TEST_F(iommufd_viommu, viommu_negative_tests) if (self->device_id) { /* Negative test -- invalid hwpt (hwpt_id=0) */ test_err_viommu_alloc(ENOENT, device_id, 0, - IOMMU_VIOMMU_TYPE_SELFTEST, NULL); + IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0, + NULL); /* Negative test -- not a nesting parent hwpt */ test_cmd_hwpt_alloc(device_id, ioas_id, 0, &hwpt_id); test_err_viommu_alloc(EINVAL, device_id, hwpt_id, - IOMMU_VIOMMU_TYPE_SELFTEST, NULL); + IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0, + NULL); test_ioctl_destroy(hwpt_id); /* Negative test -- unsupported viommu type */ test_err_viommu_alloc(EOPNOTSUPP, device_id, self->hwpt_id, - 0xdead, NULL); + 0xdead, NULL, 0, NULL); EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, self->hwpt_id)); EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, self->viommu_id)); } else { test_err_viommu_alloc(ENOENT, self->device_id, self->hwpt_id, - IOMMU_VIOMMU_TYPE_SELFTEST, NULL); + IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0, + NULL); } } @@ -2791,6 +2794,21 @@ TEST_F(iommufd_viommu, viommu_alloc_nested_iopf) } } +TEST_F(iommufd_viommu, viommu_alloc_with_data) +{ + struct iommu_viommu_selftest data = { + .in_data = 0xbeef, + }; + + if (!self->device_id) + SKIP(return, "Skipping test for variant no_viommu"); + + test_cmd_viommu_alloc(self->device_id, self->hwpt_id, + IOMMU_VIOMMU_TYPE_SELFTEST, &data, sizeof(data), + &self->viommu_id); + ASSERT_EQ(data.out_data, data.in_data); +} + TEST_F(iommufd_viommu, vdevice_alloc) { uint32_t viommu_id = self->viommu_id; @@ -3105,8 +3123,7 @@ TEST_F(iommufd_device_pasid, pasid_attach) /* Allocate a regular nested hwpt based on viommu */ test_cmd_viommu_alloc(self->device_id, parent_hwpt_id, - IOMMU_VIOMMU_TYPE_SELFTEST, - &viommu_id); + IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0, &viommu_id); test_cmd_hwpt_alloc_nested(self->device_id, viommu_id, IOMMU_HWPT_ALLOC_PASID, &nested_hwpt_id[2], diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c index e11ec4b121fc..f7ccf1822108 100644 --- a/tools/testing/selftests/iommu/iommufd_fail_nth.c +++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c @@ -688,8 +688,9 @@ TEST_FAIL_NTH(basic_fail_nth, device) IOMMU_HWPT_DATA_NONE, 0, 0)) return -1; - if (_test_cmd_viommu_alloc(self->fd, idev_id, hwpt_id, - IOMMU_VIOMMU_TYPE_SELFTEST, 0, &viommu_id)) + if (_test_cmd_viommu_alloc(self->fd, idev_id, hwpt_id, 0, + IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0, + &viommu_id)) return -1; if (_test_cmd_vdevice_alloc(self->fd, viommu_id, idev_id, 0, &vdev_id)) diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index 72f6636e5d90..a5d4cbd089ba 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -897,7 +897,8 @@ static int _test_cmd_trigger_iopf(int fd, __u32 device_id, __u32 pasid, pasid, fault_fd)) static int _test_cmd_viommu_alloc(int fd, __u32 device_id, __u32 hwpt_id, - __u32 type, __u32 flags, __u32 *viommu_id) + __u32 flags, __u32 type, void *data, + __u32 data_len, __u32 *viommu_id) { struct iommu_viommu_alloc cmd = { .size = sizeof(cmd), @@ -905,6 +906,8 @@ static int _test_cmd_viommu_alloc(int fd, __u32 device_id, __u32 hwpt_id, .type = type, .dev_id = device_id, .hwpt_id = hwpt_id, + .data_uptr = (uint64_t)data, + .data_len = data_len, }; int ret; @@ -916,13 +919,15 @@ static int _test_cmd_viommu_alloc(int fd, __u32 device_id, __u32 hwpt_id, return 0; } -#define test_cmd_viommu_alloc(device_id, hwpt_id, type, viommu_id) \ - ASSERT_EQ(0, _test_cmd_viommu_alloc(self->fd, device_id, hwpt_id, \ - type, 0, viommu_id)) -#define test_err_viommu_alloc(_errno, device_id, hwpt_id, type, viommu_id) \ - EXPECT_ERRNO(_errno, \ - _test_cmd_viommu_alloc(self->fd, device_id, hwpt_id, \ - type, 0, viommu_id)) +#define test_cmd_viommu_alloc(device_id, hwpt_id, type, data, data_len, \ + viommu_id) \ + ASSERT_EQ(0, _test_cmd_viommu_alloc(self->fd, device_id, hwpt_id, 0, \ + type, data, data_len, viommu_id)) +#define test_err_viommu_alloc(_errno, device_id, hwpt_id, type, data, \ + data_len, viommu_id) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_viommu_alloc(self->fd, device_id, hwpt_id, 0, \ + type, data, data_len, viommu_id)) static int _test_cmd_vdevice_alloc(int fd, __u32 viommu_id, __u32 idev_id, __u64 virt_id, __u32 *vdev_id) -- cgit v1.2.3 From 07b7c2b4eca3f83ce9cd5ee3fa1c7c001d721c69 Mon Sep 17 00:00:00 2001 From: Moon Hee Lee Date: Thu, 26 Jun 2025 12:16:26 -0700 Subject: selftests: breakpoints: use suspend_stats to reliably check suspend success The step_after_suspend_test verifies that the system successfully suspended and resumed by setting a timerfd and checking whether the timer fully expired. However, this method is unreliable due to timing races. In practice, the system may take time to enter suspend, during which the timer may expire just before or during the transition. As a result, the remaining time after resume may show non-zero nanoseconds, even if suspend/resume completed successfully. This leads to false test failures. Replace the timer-based check with a read from /sys/power/suspend_stats/success. This counter is incremented only after a full suspend/resume cycle, providing a reliable and race-free indicator. Also remove the unused file descriptor for /sys/power/state, which remained after switching to a system() call to trigger suspend [1]. [1] https://lore.kernel.org/all/20240930224025.2858767-1-yifei.l.liu@oracle.com/ Link: https://lore.kernel.org/r/20250626191626.36794-1-moonhee.lee.ca@gmail.com Fixes: c66be905cda2 ("selftests: breakpoints: use remaining time to check if suspend succeed") Signed-off-by: Moon Hee Lee Signed-off-by: Shuah Khan --- .../breakpoints/step_after_suspend_test.c | 41 ++++++++++++++++------ 1 file changed, 31 insertions(+), 10 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c index 8d275f03e977..8d233ac95696 100644 --- a/tools/testing/selftests/breakpoints/step_after_suspend_test.c +++ b/tools/testing/selftests/breakpoints/step_after_suspend_test.c @@ -127,22 +127,42 @@ int run_test(int cpu) return KSFT_PASS; } +/* + * Reads the suspend success count from sysfs. + * Returns the count on success or exits on failure. + */ +static int get_suspend_success_count_or_fail(void) +{ + FILE *fp; + int val; + + fp = fopen("/sys/power/suspend_stats/success", "r"); + if (!fp) + ksft_exit_fail_msg( + "Failed to open suspend_stats/success: %s\n", + strerror(errno)); + + if (fscanf(fp, "%d", &val) != 1) { + fclose(fp); + ksft_exit_fail_msg( + "Failed to read suspend success count\n"); + } + + fclose(fp); + return val; +} + void suspend(void) { - int power_state_fd; int timerfd; int err; + int count_before; + int count_after; struct itimerspec spec = {}; if (getuid() != 0) ksft_exit_skip("Please run the test as root - Exiting.\n"); - power_state_fd = open("/sys/power/state", O_RDWR); - if (power_state_fd < 0) - ksft_exit_fail_msg( - "open(\"/sys/power/state\") failed %s)\n", - strerror(errno)); - timerfd = timerfd_create(CLOCK_BOOTTIME_ALARM, 0); if (timerfd < 0) ksft_exit_fail_msg("timerfd_create() failed\n"); @@ -152,14 +172,15 @@ void suspend(void) if (err < 0) ksft_exit_fail_msg("timerfd_settime() failed\n"); + count_before = get_suspend_success_count_or_fail(); + system("(echo mem > /sys/power/state) 2> /dev/null"); - timerfd_gettime(timerfd, &spec); - if (spec.it_value.tv_sec != 0 || spec.it_value.tv_nsec != 0) + count_after = get_suspend_success_count_or_fail(); + if (count_after <= count_before) ksft_exit_fail_msg("Failed to enter Suspend state\n"); close(timerfd); - close(power_state_fd); } int main(int argc, char **argv) -- cgit v1.2.3 From 0c8754b75e69160ae4bf5436c01447b2e77e181c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 8 Jul 2025 15:06:40 -0700 Subject: selftests: drv-net: test RSS header field configuration Test reading RXFH fields over IOCTL and netlink. # ./tools/testing/selftests/drivers/net/hw/rss_api.py TAP version 13 1..3 ok 1 rss_api.test_rxfh_indir_ntf ok 2 rss_api.test_rxfh_indir_ctx_ntf ok 3 rss_api.test_rxfh_fields # Totals: pass:3 fail:0 xfail:0 xpass:0 skip:0 error:0 Link: https://patch.msgid.link/20250708220640.2738464-6-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/rss_api.py | 47 +++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/hw/rss_api.py b/tools/testing/selftests/drivers/net/hw/rss_api.py index db0f723a674b..6ae908bed1a4 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_api.py +++ b/tools/testing/selftests/drivers/net/hw/rss_api.py @@ -20,6 +20,38 @@ def _ethtool_create(cfg, act, opts): return int(output.split()[-1]) +def _ethtool_get_cfg(cfg, fl_type, to_nl=False): + descr = ethtool(f"-n {cfg.ifname} rx-flow-hash {fl_type}").stdout + + if to_nl: + converter = { + "IP SA": "ip-src", + "IP DA": "ip-dst", + "L4 bytes 0 & 1 [TCP/UDP src port]": "l4-b-0-1", + "L4 bytes 2 & 3 [TCP/UDP dst port]": "l4-b-2-3", + } + + ret = set() + else: + converter = { + "IP SA": "s", + "IP DA": "d", + "L3 proto": "t", + "L4 bytes 0 & 1 [TCP/UDP src port]": "f", + "L4 bytes 2 & 3 [TCP/UDP dst port]": "n", + } + + ret = "" + + for line in descr.split("\n")[1:-2]: + # if this raises we probably need to add more keys to converter above + if to_nl: + ret.add(converter[line]) + else: + ret += converter[line] + return ret + + def test_rxfh_indir_ntf(cfg): """ Check that Netlink notifications are generated when RSS indirection @@ -77,6 +109,21 @@ def test_rxfh_indir_ctx_ntf(cfg): ksft_eq(set(ntf["msg"]["indir"]), {1}) +def test_rxfh_fields(cfg): + """ + Test reading Rx Flow Hash over Netlink. + """ + + flow_types = ["tcp4", "tcp6", "udp4", "udp6"] + ethnl = EthtoolFamily() + + cfg_nl = ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + for fl_type in flow_types: + one = _ethtool_get_cfg(cfg, fl_type, to_nl=True) + ksft_eq(one, cfg_nl["flow-hash"][fl_type], + comment="Config for " + fl_type) + + def main() -> None: """ Ksft boiler plate main """ -- cgit v1.2.3 From 4d61a8a7334399f457867442445a4f916b40cddb Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 9 Jul 2025 16:30:17 +0200 Subject: selftests: Add IPv6 multicast route generation tests for GRE devices. The previous patch fixes a bug that prevented the creation of the default IPv6 multicast route (ff00::/8) for some GRE devices. Now let's extend the GRE IPv6 selftests to cover this case. Also, rename check_ipv6_ll_addr() to check_ipv6_device_config() and adapt comments and script output to take into account the fact that we're not limited to link-local address generation. Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/65a89583bde3bf866a1922c2e5158e4d72c520e2.1752070620.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/gre_ipv6_lladdr.sh | 27 ++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/gre_ipv6_lladdr.sh b/tools/testing/selftests/net/gre_ipv6_lladdr.sh index 5b34f6e1f831..48eb999a3120 100755 --- a/tools/testing/selftests/net/gre_ipv6_lladdr.sh +++ b/tools/testing/selftests/net/gre_ipv6_lladdr.sh @@ -24,7 +24,10 @@ setup_basenet() ip -netns "${NS0}" address add dev lo 2001:db8::10/64 nodad } -# Check if network device has an IPv6 link-local address assigned. +# Check the IPv6 configuration of a network device. +# +# We currently check the generation of the link-local IPv6 address and the +# creation of the ff00::/8 multicast route. # # Parameters: # @@ -35,7 +38,7 @@ setup_basenet() # a link-local address) # * $4: The user visible name for the scenario being tested # -check_ipv6_ll_addr() +check_ipv6_device_config() { local DEV="$1" local EXTRA_MATCH="$2" @@ -45,7 +48,11 @@ check_ipv6_ll_addr() RET=0 set +e ip -netns "${NS0}" -6 address show dev "${DEV}" scope link | grep "fe80::" | grep -q "${EXTRA_MATCH}" - check_err_fail "${XRET}" $? "" + check_err_fail "${XRET}" $? "IPv6 link-local address generation" + + ip -netns "${NS0}" -6 route show table local type multicast ff00::/8 proto kernel | grep -q "${DEV}" + check_err_fail 0 $? "IPv6 multicast route creation" + log_test "${MSG}" set -e } @@ -102,20 +109,20 @@ test_gre_device() ;; esac - # Check that IPv6 link-local address is generated when device goes up + # Check the IPv6 device configuration when it goes up ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" ip -netns "${NS0}" link set dev gretest up - check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}" + check_ipv6_device_config gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}" # Now disable link-local address generation ip -netns "${NS0}" link set dev gretest down ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode=1 ip -netns "${NS0}" link set dev gretest up - # Check that link-local address generation works when re-enabled while - # the device is already up + # Check the IPv6 device configuration when link-local address + # generation is re-enabled while the device is already up ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" - check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}" + check_ipv6_device_config gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}" ip -netns "${NS0}" link del dev gretest } @@ -126,7 +133,7 @@ test_gre4() local MODE for GRE_TYPE in "gre" "gretap"; do - printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" + printf "\n####\nTesting IPv6 configuration of ${GRE_TYPE} devices\n####\n\n" for MODE in "eui64" "none" "stable-privacy" "random"; do test_gre_device "${GRE_TYPE}" 192.0.2.10 192.0.2.11 "${MODE}" @@ -142,7 +149,7 @@ test_gre6() local MODE for GRE_TYPE in "ip6gre" "ip6gretap"; do - printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" + printf "\n####\nTesting IPv6 configuration of ${GRE_TYPE} devices\n####\n\n" for MODE in "eui64" "none" "stable-privacy" "random"; do test_gre_device "${GRE_TYPE}" 2001:db8::10 2001:db8::11 "${MODE}" -- cgit v1.2.3 From 47c84997c686b4d43b225521b732492552b84758 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 9 Jul 2025 09:12:44 +0000 Subject: selftests: net: lib: fix shift count out of range I got the following warning when writing other tests: + handle_test_result_pass 'bond 802.3ad' '(lacp_active off)' + local 'test_name=bond 802.3ad' + shift + local 'opt_str=(lacp_active off)' + shift + log_test_result 'bond 802.3ad' '(lacp_active off)' ' OK ' + local 'test_name=bond 802.3ad' + shift + local 'opt_str=(lacp_active off)' + shift + local 'result= OK ' + shift + local retmsg= + shift /net/tools/testing/selftests/net/forwarding/../lib.sh: line 315: shift: shift count out of range This happens because an extra shift is executed even after all arguments have been consumed. Remove the last shift in log_test_result() to avoid this warning. Fixes: a923af1ceee7 ("selftests: forwarding: Convert log_test() to recognize RET values") Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20250709091244.88395-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 006fdadcc4b9..86a216e9aca8 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -312,7 +312,7 @@ log_test_result() local test_name=$1; shift local opt_str=$1; shift local result=$1; shift - local retmsg=$1; shift + local retmsg=$1 printf "TEST: %-60s [%s]\n" "$test_name $opt_str" "$result" if [[ $retmsg ]]; then -- cgit v1.2.3 From a255b78d14324f8a4a49f88e983b9f00818d1194 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 10 Jul 2025 13:00:06 +0200 Subject: selftests/futex: Adapt the private hash test to RCU related changes The auto scaling on create creation used to automatically assign the new hash because there was the private hash was unused and could be replaced right away. This is already racy because if the private hash is in use by a thread then the visibile resize will be delayed. With the upcoming change to wait for a RCU grace period before the hash can be assigned, the test will always fail. If the reported number of hash buckets is not updated after an auto scaling event, block on an acquired lock with a timeout. The timeout is the delay to wait towards a grace period and locking and a locked pthread_mutex_t ensure that glibc calls into kernel using futex operation which will assign new private hash if available. This will retry every 100ms up to 2 seconds in total. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250710110011.384614-2-bigeasy@linutronix.de --- .../selftests/futex/functional/futex_priv_hash.c | 42 +++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/futex/functional/futex_priv_hash.c b/tools/testing/selftests/futex/functional/futex_priv_hash.c index 24a92dc94eb8..625e3be4129c 100644 --- a/tools/testing/selftests/futex/functional/futex_priv_hash.c +++ b/tools/testing/selftests/futex/functional/futex_priv_hash.c @@ -111,6 +111,30 @@ static void join_max_threads(void) } } +#define SEC_IN_NSEC 1000000000 +#define MSEC_IN_NSEC 1000000 + +static void futex_dummy_op(void) +{ + pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; + struct timespec timeout; + int ret; + + pthread_mutex_lock(&lock); + clock_gettime(CLOCK_REALTIME, &timeout); + timeout.tv_nsec += 100 * MSEC_IN_NSEC; + if (timeout.tv_nsec >= SEC_IN_NSEC) { + timeout.tv_nsec -= SEC_IN_NSEC; + timeout.tv_sec++; + } + ret = pthread_mutex_timedlock(&lock, &timeout); + if (ret == 0) + ksft_exit_fail_msg("Succeffuly locked an already locked mutex.\n"); + + if (ret != ETIMEDOUT) + ksft_exit_fail_msg("pthread_mutex_timedlock() did not timeout: %d.\n", ret); +} + static void usage(char *prog) { printf("Usage: %s\n", prog); @@ -129,7 +153,7 @@ int main(int argc, char *argv[]) int futex_slots1, futex_slotsn, online_cpus; pthread_mutexattr_t mutex_attr_pi; int use_global_hash = 0; - int ret; + int ret, retry = 20; int c; while ((c = getopt(argc, argv, "cghv:")) != -1) { @@ -208,8 +232,24 @@ int main(int argc, char *argv[]) */ ksft_print_msg("Online CPUs: %d\n", online_cpus); if (online_cpus > 16) { +retry_getslots: futex_slotsn = futex_hash_slots_get(); if (futex_slotsn < 0 || futex_slots1 == futex_slotsn) { + retry--; + /* + * Auto scaling on thread creation can be slightly delayed + * because it waits for a RCU grace period twice. The new + * private hash is assigned upon the first futex operation + * after grace period. + * To cover all this for testing purposes the function + * below will acquire a lock and acquire it again with a + * 100ms timeout which must timeout. This ensures we + * sleep for 100ms and issue a futex operation. + */ + if (retry > 0) { + futex_dummy_op(); + goto retry_getslots; + } ksft_print_msg("Expected increase of hash buckets but got: %d -> %d\n", futex_slots1, futex_slotsn); ksft_exit_fail_msg(test_msg_auto_inc); -- cgit v1.2.3 From 16adc7f136dc143fdaa0d465172d7a1e6d5ae3c5 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 10 Jul 2025 13:00:10 +0200 Subject: selftests/futex: Remove support for IMMUTABLE Testing for the IMMUTABLE part of the futex interface is not needed after the removal of the interface. Remove support for IMMUTABLE from the sefltest. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250710110011.384614-6-bigeasy@linutronix.de --- .../selftests/futex/functional/futex_priv_hash.c | 71 +++++++--------------- 1 file changed, 22 insertions(+), 49 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/futex/functional/futex_priv_hash.c b/tools/testing/selftests/futex/functional/futex_priv_hash.c index 625e3be4129c..a9cedc365102 100644 --- a/tools/testing/selftests/futex/functional/futex_priv_hash.c +++ b/tools/testing/selftests/futex/functional/futex_priv_hash.c @@ -26,14 +26,12 @@ static int counter; #ifndef PR_FUTEX_HASH #define PR_FUTEX_HASH 78 # define PR_FUTEX_HASH_SET_SLOTS 1 -# define FH_FLAG_IMMUTABLE (1ULL << 0) # define PR_FUTEX_HASH_GET_SLOTS 2 -# define PR_FUTEX_HASH_GET_IMMUTABLE 3 #endif -static int futex_hash_slots_set(unsigned int slots, int flags) +static int futex_hash_slots_set(unsigned int slots) { - return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, slots, flags); + return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, slots, 0); } static int futex_hash_slots_get(void) @@ -41,16 +39,11 @@ static int futex_hash_slots_get(void) return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS); } -static int futex_hash_immutable_get(void) -{ - return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_IMMUTABLE); -} - static void futex_hash_slots_set_verify(int slots) { int ret; - ret = futex_hash_slots_set(slots, 0); + ret = futex_hash_slots_set(slots); if (ret != 0) { ksft_test_result_fail("Failed to set slots to %d: %m\n", slots); ksft_finished(); @@ -64,13 +57,13 @@ static void futex_hash_slots_set_verify(int slots) ksft_test_result_pass("SET and GET slots %d passed\n", slots); } -static void futex_hash_slots_set_must_fail(int slots, int flags) +static void futex_hash_slots_set_must_fail(int slots) { int ret; - ret = futex_hash_slots_set(slots, flags); - ksft_test_result(ret < 0, "futex_hash_slots_set(%d, %d)\n", - slots, flags); + ret = futex_hash_slots_set(slots); + ksft_test_result(ret < 0, "futex_hash_slots_set(%d)\n", + slots); } static void *thread_return_fn(void *arg) @@ -152,18 +145,14 @@ int main(int argc, char *argv[]) { int futex_slots1, futex_slotsn, online_cpus; pthread_mutexattr_t mutex_attr_pi; - int use_global_hash = 0; int ret, retry = 20; int c; - while ((c = getopt(argc, argv, "cghv:")) != -1) { + while ((c = getopt(argc, argv, "chv:")) != -1) { switch (c) { case 'c': log_color(1); break; - case 'g': - use_global_hash = 1; - break; case 'h': usage(basename(argv[0])); exit(0); @@ -178,7 +167,7 @@ int main(int argc, char *argv[]) } ksft_print_header(); - ksft_set_plan(22); + ksft_set_plan(21); ret = pthread_mutexattr_init(&mutex_attr_pi); ret |= pthread_mutexattr_setprotocol(&mutex_attr_pi, PTHREAD_PRIO_INHERIT); @@ -191,10 +180,6 @@ int main(int argc, char *argv[]) if (ret != 0) ksft_exit_fail_msg("futex_hash_slots_get() failed: %d, %m\n", ret); - ret = futex_hash_immutable_get(); - if (ret != 0) - ksft_exit_fail_msg("futex_hash_immutable_get() failed: %d, %m\n", ret); - ksft_test_result_pass("Basic get slots and immutable status.\n"); ret = pthread_create(&threads[0], NULL, thread_return_fn, NULL); if (ret != 0) @@ -267,7 +252,7 @@ retry_getslots: futex_hash_slots_set_verify(32); futex_hash_slots_set_verify(16); - ret = futex_hash_slots_set(15, 0); + ret = futex_hash_slots_set(15); ksft_test_result(ret < 0, "Use 15 slots\n"); futex_hash_slots_set_verify(2); @@ -285,28 +270,23 @@ retry_getslots: ksft_test_result(ret == 2, "No more auto-resize after manaul setting, got %d\n", ret); - futex_hash_slots_set_must_fail(1 << 29, 0); + futex_hash_slots_set_must_fail(1 << 29); + futex_hash_slots_set_verify(4); /* - * Once the private hash has been made immutable or global hash has been requested, - * then this requested can not be undone. + * Once the global hash has been requested, then this requested can not + * be undone. */ - if (use_global_hash) { - ret = futex_hash_slots_set(0, 0); - ksft_test_result(ret == 0, "Global hash request\n"); - } else { - ret = futex_hash_slots_set(4, FH_FLAG_IMMUTABLE); - ksft_test_result(ret == 0, "Immutable resize to 4\n"); - } + ret = futex_hash_slots_set(0); + ksft_test_result(ret == 0, "Global hash request\n"); if (ret != 0) goto out; - futex_hash_slots_set_must_fail(4, 0); - futex_hash_slots_set_must_fail(4, FH_FLAG_IMMUTABLE); - futex_hash_slots_set_must_fail(8, 0); - futex_hash_slots_set_must_fail(8, FH_FLAG_IMMUTABLE); - futex_hash_slots_set_must_fail(0, FH_FLAG_IMMUTABLE); - futex_hash_slots_set_must_fail(6, FH_FLAG_IMMUTABLE); + futex_hash_slots_set_must_fail(4); + futex_hash_slots_set_must_fail(8); + futex_hash_slots_set_must_fail(8); + futex_hash_slots_set_must_fail(0); + futex_hash_slots_set_must_fail(6); ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS); if (ret != 0) { @@ -317,14 +297,7 @@ retry_getslots: join_max_threads(); ret = futex_hash_slots_get(); - if (use_global_hash) { - ksft_test_result(ret == 0, "Continue to use global hash\n"); - } else { - ksft_test_result(ret == 4, "Continue to use the 4 hash buckets\n"); - } - - ret = futex_hash_immutable_get(); - ksft_test_result(ret == 1, "Hash reports to be immutable\n"); + ksft_test_result(ret == 0, "Continue to use global hash\n"); out: ksft_finished(); -- cgit v1.2.3 From 20896914da8ad24df0a77e24887912d87754fb83 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 9 Jul 2025 22:59:08 -0700 Subject: iommufd/selftest: Add coverage for IOMMUFD_CMD_HW_QUEUE_ALLOC Some simple tests for IOMMUFD_CMD_HW_QUEUE_ALLOC infrastructure covering the new iommufd_hw_queue_depend/undepend() helpers. Link: https://patch.msgid.link/r/e8a194d187d7ef445f43e4a3c04fb39472050afd.1752126748.git.nicolinc@nvidia.com Signed-off-by: Nicolin Chen Reviewed-by: Pranjal Shrivastava Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/iommufd_test.h | 3 + drivers/iommu/iommufd/selftest.c | 97 ++++++++++++++++++++++++ tools/testing/selftests/iommu/iommufd.c | 59 ++++++++++++++ tools/testing/selftests/iommu/iommufd_fail_nth.c | 6 ++ tools/testing/selftests/iommu/iommufd_utils.h | 31 ++++++++ 5 files changed, 196 insertions(+) (limited to 'tools/testing/selftests') diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h index fbf9ecb35a13..51cd744a354f 100644 --- a/drivers/iommu/iommufd/iommufd_test.h +++ b/drivers/iommu/iommufd/iommufd_test.h @@ -265,4 +265,7 @@ struct iommu_viommu_event_selftest { __u32 virt_id; }; +#define IOMMU_HW_QUEUE_TYPE_SELFTEST 0xdeadbeef +#define IOMMU_TEST_HW_QUEUE_MAX 2 + #endif diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index 38066dfeb2e7..2189e9b119ee 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -150,6 +150,8 @@ to_mock_nested(struct iommu_domain *domain) struct mock_viommu { struct iommufd_viommu core; struct mock_iommu_domain *s2_parent; + struct mock_hw_queue *hw_queue[IOMMU_TEST_HW_QUEUE_MAX]; + struct mutex queue_mutex; }; static inline struct mock_viommu *to_mock_viommu(struct iommufd_viommu *viommu) @@ -157,6 +159,19 @@ static inline struct mock_viommu *to_mock_viommu(struct iommufd_viommu *viommu) return container_of(viommu, struct mock_viommu, core); } +struct mock_hw_queue { + struct iommufd_hw_queue core; + struct mock_viommu *mock_viommu; + struct mock_hw_queue *prev; + u16 index; +}; + +static inline struct mock_hw_queue * +to_mock_hw_queue(struct iommufd_hw_queue *hw_queue) +{ + return container_of(hw_queue, struct mock_hw_queue, core); +} + enum selftest_obj_type { TYPE_IDEV, }; @@ -670,9 +685,11 @@ static void mock_viommu_destroy(struct iommufd_viommu *viommu) { struct mock_iommu_device *mock_iommu = container_of( viommu->iommu_dev, struct mock_iommu_device, iommu_dev); + struct mock_viommu *mock_viommu = to_mock_viommu(viommu); if (refcount_dec_and_test(&mock_iommu->users)) complete(&mock_iommu->complete); + mutex_destroy(&mock_viommu->queue_mutex); /* iommufd core frees mock_viommu and viommu */ } @@ -764,10 +781,86 @@ out: return rc; } +static size_t mock_viommu_get_hw_queue_size(struct iommufd_viommu *viommu, + enum iommu_hw_queue_type queue_type) +{ + if (queue_type != IOMMU_HW_QUEUE_TYPE_SELFTEST) + return 0; + return HW_QUEUE_STRUCT_SIZE(struct mock_hw_queue, core); +} + +static void mock_hw_queue_destroy(struct iommufd_hw_queue *hw_queue) +{ + struct mock_hw_queue *mock_hw_queue = to_mock_hw_queue(hw_queue); + struct mock_viommu *mock_viommu = mock_hw_queue->mock_viommu; + + mutex_lock(&mock_viommu->queue_mutex); + mock_viommu->hw_queue[mock_hw_queue->index] = NULL; + if (mock_hw_queue->prev) + iommufd_hw_queue_undepend(mock_hw_queue, mock_hw_queue->prev, + core); + mutex_unlock(&mock_viommu->queue_mutex); +} + +/* Test iommufd_hw_queue_depend/undepend() */ +static int mock_hw_queue_init_phys(struct iommufd_hw_queue *hw_queue, u32 index, + phys_addr_t base_addr_pa) +{ + struct mock_viommu *mock_viommu = to_mock_viommu(hw_queue->viommu); + struct mock_hw_queue *mock_hw_queue = to_mock_hw_queue(hw_queue); + struct mock_hw_queue *prev = NULL; + int rc = 0; + + if (index >= IOMMU_TEST_HW_QUEUE_MAX) + return -EINVAL; + + mutex_lock(&mock_viommu->queue_mutex); + + if (mock_viommu->hw_queue[index]) { + rc = -EEXIST; + goto unlock; + } + + if (index) { + prev = mock_viommu->hw_queue[index - 1]; + if (!prev) { + rc = -EIO; + goto unlock; + } + } + + /* + * Test to catch a kernel bug if the core converted the physical address + * incorrectly. Let mock_domain_iova_to_phys() WARN_ON if it fails. + */ + if (base_addr_pa != iommu_iova_to_phys(&mock_viommu->s2_parent->domain, + hw_queue->base_addr)) { + rc = -EFAULT; + goto unlock; + } + + if (prev) { + rc = iommufd_hw_queue_depend(mock_hw_queue, prev, core); + if (rc) + goto unlock; + } + + mock_hw_queue->prev = prev; + mock_hw_queue->mock_viommu = mock_viommu; + mock_viommu->hw_queue[index] = mock_hw_queue; + + hw_queue->destroy = &mock_hw_queue_destroy; +unlock: + mutex_unlock(&mock_viommu->queue_mutex); + return rc; +} + static struct iommufd_viommu_ops mock_viommu_ops = { .destroy = mock_viommu_destroy, .alloc_domain_nested = mock_viommu_alloc_domain_nested, .cache_invalidate = mock_viommu_cache_invalidate, + .get_hw_queue_size = mock_viommu_get_hw_queue_size, + .hw_queue_init_phys = mock_hw_queue_init_phys, }; static size_t mock_get_viommu_size(struct device *dev, @@ -784,6 +877,7 @@ static int mock_viommu_init(struct iommufd_viommu *viommu, { struct mock_iommu_device *mock_iommu = container_of( viommu->iommu_dev, struct mock_iommu_device, iommu_dev); + struct mock_viommu *mock_viommu = to_mock_viommu(viommu); struct iommu_viommu_selftest data; int rc; @@ -801,6 +895,9 @@ static int mock_viommu_init(struct iommufd_viommu *viommu, } refcount_inc(&mock_iommu->users); + mutex_init(&mock_viommu->queue_mutex); + mock_viommu->s2_parent = to_mock_domain(parent_domain); + viommu->ops = &mock_viommu_ops; return 0; } diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index a9dfcce5e1b2..73426de77675 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -3032,6 +3032,65 @@ TEST_F(iommufd_viommu, vdevice_cache) } } +TEST_F(iommufd_viommu, hw_queue) +{ + __u64 iova = MOCK_APERTURE_START, iova2; + uint32_t viommu_id = self->viommu_id; + uint32_t hw_queue_id[2]; + + if (!viommu_id) + SKIP(return, "Skipping test for variant no_viommu"); + + /* Fail IOMMU_HW_QUEUE_TYPE_DEFAULT */ + test_err_hw_queue_alloc(EOPNOTSUPP, viommu_id, + IOMMU_HW_QUEUE_TYPE_DEFAULT, 0, iova, PAGE_SIZE, + &hw_queue_id[0]); + /* Fail queue addr and length */ + test_err_hw_queue_alloc(EINVAL, viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, + 0, iova, 0, &hw_queue_id[0]); + test_err_hw_queue_alloc(EOVERFLOW, viommu_id, + IOMMU_HW_QUEUE_TYPE_SELFTEST, 0, ~(uint64_t)0, + PAGE_SIZE, &hw_queue_id[0]); + /* Fail missing iova */ + test_err_hw_queue_alloc(ENOENT, viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, + 0, iova, PAGE_SIZE, &hw_queue_id[0]); + + /* Map iova */ + test_ioctl_ioas_map(buffer, PAGE_SIZE, &iova); + test_ioctl_ioas_map(buffer + PAGE_SIZE, PAGE_SIZE, &iova2); + + /* Fail index=1 and =MAX; must start from index=0 */ + test_err_hw_queue_alloc(EIO, viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, 1, + iova, PAGE_SIZE, &hw_queue_id[0]); + test_err_hw_queue_alloc(EINVAL, viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, + IOMMU_TEST_HW_QUEUE_MAX, iova, PAGE_SIZE, + &hw_queue_id[0]); + + /* Allocate index=0, declare ownership of the iova */ + test_cmd_hw_queue_alloc(viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, 0, + iova, PAGE_SIZE, &hw_queue_id[0]); + /* Fail duplicated index */ + test_err_hw_queue_alloc(EEXIST, viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, + 0, iova, PAGE_SIZE, &hw_queue_id[0]); + /* Fail unmap, due to iova ownership */ + test_err_ioctl_ioas_unmap(EBUSY, iova, PAGE_SIZE); + /* The 2nd page is not pinned, so it can be unmmap */ + test_ioctl_ioas_unmap(iova2, PAGE_SIZE); + + /* Allocate index=1, with an unaligned case */ + test_cmd_hw_queue_alloc(viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, 1, + iova + PAGE_SIZE / 2, PAGE_SIZE / 2, + &hw_queue_id[1]); + /* Fail to destroy, due to dependency */ + EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, hw_queue_id[0])); + + /* Destroy in descending order */ + test_ioctl_destroy(hw_queue_id[1]); + test_ioctl_destroy(hw_queue_id[0]); + /* Now it can unmap the first page */ + test_ioctl_ioas_unmap(iova, PAGE_SIZE); +} + FIXTURE(iommufd_device_pasid) { int fd; diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c index f7ccf1822108..41c685bbd252 100644 --- a/tools/testing/selftests/iommu/iommufd_fail_nth.c +++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c @@ -634,6 +634,7 @@ TEST_FAIL_NTH(basic_fail_nth, device) uint32_t idev_id; uint32_t hwpt_id; uint32_t viommu_id; + uint32_t hw_queue_id; uint32_t vdev_id; __u64 iova; @@ -696,6 +697,11 @@ TEST_FAIL_NTH(basic_fail_nth, device) if (_test_cmd_vdevice_alloc(self->fd, viommu_id, idev_id, 0, &vdev_id)) return -1; + if (_test_cmd_hw_queue_alloc(self->fd, viommu_id, + IOMMU_HW_QUEUE_TYPE_SELFTEST, 0, iova, + PAGE_SIZE, &hw_queue_id)) + return -1; + if (_test_ioctl_fault_alloc(self->fd, &fault_id, &fault_fd)) return -1; close(fault_fd); diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index a5d4cbd089ba..9a556f99d992 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -956,6 +956,37 @@ static int _test_cmd_vdevice_alloc(int fd, __u32 viommu_id, __u32 idev_id, _test_cmd_vdevice_alloc(self->fd, viommu_id, idev_id, \ virt_id, vdev_id)) +static int _test_cmd_hw_queue_alloc(int fd, __u32 viommu_id, __u32 type, + __u32 idx, __u64 base_addr, __u64 length, + __u32 *hw_queue_id) +{ + struct iommu_hw_queue_alloc cmd = { + .size = sizeof(cmd), + .viommu_id = viommu_id, + .type = type, + .index = idx, + .nesting_parent_iova = base_addr, + .length = length, + }; + int ret; + + ret = ioctl(fd, IOMMU_HW_QUEUE_ALLOC, &cmd); + if (ret) + return ret; + if (hw_queue_id) + *hw_queue_id = cmd.out_hw_queue_id; + return 0; +} + +#define test_cmd_hw_queue_alloc(viommu_id, type, idx, base_addr, len, out_qid) \ + ASSERT_EQ(0, _test_cmd_hw_queue_alloc(self->fd, viommu_id, type, idx, \ + base_addr, len, out_qid)) +#define test_err_hw_queue_alloc(_errno, viommu_id, type, idx, base_addr, len, \ + out_qid) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_hw_queue_alloc(self->fd, viommu_id, type, idx, \ + base_addr, len, out_qid)) + static int _test_cmd_veventq_alloc(int fd, __u32 viommu_id, __u32 type, __u32 *veventq_id, __u32 *veventq_fd) { -- cgit v1.2.3 From a339dd699a7aa01bce4b38c8d81def310cf2bca0 Mon Sep 17 00:00:00 2001 From: Mohsin Bashir Date: Thu, 10 Jul 2025 11:43:47 -0700 Subject: selftests: drv-net: Add bpftool util Add bpf utility to simplify the use of bpftool for XDP tests included in this series. Signed-off-by: Mohsin Bashir Link: https://patch.msgid.link/20250710184351.63797-2-mohsin.bashr@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/lib/py/__init__.py | 2 +- tools/testing/selftests/net/lib/py/utils.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py index fce5d9218f1d..39968bc3df43 100644 --- a/tools/testing/selftests/drivers/net/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py @@ -15,7 +15,7 @@ try: NlError, RtnlFamily, DevlinkFamily from net.lib.py import CmdExitFailure from net.lib.py import bkg, cmd, defer, ethtool, fd_read_timeout, ip, \ - rand_port, tool, wait_port_listen + rand_port, tool, wait_port_listen, bpftool from net.lib.py import fd_read_timeout from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index 34470d65d871..acf0e2c38614 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -175,6 +175,10 @@ def tool(name, args, json=None, ns=None, host=None): return cmd_obj +def bpftool(args, json=None, ns=None, host=None): + return tool('bpftool', args, json=json, ns=ns, host=host) + + def ip(args, json=None, ns=None, host=None): if ns: args = f'-netns {ns} ' + args -- cgit v1.2.3 From 80478a2b450e984b5d270d0d7088912d64e84303 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 9 Jul 2025 22:59:10 -0700 Subject: iommufd/selftest: Add coverage for the new mmap interface Extend the loopback test to a new mmap page. Link: https://patch.msgid.link/r/b02b1220c955c3cf9ea5dd9fe9349ab1b4f8e20b.1752126748.git.nicolinc@nvidia.com Signed-off-by: Nicolin Chen Reviewed-by: Pranjal Shrivastava Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/iommufd_test.h | 4 ++++ drivers/iommu/iommufd/selftest.c | 33 ++++++++++++++++++++++++++- tools/testing/selftests/iommu/iommufd.c | 19 +++++++++++++++ tools/testing/selftests/iommu/iommufd_utils.h | 4 ++++ 4 files changed, 59 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h index 51cd744a354f..8fc618b2bcf9 100644 --- a/drivers/iommu/iommufd/iommufd_test.h +++ b/drivers/iommu/iommufd/iommufd_test.h @@ -232,12 +232,16 @@ struct iommu_hwpt_invalidate_selftest { * (IOMMU_VIOMMU_TYPE_SELFTEST) * @in_data: Input random data from user space * @out_data: Output data (matching @in_data) to user space + * @out_mmap_offset: The offset argument for mmap syscall + * @out_mmap_length: The length argument for mmap syscall * * Simply set @out_data=@in_data for a loopback test */ struct iommu_viommu_selftest { __u32 in_data; __u32 out_data; + __aligned_u64 out_mmap_offset; + __aligned_u64 out_mmap_length; }; /* Should not be equal to any defined value in enum iommu_viommu_invalidate_data_type */ diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index 2189e9b119ee..8b2c44b32530 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -152,6 +152,9 @@ struct mock_viommu { struct mock_iommu_domain *s2_parent; struct mock_hw_queue *hw_queue[IOMMU_TEST_HW_QUEUE_MAX]; struct mutex queue_mutex; + + unsigned long mmap_offset; + u32 *page; /* Mmap page to test u32 type of in_data */ }; static inline struct mock_viommu *to_mock_viommu(struct iommufd_viommu *viommu) @@ -689,6 +692,10 @@ static void mock_viommu_destroy(struct iommufd_viommu *viommu) if (refcount_dec_and_test(&mock_iommu->users)) complete(&mock_iommu->complete); + if (mock_viommu->mmap_offset) + iommufd_viommu_destroy_mmap(&mock_viommu->core, + mock_viommu->mmap_offset); + free_page((unsigned long)mock_viommu->page); mutex_destroy(&mock_viommu->queue_mutex); /* iommufd core frees mock_viommu and viommu */ @@ -887,11 +894,28 @@ static int mock_viommu_init(struct iommufd_viommu *viommu, if (rc) return rc; + /* Allocate two pages */ + mock_viommu->page = + (u32 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1); + if (!mock_viommu->page) + return -ENOMEM; + + rc = iommufd_viommu_alloc_mmap(&mock_viommu->core, + __pa(mock_viommu->page), + PAGE_SIZE * 2, + &mock_viommu->mmap_offset); + if (rc) + goto err_free_page; + + /* For loopback tests on both the page and out_data */ + *mock_viommu->page = data.in_data; data.out_data = data.in_data; + data.out_mmap_length = PAGE_SIZE * 2; + data.out_mmap_offset = mock_viommu->mmap_offset; rc = iommu_copy_struct_to_user( user_data, &data, IOMMU_VIOMMU_TYPE_SELFTEST, out_data); if (rc) - return rc; + goto err_destroy_mmap; } refcount_inc(&mock_iommu->users); @@ -900,6 +924,13 @@ static int mock_viommu_init(struct iommufd_viommu *viommu, viommu->ops = &mock_viommu_ops; return 0; + +err_destroy_mmap: + iommufd_viommu_destroy_mmap(&mock_viommu->core, + mock_viommu->mmap_offset); +err_free_page: + free_page((unsigned long)mock_viommu->page); + return rc; } static const struct iommu_ops mock_ops = { diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 73426de77675..0b21c095ca5e 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -2799,6 +2799,7 @@ TEST_F(iommufd_viommu, viommu_alloc_with_data) struct iommu_viommu_selftest data = { .in_data = 0xbeef, }; + uint32_t *test; if (!self->device_id) SKIP(return, "Skipping test for variant no_viommu"); @@ -2807,6 +2808,24 @@ TEST_F(iommufd_viommu, viommu_alloc_with_data) IOMMU_VIOMMU_TYPE_SELFTEST, &data, sizeof(data), &self->viommu_id); ASSERT_EQ(data.out_data, data.in_data); + + /* Negative mmap tests -- offset and length cannot be changed */ + test_err_mmap(ENXIO, data.out_mmap_length, + data.out_mmap_offset + PAGE_SIZE); + test_err_mmap(ENXIO, data.out_mmap_length, + data.out_mmap_offset + PAGE_SIZE * 2); + test_err_mmap(ENXIO, data.out_mmap_length / 2, data.out_mmap_offset); + test_err_mmap(ENXIO, data.out_mmap_length * 2, data.out_mmap_offset); + + /* Now do a correct mmap for a loopback test */ + test = mmap(NULL, data.out_mmap_length, PROT_READ | PROT_WRITE, + MAP_SHARED, self->fd, data.out_mmap_offset); + ASSERT_NE(MAP_FAILED, test); + ASSERT_EQ(data.in_data, *test); + + /* The owner of the mmap region should be blocked */ + EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, self->viommu_id)); + munmap(test, data.out_mmap_length); } TEST_F(iommufd_viommu, vdevice_alloc) diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index 9a556f99d992..4a1b2bade018 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -56,6 +56,10 @@ static unsigned long PAGE_SIZE; #define offsetofend(TYPE, MEMBER) \ (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) +#define test_err_mmap(_errno, length, offset) \ + EXPECT_ERRNO(_errno, (long)mmap(NULL, length, PROT_READ | PROT_WRITE, \ + MAP_SHARED, self->fd, offset)) + static inline void *memfd_mmap(size_t length, int prot, int flags, int *mfd_p) { int mfd_flags = (flags & MAP_HUGETLB) ? MFD_HUGETLB : 0; -- cgit v1.2.3 From 3a35f7d4a4673edf6f02422bb2d78b17c667e167 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 9 Jul 2025 22:59:14 -0700 Subject: iommufd/selftest: Update hw_info coverage for an input data_type Test both IOMMU_HW_INFO_TYPE_DEFAULT and IOMMU_HW_INFO_TYPE_SELFTEST, and add a negative test for an unsupported type. Also drop the unused mask in test_cmd_get_hw_capabilities() as checkpatch is complaining. Link: https://patch.msgid.link/r/f01a1e50cd7366f217cbf192ad0b2b79e0eb89f0.1752126748.git.nicolinc@nvidia.com Signed-off-by: Nicolin Chen Reviewed-by: Pranjal Shrivastava Signed-off-by: Jason Gunthorpe --- tools/testing/selftests/iommu/iommufd.c | 32 +++++++++++++++++------ tools/testing/selftests/iommu/iommufd_fail_nth.c | 4 +-- tools/testing/selftests/iommu/iommufd_utils.h | 33 ++++++++++++++---------- 3 files changed, 46 insertions(+), 23 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 0b21c095ca5e..d59d48022a24 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -764,19 +764,34 @@ TEST_F(iommufd_ioas, get_hw_info) uint8_t max_pasid = 0; /* Provide a zero-size user_buffer */ - test_cmd_get_hw_info(self->device_id, NULL, 0); + test_cmd_get_hw_info(self->device_id, + IOMMU_HW_INFO_TYPE_DEFAULT, NULL, 0); /* Provide a user_buffer with exact size */ - test_cmd_get_hw_info(self->device_id, &buffer_exact, sizeof(buffer_exact)); + test_cmd_get_hw_info(self->device_id, + IOMMU_HW_INFO_TYPE_DEFAULT, &buffer_exact, + sizeof(buffer_exact)); + + /* Request for a wrong data_type, and a correct one */ + test_err_get_hw_info(EOPNOTSUPP, self->device_id, + IOMMU_HW_INFO_TYPE_SELFTEST + 1, + &buffer_exact, sizeof(buffer_exact)); + test_cmd_get_hw_info(self->device_id, + IOMMU_HW_INFO_TYPE_SELFTEST, &buffer_exact, + sizeof(buffer_exact)); /* * Provide a user_buffer with size larger than the exact size to check if * kernel zero the trailing bytes. */ - test_cmd_get_hw_info(self->device_id, &buffer_larger, sizeof(buffer_larger)); + test_cmd_get_hw_info(self->device_id, + IOMMU_HW_INFO_TYPE_DEFAULT, &buffer_larger, + sizeof(buffer_larger)); /* * Provide a user_buffer with size smaller than the exact size to check if * the fields within the size range still gets updated. */ - test_cmd_get_hw_info(self->device_id, &buffer_smaller, sizeof(buffer_smaller)); + test_cmd_get_hw_info(self->device_id, + IOMMU_HW_INFO_TYPE_DEFAULT, + &buffer_smaller, sizeof(buffer_smaller)); test_cmd_get_hw_info_pasid(self->device_id, &max_pasid); ASSERT_EQ(0, max_pasid); if (variant->pasid_capable) { @@ -786,9 +801,11 @@ TEST_F(iommufd_ioas, get_hw_info) } } else { test_err_get_hw_info(ENOENT, self->device_id, - &buffer_exact, sizeof(buffer_exact)); + IOMMU_HW_INFO_TYPE_DEFAULT, &buffer_exact, + sizeof(buffer_exact)); test_err_get_hw_info(ENOENT, self->device_id, - &buffer_larger, sizeof(buffer_larger)); + IOMMU_HW_INFO_TYPE_DEFAULT, &buffer_larger, + sizeof(buffer_larger)); } } @@ -2175,8 +2192,7 @@ TEST_F(iommufd_dirty_tracking, device_dirty_capability) test_cmd_hwpt_alloc(self->idev_id, self->ioas_id, 0, &hwpt_id); test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL); - test_cmd_get_hw_capabilities(self->idev_id, caps, - IOMMU_HW_CAP_DIRTY_TRACKING); + test_cmd_get_hw_capabilities(self->idev_id, caps); ASSERT_EQ(IOMMU_HW_CAP_DIRTY_TRACKING, caps & IOMMU_HW_CAP_DIRTY_TRACKING); diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c index 41c685bbd252..651fc9f13c08 100644 --- a/tools/testing/selftests/iommu/iommufd_fail_nth.c +++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c @@ -667,8 +667,8 @@ TEST_FAIL_NTH(basic_fail_nth, device) &self->stdev_id, NULL, &idev_id)) return -1; - if (_test_cmd_get_hw_info(self->fd, idev_id, &info, - sizeof(info), NULL, NULL)) + if (_test_cmd_get_hw_info(self->fd, idev_id, IOMMU_HW_INFO_TYPE_DEFAULT, + &info, sizeof(info), NULL, NULL)) return -1; if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index 4a1b2bade018..5384852ce038 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -761,20 +761,24 @@ static void teardown_iommufd(int fd, struct __test_metadata *_metadata) #endif /* @data can be NULL */ -static int _test_cmd_get_hw_info(int fd, __u32 device_id, void *data, - size_t data_len, uint32_t *capabilities, - uint8_t *max_pasid) +static int _test_cmd_get_hw_info(int fd, __u32 device_id, __u32 data_type, + void *data, size_t data_len, + uint32_t *capabilities, uint8_t *max_pasid) { struct iommu_test_hw_info *info = (struct iommu_test_hw_info *)data; struct iommu_hw_info cmd = { .size = sizeof(cmd), .dev_id = device_id, .data_len = data_len, + .in_data_type = data_type, .data_uptr = (uint64_t)data, .out_capabilities = 0, }; int ret; + if (data_type != IOMMU_HW_INFO_TYPE_DEFAULT) + cmd.flags |= IOMMU_HW_INFO_FLAG_INPUT_TYPE; + ret = ioctl(fd, IOMMU_GET_HW_INFO, &cmd); if (ret) return ret; @@ -817,20 +821,23 @@ static int _test_cmd_get_hw_info(int fd, __u32 device_id, void *data, return 0; } -#define test_cmd_get_hw_info(device_id, data, data_len) \ - ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, data, \ - data_len, NULL, NULL)) +#define test_cmd_get_hw_info(device_id, data_type, data, data_len) \ + ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, data_type, \ + data, data_len, NULL, NULL)) -#define test_err_get_hw_info(_errno, device_id, data, data_len) \ - EXPECT_ERRNO(_errno, _test_cmd_get_hw_info(self->fd, device_id, data, \ - data_len, NULL, NULL)) +#define test_err_get_hw_info(_errno, device_id, data_type, data, data_len) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_get_hw_info(self->fd, device_id, data_type, \ + data, data_len, NULL, NULL)) -#define test_cmd_get_hw_capabilities(device_id, caps, mask) \ - ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, NULL, \ +#define test_cmd_get_hw_capabilities(device_id, caps) \ + ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, \ + IOMMU_HW_INFO_TYPE_DEFAULT, NULL, \ 0, &caps, NULL)) -#define test_cmd_get_hw_info_pasid(device_id, max_pasid) \ - ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, NULL, \ +#define test_cmd_get_hw_info_pasid(device_id, max_pasid) \ + ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, \ + IOMMU_HW_INFO_TYPE_DEFAULT, NULL, \ 0, NULL, max_pasid)) static int _test_ioctl_fault_alloc(int fd, __u32 *fault_id, __u32 *fault_fd) -- cgit v1.2.3 From 9f9559f0acc42e98709017951144ef6334dad187 Mon Sep 17 00:00:00 2001 From: Emil Tsalapatis Date: Wed, 9 Jul 2025 15:13:12 -0400 Subject: selftests/bpf: add selftests for bpf_arena_reserve_pages Add selftests for the new bpf_arena_reserve_pages kfunc. Acked-by: Yonghong Song Signed-off-by: Emil Tsalapatis Acked-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20250709191312.29840-3-emil@etsalapatis.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/bpf_arena_common.h | 3 + tools/testing/selftests/bpf/progs/verifier_arena.c | 106 +++++++++++++++++++++ .../selftests/bpf/progs/verifier_arena_large.c | 98 +++++++++++++++++++ 3 files changed, 207 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/bpf_arena_common.h b/tools/testing/selftests/bpf/bpf_arena_common.h index 68a51dcc0669..16f8ce832004 100644 --- a/tools/testing/selftests/bpf/bpf_arena_common.h +++ b/tools/testing/selftests/bpf/bpf_arena_common.h @@ -46,8 +46,11 @@ void __arena* bpf_arena_alloc_pages(void *map, void __arena *addr, __u32 page_cnt, int node_id, __u64 flags) __ksym __weak; +int bpf_arena_reserve_pages(void *map, void __arena *addr, __u32 page_cnt) __ksym __weak; void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) __ksym __weak; +#define arena_base(map) ((void __arena *)((struct bpf_arena *)(map))->user_vm_start) + #else /* when compiled as user space code */ #define __arena diff --git a/tools/testing/selftests/bpf/progs/verifier_arena.c b/tools/testing/selftests/bpf/progs/verifier_arena.c index 67509c5d3982..7f4827eede3c 100644 --- a/tools/testing/selftests/bpf/progs/verifier_arena.c +++ b/tools/testing/selftests/bpf/progs/verifier_arena.c @@ -3,6 +3,7 @@ #define BPF_NO_KFUNC_PROTOTYPES #include +#include #include #include #include "bpf_misc.h" @@ -114,6 +115,111 @@ int basic_alloc3(void *ctx) return 0; } +SEC("syscall") +__success __retval(0) +int basic_reserve1(void *ctx) +{ +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) + char __arena *page; + int ret; + + page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0); + if (!page) + return 1; + + page += __PAGE_SIZE; + + /* Reserve the second page */ + ret = bpf_arena_reserve_pages(&arena, page, 1); + if (ret) + return 2; + + /* Try to explicitly allocate the reserved page. */ + page = bpf_arena_alloc_pages(&arena, page, 1, NUMA_NO_NODE, 0); + if (page) + return 3; + + /* Try to implicitly allocate the page (since there's only 2 of them). */ + page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0); + if (page) + return 4; +#endif + return 0; +} + +SEC("syscall") +__success __retval(0) +int basic_reserve2(void *ctx) +{ +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) + char __arena *page; + int ret; + + page = arena_base(&arena); + ret = bpf_arena_reserve_pages(&arena, page, 1); + if (ret) + return 1; + + page = bpf_arena_alloc_pages(&arena, page, 1, NUMA_NO_NODE, 0); + if ((u64)page) + return 2; +#endif + return 0; +} + +/* Reserve the same page twice, should return -EBUSY. */ +SEC("syscall") +__success __retval(0) +int reserve_twice(void *ctx) +{ +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) + char __arena *page; + int ret; + + page = arena_base(&arena); + + ret = bpf_arena_reserve_pages(&arena, page, 1); + if (ret) + return 1; + + ret = bpf_arena_reserve_pages(&arena, page, 1); + if (ret != -EBUSY) + return 2; +#endif + return 0; +} + +/* Try to reserve past the end of the arena. */ +SEC("syscall") +__success __retval(0) +int reserve_invalid_region(void *ctx) +{ +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) + char __arena *page; + int ret; + + /* Try a NULL pointer. */ + ret = bpf_arena_reserve_pages(&arena, NULL, 3); + if (ret != -EINVAL) + return 1; + + page = arena_base(&arena); + + ret = bpf_arena_reserve_pages(&arena, page, 3); + if (ret != -EINVAL) + return 2; + + ret = bpf_arena_reserve_pages(&arena, page, 4096); + if (ret != -EINVAL) + return 3; + + ret = bpf_arena_reserve_pages(&arena, page, (1ULL << 32) - 1); + if (ret != -EINVAL) + return 4; +#endif + return 0; +} + SEC("iter.s/bpf_map") __success __log_level(2) int iter_maps1(struct bpf_iter__bpf_map *ctx) diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_large.c b/tools/testing/selftests/bpf/progs/verifier_arena_large.c index f94f30cf1bb8..9dbdf123542d 100644 --- a/tools/testing/selftests/bpf/progs/verifier_arena_large.c +++ b/tools/testing/selftests/bpf/progs/verifier_arena_large.c @@ -67,6 +67,104 @@ int big_alloc1(void *ctx) return 0; } +/* Try to access a reserved page. Behavior should be identical with accessing unallocated pages. */ +SEC("syscall") +__success __retval(0) +int access_reserved(void *ctx) +{ +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) + volatile char __arena *page; + char __arena *base; + const size_t len = 4; + int ret, i; + + /* Get a separate region of the arena. */ + page = base = arena_base(&arena) + 16384 * PAGE_SIZE; + + ret = bpf_arena_reserve_pages(&arena, base, len); + if (ret) + return 1; + + /* Try to dirty reserved memory. */ + for (i = 0; i < len && can_loop; i++) + *page = 0x5a; + + for (i = 0; i < len && can_loop; i++) { + page = (volatile char __arena *)(base + i * PAGE_SIZE); + + /* + * Error out in case either the write went through, + * or the address has random garbage. + */ + if (*page == 0x5a) + return 2 + 2 * i; + + if (*page) + return 2 + 2 * i + 1; + } +#endif + return 0; +} + +/* Try to allocate a region overlapping with a reservation. */ +SEC("syscall") +__success __retval(0) +int request_partially_reserved(void *ctx) +{ +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) + volatile char __arena *page; + char __arena *base; + int ret; + + /* Add an arbitrary page offset. */ + page = base = arena_base(&arena) + 4096 * __PAGE_SIZE; + + ret = bpf_arena_reserve_pages(&arena, base + 3 * __PAGE_SIZE, 4); + if (ret) + return 1; + + page = bpf_arena_alloc_pages(&arena, base, 5, NUMA_NO_NODE, 0); + if ((u64)page != 0ULL) + return 2; +#endif + return 0; +} + +SEC("syscall") +__success __retval(0) +int free_reserved(void *ctx) +{ +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) + char __arena *addr; + char __arena *page; + int ret; + + /* Add an arbitrary page offset. */ + addr = arena_base(&arena) + 32768 * __PAGE_SIZE; + + page = bpf_arena_alloc_pages(&arena, addr, 2, NUMA_NO_NODE, 0); + if (!page) + return 1; + + ret = bpf_arena_reserve_pages(&arena, addr + 2 * __PAGE_SIZE, 2); + if (ret) + return 2; + + /* + * Reserved and allocated pages should be interchangeable for + * bpf_arena_free_pages(). Free a reserved and an allocated + * page with a single call. + */ + bpf_arena_free_pages(&arena, addr + __PAGE_SIZE , 2); + + /* The free call above should have succeeded, so this allocation should too. */ + page = bpf_arena_alloc_pages(&arena, addr + __PAGE_SIZE, 2, NUMA_NO_NODE, 0); + if (!page) + return 3; +#endif + return 0; +} + #if defined(__BPF_FEATURE_ADDR_SPACE_CAST) #define PAGE_CNT 100 __u8 __arena * __arena page[PAGE_CNT]; /* occupies the first page */ -- cgit v1.2.3 From d81526a6ebff4ac2358b71d40271c8f95212fac1 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Thu, 10 Jul 2025 20:21:41 +0200 Subject: selftests/bpf: Range analysis test case for JSET This patch adds coverage for the warning detected by syzkaller and fixed in the previous patch. Without the previous patch, this test fails with: verifier bug: REG INVARIANTS VIOLATION (false_reg1): range bounds violation u64=[0x0, 0x0] s64=[0x0, 0x0] u32=[0x1, 0x0] s32=[0x0, 0x0] var_off=(0x0, 0x0)(1) Signed-off-by: Paul Chaignon Acked-by: Yonghong Song Link: https://lore.kernel.org/r/c7893be1170fdbcf64e0200c110cdbd360ce7086.1752171365.git.paul.chaignon@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/verifier_bounds.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index 6f986ae5085e..63b533ca4933 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -2,6 +2,7 @@ /* Converted from tools/testing/selftests/bpf/verifier/bounds.c */ #include +#include <../../../include/linux/filter.h> #include #include "bpf_misc.h" @@ -1532,4 +1533,21 @@ __naked void sub32_partial_overflow(void) : __clobber_all); } +SEC("socket") +__description("dead branch on jset, does not result in invariants violation error") +__success __log_level(2) +__retval(0) __flag(BPF_F_TEST_REG_INVARIANTS) +__naked void jset_range_analysis(void) +{ + asm volatile (" \ + call %[bpf_get_netns_cookie]; \ + if r0 == 0 goto l0_%=; \ + if r0 & 0xffffffff goto +0; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_netns_cookie) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From ecdec65ec78d67d3ebd17edc88b88312054abe0d Mon Sep 17 00:00:00 2001 From: William Liu Date: Tue, 8 Jul 2025 16:44:05 +0000 Subject: selftests/tc-testing: Add tests for restrictions on netem duplication Ensure that a duplicating netem cannot exist in a tree with other netems in both qdisc addition and change. This is meant to prevent the soft lockup and OOM loop scenario discussed in [1]. Also adjust a HFSC's re-entrancy test case with netem for this new restriction - KASAN still triggers upon its failure. [1] https://lore.kernel.org/netdev/8DuRWwfqjoRDLDmBMlIfbrsZg9Gx50DHJc1ilxsEBNe2D6NMoigR_eIRIG0LOjMc3r10nUUZtArXx4oZBIdUfZQrwjcQhdinnMis_0G7VEk=@willsroot.io/ Signed-off-by: William Liu Reviewed-by: Savino Dicanosa Acked-by: Jamal Hadi Salim Link: https://patch.msgid.link/20250708164219.875521-1-will@willsroot.io Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/infra/qdiscs.json | 5 +- .../tc-testing/tc-tests/qdiscs/netem.json | 81 ++++++++++++++++++++++ 2 files changed, 83 insertions(+), 3 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 5c6851e8d311..9504c4d55a8f 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -478,7 +478,6 @@ "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%", "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip dst 10.10.10.1/32 flowid 1:1", "$TC class add dev $DUMMY parent 1:0 classid 1:2 hfsc ls m2 10Mbit", - "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 netem duplicate 100%", "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 2 u32 match ip dst 10.10.10.2/32 flowid 1:2", "ping -c 1 10.10.10.1 -I$DUMMY > /dev/null || true", "$TC filter del dev $DUMMY parent 1:0 protocol ip prio 1", @@ -491,8 +490,8 @@ { "kind": "hfsc", "handle": "1:", - "bytes": 392, - "packets": 4 + "bytes": 294, + "packets": 3 } ], "matchCount": "1", diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json index 3c4444961488..718d2df2aafa 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json @@ -336,5 +336,86 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "d34d", + "name": "NETEM test qdisc duplication restriction in qdisc tree in netem_change root", + "category": ["qdisc", "netem"], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY root handle 1: netem limit 1", + "$TC qdisc add dev $DUMMY parent 1: handle 2: netem limit 1" + ], + "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: netem duplicate 50%", + "expExitCode": "2", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "qdisc netem", + "matchCount": "2", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root" + ] + }, + { + "id": "b33f", + "name": "NETEM test qdisc duplication restriction in qdisc tree in netem_change non-root", + "category": ["qdisc", "netem"], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY root handle 1: netem limit 1", + "$TC qdisc add dev $DUMMY parent 1: handle 2: netem limit 1" + ], + "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 2: netem duplicate 50%", + "expExitCode": "2", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "qdisc netem", + "matchCount": "2", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root" + ] + }, + { + "id": "cafe", + "name": "NETEM test qdisc duplication restriction in qdisc tree", + "category": ["qdisc", "netem"], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY root handle 1: netem limit 1 duplicate 100%" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY parent 1: handle 2: netem duplicate 100%", + "expExitCode": "2", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "qdisc netem", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root" + ] + }, + { + "id": "1337", + "name": "NETEM test qdisc duplication restriction in qdisc tree across branches", + "category": ["qdisc", "netem"], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY parent root handle 1:0 hfsc", + "$TC class add dev $DUMMY parent 1:0 classid 1:1 hfsc rt m2 10Mbit", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem", + "$TC class add dev $DUMMY parent 1:0 classid 1:2 hfsc rt m2 10Mbit" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 netem duplicate 100%", + "expExitCode": "2", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "qdisc netem", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root" + ] } ] -- cgit v1.2.3 From 650fe2a9dd290078b782fb2e13dd4f5104a5de8b Mon Sep 17 00:00:00 2001 From: Vishwanath Seshagiri Date: Thu, 10 Jul 2025 09:53:37 -0700 Subject: selftests: flip local/remote endpoints in iou-zcrx.py The iou-zcrx selftest currently runs the server on the remote host and the client on the local host. This commit flips the endpoints such that server runs on localhost and client on remote. This change brings the iou-zcrx selftest in convention with other selftests. Drive-by fix for a missing import exception that happens when the network interface has less than 2 combined channels. Test plan: ran iou-zcrx.py selftest between 2 physical machines Signed-off-by: Vishwanath Seshagiri Link: https://patch.msgid.link/20250710165337.614159-1-vishs@meta.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/iou-zcrx.py | 98 +++++++++++----------- 1 file changed, 49 insertions(+), 49 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py index 9c03fd777f3d..712c806508b5 100755 --- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py @@ -3,37 +3,37 @@ import re from os import path -from lib.py import ksft_run, ksft_exit +from lib.py import ksft_run, ksft_exit, KsftSkipEx from lib.py import NetDrvEpEnv from lib.py import bkg, cmd, defer, ethtool, rand_port, wait_port_listen def _get_current_settings(cfg): - output = ethtool(f"-g {cfg.ifname}", json=True, host=cfg.remote)[0] + output = ethtool(f"-g {cfg.ifname}", json=True)[0] return (output['rx'], output['hds-thresh']) def _get_combined_channels(cfg): - output = ethtool(f"-l {cfg.ifname}", host=cfg.remote).stdout + output = ethtool(f"-l {cfg.ifname}").stdout values = re.findall(r'Combined:\s+(\d+)', output) return int(values[1]) def _create_rss_ctx(cfg, chan): - output = ethtool(f"-X {cfg.ifname} context new start {chan} equal 1", host=cfg.remote).stdout + output = ethtool(f"-X {cfg.ifname} context new start {chan} equal 1").stdout values = re.search(r'New RSS context is (\d+)', output).group(1) ctx_id = int(values) - return (ctx_id, defer(ethtool, f"-X {cfg.ifname} delete context {ctx_id}", host=cfg.remote)) + return (ctx_id, defer(ethtool, f"-X {cfg.ifname} delete context {ctx_id}")) def _set_flow_rule(cfg, port, chan): - output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} action {chan}", host=cfg.remote).stdout + output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} action {chan}").stdout values = re.search(r'ID (\d+)', output).group(1) return int(values) def _set_flow_rule_rss(cfg, port, ctx_id): - output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} context {ctx_id}", host=cfg.remote).stdout + output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} context {ctx_id}").stdout values = re.search(r'ID (\d+)', output).group(1) return int(values) @@ -47,26 +47,26 @@ def test_zcrx(cfg) -> None: (rx_ring, hds_thresh) = _get_current_settings(cfg) port = rand_port() - ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote) - defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote) + ethtool(f"-G {cfg.ifname} tcp-data-split on") + defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto") - ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote) - defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote) + ethtool(f"-G {cfg.ifname} hds-thresh 0") + defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}") - ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote) - defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote) + ethtool(f"-G {cfg.ifname} rx 64") + defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}") - ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote) - defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote) + ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}") + defer(ethtool, f"-X {cfg.ifname} default") flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1) - defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) + defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") - rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}" - tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 12840" - with bkg(rx_cmd, host=cfg.remote, exit_wait=True): - wait_port_listen(port, proto="tcp", host=cfg.remote) - cmd(tx_cmd) + rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}" + tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840" + with bkg(rx_cmd, exit_wait=True): + wait_port_listen(port, proto="tcp") + cmd(tx_cmd, host=cfg.remote) def test_zcrx_oneshot(cfg) -> None: @@ -78,26 +78,26 @@ def test_zcrx_oneshot(cfg) -> None: (rx_ring, hds_thresh) = _get_current_settings(cfg) port = rand_port() - ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote) - defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote) + ethtool(f"-G {cfg.ifname} tcp-data-split on") + defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto") - ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote) - defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote) + ethtool(f"-G {cfg.ifname} hds-thresh 0") + defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}") - ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote) - defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote) + ethtool(f"-G {cfg.ifname} rx 64") + defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}") - ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote) - defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote) + ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}") + defer(ethtool, f"-X {cfg.ifname} default") flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1) - defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) + defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") - rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -o 4" - tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 4096 -z 16384" - with bkg(rx_cmd, host=cfg.remote, exit_wait=True): - wait_port_listen(port, proto="tcp", host=cfg.remote) - cmd(tx_cmd) + rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -o 4" + tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 4096 -z 16384" + with bkg(rx_cmd, exit_wait=True): + wait_port_listen(port, proto="tcp") + cmd(tx_cmd, host=cfg.remote) def test_zcrx_rss(cfg) -> None: @@ -109,27 +109,27 @@ def test_zcrx_rss(cfg) -> None: (rx_ring, hds_thresh) = _get_current_settings(cfg) port = rand_port() - ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote) - defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote) + ethtool(f"-G {cfg.ifname} tcp-data-split on") + defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto") - ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote) - defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote) + ethtool(f"-G {cfg.ifname} hds-thresh 0") + defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}") - ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote) - defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote) + ethtool(f"-G {cfg.ifname} rx 64") + defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}") - ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote) - defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote) + ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}") + defer(ethtool, f"-X {cfg.ifname} default") (ctx_id, delete_ctx) = _create_rss_ctx(cfg, combined_chans - 1) flow_rule_id = _set_flow_rule_rss(cfg, port, ctx_id) - defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) + defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") - rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}" - tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 12840" - with bkg(rx_cmd, host=cfg.remote, exit_wait=True): - wait_port_listen(port, proto="tcp", host=cfg.remote) - cmd(tx_cmd) + rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}" + tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840" + with bkg(rx_cmd, exit_wait=True): + wait_port_listen(port, proto="tcp") + cmd(tx_cmd, host=cfg.remote) def main() -> None: -- cgit v1.2.3 From 963c94c95a3161487b0e3d9f75848b3c161e9def Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Thu, 10 Jul 2025 13:18:34 +0200 Subject: selftests: net: add netdev-l2addr.sh for testing L2 address functionality MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new test script to the network selftests which tests getting and setting of layer 2 addresses through netlink, including the newly added support for setting a permaddr on netdevsim devices. Reviewed-by: Simon Horman Signed-off-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20250710-netdevsim-perm_addr-v4-2-c9db2fecf3bf@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/lib.sh | 23 +++++++++++ tools/testing/selftests/net/netdev-l2addr.sh | 59 ++++++++++++++++++++++++++++ 3 files changed, 83 insertions(+) create mode 100755 tools/testing/selftests/net/netdev-l2addr.sh (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 543776596529..66a3ef221ad7 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -63,6 +63,7 @@ TEST_PROGS += ip_local_port_range.sh TEST_PROGS += rps_default_mask.sh TEST_PROGS += big_tcp.sh TEST_PROGS += netns-sysctl.sh +TEST_PROGS += netdev-l2addr.sh TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh xfrm_policy_add_speed.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 35c03e9feebc..c7add0dc4c60 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -240,6 +240,29 @@ create_netdevsim() { echo nsim$id } +create_netdevsim_port() { + local nsim_id="$1" + local ns="$2" + local port_id="$3" + local perm_addr="$4" + local orig_dev + local new_dev + local nsim_path + + nsim_path="/sys/bus/netdevsim/devices/netdevsim$nsim_id" + + echo "$port_id $perm_addr" | ip netns exec "$ns" tee "$nsim_path"/new_port > /dev/null || return 1 + + orig_dev=$(ip netns exec "$ns" find "$nsim_path"/net/ -maxdepth 1 -name 'e*' | tail -n 1) + orig_dev=$(basename "$orig_dev") + new_dev="nsim${nsim_id}p$port_id" + + ip -netns "$ns" link set dev "$orig_dev" name "$new_dev" + ip -netns "$ns" link set dev "$new_dev" up + + echo "$new_dev" +} + # Remove netdevsim with given id. cleanup_netdevsim() { local id="$1" diff --git a/tools/testing/selftests/net/netdev-l2addr.sh b/tools/testing/selftests/net/netdev-l2addr.sh new file mode 100755 index 000000000000..18509da293e5 --- /dev/null +++ b/tools/testing/selftests/net/netdev-l2addr.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh +set -o pipefail + +NSIM_ADDR=2025 +TEST_ADDR="d0:be:d0:be:d0:00" + +RET_CODE=0 + +cleanup() { + cleanup_netdevsim "$NSIM_ADDR" + cleanup_ns "$NS" +} + +trap cleanup EXIT + +fail() { + echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2 + RET_CODE=1 +} + +get_addr() +{ + local type="$1" + local dev="$2" + local ns="$3" + + ip -j -n "$ns" link show dev "$dev" | jq -er ".[0].$type" +} + +setup_ns NS + +nsim=$(create_netdevsim $NSIM_ADDR "$NS") + +get_addr address "$nsim" "$NS" >/dev/null || fail "Couldn't get ether addr" +get_addr broadcast "$nsim" "$NS" >/dev/null || fail "Couldn't get brd addr" +get_addr permaddr "$nsim" "$NS" >/dev/null && fail "Found perm_addr without setting it" + +ip -n "$NS" link set dev "$nsim" address "$TEST_ADDR" +ip -n "$NS" link set dev "$nsim" brd "$TEST_ADDR" + +[[ "$(get_addr address "$nsim" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't set ether addr" +[[ "$(get_addr broadcast "$nsim" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't set brd addr" + +if create_netdevsim_port "$NSIM_ADDR" "$NS" 2 "FF:FF:FF:FF:FF:FF" 2>/dev/null; then + fail "Created netdevsim with broadcast permaddr" +fi + +nsim_port=$(create_netdevsim_port "$NSIM_ADDR" "$NS" 2 "$TEST_ADDR") + +get_addr address "$nsim_port" "$NS" >/dev/null || fail "Couldn't get ether addr" +get_addr broadcast "$nsim_port" "$NS" >/dev/null || fail "Couldn't get brd addr" +[[ "$(get_addr permaddr "$nsim_port" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't get permaddr" + +cleanup_netdevsim "$NSIM_ADDR" "$NS" + +exit $RET_CODE -- cgit v1.2.3 From f0600fe94986f70f433f4a1a0664658a079b2feb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Jul 2025 15:56:41 +0000 Subject: selftests/net: packetdrill: add --mss option to three tests Three tests are cooking GSO packets but do not provide gso_size information to the kernel, triggering this message: TCP: tun0: Driver has suspect GRO implementation, TCP performance may be compromised. Add --mss option to avoid this warning. Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250710155641.3028726-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt | 2 ++ tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt | 3 +++ tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt | 3 +++ 3 files changed, 8 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt index 914eabab367a..657e42ca65b5 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 // Test for blocking read. + --tolerance_usecs=10000 +--mss=1000 `./defaults.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt index df49c67645ac..e13f0eee9795 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt @@ -1,5 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 // Test TCP_INQ and TCP_CM_INQ on the client side. + +--mss=1000 + `./defaults.sh ` diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt index 04a5e2590c62..14dd5f813d50 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt @@ -1,5 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 // Test TCP_INQ and TCP_CM_INQ on the server side. + +--mss=1000 + `./defaults.sh ` -- cgit v1.2.3 From c7d7713e36a6ab4c42e40c952d5ba7a51b1091b0 Mon Sep 17 00:00:00 2001 From: Sebastian Chlad Date: Wed, 2 Jul 2025 15:23:36 +0200 Subject: selftests: cgroup: Allow longer timeout for kmem_dead_cgroups cleanup The test_kmem_dead_cgroups test currently assumes that RCU and memory reclaim will complete within 5 seconds. In some environments this timeout may be insufficient, leading to spurious test failures. This patch introduces max_time set to 20 which is then used in the test. After 5th sec the debug message is printed to indicate the cleanup is still ongoing. In the system under test with 16 CPUs the original test was failing most of the time and the cleanup time took usually approx. 6sec. Further tests were conducted with and without do_rcu_barrier and the results (respectively) are as follow: quantiles 0 0.25 0.5 0.75 1 1 2 3 8 20 (mean = 4.7667) 3 5 8 8 20 (mean = 7.6667) Acked-by: Michal Koutny Signed-off-by: Sebastian Chlad Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/test_kmem.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c index 96693d8772be..63b3c9aad399 100644 --- a/tools/testing/selftests/cgroup/test_kmem.c +++ b/tools/testing/selftests/cgroup/test_kmem.c @@ -308,6 +308,7 @@ static int test_kmem_dead_cgroups(const char *root) char *parent; long dead; int i; + int max_time = 20; parent = cg_name(root, "kmem_dead_cgroups_test"); if (!parent) @@ -322,7 +323,7 @@ static int test_kmem_dead_cgroups(const char *root) if (cg_run_in_subcgroups(parent, alloc_dcache, (void *)100, 30)) goto cleanup; - for (i = 0; i < 5; i++) { + for (i = 0; i < max_time; i++) { dead = cg_read_key_long(parent, "cgroup.stat", "nr_dying_descendants "); if (dead == 0) { @@ -334,6 +335,8 @@ static int test_kmem_dead_cgroups(const char *root) * let's wait a bit and repeat. */ sleep(1); + if (i > 5) + printf("Waiting time longer than 5s; wait: %ds (dead: %ld)\n", i, dead); } cleanup: -- cgit v1.2.3 From e07caae73557d144a9237fb977dfee08befa015f Mon Sep 17 00:00:00 2001 From: Sebastian Chlad Date: Wed, 2 Jul 2025 18:40:10 +0200 Subject: selftests: cgroup: Fix missing newline in test_zswap_writeback_one Fixes malformed test output due to missing newline Signed-off-by: Sebastian Chlad Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/test_zswap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/cgroup/test_zswap.c b/tools/testing/selftests/cgroup/test_zswap.c index 40de679248b8..e1f578ca2841 100644 --- a/tools/testing/selftests/cgroup/test_zswap.c +++ b/tools/testing/selftests/cgroup/test_zswap.c @@ -338,7 +338,7 @@ static int test_zswap_writeback_one(const char *cgroup, bool wb) return -1; if (wb != !!zswpwb_after) { - ksft_print_msg("zswpwb_after is %ld while wb is %s", + ksft_print_msg("zswpwb_after is %ld while wb is %s\n", zswpwb_after, wb ? "enabled" : "disabled"); return -1; } -- cgit v1.2.3 From 3a1d22bd85381c4e358fc3340e776c3a3223a1d0 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 10 Jul 2025 16:01:36 +0200 Subject: selftests/hid: add a test case for the recent syzbot underflow Syzbot found a buffer underflow in __hid_request(). Add a related test case for it. It's not perfect, but it allows to catch a corner case when a report descriptor is crafted so that it has a size of 0. Link: https://patch.msgid.link/20250710-report-size-null-v2-4-ccf922b7c4e5@kernel.org Signed-off-by: Benjamin Tissoires --- tools/testing/selftests/hid/tests/test_mouse.py | 70 +++++++++++++++++++++++++ 1 file changed, 70 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/hid/tests/test_mouse.py b/tools/testing/selftests/hid/tests/test_mouse.py index 66daf7e5975c..eb4e15a0e53b 100644 --- a/tools/testing/selftests/hid/tests/test_mouse.py +++ b/tools/testing/selftests/hid/tests/test_mouse.py @@ -439,6 +439,68 @@ class BadResolutionMultiplierMouse(ResolutionMultiplierMouse): return 32 # EPIPE +class BadReportDescriptorMouse(BaseMouse): + """ + This "device" was one autogenerated by syzbot. There are a lot of issues in + it, and the most problematic is that it declares features that have no + size. + + This leads to report->size being set to 0 and can mess up with usbhid + internals. Fortunately, uhid merely passes the incoming buffer, without + touching it so a buffer of size 0 will be translated to [] without + triggering a kernel oops. + + Because the report descriptor is wrong, no input are created, and we need + to tweak a little bit the parameters to make it look correct. + """ + + # fmt: off + report_descriptor = [ + 0x96, 0x01, 0x00, # Report Count (1) 0 + 0x06, 0x01, 0x00, # Usage Page (Generic Desktop) 3 + # 0x03, 0x00, 0x00, 0x00, 0x00, # Ignored by the kernel somehow + 0x2a, 0x90, 0xa0, # Usage Maximum (41104) 6 + 0x27, 0x00, 0x00, 0x00, 0x00, # Logical Maximum (0) 9 + 0xb3, 0x81, 0x3e, 0x25, 0x03, # Feature (Cnst,Arr,Abs,Vol) 14 + 0x1b, 0xdd, 0xe8, 0x40, 0x50, # Usage Minimum (1346431197) 19 + 0x3b, 0x5d, 0x8c, 0x3d, 0xda, # Designator Index 24 + ] + # fmt: on + + def __init__( + self, rdesc=report_descriptor, name=None, input_info=(3, 0x045E, 0x07DA) + ): + super().__init__(rdesc, name, input_info) + self.high_resolution_report_called = False + + def get_evdev(self, application=None): + assert self._input_nodes is None + return ( + "Ok" # should be a list or None, but both would fail, so abusing the system + ) + + def next_sync_events(self, application=None): + # there are no evdev nodes, so no events + return [] + + def is_ready(self): + # we wait for the SET_REPORT command to come + return self.high_resolution_report_called + + def set_report(self, req, rnum, rtype, data): + if rtype != self.UHID_FEATURE_REPORT: + raise InvalidHIDCommunication(f"Unexpected report type: {rtype}") + if rnum != 0x0: + raise InvalidHIDCommunication(f"Unexpected report number: {rnum}") + + if len(data) != 1: + raise InvalidHIDCommunication(f"Unexpected data: {data}, expected '[0]'") + + self.high_resolution_report_called = True + + return 0 + + class ResolutionMultiplierHWheelMouse(TwoWheelMouse): # fmt: off report_descriptor = [ @@ -975,3 +1037,11 @@ class TestMiMouse(TestWheelMouse): # assert below print out the real error pass assert remaining == [] + + +class TestBadReportDescriptorMouse(base.BaseTestCase.TestUhid): + def create_device(self): + return BadReportDescriptorMouse() + + def assertName(self, uhdev): + pass -- cgit v1.2.3 From b9e50363178a40c76bebaf2f00faa2b0b6baf8d1 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 12 Jul 2025 11:00:56 +0200 Subject: selftests/nolibc: add x32 test configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Nolibc supports the x32 ABI on x86. Add a testcase to make sure the support stays functional. QEMU user does not have support for x32, so skip the test there. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250712-nolibc-x32-v1-2-6d81cb798710@weissschuh.net --- tools/testing/selftests/nolibc/Makefile.nolibc | 12 ++++++++++++ tools/testing/selftests/nolibc/run-tests.sh | 7 ++++++- 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/nolibc/Makefile.nolibc b/tools/testing/selftests/nolibc/Makefile.nolibc index 51ba853dd97e..0fb759ba992e 100644 --- a/tools/testing/selftests/nolibc/Makefile.nolibc +++ b/tools/testing/selftests/nolibc/Makefile.nolibc @@ -47,6 +47,7 @@ XARCH_riscv = riscv64 XARCH = $(or $(XARCH_$(ARCH)),$(ARCH)) # map from user input variants to their kernel supported architectures +ARCH_x32 = x86 ARCH_armthumb = arm ARCH_ppc = powerpc ARCH_ppc64 = powerpc @@ -68,6 +69,7 @@ ARCH := $(or $(ARCH_$(XARCH)),$(XARCH)) # kernel image names by architecture IMAGE_i386 = arch/x86/boot/bzImage IMAGE_x86_64 = arch/x86/boot/bzImage +IMAGE_x32 = arch/x86/boot/bzImage IMAGE_x86 = arch/x86/boot/bzImage IMAGE_arm64 = arch/arm64/boot/Image IMAGE_arm = arch/arm/boot/zImage @@ -97,6 +99,7 @@ IMAGE_NAME = $(notdir $(IMAGE)) # default kernel configurations that appear to be usable DEFCONFIG_i386 = defconfig DEFCONFIG_x86_64 = defconfig +DEFCONFIG_x32 = defconfig DEFCONFIG_x86 = defconfig DEFCONFIG_arm64 = defconfig DEFCONFIG_arm = multi_v7_defconfig @@ -122,6 +125,7 @@ DEFCONFIG_m68k = virt_defconfig DEFCONFIG_sh4 = rts7751r2dplus_defconfig DEFCONFIG = $(DEFCONFIG_$(XARCH)) +EXTRACONFIG_x32 = -e CONFIG_X86_X32_ABI EXTRACONFIG_arm = -e CONFIG_NAMESPACES EXTRACONFIG_armthumb = -e CONFIG_NAMESPACES EXTRACONFIG_m68k = -e CONFIG_BLK_DEV_INITRD @@ -134,6 +138,7 @@ TEST = # QEMU_ARCH: arch names used by qemu QEMU_ARCH_i386 = i386 QEMU_ARCH_x86_64 = x86_64 +QEMU_ARCH_x32 = x86_64 QEMU_ARCH_x86 = x86_64 QEMU_ARCH_arm64 = aarch64 QEMU_ARCH_arm = arm @@ -174,6 +179,7 @@ endif # QEMU_ARGS : some arch-specific args to pass to qemu QEMU_ARGS_i386 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_x86_64 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_x32 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_x86 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_arm64 = -M virt -cpu cortex-a53 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_arm = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" @@ -210,6 +216,7 @@ Q=@ endif CFLAGS_i386 = $(call cc-option,-m32) +CFLAGS_x32 = -mx32 CFLAGS_arm = -marm CFLAGS_armthumb = -mthumb -march=armv6t2 CFLAGS_ppc = -m32 -mbig-endian -mno-vsx $(call cc-option,-mmultiple) @@ -236,6 +243,11 @@ LDFLAGS := LIBGCC := -lgcc +ifeq ($(ARCH),x86) +# Not needed on x86, probably not present for x32 +LIBGCC := +endif + ifneq ($(LLVM),) # Not needed for clang LIBGCC := diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh index 43eb30be316a..e8af1fb505cf 100755 --- a/tools/testing/selftests/nolibc/run-tests.sh +++ b/tools/testing/selftests/nolibc/run-tests.sh @@ -18,7 +18,7 @@ test_mode=system werror=1 llvm= all_archs=( - i386 x86_64 + i386 x86_64 x32 arm64 arm armthumb mips32le mips32be mipsn32le mipsn32be mips64le mips64be ppc ppc64 ppc64le @@ -115,6 +115,7 @@ crosstool_arch() { mips*) echo mips;; s390*) echo s390;; sparc*) echo sparc64;; + x32*) echo x86_64;; *) echo "$1";; esac } @@ -192,6 +193,10 @@ test_arch() { echo "Unsupported configuration" return fi + if [ "$arch" = "x32" ] && [ "$test_mode" = "user" ]; then + echo "Unsupported configuration" + return + fi mkdir -p "$build_dir" swallow_output "${MAKE[@]}" defconfig -- cgit v1.2.3 From f3e8e1e51362680f221f98626924098c8b3c6634 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sat, 28 Jun 2025 09:04:23 -0700 Subject: selftests/damon: add drgn script for extracting damon status Patch series "selftests/damon: add python and drgn based DAMON sysfs functionality tests". DAMON sysfs interface is the bridge between the user space and the kernel space for DAMON parameters. There is no good and simple test to see if the parameters are set as expected. Existing DAMON selftests therefore test end-to-end features. For example, damos_quota_goal.py runs a DAMOS scheme with quota goal set against a test program running an artificial access pattern, and see if the result is as expected. Such tests cover only a few part of DAMON. Adding more tests is also complicated. Finally, the reliability of the test itself on different systems is bad. 'drgn' is a tool that can extract kernel internal data structures like DAMON parameters. Add a test that passes specific DAMON parameters via DAMON sysfs reusing _damon_sysfs.py, extract resulting DAMON parameters via 'drgn', and compare those. Note that this test is not adding exhaustive tests of all DAMON parameters and input combinations but very basic things. Advancing the test infrastructure and adding more tests are future works. This patch (of 6): 'drgn' is a useful tool for extracting kernel internal data structures such as DAMON's parameter and running status. Add a 'drgn' script that extracts such DAMON internal data at runtime, for using it as a tool for seeing if a test input has made expected results in the kernel. The script saves or prints out the DAMON internal data as a json file or string. This is for making use of it not very depends on 'drgn'. If 'drgn' is not available on a test setup and we find alternative tools for doing that, the json-based tests can be updated to use an alternative tool in future. Note that the script is tested with 'drgn v0.0.22'. Link: https://lkml.kernel.org/r/20250628160428.53115-1-sj@kernel.org Link: https://lkml.kernel.org/r/20250628160428.53115-2-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- .../selftests/damon/drgn_dump_damon_status.py | 161 +++++++++++++++++++++ 1 file changed, 161 insertions(+) create mode 100755 tools/testing/selftests/damon/drgn_dump_damon_status.py (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/drgn_dump_damon_status.py b/tools/testing/selftests/damon/drgn_dump_damon_status.py new file mode 100755 index 000000000000..333a0d0c4bff --- /dev/null +++ b/tools/testing/selftests/damon/drgn_dump_damon_status.py @@ -0,0 +1,161 @@ +#!/usr/bin/env drgn +# SPDX-License-Identifier: GPL-2.0 + +''' +Read DAMON context data and dump as a json string. +''' +import drgn +from drgn import FaultError, NULL, Object, cast, container_of, execscript, offsetof, reinterpret, sizeof +from drgn.helpers.common import * +from drgn.helpers.linux import * + +import json +import sys + +if "prog" not in globals(): + try: + prog = drgn.get_default_prog() + except drgn.NoDefaultProgramError: + prog = drgn.program_from_kernel() + drgn.set_default_prog(prog) + +def to_dict(object, attr_name_converter): + d = {} + for attr_name, converter in attr_name_converter: + d[attr_name] = converter(getattr(object, attr_name)) + return d + +def intervals_goal_to_dict(goal): + return to_dict(goal, [ + ['access_bp', int], + ['aggrs', int], + ['min_sample_us', int], + ['max_sample_us', int], + ]) + +def attrs_to_dict(attrs): + return to_dict(attrs, [ + ['sample_interval', int], + ['aggr_interval', int], + ['ops_update_interval', int], + ['intervals_goal', intervals_goal_to_dict], + ['min_nr_regions', int], + ['max_nr_regions', int], + ]) + +def addr_range_to_dict(addr_range): + return to_dict(addr_range, [ + ['start', int], + ['end', int], + ]) + +def region_to_dict(region): + return to_dict(region, [ + ['ar', addr_range_to_dict], + ['sampling_addr', int], + ['nr_accesses', int], + ['nr_accesses_bp', int], + ['age', int], + ]) + +def regions_to_list(regions): + return [region_to_dict(r) + for r in list_for_each_entry( + 'struct damon_region', regions.address_of_(), 'list')] + +def target_to_dict(target): + return to_dict(target, [ + ['pid', int], + ['nr_regions', int], + ['regions_list', regions_to_list], + ]) + +def targets_to_list(targets): + return [target_to_dict(t) + for t in list_for_each_entry( + 'struct damon_target', targets.address_of_(), 'list')] + +def damos_access_pattern_to_dict(pattern): + return to_dict(pattern, [ + ['min_sz_region', int], + ['max_sz_region', int], + ['min_nr_accesses', int], + ['max_nr_accesses', int], + ['min_age_region', int], + ['max_age_region', int], + ]) + +def damos_quota_goal_to_dict(goal): + return to_dict(goal, [ + ['metric', int], + ['target_value', int], + ['current_value', int], + ['last_psi_total', int], + ['nid', int], + ]) + +def damos_quota_goals_to_list(goals): + return [damos_quota_goal_to_dict(g) + for g in list_for_each_entry( + 'struct damos_quota_goal', goals.address_of_(), 'list')] + +def damos_quota_to_dict(quota): + return to_dict(quota, [ + ['reset_interval', int], + ['ms', int], ['sz', int], + ['goals', damos_quota_goals_to_list], + ['esz', int], + ['weight_sz', int], + ['weight_nr_accesses', int], + ['weight_age', int], + ]) + +def damos_watermarks_to_dict(watermarks): + return to_dict(watermarks, [ + ['metric', int], + ['interval', int], + ['high', int], ['mid', int], ['low', int], + ]) + +def scheme_to_dict(scheme): + return to_dict(scheme, [ + ['pattern', damos_access_pattern_to_dict], + ['action', int], + ['apply_interval_us', int], + ['quota', damos_quota_to_dict], + ['wmarks', damos_watermarks_to_dict], + ['target_nid', int], + ]) + +def schemes_to_list(schemes): + return [scheme_to_dict(s) + for s in list_for_each_entry( + 'struct damos', schemes.address_of_(), 'list')] + +def damon_ctx_to_dict(ctx): + return to_dict(ctx, [ + ['attrs', attrs_to_dict], + ['adaptive_targets', targets_to_list], + ['schemes', schemes_to_list], + ]) + +def main(): + if len(sys.argv) < 3: + print('Usage: %s ' % sys.argv[0]) + exit(1) + + pid = int(sys.argv[1]) + file_to_store = sys.argv[2] + + kthread_data = cast('struct kthread *', + find_task(prog, pid).worker_private).data + ctx = cast('struct damon_ctx *', kthread_data) + status = {'contexts': [damon_ctx_to_dict(ctx)]} + if file_to_store == 'stdout': + print(json.dumps(status, indent=4)) + else: + with open(file_to_store, 'w') as f: + json.dump(status, f, indent=4) + +if __name__ == '__main__': + main() -- cgit v1.2.3 From e227472ebf00b6b5187915826c41258c472edb0a Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sat, 28 Jun 2025 09:04:24 -0700 Subject: selftests/damon/_damon_sysfs: set Kdamond.pid in start() _damon_sysfs.py is a Python module for reading and writing DAMON sysfs for testing. It is not reading resulting kdamond pids. Read and update those when starting kdamonds. Link: https://lkml.kernel.org/r/20250628160428.53115-3-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/_damon_sysfs.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py index 5b1cb6b3ce4e..f587e117472e 100644 --- a/tools/testing/selftests/damon/_damon_sysfs.py +++ b/tools/testing/selftests/damon/_damon_sysfs.py @@ -408,6 +408,9 @@ class Kdamond: if err is not None: return err err = write_file(os.path.join(self.sysfs_dir(), 'state'), 'on') + if err is not None: + return err + self.pid, err = read_file(os.path.join(self.sysfs_dir(), 'pid')) return err def stop(self): -- cgit v1.2.3 From 4ece01897627ddeefcede4ac709cd99763994dc4 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sat, 28 Jun 2025 09:04:25 -0700 Subject: selftests/damon: add python and drgn-based DAMON sysfs test Add a python-written DAMON sysfs functionality selftest. It sets DAMON parameters using Python module _damon_sysfs, reads updated kernel internal DAMON status and parameters using a 'drgn' script, namely drgn_dump_damon_status.py, and compare if the resulted DAMON internal status is as expected. The test is very minimum at the moment. Link: https://lkml.kernel.org/r/20250628160428.53115-4-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/Makefile | 1 + tools/testing/selftests/damon/sysfs.py | 42 ++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100755 tools/testing/selftests/damon/sysfs.py (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index e888455e3cf8..5b230deb19e8 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -7,6 +7,7 @@ TEST_FILES = _damon_sysfs.py # functionality tests TEST_PROGS += sysfs.sh +TEST_PROGS += sysfs.py TEST_PROGS += sysfs_update_schemes_tried_regions_wss_estimation.py TEST_PROGS += damos_quota.py damos_quota_goal.py damos_apply_interval.py TEST_PROGS += damos_tried_regions.py damon_nr_regions.py diff --git a/tools/testing/selftests/damon/sysfs.py b/tools/testing/selftests/damon/sysfs.py new file mode 100755 index 000000000000..4ff99db0d247 --- /dev/null +++ b/tools/testing/selftests/damon/sysfs.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import json +import os +import subprocess + +import _damon_sysfs + +def dump_damon_status_dict(pid): + file_dir = os.path.dirname(os.path.abspath(__file__)) + dump_script = os.path.join(file_dir, 'drgn_dump_damon_status.py') + rc = subprocess.call(['drgn', dump_script, pid, 'damon_dump_output'], + stderr=subprocess.DEVNULL) + if rc != 0: + return None, 'drgn fail' + try: + with open('damon_dump_output', 'r') as f: + return json.load(f), None + except Exception as e: + return None, 'json.load fail (%s)' % e + +def main(): + kdamonds = _damon_sysfs.Kdamonds( + [_damon_sysfs.Kdamond(contexts=[_damon_sysfs.DamonCtx()])]) + err = kdamonds.start() + if err is not None: + print('kdamond start failed: %s' % err) + exit(1) + + status, err = dump_damon_status_dict(kdamonds.kdamonds[0].pid) + if err is not None: + print(err) + exit(1) + + if len(status['contexts']) != 1: + print('number of contexts: %d' % len(status['contexts'])) + exit(1) + kdamonds.stop() + +if __name__ == '__main__': + main() -- cgit v1.2.3 From ae3ab07e0d0488925008c19f03ba02d7818c85af Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sat, 28 Jun 2025 09:04:26 -0700 Subject: selftests/damon/sysfs.py: test monitoring attribute parameters Add DAMON sysfs interface functionality tests for DAMON monitoring attribute parameters, including intervals, intervals tuning goals, and min/max number of regions. Link: https://lkml.kernel.org/r/20250628160428.53115-5-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/sysfs.py | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/sysfs.py b/tools/testing/selftests/damon/sysfs.py index 4ff99db0d247..a721901a880d 100755 --- a/tools/testing/selftests/damon/sysfs.py +++ b/tools/testing/selftests/damon/sysfs.py @@ -20,6 +20,11 @@ def dump_damon_status_dict(pid): except Exception as e: return None, 'json.load fail (%s)' % e +def fail(expectation, status): + print('unexpected %s' % expectation) + print(json.dumps(status, indent=4)) + exit(1) + def main(): kdamonds = _damon_sysfs.Kdamonds( [_damon_sysfs.Kdamond(contexts=[_damon_sysfs.DamonCtx()])]) @@ -34,8 +39,33 @@ def main(): exit(1) if len(status['contexts']) != 1: - print('number of contexts: %d' % len(status['contexts'])) - exit(1) + fail('number of contexts', status) + + ctx = status['contexts'][0] + attrs = ctx['attrs'] + if attrs['sample_interval'] != 5000: + fail('sample interval', status) + if attrs['aggr_interval'] != 100000: + fail('aggr interval', status) + if attrs['ops_update_interval'] != 1000000: + fail('ops updte interval', status) + + if attrs['intervals_goal'] != { + 'access_bp': 0, 'aggrs': 0, + 'min_sample_us': 0, 'max_sample_us': 0}: + fail('intervals goal') + + if attrs['min_nr_regions'] != 10: + fail('min_nr_regions') + if attrs['max_nr_regions'] != 1000: + fail('max_nr_regions') + + if ctx['adaptive_targets'] != []: + fail('adaptive_targets') + + if ctx['schemes'] != []: + fail('schemes') + kdamonds.stop() if __name__ == '__main__': -- cgit v1.2.3 From 7e6bcf354f3ea5cc409a7210a4b3834585e61954 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sat, 28 Jun 2025 09:04:27 -0700 Subject: selftests/damon/sysfs.py: test adaptive targets parameter Add DAMON sysfs interface functionality tests for setup of basic adaptive targets parameters. Link: https://lkml.kernel.org/r/20250628160428.53115-6-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/sysfs.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/sysfs.py b/tools/testing/selftests/damon/sysfs.py index a721901a880d..3b085268f342 100755 --- a/tools/testing/selftests/damon/sysfs.py +++ b/tools/testing/selftests/damon/sysfs.py @@ -27,7 +27,9 @@ def fail(expectation, status): def main(): kdamonds = _damon_sysfs.Kdamonds( - [_damon_sysfs.Kdamond(contexts=[_damon_sysfs.DamonCtx()])]) + [_damon_sysfs.Kdamond( + contexts=[_damon_sysfs.DamonCtx( + targets=[_damon_sysfs.DamonTarget(pid=-1)])])]) err = kdamonds.start() if err is not None: print('kdamond start failed: %s' % err) @@ -60,8 +62,9 @@ def main(): if attrs['max_nr_regions'] != 1000: fail('max_nr_regions') - if ctx['adaptive_targets'] != []: - fail('adaptive_targets') + if ctx['adaptive_targets'] != [ + { 'pid': 0, 'nr_regions': 0, 'regions_list': []}]: + fail('adaptive targets', status) if ctx['schemes'] != []: fail('schemes') -- cgit v1.2.3 From 603cb4aa09a14157ba412ba4db9dffebb79eb598 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sat, 28 Jun 2025 09:04:28 -0700 Subject: selftests/damon/sysfs.py: test DAMOS schemes parameters setup Add DAMON sysfs interface functionality tests for basic DAMOS schemes parameters setup. Link: https://lkml.kernel.org/r/20250628160428.53115-7-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/sysfs.py | 46 +++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/sysfs.py b/tools/testing/selftests/damon/sysfs.py index 3b085268f342..e67008fd055d 100755 --- a/tools/testing/selftests/damon/sysfs.py +++ b/tools/testing/selftests/damon/sysfs.py @@ -29,7 +29,9 @@ def main(): kdamonds = _damon_sysfs.Kdamonds( [_damon_sysfs.Kdamond( contexts=[_damon_sysfs.DamonCtx( - targets=[_damon_sysfs.DamonTarget(pid=-1)])])]) + targets=[_damon_sysfs.DamonTarget(pid=-1)], + schemes=[_damon_sysfs.Damos()], + )])]) err = kdamonds.start() if err is not None: print('kdamond start failed: %s' % err) @@ -66,8 +68,46 @@ def main(): { 'pid': 0, 'nr_regions': 0, 'regions_list': []}]: fail('adaptive targets', status) - if ctx['schemes'] != []: - fail('schemes') + if len(ctx['schemes']) != 1: + fail('number of schemes', status) + + scheme = ctx['schemes'][0] + if scheme['pattern'] != { + 'min_sz_region': 0, + 'max_sz_region': 2**64 - 1, + 'min_nr_accesses': 0, + 'max_nr_accesses': 2**32 - 1, + 'min_age_region': 0, + 'max_age_region': 2**32 - 1, + }: + fail('damos pattern', status) + if scheme['action'] != 9: # stat + fail('damos action', status) + if scheme['apply_interval_us'] != 0: + fail('damos apply interval', status) + if scheme['target_nid'] != -1: + fail('damos target nid', status) + + if scheme['quota'] != { + 'reset_interval': 0, + 'ms': 0, + 'sz': 0, + 'goals': [], + 'esz': 0, + 'weight_sz': 0, + 'weight_nr_accesses': 0, + 'weight_age': 0, + }: + fail('damos quota', status) + + if scheme['wmarks'] != { + 'metric': 0, + 'interval': 0, + 'high': 0, + 'mid': 0, + 'low': 0, + }: + fail('damos wmarks', status) kdamonds.stop() -- cgit v1.2.3 From 4e25f85b9f85d238fe012cb18cd040172344d7e6 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Wed, 2 Jul 2025 09:47:17 +0100 Subject: tools/testing/selftests: add mremap() unfaulted/faulted test cases Assert that mremap() behaviour is as expected when moving around unfaulted VMAs immediately adjacent to faulted ones, as well as moving around faulted VMAs and placing them back immediately adjacent to the VMA from which they were moved. This also introduces a shared helper for the syscall version of mremap() so we don't encounter any issues with libc filtering parameters. Link: https://lkml.kernel.org/r/20250702084717.21360-1-lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Cc: Jann Horn Cc: Liam Howlett Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/merge.c | 599 ++++++++++++++++++++++++++++++++++- tools/testing/selftests/mm/vm_util.c | 8 + tools/testing/selftests/mm/vm_util.h | 3 + 3 files changed, 608 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/mm/merge.c b/tools/testing/selftests/mm/merge.c index 150dd5baed2b..cc4253f47f10 100644 --- a/tools/testing/selftests/mm/merge.c +++ b/tools/testing/selftests/mm/merge.c @@ -13,6 +13,7 @@ #include #include #include "vm_util.h" +#include FIXTURE(merge) { @@ -25,7 +26,7 @@ FIXTURE_SETUP(merge) { self->page_size = psize(); /* Carve out PROT_NONE region to map over. */ - self->carveout = mmap(NULL, 12 * self->page_size, PROT_NONE, + self->carveout = mmap(NULL, 30 * self->page_size, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); ASSERT_NE(self->carveout, MAP_FAILED); /* Setup PROCMAP_QUERY interface. */ @@ -34,7 +35,7 @@ FIXTURE_SETUP(merge) FIXTURE_TEARDOWN(merge) { - ASSERT_EQ(munmap(self->carveout, 12 * self->page_size), 0); + ASSERT_EQ(munmap(self->carveout, 30 * self->page_size), 0); ASSERT_EQ(close_procmap(&self->procmap), 0); /* * Clear unconditionally, as some tests set this. It is no issue if this @@ -576,4 +577,598 @@ TEST_F(merge, ksm_merge) ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 2 * page_size); } +TEST_F(merge, mremap_unfaulted_to_faulted) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr, *ptr2; + + /* + * Map two distinct areas: + * + * |-----------| |-----------| + * | unfaulted | | unfaulted | + * |-----------| |-----------| + * ptr ptr2 + */ + ptr = mmap(&carveout[page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + ptr2 = mmap(&carveout[7 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + + /* Offset ptr2 further away. */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr2 + page_size * 1000); + ASSERT_NE(ptr2, MAP_FAILED); + + /* + * Fault in ptr: + * \ + * |-----------| / |-----------| + * | faulted | \ | unfaulted | + * |-----------| / |-----------| + * ptr \ ptr2 + */ + ptr[0] = 'x'; + + /* + * Now move ptr2 adjacent to ptr: + * + * |-----------|-----------| + * | faulted | unfaulted | + * |-----------|-----------| + * ptr ptr2 + * + * It should merge: + * + * |----------------------| + * | faulted | + * |----------------------| + * ptr + */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[5 * page_size]); + ASSERT_NE(ptr2, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size); +} + +TEST_F(merge, mremap_unfaulted_behind_faulted) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr, *ptr2; + + /* + * Map two distinct areas: + * + * |-----------| |-----------| + * | unfaulted | | unfaulted | + * |-----------| |-----------| + * ptr ptr2 + */ + ptr = mmap(&carveout[6 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + ptr2 = mmap(&carveout[14 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + + /* Offset ptr2 further away. */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr2 + page_size * 1000); + ASSERT_NE(ptr2, MAP_FAILED); + + /* + * Fault in ptr: + * \ + * |-----------| / |-----------| + * | faulted | \ | unfaulted | + * |-----------| / |-----------| + * ptr \ ptr2 + */ + ptr[0] = 'x'; + + /* + * Now move ptr2 adjacent, but behind, ptr: + * + * |-----------|-----------| + * | unfaulted | faulted | + * |-----------|-----------| + * ptr2 ptr + * + * It should merge: + * + * |----------------------| + * | faulted | + * |----------------------| + * ptr2 + */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &carveout[page_size]); + ASSERT_NE(ptr2, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr2)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr2); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 10 * page_size); +} + +TEST_F(merge, mremap_unfaulted_between_faulted) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr, *ptr2, *ptr3; + + /* + * Map three distinct areas: + * + * |-----------| |-----------| |-----------| + * | unfaulted | | unfaulted | | unfaulted | + * |-----------| |-----------| |-----------| + * ptr ptr2 ptr3 + */ + ptr = mmap(&carveout[page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + ptr2 = mmap(&carveout[7 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + ptr3 = mmap(&carveout[14 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr3, MAP_FAILED); + + /* Offset ptr3 further away. */ + ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr3 + page_size * 2000); + ASSERT_NE(ptr3, MAP_FAILED); + + /* Offset ptr2 further away. */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr2 + page_size * 1000); + ASSERT_NE(ptr2, MAP_FAILED); + + /* + * Fault in ptr, ptr3: + * \ \ + * |-----------| / |-----------| / |-----------| + * | faulted | \ | unfaulted | \ | faulted | + * |-----------| / |-----------| / |-----------| + * ptr \ ptr2 \ ptr3 + */ + ptr[0] = 'x'; + ptr3[0] = 'x'; + + /* + * Move ptr3 back into place, leaving a place for ptr2: + * \ + * |-----------| |-----------| / |-----------| + * | faulted | | faulted | \ | unfaulted | + * |-----------| |-----------| / |-----------| + * ptr ptr3 \ ptr2 + */ + ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[10 * page_size]); + ASSERT_NE(ptr3, MAP_FAILED); + + /* + * Finally, move ptr2 into place: + * + * |-----------|-----------|-----------| + * | faulted | unfaulted | faulted | + * |-----------|-----------|-----------| + * ptr ptr2 ptr3 + * + * It should merge, but only ptr, ptr2: + * + * |-----------------------|-----------| + * | faulted | unfaulted | + * |-----------------------|-----------| + */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[5 * page_size]); + ASSERT_NE(ptr2, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr3)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr3); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr3 + 5 * page_size); +} + +TEST_F(merge, mremap_unfaulted_between_faulted_unfaulted) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr, *ptr2, *ptr3; + + /* + * Map three distinct areas: + * + * |-----------| |-----------| |-----------| + * | unfaulted | | unfaulted | | unfaulted | + * |-----------| |-----------| |-----------| + * ptr ptr2 ptr3 + */ + ptr = mmap(&carveout[page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + ptr2 = mmap(&carveout[7 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + ptr3 = mmap(&carveout[14 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr3, MAP_FAILED); + + /* Offset ptr3 further away. */ + ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr3 + page_size * 2000); + ASSERT_NE(ptr3, MAP_FAILED); + + + /* Offset ptr2 further away. */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr2 + page_size * 1000); + ASSERT_NE(ptr2, MAP_FAILED); + + /* + * Fault in ptr: + * \ \ + * |-----------| / |-----------| / |-----------| + * | faulted | \ | unfaulted | \ | unfaulted | + * |-----------| / |-----------| / |-----------| + * ptr \ ptr2 \ ptr3 + */ + ptr[0] = 'x'; + + /* + * Move ptr3 back into place, leaving a place for ptr2: + * \ + * |-----------| |-----------| / |-----------| + * | faulted | | unfaulted | \ | unfaulted | + * |-----------| |-----------| / |-----------| + * ptr ptr3 \ ptr2 + */ + ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[10 * page_size]); + ASSERT_NE(ptr3, MAP_FAILED); + + /* + * Finally, move ptr2 into place: + * + * |-----------|-----------|-----------| + * | faulted | unfaulted | unfaulted | + * |-----------|-----------|-----------| + * ptr ptr2 ptr3 + * + * It should merge: + * + * |-----------------------------------| + * | faulted | + * |-----------------------------------| + */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[5 * page_size]); + ASSERT_NE(ptr2, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 15 * page_size); +} + +TEST_F(merge, mremap_unfaulted_between_correctly_placed_faulted) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr, *ptr2; + + /* + * Map one larger area: + * + * |-----------------------------------| + * | unfaulted | + * |-----------------------------------| + */ + ptr = mmap(&carveout[page_size], 15 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * Fault in ptr: + * + * |-----------------------------------| + * | faulted | + * |-----------------------------------| + */ + ptr[0] = 'x'; + + /* + * Unmap middle: + * + * |-----------| |-----------| + * | faulted | | faulted | + * |-----------| |-----------| + * + * Now the faulted areas are compatible with each other (anon_vma the + * same, vma->vm_pgoff equal to virtual page offset). + */ + ASSERT_EQ(munmap(&ptr[5 * page_size], 5 * page_size), 0); + + /* + * Map a new area, ptr2: + * \ + * |-----------| |-----------| / |-----------| + * | faulted | | faulted | \ | unfaulted | + * |-----------| |-----------| / |-----------| + * ptr \ ptr2 + */ + ptr2 = mmap(&carveout[20 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + + /* + * Finally, move ptr2 into place: + * + * |-----------|-----------|-----------| + * | faulted | unfaulted | faulted | + * |-----------|-----------|-----------| + * ptr ptr2 ptr3 + * + * It should merge: + * + * |-----------------------------------| + * | faulted | + * |-----------------------------------| + */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[5 * page_size]); + ASSERT_NE(ptr2, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 15 * page_size); +} + +TEST_F(merge, mremap_correct_placed_faulted) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr, *ptr2, *ptr3; + + /* + * Map one larger area: + * + * |-----------------------------------| + * | unfaulted | + * |-----------------------------------| + */ + ptr = mmap(&carveout[page_size], 15 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * Fault in ptr: + * + * |-----------------------------------| + * | faulted | + * |-----------------------------------| + */ + ptr[0] = 'x'; + + /* + * Offset the final and middle 5 pages further away: + * \ \ + * |-----------| / |-----------| / |-----------| + * | faulted | \ | faulted | \ | faulted | + * |-----------| / |-----------| / |-----------| + * ptr \ ptr2 \ ptr3 + */ + ptr3 = &ptr[10 * page_size]; + ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr3 + page_size * 2000); + ASSERT_NE(ptr3, MAP_FAILED); + ptr2 = &ptr[5 * page_size]; + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr2 + page_size * 1000); + ASSERT_NE(ptr2, MAP_FAILED); + + /* + * Move ptr2 into its correct place: + * \ + * |-----------|-----------| / |-----------| + * | faulted | faulted | \ | faulted | + * |-----------|-----------| / |-----------| + * ptr ptr2 \ ptr3 + * + * It should merge: + * \ + * |-----------------------| / |-----------| + * | faulted | \ | faulted | + * |-----------------------| / |-----------| + * ptr \ ptr3 + */ + + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[5 * page_size]); + ASSERT_NE(ptr2, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size); + + /* + * Now move ptr out of place: + * \ \ + * |-----------| / |-----------| / |-----------| + * | faulted | \ | faulted | \ | faulted | + * |-----------| / |-----------| / |-----------| + * ptr2 \ ptr \ ptr3 + */ + ptr = sys_mremap(ptr, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr + page_size * 1000); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * Now move ptr back into place: + * \ + * |-----------|-----------| / |-----------| + * | faulted | faulted | \ | faulted | + * |-----------|-----------| / |-----------| + * ptr ptr2 \ ptr3 + * + * It should merge: + * \ + * |-----------------------| / |-----------| + * | faulted | \ | faulted | + * |-----------------------| / |-----------| + * ptr \ ptr3 + */ + ptr = sys_mremap(ptr, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &carveout[page_size]); + ASSERT_NE(ptr, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size); + + /* + * Now move ptr out of place again: + * \ \ + * |-----------| / |-----------| / |-----------| + * | faulted | \ | faulted | \ | faulted | + * |-----------| / |-----------| / |-----------| + * ptr2 \ ptr \ ptr3 + */ + ptr = sys_mremap(ptr, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr + page_size * 1000); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * Now move ptr3 back into place: + * \ + * |-----------|-----------| / |-----------| + * | faulted | faulted | \ | faulted | + * |-----------|-----------| / |-----------| + * ptr2 ptr3 \ ptr + * + * It should merge: + * \ + * |-----------------------| / |-----------| + * | faulted | \ | faulted | + * |-----------------------| / |-----------| + * ptr2 \ ptr + */ + ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr2[5 * page_size]); + ASSERT_NE(ptr3, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr2)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr2); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 10 * page_size); + + /* + * Now move ptr back into place: + * + * |-----------|-----------------------| + * | faulted | faulted | + * |-----------|-----------------------| + * ptr ptr2 + * + * It should merge: + * + * |-----------------------------------| + * | faulted | + * |-----------------------------------| + * ptr + */ + ptr = sys_mremap(ptr, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &carveout[page_size]); + ASSERT_NE(ptr, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 15 * page_size); + + /* + * Now move ptr2 out of the way: + * \ + * |-----------| |-----------| / |-----------| + * | faulted | | faulted | \ | faulted | + * |-----------| |-----------| / |-----------| + * ptr ptr3 \ ptr2 + */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr2 + page_size * 1000); + ASSERT_NE(ptr2, MAP_FAILED); + + /* + * Now move it back: + * + * |-----------|-----------|-----------| + * | faulted | faulted | faulted | + * |-----------|-----------|-----------| + * ptr ptr2 ptr3 + * + * It should merge: + * + * |-----------------------------------| + * | faulted | + * |-----------------------------------| + * ptr + */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[5 * page_size]); + ASSERT_NE(ptr2, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 15 * page_size); + + /* + * Move ptr3 out of place: + * \ + * |-----------------------| / |-----------| + * | faulted | \ | faulted | + * |-----------------------| / |-----------| + * ptr \ ptr3 + */ + ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr3 + page_size * 1000); + ASSERT_NE(ptr3, MAP_FAILED); + + /* + * Now move it back: + * + * |-----------|-----------|-----------| + * | faulted | faulted | faulted | + * |-----------|-----------|-----------| + * ptr ptr2 ptr3 + * + * It should merge: + * + * |-----------------------------------| + * | faulted | + * |-----------------------------------| + * ptr + */ + ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[10 * page_size]); + ASSERT_NE(ptr3, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 15 * page_size); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c index 5492e3f784df..1d434772fa54 100644 --- a/tools/testing/selftests/mm/vm_util.c +++ b/tools/testing/selftests/mm/vm_util.c @@ -524,3 +524,11 @@ int read_sysfs(const char *file_path, unsigned long *val) return 0; } + +void *sys_mremap(void *old_address, unsigned long old_size, + unsigned long new_size, int flags, void *new_address) +{ + return (void *)syscall(__NR_mremap, (unsigned long)old_address, + old_size, new_size, flags, + (unsigned long)new_address); +} diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index b8136d12a0f8..797c24215b17 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -117,6 +117,9 @@ static inline void log_test_result(int result) ksft_test_result_report(result, "%s\n", test_name); } +void *sys_mremap(void *old_address, unsigned long old_size, + unsigned long new_size, int flags, void *new_address); + /* * On ppc64 this will only work with radix 2M hugepage size */ -- cgit v1.2.3 From b08590559f4b6954f1bf2510445aba346044c91b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 27 Jun 2025 16:27:50 +0200 Subject: selftests: netfilter: conntrack_resize.sh: extend resize test Extend the resize test: - continuously dump table both via /proc and ctnetlink interfaces while table is resized in a loop. - if socat is available, send udp packets in additon to ping requests. - increase/decrease the icmp and udp timeouts while resizes are happening. This makes sure we also exercise the 'ct has expired' check that happens on conntrack lookup. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- .../selftests/net/netfilter/conntrack_resize.sh | 80 ++++++++++++++++++++-- 1 file changed, 75 insertions(+), 5 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/netfilter/conntrack_resize.sh b/tools/testing/selftests/net/netfilter/conntrack_resize.sh index 9e033e80219e..aa1ba07eaf50 100755 --- a/tools/testing/selftests/net/netfilter/conntrack_resize.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_resize.sh @@ -12,6 +12,9 @@ tmpfile="" tmpfile_proc="" tmpfile_uniq="" ret=0 +have_socat=0 + +socat -h > /dev/null && have_socat=1 insert_count=2000 [ "$KSFT_MACHINE_SLOW" = "yes" ] && insert_count=400 @@ -123,7 +126,7 @@ ctflush() { done } -ctflood() +ct_pingflood() { local ns="$1" local duration="$2" @@ -152,6 +155,28 @@ ctflood() wait } +ct_udpflood() +{ + local ns="$1" + local duration="$2" + local now=$(date +%s) + local end=$((now + duration)) + + [ $have_socat -ne "1" ] && return + + while [ $now -lt $end ]; do +ip netns exec "$ns" bash<<"EOF" + for i in $(seq 1 100);do + dport=$(((RANDOM%65536)+1)) + + echo bar | socat -u STDIN UDP:"127.0.0.1:$dport" & + done > /dev/null 2>&1 + wait +EOF + now=$(date +%s) + done +} + # dump to /dev/null. We don't want dumps to cause infinite loops # or use-after-free even when conntrack table is altered while dumps # are in progress. @@ -169,6 +194,48 @@ ct_nulldump() wait } +ct_nulldump_loop() +{ + local ns="$1" + local duration="$2" + local now=$(date +%s) + local end=$((now + duration)) + + while [ $now -lt $end ]; do + ct_nulldump "$ns" + sleep $((RANDOM%2)) + now=$(date +%s) + done +} + +change_timeouts() +{ + local ns="$1" + local r1=$((RANDOM%2)) + local r2=$((RANDOM%2)) + + [ "$r1" -eq 1 ] && ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=$((RANDOM%5)) + [ "$r2" -eq 1 ] && ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_udp_timeout=$((RANDOM%5)) +} + +ct_change_timeouts_loop() +{ + local ns="$1" + local duration="$2" + local now=$(date +%s) + local end=$((now + duration)) + + while [ $now -lt $end ]; do + change_timeouts "$ns" + sleep $((RANDOM%2)) + now=$(date +%s) + done + + # restore defaults + ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=30 + ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_udp_timeout=30 +} + check_taint() { local tainted_then="$1" @@ -198,10 +265,13 @@ insert_flood() r=$((RANDOM%$insert_count)) - ctflood "$n" "$timeout" "floodresize" & + ct_pingflood "$n" "$timeout" "floodresize" & + ct_udpflood "$n" "$timeout" & + insert_ctnetlink "$n" "$r" & ctflush "$n" "$timeout" & - ct_nulldump "$n" & + ct_nulldump_loop "$n" "$timeout" & + ct_change_timeouts_loop "$n" "$timeout" & wait } @@ -306,7 +376,7 @@ test_dump_all() ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=3600 - ctflood "$nsclient1" $timeout "dumpall" & + ct_pingflood "$nsclient1" $timeout "dumpall" & insert_ctnetlink "$nsclient2" $insert_count wait @@ -368,7 +438,7 @@ test_conntrack_disable() ct_flush_once "$nsclient1" ct_flush_once "$nsclient2" - ctflood "$nsclient1" "$timeout" "conntrack disable" + ct_pingflood "$nsclient1" "$timeout" "conntrack disable" ip netns exec "$nsclient2" ping -q -c 1 127.0.0.1 >/dev/null 2>&1 # Disabled, should not have picked up any connection. -- cgit v1.2.3 From 78a58836358783995884784cb20021db6f6a29df Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 27 Jun 2025 16:27:51 +0200 Subject: selftests: netfilter: add conntrack clash resolution test case Add a dedicated test to exercise conntrack clash resolution path. Test program emits 128 identical udp packets in parallel, then reads back replies from socat echo server. Also check (via conntrack -S) that the clash path was hit at least once. Due to the racy nature of the test its possible that despite the threaded program all packets were processed in-order or on same cpu, emit a SKIP warning in this case. Two tests are added: - one to test the simpler, non-nat case - one to exercise clash resolution where packets might have different nat transformations attached to them. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/net/netfilter/.gitignore | 1 + tools/testing/selftests/net/netfilter/Makefile | 3 + .../selftests/net/netfilter/conntrack_clash.sh | 175 +++++++++++++++++++++ tools/testing/selftests/net/netfilter/udpclash.c | 158 +++++++++++++++++++ 4 files changed, 337 insertions(+) create mode 100755 tools/testing/selftests/net/netfilter/conntrack_clash.sh create mode 100644 tools/testing/selftests/net/netfilter/udpclash.c (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/netfilter/.gitignore b/tools/testing/selftests/net/netfilter/.gitignore index 64c4f8d9aa6c..5d2be9a00627 100644 --- a/tools/testing/selftests/net/netfilter/.gitignore +++ b/tools/testing/selftests/net/netfilter/.gitignore @@ -5,3 +5,4 @@ conntrack_dump_flush conntrack_reverse_clash sctp_collision nf_queue +udpclash diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile index e9b2f553588d..a98ed892f55f 100644 --- a/tools/testing/selftests/net/netfilter/Makefile +++ b/tools/testing/selftests/net/netfilter/Makefile @@ -15,6 +15,7 @@ TEST_PROGS += conntrack_tcp_unreplied.sh TEST_PROGS += conntrack_resize.sh TEST_PROGS += conntrack_sctp_collision.sh TEST_PROGS += conntrack_vrf.sh +TEST_PROGS += conntrack_clash.sh TEST_PROGS += conntrack_reverse_clash.sh TEST_PROGS += ipvs.sh TEST_PROGS += nf_conntrack_packetdrill.sh @@ -44,6 +45,7 @@ TEST_GEN_FILES += connect_close nf_queue TEST_GEN_FILES += conntrack_dump_flush TEST_GEN_FILES += conntrack_reverse_clash TEST_GEN_FILES += sctp_collision +TEST_GEN_FILES += udpclash include ../../lib.mk @@ -52,6 +54,7 @@ $(OUTPUT)/nf_queue: LDLIBS += $(MNL_LDLIBS) $(OUTPUT)/conntrack_dump_flush: CFLAGS += $(MNL_CFLAGS) $(OUTPUT)/conntrack_dump_flush: LDLIBS += $(MNL_LDLIBS) +$(OUTPUT)/udpclash: LDLIBS += -lpthread TEST_FILES := lib.sh TEST_FILES += packetdrill diff --git a/tools/testing/selftests/net/netfilter/conntrack_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_clash.sh new file mode 100755 index 000000000000..3712c1b9b38b --- /dev/null +++ b/tools/testing/selftests/net/netfilter/conntrack_clash.sh @@ -0,0 +1,175 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +clash_resolution_active=0 +dport=22111 +ret=0 + +cleanup() +{ + # netns cleanup also zaps any remaining socat echo server. + cleanup_all_ns +} + +checktool "nft --version" "run test without nft" +checktool "conntrack --version" "run test without conntrack" +checktool "socat -h" "run test without socat" + +trap cleanup EXIT + +setup_ns nsclient1 nsclient2 nsrouter + +ip netns exec "$nsrouter" nft -f -</dev/null & + done + + for port in $ports; do + wait_local_port_listen "$ns" $port udp + done +} + +add_addr() +{ + local ns="$1" + local dev="$2" + local i="$3" + local j="$4" + + ip -net "$ns" link set "$dev" up + ip -net "$ns" addr add "10.0.$i.$j/24" dev "$dev" +} + +ping_test() +{ + local ns="$1" + local daddr="$2" + + if ! ip netns exec "$ns" ping -q -c 1 $daddr > /dev/null;then + echo "FAIL: ping from $ns to $daddr" + exit 1 + fi +} + +run_one_clash_test() +{ + local ns="$1" + local daddr="$2" + local dport="$3" + local entries + local cre + + if ! ip netns exec "$ns" ./udpclash $daddr $dport;then + echo "FAIL: did not receive expected number of replies for $daddr:$dport" + ret=1 + return 1 + fi + + entries=$(conntrack -S | wc -l) + cre=$(conntrack -S | grep -v "clash_resolve=0" | wc -l) + + if [ "$cre" -ne "$entries" ] ;then + clash_resolution_active=1 + return 0 + fi + + # 1 cpu -> parallel insertion impossible + if [ "$entries" -eq 1 ]; then + return 0 + fi + + # not a failure: clash resolution logic did not trigger, but all replies + # were received. With right timing, xmit completed sequentially and + # no parallel insertion occurs. + return $ksft_skip +} + +run_clash_test() +{ + local ns="$1" + local daddr="$2" + local dport="$3" + + for i in $(seq 1 10);do + run_one_clash_test "$ns" "$daddr" "$dport" + local rv=$? + if [ $rv -eq 0 ];then + echo "PASS: clash resolution test for $daddr:$dport on attempt $i" + return 0 + elif [ $rv -eq 1 ];then + echo "FAIL: clash resolution test for $daddr:$dport on attempt $i" + return 1 + fi + done +} + +ip link add veth0 netns "$nsclient1" type veth peer name veth0 netns "$nsrouter" +ip link add veth0 netns "$nsclient2" type veth peer name veth1 netns "$nsrouter" +add_addr "$nsclient1" veth0 1 1 +add_addr "$nsclient2" veth0 2 1 +add_addr "$nsrouter" veth0 1 99 +add_addr "$nsrouter" veth1 2 99 + +ip -net "$nsclient1" route add default via 10.0.1.99 +ip -net "$nsclient2" route add default via 10.0.2.99 +ip netns exec "$nsrouter" sysctl -q net.ipv4.ip_forward=1 + +ping_test "$nsclient1" 10.0.1.99 +ping_test "$nsclient1" 10.0.2.1 +ping_test "$nsclient2" 10.0.1.1 + +spawn_servers "$nsclient2" + +# exercise clash resolution with nat: +# nsrouter is supposed to dnat to 10.0.2.1:900{0,1,2,3}. +run_clash_test "$nsclient1" 10.0.1.99 "$dport" + +# exercise clash resolution without nat. +load_simple_ruleset "$nsclient2" +run_clash_test "$nsclient2" 127.0.0.1 9001 + +if [ $clash_resolution_active -eq 0 ];then + [ "$ret" -eq 0 ] && ret=$ksft_skip + echo "SKIP: Clash resolution did not trigger" +fi + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/udpclash.c b/tools/testing/selftests/net/netfilter/udpclash.c new file mode 100644 index 000000000000..85c7b906ad08 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/udpclash.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Usage: ./udpclash + * + * Emit THREAD_COUNT UDP packets sharing the same saddr:daddr pair. + * + * This mimics DNS resolver libraries that emit A and AAAA requests + * in parallel. + * + * This exercises conntrack clash resolution logic added and later + * refined in + * + * 71d8c47fc653 ("netfilter: conntrack: introduce clash resolution on insertion race") + * ed07d9a021df ("netfilter: nf_conntrack: resolve clash for matching conntracks") + * 6a757c07e51f ("netfilter: conntrack: allow insertion of clashing entries") + */ +#include +#include +#include +#include +#include +#include +#include + +#define THREAD_COUNT 128 + +struct thread_args { + const struct sockaddr_in *si_remote; + int sockfd; +}; + +static int wait = 1; + +static void *thread_main(void *varg) +{ + const struct sockaddr_in *si_remote; + const struct thread_args *args = varg; + static const char msg[] = "foo"; + + si_remote = args->si_remote; + + while (wait == 1) + ; + + if (sendto(args->sockfd, msg, strlen(msg), MSG_NOSIGNAL, + (struct sockaddr *)si_remote, sizeof(*si_remote)) < 0) + exit(111); + + return varg; +} + +static int run_test(int fd, const struct sockaddr_in *si_remote) +{ + struct thread_args thread_args = { + .si_remote = si_remote, + .sockfd = fd, + }; + pthread_t *tid = calloc(THREAD_COUNT, sizeof(pthread_t)); + unsigned int repl_count = 0, timeout = 0; + int i; + + if (!tid) { + perror("calloc"); + return 1; + } + + for (i = 0; i < THREAD_COUNT; i++) { + int err = pthread_create(&tid[i], NULL, &thread_main, &thread_args); + + if (err != 0) { + perror("pthread_create"); + exit(1); + } + } + + wait = 0; + + for (i = 0; i < THREAD_COUNT; i++) + pthread_join(tid[i], NULL); + + while (repl_count < THREAD_COUNT) { + struct sockaddr_in si_repl; + socklen_t si_repl_len = sizeof(si_repl); + char repl[512]; + ssize_t ret; + + ret = recvfrom(fd, repl, sizeof(repl), MSG_NOSIGNAL, + (struct sockaddr *) &si_repl, &si_repl_len); + if (ret < 0) { + if (timeout++ > 5000) { + fputs("timed out while waiting for reply from thread\n", stderr); + break; + } + + /* give reply time to pass though the stack */ + usleep(1000); + continue; + } + + if (si_repl_len != sizeof(*si_remote)) { + fprintf(stderr, "warning: reply has unexpected repl_len %d vs %d\n", + (int)si_repl_len, (int)sizeof(si_repl)); + } else if (si_remote->sin_addr.s_addr != si_repl.sin_addr.s_addr || + si_remote->sin_port != si_repl.sin_port) { + char a[64], b[64]; + + inet_ntop(AF_INET, &si_remote->sin_addr, a, sizeof(a)); + inet_ntop(AF_INET, &si_repl.sin_addr, b, sizeof(b)); + + fprintf(stderr, "reply from wrong source: want %s:%d got %s:%d\n", + a, ntohs(si_remote->sin_port), b, ntohs(si_repl.sin_port)); + } + + repl_count++; + } + + printf("got %d of %d replies\n", repl_count, THREAD_COUNT); + + free(tid); + + return repl_count == THREAD_COUNT ? 0 : 1; +} + +int main(int argc, char *argv[]) +{ + struct sockaddr_in si_local = { + .sin_family = AF_INET, + }; + struct sockaddr_in si_remote = { + .sin_family = AF_INET, + }; + int fd, ret; + + if (argc < 3) { + fputs("Usage: send_udp \n", stderr); + return 1; + } + + si_remote.sin_port = htons(atoi(argv[2])); + si_remote.sin_addr.s_addr = inet_addr(argv[1]); + + fd = socket(AF_INET, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, IPPROTO_UDP); + if (fd < 0) { + perror("socket"); + return 1; + } + + if (bind(fd, (struct sockaddr *)&si_local, sizeof(si_local)) < 0) { + perror("bind"); + return 1; + } + + ret = run_test(fd, &si_remote); + + close(fd); + + return ret; +} -- cgit v1.2.3 From aa085ea1a68d27d34f14db1f4026c35aa6b1ecc8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 27 Jun 2025 16:27:52 +0200 Subject: selftests: netfilter: conntrack_resize.sh: also use udpclash tool Previous patch added a new clash resolution test case. Also use this during conntrack resize stress test in addition to icmp ping flood. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- .../testing/selftests/net/netfilter/conntrack_resize.sh | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/netfilter/conntrack_resize.sh b/tools/testing/selftests/net/netfilter/conntrack_resize.sh index aa1ba07eaf50..788cd56ea4a0 100755 --- a/tools/testing/selftests/net/netfilter/conntrack_resize.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_resize.sh @@ -177,6 +177,22 @@ EOF done } +ct_udpclash() +{ + local ns="$1" + local duration="$2" + local now=$(date +%s) + local end=$((now + duration)) + + [ -x udpclash ] || return + + while [ $now -lt $end ]; do + ip netns exec "$ns" ./udpclash 127.0.0.1 $((RANDOM%65536)) > /dev/null 2>&1 + + now=$(date +%s) + done +} + # dump to /dev/null. We don't want dumps to cause infinite loops # or use-after-free even when conntrack table is altered while dumps # are in progress. @@ -267,6 +283,7 @@ insert_flood() ct_pingflood "$n" "$timeout" "floodresize" & ct_udpflood "$n" "$timeout" & + ct_udpclash "$n" "$timeout" & insert_ctnetlink "$n" "$r" & ctflush "$n" "$timeout" & -- cgit v1.2.3 From 6dc2fae7f8a2384304ef48b7cdcb988222102a54 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 1 Jul 2025 14:41:37 +0200 Subject: selftests: netfilter: nft_concat_range.sh: send packets to empty set The selftest doesn't cover this error path: scratch = *raw_cpu_ptr(m->scratch); if (unlikely(!scratch)) { // here cover this too. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/net/netfilter/nft_concat_range.sh | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh index cd12b8b5ac0e..20e76b395c85 100755 --- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh @@ -1311,6 +1311,9 @@ maybe_send_match() { # - remove some elements, check that packets don't match anymore test_correctness_main() { range_size=1 + + send_nomatch $((end + 1)) $((end + 1 + src_delta)) || return 1 + for i in $(seq "${start}" $((start + count))); do local elem="" -- cgit v1.2.3 From da1d987d3b39a91e53be888c29610f57fb67bbe0 Mon Sep 17 00:00:00 2001 From: Jordan Rife Date: Mon, 14 Jul 2025 11:09:10 -0700 Subject: selftests/bpf: Add tests for bucket resume logic in listening sockets Replicate the set of test cases used for UDP socket iterators to test similar scenarios for TCP listening sockets. Signed-off-by: Jordan Rife Signed-off-by: Martin KaFai Lau Acked-by: Stanislav Fomichev --- .../selftests/bpf/prog_tests/sock_iter_batch.c | 47 ++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c index a4517bee34d5..2adacd91fdf8 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c @@ -358,6 +358,53 @@ static struct test_case resume_tests[] = { .family = AF_INET6, .test = force_realloc, }, + { + .description = "tcp: resume after removing a seen socket (listening)", + .init_socks = nr_soreuse, + .max_socks = nr_soreuse, + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = remove_seen, + }, + { + .description = "tcp: resume after removing one unseen socket (listening)", + .init_socks = nr_soreuse, + .max_socks = nr_soreuse, + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = remove_unseen, + }, + { + .description = "tcp: resume after removing all unseen sockets (listening)", + .init_socks = nr_soreuse, + .max_socks = nr_soreuse, + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = remove_all, + }, + { + .description = "tcp: resume after adding a few sockets (listening)", + .init_socks = nr_soreuse, + .max_socks = nr_soreuse, + .sock_type = SOCK_STREAM, + /* Use AF_INET so that new sockets are added to the head of the + * bucket's list. + */ + .family = AF_INET, + .test = add_some, + }, + { + .description = "tcp: force a realloc to occur (listening)", + .init_socks = init_batch_size, + .max_socks = init_batch_size * 2, + .sock_type = SOCK_STREAM, + /* Use AF_INET6 so that new sockets are added to the tail of the + * bucket's list, needing to be added to the next batch to force + * a realloc. + */ + .family = AF_INET6, + .test = force_realloc, + }, }; static void do_resume_test(struct test_case *tc) -- cgit v1.2.3 From 346066c3278f3baa61b1abc8a03721ed2684efe7 Mon Sep 17 00:00:00 2001 From: Jordan Rife Date: Mon, 14 Jul 2025 11:09:11 -0700 Subject: selftests/bpf: Allow for iteration over multiple ports Prepare to test TCP socket iteration over both listening and established sockets by allowing the BPF iterator programs to skip the port check. Signed-off-by: Jordan Rife Signed-off-by: Martin KaFai Lau Acked-by: Stanislav Fomichev --- tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c | 7 ++----- tools/testing/selftests/bpf/progs/sock_iter_batch.c | 4 ++++ 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c index 2adacd91fdf8..0d0f1b4debff 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c @@ -416,7 +416,6 @@ static void do_resume_test(struct test_case *tc) int err, iter_fd = -1; const char *addr; int *fds = NULL; - int local_port; counts = calloc(tc->max_socks, sizeof(*counts)); if (!ASSERT_OK_PTR(counts, "counts")) @@ -431,10 +430,8 @@ static void do_resume_test(struct test_case *tc) tc->init_socks); if (!ASSERT_OK_PTR(fds, "start_reuseport_server")) goto done; - local_port = get_socket_local_port(*fds); - if (!ASSERT_GE(local_port, 0, "get_socket_local_port")) - goto done; - skel->rodata->ports[0] = ntohs(local_port); + skel->rodata->ports[0] = 0; + skel->rodata->ports[1] = 0; skel->rodata->sf = tc->family; err = sock_iter_batch__load(skel); diff --git a/tools/testing/selftests/bpf/progs/sock_iter_batch.c b/tools/testing/selftests/bpf/progs/sock_iter_batch.c index 8f483337e103..40dce6a38c30 100644 --- a/tools/testing/selftests/bpf/progs/sock_iter_batch.c +++ b/tools/testing/selftests/bpf/progs/sock_iter_batch.c @@ -52,6 +52,8 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx) idx = 0; else if (sk->sk_num == ports[1]) idx = 1; + else if (!ports[0] && !ports[1]) + idx = 0; else return 0; @@ -92,6 +94,8 @@ int iter_udp_soreuse(struct bpf_iter__udp *ctx) idx = 0; else if (sk->sk_num == ports[1]) idx = 1; + else if (!ports[0] && !ports[1]) + idx = 0; else return 0; -- cgit v1.2.3 From f00468124a08a7ecd6f2ed932c57d86a1fc249db Mon Sep 17 00:00:00 2001 From: Jordan Rife Date: Mon, 14 Jul 2025 11:09:12 -0700 Subject: selftests/bpf: Allow for iteration over multiple states Add parentheses around loopback address check to fix up logic and make the socket state filter configurable for the TCP socket iterators. Iterators can skip the socket state check by setting ss to 0. Signed-off-by: Jordan Rife Signed-off-by: Martin KaFai Lau Acked-by: Stanislav Fomichev --- tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c | 3 +++ tools/testing/selftests/bpf/progs/sock_iter_batch.c | 11 ++++++----- 2 files changed, 9 insertions(+), 5 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c index 0d0f1b4debff..4e15a0c2f237 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c @@ -433,6 +433,7 @@ static void do_resume_test(struct test_case *tc) skel->rodata->ports[0] = 0; skel->rodata->ports[1] = 0; skel->rodata->sf = tc->family; + skel->rodata->ss = 0; err = sock_iter_batch__load(skel); if (!ASSERT_OK(err, "sock_iter_batch__load")) @@ -498,6 +499,8 @@ static void do_test(int sock_type, bool onebyone) skel->rodata->ports[i] = ntohs(local_port); } skel->rodata->sf = AF_INET6; + if (sock_type == SOCK_STREAM) + skel->rodata->ss = TCP_LISTEN; err = sock_iter_batch__load(skel); if (!ASSERT_OK(err, "sock_iter_batch__load")) diff --git a/tools/testing/selftests/bpf/progs/sock_iter_batch.c b/tools/testing/selftests/bpf/progs/sock_iter_batch.c index 40dce6a38c30..a36361e4a5de 100644 --- a/tools/testing/selftests/bpf/progs/sock_iter_batch.c +++ b/tools/testing/selftests/bpf/progs/sock_iter_batch.c @@ -23,6 +23,7 @@ static bool ipv4_addr_loopback(__be32 a) } volatile const unsigned int sf; +volatile const unsigned int ss; volatile const __u16 ports[2]; unsigned int bucket[2]; @@ -42,10 +43,10 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx) sock_cookie = bpf_get_socket_cookie(sk); sk = bpf_core_cast(sk, struct sock); if (sk->sk_family != sf || - sk->sk_state != TCP_LISTEN || - sk->sk_family == AF_INET6 ? + (ss && sk->sk_state != ss) || + (sk->sk_family == AF_INET6 ? !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) : - !ipv4_addr_loopback(sk->sk_rcv_saddr)) + !ipv4_addr_loopback(sk->sk_rcv_saddr))) return 0; if (sk->sk_num == ports[0]) @@ -85,9 +86,9 @@ int iter_udp_soreuse(struct bpf_iter__udp *ctx) sock_cookie = bpf_get_socket_cookie(sk); sk = bpf_core_cast(sk, struct sock); if (sk->sk_family != sf || - sk->sk_family == AF_INET6 ? + (sk->sk_family == AF_INET6 ? !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) : - !ipv4_addr_loopback(sk->sk_rcv_saddr)) + !ipv4_addr_loopback(sk->sk_rcv_saddr))) return 0; if (sk->sk_num == ports[0]) -- cgit v1.2.3 From 08327292e7093de9b68ef02fe975738799ceedf4 Mon Sep 17 00:00:00 2001 From: Jordan Rife Date: Mon, 14 Jul 2025 11:09:13 -0700 Subject: selftests/bpf: Make ehash buckets configurable in socket iterator tests Prepare for bucket resume tests for established TCP sockets by making the number of ehash buckets configurable. Subsequent patches force all established sockets into the same bucket by setting ehash_buckets to one. Signed-off-by: Jordan Rife Signed-off-by: Martin KaFai Lau Acked-by: Stanislav Fomichev --- .../selftests/bpf/prog_tests/sock_iter_batch.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c index 4e15a0c2f237..60b45685ef72 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c @@ -6,6 +6,7 @@ #include "sock_iter_batch.skel.h" #define TEST_NS "sock_iter_batch_netns" +#define TEST_CHILD_NS "sock_iter_batch_child_netns" static const int init_batch_size = 16; static const int nr_soreuse = 4; @@ -304,6 +305,7 @@ struct test_case { int *socks, int socks_len, struct sock_count *counts, int counts_len, struct bpf_link *link, int iter_fd); const char *description; + int ehash_buckets; int init_socks; int max_socks; int sock_type; @@ -410,13 +412,25 @@ static struct test_case resume_tests[] = { static void do_resume_test(struct test_case *tc) { struct sock_iter_batch *skel = NULL; + struct sock_count *counts = NULL; static const __u16 port = 10001; + struct nstoken *nstoken = NULL; struct bpf_link *link = NULL; - struct sock_count *counts; int err, iter_fd = -1; const char *addr; int *fds = NULL; + if (tc->ehash_buckets) { + SYS_NOFAIL("ip netns del " TEST_CHILD_NS); + SYS(done, "sysctl -wq net.ipv4.tcp_child_ehash_entries=%d", + tc->ehash_buckets); + SYS(done, "ip netns add %s", TEST_CHILD_NS); + SYS(done, "ip -net %s link set dev lo up", TEST_CHILD_NS); + nstoken = open_netns(TEST_CHILD_NS); + if (!ASSERT_OK_PTR(nstoken, "open_child_netns")) + goto done; + } + counts = calloc(tc->max_socks, sizeof(*counts)); if (!ASSERT_OK_PTR(counts, "counts")) goto done; @@ -453,6 +467,9 @@ static void do_resume_test(struct test_case *tc) tc->test(tc->family, tc->sock_type, addr, port, fds, tc->init_socks, counts, tc->max_socks, link, iter_fd); done: + close_netns(nstoken); + SYS_NOFAIL("ip netns del " TEST_CHILD_NS); + SYS_NOFAIL("sysctl -w net.ipv4.tcp_child_ehash_entries=0"); free(counts); free_fds(fds, tc->init_socks); if (iter_fd >= 0) -- cgit v1.2.3 From 07ebabbbfe9b4c95e8f1f144f21c07fe830fa501 Mon Sep 17 00:00:00 2001 From: Jordan Rife Date: Mon, 14 Jul 2025 11:09:14 -0700 Subject: selftests/bpf: Create established sockets in socket iterator tests Prepare for bucket resume tests for established TCP sockets by creating established sockets. Collect socket fds from connect() and accept() sides and pass them to test cases. Signed-off-by: Jordan Rife Signed-off-by: Martin KaFai Lau Acked-by: Stanislav Fomichev --- .../selftests/bpf/prog_tests/sock_iter_batch.c | 89 +++++++++++++++++++++- 1 file changed, 85 insertions(+), 4 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c index 60b45685ef72..0ccc90d1d1ef 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2024 Meta +#include #include #include "network_helpers.h" #include "sock_iter_batch.skel.h" @@ -153,8 +154,71 @@ static void check_n_were_seen_once(int *fds, int fds_len, int n, ASSERT_EQ(seen_once, n, "seen_once"); } +static int accept_from_one(struct pollfd *server_poll_fds, + int server_poll_fds_len) +{ + static const int poll_timeout_ms = 5000; /* 5s */ + int ret; + int i; + + ret = poll(server_poll_fds, server_poll_fds_len, poll_timeout_ms); + if (!ASSERT_EQ(ret, 1, "poll")) + return -1; + + for (i = 0; i < server_poll_fds_len; i++) + if (server_poll_fds[i].revents & POLLIN) + return accept(server_poll_fds[i].fd, NULL, NULL); + + return -1; +} + +static int *connect_to_server(int family, int sock_type, const char *addr, + __u16 port, int nr_connects, int *server_fds, + int server_fds_len) +{ + struct pollfd *server_poll_fds = NULL; + int *established_socks = NULL; + int i; + + server_poll_fds = calloc(server_fds_len, sizeof(*server_poll_fds)); + if (!ASSERT_OK_PTR(server_poll_fds, "server_poll_fds")) + return NULL; + + for (i = 0; i < server_fds_len; i++) { + server_poll_fds[i].fd = server_fds[i]; + server_poll_fds[i].events = POLLIN; + } + + i = 0; + + established_socks = malloc(sizeof(*established_socks) * nr_connects*2); + if (!ASSERT_OK_PTR(established_socks, "established_socks")) + goto error; + + while (nr_connects--) { + established_socks[i] = connect_to_addr_str(family, sock_type, + addr, port, NULL); + if (!ASSERT_OK_FD(established_socks[i], "connect_to_addr_str")) + goto error; + i++; + established_socks[i] = accept_from_one(server_poll_fds, + server_fds_len); + if (!ASSERT_OK_FD(established_socks[i], "accept_from_one")) + goto error; + i++; + } + + free(server_poll_fds); + return established_socks; +error: + free_fds(established_socks, i); + free(server_poll_fds); + return NULL; +} + static void remove_seen(int family, int sock_type, const char *addr, __u16 port, - int *socks, int socks_len, struct sock_count *counts, + int *socks, int socks_len, int *established_socks, + int established_socks_len, struct sock_count *counts, int counts_len, struct bpf_link *link, int iter_fd) { int close_idx; @@ -185,6 +249,7 @@ static void remove_seen(int family, int sock_type, const char *addr, __u16 port, static void remove_unseen(int family, int sock_type, const char *addr, __u16 port, int *socks, int socks_len, + int *established_socks, int established_socks_len, struct sock_count *counts, int counts_len, struct bpf_link *link, int iter_fd) { @@ -217,6 +282,7 @@ static void remove_unseen(int family, int sock_type, const char *addr, static void remove_all(int family, int sock_type, const char *addr, __u16 port, int *socks, int socks_len, + int *established_socks, int established_socks_len, struct sock_count *counts, int counts_len, struct bpf_link *link, int iter_fd) { @@ -244,7 +310,8 @@ static void remove_all(int family, int sock_type, const char *addr, } static void add_some(int family, int sock_type, const char *addr, __u16 port, - int *socks, int socks_len, struct sock_count *counts, + int *socks, int socks_len, int *established_socks, + int established_socks_len, struct sock_count *counts, int counts_len, struct bpf_link *link, int iter_fd) { int *new_socks = NULL; @@ -274,6 +341,7 @@ done: static void force_realloc(int family, int sock_type, const char *addr, __u16 port, int *socks, int socks_len, + int *established_socks, int established_socks_len, struct sock_count *counts, int counts_len, struct bpf_link *link, int iter_fd) { @@ -302,10 +370,12 @@ done: struct test_case { void (*test)(int family, int sock_type, const char *addr, __u16 port, - int *socks, int socks_len, struct sock_count *counts, + int *socks, int socks_len, int *established_socks, + int established_socks_len, struct sock_count *counts, int counts_len, struct bpf_link *link, int iter_fd); const char *description; int ehash_buckets; + int connections; int init_socks; int max_socks; int sock_type; @@ -416,6 +486,7 @@ static void do_resume_test(struct test_case *tc) static const __u16 port = 10001; struct nstoken *nstoken = NULL; struct bpf_link *link = NULL; + int *established_fds = NULL; int err, iter_fd = -1; const char *addr; int *fds = NULL; @@ -444,6 +515,14 @@ static void do_resume_test(struct test_case *tc) tc->init_socks); if (!ASSERT_OK_PTR(fds, "start_reuseport_server")) goto done; + if (tc->connections) { + established_fds = connect_to_server(tc->family, tc->sock_type, + addr, port, + tc->connections, fds, + tc->init_socks); + if (!ASSERT_OK_PTR(established_fds, "connect_to_server")) + goto done; + } skel->rodata->ports[0] = 0; skel->rodata->ports[1] = 0; skel->rodata->sf = tc->family; @@ -465,13 +544,15 @@ static void do_resume_test(struct test_case *tc) goto done; tc->test(tc->family, tc->sock_type, addr, port, fds, tc->init_socks, - counts, tc->max_socks, link, iter_fd); + established_fds, tc->connections*2, counts, tc->max_socks, + link, iter_fd); done: close_netns(nstoken); SYS_NOFAIL("ip netns del " TEST_CHILD_NS); SYS_NOFAIL("sysctl -w net.ipv4.tcp_child_ehash_entries=0"); free(counts); free_fds(fds, tc->init_socks); + free_fds(established_fds, tc->connections*2); if (iter_fd >= 0) close(iter_fd); bpf_link__destroy(link); -- cgit v1.2.3 From 8fc0c5a82d04e1582b666e3c407340e66493608f Mon Sep 17 00:00:00 2001 From: Jordan Rife Date: Mon, 14 Jul 2025 11:09:15 -0700 Subject: selftests/bpf: Create iter_tcp_destroy test program Prepare for bucket resume tests for established TCP sockets by creating a program to immediately destroy and remove sockets from the TCP ehash table, since close() is not deterministic. Signed-off-by: Jordan Rife Signed-off-by: Martin KaFai Lau Acked-by: Stanislav Fomichev --- tools/testing/selftests/bpf/progs/sock_iter_batch.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/progs/sock_iter_batch.c b/tools/testing/selftests/bpf/progs/sock_iter_batch.c index a36361e4a5de..77966ded5467 100644 --- a/tools/testing/selftests/bpf/progs/sock_iter_batch.c +++ b/tools/testing/selftests/bpf/progs/sock_iter_batch.c @@ -70,6 +70,27 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx) return 0; } +volatile const __u64 destroy_cookie; + +SEC("iter/tcp") +int iter_tcp_destroy(struct bpf_iter__tcp *ctx) +{ + struct sock_common *sk_common = (struct sock_common *)ctx->sk_common; + __u64 sock_cookie; + + if (!sk_common) + return 0; + + sock_cookie = bpf_get_socket_cookie(sk_common); + if (sock_cookie != destroy_cookie) + return 0; + + bpf_sock_destroy(sk_common); + bpf_seq_write(ctx->meta->seq, &sock_cookie, sizeof(sock_cookie)); + + return 0; +} + #define udp_sk(ptr) container_of(ptr, struct udp_sock, inet.sk) SEC("iter/udp") -- cgit v1.2.3 From f126f0ce7c830d538ca047f3a3bdc64669812d9c Mon Sep 17 00:00:00 2001 From: Jordan Rife Date: Mon, 14 Jul 2025 11:09:16 -0700 Subject: selftests/bpf: Add tests for bucket resume logic in established sockets Replicate the set of test cases used for UDP socket iterators to test similar scenarios for TCP established sockets. Signed-off-by: Jordan Rife Signed-off-by: Martin KaFai Lau Acked-by: Stanislav Fomichev --- .../selftests/bpf/prog_tests/sock_iter_batch.c | 293 +++++++++++++++++++++ 1 file changed, 293 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c index 0ccc90d1d1ef..27781df8f2fb 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c @@ -120,6 +120,45 @@ done: return nth_sock_idx; } +static void destroy(int fd) +{ + struct sock_iter_batch *skel = NULL; + __u64 cookie = socket_cookie(fd); + struct bpf_link *link = NULL; + int iter_fd = -1; + int nread; + __u64 out; + + skel = sock_iter_batch__open(); + if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open")) + goto done; + + skel->rodata->destroy_cookie = cookie; + + if (!ASSERT_OK(sock_iter_batch__load(skel), "sock_iter_batch__load")) + goto done; + + link = bpf_program__attach_iter(skel->progs.iter_tcp_destroy, NULL); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter")) + goto done; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create")) + goto done; + + /* Delete matching socket. */ + nread = read(iter_fd, &out, sizeof(out)); + ASSERT_GE(nread, 0, "nread"); + if (nread) + ASSERT_EQ(out, cookie, "cookie matches"); +done: + if (iter_fd >= 0) + close(iter_fd); + bpf_link__destroy(link); + sock_iter_batch__destroy(skel); + close(fd); +} + static int get_seen_count(int fd, struct sock_count counts[], int n) { __u64 cookie = socket_cookie(fd); @@ -247,6 +286,43 @@ static void remove_seen(int family, int sock_type, const char *addr, __u16 port, counts_len); } +static void remove_seen_established(int family, int sock_type, const char *addr, + __u16 port, int *listen_socks, + int listen_socks_len, int *established_socks, + int established_socks_len, + struct sock_count *counts, int counts_len, + struct bpf_link *link, int iter_fd) +{ + int close_idx; + + /* Iterate through all listening sockets. */ + read_n(iter_fd, listen_socks_len, counts, counts_len); + + /* Make sure we saw all listening sockets exactly once. */ + check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len, + counts, counts_len); + + /* Leave one established socket. */ + read_n(iter_fd, established_socks_len - 1, counts, counts_len); + + /* Close a socket we've already seen to remove it from the bucket. */ + close_idx = get_nth_socket(established_socks, established_socks_len, + link, listen_socks_len + 1); + if (!ASSERT_GE(close_idx, 0, "close_idx")) + return; + destroy(established_socks[close_idx]); + established_socks[close_idx] = -1; + + /* Iterate through the rest of the sockets. */ + read_n(iter_fd, -1, counts, counts_len); + + /* Make sure the last socket wasn't skipped and that there were no + * repeats. + */ + check_n_were_seen_once(established_socks, established_socks_len, + established_socks_len - 1, counts, counts_len); +} + static void remove_unseen(int family, int sock_type, const char *addr, __u16 port, int *socks, int socks_len, int *established_socks, int established_socks_len, @@ -280,6 +356,51 @@ static void remove_unseen(int family, int sock_type, const char *addr, counts_len); } +static void remove_unseen_established(int family, int sock_type, + const char *addr, __u16 port, + int *listen_socks, int listen_socks_len, + int *established_socks, + int established_socks_len, + struct sock_count *counts, int counts_len, + struct bpf_link *link, int iter_fd) +{ + int close_idx; + + /* Iterate through all listening sockets. */ + read_n(iter_fd, listen_socks_len, counts, counts_len); + + /* Make sure we saw all listening sockets exactly once. */ + check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len, + counts, counts_len); + + /* Iterate through the first established socket. */ + read_n(iter_fd, 1, counts, counts_len); + + /* Make sure we saw one established socks. */ + check_n_were_seen_once(established_socks, established_socks_len, 1, + counts, counts_len); + + /* Close what would be the next socket in the bucket to exercise the + * condition where we need to skip past the first cookie we remembered. + */ + close_idx = get_nth_socket(established_socks, established_socks_len, + link, listen_socks_len + 1); + if (!ASSERT_GE(close_idx, 0, "close_idx")) + return; + + destroy(established_socks[close_idx]); + established_socks[close_idx] = -1; + + /* Iterate through the rest of the sockets. */ + read_n(iter_fd, -1, counts, counts_len); + + /* Make sure the remaining sockets were seen exactly once and that we + * didn't repeat the socket that was already seen. + */ + check_n_were_seen_once(established_socks, established_socks_len, + established_socks_len - 1, counts, counts_len); +} + static void remove_all(int family, int sock_type, const char *addr, __u16 port, int *socks, int socks_len, int *established_socks, int established_socks_len, @@ -309,6 +430,54 @@ static void remove_all(int family, int sock_type, const char *addr, ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n"); } +static void remove_all_established(int family, int sock_type, const char *addr, + __u16 port, int *listen_socks, + int listen_socks_len, int *established_socks, + int established_socks_len, + struct sock_count *counts, int counts_len, + struct bpf_link *link, int iter_fd) +{ + int *close_idx = NULL; + int i; + + /* Iterate through all listening sockets. */ + read_n(iter_fd, listen_socks_len, counts, counts_len); + + /* Make sure we saw all listening sockets exactly once. */ + check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len, + counts, counts_len); + + /* Iterate through the first established socket. */ + read_n(iter_fd, 1, counts, counts_len); + + /* Make sure we saw one established socks. */ + check_n_were_seen_once(established_socks, established_socks_len, 1, + counts, counts_len); + + /* Close all remaining sockets to exhaust the list of saved cookies and + * exit without putting any sockets into the batch on the next read. + */ + close_idx = malloc(sizeof(int) * (established_socks_len - 1)); + if (!ASSERT_OK_PTR(close_idx, "close_idx malloc")) + return; + for (i = 0; i < established_socks_len - 1; i++) { + close_idx[i] = get_nth_socket(established_socks, + established_socks_len, link, + listen_socks_len + i); + if (!ASSERT_GE(close_idx[i], 0, "close_idx")) + return; + } + + for (i = 0; i < established_socks_len - 1; i++) { + destroy(established_socks[close_idx[i]]); + established_socks[close_idx[i]] = -1; + } + + /* Make sure there are no more sockets returned */ + ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n"); + free(close_idx); +} + static void add_some(int family, int sock_type, const char *addr, __u16 port, int *socks, int socks_len, int *established_socks, int established_socks_len, struct sock_count *counts, @@ -339,6 +508,49 @@ done: free_fds(new_socks, socks_len); } +static void add_some_established(int family, int sock_type, const char *addr, + __u16 port, int *listen_socks, + int listen_socks_len, int *established_socks, + int established_socks_len, + struct sock_count *counts, + int counts_len, struct bpf_link *link, + int iter_fd) +{ + int *new_socks = NULL; + + /* Iterate through all listening sockets. */ + read_n(iter_fd, listen_socks_len, counts, counts_len); + + /* Make sure we saw all listening sockets exactly once. */ + check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len, + counts, counts_len); + + /* Iterate through the first established_socks_len - 1 sockets. */ + read_n(iter_fd, established_socks_len - 1, counts, counts_len); + + /* Make sure we saw established_socks_len - 1 sockets exactly once. */ + check_n_were_seen_once(established_socks, established_socks_len, + established_socks_len - 1, counts, counts_len); + + /* Double the number of established sockets in the bucket. */ + new_socks = connect_to_server(family, sock_type, addr, port, + established_socks_len / 2, listen_socks, + listen_socks_len); + if (!ASSERT_OK_PTR(new_socks, "connect_to_server")) + goto done; + + /* Iterate through the rest of the sockets. */ + read_n(iter_fd, -1, counts, counts_len); + + /* Make sure each of the original sockets was seen exactly once. */ + check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len, + counts, counts_len); + check_n_were_seen_once(established_socks, established_socks_len, + established_socks_len, counts, counts_len); +done: + free_fds(new_socks, established_socks_len); +} + static void force_realloc(int family, int sock_type, const char *addr, __u16 port, int *socks, int socks_len, int *established_socks, int established_socks_len, @@ -368,6 +580,24 @@ done: free_fds(new_socks, socks_len); } +static void force_realloc_established(int family, int sock_type, + const char *addr, __u16 port, + int *listen_socks, int listen_socks_len, + int *established_socks, + int established_socks_len, + struct sock_count *counts, int counts_len, + struct bpf_link *link, int iter_fd) +{ + /* Iterate through all sockets to trigger a realloc. */ + read_n(iter_fd, -1, counts, counts_len); + + /* Make sure each socket was seen exactly once. */ + check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len, + counts, counts_len); + check_n_were_seen_once(established_socks, established_socks_len, + established_socks_len, counts, counts_len); +} + struct test_case { void (*test)(int family, int sock_type, const char *addr, __u16 port, int *socks, int socks_len, int *established_socks, @@ -477,6 +707,69 @@ static struct test_case resume_tests[] = { .family = AF_INET6, .test = force_realloc, }, + { + .description = "tcp: resume after removing a seen socket (established)", + /* Force all established sockets into one bucket */ + .ehash_buckets = 1, + .connections = nr_soreuse, + .init_socks = nr_soreuse, + /* Room for connect()ed and accept()ed sockets */ + .max_socks = nr_soreuse * 3, + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = remove_seen_established, + }, + { + .description = "tcp: resume after removing one unseen socket (established)", + /* Force all established sockets into one bucket */ + .ehash_buckets = 1, + .connections = nr_soreuse, + .init_socks = nr_soreuse, + /* Room for connect()ed and accept()ed sockets */ + .max_socks = nr_soreuse * 3, + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = remove_unseen_established, + }, + { + .description = "tcp: resume after removing all unseen sockets (established)", + /* Force all established sockets into one bucket */ + .ehash_buckets = 1, + .connections = nr_soreuse, + .init_socks = nr_soreuse, + /* Room for connect()ed and accept()ed sockets */ + .max_socks = nr_soreuse * 3, + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = remove_all_established, + }, + { + .description = "tcp: resume after adding a few sockets (established)", + /* Force all established sockets into one bucket */ + .ehash_buckets = 1, + .connections = nr_soreuse, + .init_socks = nr_soreuse, + /* Room for connect()ed and accept()ed sockets */ + .max_socks = nr_soreuse * 3, + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = add_some_established, + }, + { + .description = "tcp: force a realloc to occur (established)", + /* Force all established sockets into one bucket */ + .ehash_buckets = 1, + /* Bucket size will need to double when going from listening to + * established sockets. + */ + .connections = init_batch_size, + .init_socks = nr_soreuse, + /* Room for connect()ed and accept()ed sockets */ + .max_socks = nr_soreuse + (init_batch_size * 2), + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = force_realloc_established, + }, }; static void do_resume_test(struct test_case *tc) -- cgit v1.2.3 From 5ae3bcc20446b486f479c4df69e4186d6fecbcb4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 11 Jul 2025 18:20:05 -0700 Subject: selftests: drv-net: add rss_api to the Makefile I missed adding rss_api.py to the Makefile. The NIPA Makefile checking script was scanning for shell scripts only, so it didn't flag it either. Fixes: 4d13c6c449af ("selftests: drv-net: test RSS Netlink notifications") Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250712012005.4010263-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index df2c047ffa90..fdc97355588c 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -16,6 +16,7 @@ TEST_PROGS = \ irq.py \ loopback.sh \ pp_alloc_fail.py \ + rss_api.py \ rss_ctx.py \ rss_input_xfrm.py \ tso.py \ -- cgit v1.2.3 From e18f348632ec4ee25d9172cdff273f0e939241c7 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Sat, 12 Jul 2025 11:50:35 -0300 Subject: selftests/tc-testing: Create test cases for adding qdiscs to invalid qdisc parents As described in a previous commit [1], Lion's patch [2] revealed an ancient bug in the qdisc API. Whenever a user tries to add a qdisc to an invalid parent (not a class, root, or ingress qdisc), the qdisc API will detect this after qdisc_create is called. Some qdiscs (like fq_codel, pie, and sfq) call functions (on their init callback) which assume the parent is valid, so qdisc_create itself may have caused a NULL pointer dereference in such cases. This commit creates 3 TDC tests that attempt to add fq_codel, pie and sfq qdiscs to invalid parents - Attempts to add an fq_codel qdisc to an hhf qdisc parent - Attempts to add a pie qdisc to a drr qdisc parent - Attempts to add an sfq qdisc to an inexistent hfsc classid (which would belong to a valid hfsc qdisc) [1] https://lore.kernel.org/all/20250707210801.372995-1-victor@mojatatu.com/ [2] https://lore.kernel.org/netdev/d912cbd7-193b-4269-9857-525bee8bbb6a@gmail.com/ Signed-off-by: Victor Nogueira Link: https://patch.msgid.link/20250712145035.705156-1-victor@mojatatu.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/infra/qdiscs.json | 66 ++++++++++++++++++++++ 1 file changed, 66 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 5c6851e8d311..b344570e7f40 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -672,5 +672,71 @@ "teardown": [ "$TC qdisc del dev $DUMMY root handle 1: drr" ] + }, + { + "id": "be28", + "name": "Try to add fq_codel qdisc as a child of an hhf qdisc", + "category": [ + "qdisc", + "fq_codel", + "hhf" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY root handle a: hhf" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY parent a: handle b: fq_codel", + "expExitCode": "2", + "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle b:", + "matchJSON": [], + "teardown": [ + "$TC qdisc del dev $DUMMY root" + ] + }, + { + "id": "fcb5", + "name": "Try to add pie qdisc as a child of a drr qdisc", + "category": [ + "qdisc", + "pie", + "drr" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY root handle a: drr" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY parent a: handle b: pie", + "expExitCode": "2", + "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle b:", + "matchJSON": [], + "teardown": [ + "$TC qdisc del dev $DUMMY root" + ] + }, + { + "id": "7801", + "name": "Try to add fq qdisc as a child of an inexistent hfsc class", + "category": [ + "qdisc", + "sfq", + "hfsc" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY root handle a: hfsc" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY parent a:fff2 sfq limit 4", + "expExitCode": "2", + "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle b:", + "matchJSON": [], + "teardown": [ + "$TC qdisc del dev $DUMMY root" + ] } ] -- cgit v1.2.3 From 5777d1871bf69d435e57e639fcf132d2d0c00883 Mon Sep 17 00:00:00 2001 From: Oscar Maes Date: Thu, 10 Jul 2025 16:27:14 +0200 Subject: selftests: net: add test for variable PMTU in broadcast routes Added a test for variable PMTU in broadcast routes. This test uses iputils' ping and attempts to send a ping between two peers, which should result in a regular echo reply. This test will fail when the receiving peer does not receive the echo request due to a lack of packet fragmentation. Signed-off-by: Oscar Maes Link: https://patch.msgid.link/20250710142714.12986-2-oscmaes92@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/broadcast_pmtu.sh | 47 +++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100755 tools/testing/selftests/net/broadcast_pmtu.sh (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 66a3ef221ad7..13e2678d418b 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -115,6 +115,7 @@ TEST_PROGS += skf_net_off.sh TEST_GEN_FILES += skf_net_off TEST_GEN_FILES += tfo TEST_PROGS += tfo_passive.sh +TEST_PROGS += broadcast_pmtu.sh # YNL files, must be before "include ..lib.mk" YNL_GEN_FILES := busy_poller netlink-dumps diff --git a/tools/testing/selftests/net/broadcast_pmtu.sh b/tools/testing/selftests/net/broadcast_pmtu.sh new file mode 100755 index 000000000000..726eb5d25839 --- /dev/null +++ b/tools/testing/selftests/net/broadcast_pmtu.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Ensures broadcast route MTU is respected + +CLIENT_NS=$(mktemp -u client-XXXXXXXX) +CLIENT_IP4="192.168.0.1/24" +CLIENT_BROADCAST_ADDRESS="192.168.0.255" + +SERVER_NS=$(mktemp -u server-XXXXXXXX) +SERVER_IP4="192.168.0.2/24" + +setup() { + ip netns add "${CLIENT_NS}" + ip netns add "${SERVER_NS}" + + ip -net "${SERVER_NS}" link add link1 type veth peer name link0 netns "${CLIENT_NS}" + + ip -net "${CLIENT_NS}" link set link0 up + ip -net "${CLIENT_NS}" link set link0 mtu 9000 + ip -net "${CLIENT_NS}" addr add "${CLIENT_IP4}" dev link0 + + ip -net "${SERVER_NS}" link set link1 up + ip -net "${SERVER_NS}" link set link1 mtu 1500 + ip -net "${SERVER_NS}" addr add "${SERVER_IP4}" dev link1 + + read -r -a CLIENT_BROADCAST_ENTRY <<< "$(ip -net "${CLIENT_NS}" route show table local type broadcast)" + ip -net "${CLIENT_NS}" route del "${CLIENT_BROADCAST_ENTRY[@]}" + ip -net "${CLIENT_NS}" route add "${CLIENT_BROADCAST_ENTRY[@]}" mtu 1500 + + ip net exec "${SERVER_NS}" sysctl -wq net.ipv4.icmp_echo_ignore_broadcasts=0 +} + +cleanup() { + ip -net "${SERVER_NS}" link del link1 + ip netns del "${CLIENT_NS}" + ip netns del "${SERVER_NS}" +} + +trap cleanup EXIT + +setup && + echo "Testing for broadcast route MTU" && + ip net exec "${CLIENT_NS}" ping -f -M want -q -c 1 -s 8000 -w 1 -b "${CLIENT_BROADCAST_ADDRESS}" > /dev/null 2>&1 + +exit $? + -- cgit v1.2.3 From 2677010e7793451c20d895c477c4dc76f6e6a10e Mon Sep 17 00:00:00 2001 From: Samiullah Khawaja Date: Thu, 10 Jul 2025 21:12:03 +0000 Subject: Add support to set NAPI threaded for individual NAPI A net device has a threaded sysctl that can be used to enable threaded NAPI polling on all of the NAPI contexts under that device. Allow enabling threaded NAPI polling at individual NAPI level using netlink. Extend the netlink operation `napi-set` and allow setting the threaded attribute of a NAPI. This will enable the threaded polling on a NAPI context. Add a test in `nl_netdev.py` that verifies various cases of threaded NAPI being set at NAPI and at device level. Tested ./tools/testing/selftests/net/nl_netdev.py TAP version 13 1..7 ok 1 nl_netdev.empty_check ok 2 nl_netdev.lo_check ok 3 nl_netdev.page_pool_check ok 4 nl_netdev.napi_list_check ok 5 nl_netdev.dev_set_threaded ok 6 nl_netdev.napi_set_threaded ok 7 nl_netdev.nsim_rxq_reset_down # Totals: pass:7 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Samiullah Khawaja Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250710211203.3979655-1-skhawaja@google.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 10 ++++ Documentation/networking/napi.rst | 9 +++- include/linux/netdevice.h | 1 + include/uapi/linux/netdev.h | 1 + net/core/dev.c | 30 +++++++++-- net/core/dev.h | 7 +++ net/core/netdev-genl-gen.c | 5 +- net/core/netdev-genl.c | 14 +++++ tools/include/uapi/linux/netdev.h | 1 + tools/testing/selftests/net/nl_netdev.py | 91 +++++++++++++++++++++++++++++++- 10 files changed, 162 insertions(+), 7 deletions(-) (limited to 'tools/testing/selftests') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index ce4cfec82100..85d0ea6ac426 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -283,6 +283,14 @@ attribute-sets: doc: The timeout, in nanoseconds, of how long to suspend irq processing, if event polling finds events type: uint + - + name: threaded + doc: Whether the NAPI is configured to operate in threaded polling + mode. If this is set to 1 then the NAPI context operates in + threaded polling mode. + type: uint + checks: + max: 1 - name: xsk-info attributes: [] @@ -694,6 +702,7 @@ operations: - defer-hard-irqs - gro-flush-timeout - irq-suspend-timeout + - threaded dump: request: attributes: @@ -746,6 +755,7 @@ operations: - defer-hard-irqs - gro-flush-timeout - irq-suspend-timeout + - threaded - name: bind-tx doc: Bind dmabuf to netdev for TX diff --git a/Documentation/networking/napi.rst b/Documentation/networking/napi.rst index d0e3953cae6a..a15754adb041 100644 --- a/Documentation/networking/napi.rst +++ b/Documentation/networking/napi.rst @@ -444,7 +444,14 @@ dependent). The NAPI instance IDs will be assigned in the opposite order than the process IDs of the kernel threads. Threaded NAPI is controlled by writing 0/1 to the ``threaded`` file in -netdev's sysfs directory. +netdev's sysfs directory. It can also be enabled for a specific NAPI using +netlink interface. + +For example, using the script: + +.. code-block:: bash + + $ ynl --family netdev --do napi-set --json='{"id": 66, "threaded": 1}' .. rubric:: Footnotes diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ec23cee5245d..e49d8c98d284 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -369,6 +369,7 @@ struct napi_config { u64 irq_suspend_timeout; u32 defer_hard_irqs; cpumask_t affinity_mask; + bool threaded; unsigned int napi_id; }; diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 7eb9571786b8..1f3719a9a0eb 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -134,6 +134,7 @@ enum { NETDEV_A_NAPI_DEFER_HARD_IRQS, NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, + NETDEV_A_NAPI_THREADED, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) diff --git a/net/core/dev.c b/net/core/dev.c index 19ddc3e6990a..621a639aeba1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6961,6 +6961,31 @@ static void napi_stop_kthread(struct napi_struct *napi) napi->thread = NULL; } +int napi_set_threaded(struct napi_struct *napi, bool threaded) +{ + if (threaded) { + if (!napi->thread) { + int err = napi_kthread_create(napi); + + if (err) + return err; + } + } + + if (napi->config) + napi->config->threaded = threaded; + + if (!threaded && napi->thread) { + napi_stop_kthread(napi); + } else { + /* Make sure kthread is created before THREADED bit is set. */ + smp_mb__before_atomic(); + assign_bit(NAPI_STATE_THREADED, &napi->state, threaded); + } + + return 0; +} + int dev_set_threaded(struct net_device *dev, bool threaded) { struct napi_struct *napi; @@ -6968,9 +6993,6 @@ int dev_set_threaded(struct net_device *dev, bool threaded) netdev_assert_locked_or_invisible(dev); - if (dev->threaded == threaded) - return 0; - if (threaded) { list_for_each_entry(napi, &dev->napi_list, dev_list) { if (!napi->thread) { @@ -7221,6 +7243,8 @@ static void napi_restore_config(struct napi_struct *n) napi_hash_add(n); n->config->napi_id = n->napi_id; } + + WARN_ON_ONCE(napi_set_threaded(n, n->config->threaded)); } static void napi_save_config(struct napi_struct *n) diff --git a/net/core/dev.h b/net/core/dev.h index e93f36b7ddf3..a603387fb566 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -315,6 +315,13 @@ static inline void napi_set_irq_suspend_timeout(struct napi_struct *n, WRITE_ONCE(n->irq_suspend_timeout, timeout); } +static inline bool napi_get_threaded(struct napi_struct *n) +{ + return test_bit(NAPI_STATE_THREADED, &n->state); +} + +int napi_set_threaded(struct napi_struct *n, bool threaded); + int rps_cpumask_housekeeping(struct cpumask *mask); #if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL) diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index 4fc44587f493..0994bd68a7e6 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -92,11 +92,12 @@ static const struct nla_policy netdev_bind_rx_nl_policy[NETDEV_A_DMABUF_FD + 1] }; /* NETDEV_CMD_NAPI_SET - do */ -static const struct nla_policy netdev_napi_set_nl_policy[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT + 1] = { +static const struct nla_policy netdev_napi_set_nl_policy[NETDEV_A_NAPI_THREADED + 1] = { [NETDEV_A_NAPI_ID] = { .type = NLA_U32, }, [NETDEV_A_NAPI_DEFER_HARD_IRQS] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_napi_defer_hard_irqs_range), [NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT] = { .type = NLA_UINT, }, [NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT] = { .type = NLA_UINT, }, + [NETDEV_A_NAPI_THREADED] = NLA_POLICY_MAX(NLA_UINT, 1), }; /* NETDEV_CMD_BIND_TX - do */ @@ -193,7 +194,7 @@ static const struct genl_split_ops netdev_nl_ops[] = { .cmd = NETDEV_CMD_NAPI_SET, .doit = netdev_nl_napi_set_doit, .policy = netdev_napi_set_nl_policy, - .maxattr = NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, + .maxattr = NETDEV_A_NAPI_THREADED, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 2afa7b2141aa..5875df372415 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -184,6 +184,10 @@ netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi, if (napi->irq >= 0 && nla_put_u32(rsp, NETDEV_A_NAPI_IRQ, napi->irq)) goto nla_put_failure; + if (nla_put_uint(rsp, NETDEV_A_NAPI_THREADED, + napi_get_threaded(napi))) + goto nla_put_failure; + if (napi->thread) { pid = task_pid_nr(napi->thread); if (nla_put_u32(rsp, NETDEV_A_NAPI_PID, pid)) @@ -322,8 +326,18 @@ netdev_nl_napi_set_config(struct napi_struct *napi, struct genl_info *info) { u64 irq_suspend_timeout = 0; u64 gro_flush_timeout = 0; + u8 threaded = 0; u32 defer = 0; + if (info->attrs[NETDEV_A_NAPI_THREADED]) { + int ret; + + threaded = nla_get_uint(info->attrs[NETDEV_A_NAPI_THREADED]); + ret = napi_set_threaded(napi, !!threaded); + if (ret) + return ret; + } + if (info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]) { defer = nla_get_u32(info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]); napi_set_defer_hard_irqs(napi, defer); diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 7eb9571786b8..1f3719a9a0eb 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -134,6 +134,7 @@ enum { NETDEV_A_NAPI_DEFER_HARD_IRQS, NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, + NETDEV_A_NAPI_THREADED, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py index c9109627a741..c8ffade79a52 100755 --- a/tools/testing/selftests/net/nl_netdev.py +++ b/tools/testing/selftests/net/nl_netdev.py @@ -35,6 +35,91 @@ def napi_list_check(nf) -> None: ksft_eq(len(napis), 100, comment=f"queue count after reset queue {q} mode {i}") +def napi_set_threaded(nf) -> None: + """ + Test that verifies various cases of napi threaded + set and unset at napi and device level. + """ + with NetdevSimDev(queue_count=2) as nsimdev: + nsim = nsimdev.nsims[0] + + ip(f"link set dev {nsim.ifname} up") + + napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True) + ksft_eq(len(napis), 2) + + napi0_id = napis[0]['id'] + napi1_id = napis[1]['id'] + + # set napi threaded and verify + nf.napi_set({'id': napi0_id, 'threaded': 1}) + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], 1) + ksft_ne(napi0.get('pid'), None) + + # check it is not set for napi1 + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], 0) + ksft_eq(napi1.get('pid'), None) + + ip(f"link set dev {nsim.ifname} down") + ip(f"link set dev {nsim.ifname} up") + + # verify if napi threaded is still set + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], 1) + ksft_ne(napi0.get('pid'), None) + + # check it is still not set for napi1 + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], 0) + ksft_eq(napi1.get('pid'), None) + + # unset napi threaded and verify + nf.napi_set({'id': napi0_id, 'threaded': 0}) + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], 0) + ksft_eq(napi0.get('pid'), None) + + # set threaded at device level + system(f"echo 1 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is set for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], 1) + ksft_ne(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], 1) + ksft_ne(napi1.get('pid'), None) + + # unset threaded at device level + system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is unset for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], 0) + ksft_eq(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], 0) + ksft_eq(napi1.get('pid'), None) + + # set napi threaded for napi0 + nf.napi_set({'id': napi0_id, 'threaded': 1}) + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], 1) + ksft_ne(napi0.get('pid'), None) + + # unset threaded at device level + system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is unset for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], 0) + ksft_eq(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], 0) + ksft_eq(napi1.get('pid'), None) + def dev_set_threaded(nf) -> None: """ Test that verifies various cases of napi threaded @@ -56,8 +141,10 @@ def dev_set_threaded(nf) -> None: # check napi threaded is set for both napis napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], 1) ksft_ne(napi0.get('pid'), None) napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], 1) ksft_ne(napi1.get('pid'), None) # unset threaded @@ -65,8 +152,10 @@ def dev_set_threaded(nf) -> None: # check napi threaded is unset for both napis napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], 0) ksft_eq(napi0.get('pid'), None) napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], 0) ksft_eq(napi1.get('pid'), None) def nsim_rxq_reset_down(nf) -> None: @@ -156,7 +245,7 @@ def page_pool_check(nf) -> None: def main() -> None: nf = NetdevFamily() ksft_run([empty_check, lo_check, page_pool_check, napi_list_check, - dev_set_threaded, nsim_rxq_reset_down], + dev_set_threaded, napi_set_threaded, nsim_rxq_reset_down], args=(nf, )) ksft_exit() -- cgit v1.2.3 From f5fda1a86884cf20d9b5842221b963bb16bcebf1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 11 Jul 2025 11:40:01 +0000 Subject: selftests/net: packetdrill: add tcp_rcv_big_endseq.pkt This test checks TCP behavior when receiving a packet beyond the window. It checks the new TcpExtBeyondWindow SNMP counter. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250711114006.480026-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- .../net/packetdrill/tcp_rcv_big_endseq.pkt | 44 ++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt new file mode 100644 index 000000000000..7e170b94fd36 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh` + + 0 `nstat -n` + +// Establish a connection. + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [10000], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.1 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < P. 1:4001(4000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 win 5000 + +// packet in sequence : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW + +0 < P. 4001:54001(50000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 win 5000 + +// ooo packet. : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW + +1 < P. 5001:55001(50000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 win 5000 + +// SKB_DROP_REASON_TCP_INVALID_SEQUENCE / LINUX_MIB_BEYOND_WINDOW + +0 < P. 70001:80001(10000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 win 5000 + + +0 read(4, ..., 100000) = 4000 + +// If queue is empty, accept a packet even if its end_seq is above wup + rcv_wnd + +0 < P. 4001:54001(50000) ack 1 win 257 + +.040 > . 1:1(0) ack 54001 win 0 + +// Check LINUX_MIB_BEYOND_WINDOW has been incremented 3 times. ++0 `nstat | grep TcpExtBeyondWindow | grep -q " 3 "` -- cgit v1.2.3 From 445e0cc38d498e341f36f2e3a9cacf1ddf0b09b6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 11 Jul 2025 11:40:03 +0000 Subject: selftests/net: packetdrill: add tcp_ooo_rcv_mss.pkt We make sure tcpi_rcv_mss and tp->scaling_ratio are correctly updated if no in-order packet has been received yet. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250711114006.480026-6-edumazet@google.com Signed-off-by: Jakub Kicinski --- .../selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt | 27 ++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt b/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt new file mode 100644 index 000000000000..7e6bc5fb0c8d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_rmem="4096 131072 $((32*1024*1024))"` + + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 + +0 > S. 0:0(0) ack 1 + +.1 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < . 2001:11001(9000) ack 1 win 257 + +0 > . 1:1(0) ack 1 win 81 + +// check that ooo packet properly updates tcpi_rcv_mss + +0 %{ assert tcpi_rcv_mss == 1000, tcpi_rcv_mss }% + + +0 < . 11001:21001(10000) ack 1 win 257 + +0 > . 1:1(0) ack 1 win 81 + -- cgit v1.2.3 From 906893cf2cf275bf33eeff2c76a621c4b60c9bba Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 11 Jul 2025 11:40:06 +0000 Subject: selftests/net: packetdrill: add tcp_rcv_toobig.pkt Check that TCP receiver behavior after "tcp: stronger sk_rcvbuf checks" Too fat packet is dropped unless receive queue is empty. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250711114006.480026-9-edumazet@google.com Signed-off-by: Jakub Kicinski --- .../selftests/net/packetdrill/tcp_rcv_toobig.pkt | 33 ++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt new file mode 100644 index 000000000000..f575c0ff89da --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh` + + 0 `nstat -n` + +// Establish a connection. + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [20000], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 win 18980 + +.1 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < P. 1:20001(20000) ack 1 win 257 + +.04 > . 1:1(0) ack 20001 win 18000 + + +0 setsockopt(4, SOL_SOCKET, SO_RCVBUF, [12000], 4) = 0 + +0 < P. 20001:80001(60000) ack 1 win 257 + +0 > . 1:1(0) ack 20001 win 18000 + + +0 read(4, ..., 20000) = 20000 +// A too big packet is accepted if the receive queue is empty + +0 < P. 20001:80001(60000) ack 1 win 257 + +0 > . 1:1(0) ack 80001 win 0 + -- cgit v1.2.3 From 0e9418961f897be59b1fab6e31ae1b09a0bae902 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 10 Jul 2025 18:04:50 +0200 Subject: selftests: net: increase inter-packet timeout in udpgro.sh The mentioned test is not very stable when running on top of debug kernel build. Increase the inter-packet timeout to allow more slack in such environments. Fixes: 3327a9c46352 ("selftests: add functionals test for UDP GRO") Reviewed-by: Simon Horman Link: https://patch.msgid.link/b0370c06ddb3235debf642c17de0284b2cd3c652.1752163107.git.pabeni@redhat.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/udpgro.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index 1dc337c709f8..b17e032a6d75 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -48,7 +48,7 @@ run_one() { cfg_veth - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${rx_args} & local PID1=$! wait_local_port_listen ${PEER_NS} 8000 udp @@ -95,7 +95,7 @@ run_one_nat() { # will land on the 'plain' one ip netns exec "${PEER_NS}" ./udpgso_bench_rx -G ${family} -b ${addr1} -n 0 & local PID1=$! - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${family} -b ${addr2%/*} ${rx_args} & local PID2=$! wait_local_port_listen "${PEER_NS}" 8000 udp @@ -117,9 +117,9 @@ run_one_2sock() { cfg_veth - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} -p 12345 & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${rx_args} -p 12345 & local PID1=$! - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 100 ${rx_args} & local PID2=$! wait_local_port_listen "${PEER_NS}" 12345 udp -- cgit v1.2.3 From b36c73251aaec6c9941b5493637a9007d0a56616 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 13 Jul 2025 22:34:06 +0800 Subject: selftests: ublk: remove `tag` parameter of ->tgt_io_done() The `tag` parameter can be figured out from cqe->user_data, and that is also the only way to get the info, so remove `tag` parameter, and let target code retrieve it from cqe->user_data. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250713143415.2857561-12-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/fault_inject.c | 3 ++- tools/testing/selftests/ublk/file_backed.c | 3 ++- tools/testing/selftests/ublk/kublk.c | 4 +--- tools/testing/selftests/ublk/kublk.h | 3 +-- tools/testing/selftests/ublk/null.c | 3 ++- tools/testing/selftests/ublk/stripe.c | 3 ++- 6 files changed, 10 insertions(+), 9 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/ublk/fault_inject.c b/tools/testing/selftests/ublk/fault_inject.c index 6e60f7d97125..c980958ec045 100644 --- a/tools/testing/selftests/ublk/fault_inject.c +++ b/tools/testing/selftests/ublk/fault_inject.c @@ -55,9 +55,10 @@ static int ublk_fault_inject_queue_io(struct ublk_queue *q, int tag) return 0; } -static void ublk_fault_inject_tgt_io_done(struct ublk_queue *q, int tag, +static void ublk_fault_inject_tgt_io_done(struct ublk_queue *q, const struct io_uring_cqe *cqe) { + unsigned tag = user_data_to_tag(cqe->user_data); const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); if (cqe->res != -ETIME) diff --git a/tools/testing/selftests/ublk/file_backed.c b/tools/testing/selftests/ublk/file_backed.c index cfa59b631693..02fb8a411d3b 100644 --- a/tools/testing/selftests/ublk/file_backed.c +++ b/tools/testing/selftests/ublk/file_backed.c @@ -108,9 +108,10 @@ static int ublk_loop_queue_io(struct ublk_queue *q, int tag) return 0; } -static void ublk_loop_io_done(struct ublk_queue *q, int tag, +static void ublk_loop_io_done(struct ublk_queue *q, const struct io_uring_cqe *cqe) { + unsigned tag = user_data_to_tag(cqe->user_data); unsigned op = user_data_to_op(cqe->user_data); struct ublk_io *io = ublk_get_io(q, tag); diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index e2d2042810d4..fba4e80e9bab 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -717,8 +717,6 @@ static int ublk_thread_is_done(struct ublk_thread *t) static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q, struct io_uring_cqe *cqe) { - unsigned tag = user_data_to_tag(cqe->user_data); - if (cqe->res < 0 && cqe->res != -EAGAIN) ublk_err("%s: failed tgt io: res %d qid %u tag %u, cmd_op %u\n", __func__, cqe->res, q->q_id, @@ -726,7 +724,7 @@ static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q, user_data_to_op(cqe->user_data)); if (q->tgt_ops->tgt_io_done) - q->tgt_ops->tgt_io_done(q, tag, cqe); + q->tgt_ops->tgt_io_done(q, cqe); } static void ublk_handle_cqe(struct ublk_thread *t, diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 6be601536b3d..d7b711e63822 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -145,8 +145,7 @@ struct ublk_tgt_ops { void (*deinit_tgt)(struct ublk_dev *); int (*queue_io)(struct ublk_queue *, int tag); - void (*tgt_io_done)(struct ublk_queue *, - int tag, const struct io_uring_cqe *); + void (*tgt_io_done)(struct ublk_queue *, const struct io_uring_cqe *); /* * Target specific command line handling diff --git a/tools/testing/selftests/ublk/null.c b/tools/testing/selftests/ublk/null.c index afe0b99d77ee..ea3da53437e9 100644 --- a/tools/testing/selftests/ublk/null.c +++ b/tools/testing/selftests/ublk/null.c @@ -87,9 +87,10 @@ static int null_queue_auto_zc_io(struct ublk_queue *q, int tag) return 1; } -static void ublk_null_io_done(struct ublk_queue *q, int tag, +static void ublk_null_io_done(struct ublk_queue *q, const struct io_uring_cqe *cqe) { + unsigned tag = user_data_to_tag(cqe->user_data); unsigned op = user_data_to_op(cqe->user_data); struct ublk_io *io = ublk_get_io(q, tag); diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c index 37d50bbf5f5e..53cefffaf32e 100644 --- a/tools/testing/selftests/ublk/stripe.c +++ b/tools/testing/selftests/ublk/stripe.c @@ -226,9 +226,10 @@ static int ublk_stripe_queue_io(struct ublk_queue *q, int tag) return 0; } -static void ublk_stripe_io_done(struct ublk_queue *q, int tag, +static void ublk_stripe_io_done(struct ublk_queue *q, const struct io_uring_cqe *cqe) { + unsigned tag = user_data_to_tag(cqe->user_data); const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); unsigned op = user_data_to_op(cqe->user_data); struct ublk_io *io = ublk_get_io(q, tag); -- cgit v1.2.3 From e0054835bf6850245e17417fdbe80e232737e537 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 13 Jul 2025 22:34:07 +0800 Subject: selftests: ublk: pass 'ublk_thread *' to ->queue_io() and ->tgt_io_done() 'struct thread' is task local structure, and the related code will become more readable if we pass it via parameter. Meantime pass 'ublk_thread *' to ublk_io_alloc_sqes(), and this way is natural since we use per-thread io_uring for handling IO. More importantly it helps much for removing the current ubq_daemon or per-io-task limit. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250713143415.2857561-13-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/fault_inject.c | 8 +++++--- tools/testing/selftests/ublk/file_backed.c | 25 ++++++++++++++----------- tools/testing/selftests/ublk/kublk.c | 13 +++++++------ tools/testing/selftests/ublk/kublk.h | 9 +++++---- tools/testing/selftests/ublk/null.c | 21 ++++++++++++--------- tools/testing/selftests/ublk/stripe.c | 26 +++++++++++++++----------- 6 files changed, 58 insertions(+), 44 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/ublk/fault_inject.c b/tools/testing/selftests/ublk/fault_inject.c index c980958ec045..b4e9355e0eab 100644 --- a/tools/testing/selftests/ublk/fault_inject.c +++ b/tools/testing/selftests/ublk/fault_inject.c @@ -38,7 +38,8 @@ static int ublk_fault_inject_tgt_init(const struct dev_ctx *ctx, return 0; } -static int ublk_fault_inject_queue_io(struct ublk_queue *q, int tag) +static int ublk_fault_inject_queue_io(struct ublk_thread *t, + struct ublk_queue *q, int tag) { const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); struct io_uring_sqe *sqe; @@ -46,7 +47,7 @@ static int ublk_fault_inject_queue_io(struct ublk_queue *q, int tag) .tv_nsec = (long long)q->dev->private_data, }; - ublk_io_alloc_sqes(ublk_get_io(q, tag), &sqe, 1); + ublk_io_alloc_sqes(t, &sqe, 1); io_uring_prep_timeout(sqe, &ts, 1, 0); sqe->user_data = build_user_data(tag, ublksrv_get_op(iod), 0, q->q_id, 1); @@ -55,7 +56,8 @@ static int ublk_fault_inject_queue_io(struct ublk_queue *q, int tag) return 0; } -static void ublk_fault_inject_tgt_io_done(struct ublk_queue *q, +static void ublk_fault_inject_tgt_io_done(struct ublk_thread *t, + struct ublk_queue *q, const struct io_uring_cqe *cqe) { unsigned tag = user_data_to_tag(cqe->user_data); diff --git a/tools/testing/selftests/ublk/file_backed.c b/tools/testing/selftests/ublk/file_backed.c index 02fb8a411d3b..eeac7af2230f 100644 --- a/tools/testing/selftests/ublk/file_backed.c +++ b/tools/testing/selftests/ublk/file_backed.c @@ -13,12 +13,13 @@ static enum io_uring_op ublk_to_uring_op(const struct ublksrv_io_desc *iod, int assert(0); } -static int loop_queue_flush_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag) +static int loop_queue_flush_io(struct ublk_thread *t, struct ublk_queue *q, + const struct ublksrv_io_desc *iod, int tag) { unsigned ublk_op = ublksrv_get_op(iod); struct io_uring_sqe *sqe[1]; - ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 1); + ublk_io_alloc_sqes(t, sqe, 1); io_uring_prep_fsync(sqe[0], 1 /*fds[1]*/, IORING_FSYNC_DATASYNC); io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE); /* bit63 marks us as tgt io */ @@ -26,7 +27,8 @@ static int loop_queue_flush_io(struct ublk_queue *q, const struct ublksrv_io_des return 1; } -static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag) +static int loop_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q, + const struct ublksrv_io_desc *iod, int tag) { unsigned ublk_op = ublksrv_get_op(iod); unsigned zc = ublk_queue_use_zc(q); @@ -36,7 +38,7 @@ static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_de void *addr = (zc | auto_zc) ? NULL : (void *)iod->addr; if (!zc || auto_zc) { - ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 1); + ublk_io_alloc_sqes(t, sqe, 1); if (!sqe[0]) return -ENOMEM; @@ -52,7 +54,7 @@ static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_de return 1; } - ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 3); + ublk_io_alloc_sqes(t, sqe, 3); io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index); sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK; @@ -72,7 +74,7 @@ static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_de return 2; } -static int loop_queue_tgt_io(struct ublk_queue *q, int tag) +static int loop_queue_tgt_io(struct ublk_thread *t, struct ublk_queue *q, int tag) { const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); unsigned ublk_op = ublksrv_get_op(iod); @@ -80,7 +82,7 @@ static int loop_queue_tgt_io(struct ublk_queue *q, int tag) switch (ublk_op) { case UBLK_IO_OP_FLUSH: - ret = loop_queue_flush_io(q, iod, tag); + ret = loop_queue_flush_io(t, q, iod, tag); break; case UBLK_IO_OP_WRITE_ZEROES: case UBLK_IO_OP_DISCARD: @@ -88,7 +90,7 @@ static int loop_queue_tgt_io(struct ublk_queue *q, int tag) break; case UBLK_IO_OP_READ: case UBLK_IO_OP_WRITE: - ret = loop_queue_tgt_rw_io(q, iod, tag); + ret = loop_queue_tgt_rw_io(t, q, iod, tag); break; default: ret = -EINVAL; @@ -100,15 +102,16 @@ static int loop_queue_tgt_io(struct ublk_queue *q, int tag) return ret; } -static int ublk_loop_queue_io(struct ublk_queue *q, int tag) +static int ublk_loop_queue_io(struct ublk_thread *t, struct ublk_queue *q, + int tag) { - int queued = loop_queue_tgt_io(q, tag); + int queued = loop_queue_tgt_io(t, q, tag); ublk_queued_tgt_io(q, tag, queued); return 0; } -static void ublk_loop_io_done(struct ublk_queue *q, +static void ublk_loop_io_done(struct ublk_thread *t, struct ublk_queue *q, const struct io_uring_cqe *cqe) { unsigned tag = user_data_to_tag(cqe->user_data); diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index fba4e80e9bab..180b6da08eab 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -620,7 +620,7 @@ int ublk_queue_io_cmd(struct ublk_io *io) if (io_uring_sq_space_left(&t->ring) < 1) io_uring_submit(&t->ring); - ublk_io_alloc_sqes(io, sqe, 1); + ublk_io_alloc_sqes(t, sqe, 1); if (!sqe[0]) { ublk_err("%s: run out of sqe. thread %u, tag %d\n", __func__, t->idx, io->tag); @@ -714,8 +714,9 @@ static int ublk_thread_is_done(struct ublk_thread *t) return (t->state & UBLKSRV_THREAD_STOPPING) && ublk_thread_is_idle(t); } -static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q, - struct io_uring_cqe *cqe) +static inline void ublksrv_handle_tgt_cqe(struct ublk_thread *t, + struct ublk_queue *q, + struct io_uring_cqe *cqe) { if (cqe->res < 0 && cqe->res != -EAGAIN) ublk_err("%s: failed tgt io: res %d qid %u tag %u, cmd_op %u\n", @@ -724,7 +725,7 @@ static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q, user_data_to_op(cqe->user_data)); if (q->tgt_ops->tgt_io_done) - q->tgt_ops->tgt_io_done(q, cqe); + q->tgt_ops->tgt_io_done(t, q, cqe); } static void ublk_handle_cqe(struct ublk_thread *t, @@ -751,7 +752,7 @@ static void ublk_handle_cqe(struct ublk_thread *t, /* Don't retrieve io in case of target io */ if (is_target_io(cqe->user_data)) { - ublksrv_handle_tgt_cqe(q, cqe); + ublksrv_handle_tgt_cqe(t, q, cqe); return; } @@ -766,7 +767,7 @@ static void ublk_handle_cqe(struct ublk_thread *t, if (cqe->res == UBLK_IO_RES_OK) { assert(tag < q->q_depth); if (q->tgt_ops->queue_io) - q->tgt_ops->queue_io(q, tag); + q->tgt_ops->queue_io(t, q, tag); } else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) { io->flags |= UBLKSRV_NEED_GET_DATA | UBLKSRV_IO_FREE; ublk_queue_io_cmd(io); diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index d7b711e63822..91e0286c5ca6 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -144,8 +144,9 @@ struct ublk_tgt_ops { int (*init_tgt)(const struct dev_ctx *ctx, struct ublk_dev *); void (*deinit_tgt)(struct ublk_dev *); - int (*queue_io)(struct ublk_queue *, int tag); - void (*tgt_io_done)(struct ublk_queue *, const struct io_uring_cqe *); + int (*queue_io)(struct ublk_thread *, struct ublk_queue *, int tag); + void (*tgt_io_done)(struct ublk_thread *, struct ublk_queue *, + const struct io_uring_cqe *); /* * Target specific command line handling @@ -313,10 +314,10 @@ static inline struct ublk_queue *ublk_io_to_queue(const struct ublk_io *io) return container_of(io, struct ublk_queue, ios[io->tag]); } -static inline int ublk_io_alloc_sqes(struct ublk_io *io, +static inline int ublk_io_alloc_sqes(struct ublk_thread *t, struct io_uring_sqe *sqes[], int nr_sqes) { - struct io_uring *ring = &io->t->ring; + struct io_uring *ring = &t->ring; unsigned left = io_uring_sq_space_left(ring); int i; diff --git a/tools/testing/selftests/ublk/null.c b/tools/testing/selftests/ublk/null.c index ea3da53437e9..e29a005fc1cc 100644 --- a/tools/testing/selftests/ublk/null.c +++ b/tools/testing/selftests/ublk/null.c @@ -55,12 +55,13 @@ static void __setup_nop_io(int tag, const struct ublksrv_io_desc *iod, sqe->user_data = build_user_data(tag, ublk_op, 0, q_id, 1); } -static int null_queue_zc_io(struct ublk_queue *q, int tag) +static int null_queue_zc_io(struct ublk_thread *t, struct ublk_queue *q, + int tag) { const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); struct io_uring_sqe *sqe[3]; - ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 3); + ublk_io_alloc_sqes(t, sqe, 3); io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index); sqe[0]->user_data = build_user_data(tag, @@ -77,18 +78,19 @@ static int null_queue_zc_io(struct ublk_queue *q, int tag) return 2; } -static int null_queue_auto_zc_io(struct ublk_queue *q, int tag) +static int null_queue_auto_zc_io(struct ublk_thread *t, struct ublk_queue *q, + int tag) { const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); struct io_uring_sqe *sqe[1]; - ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 1); + ublk_io_alloc_sqes(t, sqe, 1); __setup_nop_io(tag, iod, sqe[0], q->q_id); return 1; } -static void ublk_null_io_done(struct ublk_queue *q, - const struct io_uring_cqe *cqe) +static void ublk_null_io_done(struct ublk_thread *t, struct ublk_queue *q, + const struct io_uring_cqe *cqe) { unsigned tag = user_data_to_tag(cqe->user_data); unsigned op = user_data_to_op(cqe->user_data); @@ -110,7 +112,8 @@ static void ublk_null_io_done(struct ublk_queue *q, ublk_complete_io(q, tag, io->result); } -static int ublk_null_queue_io(struct ublk_queue *q, int tag) +static int ublk_null_queue_io(struct ublk_thread *t, struct ublk_queue *q, + int tag) { const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); unsigned auto_zc = ublk_queue_use_auto_zc(q); @@ -118,9 +121,9 @@ static int ublk_null_queue_io(struct ublk_queue *q, int tag) int queued; if (auto_zc && !ublk_io_auto_zc_fallback(iod)) - queued = null_queue_auto_zc_io(q, tag); + queued = null_queue_auto_zc_io(t, q, tag); else if (zc) - queued = null_queue_zc_io(q, tag); + queued = null_queue_zc_io(t, q, tag); else { ublk_complete_io(q, tag, iod->nr_sectors << 9); return 0; diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c index 53cefffaf32e..462fab7492ce 100644 --- a/tools/testing/selftests/ublk/stripe.c +++ b/tools/testing/selftests/ublk/stripe.c @@ -123,7 +123,8 @@ static inline enum io_uring_op stripe_to_uring_op( assert(0); } -static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag) +static int stripe_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q, + const struct ublksrv_io_desc *iod, int tag) { const struct stripe_conf *conf = get_chunk_shift(q); unsigned auto_zc = (ublk_queue_use_auto_zc(q) != 0); @@ -138,7 +139,7 @@ static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_ io->private_data = s; calculate_stripe_array(conf, iod, s, base); - ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, s->nr + extra); + ublk_io_alloc_sqes(t, sqe, s->nr + extra); if (zc) { io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, io->buf_index); @@ -176,13 +177,14 @@ static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_ return s->nr + zc; } -static int handle_flush(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag) +static int handle_flush(struct ublk_thread *t, struct ublk_queue *q, + const struct ublksrv_io_desc *iod, int tag) { const struct stripe_conf *conf = get_chunk_shift(q); struct io_uring_sqe *sqe[NR_STRIPE]; int i; - ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, conf->nr_files); + ublk_io_alloc_sqes(t, sqe, conf->nr_files); for (i = 0; i < conf->nr_files; i++) { io_uring_prep_fsync(sqe[i], i + 1, IORING_FSYNC_DATASYNC); io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE); @@ -191,7 +193,8 @@ static int handle_flush(struct ublk_queue *q, const struct ublksrv_io_desc *iod, return conf->nr_files; } -static int stripe_queue_tgt_io(struct ublk_queue *q, int tag) +static int stripe_queue_tgt_io(struct ublk_thread *t, struct ublk_queue *q, + int tag) { const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); unsigned ublk_op = ublksrv_get_op(iod); @@ -199,7 +202,7 @@ static int stripe_queue_tgt_io(struct ublk_queue *q, int tag) switch (ublk_op) { case UBLK_IO_OP_FLUSH: - ret = handle_flush(q, iod, tag); + ret = handle_flush(t, q, iod, tag); break; case UBLK_IO_OP_WRITE_ZEROES: case UBLK_IO_OP_DISCARD: @@ -207,7 +210,7 @@ static int stripe_queue_tgt_io(struct ublk_queue *q, int tag) break; case UBLK_IO_OP_READ: case UBLK_IO_OP_WRITE: - ret = stripe_queue_tgt_rw_io(q, iod, tag); + ret = stripe_queue_tgt_rw_io(t, q, iod, tag); break; default: ret = -EINVAL; @@ -218,16 +221,17 @@ static int stripe_queue_tgt_io(struct ublk_queue *q, int tag) return ret; } -static int ublk_stripe_queue_io(struct ublk_queue *q, int tag) +static int ublk_stripe_queue_io(struct ublk_thread *t, struct ublk_queue *q, + int tag) { - int queued = stripe_queue_tgt_io(q, tag); + int queued = stripe_queue_tgt_io(t, q, tag); ublk_queued_tgt_io(q, tag, queued); return 0; } -static void ublk_stripe_io_done(struct ublk_queue *q, - const struct io_uring_cqe *cqe) +static void ublk_stripe_io_done(struct ublk_thread *t, struct ublk_queue *q, + const struct io_uring_cqe *cqe) { unsigned tag = user_data_to_tag(cqe->user_data); const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); -- cgit v1.2.3 From 92dda98424feebb5f9b9135e30219342b80791b3 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 13 Jul 2025 22:34:08 +0800 Subject: selftests: ublk: pass 'ublk_thread *' to more common helpers Pass 'ublk_thread *' to more common helpers, then we can avoid to store this reference into 'struct ublk_io'. Prepare for supporting to handle IO via different task context. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250713143415.2857561-14-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/fault_inject.c | 6 +++--- tools/testing/selftests/ublk/file_backed.c | 6 +++--- tools/testing/selftests/ublk/kublk.c | 11 ++++------- tools/testing/selftests/ublk/kublk.h | 20 +++++++++++--------- tools/testing/selftests/ublk/null.c | 8 ++++---- tools/testing/selftests/ublk/stripe.c | 6 +++--- 6 files changed, 28 insertions(+), 29 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/ublk/fault_inject.c b/tools/testing/selftests/ublk/fault_inject.c index b4e9355e0eab..b227bd78b252 100644 --- a/tools/testing/selftests/ublk/fault_inject.c +++ b/tools/testing/selftests/ublk/fault_inject.c @@ -51,7 +51,7 @@ static int ublk_fault_inject_queue_io(struct ublk_thread *t, io_uring_prep_timeout(sqe, &ts, 1, 0); sqe->user_data = build_user_data(tag, ublksrv_get_op(iod), 0, q->q_id, 1); - ublk_queued_tgt_io(q, tag, 1); + ublk_queued_tgt_io(t, q, tag, 1); return 0; } @@ -66,8 +66,8 @@ static void ublk_fault_inject_tgt_io_done(struct ublk_thread *t, if (cqe->res != -ETIME) ublk_err("%s: unexpected cqe res %d\n", __func__, cqe->res); - if (ublk_completed_tgt_io(q, tag)) - ublk_complete_io(q, tag, iod->nr_sectors << 9); + if (ublk_completed_tgt_io(t, q, tag)) + ublk_complete_io(t, q, tag, iod->nr_sectors << 9); else ublk_err("%s: io not complete after 1 cqe\n", __func__); } diff --git a/tools/testing/selftests/ublk/file_backed.c b/tools/testing/selftests/ublk/file_backed.c index eeac7af2230f..2d93ac860bd5 100644 --- a/tools/testing/selftests/ublk/file_backed.c +++ b/tools/testing/selftests/ublk/file_backed.c @@ -107,7 +107,7 @@ static int ublk_loop_queue_io(struct ublk_thread *t, struct ublk_queue *q, { int queued = loop_queue_tgt_io(t, q, tag); - ublk_queued_tgt_io(q, tag, queued); + ublk_queued_tgt_io(t, q, tag, queued); return 0; } @@ -130,8 +130,8 @@ static void ublk_loop_io_done(struct ublk_thread *t, struct ublk_queue *q, if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF)) io->tgt_ios += 1; - if (ublk_completed_tgt_io(q, tag)) - ublk_complete_io(q, tag, io->result); + if (ublk_completed_tgt_io(t, q, tag)) + ublk_complete_io(t, q, tag, io->result); } static int ublk_loop_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 180b6da08eab..944e0806ba05 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -589,9 +589,8 @@ static void ublk_set_auto_buf_reg(const struct ublk_queue *q, sqe->addr = ublk_auto_buf_reg_to_sqe_addr(&buf); } -int ublk_queue_io_cmd(struct ublk_io *io) +int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io) { - struct ublk_thread *t = io->t; struct ublk_queue *q = ublk_io_to_queue(io); struct ublksrv_io_cmd *cmd; struct io_uring_sqe *sqe[1]; @@ -685,9 +684,8 @@ static void ublk_submit_fetch_commands(struct ublk_thread *t) int tag = i % dinfo->queue_depth; q = &t->dev->q[q_id]; io = &q->ios[tag]; - io->t = t; io->buf_index = j++; - ublk_queue_io_cmd(io); + ublk_queue_io_cmd(t, io); } } else { /* @@ -697,9 +695,8 @@ static void ublk_submit_fetch_commands(struct ublk_thread *t) struct ublk_queue *q = &t->dev->q[t->idx]; for (i = 0; i < q->q_depth; i++) { io = &q->ios[i]; - io->t = t; io->buf_index = i; - ublk_queue_io_cmd(io); + ublk_queue_io_cmd(t, io); } } } @@ -770,7 +767,7 @@ static void ublk_handle_cqe(struct ublk_thread *t, q->tgt_ops->queue_io(t, q, tag); } else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) { io->flags |= UBLKSRV_NEED_GET_DATA | UBLKSRV_IO_FREE; - ublk_queue_io_cmd(io); + ublk_queue_io_cmd(t, io); } else { /* * COMMIT_REQ will be completed immediately since no fetching diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 91e0286c5ca6..a4049984b055 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -136,7 +136,6 @@ struct ublk_io { unsigned short buf_index; unsigned short tgt_ios; void *private_data; - struct ublk_thread *t; }; struct ublk_tgt_ops { @@ -232,7 +231,7 @@ struct ublk_dev { extern unsigned int ublk_dbg_mask; -extern int ublk_queue_io_cmd(struct ublk_io *io); +extern int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io); static inline int ublk_io_auto_zc_fallback(const struct ublksrv_io_desc *iod) @@ -402,33 +401,36 @@ static inline struct ublk_io *ublk_get_io(struct ublk_queue *q, unsigned tag) return &q->ios[tag]; } -static inline int ublk_complete_io(struct ublk_queue *q, unsigned tag, int res) +static inline int ublk_complete_io(struct ublk_thread *t, struct ublk_queue *q, + unsigned tag, int res) { struct ublk_io *io = &q->ios[tag]; ublk_mark_io_done(io, res); - return ublk_queue_io_cmd(io); + return ublk_queue_io_cmd(t, io); } -static inline void ublk_queued_tgt_io(struct ublk_queue *q, unsigned tag, int queued) +static inline void ublk_queued_tgt_io(struct ublk_thread *t, struct ublk_queue *q, + unsigned tag, int queued) { if (queued < 0) - ublk_complete_io(q, tag, queued); + ublk_complete_io(t, q, tag, queued); else { struct ublk_io *io = ublk_get_io(q, tag); - io->t->io_inflight += queued; + t->io_inflight += queued; io->tgt_ios = queued; io->result = 0; } } -static inline int ublk_completed_tgt_io(struct ublk_queue *q, unsigned tag) +static inline int ublk_completed_tgt_io(struct ublk_thread *t, + struct ublk_queue *q, unsigned tag) { struct ublk_io *io = ublk_get_io(q, tag); - io->t->io_inflight--; + t->io_inflight--; return --io->tgt_ios == 0; } diff --git a/tools/testing/selftests/ublk/null.c b/tools/testing/selftests/ublk/null.c index e29a005fc1cc..452dcc369c8b 100644 --- a/tools/testing/selftests/ublk/null.c +++ b/tools/testing/selftests/ublk/null.c @@ -108,8 +108,8 @@ static void ublk_null_io_done(struct ublk_thread *t, struct ublk_queue *q, if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF)) io->tgt_ios += 1; - if (ublk_completed_tgt_io(q, tag)) - ublk_complete_io(q, tag, io->result); + if (ublk_completed_tgt_io(t, q, tag)) + ublk_complete_io(t, q, tag, io->result); } static int ublk_null_queue_io(struct ublk_thread *t, struct ublk_queue *q, @@ -125,10 +125,10 @@ static int ublk_null_queue_io(struct ublk_thread *t, struct ublk_queue *q, else if (zc) queued = null_queue_zc_io(t, q, tag); else { - ublk_complete_io(q, tag, iod->nr_sectors << 9); + ublk_complete_io(t, q, tag, iod->nr_sectors << 9); return 0; } - ublk_queued_tgt_io(q, tag, queued); + ublk_queued_tgt_io(t, q, tag, queued); return 0; } diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c index 462fab7492ce..1fb9b7cc281b 100644 --- a/tools/testing/selftests/ublk/stripe.c +++ b/tools/testing/selftests/ublk/stripe.c @@ -226,7 +226,7 @@ static int ublk_stripe_queue_io(struct ublk_thread *t, struct ublk_queue *q, { int queued = stripe_queue_tgt_io(t, q, tag); - ublk_queued_tgt_io(q, tag, queued); + ublk_queued_tgt_io(t, q, tag, queued); return 0; } @@ -262,13 +262,13 @@ static void ublk_stripe_io_done(struct ublk_thread *t, struct ublk_queue *q, } } - if (ublk_completed_tgt_io(q, tag)) { + if (ublk_completed_tgt_io(t, q, tag)) { int res = io->result; if (!res) res = iod->nr_sectors << 9; - ublk_complete_io(q, tag, res); + ublk_complete_io(t, q, tag, res); free_stripe_array(io->private_data); io->private_data = NULL; -- cgit v1.2.3 From c3a6d48f86da1df277ef4f95f147a7f7f5cd6f44 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 13 Jul 2025 22:34:09 +0800 Subject: selftests: ublk: remove ublk queue self-defined flags Remove ublk queue self-defined flags, and use the uapi flags directly. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250713143415.2857561-15-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.c | 25 +++++++++---------------- tools/testing/selftests/ublk/kublk.h | 22 +++++++++++++++------- tools/testing/selftests/ublk/null.c | 2 +- 3 files changed, 25 insertions(+), 24 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 944e0806ba05..e90260468652 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -441,17 +441,10 @@ static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags) unsigned long off; q->tgt_ops = dev->tgt.ops; - q->state = 0; + q->flags = 0; q->q_depth = depth; - - if (dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_AUTO_BUF_REG)) { - q->state |= UBLKSRV_NO_BUF; - if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) - q->state |= UBLKSRV_ZC; - if (dev->dev_info.flags & UBLK_F_AUTO_BUF_REG) - q->state |= UBLKSRV_AUTO_BUF_REG; - } - q->state |= extra_flags; + q->flags = dev->dev_info.flags; + q->flags |= extra_flags; cmd_buf_size = ublk_queue_cmd_buf_sz(q); off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz(); @@ -469,7 +462,7 @@ static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags) q->ios[i].flags = UBLKSRV_NEED_FETCH_RQ | UBLKSRV_IO_FREE; q->ios[i].tag = i; - if (q->state & UBLKSRV_NO_BUF) + if (ublk_queue_no_buf(q)) continue; if (posix_memalign((void **)&q->ios[i].buf_addr, @@ -583,7 +576,7 @@ static void ublk_set_auto_buf_reg(const struct ublk_queue *q, else buf.index = q->ios[tag].buf_index; - if (q->state & UBLKSRV_AUTO_BUF_REG_FALLBACK) + if (ublk_queue_auto_zc_fallback(q)) buf.flags = UBLK_AUTO_BUF_REG_FALLBACK; sqe->addr = ublk_auto_buf_reg_to_sqe_addr(&buf); @@ -639,12 +632,12 @@ int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io) sqe[0]->rw_flags = 0; cmd->tag = io->tag; cmd->q_id = q->q_id; - if (!(q->state & UBLKSRV_NO_BUF)) + if (!ublk_queue_no_buf(q)) cmd->addr = (__u64) (uintptr_t) io->buf_addr; else cmd->addr = 0; - if (q->state & UBLKSRV_AUTO_BUF_REG) + if (ublk_queue_use_auto_zc(q)) ublk_set_auto_buf_reg(q, sqe[0], io->tag); user_data = build_user_data(io->tag, _IOC_NR(cmd_op), 0, q->q_id, 0); @@ -739,7 +732,7 @@ static void ublk_handle_cqe(struct ublk_thread *t, if (cqe->res < 0 && cqe->res != -ENODEV) ublk_err("%s: res %d userdata %llx queue state %x\n", __func__, - cqe->res, cqe->user_data, q->state); + cqe->res, cqe->user_data, q->flags); ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d/%d) stopping %d\n", __func__, cqe->res, q->q_id, tag, cmd_op, @@ -911,7 +904,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) { const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info; struct ublk_thread_info *tinfo; - unsigned extra_flags = 0; + unsigned long long extra_flags = 0; cpu_set_t *affinity_buf; void *thread_ret; sem_t ready; diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index a4049984b055..9ecb63bc930e 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -178,12 +178,10 @@ struct ublk_queue { const struct ublk_tgt_ops *tgt_ops; struct ublksrv_io_desc *io_cmd_buf; +/* borrow one bit of ublk uapi flags, which may never be used */ +#define UBLKSRV_AUTO_BUF_REG_FALLBACK (1ULL << 63) + __u64 flags; struct ublk_io ios[UBLK_QUEUE_DEPTH]; -#define UBLKSRV_NO_BUF (1U << 2) -#define UBLKSRV_ZC (1U << 3) -#define UBLKSRV_AUTO_BUF_REG (1U << 4) -#define UBLKSRV_AUTO_BUF_REG_FALLBACK (1U << 5) - unsigned state; }; struct ublk_thread { @@ -437,12 +435,22 @@ static inline int ublk_completed_tgt_io(struct ublk_thread *t, static inline int ublk_queue_use_zc(const struct ublk_queue *q) { - return q->state & UBLKSRV_ZC; + return q->flags & UBLK_F_SUPPORT_ZERO_COPY; } static inline int ublk_queue_use_auto_zc(const struct ublk_queue *q) { - return q->state & UBLKSRV_AUTO_BUF_REG; + return q->flags & UBLK_F_AUTO_BUF_REG; +} + +static inline int ublk_queue_auto_zc_fallback(const struct ublk_queue *q) +{ + return q->flags & UBLKSRV_AUTO_BUF_REG_FALLBACK; +} + +static inline int ublk_queue_no_buf(const struct ublk_queue *q) +{ + return ublk_queue_use_zc(q) || ublk_queue_use_auto_zc(q); } extern const struct ublk_tgt_ops null_tgt_ops; diff --git a/tools/testing/selftests/ublk/null.c b/tools/testing/selftests/ublk/null.c index 452dcc369c8b..f0e0003a4860 100644 --- a/tools/testing/selftests/ublk/null.c +++ b/tools/testing/selftests/ublk/null.c @@ -138,7 +138,7 @@ static int ublk_null_queue_io(struct ublk_thread *t, struct ublk_queue *q, */ static unsigned short ublk_null_buf_index(const struct ublk_queue *q, int tag) { - if (q->state & UBLKSRV_AUTO_BUF_REG_FALLBACK) + if (ublk_queue_auto_zc_fallback(q)) return (unsigned short)-1; return q->ios[tag].buf_index; } -- cgit v1.2.3 From a66f89017673881e85e65a460cfdec35da4f07f2 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 13 Jul 2025 22:34:10 +0800 Subject: selftests: ublk: improve flags naming Improve all kinds of flags naming by adding its host structure suffix for making code more readable. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250713143415.2857561-16-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.c | 38 ++++++++++++++++++------------------ tools/testing/selftests/ublk/kublk.h | 23 ++++++++++------------ 2 files changed, 29 insertions(+), 32 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index e90260468652..84307bc12f37 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -459,7 +459,7 @@ static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags) io_buf_size = dev->dev_info.max_io_buf_bytes; for (i = 0; i < q->q_depth; i++) { q->ios[i].buf_addr = NULL; - q->ios[i].flags = UBLKSRV_NEED_FETCH_RQ | UBLKSRV_IO_FREE; + q->ios[i].flags = UBLKS_IO_NEED_FETCH_RQ | UBLKS_IO_FREE; q->ios[i].tag = i; if (ublk_queue_no_buf(q)) @@ -591,7 +591,7 @@ int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io) __u64 user_data; /* only freed io can be issued */ - if (!(io->flags & UBLKSRV_IO_FREE)) + if (!(io->flags & UBLKS_IO_FREE)) return 0; /* @@ -599,14 +599,14 @@ int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io) * getting data */ if (!(io->flags & - (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_NEED_GET_DATA))) + (UBLKS_IO_NEED_FETCH_RQ | UBLKS_IO_NEED_COMMIT_RQ_COMP | UBLKS_IO_NEED_GET_DATA))) return 0; - if (io->flags & UBLKSRV_NEED_GET_DATA) + if (io->flags & UBLKS_IO_NEED_GET_DATA) cmd_op = UBLK_U_IO_NEED_GET_DATA; - else if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP) + else if (io->flags & UBLKS_IO_NEED_COMMIT_RQ_COMP) cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ; - else if (io->flags & UBLKSRV_NEED_FETCH_RQ) + else if (io->flags & UBLKS_IO_NEED_FETCH_RQ) cmd_op = UBLK_U_IO_FETCH_REQ; if (io_uring_sq_space_left(&t->ring) < 1) @@ -649,7 +649,7 @@ int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io) ublk_dbg(UBLK_DBG_IO_CMD, "%s: (thread %u qid %d tag %u cmd_op %u) iof %x stopping %d\n", __func__, t->idx, q->q_id, io->tag, cmd_op, - io->flags, !!(t->state & UBLKSRV_THREAD_STOPPING)); + io->flags, !!(t->state & UBLKS_T_STOPPING)); return 1; } @@ -701,7 +701,7 @@ static int ublk_thread_is_idle(struct ublk_thread *t) static int ublk_thread_is_done(struct ublk_thread *t) { - return (t->state & UBLKSRV_THREAD_STOPPING) && ublk_thread_is_idle(t); + return (t->state & UBLKS_T_STOPPING) && ublk_thread_is_idle(t); } static inline void ublksrv_handle_tgt_cqe(struct ublk_thread *t, @@ -727,7 +727,7 @@ static void ublk_handle_cqe(struct ublk_thread *t, unsigned tag = user_data_to_tag(cqe->user_data); unsigned cmd_op = user_data_to_op(cqe->user_data); int fetch = (cqe->res != UBLK_IO_RES_ABORT) && - !(t->state & UBLKSRV_THREAD_STOPPING); + !(t->state & UBLKS_T_STOPPING); struct ublk_io *io; if (cqe->res < 0 && cqe->res != -ENODEV) @@ -738,7 +738,7 @@ static void ublk_handle_cqe(struct ublk_thread *t, __func__, cqe->res, q->q_id, tag, cmd_op, is_target_io(cqe->user_data), user_data_to_tgt_data(cqe->user_data), - (t->state & UBLKSRV_THREAD_STOPPING)); + (t->state & UBLKS_T_STOPPING)); /* Don't retrieve io in case of target io */ if (is_target_io(cqe->user_data)) { @@ -750,8 +750,8 @@ static void ublk_handle_cqe(struct ublk_thread *t, t->cmd_inflight--; if (!fetch) { - t->state |= UBLKSRV_THREAD_STOPPING; - io->flags &= ~UBLKSRV_NEED_FETCH_RQ; + t->state |= UBLKS_T_STOPPING; + io->flags &= ~UBLKS_IO_NEED_FETCH_RQ; } if (cqe->res == UBLK_IO_RES_OK) { @@ -759,7 +759,7 @@ static void ublk_handle_cqe(struct ublk_thread *t, if (q->tgt_ops->queue_io) q->tgt_ops->queue_io(t, q, tag); } else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) { - io->flags |= UBLKSRV_NEED_GET_DATA | UBLKSRV_IO_FREE; + io->flags |= UBLKS_IO_NEED_GET_DATA | UBLKS_IO_FREE; ublk_queue_io_cmd(t, io); } else { /* @@ -767,10 +767,10 @@ static void ublk_handle_cqe(struct ublk_thread *t, * piggyback is required. * * Marking IO_FREE only, then this io won't be issued since - * we only issue io with (UBLKSRV_IO_FREE | UBLKSRV_NEED_*) + * we only issue io with (UBLKS_IO_FREE | UBLKSRV_NEED_*) * * */ - io->flags = UBLKSRV_IO_FREE; + io->flags = UBLKS_IO_FREE; } } @@ -797,7 +797,7 @@ static int ublk_process_io(struct ublk_thread *t) t->dev->dev_info.dev_id, t->idx, io_uring_sq_ready(&t->ring), t->cmd_inflight, - (t->state & UBLKSRV_THREAD_STOPPING)); + (t->state & UBLKS_T_STOPPING)); if (ublk_thread_is_done(t)) return -ENODEV; @@ -806,8 +806,8 @@ static int ublk_process_io(struct ublk_thread *t) reapped = ublk_reap_events_uring(t); ublk_dbg(UBLK_DBG_THREAD, "submit result %d, reapped %d stop %d idle %d\n", - ret, reapped, (t->state & UBLKSRV_THREAD_STOPPING), - (t->state & UBLKSRV_THREAD_IDLE)); + ret, reapped, (t->state & UBLKS_T_STOPPING), + (t->state & UBLKS_T_IDLE)); return reapped; } @@ -926,7 +926,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) return ret; if (ctx->auto_zc_fallback) - extra_flags = UBLKSRV_AUTO_BUF_REG_FALLBACK; + extra_flags = UBLKS_Q_AUTO_BUF_REG_FALLBACK; for (i = 0; i < dinfo->nr_hw_queues; i++) { dev->q[i].dev = dev; diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 9ecb63bc930e..c668472097ff 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -45,9 +45,6 @@ #define UBLK_CTRL_RING_DEPTH 32 #define ERROR_EVTFD_DEVID -2 -/* queue idle timeout */ -#define UBLKSRV_IO_IDLE_SECS 20 - #define UBLK_IO_MAX_BYTES (1 << 20) #define UBLK_MAX_QUEUES_SHIFT 5 #define UBLK_MAX_QUEUES (1 << UBLK_MAX_QUEUES_SHIFT) @@ -121,11 +118,11 @@ struct ublk_ctrl_cmd_data { struct ublk_io { char *buf_addr; -#define UBLKSRV_NEED_FETCH_RQ (1UL << 0) -#define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1) -#define UBLKSRV_IO_FREE (1UL << 2) -#define UBLKSRV_NEED_GET_DATA (1UL << 3) -#define UBLKSRV_NEED_REG_BUF (1UL << 4) +#define UBLKS_IO_NEED_FETCH_RQ (1UL << 0) +#define UBLKS_IO_NEED_COMMIT_RQ_COMP (1UL << 1) +#define UBLKS_IO_FREE (1UL << 2) +#define UBLKS_IO_NEED_GET_DATA (1UL << 3) +#define UBLKS_IO_NEED_REG_BUF (1UL << 4) unsigned short flags; unsigned short refs; /* used by target code only */ @@ -179,7 +176,7 @@ struct ublk_queue { struct ublksrv_io_desc *io_cmd_buf; /* borrow one bit of ublk uapi flags, which may never be used */ -#define UBLKSRV_AUTO_BUF_REG_FALLBACK (1ULL << 63) +#define UBLKS_Q_AUTO_BUF_REG_FALLBACK (1ULL << 63) __u64 flags; struct ublk_io ios[UBLK_QUEUE_DEPTH]; }; @@ -193,8 +190,8 @@ struct ublk_thread { pthread_t thread; unsigned idx; -#define UBLKSRV_THREAD_STOPPING (1U << 0) -#define UBLKSRV_THREAD_IDLE (1U << 1) +#define UBLKS_T_STOPPING (1U << 0) +#define UBLKS_T_IDLE (1U << 1) unsigned state; }; @@ -377,7 +374,7 @@ static inline int ublk_get_io_res(const struct ublk_queue *q, unsigned tag) static inline void ublk_mark_io_done(struct ublk_io *io, int res) { - io->flags |= (UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_IO_FREE); + io->flags |= (UBLKS_IO_NEED_COMMIT_RQ_COMP | UBLKS_IO_FREE); io->result = res; } @@ -445,7 +442,7 @@ static inline int ublk_queue_use_auto_zc(const struct ublk_queue *q) static inline int ublk_queue_auto_zc_fallback(const struct ublk_queue *q) { - return q->flags & UBLKSRV_AUTO_BUF_REG_FALLBACK; + return q->flags & UBLKS_Q_AUTO_BUF_REG_FALLBACK; } static inline int ublk_queue_no_buf(const struct ublk_queue *q) -- cgit v1.2.3 From c1dc9b0d9e48380c868acd338c8912ebb7d75f0a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 13 Jul 2025 22:34:11 +0800 Subject: selftests: ublk: add helper ublk_handle_uring_cmd() for handle ublk command Add helper ublk_handle_uring_cmd() for handling ublk command, and make ublk_handle_cqe() more readable. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250713143415.2857561-17-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.c | 61 ++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 27 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 84307bc12f37..95188065b2e9 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -718,36 +718,14 @@ static inline void ublksrv_handle_tgt_cqe(struct ublk_thread *t, q->tgt_ops->tgt_io_done(t, q, cqe); } -static void ublk_handle_cqe(struct ublk_thread *t, - struct io_uring_cqe *cqe, void *data) +static void ublk_handle_uring_cmd(struct ublk_thread *t, + struct ublk_queue *q, + const struct io_uring_cqe *cqe) { - struct ublk_dev *dev = t->dev; - unsigned q_id = user_data_to_q_id(cqe->user_data); - struct ublk_queue *q = &dev->q[q_id]; - unsigned tag = user_data_to_tag(cqe->user_data); - unsigned cmd_op = user_data_to_op(cqe->user_data); int fetch = (cqe->res != UBLK_IO_RES_ABORT) && !(t->state & UBLKS_T_STOPPING); - struct ublk_io *io; - - if (cqe->res < 0 && cqe->res != -ENODEV) - ublk_err("%s: res %d userdata %llx queue state %x\n", __func__, - cqe->res, cqe->user_data, q->flags); - - ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d/%d) stopping %d\n", - __func__, cqe->res, q->q_id, tag, cmd_op, - is_target_io(cqe->user_data), - user_data_to_tgt_data(cqe->user_data), - (t->state & UBLKS_T_STOPPING)); - - /* Don't retrieve io in case of target io */ - if (is_target_io(cqe->user_data)) { - ublksrv_handle_tgt_cqe(t, q, cqe); - return; - } - - io = &q->ios[tag]; - t->cmd_inflight--; + unsigned tag = user_data_to_tag(cqe->user_data); + struct ublk_io *io = &q->ios[tag]; if (!fetch) { t->state |= UBLKS_T_STOPPING; @@ -774,6 +752,35 @@ static void ublk_handle_cqe(struct ublk_thread *t, } } +static void ublk_handle_cqe(struct ublk_thread *t, + struct io_uring_cqe *cqe, void *data) +{ + struct ublk_dev *dev = t->dev; + unsigned q_id = user_data_to_q_id(cqe->user_data); + struct ublk_queue *q = &dev->q[q_id]; + unsigned cmd_op = user_data_to_op(cqe->user_data); + + if (cqe->res < 0 && cqe->res != -ENODEV) + ublk_err("%s: res %d userdata %llx queue state %x\n", __func__, + cqe->res, cqe->user_data, q->flags); + + ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d/%d) stopping %d\n", + __func__, cqe->res, q->q_id, user_data_to_tag(cqe->user_data), + cmd_op, is_target_io(cqe->user_data), + user_data_to_tgt_data(cqe->user_data), + (t->state & UBLKS_T_STOPPING)); + + /* Don't retrieve io in case of target io */ + if (is_target_io(cqe->user_data)) { + ublksrv_handle_tgt_cqe(t, q, cqe); + return; + } + + t->cmd_inflight--; + + ublk_handle_uring_cmd(t, q, cqe); +} + static int ublk_reap_events_uring(struct ublk_thread *t) { struct io_uring_cqe *cqe; -- cgit v1.2.3 From e56828f4df139b49cb837198ef8f3d2f13db65cb Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 13 Jul 2025 22:34:12 +0800 Subject: selftests: ublk: add utils.h Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250713143415.2857561-18-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.h | 64 ++------------------------------- tools/testing/selftests/ublk/utils.h | 70 ++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 62 deletions(-) create mode 100644 tools/testing/selftests/ublk/utils.h (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index c668472097ff..219233f8a053 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -29,13 +29,9 @@ #include "ublk_dep.h" #include -#define __maybe_unused __attribute__((unused)) -#define MAX_BACK_FILES 4 -#ifndef min -#define min(a, b) ((a) < (b) ? (a) : (b)) -#endif +#include "utils.h" -#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) +#define MAX_BACK_FILES 4 /****************** part 1: libublk ********************/ @@ -52,13 +48,6 @@ #define UBLK_MAX_THREADS (1 << UBLK_MAX_THREADS_SHIFT) #define UBLK_QUEUE_DEPTH 1024 -#define UBLK_DBG_DEV (1U << 0) -#define UBLK_DBG_THREAD (1U << 1) -#define UBLK_DBG_IO_CMD (1U << 2) -#define UBLK_DBG_IO (1U << 3) -#define UBLK_DBG_CTRL_CMD (1U << 4) -#define UBLK_LOG (1U << 5) - struct ublk_dev; struct ublk_queue; struct ublk_thread; @@ -211,21 +200,6 @@ struct ublk_dev { void *private_data; }; -#ifndef offsetof -#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) -#endif - -#ifndef container_of -#define container_of(ptr, type, member) ({ \ - unsigned long __mptr = (unsigned long)(ptr); \ - ((type *)(__mptr - offsetof(type, member))); }) -#endif - -#define round_up(val, rnd) \ - (((val) + ((rnd) - 1)) & ~((rnd) - 1)) - - -extern unsigned int ublk_dbg_mask; extern int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io); @@ -275,34 +249,6 @@ static inline unsigned short ublk_cmd_op_nr(unsigned int op) return _IOC_NR(op); } -static inline void ublk_err(const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); -} - -static inline void ublk_log(const char *fmt, ...) -{ - if (ublk_dbg_mask & UBLK_LOG) { - va_list ap; - - va_start(ap, fmt); - vfprintf(stdout, fmt, ap); - } -} - -static inline void ublk_dbg(int level, const char *fmt, ...) -{ - if (level & ublk_dbg_mask) { - va_list ap; - - va_start(ap, fmt); - vfprintf(stdout, fmt, ap); - } -} - static inline struct ublk_queue *ublk_io_to_queue(const struct ublk_io *io) { return container_of(io, struct ublk_queue, ios[io->tag]); @@ -458,10 +404,4 @@ extern const struct ublk_tgt_ops fault_inject_tgt_ops; void backing_file_tgt_deinit(struct ublk_dev *dev); int backing_file_tgt_init(struct ublk_dev *dev); -static inline unsigned int ilog2(unsigned int x) -{ - if (x == 0) - return 0; - return (sizeof(x) * 8 - 1) - __builtin_clz(x); -} #endif diff --git a/tools/testing/selftests/ublk/utils.h b/tools/testing/selftests/ublk/utils.h new file mode 100644 index 000000000000..36545d1567f1 --- /dev/null +++ b/tools/testing/selftests/ublk/utils.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef KUBLK_UTILS_H +#define KUBLK_UTILS_H + +#define __maybe_unused __attribute__((unused)) + +#ifndef min +#define min(a, b) ((a) < (b) ? (a) : (b)) +#endif + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) + +#ifndef offsetof +#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) +#endif + +#ifndef container_of +#define container_of(ptr, type, member) ({ \ + unsigned long __mptr = (unsigned long)(ptr); \ + ((type *)(__mptr - offsetof(type, member))); }) +#endif + +#define round_up(val, rnd) \ + (((val) + ((rnd) - 1)) & ~((rnd) - 1)) + +static inline unsigned int ilog2(unsigned int x) +{ + if (x == 0) + return 0; + return (sizeof(x) * 8 - 1) - __builtin_clz(x); +} + +#define UBLK_DBG_DEV (1U << 0) +#define UBLK_DBG_THREAD (1U << 1) +#define UBLK_DBG_IO_CMD (1U << 2) +#define UBLK_DBG_IO (1U << 3) +#define UBLK_DBG_CTRL_CMD (1U << 4) +#define UBLK_LOG (1U << 5) + +extern unsigned int ublk_dbg_mask; + +static inline void ublk_err(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); +} + +static inline void ublk_log(const char *fmt, ...) +{ + if (ublk_dbg_mask & UBLK_LOG) { + va_list ap; + + va_start(ap, fmt); + vfprintf(stdout, fmt, ap); + } +} + +static inline void ublk_dbg(int level, const char *fmt, ...) +{ + if (level & ublk_dbg_mask) { + va_list ap; + + va_start(ap, fmt); + vfprintf(stdout, fmt, ap); + } +} + +#endif -- cgit v1.2.3 From e860a98c8aebd8de82c0ee901acf5a759acd4570 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 15 Jul 2025 11:59:10 -0700 Subject: selftests/bpf: Fix build error due to certain uninitialized variables With the latest llvm21 compiler, I hit several errors when building bpf selftests. Some of errors look like below: test_maps.c:565:40: error: variable 'val' is uninitialized when passed as a const pointer argument here [-Werror,-Wuninitialized-const-pointer] 565 | assert(bpf_map_update_elem(fd, NULL, &val, 0) < 0 && | ^~~ prog_tests/bpf_iter.c:400:25: error: variable 'c' is uninitialized when passed as a const pointer argument here [-Werror,-Wuninitialized-const-pointer] 400 | write(finish_pipe[1], &c, 1); | ^ Some other errors have similar the pattern as the above. These errors are fixed by initializing those variables properly. Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250715185910.3659447-1-yonghong.song@linux.dev --- tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c | 2 +- tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 2 +- tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c | 2 +- tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c | 2 +- tools/testing/selftests/bpf/test_maps.c | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c index 67557cda2208..42b49870e520 100644 --- a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c +++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c @@ -13,7 +13,7 @@ static void test_fail_cases(void) { LIBBPF_OPTS(bpf_map_create_opts, opts); - __u32 value; + __u32 value = 0; int fd, err; /* Invalid key size */ diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index add4a18c33bd..5225d69bf79b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -323,7 +323,7 @@ static void test_task_pidfd(void) static void test_task_sleepable(void) { struct bpf_iter_tasks *skel; - int pid, status, err, data_pipe[2], finish_pipe[2], c; + int pid, status, err, data_pipe[2], finish_pipe[2], c = 0; char *test_data = NULL; char *test_data_long = NULL; char *data[2]; diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c index c397336fe1ed..b17dc39a23db 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c @@ -251,7 +251,7 @@ static void test_uretprobe_syscall_call(void) .retprobe = true, ); struct uprobe_syscall_executed *skel; - int pid, status, err, go[2], c; + int pid, status, err, go[2], c = 0; if (!ASSERT_OK(pipe(go), "pipe")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c index ab0f02faa80c..4d69d9d55e17 100644 --- a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c +++ b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c @@ -268,7 +268,7 @@ static void test_verify_pkcs7_sig_from_map(void) char *tmp_dir; struct test_verify_pkcs7_sig *skel = NULL; struct bpf_map *map; - struct data data; + struct data data = {}; int ret, zero = 0; /* Trigger creation of session keyring. */ diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 986ce32b113a..3fae9ce46ca9 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -535,7 +535,7 @@ static void test_devmap_hash(unsigned int task, void *data) static void test_queuemap(unsigned int task, void *data) { const int MAP_SIZE = 32; - __u32 vals[MAP_SIZE + MAP_SIZE/2], val; + __u32 vals[MAP_SIZE + MAP_SIZE/2], val = 0; int fd, i; /* Fill test values to be used */ @@ -591,7 +591,7 @@ static void test_queuemap(unsigned int task, void *data) static void test_stackmap(unsigned int task, void *data) { const int MAP_SIZE = 32; - __u32 vals[MAP_SIZE + MAP_SIZE/2], val; + __u32 vals[MAP_SIZE + MAP_SIZE/2], val = 0; int fd, i; /* Fill test values to be used */ -- cgit v1.2.3 From 9a4071807909c8aafcb598bb807fdc1a1eb3c214 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 14 Jul 2025 13:26:32 +0100 Subject: KVM: arm64: selftests: get-reg-list: Simplify feature dependency Describing the dependencies between registers and features is on the masochistic side of things, with hard-coded values that would be better taken from the existing description. Add a couple of helpers to that effect, and repaint the dependency array. More could be done to improve this test, but my interest is wearing thin... Signed-off-by: Marc Zyngier Tested-by: Itaru Kitayama Link: https://lore.kernel.org/r/20250714122634.3334816-10-maz@kernel.org Signed-off-by: Oliver Upton --- tools/testing/selftests/kvm/arm64/get-reg-list.c | 52 ++++++++++-------------- 1 file changed, 22 insertions(+), 30 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c index d01798b6b3b4..a35b01d08cc6 100644 --- a/tools/testing/selftests/kvm/arm64/get-reg-list.c +++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c @@ -15,6 +15,12 @@ #include "test_util.h" #include "processor.h" +#define SYS_REG(r) ARM64_SYS_REG(sys_reg_Op0(SYS_ ## r), \ + sys_reg_Op1(SYS_ ## r), \ + sys_reg_CRn(SYS_ ## r), \ + sys_reg_CRm(SYS_ ## r), \ + sys_reg_Op2(SYS_ ## r)) + struct feature_id_reg { __u64 reg; __u64 id_reg; @@ -22,37 +28,23 @@ struct feature_id_reg { __u64 feat_min; }; -static struct feature_id_reg feat_id_regs[] = { - { - ARM64_SYS_REG(3, 0, 2, 0, 3), /* TCR2_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 0, - 1 - }, - { - ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 8, - 1 - }, - { - ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 8, - 1 - }, - { - ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 16, - 1 - }, - { - ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 16, - 1 +#define FEAT(id, f, v) \ + .id_reg = SYS_REG(id), \ + .feat_shift = id ## _ ## f ## _SHIFT, \ + .feat_min = id ## _ ## f ## _ ## v + +#define REG_FEAT(r, id, f, v) \ + { \ + .reg = SYS_REG(r), \ + FEAT(id, f, v) \ } + +static struct feature_id_reg feat_id_regs[] = { + REG_FEAT(TCR2_EL1, ID_AA64MMFR3_EL1, TCRX, IMP), + REG_FEAT(PIRE0_EL1, ID_AA64MMFR3_EL1, S1PIE, IMP), + REG_FEAT(PIR_EL1, ID_AA64MMFR3_EL1, S1PIE, IMP), + REG_FEAT(POR_EL1, ID_AA64MMFR3_EL1, S1POE, IMP), + REG_FEAT(POR_EL0, ID_AA64MMFR3_EL1, S1POE, IMP), }; bool filter_reg(__u64 reg) -- cgit v1.2.3 From 3a90b6f2796493a0e2f989361e2b7d5f9de2f451 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 14 Jul 2025 13:26:33 +0100 Subject: KVM: arm64: selftests: get-reg-list: Add base EL2 registers Add the EL2 registers and the eventual dependencies, effectively doubling the number of test vectors. Oh well. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20250714122634.3334816-11-maz@kernel.org Signed-off-by: Oliver Upton --- tools/testing/selftests/kvm/arm64/get-reg-list.c | 145 +++++++++++++++++++++++ 1 file changed, 145 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c index a35b01d08cc6..996d78ee3548 100644 --- a/tools/testing/selftests/kvm/arm64/get-reg-list.c +++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c @@ -41,10 +41,27 @@ struct feature_id_reg { static struct feature_id_reg feat_id_regs[] = { REG_FEAT(TCR2_EL1, ID_AA64MMFR3_EL1, TCRX, IMP), + REG_FEAT(TCR2_EL2, ID_AA64MMFR3_EL1, TCRX, IMP), REG_FEAT(PIRE0_EL1, ID_AA64MMFR3_EL1, S1PIE, IMP), + REG_FEAT(PIRE0_EL2, ID_AA64MMFR3_EL1, S1PIE, IMP), REG_FEAT(PIR_EL1, ID_AA64MMFR3_EL1, S1PIE, IMP), + REG_FEAT(PIR_EL2, ID_AA64MMFR3_EL1, S1PIE, IMP), REG_FEAT(POR_EL1, ID_AA64MMFR3_EL1, S1POE, IMP), REG_FEAT(POR_EL0, ID_AA64MMFR3_EL1, S1POE, IMP), + REG_FEAT(POR_EL2, ID_AA64MMFR3_EL1, S1POE, IMP), + REG_FEAT(HCRX_EL2, ID_AA64MMFR1_EL1, HCX, IMP), + REG_FEAT(HFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HFGWTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HFGITR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HDFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HDFGWTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HAFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HFGRTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(HFGWTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(HFGITR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(HDFGRTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(HDFGWTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(ZCR_EL2, ID_AA64PFR0_EL1, SVE, IMP), }; bool filter_reg(__u64 reg) @@ -678,6 +695,60 @@ static __u64 pauth_generic_regs[] = { ARM64_SYS_REG(3, 0, 2, 3, 1), /* APGAKEYHI_EL1 */ }; +static __u64 el2_regs[] = { + SYS_REG(VPIDR_EL2), + SYS_REG(VMPIDR_EL2), + SYS_REG(SCTLR_EL2), + SYS_REG(ACTLR_EL2), + SYS_REG(HCR_EL2), + SYS_REG(MDCR_EL2), + SYS_REG(CPTR_EL2), + SYS_REG(HSTR_EL2), + SYS_REG(HFGRTR_EL2), + SYS_REG(HFGWTR_EL2), + SYS_REG(HFGITR_EL2), + SYS_REG(HACR_EL2), + SYS_REG(ZCR_EL2), + SYS_REG(HCRX_EL2), + SYS_REG(TTBR0_EL2), + SYS_REG(TTBR1_EL2), + SYS_REG(TCR_EL2), + SYS_REG(TCR2_EL2), + SYS_REG(VTTBR_EL2), + SYS_REG(VTCR_EL2), + SYS_REG(VNCR_EL2), + SYS_REG(HDFGRTR2_EL2), + SYS_REG(HDFGWTR2_EL2), + SYS_REG(HFGRTR2_EL2), + SYS_REG(HFGWTR2_EL2), + SYS_REG(HDFGRTR_EL2), + SYS_REG(HDFGWTR_EL2), + SYS_REG(HAFGRTR_EL2), + SYS_REG(HFGITR2_EL2), + SYS_REG(SPSR_EL2), + SYS_REG(ELR_EL2), + SYS_REG(AFSR0_EL2), + SYS_REG(AFSR1_EL2), + SYS_REG(ESR_EL2), + SYS_REG(FAR_EL2), + SYS_REG(HPFAR_EL2), + SYS_REG(MAIR_EL2), + SYS_REG(PIRE0_EL2), + SYS_REG(PIR_EL2), + SYS_REG(POR_EL2), + SYS_REG(AMAIR_EL2), + SYS_REG(VBAR_EL2), + SYS_REG(CONTEXTIDR_EL2), + SYS_REG(TPIDR_EL2), + SYS_REG(CNTVOFF_EL2), + SYS_REG(CNTHCTL_EL2), + SYS_REG(CNTHP_CTL_EL2), + SYS_REG(CNTHP_CVAL_EL2), + SYS_REG(CNTHV_CTL_EL2), + SYS_REG(CNTHV_CVAL_EL2), + SYS_REG(SP_EL2), +}; + #define BASE_SUBLIST \ { "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), } #define VREGS_SUBLIST \ @@ -704,6 +775,14 @@ static __u64 pauth_generic_regs[] = { .regs = pauth_generic_regs, \ .regs_n = ARRAY_SIZE(pauth_generic_regs), \ } +#define EL2_SUBLIST \ + { \ + .name = "EL2", \ + .capability = KVM_CAP_ARM_EL2, \ + .feature = KVM_ARM_VCPU_HAS_EL2, \ + .regs = el2_regs, \ + .regs_n = ARRAY_SIZE(el2_regs), \ + } static struct vcpu_reg_list vregs_config = { .sublists = { @@ -753,6 +832,65 @@ static struct vcpu_reg_list pauth_pmu_config = { }, }; +static struct vcpu_reg_list el2_vregs_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + VREGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_vregs_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + VREGS_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_sve_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + SVE_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_sve_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + SVE_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_pauth_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + VREGS_SUBLIST, + PAUTH_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_pauth_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + VREGS_SUBLIST, + PAUTH_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + struct vcpu_reg_list *vcpu_configs[] = { &vregs_config, &vregs_pmu_config, @@ -760,5 +898,12 @@ struct vcpu_reg_list *vcpu_configs[] = { &sve_pmu_config, &pauth_config, &pauth_pmu_config, + + &el2_vregs_config, + &el2_vregs_pmu_config, + &el2_sve_config, + &el2_sve_pmu_config, + &el2_pauth_config, + &el2_pauth_pmu_config, }; int vcpu_configs_n = ARRAY_SIZE(vcpu_configs); -- cgit v1.2.3 From a883f273431804adfac6048f5e71a041f5626f64 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 15 May 2025 15:30:01 -0700 Subject: torture: Provide EXPERT Kconfig option for arm64 KCSAN torture.sh runs The arm64 architecture requires that KCSAN-enabled kernels be built with the CONFIG_EXPERT=y Kconfig option. This commit therefore causes the torture.sh script to provide this option, but only for --kcsan runs on arm64 systems. Signed-off-by: Paul E. McKenney Cc: Marco Elver Cc: Dmitry Vyukov Cc: Catalin Marinas Cc: Will Deacon Cc: Cc: Acked-by: Will Deacon Signed-off-by: Neeraj Upadhyay (AMD) --- tools/testing/selftests/rcutorture/bin/torture.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index 25847042e30e..420c551b824b 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -313,6 +313,13 @@ then do_scftorture=no fi +# CONFIG_EXPERT=y is currently required for arm64 KCSAN runs. +kcsan_expert= +if test "${thisarch}" = aarch64 +then + kcsan_expert="CONFIG_EXPERT=y" +fi + touch $T/failures touch $T/successes @@ -392,7 +399,7 @@ function torture_set { then chk_rdr_state="CONFIG_RCU_TORTURE_TEST_CHK_RDR_STATE=y" fi - torture_one "$@" --kconfig "CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y ${chk_rdr_state}" $kcsan_kmake_tag $cur_kcsan_kmake_args --kcsan + torture_one "$@" --kconfig "CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y ${kcsan_expert} ${chk_rdr_state}" $kcsan_kmake_tag $cur_kcsan_kmake_args --kcsan mv $T/last-resdir $T/last-resdir-kcsan || : fi } -- cgit v1.2.3 From ce243b71cfef7e44401c8b2ca93cdc206f8393d4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 15 May 2025 16:12:00 -0700 Subject: torture: Suppress "find" diagnostics from torture.sh --do-none run MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When torture.sh is told to do nothing, it produces a couple of distracting diagnostics from the "find" command: find: ‘’: No such file or directory find: ‘’: No such file or directory This is pointless chatter and could cause confusion. This commit therefore suppresses these diagnostics when there is nothing to find. Signed-off-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay (AMD) --- tools/testing/selftests/rcutorture/bin/torture.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index 420c551b824b..ed59bd43d4f8 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -719,8 +719,11 @@ fi echo Started at $startdate, ended at `date`, duration `get_starttime_duration $starttime`. | tee -a $T/log echo Summary: Successes: $nsuccesses Failures: $nfailures. | tee -a $T/log tdir="`cat $T/successes $T/failures | head -1 | awk '{ print $NF }' | sed -e 's,/[^/]\+/*$,,'`" -find "$tdir" -name 'ConfigFragment.diags' -print > $T/configerrors -find "$tdir" -name 'Make.out.diags' -print > $T/builderrors +if test -n "$tdir" +then + find "$tdir" -name 'ConfigFragment.diags' -print > $T/configerrors + find "$tdir" -name 'Make.out.diags' -print > $T/builderrors +fi if test -s "$T/configerrors" then echo " Scenarios with .config errors: `wc -l "$T/configerrors" | awk '{ print $1 }'`" -- cgit v1.2.3 From 0783f216423fff1acafae3b464b95e05ac596e12 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 3 Jun 2025 18:03:53 -0700 Subject: torture: Extract testid.txt generation to separate script The kvm.sh script places a testid.txt file in the top-level results directory in order to identify the tree and commit that was tested. This works well, but there are scripts other than kvm.sh that also create results directories, and it would be good for them to also identify exactly what was tested. This commit therefore extracts the testid.txt generation to a new mktestid.sh script so that it can be easily used elsewhere. Signed-off-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay (AMD) --- tools/testing/selftests/rcutorture/bin/kvm.sh | 13 +--------- tools/testing/selftests/rcutorture/bin/mktestid.sh | 29 ++++++++++++++++++++++ 2 files changed, 30 insertions(+), 12 deletions(-) create mode 100755 tools/testing/selftests/rcutorture/bin/mktestid.sh (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 9c1b850b3227..617cba339d28 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -442,18 +442,7 @@ echo $scriptname $args touch $resdir/$ds/log echo $scriptname $args >> $resdir/$ds/log echo ${TORTURE_SUITE} > $resdir/$ds/torture_suite -echo Build directory: `pwd` > $resdir/$ds/testid.txt -if test -d .git -then - echo Current commit: `git rev-parse HEAD` >> $resdir/$ds/testid.txt - echo >> $resdir/$ds/testid.txt - echo ' ---' Output of "'"git status"'": >> $resdir/$ds/testid.txt - git status >> $resdir/$ds/testid.txt - echo >> $resdir/$ds/testid.txt - echo >> $resdir/$ds/testid.txt - echo ' ---' Output of "'"git diff HEAD"'": >> $resdir/$ds/testid.txt - git diff HEAD >> $resdir/$ds/testid.txt -fi +mktestid.sh $resdir/$ds ___EOF___ kvm-assign-cpus.sh /sys/devices/system/node > $T/cpuarray.awk kvm-get-cpus-script.sh $T/cpuarray.awk $T/dumpbatches.awk diff --git a/tools/testing/selftests/rcutorture/bin/mktestid.sh b/tools/testing/selftests/rcutorture/bin/mktestid.sh new file mode 100755 index 000000000000..16f9907a4dae --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/mktestid.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Create a testid.txt file in the specified directory. +# +# Usage: mktestid.sh dirpath +# +# Copyright (C) Meta Platforms, Inc. 2025 +# +# Author: Paul E. McKenney + +resdir="$1" +if test -z "${resdir}" || ! test -d "${resdir}" || ! test -w "${resdir}" +then + echo Path '"'${resdir}'"' not writeable directory, no ${resdir}/testid.txt. + exit 1 +fi +echo Build directory: `pwd` > ${resdir}/testid.txt +if test -d .git +then + echo Current commit: `git rev-parse HEAD` >> ${resdir}/testid.txt + echo >> ${resdir}/testid.txt + echo ' ---' Output of "'"git status"'": >> ${resdir}/testid.txt + git status >> ${resdir}/testid.txt + echo >> ${resdir}/testid.txt + echo >> ${resdir}/testid.txt + echo ' ---' Output of "'"git diff HEAD"'": >> ${resdir}/testid.txt + git diff HEAD >> ${resdir}/testid.txt +fi -- cgit v1.2.3 From d57300010d38a8eb518fa05d305bef1343716431 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 3 Jun 2025 19:35:13 -0700 Subject: torture: Add textid.txt file to --do-allmodconfig and --do-rcu-rust runs This commit causes the torture.sh --do-allmodconfig and --do-rcu-rust parameters to add testid.txt files to their results directories, thus allowing easier analysis of the results of a series of runs kicked off by "git bisect". Signed-off-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay (AMD) --- tools/testing/selftests/rcutorture/bin/torture.sh | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index ed59bd43d4f8..a7a8e801283d 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -410,6 +410,7 @@ then echo " --- allmodconfig:" Start `date` | tee -a $T/log amcdir="tools/testing/selftests/rcutorture/res/$ds/allmodconfig" mkdir -p "$amcdir" + mktestid.sh "$amcdir" echo " --- make clean" | tee $amcdir/log > "$amcdir/Make.out" 2>&1 make -j$MAKE_ALLOTED_CPUS clean >> "$amcdir/Make.out" 2>&1 retcode=$? @@ -516,6 +517,7 @@ then echo " --- do-rcu-rust:" Start `date` | tee -a $T/log rrdir="tools/testing/selftests/rcutorture/res/$ds/results-rcu-rust" mkdir -p "$rrdir" + mktestid.sh "$rrdir" echo " --- make LLVM=1 rustavailable " | tee -a $rrdir/log > $rrdir/rustavailable.out make LLVM=1 rustavailable > $T/rustavailable.out 2>&1 retcode=$? -- cgit v1.2.3 From 3aee453496026451fe126e53d43db6d687b51209 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 5 Jun 2025 07:19:34 -0700 Subject: torture: Make torture.sh tolerate runs having bad kvm.sh arguments Currently, torture.sh assumes excessive levels of reviewer competence and thus fails to gracefully handle cases where it is tricked into giving kvm.sh invalid arguments. This commit therefore upgrades error handling to more gracefully handle this situation. Signed-off-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay (AMD) --- tools/testing/selftests/rcutorture/bin/torture.sh | 28 ++++++++++++++++++----- 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index a7a8e801283d..39844d213da5 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -378,13 +378,19 @@ function torture_set { then curflavor=$flavor torture_one "$@" - mv $T/last-resdir $T/last-resdir-nodebug || : + if test -e $T/last-resdir + then + mv $T/last-resdir $T/last-resdir-nodebug || : + fi fi if test "$do_kasan" = "yes" then curflavor=${flavor}-kasan torture_one "$@" --kasan - mv $T/last-resdir $T/last-resdir-kasan || : + if test -e $T/last-resdir + then + mv $T/last-resdir $T/last-resdir-kasan || : + fi fi if test "$do_kcsan" = "yes" then @@ -400,7 +406,10 @@ function torture_set { chk_rdr_state="CONFIG_RCU_TORTURE_TEST_CHK_RDR_STATE=y" fi torture_one "$@" --kconfig "CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y ${kcsan_expert} ${chk_rdr_state}" $kcsan_kmake_tag $cur_kcsan_kmake_args --kcsan - mv $T/last-resdir $T/last-resdir-kcsan || : + if test -e $T/last-resdir + then + mv $T/last-resdir $T/last-resdir-kcsan || : + fi fi } @@ -704,7 +713,14 @@ nfailures=0 echo FAILURES: | tee -a $T/log if test -s "$T/failures" then - awk < "$T/failures" -v sq="'" '{ print "echo " sq $0 sq; print "sed -e " sq "1,/^ --- .* Test summary:$/d" sq " " $2 "/log | grep Summary: | sed -e " sq "s/^[^S]*/ /" sq; }' | sh | tee -a $T/log | tee "$T/failuresum" + awk < "$T/failures" -v sq="'" ' + { + print "echo " sq $0 sq; + if ($2 != "") + print "sed -e " sq "1,/^ --- .* Test summary:$/d" sq " " $2 "/log | grep Summary: | sed -e " sq "s/^[^S]*/ /" sq; + else + print "echo " sq " " sq "Run failed to produce results directory."; + }' | sh | tee -a $T/log | tee "$T/failuresum" nfailures="`wc -l "$T/failures" | awk '{ print $1 }'`" grep "^ Summary: " "$T/failuresum" | grep -v '^ Summary: Bugs: [0-9]* (all bugs kcsan)$' > "$T/nonkcsan" @@ -714,13 +730,13 @@ then fi ret=2 fi -if test "$do_kcsan" = "yes" +if test "$do_kcsan" = "yes" && test -e tools/testing/selftests/rcutorture/res/$ds then TORTURE_KCONFIG_KCSAN_ARG=1 tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh tools/testing/selftests/rcutorture/res/$ds > tools/testing/selftests/rcutorture/res/$ds/kcsan.sum fi echo Started at $startdate, ended at `date`, duration `get_starttime_duration $starttime`. | tee -a $T/log echo Summary: Successes: $nsuccesses Failures: $nfailures. | tee -a $T/log -tdir="`cat $T/successes $T/failures | head -1 | awk '{ print $NF }' | sed -e 's,/[^/]\+/*$,,'`" +tdir="`cat $T/successes $T/failures | awk 'NF > 1 { print $NF }' | head -1 | sed -e 's,/[^/]\+/*$,,'`" if test -n "$tdir" then find "$tdir" -name 'ConfigFragment.diags' -print > $T/configerrors -- cgit v1.2.3 From 17f4698a9e6092dd59e5dd616b21095ba9c8b6fe Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 5 Jun 2025 14:36:04 -0700 Subject: torture: Add "ERROR" diagnostic for testing kernel-build output Some recent kernel-build failures have featured "ERROR", so this commit adds it to the list checked by kvm-build.sh. Signed-off-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay (AMD) --- tools/testing/selftests/rcutorture/bin/kvm-build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh index 11f8d232b0ee..3edfd064ef81 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-build.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh @@ -44,7 +44,7 @@ fi ncpus="`getconf _NPROCESSORS_ONLN`" make -j$((2 * ncpus)) $TORTURE_KMAKE_ARG > $resdir/Make.out 2>&1 retval=$? -if test $retval -ne 0 || grep "rcu[^/]*": < $resdir/Make.out | grep -E -q "Stop|Error|error:|warning:" || grep -E -q "Stop|Error|error:" < $resdir/Make.out +if test $retval -ne 0 || grep "rcu[^/]*": < $resdir/Make.out | grep -E -q "Stop|ERROR|Error|error:|warning:" || grep -E -q "Stop|ERROR|Error|error:" < $resdir/Make.out then echo Kernel build error grep -E "Stop|Error|error:|warning:" < $resdir/Make.out -- cgit v1.2.3 From 748d7923b53ff7bcb3d825ef765d8461d7af1794 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 6 Jun 2025 03:47:11 -0700 Subject: torture: Make torture.sh --allmodconfig testing fail on warnings Currently, the torture.sh --allmodconfig testing looks solely at the exit code from the kernel build, and thus fails to flag many compiler warnings. This commit therefore checks the kernel-build output for compiler diagnostics. Signed-off-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay (AMD) --- tools/testing/selftests/rcutorture/bin/torture.sh | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index 39844d213da5..611bc03a8dc7 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -438,6 +438,10 @@ then make -j$MAKE_ALLOTED_CPUS >> "$amcdir/Make.out" 2>&1 retcode="$?" echo $retcode > "$amcdir/Make.exitcode" + if grep -E -q "Stop|ERROR|Error|error:|warning:" < "$amcdir/Make.out" + then + retcode=99 + fi buildphase='"make"' fi if test "$retcode" -eq 0 -- cgit v1.2.3 From d08d409126d7d5ad2d8ceac77fb97f2d892e9f85 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 1 Jul 2025 17:23:27 -0700 Subject: rcutorture: Remove SRCU-lite scenarios This commit prepares for the removal of SRCU-Lite by removing the SRCU-L rcutorture scenario that tests it. Both SRCU-lite and SRCU-fast provide faster readers by dropping the smp_mb() call from their lock and unlock primitives, but incur a pair of added RCU grace periods during the SRCU grace period. There is a trivial mapping from the SRCU-lite API to that of SRCU-fast, so there should be no transition issues. [ paulmck: Apply Christoph Hellwig feedback. ] Signed-off-by: "Paul E. McKenney" Signed-off-by: Neeraj Upadhyay (AMD) --- tools/testing/selftests/rcutorture/configs/rcu/CFLIST | 1 - tools/testing/selftests/rcutorture/configs/rcu/SRCU-L | 10 ---------- tools/testing/selftests/rcutorture/configs/rcu/SRCU-L.boot | 3 --- 3 files changed, 14 deletions(-) delete mode 100644 tools/testing/selftests/rcutorture/configs/rcu/SRCU-L delete mode 100644 tools/testing/selftests/rcutorture/configs/rcu/SRCU-L.boot (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST index 45f572570a8c..98b6175e5aa0 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST +++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST @@ -5,7 +5,6 @@ TREE04 TREE05 TREE07 TREE09 -SRCU-L SRCU-N SRCU-P SRCU-T diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L deleted file mode 100644 index 3b4fa8dbef8a..000000000000 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L +++ /dev/null @@ -1,10 +0,0 @@ -CONFIG_RCU_TRACE=n -CONFIG_SMP=y -CONFIG_NR_CPUS=6 -CONFIG_HOTPLUG_CPU=y -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_RCU_EXPERT=n -CONFIG_KPROBES=n -CONFIG_FTRACE=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L.boot deleted file mode 100644 index 0207b3138c5b..000000000000 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L.boot +++ /dev/null @@ -1,3 +0,0 @@ -rcutorture.torture_type=srcu -rcutorture.reader_flavor=0x4 -rcutorture.fwd_progress=3 -- cgit v1.2.3 From 3047957cc7c19433dc8b88a7fec471efa13ba034 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 15 Jul 2025 04:34:59 +0000 Subject: selftests: rtnetlink: fix addrlft test flakiness on power-saving systems Jakub reported that the rtnetlink test for the preferred lifetime of an address has become quite flaky. The issue started appearing around the 6.16 merge window in May, and the test fails with: FAIL: preferred_lft addresses remaining The flakiness might be related to power-saving behavior, as address expiration is handled by a "power-efficient" workqueue. To address this, use slowwait to check more frequently whether the address still exists. This reduces the likelihood of the system entering a low-power state during the test, improving reliability. Reported-by: Jakub Kicinski Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20250715043459.110523-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/rtnetlink.sh | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 2e8243a65b50..49141254065c 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -291,6 +291,17 @@ kci_test_route_get() end_test "PASS: route get" } +check_addr_not_exist() +{ + dev=$1 + addr=$2 + if ip addr show dev $dev | grep -q $addr; then + return 1 + else + return 0 + fi +} + kci_test_addrlft() { for i in $(seq 10 100) ;do @@ -298,9 +309,8 @@ kci_test_addrlft() run_cmd ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1)) done - sleep 5 - run_cmd_grep_fail "10.23.11." ip addr show dev "$devdummy" - if [ $? -eq 0 ]; then + slowwait 5 check_addr_not_exist "$devdummy" "10.23.11." + if [ $? -eq 1 ]; then check_err 1 end_test "FAIL: preferred_lft addresses remaining" return -- cgit v1.2.3 From 511ad4c26446e5254b94352f55067c501d319462 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 15 Jul 2025 07:28:49 -0700 Subject: selftests: packetdrill: correct the expected timing in tcp_rcv_big_endseq Commit f5fda1a86884 ("selftests/net: packetdrill: add tcp_rcv_big_endseq.pkt") added this test recently, but it's failing with: # tcp_rcv_big_endseq.pkt:41: error handling packet: timing error: expected outbound packet at 1.230105 sec but happened at 1.190101 sec; tolerance 0.005046 sec # script packet: 1.230105 . 1:1(0) ack 54001 win 0 # actual packet: 1.190101 . 1:1(0) ack 54001 win 0 It's unclear why the test expects the ack to be delayed. Correct it. Link: https://patch.msgid.link/20250715142849.959444-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt index 7e170b94fd36..3848b419e68c 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt @@ -38,7 +38,7 @@ // If queue is empty, accept a packet even if its end_seq is above wup + rcv_wnd +0 < P. 4001:54001(50000) ack 1 win 257 - +.040 > . 1:1(0) ack 54001 win 0 + +0 > . 1:1(0) ack 54001 win 0 // Check LINUX_MIB_BEYOND_WINDOW has been incremented 3 times. +0 `nstat | grep TcpExtBeyondWindow | grep -q " 3 "` -- cgit v1.2.3 From 3c561c547c396038c7690645cff4f98181c62d49 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Jul 2025 02:56:48 -0700 Subject: selftests: drv-net: add helper/wrapper for bpftrace bpftrace is very useful for low level driver testing. perf or trace-cmd would also do for collecting data from tracepoints, but they require much more post-processing. Add a wrapper for running bpftrace and sanitizing its output. bpftrace has JSON output, which is great, but it prints loose objects and in a slightly inconvenient format. We have to read the objects line by line, and while at it return them indexed by the map name. Reviewed-by: Breno Leitao Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20250714-netpoll_test-v7-1-c0220cfaa63e@debian.org Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/lib/py/__init__.py | 4 +-- tools/testing/selftests/net/lib/py/utils.py | 33 ++++++++++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py index 39968bc3df43..8711c67ad658 100644 --- a/tools/testing/selftests/drivers/net/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py @@ -14,8 +14,8 @@ try: from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \ NlError, RtnlFamily, DevlinkFamily from net.lib.py import CmdExitFailure - from net.lib.py import bkg, cmd, defer, ethtool, fd_read_timeout, ip, \ - rand_port, tool, wait_port_listen, bpftool + from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \ + fd_read_timeout, ip, rand_port, tool, wait_port_listen from net.lib.py import fd_read_timeout from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index acf0e2c38614..5950a643a533 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -189,6 +189,39 @@ def ethtool(args, json=None, ns=None, host=None): return tool('ethtool', args, json=json, ns=ns, host=host) +def bpftrace(expr, json=None, ns=None, host=None, timeout=None): + """ + Run bpftrace and return map data (if json=True). + The output of bpftrace is inconvenient, so the helper converts + to a dict indexed by map name, e.g.: + { + "@": { ... }, + "@map2": { ... }, + } + """ + cmd_arr = ['bpftrace'] + # Throw in --quiet if json, otherwise the output has two objects + if json: + cmd_arr += ['-f', 'json', '-q'] + if timeout: + expr += ' interval:s:' + str(timeout) + ' { exit(); }' + cmd_arr += ['-e', expr] + cmd_obj = cmd(cmd_arr, ns=ns, host=host, shell=False) + if json: + # bpftrace prints objects as lines + ret = {} + for l in cmd_obj.stdout.split('\n'): + if not l.strip(): + continue + one = _json.loads(l) + if one.get('type') != 'map': + continue + for k, v in one["data"].items(): + ret[k] = v + return ret + return cmd_obj + + def rand_port(type=socket.SOCK_STREAM): """ Get a random unprivileged port. -- cgit v1.2.3 From fd2aadcefbacb4425f54c252ec9cfb8218548eb9 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 14 Jul 2025 02:56:49 -0700 Subject: selftests: drv-net: Strip '@' prefix from bpftrace map keys The '@' prefix in bpftrace map keys is specific to bpftrace and can be safely removed when processing results. This patch modifies the bpftrace utility to strip the '@' from map keys before storing them in the result dictionary, making the keys more consistent with Python conventions. Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20250714-netpoll_test-v7-2-c0220cfaa63e@debian.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib/py/utils.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index 5950a643a533..f395c90fb0f1 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -217,6 +217,8 @@ def bpftrace(expr, json=None, ns=None, host=None, timeout=None): if one.get('type') != 'map': continue for k, v in one["data"].items(): + if k.startswith('@'): + k = k.lstrip('@') ret[k] = v return ret return cmd_obj -- cgit v1.2.3 From b3019343e4bde385d1d59918b2e3ffa4eb340739 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 14 Jul 2025 02:56:50 -0700 Subject: selftests: net: add netpoll basic functionality test Add a basic selftest for the netpoll polling mechanism, specifically targeting the netpoll poll() side. The test creates a scenario where network transmission is running at maximum speed, and netpoll needs to poll the NIC. This is achieved by: 1. Configuring a single RX/TX queue to create contention 2. Generating background traffic to saturate the interface 3. Sending netconsole messages to trigger netpoll polling 4. Using dynamic netconsole targets via configfs 5. Delete and create new netconsole targets after some messages 6. Start a bpftrace in parallel to make sure netpoll_poll_dev() is called 7. If bpftrace exists and netpoll_poll_dev() was called, stop. The test validates a critical netpoll code path by monitoring traffic flow and ensuring netpoll_poll_dev() is called when the normal TX path is blocked. This addresses a gap in netpoll test coverage for a path that is tricky for the network stack. Signed-off-by: Breno Leitao Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250714-netpoll_test-v7-3-c0220cfaa63e@debian.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/Makefile | 1 + .../testing/selftests/drivers/net/netpoll_basic.py | 396 +++++++++++++++++++++ 2 files changed, 397 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/netpoll_basic.py (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index bd309b2d3909..9bd84d6b542e 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -16,6 +16,7 @@ TEST_PROGS := \ netcons_fragmented_msg.sh \ netcons_overflow.sh \ netcons_sysdata.sh \ + netpoll_basic.py \ ping.py \ queues.py \ stats.py \ diff --git a/tools/testing/selftests/drivers/net/netpoll_basic.py b/tools/testing/selftests/drivers/net/netpoll_basic.py new file mode 100755 index 000000000000..408bd54d6779 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netpoll_basic.py @@ -0,0 +1,396 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Author: Breno Leitao +""" + This test aims to evaluate the netpoll polling mechanism (as in + netpoll_poll_dev()). It presents a complex scenario where the network + attempts to send a packet but fails, prompting it to poll the NIC from within + the netpoll TX side. + + This has been a crucial path in netpoll that was previously untested. Jakub + suggested using a single RX/TX queue, pushing traffic to the NIC, and then + sending netpoll messages (via netconsole) to trigger the poll. + + In parallel, bpftrace is used to detect if netpoll_poll_dev() was called. If + so, the test passes, otherwise it will be skipped. This test is very dependent on + the driver and environment, given we are trying to trigger a tricky scenario. +""" + +import errno +import logging +import os +import random +import string +import threading +import time +from typing import Optional + +from lib.py import ( + bpftrace, + CmdExitFailure, + defer, + ethtool, + GenerateTraffic, + ksft_exit, + ksft_pr, + ksft_run, + KsftFailEx, + KsftSkipEx, + NetDrvEpEnv, + KsftXfailEx, +) + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", +) + +NETCONSOLE_CONFIGFS_PATH: str = "/sys/kernel/config/netconsole" +NETCONS_REMOTE_PORT: int = 6666 +NETCONS_LOCAL_PORT: int = 1514 + +# Max number of netcons messages to send. Each iteration will setup +# netconsole and send MAX_WRITES messages +ITERATIONS: int = 20 +# Number of writes to /dev/kmsg per iteration +MAX_WRITES: int = 40 +# MAPS contains the information coming from bpftrace it will have only one +# key: "hits", which tells the number of times netpoll_poll_dev() was called +MAPS: dict[str, int] = {} +# Thread to run bpftrace in parallel +BPF_THREAD: Optional[threading.Thread] = None +# Time bpftrace will be running in parallel. +BPFTRACE_TIMEOUT: int = 10 + + +def ethtool_get_ringsize(interface_name: str) -> tuple[int, int]: + """ + Read the ringsize using ethtool. This will be used to restore it after the test + """ + try: + ethtool_result = ethtool(f"-g {interface_name}", json=True)[0] + rxs = ethtool_result["rx"] + txs = ethtool_result["tx"] + except (KeyError, IndexError) as exception: + raise KsftSkipEx( + f"Failed to read RX/TX ringsize: {exception}. Not going to mess with them." + ) from exception + + return rxs, txs + + +def ethtool_set_ringsize(interface_name: str, ring_size: tuple[int, int]) -> bool: + """Try to the number of RX and TX ringsize.""" + rxs = ring_size[0] + txs = ring_size[1] + + logging.debug("Setting ring size to %d/%d", rxs, txs) + try: + ethtool(f"-G {interface_name} rx {rxs} tx {txs}") + except CmdExitFailure: + # This might fail on real device, retry with a higher value, + # worst case, keep it as it is. + return False + + return True + + +def ethtool_get_queues_cnt(interface_name: str) -> tuple[int, int, int]: + """Read the number of RX, TX and combined queues using ethtool""" + + try: + ethtool_result = ethtool(f"-l {interface_name}", json=True)[0] + rxq = ethtool_result.get("rx", -1) + txq = ethtool_result.get("tx", -1) + combined = ethtool_result.get("combined", -1) + + except IndexError as exception: + raise KsftSkipEx( + f"Failed to read queues numbers: {exception}. Not going to mess with them." + ) from exception + + return rxq, txq, combined + + +def ethtool_set_queues_cnt(interface_name: str, queues: tuple[int, int, int]) -> None: + """Set the number of RX, TX and combined queues using ethtool""" + rxq, txq, combined = queues + + cmdline = f"-L {interface_name}" + + if rxq != -1: + cmdline += f" rx {rxq}" + if txq != -1: + cmdline += f" tx {txq}" + if combined != -1: + cmdline += f" combined {combined}" + + logging.debug("calling: ethtool %s", cmdline) + + try: + ethtool(cmdline) + except CmdExitFailure as exception: + raise KsftSkipEx( + f"Failed to configure RX/TX queues: {exception}. Ethtool not available?" + ) from exception + + +def netcons_generate_random_target_name() -> str: + """Generate a random target name starting with 'netcons'""" + random_suffix = "".join(random.choices(string.ascii_lowercase + string.digits, k=8)) + return f"netcons_{random_suffix}" + + +def netcons_create_target( + config_data: dict[str, str], + target_name: str, +) -> None: + """Create a netconsole dynamic target against the interfaces""" + logging.debug("Using netconsole name: %s", target_name) + try: + os.makedirs(f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}", exist_ok=True) + logging.debug( + "Created target directory: %s/%s", NETCONSOLE_CONFIGFS_PATH, target_name + ) + except OSError as exception: + if exception.errno != errno.EEXIST: + raise KsftFailEx( + f"Failed to create netconsole target directory: {exception}" + ) from exception + + try: + for key, value in config_data.items(): + path = f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{key}" + logging.debug("Writing %s to %s", key, path) + with open(path, "w", encoding="utf-8") as file: + # Always convert to string to write to file + file.write(str(value)) + + # Read all configuration values for debugging purposes + for debug_key in config_data.keys(): + with open( + f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{debug_key}", + "r", + encoding="utf-8", + ) as file: + content = file.read() + logging.debug( + "%s/%s/%s : %s", + NETCONSOLE_CONFIGFS_PATH, + target_name, + debug_key, + content.strip(), + ) + + except Exception as exception: + raise KsftFailEx( + f"Failed to configure netconsole target: {exception}" + ) from exception + + +def netcons_configure_target( + cfg: NetDrvEpEnv, interface_name: str, target_name: str +) -> None: + """Configure netconsole on the interface with the given target name""" + config_data = { + "extended": "1", + "dev_name": interface_name, + "local_port": NETCONS_LOCAL_PORT, + "remote_port": NETCONS_REMOTE_PORT, + "local_ip": cfg.addr, + "remote_ip": cfg.remote_addr, + "remote_mac": "00:00:00:00:00:00", # Not important for this test + "enabled": "1", + } + + netcons_create_target(config_data, target_name) + logging.debug( + "Created netconsole target: %s on interface %s", target_name, interface_name + ) + + +def netcons_delete_target(name: str) -> None: + """Delete a netconsole dynamic target""" + target_path = f"{NETCONSOLE_CONFIGFS_PATH}/{name}" + try: + if os.path.exists(target_path): + os.rmdir(target_path) + except OSError as exception: + raise KsftFailEx( + f"Failed to delete netconsole target: {exception}" + ) from exception + + +def netcons_load_module() -> None: + """Try to load the netconsole module""" + os.system("modprobe netconsole") + + +def bpftrace_call() -> None: + """Call bpftrace to find how many times netpoll_poll_dev() is called. + Output is saved in the global variable `maps`""" + + # This is going to update the global variable, that will be seen by the + # main function + global MAPS # pylint: disable=W0603 + + # This will be passed to bpftrace as in bpftrace -e "expr" + expr = "kprobe:netpoll_poll_dev { @hits = count(); }" + + MAPS = bpftrace(expr, timeout=BPFTRACE_TIMEOUT, json=True) + logging.debug("BPFtrace output: %s", MAPS) + + +def bpftrace_start(): + """Start a thread to call `call_bpf` in a parallel thread""" + global BPF_THREAD # pylint: disable=W0603 + + BPF_THREAD = threading.Thread(target=bpftrace_call) + BPF_THREAD.start() + if not BPF_THREAD.is_alive(): + raise KsftSkipEx("BPFtrace thread is not alive. Skipping test") + + +def bpftrace_stop() -> None: + """Stop the bpftrace thread""" + if BPF_THREAD: + BPF_THREAD.join() + + +def bpftrace_any_hit(join: bool) -> bool: + """Check if netpoll_poll_dev() was called by checking the global variable `maps`""" + if not BPF_THREAD: + raise KsftFailEx("BPFtrace didn't start") + + if BPF_THREAD.is_alive(): + if join: + # Wait for bpftrace to finish + BPF_THREAD.join() + else: + # bpftrace is still running, so, we will not check the result yet + return False + + logging.debug("MAPS coming from bpftrace = %s", MAPS) + if "hits" not in MAPS.keys(): + raise KsftFailEx(f"bpftrace failed to run!?: {MAPS}") + + logging.debug("Got a total of %d hits", MAPS["hits"]) + return MAPS["hits"] > 0 + + +def do_netpoll_flush_monitored(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None: + """Print messages to the console, trying to trigger a netpoll poll""" + # Start bpftrace in parallel, so, it is watching + # netpoll_poll_dev() while we are sending netconsole messages + bpftrace_start() + defer(bpftrace_stop) + + do_netpoll_flush(cfg, ifname, target_name) + + if bpftrace_any_hit(join=True): + ksft_pr("netpoll_poll_dev() was called. Success") + return + + raise KsftXfailEx("netpoll_poll_dev() was not called during the test...") + + +def do_netpoll_flush(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None: + """Print messages to the console, trying to trigger a netpoll poll""" + netcons_configure_target(cfg, ifname, target_name) + retry = 0 + + for i in range(int(ITERATIONS)): + if not BPF_THREAD.is_alive() or bpftrace_any_hit(join=False): + # bpftrace is done, stop sending messages + break + + msg = f"netcons test #{i}" + with open("/dev/kmsg", "w", encoding="utf-8") as kmsg: + for j in range(MAX_WRITES): + try: + kmsg.write(f"{msg}-{j}\n") + except OSError as exception: + # in some cases, kmsg can be busy, so, we will retry + time.sleep(1) + retry += 1 + if retry < 5: + logging.info("Failed to write to kmsg. Retrying") + # Just retry a few times + continue + raise KsftFailEx( + f"Failed to write to kmsg: {exception}" + ) from exception + + netcons_delete_target(target_name) + netcons_configure_target(cfg, ifname, target_name) + # If we sleep here, we will have a better chance of triggering + # This number is based on a few tests I ran while developing this test + time.sleep(0.4) + + +def configure_network(ifname: str) -> None: + """Configure ring size and queue numbers""" + + # Set defined queues to 1 to force congestion + prev_queues = ethtool_get_queues_cnt(ifname) + logging.debug("RX/TX/combined queues: %s", prev_queues) + # Only set the queues to 1 if they exists in the device. I.e, they are > 0 + ethtool_set_queues_cnt(ifname, tuple(1 if x > 0 else x for x in prev_queues)) + defer(ethtool_set_queues_cnt, ifname, prev_queues) + + # Try to set the ring size to some low value. + # Do not fail if the hardware do not accepted desired values + prev_ring_size = ethtool_get_ringsize(ifname) + for size in [(1, 1), (128, 128), (256, 256)]: + if ethtool_set_ringsize(ifname, size): + # hardware accepted the desired ringsize + logging.debug("Set RX/TX ringsize to: %s from %s", size, prev_ring_size) + break + defer(ethtool_set_ringsize, ifname, prev_ring_size) + + +def test_netpoll(cfg: NetDrvEpEnv) -> None: + """ + Test netpoll by sending traffic to the interface and then sending + netconsole messages to trigger a poll + """ + + ifname = cfg.ifname + configure_network(ifname) + target_name = netcons_generate_random_target_name() + traffic = None + + try: + traffic = GenerateTraffic(cfg) + do_netpoll_flush_monitored(cfg, ifname, target_name) + finally: + if traffic: + traffic.stop() + + # Revert RX/TX queues + netcons_delete_target(target_name) + + +def test_check_dependencies() -> None: + """Check if the dependencies are met""" + if not os.path.exists(NETCONSOLE_CONFIGFS_PATH): + raise KsftSkipEx( + f"Directory {NETCONSOLE_CONFIGFS_PATH} does not exist. CONFIG_NETCONSOLE_DYNAMIC might not be set." # pylint: disable=C0301 + ) + + +def main() -> None: + """Main function to run the test""" + netcons_load_module() + test_check_dependencies() + with NetDrvEpEnv(__file__) as cfg: + ksft_run( + [test_netpoll], + args=(cfg,), + ) + ksft_exit() + + +if __name__ == "__main__": + main() -- cgit v1.2.3 From 4a760d2d7aa6357428eadb6c3714f21a8b85cf6b Mon Sep 17 00:00:00 2001 From: "Alexis Lothoré (eBPF Foundation)" Date: Wed, 9 Jul 2025 10:36:56 +0200 Subject: selftests/bpf: enable tracing_struct tests for arm64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that the constraint preventing attachment to functions consuming struct on stack has been removed from the kernel (and moved to pahole, with a slightly smarter detection, to prevent only those that are packed), re-enable the tracing_struct tests for arm64. Signed-off-by: Alexis Lothoré (eBPF Foundation) Link: https://lore.kernel.org/r/20250709-arm64_relax_jit_comp-v1-2-3850fe189092@bootlin.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/DENYLIST.aarch64 | 1 - 1 file changed, 1 deletion(-) delete mode 100644 tools/testing/selftests/bpf/DENYLIST.aarch64 (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 deleted file mode 100644 index 12e99c0277a8..000000000000 --- a/tools/testing/selftests/bpf/DENYLIST.aarch64 +++ /dev/null @@ -1 +0,0 @@ -tracing_struct/struct_many_args # struct_many_args:FAIL:tracing_struct_many_args__attach unexpected error: -524 -- cgit v1.2.3 From d459dbbbfa323165849451edb9690a933c210bac Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 16 Jul 2025 21:35:07 +0200 Subject: selftests/bpf: Stress test attaching a BPF prog to another BPF prog Add a test that invokes a BPF prog in a loop, while concurrently attaching and detaching another BPF prog to and from it. This helps identifying race conditions in bpf_arch_text_poke(). Signed-off-by: Ilya Leoshkevich Link: https://lore.kernel.org/r/20250716194524.48109-3-iii@linux.ibm.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/recursive_attach.c | 67 ++++++++++++++++++++++ 1 file changed, 67 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/recursive_attach.c b/tools/testing/selftests/bpf/prog_tests/recursive_attach.c index 8100509e561b..0ffa01d54ce2 100644 --- a/tools/testing/selftests/bpf/prog_tests/recursive_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/recursive_attach.c @@ -149,3 +149,70 @@ close_prog: fentry_recursive_target__destroy(target_skel); fentry_recursive__destroy(tracing_skel); } + +static void *fentry_target_test_run(void *arg) +{ + for (;;) { + int prog_fd = __atomic_load_n((int *)arg, __ATOMIC_SEQ_CST); + LIBBPF_OPTS(bpf_test_run_opts, topts); + int err; + + if (prog_fd == -1) + break; + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "fentry_target test_run")) + break; + } + + return NULL; +} + +void test_fentry_attach_stress(void) +{ + struct fentry_recursive_target *target_skel = NULL; + struct fentry_recursive *tracing_skel = NULL; + struct bpf_program *prog; + int err, i, tgt_prog_fd; + pthread_t thread; + + target_skel = fentry_recursive_target__open_and_load(); + if (!ASSERT_OK_PTR(target_skel, + "fentry_recursive_target__open_and_load")) + goto close_prog; + tgt_prog_fd = bpf_program__fd(target_skel->progs.fentry_target); + err = pthread_create(&thread, NULL, + fentry_target_test_run, &tgt_prog_fd); + if (!ASSERT_OK(err, "bpf_program__set_attach_target")) + goto close_prog; + + for (i = 0; i < 1000; i++) { + tracing_skel = fentry_recursive__open(); + if (!ASSERT_OK_PTR(tracing_skel, "fentry_recursive__open")) + goto stop_thread; + + prog = tracing_skel->progs.recursive_attach; + err = bpf_program__set_attach_target(prog, tgt_prog_fd, + "fentry_target"); + if (!ASSERT_OK(err, "bpf_program__set_attach_target")) + goto stop_thread; + + err = fentry_recursive__load(tracing_skel); + if (!ASSERT_OK(err, "fentry_recursive__load")) + goto stop_thread; + + err = fentry_recursive__attach(tracing_skel); + if (!ASSERT_OK(err, "fentry_recursive__attach")) + goto stop_thread; + + fentry_recursive__destroy(tracing_skel); + tracing_skel = NULL; + } + +stop_thread: + __atomic_store_n(&tgt_prog_fd, -1, __ATOMIC_SEQ_CST); + err = pthread_join(thread, NULL); + ASSERT_OK(err, "pthread_join"); +close_prog: + fentry_recursive__destroy(tracing_skel); + fentry_recursive_target__destroy(target_skel); +} -- cgit v1.2.3 From fd60aa0a45c1508cdcb982dbf25fd003a6b34e92 Mon Sep 17 00:00:00 2001 From: Tao Chen Date: Wed, 16 Jul 2025 21:46:54 +0800 Subject: bpf/selftests: Add selftests for token info A previous change added bpf_token_info to get token info with bpf_get_obj_info_by_fd, this patch adds a new test for token info. #461/12 token/bpf_token_info:OK Acked-by: Andrii Nakryiko Signed-off-by: Tao Chen Link: https://lore.kernel.org/r/20250716134654.1162635-2-chen.dylane@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/token.c | 44 ++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c index cfc032b910c4..b81dde283052 100644 --- a/tools/testing/selftests/bpf/prog_tests/token.c +++ b/tools/testing/selftests/bpf/prog_tests/token.c @@ -1047,6 +1047,41 @@ err_out: #define bit(n) (1ULL << (n)) +static int userns_bpf_token_info(int mnt_fd, struct token_lsm *lsm_skel) +{ + int err, token_fd = -1; + struct bpf_token_info info; + u32 len = sizeof(struct bpf_token_info); + + /* create BPF token from BPF FS mount */ + token_fd = bpf_token_create(mnt_fd, NULL); + if (!ASSERT_GT(token_fd, 0, "token_create")) { + err = -EINVAL; + goto cleanup; + } + + memset(&info, 0, len); + err = bpf_obj_get_info_by_fd(token_fd, &info, &len); + if (!ASSERT_ERR(err, "bpf_obj_get_token_info")) + goto cleanup; + if (!ASSERT_EQ(info.allowed_cmds, bit(BPF_MAP_CREATE), "token_info_cmds_map_create")) { + err = -EINVAL; + goto cleanup; + } + if (!ASSERT_EQ(info.allowed_progs, bit(BPF_PROG_TYPE_XDP), "token_info_progs_xdp")) { + err = -EINVAL; + goto cleanup; + } + + /* The BPF_PROG_TYPE_EXT is not set in token */ + if (ASSERT_EQ(info.allowed_progs, bit(BPF_PROG_TYPE_EXT), "token_info_progs_ext")) + err = -EINVAL; + +cleanup: + zclose(token_fd); + return err; +} + void test_token(void) { if (test__start_subtest("map_token")) { @@ -1150,4 +1185,13 @@ void test_token(void) subtest_userns(&opts, userns_obj_priv_implicit_token_envvar); } + if (test__start_subtest("bpf_token_info")) { + struct bpffs_opts opts = { + .cmds = bit(BPF_MAP_CREATE), + .progs = bit(BPF_PROG_TYPE_XDP), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_bpf_token_info); + } } -- cgit v1.2.3 From 0769857a07b4451a1dc1c3ad1f1c86a6f4ce136a Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Thu, 10 Jul 2025 17:54:33 +0000 Subject: selftests/bpf: fix implementation of smp_mb() As BPF doesn't include any barrier instructions, smp_mb() is implemented by doing a dummy value returning atomic operation. Such an operation acts a full barrier as enforced by LKMM and also by the work in progress BPF memory model. If the returned value is not used, clang[1] can optimize the value returning atomic instruction in to a normal atomic instruction which provides no ordering guarantees. Mark the variable as volatile so the above optimization is never performed and smp_mb() works as expected. [1] https://godbolt.org/z/qzze7bG6z Fixes: 88d706ba7cc5 ("selftests/bpf: Introduce arena spin lock") Signed-off-by: Puranjay Mohan Link: https://lore.kernel.org/r/20250710175434.18829-2-puranjay@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/bpf_atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/bpf_atomic.h b/tools/testing/selftests/bpf/bpf_atomic.h index a9674e544322..c550e5711967 100644 --- a/tools/testing/selftests/bpf/bpf_atomic.h +++ b/tools/testing/selftests/bpf/bpf_atomic.h @@ -61,7 +61,7 @@ extern bool CONFIG_X86_64 __kconfig __weak; #define smp_mb() \ ({ \ - unsigned long __val; \ + volatile unsigned long __val; \ __sync_fetch_and_add(&__val, 0); \ }) -- cgit v1.2.3 From 54c605124da6fad3d6033ca822c551ce33982084 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 15 Jul 2025 22:11:41 +0100 Subject: kselftest/arm4: Provide local defines for AT_HWCAP3 Some build environments for the selftests are not picking up the newly added AT_HWCAP3 when using the libc headers, even with headers_install (which we require already for the arm64 selftests). As a quick fix add local definitions of the constant to tools use it, while auxvec.h is installed with some toolchains it needs some persuasion to get picked up. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20250715-arm64-selftest-bodge-hwcap3-v1-1-541b54bc43bb@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/abi/hwcap.c | 4 ++++ tools/testing/selftests/arm64/mte/check_prctl.c | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index 35f521e5f41c..aa902408facd 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -21,6 +21,10 @@ #define TESTS_PER_HWCAP 3 +#ifndef AT_HWCAP3 +#define AT_HWCAP3 29 +#endif + /* * Function expected to generate exception when the feature is not * supported and return when it is supported. If the specific exception diff --git a/tools/testing/selftests/arm64/mte/check_prctl.c b/tools/testing/selftests/arm64/mte/check_prctl.c index 4c89e9538ca0..c36c4c49ff95 100644 --- a/tools/testing/selftests/arm64/mte/check_prctl.c +++ b/tools/testing/selftests/arm64/mte/check_prctl.c @@ -12,6 +12,10 @@ #include "kselftest.h" +#ifndef AT_HWCAP3 +#define AT_HWCAP3 29 +#endif + static int set_tagged_addr_ctrl(int val) { int ret; -- cgit v1.2.3 From e0f3b3e5c77a5f9dd7224612a6d74e0888cb055f Mon Sep 17 00:00:00 2001 From: Dong Chenchen Date: Wed, 16 Jul 2025 11:45:04 +0800 Subject: selftests: Add test cases for vlan_filter modification during runtime Add test cases for vlan_filter modification during runtime, which may triger null-ptr-ref or memory leak of vlan0. Signed-off-by: Dong Chenchen Link: https://patch.msgid.link/20250716034504.2285203-3-dongchenchen2@huawei.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/vlan_hw_filter.sh | 98 +++++++++++++++++++++++---- 1 file changed, 86 insertions(+), 12 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/vlan_hw_filter.sh b/tools/testing/selftests/net/vlan_hw_filter.sh index 7bc804ffaf7c..0fb56baf28e4 100755 --- a/tools/testing/selftests/net/vlan_hw_filter.sh +++ b/tools/testing/selftests/net/vlan_hw_filter.sh @@ -3,27 +3,101 @@ readonly NETNS="ns-$(mktemp -u XXXXXX)" +ALL_TESTS=" + test_vlan_filter_check + test_vlan0_del_crash_01 + test_vlan0_del_crash_02 + test_vlan0_del_crash_03 + test_vid0_memleak +" + ret=0 +setup() { + ip netns add ${NETNS} +} + cleanup() { - ip netns del $NETNS + ip netns del $NETNS 2>/dev/null } trap cleanup EXIT fail() { - echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2 - ret=1 + echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2 + ret=1 +} + +tests_run() +{ + local current_test + for current_test in ${TESTS:-$ALL_TESTS}; do + $current_test + done +} + +test_vlan_filter_check() { + setup + ip netns exec ${NETNS} ip link add bond0 type bond mode 0 + ip netns exec ${NETNS} ip link add bond_slave_1 type veth peer veth2 + ip netns exec ${NETNS} ip link set bond_slave_1 master bond0 + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off + ip netns exec ${NETNS} ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0 + ip netns exec ${NETNS} ip link add link bond0 name bond0.0 type vlan id 0 + ip netns exec ${NETNS} ip link set bond_slave_1 nomaster + ip netns exec ${NETNS} ip link del veth2 || fail "Please check vlan HW filter function" + cleanup } -ip netns add ${NETNS} -ip netns exec ${NETNS} ip link add bond0 type bond mode 0 -ip netns exec ${NETNS} ip link add bond_slave_1 type veth peer veth2 -ip netns exec ${NETNS} ip link set bond_slave_1 master bond0 -ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off -ip netns exec ${NETNS} ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0 -ip netns exec ${NETNS} ip link add link bond0 name bond0.0 type vlan id 0 -ip netns exec ${NETNS} ip link set bond_slave_1 nomaster -ip netns exec ${NETNS} ip link del veth2 || fail "Please check vlan HW filter function" +#enable vlan_filter feature of real_dev with vlan0 during running time +test_vlan0_del_crash_01() { + setup + ip netns exec ${NETNS} ip link add bond0 type bond mode 0 + ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off + ip netns exec ${NETNS} ifconfig bond0 up + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on + ip netns exec ${NETNS} ifconfig bond0 down + ip netns exec ${NETNS} ifconfig bond0 up + ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function" + cleanup +} + +#enable vlan_filter feature and add vlan0 for real_dev during running time +test_vlan0_del_crash_02() { + setup + ip netns exec ${NETNS} ip link add bond0 type bond mode 0 + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off + ip netns exec ${NETNS} ifconfig bond0 up + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on + ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q + ip netns exec ${NETNS} ifconfig bond0 down + ip netns exec ${NETNS} ifconfig bond0 up + ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function" + cleanup +} + +#enable vlan_filter feature of real_dev during running time +#test kernel_bug of vlan unregister +test_vlan0_del_crash_03() { + setup + ip netns exec ${NETNS} ip link add bond0 type bond mode 0 + ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off + ip netns exec ${NETNS} ifconfig bond0 up + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on + ip netns exec ${NETNS} ifconfig bond0 down + ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function" + cleanup +} + +test_vid0_memleak() { + setup + ip netns exec ${NETNS} ip link add bond0 up type bond mode 0 + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off + ip netns exec ${NETNS} ip link del dev bond0 || fail "Please check vlan HW filter function" + cleanup +} +tests_run exit $ret -- cgit v1.2.3 From 88b06e4fb4bf9ee36f788cb6f5a65d188341d0e2 Mon Sep 17 00:00:00 2001 From: William Liu Date: Thu, 17 Jul 2025 02:29:47 +0000 Subject: selftests/tc-testing: Test htb_dequeue_tree with deactivation and row emptying Ensure that any deactivation and row emptying that occurs during htb_dequeue_tree does not cause a kernel panic. This scenario originally triggered a kernel BUG_ON, and we are checking for a graceful fail now. Signed-off-by: William Liu Signed-off-by: Savino Dicanosa Link: https://patch.msgid.link/20250717022912.221426-1-will@willsroot.io Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/infra/qdiscs.json | 26 ++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index b344570e7f40..c6db7fa94f55 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -127,6 +127,32 @@ "$IP addr del 10.10.10.10/24 dev $DUMMY" ] }, + { + "id": "5456", + "name": "Test htb_dequeue_tree with deactivation and row emptying", + "category": [ + "qdisc", + "htb" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: htb default 1", + "$TC class add dev $DUMMY parent 1: classid 1:1 htb rate 64bit ", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2: netem", + "$TC qdisc add dev $DUMMY parent 2:1 handle 3: blackhole" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.11.11", + "expExitCode": "1", + "verifyCmd": "$TC -j qdisc show dev $DUMMY", + "matchJSON": [], + "teardown": [ + "$TC qdisc del dev $DUMMY root" + ] + }, { "id": "c024", "name": "Test TBF with SKBPRIO - catch qlen corner cases", -- cgit v1.2.3 From 954bacce36d976fe472090b55987df66da00c49b Mon Sep 17 00:00:00 2001 From: Shashank Balaji Date: Fri, 4 Jul 2025 20:08:41 +0900 Subject: selftests/cgroup: fix cpu.max tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Current cpu.max tests (both the normal one and the nested one) are broken. They setup cpu.max with 1000 us quota and the default period (100,000 us). A cpu hog is run for a duration of 1s as per wall clock time. This corresponds to 10 periods, hence an expected usage of 10,000 us. We want the measured usage (as per cpu.stat) to be close to 10,000 us. Previously, this approximate equality test was done by `!values_close(usage_usec, expected_usage_usec, 95)`: if the absolute difference between usage_usec and expected_usage_usec is greater than 95% of their sum, then we pass. And expected_usage_usec was set to 1,000,000 us. Mathematically, this translates to the following being true for pass: |usage - expected_usage| > (usage + expected_usage)*0.95 If usage > expected_usage: usage - expected_usage > (usage + expected_usage)*0.95 0.05*usage > 1.95*expected_usage usage > 39*expected_usage = 39s If usage < expected_usage: expected_usage - usage > (usage + expected_usage)*0.95 0.05*expected_usage > 1.95*usage usage < 0.0256*expected_usage = 25,600 us Combined, Pass if usage < 25,600 us or > 39 s, which makes no sense given that all we need is for usage_usec to be close to 10,000 us. Fix this by explicitly calcuating the expected usage duration based on the configured quota, default period, and the duration, and compare usage_usec and expected_usage_usec using values_close() with a 10% error margin. Also, use snprintf to get the quota string to write to cpu.max instead of hardcoding the quota, ensuring a single source of truth. Remove the check comparing user_usec and expected_usage_usec, since on running this test modified with printfs, it's seen that user_usec and usage_usec can regularly exceed the theoretical expected_usage_usec: $ sudo ./test_cpu user: 10485, usage: 10485, expected: 10000 ok 1 test_cpucg_max user: 11127, usage: 11127, expected: 10000 ok 2 test_cpucg_max_nested $ sudo ./test_cpu user: 10286, usage: 10286, expected: 10000 ok 1 test_cpucg_max user: 10404, usage: 11271, expected: 10000 ok 2 test_cpucg_max_nested Hence, a values_close() check of usage_usec and expected_usage_usec is sufficient. Fixes: a79906570f9646ae17 ("cgroup: Add test_cpucg_max_nested() testcase") Fixes: 889ab8113ef1386c57 ("cgroup: Add test_cpucg_max() testcase") Acked-by: Michal Koutný Signed-off-by: Shashank Balaji Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/test_cpu.c | 63 +++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 20 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c index a2b50af8e9ee..2a60e6c41940 100644 --- a/tools/testing/selftests/cgroup/test_cpu.c +++ b/tools/testing/selftests/cgroup/test_cpu.c @@ -2,6 +2,7 @@ #define _GNU_SOURCE #include +#include #include #include #include @@ -645,10 +646,16 @@ test_cpucg_nested_weight_underprovisioned(const char *root) static int test_cpucg_max(const char *root) { int ret = KSFT_FAIL; - long usage_usec, user_usec; - long usage_seconds = 1; - long expected_usage_usec = usage_seconds * USEC_PER_SEC; + long quota_usec = 1000; + long default_period_usec = 100000; /* cpu.max's default period */ + long duration_seconds = 1; + + long duration_usec = duration_seconds * USEC_PER_SEC; + long usage_usec, n_periods, remainder_usec, expected_usage_usec; char *cpucg; + char quota_buf[32]; + + snprintf(quota_buf, sizeof(quota_buf), "%ld", quota_usec); cpucg = cg_name(root, "cpucg_test"); if (!cpucg) @@ -657,13 +664,13 @@ static int test_cpucg_max(const char *root) if (cg_create(cpucg)) goto cleanup; - if (cg_write(cpucg, "cpu.max", "1000")) + if (cg_write(cpucg, "cpu.max", quota_buf)) goto cleanup; struct cpu_hog_func_param param = { .nprocs = 1, .ts = { - .tv_sec = usage_seconds, + .tv_sec = duration_seconds, .tv_nsec = 0, }, .clock_type = CPU_HOG_CLOCK_WALL, @@ -672,14 +679,19 @@ static int test_cpucg_max(const char *root) goto cleanup; usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec"); - user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec"); - if (user_usec <= 0) + if (usage_usec <= 0) goto cleanup; - if (user_usec >= expected_usage_usec) - goto cleanup; + /* + * The following calculation applies only since + * the cpu hog is set to run as per wall-clock time + */ + n_periods = duration_usec / default_period_usec; + remainder_usec = duration_usec - n_periods * default_period_usec; + expected_usage_usec + = n_periods * quota_usec + MIN(remainder_usec, quota_usec); - if (values_close(usage_usec, expected_usage_usec, 95)) + if (!values_close(usage_usec, expected_usage_usec, 10)) goto cleanup; ret = KSFT_PASS; @@ -698,10 +710,16 @@ cleanup: static int test_cpucg_max_nested(const char *root) { int ret = KSFT_FAIL; - long usage_usec, user_usec; - long usage_seconds = 1; - long expected_usage_usec = usage_seconds * USEC_PER_SEC; + long quota_usec = 1000; + long default_period_usec = 100000; /* cpu.max's default period */ + long duration_seconds = 1; + + long duration_usec = duration_seconds * USEC_PER_SEC; + long usage_usec, n_periods, remainder_usec, expected_usage_usec; char *parent, *child; + char quota_buf[32]; + + snprintf(quota_buf, sizeof(quota_buf), "%ld", quota_usec); parent = cg_name(root, "cpucg_parent"); child = cg_name(parent, "cpucg_child"); @@ -717,13 +735,13 @@ static int test_cpucg_max_nested(const char *root) if (cg_create(child)) goto cleanup; - if (cg_write(parent, "cpu.max", "1000")) + if (cg_write(parent, "cpu.max", quota_buf)) goto cleanup; struct cpu_hog_func_param param = { .nprocs = 1, .ts = { - .tv_sec = usage_seconds, + .tv_sec = duration_seconds, .tv_nsec = 0, }, .clock_type = CPU_HOG_CLOCK_WALL, @@ -732,14 +750,19 @@ static int test_cpucg_max_nested(const char *root) goto cleanup; usage_usec = cg_read_key_long(child, "cpu.stat", "usage_usec"); - user_usec = cg_read_key_long(child, "cpu.stat", "user_usec"); - if (user_usec <= 0) + if (usage_usec <= 0) goto cleanup; - if (user_usec >= expected_usage_usec) - goto cleanup; + /* + * The following calculation applies only since + * the cpu hog is set to run as per wall-clock time + */ + n_periods = duration_usec / default_period_usec; + remainder_usec = duration_usec - n_periods * default_period_usec; + expected_usage_usec + = n_periods * quota_usec + MIN(remainder_usec, quota_usec); - if (values_close(usage_usec, expected_usage_usec, 95)) + if (!values_close(usage_usec, expected_usage_usec, 10)) goto cleanup; ret = KSFT_PASS; -- cgit v1.2.3 From 1560af51e1ea32f87b7be2c28bb623308b37acf3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 15 Jul 2025 17:03:22 -0700 Subject: selftests: drv-net: rss_api: factor out checking min queue count Multiple tests check min queue count, create a helper. Link: https://patch.msgid.link/20250716000331.1378807-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/rss_api.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/hw/rss_api.py b/tools/testing/selftests/drivers/net/hw/rss_api.py index 6ae908bed1a4..2c76fbdb2617 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_api.py +++ b/tools/testing/selftests/drivers/net/hw/rss_api.py @@ -13,6 +13,13 @@ from lib.py import EthtoolFamily from lib.py import NetDrvEnv +def _require_2qs(cfg): + qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*")) + if qcnt < 2: + raise KsftSkipEx(f"Local has only {qcnt} queues") + return qcnt + + def _ethtool_create(cfg, act, opts): output = ethtool(f"{act} {cfg.ifname} {opts}").stdout # Output will be something like: "New RSS context is 1" or @@ -57,10 +64,7 @@ def test_rxfh_indir_ntf(cfg): Check that Netlink notifications are generated when RSS indirection table was modified. """ - - qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*")) - if qcnt < 2: - raise KsftSkipEx(f"Local has only {qcnt} queues") + _require_2qs(cfg) ethnl = EthtoolFamily() ethnl.ntf_subscribe("monitor") @@ -88,10 +92,7 @@ def test_rxfh_indir_ctx_ntf(cfg): Check that Netlink notifications are generated when RSS indirection table was modified on an additional RSS context. """ - - qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*")) - if qcnt < 2: - raise KsftSkipEx(f"Local has only {qcnt} queues") + _require_2qs(cfg) ctx_id = _ethtool_create(cfg, "-X", "context new") defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") -- cgit v1.2.3 From 6e7eb93a692c21d8d1496e31df8b0d5f77d98ea2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 15 Jul 2025 17:03:24 -0700 Subject: selftests: drv-net: rss_api: test setting indirection table via Netlink Test setting indirection table via Netlink. # ./tools/testing/selftests/drivers/net/hw/rss_api.py TAP version 13 1..6 ok 1 rss_api.test_rxfh_nl_set_fail ok 2 rss_api.test_rxfh_nl_set_indir ok 3 rss_api.test_rxfh_nl_set_indir_ctx ok 4 rss_api.test_rxfh_indir_ntf ok 5 rss_api.test_rxfh_indir_ctx_ntf ok 6 rss_api.test_rxfh_fields # Totals: pass:6 fail:0 xfail:0 xpass:0 skip:0 error:0 Reviewed-by: Edward Cree Link: https://patch.msgid.link/20250716000331.1378807-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/rss_api.py | 96 ++++++++++++++++++++++- 1 file changed, 93 insertions(+), 3 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/hw/rss_api.py b/tools/testing/selftests/drivers/net/hw/rss_api.py index 2c76fbdb2617..7353e8f3e1a4 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_api.py +++ b/tools/testing/selftests/drivers/net/hw/rss_api.py @@ -6,10 +6,10 @@ API level tests for RSS (mostly Netlink vs IOCTL). """ import glob -from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_is, ksft_ne +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_is, ksft_ne, ksft_raises from lib.py import KsftSkipEx, KsftFailEx -from lib.py import defer, ethtool -from lib.py import EthtoolFamily +from lib.py import defer, ethtool, CmdExitFailure +from lib.py import EthtoolFamily, NlError from lib.py import NetDrvEnv @@ -59,6 +59,95 @@ def _ethtool_get_cfg(cfg, fl_type, to_nl=False): return ret +def test_rxfh_nl_set_fail(cfg): + """ + Test error path of Netlink SET. + """ + _require_2qs(cfg) + + ethnl = EthtoolFamily() + ethnl.ntf_subscribe("monitor") + + with ksft_raises(NlError): + ethnl.rss_set({"header": {"dev-name": "lo"}, + "indir": None}) + + with ksft_raises(NlError): + ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "indir": [100000]}) + ntf = next(ethnl.poll_ntf(duration=0.2), None) + ksft_is(ntf, None) + + +def test_rxfh_nl_set_indir(cfg): + """ + Test setting indirection table via Netlink. + """ + qcnt = _require_2qs(cfg) + + # Test some SETs with a value + reset = defer(cfg.ethnl.rss_set, + {"header": {"dev-index": cfg.ifindex}, "indir": None}) + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "indir": [1]}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(set(rss.get("indir", [-1])), {1}) + + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "indir": [0, 1]}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(set(rss.get("indir", [-1])), {0, 1}) + + # Make sure we can't set the queue count below max queue used + with ksft_raises(CmdExitFailure): + ethtool(f"-L {cfg.ifname} combined 0 rx 1") + with ksft_raises(CmdExitFailure): + ethtool(f"-L {cfg.ifname} combined 1 rx 0") + + # Test reset back to default + reset.exec() + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(set(rss.get("indir", [-1])), set(range(qcnt))) + + +def test_rxfh_nl_set_indir_ctx(cfg): + """ + Test setting indirection table for a custom context via Netlink. + """ + _require_2qs(cfg) + + # Get setting for ctx 0, we'll make sure they don't get clobbered + dflt = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + + # Create context + ctx_id = _ethtool_create(cfg, "-X", "context new") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "context": ctx_id, "indir": [1]}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}, + "context": ctx_id}) + ksft_eq(set(rss.get("indir", [-1])), {1}) + + ctx0 = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(ctx0, dflt) + + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "context": ctx_id, "indir": [0, 1]}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}, + "context": ctx_id}) + ksft_eq(set(rss.get("indir", [-1])), {0, 1}) + + ctx0 = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(ctx0, dflt) + + # Make sure we can't set the queue count below max queue used + with ksft_raises(CmdExitFailure): + ethtool(f"-L {cfg.ifname} combined 0 rx 1") + with ksft_raises(CmdExitFailure): + ethtool(f"-L {cfg.ifname} combined 1 rx 0") + + def test_rxfh_indir_ntf(cfg): """ Check that Netlink notifications are generated when RSS indirection @@ -129,6 +218,7 @@ def main() -> None: """ Ksft boiler plate main """ with NetDrvEnv(__file__, nsim_test=False) as cfg: + cfg.ethnl = EthtoolFamily() ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, )) ksft_exit() -- cgit v1.2.3 From 169b26207a46a558588c9558da7b375dfcd61513 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 15 Jul 2025 17:03:27 -0700 Subject: selftests: drv-net: rss_api: test setting hashing key via Netlink Test setting hashing key via Netlink. # ./tools/testing/selftests/drivers/net/hw/rss_api.py TAP version 13 1..7 ok 1 rss_api.test_rxfh_nl_set_fail ok 2 rss_api.test_rxfh_nl_set_indir ok 3 rss_api.test_rxfh_nl_set_indir_ctx ok 4 rss_api.test_rxfh_indir_ntf ok 5 rss_api.test_rxfh_indir_ctx_ntf ok 6 rss_api.test_rxfh_nl_set_key ok 7 rss_api.test_rxfh_fields # Totals: pass:7 fail:0 xfail:0 xpass:0 skip:0 error:0 Link: https://patch.msgid.link/20250716000331.1378807-8-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/rss_api.py | 33 +++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/hw/rss_api.py b/tools/testing/selftests/drivers/net/hw/rss_api.py index 7353e8f3e1a4..6d48799a423c 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_api.py +++ b/tools/testing/selftests/drivers/net/hw/rss_api.py @@ -6,6 +6,7 @@ API level tests for RSS (mostly Netlink vs IOCTL). """ import glob +import random from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_is, ksft_ne, ksft_raises from lib.py import KsftSkipEx, KsftFailEx from lib.py import defer, ethtool, CmdExitFailure @@ -199,6 +200,38 @@ def test_rxfh_indir_ctx_ntf(cfg): ksft_eq(set(ntf["msg"]["indir"]), {1}) +def test_rxfh_nl_set_key(cfg): + """ + Test setting hashing key via Netlink. + """ + + dflt = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + defer(cfg.ethnl.rss_set, + {"header": {"dev-index": cfg.ifindex}, + "hkey": dflt["hkey"], "indir": None}) + + # Empty key should error out + with ksft_raises(NlError) as cm: + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "hkey": None}) + ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.hkey') + + # Set key to random + mod = random.randbytes(len(dflt["hkey"])) + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "hkey": mod}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(rss.get("hkey", [-1]), mod) + + # Set key to random and indir tbl to something at once + mod = random.randbytes(len(dflt["hkey"])) + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "indir": [0, 1], "hkey": mod}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(rss.get("hkey", [-1]), mod) + ksft_eq(set(rss.get("indir", [-1])), {0, 1}) + + def test_rxfh_fields(cfg): """ Test reading Rx Flow Hash over Netlink. -- cgit v1.2.3 From c1b27f0695d65e91878d6ae917c285d3163297e4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 15 Jul 2025 17:03:28 -0700 Subject: netlink: specs: define input-xfrm enum in the spec Help YNL decode the values for input-xfrm by defining the possible values in the spec. Don't define "no change" as it's an IOCTL artifact with no use in Netlink. With this change on mlx5 input-xfrm gets decoded: # ynl --family ethtool --dump rss-get [{'header': {'dev-index': 2, 'dev-name': 'eth0'}, 'hfunc': 1, 'hkey': b'V\xa8\xf9\x9 ...', 'indir': [0, 1, ... ], 'input-xfrm': {'sym-or-xor'}, <<< 'flow-hash': {'ah4': {'ip-dst', 'ip-src'}, 'ah6': {'ip-dst', 'ip-src'}, 'esp4': {'ip-dst', 'ip-src'}, 'esp6': {'ip-dst', 'ip-src'}, 'ip4': {'ip-dst', 'ip-src'}, 'ip6': {'ip-dst', 'ip-src'}, 'tcp4': {'l4-b-0-1', 'ip-dst', 'l4-b-2-3', 'ip-src'}, 'tcp6': {'l4-b-0-1', 'ip-dst', 'l4-b-2-3', 'ip-src'}, 'udp4': {'l4-b-0-1', 'ip-dst', 'l4-b-2-3', 'ip-src'}, 'udp6': {'l4-b-0-1', 'ip-dst', 'l4-b-2-3', 'ip-src'}} }] Reviewed-by: Gal Pressman Link: https://patch.msgid.link/20250716000331.1378807-9-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/ethtool.yaml | 21 +++++++++++++++++++++ .../selftests/drivers/net/hw/rss_input_xfrm.py | 6 +++--- 2 files changed, 24 insertions(+), 3 deletions(-) (limited to 'tools/testing/selftests') diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index aa55fc9068e1..d0a9c4120a19 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -158,6 +158,26 @@ definitions: - name: pse-event-sw-pw-control-error doc: PSE faced an error managing the power control from software + - + name: input-xfrm + doc: RSS hash function transformations. + type: flags + enum-name: + name-prefix: rxh-xfrm- + header: linux/ethtool.h + entries: + - + name: sym-xor + doc: >- + XOR the corresponding source and destination fields of each specified + protocol. Both copies of the XOR'ed fields are fed into the RSS and + RXHASH calculation. Note that this XORing reduces the input set + entropy and could be exploited to reduce the RSS queue spread. + - + name: sym-or-xor + doc: >- + Similar to SYM_XOR, except that one copy of the XOR'ed fields is + replaced by an OR of the same fields. - name: rxfh-fields name-prefix: rxh- @@ -1621,6 +1641,7 @@ attribute-sets: - name: input-xfrm type: u32 + enum: input-xfrm - name: start-context type: u32 diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py index 648ff50bc1c3..6e90fb290564 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py +++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py @@ -37,11 +37,11 @@ def test_rss_input_xfrm(cfg, ipver): if not hasattr(socket, "SO_INCOMING_CPU"): raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11") - input_xfrm = cfg.ethnl.rss_get( - {'header': {'dev-name': cfg.ifname}}).get('input-xfrm') + rss = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}}) + input_xfrm = set(filter(lambda x: 'sym' in x, rss.get('input-xfrm', {}))) # Check for symmetric xor/or-xor - if not input_xfrm or (input_xfrm != 1 and input_xfrm != 2): + if not input_xfrm: raise KsftSkipEx("Symmetric RSS hash not requested") cpus = set() -- cgit v1.2.3 From 00e6c61c5a0a8277e0cb3e1ec9fdaf79a5928819 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 15 Jul 2025 17:03:31 -0700 Subject: selftests: drv-net: rss_api: test input-xfrm and hash fields Test configuring input-xfrm and hash fields with all the limitations. Tested on mlx5 (CX6): # ./ksft-net-drv/drivers/net/hw/rss_api.py TAP version 13 1..10 ok 1 rss_api.test_rxfh_nl_set_fail ok 2 rss_api.test_rxfh_nl_set_indir ok 3 rss_api.test_rxfh_nl_set_indir_ctx ok 4 rss_api.test_rxfh_indir_ntf ok 5 rss_api.test_rxfh_indir_ctx_ntf ok 6 rss_api.test_rxfh_nl_set_key ok 7 rss_api.test_rxfh_fields ok 8 rss_api.test_rxfh_fields_set ok 9 rss_api.test_rxfh_fields_set_xfrm ok 10 rss_api.test_rxfh_fields_ntf # Totals: pass:10 fail:0 xfail:0 xpass:0 skip:0 error:0 Link: https://patch.msgid.link/20250716000331.1378807-12-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/rss_api.py | 143 ++++++++++++++++++++++ 1 file changed, 143 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/hw/rss_api.py b/tools/testing/selftests/drivers/net/hw/rss_api.py index 6d48799a423c..424743bb583b 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_api.py +++ b/tools/testing/selftests/drivers/net/hw/rss_api.py @@ -247,6 +247,149 @@ def test_rxfh_fields(cfg): comment="Config for " + fl_type) +def test_rxfh_fields_set(cfg): + """ Test configuring Rx Flow Hash over Netlink. """ + + flow_types = ["tcp4", "tcp6", "udp4", "udp6"] + + # Collect current settings + cfg_old = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + # symmetric hashing is config-order-sensitive make sure we leave + # symmetric mode, or make the flow-hash sym-compatible first + changes = [{"flow-hash": cfg_old["flow-hash"],}, + {"input-xfrm": cfg_old.get("input-xfrm", {}),}] + if cfg_old.get("input-xfrm"): + changes = list(reversed(changes)) + for old in changes: + defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},} | old) + + # symmetric hashing prevents some of the configs below + if cfg_old.get("input-xfrm"): + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "input-xfrm": {}}) + + for fl_type in flow_types: + cur = _ethtool_get_cfg(cfg, fl_type) + if cur == "sdfn": + change_nl = {"ip-src", "ip-dst"} + change_ic = "sd" + else: + change_nl = {"l4-b-0-1", "l4-b-2-3", "ip-src", "ip-dst"} + change_ic = "sdfn" + + cfg.ethnl.rss_set({ + "header": {"dev-index": cfg.ifindex}, + "flow-hash": {fl_type: change_nl} + }) + reset = defer(ethtool, f"--disable-netlink -N {cfg.ifname} " + f"rx-flow-hash {fl_type} {cur}") + + cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(change_nl, cfg_nl["flow-hash"][fl_type], + comment=f"Config for {fl_type} over Netlink") + cfg_ic = _ethtool_get_cfg(cfg, fl_type) + ksft_eq(change_ic, cfg_ic, + comment=f"Config for {fl_type} over IOCTL") + + reset.exec() + cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(cfg_old["flow-hash"][fl_type], cfg_nl["flow-hash"][fl_type], + comment=f"Un-config for {fl_type} over Netlink") + cfg_ic = _ethtool_get_cfg(cfg, fl_type) + ksft_eq(cur, cfg_ic, comment=f"Un-config for {fl_type} over IOCTL") + + # Try to set multiple at once, the defer was already installed at the start + change = {"ip-src"} + if change == cfg_old["flow-hash"]["tcp4"]: + change = {"ip-dst"} + cfg.ethnl.rss_set({ + "header": {"dev-index": cfg.ifindex}, + "flow-hash": {x: change for x in flow_types} + }) + + cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + for fl_type in flow_types: + ksft_eq(change, cfg_nl["flow-hash"][fl_type], + comment=f"multi-config for {fl_type} over Netlink") + + +def test_rxfh_fields_set_xfrm(cfg): + """ Test changing Rx Flow Hash vs xfrm_input at once. """ + + def set_rss(cfg, xfrm, fh): + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "input-xfrm": xfrm, "flow-hash": fh}) + + # Install the reset handler + cfg_old = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + # symmetric hashing is config-order-sensitive make sure we leave + # symmetric mode, or make the flow-hash sym-compatible first + changes = [{"flow-hash": cfg_old["flow-hash"],}, + {"input-xfrm": cfg_old.get("input-xfrm", {}),}] + if cfg_old.get("input-xfrm"): + changes = list(reversed(changes)) + for old in changes: + defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},} | old) + + # Make sure we start with input-xfrm off, and tcp4 config non-sym + set_rss(cfg, {}, {}) + set_rss(cfg, {}, {"tcp4": {"ip-src"}}) + + # Setting sym and fixing tcp4 config not expected to pass right now + with ksft_raises(NlError): + set_rss(cfg, {"sym-xor"}, {"tcp4": {"ip-src", "ip-dst"}}) + # One at a time should work, hopefully + set_rss(cfg, 0, {"tcp4": {"ip-src", "ip-dst"}}) + no_support = False + try: + set_rss(cfg, {"sym-xor"}, {}) + except NlError: + try: + set_rss(cfg, {"sym-or-xor"}, {}) + except NlError: + no_support = True + if no_support: + raise KsftSkipEx("no input-xfrm supported") + # Disabling two at once should not work either without kernel changes + with ksft_raises(NlError): + set_rss(cfg, {}, {"tcp4": {"ip-src"}}) + + +def test_rxfh_fields_ntf(cfg): + """ Test Rx Flow Hash notifications. """ + + cur = _ethtool_get_cfg(cfg, "tcp4") + if cur == "sdfn": + change = {"ip-src", "ip-dst"} + else: + change = {"l4-b-0-1", "l4-b-2-3", "ip-src", "ip-dst"} + + ethnl = EthtoolFamily() + ethnl.ntf_subscribe("monitor") + + ethnl.rss_set({ + "header": {"dev-index": cfg.ifindex}, + "flow-hash": {"tcp4": change} + }) + reset = defer(ethtool, + f"--disable-netlink -N {cfg.ifname} rx-flow-hash tcp4 {cur}") + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("No notification received after IOCTL change") + ksft_eq(ntf["name"], "rss-ntf") + ksft_eq(ntf["msg"]["flow-hash"]["tcp4"], change) + ksft_eq(next(ethnl.poll_ntf(duration=0.01), None), None) + + reset.exec() + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("No notification received after Netlink change") + ksft_eq(ntf["name"], "rss-ntf") + ksft_ne(ntf["msg"]["flow-hash"]["tcp4"], change) + ksft_eq(next(ethnl.poll_ntf(duration=0.01), None), None) + + def main() -> None: """ Ksft boiler plate main """ -- cgit v1.2.3 From 797f080c463d9866ca8a4bcc8cf0f512dec634e6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 16 Jul 2025 13:57:12 -0700 Subject: selftests: net: prevent Python from buffering the output Make sure Python doesn't buffer the output, otherwise for some tests we may see false positive timeouts in NIPA. NIPA thinks that a machine has hung if the test doesn't print anything for 3min. This is also nice to heave for running the tests manually, especially in vng. Reviewed-by: Petr Machata Link: https://patch.msgid.link/20250716205712.1787325-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib/py/ksft.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py index 61287c203b6e..8e35ed12ed9e 100644 --- a/tools/testing/selftests/net/lib/py/ksft.py +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -32,6 +32,7 @@ class KsftTerminate(KeyboardInterrupt): def ksft_pr(*objs, **kwargs): + kwargs["flush"] = True print("#", *objs, **kwargs) @@ -139,7 +140,7 @@ def ktap_result(ok, cnt=1, case="", comment=""): res += "." + str(case.__name__) if comment: res += " # " + comment - print(res) + print(res, flush=True) def ksft_flush_defer(): @@ -227,8 +228,8 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0} - print("TAP version 13") - print("1.." + str(len(cases))) + print("TAP version 13", flush=True) + print("1.." + str(len(cases)), flush=True) global KSFT_RESULT cnt = 0 -- cgit v1.2.3 From b6645645d0d0b6c5cb33cf58c9de5e74b6701326 Mon Sep 17 00:00:00 2001 From: Tianyi Cui <1997cui@gmail.com> Date: Wed, 16 Jul 2025 18:19:13 -0700 Subject: selftests/drivers/net: Support ipv6 for napi_id test Add support for IPv6 environment for napi_id test. Test Plan: ./run_kselftest.sh -t drivers/net:napi_id.py TAP version 13 1..1 # timeout set to 45 # selftests: drivers/net: napi_id.py # TAP version 13 # 1..1 # ok 1 napi_id.test_napi_id # # Totals: pass:1 fail:0 xfail:0 xpass:0 skip:0 error:0 ok 1 selftests: drivers/net: napi_id.py Signed-off-by: Tianyi Cui <1997cui@gmail.com> Link: https://patch.msgid.link/20250717011913.1248816-1-1997cui@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/napi_id.py | 4 +-- .../testing/selftests/drivers/net/napi_id_helper.c | 35 ++++++++++++++++------ 2 files changed, 28 insertions(+), 11 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/napi_id.py b/tools/testing/selftests/drivers/net/napi_id.py index 356bac46ba04..d05eddcad539 100755 --- a/tools/testing/selftests/drivers/net/napi_id.py +++ b/tools/testing/selftests/drivers/net/napi_id.py @@ -7,10 +7,10 @@ from lib.py import bkg, cmd, rand_port, NetNSEnter def test_napi_id(cfg) -> None: port = rand_port() - listen_cmd = f"{cfg.test_dir}/napi_id_helper {cfg.addr_v['4']} {port}" + listen_cmd = f"{cfg.test_dir}/napi_id_helper {cfg.addr} {port}" with bkg(listen_cmd, ksft_wait=3) as server: - cmd(f"echo a | socat - TCP:{cfg.addr_v['4']}:{port}", host=cfg.remote, shell=True) + cmd(f"echo a | socat - TCP:{cfg.baddr}:{port}", host=cfg.remote, shell=True) ksft_eq(0, server.ret) diff --git a/tools/testing/selftests/drivers/net/napi_id_helper.c b/tools/testing/selftests/drivers/net/napi_id_helper.c index eecd610c2109..7f49ca6c8637 100644 --- a/tools/testing/selftests/drivers/net/napi_id_helper.c +++ b/tools/testing/selftests/drivers/net/napi_id_helper.c @@ -7,41 +7,58 @@ #include #include #include +#include #include "../../net/lib/ksft.h" int main(int argc, char *argv[]) { - struct sockaddr_in address; + struct sockaddr_storage address; + struct addrinfo *result; + struct addrinfo hints; unsigned int napi_id; - unsigned int port; + socklen_t addr_len; socklen_t optlen; char buf[1024]; int opt = 1; + int family; int server; int client; int ret; - server = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + hints.ai_flags = AI_PASSIVE; + + ret = getaddrinfo(argv[1], argv[2], &hints, &result); + if (ret != 0) { + fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(ret)); + return 1; + } + + family = result->ai_family; + addr_len = result->ai_addrlen; + + server = socket(family, SOCK_STREAM, IPPROTO_TCP); if (server < 0) { perror("socket creation failed"); + freeaddrinfo(result); if (errno == EAFNOSUPPORT) return -1; return 1; } - port = atoi(argv[2]); - if (setsockopt(server, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt))) { perror("setsockopt"); + freeaddrinfo(result); return 1; } - address.sin_family = AF_INET; - inet_pton(AF_INET, argv[1], &address.sin_addr); - address.sin_port = htons(port); + memcpy(&address, result->ai_addr, result->ai_addrlen); + freeaddrinfo(result); - if (bind(server, (struct sockaddr *)&address, sizeof(address)) < 0) { + if (bind(server, (struct sockaddr *)&address, addr_len) < 0) { perror("bind failed"); return 1; } -- cgit v1.2.3 From 5d8b1d957def5358113aa39b299c084836893b73 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 17 Jul 2025 14:59:02 -0300 Subject: iommufd/selftest: Test reserved regions near ULONG_MAX This has triggered an overflow inside the ioas iova auto allocation logic, test it directly. Use the same stimulus syzkaller found. Link: https://patch.msgid.link/all/2-v1-7b4a16fc390b+10f4-iommufd_alloc_overflow_jgg@nvidia.com/ Tested-by: Yi Liu Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- tools/testing/selftests/iommu/iommufd.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index d59d48022a24..9d6c6fbbebb4 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -968,6 +968,33 @@ TEST_F(iommufd_ioas, area_auto_iova) test_ioctl_ioas_unmap(iovas[i], PAGE_SIZE * (i + 1)); } +/* https://lore.kernel.org/r/685af644.a00a0220.2e5631.0094.GAE@google.com */ +TEST_F(iommufd_ioas, reserved_overflow) +{ + struct iommu_test_cmd test_cmd = { + .size = sizeof(test_cmd), + .op = IOMMU_TEST_OP_ADD_RESERVED, + .id = self->ioas_id, + .add_reserved.start = 6, + }; + unsigned int map_len; + __u64 iova; + + if (PAGE_SIZE == 4096) { + test_cmd.add_reserved.length = 0xffffffffffff8001; + map_len = 0x5000; + } else { + test_cmd.add_reserved.length = + 0xffffffffffffffff - MOCK_PAGE_SIZE * 16; + map_len = MOCK_PAGE_SIZE * 10; + } + + ASSERT_EQ(0, + ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ADD_RESERVED), + &test_cmd)); + test_err_ioctl_ioas_map(ENOSPC, buffer, map_len, &iova); +} + TEST_F(iommufd_ioas, area_allowed) { struct iommu_test_cmd test_cmd = { -- cgit v1.2.3 From c4e496d413686cbd717139cfd8c58f467eff9e08 Mon Sep 17 00:00:00 2001 From: Xu Yilun Date: Wed, 16 Jul 2025 15:03:47 +0800 Subject: iommufd/selftest: Explicitly skip tests for inapplicable variant no_viommu is not applicable for some viommu/vdevice tests. Explicitly report the skipping, don't do it silently. Opportunistically adjust the line wrappings after the indentation changes using git clang-format. Only add the prints. No functional change intended. Link: https://patch.msgid.link/r/20250716070349.1807226-7-yilun.xu@linux.intel.com Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Nicolin Chen Tested-by: Nicolin Chen Signed-off-by: Xu Yilun Signed-off-by: Jason Gunthorpe --- tools/testing/selftests/iommu/iommufd.c | 363 ++++++++++++++++---------------- 1 file changed, 176 insertions(+), 187 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 9d6c6fbbebb4..d16fd9928b31 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -2806,35 +2806,32 @@ TEST_F(iommufd_viommu, viommu_alloc_nested_iopf) uint32_t fault_fd; uint32_t vdev_id; - if (self->device_id) { - test_ioctl_fault_alloc(&fault_id, &fault_fd); - test_err_hwpt_alloc_iopf( - ENOENT, dev_id, viommu_id, UINT32_MAX, - IOMMU_HWPT_FAULT_ID_VALID, &iopf_hwpt_id, - IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data)); - test_err_hwpt_alloc_iopf( - EOPNOTSUPP, dev_id, viommu_id, fault_id, - IOMMU_HWPT_FAULT_ID_VALID | (1 << 31), &iopf_hwpt_id, - IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data)); - test_cmd_hwpt_alloc_iopf( - dev_id, viommu_id, fault_id, IOMMU_HWPT_FAULT_ID_VALID, - &iopf_hwpt_id, IOMMU_HWPT_DATA_SELFTEST, &data, - sizeof(data)); + if (!dev_id) + SKIP(return, "Skipping test for variant no_viommu"); - /* Must allocate vdevice before attaching to a nested hwpt */ - test_err_mock_domain_replace(ENOENT, self->stdev_id, - iopf_hwpt_id); - test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id); - test_cmd_mock_domain_replace(self->stdev_id, iopf_hwpt_id); - EXPECT_ERRNO(EBUSY, - _test_ioctl_destroy(self->fd, iopf_hwpt_id)); - test_cmd_trigger_iopf(dev_id, fault_fd); + test_ioctl_fault_alloc(&fault_id, &fault_fd); + test_err_hwpt_alloc_iopf(ENOENT, dev_id, viommu_id, UINT32_MAX, + IOMMU_HWPT_FAULT_ID_VALID, &iopf_hwpt_id, + IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data)); + test_err_hwpt_alloc_iopf(EOPNOTSUPP, dev_id, viommu_id, fault_id, + IOMMU_HWPT_FAULT_ID_VALID | (1 << 31), + &iopf_hwpt_id, IOMMU_HWPT_DATA_SELFTEST, &data, + sizeof(data)); + test_cmd_hwpt_alloc_iopf(dev_id, viommu_id, fault_id, + IOMMU_HWPT_FAULT_ID_VALID, &iopf_hwpt_id, + IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data)); - test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id); - test_ioctl_destroy(iopf_hwpt_id); - close(fault_fd); - test_ioctl_destroy(fault_id); - } + /* Must allocate vdevice before attaching to a nested hwpt */ + test_err_mock_domain_replace(ENOENT, self->stdev_id, iopf_hwpt_id); + test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id); + test_cmd_mock_domain_replace(self->stdev_id, iopf_hwpt_id); + EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, iopf_hwpt_id)); + test_cmd_trigger_iopf(dev_id, fault_fd); + + test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id); + test_ioctl_destroy(iopf_hwpt_id); + close(fault_fd); + test_ioctl_destroy(fault_id); } TEST_F(iommufd_viommu, viommu_alloc_with_data) @@ -2929,169 +2926,161 @@ TEST_F(iommufd_viommu, vdevice_cache) uint32_t vdev_id = 0; uint32_t num_inv; - if (dev_id) { - test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id); - - test_cmd_dev_check_cache_all(dev_id, - IOMMU_TEST_DEV_CACHE_DEFAULT); - - /* Check data_type by passing zero-length array */ - num_inv = 0; - test_cmd_viommu_invalidate(viommu_id, inv_reqs, - sizeof(*inv_reqs), &num_inv); - assert(!num_inv); - - /* Negative test: Invalid data_type */ - num_inv = 1; - test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, - IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST_INVALID, - sizeof(*inv_reqs), &num_inv); - assert(!num_inv); - - /* Negative test: structure size sanity */ - num_inv = 1; - test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, - IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, - sizeof(*inv_reqs) + 1, &num_inv); - assert(!num_inv); - - num_inv = 1; - test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, - IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, - 1, &num_inv); - assert(!num_inv); - - /* Negative test: invalid flag is passed */ - num_inv = 1; - inv_reqs[0].flags = 0xffffffff; - inv_reqs[0].vdev_id = 0x99; - test_err_viommu_invalidate(EOPNOTSUPP, viommu_id, inv_reqs, - IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, - sizeof(*inv_reqs), &num_inv); - assert(!num_inv); - - /* Negative test: invalid data_uptr when array is not empty */ - num_inv = 1; - inv_reqs[0].flags = 0; - inv_reqs[0].vdev_id = 0x99; - test_err_viommu_invalidate(EINVAL, viommu_id, NULL, - IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, - sizeof(*inv_reqs), &num_inv); - assert(!num_inv); - - /* Negative test: invalid entry_len when array is not empty */ - num_inv = 1; - inv_reqs[0].flags = 0; - inv_reqs[0].vdev_id = 0x99; - test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, - IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, - 0, &num_inv); - assert(!num_inv); - - /* Negative test: invalid cache_id */ - num_inv = 1; - inv_reqs[0].flags = 0; - inv_reqs[0].vdev_id = 0x99; - inv_reqs[0].cache_id = MOCK_DEV_CACHE_ID_MAX + 1; - test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, - IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, - sizeof(*inv_reqs), &num_inv); - assert(!num_inv); + if (!dev_id) + SKIP(return, "Skipping test for variant no_viommu"); - /* Negative test: invalid vdev_id */ - num_inv = 1; - inv_reqs[0].flags = 0; - inv_reqs[0].vdev_id = 0x9; - inv_reqs[0].cache_id = 0; - test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, - IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, - sizeof(*inv_reqs), &num_inv); - assert(!num_inv); + test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id); + + test_cmd_dev_check_cache_all(dev_id, IOMMU_TEST_DEV_CACHE_DEFAULT); + + /* Check data_type by passing zero-length array */ + num_inv = 0; + test_cmd_viommu_invalidate(viommu_id, inv_reqs, sizeof(*inv_reqs), + &num_inv); + assert(!num_inv); + + /* Negative test: Invalid data_type */ + num_inv = 1; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST_INVALID, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: structure size sanity */ + num_inv = 1; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs) + 1, &num_inv); + assert(!num_inv); + + num_inv = 1; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, 1, + &num_inv); + assert(!num_inv); + + /* Negative test: invalid flag is passed */ + num_inv = 1; + inv_reqs[0].flags = 0xffffffff; + inv_reqs[0].vdev_id = 0x99; + test_err_viommu_invalidate(EOPNOTSUPP, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: invalid data_uptr when array is not empty */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + test_err_viommu_invalidate(EINVAL, viommu_id, NULL, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: invalid entry_len when array is not empty */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, 0, + &num_inv); + assert(!num_inv); + + /* Negative test: invalid cache_id */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].cache_id = MOCK_DEV_CACHE_ID_MAX + 1; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: invalid vdev_id */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x9; + inv_reqs[0].cache_id = 0; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); - /* - * Invalidate the 1st cache entry but fail the 2nd request - * due to invalid flags configuration in the 2nd request. - */ - num_inv = 2; - inv_reqs[0].flags = 0; - inv_reqs[0].vdev_id = 0x99; - inv_reqs[0].cache_id = 0; - inv_reqs[1].flags = 0xffffffff; - inv_reqs[1].vdev_id = 0x99; - inv_reqs[1].cache_id = 1; - test_err_viommu_invalidate(EOPNOTSUPP, viommu_id, inv_reqs, - IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, - sizeof(*inv_reqs), &num_inv); - assert(num_inv == 1); - test_cmd_dev_check_cache(dev_id, 0, 0); - test_cmd_dev_check_cache(dev_id, 1, - IOMMU_TEST_DEV_CACHE_DEFAULT); - test_cmd_dev_check_cache(dev_id, 2, - IOMMU_TEST_DEV_CACHE_DEFAULT); - test_cmd_dev_check_cache(dev_id, 3, - IOMMU_TEST_DEV_CACHE_DEFAULT); - - /* - * Invalidate the 1st cache entry but fail the 2nd request - * due to invalid cache_id configuration in the 2nd request. - */ - num_inv = 2; - inv_reqs[0].flags = 0; - inv_reqs[0].vdev_id = 0x99; - inv_reqs[0].cache_id = 0; - inv_reqs[1].flags = 0; - inv_reqs[1].vdev_id = 0x99; - inv_reqs[1].cache_id = MOCK_DEV_CACHE_ID_MAX + 1; - test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, - IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, - sizeof(*inv_reqs), &num_inv); - assert(num_inv == 1); - test_cmd_dev_check_cache(dev_id, 0, 0); - test_cmd_dev_check_cache(dev_id, 1, - IOMMU_TEST_DEV_CACHE_DEFAULT); - test_cmd_dev_check_cache(dev_id, 2, - IOMMU_TEST_DEV_CACHE_DEFAULT); - test_cmd_dev_check_cache(dev_id, 3, - IOMMU_TEST_DEV_CACHE_DEFAULT); - - /* Invalidate the 2nd cache entry and verify */ - num_inv = 1; - inv_reqs[0].flags = 0; - inv_reqs[0].vdev_id = 0x99; - inv_reqs[0].cache_id = 1; - test_cmd_viommu_invalidate(viommu_id, inv_reqs, - sizeof(*inv_reqs), &num_inv); - assert(num_inv == 1); - test_cmd_dev_check_cache(dev_id, 0, 0); - test_cmd_dev_check_cache(dev_id, 1, 0); - test_cmd_dev_check_cache(dev_id, 2, - IOMMU_TEST_DEV_CACHE_DEFAULT); - test_cmd_dev_check_cache(dev_id, 3, - IOMMU_TEST_DEV_CACHE_DEFAULT); - - /* Invalidate the 3rd and 4th cache entries and verify */ - num_inv = 2; - inv_reqs[0].flags = 0; - inv_reqs[0].vdev_id = 0x99; - inv_reqs[0].cache_id = 2; - inv_reqs[1].flags = 0; - inv_reqs[1].vdev_id = 0x99; - inv_reqs[1].cache_id = 3; - test_cmd_viommu_invalidate(viommu_id, inv_reqs, - sizeof(*inv_reqs), &num_inv); - assert(num_inv == 2); - test_cmd_dev_check_cache_all(dev_id, 0); + /* + * Invalidate the 1st cache entry but fail the 2nd request + * due to invalid flags configuration in the 2nd request. + */ + num_inv = 2; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].cache_id = 0; + inv_reqs[1].flags = 0xffffffff; + inv_reqs[1].vdev_id = 0x99; + inv_reqs[1].cache_id = 1; + test_err_viommu_invalidate(EOPNOTSUPP, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(num_inv == 1); + test_cmd_dev_check_cache(dev_id, 0, 0); + test_cmd_dev_check_cache(dev_id, 1, IOMMU_TEST_DEV_CACHE_DEFAULT); + test_cmd_dev_check_cache(dev_id, 2, IOMMU_TEST_DEV_CACHE_DEFAULT); + test_cmd_dev_check_cache(dev_id, 3, IOMMU_TEST_DEV_CACHE_DEFAULT); - /* Invalidate all cache entries for nested_dev_id[1] and verify */ - num_inv = 1; - inv_reqs[0].vdev_id = 0x99; - inv_reqs[0].flags = IOMMU_TEST_INVALIDATE_FLAG_ALL; - test_cmd_viommu_invalidate(viommu_id, inv_reqs, - sizeof(*inv_reqs), &num_inv); - assert(num_inv == 1); - test_cmd_dev_check_cache_all(dev_id, 0); - test_ioctl_destroy(vdev_id); - } + /* + * Invalidate the 1st cache entry but fail the 2nd request + * due to invalid cache_id configuration in the 2nd request. + */ + num_inv = 2; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].cache_id = 0; + inv_reqs[1].flags = 0; + inv_reqs[1].vdev_id = 0x99; + inv_reqs[1].cache_id = MOCK_DEV_CACHE_ID_MAX + 1; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(num_inv == 1); + test_cmd_dev_check_cache(dev_id, 0, 0); + test_cmd_dev_check_cache(dev_id, 1, IOMMU_TEST_DEV_CACHE_DEFAULT); + test_cmd_dev_check_cache(dev_id, 2, IOMMU_TEST_DEV_CACHE_DEFAULT); + test_cmd_dev_check_cache(dev_id, 3, IOMMU_TEST_DEV_CACHE_DEFAULT); + + /* Invalidate the 2nd cache entry and verify */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].cache_id = 1; + test_cmd_viommu_invalidate(viommu_id, inv_reqs, sizeof(*inv_reqs), + &num_inv); + assert(num_inv == 1); + test_cmd_dev_check_cache(dev_id, 0, 0); + test_cmd_dev_check_cache(dev_id, 1, 0); + test_cmd_dev_check_cache(dev_id, 2, IOMMU_TEST_DEV_CACHE_DEFAULT); + test_cmd_dev_check_cache(dev_id, 3, IOMMU_TEST_DEV_CACHE_DEFAULT); + + /* Invalidate the 3rd and 4th cache entries and verify */ + num_inv = 2; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].cache_id = 2; + inv_reqs[1].flags = 0; + inv_reqs[1].vdev_id = 0x99; + inv_reqs[1].cache_id = 3; + test_cmd_viommu_invalidate(viommu_id, inv_reqs, sizeof(*inv_reqs), + &num_inv); + assert(num_inv == 2); + test_cmd_dev_check_cache_all(dev_id, 0); + + /* Invalidate all cache entries for nested_dev_id[1] and verify */ + num_inv = 1; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].flags = IOMMU_TEST_INVALIDATE_FLAG_ALL; + test_cmd_viommu_invalidate(viommu_id, inv_reqs, sizeof(*inv_reqs), + &num_inv); + assert(num_inv == 1); + test_cmd_dev_check_cache_all(dev_id, 0); + test_ioctl_destroy(vdev_id); } TEST_F(iommufd_viommu, hw_queue) -- cgit v1.2.3 From 39a369c34152e1b76895fda37aa586dfb9b2cf38 Mon Sep 17 00:00:00 2001 From: Xu Yilun Date: Wed, 16 Jul 2025 15:03:48 +0800 Subject: iommufd/selftest: Add coverage for vdevice tombstone This tests the flow to tombstone vdevice when idevice is to be unbound before vdevice destruction. The expected results of the tombstone are: - The vdevice ID can't be reused anymore (not tested in this patch). - Even ioctl(IOMMU_DESTROY) can't free the vdevice ID. - iommufd_fops_release() can still free everything. Link: https://patch.msgid.link/r/20250716070349.1807226-8-yilun.xu@linux.intel.com Reviewed-by: Nicolin Chen Tested-by: Nicolin Chen Signed-off-by: Xu Yilun Signed-off-by: Jason Gunthorpe --- tools/testing/selftests/iommu/iommufd.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index d16fd9928b31..4de9ce066692 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -3142,6 +3142,20 @@ TEST_F(iommufd_viommu, hw_queue) test_ioctl_ioas_unmap(iova, PAGE_SIZE); } +TEST_F(iommufd_viommu, vdevice_tombstone) +{ + uint32_t viommu_id = self->viommu_id; + uint32_t dev_id = self->device_id; + uint32_t vdev_id = 0; + + if (!dev_id) + SKIP(return, "Skipping test for variant no_viommu"); + + test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id); + test_ioctl_destroy(self->stdev_id); + EXPECT_ERRNO(ENOENT, _test_ioctl_destroy(self->fd, vdev_id)); +} + FIXTURE(iommufd_device_pasid) { int fd; -- cgit v1.2.3 From 25250f40e2a9cade7ef294af8bee0b2bd0afca2d Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 17 Jul 2025 15:51:51 +0300 Subject: selftests: rtnetlink: Add operational state test Virtual devices (e.g., VXLAN) that do not have a notion of a carrier are created with an "UNKNOWN" operational state which some users find confusing [1]. It is possible to set the operational state from user space either during device creation or afterwards and some applications will start doing that in order to avoid the above problem. Add a test for this functionality to ensure it does not regress. [1] https://lore.kernel.org/netdev/20241119153703.71f97b76@hermes.local/ Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250717125151.466882-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/rtnetlink.sh | 34 ++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 49141254065c..441b17947230 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -30,6 +30,7 @@ ALL_TESTS=" kci_test_address_proto kci_test_enslave_bonding kci_test_mngtmpaddr + kci_test_operstate " devdummy="test-dummy0" @@ -1344,6 +1345,39 @@ kci_test_mngtmpaddr() return $ret } +kci_test_operstate() +{ + local ret=0 + + # Check that it is possible to set operational state during device + # creation and that it is preserved when the administrative state of + # the device is toggled. + run_cmd ip link add name vx0 up state up type vxlan id 10010 dstport 4789 + run_cmd_grep "state UP" ip link show dev vx0 + run_cmd ip link set dev vx0 down + run_cmd_grep "state DOWN" ip link show dev vx0 + run_cmd ip link set dev vx0 up + run_cmd_grep "state UP" ip link show dev vx0 + + run_cmd ip link del dev vx0 + + # Check that it is possible to set the operational state of the device + # after creation. + run_cmd ip link add name vx0 up type vxlan id 10010 dstport 4789 + run_cmd_grep "state UNKNOWN" ip link show dev vx0 + run_cmd ip link set dev vx0 state up + run_cmd_grep "state UP" ip link show dev vx0 + + run_cmd ip link del dev vx0 + + if [ "$ret" -ne 0 ]; then + end_test "FAIL: operstate" + return 1 + fi + + end_test "PASS: operstate" +} + kci_test_rtnl() { local current_test -- cgit v1.2.3 From f73858d5ef93cb880aca64d826b4e3ca8b55365a Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Mon, 7 Jul 2025 12:33:20 +0500 Subject: selftests/mm: pagemap_scan ioctl: add PFN ZERO test cases Add test cases to test the correctness of PFN ZERO flag of pagemap_scan ioctl. Test with normal pages backed memory and huge pages backed memory. Link: https://lkml.kernel.org/r/20250707073321.106431-1-usama.anjum@collabora.com Signed-off-by: Muhammad Usama Anjum Cc: David Hildenbrand Cc: Muhammad Usama Anjum Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/cow.c | 27 +---------- tools/testing/selftests/mm/pagemap_ioctl.c | 72 ++++++++++++++++++++++++++++-- tools/testing/selftests/mm/vm_util.c | 23 ++++++++++ tools/testing/selftests/mm/vm_util.h | 2 + 4 files changed, 95 insertions(+), 29 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index 29767e2afdd0..788e82b00b75 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -72,31 +72,6 @@ static int detect_thp_sizes(size_t sizes[], int max) return count; } -static void detect_huge_zeropage(void) -{ - int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page", - O_RDONLY); - size_t enabled = 0; - char buf[15]; - int ret; - - if (fd < 0) - return; - - ret = pread(fd, buf, sizeof(buf), 0); - if (ret > 0 && ret < sizeof(buf)) { - buf[ret] = 0; - - enabled = strtoul(buf, NULL, 10); - if (enabled == 1) { - has_huge_zeropage = true; - ksft_print_msg("[INFO] huge zeropage is enabled\n"); - } - } - - close(fd); -} - static bool range_is_swapped(void *addr, size_t size) { for (; size; addr += pagesize, size -= pagesize) @@ -1903,7 +1878,7 @@ int main(int argc, char **argv) } nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes, ARRAY_SIZE(hugetlbsizes)); - detect_huge_zeropage(); + has_huge_zeropage = detect_huge_zeropage(); ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() + ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() + diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c index b07acc86f4f0..c2dcda78ad31 100644 --- a/tools/testing/selftests/mm/pagemap_ioctl.c +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 + #define _GNU_SOURCE #include #include @@ -34,8 +35,8 @@ #define PAGEMAP "/proc/self/pagemap" int pagemap_fd; int uffd; -unsigned long page_size; -unsigned int hpage_size; +size_t page_size; +size_t hpage_size; const char *progname; #define LEN(region) ((region.end - region.start)/page_size) @@ -1480,6 +1481,68 @@ static void transact_test(int page_size) extra_thread_faults); } +void zeropfn_tests(void) +{ + unsigned long long mem_size; + struct page_region vec; + int i, ret; + char *mmap_mem, *mem; + + /* Test with normal memory */ + mem_size = 10 * page_size; + mem = mmap(NULL, mem_size, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + + /* Touch each page to ensure it's mapped */ + for (i = 0; i < mem_size; i += page_size) + (void)((volatile char *)mem)[i]; + + ret = pagemap_ioctl(mem, mem_size, &vec, 1, 0, + (mem_size / page_size), PAGE_IS_PFNZERO, 0, 0, PAGE_IS_PFNZERO); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec) == (mem_size / page_size), + "%s all pages must have PFNZERO set\n", __func__); + + munmap(mem, mem_size); + + /* Test with huge page if user_zero_page is set to 1 */ + if (!detect_huge_zeropage()) { + ksft_test_result_skip("%s use_zero_page not supported or set to 1\n", __func__); + return; + } + + mem_size = 2 * hpage_size; + mmap_mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mmap_mem == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + + /* We need a THP-aligned memory area. */ + mem = (char *)(((uintptr_t)mmap_mem + hpage_size) & ~(hpage_size - 1)); + + ret = madvise(mem, hpage_size, MADV_HUGEPAGE); + if (!ret) { + char tmp = *mem; + + asm volatile("" : "+r" (tmp)); + + ret = pagemap_ioctl(mem, hpage_size, &vec, 1, 0, + 0, PAGE_IS_PFNZERO, 0, 0, PAGE_IS_PFNZERO); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec) == (hpage_size / page_size), + "%s all huge pages must have PFNZERO set\n", __func__); + } else { + ksft_test_result_skip("%s huge page not supported\n", __func__); + } + + munmap(mmap_mem, mem_size); +} + int main(int __attribute__((unused)) argc, char *argv[]) { int shmid, buf_size, fd, i, ret; @@ -1494,7 +1557,7 @@ int main(int __attribute__((unused)) argc, char *argv[]) if (init_uffd()) ksft_exit_pass(); - ksft_set_plan(115); + ksft_set_plan(117); page_size = getpagesize(); hpage_size = read_pmd_pagesize(); @@ -1669,6 +1732,9 @@ int main(int __attribute__((unused)) argc, char *argv[]) /* 16. Userfaultfd tests */ userfaultfd_tests(); + /* 17. ZEROPFN tests */ + zeropfn_tests(); + close(pagemap_fd); ksft_exit_pass(); } diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c index 1d434772fa54..9dafa7669ef9 100644 --- a/tools/testing/selftests/mm/vm_util.c +++ b/tools/testing/selftests/mm/vm_util.c @@ -532,3 +532,26 @@ void *sys_mremap(void *old_address, unsigned long old_size, old_size, new_size, flags, (unsigned long)new_address); } + +bool detect_huge_zeropage(void) +{ + int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page", + O_RDONLY); + bool enabled = 0; + char buf[15]; + int ret; + + if (fd < 0) + return 0; + + ret = pread(fd, buf, sizeof(buf), 0); + if (ret > 0 && ret < sizeof(buf)) { + buf[ret] = 0; + + if (strtoul(buf, NULL, 10) == 1) + enabled = 1; + } + + close(fd); + return enabled; +} diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index 797c24215b17..2b154c287591 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -44,6 +44,8 @@ static inline unsigned int pshift(void) return __page_shift; } +bool detect_huge_zeropage(void); + /* * Plan 9 FS has bugs (at least on QEMU) where certain operations fail with * ENOENT on unlinked files. See -- cgit v1.2.3 From 813b46808822db6838c43e92ba21ce013d23fcdc Mon Sep 17 00:00:00 2001 From: WangYuli Date: Thu, 10 Jul 2025 21:04:12 +0800 Subject: selftests/thermal: remove duplicate sprintf() call in workload_hint_test Remove redundant sprintf() call that was duplicating the same operation of formatting delay_str with argv[1]. Link: https://lkml.kernel.org/r/6338CD0E839B770B+20250710130412.284531-1-wangyuli@uniontech.com Signed-off-by: WangYuli Cc: Guan Wentao Cc: Shuah Khan Signed-off-by: Andrew Morton --- .../testing/selftests/thermal/intel/workload_hint/workload_hint_test.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c index a40097232967..bda006af8b1b 100644 --- a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c +++ b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c @@ -67,8 +67,6 @@ int main(int argc, char **argv) if (delay < 0) exit(1); - sprintf(delay_str, "%s\n", argv[1]); - sprintf(delay_str, "%s\n", argv[1]); fd = open(WORKLOAD_NOTIFICATION_DELAY_ATTRIBUTE, O_RDWR); if (fd < 0) { -- cgit v1.2.3 From 599579e857ab8d8f97409f631090e08018f8343b Mon Sep 17 00:00:00 2001 From: WangYuli Date: Thu, 10 Jul 2025 21:47:51 +0800 Subject: selftests/thermal: remove duplicate newlines in perror calls perror() automatically appends a newline character, so the explicit '\n' in the format strings is redundant and results in duplicate newlines in the output. Remove the redundant '\n' characters from perror() calls in workload_hint_test.c to fix the formatting. Link: https://lkml.kernel.org/r/F482FB1EC020000C+20250710134751.306096-1-wangyuli@uniontech.com Signed-off-by: WangYuli Cc: Guan Wentao Cc: Shuah Khan Signed-off-by: Andrew Morton --- .../thermal/intel/workload_hint/workload_hint_test.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c index bda006af8b1b..ba58589a1145 100644 --- a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c +++ b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c @@ -32,12 +32,12 @@ void workload_hint_exit(int signum) fd = open(WORKLOAD_ENABLE_ATTRIBUTE, O_RDWR); if (fd < 0) { - perror("Unable to open workload type feature enable file\n"); + perror("Unable to open workload type feature enable file"); exit(1); } if (write(fd, "0\n", 2) < 0) { - perror("Can't disable workload hints\n"); + perror("Can't disable workload hints"); exit(1); } @@ -70,12 +70,12 @@ int main(int argc, char **argv) sprintf(delay_str, "%s\n", argv[1]); fd = open(WORKLOAD_NOTIFICATION_DELAY_ATTRIBUTE, O_RDWR); if (fd < 0) { - perror("Unable to open workload notification delay\n"); + perror("Unable to open workload notification delay"); exit(1); } if (write(fd, delay_str, strlen(delay_str)) < 0) { - perror("Can't set delay\n"); + perror("Can't set delay"); exit(1); } @@ -92,12 +92,12 @@ int main(int argc, char **argv) /* Enable feature via sysfs knob */ fd = open(WORKLOAD_ENABLE_ATTRIBUTE, O_RDWR); if (fd < 0) { - perror("Unable to open workload type feature enable file\n"); + perror("Unable to open workload type feature enable file"); exit(1); } if (write(fd, "1\n", 2) < 0) { - perror("Can't enable workload hints\n"); + perror("Can't enable workload hints"); exit(1); } @@ -108,7 +108,7 @@ int main(int argc, char **argv) while (1) { fd = open(WORKLOAD_TYPE_INDEX_ATTRIBUTE, O_RDONLY); if (fd < 0) { - perror("Unable to open workload type file\n"); + perror("Unable to open workload type file"); exit(1); } -- cgit v1.2.3 From 7563fcbfd484b347b776aeed4d7dac78b30884aa Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Tue, 8 Jul 2025 21:27:59 -0400 Subject: selftests/mm: fix split_huge_page_test for folio_split() tests PID_FMT does not have an offset field, so folio_split() tests are not performed. Add PID_FMT_OFFSET with an offset field and use it to perform folio_split() tests. Link: https://lkml.kernel.org/r/20250709012800.3225727-1-ziy@nvidia.com Fixes: 80a5c494c89f ("selftests/mm: add tests for folio_split(), buddy allocator like split") Signed-off-by: Zi Yan Tested-by: Baolin Wang Reviewed-by: Lorenzo Stoakes Reviewed-by: Donet Tom Tested-by : Donet Tom Cc: Barry Song Cc: David Hildenbrand Cc: Dev Jain Cc: Liam Howlett Cc: Mariano Pache Cc: Ryan Roberts Cc: Shuah Khan Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/split_huge_page_test.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c index aa7400ed0e99..f0d9c035641d 100644 --- a/tools/testing/selftests/mm/split_huge_page_test.c +++ b/tools/testing/selftests/mm/split_huge_page_test.c @@ -31,6 +31,7 @@ uint64_t pmd_pagesize; #define INPUT_MAX 80 #define PID_FMT "%d,0x%lx,0x%lx,%d" +#define PID_FMT_OFFSET "%d,0x%lx,0x%lx,%d,%d" #define PATH_FMT "%s,0x%lx,0x%lx,%d" #define PFN_MASK ((1UL<<55)-1) @@ -483,7 +484,7 @@ void split_thp_in_pagecache_to_order_at(size_t fd_size, const char *fs_loc, write_debugfs(PID_FMT, getpid(), (uint64_t)addr, (uint64_t)addr + fd_size, order); else - write_debugfs(PID_FMT, getpid(), (uint64_t)addr, + write_debugfs(PID_FMT_OFFSET, getpid(), (uint64_t)addr, (uint64_t)addr + fd_size, order, offset); for (i = 0; i < fd_size; i++) -- cgit v1.2.3 From ff3fbcdd472453190d4f37fe1d1e69e50cce7612 Mon Sep 17 00:00:00 2001 From: Li Shuang Date: Fri, 18 Jul 2025 22:16:12 +0800 Subject: selftests: tc: Add generic erspan_opts matching support for tc-flower Add test cases to tc_flower.sh to validate generic matching on ERSPAN options. Both ERSPAN Type II and Type III are covered. Also add check_tc_erspan_support() to verify whether tc supports erspan_opts. Signed-off-by: Li Shuang Reviewed-by: Xin Long Link: https://patch.msgid.link/1f354a1afd60f29bbbf02bd60cb52ecfc0b6bd17.1752848172.git.shuali@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/lib.sh | 14 ++++++ .../testing/selftests/net/forwarding/tc_flower.sh | 52 +++++++++++++++++++++- 2 files changed, 65 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 9308b2f77fed..890b3374dacd 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -142,6 +142,20 @@ check_tc_version() fi } +check_tc_erspan_support() +{ + local dev=$1; shift + + tc filter add dev $dev ingress pref 1 handle 1 flower \ + erspan_opts 1:0:0:0 &> /dev/null + if [[ $? -ne 0 ]]; then + echo "SKIP: iproute2 too old; tc is missing erspan support" + return $ksft_skip + fi + tc filter del dev $dev ingress pref 1 handle 1 flower \ + erspan_opts 1:0:0:0 &> /dev/null +} + # Old versions of tc don't understand "mpls_uc" check_tc_mpls_support() { diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh index b1daad19b01e..b58909a93112 100755 --- a/tools/testing/selftests/net/forwarding/tc_flower.sh +++ b/tools/testing/selftests/net/forwarding/tc_flower.sh @@ -6,7 +6,7 @@ ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \ match_ip_tos_test match_indev_test match_ip_ttl_test match_mpls_label_test \ match_mpls_tc_test match_mpls_bos_test match_mpls_ttl_test \ - match_mpls_lse_test" + match_mpls_lse_test match_erspan_opts_test" NUM_NETIFS=2 source tc_common.sh source lib.sh @@ -676,6 +676,56 @@ match_mpls_lse_test() log_test "mpls lse match ($tcflags)" } +match_erspan_opts_test() +{ + RET=0 + + check_tc_erspan_support $h2 || return 0 + + # h1 erspan setup + tunnel_create erspan1 erspan 192.0.2.1 192.0.2.2 dev $h1 seq key 1001 \ + tos C ttl 64 erspan_ver 1 erspan 6789 # ERSPAN Type II + tunnel_create erspan2 erspan 192.0.2.1 192.0.2.2 dev $h1 seq key 1002 \ + tos C ttl 64 erspan_ver 2 erspan_dir egress erspan_hwid 63 \ + # ERSPAN Type III + ip link set dev erspan1 master v$h1 + ip link set dev erspan2 master v$h1 + # h2 erspan setup + ip link add ep-ex type erspan ttl 64 external # To collect tunnel info + ip link set ep-ex up + ip link set dev ep-ex master v$h2 + tc qdisc add dev ep-ex clsact + + # ERSPAN Type II [decap direction] + tc filter add dev ep-ex ingress protocol ip handle 101 flower \ + $tcflags enc_src_ip 192.0.2.1 enc_dst_ip 192.0.2.2 \ + enc_key_id 1001 erspan_opts 1:6789:0:0 \ + action drop + # ERSPAN Type III [decap direction] + tc filter add dev ep-ex ingress protocol ip handle 102 flower \ + $tcflags enc_src_ip 192.0.2.1 enc_dst_ip 192.0.2.2 \ + enc_key_id 1002 erspan_opts 2:0:1:63 action drop + + ep1mac=$(mac_get erspan1) + $MZ erspan1 -c 1 -p 64 -a $ep1mac -b $h2mac -t ip -q + tc_check_packets "dev ep-ex ingress" 101 1 + check_err $? "ERSPAN Type II" + + ep2mac=$(mac_get erspan2) + $MZ erspan2 -c 1 -p 64 -a $ep1mac -b $h2mac -t ip -q + tc_check_packets "dev ep-ex ingress" 102 1 + check_err $? "ERSPAN Type III" + + # h2 erspan cleanup + tc qdisc del dev ep-ex clsact + tunnel_destroy ep-ex + # h1 erspan cleanup + tunnel_destroy erspan2 # ERSPAN Type III + tunnel_destroy erspan1 # ERSPAN Type II + + log_test "erspan_opts match ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} -- cgit v1.2.3 From 37848a456fc38c191aedfe41f662cc24db8c23d9 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 15 Jul 2025 20:43:28 +0200 Subject: selftests: mptcp: connect: also cover alt modes The "mmap" and "sendfile" alternate modes for mptcp_connect.sh/.c are available from the beginning, but only tested when mptcp_connect.sh is manually launched with "-m mmap" or "-m sendfile", not via the kselftests helpers. The MPTCP CI was manually running "mptcp_connect.sh -m mmap", but not "-m sendfile". Plus other CIs, especially the ones validating the stable releases, were not validating these alternate modes. To make sure these modes are validated by these CIs, add two new test programs executing mptcp_connect.sh with the alternate modes. Fixes: 048d19d444be ("mptcp: add basic kselftest for mptcp") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250715-net-mptcp-sft-connect-alt-v2-1-8230ddd82454@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/Makefile | 3 ++- tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh | 5 +++++ tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh | 5 +++++ 3 files changed, 12 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh create mode 100755 tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index e47788bfa671..c6b030babba8 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -4,7 +4,8 @@ top_srcdir = ../../../../.. CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) -TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \ +TEST_PROGS := mptcp_connect.sh mptcp_connect_mmap.sh mptcp_connect_sendfile.sh \ + pm_netlink.sh mptcp_join.sh diag.sh \ simult_flows.sh mptcp_sockopt.sh userspace_pm.sh TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq mptcp_diag diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh new file mode 100755 index 000000000000..5dd30f9394af --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \ + "$(dirname "${0}")/mptcp_connect.sh" -m mmap "${@}" diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh new file mode 100755 index 000000000000..1d16fb1cc9bb --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \ + "$(dirname "${0}")/mptcp_connect.sh" -m sendfile "${@}" -- cgit v1.2.3 From fdf0f60a2bb02ba581d9e71d583e69dd0714a521 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 15 Jul 2025 20:43:29 +0200 Subject: selftests: mptcp: connect: also cover checksum The checksum mode has been added a while ago, but it is only validated when manually launching mptcp_connect.sh with "-C". The different CIs were then not validating these MPTCP Connect tests with checksum enabled. To make sure they do, add a new test program executing mptcp_connect.sh with the checksum mode. Fixes: 94d66ba1d8e4 ("selftests: mptcp: enable checksum in mptcp_connect.sh") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250715-net-mptcp-sft-connect-alt-v2-2-8230ddd82454@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/Makefile | 2 +- tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index c6b030babba8..4c7e51336ab2 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -5,7 +5,7 @@ top_srcdir = ../../../../.. CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) TEST_PROGS := mptcp_connect.sh mptcp_connect_mmap.sh mptcp_connect_sendfile.sh \ - pm_netlink.sh mptcp_join.sh diag.sh \ + mptcp_connect_checksum.sh pm_netlink.sh mptcp_join.sh diag.sh \ simult_flows.sh mptcp_sockopt.sh userspace_pm.sh TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq mptcp_diag diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh new file mode 100755 index 000000000000..ce93ec2f107f --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \ + "$(dirname "${0}")/mptcp_connect.sh" -C "${@}" -- cgit v1.2.3 From 4c86c9fdf6a51394434811add48c7d5fe7da47ef Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 17 Jul 2025 16:43:43 -0700 Subject: selftests: drv-net: rss_api: context create and delete tests Add test cases for creating and deleting contexts. TAP version 13 1..12 ok 1 rss_api.test_rxfh_nl_set_fail ok 2 rss_api.test_rxfh_nl_set_indir ok 3 rss_api.test_rxfh_nl_set_indir_ctx ok 4 rss_api.test_rxfh_indir_ntf ok 5 rss_api.test_rxfh_indir_ctx_ntf ok 6 rss_api.test_rxfh_nl_set_key ok 7 rss_api.test_rxfh_fields ok 8 rss_api.test_rxfh_fields_set ok 9 rss_api.test_rxfh_fields_set_xfrm # SKIP no input-xfrm supported ok 10 rss_api.test_rxfh_fields_ntf ok 11 rss_api.test_rss_ctx_add ok 12 rss_api.test_rss_ctx_ntf # Totals: pass:11 fail:0 xfail:0 xpass:0 skip:1 error:0 Link: https://patch.msgid.link/20250717234343.2328602-9-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/rss_api.py | 73 +++++++++++++++++++++++ 1 file changed, 73 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/hw/rss_api.py b/tools/testing/selftests/drivers/net/hw/rss_api.py index 424743bb583b..19847f3d4a00 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_api.py +++ b/tools/testing/selftests/drivers/net/hw/rss_api.py @@ -5,6 +5,7 @@ API level tests for RSS (mostly Netlink vs IOCTL). """ +import errno import glob import random from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_is, ksft_ne, ksft_raises @@ -390,6 +391,78 @@ def test_rxfh_fields_ntf(cfg): ksft_eq(next(ethnl.poll_ntf(duration=0.01), None), None) +def test_rss_ctx_add(cfg): + """ Test creating an additional RSS context via Netlink """ + + _require_2qs(cfg) + + # Test basic creation + ctx = cfg.ethnl.rss_create_act({"header": {"dev-index": cfg.ifindex}}) + d = defer(ethtool, f"-X {cfg.ifname} context {ctx.get('context')} delete") + ksft_ne(ctx.get("context", 0), 0) + ksft_ne(set(ctx.get("indir", [0])), {0}, + comment="Driver should init the indirection table") + + # Try requesting the ID we just got allocated + with ksft_raises(NlError) as cm: + ctx = cfg.ethnl.rss_create_act({ + "header": {"dev-index": cfg.ifindex}, + "context": ctx.get("context"), + }) + ethtool(f"-X {cfg.ifname} context {ctx.get('context')} delete") + d.exec() + ksft_eq(cm.exception.nl_msg.error, -errno.EBUSY) + + # Test creating with a specified RSS table, and context ID + ctx_id = ctx.get("context") + ctx = cfg.ethnl.rss_create_act({ + "header": {"dev-index": cfg.ifindex}, + "context": ctx_id, + "indir": [1], + }) + ethtool(f"-X {cfg.ifname} context {ctx.get('context')} delete") + ksft_eq(ctx.get("context"), ctx_id) + ksft_eq(set(ctx.get("indir", [0])), {1}) + + +def test_rss_ctx_ntf(cfg): + """ Test notifications for creating additional RSS contexts """ + + ethnl = EthtoolFamily() + ethnl.ntf_subscribe("monitor") + + # Create / delete via Netlink + ctx = cfg.ethnl.rss_create_act({"header": {"dev-index": cfg.ifindex}}) + cfg.ethnl.rss_delete_act({ + "header": {"dev-index": cfg.ifindex}, + "context": ctx["context"], + }) + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("[NL] No notification after context creation") + ksft_eq(ntf["name"], "rss-create-ntf") + ksft_eq(ctx, ntf["msg"]) + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("[NL] No notification after context deletion") + ksft_eq(ntf["name"], "rss-delete-ntf") + + # Create / deleve via IOCTL + ctx_id = _ethtool_create(cfg, "--disable-netlink -X", "context new") + ethtool(f"--disable-netlink -X {cfg.ifname} context {ctx_id} delete") + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("[IOCTL] No notification after context creation") + ksft_eq(ntf["name"], "rss-create-ntf") + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("[IOCTL] No notification after context deletion") + ksft_eq(ntf["name"], "rss-delete-ntf") + + def main() -> None: """ Ksft boiler plate main """ -- cgit v1.2.3 From 57fbad15c2eee77276a541c616589b32976d2b8e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Jul 2025 16:25:06 -0700 Subject: stackleak: Rename STACKLEAK to KSTACK_ERASE In preparation for adding Clang sanitizer coverage stack depth tracking that can support stack depth callbacks: - Add the new top-level CONFIG_KSTACK_ERASE option which will be implemented either with the stackleak GCC plugin, or with the Clang stack depth callback support. - Rename CONFIG_GCC_PLUGIN_STACKLEAK as needed to CONFIG_KSTACK_ERASE, but keep it for anything specific to the GCC plugin itself. - Rename all exposed "STACKLEAK" names and files to "KSTACK_ERASE" (named for what it does rather than what it protects against), but leave as many of the internals alone as possible to avoid even more churn. While here, also split "prev_lowest_stack" into CONFIG_KSTACK_ERASE_METRICS, since that's the only place it is referenced from. Suggested-by: Ingo Molnar Link: https://lore.kernel.org/r/20250717232519.2984886-1-kees@kernel.org Signed-off-by: Kees Cook --- Documentation/admin-guide/sysctl/kernel.rst | 4 +- Documentation/arch/x86/x86_64/mm.rst | 2 +- Documentation/security/self-protection.rst | 2 +- .../zh_CN/security/self-protection.rst | 2 +- MAINTAINERS | 4 +- arch/Kconfig | 4 +- arch/arm/Kconfig | 2 +- arch/arm/boot/compressed/Makefile | 2 +- arch/arm/kernel/entry-common.S | 2 +- arch/arm64/Kconfig | 2 +- arch/arm64/kernel/entry.S | 2 +- arch/arm64/kernel/pi/Makefile | 2 +- arch/arm64/kvm/hyp/nvhe/Makefile | 2 +- arch/loongarch/Kconfig | 2 +- arch/riscv/Kconfig | 2 +- arch/riscv/kernel/entry.S | 2 +- arch/riscv/kernel/pi/Makefile | 2 +- arch/riscv/purgatory/Makefile | 2 +- arch/s390/Kconfig | 2 +- arch/s390/kernel/entry.S | 2 +- arch/x86/Kconfig | 2 +- arch/x86/entry/calling.h | 4 +- arch/x86/purgatory/Makefile | 2 +- drivers/firmware/efi/libstub/Makefile | 8 +- drivers/misc/lkdtm/Makefile | 2 +- drivers/misc/lkdtm/kstack_erase.c | 150 +++++++++++++++++ drivers/misc/lkdtm/stackleak.c | 150 ----------------- fs/proc/base.c | 6 +- include/linux/kstack_erase.h | 89 +++++++++++ include/linux/sched.h | 4 +- include/linux/stackleak.h | 89 ----------- kernel/Makefile | 10 +- kernel/fork.c | 2 +- kernel/kstack_erase.c | 177 +++++++++++++++++++++ kernel/stackleak.c | 177 --------------------- lib/Makefile | 2 +- scripts/Makefile.gcc-plugins | 6 +- security/Kconfig.hardening | 36 +++-- tools/objtool/check.c | 2 +- tools/testing/selftests/lkdtm/config | 2 +- 40 files changed, 486 insertions(+), 480 deletions(-) create mode 100644 drivers/misc/lkdtm/kstack_erase.c delete mode 100644 drivers/misc/lkdtm/stackleak.c create mode 100644 include/linux/kstack_erase.h delete mode 100644 include/linux/stackleak.h create mode 100644 kernel/kstack_erase.c delete mode 100644 kernel/stackleak.c (limited to 'tools/testing/selftests') diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index dd49a89a62d3..19224eeac1c2 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -1465,7 +1465,7 @@ stack_erasing ============= This parameter can be used to control kernel stack erasing at the end -of syscalls for kernels built with ``CONFIG_GCC_PLUGIN_STACKLEAK``. +of syscalls for kernels built with ``CONFIG_KSTACK_ERASE``. That erasing reduces the information which kernel stack leak bugs can reveal and blocks some uninitialized stack variable attacks. @@ -1473,7 +1473,7 @@ The tradeoff is the performance impact: on a single CPU system kernel compilation sees a 1% slowdown, other systems and workloads may vary. = ==================================================================== -0 Kernel stack erasing is disabled, STACKLEAK_METRICS are not updated. +0 Kernel stack erasing is disabled, KSTACK_ERASE_METRICS are not updated. 1 Kernel stack erasing is enabled (default), it is performed before returning to the userspace at the end of syscalls. = ==================================================================== diff --git a/Documentation/arch/x86/x86_64/mm.rst b/Documentation/arch/x86/x86_64/mm.rst index f2db178b353f..a6cf05d51bd8 100644 --- a/Documentation/arch/x86/x86_64/mm.rst +++ b/Documentation/arch/x86/x86_64/mm.rst @@ -176,5 +176,5 @@ Be very careful vs. KASLR when changing anything here. The KASLR address range must not overlap with anything except the KASAN shadow area, which is correct as KASAN disables KASLR. -For both 4- and 5-level layouts, the STACKLEAK_POISON value in the last 2MB +For both 4- and 5-level layouts, the KSTACK_ERASE_POISON value in the last 2MB hole: ffffffffffff4111 diff --git a/Documentation/security/self-protection.rst b/Documentation/security/self-protection.rst index 910668e665cb..a32ca23c21b0 100644 --- a/Documentation/security/self-protection.rst +++ b/Documentation/security/self-protection.rst @@ -303,7 +303,7 @@ Memory poisoning When releasing memory, it is best to poison the contents, to avoid reuse attacks that rely on the old contents of memory. E.g., clear stack on a -syscall return (``CONFIG_GCC_PLUGIN_STACKLEAK``), wipe heap memory on a +syscall return (``CONFIG_KSTACK_ERASE``), wipe heap memory on a free. This frustrates many uninitialized variable attacks, stack content exposures, heap content exposures, and use-after-free attacks. diff --git a/Documentation/translations/zh_CN/security/self-protection.rst b/Documentation/translations/zh_CN/security/self-protection.rst index 3c8a68b1e1be..93de9cee5c1a 100644 --- a/Documentation/translations/zh_CN/security/self-protection.rst +++ b/Documentation/translations/zh_CN/security/self-protection.rst @@ -259,7 +259,7 @@ KALLSYSM,则会直接打印原始地址。 -------- 在释放内存时,最好对内存内容进行清除处理,以防止攻击者重用内存中以前 -的内容。例如,在系统调用返回时清除堆栈(CONFIG_GCC_PLUGIN_STACKLEAK), +的内容。例如,在系统调用返回时清除堆栈(CONFIG_KSTACK_ERASE), 在释放堆内容是清除其内容。这有助于防止许多未初始化变量攻击、堆栈内容 泄露、堆内容泄露以及使用后释放攻击(user-after-free)。 diff --git a/MAINTAINERS b/MAINTAINERS index 0c1d245bf7b8..470d159d8fea 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9992,8 +9992,6 @@ L: linux-hardening@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening F: Documentation/kbuild/gcc-plugins.rst -F: include/linux/stackleak.h -F: kernel/stackleak.c F: scripts/Makefile.gcc-plugins F: scripts/gcc-plugins/ @@ -13087,10 +13085,12 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/har F: Documentation/ABI/testing/sysfs-kernel-oops_count F: Documentation/ABI/testing/sysfs-kernel-warn_count F: arch/*/configs/hardening.config +F: include/linux/kstack_erase.h F: include/linux/overflow.h F: include/linux/randomize_kstack.h F: include/linux/ucopysize.h F: kernel/configs/hardening.config +F: kernel/kstack_erase.c F: lib/tests/randstruct_kunit.c F: lib/tests/usercopy_kunit.c F: mm/usercopy.c diff --git a/arch/Kconfig b/arch/Kconfig index a3308a220f86..4d1908f6f084 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -630,11 +630,11 @@ config SECCOMP_CACHE_DEBUG If unsure, say N. -config HAVE_ARCH_STACKLEAK +config HAVE_ARCH_KSTACK_ERASE bool help An architecture should select this if it has the code which - fills the used part of the kernel stack with the STACKLEAK_POISON + fills the used part of the kernel stack with the KSTACK_ERASE_POISON value before returning from system calls. config HAVE_STACKPROTECTOR diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 3072731fe09c..cb0b2e2211ca 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -87,11 +87,11 @@ config ARM select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN + select HAVE_ARCH_KSTACK_ERASE select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_PFN_VALID select HAVE_ARCH_SECCOMP select HAVE_ARCH_SECCOMP_FILTER if AEABI && !OABI_COMPAT - select HAVE_ARCH_STACKLEAK select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if ARM_LPAE diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index d61369b1eabe..f9075edfd773 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -9,7 +9,7 @@ OBJS = HEAD = head.o OBJS += misc.o decompress.o -CFLAGS_decompress.o += $(DISABLE_STACKLEAK_PLUGIN) +CFLAGS_decompress.o += $(DISABLE_KSTACK_ERASE) ifeq ($(CONFIG_DEBUG_UNCOMPRESS),y) OBJS += debug.o AFLAGS_head.o += -DDEBUG diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index f379c852dcb7..88336a1292bb 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -119,7 +119,7 @@ no_work_pending: ct_user_enter save = 0 -#ifdef CONFIG_GCC_PLUGIN_STACKLEAK +#ifdef CONFIG_KSTACK_ERASE bl stackleak_erase_on_task_stack #endif restore_user_regs fast = 0, offset = 0 diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 55fc331af337..e2a9e013b6a9 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -187,12 +187,12 @@ config ARM64 select HAVE_ARCH_KCSAN if EXPERT select HAVE_ARCH_KFENCE select HAVE_ARCH_KGDB + select HAVE_ARCH_KSTACK_ERASE select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT select HAVE_ARCH_PREL32_RELOCATIONS select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET select HAVE_ARCH_SECCOMP_FILTER - select HAVE_ARCH_STACKLEAK select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 5ae2a34b50bd..67331437b2aa 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -614,7 +614,7 @@ SYM_CODE_END(ret_to_kernel) SYM_CODE_START_LOCAL(ret_to_user) ldr x19, [tsk, #TSK_TI_FLAGS] // re-check for single-step enable_step_tsk x19, x2 -#ifdef CONFIG_GCC_PLUGIN_STACKLEAK +#ifdef CONFIG_KSTACK_ERASE bl stackleak_erase_on_task_stack #endif kernel_exit 0 diff --git a/arch/arm64/kernel/pi/Makefile b/arch/arm64/kernel/pi/Makefile index 4d11a8c29181..f440bf57b1a5 100644 --- a/arch/arm64/kernel/pi/Makefile +++ b/arch/arm64/kernel/pi/Makefile @@ -2,7 +2,7 @@ # Copyright 2022 Google LLC KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \ - -Os -DDISABLE_BRANCH_PROFILING $(DISABLE_STACKLEAK_PLUGIN) \ + -Os -DDISABLE_BRANCH_PROFILING $(DISABLE_KSTACK_ERASE) \ $(DISABLE_LATENT_ENTROPY_PLUGIN) \ $(call cc-option,-mbranch-protection=none) \ -I$(srctree)/scripts/dtc/libfdt -fno-stack-protector \ diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index a76522d63c3e..0b0a68b663d4 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -12,7 +12,7 @@ asflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS -D__DISABLE_TRACE_MMIO__ ccflags-y += -fno-stack-protector \ -DDISABLE_BRANCH_PROFILING \ - $(DISABLE_STACKLEAK_PLUGIN) + $(DISABLE_KSTACK_ERASE) hostprogs := gen-hyprel HOST_EXTRACFLAGS += -I$(objtree)/include diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 4b19f93379a1..1514789bea4a 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -120,11 +120,11 @@ config LOONGARCH select HAVE_ARCH_KASAN select HAVE_ARCH_KFENCE select HAVE_ARCH_KGDB if PERF_EVENTS + select HAVE_ARCH_KSTACK_ERASE select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET select HAVE_ARCH_SECCOMP select HAVE_ARCH_SECCOMP_FILTER - select HAVE_ARCH_STACKLEAK select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 36061f4732b7..cfc084fc9e6f 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -135,13 +135,13 @@ config RISCV select HAVE_ARCH_KASAN if MMU && 64BIT select HAVE_ARCH_KASAN_VMALLOC if MMU && 64BIT select HAVE_ARCH_KFENCE if MMU && 64BIT + select HAVE_ARCH_KSTACK_ERASE select HAVE_ARCH_KGDB if !XIP_KERNEL select HAVE_ARCH_KGDB_QXFER_PKT select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET select HAVE_ARCH_SECCOMP_FILTER - select HAVE_ARCH_STACKLEAK select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 75656afa2d6b..3a0ec6fd5956 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -220,7 +220,7 @@ SYM_CODE_START_NOALIGN(ret_from_exception) #endif bnez s0, 1f -#ifdef CONFIG_GCC_PLUGIN_STACKLEAK +#ifdef CONFIG_KSTACK_ERASE call stackleak_erase_on_task_stack #endif diff --git a/arch/riscv/kernel/pi/Makefile b/arch/riscv/kernel/pi/Makefile index 81d69d45c06c..7dd15be69c90 100644 --- a/arch/riscv/kernel/pi/Makefile +++ b/arch/riscv/kernel/pi/Makefile @@ -2,7 +2,7 @@ # This file was copied from arm64/kernel/pi/Makefile. KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \ - -Os -DDISABLE_BRANCH_PROFILING $(DISABLE_STACKLEAK_PLUGIN) \ + -Os -DDISABLE_BRANCH_PROFILING $(DISABLE_KSTACK_ERASE) \ $(call cc-option,-mbranch-protection=none) \ -I$(srctree)/scripts/dtc/libfdt -fno-stack-protector \ -include $(srctree)/include/linux/hidden.h \ diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile index fb9c917c9b45..240592e3f5c2 100644 --- a/arch/riscv/purgatory/Makefile +++ b/arch/riscv/purgatory/Makefile @@ -53,7 +53,7 @@ targets += purgatory.ro purgatory.chk PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel PURGATORY_CFLAGS := -mcmodel=medany -ffreestanding -fno-zero-initialized-in-bss -PURGATORY_CFLAGS += $(DISABLE_STACKLEAK_PLUGIN) -DDISABLE_BRANCH_PROFILING +PURGATORY_CFLAGS += $(DISABLE_KSTACK_ERASE) -DDISABLE_BRANCH_PROFILING PURGATORY_CFLAGS += -fno-stack-protector -g0 # Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 0c16dc443e2f..a8e74ed8e3cc 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -176,10 +176,10 @@ config S390 select HAVE_ARCH_KCSAN select HAVE_ARCH_KMSAN select HAVE_ARCH_KFENCE + select HAVE_ARCH_KSTACK_ERASE select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_SOFT_DIRTY - select HAVE_ARCH_STACKLEAK select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_VMAP_STACK diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 0f00f4b06d51..75b0fbb236d0 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -124,7 +124,7 @@ _LPP_OFFSET = __LC_LPP #endif .macro STACKLEAK_ERASE -#ifdef CONFIG_GCC_PLUGIN_STACKLEAK +#ifdef CONFIG_KSTACK_ERASE brasl %r14,stackleak_erase_on_task_stack #endif .endm diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 340e5468980e..bc3708cad46b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -204,13 +204,13 @@ config X86 select HAVE_ARCH_KFENCE select HAVE_ARCH_KMSAN if X86_64 select HAVE_ARCH_KGDB + select HAVE_ARCH_KSTACK_ERASE select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT select HAVE_ARCH_PREL32_RELOCATIONS select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_THREAD_STRUCT_WHITELIST - select HAVE_ARCH_STACKLEAK select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64 diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index d83236b96f22..94519688b007 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -369,7 +369,7 @@ For 32-bit we have the following conventions - kernel is built with .endm .macro STACKLEAK_ERASE_NOCLOBBER -#ifdef CONFIG_GCC_PLUGIN_STACKLEAK +#ifdef CONFIG_KSTACK_ERASE PUSH_AND_CLEAR_REGS call stackleak_erase POP_REGS @@ -388,7 +388,7 @@ For 32-bit we have the following conventions - kernel is built with #endif /* !CONFIG_X86_64 */ .macro STACKLEAK_ERASE -#ifdef CONFIG_GCC_PLUGIN_STACKLEAK +#ifdef CONFIG_KSTACK_ERASE call stackleak_erase #endif .endm diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index ebdfd7b84feb..e0a607a14e7e 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -35,7 +35,7 @@ targets += purgatory.ro purgatory.chk PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel PURGATORY_CFLAGS := -mcmodel=small -ffreestanding -fno-zero-initialized-in-bss -g0 PURGATORY_CFLAGS += -fpic -fvisibility=hidden -PURGATORY_CFLAGS += $(DISABLE_STACKLEAK_PLUGIN) -DDISABLE_BRANCH_PROFILING +PURGATORY_CFLAGS += $(DISABLE_KSTACK_ERASE) -DDISABLE_BRANCH_PROFILING PURGATORY_CFLAGS += -fno-stack-protector # Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 939a4955e00b..94b05e4451dd 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -22,16 +22,16 @@ cflags-$(CONFIG_X86) += -m$(BITS) -D__KERNEL__ -std=gnu11 \ # arm64 uses the full KBUILD_CFLAGS so it's necessary to explicitly # disable the stackleak plugin -cflags-$(CONFIG_ARM64) += -fpie $(DISABLE_STACKLEAK_PLUGIN) \ +cflags-$(CONFIG_ARM64) += -fpie $(DISABLE_KSTACK_ERASE) \ -fno-unwind-tables -fno-asynchronous-unwind-tables cflags-$(CONFIG_ARM) += -DEFI_HAVE_STRLEN -DEFI_HAVE_STRNLEN \ -DEFI_HAVE_MEMCHR -DEFI_HAVE_STRRCHR \ -DEFI_HAVE_STRCMP -fno-builtin -fpic \ $(call cc-option,-mno-single-pic-base) \ - $(DISABLE_STACKLEAK_PLUGIN) + $(DISABLE_KSTACK_ERASE) cflags-$(CONFIG_RISCV) += -fpic -DNO_ALTERNATIVE -mno-relax \ - $(DISABLE_STACKLEAK_PLUGIN) -cflags-$(CONFIG_LOONGARCH) += -fpie $(DISABLE_STACKLEAK_PLUGIN) + $(DISABLE_KSTACK_ERASE) +cflags-$(CONFIG_LOONGARCH) += -fpie $(DISABLE_KSTACK_ERASE) cflags-$(CONFIG_EFI_PARAMS_FROM_FDT) += -I$(srctree)/scripts/dtc/libfdt diff --git a/drivers/misc/lkdtm/Makefile b/drivers/misc/lkdtm/Makefile index 39468bd27b85..03ebe33185f9 100644 --- a/drivers/misc/lkdtm/Makefile +++ b/drivers/misc/lkdtm/Makefile @@ -8,7 +8,7 @@ lkdtm-$(CONFIG_LKDTM) += perms.o lkdtm-$(CONFIG_LKDTM) += refcount.o lkdtm-$(CONFIG_LKDTM) += rodata_objcopy.o lkdtm-$(CONFIG_LKDTM) += usercopy.o -lkdtm-$(CONFIG_LKDTM) += stackleak.o +lkdtm-$(CONFIG_LKDTM) += kstack_erase.o lkdtm-$(CONFIG_LKDTM) += cfi.o lkdtm-$(CONFIG_LKDTM) += fortify.o lkdtm-$(CONFIG_PPC_64S_HASH_MMU) += powerpc.o diff --git a/drivers/misc/lkdtm/kstack_erase.c b/drivers/misc/lkdtm/kstack_erase.c new file mode 100644 index 000000000000..4fd9b0bfb874 --- /dev/null +++ b/drivers/misc/lkdtm/kstack_erase.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This code tests that the current task stack is properly erased (filled + * with KSTACK_ERASE_POISON). + * + * Authors: + * Alexander Popov + * Tycho Andersen + */ + +#include "lkdtm.h" +#include + +#if defined(CONFIG_KSTACK_ERASE) +/* + * Check that stackleak tracks the lowest stack pointer and erases the stack + * below this as expected. + * + * To prevent the lowest stack pointer changing during the test, IRQs are + * masked and instrumentation of this function is disabled. We assume that the + * compiler will create a fixed-size stack frame for this function. + * + * Any non-inlined function may make further use of the stack, altering the + * lowest stack pointer and/or clobbering poison values. To avoid spurious + * failures we must avoid printing until the end of the test or have already + * encountered a failure condition. + */ +static void noinstr check_stackleak_irqoff(void) +{ + const unsigned long task_stack_base = (unsigned long)task_stack_page(current); + const unsigned long task_stack_low = stackleak_task_low_bound(current); + const unsigned long task_stack_high = stackleak_task_high_bound(current); + const unsigned long current_sp = current_stack_pointer; + const unsigned long lowest_sp = current->lowest_stack; + unsigned long untracked_high; + unsigned long poison_high, poison_low; + bool test_failed = false; + + /* + * Check that the current and lowest recorded stack pointer values fall + * within the expected task stack boundaries. These tests should never + * fail unless the boundaries are incorrect or we're clobbering the + * STACK_END_MAGIC, and in either casee something is seriously wrong. + */ + if (current_sp < task_stack_low || current_sp >= task_stack_high) { + instrumentation_begin(); + pr_err("FAIL: current_stack_pointer (0x%lx) outside of task stack bounds [0x%lx..0x%lx]\n", + current_sp, task_stack_low, task_stack_high - 1); + test_failed = true; + goto out; + } + if (lowest_sp < task_stack_low || lowest_sp >= task_stack_high) { + instrumentation_begin(); + pr_err("FAIL: current->lowest_stack (0x%lx) outside of task stack bounds [0x%lx..0x%lx]\n", + lowest_sp, task_stack_low, task_stack_high - 1); + test_failed = true; + goto out; + } + + /* + * Depending on what has run prior to this test, the lowest recorded + * stack pointer could be above or below the current stack pointer. + * Start from the lowest of the two. + * + * Poison values are naturally-aligned unsigned longs. As the current + * stack pointer might not be sufficiently aligned, we must align + * downwards to find the lowest known stack pointer value. This is the + * high boundary for a portion of the stack which may have been used + * without being tracked, and has to be scanned for poison. + */ + untracked_high = min(current_sp, lowest_sp); + untracked_high = ALIGN_DOWN(untracked_high, sizeof(unsigned long)); + + /* + * Find the top of the poison in the same way as the erasing code. + */ + poison_high = stackleak_find_top_of_poison(task_stack_low, untracked_high); + + /* + * Check whether the poisoned portion of the stack (if any) consists + * entirely of poison. This verifies the entries that + * stackleak_find_top_of_poison() should have checked. + */ + poison_low = poison_high; + while (poison_low > task_stack_low) { + poison_low -= sizeof(unsigned long); + + if (*(unsigned long *)poison_low == KSTACK_ERASE_POISON) + continue; + + instrumentation_begin(); + pr_err("FAIL: non-poison value %lu bytes below poison boundary: 0x%lx\n", + poison_high - poison_low, *(unsigned long *)poison_low); + test_failed = true; + goto out; + } + + instrumentation_begin(); + pr_info("kstack erase stack usage:\n" + " high offset: %lu bytes\n" + " current: %lu bytes\n" + " lowest: %lu bytes\n" + " tracked: %lu bytes\n" + " untracked: %lu bytes\n" + " poisoned: %lu bytes\n" + " low offset: %lu bytes\n", + task_stack_base + THREAD_SIZE - task_stack_high, + task_stack_high - current_sp, + task_stack_high - lowest_sp, + task_stack_high - untracked_high, + untracked_high - poison_high, + poison_high - task_stack_low, + task_stack_low - task_stack_base); + +out: + if (test_failed) { + pr_err("FAIL: the thread stack is NOT properly erased!\n"); + } else { + pr_info("OK: the rest of the thread stack is properly erased\n"); + } + instrumentation_end(); +} + +static void lkdtm_KSTACK_ERASE(void) +{ + unsigned long flags; + + local_irq_save(flags); + check_stackleak_irqoff(); + local_irq_restore(flags); +} +#else /* defined(CONFIG_KSTACK_ERASE) */ +static void lkdtm_KSTACK_ERASE(void) +{ + if (IS_ENABLED(CONFIG_HAVE_ARCH_KSTACK_ERASE)) { + pr_err("XFAIL: stackleak is not enabled (CONFIG_KSTACK_ERASE=n)\n"); + } else { + pr_err("XFAIL: stackleak is not supported on this arch (HAVE_ARCH_KSTACK_ERASE=n)\n"); + } +} +#endif /* defined(CONFIG_KSTACK_ERASE) */ + +static struct crashtype crashtypes[] = { + CRASHTYPE(KSTACK_ERASE), +}; + +struct crashtype_category stackleak_crashtypes = { + .crashtypes = crashtypes, + .len = ARRAY_SIZE(crashtypes), +}; diff --git a/drivers/misc/lkdtm/stackleak.c b/drivers/misc/lkdtm/stackleak.c deleted file mode 100644 index f1d022160913..000000000000 --- a/drivers/misc/lkdtm/stackleak.c +++ /dev/null @@ -1,150 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * This code tests that the current task stack is properly erased (filled - * with STACKLEAK_POISON). - * - * Authors: - * Alexander Popov - * Tycho Andersen - */ - -#include "lkdtm.h" -#include - -#if defined(CONFIG_GCC_PLUGIN_STACKLEAK) -/* - * Check that stackleak tracks the lowest stack pointer and erases the stack - * below this as expected. - * - * To prevent the lowest stack pointer changing during the test, IRQs are - * masked and instrumentation of this function is disabled. We assume that the - * compiler will create a fixed-size stack frame for this function. - * - * Any non-inlined function may make further use of the stack, altering the - * lowest stack pointer and/or clobbering poison values. To avoid spurious - * failures we must avoid printing until the end of the test or have already - * encountered a failure condition. - */ -static void noinstr check_stackleak_irqoff(void) -{ - const unsigned long task_stack_base = (unsigned long)task_stack_page(current); - const unsigned long task_stack_low = stackleak_task_low_bound(current); - const unsigned long task_stack_high = stackleak_task_high_bound(current); - const unsigned long current_sp = current_stack_pointer; - const unsigned long lowest_sp = current->lowest_stack; - unsigned long untracked_high; - unsigned long poison_high, poison_low; - bool test_failed = false; - - /* - * Check that the current and lowest recorded stack pointer values fall - * within the expected task stack boundaries. These tests should never - * fail unless the boundaries are incorrect or we're clobbering the - * STACK_END_MAGIC, and in either casee something is seriously wrong. - */ - if (current_sp < task_stack_low || current_sp >= task_stack_high) { - instrumentation_begin(); - pr_err("FAIL: current_stack_pointer (0x%lx) outside of task stack bounds [0x%lx..0x%lx]\n", - current_sp, task_stack_low, task_stack_high - 1); - test_failed = true; - goto out; - } - if (lowest_sp < task_stack_low || lowest_sp >= task_stack_high) { - instrumentation_begin(); - pr_err("FAIL: current->lowest_stack (0x%lx) outside of task stack bounds [0x%lx..0x%lx]\n", - lowest_sp, task_stack_low, task_stack_high - 1); - test_failed = true; - goto out; - } - - /* - * Depending on what has run prior to this test, the lowest recorded - * stack pointer could be above or below the current stack pointer. - * Start from the lowest of the two. - * - * Poison values are naturally-aligned unsigned longs. As the current - * stack pointer might not be sufficiently aligned, we must align - * downwards to find the lowest known stack pointer value. This is the - * high boundary for a portion of the stack which may have been used - * without being tracked, and has to be scanned for poison. - */ - untracked_high = min(current_sp, lowest_sp); - untracked_high = ALIGN_DOWN(untracked_high, sizeof(unsigned long)); - - /* - * Find the top of the poison in the same way as the erasing code. - */ - poison_high = stackleak_find_top_of_poison(task_stack_low, untracked_high); - - /* - * Check whether the poisoned portion of the stack (if any) consists - * entirely of poison. This verifies the entries that - * stackleak_find_top_of_poison() should have checked. - */ - poison_low = poison_high; - while (poison_low > task_stack_low) { - poison_low -= sizeof(unsigned long); - - if (*(unsigned long *)poison_low == STACKLEAK_POISON) - continue; - - instrumentation_begin(); - pr_err("FAIL: non-poison value %lu bytes below poison boundary: 0x%lx\n", - poison_high - poison_low, *(unsigned long *)poison_low); - test_failed = true; - goto out; - } - - instrumentation_begin(); - pr_info("stackleak stack usage:\n" - " high offset: %lu bytes\n" - " current: %lu bytes\n" - " lowest: %lu bytes\n" - " tracked: %lu bytes\n" - " untracked: %lu bytes\n" - " poisoned: %lu bytes\n" - " low offset: %lu bytes\n", - task_stack_base + THREAD_SIZE - task_stack_high, - task_stack_high - current_sp, - task_stack_high - lowest_sp, - task_stack_high - untracked_high, - untracked_high - poison_high, - poison_high - task_stack_low, - task_stack_low - task_stack_base); - -out: - if (test_failed) { - pr_err("FAIL: the thread stack is NOT properly erased!\n"); - } else { - pr_info("OK: the rest of the thread stack is properly erased\n"); - } - instrumentation_end(); -} - -static void lkdtm_STACKLEAK_ERASING(void) -{ - unsigned long flags; - - local_irq_save(flags); - check_stackleak_irqoff(); - local_irq_restore(flags); -} -#else /* defined(CONFIG_GCC_PLUGIN_STACKLEAK) */ -static void lkdtm_STACKLEAK_ERASING(void) -{ - if (IS_ENABLED(CONFIG_HAVE_ARCH_STACKLEAK)) { - pr_err("XFAIL: stackleak is not enabled (CONFIG_GCC_PLUGIN_STACKLEAK=n)\n"); - } else { - pr_err("XFAIL: stackleak is not supported on this arch (HAVE_ARCH_STACKLEAK=n)\n"); - } -} -#endif /* defined(CONFIG_GCC_PLUGIN_STACKLEAK) */ - -static struct crashtype crashtypes[] = { - CRASHTYPE(STACKLEAK_ERASING), -}; - -struct crashtype_category stackleak_crashtypes = { - .crashtypes = crashtypes, - .len = ARRAY_SIZE(crashtypes), -}; diff --git a/fs/proc/base.c b/fs/proc/base.c index c667702dc69b..be34612af8b6 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3291,7 +3291,7 @@ static int proc_pid_ksm_stat(struct seq_file *m, struct pid_namespace *ns, } #endif /* CONFIG_KSM */ -#ifdef CONFIG_STACKLEAK_METRICS +#ifdef CONFIG_KSTACK_ERASE_METRICS static int proc_stack_depth(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { @@ -3304,7 +3304,7 @@ static int proc_stack_depth(struct seq_file *m, struct pid_namespace *ns, prev_depth, depth); return 0; } -#endif /* CONFIG_STACKLEAK_METRICS */ +#endif /* CONFIG_KSTACK_ERASE_METRICS */ /* * Thread groups @@ -3411,7 +3411,7 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_LIVEPATCH ONE("patch_state", S_IRUSR, proc_pid_patch_state), #endif -#ifdef CONFIG_STACKLEAK_METRICS +#ifdef CONFIG_KSTACK_ERASE_METRICS ONE("stack_depth", S_IRUGO, proc_stack_depth), #endif #ifdef CONFIG_PROC_PID_ARCH_STATUS diff --git a/include/linux/kstack_erase.h b/include/linux/kstack_erase.h new file mode 100644 index 000000000000..4e432eefa4d0 --- /dev/null +++ b/include/linux/kstack_erase.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_KSTACK_ERASE_H +#define _LINUX_KSTACK_ERASE_H + +#include +#include + +/* + * Check that the poison value points to the unused hole in the + * virtual memory map for your platform. + */ +#define KSTACK_ERASE_POISON -0xBEEF +#define KSTACK_ERASE_SEARCH_DEPTH 128 + +#ifdef CONFIG_KSTACK_ERASE +#include +#include + +/* + * The lowest address on tsk's stack which we can plausibly erase. + */ +static __always_inline unsigned long +stackleak_task_low_bound(const struct task_struct *tsk) +{ + /* + * The lowest unsigned long on the task stack contains STACK_END_MAGIC, + * which we must not corrupt. + */ + return (unsigned long)end_of_stack(tsk) + sizeof(unsigned long); +} + +/* + * The address immediately after the highest address on tsk's stack which we + * can plausibly erase. + */ +static __always_inline unsigned long +stackleak_task_high_bound(const struct task_struct *tsk) +{ + /* + * The task's pt_regs lives at the top of the task stack and will be + * overwritten by exception entry, so there's no need to erase them. + */ + return (unsigned long)task_pt_regs(tsk); +} + +/* + * Find the address immediately above the poisoned region of the stack, where + * that region falls between 'low' (inclusive) and 'high' (exclusive). + */ +static __always_inline unsigned long +stackleak_find_top_of_poison(const unsigned long low, const unsigned long high) +{ + const unsigned int depth = KSTACK_ERASE_SEARCH_DEPTH / sizeof(unsigned long); + unsigned int poison_count = 0; + unsigned long poison_high = high; + unsigned long sp = high; + + while (sp > low && poison_count < depth) { + sp -= sizeof(unsigned long); + + if (*(unsigned long *)sp == KSTACK_ERASE_POISON) { + poison_count++; + } else { + poison_count = 0; + poison_high = sp; + } + } + + return poison_high; +} + +static inline void stackleak_task_init(struct task_struct *t) +{ + t->lowest_stack = stackleak_task_low_bound(t); +# ifdef CONFIG_KSTACK_ERASE_METRICS + t->prev_lowest_stack = t->lowest_stack; +# endif +} + +asmlinkage void noinstr stackleak_erase(void); +asmlinkage void noinstr stackleak_erase_on_task_stack(void); +asmlinkage void noinstr stackleak_erase_off_task_stack(void); +void __no_caller_saved_registers noinstr stackleak_track_stack(void); + +#else /* !CONFIG_KSTACK_ERASE */ +static inline void stackleak_task_init(struct task_struct *t) { } +#endif + +#endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 4f78a64beb52..b7d2f2fd4cd4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1607,8 +1607,10 @@ struct task_struct { /* Used by BPF for per-TASK xdp storage */ struct bpf_net_context *bpf_net_context; -#ifdef CONFIG_GCC_PLUGIN_STACKLEAK +#ifdef CONFIG_KSTACK_ERASE unsigned long lowest_stack; +#endif +#ifdef CONFIG_KSTACK_ERASE_METRICS unsigned long prev_lowest_stack; #endif diff --git a/include/linux/stackleak.h b/include/linux/stackleak.h deleted file mode 100644 index 3be2cb564710..000000000000 --- a/include/linux/stackleak.h +++ /dev/null @@ -1,89 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_STACKLEAK_H -#define _LINUX_STACKLEAK_H - -#include -#include - -/* - * Check that the poison value points to the unused hole in the - * virtual memory map for your platform. - */ -#define STACKLEAK_POISON -0xBEEF -#define STACKLEAK_SEARCH_DEPTH 128 - -#ifdef CONFIG_GCC_PLUGIN_STACKLEAK -#include -#include - -/* - * The lowest address on tsk's stack which we can plausibly erase. - */ -static __always_inline unsigned long -stackleak_task_low_bound(const struct task_struct *tsk) -{ - /* - * The lowest unsigned long on the task stack contains STACK_END_MAGIC, - * which we must not corrupt. - */ - return (unsigned long)end_of_stack(tsk) + sizeof(unsigned long); -} - -/* - * The address immediately after the highest address on tsk's stack which we - * can plausibly erase. - */ -static __always_inline unsigned long -stackleak_task_high_bound(const struct task_struct *tsk) -{ - /* - * The task's pt_regs lives at the top of the task stack and will be - * overwritten by exception entry, so there's no need to erase them. - */ - return (unsigned long)task_pt_regs(tsk); -} - -/* - * Find the address immediately above the poisoned region of the stack, where - * that region falls between 'low' (inclusive) and 'high' (exclusive). - */ -static __always_inline unsigned long -stackleak_find_top_of_poison(const unsigned long low, const unsigned long high) -{ - const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long); - unsigned int poison_count = 0; - unsigned long poison_high = high; - unsigned long sp = high; - - while (sp > low && poison_count < depth) { - sp -= sizeof(unsigned long); - - if (*(unsigned long *)sp == STACKLEAK_POISON) { - poison_count++; - } else { - poison_count = 0; - poison_high = sp; - } - } - - return poison_high; -} - -static inline void stackleak_task_init(struct task_struct *t) -{ - t->lowest_stack = stackleak_task_low_bound(t); -# ifdef CONFIG_STACKLEAK_METRICS - t->prev_lowest_stack = t->lowest_stack; -# endif -} - -asmlinkage void noinstr stackleak_erase(void); -asmlinkage void noinstr stackleak_erase_on_task_stack(void); -asmlinkage void noinstr stackleak_erase_off_task_stack(void); -void __no_caller_saved_registers noinstr stackleak_track_stack(void); - -#else /* !CONFIG_GCC_PLUGIN_STACKLEAK */ -static inline void stackleak_task_init(struct task_struct *t) { } -#endif - -#endif diff --git a/kernel/Makefile b/kernel/Makefile index 32e80dd626af..e4f01f1d4d0c 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -139,11 +139,11 @@ obj-$(CONFIG_WATCH_QUEUE) += watch_queue.o obj-$(CONFIG_RESOURCE_KUNIT_TEST) += resource_kunit.o obj-$(CONFIG_SYSCTL_KUNIT_TEST) += sysctl-test.o -CFLAGS_stackleak.o += $(DISABLE_STACKLEAK_PLUGIN) -obj-$(CONFIG_GCC_PLUGIN_STACKLEAK) += stackleak.o -KASAN_SANITIZE_stackleak.o := n -KCSAN_SANITIZE_stackleak.o := n -KCOV_INSTRUMENT_stackleak.o := n +CFLAGS_kstack_erase.o += $(DISABLE_KSTACK_ERASE) +obj-$(CONFIG_KSTACK_ERASE) += kstack_erase.o +KASAN_SANITIZE_kstack_erase.o := n +KCSAN_SANITIZE_kstack_erase.o := n +KCOV_INSTRUMENT_kstack_erase.o := n obj-$(CONFIG_SCF_TORTURE_TEST) += scftorture.o diff --git a/kernel/fork.c b/kernel/fork.c index 1ee8eb11f38b..1ec66911f6f6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -93,7 +93,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/kstack_erase.c b/kernel/kstack_erase.c new file mode 100644 index 000000000000..201b846f8345 --- /dev/null +++ b/kernel/kstack_erase.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This code fills the used part of the kernel stack with a poison value + * before returning to userspace. It's part of the STACKLEAK feature + * ported from grsecurity/PaX. + * + * Author: Alexander Popov + * + * KSTACK_ERASE reduces the information which kernel stack leak bugs can + * reveal and blocks some uninitialized stack variable attacks. + */ + +#include +#include + +#ifdef CONFIG_KSTACK_ERASE_RUNTIME_DISABLE +#include +#include +#include +#include + +static DEFINE_STATIC_KEY_FALSE(stack_erasing_bypass); + +#ifdef CONFIG_SYSCTL +static int stack_erasing_sysctl(const struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret = 0; + int state = !static_branch_unlikely(&stack_erasing_bypass); + int prev_state = state; + struct ctl_table table_copy = *table; + + table_copy.data = &state; + ret = proc_dointvec_minmax(&table_copy, write, buffer, lenp, ppos); + state = !!state; + if (ret || !write || state == prev_state) + return ret; + + if (state) + static_branch_disable(&stack_erasing_bypass); + else + static_branch_enable(&stack_erasing_bypass); + + pr_warn("stackleak: kernel stack erasing is %s\n", + str_enabled_disabled(state)); + return ret; +} +static const struct ctl_table stackleak_sysctls[] = { + { + .procname = "stack_erasing", + .data = NULL, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = stack_erasing_sysctl, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +}; + +static int __init stackleak_sysctls_init(void) +{ + register_sysctl_init("kernel", stackleak_sysctls); + return 0; +} +late_initcall(stackleak_sysctls_init); +#endif /* CONFIG_SYSCTL */ + +#define skip_erasing() static_branch_unlikely(&stack_erasing_bypass) +#else +#define skip_erasing() false +#endif /* CONFIG_KSTACK_ERASE_RUNTIME_DISABLE */ + +#ifndef __stackleak_poison +static __always_inline void __stackleak_poison(unsigned long erase_low, + unsigned long erase_high, + unsigned long poison) +{ + while (erase_low < erase_high) { + *(unsigned long *)erase_low = poison; + erase_low += sizeof(unsigned long); + } +} +#endif + +static __always_inline void __stackleak_erase(bool on_task_stack) +{ + const unsigned long task_stack_low = stackleak_task_low_bound(current); + const unsigned long task_stack_high = stackleak_task_high_bound(current); + unsigned long erase_low, erase_high; + + erase_low = stackleak_find_top_of_poison(task_stack_low, + current->lowest_stack); + +#ifdef CONFIG_KSTACK_ERASE_METRICS + current->prev_lowest_stack = erase_low; +#endif + + /* + * Write poison to the task's stack between 'erase_low' and + * 'erase_high'. + * + * If we're running on a different stack (e.g. an entry trampoline + * stack) we can erase everything below the pt_regs at the top of the + * task stack. + * + * If we're running on the task stack itself, we must not clobber any + * stack used by this function and its caller. We assume that this + * function has a fixed-size stack frame, and the current stack pointer + * doesn't change while we write poison. + */ + if (on_task_stack) + erase_high = current_stack_pointer; + else + erase_high = task_stack_high; + + __stackleak_poison(erase_low, erase_high, KSTACK_ERASE_POISON); + + /* Reset the 'lowest_stack' value for the next syscall */ + current->lowest_stack = task_stack_high; +} + +/* + * Erase and poison the portion of the task stack used since the last erase. + * Can be called from the task stack or an entry stack when the task stack is + * no longer in use. + */ +asmlinkage void noinstr stackleak_erase(void) +{ + if (skip_erasing()) + return; + + __stackleak_erase(on_thread_stack()); +} + +/* + * Erase and poison the portion of the task stack used since the last erase. + * Can only be called from the task stack. + */ +asmlinkage void noinstr stackleak_erase_on_task_stack(void) +{ + if (skip_erasing()) + return; + + __stackleak_erase(true); +} + +/* + * Erase and poison the portion of the task stack used since the last erase. + * Can only be called from a stack other than the task stack. + */ +asmlinkage void noinstr stackleak_erase_off_task_stack(void) +{ + if (skip_erasing()) + return; + + __stackleak_erase(false); +} + +void __used __no_caller_saved_registers noinstr stackleak_track_stack(void) +{ + unsigned long sp = current_stack_pointer; + + /* + * Having CONFIG_KSTACK_ERASE_TRACK_MIN_SIZE larger than + * KSTACK_ERASE_SEARCH_DEPTH makes the poison search in + * stackleak_erase() unreliable. Let's prevent that. + */ + BUILD_BUG_ON(CONFIG_KSTACK_ERASE_TRACK_MIN_SIZE > KSTACK_ERASE_SEARCH_DEPTH); + + /* 'lowest_stack' should be aligned on the register width boundary */ + sp = ALIGN(sp, sizeof(unsigned long)); + if (sp < current->lowest_stack && + sp >= stackleak_task_low_bound(current)) { + current->lowest_stack = sp; + } +} +EXPORT_SYMBOL(stackleak_track_stack); diff --git a/kernel/stackleak.c b/kernel/stackleak.c deleted file mode 100644 index bb65321761b4..000000000000 --- a/kernel/stackleak.c +++ /dev/null @@ -1,177 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * This code fills the used part of the kernel stack with a poison value - * before returning to userspace. It's part of the STACKLEAK feature - * ported from grsecurity/PaX. - * - * Author: Alexander Popov - * - * STACKLEAK reduces the information which kernel stack leak bugs can - * reveal and blocks some uninitialized stack variable attacks. - */ - -#include -#include - -#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE -#include -#include -#include -#include - -static DEFINE_STATIC_KEY_FALSE(stack_erasing_bypass); - -#ifdef CONFIG_SYSCTL -static int stack_erasing_sysctl(const struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - int ret = 0; - int state = !static_branch_unlikely(&stack_erasing_bypass); - int prev_state = state; - struct ctl_table table_copy = *table; - - table_copy.data = &state; - ret = proc_dointvec_minmax(&table_copy, write, buffer, lenp, ppos); - state = !!state; - if (ret || !write || state == prev_state) - return ret; - - if (state) - static_branch_disable(&stack_erasing_bypass); - else - static_branch_enable(&stack_erasing_bypass); - - pr_warn("stackleak: kernel stack erasing is %s\n", - str_enabled_disabled(state)); - return ret; -} -static const struct ctl_table stackleak_sysctls[] = { - { - .procname = "stack_erasing", - .data = NULL, - .maxlen = sizeof(int), - .mode = 0600, - .proc_handler = stack_erasing_sysctl, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -}; - -static int __init stackleak_sysctls_init(void) -{ - register_sysctl_init("kernel", stackleak_sysctls); - return 0; -} -late_initcall(stackleak_sysctls_init); -#endif /* CONFIG_SYSCTL */ - -#define skip_erasing() static_branch_unlikely(&stack_erasing_bypass) -#else -#define skip_erasing() false -#endif /* CONFIG_STACKLEAK_RUNTIME_DISABLE */ - -#ifndef __stackleak_poison -static __always_inline void __stackleak_poison(unsigned long erase_low, - unsigned long erase_high, - unsigned long poison) -{ - while (erase_low < erase_high) { - *(unsigned long *)erase_low = poison; - erase_low += sizeof(unsigned long); - } -} -#endif - -static __always_inline void __stackleak_erase(bool on_task_stack) -{ - const unsigned long task_stack_low = stackleak_task_low_bound(current); - const unsigned long task_stack_high = stackleak_task_high_bound(current); - unsigned long erase_low, erase_high; - - erase_low = stackleak_find_top_of_poison(task_stack_low, - current->lowest_stack); - -#ifdef CONFIG_STACKLEAK_METRICS - current->prev_lowest_stack = erase_low; -#endif - - /* - * Write poison to the task's stack between 'erase_low' and - * 'erase_high'. - * - * If we're running on a different stack (e.g. an entry trampoline - * stack) we can erase everything below the pt_regs at the top of the - * task stack. - * - * If we're running on the task stack itself, we must not clobber any - * stack used by this function and its caller. We assume that this - * function has a fixed-size stack frame, and the current stack pointer - * doesn't change while we write poison. - */ - if (on_task_stack) - erase_high = current_stack_pointer; - else - erase_high = task_stack_high; - - __stackleak_poison(erase_low, erase_high, STACKLEAK_POISON); - - /* Reset the 'lowest_stack' value for the next syscall */ - current->lowest_stack = task_stack_high; -} - -/* - * Erase and poison the portion of the task stack used since the last erase. - * Can be called from the task stack or an entry stack when the task stack is - * no longer in use. - */ -asmlinkage void noinstr stackleak_erase(void) -{ - if (skip_erasing()) - return; - - __stackleak_erase(on_thread_stack()); -} - -/* - * Erase and poison the portion of the task stack used since the last erase. - * Can only be called from the task stack. - */ -asmlinkage void noinstr stackleak_erase_on_task_stack(void) -{ - if (skip_erasing()) - return; - - __stackleak_erase(true); -} - -/* - * Erase and poison the portion of the task stack used since the last erase. - * Can only be called from a stack other than the task stack. - */ -asmlinkage void noinstr stackleak_erase_off_task_stack(void) -{ - if (skip_erasing()) - return; - - __stackleak_erase(false); -} - -void __used __no_caller_saved_registers noinstr stackleak_track_stack(void) -{ - unsigned long sp = current_stack_pointer; - - /* - * Having CONFIG_STACKLEAK_TRACK_MIN_SIZE larger than - * STACKLEAK_SEARCH_DEPTH makes the poison search in - * stackleak_erase() unreliable. Let's prevent that. - */ - BUILD_BUG_ON(CONFIG_STACKLEAK_TRACK_MIN_SIZE > STACKLEAK_SEARCH_DEPTH); - - /* 'lowest_stack' should be aligned on the register width boundary */ - sp = ALIGN(sp, sizeof(unsigned long)); - if (sp < current->lowest_stack && - sp >= stackleak_task_low_bound(current)) { - current->lowest_stack = sp; - } -} -EXPORT_SYMBOL(stackleak_track_stack); diff --git a/lib/Makefile b/lib/Makefile index c38582f187dd..632e69d25feb 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -337,7 +337,7 @@ obj-$(CONFIG_UBSAN) += ubsan.o UBSAN_SANITIZE_ubsan.o := n KASAN_SANITIZE_ubsan.o := n KCSAN_SANITIZE_ubsan.o := n -CFLAGS_ubsan.o := -fno-stack-protector $(DISABLE_STACKLEAK_PLUGIN) +CFLAGS_ubsan.o := -fno-stack-protector $(DISABLE_KSTACK_ERASE) obj-$(CONFIG_SBITMAP) += sbitmap.o diff --git a/scripts/Makefile.gcc-plugins b/scripts/Makefile.gcc-plugins index 435ab3f0ec44..28b8867c4e84 100644 --- a/scripts/Makefile.gcc-plugins +++ b/scripts/Makefile.gcc-plugins @@ -12,15 +12,15 @@ gcc-plugin-$(CONFIG_GCC_PLUGIN_STACKLEAK) += stackleak_plugin.so gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STACKLEAK) \ += -DSTACKLEAK_PLUGIN gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STACKLEAK) \ - += -fplugin-arg-stackleak_plugin-track-min-size=$(CONFIG_STACKLEAK_TRACK_MIN_SIZE) + += -fplugin-arg-stackleak_plugin-track-min-size=$(CONFIG_KSTACK_ERASE_TRACK_MIN_SIZE) gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STACKLEAK) \ += -fplugin-arg-stackleak_plugin-arch=$(SRCARCH) gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STACKLEAK_VERBOSE) \ += -fplugin-arg-stackleak_plugin-verbose ifdef CONFIG_GCC_PLUGIN_STACKLEAK - DISABLE_STACKLEAK_PLUGIN += -fplugin-arg-stackleak_plugin-disable + DISABLE_KSTACK_ERASE += -fplugin-arg-stackleak_plugin-disable endif -export DISABLE_STACKLEAK_PLUGIN +export DISABLE_KSTACK_ERASE # All the plugin CFLAGS are collected here in case a build target needs to # filter them out of the KBUILD_CFLAGS. diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index fd1238753cad..125b35e2ef0f 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -82,10 +82,10 @@ choice endchoice -config GCC_PLUGIN_STACKLEAK +config KSTACK_ERASE bool "Poison kernel stack before returning from syscalls" + depends on HAVE_ARCH_KSTACK_ERASE depends on GCC_PLUGINS - depends on HAVE_ARCH_STACKLEAK help This option makes the kernel erase the kernel stack before returning from system calls. This has the effect of leaving @@ -103,6 +103,10 @@ config GCC_PLUGIN_STACKLEAK are advised to test this feature on your expected workload before deploying it. +config GCC_PLUGIN_STACKLEAK + def_bool KSTACK_ERASE + depends on GCC_PLUGINS + help This plugin was ported from grsecurity/PaX. More information at: * https://grsecurity.net/ * https://pax.grsecurity.net/ @@ -117,37 +121,37 @@ config GCC_PLUGIN_STACKLEAK_VERBOSE instrumented. This is useful for comparing coverage between builds. -config STACKLEAK_TRACK_MIN_SIZE - int "Minimum stack frame size of functions tracked by STACKLEAK" +config KSTACK_ERASE_TRACK_MIN_SIZE + int "Minimum stack frame size of functions tracked by KSTACK_ERASE" default 100 range 0 4096 - depends on GCC_PLUGIN_STACKLEAK + depends on KSTACK_ERASE help - The STACKLEAK gcc plugin instruments the kernel code for tracking + The KSTACK_ERASE option instruments the kernel code for tracking the lowest border of the kernel stack (and for some other purposes). It inserts the stackleak_track_stack() call for the functions with a stack frame size greater than or equal to this parameter. If unsure, leave the default value 100. -config STACKLEAK_METRICS - bool "Show STACKLEAK metrics in the /proc file system" - depends on GCC_PLUGIN_STACKLEAK +config KSTACK_ERASE_METRICS + bool "Show KSTACK_ERASE metrics in the /proc file system" + depends on KSTACK_ERASE depends on PROC_FS help - If this is set, STACKLEAK metrics for every task are available in - the /proc file system. In particular, /proc//stack_depth + If this is set, KSTACK_ERASE metrics for every task are available + in the /proc file system. In particular, /proc//stack_depth shows the maximum kernel stack consumption for the current and previous syscalls. Although this information is not precise, it - can be useful for estimating the STACKLEAK performance impact for - your workloads. + can be useful for estimating the KSTACK_ERASE performance impact + for your workloads. -config STACKLEAK_RUNTIME_DISABLE +config KSTACK_ERASE_RUNTIME_DISABLE bool "Allow runtime disabling of kernel stack erasing" - depends on GCC_PLUGIN_STACKLEAK + depends on KSTACK_ERASE help This option provides 'stack_erasing' sysctl, which can be used in runtime to control kernel stack erasing for kernels built with - CONFIG_GCC_PLUGIN_STACKLEAK. + CONFIG_KSTACK_ERASE. config INIT_ON_ALLOC_DEFAULT_ON bool "Enable heap memory zeroing on allocation by default" diff --git a/tools/objtool/check.c b/tools/objtool/check.c index f23bdda737aa..5451bdbcf84a 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1192,7 +1192,7 @@ static const char *uaccess_safe_builtin[] = { "__ubsan_handle_type_mismatch_v1", "__ubsan_handle_shift_out_of_bounds", "__ubsan_handle_load_invalid_value", - /* STACKLEAK */ + /* KSTACK_ERASE */ "stackleak_track_stack", /* TRACE_BRANCH_PROFILING */ "ftrace_likely_update", diff --git a/tools/testing/selftests/lkdtm/config b/tools/testing/selftests/lkdtm/config index 7afe05e8c4d7..bd09fdaf53e0 100644 --- a/tools/testing/selftests/lkdtm/config +++ b/tools/testing/selftests/lkdtm/config @@ -2,7 +2,7 @@ CONFIG_LKDTM=y CONFIG_DEBUG_LIST=y CONFIG_SLAB_FREELIST_HARDENED=y CONFIG_FORTIFY_SOURCE=y -CONFIG_GCC_PLUGIN_STACKLEAK=y +CONFIG_KSTACK_ERASE=y CONFIG_HARDENED_USERCOPY=y CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=y CONFIG_INIT_ON_FREE_DEFAULT_ON=y -- cgit v1.2.3 From b5cebb5de9a8fce3f6e6451f6db8e5608c7f2261 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 18 Jul 2025 19:33:16 +0100 Subject: kselftest/arm64: Allow sve-ptrace to run on SME only systems Currently the sve-ptrace test program only runs if the system supports SVE but since SME includes streaming SVE the tests it offers are valid even on a system that only supports SME. Since the tests already have individual hwcap checks just remove the top level test and rely on those. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20250718-arm64-sve-ptrace-sme-only-v1-1-2a1121e51b1d@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/sve-ptrace.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index 7f9b6a61d369..b22303778fb0 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -753,9 +753,6 @@ int main(void) ksft_print_header(); ksft_set_plan(EXPECTED_TESTS); - if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) - ksft_exit_skip("SVE not available\n"); - child = fork(); if (!child) return do_child(); -- cgit v1.2.3 From b84d2b27954f83c95c4b984d4f85574e8f51caa5 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 18 Jul 2025 23:03:26 +0100 Subject: kselftest/arm64: Test FPSIMD format data writes via NT_ARM_SVE in fp-ptrace The NT_ARM_SVE register set supports two data formats, the native SVE one and an alternative format where we embed a copy of user_fpsimd_data as used for NT_PRFPREG in the SVE register set. The register data is set as for a write to NT_PRFPREG and changes in vector length and streaming mode are handled as for any NT_ARM_SVE write. This has not previously been tested by fp-ptrace, add coverage of it. We do not support writes in FPSIMD format for NT_ARM_SSVE so we skip the test for anything that would leave us in streaming mode. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20250718-arm64-fp-ptrace-sve-fpsimd-v1-1-7ecda32aa297@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/fp-ptrace.c | 66 +++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c index 191c47ca0ed8..c479c97dea1a 100644 --- a/tools/testing/selftests/arm64/fp/fp-ptrace.c +++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c @@ -1066,6 +1066,23 @@ static bool sve_write_supported(struct test_config *config) return true; } +static bool sve_write_fpsimd_supported(struct test_config *config) +{ + if (!sve_supported()) + return false; + + if ((config->svcr_in & SVCR_ZA) != (config->svcr_expected & SVCR_ZA)) + return false; + + if (config->svcr_expected & SVCR_SM) + return false; + + if (config->sme_vl_in != config->sme_vl_expected) + return false; + + return true; +} + static void fpsimd_write_expected(struct test_config *config) { int vl; @@ -1152,7 +1169,7 @@ static void sve_write_expected(struct test_config *config) } } -static void sve_write(pid_t child, struct test_config *config) +static void sve_write_sve(pid_t child, struct test_config *config) { struct user_sve_header *sve; struct iovec iov; @@ -1195,6 +1212,45 @@ static void sve_write(pid_t child, struct test_config *config) free(iov.iov_base); } +static void sve_write_fpsimd(pid_t child, struct test_config *config) +{ + struct user_sve_header *sve; + struct user_fpsimd_state *fpsimd; + struct iovec iov; + int ret, vl, vq; + + vl = vl_expected(config); + vq = __sve_vq_from_vl(vl); + + if (!vl) + return; + + iov.iov_len = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, + SVE_PT_REGS_FPSIMD); + iov.iov_base = malloc(iov.iov_len); + if (!iov.iov_base) { + ksft_print_msg("Failed allocating %lu byte SVE write buffer\n", + iov.iov_len); + return; + } + memset(iov.iov_base, 0, iov.iov_len); + + sve = iov.iov_base; + sve->size = iov.iov_len; + sve->flags = SVE_PT_REGS_FPSIMD; + sve->vl = vl; + + fpsimd = iov.iov_base + SVE_PT_REGS_OFFSET; + memcpy(&fpsimd->vregs, v_expected, sizeof(v_expected)); + + ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_SVE, &iov); + if (ret != 0) + ksft_print_msg("Failed to write SVE: %s (%d)\n", + strerror(errno), errno); + + free(iov.iov_base); +} + static bool za_write_supported(struct test_config *config) { if ((config->svcr_in & SVCR_SM) != (config->svcr_expected & SVCR_SM)) @@ -1386,7 +1442,13 @@ static struct test_definition sve_test_defs[] = { .name = "SVE write", .supported = sve_write_supported, .set_expected_values = sve_write_expected, - .modify_values = sve_write, + .modify_values = sve_write_sve, + }, + { + .name = "SVE write FPSIMD format", + .supported = sve_write_fpsimd_supported, + .set_expected_values = fpsimd_write_expected, + .modify_values = sve_write_fpsimd, }, }; -- cgit v1.2.3 From b021f45d39f37f67005948a96abea84736890463 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 18 Jul 2025 23:14:50 +0100 Subject: kselftest/arm64: Test SME on SME only systems in fp-ptrace When checking that the vector extensions are supported fp-ptrace currently only checks for SVE being supported which means that we get into a confused half configured state for SME only systems. Check for SME as well. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20250718-arm64-fp-ptrace-sme-only-v1-1-3b96dd19a503@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/fp-ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c index c479c97dea1a..52d4d8fa2958 100644 --- a/tools/testing/selftests/arm64/fp/fp-ptrace.c +++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c @@ -1669,7 +1669,7 @@ int main(void) * Run the test set if there is no SVE or SME, with those we * have to pick a VL for each run. */ - if (!sve_supported()) { + if (!sve_supported() && !sme_supported()) { test_config.sve_vl_in = 0; test_config.sve_vl_expected = 0; test_config.sme_vl_in = 0; -- cgit v1.2.3 From aa7d3c8bc27d32dec940c924d6d270fa312e731f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 18 Jul 2025 23:14:51 +0100 Subject: kselftest/arm64: Fix SVE write data generation for SME only systems fp-ptrace does not handle SME only systems correctly when generating data, on SME only systems scenarios where we are not in streaming mode will not have an expected vector length. This leads to attempts to do memcpy()s of zero byte arrays which can crash, fix this by skipping generation of SVE data for cases where we do not expect to have an active vector length. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20250718-arm64-fp-ptrace-sme-only-v1-2-3b96dd19a503@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/fp-ptrace.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c index 52d4d8fa2958..a709485cf553 100644 --- a/tools/testing/selftests/arm64/fp/fp-ptrace.c +++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c @@ -1151,6 +1151,9 @@ static void sve_write_expected(struct test_config *config) int vl = vl_expected(config); int sme_vq = __sve_vq_from_vl(config->sme_vl_expected); + if (!vl) + return; + fill_random(z_expected, __SVE_ZREGS_SIZE(__sve_vq_from_vl(vl))); fill_random(p_expected, __SVE_PREGS_SIZE(__sve_vq_from_vl(vl))); @@ -1178,6 +1181,9 @@ static void sve_write_sve(pid_t child, struct test_config *config) vl = vl_expected(config); vq = __sve_vq_from_vl(vl); + if (!vl) + return; + iov.iov_len = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE); iov.iov_base = malloc(iov.iov_len); if (!iov.iov_base) { -- cgit v1.2.3 From 4752dcc156f2090143296ff45f8e35c8ec3e1730 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 18 Jul 2025 23:14:52 +0100 Subject: kselftest/arm64: Handle attempts to disable SM on SME only systems The ABI for disabling streaming mode via ptrace is to do a write via the SVE register set. Following the recent round of fixes to the ptrace code we don't support this operation on systems without SVE, which is detected as failures by fp-ptrace. Update the program so that it knows that this operation is not currently supported. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20250718-arm64-fp-ptrace-sme-only-v1-3-3b96dd19a503@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/fp-ptrace.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c index a709485cf553..124bc883365e 100644 --- a/tools/testing/selftests/arm64/fp/fp-ptrace.c +++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c @@ -1061,6 +1061,9 @@ static bool sve_write_supported(struct test_config *config) if (config->sme_vl_in != config->sme_vl_expected) { return false; } + + if (!sve_supported()) + return false; } return true; -- cgit v1.2.3 From 04850819c65c8242072818655d4341e70ae998b5 Mon Sep 17 00:00:00 2001 From: Cynthia Huang Date: Thu, 10 Jul 2025 18:36:30 +0800 Subject: selftests/futex: Define SYS_futex on 32-bit architectures with 64-bit time_t The kernel does not provide sys_futex() on 32-bit architectures that do not support 32-bit time representations, such as riscv32. As a result, glibc cannot define SYS_futex, causing compilation failures in tests that rely on this syscall. Define SYS_futex as SYS_futex_time64 in such cases to ensure successful compilation and compatibility. Signed-off-by: Cynthia Huang Signed-off-by: Ben Zong-You Xie Signed-off-by: Thomas Gleixner Reviewed-by: Muhammad Usama Anjum Link: https://lore.kernel.org/all/20250710103630.3156130-1-ben717@andestech.com --- tools/testing/selftests/futex/include/futextest.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/futex/include/futextest.h b/tools/testing/selftests/futex/include/futextest.h index ddbcfc9b7bac..7a5fd1d5355e 100644 --- a/tools/testing/selftests/futex/include/futextest.h +++ b/tools/testing/selftests/futex/include/futextest.h @@ -47,6 +47,17 @@ typedef volatile u_int32_t futex_t; FUTEX_PRIVATE_FLAG) #endif +/* + * SYS_futex is expected from system C library, in glibc some 32-bit + * architectures (e.g. RV32) are using 64-bit time_t, therefore it doesn't have + * SYS_futex defined but just SYS_futex_time64. Define SYS_futex as + * SYS_futex_time64 in this situation to ensure the compilation and the + * compatibility. + */ +#if !defined(SYS_futex) && defined(SYS_futex_time64) +#define SYS_futex SYS_futex_time64 +#endif + /** * futex() - SYS_futex syscall wrapper * @uaddr: address of first futex -- cgit v1.2.3 From e40892214b454c8734350d82374f46c2e495a4d2 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 15 Jul 2025 14:06:26 +0100 Subject: selftests/futex: Fix spelling mistake "Succeffuly" -> "Successfully" There is a spelling mistake in a ksft_exit_fail_msg() message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250715130627.1907017-1-colin.i.king@gmail.com --- tools/testing/selftests/futex/functional/futex_priv_hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/futex/functional/futex_priv_hash.c b/tools/testing/selftests/futex/functional/futex_priv_hash.c index a9cedc365102..aea001ac4946 100644 --- a/tools/testing/selftests/futex/functional/futex_priv_hash.c +++ b/tools/testing/selftests/futex/functional/futex_priv_hash.c @@ -122,7 +122,7 @@ static void futex_dummy_op(void) } ret = pthread_mutex_timedlock(&lock, &timeout); if (ret == 0) - ksft_exit_fail_msg("Succeffuly locked an already locked mutex.\n"); + ksft_exit_fail_msg("Successfully locked an already locked mutex.\n"); if (ret != ETIMEDOUT) ksft_exit_fail_msg("pthread_mutex_timedlock() did not timeout: %d.\n", ret); -- cgit v1.2.3 From 661e9cd196598c7d2502260ebbe60970546cca35 Mon Sep 17 00:00:00 2001 From: Moon Hee Lee Date: Wed, 2 Jul 2025 10:17:05 -0700 Subject: selftests/kexec: fix test_kexec_jump build The test_kexec_jump program builds correctly when invoked from the top-level selftests/Makefile, which explicitly sets the OUTPUT variable. However, building directly in tools/testing/selftests/kexec fails with: make: *** No rule to make target '/test_kexec_jump', needed by 'test_kexec_jump.sh'. Stop. This failure occurs because the Makefile rule relies on $(OUTPUT), which is undefined in direct builds. Fix this by listing test_kexec_jump in TEST_GEN_PROGS, the standard way to declare generated test binaries in the kselftest framework. This ensures the binary is built regardless of invocation context and properly removed by make clean. Link: https://lore.kernel.org/r/20250702171704.22559-2-moonhee.lee.ca@gmail.com Acked-by: Shuah Khan Signed-off-by: Moon Hee Lee Acked-by: Baoquan He Acked-by: David Woodhouse Signed-off-by: Shuah Khan --- tools/testing/selftests/kexec/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/kexec/Makefile b/tools/testing/selftests/kexec/Makefile index e3000ccb9a5d..874cfdd3b75b 100644 --- a/tools/testing/selftests/kexec/Makefile +++ b/tools/testing/selftests/kexec/Makefile @@ -12,7 +12,7 @@ include ../../../scripts/Makefile.arch ifeq ($(IS_64_BIT)$(ARCH_PROCESSED),1x86) TEST_PROGS += test_kexec_jump.sh -test_kexec_jump.sh: $(OUTPUT)/test_kexec_jump +TEST_GEN_PROGS := test_kexec_jump endif include ../lib.mk -- cgit v1.2.3 From 1cbcb1b28b26a528b1c1cf1eefb5d5c5659967dd Mon Sep 17 00:00:00 2001 From: Mohsin Bashir Date: Sat, 19 Jul 2025 01:30:56 -0700 Subject: selftests: drv-net: Test XDP_PASS/DROP support Test XDP_PASS/DROP in single buffer and multi buffer mode when XDP native support is available. ./drivers/net/xdp.py TAP version 13 1..4 ok 1 xdp.test_xdp_native_pass_sb ok 2 xdp.test_xdp_native_pass_mb ok 3 xdp.test_xdp_native_drop_sb ok 4 xdp.test_xdp_native_drop_mb \# Totals: pass:4 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Mohsin Bashir Link: https://patch.msgid.link/20250719083059.3209169-3-mohsin.bashr@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/Makefile | 1 + tools/testing/selftests/drivers/net/xdp.py | 303 +++++++++++++++++++++++ tools/testing/selftests/net/lib/xdp_native.bpf.c | 158 ++++++++++++ 3 files changed, 462 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/xdp.py create mode 100644 tools/testing/selftests/net/lib/xdp_native.bpf.c (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 9bd84d6b542e..3556f3563e08 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -22,6 +22,7 @@ TEST_PROGS := \ stats.py \ shaper.py \ hds.py \ + xdp.py \ # end of TEST_PROGS include ../../lib.mk diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py new file mode 100755 index 000000000000..8ab1607edcd8 --- /dev/null +++ b/tools/testing/selftests/drivers/net/xdp.py @@ -0,0 +1,303 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +This file contains tests to verify native XDP support in network drivers. +The tests utilize the BPF program `xdp_native.bpf.o` from the `selftests.net.lib` +directory, with each test focusing on a specific aspect of XDP functionality. +""" +import random +import string +from dataclasses import dataclass +from enum import Enum + +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ne +from lib.py import KsftFailEx, NetDrvEpEnv +from lib.py import bkg, cmd, rand_port +from lib.py import ip, bpftool, defer + + +class TestConfig(Enum): + """Enum for XDP configuration options.""" + MODE = 0 # Configures the BPF program for a specific test + PORT = 1 # Port configuration to communicate with the remote host + + +class XDPAction(Enum): + """Enum for XDP actions.""" + PASS = 0 # Pass the packet up to the stack + DROP = 1 # Drop the packet + + +class XDPStats(Enum): + """Enum for XDP statistics.""" + RX = 0 # Count of valid packets received for testing + PASS = 1 # Count of packets passed up to the stack + DROP = 2 # Count of packets dropped + + +@dataclass +class BPFProgInfo: + """Data class to store information about a BPF program.""" + name: str # Name of the BPF program + file: str # BPF program object file + xdp_sec: str = "xdp" # XDP section name (e.g., "xdp" or "xdp.frags") + mtu: int = 1500 # Maximum Transmission Unit, default is 1500 + + +def _exchg_udp(cfg, port, test_string): + """ + Exchanges UDP packets between a local and remote host using the socat tool. + + Args: + cfg: Configuration object containing network settings. + port: Port number to use for the UDP communication. + test_string: String that the remote host will send. + + Returns: + The string received by the test host. + """ + cfg.require_cmd("socat", remote=True) + + rx_udp_cmd = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport STDOUT" + tx_udp_cmd = f"echo -n {test_string} | socat -t 2 -u STDIN UDP:{cfg.baddr}:{port}" + + with bkg(rx_udp_cmd, exit_wait=True) as nc: + cmd(tx_udp_cmd, host=cfg.remote, shell=True) + + return nc.stdout.strip() + + +def _test_udp(cfg, port, size=256): + """ + Tests UDP packet exchange between a local and remote host. + + Args: + cfg: Configuration object containing network settings. + port: Port number to use for the UDP communication. + size: The length of the test string to be exchanged, default is 256 characters. + + Returns: + bool: True if the received string matches the sent string, False otherwise. + """ + test_str = "".join(random.choice(string.ascii_lowercase) for _ in range(size)) + recvd_str = _exchg_udp(cfg, port, test_str) + + return recvd_str == test_str + + +def _load_xdp_prog(cfg, bpf_info): + """ + Loads an XDP program onto a network interface. + + Args: + cfg: Configuration object containing network settings. + bpf_info: BPFProgInfo object containing information about the BPF program. + + Returns: + dict: A dictionary containing the XDP program ID, name, and associated map IDs. + """ + abs_path = cfg.net_lib_dir / bpf_info.file + prog_info = {} + + cmd(f"ip link set dev {cfg.remote_ifname} mtu {bpf_info.mtu}", shell=True, host=cfg.remote) + defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote) + + cmd( + f"ip link set dev {cfg.ifname} mtu {bpf_info.mtu} xdp obj {abs_path} sec {bpf_info.xdp_sec}", + shell=True + ) + defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off") + + xdp_info = ip(f"-d link show dev {cfg.ifname}", json=True)[0] + prog_info["id"] = xdp_info["xdp"]["prog"]["id"] + prog_info["name"] = xdp_info["xdp"]["prog"]["name"] + prog_id = prog_info["id"] + + map_ids = bpftool(f"prog show id {prog_id}", json=True)["map_ids"] + prog_info["maps"] = {} + for map_id in map_ids: + name = bpftool(f"map show id {map_id}", json=True)["name"] + prog_info["maps"][name] = map_id + + return prog_info + + +def format_hex_bytes(value): + """ + Helper function that converts an integer into a formatted hexadecimal byte string. + + Args: + value: An integer representing the number to be converted. + + Returns: + A string representing hexadecimal equivalent of value, with bytes separated by spaces. + """ + hex_str = value.to_bytes(4, byteorder='little', signed=True) + return ' '.join(f'{byte:02x}' for byte in hex_str) + + +def _set_xdp_map(map_name, key, value): + """ + Updates an XDP map with a given key-value pair using bpftool. + + Args: + map_name: The name of the XDP map to update. + key: The key to update in the map, formatted as a hexadecimal string. + value: The value to associate with the key, formatted as a hexadecimal string. + """ + key_formatted = format_hex_bytes(key) + value_formatted = format_hex_bytes(value) + bpftool( + f"map update name {map_name} key hex {key_formatted} value hex {value_formatted}" + ) + + +def _get_stats(xdp_map_id): + """ + Retrieves and formats statistics from an XDP map. + + Args: + xdp_map_id: The ID of the XDP map from which to retrieve statistics. + + Returns: + A dictionary containing formatted packet statistics for various XDP actions. + The keys are based on the XDPStats Enum values. + + Raises: + KsftFailEx: If the stats retrieval fails. + """ + stats_dump = bpftool(f"map dump id {xdp_map_id}", json=True) + if not stats_dump: + raise KsftFailEx(f"Failed to get stats for map {xdp_map_id}") + + stats_formatted = {} + for key in range(0, 4): + val = stats_dump[key]["formatted"]["value"] + if stats_dump[key]["formatted"]["key"] == XDPStats.RX.value: + stats_formatted[XDPStats.RX.value] = val + elif stats_dump[key]["formatted"]["key"] == XDPStats.PASS.value: + stats_formatted[XDPStats.PASS.value] = val + elif stats_dump[key]["formatted"]["key"] == XDPStats.DROP.value: + stats_formatted[XDPStats.DROP.value] = val + + return stats_formatted + + +def _test_pass(cfg, bpf_info, msg_sz): + """ + Tests the XDP_PASS action by exchanging UDP packets. + + Args: + cfg: Configuration object containing network settings. + bpf_info: BPFProgInfo object containing information about the BPF program. + msg_sz: Size of the test message to send. + """ + + prog_info = _load_xdp_prog(cfg, bpf_info) + port = rand_port() + + _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.PASS.value) + _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) + + ksft_eq(_test_udp(cfg, port, msg_sz), True, "UDP packet exchange failed") + stats = _get_stats(prog_info["maps"]["map_xdp_stats"]) + + ksft_ne(stats[XDPStats.RX.value], 0, "RX stats should not be zero") + ksft_eq(stats[XDPStats.RX.value], stats[XDPStats.PASS.value], "RX and PASS stats mismatch") + + +def test_xdp_native_pass_sb(cfg): + """ + Tests the XDP_PASS action for single buffer case. + + Args: + cfg: Configuration object containing network settings. + """ + bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500) + + _test_pass(cfg, bpf_info, 256) + + +def test_xdp_native_pass_mb(cfg): + """ + Tests the XDP_PASS action for a multi-buff size. + + Args: + cfg: Configuration object containing network settings. + """ + bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000) + + _test_pass(cfg, bpf_info, 8000) + + +def _test_drop(cfg, bpf_info, msg_sz): + """ + Tests the XDP_DROP action by exchanging UDP packets. + + Args: + cfg: Configuration object containing network settings. + bpf_info: BPFProgInfo object containing information about the BPF program. + msg_sz: Size of the test message to send. + """ + + prog_info = _load_xdp_prog(cfg, bpf_info) + port = rand_port() + + _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.DROP.value) + _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) + + ksft_eq(_test_udp(cfg, port, msg_sz), False, "UDP packet exchange should fail") + stats = _get_stats(prog_info["maps"]["map_xdp_stats"]) + + ksft_ne(stats[XDPStats.RX.value], 0, "RX stats should be zero") + ksft_eq(stats[XDPStats.RX.value], stats[XDPStats.DROP.value], "RX and DROP stats mismatch") + + +def test_xdp_native_drop_sb(cfg): + """ + Tests the XDP_DROP action for a signle-buff case. + + Args: + cfg: Configuration object containing network settings. + """ + bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500) + + _test_drop(cfg, bpf_info, 256) + + +def test_xdp_native_drop_mb(cfg): + """ + Tests the XDP_DROP action for a multi-buff case. + + Args: + cfg: Configuration object containing network settings. + """ + bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000) + + _test_drop(cfg, bpf_info, 8000) + + +def main(): + """ + Main function to execute the XDP tests. + + This function runs a series of tests to validate the XDP support for + both the single and multi-buffer. It uses the NetDrvEpEnv context + manager to manage the network driver environment and the ksft_run + function to execute the tests. + """ + with NetDrvEpEnv(__file__) as cfg: + ksft_run( + [ + test_xdp_native_pass_sb, + test_xdp_native_pass_mb, + test_xdp_native_drop_sb, + test_xdp_native_drop_mb, + ], + args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c new file mode 100644 index 000000000000..90b34b2a4fef --- /dev/null +++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +enum { + XDP_MODE = 0, + XDP_PORT = 1, +} xdp_map_setup_keys; + +enum { + XDP_MODE_PASS = 0, + XDP_MODE_DROP = 1, +} xdp_map_modes; + +enum { + STATS_RX = 0, + STATS_PASS = 1, + STATS_DROP = 2, +} xdp_stats; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 2); + __type(key, __u32); + __type(value, __s32); +} map_xdp_setup SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 4); + __type(key, __u32); + __type(value, __u64); +} map_xdp_stats SEC(".maps"); + +static void record_stats(struct xdp_md *ctx, __u32 stat_type) +{ + __u64 *count; + + count = bpf_map_lookup_elem(&map_xdp_stats, &stat_type); + + if (count) + __sync_fetch_and_add(count, 1); +} + +static struct udphdr *filter_udphdr(struct xdp_md *ctx, __u16 port) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct udphdr *udph = NULL; + struct ethhdr *eth = data; + + if (data + sizeof(*eth) > data_end) + return NULL; + + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *iph = data + sizeof(*eth); + + if (iph + 1 > (struct iphdr *)data_end || + iph->protocol != IPPROTO_UDP) + return NULL; + + udph = (void *)eth + sizeof(*iph) + sizeof(*eth); + } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + struct ipv6hdr *ipv6h = data + sizeof(*eth); + + if (ipv6h + 1 > (struct ipv6hdr *)data_end || + ipv6h->nexthdr != IPPROTO_UDP) + return NULL; + + udph = (void *)eth + sizeof(*ipv6h) + sizeof(*eth); + } else { + return NULL; + } + + if (udph + 1 > (struct udphdr *)data_end) + return NULL; + + if (udph->dest != bpf_htons(port)) + return NULL; + + record_stats(ctx, STATS_RX); + + return udph; +} + +static int xdp_mode_pass(struct xdp_md *ctx, __u16 port) +{ + struct udphdr *udph = NULL; + + udph = filter_udphdr(ctx, port); + if (!udph) + return XDP_PASS; + + record_stats(ctx, STATS_PASS); + + return XDP_PASS; +} + +static int xdp_mode_drop_handler(struct xdp_md *ctx, __u16 port) +{ + struct udphdr *udph = NULL; + + udph = filter_udphdr(ctx, port); + if (!udph) + return XDP_PASS; + + record_stats(ctx, STATS_DROP); + + return XDP_DROP; +} + +static int xdp_prog_common(struct xdp_md *ctx) +{ + __u32 key, *port; + __s32 *mode; + + key = XDP_MODE; + mode = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!mode) + return XDP_PASS; + + key = XDP_PORT; + port = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!port) + return XDP_PASS; + + switch (*mode) { + case XDP_MODE_PASS: + return xdp_mode_pass(ctx, (__u16)(*port)); + case XDP_MODE_DROP: + return xdp_mode_drop_handler(ctx, (__u16)(*port)); + } + + /* Default action is to simple pass */ + return XDP_PASS; +} + +SEC("xdp") +int xdp_prog(struct xdp_md *ctx) +{ + return xdp_prog_common(ctx); +} + +SEC("xdp.frags") +int xdp_prog_frags(struct xdp_md *ctx) +{ + return xdp_prog_common(ctx); +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 6713945726ce6859aac9cfe0f37ba641471f9235 Mon Sep 17 00:00:00 2001 From: Mohsin Bashir Date: Sat, 19 Jul 2025 01:30:57 -0700 Subject: selftests: drv-net: Test XDP_TX support Add test to verify the XDP_TX functionality by generating traffic from a remote node on a specific UDP port and redirecting it back to the sender. ./drivers/net/xdp.py TAP version 13 1..5 ok 1 xdp.test_xdp_native_pass_sb ok 2 xdp.test_xdp_native_pass_mb ok 3 xdp.test_xdp_native_drop_sb ok 4 xdp.test_xdp_native_drop_mb ok 5 xdp.test_xdp_native_tx_mb \# Totals: pass:5 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Mohsin Bashir Link: https://patch.msgid.link/20250719083059.3209169-4-mohsin.bashr@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/xdp.py | 34 ++++++++++ tools/testing/selftests/net/lib/xdp_native.bpf.c | 80 ++++++++++++++++++++++++ 2 files changed, 114 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py index 8ab1607edcd8..d42dbee78431 100755 --- a/tools/testing/selftests/drivers/net/xdp.py +++ b/tools/testing/selftests/drivers/net/xdp.py @@ -27,6 +27,7 @@ class XDPAction(Enum): """Enum for XDP actions.""" PASS = 0 # Pass the packet up to the stack DROP = 1 # Drop the packet + TX = 2 # Route the packet to the remote host class XDPStats(Enum): @@ -34,6 +35,7 @@ class XDPStats(Enum): RX = 0 # Count of valid packets received for testing PASS = 1 # Count of packets passed up to the stack DROP = 2 # Count of packets dropped + TX = 3 # Count of incoming packets routed to the remote host @dataclass @@ -180,6 +182,8 @@ def _get_stats(xdp_map_id): stats_formatted[XDPStats.PASS.value] = val elif stats_dump[key]["formatted"]["key"] == XDPStats.DROP.value: stats_formatted[XDPStats.DROP.value] = val + elif stats_dump[key]["formatted"]["key"] == XDPStats.TX.value: + stats_formatted[XDPStats.TX.value] = val return stats_formatted @@ -278,6 +282,35 @@ def test_xdp_native_drop_mb(cfg): _test_drop(cfg, bpf_info, 8000) +def test_xdp_native_tx_mb(cfg): + """ + Tests the XDP_TX action for a multi-buff case. + + Args: + cfg: Configuration object containing network settings. + """ + cfg.require_cmd("socat", remote=True) + + bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000) + prog_info = _load_xdp_prog(cfg, bpf_info) + port = rand_port() + + _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.TX.value) + _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) + + test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(8000)) + rx_udp = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport STDOUT" + tx_udp = f"echo {test_string} | socat -t 2 -u STDIN UDP:{cfg.baddr}:{port}" + + with bkg(rx_udp, host=cfg.remote, exit_wait=True) as rnc: + cmd(tx_udp, host=cfg.remote, shell=True) + + stats = _get_stats(prog_info['maps']['map_xdp_stats']) + + ksft_eq(rnc.stdout.strip(), test_string, "UDP packet exchange failed") + ksft_eq(stats[XDPStats.TX.value], 1, "TX stats mismatch") + + def main(): """ Main function to execute the XDP tests. @@ -294,6 +327,7 @@ def main(): test_xdp_native_pass_mb, test_xdp_native_drop_sb, test_xdp_native_drop_mb, + test_xdp_native_tx_mb, ], args=(cfg,)) ksft_exit() diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c index 90b34b2a4fef..d3c66c891589 100644 --- a/tools/testing/selftests/net/lib/xdp_native.bpf.c +++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c @@ -18,12 +18,14 @@ enum { enum { XDP_MODE_PASS = 0, XDP_MODE_DROP = 1, + XDP_MODE_TX = 2, } xdp_map_modes; enum { STATS_RX = 0, STATS_PASS = 1, STATS_DROP = 2, + STATS_TX = 3, } xdp_stats; struct { @@ -117,6 +119,82 @@ static int xdp_mode_drop_handler(struct xdp_md *ctx, __u16 port) return XDP_DROP; } +static void swap_machdr(void *data) +{ + struct ethhdr *eth = data; + __u8 tmp_mac[ETH_ALEN]; + + __builtin_memcpy(tmp_mac, eth->h_source, ETH_ALEN); + __builtin_memcpy(eth->h_source, eth->h_dest, ETH_ALEN); + __builtin_memcpy(eth->h_dest, tmp_mac, ETH_ALEN); +} + +static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct udphdr *udph = NULL; + struct ethhdr *eth = data; + + if (data + sizeof(*eth) > data_end) + return XDP_PASS; + + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *iph = data + sizeof(*eth); + __be32 tmp_ip = iph->saddr; + + if (iph + 1 > (struct iphdr *)data_end || + iph->protocol != IPPROTO_UDP) + return XDP_PASS; + + udph = data + sizeof(*iph) + sizeof(*eth); + + if (udph + 1 > (struct udphdr *)data_end) + return XDP_PASS; + if (udph->dest != bpf_htons(port)) + return XDP_PASS; + + record_stats(ctx, STATS_RX); + swap_machdr((void *)eth); + + iph->saddr = iph->daddr; + iph->daddr = tmp_ip; + + record_stats(ctx, STATS_TX); + + return XDP_TX; + + } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + struct ipv6hdr *ipv6h = data + sizeof(*eth); + struct in6_addr tmp_ipv6; + + if (ipv6h + 1 > (struct ipv6hdr *)data_end || + ipv6h->nexthdr != IPPROTO_UDP) + return XDP_PASS; + + udph = data + sizeof(*ipv6h) + sizeof(*eth); + + if (udph + 1 > (struct udphdr *)data_end) + return XDP_PASS; + if (udph->dest != bpf_htons(port)) + return XDP_PASS; + + record_stats(ctx, STATS_RX); + swap_machdr((void *)eth); + + __builtin_memcpy(&tmp_ipv6, &ipv6h->saddr, sizeof(tmp_ipv6)); + __builtin_memcpy(&ipv6h->saddr, &ipv6h->daddr, + sizeof(tmp_ipv6)); + __builtin_memcpy(&ipv6h->daddr, &tmp_ipv6, sizeof(tmp_ipv6)); + + record_stats(ctx, STATS_TX); + + return XDP_TX; + } + + return XDP_PASS; +} + static int xdp_prog_common(struct xdp_md *ctx) { __u32 key, *port; @@ -137,6 +215,8 @@ static int xdp_prog_common(struct xdp_md *ctx) return xdp_mode_pass(ctx, (__u16)(*port)); case XDP_MODE_DROP: return xdp_mode_drop_handler(ctx, (__u16)(*port)); + case XDP_MODE_TX: + return xdp_mode_tx_handler(ctx, (__u16)(*port)); } /* Default action is to simple pass */ -- cgit v1.2.3 From 0b65cfcef9c5737eb2700aba20a93b8593f94c04 Mon Sep 17 00:00:00 2001 From: Mohsin Bashir Date: Sat, 19 Jul 2025 01:30:58 -0700 Subject: selftests: drv-net: Test tail-adjustment support Add test to validate support for the two cases of tail adjustment: 1) tail extension, and 2) tail shrinking across different frame sizes and offset values. For each of the two cases, test both the single and multi-buffer cases by choosing appropriate packet size. The negative offset value result in growing of tailroom (shrinking of payload) while the positive offset result in shrinking of tailroom (growing of payload). Since the support for tail adjustment varies across drivers, classify the test as pass if at least one combination of packet size and offset from a pre-selected list results in a successful run. In case of an unsuccessful run, report the failure and highlight the packet size and offset values that caused the test to fail, as well as the values that resulted in the last successful run. Note: The growing part of this test for netdevsim may appear flaky when the offset value is larger than 1. This behavior occurs because tailroom is not explicitly reserved for netdevsim, with 1 being the typical tailroom value. However, in certain cases, such as payload being the last in the page with additional available space, the truesize is expanded. This also result increases the tailroom causing the test to pass intermittently. In contrast, when tailrrom is explicitly reserved, such as in the of fbnic, the test results are deterministic. ./drivers/net/xdp.py TAP version 13 1..7 ok 1 xdp.test_xdp_native_pass_sb ok 2 xdp.test_xdp_native_pass_mb ok 3 xdp.test_xdp_native_drop_sb ok 4 xdp.test_xdp_native_drop_mb ok 5 xdp.test_xdp_native_tx_mb \# Failed run: ... successful run: ... offset 1. Reason: Adjustment failed ok 6 xdp.test_xdp_native_adjst_tail_grow_data ok 7 xdp.test_xdp_native_adjst_tail_shrnk_data \# Totals: pass:7 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Mohsin Bashir Link: https://patch.msgid.link/20250719083059.3209169-5-mohsin.bashr@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/xdp.py | 178 +++++++++++++++++++- tools/testing/selftests/net/lib/xdp_native.bpf.c | 206 ++++++++++++++++++++++- 2 files changed, 380 insertions(+), 4 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py index d42dbee78431..85b5db0a1121 100755 --- a/tools/testing/selftests/drivers/net/xdp.py +++ b/tools/testing/selftests/drivers/net/xdp.py @@ -11,7 +11,7 @@ import string from dataclasses import dataclass from enum import Enum -from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ne +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ne, ksft_pr from lib.py import KsftFailEx, NetDrvEpEnv from lib.py import bkg, cmd, rand_port from lib.py import ip, bpftool, defer @@ -21,6 +21,8 @@ class TestConfig(Enum): """Enum for XDP configuration options.""" MODE = 0 # Configures the BPF program for a specific test PORT = 1 # Port configuration to communicate with the remote host + ADJST_OFFSET = 2 # Tail/Head adjustment offset for extension/shrinking + ADJST_TAG = 3 # Adjustment tag to annotate the start and end of extension class XDPAction(Enum): @@ -28,6 +30,7 @@ class XDPAction(Enum): PASS = 0 # Pass the packet up to the stack DROP = 1 # Drop the packet TX = 2 # Route the packet to the remote host + TAIL_ADJST = 3 # Adjust the tail of the packet class XDPStats(Enum): @@ -36,6 +39,7 @@ class XDPStats(Enum): PASS = 1 # Count of packets passed up to the stack DROP = 2 # Count of packets dropped TX = 3 # Count of incoming packets routed to the remote host + ABORT = 4 # Count of packets that were aborted @dataclass @@ -174,7 +178,7 @@ def _get_stats(xdp_map_id): raise KsftFailEx(f"Failed to get stats for map {xdp_map_id}") stats_formatted = {} - for key in range(0, 4): + for key in range(0, 5): val = stats_dump[key]["formatted"]["value"] if stats_dump[key]["formatted"]["key"] == XDPStats.RX.value: stats_formatted[XDPStats.RX.value] = val @@ -184,6 +188,8 @@ def _get_stats(xdp_map_id): stats_formatted[XDPStats.DROP.value] = val elif stats_dump[key]["formatted"]["key"] == XDPStats.TX.value: stats_formatted[XDPStats.TX.value] = val + elif stats_dump[key]["formatted"]["key"] == XDPStats.ABORT.value: + stats_formatted[XDPStats.ABORT.value] = val return stats_formatted @@ -311,6 +317,172 @@ def test_xdp_native_tx_mb(cfg): ksft_eq(stats[XDPStats.TX.value], 1, "TX stats mismatch") +def _validate_res(res, offset_lst, pkt_sz_lst): + """ + Validates the result of a test. + + Args: + res: The result of the test, which should be a dictionary with a "status" key. + + Raises: + KsftFailEx: If the test fails to pass any combination of offset and packet size. + """ + if "status" not in res: + raise KsftFailEx("Missing 'status' key in result dictionary") + + # Validate that not a single case was successful + if res["status"] == "fail": + if res["offset"] == offset_lst[0] and res["pkt_sz"] == pkt_sz_lst[0]: + raise KsftFailEx(f"{res['reason']}") + + # Get the previous offset and packet size to report the successful run + tmp_idx = offset_lst.index(res["offset"]) + prev_offset = offset_lst[tmp_idx - 1] + if tmp_idx == 0: + tmp_idx = pkt_sz_lst.index(res["pkt_sz"]) + prev_pkt_sz = pkt_sz_lst[tmp_idx - 1] + else: + prev_pkt_sz = res["pkt_sz"] + + # Use these values for error reporting + ksft_pr( + f"Failed run: pkt_sz {res['pkt_sz']}, offset {res['offset']}. " + f"Last successful run: pkt_sz {prev_pkt_sz}, offset {prev_offset}. " + f"Reason: {res['reason']}" + ) + + +def _check_for_failures(recvd_str, stats): + """ + Checks for common failures while adjusting headroom or tailroom. + + Args: + recvd_str: The string received from the remote host after sending a test string. + stats: A dictionary containing formatted packet statistics for various XDP actions. + + Returns: + str: A string describing the failure reason if a failure is detected, otherwise None. + """ + + # Any adjustment failure result in an abort hence, we track this counter + if stats[XDPStats.ABORT.value] != 0: + return "Adjustment failed" + + # Since we are using aggregate stats for a single test across all offsets and packet sizes + # we can't use RX stats only to track data exchange failure without taking a previous + # snapshot. An easier way is to simply check for non-zero length of received string. + if len(recvd_str) == 0: + return "Data exchange failed" + + # Check for RX and PASS stats mismatch. Ideally, they should be equal for a successful run + if stats[XDPStats.RX.value] != stats[XDPStats.PASS.value]: + return "RX stats mismatch" + + return None + + +def _test_xdp_native_tail_adjst(cfg, pkt_sz_lst, offset_lst): + """ + Tests the XDP tail adjustment functionality. + + This function loads the appropriate XDP program based on the provided + program name and configures the XDP map for tail adjustment. It then + validates the tail adjustment by sending and receiving UDP packets + with specified packet sizes and offsets. + + Args: + cfg: Configuration object containing network settings. + prog: Name of the XDP program to load. + pkt_sz_lst: List of packet sizes to test. + offset_lst: List of offsets to validate support for tail adjustment. + + Returns: + dict: A dictionary with test status and failure details if applicable. + """ + port = rand_port() + bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000) + + prog_info = _load_xdp_prog(cfg, bpf_info) + + # Configure the XDP map for tail adjustment + _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.TAIL_ADJST.value) + _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) + + for offset in offset_lst: + tag = format(random.randint(65, 90), "02x") + + _set_xdp_map("map_xdp_setup", TestConfig.ADJST_OFFSET.value, offset) + if offset > 0: + _set_xdp_map("map_xdp_setup", TestConfig.ADJST_TAG.value, int(tag, 16)) + + for pkt_sz in pkt_sz_lst: + test_str = "".join(random.choice(string.ascii_lowercase) for _ in range(pkt_sz)) + recvd_str = _exchg_udp(cfg, port, test_str) + stats = _get_stats(prog_info["maps"]["map_xdp_stats"]) + + failure = _check_for_failures(recvd_str, stats) + if failure is not None: + return { + "status": "fail", + "reason": failure, + "offset": offset, + "pkt_sz": pkt_sz, + } + + # Validate data content based on offset direction + expected_data = None + if offset > 0: + expected_data = test_str + (offset * chr(int(tag, 16))) + else: + expected_data = test_str[0:pkt_sz + offset] + + if recvd_str != expected_data: + return { + "status": "fail", + "reason": "Data mismatch", + "offset": offset, + "pkt_sz": pkt_sz, + } + + return {"status": "pass"} + + +def test_xdp_native_adjst_tail_grow_data(cfg): + """ + Tests the XDP tail adjustment by growing packet data. + + Args: + cfg: Configuration object containing network settings. + """ + pkt_sz_lst = [512, 1024, 2048] + offset_lst = [1, 16, 32, 64, 128, 256] + res = _test_xdp_native_tail_adjst( + cfg, + pkt_sz_lst, + offset_lst, + ) + + _validate_res(res, offset_lst, pkt_sz_lst) + + +def test_xdp_native_adjst_tail_shrnk_data(cfg): + """ + Tests the XDP tail adjustment by shrinking packet data. + + Args: + cfg: Configuration object containing network settings. + """ + pkt_sz_lst = [512, 1024, 2048] + offset_lst = [-16, -32, -64, -128, -256] + res = _test_xdp_native_tail_adjst( + cfg, + pkt_sz_lst, + offset_lst, + ) + + _validate_res(res, offset_lst, pkt_sz_lst) + + def main(): """ Main function to execute the XDP tests. @@ -328,6 +500,8 @@ def main(): test_xdp_native_drop_sb, test_xdp_native_drop_mb, test_xdp_native_tx_mb, + test_xdp_native_adjst_tail_grow_data, + test_xdp_native_adjst_tail_shrnk_data, ], args=(cfg,)) ksft_exit() diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c index d3c66c891589..7bb2dff86d8f 100644 --- a/tools/testing/selftests/net/lib/xdp_native.bpf.c +++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c @@ -10,15 +10,22 @@ #include #include +#define MAX_ADJST_OFFSET 256 +#define MAX_PAYLOAD_LEN 5000 +#define MAX_HDR_LEN 64 + enum { XDP_MODE = 0, XDP_PORT = 1, + XDP_ADJST_OFFSET = 2, + XDP_ADJST_TAG = 3, } xdp_map_setup_keys; enum { XDP_MODE_PASS = 0, XDP_MODE_DROP = 1, XDP_MODE_TX = 2, + XDP_MODE_TAIL_ADJST = 3, } xdp_map_modes; enum { @@ -26,22 +33,28 @@ enum { STATS_PASS = 1, STATS_DROP = 2, STATS_TX = 3, + STATS_ABORT = 4, } xdp_stats; struct { __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, 2); + __uint(max_entries, 5); __type(key, __u32); __type(value, __s32); } map_xdp_setup SEC(".maps"); struct { __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, 4); + __uint(max_entries, 5); __type(key, __u32); __type(value, __u64); } map_xdp_stats SEC(".maps"); +static __u32 min(__u32 a, __u32 b) +{ + return a < b ? a : b; +} + static void record_stats(struct xdp_md *ctx, __u32 stat_type) { __u64 *count; @@ -195,6 +208,193 @@ static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port) return XDP_PASS; } +static void *update_pkt(struct xdp_md *ctx, __s16 offset, __u32 *udp_csum) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct udphdr *udph = NULL; + struct ethhdr *eth = data; + __u32 len, len_new; + + if (data + sizeof(*eth) > data_end) + return NULL; + + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *iph = data + sizeof(*eth); + __u16 total_len; + + if (iph + 1 > (struct iphdr *)data_end) + return NULL; + + iph->tot_len = bpf_htons(bpf_ntohs(iph->tot_len) + offset); + + udph = (void *)eth + sizeof(*iph) + sizeof(*eth); + if (!udph || udph + 1 > (struct udphdr *)data_end) + return NULL; + + len_new = bpf_htons(bpf_ntohs(udph->len) + offset); + } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + struct ipv6hdr *ipv6h = data + sizeof(*eth); + __u16 payload_len; + + if (ipv6h + 1 > (struct ipv6hdr *)data_end) + return NULL; + + udph = (void *)eth + sizeof(*ipv6h) + sizeof(*eth); + if (!udph || udph + 1 > (struct udphdr *)data_end) + return NULL; + + *udp_csum = ~((__u32)udph->check); + + len = ipv6h->payload_len; + len_new = bpf_htons(bpf_ntohs(len) + offset); + ipv6h->payload_len = len_new; + + *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new, + sizeof(len_new), *udp_csum); + + len = udph->len; + len_new = bpf_htons(bpf_ntohs(udph->len) + offset); + *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new, + sizeof(len_new), *udp_csum); + } else { + return NULL; + } + + udph->len = len_new; + + return udph; +} + +static __u16 csum_fold_helper(__u32 csum) +{ + return ~((csum & 0xffff) + (csum >> 16)) ? : 0xffff; +} + +static int xdp_adjst_tail_shrnk_data(struct xdp_md *ctx, __u16 offset, + __u32 hdr_len) +{ + char tmp_buff[MAX_ADJST_OFFSET]; + __u32 buff_pos, udp_csum = 0; + struct udphdr *udph = NULL; + __u32 buff_len; + + udph = update_pkt(ctx, 0 - offset, &udp_csum); + if (!udph) + return -1; + + buff_len = bpf_xdp_get_buff_len(ctx); + + offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET : + offset & 0xff; + if (offset == 0) + return -1; + + /* Make sure we have enough data to avoid eating the header */ + if (buff_len - offset < hdr_len) + return -1; + + buff_pos = buff_len - offset; + if (bpf_xdp_load_bytes(ctx, buff_pos, tmp_buff, offset) < 0) + return -1; + + udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum); + udph->check = (__u16)csum_fold_helper(udp_csum); + + if (bpf_xdp_adjust_tail(ctx, 0 - offset) < 0) + return -1; + + return 0; +} + +static int xdp_adjst_tail_grow_data(struct xdp_md *ctx, __u16 offset) +{ + char tmp_buff[MAX_ADJST_OFFSET]; + __u32 buff_pos, udp_csum = 0; + __u32 buff_len, hdr_len, key; + struct udphdr *udph; + __s32 *val; + __u8 tag; + + /* Proceed to update the packet headers before attempting to adjuste + * the tail. Once the tail is adjusted we lose access to the offset + * amount of data at the end of the packet which is crucial to update + * the checksum. + * Since any failure beyond this would abort the packet, we should + * not worry about passing a packet up the stack with wrong headers + */ + udph = update_pkt(ctx, offset, &udp_csum); + if (!udph) + return -1; + + key = XDP_ADJST_TAG; + val = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!val) + return -1; + + tag = (__u8)(*val); + + for (int i = 0; i < MAX_ADJST_OFFSET; i++) + __builtin_memcpy(&tmp_buff[i], &tag, 1); + + offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET : + offset & 0xff; + if (offset == 0) + return -1; + + udp_csum = bpf_csum_diff(0, 0, (__be32 *)tmp_buff, offset, udp_csum); + udph->check = (__u16)csum_fold_helper(udp_csum); + + buff_len = bpf_xdp_get_buff_len(ctx); + + if (bpf_xdp_adjust_tail(ctx, offset) < 0) { + bpf_printk("Failed to adjust tail\n"); + return -1; + } + + if (bpf_xdp_store_bytes(ctx, buff_len, tmp_buff, offset) < 0) + return -1; + + return 0; +} + +static int xdp_adjst_tail(struct xdp_md *ctx, __u16 port) +{ + void *data = (void *)(long)ctx->data; + struct udphdr *udph = NULL; + __s32 *adjust_offset, *val; + __u32 key, hdr_len; + void *offset_ptr; + __u8 tag; + int ret; + + udph = filter_udphdr(ctx, port); + if (!udph) + return XDP_PASS; + + hdr_len = (void *)udph - data + sizeof(struct udphdr); + key = XDP_ADJST_OFFSET; + adjust_offset = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!adjust_offset) + return XDP_PASS; + + if (*adjust_offset < 0) + ret = xdp_adjst_tail_shrnk_data(ctx, + (__u16)(0 - *adjust_offset), + hdr_len); + else + ret = xdp_adjst_tail_grow_data(ctx, (__u16)(*adjust_offset)); + if (ret) + goto abort_pkt; + + record_stats(ctx, STATS_PASS); + return XDP_PASS; + +abort_pkt: + record_stats(ctx, STATS_ABORT); + return XDP_ABORTED; +} + static int xdp_prog_common(struct xdp_md *ctx) { __u32 key, *port; @@ -217,6 +417,8 @@ static int xdp_prog_common(struct xdp_md *ctx) return xdp_mode_drop_handler(ctx, (__u16)(*port)); case XDP_MODE_TX: return xdp_mode_tx_handler(ctx, (__u16)(*port)); + case XDP_MODE_TAIL_ADJST: + return xdp_adjst_tail(ctx, (__u16)(*port)); } /* Default action is to simple pass */ -- cgit v1.2.3 From d6444ebc97dcbbc1e378bedb037380305294e77d Mon Sep 17 00:00:00 2001 From: Mohsin Bashir Date: Sat, 19 Jul 2025 01:30:59 -0700 Subject: selftests: drv-net: Test head-adjustment support Add test to validate the headroom adjustment support for both extension and the shrinking cases. For the extension part, eat up space from the start of payload data whereas, for the shrinking part, populate the newly available space with a tag. In the user-space, validate that a test string is manipulated accordingly. The negative and positive offset values result in shrinking and growing of headroom (growing and shrinking of payload) respectively. TAP version 13 1..9 ok 1 xdp.test_xdp_native_pass_sb ok 2 xdp.test_xdp_native_pass_mb ok 3 xdp.test_xdp_native_drop_sb ok 4 xdp.test_xdp_native_drop_mb ok 5 xdp.test_xdp_native_tx_mb \# Failed run: pkt_sz 512, ... offset 1. Reason: Adjustment failed ok 6 xdp.test_xdp_native_adjst_tail_grow_data ok 7 xdp.test_xdp_native_adjst_tail_shrnk_data \# Failed run: pkt_sz 512, ... offset -128. Reason: Adjustment failed ok 8 xdp.test_xdp_native_adjst_head_grow_data \# Failed run: pkt_sz (512) > HDS threshold (0) and offset 64 > 48 ok 9 xdp.test_xdp_native_adjst_head_shrnk_data \# Totals: pass:9 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Mohsin Bashir Link: https://patch.msgid.link/20250719083059.3209169-6-mohsin.bashr@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/xdp.py | 147 +++++++++++++++++- tools/testing/selftests/net/lib/xdp_native.bpf.c | 181 +++++++++++++++++++++++ 2 files changed, 327 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py index 85b5db0a1121..887d662ad128 100755 --- a/tools/testing/selftests/drivers/net/xdp.py +++ b/tools/testing/selftests/drivers/net/xdp.py @@ -12,7 +12,7 @@ from dataclasses import dataclass from enum import Enum from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ne, ksft_pr -from lib.py import KsftFailEx, NetDrvEpEnv +from lib.py import KsftFailEx, NetDrvEpEnv, EthtoolFamily, NlError from lib.py import bkg, cmd, rand_port from lib.py import ip, bpftool, defer @@ -31,6 +31,7 @@ class XDPAction(Enum): DROP = 1 # Drop the packet TX = 2 # Route the packet to the remote host TAIL_ADJST = 3 # Adjust the tail of the packet + HEAD_ADJST = 4 # Adjust the head of the packet class XDPStats(Enum): @@ -483,6 +484,147 @@ def test_xdp_native_adjst_tail_shrnk_data(cfg): _validate_res(res, offset_lst, pkt_sz_lst) +def get_hds_thresh(cfg): + """ + Retrieves the header data split (HDS) threshold for a network interface. + + Args: + cfg: Configuration object containing network settings. + + Returns: + The HDS threshold value. If the threshold is not supported or an error occurs, + a default value of 1500 is returned. + """ + netnl = cfg.netnl + hds_thresh = 1500 + + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + if 'hds-thresh' not in rings: + ksft_pr(f'hds-thresh not supported. Using default: {hds_thresh}') + return hds_thresh + hds_thresh = rings['hds-thresh'] + except NlError as e: + ksft_pr(f"Failed to get rings: {e}. Using default: {hds_thresh}") + + return hds_thresh + + +def _test_xdp_native_head_adjst(cfg, prog, pkt_sz_lst, offset_lst): + """ + Tests the XDP head adjustment action for a multi-buffer case. + + Args: + cfg: Configuration object containing network settings. + netnl: Network namespace or link object (not used in this function). + + This function sets up the packet size and offset lists, then performs + the head adjustment test by sending and receiving UDP packets. + """ + cfg.require_cmd("socat", remote=True) + + prog_info = _load_xdp_prog(cfg, BPFProgInfo(prog, "xdp_native.bpf.o", "xdp.frags", 9000)) + port = rand_port() + + _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.HEAD_ADJST.value) + _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) + + hds_thresh = get_hds_thresh(cfg) + for offset in offset_lst: + for pkt_sz in pkt_sz_lst: + # The "head" buffer must contain at least the Ethernet header + # after we eat into it. We send large-enough packets, but if HDS + # is enabled head will only contain headers. Don't try to eat + # more than 28 bytes (UDPv4 + eth hdr left: (14 + 20 + 8) - 14) + l2_cut_off = 28 if cfg.addr_ipver == 4 else 48 + if pkt_sz > hds_thresh and offset > l2_cut_off: + ksft_pr( + f"Failed run: pkt_sz ({pkt_sz}) > HDS threshold ({hds_thresh}) and " + f"offset {offset} > {l2_cut_off}" + ) + return {"status": "pass"} + + test_str = ''.join(random.choice(string.ascii_lowercase) for _ in range(pkt_sz)) + tag = format(random.randint(65, 90), '02x') + + _set_xdp_map("map_xdp_setup", + TestConfig.ADJST_OFFSET.value, + offset) + _set_xdp_map("map_xdp_setup", TestConfig.ADJST_TAG.value, int(tag, 16)) + _set_xdp_map("map_xdp_setup", TestConfig.ADJST_OFFSET.value, offset) + + recvd_str = _exchg_udp(cfg, port, test_str) + + # Check for failures around adjustment and data exchange + failure = _check_for_failures(recvd_str, _get_stats(prog_info['maps']['map_xdp_stats'])) + if failure is not None: + return { + "status": "fail", + "reason": failure, + "offset": offset, + "pkt_sz": pkt_sz + } + + # Validate data content based on offset direction + expected_data = None + if offset < 0: + expected_data = chr(int(tag, 16)) * (0 - offset) + test_str + else: + expected_data = test_str[offset:] + + if recvd_str != expected_data: + return { + "status": "fail", + "reason": "Data mismatch", + "offset": offset, + "pkt_sz": pkt_sz + } + + return {"status": "pass"} + + +def test_xdp_native_adjst_head_grow_data(cfg): + """ + Tests the XDP headroom growth support. + + Args: + cfg: Configuration object containing network settings. + + This function sets up the packet size and offset lists, then calls the + _test_xdp_native_head_adjst_mb function to perform the actual test. The + test is passed if the headroom is successfully extended for given packet + sizes and offsets. + """ + pkt_sz_lst = [512, 1024, 2048] + + # Negative values result in headroom shrinking, resulting in growing of payload + offset_lst = [-16, -32, -64, -128, -256] + res = _test_xdp_native_head_adjst(cfg, "xdp_prog_frags", pkt_sz_lst, offset_lst) + + _validate_res(res, offset_lst, pkt_sz_lst) + + +def test_xdp_native_adjst_head_shrnk_data(cfg): + """ + Tests the XDP headroom shrinking support. + + Args: + cfg: Configuration object containing network settings. + + This function sets up the packet size and offset lists, then calls the + _test_xdp_native_head_adjst_mb function to perform the actual test. The + test is passed if the headroom is successfully shrunk for given packet + sizes and offsets. + """ + pkt_sz_lst = [512, 1024, 2048] + + # Positive values result in headroom growing, resulting in shrinking of payload + offset_lst = [16, 32, 64, 128, 256] + res = _test_xdp_native_head_adjst(cfg, "xdp_prog_frags", pkt_sz_lst, offset_lst) + + _validate_res(res, offset_lst, pkt_sz_lst) + + def main(): """ Main function to execute the XDP tests. @@ -493,6 +635,7 @@ def main(): function to execute the tests. """ with NetDrvEpEnv(__file__) as cfg: + cfg.netnl = EthtoolFamily() ksft_run( [ test_xdp_native_pass_sb, @@ -502,6 +645,8 @@ def main(): test_xdp_native_tx_mb, test_xdp_native_adjst_tail_grow_data, test_xdp_native_adjst_tail_shrnk_data, + test_xdp_native_adjst_head_grow_data, + test_xdp_native_adjst_head_shrnk_data, ], args=(cfg,)) ksft_exit() diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c index 7bb2dff86d8f..521ba38f2ddd 100644 --- a/tools/testing/selftests/net/lib/xdp_native.bpf.c +++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c @@ -26,6 +26,7 @@ enum { XDP_MODE_DROP = 1, XDP_MODE_TX = 2, XDP_MODE_TAIL_ADJST = 3, + XDP_MODE_HEAD_ADJST = 4, } xdp_map_modes; enum { @@ -395,6 +396,184 @@ abort_pkt: return XDP_ABORTED; } +static int xdp_adjst_head_shrnk_data(struct xdp_md *ctx, __u64 hdr_len, + __u32 offset) +{ + char tmp_buff[MAX_ADJST_OFFSET]; + struct udphdr *udph; + void *offset_ptr; + __u32 udp_csum = 0; + + /* Update the length information in the IP and UDP headers before + * adjusting the headroom. This simplifies accessing the relevant + * fields in the IP and UDP headers for fragmented packets. Any + * failure beyond this point will result in the packet being aborted, + * so we don't need to worry about incorrect length information for + * passed packets. + */ + udph = update_pkt(ctx, (__s16)(0 - offset), &udp_csum); + if (!udph) + return -1; + + offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET : + offset & 0xff; + if (offset == 0) + return -1; + + if (bpf_xdp_load_bytes(ctx, hdr_len, tmp_buff, offset) < 0) + return -1; + + udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum); + + udph->check = (__u16)csum_fold_helper(udp_csum); + + if (bpf_xdp_load_bytes(ctx, 0, tmp_buff, MAX_ADJST_OFFSET) < 0) + return -1; + + if (bpf_xdp_adjust_head(ctx, offset) < 0) + return -1; + + if (offset > MAX_ADJST_OFFSET) + return -1; + + if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0) + return -1; + + /* Added here to handle clang complain about negative value */ + hdr_len = hdr_len & 0xff; + + if (hdr_len == 0) + return -1; + + if (bpf_xdp_store_bytes(ctx, 0, tmp_buff, hdr_len) < 0) + return -1; + + return 0; +} + +static int xdp_adjst_head_grow_data(struct xdp_md *ctx, __u64 hdr_len, + __u32 offset) +{ + char hdr_buff[MAX_HDR_LEN]; + char data_buff[MAX_ADJST_OFFSET]; + void *offset_ptr; + __s32 *val; + __u32 key; + __u8 tag; + __u32 udp_csum = 0; + struct udphdr *udph; + + udph = update_pkt(ctx, (__s16)(offset), &udp_csum); + if (!udph) + return -1; + + key = XDP_ADJST_TAG; + val = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!val) + return -1; + + tag = (__u8)(*val); + for (int i = 0; i < MAX_ADJST_OFFSET; i++) + __builtin_memcpy(&data_buff[i], &tag, 1); + + offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET : + offset & 0xff; + if (offset == 0) + return -1; + + udp_csum = bpf_csum_diff(0, 0, (__be32 *)data_buff, offset, udp_csum); + udph->check = (__u16)csum_fold_helper(udp_csum); + + if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0) + return -1; + + /* Added here to handle clang complain about negative value */ + hdr_len = hdr_len & 0xff; + + if (hdr_len == 0) + return -1; + + if (bpf_xdp_load_bytes(ctx, 0, hdr_buff, hdr_len) < 0) + return -1; + + if (offset > MAX_ADJST_OFFSET) + return -1; + + if (bpf_xdp_adjust_head(ctx, 0 - offset) < 0) + return -1; + + if (bpf_xdp_store_bytes(ctx, 0, hdr_buff, hdr_len) < 0) + return -1; + + if (bpf_xdp_store_bytes(ctx, hdr_len, data_buff, offset) < 0) + return -1; + + return 0; +} + +static int xdp_head_adjst(struct xdp_md *ctx, __u16 port) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct udphdr *udph_ptr = NULL; + __u32 key, size, hdr_len; + __s32 *val; + int res; + + /* Filter packets based on UDP port */ + udph_ptr = filter_udphdr(ctx, port); + if (!udph_ptr) + return XDP_PASS; + + hdr_len = (void *)udph_ptr - data + sizeof(struct udphdr); + + key = XDP_ADJST_OFFSET; + val = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!val) + return XDP_PASS; + + switch (*val) { + case -16: + case 16: + size = 16; + break; + case -32: + case 32: + size = 32; + break; + case -64: + case 64: + size = 64; + break; + case -128: + case 128: + size = 128; + break; + case -256: + case 256: + size = 256; + break; + default: + bpf_printk("Invalid adjustment offset: %d\n", *val); + goto abort; + } + + if (*val < 0) + res = xdp_adjst_head_grow_data(ctx, hdr_len, size); + else + res = xdp_adjst_head_shrnk_data(ctx, hdr_len, size); + + if (res) + goto abort; + + record_stats(ctx, STATS_PASS); + return XDP_PASS; + +abort: + record_stats(ctx, STATS_ABORT); + return XDP_ABORTED; +} + static int xdp_prog_common(struct xdp_md *ctx) { __u32 key, *port; @@ -419,6 +598,8 @@ static int xdp_prog_common(struct xdp_md *ctx) return xdp_mode_tx_handler(ctx, (__u16)(*port)); case XDP_MODE_TAIL_ADJST: return xdp_adjst_tail(ctx, (__u16)(*port)); + case XDP_MODE_HEAD_ADJST: + return xdp_head_adjst(ctx, (__u16)(*port)); } /* Default action is to simple pass */ -- cgit v1.2.3 From dca56cc8b5c3bee889d6cb0d2ee3e933b21cb4ec Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 22 Jul 2025 00:36:49 +0200 Subject: selftests: netfilter: tone-down conntrack clash test The test is supposed to observe that the 'clash_resolve' stat counter incremented (i.e., the code path was covered). This check was incorrect, 'conntrack -S' needs to be called in the revevant namespace, not the initial netns. The clash resolution logic in conntrack is only exercised when multiple packets with the same udp quadruple race. Depending on kernel config, number of CPUs, scheduling policy etc. this might not trigger even after several retries. Thus the script eventually returns SKIP if the retry count is exceeded. The udpclash tool with also exit with a failure if it did not observe the expected number of replies. In the script, make a note of this but do not fail anymore, just check if the clash resolution logic triggered after all. Remove the 'single-core' test: while unlikely, with preemptible kernel it should be possible to also trigger clash resolution logic. With this change the test will either SKIP or pass. Hard error could be restored later once its clear whats going on, so also dump 'conntrack -S' when some packets went missing to see if conntrack dropped them on insert. Fixes: 78a588363587 ("selftests: netfilter: add conntrack clash resolution test case") Signed-off-by: Florian Westphal Link: https://patch.msgid.link/20250721223652.6956-1-fw@strlen.de Signed-off-by: Jakub Kicinski --- .../selftests/net/netfilter/conntrack_clash.sh | 45 +++++++++++----------- 1 file changed, 22 insertions(+), 23 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/netfilter/conntrack_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_clash.sh index 3712c1b9b38b..606a43a60f73 100755 --- a/tools/testing/selftests/net/netfilter/conntrack_clash.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_clash.sh @@ -93,32 +93,28 @@ ping_test() run_one_clash_test() { local ns="$1" - local daddr="$2" - local dport="$3" + local ctns="$2" + local daddr="$3" + local dport="$4" local entries local cre if ! ip netns exec "$ns" ./udpclash $daddr $dport;then - echo "FAIL: did not receive expected number of replies for $daddr:$dport" - ret=1 - return 1 + echo "INFO: did not receive expected number of replies for $daddr:$dport" + ip netns exec "$ctns" conntrack -S + # don't fail: check if clash resolution triggered after all. fi - entries=$(conntrack -S | wc -l) - cre=$(conntrack -S | grep -v "clash_resolve=0" | wc -l) + entries=$(ip netns exec "$ctns" conntrack -S | wc -l) + cre=$(ip netns exec "$ctns" conntrack -S | grep "clash_resolve=0" | wc -l) - if [ "$cre" -ne "$entries" ] ;then + if [ "$cre" -ne "$entries" ];then clash_resolution_active=1 return 0 fi - # 1 cpu -> parallel insertion impossible - if [ "$entries" -eq 1 ]; then - return 0 - fi - - # not a failure: clash resolution logic did not trigger, but all replies - # were received. With right timing, xmit completed sequentially and + # not a failure: clash resolution logic did not trigger. + # With right timing, xmit completed sequentially and # no parallel insertion occurs. return $ksft_skip } @@ -126,20 +122,23 @@ run_one_clash_test() run_clash_test() { local ns="$1" - local daddr="$2" - local dport="$3" + local ctns="$2" + local daddr="$3" + local dport="$4" + local softerr=0 for i in $(seq 1 10);do - run_one_clash_test "$ns" "$daddr" "$dport" + run_one_clash_test "$ns" "$ctns" "$daddr" "$dport" local rv=$? if [ $rv -eq 0 ];then echo "PASS: clash resolution test for $daddr:$dport on attempt $i" return 0 - elif [ $rv -eq 1 ];then - echo "FAIL: clash resolution test for $daddr:$dport on attempt $i" - return 1 + elif [ $rv -eq $ksft_skip ]; then + softerr=1 fi done + + [ $softerr -eq 1 ] && echo "SKIP: clash resolution for $daddr:$dport did not trigger" } ip link add veth0 netns "$nsclient1" type veth peer name veth0 netns "$nsrouter" @@ -161,11 +160,11 @@ spawn_servers "$nsclient2" # exercise clash resolution with nat: # nsrouter is supposed to dnat to 10.0.2.1:900{0,1,2,3}. -run_clash_test "$nsclient1" 10.0.1.99 "$dport" +run_clash_test "$nsclient1" "$nsrouter" 10.0.1.99 "$dport" # exercise clash resolution without nat. load_simple_ruleset "$nsclient2" -run_clash_test "$nsclient2" 127.0.0.1 9001 +run_clash_test "$nsclient2" "$nsclient2" 127.0.0.1 9001 if [ $clash_resolution_active -eq 0 ];then [ "$ret" -eq 0 ] && ret=$ksft_skip -- cgit v1.2.3 From 02b072fd9fe1c5e16b7ae8da2f4ae31c8ef6f6a3 Mon Sep 17 00:00:00 2001 From: Joel Granados Date: Thu, 26 Jun 2025 14:48:08 +0200 Subject: sysctl: Nixify sysctl.sh Use "#!/usr/bin/env bash" instead of "#!/bin/bash". Needed for testing in nix environments as they only provide /usr/bin/env at the standard location. Signed-off-by: Joel Granados --- tools/testing/selftests/sysctl/sysctl.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh index a10350c8a46e..b2d8bd9026a7 100755 --- a/tools/testing/selftests/sysctl/sysctl.sh +++ b/tools/testing/selftests/sysctl/sysctl.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1 # Copyright (C) 2017 Luis R. Rodriguez -- cgit v1.2.3 From 1bbdb81a98363fd5cd0c2ac16ad5346bdf814dff Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Tue, 22 Jul 2025 12:13:29 +0300 Subject: devlink: Fix excessive stack usage in rate TC bandwidth parsing The devlink_nl_rate_tc_bw_parse function uses a large stack array for devlink attributes, which triggers a warning about excessive stack usage: net/devlink/rate.c: In function 'devlink_nl_rate_tc_bw_parse': net/devlink/rate.c:382:1: error: the frame size of 1648 bytes is larger than 1536 bytes [-Werror=frame-larger-than=] Introduce a separate attribute set specifically for rate TC bandwidth parsing that only contains the two attributes actually used: index and bandwidth. This reduces the stack array from DEVLINK_ATTR_MAX entries to just 2 entries, solving the stack usage issue. Update devlink selftest to use the new 'index' and 'bw' attribute names consistent with the YAML spec. Example usage with ynl with the new spec: ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/devlink.yaml \ --do rate-set --json '{ "bus-name": "pci", "dev-name": "0000:08:00.0", "port-index": 1, "rate-tc-bws": [ {"index": 0, "bw": 50}, {"index": 1, "bw": 50}, {"index": 2, "bw": 0}, {"index": 3, "bw": 0}, {"index": 4, "bw": 0}, {"index": 5, "bw": 0}, {"index": 6, "bw": 0}, {"index": 7, "bw": 0} ] }' ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/devlink.yaml \ --do rate-get --json '{ "bus-name": "pci", "dev-name": "0000:08:00.0", "port-index": 1 }' output for rate-get: {'bus-name': 'pci', 'dev-name': '0000:08:00.0', 'port-index': 1, 'rate-tc-bws': [{'bw': 50, 'index': 0}, {'bw': 50, 'index': 1}, {'bw': 0, 'index': 2}, {'bw': 0, 'index': 3}, {'bw': 0, 'index': 4}, {'bw': 0, 'index': 5}, {'bw': 0, 'index': 6}, {'bw': 0, 'index': 7}], 'rate-tx-max': 0, 'rate-tx-priority': 0, 'rate-tx-share': 0, 'rate-tx-weight': 0, 'rate-type': 'leaf'} Fixes: 566e8f108fc7 ("devlink: Extend devlink rate API with traffic classes bandwidth management") Reported-by: Arnd Bergmann Closes: https://lore.kernel.org/netdev/20250708160652.1810573-1-arnd@kernel.org/ Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202507171943.W7DJcs6Y-lkp@intel.com/ Suggested-by: Jakub Kicinski Signed-off-by: Carolina Jubran Tested-by: Carolina Jubran Signed-off-by: Tariq Toukan Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/1753175609-330621-1-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/devlink.yaml | 26 +++++++++------------- include/uapi/linux/devlink.h | 11 +++++++-- net/devlink/netlink_gen.c | 6 ++--- net/devlink/netlink_gen.h | 2 +- net/devlink/rate.c | 20 ++++++++--------- .../selftests/drivers/net/hw/devlink_rate_tc_bw.py | 16 ++++++------- 6 files changed, 42 insertions(+), 39 deletions(-) (limited to 'tools/testing/selftests') diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index 1c4bb0cbe5f0..bb87111d5e16 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -853,18 +853,6 @@ attribute-sets: type: nest multi-attr: true nested-attributes: dl-rate-tc-bws - - - name: rate-tc-index - type: u8 - checks: - max: rate-tc-index-max - - - name: rate-tc-bw - type: u32 - doc: | - Specifies the bandwidth share assigned to the Traffic Class. - The bandwidth for the traffic class is determined - in proportion to the sum of the shares of all configured classes. - name: dl-dev-stats subset-of: devlink @@ -1271,12 +1259,20 @@ attribute-sets: type: flag - name: dl-rate-tc-bws - subset-of: devlink + name-prefix: devlink-rate-tc-attr- attributes: - - name: rate-tc-index + name: index + type: u8 + checks: + max: rate-tc-index-max - - name: rate-tc-bw + name: bw + type: u32 + doc: | + Specifies the bandwidth share assigned to the Traffic Class. + The bandwidth for the traffic class is determined + in proportion to the sum of the shares of all configured classes. operations: enum-model: directional diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index e72bcc239afd..9fcb25a0f447 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -635,8 +635,6 @@ enum devlink_attr { DEVLINK_ATTR_REGION_DIRECT, /* flag */ DEVLINK_ATTR_RATE_TC_BWS, /* nested */ - DEVLINK_ATTR_RATE_TC_INDEX, /* u8 */ - DEVLINK_ATTR_RATE_TC_BW, /* u32 */ /* Add new attributes above here, update the spec in * Documentation/netlink/specs/devlink.yaml and re-generate @@ -647,6 +645,15 @@ enum devlink_attr { DEVLINK_ATTR_MAX = __DEVLINK_ATTR_MAX - 1 }; +enum devlink_rate_tc_attr { + DEVLINK_RATE_TC_ATTR_UNSPEC, + DEVLINK_RATE_TC_ATTR_INDEX, /* u8 */ + DEVLINK_RATE_TC_ATTR_BW, /* u32 */ + + __DEVLINK_RATE_TC_ATTR_MAX, + DEVLINK_RATE_TC_ATTR_MAX = __DEVLINK_RATE_TC_ATTR_MAX - 1 +}; + /* Mapping between internal resource described by the field and system * structure */ diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c index c50436433c18..d97c326a9045 100644 --- a/net/devlink/netlink_gen.c +++ b/net/devlink/netlink_gen.c @@ -45,9 +45,9 @@ const struct nla_policy devlink_dl_port_function_nl_policy[DEVLINK_PORT_FN_ATTR_ [DEVLINK_PORT_FN_ATTR_CAPS] = NLA_POLICY_BITFIELD32(15), }; -const struct nla_policy devlink_dl_rate_tc_bws_nl_policy[DEVLINK_ATTR_RATE_TC_BW + 1] = { - [DEVLINK_ATTR_RATE_TC_INDEX] = NLA_POLICY_MAX(NLA_U8, DEVLINK_RATE_TC_INDEX_MAX), - [DEVLINK_ATTR_RATE_TC_BW] = { .type = NLA_U32, }, +const struct nla_policy devlink_dl_rate_tc_bws_nl_policy[DEVLINK_RATE_TC_ATTR_BW + 1] = { + [DEVLINK_RATE_TC_ATTR_INDEX] = NLA_POLICY_MAX(NLA_U8, DEVLINK_RATE_TC_INDEX_MAX), + [DEVLINK_RATE_TC_ATTR_BW] = { .type = NLA_U32, }, }; const struct nla_policy devlink_dl_selftest_id_nl_policy[DEVLINK_ATTR_SELFTEST_ID_FLASH + 1] = { diff --git a/net/devlink/netlink_gen.h b/net/devlink/netlink_gen.h index fb733b5d4ff1..09cc6f264ccf 100644 --- a/net/devlink/netlink_gen.h +++ b/net/devlink/netlink_gen.h @@ -13,7 +13,7 @@ /* Common nested types */ extern const struct nla_policy devlink_dl_port_function_nl_policy[DEVLINK_PORT_FN_ATTR_CAPS + 1]; -extern const struct nla_policy devlink_dl_rate_tc_bws_nl_policy[DEVLINK_ATTR_RATE_TC_BW + 1]; +extern const struct nla_policy devlink_dl_rate_tc_bws_nl_policy[DEVLINK_RATE_TC_ATTR_BW + 1]; extern const struct nla_policy devlink_dl_selftest_id_nl_policy[DEVLINK_ATTR_SELFTEST_ID_FLASH + 1]; /* Ops table for devlink */ diff --git a/net/devlink/rate.c b/net/devlink/rate.c index d39300a9b3d4..110b3fa8a0b1 100644 --- a/net/devlink/rate.c +++ b/net/devlink/rate.c @@ -90,8 +90,8 @@ static int devlink_rate_put_tc_bws(struct sk_buff *msg, u32 *tc_bw) if (!nla_tc_bw) return -EMSGSIZE; - if (nla_put_u8(msg, DEVLINK_ATTR_RATE_TC_INDEX, i) || - nla_put_u32(msg, DEVLINK_ATTR_RATE_TC_BW, tc_bw[i])) + if (nla_put_u8(msg, DEVLINK_RATE_TC_ATTR_INDEX, i) || + nla_put_u32(msg, DEVLINK_RATE_TC_ATTR_BW, tc_bw[i])) goto nla_put_failure; nla_nest_end(msg, nla_tc_bw); @@ -346,26 +346,26 @@ static int devlink_nl_rate_tc_bw_parse(struct nlattr *parent_nest, u32 *tc_bw, unsigned long *bitmap, struct netlink_ext_ack *extack) { - struct nlattr *tb[DEVLINK_ATTR_MAX + 1]; + struct nlattr *tb[DEVLINK_RATE_TC_ATTR_MAX + 1]; u8 tc_index; int err; - err = nla_parse_nested(tb, DEVLINK_ATTR_MAX, parent_nest, + err = nla_parse_nested(tb, DEVLINK_RATE_TC_ATTR_MAX, parent_nest, devlink_dl_rate_tc_bws_nl_policy, extack); if (err) return err; - if (!tb[DEVLINK_ATTR_RATE_TC_INDEX]) { + if (!tb[DEVLINK_RATE_TC_ATTR_INDEX]) { NL_SET_ERR_ATTR_MISS(extack, parent_nest, - DEVLINK_ATTR_RATE_TC_INDEX); + DEVLINK_RATE_TC_ATTR_INDEX); return -EINVAL; } - tc_index = nla_get_u8(tb[DEVLINK_ATTR_RATE_TC_INDEX]); + tc_index = nla_get_u8(tb[DEVLINK_RATE_TC_ATTR_INDEX]); - if (!tb[DEVLINK_ATTR_RATE_TC_BW]) { + if (!tb[DEVLINK_RATE_TC_ATTR_BW]) { NL_SET_ERR_ATTR_MISS(extack, parent_nest, - DEVLINK_ATTR_RATE_TC_BW); + DEVLINK_RATE_TC_ATTR_BW); return -EINVAL; } @@ -376,7 +376,7 @@ static int devlink_nl_rate_tc_bw_parse(struct nlattr *parent_nest, u32 *tc_bw, return -EINVAL; } - tc_bw[tc_index] = nla_get_u32(tb[DEVLINK_ATTR_RATE_TC_BW]); + tc_bw[tc_index] = nla_get_u32(tb[DEVLINK_RATE_TC_ATTR_BW]); return 0; } diff --git a/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py index 820d8a03becc..835c357919a8 100755 --- a/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py +++ b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py @@ -208,14 +208,14 @@ def setup_devlink_rate(cfg): "port-index": port_index, "rate-tx-max": 125000000, "rate-tc-bws": [ - {"rate-tc-index": 0, "rate-tc-bw": 0}, - {"rate-tc-index": 1, "rate-tc-bw": 0}, - {"rate-tc-index": 2, "rate-tc-bw": 0}, - {"rate-tc-index": 3, "rate-tc-bw": 20}, - {"rate-tc-index": 4, "rate-tc-bw": 80}, - {"rate-tc-index": 5, "rate-tc-bw": 0}, - {"rate-tc-index": 6, "rate-tc-bw": 0}, - {"rate-tc-index": 7, "rate-tc-bw": 0}, + {"index": 0, "bw": 0}, + {"index": 1, "bw": 0}, + {"index": 2, "bw": 0}, + {"index": 3, "bw": 20}, + {"index": 4, "bw": 80}, + {"index": 5, "bw": 0}, + {"index": 6, "bw": 0}, + {"index": 7, "bw": 0}, ] }) except NlError as exc: -- cgit v1.2.3 From 9a5bbab285cd21f56ec759d38d452394b51c5073 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 22 Jul 2025 11:19:45 +0200 Subject: netdevsim: add fw_update_flash_chunk_time_ms debugfs knobs Netdevsim emulates firmware update and it takes 5 seconds to complete. For some use cases, this is too long and unnecessary. Allow user to configure the time by exposing debugfs a knob to set chunk time. Signed-off-by: Jiri Pirko Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20250722091945.79506-1-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/dev.c | 9 ++++++--- drivers/net/netdevsim/netdevsim.h | 1 + tools/testing/selftests/drivers/net/netdevsim/devlink.sh | 2 ++ 3 files changed, 9 insertions(+), 3 deletions(-) (limited to 'tools/testing/selftests') diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 01c7edb28d96..2672d071b325 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -314,6 +314,8 @@ static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev) &nsim_dev->fw_update_status); debugfs_create_u32("fw_update_overwrite_mask", 0600, nsim_dev->ddir, &nsim_dev->fw_update_overwrite_mask); + debugfs_create_u32("fw_update_flash_chunk_time_ms", 0600, nsim_dev->ddir, + &nsim_dev->fw_update_flash_chunk_time_ms); debugfs_create_u32("max_macs", 0600, nsim_dev->ddir, &nsim_dev->max_macs); debugfs_create_bool("test1", 0600, nsim_dev->ddir, @@ -1015,9 +1017,9 @@ static int nsim_dev_info_get(struct devlink *devlink, DEVLINK_INFO_VERSION_TYPE_COMPONENT); } -#define NSIM_DEV_FLASH_SIZE 500000 +#define NSIM_DEV_FLASH_SIZE 50000 #define NSIM_DEV_FLASH_CHUNK_SIZE 1000 -#define NSIM_DEV_FLASH_CHUNK_TIME_MS 10 +#define NSIM_DEV_FLASH_CHUNK_TIME_MS_DEFAULT 100 static int nsim_dev_flash_update(struct devlink *devlink, struct devlink_flash_update_params *params, @@ -1041,7 +1043,7 @@ static int nsim_dev_flash_update(struct devlink *devlink, params->component, i * NSIM_DEV_FLASH_CHUNK_SIZE, NSIM_DEV_FLASH_SIZE); - msleep(NSIM_DEV_FLASH_CHUNK_TIME_MS); + msleep(nsim_dev->fw_update_flash_chunk_time_ms ?: 1); } if (nsim_dev->fw_update_status) { @@ -1585,6 +1587,7 @@ int nsim_drv_probe(struct nsim_bus_dev *nsim_bus_dev) INIT_LIST_HEAD(&nsim_dev->port_list); nsim_dev->fw_update_status = true; nsim_dev->fw_update_overwrite_mask = 0; + nsim_dev->fw_update_flash_chunk_time_ms = NSIM_DEV_FLASH_CHUNK_TIME_MS_DEFAULT; nsim_dev->max_macs = NSIM_DEV_MAX_MACS_DEFAULT; nsim_dev->test1 = NSIM_DEV_TEST1_DEFAULT; spin_lock_init(&nsim_dev->fa_cookie_lock); diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 8eeeb9256077..bddd24c1389d 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -317,6 +317,7 @@ struct nsim_dev { struct list_head port_list; bool fw_update_status; u32 fw_update_overwrite_mask; + u32 fw_update_flash_chunk_time_ms; u32 max_macs; bool test1; bool dont_allow_reload; diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh index a102803ff74f..030762b203d7 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh @@ -40,6 +40,8 @@ fw_flash_test() return fi + echo "10"> $DEBUGFS_DIR/fw_update_flash_chunk_time_ms + devlink dev flash $DL_HANDLE file $DUMMYFILE check_err $? "Failed to flash with status updates on" -- cgit v1.2.3 From 51217c659e741e7e5452ac550aaf83692d3d14cd Mon Sep 17 00:00:00 2001 From: Chia-Yu Chang Date: Tue, 22 Jul 2025 11:59:13 +0200 Subject: selftests/tc-testing: Fix warning and style check on tdc.sh Replace exit code check with '! cmd' and add both quote and $(...) around 'nproc' to prevent warning and issue reported by shellcheck. Signed-off-by: Chia-Yu Chang Link: https://patch.msgid.link/20250722095915.24485-5-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/tdc.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh index 589b18ed758a..7a81088802d1 100755 --- a/tools/testing/selftests/tc-testing/tdc.sh +++ b/tools/testing/selftests/tc-testing/tdc.sh @@ -4,8 +4,7 @@ # If a module is required and was not compiled # the test that requires it will fail anyways try_modprobe() { - modprobe -q -R "$1" - if [ $? -ne 0 ]; then + if ! modprobe -q -R "$1"; then echo "Module $1 not found... skipping." else modprobe "$1" @@ -67,4 +66,4 @@ try_modprobe sch_hfsc try_modprobe sch_hhf try_modprobe sch_htb try_modprobe sch_teql -./tdc.py -J`nproc` +./tdc.py -J"$(nproc)" -- cgit v1.2.3 From 032f0e9e15a41899ccd0d8173c07b7c2a7236174 Mon Sep 17 00:00:00 2001 From: Chia-Yu Chang Date: Tue, 22 Jul 2025 11:59:14 +0200 Subject: selftests/tc-testing: Add selftests for qdisc DualPI2 Update configuration of tc-tests and preload DualPI2 module for self-tests, and add following self-test cases for DualPI2: Test a4c7: Create DualPI2 with default setting Test 1ea4: Create DualPI2 with memlimit Test 2130: Create DualPI2 with typical_rtt and max_rtt Test 90c1: Create DualPI2 with max_rtt Test 7b3c: Create DualPI2 with any_ect option Test 49a3: Create DualPI2 with overflow option Test d0a1: Create DualPI2 with drop_enqueue option Test f051: Create DualPI2 with no_split_gso option Test 456b: Create DualPI2 with packet step_thresh Test 610c: Create DualPI2 with packet min_qlen_step Test b4fa: Create DualPI2 with packet coupling_factor Test 37f1: Create DualPI2 with packet classic_protection Signed-off-by: Chia-Yu Chang Reviewed-by: Victor Nogueira Link: https://patch.msgid.link/20250722095915.24485-6-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/config | 1 + .../tc-testing/tc-tests/qdiscs/dualpi2.json | 254 +++++++++++++++++++++ tools/testing/selftests/tc-testing/tdc.sh | 1 + 3 files changed, 256 insertions(+) create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config index 8e902f7f1a18..c20aa16b1d63 100644 --- a/tools/testing/selftests/tc-testing/config +++ b/tools/testing/selftests/tc-testing/config @@ -31,6 +31,7 @@ CONFIG_NET_SCH_CBS=m CONFIG_NET_SCH_CHOKE=m CONFIG_NET_SCH_CODEL=m CONFIG_NET_SCH_DRR=m +CONFIG_NET_SCH_DUALPI2=m CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_FQ=m CONFIG_NET_SCH_FQ_CODEL=m diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json new file mode 100644 index 000000000000..cd1f2ee8f354 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json @@ -0,0 +1,254 @@ +[ + { + "id": "a4c7", + "name": "Create DualPI2 with default setting", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p.* step_thresh 1ms min_qlen_step 0p coupling_factor 2 drop_on_overload drop_dequeue classic_protection 10% l4s_ect split_gso", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "1ea4", + "name": "Create DualPI2 with memlimit", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 memlimit 20000000", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p.* memlimit 20000000B", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "2130", + "name": "Create DualPI2 with typical_rtt and max_rtt", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 typical_rtt 20ms max_rtt 200ms", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p.* target 20ms tupdate 20ms alpha 0.042969 beta 1.496094", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "90c1", + "name": "Create DualPI2 with max_rtt", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 max_rtt 300ms", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p.* target 50ms tupdate 50ms alpha 0.050781 beta 0.996094", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "7b3c", + "name": "Create DualPI2 with any_ect option", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 any_ect", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* any_ect", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "49a3", + "name": "Create DualPI2 with overflow option", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 overflow", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p.* overflow", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "d0a1", + "name": "Create DualPI2 with drop_enqueue option", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 drop_enqueue", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* drop_enqueue", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "f051", + "name": "Create DualPI2 with no_split_gso option", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 no_split_gso", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* no_split_gso", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "456b", + "name": "Create DualPI2 with packet step_thresh", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 step_thresh 3p", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* step_thresh 3p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "610c", + "name": "Create DualPI2 with packet min_qlen_step", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 min_qlen_step 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* min_qlen_step 1p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "b4fa", + "name": "Create DualPI2 with packet coupling_factor", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 coupling_factor 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* coupling_factor 1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "37f1", + "name": "Create DualPI2 with packet classic_protection", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 classic_protection 0", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* classic_protection 0%", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + } +] diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh index 7a81088802d1..dae19687912d 100755 --- a/tools/testing/selftests/tc-testing/tdc.sh +++ b/tools/testing/selftests/tc-testing/tdc.sh @@ -66,4 +66,5 @@ try_modprobe sch_hfsc try_modprobe sch_hhf try_modprobe sch_htb try_modprobe sch_teql +try_modprobe sch_dualpi2 ./tdc.py -J"$(nproc)" -- cgit v1.2.3 From 86941382508850d58c11bdafe0fec646dfd31b09 Mon Sep 17 00:00:00 2001 From: Nimrod Oren Date: Tue, 22 Jul 2025 15:26:55 +0300 Subject: selftests: drv-net: wait for iperf client to stop sending A few packets may still be sent out during the termination of iperf processes. These late packets cause failures in rss_ctx.py when they arrive on queues expected to be empty. Example failure observed: Check failed 2 != 0 traffic on inactive queues (context 1): [0, 0, 1, 1, 386385, 397196, 0, 0, 0, 0, ...] Check failed 4 != 0 traffic on inactive queues (context 2): [0, 0, 0, 0, 2, 2, 247152, 253013, 0, 0, ...] Check failed 2 != 0 traffic on inactive queues (context 3): [0, 0, 0, 0, 0, 0, 1, 1, 282434, 283070, ...] To avoid such failures, wait until all client sockets for the requested port are either closed or in the TIME_WAIT state. Fixes: 847aa551fa78 ("selftests: drv-net: rss_ctx: factor out send traffic and check") Signed-off-by: Nimrod Oren Reviewed-by: Gal Pressman Reviewed-by: Carolina Jubran Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250722122655.3194442-1-noren@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/lib/py/load.py | 23 +++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py index d9c10613ae67..44151b7b1a24 100644 --- a/tools/testing/selftests/drivers/net/lib/py/load.py +++ b/tools/testing/selftests/drivers/net/lib/py/load.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 +import re import time from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen @@ -10,12 +11,11 @@ class GenerateTraffic: self.env = env - if port is None: - port = rand_port() - self._iperf_server = cmd(f"iperf3 -s -1 -p {port}", background=True) - wait_port_listen(port) + self.port = rand_port() if port is None else port + self._iperf_server = cmd(f"iperf3 -s -1 -p {self.port}", background=True) + wait_port_listen(self.port) time.sleep(0.1) - self._iperf_client = cmd(f"iperf3 -c {env.addr} -P 16 -p {port} -t 86400", + self._iperf_client = cmd(f"iperf3 -c {env.addr} -P 16 -p {self.port} -t 86400", background=True, host=env.remote) # Wait for traffic to ramp up @@ -56,3 +56,16 @@ class GenerateTraffic: ksft_pr(">> Server:") ksft_pr(self._iperf_server.stdout) ksft_pr(self._iperf_server.stderr) + self._wait_client_stopped() + + def _wait_client_stopped(self, sleep=0.005, timeout=5): + end = time.monotonic() + timeout + + live_port_pattern = re.compile(fr":{self.port:04X} 0[^6] ") + + while time.monotonic() < end: + data = cmd("cat /proc/net/tcp*", host=self.env.remote).stdout + if not live_port_pattern.search(data): + return + time.sleep(sleep) + raise Exception(f"Waiting for client to stop timed out after {timeout}s") -- cgit v1.2.3 From ba578b87fe2beef95b37264f8a98c0b505b93de9 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Tue, 22 Jul 2025 16:33:37 +0200 Subject: selftests/bpf: Test invalid narrower ctx load This patch adds selftests to cover invalid narrower loads on the context. These used to cause kernel warnings before the previous patch. To trigger the warning, the load had to be aligned, to read an affected context field (ex., skb->sk), and not starting at the beginning of the field. The nine new cases all fail without the previous patch. Suggested-by: Eduard Zingerman Signed-off-by: Paul Chaignon Signed-off-by: Martin KaFai Lau Acked-by: Eduard Zingerman Link: https://patch.msgid.link/44cd83ea9c6868079943f0a436c6efa850528cc1.1753194596.git.paul.chaignon@gmail.com --- tools/testing/selftests/bpf/progs/verifier_ctx.c | 25 ++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/progs/verifier_ctx.c b/tools/testing/selftests/bpf/progs/verifier_ctx.c index a83809a1dbbf..0450840c92d9 100644 --- a/tools/testing/selftests/bpf/progs/verifier_ctx.c +++ b/tools/testing/selftests/bpf/progs/verifier_ctx.c @@ -218,4 +218,29 @@ __naked void null_check_8_null_bind(void) : __clobber_all); } +#define narrow_load(type, ctx, field) \ + SEC(type) \ + __description("narrow load on field " #field " of " #ctx) \ + __failure __msg("invalid bpf_context access") \ + __naked void invalid_narrow_load##ctx##field(void) \ + { \ + asm volatile (" \ + r1 = *(u32 *)(r1 + %[off]); \ + r0 = 0; \ + exit;" \ + : \ + : __imm_const(off, offsetof(struct ctx, field) + 4) \ + : __clobber_all); \ + } + +narrow_load("cgroup/getsockopt", bpf_sockopt, sk); +narrow_load("cgroup/getsockopt", bpf_sockopt, optval); +narrow_load("cgroup/getsockopt", bpf_sockopt, optval_end); +narrow_load("tc", __sk_buff, sk); +narrow_load("cgroup/bind4", bpf_sock_addr, sk); +narrow_load("sockops", bpf_sock_ops, sk); +narrow_load("sockops", bpf_sock_ops, skb_data); +narrow_load("sockops", bpf_sock_ops, skb_data_end); +narrow_load("sockops", bpf_sock_ops, skb_hwtstamp); + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From b351e9c93a4fc0a1b789c0b89eeecb9d5bf564cd Mon Sep 17 00:00:00 2001 From: Frank Li Date: Thu, 10 Jul 2025 15:13:54 -0400 Subject: selftests: pci_endpoint: Add doorbell test case Add doorbell test case. Signed-off-by: Frank Li [mani: Reworded the testcase description] Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas Tested-by: Niklas Cassel Link: https://patch.msgid.link/20250710-ep-msi-v21-8-57683fc7fb25@nxp.com --- Documentation/PCI/endpoint/pci-test-howto.rst | 15 ++++++++++++ .../selftests/pci_endpoint/pci_endpoint_test.c | 28 ++++++++++++++++++++++ 2 files changed, 43 insertions(+) (limited to 'tools/testing/selftests') diff --git a/Documentation/PCI/endpoint/pci-test-howto.rst b/Documentation/PCI/endpoint/pci-test-howto.rst index aafc17ef3fd3..dd66858cde46 100644 --- a/Documentation/PCI/endpoint/pci-test-howto.rst +++ b/Documentation/PCI/endpoint/pci-test-howto.rst @@ -203,3 +203,18 @@ controllers, it is advisable to skip this testcase using this command:: # pci_endpoint_test -f pci_ep_bar -f pci_ep_basic -v memcpy -T COPY_TEST -v dma + +Kselftest EP Doorbell +~~~~~~~~~~~~~~~~~~~~~ + +If the Endpoint MSI controller is used for the doorbell usecase, run below +command for testing it: + + # pci_endpoint_test -f pcie_ep_doorbell + + # Starting 1 tests from 1 test cases. + # RUN pcie_ep_doorbell.DOORBELL_TEST ... + # OK pcie_ep_doorbell.DOORBELL_TEST + ok 1 pcie_ep_doorbell.DOORBELL_TEST + # PASSED: 1 / 1 tests passed. + # Totals: pass:1 fail:0 xfail:0 xpass:0 skip:0 error:0 diff --git a/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c b/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c index ac26481d29d9..da0db0e7c969 100644 --- a/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c +++ b/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c @@ -229,4 +229,32 @@ TEST_F(pci_ep_data_transfer, COPY_TEST) test_size[i]); } } + +FIXTURE(pcie_ep_doorbell) +{ + int fd; +}; + +FIXTURE_SETUP(pcie_ep_doorbell) +{ + self->fd = open(test_device, O_RDWR); + + ASSERT_NE(-1, self->fd) TH_LOG("Can't open PCI Endpoint Test device"); +}; + +FIXTURE_TEARDOWN(pcie_ep_doorbell) +{ + close(self->fd); +}; + +TEST_F(pcie_ep_doorbell, DOORBELL_TEST) +{ + int ret; + + pci_ep_ioctl(PCITEST_SET_IRQTYPE, PCITEST_IRQ_TYPE_AUTO); + ASSERT_EQ(0, ret) TH_LOG("Can't set AUTO IRQ type"); + + pci_ep_ioctl(PCITEST_DOORBELL, 0); + EXPECT_FALSE(ret) TH_LOG("Test failed for Doorbell\n"); +} TEST_HARNESS_MAIN -- cgit v1.2.3 From 213879061a9c60200ba971330dbefec6df3b4a30 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 21 Jul 2025 13:42:12 -0400 Subject: selftests/tracing: Fix false failure of subsystem event test The subsystem event test enables all "sched" events and makes sure there's at least 3 different events in the output. It used to cat the entire trace file to | wc -l, but on slow machines, that could last a very long time. To solve that, it was changed to just read the first 100 lines of the trace file. This can cause false failures as some events repeat so often, that the 100 lines that are examined could possibly be of only one event. Instead, create an awk script that looks for 3 different events and will exit out after it finds them. This will find the 3 events the test looks for (eventually if it works), and still exit out after the test is satisfied and not cause slower machines to run forever. Link: https://lore.kernel.org/r/20250721134212.53c3e140@batman.local.home Reported-by: Tengda Wu Closes: https://lore.kernel.org/all/20250710130134.591066-1-wutengda@huaweicloud.com/ Fixes: 1a4ea83a6e67 ("selftests/ftrace: Limit length in subsystem-enable tests") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Shuah Khan --- .../ftrace/test.d/event/subsystem-enable.tc | 28 ++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc index b7c8f29c09a9..65916bb55dfb 100644 --- a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc +++ b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc @@ -14,11 +14,35 @@ fail() { #msg exit_fail } +# As reading trace can last forever, simply look for 3 different +# events then exit out of reading the file. If there's not 3 different +# events, then the test has failed. +check_unique() { + cat trace | grep -v '^#' | awk ' + BEGIN { cnt = 0; } + { + for (i = 0; i < cnt; i++) { + if (event[i] == $5) { + break; + } + } + if (i == cnt) { + event[cnt++] = $5; + if (cnt > 2) { + exit; + } + } + } + END { + printf "%d", cnt; + }' +} + echo 'sched:*' > set_event yield -count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` +count=`check_unique` if [ $count -lt 3 ]; then fail "at least fork, exec and exit events should be recorded" fi @@ -29,7 +53,7 @@ echo 1 > events/sched/enable yield -count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` +count=`check_unique` if [ $count -lt 3 ]; then fail "at least fork, exec and exit events should be recorded" fi -- cgit v1.2.3 From 30fb5e134f05800dc424f8aa1d69841a6bdd9a54 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 23 Jul 2025 16:13:45 -0700 Subject: selftests/pidfd: Fix duplicate-symbol warnings for SCHED_ CPP symbols The pidfd selftests run in userspace and include both userspace and kernel header files. On some distros (for example, CentOS), this results in duplicate-symbol warnings in allmodconfig builds, while on other distros (for example, Ubuntu) it does not. Therefore, use #undef to get rid of the userspace definitions in favor of the kernel definitions. Other ways of handling this include splitting up the selftest code so that the userspace definitions go into one translation unit and the kernel definitions into another (which might or might not be feasible) or to adjust compiler command-line options to suppress the warnings (which might or might not be desirable). [ paulmck: Apply Shuah Khan feedback. ] Link: https://lore.kernel.org/r/cc7e4fe7-299f-4bf3-af46-df6551d61997@paulmck-laptop Signed-off-by: Paul E. McKenney Reviewed-by: Shuah Khan Cc: Christian Brauner Cc: Signed-off-by: Shuah Khan --- tools/testing/selftests/pidfd/pidfd.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index efd74063126e..254eaa6cc4df 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -16,6 +16,15 @@ #include #include +/* + * Remove the userspace definitions of the following preprocessor symbols + * to avoid duplicate-definition warnings from the subsequent in-kernel + * definitions. + */ +#undef SCHED_NORMAL +#undef SCHED_FLAG_KEEP_ALL +#undef SCHED_FLAG_UTIL_CLAMP + #include "../kselftest.h" #include "../clone3/clone3_selftests.h" -- cgit v1.2.3 From f70d9819c779fd9eae04c38b1997b3224a5b0fe7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 23 Jul 2025 10:10:46 -0700 Subject: selftests: drv-net: devmem: use new mattr ynl helpers Use the just-added YNL helpers instead of manually setting "_present" bits in the queue attrs. Compile tested only. Reviewed-by: Donald Hunter Acked-by: Stanislav Fomichev Acked-by: Mina Almasry Link: https://patch.msgid.link/20250723171046.4027470-6-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/ncdevmem.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index cc9b40d9c5d5..72f828021f83 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -526,12 +526,10 @@ static struct netdev_queue_id *create_queues(void) struct netdev_queue_id *queues; size_t i = 0; - queues = calloc(num_queues, sizeof(*queues)); + queues = netdev_queue_id_alloc(num_queues); for (i = 0; i < num_queues; i++) { - queues[i]._present.type = 1; - queues[i]._present.id = 1; - queues[i].type = NETDEV_QUEUE_TYPE_RX; - queues[i].id = start_queue + i; + netdev_queue_id_set_type(&queues[i], NETDEV_QUEUE_TYPE_RX); + netdev_queue_id_set_id(&queues[i], start_queue + i); } return queues; -- cgit v1.2.3 From 8e7583a4f65f3dbf3e8deb4e60f3679c276bef62 Mon Sep 17 00:00:00 2001 From: Samiullah Khawaja Date: Wed, 23 Jul 2025 01:30:31 +0000 Subject: net: define an enum for the napi threaded state Instead of using '0' and '1' for napi threaded state use an enum with 'disabled' and 'enabled' states. Tested: ./tools/testing/selftests/net/nl_netdev.py TAP version 13 1..7 ok 1 nl_netdev.empty_check ok 2 nl_netdev.lo_check ok 3 nl_netdev.page_pool_check ok 4 nl_netdev.napi_list_check ok 5 nl_netdev.dev_set_threaded ok 6 nl_netdev.napi_set_threaded ok 7 nl_netdev.nsim_rxq_reset_down # Totals: pass:7 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Samiullah Khawaja Link: https://patch.msgid.link/20250723013031.2911384-4-skhawaja@google.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 13 +++++--- .../networking/net_cachelines/net_device.rst | 2 +- include/linux/netdevice.h | 10 +++--- include/uapi/linux/netdev.h | 5 +++ net/core/dev.c | 12 +++++--- net/core/dev.h | 13 +++++--- net/core/dev_api.c | 3 +- net/core/netdev-genl-gen.c | 2 +- net/core/netdev-genl.c | 2 +- tools/include/uapi/linux/netdev.h | 5 +++ tools/testing/selftests/net/nl_netdev.py | 36 +++++++++++----------- 11 files changed, 62 insertions(+), 41 deletions(-) (limited to 'tools/testing/selftests') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 85d0ea6ac426..c035dc0f64fd 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -85,6 +85,10 @@ definitions: name: qstats-scope type: flags entries: [queue] + - + name: napi-threaded + type: enum + entries: [disabled, enabled] attribute-sets: - @@ -286,11 +290,10 @@ attribute-sets: - name: threaded doc: Whether the NAPI is configured to operate in threaded polling - mode. If this is set to 1 then the NAPI context operates in - threaded polling mode. - type: uint - checks: - max: 1 + mode. If this is set to enabled then the NAPI context operates + in threaded polling mode. + type: u32 + enum: napi-threaded - name: xsk-info attributes: [] diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst index 2d3dc4692d20..1c19bb7705df 100644 --- a/Documentation/networking/net_cachelines/net_device.rst +++ b/Documentation/networking/net_cachelines/net_device.rst @@ -68,6 +68,7 @@ unsigned_char addr_assign_type unsigned_char addr_len unsigned_char upper_level unsigned_char lower_level +u8 threaded napi_poll(napi_enable,netif_set_threaded) unsigned_short neigh_priv_len unsigned_short padded unsigned_short dev_id @@ -165,7 +166,6 @@ struct sfp_bus* sfp_bus struct lock_class_key* qdisc_tx_busylock bool proto_down unsigned:1 wol_enabled -unsigned:1 threaded napi_poll(napi_enable,netif_set_threaded) unsigned_long:1 see_all_hwtstamp_requests unsigned_long:1 change_proto_down unsigned_long:1 netns_immutable diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a97c9a337d6b..5e5de4b0a433 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -369,7 +369,7 @@ struct napi_config { u64 irq_suspend_timeout; u32 defer_hard_irqs; cpumask_t affinity_mask; - bool threaded; + u8 threaded; unsigned int napi_id; }; @@ -590,7 +590,8 @@ static inline bool napi_complete(struct napi_struct *n) } void netif_threaded_enable(struct net_device *dev); -int dev_set_threaded(struct net_device *dev, bool threaded); +int dev_set_threaded(struct net_device *dev, + enum netdev_napi_threaded threaded); void napi_disable(struct napi_struct *n); void napi_disable_locked(struct napi_struct *n); @@ -1872,6 +1873,7 @@ enum netdev_reg_state { * @addr_len: Hardware address length * @upper_level: Maximum depth level of upper devices. * @lower_level: Maximum depth level of lower devices. + * @threaded: napi threaded state. * @neigh_priv_len: Used in neigh_alloc() * @dev_id: Used to differentiate devices that share * the same link layer address @@ -2011,8 +2013,6 @@ enum netdev_reg_state { * switch driver and used to set the phys state of the * switch port. * - * @threaded: napi threaded mode is enabled - * * @irq_affinity_auto: driver wants the core to store and re-assign the IRQ * affinity. Set by netif_enable_irq_affinity(), then * the driver must create a persistent napi by @@ -2248,6 +2248,7 @@ struct net_device { unsigned char addr_len; unsigned char upper_level; unsigned char lower_level; + u8 threaded; unsigned short neigh_priv_len; unsigned short dev_id; @@ -2429,7 +2430,6 @@ struct net_device { struct sfp_bus *sfp_bus; struct lock_class_key *qdisc_tx_busylock; bool proto_down; - bool threaded; bool irq_affinity_auto; bool rx_cpu_rmap_auto; diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 1f3719a9a0eb..48eb49aa03d4 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -77,6 +77,11 @@ enum netdev_qstats_scope { NETDEV_QSTATS_SCOPE_QUEUE = 1, }; +enum netdev_napi_threaded { + NETDEV_NAPI_THREADED_DISABLED, + NETDEV_NAPI_THREADED_ENABLED, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, diff --git a/net/core/dev.c b/net/core/dev.c index f28661d6f5ea..1c6e755841ce 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6963,7 +6963,8 @@ static void napi_stop_kthread(struct napi_struct *napi) napi->thread = NULL; } -int napi_set_threaded(struct napi_struct *napi, bool threaded) +int napi_set_threaded(struct napi_struct *napi, + enum netdev_napi_threaded threaded) { if (threaded) { if (!napi->thread) { @@ -6988,7 +6989,8 @@ int napi_set_threaded(struct napi_struct *napi, bool threaded) return 0; } -int netif_set_threaded(struct net_device *dev, bool threaded) +int netif_set_threaded(struct net_device *dev, + enum netdev_napi_threaded threaded) { struct napi_struct *napi; int err = 0; @@ -7000,7 +7002,7 @@ int netif_set_threaded(struct net_device *dev, bool threaded) if (!napi->thread) { err = napi_kthread_create(napi); if (err) { - threaded = false; + threaded = NETDEV_NAPI_THREADED_DISABLED; break; } } @@ -7043,7 +7045,7 @@ int netif_set_threaded(struct net_device *dev, bool threaded) */ void netif_threaded_enable(struct net_device *dev) { - WARN_ON_ONCE(netif_set_threaded(dev, true)); + WARN_ON_ONCE(netif_set_threaded(dev, NETDEV_NAPI_THREADED_ENABLED)); } EXPORT_SYMBOL(netif_threaded_enable); @@ -7360,7 +7362,7 @@ void netif_napi_add_weight_locked(struct net_device *dev, * threaded mode will not be enabled in napi_enable(). */ if (dev->threaded && napi_kthread_create(napi)) - dev->threaded = false; + dev->threaded = NETDEV_NAPI_THREADED_DISABLED; netif_napi_set_irq_locked(napi, -1); } EXPORT_SYMBOL(netif_napi_add_weight_locked); diff --git a/net/core/dev.h b/net/core/dev.h index f5b567310908..ab69edc0c3e3 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -315,14 +315,19 @@ static inline void napi_set_irq_suspend_timeout(struct napi_struct *n, WRITE_ONCE(n->irq_suspend_timeout, timeout); } -static inline bool napi_get_threaded(struct napi_struct *n) +static inline enum netdev_napi_threaded napi_get_threaded(struct napi_struct *n) { - return test_bit(NAPI_STATE_THREADED, &n->state); + if (test_bit(NAPI_STATE_THREADED, &n->state)) + return NETDEV_NAPI_THREADED_ENABLED; + + return NETDEV_NAPI_THREADED_DISABLED; } -int napi_set_threaded(struct napi_struct *n, bool threaded); +int napi_set_threaded(struct napi_struct *n, + enum netdev_napi_threaded threaded); -int netif_set_threaded(struct net_device *dev, bool threaded); +int netif_set_threaded(struct net_device *dev, + enum netdev_napi_threaded threaded); int rps_cpumask_housekeeping(struct cpumask *mask); diff --git a/net/core/dev_api.c b/net/core/dev_api.c index dd7f57013ce5..f28852078aa6 100644 --- a/net/core/dev_api.c +++ b/net/core/dev_api.c @@ -368,7 +368,8 @@ void netdev_state_change(struct net_device *dev) } EXPORT_SYMBOL(netdev_state_change); -int dev_set_threaded(struct net_device *dev, bool threaded) +int dev_set_threaded(struct net_device *dev, + enum netdev_napi_threaded threaded) { int ret; diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index 0994bd68a7e6..e9a2a6f26cb7 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -97,7 +97,7 @@ static const struct nla_policy netdev_napi_set_nl_policy[NETDEV_A_NAPI_THREADED [NETDEV_A_NAPI_DEFER_HARD_IRQS] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_napi_defer_hard_irqs_range), [NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT] = { .type = NLA_UINT, }, [NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT] = { .type = NLA_UINT, }, - [NETDEV_A_NAPI_THREADED] = NLA_POLICY_MAX(NLA_UINT, 1), + [NETDEV_A_NAPI_THREADED] = NLA_POLICY_MAX(NLA_U32, 1), }; /* NETDEV_CMD_BIND_TX - do */ diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 5875df372415..6314eb7bdf69 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -333,7 +333,7 @@ netdev_nl_napi_set_config(struct napi_struct *napi, struct genl_info *info) int ret; threaded = nla_get_uint(info->attrs[NETDEV_A_NAPI_THREADED]); - ret = napi_set_threaded(napi, !!threaded); + ret = napi_set_threaded(napi, threaded); if (ret) return ret; } diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 1f3719a9a0eb..48eb49aa03d4 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -77,6 +77,11 @@ enum netdev_qstats_scope { NETDEV_QSTATS_SCOPE_QUEUE = 1, }; +enum netdev_napi_threaded { + NETDEV_NAPI_THREADED_DISABLED, + NETDEV_NAPI_THREADED_ENABLED, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py index c8ffade79a52..5c66421ab8aa 100755 --- a/tools/testing/selftests/net/nl_netdev.py +++ b/tools/testing/selftests/net/nl_netdev.py @@ -52,14 +52,14 @@ def napi_set_threaded(nf) -> None: napi1_id = napis[1]['id'] # set napi threaded and verify - nf.napi_set({'id': napi0_id, 'threaded': 1}) + nf.napi_set({'id': napi0_id, 'threaded': "enabled"}) napi0 = nf.napi_get({'id': napi0_id}) - ksft_eq(napi0['threaded'], 1) + ksft_eq(napi0['threaded'], "enabled") ksft_ne(napi0.get('pid'), None) # check it is not set for napi1 napi1 = nf.napi_get({'id': napi1_id}) - ksft_eq(napi1['threaded'], 0) + ksft_eq(napi1['threaded'], "disabled") ksft_eq(napi1.get('pid'), None) ip(f"link set dev {nsim.ifname} down") @@ -67,18 +67,18 @@ def napi_set_threaded(nf) -> None: # verify if napi threaded is still set napi0 = nf.napi_get({'id': napi0_id}) - ksft_eq(napi0['threaded'], 1) + ksft_eq(napi0['threaded'], "enabled") ksft_ne(napi0.get('pid'), None) # check it is still not set for napi1 napi1 = nf.napi_get({'id': napi1_id}) - ksft_eq(napi1['threaded'], 0) + ksft_eq(napi1['threaded'], "disabled") ksft_eq(napi1.get('pid'), None) # unset napi threaded and verify - nf.napi_set({'id': napi0_id, 'threaded': 0}) + nf.napi_set({'id': napi0_id, 'threaded': "disabled"}) napi0 = nf.napi_get({'id': napi0_id}) - ksft_eq(napi0['threaded'], 0) + ksft_eq(napi0['threaded'], "disabled") ksft_eq(napi0.get('pid'), None) # set threaded at device level @@ -86,10 +86,10 @@ def napi_set_threaded(nf) -> None: # check napi threaded is set for both napis napi0 = nf.napi_get({'id': napi0_id}) - ksft_eq(napi0['threaded'], 1) + ksft_eq(napi0['threaded'], "enabled") ksft_ne(napi0.get('pid'), None) napi1 = nf.napi_get({'id': napi1_id}) - ksft_eq(napi1['threaded'], 1) + ksft_eq(napi1['threaded'], "enabled") ksft_ne(napi1.get('pid'), None) # unset threaded at device level @@ -97,16 +97,16 @@ def napi_set_threaded(nf) -> None: # check napi threaded is unset for both napis napi0 = nf.napi_get({'id': napi0_id}) - ksft_eq(napi0['threaded'], 0) + ksft_eq(napi0['threaded'], "disabled") ksft_eq(napi0.get('pid'), None) napi1 = nf.napi_get({'id': napi1_id}) - ksft_eq(napi1['threaded'], 0) + ksft_eq(napi1['threaded'], "disabled") ksft_eq(napi1.get('pid'), None) # set napi threaded for napi0 nf.napi_set({'id': napi0_id, 'threaded': 1}) napi0 = nf.napi_get({'id': napi0_id}) - ksft_eq(napi0['threaded'], 1) + ksft_eq(napi0['threaded'], "enabled") ksft_ne(napi0.get('pid'), None) # unset threaded at device level @@ -114,10 +114,10 @@ def napi_set_threaded(nf) -> None: # check napi threaded is unset for both napis napi0 = nf.napi_get({'id': napi0_id}) - ksft_eq(napi0['threaded'], 0) + ksft_eq(napi0['threaded'], "disabled") ksft_eq(napi0.get('pid'), None) napi1 = nf.napi_get({'id': napi1_id}) - ksft_eq(napi1['threaded'], 0) + ksft_eq(napi1['threaded'], "disabled") ksft_eq(napi1.get('pid'), None) def dev_set_threaded(nf) -> None: @@ -141,10 +141,10 @@ def dev_set_threaded(nf) -> None: # check napi threaded is set for both napis napi0 = nf.napi_get({'id': napi0_id}) - ksft_eq(napi0['threaded'], 1) + ksft_eq(napi0['threaded'], "enabled") ksft_ne(napi0.get('pid'), None) napi1 = nf.napi_get({'id': napi1_id}) - ksft_eq(napi1['threaded'], 1) + ksft_eq(napi1['threaded'], "enabled") ksft_ne(napi1.get('pid'), None) # unset threaded @@ -152,10 +152,10 @@ def dev_set_threaded(nf) -> None: # check napi threaded is unset for both napis napi0 = nf.napi_get({'id': napi0_id}) - ksft_eq(napi0['threaded'], 0) + ksft_eq(napi0['threaded'], "disabled") ksft_eq(napi0.get('pid'), None) napi1 = nf.napi_get({'id': napi1_id}) - ksft_eq(napi1['threaded'], 0) + ksft_eq(napi1['threaded'], "disabled") ksft_eq(napi1.get('pid'), None) def nsim_rxq_reset_down(nf) -> None: -- cgit v1.2.3 From b4d52c698210ae1a3ceb487b189701bc70551a48 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 23 Jul 2025 16:54:53 +0300 Subject: selftests: drv-net: Fix remote command checking in require_cmd() The require_cmd() method was checking for command availability locally even when remote=True was specified, due to a missing host parameter. Fix by passing host=self.remote when checking remote command availability, ensuring commands are verified on the correct host. Fixes: f1e68a1a4a40 ("selftests: drv-net: add require_XYZ() helpers for validating env") Reviewed-by: Nimrod Oren Signed-off-by: Gal Pressman Link: https://patch.msgid.link/20250723135454.649342-2-gal@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/lib/py/env.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index 3bccddf8cbc5..1b8bd648048f 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -259,7 +259,7 @@ class NetDrvEpEnv(NetDrvEnvBase): if not self._require_cmd(comm, "local"): raise KsftSkipEx("Test requires command: " + comm) if remote: - if not self._require_cmd(comm, "remote"): + if not self._require_cmd(comm, "remote", host=self.remote): raise KsftSkipEx("Test requires (remote) command: " + comm) def wait_hw_stats_settle(self): -- cgit v1.2.3 From d74cd9a02f020a6263b12a4c9e0f846b679a2f13 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 23 Jul 2025 16:54:54 +0300 Subject: selftests: drv-net: Make command requirements explicit Make require_cmd() calls explicit about whether commands are needed locally, remotely, or both. Since require_cmd() defaults to local=True, tests should explicitly set local=False when commands are only needed remotely. - socat: Set local=False since it's only needed on remote hosts. - iperf3: Use single call with both local=True and remote=True since it's needed on both hosts. This avoids unnecessary test failures when commands are missing locally but available remotely where actually needed, and consolidates a duplicate require_cmd() call into single call that checks both hosts. Fixes: 0d0f4174f6c8 ("selftests: drv-net: add a simple TSO test") Fixes: f1e68a1a4a40 ("selftests: drv-net: add require_XYZ() helpers for validating env") Fixes: c76bab22e920 ("selftests: drv-net: rss_input_xfrm: Check test prerequisites before running") Reviewed-by: Nimrod Oren Signed-off-by: Gal Pressman Link: https://patch.msgid.link/20250723135454.649342-3-gal@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py | 3 +-- tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py | 2 +- tools/testing/selftests/drivers/net/hw/tso.py | 2 +- tools/testing/selftests/drivers/net/lib/py/load.py | 2 +- tools/testing/selftests/drivers/net/ping.py | 2 +- 5 files changed, 5 insertions(+), 6 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py index 835c357919a8..ead6784d1910 100755 --- a/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py +++ b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py @@ -451,8 +451,7 @@ def main() -> None: ) if not cfg.pci: raise KsftSkipEx("Could not get PCI address of the interface") - cfg.require_cmd("iperf3") - cfg.require_cmd("iperf3", remote=True) + cfg.require_cmd("iperf3", local=True, remote=True) cfg.bw_validator = BandwidthValidator() diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py index 6e90fb290564..72880e388478 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py +++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py @@ -32,7 +32,7 @@ def test_rss_input_xfrm(cfg, ipver): if multiprocessing.cpu_count() < 2: raise KsftSkipEx("Need at least two CPUs to test symmetric RSS hash") - cfg.require_cmd("socat", remote=True) + cfg.require_cmd("socat", local=False, remote=True) if not hasattr(socket, "SO_INCOMING_CPU"): raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11") diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py index 3370827409aa..3500d8f1bac4 100755 --- a/tools/testing/selftests/drivers/net/hw/tso.py +++ b/tools/testing/selftests/drivers/net/hw/tso.py @@ -34,7 +34,7 @@ def tcp_sock_get_retrans(sock): def run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso): - cfg.require_cmd("socat", remote=True) + cfg.require_cmd("socat", local=False, remote=True) port = rand_port() listen_cmd = f"socat -{ipver} -t 2 -u TCP-LISTEN:{port},reuseport /dev/null,ignoreeof" diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py index 44151b7b1a24..c4e808407cc4 100644 --- a/tools/testing/selftests/drivers/net/lib/py/load.py +++ b/tools/testing/selftests/drivers/net/lib/py/load.py @@ -7,7 +7,7 @@ from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen class GenerateTraffic: def __init__(self, env, port=None): - env.require_cmd("iperf3", remote=True) + env.require_cmd("iperf3", local=True, remote=True) self.env = env diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py index e0f114612c1a..da3623c5e8a9 100755 --- a/tools/testing/selftests/drivers/net/ping.py +++ b/tools/testing/selftests/drivers/net/ping.py @@ -30,7 +30,7 @@ def _test_v6(cfg) -> None: cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["6"], host=cfg.remote) def _test_tcp(cfg) -> None: - cfg.require_cmd("socat", remote=True) + cfg.require_cmd("socat", local=False, remote=True) port = rand_port() listen_cmd = f"socat -{cfg.addr_ipver} -t 2 -u TCP-LISTEN:{port},reuseport STDOUT" -- cgit v1.2.3 From 266b835e5e84a0f8fec7fd988ee81925890e8d89 Mon Sep 17 00:00:00 2001 From: Daniel Zahka Date: Wed, 23 Jul 2025 11:47:36 -0700 Subject: selftests: drv-net: tso: enable test cases based on hw_features tso.py uses the active features at the time of test execution as the set of available gso features to test. This means if a gso feature is supported but toggled off at test start, the test will be skipped with a "Device does not support {feature}" message. Instead, we can enumerate the set of toggleable features by capturing the driver's hw_features bitmap. To avoid configuration side-effects from running the test, we also snapshot the wanted_features flag set before making any feature changes, and then attempt to restore the same set of wanted_features before test exit. Fixes: 0d0f4174f6c8 ("selftests: drv-net: add a simple TSO test") Signed-off-by: Daniel Zahka Link: https://patch.msgid.link/20250723184740.4075410-2-daniel.zahka@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/tso.py | 52 ++++++++++++++++++++------- 1 file changed, 40 insertions(+), 12 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py index 3370827409aa..f8386e3d88cd 100755 --- a/tools/testing/selftests/drivers/net/hw/tso.py +++ b/tools/testing/selftests/drivers/net/hw/tso.py @@ -119,15 +119,30 @@ def build_tunnel(cfg, outer_ipver, tun_info): return remote_v4, remote_v6 +def restore_wanted_features(cfg): + features_cmd = "" + for feature in cfg.hw_features: + setting = "on" if feature in cfg.wanted_features else "off" + features_cmd += f" {feature} {setting}" + try: + ethtool(f"-K {cfg.ifname} {features_cmd}") + except Exception as e: + ksft_pr(f"WARNING: failure restoring wanted features: {e}") + + def test_builder(name, cfg, outer_ipver, feature, tun=None, inner_ipver=None): """Construct specific tests from the common template.""" def f(cfg): cfg.require_ipver(outer_ipver) + defer(restore_wanted_features, cfg) if not cfg.have_stat_super_count and \ not cfg.have_stat_wire_count: raise KsftSkipEx(f"Device does not support LSO queue stats") + if feature not in cfg.hw_features: + raise KsftSkipEx(f"Device does not support {feature}") + ipver = outer_ipver if tun: remote_v4, remote_v6 = build_tunnel(cfg, ipver, tun) @@ -138,12 +153,12 @@ def test_builder(name, cfg, outer_ipver, feature, tun=None, inner_ipver=None): tun_partial = tun and tun[1] # Tunnel which can silently fall back to gso-partial - has_gso_partial = tun and 'tx-gso-partial' in cfg.features + has_gso_partial = tun and 'tx-gso-partial' in cfg.hw_features # For TSO4 via partial we need mangleid if ipver == "4" and feature in cfg.partial_features: ksft_pr("Testing with mangleid enabled") - if 'tx-tcp-mangleid-segmentation' not in cfg.features: + if 'tx-tcp-mangleid-segmentation' not in cfg.hw_features: ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation on") defer(ethtool, f"-K {cfg.ifname} tx-tcp-mangleid-segmentation off") @@ -161,11 +176,8 @@ def test_builder(name, cfg, outer_ipver, feature, tun=None, inner_ipver=None): should_lso=tun_partial) # Full feature enabled. - if feature in cfg.features: - ethtool(f"-K {cfg.ifname} {feature} on") - run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=True) - else: - raise KsftXfailEx(f"Device does not support {feature}") + ethtool(f"-K {cfg.ifname} {feature} on") + run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=True) f.__name__ = name + ((outer_ipver + "_") if tun else "") + "ipv" + inner_ipver return f @@ -176,23 +188,39 @@ def query_nic_features(cfg) -> None: cfg.have_stat_super_count = False cfg.have_stat_wire_count = False - cfg.features = set() features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}}) - for f in features["active"]["bits"]["bit"]: - cfg.features.add(f["name"]) + + cfg.wanted_features = set() + for f in features["wanted"]["bits"]["bit"]: + cfg.wanted_features.add(f["name"]) + + cfg.hw_features = set() + hw_all_features_cmd = "" + for f in features["hw"]["bits"]["bit"]: + if f.get("value", False): + feature = f["name"] + cfg.hw_features.add(feature) + hw_all_features_cmd += f" {feature} on" + try: + ethtool(f"-K {cfg.ifname} {hw_all_features_cmd}") + except Exception as e: + ksft_pr(f"WARNING: failure enabling all hw features: {e}") + ksft_pr("partial gso feature detection may be impacted") # Check which features are supported via GSO partial cfg.partial_features = set() - if 'tx-gso-partial' in cfg.features: + if 'tx-gso-partial' in cfg.hw_features: ethtool(f"-K {cfg.ifname} tx-gso-partial off") no_partial = set() features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}}) for f in features["active"]["bits"]["bit"]: no_partial.add(f["name"]) - cfg.partial_features = cfg.features - no_partial + cfg.partial_features = cfg.hw_features - no_partial ethtool(f"-K {cfg.ifname} tx-gso-partial on") + restore_wanted_features(cfg) + stats = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True) if stats: if 'tx-hw-gso-packets' in stats[0]: -- cgit v1.2.3 From 2cfbcc5d8af9199823151c21f740e476b223dd2e Mon Sep 17 00:00:00 2001 From: Daniel Zahka Date: Wed, 23 Jul 2025 11:47:37 -0700 Subject: selftests: drv-net: tso: fix vxlan tunnel flags to get correct gso_type When vxlan is used with ipv6 as the outer network header, the correct ip link parameters for acheiving the SKB_GSO_UDP_TUNNEL gso type is "udp6zerocsumtx udp6zerocsumrx". Otherwise the gso type will be SKB_GSO_UDP_TUNNEL_CSUM. This bug was the reason for the second of the three possible invocations of run_one_stream() invocations, so that can be deleted as well. We only need to test with the feature off and on. Fixes: 0d0f4174f6c8 ("selftests: drv-net: add a simple TSO test") Signed-off-by: Daniel Zahka Link: https://patch.msgid.link/20250723184740.4075410-3-daniel.zahka@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/tso.py | 37 ++++++++++----------------- 1 file changed, 13 insertions(+), 24 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py index f8386e3d88cd..6461a83b3d0e 100755 --- a/tools/testing/selftests/drivers/net/hw/tso.py +++ b/tools/testing/selftests/drivers/net/hw/tso.py @@ -102,7 +102,7 @@ def build_tunnel(cfg, outer_ipver, tun_info): remote_addr = cfg.remote_addr_v[outer_ipver] tun_type = tun_info[0] - tun_arg = tun_info[2] + tun_arg = tun_info[1] ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {local_addr} remote {remote_addr} dev {cfg.ifname}") defer(ip, f"link del {tun_type}-ksft") ip(f"link set dev {tun_type}-ksft up") @@ -151,29 +151,17 @@ def test_builder(name, cfg, outer_ipver, feature, tun=None, inner_ipver=None): remote_v4 = cfg.remote_addr_v["4"] remote_v6 = cfg.remote_addr_v["6"] - tun_partial = tun and tun[1] - # Tunnel which can silently fall back to gso-partial - has_gso_partial = tun and 'tx-gso-partial' in cfg.hw_features - - # For TSO4 via partial we need mangleid - if ipver == "4" and feature in cfg.partial_features: - ksft_pr("Testing with mangleid enabled") - if 'tx-tcp-mangleid-segmentation' not in cfg.hw_features: - ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation on") - defer(ethtool, f"-K {cfg.ifname} tx-tcp-mangleid-segmentation off") - # First test without the feature enabled. ethtool(f"-K {cfg.ifname} {feature} off") - if has_gso_partial: - ethtool(f"-K {cfg.ifname} tx-gso-partial off") run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=False) - # Now test with the feature enabled. - # For compatible tunnels only - just GSO partial, not specific feature. - if has_gso_partial: + ethtool(f"-K {cfg.ifname} tx-gso-partial off") + ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation off") + if feature in cfg.partial_features: ethtool(f"-K {cfg.ifname} tx-gso-partial on") - run_one_stream(cfg, ipver, remote_v4, remote_v6, - should_lso=tun_partial) + if ipver == "4": + ksft_pr("Testing with mangleid enabled") + ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation on") # Full feature enabled. ethtool(f"-K {cfg.ifname} {feature} on") @@ -239,13 +227,14 @@ def main() -> None: query_nic_features(cfg) test_info = ( - # name, v4/v6 ethtool_feature tun:(type, partial, args) + # name, v4/v6 ethtool_feature tun:(type, args) ("", "4", "tx-tcp-segmentation", None), ("", "6", "tx-tcp6-segmentation", None), - ("vxlan", "", "tx-udp_tnl-segmentation", ("vxlan", True, "id 100 dstport 4789 noudpcsum")), - ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", False, "id 100 dstport 4789 udpcsum")), - ("gre", "4", "tx-gre-segmentation", ("gre", False, "")), - ("gre", "6", "tx-gre-segmentation", ("ip6gre", False, "")), + ("vxlan", "4", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 noudpcsum")), + ("vxlan", "6", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 udp6zerocsumtx udp6zerocsumrx")), + ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", "id 100 dstport 4789 udpcsum")), + ("gre", "4", "tx-gre-segmentation", ("gre", "")), + ("gre", "6", "tx-gre-segmentation", ("ip6gre", "")), ) cases = [] -- cgit v1.2.3 From b25b44cd178cc54277f2dc0ff3b3d5a37ae4b26b Mon Sep 17 00:00:00 2001 From: Daniel Zahka Date: Wed, 23 Jul 2025 11:47:38 -0700 Subject: selftests: drv-net: tso: fix non-tunneled tso6 test case name The non-tunneled tso6 test case was showing up as: ok 8 tso.ipv4 This is because of the way test_builder() uses the inner_ipver arg in test naming, and how test_info is iterated over in main(). Given that some tunnels not supported yet, e.g. ipip or sit, only support ipv4 or ipv6 as the inner network protocol, I think the best fix here is to call test_builder() in separate branches for tunneled and non-tunneled tests, and to make supported inner l3 types an explicit attribute of tunnel test cases. # Detected qstat for LSO wire-packets TAP version 13 1..14 ok 1 tso.ipv4 # Testing with mangleid enabled ok 2 tso.vxlan4_ipv4 ok 3 tso.vxlan4_ipv6 # Testing with mangleid enabled ok 4 tso.vxlan_csum4_ipv4 ok 5 tso.vxlan_csum4_ipv6 # Testing with mangleid enabled ok 6 tso.gre4_ipv4 ok 7 tso.gre4_ipv6 ok 8 tso.ipv6 # Testing with mangleid enabled ok 9 tso.vxlan6_ipv4 ok 10 tso.vxlan6_ipv6 # Testing with mangleid enabled ok 11 tso.vxlan_csum6_ipv4 ok 12 tso.vxlan_csum6_ipv6 # Testing with mangleid enabled ok 13 tso.gre6_ipv4 ok 14 tso.gre6_ipv6 # Totals: pass:14 fail:0 xfail:0 xpass:0 skip:0 error:0 Fixes: 0d0f4174f6c8 ("selftests: drv-net: add a simple TSO test") Signed-off-by: Daniel Zahka Link: https://patch.msgid.link/20250723184740.4075410-4-daniel.zahka@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/tso.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py index 6461a83b3d0e..5fddb5056a20 100755 --- a/tools/testing/selftests/drivers/net/hw/tso.py +++ b/tools/testing/selftests/drivers/net/hw/tso.py @@ -227,14 +227,14 @@ def main() -> None: query_nic_features(cfg) test_info = ( - # name, v4/v6 ethtool_feature tun:(type, args) - ("", "4", "tx-tcp-segmentation", None), - ("", "6", "tx-tcp6-segmentation", None), - ("vxlan", "4", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 noudpcsum")), - ("vxlan", "6", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 udp6zerocsumtx udp6zerocsumrx")), - ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", "id 100 dstport 4789 udpcsum")), - ("gre", "4", "tx-gre-segmentation", ("gre", "")), - ("gre", "6", "tx-gre-segmentation", ("ip6gre", "")), + # name, v4/v6 ethtool_feature tun:(type, args, inner ip versions) + ("", "4", "tx-tcp-segmentation", None), + ("", "6", "tx-tcp6-segmentation", None), + ("vxlan", "4", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 noudpcsum", ("4", "6"))), + ("vxlan", "6", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 udp6zerocsumtx udp6zerocsumrx", ("4", "6"))), + ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", "id 100 dstport 4789 udpcsum", ("4", "6"))), + ("gre", "4", "tx-gre-segmentation", ("gre", "", ("4", "6"))), + ("gre", "6", "tx-gre-segmentation", ("ip6gre","", ("4", "6"))), ) cases = [] @@ -244,11 +244,13 @@ def main() -> None: if info[1] and outer_ipver != info[1]: continue - cases.append(test_builder(info[0], cfg, outer_ipver, info[2], - tun=info[3], inner_ipver="4")) if info[3]: - cases.append(test_builder(info[0], cfg, outer_ipver, info[2], - tun=info[3], inner_ipver="6")) + cases += [ + test_builder(info[0], cfg, outer_ipver, info[2], info[3], inner_ipver) + for inner_ipver in info[3][2] + ] + else: + cases.append(test_builder(info[0], cfg, outer_ipver, info[2], None, outer_ipver)) ksft_run(cases=cases, args=(cfg, )) ksft_exit() -- cgit v1.2.3 From d53f248258e11e145b773a925ad1a8590ce4618e Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 17 Jul 2025 17:56:00 +0100 Subject: tools/testing/selftests: extend mremap_test to test multi-VMA mremap Now that we have added the ability to move multiple VMAs at once, assert that this functions correctly, both overwriting VMAs and moving backwards and forwards with merge and VMA invalidation. Additionally assert that page tables are correctly propagated by setting random data and reading it back. Link: https://lkml.kernel.org/r/139074a24a011ca4ed52498a7fa2080024b43917.1752770784.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Cc: Al Viro Cc: Christian Brauner Cc: Jan Kara Cc: Jann Horn Cc: Liam Howlett Cc: Peter Xu Cc: Rik van Riel Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/mremap_test.c | 146 ++++++++++++++++++++++++++++++- 1 file changed, 145 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c index bb84476a177f..0a49be11e614 100644 --- a/tools/testing/selftests/mm/mremap_test.c +++ b/tools/testing/selftests/mm/mremap_test.c @@ -380,6 +380,149 @@ out: ksft_test_result_fail("%s\n", test_name); } +static bool is_multiple_vma_range_ok(unsigned int pattern_seed, + char *ptr, unsigned long page_size) +{ + int i; + + srand(pattern_seed); + for (i = 0; i <= 10; i += 2) { + int j; + char *buf = &ptr[i * page_size]; + size_t size = i == 4 ? 2 * page_size : page_size; + + for (j = 0; j < size; j++) { + char chr = rand(); + + if (chr != buf[j]) { + ksft_print_msg("page %d offset %d corrupted, expected %d got %d\n", + i, j, chr, buf[j]); + return false; + } + } + } + + return true; +} + +static void mremap_move_multiple_vmas(unsigned int pattern_seed, + unsigned long page_size) +{ + char *test_name = "mremap move multiple vmas"; + const size_t size = 11 * page_size; + bool success = true; + char *ptr, *tgt_ptr; + int i; + + ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (ptr == MAP_FAILED) { + perror("mmap"); + success = false; + goto out; + } + + tgt_ptr = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (tgt_ptr == MAP_FAILED) { + perror("mmap"); + success = false; + goto out; + } + if (munmap(tgt_ptr, 2 * size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + + /* + * Unmap so we end up with: + * + * 0 2 4 5 6 8 10 offset in buffer + * |*| |*| |*****| |*| |*| + * |*| |*| |*****| |*| |*| + * 0 1 2 3 4 5 6 pattern offset + */ + for (i = 1; i < 10; i += 2) { + if (i == 5) + continue; + + if (munmap(&ptr[i * page_size], page_size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + } + + srand(pattern_seed); + + /* Set up random patterns. */ + for (i = 0; i <= 10; i += 2) { + int j; + size_t size = i == 4 ? 2 * page_size : page_size; + char *buf = &ptr[i * page_size]; + + for (j = 0; j < size; j++) + buf[j] = rand(); + } + + /* First, just move the whole thing. */ + if (mremap(ptr, size, size, + MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr) == MAP_FAILED) { + perror("mremap"); + success = false; + goto out_unmap; + } + /* Check move was ok. */ + if (!is_multiple_vma_range_ok(pattern_seed, tgt_ptr, page_size)) { + success = false; + goto out_unmap; + } + + /* Move next to itself. */ + if (mremap(tgt_ptr, size, size, + MREMAP_MAYMOVE | MREMAP_FIXED, &tgt_ptr[size]) == MAP_FAILED) { + perror("mremap"); + goto out_unmap; + } + /* Check that the move is ok. */ + if (!is_multiple_vma_range_ok(pattern_seed, &tgt_ptr[size], page_size)) { + success = false; + goto out_unmap; + } + + /* Map a range to overwrite. */ + if (mmap(tgt_ptr, size, PROT_NONE, + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0) == MAP_FAILED) { + perror("mmap tgt"); + success = false; + goto out_unmap; + } + /* Move and overwrite. */ + if (mremap(&tgt_ptr[size], size, size, + MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr) == MAP_FAILED) { + perror("mremap"); + goto out_unmap; + } + /* Check that the move is ok. */ + if (!is_multiple_vma_range_ok(pattern_seed, tgt_ptr, page_size)) { + success = false; + goto out_unmap; + } + +out_unmap: + if (munmap(tgt_ptr, 2 * size)) + perror("munmap tgt"); + if (munmap(ptr, size)) + perror("munmap src"); + +out: + if (success) + ksft_test_result_pass("%s\n", test_name); + else + ksft_test_result_fail("%s\n", test_name); +} + /* Returns the time taken for the remap on success else returns -1. */ static long long remap_region(struct config c, unsigned int threshold_mb, char *rand_addr) @@ -721,7 +864,7 @@ int main(int argc, char **argv) char *rand_addr; size_t rand_size; int num_expand_tests = 2; - int num_misc_tests = 2; + int num_misc_tests = 3; struct test test_cases[MAX_TEST] = {}; struct test perf_test_cases[MAX_PERF_TEST]; int page_size; @@ -848,6 +991,7 @@ int main(int argc, char **argv) mremap_move_within_range(pattern_seed, rand_addr); mremap_move_1mb_from_start(pattern_seed, rand_addr); + mremap_move_multiple_vmas(pattern_seed, page_size); if (run_perf_tests) { ksft_print_msg("\n%s\n", -- cgit v1.2.3 From beb69e81724634063b9dbae4bc79e2e011fdeeb1 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Sat, 19 Jul 2025 11:28:49 -0700 Subject: selftests/proc: add /proc/pid/maps tearing from vma split test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "use per-vma locks for /proc/pid/maps reads", v8. Reading /proc/pid/maps requires read-locking mmap_lock which prevents any other task from concurrently modifying the address space. This guarantees coherent reporting of virtual address ranges, however it can block important updates from happening. Oftentimes /proc/pid/maps readers are low priority monitoring tasks and them blocking high priority tasks results in priority inversion. Locking the entire address space is required to present fully coherent picture of the address space, however even current implementation does not strictly guarantee that by outputting vmas in page-size chunks and dropping mmap_lock in between each chunk. Address space modifications are possible while mmap_lock is dropped and userspace reading the content is expected to deal with possible concurrent address space modifications. Considering these relaxed rules, holding mmap_lock is not strictly needed as long as we can guarantee that a concurrently modified vma is reported either in its original form or after it was modified. This patchset switches from holding mmap_lock while reading /proc/pid/maps to taking per-vma locks as we walk the vma tree. This reduces the contention with tasks modifying the address space because they would have to contend for the same vma as opposed to the entire address space. Previous version of this patchset [1] tried to perform /proc/pid/maps reading under RCU, however its implementation is quite complex and the results are worse than the new version because it still relied on mmap_lock speculation which retries if any part of the address space gets modified. New implementaion is both simpler and results in less contention. Note that similar approach would not work for /proc/pid/smaps reading as it also walks the page table and that's not RCU-safe. Paul McKenney's designed a test [2] to measure mmap/munmap latencies while concurrently reading /proc/pid/maps. The test has a pair of processes scanning /proc/PID/maps, and another process unmapping and remapping 4K pages from a 128MB range of anonymous memory. At the end of each 10 second run, the latency of each mmap() or munmap() operation is measured, and for each run the maximum and mean latency is printed. The map/unmap process is started first, its PID is passed to the scanners, and then the map/unmap process waits until both scanners are running before starting its timed test. The scanners keep scanning until the specified /proc/PID/maps file disappears. The latest results from Paul: Stock mm-unstable, all of the runs had maximum latencies in excess of 0.5 milliseconds, and with 80% of the runs' latencies exceeding a full millisecond, and ranging up beyond 4 full milliseconds. In contrast, 99% of the runs with this patch series applied had maximum latencies of less than 0.5 milliseconds, with the single outlier at only 0.608 milliseconds. From a median-performance (as opposed to maximum-latency) viewpoint, this patch series also looks good, with stock mm weighing in at 11 microseconds and patch series at 6 microseconds, better than a 2x improvement. Before the change: ./run-proc-vs-map.sh --nsamples 100 --rawdata -- --busyduration 2 0.011 0.008 0.521 0.011 0.008 0.552 0.011 0.008 0.590 0.011 0.008 0.660 ... 0.011 0.015 2.987 0.011 0.015 3.038 0.011 0.016 3.431 0.011 0.016 4.707 After the change: ./run-proc-vs-map.sh --nsamples 100 --rawdata -- --busyduration 2 0.006 0.005 0.026 0.006 0.005 0.029 0.006 0.005 0.034 0.006 0.005 0.035 ... 0.006 0.006 0.421 0.006 0.006 0.423 0.006 0.006 0.439 0.006 0.006 0.608 The patchset also adds a number of tests to check for /proc/pid/maps data coherency. They are designed to detect any unexpected data tearing while performing some common address space modifications (vma split, resize and remap). Even before these changes, reading /proc/pid/maps might have inconsistent data because the file is read page-by-page with mmap_lock being dropped between the pages. An example of user-visible inconsistency can be that the same vma is printed twice: once before it was modified and then after the modifications. For example if vma was extended, it might be found and reported twice. What is not expected is to see a gap where there should have been a vma both before and after modification. This patchset increases the chances of such tearing, therefore it's even more important now to test for unexpected inconsistencies. In [3] Lorenzo identified the following possible vma merging/splitting scenarios: Merges with changes to existing vmas: 1 Merge both - mapping a vma over another one and between two vmas which can be merged after this replacement; 2. Merge left full - mapping a vma at the end of an existing one and completely over its right neighbor; 3. Merge left partial - mapping a vma at the end of an existing one and partially over its right neighbor; 4. Merge right full - mapping a vma before the start of an existing one and completely over its left neighbor; 5. Merge right partial - mapping a vma before the start of an existing one and partially over its left neighbor; Merges without changes to existing vmas: 6. Merge both - mapping a vma into a gap between two vmas which can be merged after the insertion; 7. Merge left - mapping a vma at the end of an existing one; 8. Merge right - mapping a vma before the start end of an existing one; Splits 9. Split with new vma at the lower address; 10. Split with new vma at the higher address; If such merges or splits happen concurrently with the /proc/maps reading we might report a vma twice, once before the modification and once after it is modified: Case 1 might report overwritten and previous vma along with the final merged vma; Case 2 might report previous and the final merged vma; Case 3 might cause us to retry once we detect the temporary gap caused by shrinking of the right neighbor; Case 4 might report overritten and the final merged vma; Case 5 might cause us to retry once we detect the temporary gap caused by shrinking of the left neighbor; Case 6 might report previous vma and the gap along with the final marged vma; Case 7 might report previous and the final merged vma; Case 8 might report the original gap and the final merged vma covering the gap; Case 9 might cause us to retry once we detect the temporary gap caused by shrinking of the original vma at the vma start; Case 10 might cause us to retry once we detect the temporary gap caused by shrinking of the original vma at the vma end; In all these cases the retry mechanism prevents us from reporting possible temporary gaps. [1] https://lore.kernel.org/all/20250418174959.1431962-1-surenb@google.com/ [2] https://github.com/paulmckrcu/proc-mmap_sem-test [3] https://lore.kernel.org/all/e1863f40-39ab-4e5b-984a-c48765ffde1c@lucifer.local/ The /proc/pid/maps file is generated page by page, with the mmap_lock released between pages. This can lead to inconsistent reads if the underlying vmas are concurrently modified. For instance, if a vma split or merge occurs at a page boundary while /proc/pid/maps is being read, the same vma might be seen twice: once before and once after the change. This duplication is considered acceptable for userspace handling. However, observing a "hole" where a vma should be (e.g., due to a vma being replaced and the space temporarily being empty) is unacceptable. Implement a test that: 1. Forks a child process which continuously modifies its address space, specifically targeting a vma at the boundary between two pages. 2. The parent process repeatedly reads the child's /proc/pid/maps. 3. The parent process checks the last vma of the first page and the first vma of the second page for consistency, looking for the effects of vma splits or merges. The test duration is configurable via DURATION environment variable expressed in seconds. The default test duration is 5 seconds. Example Command: DURATION=10 ./proc-maps-race Link: https://lore.kernel.org/all/20250418174959.1431962-1-surenb@google.com/ [1] Link: https://github.com/paulmckrcu/proc-mmap_sem-test [2] Link: https://lore.kernel.org/all/e1863f40-39ab-4e5b-984a-c48765ffde1c@lucifer.local/ [3] Link: https://lkml.kernel.org/r/20250719182854.3166724-1-surenb@google.com Link: https://lkml.kernel.org/r/20250719182854.3166724-2-surenb@google.com Signed-off-by: Suren Baghdasaryan Cc: Alexey Dobriyan Cc: Andrii Nakryiko Cc: Christian Brauner Cc: Christophe Leroy Cc: David Hildenbrand Cc: Jann Horn Cc: Jeongjun Park Cc: Johannes Weiner Cc: Josef Bacik Cc: Kalesh Singh Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Oscar Salvador Cc: "Paul E . McKenney" Cc: Peter Xu Cc: Ryan Roberts Cc: Shuah Khan Cc: Thomas Weißschuh Cc: T.J. Mercier Cc: Vlastimil Babka Cc: Ye Bin Signed-off-by: Andrew Morton --- tools/testing/selftests/proc/.gitignore | 1 + tools/testing/selftests/proc/Makefile | 1 + tools/testing/selftests/proc/proc-maps-race.c | 447 ++++++++++++++++++++++++++ 3 files changed, 449 insertions(+) create mode 100644 tools/testing/selftests/proc/proc-maps-race.c (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore index 973968f45bba..19bb333e2485 100644 --- a/tools/testing/selftests/proc/.gitignore +++ b/tools/testing/selftests/proc/.gitignore @@ -5,6 +5,7 @@ /proc-2-is-kthread /proc-fsconfig-hidepid /proc-loadavg-001 +/proc-maps-race /proc-multiple-procfs /proc-empty-vm /proc-pid-vm diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile index b12921b9794b..50aba102201a 100644 --- a/tools/testing/selftests/proc/Makefile +++ b/tools/testing/selftests/proc/Makefile @@ -9,6 +9,7 @@ TEST_GEN_PROGS += fd-002-posix-eq TEST_GEN_PROGS += fd-003-kthread TEST_GEN_PROGS += proc-2-is-kthread TEST_GEN_PROGS += proc-loadavg-001 +TEST_GEN_PROGS += proc-maps-race TEST_GEN_PROGS += proc-empty-vm TEST_GEN_PROGS += proc-pid-vm TEST_GEN_PROGS += proc-self-map-files-001 diff --git a/tools/testing/selftests/proc/proc-maps-race.c b/tools/testing/selftests/proc/proc-maps-race.c new file mode 100644 index 000000000000..5b28dda08b7d --- /dev/null +++ b/tools/testing/selftests/proc/proc-maps-race.c @@ -0,0 +1,447 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022 Google LLC. + * Author: Suren Baghdasaryan + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* + * Fork a child that concurrently modifies address space while the main + * process is reading /proc/$PID/maps and verifying the results. Address + * space modifications include: + * VMA splitting and merging + * + */ +#define _GNU_SOURCE +#include "../kselftest_harness.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* /proc/pid/maps parsing routines */ +struct page_content { + char *data; + ssize_t size; +}; + +#define LINE_MAX_SIZE 256 + +struct line_content { + char text[LINE_MAX_SIZE]; + unsigned long start_addr; + unsigned long end_addr; +}; + +enum test_state { + INIT, + CHILD_READY, + PARENT_READY, + SETUP_READY, + SETUP_MODIFY_MAPS, + SETUP_MAPS_MODIFIED, + SETUP_RESTORE_MAPS, + SETUP_MAPS_RESTORED, + TEST_READY, + TEST_DONE, +}; + +struct vma_modifier_info; + +FIXTURE(proc_maps_race) +{ + struct vma_modifier_info *mod_info; + struct page_content page1; + struct page_content page2; + struct line_content last_line; + struct line_content first_line; + unsigned long duration_sec; + int shared_mem_size; + int page_size; + int vma_count; + int maps_fd; + pid_t pid; +}; + +typedef bool (*vma_modifier_op)(FIXTURE_DATA(proc_maps_race) *self); +typedef bool (*vma_mod_result_check_op)(struct line_content *mod_last_line, + struct line_content *mod_first_line, + struct line_content *restored_last_line, + struct line_content *restored_first_line); + +struct vma_modifier_info { + int vma_count; + void *addr; + int prot; + void *next_addr; + vma_modifier_op vma_modify; + vma_modifier_op vma_restore; + vma_mod_result_check_op vma_mod_check; + pthread_mutex_t sync_lock; + pthread_cond_t sync_cond; + enum test_state curr_state; + bool exit; + void *child_mapped_addr[]; +}; + + +static bool read_two_pages(FIXTURE_DATA(proc_maps_race) *self) +{ + ssize_t bytes_read; + + if (lseek(self->maps_fd, 0, SEEK_SET) < 0) + return false; + + bytes_read = read(self->maps_fd, self->page1.data, self->page_size); + if (bytes_read <= 0) + return false; + + self->page1.size = bytes_read; + + bytes_read = read(self->maps_fd, self->page2.data, self->page_size); + if (bytes_read <= 0) + return false; + + self->page2.size = bytes_read; + + return true; +} + +static void copy_first_line(struct page_content *page, char *first_line) +{ + char *pos = strchr(page->data, '\n'); + + strncpy(first_line, page->data, pos - page->data); + first_line[pos - page->data] = '\0'; +} + +static void copy_last_line(struct page_content *page, char *last_line) +{ + /* Get the last line in the first page */ + const char *end = page->data + page->size - 1; + /* skip last newline */ + const char *pos = end - 1; + + /* search previous newline */ + while (pos[-1] != '\n') + pos--; + strncpy(last_line, pos, end - pos); + last_line[end - pos] = '\0'; +} + +/* Read the last line of the first page and the first line of the second page */ +static bool read_boundary_lines(FIXTURE_DATA(proc_maps_race) *self, + struct line_content *last_line, + struct line_content *first_line) +{ + if (!read_two_pages(self)) + return false; + + copy_last_line(&self->page1, last_line->text); + copy_first_line(&self->page2, first_line->text); + + return sscanf(last_line->text, "%lx-%lx", &last_line->start_addr, + &last_line->end_addr) == 2 && + sscanf(first_line->text, "%lx-%lx", &first_line->start_addr, + &first_line->end_addr) == 2; +} + +/* Thread synchronization routines */ +static void wait_for_state(struct vma_modifier_info *mod_info, enum test_state state) +{ + pthread_mutex_lock(&mod_info->sync_lock); + while (mod_info->curr_state != state) + pthread_cond_wait(&mod_info->sync_cond, &mod_info->sync_lock); + pthread_mutex_unlock(&mod_info->sync_lock); +} + +static void signal_state(struct vma_modifier_info *mod_info, enum test_state state) +{ + pthread_mutex_lock(&mod_info->sync_lock); + mod_info->curr_state = state; + pthread_cond_signal(&mod_info->sync_cond); + pthread_mutex_unlock(&mod_info->sync_lock); +} + +static void stop_vma_modifier(struct vma_modifier_info *mod_info) +{ + wait_for_state(mod_info, SETUP_READY); + mod_info->exit = true; + signal_state(mod_info, SETUP_MODIFY_MAPS); +} + +static bool capture_mod_pattern(FIXTURE_DATA(proc_maps_race) *self, + struct line_content *mod_last_line, + struct line_content *mod_first_line, + struct line_content *restored_last_line, + struct line_content *restored_first_line) +{ + signal_state(self->mod_info, SETUP_MODIFY_MAPS); + wait_for_state(self->mod_info, SETUP_MAPS_MODIFIED); + + /* Copy last line of the first page and first line of the last page */ + if (!read_boundary_lines(self, mod_last_line, mod_first_line)) + return false; + + signal_state(self->mod_info, SETUP_RESTORE_MAPS); + wait_for_state(self->mod_info, SETUP_MAPS_RESTORED); + + /* Copy last line of the first page and first line of the last page */ + if (!read_boundary_lines(self, restored_last_line, restored_first_line)) + return false; + + if (!self->mod_info->vma_mod_check(mod_last_line, mod_first_line, + restored_last_line, restored_first_line)) + return false; + + /* + * The content of these lines after modify+resore should be the same + * as the original. + */ + return strcmp(restored_last_line->text, self->last_line.text) == 0 && + strcmp(restored_first_line->text, self->first_line.text) == 0; +} + +static inline bool split_vma(FIXTURE_DATA(proc_maps_race) *self) +{ + return mmap(self->mod_info->addr, self->page_size, self->mod_info->prot | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) != MAP_FAILED; +} + +static inline bool merge_vma(FIXTURE_DATA(proc_maps_race) *self) +{ + return mmap(self->mod_info->addr, self->page_size, self->mod_info->prot, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) != MAP_FAILED; +} + +static inline bool check_split_result(struct line_content *mod_last_line, + struct line_content *mod_first_line, + struct line_content *restored_last_line, + struct line_content *restored_first_line) +{ + /* Make sure vmas at the boundaries are changing */ + return strcmp(mod_last_line->text, restored_last_line->text) != 0 && + strcmp(mod_first_line->text, restored_first_line->text) != 0; +} + +FIXTURE_SETUP(proc_maps_race) +{ + const char *duration = getenv("DURATION"); + struct vma_modifier_info *mod_info; + pthread_mutexattr_t mutex_attr; + pthread_condattr_t cond_attr; + unsigned long duration_sec; + char fname[32]; + + self->page_size = (unsigned long)sysconf(_SC_PAGESIZE); + duration_sec = duration ? atol(duration) : 0; + self->duration_sec = duration_sec ? duration_sec : 5UL; + + /* + * Have to map enough vmas for /proc/pid/maps to contain more than one + * page worth of vmas. Assume at least 32 bytes per line in maps output + */ + self->vma_count = self->page_size / 32 + 1; + self->shared_mem_size = sizeof(struct vma_modifier_info) + self->vma_count * sizeof(void *); + + /* map shared memory for communication with the child process */ + self->mod_info = (struct vma_modifier_info *)mmap(NULL, self->shared_mem_size, + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(self->mod_info, MAP_FAILED); + mod_info = self->mod_info; + + /* Initialize shared members */ + pthread_mutexattr_init(&mutex_attr); + pthread_mutexattr_setpshared(&mutex_attr, PTHREAD_PROCESS_SHARED); + ASSERT_EQ(pthread_mutex_init(&mod_info->sync_lock, &mutex_attr), 0); + pthread_condattr_init(&cond_attr); + pthread_condattr_setpshared(&cond_attr, PTHREAD_PROCESS_SHARED); + ASSERT_EQ(pthread_cond_init(&mod_info->sync_cond, &cond_attr), 0); + mod_info->vma_count = self->vma_count; + mod_info->curr_state = INIT; + mod_info->exit = false; + + self->pid = fork(); + if (!self->pid) { + /* Child process modifying the address space */ + int prot = PROT_READ | PROT_WRITE; + int i; + + for (i = 0; i < mod_info->vma_count; i++) { + mod_info->child_mapped_addr[i] = mmap(NULL, self->page_size * 3, prot, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(mod_info->child_mapped_addr[i], MAP_FAILED); + /* change protection in adjacent maps to prevent merging */ + prot ^= PROT_WRITE; + } + signal_state(mod_info, CHILD_READY); + wait_for_state(mod_info, PARENT_READY); + while (true) { + signal_state(mod_info, SETUP_READY); + wait_for_state(mod_info, SETUP_MODIFY_MAPS); + if (mod_info->exit) + break; + + ASSERT_TRUE(mod_info->vma_modify(self)); + signal_state(mod_info, SETUP_MAPS_MODIFIED); + wait_for_state(mod_info, SETUP_RESTORE_MAPS); + ASSERT_TRUE(mod_info->vma_restore(self)); + signal_state(mod_info, SETUP_MAPS_RESTORED); + + wait_for_state(mod_info, TEST_READY); + while (mod_info->curr_state != TEST_DONE) { + ASSERT_TRUE(mod_info->vma_modify(self)); + ASSERT_TRUE(mod_info->vma_restore(self)); + } + } + for (i = 0; i < mod_info->vma_count; i++) + munmap(mod_info->child_mapped_addr[i], self->page_size * 3); + + exit(0); + } + + sprintf(fname, "/proc/%d/maps", self->pid); + self->maps_fd = open(fname, O_RDONLY); + ASSERT_NE(self->maps_fd, -1); + + /* Wait for the child to map the VMAs */ + wait_for_state(mod_info, CHILD_READY); + + /* Read first two pages */ + self->page1.data = malloc(self->page_size); + ASSERT_NE(self->page1.data, NULL); + self->page2.data = malloc(self->page_size); + ASSERT_NE(self->page2.data, NULL); + + ASSERT_TRUE(read_boundary_lines(self, &self->last_line, &self->first_line)); + + /* + * Find the addresses corresponding to the last line in the first page + * and the first line in the last page. + */ + mod_info->addr = NULL; + mod_info->next_addr = NULL; + for (int i = 0; i < mod_info->vma_count; i++) { + if (mod_info->child_mapped_addr[i] == (void *)self->last_line.start_addr) { + mod_info->addr = mod_info->child_mapped_addr[i]; + mod_info->prot = PROT_READ; + /* Even VMAs have write permission */ + if ((i % 2) == 0) + mod_info->prot |= PROT_WRITE; + } else if (mod_info->child_mapped_addr[i] == (void *)self->first_line.start_addr) { + mod_info->next_addr = mod_info->child_mapped_addr[i]; + } + + if (mod_info->addr && mod_info->next_addr) + break; + } + ASSERT_TRUE(mod_info->addr && mod_info->next_addr); + + signal_state(mod_info, PARENT_READY); + +} + +FIXTURE_TEARDOWN(proc_maps_race) +{ + int status; + + stop_vma_modifier(self->mod_info); + + free(self->page2.data); + free(self->page1.data); + + for (int i = 0; i < self->vma_count; i++) + munmap(self->mod_info->child_mapped_addr[i], self->page_size); + close(self->maps_fd); + waitpid(self->pid, &status, 0); + munmap(self->mod_info, self->shared_mem_size); +} + +TEST_F(proc_maps_race, test_maps_tearing_from_split) +{ + struct vma_modifier_info *mod_info = self->mod_info; + + struct line_content split_last_line; + struct line_content split_first_line; + struct line_content restored_last_line; + struct line_content restored_first_line; + + wait_for_state(mod_info, SETUP_READY); + + /* re-read the file to avoid using stale data from previous test */ + ASSERT_TRUE(read_boundary_lines(self, &self->last_line, &self->first_line)); + + mod_info->vma_modify = split_vma; + mod_info->vma_restore = merge_vma; + mod_info->vma_mod_check = check_split_result; + + ASSERT_TRUE(capture_mod_pattern(self, &split_last_line, &split_first_line, + &restored_last_line, &restored_first_line)); + + /* Now start concurrent modifications for self->duration_sec */ + signal_state(mod_info, TEST_READY); + + struct line_content new_last_line; + struct line_content new_first_line; + struct timespec start_ts, end_ts; + + clock_gettime(CLOCK_MONOTONIC_COARSE, &start_ts); + do { + bool last_line_changed; + bool first_line_changed; + + ASSERT_TRUE(read_boundary_lines(self, &new_last_line, &new_first_line)); + + /* Check if we read vmas after split */ + if (!strcmp(new_last_line.text, split_last_line.text)) { + /* + * The vmas should be consistent with split results, + * however if vma was concurrently restored after a + * split, it can be reported twice (first the original + * split one, then the same vma but extended after the + * merge) because we found it as the next vma again. + * In that case new first line will be the same as the + * last restored line. + */ + ASSERT_FALSE(strcmp(new_first_line.text, split_first_line.text) && + strcmp(new_first_line.text, restored_last_line.text)); + } else { + /* The vmas should be consistent with merge results */ + ASSERT_FALSE(strcmp(new_last_line.text, restored_last_line.text)); + ASSERT_FALSE(strcmp(new_first_line.text, restored_first_line.text)); + } + /* + * First and last lines should change in unison. If the last + * line changed then the first line should change as well and + * vice versa. + */ + last_line_changed = strcmp(new_last_line.text, self->last_line.text) != 0; + first_line_changed = strcmp(new_first_line.text, self->first_line.text) != 0; + ASSERT_EQ(last_line_changed, first_line_changed); + + clock_gettime(CLOCK_MONOTONIC_COARSE, &end_ts); + } while (end_ts.tv_sec - start_ts.tv_sec < self->duration_sec); + + /* Signal the modifyer thread to stop and wait until it exits */ + signal_state(mod_info, TEST_DONE); +} + +TEST_HARNESS_MAIN -- cgit v1.2.3 From b11d9e2d7883031afb570545cb9657a5c457349a Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Sat, 19 Jul 2025 11:28:50 -0700 Subject: selftests/proc: extend /proc/pid/maps tearing test to include vma resizing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Test that /proc/pid/maps does not report unexpected holes in the address space when a vma at the edge of the page is being concurrently remapped. This remapping results in the vma shrinking and expanding from under the reader. We should always see either shrunk or expanded (original) version of the vma. Link: https://lkml.kernel.org/r/20250719182854.3166724-3-surenb@google.com Signed-off-by: Suren Baghdasaryan Cc: Alexey Dobriyan Cc: Andrii Nakryiko Cc: Christian Brauner Cc: Christophe Leroy Cc: David Hildenbrand Cc: Jann Horn Cc: Jeongjun Park Cc: Johannes Weiner Cc: Josef Bacik Cc: Kalesh Singh Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Oscar Salvador Cc: "Paul E . McKenney" Cc: Peter Xu Cc: Ryan Roberts Cc: Shuah Khan Cc: Thomas Weißschuh Cc: T.J. Mercier Cc: Vlastimil Babka Cc: Ye Bin Signed-off-by: Andrew Morton --- tools/testing/selftests/proc/proc-maps-race.c | 79 +++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/proc/proc-maps-race.c b/tools/testing/selftests/proc/proc-maps-race.c index 5b28dda08b7d..19028bd3b85c 100644 --- a/tools/testing/selftests/proc/proc-maps-race.c +++ b/tools/testing/selftests/proc/proc-maps-race.c @@ -242,6 +242,28 @@ static inline bool check_split_result(struct line_content *mod_last_line, strcmp(mod_first_line->text, restored_first_line->text) != 0; } +static inline bool shrink_vma(FIXTURE_DATA(proc_maps_race) *self) +{ + return mremap(self->mod_info->addr, self->page_size * 3, + self->page_size, 0) != MAP_FAILED; +} + +static inline bool expand_vma(FIXTURE_DATA(proc_maps_race) *self) +{ + return mremap(self->mod_info->addr, self->page_size, + self->page_size * 3, 0) != MAP_FAILED; +} + +static inline bool check_shrink_result(struct line_content *mod_last_line, + struct line_content *mod_first_line, + struct line_content *restored_last_line, + struct line_content *restored_first_line) +{ + /* Make sure only the last vma of the first page is changing */ + return strcmp(mod_last_line->text, restored_last_line->text) != 0 && + strcmp(mod_first_line->text, restored_first_line->text) == 0; +} + FIXTURE_SETUP(proc_maps_race) { const char *duration = getenv("DURATION"); @@ -444,4 +466,61 @@ TEST_F(proc_maps_race, test_maps_tearing_from_split) signal_state(mod_info, TEST_DONE); } +TEST_F(proc_maps_race, test_maps_tearing_from_resize) +{ + struct vma_modifier_info *mod_info = self->mod_info; + + struct line_content shrunk_last_line; + struct line_content shrunk_first_line; + struct line_content restored_last_line; + struct line_content restored_first_line; + + wait_for_state(mod_info, SETUP_READY); + + /* re-read the file to avoid using stale data from previous test */ + ASSERT_TRUE(read_boundary_lines(self, &self->last_line, &self->first_line)); + + mod_info->vma_modify = shrink_vma; + mod_info->vma_restore = expand_vma; + mod_info->vma_mod_check = check_shrink_result; + + ASSERT_TRUE(capture_mod_pattern(self, &shrunk_last_line, &shrunk_first_line, + &restored_last_line, &restored_first_line)); + + /* Now start concurrent modifications for self->duration_sec */ + signal_state(mod_info, TEST_READY); + + struct line_content new_last_line; + struct line_content new_first_line; + struct timespec start_ts, end_ts; + + clock_gettime(CLOCK_MONOTONIC_COARSE, &start_ts); + do { + ASSERT_TRUE(read_boundary_lines(self, &new_last_line, &new_first_line)); + + /* Check if we read vmas after shrinking it */ + if (!strcmp(new_last_line.text, shrunk_last_line.text)) { + /* + * The vmas should be consistent with shrunk results, + * however if the vma was concurrently restored, it + * can be reported twice (first as shrunk one, then + * as restored one) because we found it as the next vma + * again. In that case new first line will be the same + * as the last restored line. + */ + ASSERT_FALSE(strcmp(new_first_line.text, shrunk_first_line.text) && + strcmp(new_first_line.text, restored_last_line.text)); + } else { + /* The vmas should be consistent with the original/resored state */ + ASSERT_FALSE(strcmp(new_last_line.text, restored_last_line.text)); + ASSERT_FALSE(strcmp(new_first_line.text, restored_first_line.text)); + } + + clock_gettime(CLOCK_MONOTONIC_COARSE, &end_ts); + } while (end_ts.tv_sec - start_ts.tv_sec < self->duration_sec); + + /* Signal the modifyer thread to stop and wait until it exits */ + signal_state(mod_info, TEST_DONE); +} + TEST_HARNESS_MAIN -- cgit v1.2.3 From 6a45336b9b6fcc1d9ef3c6fe9a43252f9a20084b Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Sat, 19 Jul 2025 11:28:51 -0700 Subject: selftests/proc: extend /proc/pid/maps tearing test to include vma remapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Test that /proc/pid/maps does not report unexpected holes in the address space when we concurrently remap a part of a vma into the middle of another vma. This remapping results in the destination vma being split into three parts and the part in the middle being patched back from, all done concurrently from under the reader. We should always see either original vma or the split one with no holes. Link: https://lkml.kernel.org/r/20250719182854.3166724-4-surenb@google.com Signed-off-by: Suren Baghdasaryan Cc: Alexey Dobriyan Cc: Andrii Nakryiko Cc: Christian Brauner Cc: Christophe Leroy Cc: David Hildenbrand Cc: Jann Horn Cc: Jeongjun Park Cc: Johannes Weiner Cc: Josef Bacik Cc: Kalesh Singh Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Oscar Salvador Cc: "Paul E . McKenney" Cc: Peter Xu Cc: Ryan Roberts Cc: Shuah Khan Cc: Thomas Weißschuh Cc: T.J. Mercier Cc: Vlastimil Babka Cc: Ye Bin Signed-off-by: Andrew Morton --- tools/testing/selftests/proc/proc-maps-race.c | 86 +++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/proc/proc-maps-race.c b/tools/testing/selftests/proc/proc-maps-race.c index 19028bd3b85c..bc614a2d944a 100644 --- a/tools/testing/selftests/proc/proc-maps-race.c +++ b/tools/testing/selftests/proc/proc-maps-race.c @@ -264,6 +264,35 @@ static inline bool check_shrink_result(struct line_content *mod_last_line, strcmp(mod_first_line->text, restored_first_line->text) == 0; } +static inline bool remap_vma(FIXTURE_DATA(proc_maps_race) *self) +{ + /* + * Remap the last page of the next vma into the middle of the vma. + * This splits the current vma and the first and middle parts (the + * parts at lower addresses) become the last vma objserved in the + * first page and the first vma observed in the last page. + */ + return mremap(self->mod_info->next_addr + self->page_size * 2, self->page_size, + self->page_size, MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP, + self->mod_info->addr + self->page_size) != MAP_FAILED; +} + +static inline bool patch_vma(FIXTURE_DATA(proc_maps_race) *self) +{ + return mprotect(self->mod_info->addr + self->page_size, self->page_size, + self->mod_info->prot) == 0; +} + +static inline bool check_remap_result(struct line_content *mod_last_line, + struct line_content *mod_first_line, + struct line_content *restored_last_line, + struct line_content *restored_first_line) +{ + /* Make sure vmas at the boundaries are changing */ + return strcmp(mod_last_line->text, restored_last_line->text) != 0 && + strcmp(mod_first_line->text, restored_first_line->text) != 0; +} + FIXTURE_SETUP(proc_maps_race) { const char *duration = getenv("DURATION"); @@ -523,4 +552,61 @@ TEST_F(proc_maps_race, test_maps_tearing_from_resize) signal_state(mod_info, TEST_DONE); } +TEST_F(proc_maps_race, test_maps_tearing_from_remap) +{ + struct vma_modifier_info *mod_info = self->mod_info; + + struct line_content remapped_last_line; + struct line_content remapped_first_line; + struct line_content restored_last_line; + struct line_content restored_first_line; + + wait_for_state(mod_info, SETUP_READY); + + /* re-read the file to avoid using stale data from previous test */ + ASSERT_TRUE(read_boundary_lines(self, &self->last_line, &self->first_line)); + + mod_info->vma_modify = remap_vma; + mod_info->vma_restore = patch_vma; + mod_info->vma_mod_check = check_remap_result; + + ASSERT_TRUE(capture_mod_pattern(self, &remapped_last_line, &remapped_first_line, + &restored_last_line, &restored_first_line)); + + /* Now start concurrent modifications for self->duration_sec */ + signal_state(mod_info, TEST_READY); + + struct line_content new_last_line; + struct line_content new_first_line; + struct timespec start_ts, end_ts; + + clock_gettime(CLOCK_MONOTONIC_COARSE, &start_ts); + do { + ASSERT_TRUE(read_boundary_lines(self, &new_last_line, &new_first_line)); + + /* Check if we read vmas after remapping it */ + if (!strcmp(new_last_line.text, remapped_last_line.text)) { + /* + * The vmas should be consistent with remap results, + * however if the vma was concurrently restored, it + * can be reported twice (first as split one, then + * as restored one) because we found it as the next vma + * again. In that case new first line will be the same + * as the last restored line. + */ + ASSERT_FALSE(strcmp(new_first_line.text, remapped_first_line.text) && + strcmp(new_first_line.text, restored_last_line.text)); + } else { + /* The vmas should be consistent with the original/resored state */ + ASSERT_FALSE(strcmp(new_last_line.text, restored_last_line.text)); + ASSERT_FALSE(strcmp(new_first_line.text, restored_first_line.text)); + } + + clock_gettime(CLOCK_MONOTONIC_COARSE, &end_ts); + } while (end_ts.tv_sec - start_ts.tv_sec < self->duration_sec); + + /* Signal the modifyer thread to stop and wait until it exits */ + signal_state(mod_info, TEST_DONE); +} + TEST_HARNESS_MAIN -- cgit v1.2.3 From aadc099c480f98817849cd44585904402160fe21 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Sat, 19 Jul 2025 11:28:52 -0700 Subject: selftests/proc: add verbose mode for /proc/pid/maps tearing tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add verbose mode to the /proc/pid/maps tearing tests to print debugging information. VERBOSE environment variable is used to enable it. Usage example: VERBOSE=1 ./proc-maps-race Link: https://lkml.kernel.org/r/20250719182854.3166724-5-surenb@google.com Signed-off-by: Suren Baghdasaryan Cc: Alexey Dobriyan Cc: Andrii Nakryiko Cc: Christian Brauner Cc: Christophe Leroy Cc: David Hildenbrand Cc: Jann Horn Cc: Jeongjun Park Cc: Johannes Weiner Cc: Josef Bacik Cc: Kalesh Singh Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Oscar Salvador Cc: "Paul E . McKenney" Cc: Peter Xu Cc: Ryan Roberts Cc: Shuah Khan Cc: Thomas Weißschuh Cc: T.J. Mercier Cc: Vlastimil Babka Cc: Ye Bin Signed-off-by: Andrew Morton --- tools/testing/selftests/proc/proc-maps-race.c | 153 ++++++++++++++++++++++++-- 1 file changed, 141 insertions(+), 12 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/proc/proc-maps-race.c b/tools/testing/selftests/proc/proc-maps-race.c index bc614a2d944a..66773685a047 100644 --- a/tools/testing/selftests/proc/proc-maps-race.c +++ b/tools/testing/selftests/proc/proc-maps-race.c @@ -77,6 +77,7 @@ FIXTURE(proc_maps_race) int shared_mem_size; int page_size; int vma_count; + bool verbose; int maps_fd; pid_t pid; }; @@ -188,12 +189,104 @@ static void stop_vma_modifier(struct vma_modifier_info *mod_info) signal_state(mod_info, SETUP_MODIFY_MAPS); } +static void print_first_lines(char *text, int nr) +{ + const char *end = text; + + while (nr && (end = strchr(end, '\n')) != NULL) { + nr--; + end++; + } + + if (end) { + int offs = end - text; + + text[offs] = '\0'; + printf(text); + text[offs] = '\n'; + printf("\n"); + } else { + printf(text); + } +} + +static void print_last_lines(char *text, int nr) +{ + const char *start = text + strlen(text); + + nr++; /* to ignore the last newline */ + while (nr) { + while (start > text && *start != '\n') + start--; + nr--; + start--; + } + printf(start); +} + +static void print_boundaries(const char *title, FIXTURE_DATA(proc_maps_race) *self) +{ + if (!self->verbose) + return; + + printf("%s", title); + /* Print 3 boundary lines from each page */ + print_last_lines(self->page1.data, 3); + printf("-----------------page boundary-----------------\n"); + print_first_lines(self->page2.data, 3); +} + +static bool print_boundaries_on(bool condition, const char *title, + FIXTURE_DATA(proc_maps_race) *self) +{ + if (self->verbose && condition) + print_boundaries(title, self); + + return condition; +} + +static void report_test_start(const char *name, bool verbose) +{ + if (verbose) + printf("==== %s ====\n", name); +} + +static struct timespec print_ts; + +static void start_test_loop(struct timespec *ts, bool verbose) +{ + if (verbose) + print_ts.tv_sec = ts->tv_sec; +} + +static void end_test_iteration(struct timespec *ts, bool verbose) +{ + if (!verbose) + return; + + /* Update every second */ + if (print_ts.tv_sec == ts->tv_sec) + return; + + printf("."); + fflush(stdout); + print_ts.tv_sec = ts->tv_sec; +} + +static void end_test_loop(bool verbose) +{ + if (verbose) + printf("\n"); +} + static bool capture_mod_pattern(FIXTURE_DATA(proc_maps_race) *self, struct line_content *mod_last_line, struct line_content *mod_first_line, struct line_content *restored_last_line, struct line_content *restored_first_line) { + print_boundaries("Before modification", self); + signal_state(self->mod_info, SETUP_MODIFY_MAPS); wait_for_state(self->mod_info, SETUP_MAPS_MODIFIED); @@ -201,6 +294,8 @@ static bool capture_mod_pattern(FIXTURE_DATA(proc_maps_race) *self, if (!read_boundary_lines(self, mod_last_line, mod_first_line)) return false; + print_boundaries("After modification", self); + signal_state(self->mod_info, SETUP_RESTORE_MAPS); wait_for_state(self->mod_info, SETUP_MAPS_RESTORED); @@ -208,6 +303,8 @@ static bool capture_mod_pattern(FIXTURE_DATA(proc_maps_race) *self, if (!read_boundary_lines(self, restored_last_line, restored_first_line)) return false; + print_boundaries("After restore", self); + if (!self->mod_info->vma_mod_check(mod_last_line, mod_first_line, restored_last_line, restored_first_line)) return false; @@ -295,6 +392,7 @@ static inline bool check_remap_result(struct line_content *mod_last_line, FIXTURE_SETUP(proc_maps_race) { + const char *verbose = getenv("VERBOSE"); const char *duration = getenv("DURATION"); struct vma_modifier_info *mod_info; pthread_mutexattr_t mutex_attr; @@ -303,6 +401,7 @@ FIXTURE_SETUP(proc_maps_race) char fname[32]; self->page_size = (unsigned long)sysconf(_SC_PAGESIZE); + self->verbose = verbose && !strncmp(verbose, "1", 1); duration_sec = duration ? atol(duration) : 0; self->duration_sec = duration_sec ? duration_sec : 5UL; @@ -444,6 +543,7 @@ TEST_F(proc_maps_race, test_maps_tearing_from_split) mod_info->vma_restore = merge_vma; mod_info->vma_mod_check = check_split_result; + report_test_start("Tearing from split", self->verbose); ASSERT_TRUE(capture_mod_pattern(self, &split_last_line, &split_first_line, &restored_last_line, &restored_first_line)); @@ -455,6 +555,7 @@ TEST_F(proc_maps_race, test_maps_tearing_from_split) struct timespec start_ts, end_ts; clock_gettime(CLOCK_MONOTONIC_COARSE, &start_ts); + start_test_loop(&start_ts, self->verbose); do { bool last_line_changed; bool first_line_changed; @@ -472,12 +573,18 @@ TEST_F(proc_maps_race, test_maps_tearing_from_split) * In that case new first line will be the same as the * last restored line. */ - ASSERT_FALSE(strcmp(new_first_line.text, split_first_line.text) && - strcmp(new_first_line.text, restored_last_line.text)); + ASSERT_FALSE(print_boundaries_on( + strcmp(new_first_line.text, split_first_line.text) && + strcmp(new_first_line.text, restored_last_line.text), + "Split result invalid", self)); } else { /* The vmas should be consistent with merge results */ - ASSERT_FALSE(strcmp(new_last_line.text, restored_last_line.text)); - ASSERT_FALSE(strcmp(new_first_line.text, restored_first_line.text)); + ASSERT_FALSE(print_boundaries_on( + strcmp(new_last_line.text, restored_last_line.text), + "Merge result invalid", self)); + ASSERT_FALSE(print_boundaries_on( + strcmp(new_first_line.text, restored_first_line.text), + "Merge result invalid", self)); } /* * First and last lines should change in unison. If the last @@ -489,7 +596,9 @@ TEST_F(proc_maps_race, test_maps_tearing_from_split) ASSERT_EQ(last_line_changed, first_line_changed); clock_gettime(CLOCK_MONOTONIC_COARSE, &end_ts); + end_test_iteration(&end_ts, self->verbose); } while (end_ts.tv_sec - start_ts.tv_sec < self->duration_sec); + end_test_loop(self->verbose); /* Signal the modifyer thread to stop and wait until it exits */ signal_state(mod_info, TEST_DONE); @@ -513,6 +622,7 @@ TEST_F(proc_maps_race, test_maps_tearing_from_resize) mod_info->vma_restore = expand_vma; mod_info->vma_mod_check = check_shrink_result; + report_test_start("Tearing from resize", self->verbose); ASSERT_TRUE(capture_mod_pattern(self, &shrunk_last_line, &shrunk_first_line, &restored_last_line, &restored_first_line)); @@ -524,6 +634,7 @@ TEST_F(proc_maps_race, test_maps_tearing_from_resize) struct timespec start_ts, end_ts; clock_gettime(CLOCK_MONOTONIC_COARSE, &start_ts); + start_test_loop(&start_ts, self->verbose); do { ASSERT_TRUE(read_boundary_lines(self, &new_last_line, &new_first_line)); @@ -537,16 +648,24 @@ TEST_F(proc_maps_race, test_maps_tearing_from_resize) * again. In that case new first line will be the same * as the last restored line. */ - ASSERT_FALSE(strcmp(new_first_line.text, shrunk_first_line.text) && - strcmp(new_first_line.text, restored_last_line.text)); + ASSERT_FALSE(print_boundaries_on( + strcmp(new_first_line.text, shrunk_first_line.text) && + strcmp(new_first_line.text, restored_last_line.text), + "Shrink result invalid", self)); } else { /* The vmas should be consistent with the original/resored state */ - ASSERT_FALSE(strcmp(new_last_line.text, restored_last_line.text)); - ASSERT_FALSE(strcmp(new_first_line.text, restored_first_line.text)); + ASSERT_FALSE(print_boundaries_on( + strcmp(new_last_line.text, restored_last_line.text), + "Expand result invalid", self)); + ASSERT_FALSE(print_boundaries_on( + strcmp(new_first_line.text, restored_first_line.text), + "Expand result invalid", self)); } clock_gettime(CLOCK_MONOTONIC_COARSE, &end_ts); + end_test_iteration(&end_ts, self->verbose); } while (end_ts.tv_sec - start_ts.tv_sec < self->duration_sec); + end_test_loop(self->verbose); /* Signal the modifyer thread to stop and wait until it exits */ signal_state(mod_info, TEST_DONE); @@ -570,6 +689,7 @@ TEST_F(proc_maps_race, test_maps_tearing_from_remap) mod_info->vma_restore = patch_vma; mod_info->vma_mod_check = check_remap_result; + report_test_start("Tearing from remap", self->verbose); ASSERT_TRUE(capture_mod_pattern(self, &remapped_last_line, &remapped_first_line, &restored_last_line, &restored_first_line)); @@ -581,6 +701,7 @@ TEST_F(proc_maps_race, test_maps_tearing_from_remap) struct timespec start_ts, end_ts; clock_gettime(CLOCK_MONOTONIC_COARSE, &start_ts); + start_test_loop(&start_ts, self->verbose); do { ASSERT_TRUE(read_boundary_lines(self, &new_last_line, &new_first_line)); @@ -594,16 +715,24 @@ TEST_F(proc_maps_race, test_maps_tearing_from_remap) * again. In that case new first line will be the same * as the last restored line. */ - ASSERT_FALSE(strcmp(new_first_line.text, remapped_first_line.text) && - strcmp(new_first_line.text, restored_last_line.text)); + ASSERT_FALSE(print_boundaries_on( + strcmp(new_first_line.text, remapped_first_line.text) && + strcmp(new_first_line.text, restored_last_line.text), + "Remap result invalid", self)); } else { /* The vmas should be consistent with the original/resored state */ - ASSERT_FALSE(strcmp(new_last_line.text, restored_last_line.text)); - ASSERT_FALSE(strcmp(new_first_line.text, restored_first_line.text)); + ASSERT_FALSE(print_boundaries_on( + strcmp(new_last_line.text, restored_last_line.text), + "Remap restore result invalid", self)); + ASSERT_FALSE(print_boundaries_on( + strcmp(new_first_line.text, restored_first_line.text), + "Remap restore result invalid", self)); } clock_gettime(CLOCK_MONOTONIC_COARSE, &end_ts); + end_test_iteration(&end_ts, self->verbose); } while (end_ts.tv_sec - start_ts.tv_sec < self->duration_sec); + end_test_loop(self->verbose); /* Signal the modifyer thread to stop and wait until it exits */ signal_state(mod_info, TEST_DONE); -- cgit v1.2.3 From 3f6bfd4789a0f396b8c0dfb8713c1f3eeed3b2d7 Mon Sep 17 00:00:00 2001 From: wang lian Date: Thu, 17 Jul 2025 21:18:56 +0800 Subject: selftests/mm: reuse FORCE_READ to replace "asm volatile("" : "+r" (XXX));" Patch series "selftests/mm: reuse FORCE_READ to replace "asm volatile("" : "+r" (XXX));" and some cleanup", v2. This series introduces a common FORCE_READ() macro to replace the cryptic asm volatile("" : "+r" (variable)); construct used in several mm selftests. This improves code readability and maintainability by removing duplicated, hard-to-understand code. This patch (of 2): Several mm selftests use the `asm volatile("" : "+r" (variable));` construct to force a read of a variable, preventing the compiler from optimizing away the memory access. This idiom is cryptic and duplicated across multiple test files. Following a suggestion from David[1], this patch refactors this common pattern into a FORCE_READ() macro Link: https://lkml.kernel.org/r/20250717131857.59909-1-lianux.mm@gmail.com Link: https://lkml.kernel.org/r/20250717131857.59909-2-lianux.mm@gmail.com Link: https://lore.kernel.org/lkml/4a3e0759-caa1-4cfa-bc3f-402593f1eee3@redhat.com/ [1] Signed-off-by: wang lian Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Reviewed-by: Zi Yan Reviewed-by: Wei Yang Cc: Christian Brauner Cc: Jann Horn Cc: Kairui Song Cc: Liam Howlett Cc: Mark Brown Cc: SeongJae Park Cc: Shuah Khan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/cow.c | 30 +++++++++++------------ tools/testing/selftests/mm/guard-regions.c | 7 ------ tools/testing/selftests/mm/hugetlb-madvise.c | 5 +--- tools/testing/selftests/mm/migration.c | 13 +++++----- tools/testing/selftests/mm/pagemap_ioctl.c | 4 +-- tools/testing/selftests/mm/split_huge_page_test.c | 4 +-- tools/testing/selftests/mm/vm_util.h | 7 ++++++ 7 files changed, 31 insertions(+), 39 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index 788e82b00b75..d30625c18259 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -1534,7 +1534,7 @@ static void test_ro_fast_pin(char *mem, const char *smem, size_t size) static void run_with_zeropage(non_anon_test_fn fn, const char *desc) { - char *mem, *smem, tmp; + char *mem, *smem; log_test_start("%s ... with shared zeropage", desc); @@ -1554,8 +1554,8 @@ static void run_with_zeropage(non_anon_test_fn fn, const char *desc) } /* Read from the page to populate the shared zeropage. */ - tmp = *mem + *smem; - asm volatile("" : "+r" (tmp)); + FORCE_READ(mem); + FORCE_READ(smem); fn(mem, smem, pagesize); munmap: @@ -1566,7 +1566,7 @@ munmap: static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) { - char *mem, *smem, *mmap_mem, *mmap_smem, tmp; + char *mem, *smem, *mmap_mem, *mmap_smem; size_t mmap_size; int ret; @@ -1617,8 +1617,8 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) * the first sub-page and test if we get another sub-page populated * automatically. */ - tmp = *mem + *smem; - asm volatile("" : "+r" (tmp)); + FORCE_READ(mem); + FORCE_READ(smem); if (!pagemap_is_populated(pagemap_fd, mem + pagesize) || !pagemap_is_populated(pagemap_fd, smem + pagesize)) { ksft_test_result_skip("Did not get THPs populated\n"); @@ -1634,7 +1634,7 @@ munmap: static void run_with_memfd(non_anon_test_fn fn, const char *desc) { - char *mem, *smem, tmp; + char *mem, *smem; int fd; log_test_start("%s ... with memfd", desc); @@ -1668,8 +1668,8 @@ static void run_with_memfd(non_anon_test_fn fn, const char *desc) } /* Fault the page in. */ - tmp = *mem + *smem; - asm volatile("" : "+r" (tmp)); + FORCE_READ(mem); + FORCE_READ(smem); fn(mem, smem, pagesize); munmap: @@ -1682,7 +1682,7 @@ close: static void run_with_tmpfile(non_anon_test_fn fn, const char *desc) { - char *mem, *smem, tmp; + char *mem, *smem; FILE *file; int fd; @@ -1724,8 +1724,8 @@ static void run_with_tmpfile(non_anon_test_fn fn, const char *desc) } /* Fault the page in. */ - tmp = *mem + *smem; - asm volatile("" : "+r" (tmp)); + FORCE_READ(mem); + FORCE_READ(smem); fn(mem, smem, pagesize); munmap: @@ -1740,7 +1740,7 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc, size_t hugetlbsize) { int flags = MFD_HUGETLB; - char *mem, *smem, tmp; + char *mem, *smem; int fd; log_test_start("%s ... with memfd hugetlb (%zu kB)", desc, @@ -1778,8 +1778,8 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc, } /* Fault the page in. */ - tmp = *mem + *smem; - asm volatile("" : "+r" (tmp)); + FORCE_READ(mem); + FORCE_READ(smem); fn(mem, smem, hugetlbsize); munmap: diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c index 93af3d3760f9..4b76e72e7053 100644 --- a/tools/testing/selftests/mm/guard-regions.c +++ b/tools/testing/selftests/mm/guard-regions.c @@ -35,13 +35,6 @@ static volatile sig_atomic_t signal_jump_set; static sigjmp_buf signal_jmp_buf; -/* - * Ignore the checkpatch warning, we must read from x but don't want to do - * anything with it in order to trigger a read page fault. We therefore must use - * volatile to stop the compiler from optimising this away. - */ -#define FORCE_READ(x) (*(volatile typeof(x) *)x) - /* * How is the test backing the mapping being tested? */ diff --git a/tools/testing/selftests/mm/hugetlb-madvise.c b/tools/testing/selftests/mm/hugetlb-madvise.c index e74107185324..1afe14b9dc0c 100644 --- a/tools/testing/selftests/mm/hugetlb-madvise.c +++ b/tools/testing/selftests/mm/hugetlb-madvise.c @@ -47,14 +47,11 @@ void write_fault_pages(void *addr, unsigned long nr_pages) void read_fault_pages(void *addr, unsigned long nr_pages) { - volatile unsigned long dummy = 0; unsigned long i; for (i = 0; i < nr_pages; i++) { - dummy += *((unsigned long *)(addr + (i * huge_page_size))); - /* Prevent the compiler from optimizing out the entire loop: */ - asm volatile("" : "+r" (dummy)); + FORCE_READ(((unsigned long *)(addr + (i * huge_page_size)))); } } diff --git a/tools/testing/selftests/mm/migration.c b/tools/testing/selftests/mm/migration.c index a306f8bab087..c5a73617796a 100644 --- a/tools/testing/selftests/mm/migration.c +++ b/tools/testing/selftests/mm/migration.c @@ -16,6 +16,7 @@ #include #include #include +#include "vm_util.h" #define TWOMEG (2<<20) #define RUNTIME (20) @@ -103,15 +104,13 @@ int migrate(uint64_t *ptr, int n1, int n2) void *access_mem(void *ptr) { - volatile uint64_t y = 0; - volatile uint64_t *x = ptr; - while (1) { pthread_testcancel(); - y += *x; - - /* Prevent the compiler from optimizing out the writes to y: */ - asm volatile("" : "+r" (y)); + /* Force a read from the memory pointed to by ptr. This ensures + * the memory access actually happens and prevents the compiler + * from optimizing away this entire loop. + */ + FORCE_READ((uint64_t *)ptr); } return NULL; diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c index c2dcda78ad31..0d4209eef0c3 100644 --- a/tools/testing/selftests/mm/pagemap_ioctl.c +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -1525,9 +1525,7 @@ void zeropfn_tests(void) ret = madvise(mem, hpage_size, MADV_HUGEPAGE); if (!ret) { - char tmp = *mem; - - asm volatile("" : "+r" (tmp)); + FORCE_READ(mem); ret = pagemap_ioctl(mem, hpage_size, &vec, 1, 0, 0, PAGE_IS_PFNZERO, 0, 0, PAGE_IS_PFNZERO); diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c index aa7400ed0e99..6640748ff221 100644 --- a/tools/testing/selftests/mm/split_huge_page_test.c +++ b/tools/testing/selftests/mm/split_huge_page_test.c @@ -398,7 +398,6 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, char **addr) { size_t i; - int dummy = 0; unsigned char buf[1024]; srand(time(NULL)); @@ -440,8 +439,7 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, madvise(*addr, fd_size, MADV_HUGEPAGE); for (size_t i = 0; i < fd_size; i++) - dummy += *(*addr + i); - asm volatile("" : "+r" (dummy)); + FORCE_READ((*addr + i)); if (!check_huge_file(*addr, fd_size / pmd_pagesize, pmd_pagesize)) { ksft_print_msg("No large pagecache folio generated, please provide a filesystem supporting large folio\n"); diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index 2b154c287591..c20298ae98ea 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -18,6 +18,13 @@ #define PM_SWAP BIT_ULL(62) #define PM_PRESENT BIT_ULL(63) +/* + * Ignore the checkpatch warning, we must read from x but don't want to do + * anything with it in order to trigger a read page fault. We therefore must use + * volatile to stop the compiler from optimising this away. + */ +#define FORCE_READ(x) (*(volatile typeof(x) *)x) + extern unsigned int __page_size; extern unsigned int __page_shift; -- cgit v1.2.3 From 6f1cc9fb476941f9cf8ac7ca0d8343b425b1446d Mon Sep 17 00:00:00 2001 From: wang lian Date: Thu, 17 Jul 2025 21:18:57 +0800 Subject: selftests/mm: guard-regions: Use SKIP() instead of ksft_exit_skip() To ensure only the current test is skipped on permission failure, instead of terminating the entire test binary. Link: https://lkml.kernel.org/r/20250717131857.59909-3-lianux.mm@gmail.com Signed-off-by: wang lian Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Reviewed-by: Mark Brown Reviewed-by: Zi Yan Reviewed-by: Wei Yang Cc: Christian Brauner Cc: Jann Horn Cc: Kairui Song Cc: Liam Howlett Cc: SeongJae Park Cc: Shuah Khan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/guard-regions.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c index 4b76e72e7053..b0d42eb04e3a 100644 --- a/tools/testing/selftests/mm/guard-regions.c +++ b/tools/testing/selftests/mm/guard-regions.c @@ -575,7 +575,7 @@ TEST_F(guard_regions, process_madvise) /* OK we don't have permission to do this, skip. */ if (count == -1 && errno == EPERM) - ksft_exit_skip("No process_madvise() permissions, try running as root.\n"); + SKIP(return, "No process_madvise() permissions, try running as root.\n"); /* Returns the number of bytes advised. */ ASSERT_EQ(count, 6 * page_size); -- cgit v1.2.3 From 10aed7dac451850a8e18128ee90222ab67d8a7e5 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 21 Jul 2025 18:33:25 +0100 Subject: tools/testing/selftests: add mremap() shrink test for multiple VMAs Patch series "tools/testing: expand mremap testing". Expand our mremap() testing to further assert that behaviour is as expected. There is a poorly documented mremap() feature whereby it is possible to mremap() multiple VMAs (even with gaps) when shrinking, as long as the resultant shrunk range spans only a single VMA. So we start by asserting this behaviour functions correctly both with an in-place shrink and a shrink/move. Next, we further test the newly introduced ability to mremap() multiple VMAs when performing a MAP_FIXED move (that is without the size being changed), firstly by asserting that MREMAP_DONTUNMAP has no bearing on this behaviour. Finally, we explicitly test that such moves, when splitting source VMAs, function correctly. This patch (of 3): There is an apparently little-known feature of mremap() whereby, in stark contrast to other modes (other than the recently introduced capacity to move multiple VMAs), the input source range span multiple VMAs with gaps between. This is, when shrinking a VMA, whether moving it or not, and the shrink would reduce the range to a single VMA - this is permitted, as the shrink is actioned by an unmap. This patch adds tests to assert that this behaves as expected. Link: https://lkml.kernel.org/r/cover.1753119043.git.lorenzo.stoakes@oracle.com Link: https://lkml.kernel.org/r/f08122893a26092a2bec6e69443e87f468ffdbed.1753119043.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Cc: Jann Horn Cc: Liam Howlett Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/mremap_test.c | 83 +++++++++++++++++++++++++++++++- 1 file changed, 82 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c index 0a49be11e614..141a9032414e 100644 --- a/tools/testing/selftests/mm/mremap_test.c +++ b/tools/testing/selftests/mm/mremap_test.c @@ -523,6 +523,85 @@ out: ksft_test_result_fail("%s\n", test_name); } +static void mremap_shrink_multiple_vmas(unsigned long page_size, + bool inplace) +{ + char *test_name = "mremap shrink multiple vmas"; + const size_t size = 10 * page_size; + bool success = true; + char *ptr, *tgt_ptr; + void *res; + int i; + + ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (ptr == MAP_FAILED) { + perror("mmap"); + success = false; + goto out; + } + + tgt_ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (tgt_ptr == MAP_FAILED) { + perror("mmap"); + success = false; + goto out; + } + if (munmap(tgt_ptr, size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + + /* + * Unmap so we end up with: + * + * 0 2 4 6 8 10 offset in buffer + * |*| |*| |*| |*| |*| |*| + * |*| |*| |*| |*| |*| |*| + */ + for (i = 1; i < 10; i += 2) { + if (munmap(&ptr[i * page_size], page_size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + } + + /* + * Shrink in-place across multiple VMAs and gaps so we end up with: + * + * 0 + * |*| + * |*| + */ + if (inplace) + res = mremap(ptr, size, page_size, 0); + else + res = mremap(ptr, size, page_size, MREMAP_MAYMOVE | MREMAP_FIXED, + tgt_ptr); + + if (res == MAP_FAILED) { + perror("mremap"); + success = false; + goto out_unmap; + } + +out_unmap: + if (munmap(tgt_ptr, size)) + perror("munmap tgt"); + if (munmap(ptr, size)) + perror("munmap src"); +out: + if (success) + ksft_test_result_pass("%s%s\n", test_name, + inplace ? " [inplace]" : ""); + else + ksft_test_result_fail("%s%s\n", test_name, + inplace ? " [inplace]" : ""); +} + /* Returns the time taken for the remap on success else returns -1. */ static long long remap_region(struct config c, unsigned int threshold_mb, char *rand_addr) @@ -864,7 +943,7 @@ int main(int argc, char **argv) char *rand_addr; size_t rand_size; int num_expand_tests = 2; - int num_misc_tests = 3; + int num_misc_tests = 5; struct test test_cases[MAX_TEST] = {}; struct test perf_test_cases[MAX_PERF_TEST]; int page_size; @@ -992,6 +1071,8 @@ int main(int argc, char **argv) mremap_move_within_range(pattern_seed, rand_addr); mremap_move_1mb_from_start(pattern_seed, rand_addr); mremap_move_multiple_vmas(pattern_seed, page_size); + mremap_shrink_multiple_vmas(page_size, /* inplace= */true); + mremap_shrink_multiple_vmas(page_size, /* inplace= */false); if (run_perf_tests) { ksft_print_msg("\n%s\n", -- cgit v1.2.3 From 7062387ed690f76fd486963663f2c7c8d5762764 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 21 Jul 2025 18:33:26 +0100 Subject: tools/testing/selftests: test MREMAP_DONTUNMAP on multiple VMA move We support MREMAP_MAYMOVE | MREMAP_FIXED | MREMAP_DONTUNMAP for moving multiple VMAs via mremap(), so assert that the tests pass with both MREMAP_DONTUNMAP set and not set. Additionally, add success = false settings when mremap() fails. This is something that cannot realistically happen, so in no way impacted test outcome, but it is incorrect to indicate a test pass when something has failed. Link: https://lkml.kernel.org/r/d7359941981e4e44c774753b3e364d1c54928e6a.1753119043.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Cc: Jann Horn Cc: Liam Howlett Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/mremap_test.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c index 141a9032414e..d3b4772cd8c9 100644 --- a/tools/testing/selftests/mm/mremap_test.c +++ b/tools/testing/selftests/mm/mremap_test.c @@ -406,14 +406,19 @@ static bool is_multiple_vma_range_ok(unsigned int pattern_seed, } static void mremap_move_multiple_vmas(unsigned int pattern_seed, - unsigned long page_size) + unsigned long page_size, + bool dont_unmap) { + int mremap_flags = MREMAP_FIXED | MREMAP_MAYMOVE; char *test_name = "mremap move multiple vmas"; const size_t size = 11 * page_size; bool success = true; char *ptr, *tgt_ptr; int i; + if (dont_unmap) + mremap_flags |= MREMAP_DONTUNMAP; + ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); if (ptr == MAP_FAILED) { @@ -467,8 +472,7 @@ static void mremap_move_multiple_vmas(unsigned int pattern_seed, } /* First, just move the whole thing. */ - if (mremap(ptr, size, size, - MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr) == MAP_FAILED) { + if (mremap(ptr, size, size, mremap_flags, tgt_ptr) == MAP_FAILED) { perror("mremap"); success = false; goto out_unmap; @@ -480,9 +484,10 @@ static void mremap_move_multiple_vmas(unsigned int pattern_seed, } /* Move next to itself. */ - if (mremap(tgt_ptr, size, size, - MREMAP_MAYMOVE | MREMAP_FIXED, &tgt_ptr[size]) == MAP_FAILED) { + if (mremap(tgt_ptr, size, size, mremap_flags, + &tgt_ptr[size]) == MAP_FAILED) { perror("mremap"); + success = false; goto out_unmap; } /* Check that the move is ok. */ @@ -500,8 +505,9 @@ static void mremap_move_multiple_vmas(unsigned int pattern_seed, } /* Move and overwrite. */ if (mremap(&tgt_ptr[size], size, size, - MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr) == MAP_FAILED) { + mremap_flags, tgt_ptr) == MAP_FAILED) { perror("mremap"); + success = false; goto out_unmap; } /* Check that the move is ok. */ @@ -518,9 +524,11 @@ out_unmap: out: if (success) - ksft_test_result_pass("%s\n", test_name); + ksft_test_result_pass("%s%s\n", test_name, + dont_unmap ? " [dontunnmap]" : ""); else - ksft_test_result_fail("%s\n", test_name); + ksft_test_result_fail("%s%s\n", test_name, + dont_unmap ? " [dontunnmap]" : ""); } static void mremap_shrink_multiple_vmas(unsigned long page_size, @@ -943,7 +951,7 @@ int main(int argc, char **argv) char *rand_addr; size_t rand_size; int num_expand_tests = 2; - int num_misc_tests = 5; + int num_misc_tests = 6; struct test test_cases[MAX_TEST] = {}; struct test perf_test_cases[MAX_PERF_TEST]; int page_size; @@ -1070,9 +1078,10 @@ int main(int argc, char **argv) mremap_move_within_range(pattern_seed, rand_addr); mremap_move_1mb_from_start(pattern_seed, rand_addr); - mremap_move_multiple_vmas(pattern_seed, page_size); mremap_shrink_multiple_vmas(page_size, /* inplace= */true); mremap_shrink_multiple_vmas(page_size, /* inplace= */false); + mremap_move_multiple_vmas(pattern_seed, page_size, /* dontunmap= */ false); + mremap_move_multiple_vmas(pattern_seed, page_size, /* dontunmap= */ true); if (run_perf_tests) { ksft_print_msg("\n%s\n", -- cgit v1.2.3 From 7d6597dfef1183b2c198151c2740c4d9c73d3dfc Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 21 Jul 2025 18:33:27 +0100 Subject: tools/testing/selftests: explicitly test split multi VMA mremap move Check that moving a range of VMAs where we are offset into the first and last VMAs works correctly. This results in the VMAs being split at these points at which we are offset into VMAs. We explicitly test both the ordinary MREMAP_FIXED multi VMA move case and the MREMAP_DONTUNMAP multi VMA move case. Link: https://lkml.kernel.org/r/b04920bb6c09dc86c207c251eab8ec670fbbcaef.1753119043.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Cc: Jann Horn Cc: Liam Howlett Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/mremap_test.c | 127 ++++++++++++++++++++++++++++++- 1 file changed, 126 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c index d3b4772cd8c9..fccf9e797a0c 100644 --- a/tools/testing/selftests/mm/mremap_test.c +++ b/tools/testing/selftests/mm/mremap_test.c @@ -610,6 +610,129 @@ out: inplace ? " [inplace]" : ""); } +static void mremap_move_multiple_vmas_split(unsigned int pattern_seed, + unsigned long page_size, + bool dont_unmap) +{ + char *test_name = "mremap move multiple vmas split"; + int mremap_flags = MREMAP_FIXED | MREMAP_MAYMOVE; + const size_t size = 10 * page_size; + bool success = true; + char *ptr, *tgt_ptr; + int i; + + if (dont_unmap) + mremap_flags |= MREMAP_DONTUNMAP; + + ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (ptr == MAP_FAILED) { + perror("mmap"); + success = false; + goto out; + } + + tgt_ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (tgt_ptr == MAP_FAILED) { + perror("mmap"); + success = false; + goto out; + } + if (munmap(tgt_ptr, size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + + /* + * Unmap so we end up with: + * + * 0 1 2 3 4 5 6 7 8 9 10 offset in buffer + * |**********| |*******| + * |**********| |*******| + * 0 1 2 3 4 5 6 7 8 9 pattern offset + */ + if (munmap(&ptr[5 * page_size], page_size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + + /* Set up random patterns. */ + srand(pattern_seed); + for (i = 0; i < 10; i++) { + int j; + char *buf = &ptr[i * page_size]; + + if (i == 5) + continue; + + for (j = 0; j < page_size; j++) + buf[j] = rand(); + } + + /* + * Move the below: + * + * <-------------> + * 0 1 2 3 4 5 6 7 8 9 10 offset in buffer + * |**********| |*******| + * |**********| |*******| + * 0 1 2 3 4 5 6 7 8 9 pattern offset + * + * Into: + * + * 0 1 2 3 4 5 6 7 offset in buffer + * |*****| |*****| + * |*****| |*****| + * 2 3 4 5 6 7 pattern offset + */ + if (mremap(&ptr[2 * page_size], size - 3 * page_size, size - 3 * page_size, + mremap_flags, tgt_ptr) == MAP_FAILED) { + perror("mremap"); + success = false; + goto out_unmap; + } + + /* Offset into random pattern. */ + srand(pattern_seed); + for (i = 0; i < 2 * page_size; i++) + rand(); + + /* Check pattern. */ + for (i = 0; i < 7; i++) { + int j; + char *buf = &tgt_ptr[i * page_size]; + + if (i == 3) + continue; + + for (j = 0; j < page_size; j++) { + char chr = rand(); + + if (chr != buf[j]) { + ksft_print_msg("page %d offset %d corrupted, expected %d got %d\n", + i, j, chr, buf[j]); + goto out_unmap; + } + } + } + +out_unmap: + if (munmap(tgt_ptr, size)) + perror("munmap tgt"); + if (munmap(ptr, size)) + perror("munmap src"); +out: + if (success) + ksft_test_result_pass("%s%s\n", test_name, + dont_unmap ? " [dontunnmap]" : ""); + else + ksft_test_result_fail("%s%s\n", test_name, + dont_unmap ? " [dontunnmap]" : ""); +} + /* Returns the time taken for the remap on success else returns -1. */ static long long remap_region(struct config c, unsigned int threshold_mb, char *rand_addr) @@ -951,7 +1074,7 @@ int main(int argc, char **argv) char *rand_addr; size_t rand_size; int num_expand_tests = 2; - int num_misc_tests = 6; + int num_misc_tests = 8; struct test test_cases[MAX_TEST] = {}; struct test perf_test_cases[MAX_PERF_TEST]; int page_size; @@ -1082,6 +1205,8 @@ int main(int argc, char **argv) mremap_shrink_multiple_vmas(page_size, /* inplace= */false); mremap_move_multiple_vmas(pattern_seed, page_size, /* dontunmap= */ false); mremap_move_multiple_vmas(pattern_seed, page_size, /* dontunmap= */ true); + mremap_move_multiple_vmas_split(pattern_seed, page_size, /* dontunmap= */ false); + mremap_move_multiple_vmas_split(pattern_seed, page_size, /* dontunmap= */ true); if (run_perf_tests) { ksft_print_msg("\n%s\n", -- cgit v1.2.3 From 3c3ab65f00ebf7859d93e29980eb9a9c5bc64642 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 30 Jun 2025 17:44:24 +0200 Subject: selftests: net: Enable legacy netfilter legacy options. Some specified options rely on NETFILTER_XTABLES_LEGACY to be enabled. IP_NF_TARGET_TTL for instance depends on IP_NF_MANGLE which in turn depends on IP_NF_IPTABLES_LEGACY -> NETFILTER_XTABLES_LEGACY. Enable relevant iptables config options explicitly, this is needed to avoid breakage when symbols related to iptables-legacy will depend on NETFILTER_LEGACY resp. IP_TABLES_LEGACY. This also means that the classic tables (Kernel modules) will not be enabled by default, so enable them too. Signed-off-by: Florian Westphal [bigeasy: Split out the config bits from the main patch] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/bpf/config | 1 + tools/testing/selftests/hid/config.common | 1 + tools/testing/selftests/net/config | 11 +++++++++++ tools/testing/selftests/net/mptcp/config | 2 ++ tools/testing/selftests/net/netfilter/config | 5 +++++ tools/testing/selftests/wireguard/qemu/kernel.config | 4 ++++ 6 files changed, 24 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index f74e1ea0ad3b..521836776733 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -97,6 +97,7 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NF_TABLES_IPV4=y CONFIG_NF_TABLES_IPV6=y CONFIG_NETFILTER_INGRESS=y +CONFIG_NETFILTER_XTABLES_LEGACY=y CONFIG_NF_FLOW_TABLE=y CONFIG_NF_FLOW_TABLE_INET=y CONFIG_NETFILTER_NETLINK=y diff --git a/tools/testing/selftests/hid/config.common b/tools/testing/selftests/hid/config.common index b1f40857307d..38c51158adf8 100644 --- a/tools/testing/selftests/hid/config.common +++ b/tools/testing/selftests/hid/config.common @@ -135,6 +135,7 @@ CONFIG_NET_EMATCH=y CONFIG_NETFILTER_NETLINK_LOG=y CONFIG_NETFILTER_NETLINK_QUEUE=y CONFIG_NETFILTER_XTABLES=y +CONFIG_NETFILTER_XTABLES_LEGACY=y CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=y CONFIG_NETFILTER_XT_MATCH_BPF=y CONFIG_NETFILTER_XT_MATCH_COMMENT=y diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 3cfef5153823..c24417d0047b 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -30,16 +30,25 @@ CONFIG_NET_FOU=y CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y +CONFIG_NETFILTER_XTABLES_LEGACY=y CONFIG_NF_CONNTRACK=m CONFIG_IPV6_MROUTE=y CONFIG_IPV6_SIT=y CONFIG_NF_NAT=m CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_IPTABLES_LEGACY=m CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_IPTABLES_LEGACY=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_FILTER=m CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_RAW=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_NAT=m CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP6_NF_TARGET_REJECT=m CONFIG_IP_NF_TARGET_TTL=m CONFIG_IPV6_GRE=m CONFIG_IPV6_SEG6_LWTUNNEL=y @@ -57,6 +66,8 @@ CONFIG_NF_TABLES_IPV6=y CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_NAT=m CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_TARGET_HL=m +CONFIG_NETFILTER_XT_NAT=m CONFIG_NET_ACT_CSUM=m CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GACT=m diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 4f80014cae49..968d440c03fe 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -13,6 +13,7 @@ CONFIG_NETFILTER_NETLINK=m CONFIG_NF_TABLES=m CONFIG_NFT_COMPAT=m CONFIG_NETFILTER_XTABLES=m +CONFIG_NETFILTER_XTABLES_LEGACY=y CONFIG_NETFILTER_XT_MATCH_BPF=m CONFIG_NETFILTER_XT_MATCH_LENGTH=m CONFIG_NETFILTER_XT_MATCH_STATISTIC=m @@ -25,6 +26,7 @@ CONFIG_IP_MULTIPLE_TABLES=y CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP6_NF_TARGET_REJECT=m CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IP6_NF_FILTER=m CONFIG_NET_ACT_CSUM=m diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config index 363646f4fefe..c981d2a38ed6 100644 --- a/tools/testing/selftests/net/netfilter/config +++ b/tools/testing/selftests/net/netfilter/config @@ -1,6 +1,8 @@ CONFIG_AUDIT=y CONFIG_BPF_SYSCALL=y CONFIG_BRIDGE=m +CONFIG_NETFILTER_XTABLES_LEGACY=y +CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m CONFIG_BRIDGE_EBT_BROUTE=m CONFIG_BRIDGE_EBT_IP=m CONFIG_BRIDGE_EBT_REDIRECT=m @@ -14,7 +16,10 @@ CONFIG_INET_ESP=m CONFIG_IP_NF_MATCH_RPFILTER=m CONFIG_IP6_NF_MATCH_RPFILTER=m CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_IPTABLES_LEGACY=m CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_IPTABLES_LEGACY=m +CONFIG_IP_NF_NAT=m CONFIG_IP_NF_FILTER=m CONFIG_IP6_NF_FILTER=m CONFIG_IP_NF_RAW=m diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index f314d3789f17..0a5381717e9f 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -16,9 +16,13 @@ CONFIG_NETFILTER_ADVANCED=y CONFIG_NF_CONNTRACK=y CONFIG_NF_NAT=y CONFIG_NETFILTER_XTABLES=y +CONFIG_NETFILTER_XTABLES_LEGACY=y CONFIG_NETFILTER_XT_NAT=y CONFIG_NETFILTER_XT_MATCH_LENGTH=y CONFIG_NETFILTER_XT_MARK=y +CONFIG_NETFILTER_XT_TARGET_MASQUERADE=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP6_NF_TARGET_REJECT=m CONFIG_IP_NF_IPTABLES=y CONFIG_IP_NF_FILTER=y CONFIG_IP_NF_MANGLE=y -- cgit v1.2.3 From ba71a6e58b38aa6f86865d4e18579cb014903692 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 30 Jun 2025 17:44:25 +0200 Subject: selftests: netfilter: Enable CONFIG_INET_SCTP_DIAG The config snippet specifies CONFIG_SCTP_DIAG. This was never an option. Replace CONFIG_SCTP_DIAG with the intended CONFIG_INET_SCTP_DIAG. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/net/netfilter/config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config index c981d2a38ed6..79d5b33966ba 100644 --- a/tools/testing/selftests/net/netfilter/config +++ b/tools/testing/selftests/net/netfilter/config @@ -97,4 +97,4 @@ CONFIG_XFRM_STATISTICS=y CONFIG_NET_PKTGEN=m CONFIG_TUN=m CONFIG_INET_DIAG=m -CONFIG_SCTP_DIAG=m +CONFIG_INET_SCTP_DIAG=m -- cgit v1.2.3 From 8d1c91850d064944ab214b2fbfffb7fc08a11d65 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 23 Jul 2025 17:17:48 +0200 Subject: selftests: netfilter: Ignore tainted kernels in interface stress test Complain about kernel taint value only if it wasn't set at start already. Fixes: 73db1b5dab6f ("selftests: netfilter: Torture nftables netdev hooks") Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/net/netfilter/nft_interface_stress.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/netfilter/nft_interface_stress.sh b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh index 5ff7be9daeee..c0fffaa6dbd9 100755 --- a/tools/testing/selftests/net/netfilter/nft_interface_stress.sh +++ b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh @@ -10,6 +10,8 @@ source lib.sh checktool "nft --version" "run test without nft tool" checktool "iperf3 --version" "run test without iperf3 tool" +read kernel_tainted < /proc/sys/kernel/tainted + # how many seconds to torture the kernel? # default to 80% of max run time but don't exceed 48s TEST_RUNTIME=$((${kselftest_timeout:-60} * 8 / 10)) @@ -135,7 +137,8 @@ else wait fi -[[ $( Date: Thu, 24 Jul 2025 16:06:53 +0800 Subject: selftests: netfilter: ipvs.sh: Explicity disable rp_filter on interface tunl0 Although setup_ns() set net.ipv4.conf.default.rp_filter=0, loading certain module such as ipip will automatically create a tunl0 interface in all netns including new created ones. In the script, this is before than default.rp_filter=0 applied, as a result tunl0.rp_filter remains set to 1 which causes the test report FAIL when ipip module is preloaded. Before fix: Testing DR mode... Testing NAT mode... Testing Tunnel mode... ipvs.sh: FAIL After fix: Testing DR mode... Testing NAT mode... Testing Tunnel mode... ipvs.sh: PASS Fixes: 7c8b89ec506e ("selftests: netfilter: remove rp_filter configuration") Signed-off-by: Yi Chen Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/net/netfilter/ipvs.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/netfilter/ipvs.sh b/tools/testing/selftests/net/netfilter/ipvs.sh index 6af2ea3ad6b8..9c9d5b38ab71 100755 --- a/tools/testing/selftests/net/netfilter/ipvs.sh +++ b/tools/testing/selftests/net/netfilter/ipvs.sh @@ -151,7 +151,7 @@ test_nat() { test_tun() { ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0 - ip netns exec "${ns1}" modprobe -q ipip + modprobe -q ipip ip netns exec "${ns1}" ip link set tunl0 up ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=0 ip netns exec "${ns1}" sysctl -qw net.ipv4.conf.all.send_redirects=0 @@ -160,10 +160,10 @@ test_tun() { ip netns exec "${ns1}" ipvsadm -a -i -t "${vip_v4}:${port}" -r ${rip_v4}:${port} ip netns exec "${ns1}" ip addr add ${vip_v4}/32 dev lo:1 - ip netns exec "${ns2}" modprobe -q ipip ip netns exec "${ns2}" ip link set tunl0 up ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1 ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2 + ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.tunl0.rp_filter=0 ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1 test_service -- cgit v1.2.3 From f6c650c8d87e778a36f69acfc591f99b04bfe82b Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 23 Jul 2025 15:47:15 -0700 Subject: selftests: rtnetlink: add macsec and vlan nesting test Add reproducer for [0] with a dummy device. 0: https://lore.kernel.org/netdev/2aff4342b0f5b1539c02ffd8df4c7e58dd9746e7.camel@nvidia.com/ Reviewed-by: Simon Horman Tested-by: Simon Horman Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250723224715.1341121-2-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/rtnetlink.sh | 36 ++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 441b17947230..7020f988f659 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -21,6 +21,7 @@ ALL_TESTS=" kci_test_vrf kci_test_encap kci_test_macsec + kci_test_macsec_vlan kci_test_ipsec kci_test_ipsec_offload kci_test_fdb_get @@ -572,6 +573,41 @@ kci_test_macsec() end_test "PASS: macsec" } +# Test __dev_set_rx_mode call from dev_uc_add under addr_list_lock spinlock. +# Make sure __dev_set_promiscuity is not grabbing (sleeping) netdev instance +# lock. +# https://lore.kernel.org/netdev/2aff4342b0f5b1539c02ffd8df4c7e58dd9746e7.camel@nvidia.com/ +kci_test_macsec_vlan() +{ + msname="test_macsec1" + vlanname="test_vlan1" + local ret=0 + run_cmd_grep "^Usage: ip macsec" ip macsec help + if [ $? -ne 0 ]; then + end_test "SKIP: macsec: iproute2 too old" + return $ksft_skip + fi + run_cmd ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on + if [ $ret -ne 0 ];then + end_test "FAIL: can't add macsec interface, skipping test" + return 1 + fi + + run_cmd ip link set dev "$msname" up + ip link add link "$msname" name "$vlanname" type vlan id 1 + ip link set dev "$vlanname" address 00:11:22:33:44:88 + ip link set dev "$vlanname" up + run_cmd ip link del dev "$vlanname" + run_cmd ip link del dev "$msname" + + if [ $ret -ne 0 ];then + end_test "FAIL: macsec_vlan" + return 1 + fi + + end_test "PASS: macsec_vlan" +} + #------------------------------------------------------------------- # Example commands # ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \ -- cgit v1.2.3 From 5ec9b15d8dfa4992c6f9c26c1f96e69202d8fcb1 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 23 Jul 2025 10:35:06 -0700 Subject: selftests: net: Skip test if IPv6 is not configured Extend the `check_for_dependencies()` function in `lib_netcons.sh` to check whether IPv6 is enabled by verifying the existence of `/proc/net/if_inet6`. Having IPv6 is a now a dependency of netconsole tests. If the file does not exist, the script will skip the test with an appropriate message suggesting to verify if `CONFIG_IPV6` is enabled. This prevents the test to misbehave if IPv6 is not configured. Signed-off-by: Breno Leitao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250723-netcons_test_ipv6-v1-1-41c9092f93f9@debian.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index 258af805497b..b6071e80ebbb 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -281,6 +281,11 @@ function check_for_dependencies() { exit "${ksft_skip}" fi + if [ ! -f /proc/net/if_inet6 ]; then + echo "SKIP: IPv6 not configured. Check if CONFIG_IPV6 is enabled" >&2 + exit "${ksft_skip}" + fi + if [ ! -f "${NSIM_DEV_SYS_NEW}" ]; then echo "SKIP: file ${NSIM_DEV_SYS_NEW} does not exist. Check if CONFIG_NETDEVSIM is enabled" >&2 exit "${ksft_skip}" -- cgit v1.2.3 From f24987ef6959a7efaf79bffd265522c3df18d431 Mon Sep 17 00:00:00 2001 From: Gabriel Goller Date: Tue, 22 Jul 2025 10:18:45 +0200 Subject: ipv6: add `force_forwarding` sysctl to enable per-interface forwarding It is currently impossible to enable ipv6 forwarding on a per-interface basis like in ipv4. To enable forwarding on an ipv6 interface we need to enable it on all interfaces and disable it on the other interfaces using a netfilter rule. This is especially cumbersome if you have lots of interfaces and only want to enable forwarding on a few. According to the sysctl docs [0] the `net.ipv6.conf.all.forwarding` enables forwarding for all interfaces, while the interface-specific `net.ipv6.conf..forwarding` configures the interface Host/Router configuration. Introduce a new sysctl flag `force_forwarding`, which can be set on every interface. The ip6_forwarding function will then check if the global forwarding flag OR the force_forwarding flag is active and forward the packet. To preserve backwards-compatibility reset the flag (on all interfaces) to 0 if the net.ipv6.conf.all.forwarding flag is set to 0. Add a short selftest that checks if a packet gets forwarded with and without `force_forwarding`. [0]: https://www.kernel.org/doc/Documentation/networking/ip-sysctl.txt Acked-by: Nicolas Dichtel Signed-off-by: Gabriel Goller Link: https://patch.msgid.link/20250722081847.132632-1-g.goller@proxmox.com Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 8 +- include/linux/ipv6.h | 1 + include/uapi/linux/ipv6.h | 1 + include/uapi/linux/netconf.h | 1 + include/uapi/linux/sysctl.h | 1 + net/ipv6/addrconf.c | 82 ++++++++++++++++ net/ipv6/ip6_output.c | 3 +- tools/testing/selftests/net/Makefile | 1 + .../testing/selftests/net/ipv6_force_forwarding.sh | 105 +++++++++++++++++++++ 9 files changed, 200 insertions(+), 3 deletions(-) create mode 100755 tools/testing/selftests/net/ipv6_force_forwarding.sh (limited to 'tools/testing/selftests') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 14700ea77e75..bb620f554598 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -2543,8 +2543,8 @@ conf/all/disable_ipv6 - BOOLEAN conf/all/forwarding - BOOLEAN Enable global IPv6 forwarding between all interfaces. - IPv4 and IPv6 work differently here; e.g. netfilter must be used - to control which interfaces may forward packets and which not. + IPv4 and IPv6 work differently here; the ``force_forwarding`` flag must + be used to control which interfaces may forward packets. This also sets all interfaces' Host/Router setting 'forwarding' to the specified value. See below for details. @@ -2561,6 +2561,10 @@ proxy_ndp - BOOLEAN Default: 0 (disabled) +force_forwarding - BOOLEAN + Enable forwarding on this interface only -- regardless of the setting on + ``conf/all/forwarding``. When setting ``conf.all.forwarding`` to 0, + the ``force_forwarding`` flag will be reset on all interfaces. fwmark_reflect - BOOLEAN Controls the fwmark of kernel-generated IPv6 reply packets that are not diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index db0eb0d86b64..bc6ec2959173 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -17,6 +17,7 @@ struct ipv6_devconf { __s32 hop_limit; __s32 mtu6; __s32 forwarding; + __s32 force_forwarding; __s32 disable_policy; __s32 proxy_ndp; __cacheline_group_end(ipv6_devconf_read_txrx); diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index cf592d7b630f..d4d3ae774b26 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -199,6 +199,7 @@ enum { DEVCONF_NDISC_EVICT_NOCARRIER, DEVCONF_ACCEPT_UNTRACKED_NA, DEVCONF_ACCEPT_RA_MIN_LFT, + DEVCONF_FORCE_FORWARDING, DEVCONF_MAX }; diff --git a/include/uapi/linux/netconf.h b/include/uapi/linux/netconf.h index fac4edd55379..1c8c84d65ae3 100644 --- a/include/uapi/linux/netconf.h +++ b/include/uapi/linux/netconf.h @@ -19,6 +19,7 @@ enum { NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, NETCONFA_INPUT, NETCONFA_BC_FORWARDING, + NETCONFA_FORCE_FORWARDING, __NETCONFA_MAX }; #define NETCONFA_MAX (__NETCONFA_MAX - 1) diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index 8981f00204db..63d1464cb71c 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -573,6 +573,7 @@ enum { NET_IPV6_ACCEPT_RA_FROM_LOCAL=26, NET_IPV6_ACCEPT_RA_RT_INFO_MIN_PLEN=27, NET_IPV6_RA_DEFRTR_METRIC=28, + NET_IPV6_FORCE_FORWARDING=29, __NET_IPV6_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4f1d7d110302..81a067a2e526 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -239,6 +239,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .ndisc_evict_nocarrier = 1, .ra_honor_pio_life = 0, .ra_honor_pio_pflag = 0, + .force_forwarding = 0, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -303,6 +304,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .ndisc_evict_nocarrier = 1, .ra_honor_pio_life = 0, .ra_honor_pio_pflag = 0, + .force_forwarding = 0, }; /* Check if link is ready: is it up and is a valid qdisc available */ @@ -857,6 +859,9 @@ static void addrconf_forward_change(struct net *net, __s32 newf) idev = __in6_dev_get_rtnl_net(dev); if (idev) { int changed = (!idev->cnf.forwarding) ^ (!newf); + /* Disabling all.forwarding sets 0 to force_forwarding for all interfaces */ + if (newf == 0) + WRITE_ONCE(idev->cnf.force_forwarding, 0); WRITE_ONCE(idev->cnf.forwarding, newf); if (changed) @@ -5710,6 +5715,7 @@ static void ipv6_store_devconf(const struct ipv6_devconf *cnf, array[DEVCONF_ACCEPT_UNTRACKED_NA] = READ_ONCE(cnf->accept_untracked_na); array[DEVCONF_ACCEPT_RA_MIN_LFT] = READ_ONCE(cnf->accept_ra_min_lft); + array[DEVCONF_FORCE_FORWARDING] = READ_ONCE(cnf->force_forwarding); } static inline size_t inet6_ifla6_size(void) @@ -6738,6 +6744,75 @@ static int addrconf_sysctl_disable_policy(const struct ctl_table *ctl, int write return ret; } +static void addrconf_force_forward_change(struct net *net, __s32 newf) +{ + struct net_device *dev; + struct inet6_dev *idev; + + for_each_netdev(net, dev) { + idev = __in6_dev_get_rtnl_net(dev); + if (idev) { + int changed = (!idev->cnf.force_forwarding) ^ (!newf); + + WRITE_ONCE(idev->cnf.force_forwarding, newf); + if (changed) + inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, + NETCONFA_FORCE_FORWARDING, + dev->ifindex, &idev->cnf); + } + } +} + +static int addrconf_sysctl_force_forwarding(const struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct inet6_dev *idev = ctl->extra1; + struct ctl_table tmp_ctl = *ctl; + struct net *net = ctl->extra2; + int *valp = ctl->data; + int new_val = *valp; + int old_val = *valp; + loff_t pos = *ppos; + int ret; + + tmp_ctl.extra1 = SYSCTL_ZERO; + tmp_ctl.extra2 = SYSCTL_ONE; + tmp_ctl.data = &new_val; + + ret = proc_douintvec_minmax(&tmp_ctl, write, buffer, lenp, ppos); + + if (write && old_val != new_val) { + if (!rtnl_net_trylock(net)) + return restart_syscall(); + + WRITE_ONCE(*valp, new_val); + + if (valp == &net->ipv6.devconf_dflt->force_forwarding) { + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_FORCE_FORWARDING, + NETCONFA_IFINDEX_DEFAULT, + net->ipv6.devconf_dflt); + } else if (valp == &net->ipv6.devconf_all->force_forwarding) { + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_FORCE_FORWARDING, + NETCONFA_IFINDEX_ALL, + net->ipv6.devconf_all); + + addrconf_force_forward_change(net, new_val); + } else { + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_FORCE_FORWARDING, + idev->dev->ifindex, + &idev->cnf); + } + rtnl_net_unlock(net); + } + + if (ret) + *ppos = pos; + return ret; +} + static int minus_one = -1; static const int two_five_five = 255; static u32 ioam6_if_id_max = U16_MAX; @@ -7208,6 +7283,13 @@ static const struct ctl_table addrconf_sysctl[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO, }, + { + .procname = "force_forwarding", + .data = &ipv6_devconf.force_forwarding, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_force_forwarding, + }, }; static int __addrconf_sysctl_register(struct net *net, char *dev_name, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 0412f8544695..1e1410237b6e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -511,7 +511,8 @@ int ip6_forward(struct sk_buff *skb) u32 mtu; idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif)); - if (READ_ONCE(net->ipv6.devconf_all->forwarding) == 0) + if (!READ_ONCE(net->ipv6.devconf_all->forwarding) && + (!idev || !READ_ONCE(idev->cnf.force_forwarding))) goto error; if (skb->pkt_type != PACKET_HOST) diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 13e2678d418b..b31a71f2b372 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -116,6 +116,7 @@ TEST_GEN_FILES += skf_net_off TEST_GEN_FILES += tfo TEST_PROGS += tfo_passive.sh TEST_PROGS += broadcast_pmtu.sh +TEST_PROGS += ipv6_force_forwarding.sh # YNL files, must be before "include ..lib.mk" YNL_GEN_FILES := busy_poller netlink-dumps diff --git a/tools/testing/selftests/net/ipv6_force_forwarding.sh b/tools/testing/selftests/net/ipv6_force_forwarding.sh new file mode 100755 index 000000000000..bf0243366caa --- /dev/null +++ b/tools/testing/selftests/net/ipv6_force_forwarding.sh @@ -0,0 +1,105 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test IPv6 force_forwarding interface property +# +# This test verifies that the force_forwarding property works correctly: +# - When global forwarding is disabled, packets are not forwarded normally +# - When force_forwarding is enabled on an interface, packets are forwarded +# regardless of the global forwarding setting + +source lib.sh + +cleanup() { + cleanup_ns $ns1 $ns2 $ns3 +} + +trap cleanup EXIT + +setup_test() { + # Create three namespaces: sender, router, receiver + setup_ns ns1 ns2 ns3 + + # Create veth pairs: ns1 <-> ns2 <-> ns3 + ip link add name veth12 type veth peer name veth21 + ip link add name veth23 type veth peer name veth32 + + # Move interfaces to namespaces + ip link set veth12 netns $ns1 + ip link set veth21 netns $ns2 + ip link set veth23 netns $ns2 + ip link set veth32 netns $ns3 + + # Configure interfaces + ip -n $ns1 addr add 2001:db8:1::1/64 dev veth12 nodad + ip -n $ns2 addr add 2001:db8:1::2/64 dev veth21 nodad + ip -n $ns2 addr add 2001:db8:2::1/64 dev veth23 nodad + ip -n $ns3 addr add 2001:db8:2::2/64 dev veth32 nodad + + # Bring up interfaces + ip -n $ns1 link set veth12 up + ip -n $ns2 link set veth21 up + ip -n $ns2 link set veth23 up + ip -n $ns3 link set veth32 up + + # Add routes + ip -n $ns1 route add 2001:db8:2::/64 via 2001:db8:1::2 + ip -n $ns3 route add 2001:db8:1::/64 via 2001:db8:2::1 + + # Disable global forwarding + ip netns exec $ns2 sysctl -qw net.ipv6.conf.all.forwarding=0 +} + +test_force_forwarding() { + local ret=0 + + echo "TEST: force_forwarding functionality" + + # Check if force_forwarding sysctl exists + if ! ip netns exec $ns2 test -f /proc/sys/net/ipv6/conf/veth21/force_forwarding; then + echo "SKIP: force_forwarding not available" + return $ksft_skip + fi + + # Test 1: Without force_forwarding, ping should fail + ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth21.force_forwarding=0 + ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth23.force_forwarding=0 + + if ip netns exec $ns1 ping -6 -c 1 -W 2 2001:db8:2::2 &>/dev/null; then + echo "FAIL: ping succeeded when forwarding disabled" + ret=1 + else + echo "PASS: forwarding disabled correctly" + fi + + # Test 2: With force_forwarding enabled, ping should succeed + ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth21.force_forwarding=1 + ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth23.force_forwarding=1 + + if ip netns exec $ns1 ping -6 -c 1 -W 2 2001:db8:2::2 &>/dev/null; then + echo "PASS: force_forwarding enabled forwarding" + else + echo "FAIL: ping failed with force_forwarding enabled" + ret=1 + fi + + return $ret +} + +echo "IPv6 force_forwarding test" +echo "==========================" + +setup_test +test_force_forwarding +ret=$? + +if [ $ret -eq 0 ]; then + echo "OK" + exit 0 +elif [ $ret -eq $ksft_skip ]; then + echo "SKIP" + exit $ksft_skip +else + echo "FAIL" + exit 1 +fi -- cgit v1.2.3 From d1f3dbad6f0dbac6266c6b6b450af2aefde8481f Mon Sep 17 00:00:00 2001 From: Mohsin Bashir Date: Thu, 24 Jul 2025 16:51:40 -0700 Subject: selftests: drv-net: Wait for bkg socat to start Currently, UDP exchange is prone to failure when cmd attempt to send data while socat in bkg is not ready. Since, the behavior is probabilistic, this can result in flakiness for XDP tests. While testing test_xdp_native_tx_mb() on netdevsim, a failure rate of around 1% in 500 500 iterations was observed. Use wait_port_listen() to ensure that the bkg socat is started and ready to receive before cmd start sending. With proposed changes, a re-run of the same test passed 100% of time. Signed-off-by: Mohsin Bashir Link: https://patch.msgid.link/20250724235140.2645885-1-mohsin.bashr@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/xdp.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py index 887d662ad128..1dd8bf3bf6c9 100755 --- a/tools/testing/selftests/drivers/net/xdp.py +++ b/tools/testing/selftests/drivers/net/xdp.py @@ -13,7 +13,7 @@ from enum import Enum from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ne, ksft_pr from lib.py import KsftFailEx, NetDrvEpEnv, EthtoolFamily, NlError -from lib.py import bkg, cmd, rand_port +from lib.py import bkg, cmd, rand_port, wait_port_listen from lib.py import ip, bpftool, defer @@ -70,6 +70,7 @@ def _exchg_udp(cfg, port, test_string): tx_udp_cmd = f"echo -n {test_string} | socat -t 2 -u STDIN UDP:{cfg.baddr}:{port}" with bkg(rx_udp_cmd, exit_wait=True) as nc: + wait_port_listen(port, proto="udp") cmd(tx_udp_cmd, host=cfg.remote, shell=True) return nc.stdout.strip() @@ -310,6 +311,7 @@ def test_xdp_native_tx_mb(cfg): tx_udp = f"echo {test_string} | socat -t 2 -u STDIN UDP:{cfg.baddr}:{port}" with bkg(rx_udp, host=cfg.remote, exit_wait=True) as rnc: + wait_port_listen(port, proto="udp", host=cfg.remote) cmd(tx_udp, host=cfg.remote, shell=True) stats = _get_stats(prog_info['maps']['map_xdp_stats']) -- cgit v1.2.3 From 4c82768e413402c329043ed2a30a36bdfc82127b Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 24 Jul 2025 21:34:30 -0700 Subject: selftests/bpf: Increase xdp data size for arm64 64K page size With arm64 64K page size, the following 4 subtests failed: #97/25 dynptr/test_probe_read_user_dynptr:FAIL #97/26 dynptr/test_probe_read_kernel_dynptr:FAIL #97/27 dynptr/test_probe_read_user_str_dynptr:FAIL #97/28 dynptr/test_probe_read_kernel_str_dynptr:FAIL These failures are due to function bpf_dynptr_check_off_len() in include/linux/bpf.h where there is a test if (len > size || offset > size - len) return -E2BIG; With 64K page size, the 'offset' is greater than 'size - len', which caused the test failure. For 64KB page size, this patch increased the xdp buffer size from 5000 to 90000. The above 4 test failures are fixed as 'size' value is increased. But it introduced two new failures: #97/4 dynptr/test_dynptr_copy_xdp:FAIL #97/12 dynptr/test_dynptr_memset_xdp_chunks:FAIL These two failures will be addressed in subsequent patches. Signed-off-by: Yonghong Song Signed-off-by: Martin KaFai Lau Acked-by: Mykyta Yatsenko Link: https://patch.msgid.link/20250725043430.208469-1-yonghong.song@linux.dev --- tools/testing/selftests/bpf/prog_tests/dynptr.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c index f2b65398afce..9b2d9ceda210 100644 --- a/tools/testing/selftests/bpf/prog_tests/dynptr.c +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -51,6 +51,8 @@ static struct { {"test_copy_from_user_task_str_dynptr", SETUP_SYSCALL_SLEEP}, }; +#define PAGE_SIZE_64K 65536 + static void verify_success(const char *prog_name, enum test_setup_type setup_type) { char user_data[384] = {[0 ... 382] = 'a', '\0'}; @@ -146,14 +148,18 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ } case SETUP_XDP_PROG: { - char data[5000]; + char data[90000]; int err, prog_fd; LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = &data, - .data_size_in = sizeof(data), .repeat = 1, ); + if (getpagesize() == PAGE_SIZE_64K) + opts.data_size_in = sizeof(data); + else + opts.data_size_in = 5000; + prog_fd = bpf_program__fd(prog); err = bpf_prog_test_run_opts(prog_fd, &opts); -- cgit v1.2.3 From 90f791a975af80964b1ae6a370598c5bb8f565a3 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 24 Jul 2025 21:34:35 -0700 Subject: selftests/bpf: Fix test dynptr/test_dynptr_copy_xdp failure For arm64 64K page size, the bpf_dynptr_copy() in test dynptr/test_dynptr_copy_xdp will succeed, but the test will failure with 4K page size. This patch made a change so the test will fail expectedly for both 4K and 64K page sizes. Signed-off-by: Yonghong Song Signed-off-by: Martin KaFai Lau Acked-by: Mykyta Yatsenko Link: https://patch.msgid.link/20250725043435.208974-1-yonghong.song@linux.dev --- tools/testing/selftests/bpf/progs/dynptr_success.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index 7d7081d05d47..3094a1e4ee91 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -611,11 +611,12 @@ int test_dynptr_copy_xdp(struct xdp_md *xdp) struct bpf_dynptr ptr_buf, ptr_xdp; char data[] = "qwertyuiopasdfghjkl"; char buf[32] = {'\0'}; - __u32 len = sizeof(data); + __u32 len = sizeof(data), xdp_data_size; int i, chunks = 200; /* ptr_xdp is backed by non-contiguous memory */ bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp); + xdp_data_size = bpf_dynptr_size(&ptr_xdp); bpf_ringbuf_reserve_dynptr(&ringbuf, len * chunks, 0, &ptr_buf); /* Destination dynptr is backed by non-contiguous memory */ @@ -673,7 +674,7 @@ int test_dynptr_copy_xdp(struct xdp_md *xdp) goto out; } - if (bpf_dynptr_copy(&ptr_xdp, 2000, &ptr_xdp, 0, len * chunks) != -E2BIG) + if (bpf_dynptr_copy(&ptr_xdp, xdp_data_size - 3000, &ptr_xdp, 0, len * chunks) != -E2BIG) err = 1; out: -- cgit v1.2.3 From 4a5dcb337395db24976090e84f52d48e597697f9 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 24 Jul 2025 21:34:40 -0700 Subject: selftests/bpf: Fix test dynptr/test_dynptr_memset_xdp_chunks failure For arm64 64K page size, the xdp data size was set to be more than 64K in one of previous patches. This will cause failure for bpf_dynptr_memset(). Since the failure of bpf_dynptr_memset() is expected with 64K page size, return success. Signed-off-by: Yonghong Song Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20250725043440.209266-1-yonghong.song@linux.dev --- tools/testing/selftests/bpf/progs/dynptr_success.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index 3094a1e4ee91..8315273cb900 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -9,6 +9,8 @@ #include "bpf_misc.h" #include "errno.h" +#define PAGE_SIZE_64K 65536 + char _license[] SEC("license") = "GPL"; int pid, err, val; @@ -821,8 +823,17 @@ int test_dynptr_memset_xdp_chunks(struct xdp_md *xdp) data_sz = bpf_dynptr_size(&ptr_xdp); err = bpf_dynptr_memset(&ptr_xdp, 0, data_sz, DYNPTR_MEMSET_VAL); - if (err) + if (err) { + /* bpf_dynptr_memset() eventually called bpf_xdp_pointer() + * where if data_sz is greater than 0xffff, -EFAULT will be + * returned. For 64K page size, data_sz is greater than + * 64K, so error is expected and let us zero out error and + * return success. + */ + if (data_sz >= PAGE_SIZE_64K) + err = 0; goto out; + } bpf_for(i, 0, max_chunks) { offset = i * sizeof(buf); -- cgit v1.2.3 From 3435bd79ec13369281f87155b9b612ad0a379795 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 18 Jul 2025 12:11:54 +0100 Subject: KVM: arm64: selftest: vgic-v3: Add basic GICv3 sysreg userspace access test We have a lot of more or less useful vgic tests, but none of them tracks the availability of GICv3 system registers, which is a bit annoying. Add one such test, which covers both EL1 and EL2 registers. Signed-off-by: Marc Zyngier Tested-by: Itaru Kitayama Reviewed-by: Sebastian Ott Link: https://lore.kernel.org/r/20250718111154.104029-5-maz@kernel.org Signed-off-by: Oliver Upton --- tools/testing/selftests/kvm/arm64/vgic_init.c | 219 +++++++++++++++++++++++++- 1 file changed, 217 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/kvm/arm64/vgic_init.c b/tools/testing/selftests/kvm/arm64/vgic_init.c index b3b5fb0ff0a9..7d508dcdbf23 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_init.c +++ b/tools/testing/selftests/kvm/arm64/vgic_init.c @@ -9,6 +9,8 @@ #include #include +#include + #include "test_util.h" #include "kvm_util.h" #include "processor.h" @@ -18,8 +20,6 @@ #define REG_OFFSET(vcpu, offset) (((uint64_t)vcpu << 32) | offset) -#define GICR_TYPER 0x8 - #define VGIC_DEV_IS_V2(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V2) #define VGIC_DEV_IS_V3(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V3) @@ -715,6 +715,220 @@ int test_kvm_device(uint32_t gic_dev_type) return 0; } +struct sr_def { + const char *name; + u32 encoding; +}; + +#define PACK_SR(r) \ + ((sys_reg_Op0(r) << 14) | \ + (sys_reg_Op1(r) << 11) | \ + (sys_reg_CRn(r) << 7) | \ + (sys_reg_CRm(r) << 3) | \ + (sys_reg_Op2(r))) + +#define SR(r) \ + { \ + .name = #r, \ + .encoding = r, \ + } + +static const struct sr_def sysregs_el1[] = { + SR(SYS_ICC_PMR_EL1), + SR(SYS_ICC_BPR0_EL1), + SR(SYS_ICC_AP0R0_EL1), + SR(SYS_ICC_AP0R1_EL1), + SR(SYS_ICC_AP0R2_EL1), + SR(SYS_ICC_AP0R3_EL1), + SR(SYS_ICC_AP1R0_EL1), + SR(SYS_ICC_AP1R1_EL1), + SR(SYS_ICC_AP1R2_EL1), + SR(SYS_ICC_AP1R3_EL1), + SR(SYS_ICC_BPR1_EL1), + SR(SYS_ICC_CTLR_EL1), + SR(SYS_ICC_SRE_EL1), + SR(SYS_ICC_IGRPEN0_EL1), + SR(SYS_ICC_IGRPEN1_EL1), +}; + +static const struct sr_def sysregs_el2[] = { + SR(SYS_ICH_AP0R0_EL2), + SR(SYS_ICH_AP0R1_EL2), + SR(SYS_ICH_AP0R2_EL2), + SR(SYS_ICH_AP0R3_EL2), + SR(SYS_ICH_AP1R0_EL2), + SR(SYS_ICH_AP1R1_EL2), + SR(SYS_ICH_AP1R2_EL2), + SR(SYS_ICH_AP1R3_EL2), + SR(SYS_ICH_HCR_EL2), + SR(SYS_ICC_SRE_EL2), + SR(SYS_ICH_VTR_EL2), + SR(SYS_ICH_VMCR_EL2), + SR(SYS_ICH_LR0_EL2), + SR(SYS_ICH_LR1_EL2), + SR(SYS_ICH_LR2_EL2), + SR(SYS_ICH_LR3_EL2), + SR(SYS_ICH_LR4_EL2), + SR(SYS_ICH_LR5_EL2), + SR(SYS_ICH_LR6_EL2), + SR(SYS_ICH_LR7_EL2), + SR(SYS_ICH_LR8_EL2), + SR(SYS_ICH_LR9_EL2), + SR(SYS_ICH_LR10_EL2), + SR(SYS_ICH_LR11_EL2), + SR(SYS_ICH_LR12_EL2), + SR(SYS_ICH_LR13_EL2), + SR(SYS_ICH_LR14_EL2), + SR(SYS_ICH_LR15_EL2), +}; + +static void test_sysreg_array(int gic, const struct sr_def *sr, int nr, + int (*check)(int, const struct sr_def *, const char *)) +{ + for (int i = 0; i < nr; i++) { + u64 val; + u64 attr; + int ret; + + /* Assume MPIDR_EL1.Aff*=0 */ + attr = PACK_SR(sr[i].encoding); + + /* + * The API is braindead. A register can be advertised as + * available, and yet not be readable or writable. + * ICC_APnR{1,2,3}_EL1 are examples of such non-sense, and + * ICH_APnR{1,2,3}_EL2 do follow suit for consistency. + * + * On the bright side, no known HW is implementing more than + * 5 bits of priority, so we're safe. Sort of... + */ + ret = __kvm_has_device_attr(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + attr); + TEST_ASSERT(ret == 0, "%s unavailable", sr[i].name); + + /* Check that we can write back what we read */ + ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + attr, &val); + TEST_ASSERT(ret == 0 || !check(gic, &sr[i], "read"), "%s unreadable", sr[i].name); + ret = __kvm_device_attr_set(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + attr, &val); + TEST_ASSERT(ret == 0 || !check(gic, &sr[i], "write"), "%s unwritable", sr[i].name); + } +} + +static u8 get_ctlr_pribits(int gic) +{ + int ret; + u64 val; + u8 pri; + + ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + PACK_SR(SYS_ICC_CTLR_EL1), &val); + TEST_ASSERT(ret == 0, "ICC_CTLR_EL1 unreadable"); + + pri = FIELD_GET(ICC_CTLR_EL1_PRI_BITS_MASK, val) + 1; + TEST_ASSERT(pri >= 5 && pri <= 7, "Bad pribits %d", pri); + + return pri; +} + +static int check_unaccessible_el1_regs(int gic, const struct sr_def *sr, const char *what) +{ + switch (sr->encoding) { + case SYS_ICC_AP0R1_EL1: + case SYS_ICC_AP1R1_EL1: + if (get_ctlr_pribits(gic) >= 6) + return -EINVAL; + break; + case SYS_ICC_AP0R2_EL1: + case SYS_ICC_AP0R3_EL1: + case SYS_ICC_AP1R2_EL1: + case SYS_ICC_AP1R3_EL1: + if (get_ctlr_pribits(gic) == 7) + return 0; + break; + default: + return -EINVAL; + } + + pr_info("SKIP %s for %s\n", sr->name, what); + return 0; +} + +static u8 get_vtr_pribits(int gic) +{ + int ret; + u64 val; + u8 pri; + + ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + PACK_SR(SYS_ICH_VTR_EL2), &val); + TEST_ASSERT(ret == 0, "ICH_VTR_EL2 unreadable"); + + pri = FIELD_GET(ICH_VTR_EL2_PRIbits, val) + 1; + TEST_ASSERT(pri >= 5 && pri <= 7, "Bad pribits %d", pri); + + return pri; +} + +static int check_unaccessible_el2_regs(int gic, const struct sr_def *sr, const char *what) +{ + switch (sr->encoding) { + case SYS_ICH_AP0R1_EL2: + case SYS_ICH_AP1R1_EL2: + if (get_vtr_pribits(gic) >= 6) + return -EINVAL; + break; + case SYS_ICH_AP0R2_EL2: + case SYS_ICH_AP0R3_EL2: + case SYS_ICH_AP1R2_EL2: + case SYS_ICH_AP1R3_EL2: + if (get_vtr_pribits(gic) == 7) + return -EINVAL; + break; + default: + return -EINVAL; + } + + pr_info("SKIP %s for %s\n", sr->name, what); + return 0; +} + +static void test_v3_sysregs(void) +{ + struct kvm_vcpu_init init = {}; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + u32 feat = 0; + int gic; + + if (kvm_check_cap(KVM_CAP_ARM_EL2)) + feat |= BIT(KVM_ARM_VCPU_HAS_EL2); + + vm = vm_create(1); + + vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); + init.features[0] |= feat; + + vcpu = aarch64_vcpu_add(vm, 0, &init, NULL); + TEST_ASSERT(vcpu, "Can't create a vcpu?"); + + gic = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3); + TEST_ASSERT(gic >= 0, "No GIC???"); + + kvm_device_attr_set(gic, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); + + test_sysreg_array(gic, sysregs_el1, ARRAY_SIZE(sysregs_el1), check_unaccessible_el1_regs); + if (feat) + test_sysreg_array(gic, sysregs_el2, ARRAY_SIZE(sysregs_el2), check_unaccessible_el2_regs); + else + pr_info("SKIP EL2 registers, not available\n"); + + close(gic); + kvm_vm_free(vm); +} + void run_tests(uint32_t gic_dev_type) { test_vcpus_then_vgic(gic_dev_type); @@ -730,6 +944,7 @@ void run_tests(uint32_t gic_dev_type) test_v3_last_bit_single_rdist(); test_v3_redist_ipa_range_check_at_vcpu_run(); test_v3_its_region(); + test_v3_sysregs(); } } -- cgit v1.2.3 From 15b5964a411f386d53e41a5595a3dd5e6068a59e Mon Sep 17 00:00:00 2001 From: Raghavendra Rao Ananta Date: Wed, 23 Jul 2025 23:28:04 -0700 Subject: KVM: arm64: selftests: Add test for nASSGIcap attribute Extend vgic_init to test the nASSGIcap attribute, asserting that it is configurable (within reason) prior to initializing the VGIC. Additionally, check that userspace cannot set the attribute after the VGIC has been initialized. Signed-off-by: Raghavendra Rao Ananta Reviewed-by: Eric Auger Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20250724062805.2658919-6-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- tools/testing/selftests/kvm/arm64/vgic_init.c | 42 +++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/kvm/arm64/vgic_init.c b/tools/testing/selftests/kvm/arm64/vgic_init.c index b3b5fb0ff0a9..743351aa998e 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_init.c +++ b/tools/testing/selftests/kvm/arm64/vgic_init.c @@ -13,13 +13,12 @@ #include "kvm_util.h" #include "processor.h" #include "vgic.h" +#include "gic_v3.h" #define NR_VCPUS 4 #define REG_OFFSET(vcpu, offset) (((uint64_t)vcpu << 32) | offset) -#define GICR_TYPER 0x8 - #define VGIC_DEV_IS_V2(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V2) #define VGIC_DEV_IS_V3(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V3) @@ -675,6 +674,44 @@ static void test_v3_its_region(void) vm_gic_destroy(&v); } +static void test_v3_nassgicap(void) +{ + struct kvm_vcpu *vcpus[NR_VCPUS]; + bool has_nassgicap; + struct vm_gic vm; + u32 typer2; + int ret; + + vm = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus); + kvm_device_attr_get(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_TYPER2, &typer2); + has_nassgicap = typer2 & GICD_TYPER2_nASSGIcap; + + typer2 |= GICD_TYPER2_nASSGIcap; + ret = __kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_TYPER2, &typer2); + if (has_nassgicap) + TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_DEVICE_ATTR_SET, ret)); + else + TEST_ASSERT(ret && errno == EINVAL, + "Enabled nASSGIcap even though it's unavailable"); + + typer2 &= ~GICD_TYPER2_nASSGIcap; + kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_TYPER2, &typer2); + + kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); + + typer2 ^= GICD_TYPER2_nASSGIcap; + ret = __kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_TYPER2, &typer2); + TEST_ASSERT(ret && errno == EBUSY, + "Changed nASSGIcap after initializing the VGIC"); + + vm_gic_destroy(&vm); +} + /* * Returns 0 if it's possible to create GIC device of a given type (V2 or V3). */ @@ -730,6 +767,7 @@ void run_tests(uint32_t gic_dev_type) test_v3_last_bit_single_rdist(); test_v3_redist_ipa_range_check_at_vcpu_run(); test_v3_its_region(); + test_v3_nassgicap(); } } -- cgit v1.2.3 From 5b32321fdaf3fd1a92ec726af18765e225b0ee2b Mon Sep 17 00:00:00 2001 From: Xiumei Mu Date: Fri, 25 Jul 2025 11:50:28 +0800 Subject: selftests: rtnetlink.sh: remove esp4_offload after test The esp4_offload module, loaded during IPsec offload tests, should be reset to its default settings after testing. Otherwise, leaving it enabled could unintentionally affect subsequence test cases by keeping offload active. Without this fix: $ lsmod | grep offload; ./rtnetlink.sh -t kci_test_ipsec_offload ; lsmod | grep offload; PASS: ipsec_offload esp4_offload 12288 0 esp4 32768 1 esp4_offload With this fix: $ lsmod | grep offload; ./rtnetlink.sh -t kci_test_ipsec_offload ; lsmod | grep offload; PASS: ipsec_offload Fixes: 2766a11161cc ("selftests: rtnetlink: add ipsec offload API test") Signed-off-by: Xiumei Mu Reviewed-by: Shannon Nelson Reviewed-by: Hangbin Liu Link: https://patch.msgid.link/6d3a1d777c4de4eb0ca94ced9e77be8d48c5b12f.1753415428.git.xmu@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/rtnetlink.sh | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 7020f988f659..d6c00efeb664 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -720,6 +720,11 @@ kci_test_ipsec_offload() sysfsf=$sysfsd/ipsec sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/ probed=false + esp4_offload_probed_default=false + + if lsmod | grep -q esp4_offload; then + esp4_offload_probed_default=true + fi if ! mount | grep -q debugfs; then mount -t debugfs none /sys/kernel/debug/ &> /dev/null @@ -813,6 +818,7 @@ EOF fi # clean up any leftovers + ! "$esp4_offload_probed_default" && lsmod | grep -q esp4_offload && rmmod esp4_offload echo 0 > /sys/bus/netdevsim/del_device $probed && rmmod netdevsim -- cgit v1.2.3 From 38b74b212a34c37c3157ba6c3af7025fb9447458 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 26 Jul 2025 08:53:49 -0700 Subject: selftests: bpf: fix legacy netfilter options Recent commit to add NETFILTER_XTABLES_LEGACY missed setting a couple of configs to y. They are still enabled but as modules which appears to have upset BPF CI, e.g.: test_bpf_nf_ct:FAIL:iptables-legacy -t raw -A PREROUTING -j CONNMARK --set-mark 42/0 unexpected error: 768 (errno 0) Fixes: 3c3ab65f00eb ("selftests: net: Enable legacy netfilter legacy options.") Link: https://patch.msgid.link/20250726155349.1161845-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/bpf/config | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 521836776733..e8c6c77b96cb 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -97,6 +97,8 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NF_TABLES_IPV4=y CONFIG_NF_TABLES_IPV6=y CONFIG_NETFILTER_INGRESS=y +CONFIG_IP_NF_IPTABLES_LEGACY=y +CONFIG_IP6_NF_IPTABLES_LEGACY=y CONFIG_NETFILTER_XTABLES_LEGACY=y CONFIG_NF_FLOW_TABLE=y CONFIG_NF_FLOW_TABLE_INET=y -- cgit v1.2.3 From e9f545d0d336b37ece594a0bfd8d2d13ccbed6ab Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Thu, 24 Jul 2025 12:02:55 +0000 Subject: selftests/bpf: Enable private stack tests for arm64 As arm64 JIT now supports private stack, make sure all relevant tests run on arm64 architecture. Relevant tests: #415/1 struct_ops_private_stack/private_stack:OK #415/2 struct_ops_private_stack/private_stack_fail:OK #415/3 struct_ops_private_stack/private_stack_recur:OK #415 struct_ops_private_stack:OK #549/1 verifier_private_stack/Private stack, single prog:OK #549/2 verifier_private_stack/Private stack, subtree > MAX_BPF_STACK:OK #549/3 verifier_private_stack/No private stack:OK #549/4 verifier_private_stack/Private stack, callback:OK #549/5 verifier_private_stack/Private stack, exception in mainprog:OK #549/6 verifier_private_stack/Private stack, exception in subprog:OK #549/7 verifier_private_stack/Private stack, async callback, not nested:OK #549/8 verifier_private_stack/Private stack, async callback, potential nesting:OK #549 verifier_private_stack:OK Summary: 2/11 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Puranjay Mohan Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20250724120257.7299-4-puranjay@kernel.org --- .../selftests/bpf/progs/struct_ops_private_stack.c | 2 +- .../bpf/progs/struct_ops_private_stack_fail.c | 2 +- .../bpf/progs/struct_ops_private_stack_recur.c | 2 +- .../selftests/bpf/progs/verifier_private_stack.c | 89 +++++++++++++++++++++- 4 files changed, 91 insertions(+), 4 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c index 0e4d2ff63ab8..dbe646013811 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c @@ -7,7 +7,7 @@ char _license[] SEC("license") = "GPL"; -#if defined(__TARGET_ARCH_x86) +#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64) bool skip __attribute((__section__(".data"))) = false; #else bool skip = true; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c index 58d5d8dc2235..3d89ad7cbe2a 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c @@ -7,7 +7,7 @@ char _license[] SEC("license") = "GPL"; -#if defined(__TARGET_ARCH_x86) +#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64) bool skip __attribute((__section__(".data"))) = false; #else bool skip = true; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c index 31e58389bb8b..b1f6d7e5a8e5 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c @@ -7,7 +7,7 @@ char _license[] SEC("license") = "GPL"; -#if defined(__TARGET_ARCH_x86) +#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64) bool skip __attribute((__section__(".data"))) = false; #else bool skip = true; diff --git a/tools/testing/selftests/bpf/progs/verifier_private_stack.c b/tools/testing/selftests/bpf/progs/verifier_private_stack.c index fc91b414364e..1ecd34ebde19 100644 --- a/tools/testing/selftests/bpf/progs/verifier_private_stack.c +++ b/tools/testing/selftests/bpf/progs/verifier_private_stack.c @@ -8,7 +8,7 @@ /* From include/linux/filter.h */ #define MAX_BPF_STACK 512 -#if defined(__TARGET_ARCH_x86) +#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64) struct elem { struct bpf_timer t; @@ -30,6 +30,18 @@ __jited(" movabsq $0x{{.*}}, %r9") __jited(" addq %gs:{{.*}}, %r9") __jited(" movl $0x2a, %edi") __jited(" movq %rdi, -0x100(%r9)") +__arch_arm64 +__jited(" stp x25, x27, [sp, {{.*}}]!") +__jited(" mov x27, {{.*}}") +__jited(" movk x27, {{.*}}, lsl #16") +__jited(" movk x27, {{.*}}") +__jited(" mrs x10, TPIDR_EL{{[0-1]}}") +__jited(" add x27, x27, x10") +__jited(" add x25, x27, {{.*}}") +__jited(" mov x0, #0x2a") +__jited(" str x0, [x27]") +__jited("...") +__jited(" ldp x25, x27, [sp], {{.*}}") __naked void private_stack_single_prog(void) { asm volatile (" \ @@ -45,6 +57,9 @@ __description("No private stack") __success __arch_x86_64 __jited(" subq $0x8, %rsp") +__arch_arm64 +__jited(" mov x25, sp") +__jited(" sub sp, sp, #0x10") __naked void no_private_stack_nested(void) { asm volatile (" \ @@ -81,6 +96,19 @@ __jited(" pushq %r9") __jited(" callq 0x{{.*}}") __jited(" popq %r9") __jited(" xorl %eax, %eax") +__arch_arm64 +__jited(" stp x25, x27, [sp, {{.*}}]!") +__jited(" mov x27, {{.*}}") +__jited(" movk x27, {{.*}}, lsl #16") +__jited(" movk x27, {{.*}}") +__jited(" mrs x10, TPIDR_EL{{[0-1]}}") +__jited(" add x27, x27, x10") +__jited(" add x25, x27, {{.*}}") +__jited(" mov x0, #0x2a") +__jited(" str x0, [x27]") +__jited(" bl {{.*}}") +__jited("...") +__jited(" ldp x25, x27, [sp], {{.*}}") __naked void private_stack_nested_1(void) { asm volatile (" \ @@ -131,6 +159,24 @@ __jited(" movq %rdi, -0x200(%r9)") __jited(" pushq %r9") __jited(" callq") __jited(" popq %r9") +__arch_arm64 +__jited("func #1") +__jited("...") +__jited(" stp x25, x27, [sp, {{.*}}]!") +__jited(" mov x27, {{.*}}") +__jited(" movk x27, {{.*}}, lsl #16") +__jited(" movk x27, {{.*}}") +__jited(" mrs x10, TPIDR_EL{{[0-1]}}") +__jited(" add x27, x27, x10") +__jited(" add x25, x27, {{.*}}") +__jited(" bl 0x{{.*}}") +__jited(" add x7, x0, #0x0") +__jited(" mov x0, #0x2a") +__jited(" str x0, [x27]") +__jited(" bl 0x{{.*}}") +__jited(" add x7, x0, #0x0") +__jited(" mov x7, #0x0") +__jited(" ldp x25, x27, [sp], {{.*}}") __naked void private_stack_callback(void) { asm volatile (" \ @@ -154,6 +200,28 @@ __arch_x86_64 __jited(" pushq %r9") __jited(" callq") __jited(" popq %r9") +__arch_arm64 +__jited(" stp x29, x30, [sp, #-0x10]!") +__jited(" mov x29, sp") +__jited(" stp xzr, x26, [sp, #-0x10]!") +__jited(" mov x26, sp") +__jited(" stp x19, x20, [sp, #-0x10]!") +__jited(" stp x21, x22, [sp, #-0x10]!") +__jited(" stp x23, x24, [sp, #-0x10]!") +__jited(" stp x25, x26, [sp, #-0x10]!") +__jited(" stp x27, x28, [sp, #-0x10]!") +__jited(" mov x27, {{.*}}") +__jited(" movk x27, {{.*}}, lsl #16") +__jited(" movk x27, {{.*}}") +__jited(" mrs x10, TPIDR_EL{{[0-1]}}") +__jited(" add x27, x27, x10") +__jited(" add x25, x27, {{.*}}") +__jited(" mov x0, #0x2a") +__jited(" str x0, [x27]") +__jited(" mov x0, #0x0") +__jited(" bl 0x{{.*}}") +__jited(" add x7, x0, #0x0") +__jited(" ldp x27, x28, [sp], #0x10") int private_stack_exception_main_prog(void) { asm volatile (" \ @@ -179,6 +247,19 @@ __jited(" movq %rdi, -0x200(%r9)") __jited(" pushq %r9") __jited(" callq") __jited(" popq %r9") +__arch_arm64 +__jited(" stp x27, x28, [sp, #-0x10]!") +__jited(" mov x27, {{.*}}") +__jited(" movk x27, {{.*}}, lsl #16") +__jited(" movk x27, {{.*}}") +__jited(" mrs x10, TPIDR_EL{{[0-1]}}") +__jited(" add x27, x27, x10") +__jited(" add x25, x27, {{.*}}") +__jited(" mov x0, #0x2a") +__jited(" str x0, [x27]") +__jited(" bl 0x{{.*}}") +__jited(" add x7, x0, #0x0") +__jited(" ldp x27, x28, [sp], #0x10") int private_stack_exception_sub_prog(void) { asm volatile (" \ @@ -220,6 +301,10 @@ __description("Private stack, async callback, not nested") __success __retval(0) __arch_x86_64 __jited(" movabsq $0x{{.*}}, %r9") +__arch_arm64 +__jited(" mrs x10, TPIDR_EL{{[0-1]}}") +__jited(" add x27, x27, x10") +__jited(" add x25, x27, {{.*}}") int private_stack_async_callback_1(void) { struct bpf_timer *arr_timer; @@ -241,6 +326,8 @@ __description("Private stack, async callback, potential nesting") __success __retval(0) __arch_x86_64 __jited(" subq $0x100, %rsp") +__arch_arm64 +__jited(" sub sp, sp, #0x100") int private_stack_async_callback_2(void) { struct bpf_timer *arr_timer; -- cgit v1.2.3 From cf20cb9ad1eb3bf59ede1f93a0640f06e6b9d028 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 21 Jul 2025 23:03:30 -0700 Subject: selftests/damon/sysfs.py: stop DAMON for dumping failures Commit 4ece01897627 ("selftests/damon: add python and drgn-based DAMON sysfs test") in mm-stable tree introduced sysfs.py that runs drgn for dumping DAMON status. When the DAMON status dumping fails for reasons including drgn uninstalled environment, the test fails without stopping DAMON. Following DAMON selftests that assumes DAMON is not running when they executed therefore fail. Catch dumping failures and stop DAMON for that case. Link: https://lkml.kernel.org/r/20250722060330.56068-1-sj@kernel.org Fixes: 4ece01897627 ("selftests/damon: add python and drgn-based DAMON sysfs test") Signed-off-by: SeongJae Park Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202507220707.9c5d6247-lkp@intel.com Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/sysfs.py | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/sysfs.py b/tools/testing/selftests/damon/sysfs.py index e67008fd055d..fdc04f12be54 100755 --- a/tools/testing/selftests/damon/sysfs.py +++ b/tools/testing/selftests/damon/sysfs.py @@ -8,6 +8,10 @@ import subprocess import _damon_sysfs def dump_damon_status_dict(pid): + try: + subprocess.check_output(['which', 'drgn'], stderr=subprocess.DEVNULL) + except: + return None, 'drgn not found' file_dir = os.path.dirname(os.path.abspath(__file__)) dump_script = os.path.join(file_dir, 'drgn_dump_damon_status.py') rc = subprocess.call(['drgn', dump_script, pid, 'damon_dump_output'], @@ -40,6 +44,7 @@ def main(): status, err = dump_damon_status_dict(kdamonds.kdamonds[0].pid) if err is not None: print(err) + kdamonds.stop() exit(1) if len(status['contexts']) != 1: -- cgit v1.2.3 From 6da5e2961f3a1f350ec974b0c56374015b1c8fc1 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 20 Jul 2025 10:16:31 -0700 Subject: selftests/damon/_damon_sysfs: support DAMOS watermarks setup Patch series "selftests/damon/sysfs.py: test all parameters". sysfs.py tests if DAMON sysfs interface is passing the user-requested parameters to DAMON as expected. But only the default (minimum) parameters are being tested. This is partially because _damon_sysfs.py, which is the library for making the parameter requests, is not supporting the entire parameters. The internal DAMON status dump script (drgn_dump_damon_status.py) is also not dumping entire parameters. Extend the test coverage by updating parameters input and status dumping scripts to support all parameters, and writing additional tests using those. This increased test coverage actually found one real bug (https://lore.kernel.org/20250719181932.72944-1-sj@kernel.org). First seven patches (1-7) extend _damon_sysfs.py for all parameters setup. The eight patch (8) fixes _damon_sysfs.py to use correct max nr_acceses and age values for their type. Following three patches (9-11) extend drgn_dump_damon_status.py to dump full DAMON parameters. Following nine patches (12-20) refactor sysfs.py for general testing code reuse, and extend it for full parameters check. Finally, two patches (21 and 22) add test cases in sysfs.py for full parameters testing. This patch (of 22): _damon_sysfs.py contains code for test-purpose DAMON sysfs interface control. Add support of DAMOS watermarks setup for more tests. Link: https://lkml.kernel.org/r/20250720171652.92309-1-sj@kernel.org Link: https://lkml.kernel.org/r/20250720171652.92309-2-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/_damon_sysfs.py | 46 ++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 4 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py index f587e117472e..d81aa11e3d32 100644 --- a/tools/testing/selftests/damon/_damon_sysfs.py +++ b/tools/testing/selftests/damon/_damon_sysfs.py @@ -165,6 +165,42 @@ class DamosQuota: return err return None +class DamosWatermarks: + metric = None + interval = None + high = None + mid = None + low = None + scheme = None # owner scheme + + def __init__(self, metric='none', interval=0, high=0, mid=0, low=0): + self.metric = metric + self.interval = interval + self.high = high + self.mid = mid + self.low = low + + def sysfs_dir(self): + return os.path.join(self.scheme.sysfs_dir(), 'watermarks') + + def stage(self): + err = write_file(os.path.join(self.sysfs_dir(), 'metric'), self.metric) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'interval_us'), + self.interval) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'high'), self.high) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'mid'), self.mid) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'low'), self.low) + if err is not None: + return err + class DamosStats: nr_tried = None sz_tried = None @@ -190,6 +226,7 @@ class Damos: action = None access_pattern = None quota = None + watermarks = None apply_interval_us = None # todo: Support watermarks, stats idx = None @@ -199,12 +236,15 @@ class Damos: tried_regions = None def __init__(self, action='stat', access_pattern=DamosAccessPattern(), - quota=DamosQuota(), apply_interval_us=0): + quota=DamosQuota(), watermarks=DamosWatermarks(), + apply_interval_us=0): self.action = action self.access_pattern = access_pattern self.access_pattern.scheme = self self.quota = quota self.quota.scheme = self + self.watermarks = watermarks + self.watermarks.scheme = self self.apply_interval_us = apply_interval_us def sysfs_dir(self): @@ -227,9 +267,7 @@ class Damos: if err is not None: return err - # disable watermarks - err = write_file( - os.path.join(self.sysfs_dir(), 'watermarks', 'metric'), 'none') + err = self.watermarks.stage() if err is not None: return err -- cgit v1.2.3 From 9d93c103edf0cdf7f1f251b943799a7fee56a580 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 20 Jul 2025 10:16:32 -0700 Subject: selftests/damon/_damon_sysfs: support DAMOS filters setup _damon_sysfs.py contains code for test-purpose DAMON sysfs interface control. Add support of DAMOS filters setup for more tests. Link: https://lkml.kernel.org/r/20250720171652.92309-3-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/_damon_sysfs.py | 115 +++++++++++++++++++++++++- 1 file changed, 111 insertions(+), 4 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py index d81aa11e3d32..f853af6ad926 100644 --- a/tools/testing/selftests/damon/_damon_sysfs.py +++ b/tools/testing/selftests/damon/_damon_sysfs.py @@ -201,6 +201,96 @@ class DamosWatermarks: if err is not None: return err +class DamosFilter: + type_ = None + matching = None + allow = None + memcg_path = None + addr_start = None + addr_end = None + target_idx = None + min_ = None + max_ = None + idx = None + filters = None # owner filters + + def __init__(self, type_='anon', matching=False, allow=False, + memcg_path='', addr_start=0, addr_end=0, target_idx=0, min_=0, + max_=0): + self.type_ = type_ + self.matching = matching + self.allow = allow + self.memcg_path = memcg_path, + self.addr_start = addr_start + self.addr_end = addr_end + self.target_idx = target_idx + self.min_ = min_ + self.max_ = max_ + + def sysfs_dir(self): + return os.path.join(self.filters.sysfs_dir(), '%d' % self.idx) + + def stage(self): + err = write_file(os.path.join(self.sysfs_dir(), 'type'), self.type_) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'matching'), + self.matching) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'allow'), self.allow) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'memcg_path'), + self.memcg_path) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'addr_start'), + self.addr_start) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'addr_end'), + self.addr_end) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'damon_target_idx'), + self.target_idx) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'min'), self.min_) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'max'), self.max_) + if err is not None: + return err + return None + +class DamosFilters: + name = None + filters = None + scheme = None # owner scheme + + def __init__(self, name, filters=[]): + self.name = name + self.filters = filters + for idx, filter_ in enumerate(self.filters): + filter_.idx = idx + filter_.filters = self + + def sysfs_dir(self): + return os.path.join(self.scheme.sysfs_dir(), self.name) + + def stage(self): + err = write_file(os.path.join(self.sysfs_dir(), 'nr_filters'), + len(self.filters)) + if err is not None: + return err + for filter_ in self.filters: + err = filter_.stage() + if err is not None: + return err + return None + class DamosStats: nr_tried = None sz_tried = None @@ -227,8 +317,10 @@ class Damos: access_pattern = None quota = None watermarks = None + core_filters = None + ops_filters = None + filters = None apply_interval_us = None - # todo: Support watermarks, stats idx = None context = None tried_bytes = None @@ -237,6 +329,7 @@ class Damos: def __init__(self, action='stat', access_pattern=DamosAccessPattern(), quota=DamosQuota(), watermarks=DamosWatermarks(), + core_filters=[], ops_filters=[], filters=[], apply_interval_us=0): self.action = action self.access_pattern = access_pattern @@ -245,6 +338,16 @@ class Damos: self.quota.scheme = self self.watermarks = watermarks self.watermarks.scheme = self + + self.core_filters = DamosFilters(name='core_filters', + filters=core_filters) + self.core_filters.scheme = self + self.ops_filters = DamosFilters(name='ops_filters', + filters=ops_filters) + self.ops_filters.scheme = self + self.filters = DamosFilters(name='filters', filters=filters) + self.filters.scheme = self + self.apply_interval_us = apply_interval_us def sysfs_dir(self): @@ -271,9 +374,13 @@ class Damos: if err is not None: return err - # disable filters - err = write_file( - os.path.join(self.sysfs_dir(), 'filters', 'nr_filters'), '0') + err = self.core_filters.stage() + if err is not None: + return err + err = self.ops_filters.stage() + if err is not None: + return err + err = self.filters.stage() if err is not None: return err -- cgit v1.2.3 From b436dfaad2ba7bf5061017248ee595f299074610 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 20 Jul 2025 10:16:33 -0700 Subject: selftests/damon/_damon_sysfs: support monitoring intervals goal setup _damon_sysfs.py contains code for test-purpose DAMON sysfs interface control. Add support of the monitoring intervals auto-tune goal setup for more tests. Link: https://lkml.kernel.org/r/20250720171652.92309-4-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/_damon_sysfs.py | 43 ++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py index f853af6ad926..ec6230929d36 100644 --- a/tools/testing/selftests/damon/_damon_sysfs.py +++ b/tools/testing/selftests/damon/_damon_sysfs.py @@ -405,18 +405,56 @@ class DamonTarget: return write_file( os.path.join(self.sysfs_dir(), 'pid_target'), self.pid) +class IntervalsGoal: + access_bp = None + aggrs = None + min_sample_us = None + max_sample_us = None + attrs = None # owner DamonAttrs + + def __init__(self, access_bp=0, aggrs=0, min_sample_us=0, max_sample_us=0): + self.access_bp = access_bp + self.aggrs = aggrs + self.min_sample_us = min_sample_us + self.max_sample_us = max_sample_us + + def sysfs_dir(self): + return os.path.join(self.attrs.interval_sysfs_dir(), 'intervals_goal') + + def stage(self): + err = write_file( + os.path.join(self.sysfs_dir(), 'access_bp'), self.access_bp) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'aggrs'), self.aggrs) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'min_sample_us'), + self.min_sample_us) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'max_sample_us'), + self.max_sample_us) + if err is not None: + return err + return None + class DamonAttrs: sample_us = None aggr_us = None + intervals_goal = None update_us = None min_nr_regions = None max_nr_regions = None context = None - def __init__(self, sample_us=5000, aggr_us=100000, update_us=1000000, + def __init__(self, sample_us=5000, aggr_us=100000, + intervals_goal=IntervalsGoal(), update_us=1000000, min_nr_regions=10, max_nr_regions=1000): self.sample_us = sample_us self.aggr_us = aggr_us + self.intervals_goal = intervals_goal + self.intervals_goal.attrs = self self.update_us = update_us self.min_nr_regions = min_nr_regions self.max_nr_regions = max_nr_regions @@ -436,6 +474,9 @@ class DamonAttrs: return err err = write_file(os.path.join(self.interval_sysfs_dir(), 'aggr_us'), self.aggr_us) + if err is not None: + return err + err = self.intervals_goal.stage() if err is not None: return err err = write_file(os.path.join(self.interval_sysfs_dir(), 'update_us'), -- cgit v1.2.3 From ff5aae307bfb789c9e9c4564bc19d5393aa2fc43 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 20 Jul 2025 10:16:34 -0700 Subject: selftests/damon/_damon_sysfs: support DAMOS quota weights setup _damon_sysfs.py contains code for test-purpose DAMON sysfs interface control. Add support of DAMOS quotas prioritization weights setup for more tests. Link: https://lkml.kernel.org/r/20250720171652.92309-5-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/_damon_sysfs.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py index ec6230929d36..12d076260b2b 100644 --- a/tools/testing/selftests/damon/_damon_sysfs.py +++ b/tools/testing/selftests/damon/_damon_sysfs.py @@ -125,12 +125,20 @@ class DamosQuota: ms = None # time quota goals = None # quota goals reset_interval_ms = None # quota reset interval + weight_sz_permil = None + weight_nr_accesses_permil = None + weight_age_permil = None scheme = None # owner scheme - def __init__(self, sz=0, ms=0, goals=None, reset_interval_ms=0): + def __init__(self, sz=0, ms=0, goals=None, reset_interval_ms=0, + weight_sz_permil=0, weight_nr_accesses_permil=0, + weight_age_permil=0): self.sz = sz self.ms = ms self.reset_interval_ms = reset_interval_ms + self.weight_sz_permil = weight_sz_permil + self.weight_nr_accesses_permil = weight_nr_accesses_permil + self.weight_age_permil = weight_age_permil self.goals = goals if goals is not None else [] for idx, goal in enumerate(self.goals): goal.idx = idx @@ -151,6 +159,20 @@ class DamosQuota: if err is not None: return err + err = write_file(os.path.join( + self.sysfs_dir(), 'weights', 'sz_permil'), self.weight_sz_permil) + if err is not None: + return err + err = write_file(os.path.join( + self.sysfs_dir(), 'weights', 'nr_accesses_permil'), + self.weight_nr_accesses_permil) + if err is not None: + return err + err = write_file(os.path.join( + self.sysfs_dir(), 'weights', 'age_permil'), self.weight_age_permil) + if err is not None: + return err + nr_goals_file = os.path.join(self.sysfs_dir(), 'goals', 'nr_goals') content, err = read_file(nr_goals_file) if err is not None: -- cgit v1.2.3 From 229b0af6640704bb709fae356108c11d6e63058d Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 20 Jul 2025 10:16:35 -0700 Subject: selftests/damon/_damon_sysfs: support DAMOS quota goal nid setup _damon_sysfs.py contains code for test-purpose DAMON sysfs interface control. Add support of DAMOS quota goal nid setup for more tests. Link: https://lkml.kernel.org/r/20250720171652.92309-6-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/_damon_sysfs.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py index 12d076260b2b..23de9202b4e3 100644 --- a/tools/testing/selftests/damon/_damon_sysfs.py +++ b/tools/testing/selftests/damon/_damon_sysfs.py @@ -93,14 +93,16 @@ class DamosQuotaGoal: metric = None target_value = None current_value = None + nid = None effective_bytes = None quota = None # owner quota idx = None - def __init__(self, metric, target_value=10000, current_value=0): + def __init__(self, metric, target_value=10000, current_value=0, nid=0): self.metric = metric self.target_value = target_value self.current_value = current_value + self.nid = nid def sysfs_dir(self): return os.path.join(self.quota.sysfs_dir(), 'goals', '%d' % self.idx) @@ -118,6 +120,10 @@ class DamosQuotaGoal: self.current_value) if err is not None: return err + err = write_file(os.path.join(self.sysfs_dir(), 'nid'), self.nid) + if err is not None: + return err + return None class DamosQuota: -- cgit v1.2.3 From fca6ddf44df477d7ecaac4840b15da635798f1eb Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 20 Jul 2025 10:16:36 -0700 Subject: selftests/damon/_damon_sysfs: support DAMOS action dests setup _damon_sysfs.py contains code for test-purpose DAMON sysfs interface control. Add support of DAMOS action destinations setup for more tests. Link: https://lkml.kernel.org/r/20250720171652.92309-7-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/_damon_sysfs.py | 56 ++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py index 23de9202b4e3..2d95ab564885 100644 --- a/tools/testing/selftests/damon/_damon_sysfs.py +++ b/tools/testing/selftests/damon/_damon_sysfs.py @@ -319,6 +319,52 @@ class DamosFilters: return err return None +class DamosDest: + id = None + weight = None + idx = None + dests = None # owner dests + + def __init__(self, id=0, weight=0): + self.id = id + self.weight = weight + + def sysfs_dir(self): + return os.path.join(self.dests.sysfs_dir(), '%d' % self.idx) + + def stage(self): + err = write_file(os.path.join(self.sysfs_dir(), 'id'), self.id) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'weight'), self.weight) + if err is not None: + return err + return None + +class DamosDests: + dests = None + scheme = None # owner scheme + + def __init__(self, dests=[]): + self.dests = dests + for idx, dest in enumerate(self.dests): + dest.idx = idx + dest.dests = self + + def sysfs_dir(self): + return os.path.join(self.scheme.sysfs_dir(), 'dests') + + def stage(self): + err = write_file(os.path.join(self.sysfs_dir(), 'nr_dests'), + len(self.dests)) + if err is not None: + return err + for dest in self.dests: + err = dest.stage() + if err is not None: + return err + return None + class DamosStats: nr_tried = None sz_tried = None @@ -349,6 +395,7 @@ class Damos: ops_filters = None filters = None apply_interval_us = None + dests = None idx = None context = None tried_bytes = None @@ -358,7 +405,7 @@ class Damos: def __init__(self, action='stat', access_pattern=DamosAccessPattern(), quota=DamosQuota(), watermarks=DamosWatermarks(), core_filters=[], ops_filters=[], filters=[], - apply_interval_us=0): + dests=DamosDests(), apply_interval_us=0): self.action = action self.access_pattern = access_pattern self.access_pattern.scheme = self @@ -376,6 +423,9 @@ class Damos: self.filters = DamosFilters(name='filters', filters=filters) self.filters.scheme = self + self.dests = dests + self.dests.scheme = self + self.apply_interval_us = apply_interval_us def sysfs_dir(self): @@ -412,6 +462,10 @@ class Damos: if err is not None: return err + err = self.dests.stage() + if err is not None: + return err + class DamonTarget: pid = None # todo: Support target regions if test is made -- cgit v1.2.3 From 86e541f0be477d4656a02c5438c5034eae814c23 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 20 Jul 2025 10:16:37 -0700 Subject: selftests/damon/_damon_sysfs: support DAMOS target_nid setup _damon_sysfs.py contains code for test-purpose DAMON sysfs interface control. Add support of DAMOS action destination target_nid setup for more tests. Link: https://lkml.kernel.org/r/20250720171652.92309-8-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/_damon_sysfs.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py index 2d95ab564885..70860d925503 100644 --- a/tools/testing/selftests/damon/_damon_sysfs.py +++ b/tools/testing/selftests/damon/_damon_sysfs.py @@ -395,6 +395,7 @@ class Damos: ops_filters = None filters = None apply_interval_us = None + target_nid = None dests = None idx = None context = None @@ -404,7 +405,7 @@ class Damos: def __init__(self, action='stat', access_pattern=DamosAccessPattern(), quota=DamosQuota(), watermarks=DamosWatermarks(), - core_filters=[], ops_filters=[], filters=[], + core_filters=[], ops_filters=[], filters=[], target_nid=0, dests=DamosDests(), apply_interval_us=0): self.action = action self.access_pattern = access_pattern @@ -423,6 +424,7 @@ class Damos: self.filters = DamosFilters(name='filters', filters=filters) self.filters.scheme = self + self.target_nid = target_nid self.dests = dests self.dests.scheme = self @@ -462,6 +464,11 @@ class Damos: if err is not None: return err + err = write_file(os.path.join(self.sysfs_dir(), 'target_nid'), '%d' % + self.target_nid) + if err is not None: + return err + err = self.dests.stage() if err is not None: return err -- cgit v1.2.3 From 80d4e381075f5e454482c76aeaaafbbd76994aeb Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 20 Jul 2025 10:16:38 -0700 Subject: selftests/damon/_damon_sysfs: use 2**32 - 1 as max nr_accesses and age nr_accesses and age are unsigned int. Use the proper max value. Link: https://lkml.kernel.org/r/20250720171652.92309-9-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/_damon_sysfs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py index 70860d925503..a0e6290833fb 100644 --- a/tools/testing/selftests/damon/_damon_sysfs.py +++ b/tools/testing/selftests/damon/_damon_sysfs.py @@ -52,9 +52,9 @@ class DamosAccessPattern: if self.size is None: self.size = [0, 2**64 - 1] if self.nr_accesses is None: - self.nr_accesses = [0, 2**64 - 1] + self.nr_accesses = [0, 2**32 - 1] if self.age is None: - self.age = [0, 2**64 - 1] + self.age = [0, 2**32 - 1] def sysfs_dir(self): return os.path.join(self.scheme.sysfs_dir(), 'access_pattern') -- cgit v1.2.3 From c1a69589571270f70f1995d88e41b2e262ad05ab Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 20 Jul 2025 10:16:39 -0700 Subject: selftests/damon/drgn_dump_damon_status: dump damos->migrate_dests drgn_dump_damon_status.py is a script for dumping DAMON internal status in json format. It is being used for seeing if DAMON parameters that are set using _damon_sysfs.py are actually passed to DAMON in the kernel space. It is, however, not dumping full DAMON internal status, and it makes increasing test coverage difficult. Add damos->migrate_dests dumping for more tests. Link: https://lkml.kernel.org/r/20250720171652.92309-10-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/drgn_dump_damon_status.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/drgn_dump_damon_status.py b/tools/testing/selftests/damon/drgn_dump_damon_status.py index 333a0d0c4bff..8db081f965f5 100755 --- a/tools/testing/selftests/damon/drgn_dump_damon_status.py +++ b/tools/testing/selftests/damon/drgn_dump_damon_status.py @@ -117,6 +117,19 @@ def damos_watermarks_to_dict(watermarks): ['high', int], ['mid', int], ['low', int], ]) +def damos_migrate_dests_to_dict(dests): + nr_dests = int(dests.nr_dests) + node_id_arr = [] + weight_arr = [] + for i in range(nr_dests): + node_id_arr.append(int(dests.node_id_arr[i])) + weight_arr.append(int(dests.weight_arr[i])) + return { + 'node_id_arr': node_id_arr, + 'weight_arr': weight_arr, + 'nr_dests': nr_dests, + } + def scheme_to_dict(scheme): return to_dict(scheme, [ ['pattern', damos_access_pattern_to_dict], @@ -125,6 +138,7 @@ def scheme_to_dict(scheme): ['quota', damos_quota_to_dict], ['wmarks', damos_watermarks_to_dict], ['target_nid', int], + ['migrate_dests', damos_migrate_dests_to_dict], ]) def schemes_to_list(schemes): -- cgit v1.2.3 From eb413daaf222b1d545f6b46253f0ace97195ccd4 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 20 Jul 2025 10:16:40 -0700 Subject: selftests/damon/drgn_dump_damon_status: dump ctx->ops.id drgn_dump_damon_status.py is a script for dumping DAMON internal status in json format. It is being used for seeing if DAMON parameters that are set using _damon_sysfs.py are actually passed to DAMON in the kernel space. It is, however, not dumping full DAMON internal status, and it makes increasing test coverage difficult. Add ctx->ops.id dumping for more tests. Link: https://lkml.kernel.org/r/20250720171652.92309-11-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/drgn_dump_damon_status.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/drgn_dump_damon_status.py b/tools/testing/selftests/damon/drgn_dump_damon_status.py index 8db081f965f5..cf5d492670d8 100755 --- a/tools/testing/selftests/damon/drgn_dump_damon_status.py +++ b/tools/testing/selftests/damon/drgn_dump_damon_status.py @@ -25,6 +25,11 @@ def to_dict(object, attr_name_converter): d[attr_name] = converter(getattr(object, attr_name)) return d +def ops_to_dict(ops): + return to_dict(ops, [ + ['id', int], + ]) + def intervals_goal_to_dict(goal): return to_dict(goal, [ ['access_bp', int], @@ -148,6 +153,7 @@ def schemes_to_list(schemes): def damon_ctx_to_dict(ctx): return to_dict(ctx, [ + ['ops', ops_to_dict], ['attrs', attrs_to_dict], ['adaptive_targets', targets_to_list], ['schemes', schemes_to_list], -- cgit v1.2.3 From a1d52cd0302d7aefe26edbbccd1d1ae70cca50c9 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 20 Jul 2025 10:16:41 -0700 Subject: selftests/damon/drgn_dump_damon_status: dump DAMOS filters drgn_dump_damon_status.py is a script for dumping DAMON internal status in json format. It is being used for seeing if DAMON parameters that are set using _damon_sysfs.py are actually passed to DAMON in the kernel space. It is, however, not dumping full DAMON internal status, and it makes increasing test coverage difficult. Add damos filters dumping for more tests. Link: https://lkml.kernel.org/r/20250720171652.92309-12-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- .../selftests/damon/drgn_dump_damon_status.py | 43 +++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/drgn_dump_damon_status.py b/tools/testing/selftests/damon/drgn_dump_damon_status.py index cf5d492670d8..7233369a3a44 100755 --- a/tools/testing/selftests/damon/drgn_dump_damon_status.py +++ b/tools/testing/selftests/damon/drgn_dump_damon_status.py @@ -135,8 +135,37 @@ def damos_migrate_dests_to_dict(dests): 'nr_dests': nr_dests, } +def damos_filter_to_dict(damos_filter): + filter_type_keyword = { + 0: 'anon', + 1: 'active', + 2: 'memcg', + 3: 'young', + 4: 'hugepage_size', + 5: 'unmapped', + 6: 'addr', + 7: 'target' + } + dict_ = { + 'type': filter_type_keyword[int(damos_filter.type)], + 'matching': bool(damos_filter.matching), + 'allow': bool(damos_filter.allow), + } + type_ = dict_['type'] + if type_ == 'memcg': + dict_['memcg_id'] = int(damos_filter.memcg_id) + elif type_ == 'addr': + dict_['addr_range'] = [int(damos_filter.addr_range.start), + int(damos_filter.addr_range.end)] + elif type_ == 'target': + dict_['target_idx'] = int(damos_filter.target_idx) + elif type_ == 'hugeapge_size': + dict_['sz_range'] = [int(damos_filter.sz_range.min), + int(damos_filter.sz_range.max)] + return dict_ + def scheme_to_dict(scheme): - return to_dict(scheme, [ + dict_ = to_dict(scheme, [ ['pattern', damos_access_pattern_to_dict], ['action', int], ['apply_interval_us', int], @@ -145,6 +174,18 @@ def scheme_to_dict(scheme): ['target_nid', int], ['migrate_dests', damos_migrate_dests_to_dict], ]) + filters = [] + for f in list_for_each_entry( + 'struct damos_filter', scheme.filters.address_of_(), 'list'): + filters.append(damos_filter_to_dict(f)) + dict_['filters'] = filters + ops_filters = [] + for f in list_for_each_entry( + 'struct damos_filter', scheme.ops_filters.address_of_(), 'list'): + ops_filters.append(damos_filter_to_dict(f)) + dict_['ops_filters'] = ops_filters + + return dict_ def schemes_to_list(schemes): return [scheme_to_dict(s) -- cgit v1.2.3 From b50c48de6111bc70731ba375e1b45a65e63e287f Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 20 Jul 2025 10:16:42 -0700 Subject: selftests/damon/sysfs.py: generalize DAMOS Watermarks commit assertion DamosWatermarks commitment assertion is hard-coded for a specific test case. Split it out into a general version that can be reused for different test cases. Link: https://lkml.kernel.org/r/20250720171652.92309-13-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/sysfs.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/sysfs.py b/tools/testing/selftests/damon/sysfs.py index fdc04f12be54..cb34444323e1 100755 --- a/tools/testing/selftests/damon/sysfs.py +++ b/tools/testing/selftests/damon/sysfs.py @@ -29,6 +29,22 @@ def fail(expectation, status): print(json.dumps(status, indent=4)) exit(1) +def assert_true(condition, expectation, status): + if condition is not True: + fail(expectation, status) + +def assert_watermarks_committed(watermarks, dump): + wmark_metric_val = { + 'none': 0, + 'free_mem_rate': 1, + } + assert_true(dump['metric'] == wmark_metric_val[watermarks.metric], + 'metric', dump) + assert_true(dump['interval'] == watermarks.interval, 'interval', dump) + assert_true(dump['high'] == watermarks.high, 'high', dump) + assert_true(dump['mid'] == watermarks.mid, 'mid', dump) + assert_true(dump['low'] == watermarks.low, 'low', dump) + def main(): kdamonds = _damon_sysfs.Kdamonds( [_damon_sysfs.Kdamond( @@ -105,14 +121,8 @@ def main(): }: fail('damos quota', status) - if scheme['wmarks'] != { - 'metric': 0, - 'interval': 0, - 'high': 0, - 'mid': 0, - 'low': 0, - }: - fail('damos wmarks', status) + assert_watermarks_committed(_damon_sysfs.DamosWatermarks(), + scheme['wmarks']) kdamonds.stop() -- cgit v1.2.3 From f797e709f741d5a28e4f3ca2592e5cc1d7e21e3b Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 20 Jul 2025 10:16:43 -0700 Subject: selftests/damon/sysfs.py: generalize DamosQuota commit assertion DamosQuota commitment assertion is hard-coded for a specific test case. Split it out into a general version that can be reused for different test cases. Link: https://lkml.kernel.org/r/20250720171652.92309-14-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/sysfs.py | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/sysfs.py b/tools/testing/selftests/damon/sysfs.py index cb34444323e1..5f46b5f73896 100755 --- a/tools/testing/selftests/damon/sysfs.py +++ b/tools/testing/selftests/damon/sysfs.py @@ -45,6 +45,18 @@ def assert_watermarks_committed(watermarks, dump): assert_true(dump['mid'] == watermarks.mid, 'mid', dump) assert_true(dump['low'] == watermarks.low, 'low', dump) +def assert_quota_committed(quota, dump): + assert_true(dump['reset_interval'] == quota.reset_interval_ms, + 'reset_interval', dump) + assert_true(dump['ms'] == quota.ms, 'ms', dump) + assert_true(dump['sz'] == quota.sz, 'sz', dump) + # TODO: assert goals are committed + assert_true(dump['weight_sz'] == quota.weight_sz_permil, 'weight_sz', dump) + assert_true(dump['weight_nr_accesses'] == quota.weight_nr_accesses_permil, + 'weight_nr_accesses', dump) + assert_true( + dump['weight_age'] == quota.weight_age_permil, 'weight_age', dump) + def main(): kdamonds = _damon_sysfs.Kdamonds( [_damon_sysfs.Kdamond( @@ -109,18 +121,15 @@ def main(): if scheme['target_nid'] != -1: fail('damos target nid', status) - if scheme['quota'] != { - 'reset_interval': 0, - 'ms': 0, - 'sz': 0, - 'goals': [], - 'esz': 0, - 'weight_sz': 0, - 'weight_nr_accesses': 0, - 'weight_age': 0, - }: - fail('damos quota', status) + migrate_dests = scheme['migrate_dests'] + if migrate_dests['nr_dests'] != 0: + fail('nr_dests', status) + if migrate_dests['node_id_arr'] != []: + fail('node_id_arr', status) + if migrate_dests['weight_arr'] != []: + fail('weight_arr', status) + assert_quota_committed(_damon_sysfs.DamosQuota(), scheme['quota']) assert_watermarks_committed(_damon_sysfs.DamosWatermarks(), scheme['wmarks']) -- cgit v1.2.3 From 84dc442bd552f3e0b9abc6f1531431beaea68518 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 20 Jul 2025 10:16:44 -0700 Subject: selftests/damon/sysfs.py: test quota goal commitment Current DAMOS quota commitment assertion is not testing quota goal commitment. Add the test. Link: https://lkml.kernel.org/r/20250720171652.92309-15-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/sysfs.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/sysfs.py b/tools/testing/selftests/damon/sysfs.py index 5f46b5f73896..8d859fee1141 100755 --- a/tools/testing/selftests/damon/sysfs.py +++ b/tools/testing/selftests/damon/sysfs.py @@ -45,12 +45,28 @@ def assert_watermarks_committed(watermarks, dump): assert_true(dump['mid'] == watermarks.mid, 'mid', dump) assert_true(dump['low'] == watermarks.low, 'low', dump) +def assert_quota_goal_committed(qgoal, dump): + metric_val = { + 'user_input': 0, + 'some_mem_psi_us': 1, + 'node_mem_used_bp': 2, + 'node_mem_free_bp': 3, + } + assert_true(dump['metric'] == metric_val[qgoal.metric], 'metric', dump) + assert_true(dump['target_value'] == qgoal.target_value, 'target_value', + dump) + if qgoal.metric == 'user_input': + assert_true(dump['current_value'] == qgoal.current_value, + 'current_value', dump) + assert_true(dump['nid'] == qgoal.nid, 'nid', dump) + def assert_quota_committed(quota, dump): assert_true(dump['reset_interval'] == quota.reset_interval_ms, 'reset_interval', dump) assert_true(dump['ms'] == quota.ms, 'ms', dump) assert_true(dump['sz'] == quota.sz, 'sz', dump) - # TODO: assert goals are committed + for idx, qgoal in enumerate(quota.goals): + assert_quota_goal_committed(qgoal, dump['goals'][idx]) assert_true(dump['weight_sz'] == quota.weight_sz_permil, 'weight_sz', dump) assert_true(dump['weight_nr_accesses'] == quota.weight_nr_accesses_permil, 'weight_nr_accesses', dump) -- cgit v1.2.3 From bd0487a7745ea84fb0ece7fd796ad51a92a12c70 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 20 Jul 2025 10:16:45 -0700 Subject: selftests/damon/sysfs.py: test DAMOS destinations commitment Current DAMOS commitment assertion is not testing quota destinations commitment. Add the test. Link: https://lkml.kernel.org/r/20250720171652.92309-16-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/sysfs.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/sysfs.py b/tools/testing/selftests/damon/sysfs.py index 8d859fee1141..5ab07a88ab7f 100755 --- a/tools/testing/selftests/damon/sysfs.py +++ b/tools/testing/selftests/damon/sysfs.py @@ -73,6 +73,13 @@ def assert_quota_committed(quota, dump): assert_true( dump['weight_age'] == quota.weight_age_permil, 'weight_age', dump) + +def assert_migrate_dests_committed(dests, dump): + assert_true(dump['nr_dests'] == len(dests.dests), 'nr_dests', dump) + for idx, dest in enumerate(dests.dests): + assert_true(dump['node_id_arr'][idx] == dest.id, 'node_id', dump) + assert_true(dump['weight_arr'][idx] == dest.weight, 'weight', dump) + def main(): kdamonds = _damon_sysfs.Kdamonds( [_damon_sysfs.Kdamond( @@ -137,14 +144,8 @@ def main(): if scheme['target_nid'] != -1: fail('damos target nid', status) - migrate_dests = scheme['migrate_dests'] - if migrate_dests['nr_dests'] != 0: - fail('nr_dests', status) - if migrate_dests['node_id_arr'] != []: - fail('node_id_arr', status) - if migrate_dests['weight_arr'] != []: - fail('weight_arr', status) - + assert_migrate_dests_committed(_damon_sysfs.DamosDests(), + scheme['migrate_dests']) assert_quota_committed(_damon_sysfs.DamosQuota(), scheme['quota']) assert_watermarks_committed(_damon_sysfs.DamosWatermarks(), scheme['wmarks']) -- cgit v1.2.3 From f22ff7b5a5baf7dc3326948e50781067f9aebb3c Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 20 Jul 2025 10:16:46 -0700 Subject: selftests/damon/sysfs.py: generalize DAMOS scheme commit assertion DAMOS scheme commitment assertion is hard-coded for a specific test case. Split it out into a general version that can be reused for different test cases. Link: https://lkml.kernel.org/r/20250720171652.92309-17-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/sysfs.py | 59 +++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 22 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/sysfs.py b/tools/testing/selftests/damon/sysfs.py index 5ab07a88ab7f..6326f62733f6 100755 --- a/tools/testing/selftests/damon/sysfs.py +++ b/tools/testing/selftests/damon/sysfs.py @@ -80,6 +80,42 @@ def assert_migrate_dests_committed(dests, dump): assert_true(dump['node_id_arr'][idx] == dest.id, 'node_id', dump) assert_true(dump['weight_arr'][idx] == dest.weight, 'weight', dump) +def assert_access_pattern_committed(pattern, dump): + assert_true(dump['min_sz_region'] == pattern.size[0], 'min_sz_region', + dump) + assert_true(dump['max_sz_region'] == pattern.size[1], 'max_sz_region', + dump) + assert_true(dump['min_nr_accesses'] == pattern.nr_accesses[0], + 'min_nr_accesses', dump) + assert_true(dump['max_nr_accesses'] == pattern.nr_accesses[1], + 'max_nr_accesses', dump) + assert_true(dump['min_age_region'] == pattern.age[0], 'min_age_region', + dump) + assert_true(dump['max_age_region'] == pattern.age[1], 'miaxage_region', + dump) + +def assert_scheme_committed(scheme, dump): + assert_access_pattern_committed(scheme.access_pattern, dump['pattern']) + action_val = { + 'willneed': 0, + 'cold': 1, + 'pageout': 2, + 'hugepage': 3, + 'nohugeapge': 4, + 'lru_prio': 5, + 'lru_deprio': 6, + 'migrate_hot': 7, + 'migrate_cold': 8, + 'stat': 9, + } + assert_true(dump['action'] == action_val[scheme.action], 'action', dump) + assert_true(dump['apply_interval_us'] == scheme. apply_interval_us, + 'apply_interval_us', dump) + assert_true(dump['target_nid'] == scheme.target_nid, 'target_nid', dump) + assert_migrate_dests_committed(scheme.dests, dump['migrate_dests']) + assert_quota_committed(scheme.quota, dump['quota']) + assert_watermarks_committed(scheme.watermarks, dump['wmarks']) + def main(): kdamonds = _damon_sysfs.Kdamonds( [_damon_sysfs.Kdamond( @@ -127,28 +163,7 @@ def main(): if len(ctx['schemes']) != 1: fail('number of schemes', status) - scheme = ctx['schemes'][0] - if scheme['pattern'] != { - 'min_sz_region': 0, - 'max_sz_region': 2**64 - 1, - 'min_nr_accesses': 0, - 'max_nr_accesses': 2**32 - 1, - 'min_age_region': 0, - 'max_age_region': 2**32 - 1, - }: - fail('damos pattern', status) - if scheme['action'] != 9: # stat - fail('damos action', status) - if scheme['apply_interval_us'] != 0: - fail('damos apply interval', status) - if scheme['target_nid'] != -1: - fail('damos target nid', status) - - assert_migrate_dests_committed(_damon_sysfs.DamosDests(), - scheme['migrate_dests']) - assert_quota_committed(_damon_sysfs.DamosQuota(), scheme['quota']) - assert_watermarks_committed(_damon_sysfs.DamosWatermarks(), - scheme['wmarks']) + assert_scheme_committed(_damon_sysfs.Damos(), ctx['schemes'][0]) kdamonds.stop() -- cgit v1.2.3 From 53f800581f79555e84aeabff81a90cf954803b83 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 20 Jul 2025 10:16:47 -0700 Subject: selftests/damon/sysfs.py: test DAMOS filters commitment Current DAMOS scheme commitment assertion is not testing DAMOS filters. Add the test. Link: https://lkml.kernel.org/r/20250720171652.92309-18-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/sysfs.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/sysfs.py b/tools/testing/selftests/damon/sysfs.py index 6326f62733f6..6471cb8c6de2 100755 --- a/tools/testing/selftests/damon/sysfs.py +++ b/tools/testing/selftests/damon/sysfs.py @@ -80,6 +80,21 @@ def assert_migrate_dests_committed(dests, dump): assert_true(dump['node_id_arr'][idx] == dest.id, 'node_id', dump) assert_true(dump['weight_arr'][idx] == dest.weight, 'weight', dump) +def assert_filter_committed(filter_, dump): + assert_true(filter_.type_ == dump['type'], 'type', dump) + assert_true(filter_.matching == dump['matching'], 'matching', dump) + assert_true(filter_.allow == dump['allow'], 'allow', dump) + # TODO: check memcg_path and memcg_id if type is memcg + if filter_.type_ == 'addr': + assert_true([filter_.addr_start, filter_.addr_end] == + dump['addr_range'], 'addr_range', dump) + elif filter_.type_ == 'target': + assert_true(filter_.target_idx == dump['target_idx'], 'target_idx', + dump) + elif filter_.type_ == 'hugepage_size': + assert_true([filter_.min_, filter_.max_] == dump['sz_range'], + 'sz_range', dump) + def assert_access_pattern_committed(pattern, dump): assert_true(dump['min_sz_region'] == pattern.size[0], 'min_sz_region', dump) @@ -115,6 +130,11 @@ def assert_scheme_committed(scheme, dump): assert_migrate_dests_committed(scheme.dests, dump['migrate_dests']) assert_quota_committed(scheme.quota, dump['quota']) assert_watermarks_committed(scheme.watermarks, dump['wmarks']) + # TODO: test filters directory + for idx, f in enumerate(scheme.core_filters.filters): + assert_filter_committed(f, dump['filters'][idx]) + for idx, f in enumerate(scheme.ops_filters.filters): + assert_filter_committed(f, dump['ops_filters'][idx]) def main(): kdamonds = _damon_sysfs.Kdamonds( -- cgit v1.2.3 From 771d7754ab28597c0370a588887f50db229d1c0d Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 20 Jul 2025 10:16:48 -0700 Subject: selftests/damon/sysfs.py: generalize DAMOS schemes commit assertion DAMOS schemes commitment assertion is hard-coded for a specific test case. Split it out into a general version that can be reused for different test cases. Link: https://lkml.kernel.org/r/20250720171652.92309-19-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/sysfs.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/sysfs.py b/tools/testing/selftests/damon/sysfs.py index 6471cb8c6de2..4d13da00733e 100755 --- a/tools/testing/selftests/damon/sysfs.py +++ b/tools/testing/selftests/damon/sysfs.py @@ -136,6 +136,11 @@ def assert_scheme_committed(scheme, dump): for idx, f in enumerate(scheme.ops_filters.filters): assert_filter_committed(f, dump['ops_filters'][idx]) +def assert_schemes_committed(schemes, dump): + assert_true(len(schemes) == len(dump), 'len_schemes', dump) + for idx, scheme in enumerate(schemes): + assert_scheme_committed(scheme, dump[idx]) + def main(): kdamonds = _damon_sysfs.Kdamonds( [_damon_sysfs.Kdamond( @@ -180,10 +185,7 @@ def main(): { 'pid': 0, 'nr_regions': 0, 'regions_list': []}]: fail('adaptive targets', status) - if len(ctx['schemes']) != 1: - fail('number of schemes', status) - - assert_scheme_committed(_damon_sysfs.Damos(), ctx['schemes'][0]) + assert_schemes_committed([_damon_sysfs.Damos()], ctx['schemes']) kdamonds.stop() -- cgit v1.2.3 From a4027b5f24282b4aab5cee1b63b4267d27b6c686 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 20 Jul 2025 10:16:49 -0700 Subject: selftests/damon/sysfs.py: generalize monitoring attributes commit assertion DAMON monitoring attributes commitment assertion is hard-coded for a specific test case. Split it out into a general version that can be reused for different test cases. Link: https://lkml.kernel.org/r/20250720171652.92309-20-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/sysfs.py | 42 ++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 17 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/sysfs.py b/tools/testing/selftests/damon/sysfs.py index 4d13da00733e..2144a47d9827 100755 --- a/tools/testing/selftests/damon/sysfs.py +++ b/tools/testing/selftests/damon/sysfs.py @@ -141,6 +141,29 @@ def assert_schemes_committed(schemes, dump): for idx, scheme in enumerate(schemes): assert_scheme_committed(scheme, dump[idx]) +def assert_monitoring_attrs_committed(attrs, dump): + assert_true(dump['sample_interval'] == attrs.sample_us, 'sample_interval', + dump) + assert_true(dump['aggr_interval'] == attrs.aggr_us, 'aggr_interval', dump) + assert_true(dump['intervals_goal']['access_bp'] == + attrs.intervals_goal.access_bp, 'access_bp', + dump['intervals_goal']) + assert_true(dump['intervals_goal']['aggrs'] == attrs.intervals_goal.aggrs, + 'aggrs', dump['intervals_goal']) + assert_true(dump['intervals_goal']['min_sample_us'] == + attrs.intervals_goal.min_sample_us, 'min_sample_us', + dump['intervals_goal']) + assert_true(dump['intervals_goal']['max_sample_us'] == + attrs.intervals_goal.max_sample_us, 'max_sample_us', + dump['intervals_goal']) + + assert_true(dump['ops_update_interval'] == attrs.update_us, + 'ops_update_interval', dump) + assert_true(dump['min_nr_regions'] == attrs.min_nr_regions, + 'min_nr_regions', dump) + assert_true(dump['max_nr_regions'] == attrs.max_nr_regions, + 'max_nr_regions', dump) + def main(): kdamonds = _damon_sysfs.Kdamonds( [_damon_sysfs.Kdamond( @@ -163,23 +186,8 @@ def main(): fail('number of contexts', status) ctx = status['contexts'][0] - attrs = ctx['attrs'] - if attrs['sample_interval'] != 5000: - fail('sample interval', status) - if attrs['aggr_interval'] != 100000: - fail('aggr interval', status) - if attrs['ops_update_interval'] != 1000000: - fail('ops updte interval', status) - - if attrs['intervals_goal'] != { - 'access_bp': 0, 'aggrs': 0, - 'min_sample_us': 0, 'max_sample_us': 0}: - fail('intervals goal') - - if attrs['min_nr_regions'] != 10: - fail('min_nr_regions') - if attrs['max_nr_regions'] != 1000: - fail('max_nr_regions') + + assert_monitoring_attrs_committed(_damon_sysfs.DamonAttrs(), ctx['attrs']) if ctx['adaptive_targets'] != [ { 'pid': 0, 'nr_regions': 0, 'regions_list': []}]: -- cgit v1.2.3 From 16797a55aab118c5fa9236604aa2142f1121f136 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 20 Jul 2025 10:16:50 -0700 Subject: selftests/damon/sysfs.py: generalize DAMON context commit assertion DAMON context commitment assertion is hard-coded for a specific test case. Split it out into a general version that can be reused for different test cases. Link: https://lkml.kernel.org/r/20250720171652.92309-21-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/sysfs.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/sysfs.py b/tools/testing/selftests/damon/sysfs.py index 2144a47d9827..845fc177f8a7 100755 --- a/tools/testing/selftests/damon/sysfs.py +++ b/tools/testing/selftests/damon/sysfs.py @@ -164,6 +164,21 @@ def assert_monitoring_attrs_committed(attrs, dump): assert_true(dump['max_nr_regions'] == attrs.max_nr_regions, 'max_nr_regions', dump) +def assert_ctx_committed(ctx, dump): + ops_val = { + 'vaddr': 0, + 'fvaddr': 1, + 'paddr': 2, + } + assert_true(dump['ops']['id'] == ops_val[ctx.ops], 'ops_id', dump) + assert_monitoring_attrs_committed(ctx.monitoring_attrs, dump['attrs']) + assert_schemes_committed(ctx.schemes, dump['schemes']) + +def assert_ctxs_committed(ctxs, dump): + assert_true(len(ctxs) == len(dump), 'ctxs length', dump) + for idx, ctx in enumerate(ctxs): + assert_ctx_committed(ctx, dump[idx]) + def main(): kdamonds = _damon_sysfs.Kdamonds( [_damon_sysfs.Kdamond( @@ -182,18 +197,7 @@ def main(): kdamonds.stop() exit(1) - if len(status['contexts']) != 1: - fail('number of contexts', status) - - ctx = status['contexts'][0] - - assert_monitoring_attrs_committed(_damon_sysfs.DamonAttrs(), ctx['attrs']) - - if ctx['adaptive_targets'] != [ - { 'pid': 0, 'nr_regions': 0, 'regions_list': []}]: - fail('adaptive targets', status) - - assert_schemes_committed([_damon_sysfs.Damos()], ctx['schemes']) + assert_ctxs_committed(kdamonds.kdamonds[0].contexts, status['contexts']) kdamonds.stop() -- cgit v1.2.3 From 62b7b1ffa2dd242253480ffcd12c96c4b7ef8fb6 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 20 Jul 2025 10:16:51 -0700 Subject: selftests/damon/sysfs.py: test non-default parameters runtime commit sysfs.py is testing only the default and minimum DAMON parameters. Add another test case for more non-default additional DAMON parameters commitment on runtime. Link: https://lkml.kernel.org/r/20250720171652.92309-22-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/sysfs.py | 53 ++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/sysfs.py b/tools/testing/selftests/damon/sysfs.py index 845fc177f8a7..1bf9508cd44d 100755 --- a/tools/testing/selftests/damon/sysfs.py +++ b/tools/testing/selftests/damon/sysfs.py @@ -199,6 +199,59 @@ def main(): assert_ctxs_committed(kdamonds.kdamonds[0].contexts, status['contexts']) + context = _damon_sysfs.DamonCtx( + monitoring_attrs=_damon_sysfs.DamonAttrs( + sample_us=100000, aggr_us=2000000, + intervals_goal=_damon_sysfs.IntervalsGoal( + access_bp=400, aggrs=3, min_sample_us=5000, + max_sample_us=10000000), + update_us=2000000), + schemes=[_damon_sysfs.Damos( + action='pageout', + access_pattern=_damon_sysfs.DamosAccessPattern( + size=[4096, 2**10], + nr_accesses=[3, 317], + age=[5,71]), + quota=_damon_sysfs.DamosQuota( + sz=100*1024*1024, ms=100, + goals=[_damon_sysfs.DamosQuotaGoal( + metric='node_mem_used_bp', + target_value=9950, + nid=1)], + reset_interval_ms=1500, + weight_sz_permil=20, + weight_nr_accesses_permil=200, + weight_age_permil=1000), + watermarks=_damon_sysfs.DamosWatermarks( + metric = 'free_mem_rate', interval = 500000, # 500 ms + high = 500, mid = 400, low = 50), + target_nid=1, + apply_interval_us=1000000, + dests=_damon_sysfs.DamosDests( + dests=[_damon_sysfs.DamosDest(id=1, weight=30), + _damon_sysfs.DamosDest(id=0, weight=70)]), + core_filters=[ + _damon_sysfs.DamosFilter(type_='addr', matching=True, + allow=False, addr_start=42, + addr_end=4242), + ], + ops_filters=[ + _damon_sysfs.DamosFilter(type_='anon', matching=True, + allow=True), + ], + )]) + context.idx = 0 + context.kdamond = kdamonds.kdamonds[0] + kdamonds.kdamonds[0].contexts = [context] + kdamonds.kdamonds[0].commit() + + status, err = dump_damon_status_dict(kdamonds.kdamonds[0].pid) + if err is not None: + print(err) + exit(1) + + assert_ctxs_committed(kdamonds.kdamonds[0].contexts, status['contexts']) + kdamonds.stop() if __name__ == '__main__': -- cgit v1.2.3 From da5973a0b8e0370f9e309c60e35a4d4d4dfe32fc Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 20 Jul 2025 10:16:52 -0700 Subject: selftests/damon/sysfs.py: test runtime reduction of DAMON parameters sysfs.py is testing if non-default additional parameters can be committed. Add a test case for further reducing the parameters to the default set. Link: https://lkml.kernel.org/r/20250720171652.92309-23-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/sysfs.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/sysfs.py b/tools/testing/selftests/damon/sysfs.py index 1bf9508cd44d..2666c6f0f1a5 100755 --- a/tools/testing/selftests/damon/sysfs.py +++ b/tools/testing/selftests/damon/sysfs.py @@ -252,6 +252,20 @@ def main(): assert_ctxs_committed(kdamonds.kdamonds[0].contexts, status['contexts']) + # test online commitment of minimum context. + context = _damon_sysfs.DamonCtx() + context.idx = 0 + context.kdamond = kdamonds.kdamonds[0] + kdamonds.kdamonds[0].contexts = [context] + kdamonds.kdamonds[0].commit() + + status, err = dump_damon_status_dict(kdamonds.kdamonds[0].pid) + if err is not None: + print(err) + exit(1) + + assert_ctxs_committed(kdamonds.kdamonds[0].contexts, status['contexts']) + kdamonds.stop() if __name__ == '__main__': -- cgit v1.2.3 From 511914506d194be931de012a5f2ab55841b9b708 Mon Sep 17 00:00:00 2001 From: Enze Li Date: Fri, 18 Jul 2025 14:42:17 +0800 Subject: selftests/damon: introduce _common.sh to host shared function The current test scripts contain duplicated root permission checks in multiple locations. This patch consolidates these checks into _common.sh to eliminate code redundancy. Link: https://lkml.kernel.org/r/20250718064217.299300-1-lienze@kylinos.cn Signed-off-by: Enze Li Reviewed-by: SeongJae Park Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/_common.sh | 11 +++++++++++ tools/testing/selftests/damon/lru_sort.sh | 8 +++----- tools/testing/selftests/damon/reclaim.sh | 8 +++----- tools/testing/selftests/damon/sysfs.sh | 11 ++--------- .../selftests/damon/sysfs_update_removed_scheme_dir.sh | 8 +++----- 5 files changed, 22 insertions(+), 24 deletions(-) create mode 100644 tools/testing/selftests/damon/_common.sh (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/_common.sh b/tools/testing/selftests/damon/_common.sh new file mode 100644 index 000000000000..0279698f733e --- /dev/null +++ b/tools/testing/selftests/damon/_common.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +check_dependencies() +{ + if [ $EUID -ne 0 ] + then + echo "Run as root" + exit $ksft_skip + fi +} diff --git a/tools/testing/selftests/damon/lru_sort.sh b/tools/testing/selftests/damon/lru_sort.sh index 61b80197c896..1e4849db78a9 100755 --- a/tools/testing/selftests/damon/lru_sort.sh +++ b/tools/testing/selftests/damon/lru_sort.sh @@ -1,14 +1,12 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +source _common.sh + # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 -if [ $EUID -ne 0 ] -then - echo "Run as root" - exit $ksft_skip -fi +check_dependencies damon_lru_sort_enabled="/sys/module/damon_lru_sort/parameters/enabled" if [ ! -f "$damon_lru_sort_enabled" ] diff --git a/tools/testing/selftests/damon/reclaim.sh b/tools/testing/selftests/damon/reclaim.sh index 78dbc2334cbe..e56ceb035129 100755 --- a/tools/testing/selftests/damon/reclaim.sh +++ b/tools/testing/selftests/damon/reclaim.sh @@ -1,14 +1,12 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +source _common.sh + # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 -if [ $EUID -ne 0 ] -then - echo "Run as root" - exit $ksft_skip -fi +check_dependencies damon_reclaim_enabled="/sys/module/damon_reclaim/parameters/enabled" if [ ! -f "$damon_reclaim_enabled" ] diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh index e9a976d296e2..83e3b7f63d81 100755 --- a/tools/testing/selftests/damon/sysfs.sh +++ b/tools/testing/selftests/damon/sysfs.sh @@ -1,6 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +source _common.sh + # Kselftest frmework requirement - SKIP code is 4. ksft_skip=4 @@ -364,14 +366,5 @@ test_damon_sysfs() test_kdamonds "$damon_sysfs/kdamonds" } -check_dependencies() -{ - if [ $EUID -ne 0 ] - then - echo "Run as root" - exit $ksft_skip - fi -} - check_dependencies test_damon_sysfs "/sys/kernel/mm/damon/admin" diff --git a/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh b/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh index ade35576e748..35fc32beeaf7 100755 --- a/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh +++ b/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh @@ -1,14 +1,12 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +source _common.sh + # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 -if [ $EUID -ne 0 ] -then - echo "Run as root" - exit $ksft_skip -fi +check_dependencies damon_sysfs="/sys/kernel/mm/damon/admin" if [ ! -d "$damon_sysfs" ] -- cgit v1.2.3 From cf2a6de32cabbf84a889e24a9ee7c51dee4a1f70 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Thu, 17 Jul 2025 20:29:17 +0000 Subject: powerpc64/bpf: Add jit support for load_acquire and store_release Add JIT support for the load_acquire and store_release instructions. The implementation is similar to the kernel where: load_acquire => plain load -> lwsync store_release => lwsync -> plain store To test the correctness of the implementation, following selftests were run: [fedora@linux-kernel bpf]$ sudo ./test_progs -a \ verifier_load_acquire,verifier_store_release,atomics #11/1 atomics/add:OK #11/2 atomics/sub:OK #11/3 atomics/and:OK #11/4 atomics/or:OK #11/5 atomics/xor:OK #11/6 atomics/cmpxchg:OK #11/7 atomics/xchg:OK #11 atomics:OK #519/1 verifier_load_acquire/load-acquire, 8-bit:OK #519/2 verifier_load_acquire/load-acquire, 8-bit @unpriv:OK #519/3 verifier_load_acquire/load-acquire, 16-bit:OK #519/4 verifier_load_acquire/load-acquire, 16-bit @unpriv:OK #519/5 verifier_load_acquire/load-acquire, 32-bit:OK #519/6 verifier_load_acquire/load-acquire, 32-bit @unpriv:OK #519/7 verifier_load_acquire/load-acquire, 64-bit:OK #519/8 verifier_load_acquire/load-acquire, 64-bit @unpriv:OK #519/9 verifier_load_acquire/load-acquire with uninitialized src_reg:OK #519/10 verifier_load_acquire/load-acquire with uninitialized src_reg @unpriv:OK #519/11 verifier_load_acquire/load-acquire with non-pointer src_reg:OK #519/12 verifier_load_acquire/load-acquire with non-pointer src_reg @unpriv:OK #519/13 verifier_load_acquire/misaligned load-acquire:OK #519/14 verifier_load_acquire/misaligned load-acquire @unpriv:OK #519/15 verifier_load_acquire/load-acquire from ctx pointer:OK #519/16 verifier_load_acquire/load-acquire from ctx pointer @unpriv:OK #519/17 verifier_load_acquire/load-acquire with invalid register R15:OK #519/18 verifier_load_acquire/load-acquire with invalid register R15 @unpriv:OK #519/19 verifier_load_acquire/load-acquire from pkt pointer:OK #519/20 verifier_load_acquire/load-acquire from flow_keys pointer:OK #519/21 verifier_load_acquire/load-acquire from sock pointer:OK #519 verifier_load_acquire:OK #556/1 verifier_store_release/store-release, 8-bit:OK #556/2 verifier_store_release/store-release, 8-bit @unpriv:OK #556/3 verifier_store_release/store-release, 16-bit:OK #556/4 verifier_store_release/store-release, 16-bit @unpriv:OK #556/5 verifier_store_release/store-release, 32-bit:OK #556/6 verifier_store_release/store-release, 32-bit @unpriv:OK #556/7 verifier_store_release/store-release, 64-bit:OK #556/8 verifier_store_release/store-release, 64-bit @unpriv:OK #556/9 verifier_store_release/store-release with uninitialized src_reg:OK #556/10 verifier_store_release/store-release with uninitialized src_reg @unpriv:OK #556/11 verifier_store_release/store-release with uninitialized dst_reg:OK #556/12 verifier_store_release/store-release with uninitialized dst_reg @unpriv:OK #556/13 verifier_store_release/store-release with non-pointer dst_reg:OK #556/14 verifier_store_release/store-release with non-pointer dst_reg @unpriv:OK #556/15 verifier_store_release/misaligned store-release:OK #556/16 verifier_store_release/misaligned store-release @unpriv:OK #556/17 verifier_store_release/store-release to ctx pointer:OK #556/18 verifier_store_release/store-release to ctx pointer @unpriv:OK #556/19 verifier_store_release/store-release, leak pointer to stack:OK #556/20 verifier_store_release/store-release, leak pointer to stack @unpriv:OK #556/21 verifier_store_release/store-release, leak pointer to map:OK #556/22 verifier_store_release/store-release, leak pointer to map @unpriv:OK #556/23 verifier_store_release/store-release with invalid register R15:OK #556/24 verifier_store_release/store-release with invalid register R15 @unpriv:OK #556/25 verifier_store_release/store-release to pkt pointer:OK #556/26 verifier_store_release/store-release to flow_keys pointer:OK #556/27 verifier_store_release/store-release to sock pointer:OK #556 verifier_store_release:OK Summary: 3/55 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Puranjay Mohan Tested-by: Saket Kumar Bhaskar Reviewed-by: Hari Bathini Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250717202935.29018-2-puranjay@kernel.org --- arch/powerpc/include/asm/ppc-opcode.h | 1 + arch/powerpc/net/bpf_jit_comp64.c | 82 ++++++++++++++++++++++++++++ tools/testing/selftests/bpf/progs/bpf_misc.h | 3 +- 3 files changed, 85 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 4312bcb913a4..8053b24afc39 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -425,6 +425,7 @@ #define PPC_RAW_SC() (0x44000002) #define PPC_RAW_SYNC() (0x7c0004ac) #define PPC_RAW_ISYNC() (0x4c00012c) +#define PPC_RAW_LWSYNC() (0x7c2004ac) /* * Define what the VSX XX1 form instructions will look like, then add diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 5daa77aee7f7..2039eb957f3f 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -392,6 +392,71 @@ asm ( " blr ;" ); +static int emit_atomic_ld_st(const struct bpf_insn insn, struct codegen_context *ctx, u32 *image) +{ + u32 code = insn.code; + u32 dst_reg = bpf_to_ppc(insn.dst_reg); + u32 src_reg = bpf_to_ppc(insn.src_reg); + u32 size = BPF_SIZE(code); + u32 tmp1_reg = bpf_to_ppc(TMP_REG_1); + u32 tmp2_reg = bpf_to_ppc(TMP_REG_2); + s16 off = insn.off; + s32 imm = insn.imm; + + switch (imm) { + case BPF_LOAD_ACQ: + switch (size) { + case BPF_B: + EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); + break; + case BPF_H: + EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); + break; + case BPF_W: + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); + break; + case BPF_DW: + if (off % 4) { + EMIT(PPC_RAW_LI(tmp1_reg, off)); + EMIT(PPC_RAW_LDX(dst_reg, src_reg, tmp1_reg)); + } else { + EMIT(PPC_RAW_LD(dst_reg, src_reg, off)); + } + break; + } + EMIT(PPC_RAW_LWSYNC()); + break; + case BPF_STORE_REL: + EMIT(PPC_RAW_LWSYNC()); + switch (size) { + case BPF_B: + EMIT(PPC_RAW_STB(src_reg, dst_reg, off)); + break; + case BPF_H: + EMIT(PPC_RAW_STH(src_reg, dst_reg, off)); + break; + case BPF_W: + EMIT(PPC_RAW_STW(src_reg, dst_reg, off)); + break; + case BPF_DW: + if (off % 4) { + EMIT(PPC_RAW_LI(tmp2_reg, off)); + EMIT(PPC_RAW_STDX(src_reg, dst_reg, tmp2_reg)); + } else { + EMIT(PPC_RAW_STD(src_reg, dst_reg, off)); + } + break; + } + break; + default: + pr_err_ratelimited("unexpected atomic load/store op code %02x\n", + imm); + return -EINVAL; + } + + return 0; +} + /* Assemble the body code between the prologue & epilogue */ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx, u32 *addrs, int pass, bool extra_pass) @@ -859,8 +924,25 @@ emit_clear: /* * BPF_STX ATOMIC (atomic ops) */ + case BPF_STX | BPF_ATOMIC | BPF_B: + case BPF_STX | BPF_ATOMIC | BPF_H: case BPF_STX | BPF_ATOMIC | BPF_W: case BPF_STX | BPF_ATOMIC | BPF_DW: + if (bpf_atomic_is_load_store(&insn[i])) { + ret = emit_atomic_ld_st(insn[i], ctx, image); + if (ret) + return ret; + + if (size != BPF_DW && insn_is_zext(&insn[i + 1])) + addrs[++i] = ctx->idx * 4; + break; + } else if (size == BPF_B || size == BPF_H) { + pr_err_ratelimited( + "eBPF filter atomic op code %02x (@%d) unsupported\n", + code, i); + return -EOPNOTSUPP; + } + save_reg = tmp2_reg; ret_reg = src_reg; diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 6e208e24ba3b..3bf3af5639fa 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -227,7 +227,8 @@ #if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ - (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) || \ + (defined(__TARGET_ARCH_powerpc)) #define CAN_USE_LOAD_ACQ_STORE_REL #endif -- cgit v1.2.3 From 18ec25dd0e97653cdb576bb1750c31acf2513ea7 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 28 Jul 2025 08:26:03 -0700 Subject: KVM: arm64: selftests: Add FEAT_RAS EL2 registers to get-reg-list VDISR_EL2 and VSESR_EL2 are now visible to userspace for nested VMs. Add them to get-reg-list. Link: https://lore.kernel.org/r/20250728152603.2823699-1-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- tools/testing/selftests/kvm/arm64/get-reg-list.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c index f4f85d12d42e..011fad95dd02 100644 --- a/tools/testing/selftests/kvm/arm64/get-reg-list.c +++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c @@ -63,6 +63,8 @@ static struct feature_id_reg feat_id_regs[] = { REG_FEAT(HDFGWTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), REG_FEAT(ZCR_EL2, ID_AA64PFR0_EL1, SVE, IMP), REG_FEAT(SCTLR2_EL1, ID_AA64MMFR3_EL1, SCTLRX, IMP), + REG_FEAT(VDISR_EL2, ID_AA64PFR0_EL1, RAS, IMP), + REG_FEAT(VSESR_EL2, ID_AA64PFR0_EL1, RAS, IMP), }; bool filter_reg(__u64 reg) @@ -749,6 +751,8 @@ static __u64 el2_regs[] = { SYS_REG(CNTHV_CTL_EL2), SYS_REG(CNTHV_CVAL_EL2), SYS_REG(SP_EL2), + SYS_REG(VDISR_EL2), + SYS_REG(VSESR_EL2), }; #define BASE_SUBLIST \ -- cgit v1.2.3 From da653de268d32a80e135c9eb960a8147c186f1bc Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Mon, 28 Jul 2025 11:51:16 +0200 Subject: selftests/bpf: Update reg_bound range refinement logic This patch updates the range refinement logic in the reg_bound test to match the new logic from the previous commit. Without this change, tests would fail because we end with more precise ranges than the tests expect. Acked-by: Eduard Zingerman Signed-off-by: Paul Chaignon Link: https://lore.kernel.org/r/b7f6b1fbe03373cca4e1bb6a113035a6cd2b3ff7.1753695655.git.paul.chaignon@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/reg_bounds.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index 39d42271cc46..e261b0e872db 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -465,6 +465,20 @@ static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t, return range_improve(x_t, x, x_swap); } + if (!t_is_32(x_t) && !t_is_32(y_t) && x_t != y_t) { + if (x_t == S64 && x.a > x.b) { + if (x.b < y.a && x.a <= y.b) + return range(x_t, x.a, y.b); + if (x.a > y.b && x.b >= y.a) + return range(x_t, y.a, x.b); + } else if (x_t == U64 && y.a > y.b) { + if (y.b < x.a && y.a <= x.b) + return range(x_t, y.a, x.b); + if (y.a > x.b && y.b >= x.a) + return range(x_t, x.a, y.b); + } + } + /* otherwise, plain range cast and intersection works */ return range_improve(x_t, x, y_cast); } -- cgit v1.2.3 From 26e5e346a52c796190e63af1c2a80a417fda261a Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Mon, 28 Jul 2025 11:51:30 +0200 Subject: selftests/bpf: Test cross-sign 64bits range refinement This patch adds coverage for the new cross-sign 64bits range refinement logic. The three tests cover the cases when the u64 and s64 ranges overlap (1) in the negative portion of s64, (2) in the positive portion of s64, and (3) in both portions. The first test is a simplified version of a BPF program generated by syzkaller that caused an invariant violation [1]. It looks like syzkaller could not extract the reproducer itself (and therefore didn't report it to the mailing list), but I was able to extract it from the console logs of a crash. The principle is similar to the invariant violation described in commit 6279846b9b25 ("bpf: Forget ranges when refining tnum after JSET"): the verifier walks a dead branch, uses the condition to refine ranges, and ends up with inconsistent ranges. In this case, the dead branch is when we fallthrough on both jumps. The new refinement logic improves the bounds such that the second jump is properly detected as always-taken and the verifier doesn't end up walking a dead branch. The second and third tests are inspired by the first, but rely on condition jumps to prepare the bounds instead of ALU instructions. An R10 write is used to trigger a verifier error when the bounds can't be refined. Link: https://syzkaller.appspot.com/bug?extid=c711ce17dd78e5d4fdcf [1] Acked-by: Eduard Zingerman Signed-off-by: Paul Chaignon Link: https://lore.kernel.org/r/a0e17b00dab8dabcfa6f8384e7e151186efedfdd.1753695655.git.paul.chaignon@gmail.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/progs/verifier_bounds.c | 118 +++++++++++++++++++++ 1 file changed, 118 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index 63b533ca4933..41f4389e08c7 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -1550,4 +1550,122 @@ l0_%=: r0 = 0; \ : __clobber_all); } +/* This test covers the bounds deduction on 64bits when the s64 and u64 ranges + * overlap on the negative side. At instruction 7, the ranges look as follows: + * + * 0 umin=0xfffffcf1 umax=0xff..ff6e U64_MAX + * | [xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx] | + * |----------------------------|------------------------------| + * |xxxxxxxxxx] [xxxxxxxxxxxx| + * 0 smax=0xeffffeee smin=-655 -1 + * + * We should therefore deduce the following new bounds: + * + * 0 u64=[0xff..ffd71;0xff..ff6e] U64_MAX + * | [xxx] | + * |----------------------------|------------------------------| + * | [xxx] | + * 0 s64=[-655;-146] -1 + * + * Without the deduction cross sign boundary, we end up with an invariant + * violation error. + */ +SEC("socket") +__description("bounds deduction cross sign boundary, negative overlap") +__success __log_level(2) __flag(BPF_F_TEST_REG_INVARIANTS) +__msg("7: (1f) r0 -= r6 {{.*}} R0=scalar(smin=-655,smax=smax32=-146,umin=0xfffffffffffffd71,umax=0xffffffffffffff6e,smin32=-783,umin32=0xfffffcf1,umax32=0xffffff6e,var_off=(0xfffffffffffffc00; 0x3ff))") +__retval(0) +__naked void bounds_deduct_negative_overlap(void) +{ + asm volatile(" \ + call %[bpf_get_prandom_u32]; \ + w3 = w0; \ + w6 = (s8)w0; \ + r0 = (s8)r0; \ + if w6 >= 0xf0000000 goto l0_%=; \ + r0 += r6; \ + r6 += 400; \ + r0 -= r6; \ + if r3 < r0 goto l0_%=; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +/* This test covers the bounds deduction on 64bits when the s64 and u64 ranges + * overlap on the positive side. At instruction 3, the ranges look as follows: + * + * 0 umin=0 umax=0xffffffffffffff00 U64_MAX + * [xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx] | + * |----------------------------|------------------------------| + * |xxxxxxxx] [xxxxxxxx| + * 0 smax=127 smin=-128 -1 + * + * We should therefore deduce the following new bounds: + * + * 0 u64=[0;127] U64_MAX + * [xxxxxxxx] | + * |----------------------------|------------------------------| + * [xxxxxxxx] | + * 0 s64=[0;127] -1 + * + * Without the deduction cross sign boundary, the program is rejected due to + * the frame pointer write. + */ +SEC("socket") +__description("bounds deduction cross sign boundary, positive overlap") +__success __log_level(2) __flag(BPF_F_TEST_REG_INVARIANTS) +__msg("3: (2d) if r0 > r1 {{.*}} R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=127,var_off=(0x0; 0x7f))") +__retval(0) +__naked void bounds_deduct_positive_overlap(void) +{ + asm volatile(" \ + call %[bpf_get_prandom_u32]; \ + r0 = (s8)r0; \ + r1 = 0xffffffffffffff00; \ + if r0 > r1 goto l0_%=; \ + if r0 < 128 goto l0_%=; \ + r10 = 0; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +/* This test is the same as above, but the s64 and u64 ranges overlap in two + * places. At instruction 3, the ranges look as follows: + * + * 0 umin=0 umax=0xffffffffffffff80 U64_MAX + * [xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx] | + * |----------------------------|------------------------------| + * |xxxxxxxx] [xxxxxxxx| + * 0 smax=127 smin=-128 -1 + * + * 0xffffffffffffff80 = (u64)-128. We therefore can't deduce anything new and + * the program should fail due to the frame pointer write. + */ +SEC("socket") +__description("bounds deduction cross sign boundary, two overlaps") +__failure __flag(BPF_F_TEST_REG_INVARIANTS) +__msg("3: (2d) if r0 > r1 {{.*}} R0_w=scalar(smin=smin32=-128,smax=smax32=127,umax=0xffffffffffffff80)") +__msg("frame pointer is read only") +__naked void bounds_deduct_two_overlaps(void) +{ + asm volatile(" \ + call %[bpf_get_prandom_u32]; \ + r0 = (s8)r0; \ + r1 = 0xffffffffffffff80; \ + if r0 > r1 goto l0_%=; \ + if r0 < 128 goto l0_%=; \ + r10 = 0; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From f96841bbf4a1ee4ed0336ba192a01278fdea6383 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Mon, 28 Jul 2025 11:51:45 +0200 Subject: selftests/bpf: Test invariants on JSLT crossing sign The improvement of the u64/s64 range refinement fixed the invariant violation that was happening on this test for BPF_JSLT when crossing the sign boundary. After this patch, we have one test remaining with a known invariant violation. It's the same test as fixed here but for 32 bits ranges. Acked-by: Eduard Zingerman Signed-off-by: Paul Chaignon Link: https://lore.kernel.org/r/ad046fb0016428f1a33c3b81617aabf31b51183f.1753695655.git.paul.chaignon@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/verifier_bounds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index 41f4389e08c7..34b3f259b7a4 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -1066,7 +1066,7 @@ l0_%=: r0 = 0; \ SEC("xdp") __description("bound check with JMP_JSLT for crossing 64-bit signed boundary") __success __retval(0) -__flag(!BPF_F_TEST_REG_INVARIANTS) /* known invariants violation */ +__flag(BPF_F_TEST_REG_INVARIANTS) __naked void crossing_64_bit_signed_boundary_2(void) { asm volatile (" \ -- cgit v1.2.3 From 5dbb19b16ac498b0b7f3a8a85f9d25d6d8af397d Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Mon, 28 Jul 2025 11:52:00 +0200 Subject: bpf: Add third round of bounds deduction Commit d7f008738171 ("bpf: try harder to deduce register bounds from different numeric domains") added a second call to __reg_deduce_bounds in reg_bounds_sync because a single call wasn't enough to converge to a fixed point in terms of register bounds. With patch "bpf: Improve bounds when s64 crosses sign boundary" from this series, Eduard noticed that calling __reg_deduce_bounds twice isn't enough anymore to converge. The first selftest added in "selftests/bpf: Test cross-sign 64bits range refinement" highlights the need for a third call to __reg_deduce_bounds. After instruction 7, reg_bounds_sync performs the following bounds deduction: reg_bounds_sync entry: scalar(smin=-655,smax=0xeffffeee,smin32=-783,smax32=-146) __update_reg_bounds: scalar(smin=-655,smax=0xeffffeee,smin32=-783,smax32=-146) __reg_deduce_bounds: __reg32_deduce_bounds: scalar(smin=-655,smax=0xeffffeee,smin32=-783,smax32=-146,umin32=0xfffffcf1,umax32=0xffffff6e) __reg64_deduce_bounds: scalar(smin=-655,smax=0xeffffeee,smin32=-783,smax32=-146,umin32=0xfffffcf1,umax32=0xffffff6e) __reg_deduce_mixed_bounds: scalar(smin=-655,smax=0xeffffeee,umin=umin32=0xfffffcf1,umax=0xffffffffffffff6e,smin32=-783,smax32=-146,umax32=0xffffff6e) __reg_deduce_bounds: __reg32_deduce_bounds: scalar(smin=-655,smax=0xeffffeee,umin=umin32=0xfffffcf1,umax=0xffffffffffffff6e,smin32=-783,smax32=-146,umax32=0xffffff6e) __reg64_deduce_bounds: scalar(smin=-655,smax=smax32=-146,umin=0xfffffffffffffd71,umax=0xffffffffffffff6e,smin32=-783,umin32=0xfffffcf1,umax32=0xffffff6e) __reg_deduce_mixed_bounds: scalar(smin=-655,smax=smax32=-146,umin=0xfffffffffffffd71,umax=0xffffffffffffff6e,smin32=-783,umin32=0xfffffcf1,umax32=0xffffff6e) __reg_bound_offset: scalar(smin=-655,smax=smax32=-146,umin=0xfffffffffffffd71,umax=0xffffffffffffff6e,smin32=-783,umin32=0xfffffcf1,umax32=0xffffff6e,var_off=(0xfffffffffffffc00; 0x3ff)) __update_reg_bounds: scalar(smin=-655,smax=smax32=-146,umin=0xfffffffffffffd71,umax=0xffffffffffffff6e,smin32=-783,umin32=0xfffffcf1,umax32=0xffffff6e,var_off=(0xfffffffffffffc00; 0x3ff)) In particular, notice how: 1. In the first call to __reg_deduce_bounds, __reg32_deduce_bounds learns new u32 bounds. 2. __reg64_deduce_bounds is unable to improve bounds at this point. 3. __reg_deduce_mixed_bounds derives new u64 bounds from the u32 bounds. 4. In the second call to __reg_deduce_bounds, __reg64_deduce_bounds improves the smax and umin bounds thanks to patch "bpf: Improve bounds when s64 crosses sign boundary" from this series. 5. Subsequent functions are unable to improve the ranges further (only tnums). Yet, a better smin32 bound could be learned from the smin bound. __reg32_deduce_bounds is able to improve smin32 from smin, but for that we need a third call to __reg_deduce_bounds. As discussed in [1], there may be a better way to organize the deduction rules to learn the same information with less calls to the same functions. Such an optimization requires further analysis and is orthogonal to the present patchset. Link: https://lore.kernel.org/bpf/aIKtSK9LjQXB8FLY@mail.gmail.com/ [1] Acked-by: Eduard Zingerman Co-developed-by: Eduard Zingerman Signed-off-by: Eduard Zingerman Signed-off-by: Paul Chaignon Link: https://lore.kernel.org/r/79619d3b42e5525e0e174ed534b75879a5ba15de.1753695655.git.paul.chaignon@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 1 + tools/testing/selftests/bpf/progs/verifier_bounds.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 251e06dc07eb..72e3f2b03349 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2672,6 +2672,7 @@ static void reg_bounds_sync(struct bpf_reg_state *reg) /* We might have learned something about the sign bit. */ __reg_deduce_bounds(reg); __reg_deduce_bounds(reg); + __reg_deduce_bounds(reg); /* We might have learned some bits from the bounds. */ __reg_bound_offset(reg); /* Intersecting with the old var_off might have improved our bounds diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index 34b3f259b7a4..87a2c60d86e6 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -1573,7 +1573,7 @@ l0_%=: r0 = 0; \ SEC("socket") __description("bounds deduction cross sign boundary, negative overlap") __success __log_level(2) __flag(BPF_F_TEST_REG_INVARIANTS) -__msg("7: (1f) r0 -= r6 {{.*}} R0=scalar(smin=-655,smax=smax32=-146,umin=0xfffffffffffffd71,umax=0xffffffffffffff6e,smin32=-783,umin32=0xfffffcf1,umax32=0xffffff6e,var_off=(0xfffffffffffffc00; 0x3ff))") +__msg("7: (1f) r0 -= r6 {{.*}} R0=scalar(smin=smin32=-655,smax=smax32=-146,umin=0xfffffffffffffd71,umax=0xffffffffffffff6e,umin32=0xfffffd71,umax32=0xffffff6e,var_off=(0xfffffffffffffc00; 0x3ff))") __retval(0) __naked void bounds_deduct_negative_overlap(void) { -- cgit v1.2.3 From a5a6b29a700fda1dd766cc42dde2cbba9b19f470 Mon Sep 17 00:00:00 2001 From: KaFai Wan Date: Thu, 24 Jul 2025 23:14:51 +0800 Subject: bpf: Show precise rejected function when attaching fexit/fmod_ret to __noreturn functions With this change, we know the precise rejected function name when attaching fexit/fmod_ret to __noreturn functions from log. $ ./fexit libbpf: prog 'fexit': BPF program load failed: -EINVAL libbpf: prog 'fexit': -- BEGIN PROG LOAD LOG -- Attaching fexit/fmod_ret to __noreturn function 'do_exit' is rejected. Suggested-by: Leon Hwang Signed-off-by: KaFai Wan Acked-by: Yafang Shao Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20250724151454.499040-2-kafai.wan@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 3 ++- tools/testing/selftests/bpf/progs/fexit_noreturns.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e50d3d43be67..4bf7392fd2c3 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -23985,7 +23985,8 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) } else if ((prog->expected_attach_type == BPF_TRACE_FEXIT || prog->expected_attach_type == BPF_MODIFY_RETURN) && btf_id_set_contains(&noreturn_deny, btf_id)) { - verbose(env, "Attaching fexit/fmod_ret to __noreturn functions is rejected.\n"); + verbose(env, "Attaching fexit/fmod_ret to __noreturn function '%s' is rejected.\n", + tgt_info.tgt_name); return -EINVAL; } diff --git a/tools/testing/selftests/bpf/progs/fexit_noreturns.c b/tools/testing/selftests/bpf/progs/fexit_noreturns.c index 54654539f550..b1c33d958ae2 100644 --- a/tools/testing/selftests/bpf/progs/fexit_noreturns.c +++ b/tools/testing/selftests/bpf/progs/fexit_noreturns.c @@ -8,7 +8,7 @@ char _license[] SEC("license") = "GPL"; SEC("fexit/do_exit") -__failure __msg("Attaching fexit/fmod_ret to __noreturn functions is rejected.") +__failure __msg("Attaching fexit/fmod_ret to __noreturn function 'do_exit' is rejected.") int BPF_PROG(noreturns) { return 0; -- cgit v1.2.3 From a32f6f17a74d0e897b48bd4c697cc9782a38bf4f Mon Sep 17 00:00:00 2001 From: KaFai Wan Date: Thu, 24 Jul 2025 23:14:53 +0800 Subject: selftests/bpf: Add selftest for attaching tracing programs to functions in deny list The result: $ tools/testing/selftests/bpf/test_progs -t tracing_failure/tracing_deny #468/3 tracing_failure/tracing_deny:OK #468 tracing_failure:OK Summary: 1/1 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: KaFai Wan Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20250724151454.499040-4-kafai.wan@linux.dev Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/tracing_failure.c | 33 ++++++++++++++++++++++ .../testing/selftests/bpf/progs/tracing_failure.c | 6 ++++ 2 files changed, 39 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c index a222df765bc3..39b59276884a 100644 --- a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c +++ b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c @@ -28,10 +28,43 @@ out: tracing_failure__destroy(skel); } +static void test_tracing_deny(void) +{ + struct tracing_failure *skel; + char log_buf[256]; + int btf_id, err; + + /* __rcu_read_lock depends on CONFIG_PREEMPT_RCU */ + btf_id = libbpf_find_vmlinux_btf_id("__rcu_read_lock", BPF_TRACE_FENTRY); + if (btf_id <= 0) { + test__skip(); + return; + } + + skel = tracing_failure__open(); + if (!ASSERT_OK_PTR(skel, "tracing_failure__open")) + return; + + bpf_program__set_autoload(skel->progs.tracing_deny, true); + bpf_program__set_log_buf(skel->progs.tracing_deny, log_buf, sizeof(log_buf)); + + err = tracing_failure__load(skel); + if (!ASSERT_ERR(err, "tracing_failure__load")) + goto out; + + ASSERT_HAS_SUBSTR(log_buf, + "Attaching tracing programs to function '__rcu_read_lock' is rejected.", + "log_buf"); +out: + tracing_failure__destroy(skel); +} + void test_tracing_failure(void) { if (test__start_subtest("bpf_spin_lock")) test_bpf_spin_lock(true); if (test__start_subtest("bpf_spin_unlock")) test_bpf_spin_lock(false); + if (test__start_subtest("tracing_deny")) + test_tracing_deny(); } diff --git a/tools/testing/selftests/bpf/progs/tracing_failure.c b/tools/testing/selftests/bpf/progs/tracing_failure.c index d41665d2ec8c..58d2777014e1 100644 --- a/tools/testing/selftests/bpf/progs/tracing_failure.c +++ b/tools/testing/selftests/bpf/progs/tracing_failure.c @@ -18,3 +18,9 @@ int BPF_PROG(test_spin_unlock, struct bpf_spin_lock *lock) { return 0; } + +SEC("?fentry/__rcu_read_lock") +int BPF_PROG(tracing_deny) +{ + return 0; +} -- cgit v1.2.3 From 51d3750aba798335568970f3157629c6ca5dc91a Mon Sep 17 00:00:00 2001 From: KaFai Wan Date: Thu, 24 Jul 2025 23:14:54 +0800 Subject: selftests/bpf: Migrate fexit_noreturns case into tracing_failure test suite Delete fexit_noreturns.c files and migrate the cases into tracing_failure.c files. The result: $ tools/testing/selftests/bpf/test_progs -t tracing_failure/fexit_noreturns #467/4 tracing_failure/fexit_noreturns:OK #467 tracing_failure:OK Summary: 1/1 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: KaFai Wan Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20250724151454.499040-5-kafai.wan@linux.dev Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/fexit_noreturns.c | 9 ----- .../selftests/bpf/prog_tests/tracing_failure.c | 47 +++++++++++++++------- .../testing/selftests/bpf/progs/fexit_noreturns.c | 15 ------- .../testing/selftests/bpf/progs/tracing_failure.c | 6 +++ 4 files changed, 39 insertions(+), 38 deletions(-) delete mode 100644 tools/testing/selftests/bpf/prog_tests/fexit_noreturns.c delete mode 100644 tools/testing/selftests/bpf/progs/fexit_noreturns.c (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_noreturns.c b/tools/testing/selftests/bpf/prog_tests/fexit_noreturns.c deleted file mode 100644 index 568d3aa48a78..000000000000 --- a/tools/testing/selftests/bpf/prog_tests/fexit_noreturns.c +++ /dev/null @@ -1,9 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include -#include "fexit_noreturns.skel.h" - -void test_fexit_noreturns(void) -{ - RUN_TESTS(fexit_noreturns); -} diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c index 39b59276884a..10e231965589 100644 --- a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c +++ b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c @@ -28,37 +28,54 @@ out: tracing_failure__destroy(skel); } -static void test_tracing_deny(void) +static void test_tracing_fail_prog(const char *prog_name, const char *exp_msg) { struct tracing_failure *skel; + struct bpf_program *prog; char log_buf[256]; - int btf_id, err; - - /* __rcu_read_lock depends on CONFIG_PREEMPT_RCU */ - btf_id = libbpf_find_vmlinux_btf_id("__rcu_read_lock", BPF_TRACE_FENTRY); - if (btf_id <= 0) { - test__skip(); - return; - } + int err; skel = tracing_failure__open(); if (!ASSERT_OK_PTR(skel, "tracing_failure__open")) return; - bpf_program__set_autoload(skel->progs.tracing_deny, true); - bpf_program__set_log_buf(skel->progs.tracing_deny, log_buf, sizeof(log_buf)); + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto out; + + bpf_program__set_autoload(prog, true); + bpf_program__set_log_buf(prog, log_buf, sizeof(log_buf)); err = tracing_failure__load(skel); if (!ASSERT_ERR(err, "tracing_failure__load")) goto out; - ASSERT_HAS_SUBSTR(log_buf, - "Attaching tracing programs to function '__rcu_read_lock' is rejected.", - "log_buf"); + ASSERT_HAS_SUBSTR(log_buf, exp_msg, "log_buf"); out: tracing_failure__destroy(skel); } +static void test_tracing_deny(void) +{ + int btf_id; + + /* __rcu_read_lock depends on CONFIG_PREEMPT_RCU */ + btf_id = libbpf_find_vmlinux_btf_id("__rcu_read_lock", BPF_TRACE_FENTRY); + if (btf_id <= 0) { + test__skip(); + return; + } + + test_tracing_fail_prog("tracing_deny", + "Attaching tracing programs to function '__rcu_read_lock' is rejected."); +} + +static void test_fexit_noreturns(void) +{ + test_tracing_fail_prog("fexit_noreturns", + "Attaching fexit/fmod_ret to __noreturn function 'do_exit' is rejected."); +} + void test_tracing_failure(void) { if (test__start_subtest("bpf_spin_lock")) @@ -67,4 +84,6 @@ void test_tracing_failure(void) test_bpf_spin_lock(false); if (test__start_subtest("tracing_deny")) test_tracing_deny(); + if (test__start_subtest("fexit_noreturns")) + test_fexit_noreturns(); } diff --git a/tools/testing/selftests/bpf/progs/fexit_noreturns.c b/tools/testing/selftests/bpf/progs/fexit_noreturns.c deleted file mode 100644 index b1c33d958ae2..000000000000 --- a/tools/testing/selftests/bpf/progs/fexit_noreturns.c +++ /dev/null @@ -1,15 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include -#include -#include -#include "bpf_misc.h" - -char _license[] SEC("license") = "GPL"; - -SEC("fexit/do_exit") -__failure __msg("Attaching fexit/fmod_ret to __noreturn function 'do_exit' is rejected.") -int BPF_PROG(noreturns) -{ - return 0; -} diff --git a/tools/testing/selftests/bpf/progs/tracing_failure.c b/tools/testing/selftests/bpf/progs/tracing_failure.c index 58d2777014e1..65e485c4468c 100644 --- a/tools/testing/selftests/bpf/progs/tracing_failure.c +++ b/tools/testing/selftests/bpf/progs/tracing_failure.c @@ -24,3 +24,9 @@ int BPF_PROG(tracing_deny) { return 0; } + +SEC("?fexit/do_exit") +int BPF_PROG(fexit_noreturns) +{ + return 0; +} -- cgit v1.2.3 From 6260da046819b7bda828bacae148fc8856fdebd7 Mon Sep 17 00:00:00 2001 From: WangYuli Date: Thu, 31 Jul 2025 18:02:22 +0800 Subject: selftests: ALSA: fix memory leak in utimer test Free the malloc'd buffer in TEST_F(timer_f, utimer) to prevent memory leak. Fixes: 1026392d10af ("selftests: ALSA: Cover userspace-driven timers with test") Reported-by: Jun Zhan Signed-off-by: WangYuli Link: https://patch.msgid.link/DE4D931FCF54F3DB+20250731100222.65748-1-wangyuli@uniontech.com Signed-off-by: Takashi Iwai --- tools/testing/selftests/alsa/utimer-test.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/alsa/utimer-test.c b/tools/testing/selftests/alsa/utimer-test.c index 32ee3ce57721..37964f311a33 100644 --- a/tools/testing/selftests/alsa/utimer-test.c +++ b/tools/testing/selftests/alsa/utimer-test.c @@ -135,6 +135,7 @@ TEST_F(timer_f, utimer) { pthread_join(ticking_thread, NULL); ASSERT_EQ(total_ticks, TICKS_COUNT); pclose(rfp); + free(buf); } TEST(wrong_timers_test) { -- cgit v1.2.3 From f8fded7536a9350ce849f21eee124d66056aa54c Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 31 Jul 2025 14:09:14 +0300 Subject: selftests: net: Fix flaky neighbor garbage collection test The purpose of the "Periodic garbage collection" test case is to make sure that "extern_valid" neighbors are not flushed during periodic garbage collection, unlike regular neighbor entries. The test case is currently doing the following: 1. Changing the base reachable time to 10 seconds so that periodic garbage collection will run every 5 seconds. 2. Changing the garbage collection stale time to 5 seconds so that neighbors that have not been used in the last 5 seconds will be considered for removal. 3. Waiting for the base reachable time change to take effect. 4. Adding an "extern_valid" neighbor, a non-"extern_valid" neighbor and a bunch of other neighbors so that the threshold ("thresh1") will be crossed and stale neighbors will be flushed during garbage collection. 5. Waiting for 10 seconds to give garbage collection a chance to run. 6. Checking that the "extern_valid" neighbor was not flushed and that the non-"extern_valid" neighbor was flushed. The test sometimes fails in the netdev CI because the non-"extern_valid" neighbor was not flushed. I am unable to reproduce this locally, but my theory that since we do not know exactly when the periodic garbage collection runs, it is possible for it to run at a time when the non-"extern_valid" neighbor is still not considered stale. Fix by moving the addition of the two neighbors before step 3 and by reducing the garbage collection stale time to 1 second, to ensure that both neighbors are considered stale when garbage collection runs. Fixes: 171f2ee31a42 ("selftests: net: Add a selftest for externally validated neighbor entries") Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/netdev/20250728093504.4ebbd73c@kernel.org/ Signed-off-by: Ido Schimmel Link: https://patch.msgid.link/20250731110914.506890-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/test_neigh.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/test_neigh.sh b/tools/testing/selftests/net/test_neigh.sh index 388056472b5b..7c594bf6ead0 100755 --- a/tools/testing/selftests/net/test_neigh.sh +++ b/tools/testing/selftests/net/test_neigh.sh @@ -289,11 +289,11 @@ extern_valid_common() orig_base_reachable=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["base_reachable"]') run_cmd "ip ntable change name $tbl_name thresh1 10 base_reachable 10000" orig_gc_stale=$(ip -n "$ns1" -j ntable show name "$tbl_name" dev veth0 | jq '.[]["gc_stale"]') - run_cmd "ip -n $ns1 ntable change name $tbl_name dev veth0 gc_stale 5000" - # Wait orig_base_reachable/2 for the new interval to take effect. - run_cmd "sleep $(((orig_base_reachable / 1000) / 2 + 2))" + run_cmd "ip -n $ns1 ntable change name $tbl_name dev veth0 gc_stale 1000" run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" run_cmd "ip -n $ns1 neigh add ${subnet}3 lladdr $mac nud stale dev veth0" + # Wait orig_base_reachable/2 for the new interval to take effect. + run_cmd "sleep $(((orig_base_reachable / 1000) / 2 + 2))" for i in {1..20}; do run_cmd "ip -n $ns1 neigh add ${subnet}$((i + 4)) nud none dev veth0" done -- cgit v1.2.3 From 7cbd49795d4ca86fba5830084e94fece3b343b79 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 30 Jul 2025 11:53:13 +0000 Subject: selftests: avoid using ifconfig ifconfig is deprecated and not always present, use ip command instead. Fixes: e0f3b3e5c77a ("selftests: Add test cases for vlan_filter modification during runtime") Signed-off-by: Eric Dumazet Cc: Dong Chenchen Reviewed-by: Hangbin Liu Link: https://patch.msgid.link/20250730115313.3356036-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/vlan_hw_filter.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/vlan_hw_filter.sh b/tools/testing/selftests/net/vlan_hw_filter.sh index 0fb56baf28e4..e195d5cab6f7 100755 --- a/tools/testing/selftests/net/vlan_hw_filter.sh +++ b/tools/testing/selftests/net/vlan_hw_filter.sh @@ -55,10 +55,10 @@ test_vlan0_del_crash_01() { ip netns exec ${NETNS} ip link add bond0 type bond mode 0 ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off - ip netns exec ${NETNS} ifconfig bond0 up + ip netns exec ${NETNS} ip link set dev bond0 up ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on - ip netns exec ${NETNS} ifconfig bond0 down - ip netns exec ${NETNS} ifconfig bond0 up + ip netns exec ${NETNS} ip link set dev bond0 down + ip netns exec ${NETNS} ip link set dev bond0 up ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function" cleanup } @@ -68,11 +68,11 @@ test_vlan0_del_crash_02() { setup ip netns exec ${NETNS} ip link add bond0 type bond mode 0 ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off - ip netns exec ${NETNS} ifconfig bond0 up + ip netns exec ${NETNS} ip link set dev bond0 up ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q - ip netns exec ${NETNS} ifconfig bond0 down - ip netns exec ${NETNS} ifconfig bond0 up + ip netns exec ${NETNS} ip link set dev bond0 down + ip netns exec ${NETNS} ip link set dev bond0 up ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function" cleanup } @@ -84,9 +84,9 @@ test_vlan0_del_crash_03() { ip netns exec ${NETNS} ip link add bond0 type bond mode 0 ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off - ip netns exec ${NETNS} ifconfig bond0 up + ip netns exec ${NETNS} ip link set dev bond0 up ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on - ip netns exec ${NETNS} ifconfig bond0 down + ip netns exec ${NETNS} ip link set dev bond0 down ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function" cleanup } -- cgit v1.2.3 From d8d2d9d12f141302aaec3ff9a3a8cbed4ac0546c Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Fri, 1 Aug 2025 11:49:44 +0200 Subject: selftests/bpf: Test for unaligned flow_dissector ctx access This patch adds tests for two context fields where unaligned accesses were not properly rejected. Note the new macro is similar to the existing narrow_load macro, but we need a different description and access offset. Combining the two macros into one is probably doable but I don't think it would help readability. vmlinux.h is included in place of bpf.h so we have the definition of struct bpf_nf_ctx. Signed-off-by: Paul Chaignon Tested-by: Eduard Zingerman Acked-by: Yonghong Song Link: https://lore.kernel.org/r/bf014046ddcf41677fb8b98d150c14027e9fddba.1754039605.git.paul.chaignon@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/verifier_ctx.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/progs/verifier_ctx.c b/tools/testing/selftests/bpf/progs/verifier_ctx.c index 0450840c92d9..424463094760 100644 --- a/tools/testing/selftests/bpf/progs/verifier_ctx.c +++ b/tools/testing/selftests/bpf/progs/verifier_ctx.c @@ -1,10 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 /* Converted from tools/testing/selftests/bpf/verifier/ctx.c */ -#include +#include "vmlinux.h" #include #include "bpf_misc.h" +#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) + SEC("tc") __description("context stores via BPF_ATOMIC") __failure __msg("BPF_ATOMIC stores into R1 ctx is not allowed") @@ -243,4 +245,23 @@ narrow_load("sockops", bpf_sock_ops, skb_data); narrow_load("sockops", bpf_sock_ops, skb_data_end); narrow_load("sockops", bpf_sock_ops, skb_hwtstamp); +#define unaligned_access(type, ctx, field) \ + SEC(type) \ + __description("unaligned access on field " #field " of " #ctx) \ + __failure __msg("invalid bpf_context access") \ + __naked void unaligned_ctx_access_##ctx##field(void) \ + { \ + asm volatile (" \ + r1 = *(u%[size] *)(r1 + %[off]); \ + r0 = 0; \ + exit;" \ + : \ + : __imm_const(size, sizeof_field(struct ctx, field) * 8), \ + __imm_const(off, offsetof(struct ctx, field) + 1) \ + : __clobber_all); \ + } + +unaligned_access("flow_dissector", __sk_buff, data); +unaligned_access("netfilter", bpf_nf_ctx, skb); + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From b753522bed0b7e388a643f58d91bd81d8849ba43 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 27 Jul 2025 11:37:33 +0300 Subject: kho: add test for kexec handover Testing kexec handover requires a kernel driver that will generate some data and preserve it with KHO on the first boot and then restore that data and verify it was preserved properly after kexec. To facilitate such test, along with the kernel driver responsible for data generation, preservation and restoration add a script that runs a kernel in a VM with a minimal /init. The /init enables KHO, loads a kernel image for kexec and runs kexec reboot. After the boot of the kexeced kernel, the driver verifies that the data was properly preserved. [rppt@kernel.org: fix section mismatch] Link: https://lkml.kernel.org/r/aIiRC8fXiOXKbPM_@kernel.org Link: https://lkml.kernel.org/r/20250727083733.2590139-1-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Cc: Alexander Graf Cc: Changyuan Lyu Cc: Pasha Tatashin Cc: Pratyush Yadav Cc: Shuah Khan Signed-off-by: Andrew Morton --- MAINTAINERS | 1 + lib/Kconfig.debug | 21 +++ lib/Makefile | 1 + lib/test_kho.c | 305 +++++++++++++++++++++++++++++++++ tools/testing/selftests/kho/arm64.conf | 9 + tools/testing/selftests/kho/init.c | 100 +++++++++++ tools/testing/selftests/kho/vmtest.sh | 183 ++++++++++++++++++++ tools/testing/selftests/kho/x86.conf | 7 + 8 files changed, 627 insertions(+) create mode 100644 lib/test_kho.c create mode 100644 tools/testing/selftests/kho/arm64.conf create mode 100644 tools/testing/selftests/kho/init.c create mode 100755 tools/testing/selftests/kho/vmtest.sh create mode 100644 tools/testing/selftests/kho/x86.conf (limited to 'tools/testing/selftests') diff --git a/MAINTAINERS b/MAINTAINERS index 87897a285bc1..d16d76f24c6e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13353,6 +13353,7 @@ F: Documentation/admin-guide/mm/kho.rst F: Documentation/core-api/kho/* F: include/linux/kexec_handover.h F: kernel/kexec_handover.c +F: tools/testing/selftests/kho/ KEYS-ENCRYPTED M: Mimi Zohar diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ebe33181b6e6..4f82d38e3c45 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -3225,6 +3225,27 @@ config TEST_OBJPOOL If unsure, say N. +config TEST_KEXEC_HANDOVER + bool "Test for Kexec HandOver" + default n + depends on KEXEC_HANDOVER + help + This option enables test for Kexec HandOver (KHO). + The test consists of two parts: saving kernel data before kexec and + restoring the data after kexec and verifying that it was properly + handed over. This test module creates and saves data on the boot of + the first kernel and restores and verifies the data on the boot of + kexec'ed kernel. + + For detailed documentation about KHO, see Documentation/core-api/kho. + + To run the test run: + + tools/testing/selftests/kho/vmtest.sh -h + + If unsure, say N. + + config INT_POW_KUNIT_TEST tristate "Integer exponentiation (int_pow) test" if !KUNIT_ALL_TESTS depends on KUNIT diff --git a/lib/Makefile b/lib/Makefile index 88d6228089a8..dadf0028b319 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -102,6 +102,7 @@ obj-$(CONFIG_TEST_HMM) += test_hmm.o obj-$(CONFIG_TEST_FREE_PAGES) += test_free_pages.o obj-$(CONFIG_TEST_REF_TRACKER) += test_ref_tracker.o obj-$(CONFIG_TEST_OBJPOOL) += test_objpool.o +obj-$(CONFIG_TEST_KEXEC_HANDOVER) += test_kho.o obj-$(CONFIG_TEST_FPU) += test_fpu.o test_fpu-y := test_fpu_glue.o test_fpu_impl.o diff --git a/lib/test_kho.c b/lib/test_kho.c new file mode 100644 index 000000000000..c2eb899c3b45 --- /dev/null +++ b/lib/test_kho.c @@ -0,0 +1,305 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Test module for KHO + * Copyright (c) 2025 Microsoft Corporation. + * + * Authors: + * Saurabh Sengar + * Mike Rapoport + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define KHO_TEST_MAGIC 0x4b484f21 /* KHO! */ +#define KHO_TEST_FDT "kho_test" +#define KHO_TEST_COMPAT "kho-test-v1" + +static long max_mem = (PAGE_SIZE << MAX_PAGE_ORDER) * 2; +module_param(max_mem, long, 0644); + +struct kho_test_state { + unsigned int nr_folios; + struct folio **folios; + struct folio *fdt; + __wsum csum; +}; + +static struct kho_test_state kho_test_state; + +static int kho_test_notifier(struct notifier_block *self, unsigned long cmd, + void *v) +{ + struct kho_test_state *state = &kho_test_state; + struct kho_serialization *ser = v; + int err = 0; + + switch (cmd) { + case KEXEC_KHO_ABORT: + return NOTIFY_DONE; + case KEXEC_KHO_FINALIZE: + /* Handled below */ + break; + default: + return NOTIFY_BAD; + } + + err |= kho_preserve_folio(state->fdt); + err |= kho_add_subtree(ser, KHO_TEST_FDT, folio_address(state->fdt)); + + return err ? NOTIFY_BAD : NOTIFY_DONE; +} + +static struct notifier_block kho_test_nb = { + .notifier_call = kho_test_notifier, +}; + +static int kho_test_save_data(struct kho_test_state *state, void *fdt) +{ + phys_addr_t *folios_info __free(kvfree) = NULL; + int err = 0; + + folios_info = kvmalloc_array(state->nr_folios, sizeof(*folios_info), + GFP_KERNEL); + if (!folios_info) + return -ENOMEM; + + for (int i = 0; i < state->nr_folios; i++) { + struct folio *folio = state->folios[i]; + unsigned int order = folio_order(folio); + + folios_info[i] = virt_to_phys(folio_address(folio)) | order; + + err = kho_preserve_folio(folio); + if (err) + return err; + } + + err |= fdt_begin_node(fdt, "data"); + err |= fdt_property(fdt, "nr_folios", &state->nr_folios, + sizeof(state->nr_folios)); + err |= fdt_property(fdt, "folios_info", folios_info, + state->nr_folios * sizeof(*folios_info)); + err |= fdt_property(fdt, "csum", &state->csum, sizeof(state->csum)); + err |= fdt_end_node(fdt); + + return err; +} + +static int kho_test_prepare_fdt(struct kho_test_state *state) +{ + const char compatible[] = KHO_TEST_COMPAT; + unsigned int magic = KHO_TEST_MAGIC; + ssize_t fdt_size; + int err = 0; + void *fdt; + + fdt_size = state->nr_folios * sizeof(phys_addr_t) + PAGE_SIZE; + state->fdt = folio_alloc(GFP_KERNEL, get_order(fdt_size)); + if (!state->fdt) + return -ENOMEM; + + fdt = folio_address(state->fdt); + + err |= fdt_create(fdt, fdt_size); + err |= fdt_finish_reservemap(fdt); + + err |= fdt_begin_node(fdt, ""); + err |= fdt_property(fdt, "compatible", compatible, sizeof(compatible)); + err |= fdt_property(fdt, "magic", &magic, sizeof(magic)); + err |= kho_test_save_data(state, fdt); + err |= fdt_end_node(fdt); + + err |= fdt_finish(fdt); + + if (err) + folio_put(state->fdt); + + return err; +} + +static int kho_test_generate_data(struct kho_test_state *state) +{ + size_t alloc_size = 0; + __wsum csum = 0; + + while (alloc_size < max_mem) { + int order = get_random_u32() % NR_PAGE_ORDERS; + struct folio *folio; + unsigned int size; + void *addr; + + /* cap allocation so that we won't exceed max_mem */ + if (alloc_size + (PAGE_SIZE << order) > max_mem) { + order = get_order(max_mem - alloc_size); + if (order) + order--; + } + size = PAGE_SIZE << order; + + folio = folio_alloc(GFP_KERNEL | __GFP_NORETRY, order); + if (!folio) + goto err_free_folios; + + state->folios[state->nr_folios++] = folio; + addr = folio_address(folio); + get_random_bytes(addr, size); + csum = csum_partial(addr, size, csum); + alloc_size += size; + } + + state->csum = csum; + return 0; + +err_free_folios: + for (int i = 0; i < state->nr_folios; i++) + folio_put(state->folios[i]); + return -ENOMEM; +} + +static int kho_test_save(void) +{ + struct kho_test_state *state = &kho_test_state; + struct folio **folios __free(kvfree) = NULL; + unsigned long max_nr; + int err; + + max_mem = PAGE_ALIGN(max_mem); + max_nr = max_mem >> PAGE_SHIFT; + + folios = kvmalloc_array(max_nr, sizeof(*state->folios), GFP_KERNEL); + if (!folios) + return -ENOMEM; + state->folios = folios; + + err = kho_test_generate_data(state); + if (err) + return err; + + err = kho_test_prepare_fdt(state); + if (err) + return err; + + return register_kho_notifier(&kho_test_nb); +} + +static int kho_test_restore_data(const void *fdt, int node) +{ + const unsigned int *nr_folios; + const phys_addr_t *folios_info; + const __wsum *old_csum; + __wsum csum = 0; + int len; + + node = fdt_path_offset(fdt, "/data"); + + nr_folios = fdt_getprop(fdt, node, "nr_folios", &len); + if (!nr_folios || len != sizeof(*nr_folios)) + return -EINVAL; + + old_csum = fdt_getprop(fdt, node, "csum", &len); + if (!old_csum || len != sizeof(*old_csum)) + return -EINVAL; + + folios_info = fdt_getprop(fdt, node, "folios_info", &len); + if (!folios_info || len != sizeof(*folios_info) * *nr_folios) + return -EINVAL; + + for (int i = 0; i < *nr_folios; i++) { + unsigned int order = folios_info[i] & ~PAGE_MASK; + phys_addr_t phys = folios_info[i] & PAGE_MASK; + unsigned int size = PAGE_SIZE << order; + struct folio *folio; + + folio = kho_restore_folio(phys); + if (!folio) + break; + + if (folio_order(folio) != order) + break; + + csum = csum_partial(folio_address(folio), size, csum); + folio_put(folio); + } + + if (csum != *old_csum) + return -EINVAL; + + return 0; +} + +static int kho_test_restore(phys_addr_t fdt_phys) +{ + void *fdt = phys_to_virt(fdt_phys); + const unsigned int *magic; + int node, len, err; + + node = fdt_path_offset(fdt, "/"); + if (node < 0) + return -EINVAL; + + if (fdt_node_check_compatible(fdt, node, KHO_TEST_COMPAT)) + return -EINVAL; + + magic = fdt_getprop(fdt, node, "magic", &len); + if (!magic || len != sizeof(*magic)) + return -EINVAL; + + if (*magic != KHO_TEST_MAGIC) + return -EINVAL; + + err = kho_test_restore_data(fdt, node); + if (err) + return err; + + pr_info("KHO restore succeeded\n"); + return 0; +} + +static int __init kho_test_init(void) +{ + phys_addr_t fdt_phys; + int err; + + err = kho_retrieve_subtree(KHO_TEST_FDT, &fdt_phys); + if (!err) + return kho_test_restore(fdt_phys); + + if (err != -ENOENT) { + pr_warn("failed to retrieve %s FDT: %d\n", KHO_TEST_FDT, err); + return err; + } + + return kho_test_save(); +} +module_init(kho_test_init); + +static void kho_test_cleanup(void) +{ + for (int i = 0; i < kho_test_state.nr_folios; i++) + folio_put(kho_test_state.folios[i]); + + kvfree(kho_test_state.folios); +} + +static void __exit kho_test_exit(void) +{ + unregister_kho_notifier(&kho_test_nb); + kho_test_cleanup(); +} +module_exit(kho_test_exit); + +MODULE_AUTHOR("Mike Rapoport "); +MODULE_DESCRIPTION("KHO test module"); +MODULE_LICENSE("GPL"); diff --git a/tools/testing/selftests/kho/arm64.conf b/tools/testing/selftests/kho/arm64.conf new file mode 100644 index 000000000000..ee696807cd35 --- /dev/null +++ b/tools/testing/selftests/kho/arm64.conf @@ -0,0 +1,9 @@ +QEMU_CMD="qemu-system-aarch64 -M virt -cpu max" +QEMU_KCONFIG=" +CONFIG_SERIAL_AMBA_PL010=y +CONFIG_SERIAL_AMBA_PL010_CONSOLE=y +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +" +KERNEL_IMAGE="Image" +KERNEL_CMDLINE="console=ttyAMA0" diff --git a/tools/testing/selftests/kho/init.c b/tools/testing/selftests/kho/init.c new file mode 100644 index 000000000000..8034e24c6bf6 --- /dev/null +++ b/tools/testing/selftests/kho/init.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 + +#ifndef NOLIBC +#include +#include +#include +#include +#include +#include +#include +#endif + +/* from arch/x86/include/asm/setup.h */ +#define COMMAND_LINE_SIZE 2048 + +/* from include/linux/kexex.h */ +#define KEXEC_FILE_NO_INITRAMFS 0x00000004 + +#define KHO_FINILIZE "/debugfs/kho/out/finalize" +#define KERNEL_IMAGE "/kernel" + +static int mount_filesystems(void) +{ + if (mount("debugfs", "/debugfs", "debugfs", 0, NULL) < 0) + return -1; + + return mount("proc", "/proc", "proc", 0, NULL); +} + +static int kho_enable(void) +{ + const char enable[] = "1"; + int fd; + + fd = open(KHO_FINILIZE, O_RDWR); + if (fd < 0) + return -1; + + if (write(fd, enable, sizeof(enable)) != sizeof(enable)) + return 1; + + close(fd); + return 0; +} + +static long kexec_file_load(int kernel_fd, int initrd_fd, + unsigned long cmdline_len, const char *cmdline, + unsigned long flags) +{ + return syscall(__NR_kexec_file_load, kernel_fd, initrd_fd, cmdline_len, + cmdline, flags); +} + +static int kexec_load(void) +{ + char cmdline[COMMAND_LINE_SIZE]; + ssize_t len; + int fd, err; + + fd = open("/proc/cmdline", O_RDONLY); + if (fd < 0) + return -1; + + len = read(fd, cmdline, sizeof(cmdline)); + close(fd); + if (len < 0) + return -1; + + /* replace \n with \0 */ + cmdline[len - 1] = 0; + fd = open(KERNEL_IMAGE, O_RDONLY); + if (fd < 0) + return -1; + + err = kexec_file_load(fd, -1, len, cmdline, KEXEC_FILE_NO_INITRAMFS); + close(fd); + + return err ? : 0; +} + +int main(int argc, char *argv[]) +{ + if (mount_filesystems()) + goto err_reboot; + + if (kho_enable()) + goto err_reboot; + + if (kexec_load()) + goto err_reboot; + + if (reboot(RB_KEXEC)) + goto err_reboot; + + return 0; + +err_reboot: + reboot(RB_AUTOBOOT); + return -1; +} diff --git a/tools/testing/selftests/kho/vmtest.sh b/tools/testing/selftests/kho/vmtest.sh new file mode 100755 index 000000000000..ec70a17bd476 --- /dev/null +++ b/tools/testing/selftests/kho/vmtest.sh @@ -0,0 +1,183 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +set -ue + +CROSS_COMPILE="${CROSS_COMPILE:-""}" + +test_dir=$(realpath "$(dirname "$0")") +kernel_dir=$(realpath "$test_dir/../../../..") + +tmp_dir=$(mktemp -d /tmp/kho-test.XXXXXXXX) +headers_dir="$tmp_dir/usr" +initrd_dir="$tmp_dir/initrd" +initrd="$tmp_dir/initrd.cpio" + +source "$test_dir/../kselftest/ktap_helpers.sh" + +function usage() { + cat < "$kho_config" </dev/null || skip "$opt is missing" + done < "$kho_config" + + $make_cmd "$kimage" + $make_cmd headers_install INSTALL_HDR_PATH="$headers_dir" +} + +function mkinitrd() { + local kernel=$1 + + mkdir -p "$initrd_dir"/{dev,debugfs,proc} + sudo mknod "$initrd_dir/dev/console" c 5 1 + + "$CROSS_COMPILE"gcc -s -static -Os -nostdinc -I"$headers_dir/include" \ + -fno-asynchronous-unwind-tables -fno-ident -nostdlib \ + -include "$test_dir/../../../include/nolibc/nolibc.h" \ + -o "$initrd_dir/init" "$test_dir/init.c" \ + + cp "$kernel" "$initrd_dir/kernel" + + pushd "$initrd_dir" &>/dev/null + find . | cpio -H newc --create > "$initrd" 2>/dev/null + popd &>/dev/null +} + +function run_qemu() { + local qemu_cmd=$1 + local cmdline=$2 + local kernel=$3 + local serial="$tmp_dir/qemu.serial" + + cmdline="$cmdline kho=on panic=-1" + + $qemu_cmd -m 1G -smp 2 -no-reboot -nographic -nodefaults \ + -accel kvm -accel hvf -accel tcg \ + -serial file:"$serial" \ + -append "$cmdline" \ + -kernel "$kernel" \ + -initrd "$initrd" + + grep "KHO restore succeeded" "$serial" &> /dev/null || fail "KHO failed" +} + +function target_to_arch() { + local target=$1 + + case $target in + aarch64) echo "arm64" ;; + x86_64) echo "x86" ;; + *) skip "architecture $target is not supported" + esac +} + +function main() { + local build_dir="$kernel_dir/.kho" + local jobs=$(($(nproc) * 2)) + local target="$(uname -m)" + + # skip the test if any of the preparation steps fails + set -o errtrace + trap skip ERR + + while getopts 'hd:j:t:' opt; do + case $opt in + d) + build_dir="$OPTARG" + ;; + j) + jobs="$OPTARG" + ;; + t) + target="$OPTARG" + ;; + h) + usage + exit 0 + ;; + *) + echo Unknown argument "$opt" + usage + exit 1 + ;; + esac + done + + ktap_print_header + ktap_set_plan 1 + + if [[ "$target" != "$(uname -m)" ]] && [[ -z "$CROSS_COMPILE" ]]; then + skip "Cross-platform testing needs to specify CROSS_COMPILE" + fi + + mkdir -p "$build_dir" + local arch=$(target_to_arch "$target") + source "$test_dir/$arch.conf" + + # build the kernel and create initrd + # initrd includes the kernel image that will be kexec'ed + local make_cmd="make ARCH=$arch CROSS_COMPILE=$CROSS_COMPILE -j$jobs" + build_kernel "$build_dir" "$make_cmd" "$QEMU_KCONFIG" "$KERNEL_IMAGE" + + local kernel="$build_dir/arch/$arch/boot/$KERNEL_IMAGE" + mkinitrd "$kernel" + + run_qemu "$QEMU_CMD" "$KERNEL_CMDLINE" "$kernel" + + ktap_test_pass "KHO succeeded" +} + +main "$@" diff --git a/tools/testing/selftests/kho/x86.conf b/tools/testing/selftests/kho/x86.conf new file mode 100644 index 000000000000..b419e610ca22 --- /dev/null +++ b/tools/testing/selftests/kho/x86.conf @@ -0,0 +1,7 @@ +QEMU_CMD=qemu-system-x86_64 +QEMU_KCONFIG=" +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +" +KERNEL_IMAGE="bzImage" +KERNEL_CMDLINE="console=ttyS0" -- cgit v1.2.3 From b50e37889f9f343b772d9162d00105bb7a26c2f5 Mon Sep 17 00:00:00 2001 From: wang lian Date: Mon, 21 Jul 2025 19:46:14 +0800 Subject: selftests/mm: add process_madvise() tests Add tests for process_madvise(), focusing on verifying behavior under various conditions including valid usage and error cases. [lianux.mm@gmail.com: v7] Link: https://lkml.kernel.org/r/20250729113109.12272-1-lianux.mm@gmail.com Link: https://lkml.kernel.org/r/20250729113109.12272-1-lianux.mm@gmail.com Link: https://lkml.kernel.org/r/20250721114614.40996-1-lianux.mm@gmail.com Signed-off-by: wang lian Suggested-by: Lorenzo Stoakes Suggested-by: David Hildenbrand Suggested-by: Zi Yan Suggested-by: Mark Brown Acked-by: SeongJae Park Reviewed-by: Zi Yan Tested-by: Zi Yan Cc: Christian Brauner Cc: Jann Horn Cc: Kairui Song Cc: Liam Howlett Cc: Shuah Khan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/.gitignore | 1 + tools/testing/selftests/mm/Makefile | 1 + tools/testing/selftests/mm/process_madv.c | 344 ++++++++++++++++++++++++++++++ tools/testing/selftests/mm/run_vmtests.sh | 5 + 4 files changed, 351 insertions(+) create mode 100644 tools/testing/selftests/mm/process_madv.c (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index f2dafa0b700b..e7b23a8a05fe 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -21,6 +21,7 @@ on-fault-limit transhuge-stress pagemap_ioctl pfnmap +process_madv *.tmp* protection_keys protection_keys_32 diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index ae6f994d3add..d13b3cef2a2b 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -85,6 +85,7 @@ TEST_GEN_FILES += mseal_test TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += pagemap_ioctl TEST_GEN_FILES += pfnmap +TEST_GEN_FILES += process_madv TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += uffd-stress diff --git a/tools/testing/selftests/mm/process_madv.c b/tools/testing/selftests/mm/process_madv.c new file mode 100644 index 000000000000..471cae8427f1 --- /dev/null +++ b/tools/testing/selftests/mm/process_madv.c @@ -0,0 +1,344 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#define _GNU_SOURCE +#include "../kselftest_harness.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "vm_util.h" + +#include "../pidfd/pidfd.h" + +FIXTURE(process_madvise) +{ + unsigned long page_size; + pid_t child_pid; + int remote_pidfd; + int pidfd; +}; + +FIXTURE_SETUP(process_madvise) +{ + self->page_size = (unsigned long)sysconf(_SC_PAGESIZE); + self->pidfd = PIDFD_SELF; + self->remote_pidfd = -1; + self->child_pid = -1; +}; + +FIXTURE_TEARDOWN_PARENT(process_madvise) +{ + /* This teardown is guaranteed to run, even if tests SKIP or ASSERT */ + if (self->child_pid > 0) { + kill(self->child_pid, SIGKILL); + waitpid(self->child_pid, NULL, 0); + } + + if (self->remote_pidfd >= 0) + close(self->remote_pidfd); +} + +static ssize_t sys_process_madvise(int pidfd, const struct iovec *iovec, + size_t vlen, int advice, unsigned int flags) +{ + return syscall(__NR_process_madvise, pidfd, iovec, vlen, advice, flags); +} + +/* + * This test uses PIDFD_SELF to target the current process. The main + * goal is to verify the basic behavior of process_madvise() with + * a vector of non-contiguous memory ranges, not its cross-process + * capabilities. + */ +TEST_F(process_madvise, basic) +{ + const unsigned long pagesize = self->page_size; + const int madvise_pages = 4; + struct iovec vec[madvise_pages]; + int pidfd = self->pidfd; + ssize_t ret; + char *map; + + /* + * Create a single large mapping. We will pick pages from this + * mapping to advise on. This ensures we test non-contiguous iovecs. + */ + map = mmap(NULL, pagesize * 10, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (map == MAP_FAILED) + SKIP(return, "mmap failed, not enough memory.\n"); + + /* Fill the entire region with a known pattern. */ + memset(map, 'A', pagesize * 10); + + /* + * Setup the iovec to point to 4 non-contiguous pages + * within the mapping. + */ + vec[0].iov_base = &map[0 * pagesize]; + vec[0].iov_len = pagesize; + vec[1].iov_base = &map[3 * pagesize]; + vec[1].iov_len = pagesize; + vec[2].iov_base = &map[5 * pagesize]; + vec[2].iov_len = pagesize; + vec[3].iov_base = &map[8 * pagesize]; + vec[3].iov_len = pagesize; + + ret = sys_process_madvise(pidfd, vec, madvise_pages, MADV_DONTNEED, 0); + if (ret == -1 && errno == EPERM) + SKIP(return, + "process_madvise() unsupported or permission denied, try running as root.\n"); + else if (errno == EINVAL) + SKIP(return, + "process_madvise() unsupported or parameter invalid, please check arguments.\n"); + + /* The call should succeed and report the total bytes processed. */ + ASSERT_EQ(ret, madvise_pages * pagesize); + + /* Check that advised pages are now zero. */ + for (int i = 0; i < madvise_pages; i++) { + char *advised_page = (char *)vec[i].iov_base; + + /* Content must be 0, not 'A'. */ + ASSERT_EQ(*advised_page, '\0'); + } + + /* Check that an un-advised page in between is still 'A'. */ + char *unadvised_page = &map[1 * pagesize]; + + for (int i = 0; i < pagesize; i++) + ASSERT_EQ(unadvised_page[i], 'A'); + + /* Cleanup. */ + ASSERT_EQ(munmap(map, pagesize * 10), 0); +} + +/* + * This test deterministically validates process_madvise() with MADV_COLLAPSE + * on a remote process, other advices are difficult to verify reliably. + * + * The test verifies that a memory region in a child process, + * focus on process_madv remote result, only check addresses and lengths. + * The correctness of the MADV_COLLAPSE can be found in the relevant test examples in khugepaged. + */ +TEST_F(process_madvise, remote_collapse) +{ + const unsigned long pagesize = self->page_size; + long huge_page_size; + int pipe_info[2]; + ssize_t ret; + struct iovec vec; + + struct child_info { + pid_t pid; + void *map_addr; + } info; + + huge_page_size = read_pmd_pagesize(); + if (huge_page_size <= 0) + SKIP(return, "Could not determine a valid huge page size.\n"); + + ASSERT_EQ(pipe(pipe_info), 0); + + self->child_pid = fork(); + ASSERT_NE(self->child_pid, -1); + + if (self->child_pid == 0) { + char *map; + size_t map_size = 2 * huge_page_size; + + close(pipe_info[0]); + + map = mmap(NULL, map_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(map, MAP_FAILED); + + /* Fault in as small pages */ + for (size_t i = 0; i < map_size; i += pagesize) + map[i] = 'A'; + + /* Send info and pause */ + info.pid = getpid(); + info.map_addr = map; + ret = write(pipe_info[1], &info, sizeof(info)); + ASSERT_EQ(ret, sizeof(info)); + close(pipe_info[1]); + + pause(); + exit(0); + } + + close(pipe_info[1]); + + /* Receive child info */ + ret = read(pipe_info[0], &info, sizeof(info)); + if (ret <= 0) { + waitpid(self->child_pid, NULL, 0); + SKIP(return, "Failed to read child info from pipe.\n"); + } + ASSERT_EQ(ret, sizeof(info)); + close(pipe_info[0]); + self->child_pid = info.pid; + + self->remote_pidfd = syscall(__NR_pidfd_open, self->child_pid, 0); + ASSERT_GE(self->remote_pidfd, 0); + + vec.iov_base = info.map_addr; + vec.iov_len = huge_page_size; + + ret = sys_process_madvise(self->remote_pidfd, &vec, 1, MADV_COLLAPSE, + 0); + if (ret == -1) { + if (errno == EINVAL) + SKIP(return, "PROCESS_MADV_ADVISE is not supported.\n"); + else if (errno == EPERM) + SKIP(return, + "No process_madvise() permissions, try running as root.\n"); + return; + } + + ASSERT_EQ(ret, huge_page_size); +} + +/* + * Test process_madvise() with a pidfd for a process that has already + * exited to ensure correct error handling. + */ +TEST_F(process_madvise, exited_process_pidfd) +{ + const unsigned long pagesize = self->page_size; + struct iovec vec; + char *map; + ssize_t ret; + + map = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, + 0); + if (map == MAP_FAILED) + SKIP(return, "mmap failed, not enough memory.\n"); + + vec.iov_base = map; + vec.iov_len = pagesize; + + /* + * Using a pidfd for a process that has already exited should fail + * with ESRCH. + */ + self->child_pid = fork(); + ASSERT_NE(self->child_pid, -1); + + if (self->child_pid == 0) + exit(0); + + self->remote_pidfd = syscall(__NR_pidfd_open, self->child_pid, 0); + ASSERT_GE(self->remote_pidfd, 0); + + /* Wait for the child to ensure it has terminated. */ + waitpid(self->child_pid, NULL, 0); + + ret = sys_process_madvise(self->remote_pidfd, &vec, 1, MADV_DONTNEED, + 0); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ESRCH); +} + +/* + * Test process_madvise() with bad pidfds to ensure correct error + * handling. + */ +TEST_F(process_madvise, bad_pidfd) +{ + const unsigned long pagesize = self->page_size; + struct iovec vec; + char *map; + ssize_t ret; + + map = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, + 0); + if (map == MAP_FAILED) + SKIP(return, "mmap failed, not enough memory.\n"); + + vec.iov_base = map; + vec.iov_len = pagesize; + + /* Using an invalid fd number (-1) should fail with EBADF. */ + ret = sys_process_madvise(-1, &vec, 1, MADV_DONTNEED, 0); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EBADF); + + /* + * Using a valid fd that is not a pidfd (e.g. stdin) should fail + * with EBADF. + */ + ret = sys_process_madvise(STDIN_FILENO, &vec, 1, MADV_DONTNEED, 0); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EBADF); +} + +/* + * Test that process_madvise() rejects vlen > UIO_MAXIOV. + * The kernel should return -EINVAL when the number of iovecs exceeds 1024. + */ +TEST_F(process_madvise, invalid_vlen) +{ + const unsigned long pagesize = self->page_size; + int pidfd = self->pidfd; + struct iovec vec; + char *map; + ssize_t ret; + + map = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, + 0); + if (map == MAP_FAILED) + SKIP(return, "mmap failed, not enough memory.\n"); + + vec.iov_base = map; + vec.iov_len = pagesize; + + ret = sys_process_madvise(pidfd, &vec, 1025, MADV_DONTNEED, 0); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EINVAL); + + /* Cleanup. */ + ASSERT_EQ(munmap(map, pagesize), 0); +} + +/* + * Test process_madvise() with an invalid flag value. Currently, only a flag + * value of 0 is supported. This test is reserved for the future, e.g., if + * synchronous flags are added. + */ +TEST_F(process_madvise, flag) +{ + const unsigned long pagesize = self->page_size; + unsigned int invalid_flag; + int pidfd = self->pidfd; + struct iovec vec; + char *map; + ssize_t ret; + + map = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, + 0); + if (map == MAP_FAILED) + SKIP(return, "mmap failed, not enough memory.\n"); + + vec.iov_base = map; + vec.iov_len = pagesize; + + invalid_flag = 0x80000000; + + ret = sys_process_madvise(pidfd, &vec, 1, MADV_DONTNEED, invalid_flag); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EINVAL); + + /* Cleanup. */ + ASSERT_EQ(munmap(map, pagesize), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index a38c984103ce..471e539d82b8 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -65,6 +65,8 @@ separated by spaces: test pagemap_scan IOCTL - pfnmap tests for VM_PFNMAP handling +- process_madv + test for process_madv - cow test copy-on-write semantics - thp @@ -425,6 +427,9 @@ CATEGORY="madv_guard" run_test ./guard-regions # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests CATEGORY="madv_populate" run_test ./madv_populate +# PROCESS_MADV test +CATEGORY="process_madv" run_test ./process_madv + CATEGORY="vma_merge" run_test ./merge if [ -x ./memfd_secret ] -- cgit v1.2.3 From d6a511dea45ce3e851326b6bdc63f827ebb3e765 Mon Sep 17 00:00:00 2001 From: Suresh K C Date: Wed, 9 Jul 2025 23:16:57 +0530 Subject: selftests: cachestat: add tests for mmap, refactor and enhance mmap test for cachestat validation Add a cohesive test case that verifies cachestat behavior with memory-mapped files using mmap(). Also refactor the test logic to reduce redundancy, improve error reporting, and clarify failure messages for both shmem and mmap file types. [akpm@linux-foundation.org: coding-style cleanups] Link: https://lkml.kernel.org/r/20250709174657.6916-1-suresh.k.chandrappa@gmail.com Signed-off-by: Suresh K C Reviewed-by: Joshua Hahn Tested-by: Nhat Pham Acked-by: Nhat Pham Cc: Johannes Weiner Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/cachestat/test_cachestat.c | 62 +++++++++++++++++++--- 1 file changed, 54 insertions(+), 8 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/cachestat/test_cachestat.c b/tools/testing/selftests/cachestat/test_cachestat.c index 632ab44737ec..c952640f163b 100644 --- a/tools/testing/selftests/cachestat/test_cachestat.c +++ b/tools/testing/selftests/cachestat/test_cachestat.c @@ -33,6 +33,11 @@ void print_cachestat(struct cachestat *cs) cs->nr_evicted, cs->nr_recently_evicted); } +enum file_type { + FILE_MMAP, + FILE_SHMEM +}; + bool write_exactly(int fd, size_t filesize) { int random_fd = open("/dev/urandom", O_RDONLY); @@ -201,8 +206,20 @@ out1: out: return ret; } +const char *file_type_str(enum file_type type) +{ + switch (type) { + case FILE_SHMEM: + return "shmem"; + case FILE_MMAP: + return "mmap"; + default: + return "unknown"; + } +} -bool test_cachestat_shmem(void) + +bool run_cachestat_test(enum file_type type) { size_t PS = sysconf(_SC_PAGESIZE); size_t filesize = PS * 512 * 2; /* 2 2MB huge pages */ @@ -212,27 +229,50 @@ bool test_cachestat_shmem(void) char *filename = "tmpshmcstat"; struct cachestat cs; bool ret = true; + int fd; unsigned long num_pages = compute_len / PS; - int fd = shm_open(filename, O_CREAT | O_RDWR, 0600); + if (type == FILE_SHMEM) + fd = shm_open(filename, O_CREAT | O_RDWR, 0600); + else + fd = open(filename, O_RDWR | O_CREAT | O_TRUNC, 0666); if (fd < 0) { - ksft_print_msg("Unable to create shmem file.\n"); + ksft_print_msg("Unable to create %s file.\n", + file_type_str(type)); ret = false; goto out; } if (ftruncate(fd, filesize)) { - ksft_print_msg("Unable to truncate shmem file.\n"); + ksft_print_msg("Unable to truncate %s file.\n",file_type_str(type)); ret = false; goto close_fd; } + switch (type) { + case FILE_SHMEM: + if (!write_exactly(fd, filesize)) { + ksft_print_msg("Unable to write to file.\n"); + ret = false; + goto close_fd; + } + break; + case FILE_MMAP: + char *map = mmap(NULL, filesize, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); - if (!write_exactly(fd, filesize)) { - ksft_print_msg("Unable to write to shmem file.\n"); + if (map == MAP_FAILED) { + ksft_print_msg("mmap failed.\n"); + ret = false; + goto close_fd; + } + for (int i = 0; i < filesize; i++) + map[i] = 'A'; + break; + default: + ksft_print_msg("Unsupported file type.\n"); ret = false; goto close_fd; } - syscall_ret = syscall(__NR_cachestat, fd, &cs_range, &cs, 0); if (syscall_ret) { @@ -308,12 +348,18 @@ int main(void) break; } - if (test_cachestat_shmem()) + if (run_cachestat_test(FILE_SHMEM)) ksft_test_result_pass("cachestat works with a shmem file\n"); else { ksft_test_result_fail("cachestat fails with a shmem file\n"); ret = 1; } + if (run_cachestat_test(FILE_MMAP)) + ksft_test_result_pass("cachestat works with a mmap file\n"); + else { + ksft_test_result_fail("cachestat fails with a mmap file\n"); + ret = 1; + } return ret; } -- cgit v1.2.3 From 5ef7fdf52c0f2b792802aac3438e67e5ebe7e63d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 1 Aug 2025 11:16:38 -0700 Subject: selftests: net: packetdrill: xfail all problems on slow machines We keep seeing flakes on packetdrill on debug kernels, while non-debug kernels are stable, not a single flake in 200 runs. Time to give up, debug kernels appear to suffer from 10msec latency spikes and any timing-sensitive test is bound to flake. Reviewed-by: Willem de Bruijn Acked-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250801181638.2483531-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/packetdrill/ksft_runner.sh | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh index c5b01e1bd4c7..a7e790af38ff 100755 --- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh +++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh @@ -35,24 +35,7 @@ failfunc=ktap_test_fail if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then optargs+=('--tolerance_usecs=14000') - - # xfail tests that are known flaky with dbg config, not fixable. - # still run them for coverage (and expect 100% pass without dbg). - declare -ar xfail_list=( - "tcp_blocking_blocking-connect.pkt" - "tcp_blocking_blocking-read.pkt" - "tcp_eor_no-coalesce-retrans.pkt" - "tcp_fast_recovery_prr-ss.*.pkt" - "tcp_sack_sack-route-refresh-ip-tos.pkt" - "tcp_slow_start_slow-start-after-win-update.pkt" - "tcp_timestamping.*.pkt" - "tcp_user_timeout_user-timeout-probe.pkt" - "tcp_zerocopy_cl.*.pkt" - "tcp_zerocopy_epoll_.*.pkt" - "tcp_tcp_info_tcp-info-.*-limited.pkt" - ) - readonly xfail_regex="^($(printf '%s|' "${xfail_list[@]}"))$" - [[ "$script" =~ ${xfail_regex} ]] && failfunc=ktap_test_xfail + failfunc=ktap_test_xfail fi ktap_print_header -- cgit v1.2.3 From 084d2ac4030c5919e85bba1f4af26e33491469cb Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Sat, 2 Aug 2025 22:55:35 +0200 Subject: selftests/perf_events: Add a mmap() correctness test Exercise various mmap(), munmap() and mremap() invocations, which might cause a perf buffer mapping to be split or truncated. To avoid hard coding the perf event and having dependencies on architectures and configuration options, scan through event types in sysfs and try to open them. On success, try to mmap() and if that succeeds try to mmap() the AUX buffer. In case that no AUX buffer supporting event is found, only test the base buffer mapping. If no mappable event is found or permissions are not sufficient, skip the tests. Reserve a PROT_NONE region for both rb and aux tests to allow testing the case where mremap unmaps beyond the end of a mapped VMA to prevent it from unmapping unrelated mappings. Signed-off-by: Lorenzo Stoakes Co-developed-by: Thomas Gleixner Signed-off-by: Thomas Gleixner Reviewed-by: Lorenzo Stoakes --- tools/testing/selftests/perf_events/.gitignore | 1 + tools/testing/selftests/perf_events/Makefile | 2 +- tools/testing/selftests/perf_events/mmap.c | 236 +++++++++++++++++++++++++ 3 files changed, 238 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/perf_events/mmap.c (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/perf_events/.gitignore b/tools/testing/selftests/perf_events/.gitignore index ee93dc4969b8..4931b3b6bbd3 100644 --- a/tools/testing/selftests/perf_events/.gitignore +++ b/tools/testing/selftests/perf_events/.gitignore @@ -2,3 +2,4 @@ sigtrap_threads remove_on_exec watermark_signal +mmap diff --git a/tools/testing/selftests/perf_events/Makefile b/tools/testing/selftests/perf_events/Makefile index 70e3ff211278..2e5d85770dfe 100644 --- a/tools/testing/selftests/perf_events/Makefile +++ b/tools/testing/selftests/perf_events/Makefile @@ -2,5 +2,5 @@ CFLAGS += -Wl,-no-as-needed -Wall $(KHDR_INCLUDES) LDFLAGS += -lpthread -TEST_GEN_PROGS := sigtrap_threads remove_on_exec watermark_signal +TEST_GEN_PROGS := sigtrap_threads remove_on_exec watermark_signal mmap include ../lib.mk diff --git a/tools/testing/selftests/perf_events/mmap.c b/tools/testing/selftests/perf_events/mmap.c new file mode 100644 index 000000000000..ea0427aac1f9 --- /dev/null +++ b/tools/testing/selftests/perf_events/mmap.c @@ -0,0 +1,236 @@ +// SPDX-License-Identifier: GPL-2.0-only +#define _GNU_SOURCE + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include "../kselftest_harness.h" + +#define RB_SIZE 0x3000 +#define AUX_SIZE 0x10000 +#define AUX_OFFS 0x4000 + +#define HOLE_SIZE 0x1000 + +/* Reserve space for rb, aux with space for shrink-beyond-vma testing. */ +#define REGION_SIZE (2 * RB_SIZE + 2 * AUX_SIZE) +#define REGION_AUX_OFFS (2 * RB_SIZE) + +#define MAP_BASE 1 +#define MAP_AUX 2 + +#define EVENT_SRC_DIR "/sys/bus/event_source/devices" + +FIXTURE(perf_mmap) +{ + int fd; + void *ptr; + void *region; +}; + +FIXTURE_VARIANT(perf_mmap) +{ + bool aux; + unsigned long ptr_size; +}; + +FIXTURE_VARIANT_ADD(perf_mmap, rb) +{ + .aux = false, + .ptr_size = RB_SIZE, +}; + +FIXTURE_VARIANT_ADD(perf_mmap, aux) +{ + .aux = true, + .ptr_size = AUX_SIZE, +}; + +static bool read_event_type(struct dirent *dent, __u32 *type) +{ + char typefn[512]; + FILE *fp; + int res; + + snprintf(typefn, sizeof(typefn), "%s/%s/type", EVENT_SRC_DIR, dent->d_name); + fp = fopen(typefn, "r"); + if (!fp) + return false; + + res = fscanf(fp, "%u", type); + fclose(fp); + return res > 0; +} + +FIXTURE_SETUP(perf_mmap) +{ + struct perf_event_attr attr = { + .size = sizeof(attr), + .disabled = 1, + .exclude_kernel = 1, + .exclude_hv = 1, + }; + struct perf_event_attr attr_ok = {}; + unsigned int eacces = 0, map = 0; + struct perf_event_mmap_page *rb; + struct dirent *dent; + void *aux, *region; + DIR *dir; + + self->ptr = NULL; + + dir = opendir(EVENT_SRC_DIR); + if (!dir) + SKIP(return, "perf not available."); + + region = mmap(NULL, REGION_SIZE, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(region, MAP_FAILED); + self->region = region; + + // Try to find a suitable event on this system + while ((dent = readdir(dir))) { + int fd; + + if (!read_event_type(dent, &attr.type)) + continue; + + fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0); + if (fd < 0) { + if (errno == EACCES) + eacces++; + continue; + } + + // Check whether the event supports mmap() + rb = mmap(region, RB_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, 0); + if (rb == MAP_FAILED) { + close(fd); + continue; + } + + if (!map) { + // Save the event in case that no AUX capable event is found + attr_ok = attr; + map = MAP_BASE; + } + + if (!variant->aux) + continue; + + rb->aux_offset = AUX_OFFS; + rb->aux_size = AUX_SIZE; + + // Check whether it supports a AUX buffer + aux = mmap(region + REGION_AUX_OFFS, AUX_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, fd, AUX_OFFS); + if (aux == MAP_FAILED) { + munmap(rb, RB_SIZE); + close(fd); + continue; + } + + attr_ok = attr; + map = MAP_AUX; + munmap(aux, AUX_SIZE); + munmap(rb, RB_SIZE); + close(fd); + break; + } + closedir(dir); + + if (!map) { + if (!eacces) + SKIP(return, "No mappable perf event found."); + else + SKIP(return, "No permissions for perf_event_open()"); + } + + self->fd = syscall(SYS_perf_event_open, &attr_ok, 0, -1, -1, 0); + ASSERT_NE(self->fd, -1); + + rb = mmap(region, RB_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, self->fd, 0); + ASSERT_NE(rb, MAP_FAILED); + + if (!variant->aux) { + self->ptr = rb; + return; + } + + if (map != MAP_AUX) + SKIP(return, "No AUX event found."); + + rb->aux_offset = AUX_OFFS; + rb->aux_size = AUX_SIZE; + aux = mmap(region + REGION_AUX_OFFS, AUX_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, self->fd, AUX_OFFS); + ASSERT_NE(aux, MAP_FAILED); + self->ptr = aux; +} + +FIXTURE_TEARDOWN(perf_mmap) +{ + ASSERT_EQ(munmap(self->region, REGION_SIZE), 0); + if (self->fd != -1) + ASSERT_EQ(close(self->fd), 0); +} + +TEST_F(perf_mmap, remap) +{ + void *tmp, *ptr = self->ptr; + unsigned long size = variant->ptr_size; + + // Test the invalid remaps + ASSERT_EQ(mremap(ptr, size, HOLE_SIZE, MREMAP_MAYMOVE), MAP_FAILED); + ASSERT_EQ(mremap(ptr + HOLE_SIZE, size, HOLE_SIZE, MREMAP_MAYMOVE), MAP_FAILED); + ASSERT_EQ(mremap(ptr + size - HOLE_SIZE, HOLE_SIZE, size, MREMAP_MAYMOVE), MAP_FAILED); + // Shrink the end of the mapping such that we only unmap past end of the VMA, + // which should succeed and poke a hole into the PROT_NONE region + ASSERT_NE(mremap(ptr + size - HOLE_SIZE, size, HOLE_SIZE, MREMAP_MAYMOVE), MAP_FAILED); + + // Remap the whole buffer to a new address + tmp = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(tmp, MAP_FAILED); + + // Try splitting offset 1 hole size into VMA, this should fail + ASSERT_EQ(mremap(ptr + HOLE_SIZE, size - HOLE_SIZE, size - HOLE_SIZE, + MREMAP_MAYMOVE | MREMAP_FIXED, tmp), MAP_FAILED); + // Remapping the whole thing should succeed fine + ptr = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, tmp); + ASSERT_EQ(ptr, tmp); + ASSERT_EQ(munmap(tmp, size), 0); +} + +TEST_F(perf_mmap, unmap) +{ + unsigned long size = variant->ptr_size; + + // Try to poke holes into the mappings + ASSERT_NE(munmap(self->ptr, HOLE_SIZE), 0); + ASSERT_NE(munmap(self->ptr + HOLE_SIZE, HOLE_SIZE), 0); + ASSERT_NE(munmap(self->ptr + size - HOLE_SIZE, HOLE_SIZE), 0); +} + +TEST_F(perf_mmap, map) +{ + unsigned long size = variant->ptr_size; + + // Try to poke holes into the mappings by mapping anonymous memory over it + ASSERT_EQ(mmap(self->ptr, HOLE_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0), MAP_FAILED); + ASSERT_EQ(mmap(self->ptr + HOLE_SIZE, HOLE_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0), MAP_FAILED); + ASSERT_EQ(mmap(self->ptr + size - HOLE_SIZE, HOLE_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0), MAP_FAILED); +} + +TEST_HARNESS_MAIN -- cgit v1.2.3 From 8d22aea8af0d57a1daff046d65b7c18552e35e29 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 4 Aug 2025 14:43:20 +0300 Subject: selftests: netdevsim: Xfail nexthop test on slow machines A lot of test cases in the file are related to the idle and unbalanced timers of resilient nexthop groups and these tests are reported to be flaky on slow machines running debug kernels. Rather than marking a lot of individual tests with xfail_on_slow(), simply mark all the tests. Note that the test is stable on non-debug machines and that with debug kernels we are mainly interested in the output of various sanitizers in order to determine pass / fail. Before: # make -C tools/testing/selftests KSFT_MACHINE_SLOW=yes \ TARGETS=drivers/net/netdevsim TEST_PROGS=nexthop.sh \ TEST_GEN_PROGS="" run_tests [...] # TEST: Bucket migration after idle timer (with delete) [FAIL] # Group expected to still be unbalanced [...] not ok 1 selftests: drivers/net/netdevsim: nexthop.sh # exit=1 After: # make -C tools/testing/selftests KSFT_MACHINE_SLOW=yes \ TARGETS=drivers/net/netdevsim TEST_PROGS=nexthop.sh \ TEST_GEN_PROGS="" run_tests [...] # TEST: Bucket migration after idle timer (with delete) [XFAIL] # Group expected to still be unbalanced [...] ok 1 selftests: drivers/net/netdevsim: nexthop.sh Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/netdev/20250729160609.02e0f157@kernel.org/ Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Link: https://patch.msgid.link/20250804114320.193203-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/netdevsim/nexthop.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh index e8e0dc088d6a..01d0c044a5fc 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh @@ -1053,6 +1053,6 @@ trap cleanup EXIT setup_prepare -tests_run +xfail_on_slow tests_run exit $EXIT_STATUS -- cgit v1.2.3 From e6d76268813dc64cc0b74ea9c274501f2de05344 Mon Sep 17 00:00:00 2001 From: Samiullah Khawaja Date: Mon, 4 Aug 2025 16:44:57 +0000 Subject: net: Update threaded state in napi config in netif_set_threaded Commit 2677010e7793 ("Add support to set NAPI threaded for individual NAPI") added support to enable/disable threaded napi using netlink. This also extended the napi config save/restore functionality to set the napi threaded state. This breaks netdev reset for drivers that use napi threaded at device level and also use napi config save/restore on napi_disable/napi_enable. Basically on netdev with napi threaded enabled at device level, a napi_enable call will get stuck trying to stop the napi kthread. This is because the napi->config->threaded is set to disabled when threaded is enabled at device level. The issue can be reproduced on virtio-net device using qemu. To reproduce the issue run following, echo 1 > /sys/class/net/threaded ethtool -L eth0 combined 1 Update the threaded state in napi config in netif_set_threaded and add a new test that verifies this scenario. Tested on qemu with virtio-net: NETIF=eth0 ./tools/testing/selftests/drivers/net/napi_threaded.py TAP version 13 1..2 ok 1 napi_threaded.change_num_queues ok 2 napi_threaded.enable_dev_threaded_disable_napi_threaded # Totals: pass:2 fail:0 xfail:0 xpass:0 skip:0 error:0 Fixes: 2677010e7793 ("Add support to set NAPI threaded for individual NAPI") Signed-off-by: Samiullah Khawaja Link: https://patch.msgid.link/20250804164457.2494390-1-skhawaja@google.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 26 ++--- tools/testing/selftests/drivers/net/Makefile | 1 + .../testing/selftests/drivers/net/napi_threaded.py | 111 +++++++++++++++++++++ 3 files changed, 121 insertions(+), 17 deletions(-) create mode 100755 tools/testing/selftests/drivers/net/napi_threaded.py (limited to 'tools/testing/selftests') diff --git a/net/core/dev.c b/net/core/dev.c index b28ce68830b2..68dc47d7e700 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6978,6 +6978,12 @@ int napi_set_threaded(struct napi_struct *napi, if (napi->config) napi->config->threaded = threaded; + /* Setting/unsetting threaded mode on a napi might not immediately + * take effect, if the current napi instance is actively being + * polled. In this case, the switch between threaded mode and + * softirq mode will happen in the next round of napi_schedule(). + * This should not cause hiccups/stalls to the live traffic. + */ if (!threaded && napi->thread) { napi_stop_kthread(napi); } else { @@ -7011,23 +7017,9 @@ int netif_set_threaded(struct net_device *dev, WRITE_ONCE(dev->threaded, threaded); - /* Make sure kthread is created before THREADED bit - * is set. - */ - smp_mb__before_atomic(); - - /* Setting/unsetting threaded mode on a napi might not immediately - * take effect, if the current napi instance is actively being - * polled. In this case, the switch between threaded mode and - * softirq mode will happen in the next round of napi_schedule(). - * This should not cause hiccups/stalls to the live traffic. - */ - list_for_each_entry(napi, &dev->napi_list, dev_list) { - if (!threaded && napi->thread) - napi_stop_kthread(napi); - else - assign_bit(NAPI_STATE_THREADED, &napi->state, threaded); - } + /* The error should not occur as the kthreads are already created. */ + list_for_each_entry(napi, &dev->napi_list, dev_list) + WARN_ON_ONCE(napi_set_threaded(napi, threaded)); return err; } diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 3556f3563e08..984ece05f7f9 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -11,6 +11,7 @@ TEST_GEN_FILES := \ TEST_PROGS := \ napi_id.py \ + napi_threaded.py \ netcons_basic.sh \ netcons_cmdline.sh \ netcons_fragmented_msg.sh \ diff --git a/tools/testing/selftests/drivers/net/napi_threaded.py b/tools/testing/selftests/drivers/net/napi_threaded.py new file mode 100755 index 000000000000..b2698db39817 --- /dev/null +++ b/tools/testing/selftests/drivers/net/napi_threaded.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Test napi threaded states. +""" + +from lib.py import ksft_run, ksft_exit +from lib.py import ksft_eq, ksft_ne, ksft_ge +from lib.py import NetDrvEnv, NetdevFamily +from lib.py import cmd, defer, ethtool + + +def _assert_napi_threaded_enabled(nl, napi_id) -> None: + napi = nl.napi_get({'id': napi_id}) + ksft_eq(napi['threaded'], 'enabled') + ksft_ne(napi.get('pid'), None) + + +def _assert_napi_threaded_disabled(nl, napi_id) -> None: + napi = nl.napi_get({'id': napi_id}) + ksft_eq(napi['threaded'], 'disabled') + ksft_eq(napi.get('pid'), None) + + +def _set_threaded_state(cfg, threaded) -> None: + cmd(f"echo {threaded} > /sys/class/net/{cfg.ifname}/threaded") + + +def _setup_deferred_cleanup(cfg) -> None: + combined = ethtool(f"-l {cfg.ifname}", json=True)[0].get("combined", 0) + ksft_ge(combined, 2) + defer(ethtool, f"-L {cfg.ifname} combined {combined}") + + threaded = cmd(f"cat /sys/class/net/{cfg.ifname}/threaded").stdout + defer(_set_threaded_state, cfg, threaded) + + +def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None: + """ + Test that when napi threaded is enabled at device level and + then disabled at napi level for one napi, the threaded state + of all napis is preserved after a change in number of queues. + """ + + napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True) + ksft_ge(len(napis), 2) + + napi0_id = napis[0]['id'] + napi1_id = napis[1]['id'] + + _setup_deferred_cleanup(cfg) + + # set threaded + _set_threaded_state(cfg, 1) + + # check napi threaded is set for both napis + _assert_napi_threaded_enabled(nl, napi0_id) + _assert_napi_threaded_enabled(nl, napi1_id) + + # disable threaded for napi1 + nl.napi_set({'id': napi1_id, 'threaded': 'disabled'}) + + cmd(f"ethtool -L {cfg.ifname} combined 1") + cmd(f"ethtool -L {cfg.ifname} combined 2") + _assert_napi_threaded_enabled(nl, napi0_id) + _assert_napi_threaded_disabled(nl, napi1_id) + + +def change_num_queues(cfg, nl) -> None: + """ + Test that when napi threaded is enabled at device level, + the napi threaded state is preserved after a change in + number of queues. + """ + + napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True) + ksft_ge(len(napis), 2) + + napi0_id = napis[0]['id'] + napi1_id = napis[1]['id'] + + _setup_deferred_cleanup(cfg) + + # set threaded + _set_threaded_state(cfg, 1) + + # check napi threaded is set for both napis + _assert_napi_threaded_enabled(nl, napi0_id) + _assert_napi_threaded_enabled(nl, napi1_id) + + cmd(f"ethtool -L {cfg.ifname} combined 1") + cmd(f"ethtool -L {cfg.ifname} combined 2") + + # check napi threaded is set for both napis + _assert_napi_threaded_enabled(nl, napi0_id) + _assert_napi_threaded_enabled(nl, napi1_id) + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEnv(__file__, queue_count=2) as cfg: + ksft_run([change_num_queues, + enable_dev_threaded_disable_napi_threaded], + args=(cfg, NetdevFamily())) + ksft_exit() + + +if __name__ == "__main__": + main() -- cgit v1.2.3 From 85acc29f90e0183997dea27277057c9aec2769aa Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 6 Aug 2025 18:13:41 +0100 Subject: KVM: arm64: selftest: Add standalone test checking for KVM's own UUID Tinkering with UUIDs is a perilious task, and the KVM UUID gets broken at times. In order to spot this early enough, add a selftest that will shout if the expected value isn't found. Signed-off-by: Marc Zyngier Reviewed-by: Sebastian Ott Link: https://lore.kernel.org/r/20250721130558.50823-1-jackabt.amazon@gmail.com Link: https://lore.kernel.org/r/20250806171341.1521210-1-maz@kernel.org Signed-off-by: Oliver Upton --- tools/testing/selftests/kvm/Makefile.kvm | 1 + tools/testing/selftests/kvm/arm64/kvm-uuid.c | 70 ++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 tools/testing/selftests/kvm/arm64/kvm-uuid.c (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index ce817a975e50..e1eb1ba238a2 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -167,6 +167,7 @@ TEST_GEN_PROGS_arm64 += arm64/vgic_irq TEST_GEN_PROGS_arm64 += arm64/vgic_lpi_stress TEST_GEN_PROGS_arm64 += arm64/vpmu_counter_access TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3 +TEST_GEN_PROGS_arm64 += arm64/kvm-uuid TEST_GEN_PROGS_arm64 += access_tracking_perf_test TEST_GEN_PROGS_arm64 += arch_timer TEST_GEN_PROGS_arm64 += coalesced_io_test diff --git a/tools/testing/selftests/kvm/arm64/kvm-uuid.c b/tools/testing/selftests/kvm/arm64/kvm-uuid.c new file mode 100644 index 000000000000..af9581b860f1 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/kvm-uuid.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Check that nobody has tampered with KVM's UID + +#include +#include +#include +#include + +#include "processor.h" + +/* + * Do NOT redefine these constants, or try to replace them with some + * "common" version. They are hardcoded here to detect any potential + * breakage happening in the rest of the kernel. + * + * KVM UID value: 28b46fb6-2ec5-11e9-a9ca-4b564d003a74 + */ +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 0xb66fb428U +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 0xe911c52eU +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 0x564bcaa9U +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3 0x743a004dU + +static void guest_code(void) +{ + struct arm_smccc_res res = {}; + + smccc_hvc(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0, 0, 0, 0, 0, 0, 0, &res); + + __GUEST_ASSERT(res.a0 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 && + res.a1 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 && + res.a2 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 && + res.a3 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3, + "Unexpected KVM-specific UID %lx %lx %lx %lx\n", res.a0, res.a1, res.a2, res.a3); + GUEST_DONE(); +} + +int main (int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + bool guest_done = false; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + while (!guest_done) { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + break; + case UCALL_DONE: + guest_done = true; + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_PRINTF: + printf("%s", uc.buffer); + break; + default: + TEST_FAIL("Unexpected guest exit"); + } + } + + kvm_vm_free(vm); + + return 0; +} -- cgit v1.2.3 From 81e4b9cf365df4cde30157a85cc9f3d673946118 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Fri, 8 Aug 2025 03:55:06 +1000 Subject: selftests/mount_setattr: add smoke tests for open_tree_attr(2) bug There appear to be no other open_tree_attr(2) tests at the moment, but as a minimal solution just add some additional checks in the existing MOUNT_ATTR_IDMAP tests to make sure that open_tree_attr(2) cannot be used to bypass the tested restrictions that apply to mount_setattr(2). Signed-off-by: Aleksa Sarai Link: https://lore.kernel.org/20250808-open_tree_attr-bugfix-idmap-v1-2-0ec7bc05646c@cyphar.com Signed-off-by: Christian Brauner --- .../selftests/mount_setattr/mount_setattr_test.c | 77 ++++++++++++++++++---- 1 file changed, 64 insertions(+), 13 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c index b1e4618399be..a688871a98eb 100644 --- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c +++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c @@ -107,6 +107,26 @@ #endif #endif +#ifndef __NR_open_tree_attr + #if defined __alpha__ + #define __NR_open_tree_attr 577 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_open_tree_attr (467 + 4000) + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_open_tree_attr (467 + 6000) + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_open_tree_attr (467 + 5000) + #endif + #elif defined __ia64__ + #define __NR_open_tree_attr (467 + 1024) + #else + #define __NR_open_tree_attr 467 + #endif +#endif + #ifndef MOUNT_ATTR_IDMAP #define MOUNT_ATTR_IDMAP 0x00100000 #endif @@ -121,6 +141,12 @@ static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flag return syscall(__NR_mount_setattr, dfd, path, flags, attr, size); } +static inline int sys_open_tree_attr(int dfd, const char *path, unsigned int flags, + struct mount_attr *attr, size_t size) +{ + return syscall(__NR_open_tree_attr, dfd, path, flags, attr, size); +} + static ssize_t write_nointr(int fd, const void *buf, size_t count) { ssize_t ret; @@ -1222,6 +1248,12 @@ TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace) attr.userns_fd = get_userns_fd(0, 10000, 10000); ASSERT_GE(attr.userns_fd, 0); ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + /* + * Make sure that open_tree_attr() without OPEN_TREE_CLONE is not a way + * to bypass this mount_setattr() restriction. + */ + ASSERT_LT(sys_open_tree_attr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_EQ(close(attr.userns_fd), 0); ASSERT_EQ(close(open_tree_fd), 0); } @@ -1255,6 +1287,12 @@ TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace) ASSERT_GE(attr.userns_fd, 0); ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + /* + * Make sure that open_tree_attr() without OPEN_TREE_CLONE is not a way + * to bypass this mount_setattr() restriction. + */ + ASSERT_LT(sys_open_tree_attr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_EQ(close(attr.userns_fd), 0); ASSERT_EQ(close(open_tree_fd), 0); } @@ -1321,6 +1359,19 @@ TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace) ASSERT_EQ(close(open_tree_fd), 0); } +static bool expected_uid_gid(int dfd, const char *path, int flags, + uid_t expected_uid, gid_t expected_gid) +{ + int ret; + struct stat st; + + ret = fstatat(dfd, path, &st, flags); + if (ret < 0) + return false; + + return st.st_uid == expected_uid && st.st_gid == expected_gid; +} + /** * Validate that currently changing the idmapping of an idmapped mount fails. */ @@ -1331,6 +1382,8 @@ TEST_F(mount_setattr_idmapped, change_idmapping) .attr_set = MOUNT_ATTR_IDMAP, }; + ASSERT_TRUE(expected_uid_gid(-EBADF, "/mnt/D", 0, 0, 0)); + if (!mount_setattr_supported()) SKIP(return, "mount_setattr syscall not supported"); @@ -1348,27 +1401,25 @@ TEST_F(mount_setattr_idmapped, change_idmapping) AT_EMPTY_PATH, &attr, sizeof(attr)), 0); ASSERT_EQ(close(attr.userns_fd), 0); + EXPECT_FALSE(expected_uid_gid(open_tree_fd, ".", 0, 0, 0)); + EXPECT_TRUE(expected_uid_gid(open_tree_fd, ".", 0, 10000, 10000)); + /* Change idmapping on a detached mount that is already idmapped. */ attr.userns_fd = get_userns_fd(0, 20000, 10000); ASSERT_GE(attr.userns_fd, 0); ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + /* + * Make sure that open_tree_attr() without OPEN_TREE_CLONE is not a way + * to bypass this mount_setattr() restriction. + */ + EXPECT_LT(sys_open_tree_attr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + EXPECT_FALSE(expected_uid_gid(open_tree_fd, ".", 0, 20000, 20000)); + EXPECT_TRUE(expected_uid_gid(open_tree_fd, ".", 0, 10000, 10000)); + ASSERT_EQ(close(attr.userns_fd), 0); ASSERT_EQ(close(open_tree_fd), 0); } -static bool expected_uid_gid(int dfd, const char *path, int flags, - uid_t expected_uid, gid_t expected_gid) -{ - int ret; - struct stat st; - - ret = fstatat(dfd, path, &st, flags); - if (ret < 0) - return false; - - return st.st_uid == expected_uid && st.st_gid == expected_gid; -} - TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid) { int open_tree_fd = -EBADF; -- cgit v1.2.3 From 2319f9d0aa644eb9666c7be903078f50ecc2eb5b Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Mon, 11 Aug 2025 09:49:57 +0200 Subject: selftests/coredump: Remove the read() that fails the test Resolve a conflict between commit 6a68d28066b6 ("selftests/coredump: Fix "socket_detect_userspace_client" test failure") and commit 994dc26302ed ("selftests/coredump: fix build") The first commit adds a read() to wait for write() from another thread to finish. But the second commit removes the write(). Now that the two commits are in the same tree, the read() now gets EOF and the test fails. Remove this read() so that the test passes. Signed-off-by: Nam Cao Link: https://lore.kernel.org/20250811074957.4079616-1-namcao@linutronix.de Signed-off-by: Christian Brauner --- tools/testing/selftests/coredump/stackdump_test.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c index 5a5a7a5f7e1d..a4ac80bb1003 100644 --- a/tools/testing/selftests/coredump/stackdump_test.c +++ b/tools/testing/selftests/coredump/stackdump_test.c @@ -446,9 +446,6 @@ TEST_F(coredump, socket_detect_userspace_client) if (info.coredump_mask & PIDFD_COREDUMPED) goto out; - if (read(fd_coredump, &c, 1) < 1) - goto out; - exit_code = EXIT_SUCCESS; out: if (fd_peer_pidfd >= 0) -- cgit v1.2.3 From e69980bd16f264581c3f606bae987e54f0ba8c4a Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 25 Jul 2025 17:04:12 +0800 Subject: selftests/sched_ext: Remove duplicate sched.h header ./tools/testing/selftests/sched_ext/hotplug.c: sched.h is included more than once. Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=22941 Signed-off-by: Jiapeng Chong Acked-by: Andrea Righi Signed-off-by: Tejun Heo --- tools/testing/selftests/sched_ext/hotplug.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/sched_ext/hotplug.c b/tools/testing/selftests/sched_ext/hotplug.c index 1c9ceb661c43..0cfbb111a2d0 100644 --- a/tools/testing/selftests/sched_ext/hotplug.c +++ b/tools/testing/selftests/sched_ext/hotplug.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3 From ab5ac789efa985e42bdb5b5e8a7a4ad84935d44e Mon Sep 17 00:00:00 2001 From: Sukrut Heroorkar Date: Tue, 5 Aug 2025 00:56:14 +0200 Subject: selftests/proc: fix string literal warning in proc-maps-race.c This change resolves non literal string format warning invoked for proc-maps-race.c while compiling. proc-maps-race.c:205:17: warning: format not a string literal and no format arguments [-Wformat-security] 205 | printf(text); | ^~~~~~ proc-maps-race.c:209:17: warning: format not a string literal and no format arguments [-Wformat-security] 209 | printf(text); | ^~~~~~ proc-maps-race.c: In function `print_last_lines': proc-maps-race.c:224:9: warning: format not a string literal and no format arguments [-Wformat-security] 224 | printf(start); | ^~~~~~ Add string format specifier %s for the printf calls in both print_first_lines() and print_last_lines() thus resolving the warnings. The test executes fine after this change thus causing no effect to the functional behavior of the test. Link: https://lkml.kernel.org/r/20250804225633.841777-1-hsukrut3@gmail.com Fixes: aadc099c480f ("selftests/proc: add verbose mode for /proc/pid/maps tearing tests") Signed-off-by: Sukrut Heroorkar Acked-by: Suren Baghdasaryan Cc: David Hunter Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/proc/proc-maps-race.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/proc/proc-maps-race.c b/tools/testing/selftests/proc/proc-maps-race.c index 66773685a047..94bba4553130 100644 --- a/tools/testing/selftests/proc/proc-maps-race.c +++ b/tools/testing/selftests/proc/proc-maps-race.c @@ -202,11 +202,11 @@ static void print_first_lines(char *text, int nr) int offs = end - text; text[offs] = '\0'; - printf(text); + printf("%s", text); text[offs] = '\n'; printf("\n"); } else { - printf(text); + printf("%s", text); } } @@ -221,7 +221,7 @@ static void print_last_lines(char *text, int nr) nr--; start--; } - printf(start); + printf("%s", start); } static void print_boundaries(const char *title, FIXTURE_DATA(proc_maps_race) *self) -- cgit v1.2.3 From bda053d6445717f8a4cd76f88caea2e39299fe07 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Aug 2025 17:12:03 -0700 Subject: selftests: drv-net: don't assume device has only 2 queues The test is implicitly assuming the device only has 2 queues. A real device will likely have more. The exact problem is that because NAPIs get added to the list from the head, the netlink dump reports them in reverse order. So the naive napis[0] will actually likely give us the _last_ NAPI, not the first one. Re-enable all the NAPIs instead of hard-coding 2 in the test. This way the NAPIs we operated on will always reappear, doesn't matter where they were in the registration order. Fixes: e6d76268813d ("net: Update threaded state in napi config in netif_set_threaded") Signed-off-by: Jakub Kicinski Reviewed-by: Joe Damato Link: https://patch.msgid.link/20250809001205.1147153-2-kuba@kernel.org Signed-off-by: Paolo Abeni --- tools/testing/selftests/drivers/net/napi_threaded.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/napi_threaded.py b/tools/testing/selftests/drivers/net/napi_threaded.py index b2698db39817..9699a100a87d 100755 --- a/tools/testing/selftests/drivers/net/napi_threaded.py +++ b/tools/testing/selftests/drivers/net/napi_threaded.py @@ -35,6 +35,8 @@ def _setup_deferred_cleanup(cfg) -> None: threaded = cmd(f"cat /sys/class/net/{cfg.ifname}/threaded").stdout defer(_set_threaded_state, cfg, threaded) + return combined + def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None: """ @@ -49,7 +51,7 @@ def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None: napi0_id = napis[0]['id'] napi1_id = napis[1]['id'] - _setup_deferred_cleanup(cfg) + qcnt = _setup_deferred_cleanup(cfg) # set threaded _set_threaded_state(cfg, 1) @@ -62,7 +64,7 @@ def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None: nl.napi_set({'id': napi1_id, 'threaded': 'disabled'}) cmd(f"ethtool -L {cfg.ifname} combined 1") - cmd(f"ethtool -L {cfg.ifname} combined 2") + cmd(f"ethtool -L {cfg.ifname} combined {qcnt}") _assert_napi_threaded_enabled(nl, napi0_id) _assert_napi_threaded_disabled(nl, napi1_id) @@ -80,7 +82,7 @@ def change_num_queues(cfg, nl) -> None: napi0_id = napis[0]['id'] napi1_id = napis[1]['id'] - _setup_deferred_cleanup(cfg) + qcnt = _setup_deferred_cleanup(cfg) # set threaded _set_threaded_state(cfg, 1) @@ -90,7 +92,7 @@ def change_num_queues(cfg, nl) -> None: _assert_napi_threaded_enabled(nl, napi1_id) cmd(f"ethtool -L {cfg.ifname} combined 1") - cmd(f"ethtool -L {cfg.ifname} combined 2") + cmd(f"ethtool -L {cfg.ifname} combined {qcnt}") # check napi threaded is set for both napis _assert_napi_threaded_enabled(nl, napi0_id) -- cgit v1.2.3 From d7e82594a45c5cb270940ac469846e8026c7db0f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 7 Aug 2025 16:29:07 -0700 Subject: selftests: tls: test TCP stealing data from under the TLS socket Check a race where data disappears from the TCP socket after TLS signaled that its ready to receive. ok 6 global.data_steal # RUN tls_basic.base_base ... # OK tls_basic.base_base Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250807232907.600366-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 63 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 5ded3b3a7538..d8cfcf9bb825 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -2708,6 +2708,69 @@ TEST(prequeue) { close(cfd); } +TEST(data_steal) { + struct tls_crypto_info_keys tls; + char buf[20000], buf2[20000]; + struct sockaddr_in addr; + int sfd, cfd, ret, fd; + int pid, status; + socklen_t len; + + len = sizeof(addr); + memrnd(buf, sizeof(buf)); + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls, 0); + + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(INADDR_ANY); + addr.sin_port = 0; + + fd = socket(AF_INET, SOCK_STREAM, 0); + sfd = socket(AF_INET, SOCK_STREAM, 0); + + ASSERT_EQ(bind(sfd, &addr, sizeof(addr)), 0); + ASSERT_EQ(listen(sfd, 10), 0); + ASSERT_EQ(getsockname(sfd, &addr, &len), 0); + ASSERT_EQ(connect(fd, &addr, sizeof(addr)), 0); + ASSERT_GE(cfd = accept(sfd, &addr, &len), 0); + close(sfd); + + ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); + if (ret) { + ASSERT_EQ(errno, ENOENT); + SKIP(return, "no TLS support"); + } + ASSERT_EQ(setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")), 0); + + /* Spawn a child and get it into the read wait path of the underlying + * TCP socket. + */ + pid = fork(); + ASSERT_GE(pid, 0); + if (!pid) { + EXPECT_EQ(recv(cfd, buf, sizeof(buf), MSG_WAITALL), + sizeof(buf)); + exit(!__test_passed(_metadata)); + } + + usleep(2000); + ASSERT_EQ(setsockopt(fd, SOL_TLS, TLS_TX, &tls, tls.len), 0); + ASSERT_EQ(setsockopt(cfd, SOL_TLS, TLS_RX, &tls, tls.len), 0); + + EXPECT_EQ(send(fd, buf, sizeof(buf), 0), sizeof(buf)); + usleep(2000); + EXPECT_EQ(recv(cfd, buf2, sizeof(buf2), MSG_DONTWAIT), -1); + /* Don't check errno, the error will be different depending + * on what random bytes TLS interpreted as the record length. + */ + + close(fd); + close(cfd); + + EXPECT_EQ(wait(&status), pid); + EXPECT_EQ(status, 0); +} + static void __attribute__((constructor)) fips_check(void) { int res; FILE *f; -- cgit v1.2.3 From 774a2ae6617b30a4dcc7ebaf178ef05da05b2a47 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Tue, 12 Aug 2025 18:40:30 +0200 Subject: selftests: net/forwarding: test purge of active DWRR classes Extend sch_ets.sh to add a reproducer for problematic list deletions when active DWRR class are purged by ets_qdisc_change() [1] [2]. [1] https://lore.kernel.org/netdev/e08c7f4a6882f260011909a868311c6e9b54f3e4.1639153474.git.dcaratti@redhat.com/ [2] https://lore.kernel.org/netdev/f3b9bacc73145f265c19ab80785933da5b7cbdec.1754581577.git.dcaratti@redhat.com/ Suggested-by: Victor Nogueira Signed-off-by: Davide Caratti Acked-by: Victor Nogueira Link: https://patch.msgid.link/489497cb781af7389011ca1591fb702a7391f5e7.1755016081.git.dcaratti@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/sch_ets.sh | 1 + tools/testing/selftests/net/forwarding/sch_ets_tests.sh | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/forwarding/sch_ets.sh b/tools/testing/selftests/net/forwarding/sch_ets.sh index 1f6f53e284b5..6269d5e23487 100755 --- a/tools/testing/selftests/net/forwarding/sch_ets.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets.sh @@ -11,6 +11,7 @@ ALL_TESTS=" ets_test_strict ets_test_mixed ets_test_dwrr + ets_test_plug classifier_mode ets_test_strict ets_test_mixed diff --git a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh index 08240d3e3c87..79d837a2868a 100644 --- a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh @@ -224,3 +224,11 @@ ets_test_dwrr() ets_set_dwrr_two_bands xfail_on_slow ets_dwrr_test_01 } + +ets_test_plug() +{ + ets_change_qdisc $put 2 "3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3" "1514 1514" + tc qdisc add dev $put handle 20: parent 10:4 plug + start_traffic_pktsize 100 $h1.10 192.0.2.1 192.0.2.2 00:c1:a0:c1:a0:00 "-c 1" + ets_qdisc_setup $put 2 +} -- cgit v1.2.3 From 8c06cbdcbaea34d7b96d76df4d6669275c1d291a Mon Sep 17 00:00:00 2001 From: William Liu Date: Tue, 12 Aug 2025 23:58:26 +0000 Subject: selftests/tc-testing: Check backlog stats in gso_skb case Add tests to ensure proper backlog accounting in hhf, codel, pie, fq, fq_pie, and fq_codel qdiscs. We check for the bug pattern originally found in fq, fq_pie, and fq_codel, which was an underflow in the tbf parent backlog stats upon child qdisc removal. Signed-off-by: William Liu Reviewed-by: Savino Dicanosa Link: https://patch.msgid.link/20250812235808.45281-1-will@willsroot.io Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/infra/qdiscs.json | 198 +++++++++++++++++++++ 1 file changed, 198 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 23a61e5b99d0..998e5a2f4579 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -185,6 +185,204 @@ "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] }, + { + "id": "34c0", + "name": "Test TBF with HHF Backlog Accounting in gso_skb case against underflow", + "category": [ + "qdisc", + "tbf", + "hhf" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms", + "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 hhf limit 1000", + [ + "ping -I $DUMMY -c2 10.10.11.11", + 1 + ], + "$TC qdisc change dev $DUMMY handle 2: parent 1:1 hhf limit 1" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "backlog 0b 0p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "fd68", + "name": "Test TBF with CODEL Backlog Accounting in gso_skb case against underflow", + "category": [ + "qdisc", + "tbf", + "codel" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms", + "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 codel limit 1000", + [ + "ping -I $DUMMY -c2 10.10.11.11", + 1 + ], + "$TC qdisc change dev $DUMMY handle 2: parent 1:1 codel limit 1" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "backlog 0b 0p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "514e", + "name": "Test TBF with PIE Backlog Accounting in gso_skb case against underflow", + "category": [ + "qdisc", + "tbf", + "pie" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms", + "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 pie limit 1000", + [ + "ping -I $DUMMY -c2 10.10.11.11", + 1 + ], + "$TC qdisc change dev $DUMMY handle 2: parent 1:1 pie limit 1" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "backlog 0b 0p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "6c97", + "name": "Test TBF with FQ Backlog Accounting in gso_skb case against underflow", + "category": [ + "qdisc", + "tbf", + "fq" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms", + "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 fq limit 1000", + [ + "ping -I $DUMMY -c2 10.10.11.11", + 1 + ], + "$TC qdisc change dev $DUMMY handle 2: parent 1:1 fq limit 1" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "backlog 0b 0p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "5d0b", + "name": "Test TBF with FQ_CODEL Backlog Accounting in gso_skb case against underflow", + "category": [ + "qdisc", + "tbf", + "fq_codel" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms", + "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 fq_codel limit 1000", + [ + "ping -I $DUMMY -c2 10.10.11.11", + 1 + ], + "$TC qdisc change dev $DUMMY handle 2: parent 1:1 fq_codel limit 1" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "backlog 0b 0p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "21c3", + "name": "Test TBF with FQ_PIE Backlog Accounting in gso_skb case against underflow", + "category": [ + "qdisc", + "tbf", + "fq_pie" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms", + "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 fq_pie limit 1000", + [ + "ping -I $DUMMY -c2 10.10.11.11", + 1 + ], + "$TC qdisc change dev $DUMMY handle 2: parent 1:1 fq_pie limit 1" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "backlog 0b 0p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, { "id": "a4bb", "name": "Test FQ_CODEL with HTB parent - force packet drop with empty queue", -- cgit v1.2.3 From 5e0b2177bdba99c2487480e9864825f742b684ee Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 14 Aug 2025 15:06:41 +0200 Subject: selftest: forwarding: router: Add a test case for IPv4 link-local source IP Add a test case which checks that packets with an IPv4 link-local source IP are forwarded and not dropped. Signed-off-by: Ido Schimmel Signed-off-by: Petr Machata Link: https://patch.msgid.link/3c2e0b17d99530f57bef5ddff9af284fa0c9b667.1755174341.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/router.sh | 29 ++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh index b98ea9449b8b..dfb6646cb97b 100755 --- a/tools/testing/selftests/net/forwarding/router.sh +++ b/tools/testing/selftests/net/forwarding/router.sh @@ -18,6 +18,8 @@ # | 2001:db8:1::1/64 2001:db8:2::1/64 | # | | # +-----------------------------------------------------------------+ +# +#shellcheck disable=SC2034 # SC doesn't see our uses of global variables ALL_TESTS=" ping_ipv4 @@ -27,6 +29,7 @@ ALL_TESTS=" ipv4_sip_equal_dip ipv6_sip_equal_dip ipv4_dip_link_local + ipv4_sip_link_local " NUM_NETIFS=4 @@ -330,6 +333,32 @@ ipv4_dip_link_local() tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower } +ipv4_sip_link_local() +{ + local sip=169.254.1.1 + + RET=0 + + # Disable rpfilter to prevent packets to be dropped because of it. + sysctl_set net.ipv4.conf.all.rp_filter 0 + sysctl_set net.ipv4.conf."$rp1".rp_filter 0 + + tc filter add dev "$rp2" egress protocol ip pref 1 handle 101 \ + flower src_ip "$sip" action pass + + $MZ "$h1" -t udp "sp=54321,dp=12345" -c 5 -d 1msec -b "$rp1mac" \ + -A "$sip" -B 198.51.100.2 -q + + tc_check_packets "dev $rp2 egress" 101 5 + check_err $? "Packets were dropped" + + log_test "IPv4 source IP is link-local" + + tc filter del dev "$rp2" egress protocol ip pref 1 handle 101 flower + sysctl_restore net.ipv4.conf."$rp1".rp_filter + sysctl_restore net.ipv4.conf.all.rp_filter +} + trap cleanup EXIT setup_prepare -- cgit v1.2.3 From 715c7a36d59f54162a26fac1d1ed8dc087a24cf1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 14 Aug 2025 12:43:23 -0700 Subject: selftests: tls: make the new data_steal test less flaky The CI has hit a couple of cases of: RUN global.data_steal ... tls.c:2762:data_steal:Expected recv(cfd, buf2, sizeof(buf2), MSG_DONTWAIT) (20000) == -1 (-1) data_steal: Test terminated by timeout FAIL global.data_steal Looks like the 2msec sleep is not long enough. Make the sleep longer, and then instead of second sleep wait for the thieving process to exit. That way we can be sure it called recv() before us. While at it also avoid trying to steal more than a record, this seems to be causing issues in manual testing as well. Fixes: d7e82594a45c ("selftests: tls: test TCP stealing data from under the TLS socket") Link: https://patch.msgid.link/20250814194323.2014650-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index d8cfcf9bb825..2b8387a83bc7 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -2748,17 +2748,18 @@ TEST(data_steal) { pid = fork(); ASSERT_GE(pid, 0); if (!pid) { - EXPECT_EQ(recv(cfd, buf, sizeof(buf), MSG_WAITALL), - sizeof(buf)); + EXPECT_EQ(recv(cfd, buf, sizeof(buf) / 2, MSG_WAITALL), + sizeof(buf) / 2); exit(!__test_passed(_metadata)); } - usleep(2000); + usleep(10000); ASSERT_EQ(setsockopt(fd, SOL_TLS, TLS_TX, &tls, tls.len), 0); ASSERT_EQ(setsockopt(cfd, SOL_TLS, TLS_RX, &tls, tls.len), 0); EXPECT_EQ(send(fd, buf, sizeof(buf), 0), sizeof(buf)); - usleep(2000); + EXPECT_EQ(wait(&status), pid); + EXPECT_EQ(status, 0); EXPECT_EQ(recv(cfd, buf2, sizeof(buf2), MSG_DONTWAIT), -1); /* Don't check errno, the error will be different depending * on what random bytes TLS interpreted as the record length. @@ -2766,9 +2767,6 @@ TEST(data_steal) { close(fd); close(cfd); - - EXPECT_EQ(wait(&status), pid); - EXPECT_EQ(status, 0); } static void __attribute__((constructor)) fips_check(void) { -- cgit v1.2.3 From 0227af355b50c526bf83ca52d67aef5d102e9b07 Mon Sep 17 00:00:00 2001 From: Akhilesh Patil Date: Sun, 17 Aug 2025 15:06:05 +0530 Subject: selftests: ublk: Use ARRAY_SIZE() macro to improve code Use ARRAY_SIZE() macro while calculating size of an array to improve code readability and reduce potential sizing errors. Implement this suggestion given by spatch tool by running coccinelle script - scripts/coccinelle/misc/array_size.cocci Follow ARRAY_SIZE() macro usage pattern in ublk.c introduced by, commit ec120093180b9 ("selftests: ublk: fix ublk_find_tgt()") wherever appropriate to maintain consistency. Signed-off-by: Akhilesh Patil Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/aKGihYui6/Pcijbk@bhairav-test.ee.iitb.ac.in Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 95188065b2e9..6512dfbdbce3 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -1400,7 +1400,7 @@ static int cmd_dev_get_features(void) if (!((1ULL << i) & features)) continue; - if (i < sizeof(feat_map) / sizeof(feat_map[0])) + if (i < ARRAY_SIZE(feat_map)) feat = feat_map[i]; else feat = "unknown"; @@ -1477,7 +1477,7 @@ static void __cmd_create_help(char *exe, bool recovery) printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n"); printf("\tdefault: nthreads=nr_queues"); - for (i = 0; i < sizeof(tgt_ops_list) / sizeof(tgt_ops_list[0]); i++) { + for (i = 0; i < ARRAY_SIZE(tgt_ops_list); i++) { const struct ublk_tgt_ops *ops = tgt_ops_list[i]; if (ops->usage) -- cgit v1.2.3 From 452690be7de2f91cc0de68cb9e95252875b33503 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 15 Aug 2025 19:28:21 +0200 Subject: selftests: mptcp: pm: check flush doesn't reset limits This modification is linked to the parent commit where the received ADD_ADDR limit was accidentally reset when the endpoints were flushed. To validate that, the test is now flushing endpoints after having set new limits, and before checking them. The 'Fixes' tag here below is the same as the one from the previous commit: this patch here is not fixing anything wrong in the selftests, but it validates the previous fix for an issue introduced by this commit ID. Fixes: 01cacb00b35c ("mptcp: add netlink-based PM") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-3-521fe9957892@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/pm_netlink.sh | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 2e6648a2b2c0..ac7ec6f94023 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -198,6 +198,7 @@ set_limits 1 9 2>/dev/null check "get_limits" "${default_limits}" "subflows above hard limit" set_limits 8 8 +flush_endpoint ## to make sure it doesn't affect the limits check "get_limits" "$(format_limits 8 8)" "set limits" flush_endpoint -- cgit v1.2.3 From f92199f551e617fae028c5c5905ddd63e3616e18 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 15 Aug 2025 19:28:24 +0200 Subject: selftests: mptcp: disable add_addr retrans in endpoint_tests To prevent test instability in the "delete re-add signal" test caused by ADD_ADDR retransmissions, disable retransmissions for this test by setting net.mptcp.add_addr_timeout to 0. Suggested-by: Matthieu Baerts Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-6-521fe9957892@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index b8af65373b3a..82cae37d9c20 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3842,6 +3842,7 @@ endpoint_tests() # remove and re-add if reset_with_events "delete re-add signal" && mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=0 pm_nl_set_limits $ns1 0 3 pm_nl_set_limits $ns2 3 3 pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal -- cgit v1.2.3 From 2eefbed30d46d5e68593baf6b52923e00e7678af Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 15 Aug 2025 19:28:25 +0200 Subject: selftests: mptcp: connect: fix C23 extension warning GCC was complaining about the new label: mptcp_connect.c:187:2: warning: label followed by a declaration is a C23 extension [-Wc23-extensions] 187 | int err = getaddrinfo(node, service, hints, res); | ^ Simply declare 'err' before the label to avoid this warning. Fixes: a862771d1aa4 ("selftests: mptcp: use IPPROTO_MPTCP for getaddrinfo") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-7-521fe9957892@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index ac1349c4b9e5..4f07ac9fa207 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -183,9 +183,10 @@ static void xgetaddrinfo(const char *node, const char *service, struct addrinfo *hints, struct addrinfo **res) { -again: - int err = getaddrinfo(node, service, hints, res); + int err; +again: + err = getaddrinfo(node, service, hints, res); if (err) { const char *errstr; -- cgit v1.2.3 From 3259889fd3c0cc165b7e9ee375c789875dd32326 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 15 Aug 2025 19:28:26 +0200 Subject: selftests: mptcp: sockopt: fix C23 extension warning GCC was complaining about the new label: mptcp_inq.c:79:2: warning: label followed by a declaration is a C23 extension [-Wc23-extensions] 79 | int err = getaddrinfo(node, service, hints, res); | ^ mptcp_sockopt.c:166:2: warning: label followed by a declaration is a C23 extension [-Wc23-extensions] 166 | int err = getaddrinfo(node, service, hints, res); | ^ Simply declare 'err' before the label to avoid this warning. Fixes: dd367e81b79a ("selftests: mptcp: sockopt: use IPPROTO_MPTCP for getaddrinfo") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-8-521fe9957892@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_inq.c | 5 +++-- tools/testing/selftests/net/mptcp/mptcp_sockopt.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c index 3cf1e2a612ce..f3bcaa48df8f 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_inq.c +++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c @@ -75,9 +75,10 @@ static void xgetaddrinfo(const char *node, const char *service, struct addrinfo *hints, struct addrinfo **res) { -again: - int err = getaddrinfo(node, service, hints, res); + int err; +again: + err = getaddrinfo(node, service, hints, res); if (err) { const char *errstr; diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c index 9934a68df237..e934dd26a59d 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c @@ -162,9 +162,10 @@ static void xgetaddrinfo(const char *node, const char *service, struct addrinfo *hints, struct addrinfo **res) { -again: - int err = getaddrinfo(node, service, hints, res); + int err; +again: + err = getaddrinfo(node, service, hints, res); if (err) { const char *errstr; -- cgit v1.2.3 From 742d3663a5775cb7b957f4ca2ddb4ccd26badb94 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Sun, 3 Aug 2025 12:11:23 +0100 Subject: selftests/mm: add test for invalid multi VMA operations We can use UFFD to easily assert invalid multi VMA moves, so do so, asserting expected behaviour when VMAs invalid for a multi VMA operation are encountered. We assert both that such operations are not permitted, and that we do not even attempt to move the first VMA under these circumstances. We also assert that we can still move a single VMA regardless. We then assert that a partial failure can occur if the invalid VMA appears later in the range of multiple VMAs, both at the very next VMA, and also at the end of the range. As part of this change, we are using the is_range_valid() helper more aggressively. Therefore, fix a bug where stale buffered data would hang around on success, causing subsequent calls to is_range_valid() to potentially give invalid results. We simply have to fflush() the stream on success to resolve this issue. Link: https://lkml.kernel.org/r/c4fb86dd5ba37610583ad5fc0e0c2306ddf318b9.1754218667.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Cc: David Hildenbrand Cc: Jann Horn Cc: Liam Howlett Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/mremap_test.c | 264 ++++++++++++++++++++++++++++++- 1 file changed, 261 insertions(+), 3 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c index fccf9e797a0c..5bd52a951cbd 100644 --- a/tools/testing/selftests/mm/mremap_test.c +++ b/tools/testing/selftests/mm/mremap_test.c @@ -5,10 +5,14 @@ #define _GNU_SOURCE #include +#include +#include #include #include #include +#include #include +#include #include #include @@ -168,6 +172,7 @@ static bool is_range_mapped(FILE *maps_fp, unsigned long start, if (first_val <= start && second_val >= end) { success = true; + fflush(maps_fp); break; } } @@ -175,6 +180,15 @@ static bool is_range_mapped(FILE *maps_fp, unsigned long start, return success; } +/* Check if [ptr, ptr + size) mapped in /proc/self/maps. */ +static bool is_ptr_mapped(FILE *maps_fp, void *ptr, unsigned long size) +{ + unsigned long start = (unsigned long)ptr; + unsigned long end = start + size; + + return is_range_mapped(maps_fp, start, end); +} + /* * Returns the start address of the mapping on success, else returns * NULL on failure. @@ -733,6 +747,249 @@ out: dont_unmap ? " [dontunnmap]" : ""); } +#ifdef __NR_userfaultfd +static void mremap_move_multi_invalid_vmas(FILE *maps_fp, + unsigned long page_size) +{ + char *test_name = "mremap move multiple invalid vmas"; + const size_t size = 10 * page_size; + bool success = true; + char *ptr, *tgt_ptr; + int uffd, err, i; + void *res; + struct uffdio_api api = { + .api = UFFD_API, + .features = UFFD_EVENT_PAGEFAULT, + }; + + uffd = syscall(__NR_userfaultfd, O_NONBLOCK); + if (uffd == -1) { + err = errno; + perror("userfaultfd"); + if (err == EPERM) { + ksft_test_result_skip("%s - missing uffd", test_name); + return; + } + success = false; + goto out; + } + if (ioctl(uffd, UFFDIO_API, &api)) { + perror("ioctl UFFDIO_API"); + success = false; + goto out_close_uffd; + } + + ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (ptr == MAP_FAILED) { + perror("mmap"); + success = false; + goto out_close_uffd; + } + + tgt_ptr = mmap(NULL, size, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (tgt_ptr == MAP_FAILED) { + perror("mmap"); + success = false; + goto out_close_uffd; + } + if (munmap(tgt_ptr, size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + + /* + * Unmap so we end up with: + * + * 0 2 4 6 8 10 offset in buffer + * |*| |*| |*| |*| |*| + * |*| |*| |*| |*| |*| + * + * Additionally, register each with UFFD. + */ + for (i = 0; i < 10; i += 2) { + void *unmap_ptr = &ptr[(i + 1) * page_size]; + unsigned long start = (unsigned long)&ptr[i * page_size]; + struct uffdio_register reg = { + .range = { + .start = start, + .len = page_size, + }, + .mode = UFFDIO_REGISTER_MODE_MISSING, + }; + + if (ioctl(uffd, UFFDIO_REGISTER, ®) == -1) { + perror("ioctl UFFDIO_REGISTER"); + success = false; + goto out_unmap; + } + if (munmap(unmap_ptr, page_size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + } + + /* + * Now try to move the entire range which is invalid for multi VMA move. + * + * This will fail, and no VMA should be moved, as we check this ahead of + * time. + */ + res = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr); + err = errno; + if (res != MAP_FAILED) { + fprintf(stderr, "mremap() succeeded for multi VMA uffd armed\n"); + success = false; + goto out_unmap; + } + if (err != EFAULT) { + errno = err; + perror("mrmeap() unexpected error"); + success = false; + goto out_unmap; + } + if (is_ptr_mapped(maps_fp, tgt_ptr, page_size)) { + fprintf(stderr, + "Invalid uffd-armed VMA at start of multi range moved\n"); + success = false; + goto out_unmap; + } + + /* + * Now try to move a single VMA, this should succeed as not multi VMA + * move. + */ + res = mremap(ptr, page_size, page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr); + if (res == MAP_FAILED) { + perror("mremap single invalid-multi VMA"); + success = false; + goto out_unmap; + } + + /* + * Unmap the VMA, and remap a non-uffd registered (therefore, multi VMA + * move valid) VMA at the start of ptr range. + */ + if (munmap(tgt_ptr, page_size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + res = mmap(ptr, page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0); + if (res == MAP_FAILED) { + perror("mmap"); + success = false; + goto out_unmap; + } + + /* + * Now try to move the entire range, we should succeed in moving the + * first VMA, but no others, and report a failure. + */ + res = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr); + err = errno; + if (res != MAP_FAILED) { + fprintf(stderr, "mremap() succeeded for multi VMA uffd armed\n"); + success = false; + goto out_unmap; + } + if (err != EFAULT) { + errno = err; + perror("mrmeap() unexpected error"); + success = false; + goto out_unmap; + } + if (!is_ptr_mapped(maps_fp, tgt_ptr, page_size)) { + fprintf(stderr, "Valid VMA not moved\n"); + success = false; + goto out_unmap; + } + + /* + * Unmap the VMA, and map valid VMA at start of ptr range, and replace + * all existing multi-move invalid VMAs, except the last, with valid + * multi-move VMAs. + */ + if (munmap(tgt_ptr, page_size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + if (munmap(ptr, size - 2 * page_size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + for (i = 0; i < 8; i += 2) { + res = mmap(&ptr[i * page_size], page_size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0); + if (res == MAP_FAILED) { + perror("mmap"); + success = false; + goto out_unmap; + } + } + + /* + * Now try to move the entire range, we should succeed in moving all but + * the last VMA, and report a failure. + */ + res = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr); + err = errno; + if (res != MAP_FAILED) { + fprintf(stderr, "mremap() succeeded for multi VMA uffd armed\n"); + success = false; + goto out_unmap; + } + if (err != EFAULT) { + errno = err; + perror("mrmeap() unexpected error"); + success = false; + goto out_unmap; + } + + for (i = 0; i < 10; i += 2) { + bool is_mapped = is_ptr_mapped(maps_fp, + &tgt_ptr[i * page_size], page_size); + + if (i < 8 && !is_mapped) { + fprintf(stderr, "Valid VMA not moved at %d\n", i); + success = false; + goto out_unmap; + } else if (i == 8 && is_mapped) { + fprintf(stderr, "Invalid VMA moved at %d\n", i); + success = false; + goto out_unmap; + } + } + +out_unmap: + if (munmap(tgt_ptr, size)) + perror("munmap tgt"); + if (munmap(ptr, size)) + perror("munmap src"); +out_close_uffd: + close(uffd); +out: + if (success) + ksft_test_result_pass("%s\n", test_name); + else + ksft_test_result_fail("%s\n", test_name); +} +#else +static void mremap_move_multi_invalid_vmas(FILE *maps_fp, unsigned long page_size) +{ + char *test_name = "mremap move multiple invalid vmas"; + + ksft_test_result_skip("%s - missing uffd", test_name); +} +#endif /* __NR_userfaultfd */ + /* Returns the time taken for the remap on success else returns -1. */ static long long remap_region(struct config c, unsigned int threshold_mb, char *rand_addr) @@ -1074,7 +1331,7 @@ int main(int argc, char **argv) char *rand_addr; size_t rand_size; int num_expand_tests = 2; - int num_misc_tests = 8; + int num_misc_tests = 9; struct test test_cases[MAX_TEST] = {}; struct test perf_test_cases[MAX_PERF_TEST]; int page_size; @@ -1197,8 +1454,6 @@ int main(int argc, char **argv) mremap_expand_merge(maps_fp, page_size); mremap_expand_merge_offset(maps_fp, page_size); - fclose(maps_fp); - mremap_move_within_range(pattern_seed, rand_addr); mremap_move_1mb_from_start(pattern_seed, rand_addr); mremap_shrink_multiple_vmas(page_size, /* inplace= */true); @@ -1207,6 +1462,9 @@ int main(int argc, char **argv) mremap_move_multiple_vmas(pattern_seed, page_size, /* dontunmap= */ true); mremap_move_multiple_vmas_split(pattern_seed, page_size, /* dontunmap= */ false); mremap_move_multiple_vmas_split(pattern_seed, page_size, /* dontunmap= */ true); + mremap_move_multi_invalid_vmas(maps_fp, page_size); + + fclose(maps_fp); if (run_perf_tests) { ksft_print_msg("\n%s\n", -- cgit v1.2.3 From 0cc2a4880ced1486acc34898cd85edc6ee109c4c Mon Sep 17 00:00:00 2001 From: Sang-Heon Jeon Date: Tue, 12 Aug 2025 23:00:46 +0900 Subject: selftests/damon: fix selftests by installing drgn related script drgn_dump_damon_status is not installed during kselftest setup. It can break other tests which depend on drgn_dump_damon_status. Install drgn_dump_damon_status files to fix broken test. Link: https://lkml.kernel.org/r/20250812140046.660486-1-ekffu200098@gmail.com Fixes: f3e8e1e51362 ("selftests/damon: add drgn script for extracting damon status") Signed-off-by: Sang-Heon Jeon Reviewed-by: SeongJae Park Cc: Alexandre Ghiti Cc: Honggyu Kim Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index 5b230deb19e8..9a3499827d4b 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -4,6 +4,7 @@ TEST_GEN_FILES += access_memory access_memory_even TEST_FILES = _damon_sysfs.py +TEST_FILES += drgn_dump_damon_status.py # functionality tests TEST_PROGS += sysfs.sh -- cgit v1.2.3 From dce1b33ed7430c7189b8cc1567498f9e6bf12731 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 24 Jun 2025 16:19:30 -0700 Subject: selftests: harness: Rename is_signed_type() to avoid collision with overflow.h Rename is_signed_type() to is_signed_var() to avoid colliding with a macro of the same name defined by tools' linux/overflow.h. This fixes warnings (and presumably potential test failures) in tests that utilize the selftests harness and happen to (indirectly) include overflow.h. In file included from tools/include/linux/bits.h:34, from tools/include/linux/bitops.h:14, from tools/include/linux/hashtable.h:13, from include/kvm_util.h:11, from x86/userspace_msr_exit_test.c:11: tools/include/linux/overflow.h:31:9: error: "is_signed_type" redefined [-Werror] 31 | #define is_signed_type(type) (((type)(-1)) < (type)1) | ^~~~~~~~~~~~~~ In file included from include/kvm_test_harness.h:11, from x86/userspace_msr_exit_test.c:9: ../kselftest_harness.h:754:9: note: this is the location of the previous definition 754 | #define is_signed_type(var) (!!(((__typeof__(var))(-1)) < (__typeof__(var))1)) | ^~~~~~~~~~~~~~ Use a separate definition, at least for now, as many selftests build without tools/include in their include path. Fixes: fc92099902fb ("tools headers: Synchronize linux/bits.h with the kernel sources") Cc: Vincent Mailhol Cc: Arnaldo Carvalho de Melo Cc: Mark Brown Link: https://lore.kernel.org/r/20250624231930.583689-1-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kselftest_harness.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 2925e47db995..8516e8434bc4 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -751,7 +751,7 @@ for (; _metadata->trigger; _metadata->trigger = \ __bail(_assert, _metadata)) -#define is_signed_type(var) (!!(((__typeof__(var))(-1)) < (__typeof__(var))1)) +#define is_signed_var(var) (!!(((__typeof__(var))(-1)) < (__typeof__(var))1)) #define __EXPECT(_expected, _expected_str, _seen, _seen_str, _t, _assert) do { \ /* Avoid multiple evaluation of the cases */ \ @@ -759,7 +759,7 @@ __typeof__(_seen) __seen = (_seen); \ if (!(__exp _t __seen)) { \ /* Report with actual signedness to avoid weird output. */ \ - switch (is_signed_type(__exp) * 2 + is_signed_type(__seen)) { \ + switch (is_signed_var(__exp) * 2 + is_signed_var(__seen)) { \ case 0: { \ uintmax_t __exp_print = (uintmax_t)__exp; \ uintmax_t __seen_print = (uintmax_t)__seen; \ -- cgit v1.2.3 From 87951b566446da04eed1fe8100f99a512ef02756 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 15 Aug 2025 06:20:00 +0000 Subject: selftests: bonding: add test for passive LACP mode Add a selftest to verify bonding behavior when `lacp_active` is set to `off`. The test checks the following: - The passive LACP bond should not send LACPDUs before receiving a partner's LACPDU. - The transmitted LACPDUs must not include the active flag. - After transitioning to EXPIRED and DEFAULTED states, the passive side should still not initiate LACPDUs. Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20250815062000.22220-4-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- .../testing/selftests/drivers/net/bonding/Makefile | 3 +- .../drivers/net/bonding/bond_passive_lacp.sh | 105 +++++++++++++++++++++ tools/testing/selftests/drivers/net/bonding/config | 1 + 3 files changed, 108 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile index 2b10854e4b1e..44b98f17f8ff 100644 --- a/tools/testing/selftests/drivers/net/bonding/Makefile +++ b/tools/testing/selftests/drivers/net/bonding/Makefile @@ -10,7 +10,8 @@ TEST_PROGS := \ mode-2-recovery-updelay.sh \ bond_options.sh \ bond-eth-type-change.sh \ - bond_macvlan_ipvlan.sh + bond_macvlan_ipvlan.sh \ + bond_passive_lacp.sh TEST_FILES := \ lag_lib.sh \ diff --git a/tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh b/tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh new file mode 100755 index 000000000000..9c3b089813df --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh @@ -0,0 +1,105 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test if a bond interface works with lacp_active=off. + +# shellcheck disable=SC2034 +REQUIRE_MZ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +# shellcheck disable=SC1091 +source "$lib_dir"/../../../net/forwarding/lib.sh + +# shellcheck disable=SC2317 +check_port_state() +{ + local netns=$1 + local port=$2 + local state=$3 + + ip -n "${netns}" -d -j link show "$port" | \ + jq -e ".[].linkinfo.info_slave_data.ad_actor_oper_port_state_str | index(\"${state}\") != null" > /dev/null +} + +check_pkt_count() +{ + RET=0 + local ns="$1" + local iface="$2" + + # wait 65s, one per 30s + slowwait_for_counter 65 2 tc_rule_handle_stats_get \ + "dev ${iface} egress" 101 ".packets" "-n ${ns}" &> /dev/null +} + +setup() { + setup_ns c_ns s_ns + + # shellcheck disable=SC2154 + ip -n "${c_ns}" link add eth0 type veth peer name eth0 netns "${s_ns}" + ip -n "${c_ns}" link add eth1 type veth peer name eth1 netns "${s_ns}" + + # Add tc filter to count the pkts + tc -n "${c_ns}" qdisc add dev eth0 clsact + tc -n "${c_ns}" filter add dev eth0 egress handle 101 protocol 0x8809 matchall action pass + tc -n "${s_ns}" qdisc add dev eth1 clsact + tc -n "${s_ns}" filter add dev eth1 egress handle 101 protocol 0x8809 matchall action pass + + ip -n "${s_ns}" link add bond0 type bond mode 802.3ad lacp_active on lacp_rate fast + ip -n "${s_ns}" link set eth0 master bond0 + ip -n "${s_ns}" link set eth1 master bond0 + + ip -n "${c_ns}" link add bond0 type bond mode 802.3ad lacp_active off lacp_rate fast + ip -n "${c_ns}" link set eth0 master bond0 + ip -n "${c_ns}" link set eth1 master bond0 + +} + +trap cleanup_all_ns EXIT +setup + +# The bond will send 2 lacpdu pkts during init time, let's wait at least 2s +# after interface up +ip -n "${c_ns}" link set bond0 up +sleep 2 + +# 1. The passive side shouldn't send LACPDU. +check_pkt_count "${c_ns}" "eth0" && RET=1 +log_test "802.3ad lacp_active off" "init port" + +ip -n "${s_ns}" link set bond0 up +# 2. The passive side should not have the 'active' flag. +RET=0 +slowwait 2 check_port_state "${c_ns}" "eth0" "active" && RET=1 +log_test "802.3ad lacp_active off" "port state active" + +# 3. The active side should have the 'active' flag. +RET=0 +slowwait 2 check_port_state "${s_ns}" "eth0" "active" || RET=1 +log_test "802.3ad lacp_active on" "port state active" + +# 4. Make sure the connection is not expired. +RET=0 +slowwait 5 check_port_state "${s_ns}" "eth0" "distributing" +slowwait 10 check_port_state "${s_ns}" "eth0" "expired" && RET=1 +log_test "bond 802.3ad lacp_active off" "port connection" + +# After testing, disconnect one port on each side to check the state. +ip -n "${s_ns}" link set eth0 nomaster +ip -n "${s_ns}" link set eth0 up +ip -n "${c_ns}" link set eth1 nomaster +ip -n "${c_ns}" link set eth1 up +# Due to Periodic Machine and Rx Machine state change, the bond will still +# send lacpdu pkts in a few seconds. sleep at lease 5s to make sure +# negotiation finished +sleep 5 + +# 5. The active side should keep sending LACPDU. +check_pkt_count "${s_ns}" "eth1" || RET=1 +log_test "bond 802.3ad lacp_active on" "port pkt after disconnect" + +# 6. The passive side shouldn't send LACPDU anymore. +check_pkt_count "${c_ns}" "eth0" && RET=1 +log_test "bond 802.3ad lacp_active off" "port pkt after disconnect" + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config index dad4e5fda4db..4d16a69ffc65 100644 --- a/tools/testing/selftests/drivers/net/bonding/config +++ b/tools/testing/selftests/drivers/net/bonding/config @@ -6,6 +6,7 @@ CONFIG_MACVLAN=y CONFIG_IPVLAN=y CONFIG_NET_ACT_GACT=y CONFIG_NET_CLS_FLOWER=y +CONFIG_NET_CLS_MATCHALL=m CONFIG_NET_SCH_INGRESS=y CONFIG_NLMON=y CONFIG_VETH=y -- cgit v1.2.3 From a61a3e961baff65b0a49f862fe21ce304f279b24 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 19 Aug 2025 19:19:52 -0700 Subject: selftests: tls: add tests for zero-length records Test various combinations of zero-length records. Unfortunately, kernel cannot be coerced into producing those, so hardcode the ciphertext messages in the test. Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20250820021952.143068-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 300 +++++++++++++++++++++++++++++++++++++- 1 file changed, 295 insertions(+), 5 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 2b8387a83bc7..0f5640d8dc7f 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -181,13 +181,12 @@ static int tls_send_cmsg(int fd, unsigned char record_type, return sendmsg(fd, &msg, flags); } -static int tls_recv_cmsg(struct __test_metadata *_metadata, - int fd, unsigned char record_type, - void *data, size_t len, int flags) +static int __tls_recv_cmsg(struct __test_metadata *_metadata, + int fd, unsigned char *ctype, + void *data, size_t len, int flags) { char cbuf[CMSG_SPACE(sizeof(char))]; struct cmsghdr *cmsg; - unsigned char ctype; struct msghdr msg; struct iovec vec; int n; @@ -206,7 +205,20 @@ static int tls_recv_cmsg(struct __test_metadata *_metadata, EXPECT_NE(cmsg, NULL); EXPECT_EQ(cmsg->cmsg_level, SOL_TLS); EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE); - ctype = *((unsigned char *)CMSG_DATA(cmsg)); + if (ctype) + *ctype = *((unsigned char *)CMSG_DATA(cmsg)); + + return n; +} + +static int tls_recv_cmsg(struct __test_metadata *_metadata, + int fd, unsigned char record_type, + void *data, size_t len, int flags) +{ + unsigned char ctype; + int n; + + n = __tls_recv_cmsg(_metadata, fd, &ctype, data, len, flags); EXPECT_EQ(ctype, record_type); return n; @@ -2164,6 +2176,284 @@ TEST_F(tls, rekey_poll_delay) } } +struct raw_rec { + unsigned int plain_len; + unsigned char plain_data[100]; + unsigned int cipher_len; + unsigned char cipher_data[128]; +}; + +/* TLS 1.2, AES_CCM, data, seqno:0, plaintext: 'Hello world' */ +static const struct raw_rec id0_data_l11 = { + .plain_len = 11, + .plain_data = { + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, + 0x72, 0x6c, 0x64, + }, + .cipher_len = 40, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x26, 0xa2, 0x33, + 0xde, 0x8d, 0x94, 0xf0, 0x29, 0x6c, 0xb1, 0xaf, + 0x6a, 0x75, 0xb2, 0x93, 0xad, 0x45, 0xd5, 0xfd, + 0x03, 0x51, 0x57, 0x8f, 0xf9, 0xcc, 0x3b, 0x42, + }, +}; + +/* TLS 1.2, AES_CCM, ctrl, seqno:0, plaintext: '' */ +static const struct raw_rec id0_ctrl_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x38, 0x7b, + 0xa6, 0x1c, 0xdd, 0xa7, 0x19, 0x33, 0xab, 0xae, + 0x88, 0xe1, 0xd2, 0x08, 0x4f, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:0, plaintext: '' */ +static const struct raw_rec id0_data_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xc5, 0x37, 0x90, + 0x70, 0x45, 0x89, 0xfb, 0x5c, 0xc7, 0x89, 0x03, + 0x68, 0x80, 0xd3, 0xd8, 0xcc, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:1, plaintext: 'Hello world' */ +static const struct raw_rec id1_data_l11 = { + .plain_len = 11, + .plain_data = { + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, + 0x72, 0x6c, 0x64, + }, + .cipher_len = 40, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x3a, 0x1a, 0x9c, + 0xd0, 0xa8, 0x9a, 0xd6, 0x69, 0xd6, 0x1a, 0xe3, + 0xb5, 0x1f, 0x0d, 0x2c, 0xe2, 0x97, 0x46, 0xff, + 0x2b, 0xcc, 0x5a, 0xc4, 0xa3, 0xb9, 0xef, 0xba, + }, +}; + +/* TLS 1.2, AES_CCM, ctrl, seqno:1, plaintext: '' */ +static const struct raw_rec id1_ctrl_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x3e, 0xf0, 0xfe, + 0xee, 0xd9, 0xe2, 0x5d, 0xc7, 0x11, 0x4c, 0xe6, + 0xb4, 0x7e, 0xef, 0x40, 0x2b, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:1, plaintext: '' */ +static const struct raw_rec id1_data_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0xce, 0xfc, 0x86, + 0xc8, 0xf0, 0x55, 0xf9, 0x47, 0x3f, 0x74, 0xdc, + 0xc9, 0xbf, 0xfe, 0x5b, 0xb1, + }, +}; + +/* TLS 1.2, AES_CCM, ctrl, seqno:2, plaintext: 'Hello world' */ +static const struct raw_rec id2_ctrl_l11 = { + .plain_len = 11, + .plain_data = { + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, + 0x72, 0x6c, 0x64, + }, + .cipher_len = 40, + .cipher_data = { + 0x16, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0xe5, 0x3d, 0x19, + 0x3d, 0xca, 0xb8, 0x16, 0xb6, 0xff, 0x79, 0x87, + 0x2a, 0x04, 0x11, 0x3d, 0xf8, 0x64, 0x5f, 0x36, + 0x8b, 0xa8, 0xee, 0x4c, 0x6d, 0x62, 0xa5, 0x00, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:2, plaintext: 'Hello world' */ +static const struct raw_rec id2_data_l11 = { + .plain_len = 11, + .plain_data = { + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, + 0x72, 0x6c, 0x64, + }, + .cipher_len = 40, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0xe5, 0x3d, 0x19, + 0x3d, 0xca, 0xb8, 0x16, 0xb6, 0xff, 0x79, 0x87, + 0x8e, 0xa1, 0xd0, 0xcd, 0x33, 0xb5, 0x86, 0x2b, + 0x17, 0xf1, 0x52, 0x2a, 0x55, 0x62, 0x65, 0x11, + }, +}; + +/* TLS 1.2, AES_CCM, ctrl, seqno:2, plaintext: '' */ +static const struct raw_rec id2_ctrl_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0xdc, 0x5c, 0x0e, + 0x41, 0xdd, 0xba, 0xd3, 0xcc, 0xcf, 0x6d, 0xd9, + 0x06, 0xdb, 0x79, 0xe5, 0x5d, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:2, plaintext: '' */ +static const struct raw_rec id2_data_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0xc3, 0xca, 0x26, + 0x22, 0xe4, 0x25, 0xfb, 0x5f, 0x6d, 0xbf, 0x83, + 0x30, 0x48, 0x69, 0x1a, 0x47, + }, +}; + +FIXTURE(zero_len) +{ + int fd, cfd; + bool notls; +}; + +FIXTURE_VARIANT(zero_len) +{ + const struct raw_rec *recs[4]; + ssize_t recv_ret[4]; +}; + +FIXTURE_VARIANT_ADD(zero_len, data_data_data) +{ + .recs = { &id0_data_l11, &id1_data_l11, &id2_data_l11, }, + .recv_ret = { 33, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, data_0ctrl_data) +{ + .recs = { &id0_data_l11, &id1_ctrl_l0, &id2_data_l11, }, + .recv_ret = { 11, 0, 11, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0data_0data_0data) +{ + .recs = { &id0_data_l0, &id1_data_l0, &id2_data_l0, }, + .recv_ret = { -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0data_0data_ctrl) +{ + .recs = { &id0_data_l0, &id1_data_l0, &id2_ctrl_l11, }, + .recv_ret = { 0, 11, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0data_0data_0ctrl) +{ + .recs = { &id0_data_l0, &id1_data_l0, &id2_ctrl_l0, }, + .recv_ret = { 0, 0, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0ctrl_0ctrl_0ctrl) +{ + .recs = { &id0_ctrl_l0, &id1_ctrl_l0, &id2_ctrl_l0, }, + .recv_ret = { 0, 0, 0, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0data_0data_data) +{ + .recs = { &id0_data_l0, &id1_data_l0, &id2_data_l11, }, + .recv_ret = { 11, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, data_0data_0data) +{ + .recs = { &id0_data_l11, &id1_data_l0, &id2_data_l0, }, + .recv_ret = { 11, -EAGAIN, }, +}; + +FIXTURE_SETUP(zero_len) +{ + struct tls_crypto_info_keys tls12; + int ret; + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128, + &tls12, 0); + + ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); + if (self->notls) + return; + + /* Don't install keys on fd, we'll send raw records */ + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); +} + +FIXTURE_TEARDOWN(zero_len) +{ + close(self->fd); + close(self->cfd); +} + +TEST_F(zero_len, test) +{ + const struct raw_rec *const *rec; + unsigned char buf[128]; + int rec_off; + int i; + + for (i = 0; i < 4 && variant->recs[i]; i++) + EXPECT_EQ(send(self->fd, variant->recs[i]->cipher_data, + variant->recs[i]->cipher_len, 0), + variant->recs[i]->cipher_len); + + rec = &variant->recs[0]; + rec_off = 0; + for (i = 0; i < 4; i++) { + int j, ret; + + ret = variant->recv_ret[i] >= 0 ? variant->recv_ret[i] : -1; + EXPECT_EQ(__tls_recv_cmsg(_metadata, self->cfd, NULL, + buf, sizeof(buf), MSG_DONTWAIT), ret); + if (ret == -1) + EXPECT_EQ(errno, -variant->recv_ret[i]); + if (variant->recv_ret[i] == -EAGAIN) + break; + + for (j = 0; j < ret; j++) { + while (rec_off == (*rec)->plain_len) { + rec++; + rec_off = 0; + } + EXPECT_EQ(buf[j], (*rec)->plain_data[rec_off]); + rec_off++; + } + } +}; + FIXTURE(tls_err) { int fd, cfd; -- cgit v1.2.3 From 0843e0ced338d07c8bcec5675c560a94d05a4d41 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 17 Aug 2025 21:21:58 +0100 Subject: KVM: arm64: Get rid of ARM64_FEATURE_MASK() The ARM64_FEATURE_MASK() macro was a hack introduce whilst the automatic generation of sysreg encoding was introduced, and was too unreliable to be entirely trusted. We are in a better place now, and we could really do without this macro. Get rid of it altogether. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20250817202158.395078-7-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/sysreg.h | 3 -- arch/arm64/kvm/arm.c | 8 ++--- arch/arm64/kvm/sys_regs.c | 40 +++++++++++----------- tools/arch/arm64/include/asm/sysreg.h | 3 -- .../testing/selftests/kvm/arm64/aarch32_id_regs.c | 2 +- .../testing/selftests/kvm/arm64/debug-exceptions.c | 12 +++---- tools/testing/selftests/kvm/arm64/no-vgic-v3.c | 4 +-- .../testing/selftests/kvm/arm64/page_fault_test.c | 6 ++-- tools/testing/selftests/kvm/arm64/set_id_regs.c | 8 ++--- .../selftests/kvm/arm64/vpmu_counter_access.c | 2 +- tools/testing/selftests/kvm/lib/arm64/processor.c | 6 ++-- 11 files changed, 44 insertions(+), 50 deletions(-) (limited to 'tools/testing/selftests') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 948007cd3684..845875991eb8 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1146,9 +1146,6 @@ #define ARM64_FEATURE_FIELD_BITS 4 -/* Defined for compatibility only, do not add new users. */ -#define ARM64_FEATURE_MASK(x) (x##_MASK) - #ifdef __ASSEMBLY__ .macro mrs_s, rt, sreg diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 7a1a8210ff91..d60d2a644391 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2404,12 +2404,12 @@ static u64 get_hyp_id_aa64pfr0_el1(void) */ u64 val = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); - val &= ~(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2) | - ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3)); + val &= ~(ID_AA64PFR0_EL1_CSV2 | + ID_AA64PFR0_EL1_CSV3); - val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2), + val |= FIELD_PREP(ID_AA64PFR0_EL1_CSV2, arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED); - val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3), + val |= FIELD_PREP(ID_AA64PFR0_EL1_CSV3, arm64_get_meltdown_state() == SPECTRE_UNAFFECTED); return val; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 5abe4db6c008..e387d1dfed1e 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1615,18 +1615,18 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu, break; case SYS_ID_AA64ISAR1_EL1: if (!vcpu_has_ptrauth(vcpu)) - val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI)); + val &= ~(ID_AA64ISAR1_EL1_APA | + ID_AA64ISAR1_EL1_API | + ID_AA64ISAR1_EL1_GPA | + ID_AA64ISAR1_EL1_GPI); break; case SYS_ID_AA64ISAR2_EL1: if (!vcpu_has_ptrauth(vcpu)) - val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) | - ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3)); + val &= ~(ID_AA64ISAR2_EL1_APA3 | + ID_AA64ISAR2_EL1_GPA3); if (!cpus_have_final_cap(ARM64_HAS_WFXT) || has_broken_cntvoff()) - val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_WFxT); + val &= ~ID_AA64ISAR2_EL1_WFxT; break; case SYS_ID_AA64ISAR3_EL1: val &= ID_AA64ISAR3_EL1_FPRCVT | ID_AA64ISAR3_EL1_FAMINMAX; @@ -1642,7 +1642,7 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu, ID_AA64MMFR3_EL1_S1PIE; break; case SYS_ID_MMFR4_EL1: - val &= ~ARM64_FEATURE_MASK(ID_MMFR4_EL1_CCIDX); + val &= ~ID_MMFR4_EL1_CCIDX; break; } @@ -1828,22 +1828,22 @@ static u64 sanitise_id_aa64pfr1_el1(const struct kvm_vcpu *vcpu, u64 val) u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); if (!kvm_has_mte(vcpu->kvm)) { - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE_frac); + val &= ~ID_AA64PFR1_EL1_MTE; + val &= ~ID_AA64PFR1_EL1_MTE_frac; } if (!(cpus_have_final_cap(ARM64_HAS_RASV1P1_EXTN) && SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, pfr0) == ID_AA64PFR0_EL1_RAS_IMP)) - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_RAS_frac); - - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SME); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_RNDR_trap); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_NMI); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_GCS); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_THE); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTEX); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_PFAR); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MPAM_frac); + val &= ~ID_AA64PFR1_EL1_RAS_frac; + + val &= ~ID_AA64PFR1_EL1_SME; + val &= ~ID_AA64PFR1_EL1_RNDR_trap; + val &= ~ID_AA64PFR1_EL1_NMI; + val &= ~ID_AA64PFR1_EL1_GCS; + val &= ~ID_AA64PFR1_EL1_THE; + val &= ~ID_AA64PFR1_EL1_MTEX; + val &= ~ID_AA64PFR1_EL1_PFAR; + val &= ~ID_AA64PFR1_EL1_MPAM_frac; return val; } diff --git a/tools/arch/arm64/include/asm/sysreg.h b/tools/arch/arm64/include/asm/sysreg.h index 690b6ebd118f..65f2759ea27a 100644 --- a/tools/arch/arm64/include/asm/sysreg.h +++ b/tools/arch/arm64/include/asm/sysreg.h @@ -1080,9 +1080,6 @@ #define ARM64_FEATURE_FIELD_BITS 4 -/* Defined for compatibility only, do not add new users. */ -#define ARM64_FEATURE_MASK(x) (x##_MASK) - #ifdef __ASSEMBLY__ .macro mrs_s, rt, sreg diff --git a/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c index cef8f7323ceb..713005b6f508 100644 --- a/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c @@ -146,7 +146,7 @@ static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu) val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); - el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val); + el0 = FIELD_GET(ID_AA64PFR0_EL1_EL0, val); return el0 == ID_AA64PFR0_EL1_EL0_IMP; } diff --git a/tools/testing/selftests/kvm/arm64/debug-exceptions.c b/tools/testing/selftests/kvm/arm64/debug-exceptions.c index c7fb55c9135b..521991a89ad9 100644 --- a/tools/testing/selftests/kvm/arm64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/arm64/debug-exceptions.c @@ -116,12 +116,12 @@ static void reset_debug_state(void) /* Reset all bcr/bvr/wcr/wvr registers */ dfr0 = read_sysreg(id_aa64dfr0_el1); - brps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), dfr0); + brps = FIELD_GET(ID_AA64DFR0_EL1_BRPs, dfr0); for (i = 0; i <= brps; i++) { write_dbgbcr(i, 0); write_dbgbvr(i, 0); } - wrps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), dfr0); + wrps = FIELD_GET(ID_AA64DFR0_EL1_WRPs, dfr0); for (i = 0; i <= wrps; i++) { write_dbgwcr(i, 0); write_dbgwvr(i, 0); @@ -418,7 +418,7 @@ static void guest_code_ss(int test_cnt) static int debug_version(uint64_t id_aa64dfr0) { - return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), id_aa64dfr0); + return FIELD_GET(ID_AA64DFR0_EL1_DebugVer, id_aa64dfr0); } static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn) @@ -539,14 +539,14 @@ void test_guest_debug_exceptions_all(uint64_t aa64dfr0) int b, w, c; /* Number of breakpoints */ - brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), aa64dfr0) + 1; + brp_num = FIELD_GET(ID_AA64DFR0_EL1_BRPs, aa64dfr0) + 1; __TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required"); /* Number of watchpoints */ - wrp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), aa64dfr0) + 1; + wrp_num = FIELD_GET(ID_AA64DFR0_EL1_WRPs, aa64dfr0) + 1; /* Number of context aware breakpoints */ - ctx_brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_CTX_CMPs), aa64dfr0) + 1; + ctx_brp_num = FIELD_GET(ID_AA64DFR0_EL1_CTX_CMPs, aa64dfr0) + 1; pr_debug("%s brp_num:%d, wrp_num:%d, ctx_brp_num:%d\n", __func__, brp_num, wrp_num, ctx_brp_num); diff --git a/tools/testing/selftests/kvm/arm64/no-vgic-v3.c b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c index ebd70430c89d..f222538e6084 100644 --- a/tools/testing/selftests/kvm/arm64/no-vgic-v3.c +++ b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c @@ -54,7 +54,7 @@ static void guest_code(void) * Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having * hidden the feature at runtime without any other userspace action. */ - __GUEST_ASSERT(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), + __GUEST_ASSERT(FIELD_GET(ID_AA64PFR0_EL1_GIC, read_sysreg(id_aa64pfr0_el1)) == 0, "GICv3 wrongly advertised"); @@ -165,7 +165,7 @@ int main(int argc, char *argv[]) vm = vm_create_with_one_vcpu(&vcpu, NULL); pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); - __TEST_REQUIRE(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), pfr0), + __TEST_REQUIRE(FIELD_GET(ID_AA64PFR0_EL1_GIC, pfr0), "GICv3 not supported."); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/arm64/page_fault_test.c b/tools/testing/selftests/kvm/arm64/page_fault_test.c index dc6559dad9d8..4ccbd389d133 100644 --- a/tools/testing/selftests/kvm/arm64/page_fault_test.c +++ b/tools/testing/selftests/kvm/arm64/page_fault_test.c @@ -95,14 +95,14 @@ static bool guest_check_lse(void) uint64_t isar0 = read_sysreg(id_aa64isar0_el1); uint64_t atomic; - atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC), isar0); + atomic = FIELD_GET(ID_AA64ISAR0_EL1_ATOMIC, isar0); return atomic >= 2; } static bool guest_check_dc_zva(void) { uint64_t dczid = read_sysreg(dczid_el0); - uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_EL0_DZP), dczid); + uint64_t dzp = FIELD_GET(DCZID_EL0_DZP, dczid); return dzp == 0; } @@ -195,7 +195,7 @@ static bool guest_set_ha(void) uint64_t hadbs, tcr; /* Skip if HA is not supported. */ - hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS), mmfr1); + hadbs = FIELD_GET(ID_AA64MMFR1_EL1_HAFDBS, mmfr1); if (hadbs == 0) return false; diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index d3bf9204409c..36d40c267b99 100644 --- a/tools/testing/selftests/kvm/arm64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -594,8 +594,8 @@ static void test_user_set_mte_reg(struct kvm_vcpu *vcpu) */ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); - mte = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE), val); - mte_frac = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE_frac), val); + mte = FIELD_GET(ID_AA64PFR1_EL1_MTE, val); + mte_frac = FIELD_GET(ID_AA64PFR1_EL1_MTE_frac, val); if (mte != ID_AA64PFR1_EL1_MTE_MTE2 || mte_frac != ID_AA64PFR1_EL1_MTE_frac_NI) { ksft_test_result_skip("MTE_ASYNC or MTE_ASYMM are supported, nothing to test\n"); @@ -612,7 +612,7 @@ static void test_user_set_mte_reg(struct kvm_vcpu *vcpu) } val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); - mte_frac = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE_frac), val); + mte_frac = FIELD_GET(ID_AA64PFR1_EL1_MTE_frac, val); if (mte_frac == ID_AA64PFR1_EL1_MTE_frac_NI) ksft_test_result_pass("ID_AA64PFR1_EL1.MTE_frac=0 accepted and still 0xF\n"); else @@ -774,7 +774,7 @@ int main(void) /* Check for AARCH64 only system */ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); - el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val); + el0 = FIELD_GET(ID_AA64PFR0_EL1_EL0, val); aarch64_only = (el0 == ID_AA64PFR0_EL1_EL0_IMP); ksft_print_header(); diff --git a/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c index f16b3b27e32e..a0c4ab839155 100644 --- a/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c +++ b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c @@ -441,7 +441,7 @@ static void create_vpmu_vm(void *guest_code) /* Make sure that PMUv3 support is indicated in the ID register */ dfr0 = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1)); - pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), dfr0); + pmuver = FIELD_GET(ID_AA64DFR0_EL1_PMUVer, dfr0); TEST_ASSERT(pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF && pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP, "Unexpected PMUVER (0x%x) on the vCPU with PMUv3", pmuver); diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index 9d69904cb608..eb115123d741 100644 --- a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -573,15 +573,15 @@ void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, err = ioctl(vcpu_fd, KVM_GET_ONE_REG, ®); TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd)); - gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN4), val); + gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN4, val); *ipa4k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN4_NI, ID_AA64MMFR0_EL1_TGRAN4_52_BIT); - gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN64), val); + gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN64, val); *ipa64k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN64_NI, ID_AA64MMFR0_EL1_TGRAN64_IMP); - gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN16), val); + gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN16, val); *ipa16k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN16_NI, ID_AA64MMFR0_EL1_TGRAN16_52_BIT); -- cgit v1.2.3 From 01860bcc53432d8b9b92a72939b35679ac24059f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 18 Aug 2025 17:41:00 +0100 Subject: KVM: arm64: selftests: Sync ID_AA64MMFR3_EL1 in set_id_regs When we added coverage for ID_AA64MMFR3_EL1 we didn't add it to the list of registers we read in the guest, do so. Fixes: 0b593ef12afc ("KVM: arm64: selftests: Catch up set_id_regs with the kernel") Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20250818-kvm-arm64-selftests-mmfr3-idreg-v1-1-2f85114d0163@kernel.org Signed-off-by: Oliver Upton --- tools/testing/selftests/kvm/arm64/set_id_regs.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index 36d40c267b99..189321e96925 100644 --- a/tools/testing/selftests/kvm/arm64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -243,6 +243,7 @@ static void guest_code(void) GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1); GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1); GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1); + GUEST_REG_SYNC(SYS_ID_AA64MMFR3_EL1); GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1); GUEST_REG_SYNC(SYS_CTR_EL0); GUEST_REG_SYNC(SYS_MIDR_EL1); -- cgit v1.2.3 From 5bbc2b785e63699cfcaa7adbf739f6e9b771028a Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Tue, 5 Aug 2025 13:51:40 -0400 Subject: selftests/mm: fix FORCE_READ to read input value correctly FORCE_READ() converts input value x to its pointer type then reads from address x. This is wrong. If x is a non-pointer, it would be caught it easily. But all FORCE_READ() callers are trying to read from a pointer and FORCE_READ() basically reads a pointer to a pointer instead of the original typed pointer. Almost no access violation was found, except the one from split_huge_page_test. Fix it by implementing a simplified READ_ONCE() instead. Link: https://lkml.kernel.org/r/20250805175140.241656-1-ziy@nvidia.com Fixes: 3f6bfd4789a0 ("selftests/mm: reuse FORCE_READ to replace "asm volatile("" : "+r" (XXX));"") Signed-off-by: Zi Yan Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Reviewed-by: wang lian Reviewed-by: Wei Yang Cc: Christian Brauner Cc: Jann Horn Cc: Kairui Song Cc: Liam Howlett Cc: Mark Brown Cc: SeongJae Park Cc: Shuah Khan Cc: Vlastimil Babka Cc: Zi Yan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/cow.c | 4 ++-- tools/testing/selftests/mm/guard-regions.c | 2 +- tools/testing/selftests/mm/hugetlb-madvise.c | 4 +++- tools/testing/selftests/mm/migration.c | 2 +- tools/testing/selftests/mm/pagemap_ioctl.c | 2 +- tools/testing/selftests/mm/split_huge_page_test.c | 7 +++++-- tools/testing/selftests/mm/vm_util.h | 2 +- 7 files changed, 14 insertions(+), 9 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index d30625c18259..c744c603d688 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -1554,8 +1554,8 @@ static void run_with_zeropage(non_anon_test_fn fn, const char *desc) } /* Read from the page to populate the shared zeropage. */ - FORCE_READ(mem); - FORCE_READ(smem); + FORCE_READ(*mem); + FORCE_READ(*smem); fn(mem, smem, pagesize); munmap: diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c index b0d42eb04e3a..8dd81c0a4a5a 100644 --- a/tools/testing/selftests/mm/guard-regions.c +++ b/tools/testing/selftests/mm/guard-regions.c @@ -145,7 +145,7 @@ static bool try_access_buf(char *ptr, bool write) if (write) *ptr = 'x'; else - FORCE_READ(ptr); + FORCE_READ(*ptr); } signal_jump_set = false; diff --git a/tools/testing/selftests/mm/hugetlb-madvise.c b/tools/testing/selftests/mm/hugetlb-madvise.c index 1afe14b9dc0c..c5940c0595be 100644 --- a/tools/testing/selftests/mm/hugetlb-madvise.c +++ b/tools/testing/selftests/mm/hugetlb-madvise.c @@ -50,8 +50,10 @@ void read_fault_pages(void *addr, unsigned long nr_pages) unsigned long i; for (i = 0; i < nr_pages; i++) { + unsigned long *addr2 = + ((unsigned long *)(addr + (i * huge_page_size))); /* Prevent the compiler from optimizing out the entire loop: */ - FORCE_READ(((unsigned long *)(addr + (i * huge_page_size)))); + FORCE_READ(*addr2); } } diff --git a/tools/testing/selftests/mm/migration.c b/tools/testing/selftests/mm/migration.c index c5a73617796a..ea945eebec2f 100644 --- a/tools/testing/selftests/mm/migration.c +++ b/tools/testing/selftests/mm/migration.c @@ -110,7 +110,7 @@ void *access_mem(void *ptr) * the memory access actually happens and prevents the compiler * from optimizing away this entire loop. */ - FORCE_READ((uint64_t *)ptr); + FORCE_READ(*(uint64_t *)ptr); } return NULL; diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c index 0d4209eef0c3..e6face7c0166 100644 --- a/tools/testing/selftests/mm/pagemap_ioctl.c +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -1525,7 +1525,7 @@ void zeropfn_tests(void) ret = madvise(mem, hpage_size, MADV_HUGEPAGE); if (!ret) { - FORCE_READ(mem); + FORCE_READ(*mem); ret = pagemap_ioctl(mem, hpage_size, &vec, 1, 0, 0, PAGE_IS_PFNZERO, 0, 0, PAGE_IS_PFNZERO); diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c index 05de1fc0005b..44a3f8a58806 100644 --- a/tools/testing/selftests/mm/split_huge_page_test.c +++ b/tools/testing/selftests/mm/split_huge_page_test.c @@ -439,8 +439,11 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, } madvise(*addr, fd_size, MADV_HUGEPAGE); - for (size_t i = 0; i < fd_size; i++) - FORCE_READ((*addr + i)); + for (size_t i = 0; i < fd_size; i++) { + char *addr2 = *addr + i; + + FORCE_READ(*addr2); + } if (!check_huge_file(*addr, fd_size / pmd_pagesize, pmd_pagesize)) { ksft_print_msg("No large pagecache folio generated, please provide a filesystem supporting large folio\n"); diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index c20298ae98ea..b55d1809debc 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -23,7 +23,7 @@ * anything with it in order to trigger a read page fault. We therefore must use * volatile to stop the compiler from optimising this away. */ -#define FORCE_READ(x) (*(volatile typeof(x) *)x) +#define FORCE_READ(x) (*(const volatile typeof(x) *)&(x)) extern unsigned int __page_size; extern unsigned int __page_shift; -- cgit v1.2.3 From 9b2785ea8592f239836405de023c75c4f3f5ce00 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 27 Aug 2025 20:16:00 +0800 Subject: ublk selftests: add --no_ublk_fixed_fd for not using registered ublk char device Add a new command line option --no_ublk_fixed_fd that excludes the ublk control device (/dev/ublkcN) from io_uring's registered files array. When this option is used, only backing files are registered starting from index 1, while the ublk control device is accessed using its raw file descriptor. Add ublk_get_registered_fd() helper function that returns the appropriate file descriptor for use with io_uring operations. Key optimizations implemented: - Cache UBLKS_Q_NO_UBLK_FIXED_FD flag in ublk_queue.flags to avoid reading dev->no_ublk_fixed_fd in fast path - Cache ublk char device fd in ublk_queue.ublk_fd for fast access - Update ublk_get_registered_fd() to use ublk_queue * parameter - Update io_uring_prep_buf_register/unregister() to use ublk_queue * - Replace ublk_device * access with ublk_queue * access in fast paths Also pass --no_ublk_fixed_fd to test_stress_04.sh for covering plain ublk char device mode. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250827121602.2619736-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/file_backed.c | 10 +++--- tools/testing/selftests/ublk/kublk.c | 38 ++++++++++++++++++---- tools/testing/selftests/ublk/kublk.h | 45 ++++++++++++++++++-------- tools/testing/selftests/ublk/null.c | 4 +-- tools/testing/selftests/ublk/stripe.c | 4 +-- tools/testing/selftests/ublk/test_stress_04.sh | 6 ++-- 6 files changed, 74 insertions(+), 33 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/ublk/file_backed.c b/tools/testing/selftests/ublk/file_backed.c index 2d93ac860bd5..cd9fe69ecce2 100644 --- a/tools/testing/selftests/ublk/file_backed.c +++ b/tools/testing/selftests/ublk/file_backed.c @@ -20,7 +20,7 @@ static int loop_queue_flush_io(struct ublk_thread *t, struct ublk_queue *q, struct io_uring_sqe *sqe[1]; ublk_io_alloc_sqes(t, sqe, 1); - io_uring_prep_fsync(sqe[0], 1 /*fds[1]*/, IORING_FSYNC_DATASYNC); + io_uring_prep_fsync(sqe[0], ublk_get_registered_fd(q, 1) /*fds[1]*/, IORING_FSYNC_DATASYNC); io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE); /* bit63 marks us as tgt io */ sqe[0]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1); @@ -42,7 +42,7 @@ static int loop_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q, if (!sqe[0]) return -ENOMEM; - io_uring_prep_rw(op, sqe[0], 1 /*fds[1]*/, + io_uring_prep_rw(op, sqe[0], ublk_get_registered_fd(q, 1) /*fds[1]*/, addr, iod->nr_sectors << 9, iod->start_sector << 9); @@ -56,19 +56,19 @@ static int loop_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q, ublk_io_alloc_sqes(t, sqe, 3); - io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index); + io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, ublk_get_io(q, tag)->buf_index); sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK; sqe[0]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1); - io_uring_prep_rw(op, sqe[1], 1 /*fds[1]*/, 0, + io_uring_prep_rw(op, sqe[1], ublk_get_registered_fd(q, 1) /*fds[1]*/, 0, iod->nr_sectors << 9, iod->start_sector << 9); sqe[1]->buf_index = tag; sqe[1]->flags |= IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK; sqe[1]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1); - io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index); + io_uring_prep_buf_unregister(sqe[2], q, tag, q->q_id, ublk_get_io(q, tag)->buf_index); sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, q->q_id, 1); return 2; diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 6512dfbdbce3..b71faba86c3b 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -432,7 +432,7 @@ static void ublk_thread_deinit(struct ublk_thread *t) } } -static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags) +static int ublk_queue_init(struct ublk_queue *q, unsigned long long extra_flags) { struct ublk_dev *dev = q->dev; int depth = dev->dev_info.queue_depth; @@ -446,6 +446,9 @@ static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags) q->flags = dev->dev_info.flags; q->flags |= extra_flags; + /* Cache fd in queue for fast path access */ + q->ublk_fd = dev->fds[0]; + cmd_buf_size = ublk_queue_cmd_buf_sz(q); off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz(); q->io_cmd_buf = mmap(0, cmd_buf_size, PROT_READ, @@ -481,9 +484,10 @@ static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags) return -ENOMEM; } -static int ublk_thread_init(struct ublk_thread *t) +static int ublk_thread_init(struct ublk_thread *t, unsigned long long extra_flags) { struct ublk_dev *dev = t->dev; + unsigned long long flags = dev->dev_info.flags | extra_flags; int ring_depth = dev->tgt.sq_depth, cq_depth = dev->tgt.cq_depth; int ret; @@ -512,7 +516,17 @@ static int ublk_thread_init(struct ublk_thread *t) io_uring_register_ring_fd(&t->ring); - ret = io_uring_register_files(&t->ring, dev->fds, dev->nr_fds); + if (flags & UBLKS_Q_NO_UBLK_FIXED_FD) { + /* Register only backing files starting from index 1, exclude ublk control device */ + if (dev->nr_fds > 1) { + ret = io_uring_register_files(&t->ring, &dev->fds[1], dev->nr_fds - 1); + } else { + /* No backing files to register, skip file registration */ + ret = 0; + } + } else { + ret = io_uring_register_files(&t->ring, dev->fds, dev->nr_fds); + } if (ret) { ublk_err("ublk dev %d thread %d register files failed %d\n", t->dev->dev_info.dev_id, t->idx, ret); @@ -626,9 +640,12 @@ int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io) /* These fields should be written once, never change */ ublk_set_sqe_cmd_op(sqe[0], cmd_op); - sqe[0]->fd = 0; /* dev->fds[0] */ + sqe[0]->fd = ublk_get_registered_fd(q, 0); /* dev->fds[0] */ sqe[0]->opcode = IORING_OP_URING_CMD; - sqe[0]->flags = IOSQE_FIXED_FILE; + if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD) + sqe[0]->flags = 0; /* Use raw FD, not fixed file */ + else + sqe[0]->flags = IOSQE_FIXED_FILE; sqe[0]->rw_flags = 0; cmd->tag = io->tag; cmd->q_id = q->q_id; @@ -832,6 +849,7 @@ struct ublk_thread_info { unsigned idx; sem_t *ready; cpu_set_t *affinity; + unsigned long long extra_flags; }; static void *ublk_io_handler_fn(void *data) @@ -844,7 +862,7 @@ static void *ublk_io_handler_fn(void *data) t->dev = info->dev; t->idx = info->idx; - ret = ublk_thread_init(t); + ret = ublk_thread_init(t, info->extra_flags); if (ret) { ublk_err("ublk dev %d thread %u init failed\n", dev_id, t->idx); @@ -934,6 +952,8 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) if (ctx->auto_zc_fallback) extra_flags = UBLKS_Q_AUTO_BUF_REG_FALLBACK; + if (ctx->no_ublk_fixed_fd) + extra_flags |= UBLKS_Q_NO_UBLK_FIXED_FD; for (i = 0; i < dinfo->nr_hw_queues; i++) { dev->q[i].dev = dev; @@ -951,6 +971,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) tinfo[i].dev = dev; tinfo[i].idx = i; tinfo[i].ready = &ready; + tinfo[i].extra_flags = extra_flags; /* * If threads are not tied 1:1 to queues, setting thread @@ -1471,7 +1492,7 @@ static void __cmd_create_help(char *exe, bool recovery) printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n", exe, recovery ? "recover" : "add"); printf("\t[--foreground] [--quiet] [-z] [--auto_zc] [--auto_zc_fallback] [--debug_mask mask] [-r 0|1 ] [-g]\n"); - printf("\t[-e 0|1 ] [-i 0|1]\n"); + printf("\t[-e 0|1 ] [-i 0|1] [--no_ublk_fixed_fd]\n"); printf("\t[--nthreads threads] [--per_io_tasks]\n"); printf("\t[target options] [backfile1] [backfile2] ...\n"); printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n"); @@ -1534,6 +1555,7 @@ int main(int argc, char *argv[]) { "size", 1, NULL, 's'}, { "nthreads", 1, NULL, 0 }, { "per_io_tasks", 0, NULL, 0 }, + { "no_ublk_fixed_fd", 0, NULL, 0 }, { 0, 0, 0, 0 } }; const struct ublk_tgt_ops *ops = NULL; @@ -1613,6 +1635,8 @@ int main(int argc, char *argv[]) ctx.nthreads = strtol(optarg, NULL, 10); if (!strcmp(longopts[option_idx].name, "per_io_tasks")) ctx.per_io_tasks = 1; + if (!strcmp(longopts[option_idx].name, "no_ublk_fixed_fd")) + ctx.no_ublk_fixed_fd = 1; break; case '?': /* diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 219233f8a053..5e55484fb0aa 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -77,6 +77,7 @@ struct dev_ctx { unsigned int recovery:1; unsigned int auto_zc_fallback:1; unsigned int per_io_tasks:1; + unsigned int no_ublk_fixed_fd:1; int _evtfd; int _shmid; @@ -166,7 +167,9 @@ struct ublk_queue { /* borrow one bit of ublk uapi flags, which may never be used */ #define UBLKS_Q_AUTO_BUF_REG_FALLBACK (1ULL << 63) +#define UBLKS_Q_NO_UBLK_FIXED_FD (1ULL << 62) __u64 flags; + int ublk_fd; /* cached ublk char device fd */ struct ublk_io ios[UBLK_QUEUE_DEPTH]; }; @@ -273,34 +276,48 @@ static inline int ublk_io_alloc_sqes(struct ublk_thread *t, return nr_sqes; } -static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe, - int dev_fd, int tag, int q_id, __u64 index) +static inline int ublk_get_registered_fd(struct ublk_queue *q, int fd_index) +{ + if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD) { + if (fd_index == 0) + /* Return the raw ublk FD for index 0 */ + return q->ublk_fd; + /* Adjust index for backing files (index 1 becomes 0, etc.) */ + return fd_index - 1; + } + return fd_index; +} + +static inline void __io_uring_prep_buf_reg_unreg(struct io_uring_sqe *sqe, + struct ublk_queue *q, int tag, int q_id, __u64 index) { struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd; + int dev_fd = ublk_get_registered_fd(q, 0); io_uring_prep_read(sqe, dev_fd, 0, 0, 0); sqe->opcode = IORING_OP_URING_CMD; - sqe->flags |= IOSQE_FIXED_FILE; - sqe->cmd_op = UBLK_U_IO_REGISTER_IO_BUF; + if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD) + sqe->flags &= ~IOSQE_FIXED_FILE; + else + sqe->flags |= IOSQE_FIXED_FILE; cmd->tag = tag; cmd->addr = index; cmd->q_id = q_id; } -static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe, - int dev_fd, int tag, int q_id, __u64 index) +static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe, + struct ublk_queue *q, int tag, int q_id, __u64 index) { - struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd; + __io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index); + sqe->cmd_op = UBLK_U_IO_REGISTER_IO_BUF; +} - io_uring_prep_read(sqe, dev_fd, 0, 0, 0); - sqe->opcode = IORING_OP_URING_CMD; - sqe->flags |= IOSQE_FIXED_FILE; +static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe, + struct ublk_queue *q, int tag, int q_id, __u64 index) +{ + __io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index); sqe->cmd_op = UBLK_U_IO_UNREGISTER_IO_BUF; - - cmd->tag = tag; - cmd->addr = index; - cmd->q_id = q_id; } static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe) diff --git a/tools/testing/selftests/ublk/null.c b/tools/testing/selftests/ublk/null.c index f0e0003a4860..280043f6b689 100644 --- a/tools/testing/selftests/ublk/null.c +++ b/tools/testing/selftests/ublk/null.c @@ -63,7 +63,7 @@ static int null_queue_zc_io(struct ublk_thread *t, struct ublk_queue *q, ublk_io_alloc_sqes(t, sqe, 3); - io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index); + io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, ublk_get_io(q, tag)->buf_index); sqe[0]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1); sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK; @@ -71,7 +71,7 @@ static int null_queue_zc_io(struct ublk_thread *t, struct ublk_queue *q, __setup_nop_io(tag, iod, sqe[1], q->q_id); sqe[1]->flags |= IOSQE_IO_HARDLINK; - io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index); + io_uring_prep_buf_unregister(sqe[2], q, tag, q->q_id, ublk_get_io(q, tag)->buf_index); sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, q->q_id, 1); // buf register is marked as IOSQE_CQE_SKIP_SUCCESS diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c index 1fb9b7cc281b..791fa8dc1651 100644 --- a/tools/testing/selftests/ublk/stripe.c +++ b/tools/testing/selftests/ublk/stripe.c @@ -142,7 +142,7 @@ static int stripe_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q, ublk_io_alloc_sqes(t, sqe, s->nr + extra); if (zc) { - io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, io->buf_index); + io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, io->buf_index); sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK; sqe[0]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1); @@ -168,7 +168,7 @@ static int stripe_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q, if (zc) { struct io_uring_sqe *unreg = sqe[s->nr + 1]; - io_uring_prep_buf_unregister(unreg, 0, tag, q->q_id, io->buf_index); + io_uring_prep_buf_unregister(unreg, q, tag, q->q_id, io->buf_index); unreg->user_data = build_user_data( tag, ublk_cmd_op_nr(unreg->cmd_op), 0, q->q_id, 1); } diff --git a/tools/testing/selftests/ublk/test_stress_04.sh b/tools/testing/selftests/ublk/test_stress_04.sh index 40d1437ca298..3f901db4d09d 100755 --- a/tools/testing/selftests/ublk/test_stress_04.sh +++ b/tools/testing/selftests/ublk/test_stress_04.sh @@ -28,14 +28,14 @@ _create_backfile 0 256M _create_backfile 1 128M _create_backfile 2 128M -ublk_io_and_kill_daemon 8G -t null -q 4 -z & -ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" & +ublk_io_and_kill_daemon 8G -t null -q 4 -z --no_ublk_fixed_fd & +ublk_io_and_kill_daemon 256M -t loop -q 4 -z --no_ublk_fixed_fd "${UBLK_BACKFILES[0]}" & ublk_io_and_kill_daemon 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & if _have_feature "AUTO_BUF_REG"; then ublk_io_and_kill_daemon 8G -t null -q 4 --auto_zc & ublk_io_and_kill_daemon 256M -t loop -q 4 --auto_zc "${UBLK_BACKFILES[0]}" & - ublk_io_and_kill_daemon 256M -t stripe -q 4 --auto_zc "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & + ublk_io_and_kill_daemon 256M -t stripe -q 4 --auto_zc --no_ublk_fixed_fd "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & ublk_io_and_kill_daemon 8G -t null -q 4 -z --auto_zc --auto_zc_fallback & fi -- cgit v1.2.3 From d6a367ec6c96fc8e61b4d67e69df03565ec69fb7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 28 Aug 2025 23:49:18 +0200 Subject: netfilter: nft_flowtable.sh: re-run with random mtu sizes Jakub says: nft_flowtable.sh is one of the most flake-atious test for netdev CI currently :( The root cause is two-fold: 1. the failing part of the test is supposed to make sure that ip fragments are forwarded for offloaded flows. (flowtable has to pass them to classic forward path). path mtu discovery for these subtests is disabled. 2. nft_flowtable.sh has two passes. One with fixed mtus/file size and one where link mtus and file sizes are random. The CI failures all have same pattern: re-run with random mtus and file size: -o 27663 -l 4117 -r 10089 -s 54384840 [..] PASS: dscp_egress: dscp packet counters match FAIL: file mismatch for ns1 -> ns2 In some cases this error triggers a bit ealier, sometimes in a later subtest: re-run with random mtus and file size: -o 20201 -l 4555 -r 12657 -s 9405856 [..] PASS: dscp_egress: dscp packet counters match PASS: dscp_fwd: dscp packet counters match 2025/08/17 20:37:52 socat[18954] E write(7, 0x560716b96000, 8192): Broken pipe FAIL: file mismatch for ns1 -> ns2 -rw------- 1 root root 9405856 Aug 17 20:36 /tmp/tmp.2n63vlTrQe But all logs I saw show same scenario: 1. Failing tests have pmtu discovery off (i.e., ip fragmentation) 2. The test file is much larger than first-pass default (2M Byte) 3. peers have much larger MTUs compared to the 'network'. These errors are very reproducible when re-running the test with the same commandline arguments. The timeout became much more prominent with 1d2fbaad7cd8 ("tcp: stronger sk_rcvbuf checks"): reassembled packets typically have a skb->truesize more than double the skb length. As that commit is intentional and pmtud-off with large-tcp-packets-as-fragments is not normal adjust the test to use a smaller file for the pmtu-off subtests. While at it, add more information to pass/fail messages and also run the dscp alteration subtest with pmtu discovery enabled. Link: https://netdev.bots.linux.dev/contest.html?test=nft-flowtable-sh Fixes: f84ab634904c ("selftests: netfilter: nft_flowtable.sh: re-run with random mtu sizes") Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/netdev/20250822071330.4168f0db@kernel.org/ Signed-off-by: Florian Westphal Link: https://patch.msgid.link/20250828214918.3385-1-fw@strlen.de Signed-off-by: Jakub Kicinski --- .../selftests/net/netfilter/nft_flowtable.sh | 113 ++++++++++++++------- 1 file changed, 76 insertions(+), 37 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh index a4ee5496f2a1..45832df98295 100755 --- a/tools/testing/selftests/net/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh @@ -20,6 +20,7 @@ ret=0 SOCAT_TIMEOUT=60 nsin="" +nsin_small="" ns1out="" ns2out="" @@ -36,7 +37,7 @@ cleanup() { cleanup_all_ns - rm -f "$nsin" "$ns1out" "$ns2out" + rm -f "$nsin" "$nsin_small" "$ns1out" "$ns2out" [ "$log_netns" -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns="$log_netns" } @@ -72,6 +73,7 @@ lmtu=1500 rmtu=2000 filesize=$((2 * 1024 * 1024)) +filesize_small=$((filesize / 16)) usage(){ echo "nft_flowtable.sh [OPTIONS]" @@ -89,7 +91,10 @@ do o) omtu=$OPTARG;; l) lmtu=$OPTARG;; r) rmtu=$OPTARG;; - s) filesize=$OPTARG;; + s) + filesize=$OPTARG + filesize_small=$((OPTARG / 16)) + ;; *) usage;; esac done @@ -215,6 +220,7 @@ if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then fi nsin=$(mktemp) +nsin_small=$(mktemp) ns1out=$(mktemp) ns2out=$(mktemp) @@ -265,6 +271,7 @@ check_counters() check_dscp() { local what=$1 + local pmtud="$2" local ok=1 local counter @@ -277,37 +284,39 @@ check_dscp() local pc4z=${counter%*bytes*} local pc4z=${pc4z#*packets} + local failmsg="FAIL: pmtu $pmtu: $what counters do not match, expected" + case "$what" in "dscp_none") if [ "$pc4" -gt 0 ] || [ "$pc4z" -eq 0 ]; then - echo "FAIL: dscp counters do not match, expected dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2 + echo "$failmsg dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2 ret=1 ok=0 fi ;; "dscp_fwd") if [ "$pc4" -eq 0 ] || [ "$pc4z" -eq 0 ]; then - echo "FAIL: dscp counters do not match, expected dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2 + echo "$failmsg dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2 ret=1 ok=0 fi ;; "dscp_ingress") if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then - echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 + echo "$failmsg dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 ret=1 ok=0 fi ;; "dscp_egress") if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then - echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 + echo "$failmsg dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 ret=1 ok=0 fi ;; *) - echo "FAIL: Unknown DSCP check" 1>&2 + echo "$failmsg: Unknown DSCP check" 1>&2 ret=1 ok=0 esac @@ -319,9 +328,9 @@ check_dscp() check_transfer() { - in=$1 - out=$2 - what=$3 + local in=$1 + local out=$2 + local what=$3 if ! cmp "$in" "$out" > /dev/null 2>&1; then echo "FAIL: file mismatch for $what" 1>&2 @@ -342,25 +351,39 @@ test_tcp_forwarding_ip() { local nsa=$1 local nsb=$2 - local dstip=$3 - local dstport=$4 + local pmtu=$3 + local dstip=$4 + local dstport=$5 local lret=0 + local socatc + local socatl + local infile="$nsin" + + if [ $pmtu -eq 0 ]; then + infile="$nsin_small" + fi - timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$nsin" > "$ns2out" & + timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$infile" > "$ns2out" & lpid=$! busywait 1000 listener_ready - timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$nsin" > "$ns1out" + timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$infile" > "$ns1out" + socatc=$? wait $lpid + socatl=$? - if ! check_transfer "$nsin" "$ns2out" "ns1 -> ns2"; then + if [ $socatl -ne 0 ] || [ $socatc -ne 0 ];then + rc=1 + fi + + if ! check_transfer "$infile" "$ns2out" "ns1 -> ns2"; then lret=1 ret=1 fi - if ! check_transfer "$nsin" "$ns1out" "ns1 <- ns2"; then + if ! check_transfer "$infile" "$ns1out" "ns1 <- ns2"; then lret=1 ret=1 fi @@ -370,14 +393,16 @@ test_tcp_forwarding_ip() test_tcp_forwarding() { - test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 + local pmtu="$3" + + test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345 return $? } test_tcp_forwarding_set_dscp() { - check_dscp "dscp_none" + local pmtu="$3" ip netns exec "$nsr1" nft -f - <&2 @@ -489,8 +519,9 @@ table ip nat { } EOF +check_dscp "dscp_none" "0" if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 ""; then - echo "FAIL: flow offload for ns1/ns2 with dscp update" 1>&2 + echo "FAIL: flow offload for ns1/ns2 with dscp update and no pmtu discovery" 1>&2 exit 0 fi @@ -512,6 +543,14 @@ ip netns exec "$ns2" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null # are lower than file size and packets were forwarded via flowtable layer. # For earlier tests (large mtus), packets cannot be handled via flowtable # (except pure acks and other small packets). +ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null +ip netns exec "$ns2" nft reset counters table inet filter >/dev/null + +if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 1 ""; then + echo "FAIL: flow offload for ns1/ns2 with dscp update and pmtu discovery" 1>&2 + exit 0 +fi + ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 ""; then @@ -644,7 +683,7 @@ ip -net "$ns2" route del 192.168.10.1 via 10.0.2.1 ip -net "$ns2" route add default via 10.0.2.1 ip -net "$ns2" route add default via dead:2::1 -if test_tcp_forwarding "$ns1" "$ns2"; then +if test_tcp_forwarding "$ns1" "$ns2" 1; then check_counters "ipsec tunnel mode for ns1/ns2" else echo "FAIL: ipsec tunnel mode for ns1/ns2" @@ -668,7 +707,7 @@ if [ "$1" = "" ]; then fi echo "re-run with random mtus and file size: -o $o -l $l -r $r -s $filesize" - $0 -o "$o" -l "$l" -r "$r" -s "$filesize" + $0 -o "$o" -l "$l" -r "$r" -s "$filesize" || ret=1 fi exit $ret -- cgit v1.2.3 From d82aa5d3501b25bfb7bc2a24a68ad0a83b2ad10b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 12 Aug 2025 15:49:27 +0100 Subject: kselftest/arm64: Don't open code SVE_PT_SIZE() in fp-ptrace In fp-trace when allocating a buffer to write SVE register data we open code the addition of the header size to the VL depeendent register data size, which lead to an underallocation bug when we cut'n'pasted the code for FPSIMD format writes. Use the SVE_PT_SIZE() macro that the kernel UAPI provides for this. Fixes: b84d2b27954f ("kselftest/arm64: Test FPSIMD format data writes via NT_ARM_SVE in fp-ptrace") Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20250812-arm64-fp-trace-macro-v1-1-317cfff986a5@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/fp-ptrace.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c index 124bc883365e..cdd7a45c045d 100644 --- a/tools/testing/selftests/arm64/fp/fp-ptrace.c +++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c @@ -1187,7 +1187,7 @@ static void sve_write_sve(pid_t child, struct test_config *config) if (!vl) return; - iov.iov_len = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE); + iov.iov_len = SVE_PT_SIZE(vq, SVE_PT_REGS_SVE); iov.iov_base = malloc(iov.iov_len); if (!iov.iov_base) { ksft_print_msg("Failed allocating %lu byte SVE write buffer\n", @@ -1234,8 +1234,7 @@ static void sve_write_fpsimd(pid_t child, struct test_config *config) if (!vl) return; - iov.iov_len = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, - SVE_PT_REGS_FPSIMD); + iov.iov_len = SVE_PT_SIZE(vq, SVE_PT_REGS_FPSIMD); iov.iov_base = malloc(iov.iov_len); if (!iov.iov_base) { ksft_print_msg("Failed allocating %lu byte SVE write buffer\n", -- cgit v1.2.3 From 49c2502b5946ebf454d7e16fd0189769a82b6117 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 30 Aug 2025 11:38:42 -0700 Subject: selftests: drv-net: csum: fix interface name for remote host Use cfg.remote_ifname for arguments of remote command. Without this UDP tests fail in NIPA where local interface is called enp1s0 and remote enp0s4. Fixes: 1d0dc857b5d8 ("selftests: drv-net: add checksum tests") Reviewed-by: Willem de Bruijn Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20250830183842.688935-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/csum.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/drivers/net/hw/csum.py b/tools/testing/selftests/drivers/net/hw/csum.py index cd23af875317..3e3a89a34afe 100755 --- a/tools/testing/selftests/drivers/net/hw/csum.py +++ b/tools/testing/selftests/drivers/net/hw/csum.py @@ -17,7 +17,7 @@ def test_receive(cfg, ipver="6", extra_args=None): ip_args = f"-{ipver} -S {cfg.remote_addr_v[ipver]} -D {cfg.addr_v[ipver]}" rx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -n 100 {ip_args} -r 1 -R {extra_args}" - tx_cmd = f"{cfg.bin_remote} -i {cfg.ifname} -n 100 {ip_args} -r 1 -T {extra_args}" + tx_cmd = f"{cfg.bin_remote} -i {cfg.remote_ifname} -n 100 {ip_args} -r 1 -T {extra_args}" with bkg(rx_cmd, exit_wait=True): wait_port_listen(34000, proto="udp") @@ -37,7 +37,7 @@ def test_transmit(cfg, ipver="6", extra_args=None): if extra_args != "-U -Z": extra_args += " -r 1" - rx_cmd = f"{cfg.bin_remote} -i {cfg.ifname} -L 1 -n 100 {ip_args} -R {extra_args}" + rx_cmd = f"{cfg.bin_remote} -i {cfg.remote_ifname} -L 1 -n 100 {ip_args} -R {extra_args}" tx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -L 1 -n 100 {ip_args} -T {extra_args}" with bkg(rx_cmd, host=cfg.remote, exit_wait=True): -- cgit v1.2.3 From e51bd0e595476c1527bb0b4def095a6fd16b2563 Mon Sep 17 00:00:00 2001 From: Xing Guo Date: Wed, 13 Aug 2025 11:16:47 +0800 Subject: selftests/fs/mount-notify: Fix compilation failure. Commit c6d9775c2066 ("selftests/fs/mount-notify: build with tools include dir") introduces the struct __kernel_fsid_t to decouple dependency with headers_install. The commit forgets to define a macro for __kernel_fsid_t and it will cause type re-definition issue. Signed-off-by: Xing Guo Link: https://lore.kernel.org/20250813031647.96411-1-higuoxing@gmail.com Acked-by: Amir Goldstein Closes: https://lore.kernel.org/oe-lkp/202508110628.65069d92-lkp@intel.com Signed-off-by: Christian Brauner --- .../filesystems/mount-notify/mount-notify_test.c | 17 ++++++++--------- .../filesystems/mount-notify/mount-notify_test_ns.c | 18 ++++++++---------- 2 files changed, 16 insertions(+), 19 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c index 63ce708d93ed..e4b7c2b457ee 100644 --- a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c +++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c @@ -2,6 +2,13 @@ // Copyright (c) 2025 Miklos Szeredi #define _GNU_SOURCE + +// Needed for linux/fanotify.h +typedef struct { + int val[2]; +} __kernel_fsid_t; +#define __kernel_fsid_t __kernel_fsid_t + #include #include #include @@ -10,20 +17,12 @@ #include #include #include +#include #include "../../kselftest_harness.h" #include "../statmount/statmount.h" #include "../utils.h" -// Needed for linux/fanotify.h -#ifndef __kernel_fsid_t -typedef struct { - int val[2]; -} __kernel_fsid_t; -#endif - -#include - static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX"; static const int mark_cmds[] = { diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c index 090a5ca65004..9f57ca46e3af 100644 --- a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c +++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c @@ -2,6 +2,13 @@ // Copyright (c) 2025 Miklos Szeredi #define _GNU_SOURCE + +// Needed for linux/fanotify.h +typedef struct { + int val[2]; +} __kernel_fsid_t; +#define __kernel_fsid_t __kernel_fsid_t + #include #include #include @@ -10,21 +17,12 @@ #include #include #include +#include #include "../../kselftest_harness.h" -#include "../../pidfd/pidfd.h" #include "../statmount/statmount.h" #include "../utils.h" -// Needed for linux/fanotify.h -#ifndef __kernel_fsid_t -typedef struct { - int val[2]; -} __kernel_fsid_t; -#endif - -#include - static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX"; static const int mark_types[] = { -- cgit v1.2.3 From 2c9fb925c2ccc6ee475134840cff6c6b73851730 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 1 Sep 2025 09:50:35 +0300 Subject: selftests: net: Add a selftest for VXLAN with FDB nexthop groups Add test cases for VXLAN with FDB nexthop groups, testing both IPv4 and IPv6. Test basic Tx functionality as well as some corner cases. Example output: # ./test_vxlan_nh.sh TEST: VXLAN FDB nexthop: IPv4 basic Tx [ OK ] TEST: VXLAN FDB nexthop: IPv6 basic Tx [ OK ] TEST: VXLAN FDB nexthop: learning [ OK ] TEST: VXLAN FDB nexthop: IPv4 proxy [ OK ] TEST: VXLAN FDB nexthop: IPv6 proxy [ OK ] Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20250901065035.159644-4-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/test_vxlan_nh.sh | 223 +++++++++++++++++++++++++++ 2 files changed, 224 insertions(+) create mode 100755 tools/testing/selftests/net/test_vxlan_nh.sh (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index b31a71f2b372..c7e03e1d6f63 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -99,6 +99,7 @@ TEST_GEN_PROGS += bind_wildcard TEST_GEN_PROGS += bind_timewait TEST_PROGS += test_vxlan_mdb.sh TEST_PROGS += test_bridge_neigh_suppress.sh +TEST_PROGS += test_vxlan_nh.sh TEST_PROGS += test_vxlan_nolocalbypass.sh TEST_PROGS += test_bridge_backup_port.sh TEST_PROGS += test_neigh.sh diff --git a/tools/testing/selftests/net/test_vxlan_nh.sh b/tools/testing/selftests/net/test_vxlan_nh.sh new file mode 100755 index 000000000000..20f3369f776b --- /dev/null +++ b/tools/testing/selftests/net/test_vxlan_nh.sh @@ -0,0 +1,223 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh +TESTS=" + basic_tx_ipv4 + basic_tx_ipv6 + learning + proxy_ipv4 + proxy_ipv6 +" +VERBOSE=0 + +################################################################################ +# Utilities + +run_cmd() +{ + local cmd="$1" + local out + local stderr="2>/dev/null" + + if [ "$VERBOSE" = "1" ]; then + echo "COMMAND: $cmd" + stderr= + fi + + out=$(eval "$cmd" "$stderr") + rc=$? + if [ "$VERBOSE" -eq 1 ] && [ -n "$out" ]; then + echo " $out" + fi + + return $rc +} + +################################################################################ +# Cleanup + +exit_cleanup_all() +{ + cleanup_all_ns + exit "${EXIT_STATUS}" +} + +################################################################################ +# Tests + +nh_stats_get() +{ + ip -n "$ns1" -s -j nexthop show id 10 | jq ".[][\"group_stats\"][][\"packets\"]" +} + +tc_stats_get() +{ + tc_rule_handle_stats_get "dev dummy1 egress" 101 ".packets" "-n $ns1" +} + +basic_tx_common() +{ + local af_str=$1; shift + local proto=$1; shift + local local_addr=$1; shift + local plen=$1; shift + local remote_addr=$1; shift + + RET=0 + + # Test basic Tx functionality. Check that stats are incremented on + # both the FDB nexthop group and the egress device. + + run_cmd "ip -n $ns1 link add name dummy1 up type dummy" + run_cmd "ip -n $ns1 route add $remote_addr/$plen dev dummy1" + run_cmd "tc -n $ns1 qdisc add dev dummy1 clsact" + run_cmd "tc -n $ns1 filter add dev dummy1 egress proto $proto pref 1 handle 101 flower ip_proto udp dst_ip $remote_addr dst_port 4789 action pass" + + run_cmd "ip -n $ns1 address add $local_addr/$plen dev lo" + + run_cmd "ip -n $ns1 nexthop add id 1 via $remote_addr fdb" + run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb" + + run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local $local_addr dstport 4789" + run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10" + + run_cmd "ip netns exec $ns1 mausezahn vx0 -a own -b 00:11:22:33:44:55 -c 1 -q" + + busywait "$BUSYWAIT_TIMEOUT" until_counter_is "== 1" nh_stats_get > /dev/null + check_err $? "FDB nexthop group stats did not increase" + + busywait "$BUSYWAIT_TIMEOUT" until_counter_is "== 1" tc_stats_get > /dev/null + check_err $? "tc filter stats did not increase" + + log_test "VXLAN FDB nexthop: $af_str basic Tx" +} + +basic_tx_ipv4() +{ + basic_tx_common "IPv4" ipv4 192.0.2.1 32 192.0.2.2 +} + +basic_tx_ipv6() +{ + basic_tx_common "IPv6" ipv6 2001:db8:1::1 128 2001:db8:1::2 +} + +learning() +{ + RET=0 + + # When learning is enabled on the VXLAN device, an incoming packet + # might try to refresh an FDB entry that points to an FDB nexthop group + # instead of an ordinary remote destination. Check that the kernel does + # not crash in this situation. + + run_cmd "ip -n $ns1 address add 192.0.2.1/32 dev lo" + run_cmd "ip -n $ns1 address add 192.0.2.2/32 dev lo" + + run_cmd "ip -n $ns1 nexthop add id 1 via 192.0.2.3 fdb" + run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb" + + run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local 192.0.2.1 dstport 12345 localbypass" + run_cmd "ip -n $ns1 link add name vx1 up type vxlan id 10020 local 192.0.2.2 dstport 54321 learning" + + run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static dst 192.0.2.2 port 54321 vni 10020" + run_cmd "bridge -n $ns1 fdb add 00:aa:bb:cc:dd:ee dev vx1 self static nhid 10" + + run_cmd "ip netns exec $ns1 mausezahn vx0 -a 00:aa:bb:cc:dd:ee -b 00:11:22:33:44:55 -c 1 -q" + + log_test "VXLAN FDB nexthop: learning" +} + +proxy_common() +{ + local af_str=$1; shift + local local_addr=$1; shift + local plen=$1; shift + local remote_addr=$1; shift + local neigh_addr=$1; shift + local ping_cmd=$1; shift + + RET=0 + + # When the "proxy" option is enabled on the VXLAN device, the device + # will suppress ARP requests and IPv6 Neighbor Solicitation messages if + # it is able to reply on behalf of the remote host. That is, if a + # matching and valid neighbor entry is configured on the VXLAN device + # whose MAC address is not behind the "any" remote (0.0.0.0 / ::). The + # FDB entry for the neighbor's MAC address might point to an FDB + # nexthop group instead of an ordinary remote destination. Check that + # the kernel does not crash in this situation. + + run_cmd "ip -n $ns1 address add $local_addr/$plen dev lo" + + run_cmd "ip -n $ns1 nexthop add id 1 via $remote_addr fdb" + run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb" + + run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local $local_addr dstport 4789 proxy" + + run_cmd "ip -n $ns1 neigh add $neigh_addr lladdr 00:11:22:33:44:55 nud perm dev vx0" + + run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10" + + run_cmd "ip netns exec $ns1 $ping_cmd" + + log_test "VXLAN FDB nexthop: $af_str proxy" +} + +proxy_ipv4() +{ + proxy_common "IPv4" 192.0.2.1 32 192.0.2.2 192.0.2.3 \ + "arping -b -c 1 -s 192.0.2.1 -I vx0 192.0.2.3" +} + +proxy_ipv6() +{ + proxy_common "IPv6" 2001:db8:1::1 128 2001:db8:1::2 2001:db8:1::3 \ + "ndisc6 -r 1 -s 2001:db8:1::1 -w 1 2001:db8:1::3 vx0" +} + +################################################################################ +# Usage + +usage() +{ + cat < Test(s) to run (default: all) + (options: $TESTS) + -p Pause on fail + -v Verbose mode (show commands and output) +EOF +} + +################################################################################ +# Main + +while getopts ":t:pvh" opt; do + case $opt in + t) TESTS=$OPTARG;; + p) PAUSE_ON_FAIL=yes;; + v) VERBOSE=$((VERBOSE + 1));; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +require_command mausezahn +require_command arping +require_command ndisc6 +require_command jq + +if ! ip nexthop help 2>&1 | grep -q "stats"; then + echo "SKIP: iproute2 ip too old, missing nexthop stats support" + exit "$ksft_skip" +fi + +trap exit_cleanup_all EXIT + +for t in $TESTS +do + setup_ns ns1; $t; cleanup_all_ns; +done -- cgit v1.2.3 From 661a4f307fe0f80c1d544e09476ccba9037e8e65 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 27 Aug 2025 19:17:32 +0200 Subject: selftests: netfilter: fix udpclash tool hang Yi Chen reports that 'udpclash' loops forever depending on compiler (and optimization level used); while (x == 1) gets optimized into for (;;). Add volatile qualifier to avoid that. While at it, also run it under timeout(1) and fix the resize script to not ignore the timeout passed as second parameter to insert_flood. Reported-by: Yi Chen Suggested-by: Yi Chen Fixes: 78a588363587 ("selftests: netfilter: add conntrack clash resolution test case") Signed-off-by: Florian Westphal --- tools/testing/selftests/net/netfilter/conntrack_clash.sh | 2 +- tools/testing/selftests/net/netfilter/conntrack_resize.sh | 5 +++-- tools/testing/selftests/net/netfilter/udpclash.c | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/netfilter/conntrack_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_clash.sh index 606a43a60f73..7fc6c5dbd551 100755 --- a/tools/testing/selftests/net/netfilter/conntrack_clash.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_clash.sh @@ -99,7 +99,7 @@ run_one_clash_test() local entries local cre - if ! ip netns exec "$ns" ./udpclash $daddr $dport;then + if ! ip netns exec "$ns" timeout 30 ./udpclash $daddr $dport;then echo "INFO: did not receive expected number of replies for $daddr:$dport" ip netns exec "$ctns" conntrack -S # don't fail: check if clash resolution triggered after all. diff --git a/tools/testing/selftests/net/netfilter/conntrack_resize.sh b/tools/testing/selftests/net/netfilter/conntrack_resize.sh index 788cd56ea4a0..615fe3c6f405 100755 --- a/tools/testing/selftests/net/netfilter/conntrack_resize.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_resize.sh @@ -187,7 +187,7 @@ ct_udpclash() [ -x udpclash ] || return while [ $now -lt $end ]; do - ip netns exec "$ns" ./udpclash 127.0.0.1 $((RANDOM%65536)) > /dev/null 2>&1 + ip netns exec "$ns" timeout 30 ./udpclash 127.0.0.1 $((RANDOM%65536)) > /dev/null 2>&1 now=$(date +%s) done @@ -277,6 +277,7 @@ check_taint() insert_flood() { local n="$1" + local timeout="$2" local r=0 r=$((RANDOM%$insert_count)) @@ -302,7 +303,7 @@ test_floodresize_all() read tainted_then < /proc/sys/kernel/tainted for n in "$nsclient1" "$nsclient2";do - insert_flood "$n" & + insert_flood "$n" "$timeout" & done # resize table constantly while flood/insert/dump/flushs diff --git a/tools/testing/selftests/net/netfilter/udpclash.c b/tools/testing/selftests/net/netfilter/udpclash.c index 85c7b906ad08..79de163d61ab 100644 --- a/tools/testing/selftests/net/netfilter/udpclash.c +++ b/tools/testing/selftests/net/netfilter/udpclash.c @@ -29,7 +29,7 @@ struct thread_args { int sockfd; }; -static int wait = 1; +static volatile int wait = 1; static void *thread_main(void *varg) { -- cgit v1.2.3 From fd2004d82d8d8faa94879e3de3096c8511728637 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 3 Sep 2025 22:28:51 +0000 Subject: selftest: net: Fix weird setsockopt() in bind_bhash.c. bind_bhash.c passes (SO_REUSEADDR | SO_REUSEPORT) to setsockopt(). In the asm-generic definition, the value happens to match with the bare SO_REUSEPORT, (2 | 15) == 15, but not on some arch. arch/alpha/include/uapi/asm/socket.h:18:#define SO_REUSEADDR 0x0004 arch/alpha/include/uapi/asm/socket.h:24:#define SO_REUSEPORT 0x0200 arch/mips/include/uapi/asm/socket.h:24:#define SO_REUSEADDR 0x0004 /* Allow reuse of local addresses. */ arch/mips/include/uapi/asm/socket.h:33:#define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. */ arch/parisc/include/uapi/asm/socket.h:12:#define SO_REUSEADDR 0x0004 arch/parisc/include/uapi/asm/socket.h:18:#define SO_REUSEPORT 0x0200 arch/sparc/include/uapi/asm/socket.h:13:#define SO_REUSEADDR 0x0004 arch/sparc/include/uapi/asm/socket.h:20:#define SO_REUSEPORT 0x0200 include/uapi/asm-generic/socket.h:12:#define SO_REUSEADDR 2 include/uapi/asm-generic/socket.h:27:#define SO_REUSEPORT 15 Let's pass SO_REUSEPORT only. Fixes: c35ecb95c448 ("selftests/net: Add test for timing a bind request to a port with a populated bhash entry") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250903222938.2601522-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/bind_bhash.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/bind_bhash.c b/tools/testing/selftests/net/bind_bhash.c index 57ff67a3751e..da04b0b19b73 100644 --- a/tools/testing/selftests/net/bind_bhash.c +++ b/tools/testing/selftests/net/bind_bhash.c @@ -75,7 +75,7 @@ static void *setup(void *arg) int *array = (int *)arg; for (i = 0; i < MAX_CONNECTIONS; i++) { - sock_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr); + sock_fd = bind_socket(SO_REUSEPORT, setup_addr); if (sock_fd < 0) { ret = sock_fd; pthread_exit(&ret); @@ -103,7 +103,7 @@ int main(int argc, const char *argv[]) setup_addr = use_v6 ? setup_addr_v6 : setup_addr_v4; - listener_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr); + listener_fd = bind_socket(SO_REUSEPORT, setup_addr); if (listen(listener_fd, 100) < 0) { perror("listen failed"); return -1; -- cgit v1.2.3 From bf59028ea8d42e8d10bb3d847c9982488ee9e3a0 Mon Sep 17 00:00:00 2001 From: Oscar Maes Date: Tue, 2 Sep 2025 17:02:40 +0200 Subject: selftests: net: add test for destination in broadcast packets Add test to check the broadcast ethernet destination field is set correctly. This test sends a broadcast ping, captures it using tcpdump and ensures that all bits of the 6 octet ethernet destination address are correctly set by examining the output capture file. Co-developed-by: Brett A C Sheffield Signed-off-by: Brett A C Sheffield Signed-off-by: Oscar Maes Link: https://patch.msgid.link/20250902150240.4272-1-oscmaes92@gmail.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/broadcast_ether_dst.sh | 83 ++++++++++++++++++++++ 2 files changed, 84 insertions(+) create mode 100755 tools/testing/selftests/net/broadcast_ether_dst.sh (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index c7e03e1d6f63..2b31d4a93ad7 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -116,6 +116,7 @@ TEST_PROGS += skf_net_off.sh TEST_GEN_FILES += skf_net_off TEST_GEN_FILES += tfo TEST_PROGS += tfo_passive.sh +TEST_PROGS += broadcast_ether_dst.sh TEST_PROGS += broadcast_pmtu.sh TEST_PROGS += ipv6_force_forwarding.sh diff --git a/tools/testing/selftests/net/broadcast_ether_dst.sh b/tools/testing/selftests/net/broadcast_ether_dst.sh new file mode 100755 index 000000000000..334a7eca8a80 --- /dev/null +++ b/tools/testing/selftests/net/broadcast_ether_dst.sh @@ -0,0 +1,83 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Author: Brett A C Sheffield +# Author: Oscar Maes +# +# Ensure destination ethernet field is correctly set for +# broadcast packets + +source lib.sh + +CLIENT_IP4="192.168.0.1" +GW_IP4="192.168.0.2" + +setup() { + setup_ns CLIENT_NS SERVER_NS + + ip -net "${SERVER_NS}" link add link1 type veth \ + peer name link0 netns "${CLIENT_NS}" + + ip -net "${CLIENT_NS}" link set link0 up + ip -net "${CLIENT_NS}" addr add "${CLIENT_IP4}"/24 dev link0 + + ip -net "${SERVER_NS}" link set link1 up + + ip -net "${CLIENT_NS}" route add default via "${GW_IP4}" + ip netns exec "${CLIENT_NS}" arp -s "${GW_IP4}" 00:11:22:33:44:55 +} + +cleanup() { + rm -f "${CAPFILE}" "${OUTPUT}" + ip -net "${SERVER_NS}" link del link1 + cleanup_ns "${CLIENT_NS}" "${SERVER_NS}" +} + +test_broadcast_ether_dst() { + local rc=0 + CAPFILE=$(mktemp -u cap.XXXXXXXXXX) + OUTPUT=$(mktemp -u out.XXXXXXXXXX) + + echo "Testing ethernet broadcast destination" + + # start tcpdump listening for icmp + # tcpdump will exit after receiving a single packet + # timeout will kill tcpdump if it is still running after 2s + timeout 2s ip netns exec "${CLIENT_NS}" \ + tcpdump -i link0 -c 1 -w "${CAPFILE}" icmp &> "${OUTPUT}" & + pid=$! + slowwait 1 grep -qs "listening" "${OUTPUT}" + + # send broadcast ping + ip netns exec "${CLIENT_NS}" \ + ping -W0.01 -c1 -b 255.255.255.255 &> /dev/null + + # wait for tcpdump for exit after receiving packet + wait "${pid}" + + # compare ethernet destination field to ff:ff:ff:ff:ff:ff + ether_dst=$(tcpdump -r "${CAPFILE}" -tnne 2>/dev/null | \ + awk '{sub(/,/,"",$3); print $3}') + if [[ "${ether_dst}" == "ff:ff:ff:ff:ff:ff" ]]; then + echo "[ OK ]" + rc="${ksft_pass}" + else + echo "[FAIL] expected dst ether addr to be ff:ff:ff:ff:ff:ff," \ + "got ${ether_dst}" + rc="${ksft_fail}" + fi + + return "${rc}" +} + +if [ ! -x "$(command -v tcpdump)" ]; then + echo "SKIP: Could not run test without tcpdump tool" + exit "${ksft_skip}" +fi + +trap cleanup EXIT + +setup +test_broadcast_ether_dst + +exit $? -- cgit v1.2.3 From 3aa9b9a165d5e9afc7d8b5dbcd508810c05c8e89 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 29 Aug 2025 16:36:57 +0200 Subject: selftests/bpf: Extend crypto_sanity selftest with invalid dst buffer Small cleanup and test extension to probe the bpf_crypto_{encrypt,decrypt}() kfunc when a bad dst buffer is passed in to assert that an error is returned. Also, encrypt_sanity() and skb_crypto_setup() were explicit to set the global status variable to zero before any test, so do the same for decrypt_sanity(). Do not explicitly zero the on-stack err before bpf_crypto_ctx_create() given the kfunc is expected to do it internally for the success case. Before kernel fix: # ./vmtest.sh -- ./test_progs -t crypto [...] [ 1.531200] bpf_testmod: loading out-of-tree module taints kernel. [ 1.533388] bpf_testmod: module verification failed: signature and/or required key missing - tainting kernel #87/1 crypto_basic/crypto_release:OK #87/2 crypto_basic/crypto_acquire:OK #87 crypto_basic:OK test_crypto_sanity:PASS:skel open 0 nsec test_crypto_sanity:PASS:ip netns add crypto_sanity_ns 0 nsec test_crypto_sanity:PASS:ip -net crypto_sanity_ns -6 addr add face::1/128 dev lo nodad 0 nsec test_crypto_sanity:PASS:ip -net crypto_sanity_ns link set dev lo up 0 nsec test_crypto_sanity:PASS:open_netns 0 nsec test_crypto_sanity:PASS:AF_ALG init fail 0 nsec test_crypto_sanity:PASS:if_nametoindex lo 0 nsec test_crypto_sanity:PASS:skb_crypto_setup fd 0 nsec test_crypto_sanity:PASS:skb_crypto_setup 0 nsec test_crypto_sanity:PASS:skb_crypto_setup retval 0 nsec test_crypto_sanity:PASS:skb_crypto_setup status 0 nsec test_crypto_sanity:PASS:create qdisc hook 0 nsec test_crypto_sanity:PASS:make_sockaddr 0 nsec test_crypto_sanity:PASS:attach encrypt filter 0 nsec test_crypto_sanity:PASS:encrypt socket 0 nsec test_crypto_sanity:PASS:encrypt send 0 nsec test_crypto_sanity:FAIL:encrypt status unexpected error: -5 (errno 95) #88 crypto_sanity:FAIL Summary: 1/2 PASSED, 0 SKIPPED, 1 FAILED After kernel fix: # ./vmtest.sh -- ./test_progs -t crypto [...] [ 1.540963] bpf_testmod: loading out-of-tree module taints kernel. [ 1.542404] bpf_testmod: module verification failed: signature and/or required key missing - tainting kernel #87/1 crypto_basic/crypto_release:OK #87/2 crypto_basic/crypto_acquire:OK #87 crypto_basic:OK #88 crypto_sanity:OK Summary: 2/2 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Daniel Borkmann Cc: Vadim Fedorenko Link: https://lore.kernel.org/r/20250829143657.318524-2-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/crypto_sanity.c | 46 ++++++++++++++++------- 1 file changed, 32 insertions(+), 14 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/progs/crypto_sanity.c b/tools/testing/selftests/bpf/progs/crypto_sanity.c index 645be6cddf36..dfd8a258f14a 100644 --- a/tools/testing/selftests/bpf/progs/crypto_sanity.c +++ b/tools/testing/selftests/bpf/progs/crypto_sanity.c @@ -14,7 +14,7 @@ unsigned char key[256] = {}; u16 udp_test_port = 7777; u32 authsize, key_len; char algo[128] = {}; -char dst[16] = {}; +char dst[16] = {}, dst_bad[8] = {}; int status; static int skb_dynptr_validate(struct __sk_buff *skb, struct bpf_dynptr *psrc) @@ -59,10 +59,9 @@ int skb_crypto_setup(void *ctx) .authsize = authsize, }; struct bpf_crypto_ctx *cctx; - int err = 0; + int err; status = 0; - if (key_len > 256) { status = -EINVAL; return 0; @@ -70,8 +69,8 @@ int skb_crypto_setup(void *ctx) __builtin_memcpy(¶ms.algo, algo, sizeof(algo)); __builtin_memcpy(¶ms.key, key, sizeof(key)); - cctx = bpf_crypto_ctx_create(¶ms, sizeof(params), &err); + cctx = bpf_crypto_ctx_create(¶ms, sizeof(params), &err); if (!cctx) { status = err; return 0; @@ -80,7 +79,6 @@ int skb_crypto_setup(void *ctx) err = crypto_ctx_insert(cctx); if (err && err != -EEXIST) status = err; - return 0; } @@ -92,6 +90,7 @@ int decrypt_sanity(struct __sk_buff *skb) struct bpf_dynptr psrc, pdst; int err; + status = 0; err = skb_dynptr_validate(skb, &psrc); if (err < 0) { status = err; @@ -110,13 +109,23 @@ int decrypt_sanity(struct __sk_buff *skb) return TC_ACT_SHOT; } - /* dst is a global variable to make testing part easier to check. In real - * production code, a percpu map should be used to store the result. + /* Check also bad case where the dst buffer is smaller than the + * skb's linear section. + */ + bpf_dynptr_from_mem(dst_bad, sizeof(dst_bad), 0, &pdst); + status = bpf_crypto_decrypt(ctx, &psrc, &pdst, NULL); + if (!status) + status = -EIO; + if (status != -EINVAL) + goto err; + + /* dst is a global variable to make testing part easier to check. + * In real production code, a percpu map should be used to store + * the result. */ bpf_dynptr_from_mem(dst, sizeof(dst), 0, &pdst); - status = bpf_crypto_decrypt(ctx, &psrc, &pdst, NULL); - +err: return TC_ACT_SHOT; } @@ -129,7 +138,6 @@ int encrypt_sanity(struct __sk_buff *skb) int err; status = 0; - err = skb_dynptr_validate(skb, &psrc); if (err < 0) { status = err; @@ -148,13 +156,23 @@ int encrypt_sanity(struct __sk_buff *skb) return TC_ACT_SHOT; } - /* dst is a global variable to make testing part easier to check. In real - * production code, a percpu map should be used to store the result. + /* Check also bad case where the dst buffer is smaller than the + * skb's linear section. + */ + bpf_dynptr_from_mem(dst_bad, sizeof(dst_bad), 0, &pdst); + status = bpf_crypto_encrypt(ctx, &psrc, &pdst, NULL); + if (!status) + status = -EIO; + if (status != -EINVAL) + goto err; + + /* dst is a global variable to make testing part easier to check. + * In real production code, a percpu map should be used to store + * the result. */ bpf_dynptr_from_mem(dst, sizeof(dst), 0, &pdst); - status = bpf_crypto_encrypt(ctx, &psrc, &pdst, NULL); - +err: return TC_ACT_SHOT; } -- cgit v1.2.3 From 5d40c038c879eeb910039adeaf6102e1c4dda807 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Fri, 29 Aug 2025 04:53:57 +0200 Subject: selftests/bpf: Fix "expression result unused" warnings with icecc icecc is a compiler wrapper that distributes compile jobs over a build farm [1]. It works by sending toolchain binaries and preprocessed source code to remote machines. Unfortunately using it with BPF selftests causes build failures due to a clang bug [2]. The problem is that clang suppresses the -Wunused-value warning if the unused expression comes from a macro expansion. Since icecc compiles preprocessed source code, this information is not available. This leads to -Wunused-value false positives. obj_new_no_struct() and obj_new_acq() use the bpf_obj_new() macro and discard the result. arena_spin_lock_slowpath() uses two macros that produce values and ignores the results. Add (void) casts to explicitly indicate that this is intentional and suppress the warning. An alternative solution is to change the macros to not produce values. This would work today for the arena_spin_lock_slowpath() issue, but in the future there may appear users who need them. Another potential solution is to replace these macros with functions. Unfortunately this would not work, because these macros work with unknown types and control flow. [1] https://github.com/icecc/icecream [2] https://github.com/llvm/llvm-project/issues/142614 Signed-off-by: Ilya Leoshkevich Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20250829030017.102615-2-iii@linux.ibm.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h | 4 ++-- tools/testing/selftests/bpf/progs/linked_list_fail.c | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h b/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h index d67466c1ff77..f90531cf3ee5 100644 --- a/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h +++ b/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h @@ -302,7 +302,7 @@ int arena_spin_lock_slowpath(arena_spinlock_t __arena __arg_arena *lock, u32 val * barriers. */ if (val & _Q_LOCKED_MASK) - smp_cond_load_acquire_label(&lock->locked, !VAL, release_err); + (void)smp_cond_load_acquire_label(&lock->locked, !VAL, release_err); /* * take ownership and clear the pending bit. @@ -380,7 +380,7 @@ queue: /* Link @node into the waitqueue. */ WRITE_ONCE(prev->next, node); - arch_mcs_spin_lock_contended_label(&node->locked, release_node_err); + (void)arch_mcs_spin_lock_contended_label(&node->locked, release_node_err); /* * While waiting for the MCS lock, the next pointer may have diff --git a/tools/testing/selftests/bpf/progs/linked_list_fail.c b/tools/testing/selftests/bpf/progs/linked_list_fail.c index 6438982b928b..ddd26d1a083f 100644 --- a/tools/testing/selftests/bpf/progs/linked_list_fail.c +++ b/tools/testing/selftests/bpf/progs/linked_list_fail.c @@ -226,8 +226,7 @@ int obj_new_no_composite(void *ctx) SEC("?tc") int obj_new_no_struct(void *ctx) { - - bpf_obj_new(union { int data; unsigned udata; }); + (void)bpf_obj_new(union { int data; unsigned udata; }); return 0; } @@ -252,7 +251,7 @@ int new_null_ret(void *ctx) SEC("?tc") int obj_new_acq(void *ctx) { - bpf_obj_new(struct foo); + (void)bpf_obj_new(struct foo); return 0; } -- cgit v1.2.3 From 6624fb2f3382271953f951d46f2ea30415a0917e Mon Sep 17 00:00:00 2001 From: Rong Tao Date: Sat, 30 Aug 2025 00:32:13 +0800 Subject: selftests/bpf: Add tests for bpf_strnstr Add tests for bpf_strnstr(): bpf_strnstr("", "", 0) = 0 bpf_strnstr("hello world", "hello", 5) = 0 bpf_strnstr(str, "hello", 4) = -ENOENT bpf_strnstr("", "a", 0) = -ENOENT Signed-off-by: Rong Tao Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/tencent_2ED218F8082565C95D86A804BDDA8DBA200A@qq.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/string_kfuncs_success.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c index 46697f381878..a47690174e0e 100644 --- a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c @@ -30,8 +30,12 @@ __test(2) int test_strcspn(void *ctx) { return bpf_strcspn(str, "lo"); } __test(6) int test_strstr_found(void *ctx) { return bpf_strstr(str, "world"); } __test(-ENOENT) int test_strstr_notfound(void *ctx) { return bpf_strstr(str, "hi"); } __test(0) int test_strstr_empty(void *ctx) { return bpf_strstr(str, ""); } -__test(0) int test_strnstr_found(void *ctx) { return bpf_strnstr(str, "hello", 6); } -__test(-ENOENT) int test_strnstr_notfound(void *ctx) { return bpf_strnstr(str, "hi", 10); } +__test(0) int test_strnstr_found1(void *ctx) { return bpf_strnstr("", "", 0); } +__test(0) int test_strnstr_found2(void *ctx) { return bpf_strnstr(str, "hello", 5); } +__test(0) int test_strnstr_found3(void *ctx) { return bpf_strnstr(str, "hello", 6); } +__test(-ENOENT) int test_strnstr_notfound1(void *ctx) { return bpf_strnstr(str, "hi", 10); } +__test(-ENOENT) int test_strnstr_notfound2(void *ctx) { return bpf_strnstr(str, "hello", 4); } +__test(-ENOENT) int test_strnstr_notfound3(void *ctx) { return bpf_strnstr("", "a", 0); } __test(0) int test_strnstr_empty(void *ctx) { return bpf_strnstr(str, "", 1); } char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From ef1bd93b3b924086088b7818d9e5d89ede944f1f Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 8 Sep 2025 23:27:29 +0200 Subject: selftests: mptcp: shellcheck: support v0.11.0 This v0.11.0 version introduces SC2329: Warn when (non-escaping) functions are never invoked. Except that, similar to SC2317, ShellCheck is currently unable to figure out functions that are invoked via trap, or indirectly, when calling functions via variables. It is then needed to disable this new SC2329. Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250908-net-mptcp-misc-fixes-6-17-rc5-v1-3-5f2168a66079@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/diag.sh | 2 +- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 2 +- tools/testing/selftests/net/mptcp/mptcp_join.sh | 2 +- tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 2 +- tools/testing/selftests/net/mptcp/pm_netlink.sh | 5 +++-- tools/testing/selftests/net/mptcp/simult_flows.sh | 2 +- tools/testing/selftests/net/mptcp/userspace_pm.sh | 2 +- 7 files changed, 9 insertions(+), 8 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 7a3cb4c09e45..d847ff1737c3 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -28,7 +28,7 @@ flush_pids() } # This function is used in the cleanup trap -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 cleanup() { ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGKILL &>/dev/null diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 5e3c56253274..c2ab9f7f0d21 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -134,7 +134,7 @@ ns4="" TEST_GROUP="" # This function is used in the cleanup trap -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 cleanup() { rm -f "$cin_disconnect" diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 82cae37d9c20..7fd555b123b9 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -8,7 +8,7 @@ # ShellCheck incorrectly believes that most of the code here is unreachable # because it's invoked by variable name, see how the "tests" array is used -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 . "$(dirname "${0}")/mptcp_lib.sh" diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 418a903c3a4d..f01989be6e9b 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -95,7 +95,7 @@ init() } # This function is used in the cleanup trap -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 cleanup() { mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns_sbox}" diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index ac7ec6f94023..ec6a87588191 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -32,7 +32,7 @@ ns1="" err=$(mktemp) # This function is used in the cleanup trap -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 cleanup() { rm -f "${err}" @@ -70,8 +70,9 @@ format_endpoints() { mptcp_lib_pm_nl_format_endpoints "${@}" } +# This function is invoked indirectly +#shellcheck disable=SC2317,SC2329 get_endpoint() { - # shellcheck disable=SC2317 # invoked indirectly mptcp_lib_pm_nl_get_endpoint "${ns1}" "${@}" } diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 2329c2f8519b..1903e8e84a31 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -35,7 +35,7 @@ usage() { } # This function is used in the cleanup trap -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 cleanup() { rm -f "$cout" "$sout" diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 333064b0b5ac..970c329735ff 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -94,7 +94,7 @@ test_fail() } # This function is used in the cleanup trap -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 cleanup() { print_title "Cleanup" -- cgit v1.2.3 From d013ebc3499fd87cb9dee1dafd0c58aeb05c27c1 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 10 Sep 2025 16:56:06 +0200 Subject: selftests: can: enable CONFIG_CAN_VCAN as a module A proper kernel configuration for running kselftest can be obtained with: $ yes | make kselftest-merge Build of 'vcan' driver is currently missing, while the other required knobs are already there because of net/link_netns.py [1]. Add a config file in selftests/net/can to store the minimum set of kconfig needed for CAN selftests. [1] https://patch.msgid.link/20250219125039.18024-14-shaw.leon@gmail.com Fixes: 77442ffa83e8 ("selftests: can: Import tst-filter from can-tests") Reviewed-by: Vincent Mailhol Signed-off-by: Davide Caratti Link: https://patch.msgid.link/fa4c0ea262ec529f25e5f5aa9269d84764c67321.1757516009.git.dcaratti@redhat.com Signed-off-by: Marc Kleine-Budde --- tools/testing/selftests/net/can/config | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 tools/testing/selftests/net/can/config (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/net/can/config b/tools/testing/selftests/net/can/config new file mode 100644 index 000000000000..188f79796670 --- /dev/null +++ b/tools/testing/selftests/net/can/config @@ -0,0 +1,3 @@ +CONFIG_CAN=m +CONFIG_CAN_DEV=m +CONFIG_CAN_VCAN=m -- cgit v1.2.3 From fbdd61c94bcb09b0c0eb0655917bf4193d07aac1 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Wed, 10 Sep 2025 20:57:40 +0800 Subject: selftests/bpf: Skip timer cases when bpf_timer is not supported When enable CONFIG_PREEMPT_RT, verifier will reject bpf_timer with returning -EOPNOTSUPP. Therefore, skip test cases when errno is EOPNOTSUPP. cd tools/testing/selftests/bpf ./test_progs -t timer 125 free_timer:SKIP 456 timer:SKIP 457/1 timer_crash/array:SKIP 457/2 timer_crash/hash:SKIP 457 timer_crash:SKIP 458 timer_lockup:SKIP 459 timer_mim:SKIP Summary: 5/0 PASSED, 6 SKIPPED, 0 FAILED Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20250910125740.52172-3-leon.hwang@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/free_timer.c | 4 ++++ tools/testing/selftests/bpf/prog_tests/timer.c | 4 ++++ tools/testing/selftests/bpf/prog_tests/timer_crash.c | 4 ++++ tools/testing/selftests/bpf/prog_tests/timer_lockup.c | 4 ++++ tools/testing/selftests/bpf/prog_tests/timer_mim.c | 4 ++++ 5 files changed, 20 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/bpf/prog_tests/free_timer.c b/tools/testing/selftests/bpf/prog_tests/free_timer.c index b7b77a6b2979..0de8facca4c5 100644 --- a/tools/testing/selftests/bpf/prog_tests/free_timer.c +++ b/tools/testing/selftests/bpf/prog_tests/free_timer.c @@ -124,6 +124,10 @@ void test_free_timer(void) int err; skel = free_timer__open_and_load(); + if (!skel && errno == EOPNOTSUPP) { + test__skip(); + return; + } if (!ASSERT_OK_PTR(skel, "open_load")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c index d66687f1ee6a..56f660ca567b 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer.c +++ b/tools/testing/selftests/bpf/prog_tests/timer.c @@ -86,6 +86,10 @@ void serial_test_timer(void) int err; timer_skel = timer__open_and_load(); + if (!timer_skel && errno == EOPNOTSUPP) { + test__skip(); + return; + } if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/timer_crash.c b/tools/testing/selftests/bpf/prog_tests/timer_crash.c index f74b82305da8..b841597c8a3a 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer_crash.c +++ b/tools/testing/selftests/bpf/prog_tests/timer_crash.c @@ -12,6 +12,10 @@ static void test_timer_crash_mode(int mode) struct timer_crash *skel; skel = timer_crash__open_and_load(); + if (!skel && errno == EOPNOTSUPP) { + test__skip(); + return; + } if (!ASSERT_OK_PTR(skel, "timer_crash__open_and_load")) return; skel->bss->pid = getpid(); diff --git a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c index 1a2f99596916..eb303fa1e09a 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c +++ b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c @@ -59,6 +59,10 @@ void test_timer_lockup(void) } skel = timer_lockup__open_and_load(); + if (!skel && errno == EOPNOTSUPP) { + test__skip(); + return; + } if (!ASSERT_OK_PTR(skel, "timer_lockup__open_and_load")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/timer_mim.c b/tools/testing/selftests/bpf/prog_tests/timer_mim.c index 9ff7843909e7..c930c7d7105b 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer_mim.c +++ b/tools/testing/selftests/bpf/prog_tests/timer_mim.c @@ -65,6 +65,10 @@ void serial_test_timer_mim(void) goto cleanup; timer_skel = timer_mim__open_and_load(); + if (!timer_skel && errno == EOPNOTSUPP) { + test__skip(); + return; + } if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load")) goto cleanup; -- cgit v1.2.3