From 9ab121d65e03b4dc38f207871070eb353b396b05 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 8 Mar 2024 11:58:56 +0100 Subject: sched/debug: Allow CONFIG_SCHEDSTATS even on !KERNEL_DEBUG kernels All major Linux distributions enable CONFIG_SCHEDSTATS, so make it more widely available. Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20240308105901.1096078-6-mingo@kernel.org --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Kconfig.debug') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 6c596e65de8a..ed2ad1a0cec6 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1250,7 +1250,7 @@ config SCHED_INFO config SCHEDSTATS bool "Collect scheduler statistics" - depends on DEBUG_KERNEL && PROC_FS + depends on PROC_FS select SCHED_INFO help If you say Y here, additional code will be inserted into the -- cgit v1.2.3 From 229087f6f1dc2d0c38feba805770f28529980ec0 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 4 Apr 2024 15:03:44 -0700 Subject: bpf, kconfig: Fix DEBUG_INFO_BTF_MODULES Kconfig definition Turns out that due to CONFIG_DEBUG_INFO_BTF_MODULES not having an explicitly specified "menu item name" in Kconfig, it's basically impossible to turn it off (see [0]). This patch fixes the issue by defining menu name for CONFIG_DEBUG_INFO_BTF_MODULES, which makes it actually adjustable and independent of CONFIG_DEBUG_INFO_BTF, in the sense that one can have DEBUG_INFO_BTF=y and DEBUG_INFO_BTF_MODULES=n. We still keep it as defaulting to Y, of course. Fixes: 5f9ae91f7c0d ("kbuild: Build kernel module BTFs if BTF is enabled and pahole supports it") Reported-by: Vincent Li Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/CAK3+h2xiFfzQ9UXf56nrRRP=p1+iUxGoEP5B+aq9MDT5jLXDSg@mail.gmail.com [0] Link: https://lore.kernel.org/bpf/20240404220344.3879270-1-andrii@kernel.org --- lib/Kconfig.debug | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'lib/Kconfig.debug') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c63a5fbf1f1c..291185f54ee4 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -375,7 +375,7 @@ config DEBUG_INFO_SPLIT Incompatible with older versions of ccache. config DEBUG_INFO_BTF - bool "Generate BTF typeinfo" + bool "Generate BTF type information" depends on !DEBUG_INFO_SPLIT && !DEBUG_INFO_REDUCED depends on !GCC_PLUGIN_RANDSTRUCT || COMPILE_TEST depends on BPF_SYSCALL @@ -408,7 +408,8 @@ config PAHOLE_HAS_LANG_EXCLUDE using DEBUG_INFO_BTF_MODULES. config DEBUG_INFO_BTF_MODULES - def_bool y + bool "Generate BTF type information for kernel modules" + default y depends on DEBUG_INFO_BTF && MODULES && PAHOLE_HAS_SPLIT_BTF help Generate compact split BTF type information for kernel modules. -- cgit v1.2.3 From d7037381d00286aa4beb631c401da761ee564c94 Mon Sep 17 00:00:00 2001 From: Bitao Hu Date: Thu, 11 Apr 2024 15:41:33 +0800 Subject: watchdog/softlockup: Low-overhead detection of interrupt storm The following softlockup is caused by interrupt storm, but it cannot be identified from the call tree. Because the call tree is just a snapshot and doesn't fully capture the behavior of the CPU during the soft lockup. watchdog: BUG: soft lockup - CPU#28 stuck for 23s! [fio:83921] ... Call trace: __do_softirq+0xa0/0x37c __irq_exit_rcu+0x108/0x140 irq_exit+0x14/0x20 __handle_domain_irq+0x84/0xe0 gic_handle_irq+0x80/0x108 el0_irq_naked+0x50/0x58 Therefore, it is necessary to report CPU utilization during the softlockup_threshold period (report once every sample_period, for a total of 5 reportings), like this: watchdog: BUG: soft lockup - CPU#28 stuck for 23s! [fio:83921] CPU#28 Utilization every 4s during lockup: #1: 0% system, 0% softirq, 100% hardirq, 0% idle #2: 0% system, 0% softirq, 100% hardirq, 0% idle #3: 0% system, 0% softirq, 100% hardirq, 0% idle #4: 0% system, 0% softirq, 100% hardirq, 0% idle #5: 0% system, 0% softirq, 100% hardirq, 0% idle ... This is helpful in determining whether an interrupt storm has occurred or in identifying the cause of the softlockup. The criteria for determination are as follows: a. If the hardirq utilization is high, then interrupt storm should be considered and the root cause cannot be determined from the call tree. b. If the softirq utilization is high, then the call might not necessarily point at the root cause. c. If the system utilization is high, then analyzing the root cause from the call tree is possible in most cases. The mechanism requires a considerable amount of global storage space when configured for the maximum number of CPUs. Therefore, adding a SOFTLOCKUP_DETECTOR_INTR_STORM Kconfig knob that defaults to "yes" if the max number of CPUs is <= 128. Signed-off-by: Bitao Hu Signed-off-by: Thomas Gleixner Reviewed-by: Douglas Anderson Reviewed-by: Liu Song Link: https://lore.kernel.org/r/20240411074134.30922-5-yaoma@linux.alibaba.com --- kernel/watchdog.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- lib/Kconfig.debug | 14 ++++++++ 2 files changed, 112 insertions(+), 1 deletion(-) (limited to 'lib/Kconfig.debug') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index d7b2125503af..ef8ebd31fdab 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include #include #include @@ -35,6 +37,8 @@ static DEFINE_MUTEX(watchdog_mutex); # define WATCHDOG_HARDLOCKUP_DEFAULT 0 #endif +#define NUM_SAMPLE_PERIODS 5 + unsigned long __read_mostly watchdog_enabled; int __read_mostly watchdog_user_enabled = 1; static int __read_mostly watchdog_hardlockup_user_enabled = WATCHDOG_HARDLOCKUP_DEFAULT; @@ -333,6 +337,96 @@ __setup("watchdog_thresh=", watchdog_thresh_setup); static void __lockup_detector_cleanup(void); +#ifdef CONFIG_SOFTLOCKUP_DETECTOR_INTR_STORM +enum stats_per_group { + STATS_SYSTEM, + STATS_SOFTIRQ, + STATS_HARDIRQ, + STATS_IDLE, + NUM_STATS_PER_GROUP, +}; + +static const enum cpu_usage_stat tracked_stats[NUM_STATS_PER_GROUP] = { + CPUTIME_SYSTEM, + CPUTIME_SOFTIRQ, + CPUTIME_IRQ, + CPUTIME_IDLE, +}; + +static DEFINE_PER_CPU(u16, cpustat_old[NUM_STATS_PER_GROUP]); +static DEFINE_PER_CPU(u8, cpustat_util[NUM_SAMPLE_PERIODS][NUM_STATS_PER_GROUP]); +static DEFINE_PER_CPU(u8, cpustat_tail); + +/* + * We don't need nanosecond resolution. A granularity of 16ms is + * sufficient for our precision, allowing us to use u16 to store + * cpustats, which will roll over roughly every ~1000 seconds. + * 2^24 ~= 16 * 10^6 + */ +static u16 get_16bit_precision(u64 data_ns) +{ + return data_ns >> 24LL; /* 2^24ns ~= 16.8ms */ +} + +static void update_cpustat(void) +{ + int i; + u8 util; + u16 old_stat, new_stat; + struct kernel_cpustat kcpustat; + u64 *cpustat = kcpustat.cpustat; + u8 tail = __this_cpu_read(cpustat_tail); + u16 sample_period_16 = get_16bit_precision(sample_period); + + kcpustat_cpu_fetch(&kcpustat, smp_processor_id()); + + for (i = 0; i < NUM_STATS_PER_GROUP; i++) { + old_stat = __this_cpu_read(cpustat_old[i]); + new_stat = get_16bit_precision(cpustat[tracked_stats[i]]); + util = DIV_ROUND_UP(100 * (new_stat - old_stat), sample_period_16); + __this_cpu_write(cpustat_util[tail][i], util); + __this_cpu_write(cpustat_old[i], new_stat); + } + + __this_cpu_write(cpustat_tail, (tail + 1) % NUM_SAMPLE_PERIODS); +} + +static void print_cpustat(void) +{ + int i, group; + u8 tail = __this_cpu_read(cpustat_tail); + u64 sample_period_second = sample_period; + + do_div(sample_period_second, NSEC_PER_SEC); + + /* + * Outputting the "watchdog" prefix on every line is redundant and not + * concise, and the original alarm information is sufficient for + * positioning in logs, hence here printk() is used instead of pr_crit(). + */ + printk(KERN_CRIT "CPU#%d Utilization every %llus during lockup:\n", + smp_processor_id(), sample_period_second); + + for (i = 0; i < NUM_SAMPLE_PERIODS; i++) { + group = (tail + i) % NUM_SAMPLE_PERIODS; + printk(KERN_CRIT "\t#%d: %3u%% system,\t%3u%% softirq,\t" + "%3u%% hardirq,\t%3u%% idle\n", i + 1, + __this_cpu_read(cpustat_util[group][STATS_SYSTEM]), + __this_cpu_read(cpustat_util[group][STATS_SOFTIRQ]), + __this_cpu_read(cpustat_util[group][STATS_HARDIRQ]), + __this_cpu_read(cpustat_util[group][STATS_IDLE])); + } +} + +static void report_cpu_status(void) +{ + print_cpustat(); +} +#else +static inline void update_cpustat(void) { } +static inline void report_cpu_status(void) { } +#endif + /* * Hard-lockup warnings should be triggered after just a few seconds. Soft- * lockups can have false positives under extreme conditions. So we generally @@ -364,7 +458,7 @@ static void set_sample_period(void) * and hard thresholds) to increment before the * hardlockup detector generates a warning */ - sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5); + sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / NUM_SAMPLE_PERIODS); watchdog_update_hrtimer_threshold(sample_period); } @@ -504,6 +598,8 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) */ period_ts = READ_ONCE(*this_cpu_ptr(&watchdog_report_ts)); + update_cpustat(); + /* Reset the interval when touched by known problematic code. */ if (period_ts == SOFTLOCKUP_DELAY_REPORT) { if (unlikely(__this_cpu_read(softlockup_touch_sync))) { @@ -539,6 +635,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", smp_processor_id(), duration, current->comm, task_pid_nr(current)); + report_cpu_status(); print_modules(); print_irqtrace_events(current); if (regs) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c63a5fbf1f1c..1f0ce712a671 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1029,6 +1029,20 @@ config SOFTLOCKUP_DETECTOR chance to run. The current stack trace is displayed upon detection and the system will stay locked up. +config SOFTLOCKUP_DETECTOR_INTR_STORM + bool "Detect Interrupt Storm in Soft Lockups" + depends on SOFTLOCKUP_DETECTOR && IRQ_TIME_ACCOUNTING + select GENERIC_IRQ_STAT_SNAPSHOT + default y if NR_CPUS <= 128 + help + Say Y here to enable the kernel to detect interrupt storm + during "soft lockups". + + "soft lockups" can be caused by a variety of reasons. If one is + caused by an interrupt storm, then the storming interrupts will not + be on the callstack. To detect this case, it is necessary to report + the CPU stats and the interrupt counts during the "soft lockups". + config BOOTPARAM_SOFTLOCKUP_PANIC bool "Panic (Reboot) On Soft Lockups" depends on SOFTLOCKUP_DETECTOR -- cgit v1.2.3 From bb8d9b742aa7c576d39b354612224b3c6bfd3cbc Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 19 Apr 2024 07:01:51 -0700 Subject: string: Merge strscpy KUnit tests into string_kunit.c Move the strscpy() tests into string_kunit.c. Remove the separate Kconfig and Makefile rule. Reviewed-by: Andy Shevchenko Tested-by: Ivan Orlov Link: https://lore.kernel.org/r/20240419140155.3028912-2-keescook@chromium.org Signed-off-by: Kees Cook --- MAINTAINERS | 1 - lib/Kconfig.debug | 5 -- lib/Makefile | 1 - lib/string_kunit.c | 120 +++++++++++++++++++++++++++++++++++++++++++ lib/strscpy_kunit.c | 143 ---------------------------------------------------- 5 files changed, 120 insertions(+), 150 deletions(-) delete mode 100644 lib/strscpy_kunit.c (limited to 'lib/Kconfig.debug') diff --git a/MAINTAINERS b/MAINTAINERS index 7c121493f43d..17d079aa15ec 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8442,7 +8442,6 @@ F: include/linux/fortify-string.h F: lib/fortify_kunit.c F: lib/memcpy_kunit.c F: lib/strcat_kunit.c -F: lib/strscpy_kunit.c F: lib/test_fortify/* F: scripts/test_fortify.sh K: \b__NO_FORTIFY\b diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c63a5fbf1f1c..7ffb06eabcd1 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2763,11 +2763,6 @@ config STRCAT_KUNIT_TEST depends on KUNIT default KUNIT_ALL_TESTS -config STRSCPY_KUNIT_TEST - tristate "Test strscpy*() family of functions at runtime" if !KUNIT_ALL_TESTS - depends on KUNIT - default KUNIT_ALL_TESTS - config SIPHASH_KUNIT_TEST tristate "Perform selftest on siphash functions" if !KUNIT_ALL_TESTS depends on KUNIT diff --git a/lib/Makefile b/lib/Makefile index ffc6b2341b45..5f994b963d1a 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -404,7 +404,6 @@ CFLAGS_fortify_kunit.o += $(call cc-disable-warning, stringop-truncation) CFLAGS_fortify_kunit.o += $(DISABLE_STRUCTLEAK_PLUGIN) obj-$(CONFIG_FORTIFY_KUNIT_TEST) += fortify_kunit.o obj-$(CONFIG_STRCAT_KUNIT_TEST) += strcat_kunit.o -obj-$(CONFIG_STRSCPY_KUNIT_TEST) += strscpy_kunit.o obj-$(CONFIG_SIPHASH_KUNIT_TEST) += siphash_kunit.o obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o diff --git a/lib/string_kunit.c b/lib/string_kunit.c index dd19bd7748aa..4af04643f4c2 100644 --- a/lib/string_kunit.c +++ b/lib/string_kunit.c @@ -326,6 +326,125 @@ static void test_strncasecmp_long_strings(struct kunit *test) strcmp_buffer2, STRCMP_CHANGE_POINT + 1); } +/** + * strscpy_check() - Run a specific test case. + * @test: KUnit test context pointer + * @src: Source string, argument to strscpy_pad() + * @count: Size of destination buffer, argument to strscpy_pad() + * @expected: Expected return value from call to strscpy_pad() + * @chars: Number of characters from the src string expected to be + * written to the dst buffer. + * @terminator: 1 if there should be a terminating null byte 0 otherwise. + * @pad: Number of pad characters expected (in the tail of dst buffer). + * (@pad does not include the null terminator byte.) + * + * Calls strscpy_pad() and verifies the return value and state of the + * destination buffer after the call returns. + */ +static void strscpy_check(struct kunit *test, char *src, int count, + int expected, int chars, int terminator, int pad) +{ + int nr_bytes_poison; + int max_expected; + int max_count; + int written; + char buf[6]; + int index, i; + const char POISON = 'z'; + + KUNIT_ASSERT_TRUE_MSG(test, src != NULL, + "null source string not supported"); + + memset(buf, POISON, sizeof(buf)); + /* Future proofing test suite, validate args */ + max_count = sizeof(buf) - 2; /* Space for null and to verify overflow */ + max_expected = count - 1; /* Space for the null */ + + KUNIT_ASSERT_LE_MSG(test, count, max_count, + "count (%d) is too big (%d) ... aborting", count, max_count); + KUNIT_EXPECT_LE_MSG(test, expected, max_expected, + "expected (%d) is bigger than can possibly be returned (%d)", + expected, max_expected); + + written = strscpy_pad(buf, src, count); + KUNIT_ASSERT_EQ(test, written, expected); + + if (count && written == -E2BIG) { + KUNIT_ASSERT_EQ_MSG(test, 0, strncmp(buf, src, count - 1), + "buffer state invalid for -E2BIG"); + KUNIT_ASSERT_EQ_MSG(test, buf[count - 1], '\0', + "too big string is not null terminated correctly"); + } + + for (i = 0; i < chars; i++) + KUNIT_ASSERT_EQ_MSG(test, buf[i], src[i], + "buf[i]==%c != src[i]==%c", buf[i], src[i]); + + if (terminator) + KUNIT_ASSERT_EQ_MSG(test, buf[count - 1], '\0', + "string is not null terminated correctly"); + + for (i = 0; i < pad; i++) { + index = chars + terminator + i; + KUNIT_ASSERT_EQ_MSG(test, buf[index], '\0', + "padding missing at index: %d", i); + } + + nr_bytes_poison = sizeof(buf) - chars - terminator - pad; + for (i = 0; i < nr_bytes_poison; i++) { + index = sizeof(buf) - 1 - i; /* Check from the end back */ + KUNIT_ASSERT_EQ_MSG(test, buf[index], POISON, + "poison value missing at index: %d", i); + } +} + +static void test_strscpy(struct kunit *test) +{ + char dest[8]; + + /* + * strscpy_check() uses a destination buffer of size 6 and needs at + * least 2 characters spare (one for null and one to check for + * overflow). This means we should only call tc() with + * strings up to a maximum of 4 characters long and 'count' + * should not exceed 4. To test with longer strings increase + * the buffer size in tc(). + */ + + /* strscpy_check(test, src, count, expected, chars, terminator, pad) */ + strscpy_check(test, "a", 0, -E2BIG, 0, 0, 0); + strscpy_check(test, "", 0, -E2BIG, 0, 0, 0); + + strscpy_check(test, "a", 1, -E2BIG, 0, 1, 0); + strscpy_check(test, "", 1, 0, 0, 1, 0); + + strscpy_check(test, "ab", 2, -E2BIG, 1, 1, 0); + strscpy_check(test, "a", 2, 1, 1, 1, 0); + strscpy_check(test, "", 2, 0, 0, 1, 1); + + strscpy_check(test, "abc", 3, -E2BIG, 2, 1, 0); + strscpy_check(test, "ab", 3, 2, 2, 1, 0); + strscpy_check(test, "a", 3, 1, 1, 1, 1); + strscpy_check(test, "", 3, 0, 0, 1, 2); + + strscpy_check(test, "abcd", 4, -E2BIG, 3, 1, 0); + strscpy_check(test, "abc", 4, 3, 3, 1, 0); + strscpy_check(test, "ab", 4, 2, 2, 1, 1); + strscpy_check(test, "a", 4, 1, 1, 1, 2); + strscpy_check(test, "", 4, 0, 0, 1, 3); + + /* Compile-time-known source strings. */ + KUNIT_EXPECT_EQ(test, strscpy(dest, "", ARRAY_SIZE(dest)), 0); + KUNIT_EXPECT_EQ(test, strscpy(dest, "", 3), 0); + KUNIT_EXPECT_EQ(test, strscpy(dest, "", 1), 0); + KUNIT_EXPECT_EQ(test, strscpy(dest, "", 0), -E2BIG); + KUNIT_EXPECT_EQ(test, strscpy(dest, "Fixed", ARRAY_SIZE(dest)), 5); + KUNIT_EXPECT_EQ(test, strscpy(dest, "Fixed", 3), -E2BIG); + KUNIT_EXPECT_EQ(test, strscpy(dest, "Fixed", 1), -E2BIG); + KUNIT_EXPECT_EQ(test, strscpy(dest, "Fixed", 0), -E2BIG); + KUNIT_EXPECT_EQ(test, strscpy(dest, "This is too long", ARRAY_SIZE(dest)), -E2BIG); +} + static struct kunit_case string_test_cases[] = { KUNIT_CASE(test_memset16), KUNIT_CASE(test_memset32), @@ -341,6 +460,7 @@ static struct kunit_case string_test_cases[] = { KUNIT_CASE(test_strcasecmp_long_strings), KUNIT_CASE(test_strncasecmp), KUNIT_CASE(test_strncasecmp_long_strings), + KUNIT_CASE(test_strscpy), {} }; diff --git a/lib/strscpy_kunit.c b/lib/strscpy_kunit.c deleted file mode 100644 index b6d1d93a8883..000000000000 --- a/lib/strscpy_kunit.c +++ /dev/null @@ -1,143 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Kernel module for testing 'strscpy' family of functions. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include - -/** - * strscpy_check() - Run a specific test case. - * @test: KUnit test context pointer - * @src: Source string, argument to strscpy_pad() - * @count: Size of destination buffer, argument to strscpy_pad() - * @expected: Expected return value from call to strscpy_pad() - * @chars: Number of characters from the src string expected to be - * written to the dst buffer. - * @terminator: 1 if there should be a terminating null byte 0 otherwise. - * @pad: Number of pad characters expected (in the tail of dst buffer). - * (@pad does not include the null terminator byte.) - * - * Calls strscpy_pad() and verifies the return value and state of the - * destination buffer after the call returns. - */ -static void strscpy_check(struct kunit *test, char *src, int count, - int expected, int chars, int terminator, int pad) -{ - int nr_bytes_poison; - int max_expected; - int max_count; - int written; - char buf[6]; - int index, i; - const char POISON = 'z'; - - KUNIT_ASSERT_TRUE_MSG(test, src != NULL, - "null source string not supported"); - - memset(buf, POISON, sizeof(buf)); - /* Future proofing test suite, validate args */ - max_count = sizeof(buf) - 2; /* Space for null and to verify overflow */ - max_expected = count - 1; /* Space for the null */ - - KUNIT_ASSERT_LE_MSG(test, count, max_count, - "count (%d) is too big (%d) ... aborting", count, max_count); - KUNIT_EXPECT_LE_MSG(test, expected, max_expected, - "expected (%d) is bigger than can possibly be returned (%d)", - expected, max_expected); - - written = strscpy_pad(buf, src, count); - KUNIT_ASSERT_EQ(test, written, expected); - - if (count && written == -E2BIG) { - KUNIT_ASSERT_EQ_MSG(test, 0, strncmp(buf, src, count - 1), - "buffer state invalid for -E2BIG"); - KUNIT_ASSERT_EQ_MSG(test, buf[count - 1], '\0', - "too big string is not null terminated correctly"); - } - - for (i = 0; i < chars; i++) - KUNIT_ASSERT_EQ_MSG(test, buf[i], src[i], - "buf[i]==%c != src[i]==%c", buf[i], src[i]); - - if (terminator) - KUNIT_ASSERT_EQ_MSG(test, buf[count - 1], '\0', - "string is not null terminated correctly"); - - for (i = 0; i < pad; i++) { - index = chars + terminator + i; - KUNIT_ASSERT_EQ_MSG(test, buf[index], '\0', - "padding missing at index: %d", i); - } - - nr_bytes_poison = sizeof(buf) - chars - terminator - pad; - for (i = 0; i < nr_bytes_poison; i++) { - index = sizeof(buf) - 1 - i; /* Check from the end back */ - KUNIT_ASSERT_EQ_MSG(test, buf[index], POISON, - "poison value missing at index: %d", i); - } -} - -static void test_strscpy(struct kunit *test) -{ - char dest[8]; - - /* - * strscpy_check() uses a destination buffer of size 6 and needs at - * least 2 characters spare (one for null and one to check for - * overflow). This means we should only call tc() with - * strings up to a maximum of 4 characters long and 'count' - * should not exceed 4. To test with longer strings increase - * the buffer size in tc(). - */ - - /* strscpy_check(test, src, count, expected, chars, terminator, pad) */ - strscpy_check(test, "a", 0, -E2BIG, 0, 0, 0); - strscpy_check(test, "", 0, -E2BIG, 0, 0, 0); - - strscpy_check(test, "a", 1, -E2BIG, 0, 1, 0); - strscpy_check(test, "", 1, 0, 0, 1, 0); - - strscpy_check(test, "ab", 2, -E2BIG, 1, 1, 0); - strscpy_check(test, "a", 2, 1, 1, 1, 0); - strscpy_check(test, "", 2, 0, 0, 1, 1); - - strscpy_check(test, "abc", 3, -E2BIG, 2, 1, 0); - strscpy_check(test, "ab", 3, 2, 2, 1, 0); - strscpy_check(test, "a", 3, 1, 1, 1, 1); - strscpy_check(test, "", 3, 0, 0, 1, 2); - - strscpy_check(test, "abcd", 4, -E2BIG, 3, 1, 0); - strscpy_check(test, "abc", 4, 3, 3, 1, 0); - strscpy_check(test, "ab", 4, 2, 2, 1, 1); - strscpy_check(test, "a", 4, 1, 1, 1, 2); - strscpy_check(test, "", 4, 0, 0, 1, 3); - - /* Compile-time-known source strings. */ - KUNIT_EXPECT_EQ(test, strscpy(dest, "", ARRAY_SIZE(dest)), 0); - KUNIT_EXPECT_EQ(test, strscpy(dest, "", 3), 0); - KUNIT_EXPECT_EQ(test, strscpy(dest, "", 1), 0); - KUNIT_EXPECT_EQ(test, strscpy(dest, "", 0), -E2BIG); - KUNIT_EXPECT_EQ(test, strscpy(dest, "Fixed", ARRAY_SIZE(dest)), 5); - KUNIT_EXPECT_EQ(test, strscpy(dest, "Fixed", 3), -E2BIG); - KUNIT_EXPECT_EQ(test, strscpy(dest, "Fixed", 1), -E2BIG); - KUNIT_EXPECT_EQ(test, strscpy(dest, "Fixed", 0), -E2BIG); - KUNIT_EXPECT_EQ(test, strscpy(dest, "This is too long", ARRAY_SIZE(dest)), -E2BIG); -} - -static struct kunit_case strscpy_test_cases[] = { - KUNIT_CASE(test_strscpy), - {} -}; - -static struct kunit_suite strscpy_test_suite = { - .name = "strscpy", - .test_cases = strscpy_test_cases, -}; - -kunit_test_suite(strscpy_test_suite); - -MODULE_AUTHOR("Tobin C. Harding "); -MODULE_LICENSE("GPL"); -- cgit v1.2.3 From bd678f7d9b72ab8b6978dac92b841e46f4b935a3 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 19 Apr 2024 07:01:53 -0700 Subject: string: Merge strcat KUnit tests into string_kunit.c Move the strcat() tests into string_kunit.c. Remove the separate Kconfig and Makefile rule. Reviewed-by: Andy Shevchenko Tested-by: Ivan Orlov Link: https://lore.kernel.org/r/20240419140155.3028912-4-keescook@chromium.org Signed-off-by: Kees Cook --- MAINTAINERS | 1 - lib/Kconfig.debug | 5 --- lib/Makefile | 1 - lib/strcat_kunit.c | 104 ----------------------------------------------------- lib/string_kunit.c | 82 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 82 insertions(+), 111 deletions(-) delete mode 100644 lib/strcat_kunit.c (limited to 'lib/Kconfig.debug') diff --git a/MAINTAINERS b/MAINTAINERS index 17d079aa15ec..8974511315c3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8441,7 +8441,6 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/har F: include/linux/fortify-string.h F: lib/fortify_kunit.c F: lib/memcpy_kunit.c -F: lib/strcat_kunit.c F: lib/test_fortify/* F: scripts/test_fortify.sh K: \b__NO_FORTIFY\b diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 7ffb06eabcd1..a384070c74bc 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2758,11 +2758,6 @@ config HW_BREAKPOINT_KUNIT_TEST If unsure, say N. -config STRCAT_KUNIT_TEST - tristate "Test strcat() family of functions at runtime" if !KUNIT_ALL_TESTS - depends on KUNIT - default KUNIT_ALL_TESTS - config SIPHASH_KUNIT_TEST tristate "Perform selftest on siphash functions" if !KUNIT_ALL_TESTS depends on KUNIT diff --git a/lib/Makefile b/lib/Makefile index 5f994b963d1a..b040ad5f8022 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -403,7 +403,6 @@ CFLAGS_fortify_kunit.o += $(call cc-disable-warning, stringop-overread) CFLAGS_fortify_kunit.o += $(call cc-disable-warning, stringop-truncation) CFLAGS_fortify_kunit.o += $(DISABLE_STRUCTLEAK_PLUGIN) obj-$(CONFIG_FORTIFY_KUNIT_TEST) += fortify_kunit.o -obj-$(CONFIG_STRCAT_KUNIT_TEST) += strcat_kunit.o obj-$(CONFIG_SIPHASH_KUNIT_TEST) += siphash_kunit.o obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o diff --git a/lib/strcat_kunit.c b/lib/strcat_kunit.c deleted file mode 100644 index ca09f7f0e6a2..000000000000 --- a/lib/strcat_kunit.c +++ /dev/null @@ -1,104 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Kernel module for testing 'strcat' family of functions. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include - -static volatile int unconst; - -static void test_strcat(struct kunit *test) -{ - char dest[8]; - - /* Destination is terminated. */ - memset(dest, 0, sizeof(dest)); - KUNIT_EXPECT_EQ(test, strlen(dest), 0); - /* Empty copy does nothing. */ - KUNIT_EXPECT_TRUE(test, strcat(dest, "") == dest); - KUNIT_EXPECT_STREQ(test, dest, ""); - /* 4 characters copied in, stops at %NUL. */ - KUNIT_EXPECT_TRUE(test, strcat(dest, "four\000123") == dest); - KUNIT_EXPECT_STREQ(test, dest, "four"); - KUNIT_EXPECT_EQ(test, dest[5], '\0'); - /* 2 more characters copied in okay. */ - KUNIT_EXPECT_TRUE(test, strcat(dest, "AB") == dest); - KUNIT_EXPECT_STREQ(test, dest, "fourAB"); -} - -static void test_strncat(struct kunit *test) -{ - char dest[8]; - - /* Destination is terminated. */ - memset(dest, 0, sizeof(dest)); - KUNIT_EXPECT_EQ(test, strlen(dest), 0); - /* Empty copy of size 0 does nothing. */ - KUNIT_EXPECT_TRUE(test, strncat(dest, "", 0 + unconst) == dest); - KUNIT_EXPECT_STREQ(test, dest, ""); - /* Empty copy of size 1 does nothing too. */ - KUNIT_EXPECT_TRUE(test, strncat(dest, "", 1 + unconst) == dest); - KUNIT_EXPECT_STREQ(test, dest, ""); - /* Copy of max 0 characters should do nothing. */ - KUNIT_EXPECT_TRUE(test, strncat(dest, "asdf", 0 + unconst) == dest); - KUNIT_EXPECT_STREQ(test, dest, ""); - - /* 4 characters copied in, even if max is 8. */ - KUNIT_EXPECT_TRUE(test, strncat(dest, "four\000123", 8 + unconst) == dest); - KUNIT_EXPECT_STREQ(test, dest, "four"); - KUNIT_EXPECT_EQ(test, dest[5], '\0'); - KUNIT_EXPECT_EQ(test, dest[6], '\0'); - /* 2 characters copied in okay, 2 ignored. */ - KUNIT_EXPECT_TRUE(test, strncat(dest, "ABCD", 2 + unconst) == dest); - KUNIT_EXPECT_STREQ(test, dest, "fourAB"); -} - -static void test_strlcat(struct kunit *test) -{ - char dest[8] = ""; - int len = sizeof(dest) + unconst; - - /* Destination is terminated. */ - KUNIT_EXPECT_EQ(test, strlen(dest), 0); - /* Empty copy is size 0. */ - KUNIT_EXPECT_EQ(test, strlcat(dest, "", len), 0); - KUNIT_EXPECT_STREQ(test, dest, ""); - /* Size 1 should keep buffer terminated, report size of source only. */ - KUNIT_EXPECT_EQ(test, strlcat(dest, "four", 1 + unconst), 4); - KUNIT_EXPECT_STREQ(test, dest, ""); - - /* 4 characters copied in. */ - KUNIT_EXPECT_EQ(test, strlcat(dest, "four", len), 4); - KUNIT_EXPECT_STREQ(test, dest, "four"); - /* 2 characters copied in okay, gets to 6 total. */ - KUNIT_EXPECT_EQ(test, strlcat(dest, "AB", len), 6); - KUNIT_EXPECT_STREQ(test, dest, "fourAB"); - /* 2 characters ignored if max size (7) reached. */ - KUNIT_EXPECT_EQ(test, strlcat(dest, "CD", 7 + unconst), 8); - KUNIT_EXPECT_STREQ(test, dest, "fourAB"); - /* 1 of 2 characters skipped, now at true max size. */ - KUNIT_EXPECT_EQ(test, strlcat(dest, "EFG", len), 9); - KUNIT_EXPECT_STREQ(test, dest, "fourABE"); - /* Everything else ignored, now at full size. */ - KUNIT_EXPECT_EQ(test, strlcat(dest, "1234", len), 11); - KUNIT_EXPECT_STREQ(test, dest, "fourABE"); -} - -static struct kunit_case strcat_test_cases[] = { - KUNIT_CASE(test_strcat), - KUNIT_CASE(test_strncat), - KUNIT_CASE(test_strlcat), - {} -}; - -static struct kunit_suite strcat_test_suite = { - .name = "strcat", - .test_cases = strcat_test_cases, -}; - -kunit_test_suite(strcat_test_suite); - -MODULE_LICENSE("GPL"); diff --git a/lib/string_kunit.c b/lib/string_kunit.c index 4af04643f4c2..48752ed19d56 100644 --- a/lib/string_kunit.c +++ b/lib/string_kunit.c @@ -445,6 +445,85 @@ static void test_strscpy(struct kunit *test) KUNIT_EXPECT_EQ(test, strscpy(dest, "This is too long", ARRAY_SIZE(dest)), -E2BIG); } +static volatile int unconst; + +static void test_strcat(struct kunit *test) +{ + char dest[8]; + + /* Destination is terminated. */ + memset(dest, 0, sizeof(dest)); + KUNIT_EXPECT_EQ(test, strlen(dest), 0); + /* Empty copy does nothing. */ + KUNIT_EXPECT_TRUE(test, strcat(dest, "") == dest); + KUNIT_EXPECT_STREQ(test, dest, ""); + /* 4 characters copied in, stops at %NUL. */ + KUNIT_EXPECT_TRUE(test, strcat(dest, "four\000123") == dest); + KUNIT_EXPECT_STREQ(test, dest, "four"); + KUNIT_EXPECT_EQ(test, dest[5], '\0'); + /* 2 more characters copied in okay. */ + KUNIT_EXPECT_TRUE(test, strcat(dest, "AB") == dest); + KUNIT_EXPECT_STREQ(test, dest, "fourAB"); +} + +static void test_strncat(struct kunit *test) +{ + char dest[8]; + + /* Destination is terminated. */ + memset(dest, 0, sizeof(dest)); + KUNIT_EXPECT_EQ(test, strlen(dest), 0); + /* Empty copy of size 0 does nothing. */ + KUNIT_EXPECT_TRUE(test, strncat(dest, "", 0 + unconst) == dest); + KUNIT_EXPECT_STREQ(test, dest, ""); + /* Empty copy of size 1 does nothing too. */ + KUNIT_EXPECT_TRUE(test, strncat(dest, "", 1 + unconst) == dest); + KUNIT_EXPECT_STREQ(test, dest, ""); + /* Copy of max 0 characters should do nothing. */ + KUNIT_EXPECT_TRUE(test, strncat(dest, "asdf", 0 + unconst) == dest); + KUNIT_EXPECT_STREQ(test, dest, ""); + + /* 4 characters copied in, even if max is 8. */ + KUNIT_EXPECT_TRUE(test, strncat(dest, "four\000123", 8 + unconst) == dest); + KUNIT_EXPECT_STREQ(test, dest, "four"); + KUNIT_EXPECT_EQ(test, dest[5], '\0'); + KUNIT_EXPECT_EQ(test, dest[6], '\0'); + /* 2 characters copied in okay, 2 ignored. */ + KUNIT_EXPECT_TRUE(test, strncat(dest, "ABCD", 2 + unconst) == dest); + KUNIT_EXPECT_STREQ(test, dest, "fourAB"); +} + +static void test_strlcat(struct kunit *test) +{ + char dest[8] = ""; + int len = sizeof(dest) + unconst; + + /* Destination is terminated. */ + KUNIT_EXPECT_EQ(test, strlen(dest), 0); + /* Empty copy is size 0. */ + KUNIT_EXPECT_EQ(test, strlcat(dest, "", len), 0); + KUNIT_EXPECT_STREQ(test, dest, ""); + /* Size 1 should keep buffer terminated, report size of source only. */ + KUNIT_EXPECT_EQ(test, strlcat(dest, "four", 1 + unconst), 4); + KUNIT_EXPECT_STREQ(test, dest, ""); + + /* 4 characters copied in. */ + KUNIT_EXPECT_EQ(test, strlcat(dest, "four", len), 4); + KUNIT_EXPECT_STREQ(test, dest, "four"); + /* 2 characters copied in okay, gets to 6 total. */ + KUNIT_EXPECT_EQ(test, strlcat(dest, "AB", len), 6); + KUNIT_EXPECT_STREQ(test, dest, "fourAB"); + /* 2 characters ignored if max size (7) reached. */ + KUNIT_EXPECT_EQ(test, strlcat(dest, "CD", 7 + unconst), 8); + KUNIT_EXPECT_STREQ(test, dest, "fourAB"); + /* 1 of 2 characters skipped, now at true max size. */ + KUNIT_EXPECT_EQ(test, strlcat(dest, "EFG", len), 9); + KUNIT_EXPECT_STREQ(test, dest, "fourABE"); + /* Everything else ignored, now at full size. */ + KUNIT_EXPECT_EQ(test, strlcat(dest, "1234", len), 11); + KUNIT_EXPECT_STREQ(test, dest, "fourABE"); +} + static struct kunit_case string_test_cases[] = { KUNIT_CASE(test_memset16), KUNIT_CASE(test_memset32), @@ -461,6 +540,9 @@ static struct kunit_case string_test_cases[] = { KUNIT_CASE(test_strncasecmp), KUNIT_CASE(test_strncasecmp_long_strings), KUNIT_CASE(test_strscpy), + KUNIT_CASE(test_strcat), + KUNIT_CASE(test_strncat), + KUNIT_CASE(test_strlcat), {} }; -- cgit v1.2.3 From 916cc5167cc6f81651f4460d9a35c939ad18ecff Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 21 Mar 2024 09:36:32 -0700 Subject: lib: code tagging framework MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add basic infrastructure to support code tagging which stores tag common information consisting of the module name, function, file name and line number. Provide functions to register a new code tag type and navigate between code tags. Link: https://lkml.kernel.org/r/20240321163705.3067592-11-surenb@google.com Co-developed-by: Kent Overstreet Signed-off-by: Kent Overstreet Signed-off-by: Suren Baghdasaryan Tested-by: Kees Cook Cc: Alexander Viro Cc: Alex Gaynor Cc: Alice Ryhl Cc: Andreas Hindborg Cc: Benno Lossin Cc: "Björn Roy Baron" Cc: Boqun Feng Cc: Christoph Lameter Cc: Dennis Zhou Cc: Gary Guo Cc: Miguel Ojeda Cc: Pasha Tatashin Cc: Peter Zijlstra Cc: Tejun Heo Cc: Vlastimil Babka Cc: Wedson Almeida Filho Signed-off-by: Andrew Morton --- include/linux/codetag.h | 68 +++++++++++++++ lib/Kconfig.debug | 4 + lib/Makefile | 1 + lib/codetag.c | 219 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 292 insertions(+) create mode 100644 include/linux/codetag.h create mode 100644 lib/codetag.c (limited to 'lib/Kconfig.debug') diff --git a/include/linux/codetag.h b/include/linux/codetag.h new file mode 100644 index 000000000000..7734269cdb63 --- /dev/null +++ b/include/linux/codetag.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * code tagging framework + */ +#ifndef _LINUX_CODETAG_H +#define _LINUX_CODETAG_H + +#include + +struct codetag_iterator; +struct codetag_type; +struct codetag_module; +struct seq_buf; +struct module; + +/* + * An instance of this structure is created in a special ELF section at every + * code location being tagged. At runtime, the special section is treated as + * an array of these. + */ +struct codetag { + unsigned int flags; /* used in later patches */ + unsigned int lineno; + const char *modname; + const char *function; + const char *filename; +} __aligned(8); + +union codetag_ref { + struct codetag *ct; +}; + +struct codetag_type_desc { + const char *section; + size_t tag_size; +}; + +struct codetag_iterator { + struct codetag_type *cttype; + struct codetag_module *cmod; + unsigned long mod_id; + struct codetag *ct; +}; + +#ifdef MODULE +#define CT_MODULE_NAME KBUILD_MODNAME +#else +#define CT_MODULE_NAME NULL +#endif + +#define CODE_TAG_INIT { \ + .modname = CT_MODULE_NAME, \ + .function = __func__, \ + .filename = __FILE__, \ + .lineno = __LINE__, \ + .flags = 0, \ +} + +void codetag_lock_module_list(struct codetag_type *cttype, bool lock); +struct codetag_iterator codetag_get_ct_iter(struct codetag_type *cttype); +struct codetag *codetag_next_ct(struct codetag_iterator *iter); + +void codetag_to_text(struct seq_buf *out, struct codetag *ct); + +struct codetag_type * +codetag_register_type(const struct codetag_type_desc *desc); + +#endif /* _LINUX_CODETAG_H */ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c63a5fbf1f1c..015fc6ee9849 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -968,6 +968,10 @@ config DEBUG_STACKOVERFLOW If in doubt, say "N". +config CODE_TAGGING + bool + select KALLSYMS + source "lib/Kconfig.kasan" source "lib/Kconfig.kfence" source "lib/Kconfig.kmsan" diff --git a/lib/Makefile b/lib/Makefile index ffc6b2341b45..910335da8f13 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -233,6 +233,7 @@ obj-$(CONFIG_OF_RECONFIG_NOTIFIER_ERROR_INJECT) += \ of-reconfig-notifier-error-inject.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o +obj-$(CONFIG_CODE_TAGGING) += codetag.o lib-$(CONFIG_GENERIC_BUG) += bug.o obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o diff --git a/lib/codetag.c b/lib/codetag.c new file mode 100644 index 000000000000..8b5b89ad508d --- /dev/null +++ b/lib/codetag.c @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include +#include + +struct codetag_type { + struct list_head link; + unsigned int count; + struct idr mod_idr; + struct rw_semaphore mod_lock; /* protects mod_idr */ + struct codetag_type_desc desc; +}; + +struct codetag_range { + struct codetag *start; + struct codetag *stop; +}; + +struct codetag_module { + struct module *mod; + struct codetag_range range; +}; + +static DEFINE_MUTEX(codetag_lock); +static LIST_HEAD(codetag_types); + +void codetag_lock_module_list(struct codetag_type *cttype, bool lock) +{ + if (lock) + down_read(&cttype->mod_lock); + else + up_read(&cttype->mod_lock); +} + +struct codetag_iterator codetag_get_ct_iter(struct codetag_type *cttype) +{ + struct codetag_iterator iter = { + .cttype = cttype, + .cmod = NULL, + .mod_id = 0, + .ct = NULL, + }; + + return iter; +} + +static inline struct codetag *get_first_module_ct(struct codetag_module *cmod) +{ + return cmod->range.start < cmod->range.stop ? cmod->range.start : NULL; +} + +static inline +struct codetag *get_next_module_ct(struct codetag_iterator *iter) +{ + struct codetag *res = (struct codetag *) + ((char *)iter->ct + iter->cttype->desc.tag_size); + + return res < iter->cmod->range.stop ? res : NULL; +} + +struct codetag *codetag_next_ct(struct codetag_iterator *iter) +{ + struct codetag_type *cttype = iter->cttype; + struct codetag_module *cmod; + struct codetag *ct; + + lockdep_assert_held(&cttype->mod_lock); + + if (unlikely(idr_is_empty(&cttype->mod_idr))) + return NULL; + + ct = NULL; + while (true) { + cmod = idr_find(&cttype->mod_idr, iter->mod_id); + + /* If module was removed move to the next one */ + if (!cmod) + cmod = idr_get_next_ul(&cttype->mod_idr, + &iter->mod_id); + + /* Exit if no more modules */ + if (!cmod) + break; + + if (cmod != iter->cmod) { + iter->cmod = cmod; + ct = get_first_module_ct(cmod); + } else + ct = get_next_module_ct(iter); + + if (ct) + break; + + iter->mod_id++; + } + + iter->ct = ct; + return ct; +} + +void codetag_to_text(struct seq_buf *out, struct codetag *ct) +{ + if (ct->modname) + seq_buf_printf(out, "%s:%u [%s] func:%s", + ct->filename, ct->lineno, + ct->modname, ct->function); + else + seq_buf_printf(out, "%s:%u func:%s", + ct->filename, ct->lineno, ct->function); +} + +static inline size_t range_size(const struct codetag_type *cttype, + const struct codetag_range *range) +{ + return ((char *)range->stop - (char *)range->start) / + cttype->desc.tag_size; +} + +#ifdef CONFIG_MODULES +static void *get_symbol(struct module *mod, const char *prefix, const char *name) +{ + DECLARE_SEQ_BUF(sb, KSYM_NAME_LEN); + const char *buf; + + seq_buf_printf(&sb, "%s%s", prefix, name); + if (seq_buf_has_overflowed(&sb)) + return NULL; + + buf = seq_buf_str(&sb); + return mod ? + (void *)find_kallsyms_symbol_value(mod, buf) : + (void *)kallsyms_lookup_name(buf); +} + +static struct codetag_range get_section_range(struct module *mod, + const char *section) +{ + return (struct codetag_range) { + get_symbol(mod, "__start_", section), + get_symbol(mod, "__stop_", section), + }; +} + +static int codetag_module_init(struct codetag_type *cttype, struct module *mod) +{ + struct codetag_range range; + struct codetag_module *cmod; + int err; + + range = get_section_range(mod, cttype->desc.section); + if (!range.start || !range.stop) { + pr_warn("Failed to load code tags of type %s from the module %s\n", + cttype->desc.section, + mod ? mod->name : "(built-in)"); + return -EINVAL; + } + + /* Ignore empty ranges */ + if (range.start == range.stop) + return 0; + + BUG_ON(range.start > range.stop); + + cmod = kmalloc(sizeof(*cmod), GFP_KERNEL); + if (unlikely(!cmod)) + return -ENOMEM; + + cmod->mod = mod; + cmod->range = range; + + down_write(&cttype->mod_lock); + err = idr_alloc(&cttype->mod_idr, cmod, 0, 0, GFP_KERNEL); + if (err >= 0) + cttype->count += range_size(cttype, &range); + up_write(&cttype->mod_lock); + + if (err < 0) { + kfree(cmod); + return err; + } + + return 0; +} + +#else /* CONFIG_MODULES */ +static int codetag_module_init(struct codetag_type *cttype, struct module *mod) { return 0; } +#endif /* CONFIG_MODULES */ + +struct codetag_type * +codetag_register_type(const struct codetag_type_desc *desc) +{ + struct codetag_type *cttype; + int err; + + BUG_ON(desc->tag_size <= 0); + + cttype = kzalloc(sizeof(*cttype), GFP_KERNEL); + if (unlikely(!cttype)) + return ERR_PTR(-ENOMEM); + + cttype->desc = *desc; + idr_init(&cttype->mod_idr); + init_rwsem(&cttype->mod_lock); + + err = codetag_module_init(cttype, NULL); + if (unlikely(err)) { + kfree(cttype); + return ERR_PTR(err); + } + + mutex_lock(&codetag_lock); + list_add_tail(&cttype->link, &codetag_types); + mutex_unlock(&codetag_lock); + + return cttype; +} -- cgit v1.2.3 From 22d407b164ff79de42d21f37d99f9ee7abdd51c8 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 21 Mar 2024 09:36:35 -0700 Subject: lib: add allocation tagging support for memory allocation profiling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce CONFIG_MEM_ALLOC_PROFILING which provides definitions to easily instrument memory allocators. It registers an "alloc_tags" codetag type with /proc/allocinfo interface to output allocation tag information when the feature is enabled. CONFIG_MEM_ALLOC_PROFILING_DEBUG is provided for debugging the memory allocation profiling instrumentation. Memory allocation profiling can be enabled or disabled at runtime using /proc/sys/vm/mem_profiling sysctl when CONFIG_MEM_ALLOC_PROFILING_DEBUG=n. CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT enables memory allocation profiling by default. [surenb@google.com: Documentation/filesystems/proc.rst: fix allocinfo title] Link: https://lkml.kernel.org/r/20240326073813.727090-1-surenb@google.com [surenb@google.com: do limited memory accounting for modules with ARCH_NEEDS_WEAK_PER_CPU] Link: https://lkml.kernel.org/r/20240402180933.1663992-2-surenb@google.com [klarasmodin@gmail.com: explicitly include irqflags.h in alloc_tag.h] Link: https://lkml.kernel.org/r/20240407133252.173636-1-klarasmodin@gmail.com [surenb@google.com: fix alloc_tag_init() to prevent passing NULL to PTR_ERR()] Link: https://lkml.kernel.org/r/20240417003349.2520094-1-surenb@google.com Link: https://lkml.kernel.org/r/20240321163705.3067592-14-surenb@google.com Signed-off-by: Suren Baghdasaryan Co-developed-by: Kent Overstreet Signed-off-by: Kent Overstreet Signed-off-by: Klara Modin Tested-by: Kees Cook Cc: Alexander Viro Cc: Alex Gaynor Cc: Alice Ryhl Cc: Andreas Hindborg Cc: Benno Lossin Cc: "Björn Roy Baron" Cc: Boqun Feng Cc: Christoph Lameter Cc: Dennis Zhou Cc: Gary Guo Cc: Miguel Ojeda Cc: Pasha Tatashin Cc: Peter Zijlstra Cc: Tejun Heo Cc: Vlastimil Babka Cc: Wedson Almeida Filho Signed-off-by: Andrew Morton --- Documentation/admin-guide/sysctl/vm.rst | 16 ++++ Documentation/filesystems/proc.rst | 29 ++++++ include/asm-generic/codetag.lds.h | 14 +++ include/asm-generic/vmlinux.lds.h | 3 + include/linux/alloc_tag.h | 156 ++++++++++++++++++++++++++++++++ include/linux/sched.h | 24 +++++ lib/Kconfig.debug | 25 +++++ lib/Makefile | 2 + lib/alloc_tag.c | 152 +++++++++++++++++++++++++++++++ scripts/module.lds.S | 7 ++ 10 files changed, 428 insertions(+) create mode 100644 include/asm-generic/codetag.lds.h create mode 100644 include/linux/alloc_tag.h create mode 100644 lib/alloc_tag.c (limited to 'lib/Kconfig.debug') diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst index c59889de122b..e86c968a7a0e 100644 --- a/Documentation/admin-guide/sysctl/vm.rst +++ b/Documentation/admin-guide/sysctl/vm.rst @@ -43,6 +43,7 @@ Currently, these files are in /proc/sys/vm: - legacy_va_layout - lowmem_reserve_ratio - max_map_count +- mem_profiling (only if CONFIG_MEM_ALLOC_PROFILING=y) - memory_failure_early_kill - memory_failure_recovery - min_free_kbytes @@ -425,6 +426,21 @@ e.g., up to one or two maps per allocation. The default value is 65530. +mem_profiling +============== + +Enable memory profiling (when CONFIG_MEM_ALLOC_PROFILING=y) + +1: Enable memory profiling. + +0: Disable memory profiling. + +Enabling memory profiling introduces a small performance overhead for all +memory allocations. + +The default value depends on CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT. + + memory_failure_early_kill: ========================== diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index c6a6b9df2104..245269dd6e02 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -688,6 +688,7 @@ files are there, and which are missing. ============ =============================================================== File Content ============ =============================================================== + allocinfo Memory allocations profiling information apm Advanced power management info bootconfig Kernel command line obtained from boot config, and, if there were kernel parameters from the @@ -953,6 +954,34 @@ also be allocatable although a lot of filesystem metadata may have to be reclaimed to achieve this. +allocinfo +~~~~~~~~~ + +Provides information about memory allocations at all locations in the code +base. Each allocation in the code is identified by its source file, line +number, module (if originates from a loadable module) and the function calling +the allocation. The number of bytes allocated and number of calls at each +location are reported. + +Example output. + +:: + + > sort -rn /proc/allocinfo + 127664128 31168 mm/page_ext.c:270 func:alloc_page_ext + 56373248 4737 mm/slub.c:2259 func:alloc_slab_page + 14880768 3633 mm/readahead.c:247 func:page_cache_ra_unbounded + 14417920 3520 mm/mm_init.c:2530 func:alloc_large_system_hash + 13377536 234 block/blk-mq.c:3421 func:blk_mq_alloc_rqs + 11718656 2861 mm/filemap.c:1919 func:__filemap_get_folio + 9192960 2800 kernel/fork.c:307 func:alloc_thread_stack_node + 4206592 4 net/netfilter/nf_conntrack_core.c:2567 func:nf_ct_alloc_hashtable + 4136960 1010 drivers/staging/ctagmod/ctagmod.c:20 [ctagmod] func:ctagmod_start + 3940352 962 mm/memory.c:4214 func:alloc_anon_folio + 2894464 22613 fs/kernfs/dir.c:615 func:__kernfs_new_node + ... + + meminfo ~~~~~~~ diff --git a/include/asm-generic/codetag.lds.h b/include/asm-generic/codetag.lds.h new file mode 100644 index 000000000000..64f536b80380 --- /dev/null +++ b/include/asm-generic/codetag.lds.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_GENERIC_CODETAG_LDS_H +#define __ASM_GENERIC_CODETAG_LDS_H + +#define SECTION_WITH_BOUNDARIES(_name) \ + . = ALIGN(8); \ + __start_##_name = .; \ + KEEP(*(_name)) \ + __stop_##_name = .; + +#define CODETAG_SECTIONS() \ + SECTION_WITH_BOUNDARIES(alloc_tags) + +#endif /* __ASM_GENERIC_CODETAG_LDS_H */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index f7749d0f2562..3e4497b5135a 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -50,6 +50,8 @@ * [__nosave_begin, __nosave_end] for the nosave data */ +#include + #ifndef LOAD_OFFSET #define LOAD_OFFSET 0 #endif @@ -366,6 +368,7 @@ . = ALIGN(8); \ BOUNDED_SECTION_BY(__dyndbg_classes, ___dyndbg_classes) \ BOUNDED_SECTION_BY(__dyndbg, ___dyndbg) \ + CODETAG_SECTIONS() \ LIKELY_PROFILE() \ BRANCH_PROFILE() \ TRACE_PRINTKS() \ diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h new file mode 100644 index 000000000000..13561223fdab --- /dev/null +++ b/include/linux/alloc_tag.h @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * allocation tagging + */ +#ifndef _LINUX_ALLOC_TAG_H +#define _LINUX_ALLOC_TAG_H + +#include +#include +#include +#include +#include +#include +#include +#include + +struct alloc_tag_counters { + u64 bytes; + u64 calls; +}; + +/* + * An instance of this structure is created in a special ELF section at every + * allocation callsite. At runtime, the special section is treated as + * an array of these. Embedded codetag utilizes codetag framework. + */ +struct alloc_tag { + struct codetag ct; + struct alloc_tag_counters __percpu *counters; +} __aligned(8); + +#ifdef CONFIG_MEM_ALLOC_PROFILING + +static inline struct alloc_tag *ct_to_alloc_tag(struct codetag *ct) +{ + return container_of(ct, struct alloc_tag, ct); +} + +#ifdef ARCH_NEEDS_WEAK_PER_CPU +/* + * When percpu variables are required to be defined as weak, static percpu + * variables can't be used inside a function (see comments for DECLARE_PER_CPU_SECTION). + * Instead we will accound all module allocations to a single counter. + */ +DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); + +#define DEFINE_ALLOC_TAG(_alloc_tag) \ + static struct alloc_tag _alloc_tag __used __aligned(8) \ + __section("alloc_tags") = { \ + .ct = CODE_TAG_INIT, \ + .counters = &_shared_alloc_tag }; + +#else /* ARCH_NEEDS_WEAK_PER_CPU */ + +#define DEFINE_ALLOC_TAG(_alloc_tag) \ + static DEFINE_PER_CPU(struct alloc_tag_counters, _alloc_tag_cntr); \ + static struct alloc_tag _alloc_tag __used __aligned(8) \ + __section("alloc_tags") = { \ + .ct = CODE_TAG_INIT, \ + .counters = &_alloc_tag_cntr }; + +#endif /* ARCH_NEEDS_WEAK_PER_CPU */ + +DECLARE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT, + mem_alloc_profiling_key); + +static inline bool mem_alloc_profiling_enabled(void) +{ + return static_branch_maybe(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT, + &mem_alloc_profiling_key); +} + +static inline struct alloc_tag_counters alloc_tag_read(struct alloc_tag *tag) +{ + struct alloc_tag_counters v = { 0, 0 }; + struct alloc_tag_counters *counter; + int cpu; + + for_each_possible_cpu(cpu) { + counter = per_cpu_ptr(tag->counters, cpu); + v.bytes += counter->bytes; + v.calls += counter->calls; + } + + return v; +} + +#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG +static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag *tag) +{ + WARN_ONCE(ref && ref->ct, + "alloc_tag was not cleared (got tag for %s:%u)\n", + ref->ct->filename, ref->ct->lineno); + + WARN_ONCE(!tag, "current->alloc_tag not set"); +} + +static inline void alloc_tag_sub_check(union codetag_ref *ref) +{ + WARN_ONCE(ref && !ref->ct, "alloc_tag was not set\n"); +} +#else +static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag *tag) {} +static inline void alloc_tag_sub_check(union codetag_ref *ref) {} +#endif + +/* Caller should verify both ref and tag to be valid */ +static inline void __alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag) +{ + ref->ct = &tag->ct; + /* + * We need in increment the call counter every time we have a new + * allocation or when we split a large allocation into smaller ones. + * Each new reference for every sub-allocation needs to increment call + * counter because when we free each part the counter will be decremented. + */ + this_cpu_inc(tag->counters->calls); +} + +static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag, size_t bytes) +{ + alloc_tag_add_check(ref, tag); + if (!ref || !tag) + return; + + __alloc_tag_ref_set(ref, tag); + this_cpu_add(tag->counters->bytes, bytes); +} + +static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes) +{ + struct alloc_tag *tag; + + alloc_tag_sub_check(ref); + if (!ref || !ref->ct) + return; + + tag = ct_to_alloc_tag(ref->ct); + + this_cpu_sub(tag->counters->bytes, bytes); + this_cpu_dec(tag->counters->calls); + + ref->ct = NULL; +} + +#else /* CONFIG_MEM_ALLOC_PROFILING */ + +#define DEFINE_ALLOC_TAG(_alloc_tag) +static inline bool mem_alloc_profiling_enabled(void) { return false; } +static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag, + size_t bytes) {} +static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes) {} + +#endif /* CONFIG_MEM_ALLOC_PROFILING */ + +#endif /* _LINUX_ALLOC_TAG_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 3c2abbc587b4..4118b3f959c3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -770,6 +770,10 @@ struct task_struct { unsigned int flags; unsigned int ptrace; +#ifdef CONFIG_MEM_ALLOC_PROFILING + struct alloc_tag *alloc_tag; +#endif + #ifdef CONFIG_SMP int on_cpu; struct __call_single_node wake_entry; @@ -810,6 +814,7 @@ struct task_struct { struct task_group *sched_task_group; #endif + #ifdef CONFIG_UCLAMP_TASK /* * Clamp values requested for a scheduling entity. @@ -2187,4 +2192,23 @@ static inline int sched_core_idle_cpu(int cpu) { return idle_cpu(cpu); } extern void sched_set_stop_task(int cpu, struct task_struct *stop); +#ifdef CONFIG_MEM_ALLOC_PROFILING +static inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag) +{ + swap(current->alloc_tag, tag); + return tag; +} + +static inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old) +{ +#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG + WARN(current->alloc_tag != tag, "current->alloc_tag was changed:\n"); +#endif + current->alloc_tag = old; +} +#else +#define alloc_tag_save(_tag) NULL +#define alloc_tag_restore(_tag, _old) do {} while (0) +#endif + #endif diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 015fc6ee9849..fa7aa32ba11a 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -972,6 +972,31 @@ config CODE_TAGGING bool select KALLSYMS +config MEM_ALLOC_PROFILING + bool "Enable memory allocation profiling" + default n + depends on PROC_FS + depends on !DEBUG_FORCE_WEAK_PER_CPU + select CODE_TAGGING + help + Track allocation source code and record total allocation size + initiated at that code location. The mechanism can be used to track + memory leaks with a low performance and memory impact. + +config MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT + bool "Enable memory allocation profiling by default" + default y + depends on MEM_ALLOC_PROFILING + +config MEM_ALLOC_PROFILING_DEBUG + bool "Memory allocation profiler debugging" + default n + depends on MEM_ALLOC_PROFILING + select MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT + help + Adds warnings with helpful error messages for memory allocation + profiling. + source "lib/Kconfig.kasan" source "lib/Kconfig.kfence" source "lib/Kconfig.kmsan" diff --git a/lib/Makefile b/lib/Makefile index 910335da8f13..2f4e17bfb299 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -234,6 +234,8 @@ obj-$(CONFIG_OF_RECONFIG_NOTIFIER_ERROR_INJECT) += \ obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o obj-$(CONFIG_CODE_TAGGING) += codetag.o +obj-$(CONFIG_MEM_ALLOC_PROFILING) += alloc_tag.o + lib-$(CONFIG_GENERIC_BUG) += bug.o obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c new file mode 100644 index 000000000000..331dd17650f3 --- /dev/null +++ b/lib/alloc_tag.c @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include +#include +#include + +static struct codetag_type *alloc_tag_cttype; + +DEFINE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); +EXPORT_SYMBOL(_shared_alloc_tag); + +DEFINE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT, + mem_alloc_profiling_key); + +static void *allocinfo_start(struct seq_file *m, loff_t *pos) +{ + struct codetag_iterator *iter; + struct codetag *ct; + loff_t node = *pos; + + iter = kzalloc(sizeof(*iter), GFP_KERNEL); + m->private = iter; + if (!iter) + return NULL; + + codetag_lock_module_list(alloc_tag_cttype, true); + *iter = codetag_get_ct_iter(alloc_tag_cttype); + while ((ct = codetag_next_ct(iter)) != NULL && node) + node--; + + return ct ? iter : NULL; +} + +static void *allocinfo_next(struct seq_file *m, void *arg, loff_t *pos) +{ + struct codetag_iterator *iter = (struct codetag_iterator *)arg; + struct codetag *ct = codetag_next_ct(iter); + + (*pos)++; + if (!ct) + return NULL; + + return iter; +} + +static void allocinfo_stop(struct seq_file *m, void *arg) +{ + struct codetag_iterator *iter = (struct codetag_iterator *)m->private; + + if (iter) { + codetag_lock_module_list(alloc_tag_cttype, false); + kfree(iter); + } +} + +static void alloc_tag_to_text(struct seq_buf *out, struct codetag *ct) +{ + struct alloc_tag *tag = ct_to_alloc_tag(ct); + struct alloc_tag_counters counter = alloc_tag_read(tag); + s64 bytes = counter.bytes; + + seq_buf_printf(out, "%12lli %8llu ", bytes, counter.calls); + codetag_to_text(out, ct); + seq_buf_putc(out, ' '); + seq_buf_putc(out, '\n'); +} + +static int allocinfo_show(struct seq_file *m, void *arg) +{ + struct codetag_iterator *iter = (struct codetag_iterator *)arg; + char *bufp; + size_t n = seq_get_buf(m, &bufp); + struct seq_buf buf; + + seq_buf_init(&buf, bufp, n); + alloc_tag_to_text(&buf, iter->ct); + seq_commit(m, seq_buf_used(&buf)); + return 0; +} + +static const struct seq_operations allocinfo_seq_op = { + .start = allocinfo_start, + .next = allocinfo_next, + .stop = allocinfo_stop, + .show = allocinfo_show, +}; + +static void __init procfs_init(void) +{ + proc_create_seq("allocinfo", 0444, NULL, &allocinfo_seq_op); +} + +static bool alloc_tag_module_unload(struct codetag_type *cttype, + struct codetag_module *cmod) +{ + struct codetag_iterator iter = codetag_get_ct_iter(cttype); + struct alloc_tag_counters counter; + bool module_unused = true; + struct alloc_tag *tag; + struct codetag *ct; + + for (ct = codetag_next_ct(&iter); ct; ct = codetag_next_ct(&iter)) { + if (iter.cmod != cmod) + continue; + + tag = ct_to_alloc_tag(ct); + counter = alloc_tag_read(tag); + + if (WARN(counter.bytes, + "%s:%u module %s func:%s has %llu allocated at module unload", + ct->filename, ct->lineno, ct->modname, ct->function, counter.bytes)) + module_unused = false; + } + + return module_unused; +} + +static struct ctl_table memory_allocation_profiling_sysctls[] = { + { + .procname = "mem_profiling", + .data = &mem_alloc_profiling_key, +#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG + .mode = 0444, +#else + .mode = 0644, +#endif + .proc_handler = proc_do_static_key, + }, + { } +}; + +static int __init alloc_tag_init(void) +{ + const struct codetag_type_desc desc = { + .section = "alloc_tags", + .tag_size = sizeof(struct alloc_tag), + .module_unload = alloc_tag_module_unload, + }; + + alloc_tag_cttype = codetag_register_type(&desc); + if (IS_ERR(alloc_tag_cttype)) + return PTR_ERR(alloc_tag_cttype); + + register_sysctl_init("vm", memory_allocation_profiling_sysctls); + procfs_init(); + + return 0; +} +module_init(alloc_tag_init); diff --git a/scripts/module.lds.S b/scripts/module.lds.S index bf5bcf2836d8..45c67a0994f3 100644 --- a/scripts/module.lds.S +++ b/scripts/module.lds.S @@ -9,6 +9,8 @@ #define DISCARD_EH_FRAME *(.eh_frame) #endif +#include + SECTIONS { /DISCARD/ : { *(.discard) @@ -47,12 +49,17 @@ SECTIONS { .data : { *(.data .data.[0-9a-zA-Z_]*) *(.data..L*) + CODETAG_SECTIONS() } .rodata : { *(.rodata .rodata.[0-9a-zA-Z_]*) *(.rodata..L*) } +#else + .data : { + CODETAG_SECTIONS() + } #endif } -- cgit v1.2.3 From dcfe378c81f72f146890ce1dcfdcc742d3b66924 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 21 Mar 2024 09:36:36 -0700 Subject: lib: introduce support for page allocation tagging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce helper functions to easily instrument page allocators by storing a pointer to the allocation tag associated with the code that allocated the page in a page_ext field. Link: https://lkml.kernel.org/r/20240321163705.3067592-15-surenb@google.com Signed-off-by: Suren Baghdasaryan Co-developed-by: Kent Overstreet Signed-off-by: Kent Overstreet Reviewed-by: Vlastimil Babka Tested-by: Kees Cook Cc: Alexander Viro Cc: Alex Gaynor Cc: Alice Ryhl Cc: Andreas Hindborg Cc: Benno Lossin Cc: "Björn Roy Baron" Cc: Boqun Feng Cc: Christoph Lameter Cc: Dennis Zhou Cc: Gary Guo Cc: Miguel Ojeda Cc: Pasha Tatashin Cc: Peter Zijlstra Cc: Tejun Heo Cc: Wedson Almeida Filho Signed-off-by: Andrew Morton --- include/linux/page_ext.h | 1 - include/linux/pgalloc_tag.h | 78 +++++++++++++++++++++++++++++++++++++++++++++ lib/Kconfig.debug | 1 + lib/alloc_tag.c | 17 ++++++++++ mm/mm_init.c | 1 + mm/page_alloc.c | 4 +++ mm/page_ext.c | 4 +++ 7 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 include/linux/pgalloc_tag.h (limited to 'lib/Kconfig.debug') diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index be98564191e6..07e0656898f9 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h @@ -4,7 +4,6 @@ #include #include -#include struct pglist_data; diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h new file mode 100644 index 000000000000..66bd021eb46e --- /dev/null +++ b/include/linux/pgalloc_tag.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * page allocation tagging + */ +#ifndef _LINUX_PGALLOC_TAG_H +#define _LINUX_PGALLOC_TAG_H + +#include + +#ifdef CONFIG_MEM_ALLOC_PROFILING + +#include + +extern struct page_ext_operations page_alloc_tagging_ops; +extern struct page_ext *page_ext_get(struct page *page); +extern void page_ext_put(struct page_ext *page_ext); + +static inline union codetag_ref *codetag_ref_from_page_ext(struct page_ext *page_ext) +{ + return (void *)page_ext + page_alloc_tagging_ops.offset; +} + +static inline struct page_ext *page_ext_from_codetag_ref(union codetag_ref *ref) +{ + return (void *)ref - page_alloc_tagging_ops.offset; +} + +/* Should be called only if mem_alloc_profiling_enabled() */ +static inline union codetag_ref *get_page_tag_ref(struct page *page) +{ + if (page) { + struct page_ext *page_ext = page_ext_get(page); + + if (page_ext) + return codetag_ref_from_page_ext(page_ext); + } + return NULL; +} + +static inline void put_page_tag_ref(union codetag_ref *ref) +{ + page_ext_put(page_ext_from_codetag_ref(ref)); +} + +static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, + unsigned int nr) +{ + if (mem_alloc_profiling_enabled()) { + union codetag_ref *ref = get_page_tag_ref(page); + + if (ref) { + alloc_tag_add(ref, task->alloc_tag, PAGE_SIZE * nr); + put_page_tag_ref(ref); + } + } +} + +static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) +{ + if (mem_alloc_profiling_enabled()) { + union codetag_ref *ref = get_page_tag_ref(page); + + if (ref) { + alloc_tag_sub(ref, PAGE_SIZE * nr); + put_page_tag_ref(ref); + } + } +} + +#else /* CONFIG_MEM_ALLOC_PROFILING */ + +static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, + unsigned int nr) {} +static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {} + +#endif /* CONFIG_MEM_ALLOC_PROFILING */ + +#endif /* _LINUX_PGALLOC_TAG_H */ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index fa7aa32ba11a..a8f77de5e8ea 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -978,6 +978,7 @@ config MEM_ALLOC_PROFILING depends on PROC_FS depends on !DEBUG_FORCE_WEAK_PER_CPU select CODE_TAGGING + select PAGE_EXTENSION help Track allocation source code and record total allocation size initiated at that code location. The mechanism can be used to track diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index 331dd17650f3..59bb71036837 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -118,6 +119,22 @@ static bool alloc_tag_module_unload(struct codetag_type *cttype, return module_unused; } +static __init bool need_page_alloc_tagging(void) +{ + return true; +} + +static __init void init_page_alloc_tagging(void) +{ +} + +struct page_ext_operations page_alloc_tagging_ops = { + .size = sizeof(union codetag_ref), + .need = need_page_alloc_tagging, + .init = init_page_alloc_tagging, +}; +EXPORT_SYMBOL(page_alloc_tagging_ops); + static struct ctl_table memory_allocation_profiling_sysctls[] = { { .procname = "mem_profiling", diff --git a/mm/mm_init.c b/mm/mm_init.c index 549e76af8f82..2fd9bf044a79 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5083ac034d26..3fd273c22749 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include "internal.h" #include "shuffle.h" @@ -1101,6 +1102,7 @@ __always_inline bool free_pages_prepare(struct page *page, /* Do not let hwpoison pages hit pcplists/buddy */ reset_page_owner(page, order); page_table_check_free(page, order); + pgalloc_tag_sub(page, 1 << order); return false; } @@ -1140,6 +1142,7 @@ __always_inline bool free_pages_prepare(struct page *page, page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; reset_page_owner(page, order); page_table_check_free(page, order); + pgalloc_tag_sub(page, 1 << order); if (!PageHighMem(page)) { debug_check_no_locks_freed(page_address(page), @@ -1533,6 +1536,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order, set_page_owner(page, order, gfp_flags); page_table_check_alloc(page, order); + pgalloc_tag_add(page, current, 1 << order); } static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags, diff --git a/mm/page_ext.c b/mm/page_ext.c index 4548fcc66d74..3c58fe8a24df 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -10,6 +10,7 @@ #include #include #include +#include /* * struct page extension @@ -82,6 +83,9 @@ static struct page_ext_operations *page_ext_ops[] __initdata = { #if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT) &page_idle_ops, #endif +#ifdef CONFIG_MEM_ALLOC_PROFILING + &page_alloc_tagging_ops, +#endif #ifdef CONFIG_PAGE_TABLE_CHECK &page_table_check_ops, #endif -- cgit v1.2.3 From c789b5fe38f386bbdec8c9c41e9af52d3775e16c Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 21 Mar 2024 09:36:44 -0700 Subject: lib: add codetag reference into slabobj_ext MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To store code tag for every slab object, a codetag reference is embedded into slabobj_ext when CONFIG_MEM_ALLOC_PROFILING=y. Link: https://lkml.kernel.org/r/20240321163705.3067592-23-surenb@google.com Signed-off-by: Suren Baghdasaryan Co-developed-by: Kent Overstreet Signed-off-by: Kent Overstreet Reviewed-by: Vlastimil Babka Tested-by: Kees Cook Cc: Alexander Viro Cc: Alex Gaynor Cc: Alice Ryhl Cc: Andreas Hindborg Cc: Benno Lossin Cc: "Björn Roy Baron" Cc: Boqun Feng Cc: Christoph Lameter Cc: Dennis Zhou Cc: Gary Guo Cc: Miguel Ojeda Cc: Pasha Tatashin Cc: Peter Zijlstra Cc: Tejun Heo Cc: Wedson Almeida Filho Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 5 +++++ lib/Kconfig.debug | 1 + 2 files changed, 6 insertions(+) (limited to 'lib/Kconfig.debug') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 12afc2647cf0..24a6df30be49 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1653,7 +1653,12 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, * if MEMCG_DATA_OBJEXTS is set. */ struct slabobj_ext { +#ifdef CONFIG_MEMCG_KMEM struct obj_cgroup *objcg; +#endif +#ifdef CONFIG_MEM_ALLOC_PROFILING + union codetag_ref ref; +#endif } __aligned(8); static inline void __inc_lruvec_kmem_state(void *p, enum node_stat_item idx) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index a8f77de5e8ea..9d2685ec4592 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -979,6 +979,7 @@ config MEM_ALLOC_PROFILING depends on !DEBUG_FORCE_WEAK_PER_CPU select CODE_TAGGING select PAGE_EXTENSION + select SLAB_OBJ_EXT help Track allocation source code and record total allocation size initiated at that code location. The mechanism can be used to track -- cgit v1.2.3 From 77db1920a88103e8ef9ee58130df7c970aea3d17 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 2 May 2024 09:56:33 -0700 Subject: lib: make test_bitops compilable into the kernel image The test now is limited to be compiled as a module. There's no technical reason for it. Now that the test bears some performance benchmarks, it would be reasonable to run it at kernel load time, before userspace starts, to reduce possible jitter. Reviewed-by: Kuan-Wei Chiu Signed-off-by: Yury Norov --- lib/Kconfig.debug | 1 - 1 file changed, 1 deletion(-) (limited to 'lib/Kconfig.debug') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c63a5fbf1f1c..fc8fe1ea5b49 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2436,7 +2436,6 @@ config TEST_LKM config TEST_BITOPS tristate "Test module for compilation of bitops operations" - depends on m help This builds the "test_bitops" module that is much like the TEST_LKM module except that it does a basic exercise of the -- cgit v1.2.3 From 790a4a3dd1039ee07e7bb5854dcc7ccfbb40c876 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 29 Mar 2024 00:18:30 -0700 Subject: selftests/fpu: allow building on other architectures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that ARCH_HAS_KERNEL_FPU_SUPPORT provides a common way to compile and run floating-point code, this test is no longer x86-specific. Link: https://lkml.kernel.org/r/20240329072441.591471-16-samuel.holland@sifive.com Signed-off-by: Samuel Holland Reviewed-by: Christoph Hellwig Acked-by: Christian König Cc: Alex Deucher Cc: Borislav Petkov (AMD) Cc: Catalin Marinas Cc: Dave Hansen Cc: Huacai Chen Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Michael Ellerman Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Palmer Dabbelt Cc: Russell King Cc: Thomas Gleixner Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- lib/Kconfig.debug | 2 +- lib/Makefile | 25 ++----------------------- lib/test_fpu_glue.c | 5 ++++- 3 files changed, 7 insertions(+), 25 deletions(-) (limited to 'lib/Kconfig.debug') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c1c1b19525a5..f4384fd60a76 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2926,7 +2926,7 @@ config TEST_FREE_PAGES config TEST_FPU tristate "Test floating point operations in kernel space" - depends on X86 && !KCOV_INSTRUMENT_ALL + depends on ARCH_HAS_KERNEL_FPU_SUPPORT && !KCOV_INSTRUMENT_ALL help Enable this option to add /sys/kernel/debug/selftest_helpers/test_fpu which will trigger a sequence of floating point operations. This is used diff --git a/lib/Makefile b/lib/Makefile index 87f8803587ae..3b1769045651 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -110,31 +110,10 @@ CFLAGS_test_fprobe.o += $(CC_FLAGS_FTRACE) obj-$(CONFIG_FPROBE_SANITY_TEST) += test_fprobe.o obj-$(CONFIG_TEST_OBJPOOL) += test_objpool.o -# -# CFLAGS for compiling floating point code inside the kernel. x86/Makefile turns -# off the generation of FPU/SSE* instructions for kernel proper but FPU_FLAGS -# get appended last to CFLAGS and thus override those previous compiler options. -# -FPU_CFLAGS := -msse -msse2 -ifdef CONFIG_CC_IS_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). -# See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383 -# -# The "-msse" in the first argument is there so that the -# -mpreferred-stack-boundary=3 build error: -# -# -mpreferred-stack-boundary=3 is not between 4 and 12 -# -# can be triggered. Otherwise gcc doesn't complain. -FPU_CFLAGS += -mhard-float -FPU_CFLAGS += $(call cc-option,-msse -mpreferred-stack-boundary=3,-mpreferred-stack-boundary=4) -endif - obj-$(CONFIG_TEST_FPU) += test_fpu.o test_fpu-y := test_fpu_glue.o test_fpu_impl.o -CFLAGS_test_fpu_impl.o += $(FPU_CFLAGS) +CFLAGS_test_fpu_impl.o += $(CC_FLAGS_FPU) +CFLAGS_REMOVE_test_fpu_impl.o += $(CC_FLAGS_NO_FPU) # Some KUnit files (hooks.o) need to be built-in even when KUnit is a module, # so we can't just use obj-$(CONFIG_KUNIT). diff --git a/lib/test_fpu_glue.c b/lib/test_fpu_glue.c index 85963d7be826..eef282a2715f 100644 --- a/lib/test_fpu_glue.c +++ b/lib/test_fpu_glue.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include "test_fpu.h" @@ -38,6 +38,9 @@ static struct dentry *selftest_dir; static int __init test_fpu_init(void) { + if (!kernel_fpu_available()) + return -EINVAL; + selftest_dir = debugfs_create_dir("selftest_helpers", NULL); if (!selftest_dir) return -ENOMEM; -- cgit v1.2.3