From 7bb5da0d490b2d836c5218f5186ee588d2145310 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Thu, 30 Jun 2022 23:32:57 +0100 Subject: kexec: turn all kexec_mutex acquisitions into trylocks Patch series "kexec, panic: Making crash_kexec() NMI safe", v4. This patch (of 2): Most acquistions of kexec_mutex are done via mutex_trylock() - those were a direct "translation" from: 8c5a1cf0ad3a ("kexec: use a mutex for locking rather than xchg()") there have however been two additions since then that use mutex_lock(): crash_get_memory_size() and crash_shrink_memory(). A later commit will replace said mutex with an atomic variable, and locking operations will become atomic_cmpxchg(). Rather than having those mutex_lock() become while (atomic_cmpxchg(&lock, 0, 1)), turn them into trylocks that can return -EBUSY on acquisition failure. This does halve the printable size of the crash kernel, but that's still neighbouring 2G for 32bit kernels which should be ample enough. Link: https://lkml.kernel.org/r/20220630223258.4144112-1-vschneid@redhat.com Link: https://lkml.kernel.org/r/20220630223258.4144112-2-vschneid@redhat.com Signed-off-by: Valentin Schneider Cc: Arnd Bergmann Cc: "Eric W . Biederman" Cc: Juri Lelli Cc: Luis Claudio R. Goncalves Cc: Miaohe Lin Cc: Petr Mladek Cc: Sebastian Andrzej Siewior Cc: Thomas Gleixner Cc: Baoquan He Signed-off-by: Andrew Morton --- kernel/kexec_core.c | 12 ++++++++---- kernel/ksysfs.c | 7 ++++++- 2 files changed, 14 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index acd029b307e4..8fa4eeb95b30 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -1004,13 +1004,16 @@ void crash_kexec(struct pt_regs *regs) } } -size_t crash_get_memory_size(void) +ssize_t crash_get_memory_size(void) { - size_t size = 0; + ssize_t size = 0; + + if (!mutex_trylock(&kexec_mutex)) + return -EBUSY; - mutex_lock(&kexec_mutex); if (crashk_res.end != crashk_res.start) size = resource_size(&crashk_res); + mutex_unlock(&kexec_mutex); return size; } @@ -1022,7 +1025,8 @@ int crash_shrink_memory(unsigned long new_size) unsigned long old_size; struct resource *ram_res; - mutex_lock(&kexec_mutex); + if (!mutex_trylock(&kexec_mutex)) + return -EBUSY; if (kexec_crash_image) { ret = -ENOENT; diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index b1292a57c2a5..65dba9076f31 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -105,7 +105,12 @@ KERNEL_ATTR_RO(kexec_crash_loaded); static ssize_t kexec_crash_size_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sprintf(buf, "%zu\n", crash_get_memory_size()); + ssize_t size = crash_get_memory_size(); + + if (size < 0) + return size; + + return sprintf(buf, "%zd\n", size); } static ssize_t kexec_crash_size_store(struct kobject *kobj, struct kobj_attribute *attr, -- cgit v1.2.3 From 05c6257433b7212f07a7e53479a8ab038fc1666a Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Thu, 30 Jun 2022 23:32:58 +0100 Subject: panic, kexec: make __crash_kexec() NMI safe Attempting to get a crash dump out of a debug PREEMPT_RT kernel via an NMI panic() doesn't work. The cause of that lies in the PREEMPT_RT definition of mutex_trylock(): if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task())) return 0; This prevents an nmi_panic() from executing the main body of __crash_kexec() which does the actual kexec into the kdump kernel. The warning and return are explained by: 6ce47fd961fa ("rtmutex: Warn if trylock is called from hard/softirq context") [...] The reasons for this are: 1) There is a potential deadlock in the slowpath 2) Another cpu which blocks on the rtmutex will boost the task which allegedly locked the rtmutex, but that cannot work because the hard/softirq context borrows the task context. Furthermore, grabbing the lock isn't NMI safe, so do away with kexec_mutex and replace it with an atomic variable. This is somewhat overzealous as *some* callsites could keep using a mutex (e.g. the sysfs-facing ones like crash_shrink_memory()), but this has the benefit of involving a single unified lock and preventing any future NMI-related surprises. Tested by triggering NMI panics via: $ echo 1 > /proc/sys/kernel/panic_on_unrecovered_nmi $ echo 1 > /proc/sys/kernel/unknown_nmi_panic $ echo 1 > /proc/sys/kernel/panic $ ipmitool power diag Link: https://lkml.kernel.org/r/20220630223258.4144112-3-vschneid@redhat.com Fixes: 6ce47fd961fa ("rtmutex: Warn if trylock is called from hard/softirq context") Signed-off-by: Valentin Schneider Cc: Arnd Bergmann Cc: Baoquan He Cc: "Eric W . Biederman" Cc: Juri Lelli Cc: Luis Claudio R. Goncalves Cc: Miaohe Lin Cc: Petr Mladek Cc: Sebastian Andrzej Siewior Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- kernel/kexec.c | 11 ++++------- kernel/kexec_core.c | 20 ++++++++++---------- kernel/kexec_file.c | 4 ++-- kernel/kexec_internal.h | 15 ++++++++++++++- 4 files changed, 30 insertions(+), 20 deletions(-) (limited to 'kernel') diff --git a/kernel/kexec.c b/kernel/kexec.c index b5e40f069768..cb8e6e6f983c 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -93,13 +93,10 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments, /* * Because we write directly to the reserved memory region when loading - * crash kernels we need a mutex here to prevent multiple crash kernels - * from attempting to load simultaneously, and to prevent a crash kernel - * from loading over the top of a in use crash kernel. - * - * KISS: always take the mutex. + * crash kernels we need a serialization here to prevent multiple crash + * kernels from attempting to load simultaneously. */ - if (!mutex_trylock(&kexec_mutex)) + if (!kexec_trylock()) return -EBUSY; if (flags & KEXEC_ON_CRASH) { @@ -165,7 +162,7 @@ out: kimage_free(image); out_unlock: - mutex_unlock(&kexec_mutex); + kexec_unlock(); return ret; } diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 8fa4eeb95b30..5ca4d40c9ec1 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -46,7 +46,7 @@ #include #include "kexec_internal.h" -DEFINE_MUTEX(kexec_mutex); +atomic_t __kexec_lock = ATOMIC_INIT(0); /* Per cpu memory for storing cpu states in case of system crash. */ note_buf_t __percpu *crash_notes; @@ -959,7 +959,7 @@ late_initcall(kexec_core_sysctl_init); */ void __noclone __crash_kexec(struct pt_regs *regs) { - /* Take the kexec_mutex here to prevent sys_kexec_load + /* Take the kexec_lock here to prevent sys_kexec_load * running on one cpu from replacing the crash kernel * we are using after a panic on a different cpu. * @@ -967,7 +967,7 @@ void __noclone __crash_kexec(struct pt_regs *regs) * of memory the xchg(&kexec_crash_image) would be * sufficient. But since I reuse the memory... */ - if (mutex_trylock(&kexec_mutex)) { + if (kexec_trylock()) { if (kexec_crash_image) { struct pt_regs fixed_regs; @@ -976,7 +976,7 @@ void __noclone __crash_kexec(struct pt_regs *regs) machine_crash_shutdown(&fixed_regs); machine_kexec(kexec_crash_image); } - mutex_unlock(&kexec_mutex); + kexec_unlock(); } } STACK_FRAME_NON_STANDARD(__crash_kexec); @@ -1008,13 +1008,13 @@ ssize_t crash_get_memory_size(void) { ssize_t size = 0; - if (!mutex_trylock(&kexec_mutex)) + if (!kexec_trylock()) return -EBUSY; if (crashk_res.end != crashk_res.start) size = resource_size(&crashk_res); - mutex_unlock(&kexec_mutex); + kexec_unlock(); return size; } @@ -1025,7 +1025,7 @@ int crash_shrink_memory(unsigned long new_size) unsigned long old_size; struct resource *ram_res; - if (!mutex_trylock(&kexec_mutex)) + if (!kexec_trylock()) return -EBUSY; if (kexec_crash_image) { @@ -1064,7 +1064,7 @@ int crash_shrink_memory(unsigned long new_size) insert_resource(&iomem_resource, ram_res); unlock: - mutex_unlock(&kexec_mutex); + kexec_unlock(); return ret; } @@ -1136,7 +1136,7 @@ int kernel_kexec(void) { int error = 0; - if (!mutex_trylock(&kexec_mutex)) + if (!kexec_trylock()) return -EBUSY; if (!kexec_image) { error = -EINVAL; @@ -1212,6 +1212,6 @@ int kernel_kexec(void) #endif Unlock: - mutex_unlock(&kexec_mutex); + kexec_unlock(); return error; } diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 1d546dc97c50..45637511e0de 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -339,7 +339,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd, image = NULL; - if (!mutex_trylock(&kexec_mutex)) + if (!kexec_trylock()) return -EBUSY; dest_image = &kexec_image; @@ -411,7 +411,7 @@ out: if ((flags & KEXEC_FILE_ON_CRASH) && kexec_crash_image) arch_kexec_protect_crashkres(); - mutex_unlock(&kexec_mutex); + kexec_unlock(); kimage_free(image); return ret; } diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h index 48aaf2ac0d0d..74da1409cd14 100644 --- a/kernel/kexec_internal.h +++ b/kernel/kexec_internal.h @@ -13,7 +13,20 @@ void kimage_terminate(struct kimage *image); int kimage_is_destination_range(struct kimage *image, unsigned long start, unsigned long end); -extern struct mutex kexec_mutex; +/* + * Whatever is used to serialize accesses to the kexec_crash_image needs to be + * NMI safe, as __crash_kexec() can happen during nmi_panic(), so here we use a + * "simple" atomic variable that is acquired with a cmpxchg(). + */ +extern atomic_t __kexec_lock; +static inline bool kexec_trylock(void) +{ + return atomic_cmpxchg_acquire(&__kexec_lock, 0, 1) == 0; +} +static inline void kexec_unlock(void) +{ + atomic_set_release(&__kexec_lock, 0); +} #ifdef CONFIG_KEXEC_FILE #include -- cgit v1.2.3 From 2be9880dc87342dc7ae459c9ea5c9ee2a45b33d8 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 19 Aug 2022 09:44:06 +0800 Subject: kernel: exit: cleanup release_thread() Only x86 has own release_thread(), introduce a new weak release_thread() function to clean empty definitions in other ARCHs. Link: https://lkml.kernel.org/r/20220819014406.32266-1-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Acked-by: Guo Ren [csky] Acked-by: Russell King (Oracle) Acked-by: Geert Uytterhoeven Acked-by: Brian Cain Acked-by: Michael Ellerman [powerpc] Acked-by: Stafford Horne [openrisc] Acked-by: Catalin Marinas [arm64] Acked-by: Huacai Chen [LoongArch] Cc: Alexander Gordeev Cc: Anton Ivanov Cc: Borislav Petkov Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Chris Zankel Cc: Dave Hansen Cc: "David S. Miller" Cc: Dinh Nguyen Cc: Guo Ren [csky] Cc: Heiko Carstens Cc: Helge Deller Cc: Ingo Molnar Cc: Ivan Kokshaysky Cc: James Bottomley Cc: Johannes Berg Cc: Jonas Bonn Cc: Matt Turner Cc: Max Filippov Cc: Michal Simek Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Richard Henderson Cc: Richard Weinberger Cc: Rich Felker Cc: Stefan Kristiansson Cc: Sven Schnelle Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Will Deacon Cc: Xuerui Wang Cc: Yoshinori Sato Signed-off-by: Andrew Morton --- arch/alpha/include/asm/processor.h | 2 -- arch/alpha/kernel/process.c | 5 ----- arch/arc/include/asm/processor.h | 3 --- arch/arm/include/asm/processor.h | 3 --- arch/arm/kernel/process.c | 4 ---- arch/arm64/include/asm/processor.h | 3 --- arch/arm64/kernel/process.c | 4 ---- arch/csky/include/asm/processor.h | 5 ----- arch/hexagon/include/asm/processor.h | 4 ---- arch/hexagon/kernel/process.c | 7 ------- arch/ia64/include/asm/processor.h | 7 ------- arch/loongarch/include/asm/processor.h | 3 --- arch/m68k/include/asm/processor.h | 5 ----- arch/microblaze/include/asm/processor.h | 5 ----- arch/mips/include/asm/processor.h | 3 --- arch/nios2/include/asm/processor.h | 5 ----- arch/openrisc/include/asm/processor.h | 1 - arch/openrisc/kernel/process.c | 4 ---- arch/parisc/include/asm/processor.h | 3 --- arch/parisc/kernel/process.c | 4 ---- arch/powerpc/include/asm/processor.h | 1 - arch/powerpc/kernel/process.c | 5 ----- arch/riscv/include/asm/processor.h | 5 ----- arch/s390/include/asm/processor.h | 3 --- arch/sh/include/asm/processor_32.h | 3 --- arch/sh/kernel/process_32.c | 5 ----- arch/sparc/include/asm/processor_32.h | 3 --- arch/sparc/include/asm/processor_64.h | 3 --- arch/um/include/asm/processor-generic.h | 4 ---- arch/x86/include/asm/processor.h | 3 --- arch/xtensa/include/asm/processor.h | 3 --- include/linux/sched/task.h | 3 +++ kernel/exit.c | 4 ++++ 33 files changed, 7 insertions(+), 118 deletions(-) (limited to 'kernel') diff --git a/arch/alpha/include/asm/processor.h b/arch/alpha/include/asm/processor.h index 43e234c518b1..714abe494e5f 100644 --- a/arch/alpha/include/asm/processor.h +++ b/arch/alpha/include/asm/processor.h @@ -36,8 +36,6 @@ extern void start_thread(struct pt_regs *, unsigned long, unsigned long); /* Free all resources held by a thread. */ struct task_struct; -extern void release_thread(struct task_struct *); - unsigned long __get_wchan(struct task_struct *p); #define KSTK_EIP(tsk) (task_pt_regs(tsk)->pc) diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index e2e25f8b5e76..dbf1bc5e2ad2 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -225,11 +225,6 @@ flush_thread(void) current_thread_info()->pcb.unique = 0; } -void -release_thread(struct task_struct *dead_task) -{ -} - /* * Copy architecture-specific thread state */ diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index 54db9d7bb562..fb844fce1ab6 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -43,9 +43,6 @@ struct task_struct; #define task_pt_regs(p) \ ((struct pt_regs *)(THREAD_SIZE + (void *)task_stack_page(p)) - 1) -/* Free all resources held by a thread */ -#define release_thread(thread) do { } while (0) - /* * A lot of busy-wait loops in SMP are based off of non-volatile data otherwise * get optimised away by gcc diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index bdc35c0e8dfb..326864f79d18 100644 --- a/arch/arm/include/asm/processor.h +++ b/arch/arm/include/asm/processor.h @@ -81,9 +81,6 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, /* Forward declaration, a strange C thing */ struct task_struct; -/* Free all resources held by a thread. */ -extern void release_thread(struct task_struct *); - unsigned long __get_wchan(struct task_struct *p); #define task_pt_regs(p) \ diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 3d9cace63884..712d3e6d9be9 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -232,10 +232,6 @@ void flush_thread(void) thread_notify(THREAD_NOTIFY_FLUSH, thread); } -void release_thread(struct task_struct *dead_task) -{ -} - asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 86eb0bfe3b38..4cfb4cd1d475 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -323,9 +323,6 @@ static inline bool is_ttbr1_addr(unsigned long addr) /* Forward declaration, a strange C thing */ struct task_struct; -/* Free all resources held by a thread. */ -extern void release_thread(struct task_struct *); - unsigned long __get_wchan(struct task_struct *p); void update_sctlr_el1(u64 sctlr); diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 92bcc1768f0b..9015f49c206e 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -279,10 +279,6 @@ void flush_thread(void) flush_tagged_addr_state(); } -void release_thread(struct task_struct *dead_task) -{ -} - void arch_release_task_struct(struct task_struct *tsk) { fpsimd_release_task(tsk); diff --git a/arch/csky/include/asm/processor.h b/arch/csky/include/asm/processor.h index 9638206bc44f..63ad71fab30d 100644 --- a/arch/csky/include/asm/processor.h +++ b/arch/csky/include/asm/processor.h @@ -69,11 +69,6 @@ do { \ /* Forward declaration, a strange C thing */ struct task_struct; -/* Free all resources held by a thread. */ -static inline void release_thread(struct task_struct *dead_task) -{ -} - /* Prepare to copy thread state - unlazy all lazy status */ #define prepare_to_copy(tsk) do { } while (0) diff --git a/arch/hexagon/include/asm/processor.h b/arch/hexagon/include/asm/processor.h index 615f7e49968e..0cd39c2cdf8f 100644 --- a/arch/hexagon/include/asm/processor.h +++ b/arch/hexagon/include/asm/processor.h @@ -60,10 +60,6 @@ struct thread_struct { #define KSTK_EIP(tsk) (pt_elr(task_pt_regs(tsk))) #define KSTK_ESP(tsk) (pt_psp(task_pt_regs(tsk))) -/* Free all resources held by a thread; defined in process.c */ -extern void release_thread(struct task_struct *dead_task); - -/* Get wait channel for task P. */ extern unsigned long __get_wchan(struct task_struct *p); /* The following stuff is pretty HEXAGON specific. */ diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c index f0552f98a7ba..e15eeaebd785 100644 --- a/arch/hexagon/kernel/process.c +++ b/arch/hexagon/kernel/process.c @@ -112,13 +112,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) return 0; } -/* - * Release any architecture-specific resources locked by thread - */ -void release_thread(struct task_struct *dead_task) -{ -} - /* * Some archs flush debug and FPU info here */ diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h index 757c2f6d8d4b..d1978e004054 100644 --- a/arch/ia64/include/asm/processor.h +++ b/arch/ia64/include/asm/processor.h @@ -318,13 +318,6 @@ struct thread_struct { struct mm_struct; struct task_struct; -/* - * Free all resources held by a thread. This is called after the - * parent of DEAD_TASK has collected the exit status of the task via - * wait(). - */ -#define release_thread(dead_task) - /* Get wait channel for task P. */ extern unsigned long __get_wchan (struct task_struct *p); diff --git a/arch/loongarch/include/asm/processor.h b/arch/loongarch/include/asm/processor.h index 1c4b4308378d..6954dc5d24e9 100644 --- a/arch/loongarch/include/asm/processor.h +++ b/arch/loongarch/include/asm/processor.h @@ -176,9 +176,6 @@ struct thread_struct { struct task_struct; -/* Free all resources held by a thread. */ -#define release_thread(thread) do { } while (0) - enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_HALT, IDLE_NOMWAIT, IDLE_POLL}; extern unsigned long boot_option_idle_override; diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h index d86b4009880b..7a2da780830b 100644 --- a/arch/m68k/include/asm/processor.h +++ b/arch/m68k/include/asm/processor.h @@ -145,11 +145,6 @@ static inline void start_thread(struct pt_regs * regs, unsigned long pc, /* Forward declaration, a strange C thing */ struct task_struct; -/* Free all resources held by a thread. */ -static inline void release_thread(struct task_struct *dead_task) -{ -} - unsigned long __get_wchan(struct task_struct *p); void show_registers(struct pt_regs *regs); diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h index 7e9e92670df3..4e193c7550df 100644 --- a/arch/microblaze/include/asm/processor.h +++ b/arch/microblaze/include/asm/processor.h @@ -63,11 +63,6 @@ struct thread_struct { .pgdir = swapper_pg_dir, \ } -/* Free all resources held by a thread. */ -static inline void release_thread(struct task_struct *dead_task) -{ -} - unsigned long __get_wchan(struct task_struct *p); /* The size allocated for kernel stacks. This _must_ be a power of two! */ diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index 4bb24579d12e..3fde1ff72bd1 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -344,9 +344,6 @@ struct thread_struct { struct task_struct; -/* Free all resources held by a thread. */ -#define release_thread(thread) do { } while(0) - /* * Do necessary setup to start up a newly executed thread. */ diff --git a/arch/nios2/include/asm/processor.h b/arch/nios2/include/asm/processor.h index b8125dfbcad2..8916d93d5c2d 100644 --- a/arch/nios2/include/asm/processor.h +++ b/arch/nios2/include/asm/processor.h @@ -64,11 +64,6 @@ extern void start_thread(struct pt_regs *regs, unsigned long pc, struct task_struct; -/* Free all resources held by a thread. */ -static inline void release_thread(struct task_struct *dead_task) -{ -} - extern unsigned long __get_wchan(struct task_struct *p); #define task_pt_regs(p) \ diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h index aa1699c18add..ed9efb430afa 100644 --- a/arch/openrisc/include/asm/processor.h +++ b/arch/openrisc/include/asm/processor.h @@ -72,7 +72,6 @@ struct thread_struct { void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp); -void release_thread(struct task_struct *); unsigned long __get_wchan(struct task_struct *p); #define cpu_relax() barrier() diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index 52dc983ddeba..f94b5ec06786 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -125,10 +125,6 @@ void show_regs(struct pt_regs *regs) show_registers(regs); } -void release_thread(struct task_struct *dead_task) -{ -} - /* * Copy the thread-specific (arch specific) info from the current * process to the new one p diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index 4621ceb51314..a608970b249a 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -266,9 +266,6 @@ on downward growing arches, it looks like this: struct mm_struct; -/* Free all resources held by a thread. */ -extern void release_thread(struct task_struct *); - extern unsigned long __get_wchan(struct task_struct *p); #define KSTK_EIP(tsk) ((tsk)->thread.regs.iaoq[0]) diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 7c37e09c92da..3db0e97e6c06 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -146,10 +146,6 @@ void flush_thread(void) */ } -void release_thread(struct task_struct *dead_task) -{ -} - /* * Idle thread support * diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index fdfaae194ddd..92e332415d02 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -75,7 +75,6 @@ extern int _chrp_type; struct task_struct; void start_thread(struct pt_regs *regs, unsigned long fdptr, unsigned long sp); -void release_thread(struct task_struct *); #define TS_FPR(i) fp_state.fpr[i][TS_FPROFFSET] #define TS_CKFPR(i) ckfp_state.fpr[i][TS_FPROFFSET] diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 0fbda89cd1bb..991cda25b9a9 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1655,11 +1655,6 @@ EXPORT_SYMBOL_GPL(set_thread_tidr); #endif /* CONFIG_PPC64 */ -void -release_thread(struct task_struct *t) -{ -} - /* * this gets called so that we can store coprocessor state into memory and * copy the current task into the new thread. diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 19eedd4af4cd..94a0590c6971 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -65,11 +65,6 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, extern void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp); -/* Free all resources held by a thread. */ -static inline void release_thread(struct task_struct *dead_task) -{ -} - extern unsigned long __get_wchan(struct task_struct *p); diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index bd66f8e34949..c52fe651eeba 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -186,9 +186,6 @@ struct pt_regs; void show_registers(struct pt_regs *regs); void show_cacheinfo(struct seq_file *m); -/* Free all resources held by a thread. */ -static inline void release_thread(struct task_struct *tsk) { } - /* Free guarded storage control block */ void guarded_storage_release(struct task_struct *tsk); void gs_load_bc_cb(struct pt_regs *regs); diff --git a/arch/sh/include/asm/processor_32.h b/arch/sh/include/asm/processor_32.h index 45240ec6b85a..27aebf1e75a2 100644 --- a/arch/sh/include/asm/processor_32.h +++ b/arch/sh/include/asm/processor_32.h @@ -127,9 +127,6 @@ struct task_struct; extern void start_thread(struct pt_regs *regs, unsigned long new_pc, unsigned long new_sp); -/* Free all resources held by a thread. */ -extern void release_thread(struct task_struct *); - /* * FPU lazy state save handling. */ diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index a808843375e7..92b6649d4929 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -84,11 +84,6 @@ void flush_thread(void) #endif } -void release_thread(struct task_struct *dead_task) -{ - /* do nothing */ -} - asmlinkage void ret_from_fork(void); asmlinkage void ret_from_kernel_thread(void); diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h index b26c35336b51..ba8b70ffec08 100644 --- a/arch/sparc/include/asm/processor_32.h +++ b/arch/sparc/include/asm/processor_32.h @@ -80,9 +80,6 @@ static inline void start_thread(struct pt_regs * regs, unsigned long pc, : "memory"); } -/* Free all resources held by a thread. */ -#define release_thread(tsk) do { } while(0) - unsigned long __get_wchan(struct task_struct *); #define task_pt_regs(tsk) ((tsk)->thread.kregs) diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h index 89850dff6b03..2667f35d5ea5 100644 --- a/arch/sparc/include/asm/processor_64.h +++ b/arch/sparc/include/asm/processor_64.h @@ -176,9 +176,6 @@ do { \ regs->tstate &= ~TSTATE_PEF; \ } while (0) -/* Free all resources held by a thread. */ -#define release_thread(tsk) do { } while (0) - unsigned long __get_wchan(struct task_struct *task); #define task_pt_regs(tsk) (task_thread_info(tsk)->kregs) diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h index d0fc1862da95..bb5f06480da9 100644 --- a/arch/um/include/asm/processor-generic.h +++ b/arch/um/include/asm/processor-generic.h @@ -55,10 +55,6 @@ struct thread_struct { .request = { 0 } \ } -static inline void release_thread(struct task_struct *task) -{ -} - /* * User space process size: 3GB (default). */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 356308c73951..67c9d73b31fa 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -587,9 +587,6 @@ static inline void load_sp0(unsigned long sp0) #endif /* CONFIG_PARAVIRT_XXL */ -/* Free all resources held by a thread. */ -extern void release_thread(struct task_struct *); - unsigned long __get_wchan(struct task_struct *p); /* diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index 76bc63127c66..5abde43c570c 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -221,9 +221,6 @@ struct thread_struct { struct task_struct; struct mm_struct; -/* Free all resources held by a thread. */ -#define release_thread(thread) do { } while(0) - extern unsigned long __get_wchan(struct task_struct *p); #define KSTK_EIP(tsk) (task_pt_regs(tsk)->pc) diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 81cab4b01edc..d6c48163c6de 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -127,6 +127,9 @@ static inline void put_task_struct_many(struct task_struct *t, int nr) void put_task_struct_rcu_user(struct task_struct *task); +/* Free all architecture-specific resources held by a thread. */ +void release_thread(struct task_struct *dead_task); + #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT extern int arch_task_struct_size __read_mostly; #else diff --git a/kernel/exit.c b/kernel/exit.c index 84021b24f79e..f4b7b058f4e6 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -183,6 +183,10 @@ void put_task_struct_rcu_user(struct task_struct *task) call_rcu(&task->rcu, delayed_put_task_struct); } +void __weak release_thread(struct task_struct *dead_task) +{ +} + void release_task(struct task_struct *p) { struct task_struct *leader; -- cgit v1.2.3 From 948084f0f6959f602f89f679522b706a72da0285 Mon Sep 17 00:00:00 2001 From: "Fabio M. De Francesco" Date: Sun, 21 Aug 2022 20:25:19 +0200 Subject: kexec: replace kmap() with kmap_local_page() kmap() is being deprecated in favor of kmap_local_page(). There are two main problems with kmap(): (1) It comes with an overhead as mapping space is restricted and protected by a global lock for synchronization and (2) it also requires global TLB invalidation when the kmap's pool wraps and it might block when the mapping space is fully utilized until a slot becomes available. With kmap_local_page() the mappings are per thread, CPU local, can take page faults, and can be called from any context (including interrupts). It is faster than kmap() in kernels with HIGHMEM enabled. Furthermore, the tasks can be preempted and, when they are scheduled to run again, the kernel virtual addresses are restored and are still valid. Since its use in kexec_core.c is safe everywhere, it should be preferred. Therefore, replace kmap() with kmap_local_page() in kexec_core.c. Tested on a QEMU/KVM x86_32 VM, 6GB RAM, booting a kernel with HIGHMEM64GB enabled. Link: https://lkml.kernel.org/r/20220821182519.9483-1-fmdefrancesco@gmail.com Signed-off-by: Fabio M. De Francesco Suggested-by: Ira Weiny Reviewed-by: Ira Weiny Acked-by: Baoquan He Signed-off-by: Andrew Morton --- kernel/kexec_core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 5ca4d40c9ec1..ca2743f9c634 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -809,7 +809,7 @@ static int kimage_load_normal_segment(struct kimage *image, if (result < 0) goto out; - ptr = kmap(page); + ptr = kmap_local_page(page); /* Start with a clear page */ clear_page(ptr); ptr += maddr & ~PAGE_MASK; @@ -822,7 +822,7 @@ static int kimage_load_normal_segment(struct kimage *image, memcpy(ptr, kbuf, uchunk); else result = copy_from_user(ptr, buf, uchunk); - kunmap(page); + kunmap_local(ptr); if (result) { result = -EFAULT; goto out; @@ -873,7 +873,7 @@ static int kimage_load_crash_segment(struct kimage *image, goto out; } arch_kexec_post_alloc_pages(page_address(page), 1, 0); - ptr = kmap(page); + ptr = kmap_local_page(page); ptr += maddr & ~PAGE_MASK; mchunk = min_t(size_t, mbytes, PAGE_SIZE - (maddr & ~PAGE_MASK)); @@ -889,7 +889,7 @@ static int kimage_load_crash_segment(struct kimage *image, else result = copy_from_user(ptr, buf, uchunk); kexec_flush_icache_page(page); - kunmap(page); + kunmap_local(ptr); arch_kexec_pre_free_pages(page_address(page), 1); if (result) { result = -EFAULT; -- cgit v1.2.3 From 5fdfa161b2043001f82cbce49e87e8e9f581d510 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 23 Aug 2022 17:26:32 +0200 Subject: task_work: use try_cmpxchg in task_work_add, task_work_cancel_match and task_work_run Use try_cmpxchg instead of cmpxchg (*ptr, old, new) == old in task_work_add, task_work_cancel_match and task_work_run. x86 CMPXCHG instruction returns success in ZF flag, so this change saves a compare after cmpxchg (and related move instruction in front of cmpxchg). Also, atomic_try_cmpxchg implicitly assigns old *ptr value to "old" when cmpxchg fails, enabling further code simplifications. The patch avoids extra memory read in case cmpxchg fails. Link: https://lkml.kernel.org/r/20220823152632.4517-1-ubizjak@gmail.com Signed-off-by: Uros Bizjak Signed-off-by: Andrew Morton --- kernel/task_work.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/task_work.c b/kernel/task_work.c index dff75bcde151..065e1ef8fc8d 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -47,12 +47,12 @@ int task_work_add(struct task_struct *task, struct callback_head *work, /* record the work call stack in order to print it in KASAN reports */ kasan_record_aux_stack(work); + head = READ_ONCE(task->task_works); do { - head = READ_ONCE(task->task_works); if (unlikely(head == &work_exited)) return -ESRCH; work->next = head; - } while (cmpxchg(&task->task_works, head, work) != head); + } while (!try_cmpxchg(&task->task_works, &head, work)); switch (notify) { case TWA_NONE: @@ -100,10 +100,12 @@ task_work_cancel_match(struct task_struct *task, * we raced with task_work_run(), *pprev == NULL/exited. */ raw_spin_lock_irqsave(&task->pi_lock, flags); - while ((work = READ_ONCE(*pprev))) { - if (!match(work, data)) + work = READ_ONCE(*pprev); + while (work) { + if (!match(work, data)) { pprev = &work->next; - else if (cmpxchg(pprev, work, work->next) == work) + work = READ_ONCE(*pprev); + } else if (try_cmpxchg(pprev, &work, work->next)) break; } raw_spin_unlock_irqrestore(&task->pi_lock, flags); @@ -151,16 +153,16 @@ void task_work_run(void) * work->func() can do task_work_add(), do not set * work_exited unless the list is empty. */ + work = READ_ONCE(task->task_works); do { head = NULL; - work = READ_ONCE(task->task_works); if (!work) { if (task->flags & PF_EXITING) head = &work_exited; else break; } - } while (cmpxchg(&task->task_works, work, head) != work); + } while (!try_cmpxchg(&task->task_works, &work, head)); if (!work) break; -- cgit v1.2.3 From 9a15193e23b780d1da77e3db18698beb0637897d Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 25 Aug 2022 16:56:03 +0200 Subject: smpboot: use atomic_try_cmpxchg in cpu_wait_death and cpu_report_death Use atomic_try_cmpxchg instead of atomic_cmpxchg (*ptr, old, new) == old in cpu_wait_death and cpu_report_death. x86 CMPXCHG instruction returns success in ZF flag, so this change saves a compare after cmpxchg (and related move instruction in front of cmpxchg). Also, atomic_try_cmpxchg implicitly assigns old *ptr value to "old" when cmpxchg fails, enabling further code simplifications. No functional change intended. Link: https://lkml.kernel.org/r/20220825145603.5811-1-ubizjak@gmail.com Signed-off-by: Uros Bizjak Signed-off-by: Andrew Morton --- kernel/smpboot.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/smpboot.c b/kernel/smpboot.c index b9f54544e749..2c7396da470c 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -433,7 +433,7 @@ bool cpu_wait_death(unsigned int cpu, int seconds) /* The outgoing CPU will normally get done quite quickly. */ if (atomic_read(&per_cpu(cpu_hotplug_state, cpu)) == CPU_DEAD) - goto update_state; + goto update_state_early; udelay(5); /* But if the outgoing CPU dawdles, wait increasingly long times. */ @@ -444,16 +444,17 @@ bool cpu_wait_death(unsigned int cpu, int seconds) break; sleep_jf = DIV_ROUND_UP(sleep_jf * 11, 10); } -update_state: +update_state_early: oldstate = atomic_read(&per_cpu(cpu_hotplug_state, cpu)); +update_state: if (oldstate == CPU_DEAD) { /* Outgoing CPU died normally, update state. */ smp_mb(); /* atomic_read() before update. */ atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_POST_DEAD); } else { /* Outgoing CPU still hasn't died, set state accordingly. */ - if (atomic_cmpxchg(&per_cpu(cpu_hotplug_state, cpu), - oldstate, CPU_BROKEN) != oldstate) + if (!atomic_try_cmpxchg(&per_cpu(cpu_hotplug_state, cpu), + &oldstate, CPU_BROKEN)) goto update_state; ret = false; } @@ -475,14 +476,14 @@ bool cpu_report_death(void) int newstate; int cpu = smp_processor_id(); + oldstate = atomic_read(&per_cpu(cpu_hotplug_state, cpu)); do { - oldstate = atomic_read(&per_cpu(cpu_hotplug_state, cpu)); if (oldstate != CPU_BROKEN) newstate = CPU_DEAD; else newstate = CPU_DEAD_FROZEN; - } while (atomic_cmpxchg(&per_cpu(cpu_hotplug_state, cpu), - oldstate, newstate) != oldstate); + } while (!atomic_try_cmpxchg(&per_cpu(cpu_hotplug_state, cpu), + &oldstate, newstate)); return newstate == CPU_DEAD; } -- cgit v1.2.3 From f81259c6dbcefb255fa473090cd975f3827bca89 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 26 Aug 2022 15:33:35 +0800 Subject: fail_function: switch to memdup_user_nul() helper Use memdup_user_nul() helper instead of open-coding to simplify the code. Link: https://lkml.kernel.org/r/20220826073337.2085798-1-yangyingliang@huawei.com Signed-off-by: Yang Yingliang Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton --- kernel/fail_function.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/fail_function.c b/kernel/fail_function.c index 60dc825ecc2b..03643e33e4c3 100644 --- a/kernel/fail_function.c +++ b/kernel/fail_function.c @@ -247,15 +247,11 @@ static ssize_t fei_write(struct file *file, const char __user *buffer, /* cut off if it is too long */ if (count > KSYM_NAME_LEN) count = KSYM_NAME_LEN; - buf = kmalloc(count + 1, GFP_KERNEL); - if (!buf) - return -ENOMEM; - if (copy_from_user(buf, buffer, count)) { - ret = -EFAULT; - goto out_free; - } - buf[count] = '\0'; + buf = memdup_user_nul(buffer, count); + if (IS_ERR(buf)) + return PTR_ERR(buf); + sym = strstrip(buf); mutex_lock(&fei_lock); @@ -308,7 +304,6 @@ static ssize_t fei_write(struct file *file, const char __user *buffer, } out: mutex_unlock(&fei_lock); -out_free: kfree(buf); return ret; } -- cgit v1.2.3 From cef9f5f866ad45a2dd64fed6e6b657043c2c6f17 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 26 Aug 2022 15:33:36 +0800 Subject: fail_function: refactor code of checking return value of register_kprobe() Refactor the error handling of register_kprobe() to improve readability. No functional change. Link: https://lkml.kernel.org/r/20220826073337.2085798-2-yangyingliang@huawei.com Signed-off-by: Yang Yingliang Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton --- kernel/fail_function.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/fail_function.c b/kernel/fail_function.c index 03643e33e4c3..893e8f9a9118 100644 --- a/kernel/fail_function.c +++ b/kernel/fail_function.c @@ -294,14 +294,13 @@ static ssize_t fei_write(struct file *file, const char __user *buffer, } ret = register_kprobe(&attr->kp); - if (!ret) - fei_debugfs_add_attr(attr); - if (ret < 0) + if (ret) { fei_attr_remove(attr); - else { - list_add_tail(&attr->list, &fei_attr_list); - ret = count; + goto out; } + fei_debugfs_add_attr(attr); + list_add_tail(&attr->list, &fei_attr_list); + ret = count; out: mutex_unlock(&fei_lock); kfree(buf); -- cgit v1.2.3 From d2e85432a2e0a6f31bd9489800f443228f020ed6 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 26 Aug 2022 15:33:37 +0800 Subject: fail_function: fix wrong use of fei_attr_remove() If register_kprobe() fails, the new attr is not added to the list yet, so it should call fei_attr_free() intstead. Link: https://lkml.kernel.org/r/20220826073337.2085798-3-yangyingliang@huawei.com Fixes: 4b1a29a7f542 ("error-injection: Support fault injection framework") Signed-off-by: Yang Yingliang Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton --- kernel/fail_function.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/fail_function.c b/kernel/fail_function.c index 893e8f9a9118..a7ccd2930c5f 100644 --- a/kernel/fail_function.c +++ b/kernel/fail_function.c @@ -295,7 +295,7 @@ static ssize_t fei_write(struct file *file, const char __user *buffer, ret = register_kprobe(&attr->kp); if (ret) { - fei_attr_remove(attr); + fei_attr_free(attr); goto out; } fei_debugfs_add_attr(attr); -- cgit v1.2.3 From 35783ccbe519b33f6652b2d7aafcfc82f10b1a1b Mon Sep 17 00:00:00 2001 From: wuchi Date: Thu, 1 Sep 2022 08:31:21 +0800 Subject: kernel/profile.c: simplify duplicated code in profile_setup() The code to parse option string "schedule/sleep/kvm" of cmdline in function profile_setup is redundant, so simplify that. Link: https://lkml.kernel.org/r/20220901003121.53597-1-wuchi.zero@gmail.com Signed-off-by: wuchi Reviewed-by: Andrew Morton Cc: Christoph Hellwig Signed-off-by: Andrew Morton --- kernel/profile.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) (limited to 'kernel') diff --git a/kernel/profile.c b/kernel/profile.c index 7ea01ba30e75..8a77769bc4b4 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -59,43 +59,39 @@ int profile_setup(char *str) static const char schedstr[] = "schedule"; static const char sleepstr[] = "sleep"; static const char kvmstr[] = "kvm"; + const char *select = NULL; int par; if (!strncmp(str, sleepstr, strlen(sleepstr))) { #ifdef CONFIG_SCHEDSTATS force_schedstat_enabled(); prof_on = SLEEP_PROFILING; - if (str[strlen(sleepstr)] == ',') - str += strlen(sleepstr) + 1; - if (get_option(&str, &par)) - prof_shift = clamp(par, 0, BITS_PER_LONG - 1); - pr_info("kernel sleep profiling enabled (shift: %u)\n", - prof_shift); + select = sleepstr; #else pr_warn("kernel sleep profiling requires CONFIG_SCHEDSTATS\n"); #endif /* CONFIG_SCHEDSTATS */ } else if (!strncmp(str, schedstr, strlen(schedstr))) { prof_on = SCHED_PROFILING; - if (str[strlen(schedstr)] == ',') - str += strlen(schedstr) + 1; - if (get_option(&str, &par)) - prof_shift = clamp(par, 0, BITS_PER_LONG - 1); - pr_info("kernel schedule profiling enabled (shift: %u)\n", - prof_shift); + select = schedstr; } else if (!strncmp(str, kvmstr, strlen(kvmstr))) { prof_on = KVM_PROFILING; - if (str[strlen(kvmstr)] == ',') - str += strlen(kvmstr) + 1; - if (get_option(&str, &par)) - prof_shift = clamp(par, 0, BITS_PER_LONG - 1); - pr_info("kernel KVM profiling enabled (shift: %u)\n", - prof_shift); + select = kvmstr; } else if (get_option(&str, &par)) { prof_shift = clamp(par, 0, BITS_PER_LONG - 1); prof_on = CPU_PROFILING; pr_info("kernel profiling enabled (shift: %u)\n", prof_shift); } + + if (select) { + if (str[strlen(select)] == ',') + str += strlen(select) + 1; + if (get_option(&str, &par)) + prof_shift = clamp(par, 0, BITS_PER_LONG - 1); + pr_info("kernel %s profiling enabled (shift: %u)\n", + select, prof_shift); + } + return 1; } __setup("profile=", profile_setup); -- cgit v1.2.3 From bfca3dd3d0680fc2fc7f659a152234afbac26e4d Mon Sep 17 00:00:00 2001 From: Petr Vorel Date: Thu, 1 Sep 2022 21:44:03 +0200 Subject: kernel/utsname_sysctl.c: print kernel arch Print the machine hardware name (UTS_MACHINE) in /proc/sys/kernel/arch. This helps people who debug kernel with initramfs with minimal environment (i.e. without coreutils or even busybox) or allow to open sysfs file instead of run 'uname -m' in high level languages. Link: https://lkml.kernel.org/r/20220901194403.3819-1-pvorel@suse.cz Signed-off-by: Petr Vorel Acked-by: Greg Kroah-Hartman Cc: David Sterba Cc: "Eric W . Biederman" Cc: Rafael J. Wysocki Signed-off-by: Andrew Morton --- Documentation/admin-guide/sysctl/kernel.rst | 5 +++++ kernel/utsname_sysctl.c | 7 +++++++ 2 files changed, 12 insertions(+) (limited to 'kernel') diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index ee6572b1edad..bbaa85194695 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -65,6 +65,11 @@ combining the following values: 4 s3_beep = ======= +arch +==== + +The machine hardware name, the same output as ``uname -m`` +(e.g. ``x86_64`` or ``aarch64``). auto_msgmni =========== diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c index 4ca61d49885b..7ffdd2cd5ff9 100644 --- a/kernel/utsname_sysctl.c +++ b/kernel/utsname_sysctl.c @@ -73,6 +73,13 @@ static DEFINE_CTL_TABLE_POLL(hostname_poll); static DEFINE_CTL_TABLE_POLL(domainname_poll); static struct ctl_table uts_kern_table[] = { + { + .procname = "arch", + .data = init_uts_ns.name.machine, + .maxlen = sizeof(init_uts_ns.name.machine), + .mode = 0444, + .proc_handler = proc_do_uts_string, + }, { .procname = "ostype", .data = init_uts_ns.name.sysname, -- cgit v1.2.3 From b814751175470b00969a317bf3192260750f9455 Mon Sep 17 00:00:00 2001 From: wuchi Date: Sat, 3 Sep 2022 21:52:33 +0800 Subject: latencytop: use the last element of latency_record of system In account_global_scheduler_latency(), when we don't find the matching latency_record we try to select one which is unused in latency_record[MAXLR], but the condition will skip the last one. if (i >= MAXLR-1) Fix that. Link: https://lkml.kernel.org/r/20220903135233.5225-1-wuchi.zero@gmail.com Signed-off-by: wuchi Reviewed-by: Andrew Morton Cc: Alexander Viro Cc: Luis Chamberlain Signed-off-by: Andrew Morton --- kernel/latencytop.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/latencytop.c b/kernel/latencytop.c index 76166df011a4..781249098cb6 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c @@ -112,7 +112,7 @@ static void __sched account_global_scheduler_latency(struct task_struct *tsk, struct latency_record *lat) { - int firstnonnull = MAXLR + 1; + int firstnonnull = MAXLR; int i; /* skip kernel threads for now */ @@ -150,7 +150,7 @@ account_global_scheduler_latency(struct task_struct *tsk, } i = firstnonnull; - if (i >= MAXLR - 1) + if (i >= MAXLR) return; /* Allocted a new one: */ -- cgit v1.2.3 From 83d87a4ddb3b4a42bb73b314b3d1acc3965a689f Mon Sep 17 00:00:00 2001 From: wuchi Date: Fri, 9 Sep 2022 18:10:25 +0800 Subject: relay: use kvcalloc to alloc page array in relay_alloc_page_array kvcalloc() is safer because it will check the integer overflows, and using it will simple the logic of allocation size. Link: https://lkml.kernel.org/r/20220909101025.82955-1-wuchi.zero@gmail.com Signed-off-by: wuchi Cc: Christoph Hellwig Cc: Jens Axboe Signed-off-by: Andrew Morton --- kernel/relay.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/relay.c b/kernel/relay.c index 6a611e779e95..d7edc934c56d 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -60,10 +60,7 @@ static const struct vm_operations_struct relay_file_mmap_ops = { */ static struct page **relay_alloc_page_array(unsigned int n_pages) { - const size_t pa_size = n_pages * sizeof(struct page *); - if (pa_size > PAGE_SIZE) - return vzalloc(pa_size); - return kzalloc(pa_size, GFP_KERNEL); + return kvcalloc(n_pages, sizeof(struct page *), GFP_KERNEL); } /* -- cgit v1.2.3 From ef79361b265dd229725ad62bc850f6913ef2f94a Mon Sep 17 00:00:00 2001 From: Xu Panda Date: Mon, 12 Sep 2022 07:15:57 +0000 Subject: fork: remove duplicate included header files linux/sched/mm.h is included more than once. Link: https://lkml.kernel.org/r/20220912071556.16811-1-xu.panda@zte.com.cn Signed-off-by: Xu Panda Reported-by: Zeal Robot Cc: Andy Lutomirski Cc: Christian Brauner (Microsoft) Cc: "Eric W . Biederman" Cc: Fenghua Yu Cc: Liam Howlett Cc: Sebastian Andrzej Siewior Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- kernel/fork.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index 90c85b17bf69..3b8784e3ce97 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -97,7 +97,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3