From 668a9fe5c6a1bcac6b65d5e9b91a9eca86f782a3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 8 Jun 2022 14:45:35 +0100 Subject: genirq: PM: Use runtime PM for chained interrupts When requesting an interrupt, we correctly call into the runtime PM framework to guarantee that the underlying interrupt controller is up and running. However, we fail to do so for chained interrupt controllers, as the mux interrupt is not requested along the same path. Augment __irq_do_set_handler() to call into the runtime PM code in this case, making sure the PM flow is the same for all interrupts. Reported-by: Lucas Stach Tested-by: Liu Ying Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/26973cddee5f527ea17184c0f3fccb70bc8969a0.camel@pengutronix.de --- kernel/irq/chip.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index e6b8e564b37f..886789dcee43 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -1006,8 +1006,10 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, if (desc->irq_data.chip != &no_irq_chip) mask_ack_irq(desc); irq_state_set_disabled(desc); - if (is_chained) + if (is_chained) { desc->action = NULL; + WARN_ON(irq_chip_pm_put(irq_desc_get_irq_data(desc))); + } desc->depth = 1; } desc->handle_irq = handle; @@ -1033,6 +1035,7 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, irq_settings_set_norequest(desc); irq_settings_set_nothread(desc); desc->action = &chained_action; + WARN_ON(irq_chip_pm_get(irq_desc_get_irq_data(desc))); irq_activate_and_startup(desc, IRQ_RESEND); } } -- cgit v1.2.3 From 04193d590b390ec7a0592630f46d559ec6564ba1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 7 Jun 2022 22:41:55 +0200 Subject: sched: Fix balance_push() vs __sched_setscheduler() The purpose of balance_push() is to act as a filter on task selection in the case of CPU hotplug, specifically when taking the CPU out. It does this by (ab)using the balance callback infrastructure, with the express purpose of keeping all the unlikely/odd cases in a single place. In order to serve its purpose, the balance_push_callback needs to be (exclusively) on the callback list at all times (noting that the callback always places itself back on the list the moment it runs, also noting that when the CPU goes down, regular balancing concerns are moot, so ignoring them is fine). And here-in lies the problem, __sched_setscheduler()'s use of splice_balance_callbacks() takes the callbacks off the list across a lock-break, making it possible for, an interleaving, __schedule() to see an empty list and not get filtered. Fixes: ae7927023243 ("sched: Optimize finish_lock_switch()") Reported-by: Jing-Ting Wu Signed-off-by: Peter Zijlstra (Intel) Tested-by: Jing-Ting Wu Link: https://lkml.kernel.org/r/20220519134706.GH2578@worktop.programming.kicks-ass.net --- kernel/sched/core.c | 36 +++++++++++++++++++++++++++++++++--- kernel/sched/sched.h | 5 +++++ 2 files changed, 38 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index bfa7452ca92e..da0bf6fe9ecd 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4798,25 +4798,55 @@ static void do_balance_callbacks(struct rq *rq, struct callback_head *head) static void balance_push(struct rq *rq); +/* + * balance_push_callback is a right abuse of the callback interface and plays + * by significantly different rules. + * + * Where the normal balance_callback's purpose is to be ran in the same context + * that queued it (only later, when it's safe to drop rq->lock again), + * balance_push_callback is specifically targeted at __schedule(). + * + * This abuse is tolerated because it places all the unlikely/odd cases behind + * a single test, namely: rq->balance_callback == NULL. + */ struct callback_head balance_push_callback = { .next = NULL, .func = (void (*)(struct callback_head *))balance_push, }; -static inline struct callback_head *splice_balance_callbacks(struct rq *rq) +static inline struct callback_head * +__splice_balance_callbacks(struct rq *rq, bool split) { struct callback_head *head = rq->balance_callback; + if (likely(!head)) + return NULL; + lockdep_assert_rq_held(rq); - if (head) + /* + * Must not take balance_push_callback off the list when + * splice_balance_callbacks() and balance_callbacks() are not + * in the same rq->lock section. + * + * In that case it would be possible for __schedule() to interleave + * and observe the list empty. + */ + if (split && head == &balance_push_callback) + head = NULL; + else rq->balance_callback = NULL; return head; } +static inline struct callback_head *splice_balance_callbacks(struct rq *rq) +{ + return __splice_balance_callbacks(rq, true); +} + static void __balance_callbacks(struct rq *rq) { - do_balance_callbacks(rq, splice_balance_callbacks(rq)); + do_balance_callbacks(rq, __splice_balance_callbacks(rq, false)); } static inline void balance_callbacks(struct rq *rq, struct callback_head *head) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 01259611beb9..47b89a0fc6e5 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1693,6 +1693,11 @@ queue_balance_callback(struct rq *rq, { lockdep_assert_rq_held(rq); + /* + * Don't (re)queue an already queued item; nor queue anything when + * balance_push() is active, see the comment with + * balance_push_callback. + */ if (unlikely(head->next || rq->balance_callback == &balance_push_callback)) return; -- cgit v1.2.3 From 4051a81774d6d8e28192742c26999d6f29bc0e68 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 17 May 2022 11:16:14 +0200 Subject: locking/lockdep: Use sched_clock() for random numbers Since the rewrote of prandom_u32(), in the commit mentioned below, the function uses sleeping locks which extracing random numbers and filling the batch. This breaks lockdep on PREEMPT_RT because lock_pin_lock() disables interrupts while calling __lock_pin_lock(). This can't be moved earlier because the main user of the function (rq_pin_lock()) invokes that function after disabling interrupts in order to acquire the lock. The cookie does not require random numbers as its goal is to provide a random value in order to notice unexpected "unlock + lock" sites. Use sched_clock() to provide random numbers. Fixes: a0103f4d86f88 ("random32: use real rng for non-deterministic randomness") Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/YoNn3pTkm5+QzE5k@linutronix.de --- kernel/locking/lockdep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 81e87280513e..f06b91ca6482 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -5432,7 +5432,7 @@ static struct pin_cookie __lock_pin_lock(struct lockdep_map *lock) * be guessable and still allows some pin nesting in * our u32 pin_count. */ - cookie.val = 1 + (prandom_u32() >> 16); + cookie.val = 1 + (sched_clock() & 0xffff); hlock->pin_count += cookie.val; return cookie; } -- cgit v1.2.3 From 57cd6d157eb479f0a8e820fd36b7240845c8a937 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 31 May 2022 10:59:10 -0700 Subject: cfi: Fix __cfi_slowpath_diag RCU usage with cpuidle RCU_NONIDLE usage during __cfi_slowpath_diag can result in an invalid RCU state in the cpuidle code path: WARNING: CPU: 1 PID: 0 at kernel/rcu/tree.c:613 rcu_eqs_enter+0xe4/0x138 ... Call trace: rcu_eqs_enter+0xe4/0x138 rcu_idle_enter+0xa8/0x100 cpuidle_enter_state+0x154/0x3a8 cpuidle_enter+0x3c/0x58 do_idle.llvm.6590768638138871020+0x1f4/0x2ec cpu_startup_entry+0x28/0x2c secondary_start_kernel+0x1b8/0x220 __secondary_switched+0x94/0x98 Instead, call rcu_irq_enter/exit to wake up RCU only when needed and disable interrupts for the entire CFI shadow/module check when we do. Signed-off-by: Sami Tolvanen Link: https://lore.kernel.org/r/20220531175910.890307-1-samitolvanen@google.com Fixes: cf68fffb66d6 ("add support for Clang CFI") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook --- kernel/cfi.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/cfi.c b/kernel/cfi.c index 9594cfd1cf2c..08102d19ec15 100644 --- a/kernel/cfi.c +++ b/kernel/cfi.c @@ -281,6 +281,8 @@ static inline cfi_check_fn find_module_check_fn(unsigned long ptr) static inline cfi_check_fn find_check_fn(unsigned long ptr) { cfi_check_fn fn = NULL; + unsigned long flags; + bool rcu_idle; if (is_kernel_text(ptr)) return __cfi_check; @@ -290,13 +292,21 @@ static inline cfi_check_fn find_check_fn(unsigned long ptr) * the shadow and __module_address use RCU, so we need to wake it * up if necessary. */ - RCU_NONIDLE({ - if (IS_ENABLED(CONFIG_CFI_CLANG_SHADOW)) - fn = find_shadow_check_fn(ptr); + rcu_idle = !rcu_is_watching(); + if (rcu_idle) { + local_irq_save(flags); + rcu_irq_enter(); + } + + if (IS_ENABLED(CONFIG_CFI_CLANG_SHADOW)) + fn = find_shadow_check_fn(ptr); + if (!fn) + fn = find_module_check_fn(ptr); - if (!fn) - fn = find_module_check_fn(ptr); - }); + if (rcu_idle) { + rcu_irq_exit(); + local_irq_restore(flags); + } return fn; } -- cgit v1.2.3 From d1a374a1aeb7e31191448e225ed2f9c5e894f280 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Wed, 15 Jun 2022 09:51:51 +0530 Subject: bpf: Limit maximum modifier chain length in btf_check_type_tags On processing a module BTF of module built for an older kernel, we might sometimes find that some type points to itself forming a loop. If such a type is a modifier, btf_check_type_tags's while loop following modifier chain will be caught in an infinite loop. Fix this by defining a maximum chain length and bailing out if we spin any longer than that. Fixes: eb596b090558 ("bpf: Ensure type tags precede modifiers in BTF") Reported-by: Daniel Borkmann Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20220615042151.2266537-1-memxor@gmail.com --- kernel/bpf/btf.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel') diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 63d0ac7dfe2f..eb12d4f705cc 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -4815,6 +4815,7 @@ static int btf_check_type_tags(struct btf_verifier_env *env, n = btf_nr_types(btf); for (i = start_id; i < n; i++) { const struct btf_type *t; + int chain_limit = 32; u32 cur_id = i; t = btf_type_by_id(btf, i); @@ -4827,6 +4828,10 @@ static int btf_check_type_tags(struct btf_verifier_env *env, in_tags = btf_type_is_type_tag(t); while (btf_type_is_modifier(t)) { + if (!chain_limit--) { + btf_verifier_log(env, "Max chain length or cycle detected"); + return -ELOOP; + } if (btf_type_is_type_tag(t)) { if (!in_tags) { btf_verifier_log(env, "Type tags don't precede modifiers"); -- cgit v1.2.3 From c3230283e2819a69dad2cf7a63143fde8bab8b5c Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 15 Jun 2022 18:28:04 +0200 Subject: printk: Block console kthreads when direct printing will be required There are known situations when the console kthreads are not reliable or does not work in principle, for example, early boot, panic, shutdown. For these situations there is the direct (legacy) mode when printk() tries to get console_lock() and flush the messages directly. It works very well during the early boot when the console kthreads are not available at all. It gets more complicated in the other situations when console kthreads might be actively printing and block console_trylock() in printk(). The same problem is in the legacy code as well. Any console_lock() owner could block console_trylock() in printk(). It is solved by a trick that the current console_lock() owner is responsible for printing all pending messages. It is actually the reason why there is the risk of softlockups and why the console kthreads were introduced. The console kthreads use the same approach. They are responsible for printing the messages by definition. So that they handle the messages anytime when they are awake and see new ones. The global console_lock is available when there is nothing to do. It should work well when the problematic context is correctly detected and printk() switches to the direct mode. But it seems that it is not enough in practice. There are reports that the messages are not printed during panic() or shutdown() even though printk() tries to use the direct mode here. The problem seems to be that console kthreads become active in these situation as well. They steel the job before other CPUs are stopped. Then they are stopped in the middle of the job and block the global console_lock. First part of the solution is to block console kthreads when the system is in a problematic state and requires the direct printk() mode. Link: https://lore.kernel.org/r/20220610205038.GA3050413@paulmck-ThinkPad-P17-Gen-1 Link: https://lore.kernel.org/r/CAMdYzYpF4FNTBPZsEFeWRuEwSies36QM_As8osPWZSr2q-viEA@mail.gmail.com Suggested-by: John Ogness Tested-by: Paul E. McKenney Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20220615162805.27962-2-pmladek@suse.com --- kernel/printk/printk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index ea3dd55709e7..45c6c2b0b104 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3729,7 +3729,9 @@ static bool printer_should_wake(struct console *con, u64 seq) return true; if (con->blocked || - console_kthreads_atomically_blocked()) { + console_kthreads_atomically_blocked() || + system_state > SYSTEM_RUNNING || + oops_in_progress) { return false; } -- cgit v1.2.3 From b87f02307d3cfbda768520f0687c51ca77e14fc3 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 15 Jun 2022 18:28:05 +0200 Subject: printk: Wait for the global console lock when the system is going down There are reports that the console kthreads block the global console lock when the system is going down, for example, reboot, panic. First part of the solution was to block kthreads in these problematic system states so they stopped handling newly added messages. Second part of the solution is to wait when for the kthreads when they are actively printing. It solves the problem when a message was printed before the system entered the problematic state and the kthreads managed to step in. A busy waiting has to be used because panic() can be called in any context and in an unknown state of the scheduler. There must be a timeout because the kthread might get stuck or sleeping and never release the lock. The timeout 10s is an arbitrary value inspired by the softlockup timeout. Link: https://lore.kernel.org/r/20220610205038.GA3050413@paulmck-ThinkPad-P17-Gen-1 Link: https://lore.kernel.org/r/CAMdYzYpF4FNTBPZsEFeWRuEwSies36QM_As8osPWZSr2q-viEA@mail.gmail.com Signed-off-by: Petr Mladek Tested-by: Paul E. McKenney Link: https://lore.kernel.org/r/20220615162805.27962-3-pmladek@suse.com --- include/linux/printk.h | 5 +++++ kernel/panic.c | 2 ++ kernel/printk/internal.h | 2 ++ kernel/printk/printk.c | 4 ++++ kernel/printk/printk_safe.c | 32 ++++++++++++++++++++++++++++++++ kernel/reboot.c | 2 ++ 6 files changed, 47 insertions(+) (limited to 'kernel') diff --git a/include/linux/printk.h b/include/linux/printk.h index cd26aab0ab2a..c1e07c0652c7 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -174,6 +174,7 @@ extern void printk_prefer_direct_enter(void); extern void printk_prefer_direct_exit(void); extern bool pr_flush(int timeout_ms, bool reset_on_progress); +extern void try_block_console_kthreads(int timeout_ms); /* * Please don't use printk_ratelimit(), because it shares ratelimiting state @@ -238,6 +239,10 @@ static inline bool pr_flush(int timeout_ms, bool reset_on_progress) return true; } +static inline void try_block_console_kthreads(int timeout_ms) +{ +} + static inline int printk_ratelimit(void) { return 0; diff --git a/kernel/panic.c b/kernel/panic.c index 6737b2332275..fe73d18ecdf0 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -273,6 +273,7 @@ void panic(const char *fmt, ...) * unfortunately means it may not be hardened to work in a * panic situation. */ + try_block_console_kthreads(10000); smp_send_stop(); } else { /* @@ -280,6 +281,7 @@ void panic(const char *fmt, ...) * kmsg_dump, we will need architecture dependent extra * works in addition to stopping other CPUs. */ + try_block_console_kthreads(10000); crash_smp_send_stop(); } diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index d947ca6c84f9..e7d8578860ad 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -20,6 +20,8 @@ enum printk_info_flags { LOG_CONT = 8, /* text is a fragment of a continuation line */ }; +extern bool block_console_kthreads; + __printf(4, 0) int vprintk_store(int facility, int level, const struct dev_printk_info *dev_info, diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 45c6c2b0b104..b095fb5f5f61 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -250,6 +250,9 @@ static atomic_t console_kthreads_active = ATOMIC_INIT(0); #define console_kthread_printing_exit() \ atomic_dec(&console_kthreads_active) +/* Block console kthreads to avoid processing new messages. */ +bool block_console_kthreads; + /* * Helper macros to handle lockdep when locking/unlocking console_sem. We use * macros instead of functions so that _RET_IP_ contains useful information. @@ -3730,6 +3733,7 @@ static bool printer_should_wake(struct console *con, u64 seq) if (con->blocked || console_kthreads_atomically_blocked() || + block_console_kthreads || system_state > SYSTEM_RUNNING || oops_in_progress) { return false; diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index ef0f9a2044da..caac4de1ea59 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -8,7 +8,9 @@ #include #include #include +#include #include +#include #include "internal.h" @@ -50,3 +52,33 @@ asmlinkage int vprintk(const char *fmt, va_list args) return vprintk_default(fmt, args); } EXPORT_SYMBOL(vprintk); + +/** + * try_block_console_kthreads() - Try to block console kthreads and + * make the global console_lock() avaialble + * + * @timeout_ms: The maximum time (in ms) to wait. + * + * Prevent console kthreads from starting processing new messages. Wait + * until the global console_lock() become available. + * + * Context: Can be called in any context. + */ +void try_block_console_kthreads(int timeout_ms) +{ + block_console_kthreads = true; + + /* Do not wait when the console lock could not be safely taken. */ + if (this_cpu_read(printk_context) || in_nmi()) + return; + + while (timeout_ms > 0) { + if (console_trylock()) { + console_unlock(); + return; + } + + udelay(1000); + timeout_ms -= 1; + } +} diff --git a/kernel/reboot.c b/kernel/reboot.c index 4177645e74d6..310363685502 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -74,6 +74,7 @@ void kernel_restart_prepare(char *cmd) { blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); system_state = SYSTEM_RESTART; + try_block_console_kthreads(10000); usermodehelper_disable(); device_shutdown(); } @@ -262,6 +263,7 @@ static void kernel_shutdown_prepare(enum system_states state) blocking_notifier_call_chain(&reboot_notifier_list, (state == SYSTEM_HALT) ? SYS_HALT : SYS_POWER_OFF, NULL); system_state = state; + try_block_console_kthreads(10000); usermodehelper_disable(); device_shutdown(); } -- cgit v1.2.3 From ef79c396c664be99d0c5660dc75fe863c1e20315 Mon Sep 17 00:00:00 2001 From: Christian Göttsche Date: Wed, 15 Jun 2022 17:44:31 +0200 Subject: audit: free module name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reset the type of the record last as the helper `audit_free_module()` depends on it. unreferenced object 0xffff888153b707f0 (size 16): comm "modprobe", pid 1319, jiffies 4295110033 (age 1083.016s) hex dump (first 16 bytes): 62 69 6e 66 6d 74 5f 6d 69 73 63 00 6b 6b 6b a5 binfmt_misc.kkk. backtrace: [] kstrdup+0x2b/0x50 [] __audit_log_kern_module+0x4d/0xf0 [] load_module+0x9d4/0x2e10 [] __do_sys_finit_module+0x114/0x1b0 [] do_syscall_64+0x34/0x80 [] entry_SYSCALL_64_after_hwframe+0x46/0xb0 Cc: stable@vger.kernel.org Fixes: 12c5e81d3fd0 ("audit: prepare audit_context for use in calling contexts beyond syscalls") Signed-off-by: Christian Göttsche Signed-off-by: Paul Moore --- kernel/auditsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/auditsc.c b/kernel/auditsc.c index f3a2abd6d1a1..3a8c9d744800 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1014,10 +1014,10 @@ static void audit_reset_context(struct audit_context *ctx) ctx->target_comm[0] = '\0'; unroll_tree_refs(ctx, NULL, 0); WARN_ON(!list_empty(&ctx->killed_trees)); - ctx->type = 0; audit_free_module(ctx); ctx->fds[0] = -1; audit_proctitle_free(ctx); + ctx->type = 0; /* reset last for audit_free_*() */ } static inline struct audit_context *audit_alloc_context(enum audit_state state) -- cgit v1.2.3 From d25c83c6606ffc3abdf0868136ad3399f648ad70 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Tue, 15 Mar 2022 11:24:44 +0100 Subject: kthread: make it clear that kthread_create_on_node() might be terminated by any fatal signal The comments in kernel/kthread.c create a feeling that only SIGKILL is able to terminate the creation of kernel kthreads by kthread_create()/_on_node()/_on_cpu() APIs. In reality, wait_for_completion_killable() might be killed by any fatal signal that does not have a custom handler: (!siginmask(signr, SIG_KERNEL_IGNORE_MASK|SIG_KERNEL_STOP_MASK) && \ (t)->sighand->action[(signr)-1].sa.sa_handler == SIG_DFL) static inline void signal_wake_up(struct task_struct *t, bool resume) { signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0); } static void complete_signal(int sig, struct task_struct *p, enum pid_type type) { [...] /* * Found a killable thread. If the signal will be fatal, * then start taking the whole group down immediately. */ if (sig_fatal(p, sig) ...) { if (!sig_kernel_coredump(sig)) { [...] do { task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); } while_each_thread(p, t); return; } } } Update the comments in kernel/kthread.c to make this more obvious. The motivation for this change was debugging why a module initialization failed. The module was being loaded from initrd. It "magically" failed when systemd was switching to the real root. The clean up operations sent SIGTERM to various pending processed that were started from initrd. Link: https://lkml.kernel.org/r/20220315102444.2380-1-pmladek@suse.com Signed-off-by: Petr Mladek Reviewed-by: "Eric W. Biederman" Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Kees Cook Cc: Marco Elver Cc: Jens Axboe Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- kernel/kthread.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/kthread.c b/kernel/kthread.c index 544fd4097406..3c677918d8f2 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -340,7 +340,7 @@ static int kthread(void *_create) self = to_kthread(current); - /* If user was SIGKILLed, I release the structure. */ + /* Release the structure when caller killed by a fatal signal. */ done = xchg(&create->done, NULL); if (!done) { kfree(create); @@ -398,7 +398,7 @@ static void create_kthread(struct kthread_create_info *create) /* We want our own signal handler (we take no signals by default). */ pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD); if (pid < 0) { - /* If user was SIGKILLed, I release the structure. */ + /* Release the structure when caller killed by a fatal signal. */ struct completion *done = xchg(&create->done, NULL); if (!done) { @@ -440,9 +440,9 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data), */ if (unlikely(wait_for_completion_killable(&done))) { /* - * If I was SIGKILLed before kthreadd (or new kernel thread) - * calls complete(), leave the cleanup of this structure to - * that thread. + * If I was killed by a fatal signal before kthreadd (or new + * kernel thread) calls complete(), leave the cleanup of this + * structure to that thread. */ if (xchg(&create->done, NULL)) return ERR_PTR(-EINTR); @@ -876,7 +876,7 @@ fail_task: * * Returns a pointer to the allocated worker on success, ERR_PTR(-ENOMEM) * when the needed structures could not get allocated, and ERR_PTR(-EINTR) - * when the worker was SIGKILLed. + * when the caller was killed by a fatal signal. */ struct kthread_worker * kthread_create_worker(unsigned int flags, const char namefmt[], ...) @@ -925,7 +925,7 @@ EXPORT_SYMBOL(kthread_create_worker); * Return: * The pointer to the allocated worker on success, ERR_PTR(-ENOMEM) * when the needed structures could not get allocated, and ERR_PTR(-EINTR) - * when the worker was SIGKILLed. + * when the caller was killed by a fatal signal. */ struct kthread_worker * kthread_create_worker_on_cpu(int cpu, unsigned int flags, -- cgit v1.2.3 From eb1b2985fe5c5f02e43e4c0d47bbe7ed835007f3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 15 Jun 2022 13:21:16 +0200 Subject: ftrace: Keep address offset in ftrace_lookup_symbols We want to store the resolved address on the same index as the symbol string, because that's the user (bpf kprobe link) code assumption. Also making sure we don't store duplicates that might be present in kallsyms. Acked-by: Song Liu Acked-by: Steven Rostedt (Google) Fixes: bed0d9a50dac ("ftrace: Add ftrace_lookup_symbols function") Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20220615112118.497303-3-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/trace/ftrace.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index e750fe141a60..601ccf1b2f09 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -8029,15 +8029,23 @@ static int kallsyms_callback(void *data, const char *name, struct module *mod, unsigned long addr) { struct kallsyms_data *args = data; + const char **sym; + int idx; - if (!bsearch(&name, args->syms, args->cnt, sizeof(*args->syms), symbols_cmp)) + sym = bsearch(&name, args->syms, args->cnt, sizeof(*args->syms), symbols_cmp); + if (!sym) + return 0; + + idx = sym - args->syms; + if (args->addrs[idx]) return 0; addr = ftrace_location(addr); if (!addr) return 0; - args->addrs[args->found++] = addr; + args->addrs[idx] = addr; + args->found++; return args->found == args->cnt ? 1 : 0; } @@ -8062,6 +8070,7 @@ int ftrace_lookup_symbols(const char **sorted_syms, size_t cnt, unsigned long *a struct kallsyms_data args; int err; + memset(addrs, 0, sizeof(*addrs) * cnt); args.addrs = addrs; args.syms = sorted_syms; args.cnt = cnt; -- cgit v1.2.3 From eb5fb0325698d05f0bf78d322de82c451a3685a2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 15 Jun 2022 13:21:17 +0200 Subject: bpf: Force cookies array to follow symbols sorting When user specifies symbols and cookies for kprobe_multi link interface it's very likely the cookies will be misplaced and returned to wrong functions (via get_attach_cookie helper). The reason is that to resolve the provided functions we sort them before passing them to ftrace_lookup_symbols, but we do not do the same sort on the cookie values. Fixing this by using sort_r function with custom swap callback that swaps cookie values as well. Fixes: 0236fec57a15 ("bpf: Resolve symbols with ftrace_lookup_symbols for kprobe multi link") Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20220615112118.497303-4-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/trace/bpf_trace.c | 60 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 45 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 7a13e6ac6327..88589d74a892 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2423,7 +2423,7 @@ kprobe_multi_link_handler(struct fprobe *fp, unsigned long entry_ip, kprobe_multi_link_prog_run(link, entry_ip, regs); } -static int symbols_cmp(const void *a, const void *b) +static int symbols_cmp_r(const void *a, const void *b, const void *priv) { const char **str_a = (const char **) a; const char **str_b = (const char **) b; @@ -2431,6 +2431,28 @@ static int symbols_cmp(const void *a, const void *b) return strcmp(*str_a, *str_b); } +struct multi_symbols_sort { + const char **funcs; + u64 *cookies; +}; + +static void symbols_swap_r(void *a, void *b, int size, const void *priv) +{ + const struct multi_symbols_sort *data = priv; + const char **name_a = a, **name_b = b; + + swap(*name_a, *name_b); + + /* If defined, swap also related cookies. */ + if (data->cookies) { + u64 *cookie_a, *cookie_b; + + cookie_a = data->cookies + (name_a - data->funcs); + cookie_b = data->cookies + (name_b - data->funcs); + swap(*cookie_a, *cookie_b); + } +} + int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { struct bpf_kprobe_multi_link *link = NULL; @@ -2468,38 +2490,46 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr if (!addrs) return -ENOMEM; + ucookies = u64_to_user_ptr(attr->link_create.kprobe_multi.cookies); + if (ucookies) { + cookies = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL); + if (!cookies) { + err = -ENOMEM; + goto error; + } + if (copy_from_user(cookies, ucookies, size)) { + err = -EFAULT; + goto error; + } + } + if (uaddrs) { if (copy_from_user(addrs, uaddrs, size)) { err = -EFAULT; goto error; } } else { + struct multi_symbols_sort data = { + .cookies = cookies, + }; struct user_syms us; err = copy_user_syms(&us, usyms, cnt); if (err) goto error; - sort(us.syms, cnt, sizeof(*us.syms), symbols_cmp, NULL); + if (cookies) + data.funcs = us.syms; + + sort_r(us.syms, cnt, sizeof(*us.syms), symbols_cmp_r, + symbols_swap_r, &data); + err = ftrace_lookup_symbols(us.syms, cnt, addrs); free_user_syms(&us); if (err) goto error; } - ucookies = u64_to_user_ptr(attr->link_create.kprobe_multi.cookies); - if (ucookies) { - cookies = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL); - if (!cookies) { - err = -ENOMEM; - goto error; - } - if (copy_from_user(cookies, ucookies, size)) { - err = -EFAULT; - goto error; - } - } - link = kzalloc(sizeof(*link), GFP_KERNEL); if (!link) { err = -ENOMEM; -- cgit v1.2.3 From 5cf9c91ba927119fc6606b938b1895bb2459d3bc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jun 2022 09:48:25 +0200 Subject: block: serialize all debugfs operations using q->debugfs_mutex Various places like I/O schedulers or the QOS infrastructure try to register debugfs files on demans, which can race with creating and removing the main queue debugfs directory. Use the existing debugfs_mutex to serialize all debugfs operations that rely on q->debugfs_dir or the directories hanging off it. To make the teardown code a little simpler declare all debugfs dentry pointers and not just the main one uncoditionally in blkdev.h. Move debugfs_mutex next to the dentries that it protects and document what it is used for. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220614074827.458955-3-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 25 ++++++++++++++++++++----- block/blk-mq-debugfs.h | 5 ----- block/blk-mq-sched.c | 11 +++++++++++ block/blk-rq-qos.c | 2 ++ block/blk-rq-qos.h | 7 ++++++- block/blk-sysfs.c | 20 +++++++++----------- include/linux/blkdev.h | 8 ++++---- kernel/trace/blktrace.c | 3 --- 8 files changed, 52 insertions(+), 29 deletions(-) (limited to 'kernel') diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 7e4136a60e1c..f0fcfe1387cb 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -711,11 +711,6 @@ void blk_mq_debugfs_register(struct request_queue *q) } } -void blk_mq_debugfs_unregister(struct request_queue *q) -{ - q->sched_debugfs_dir = NULL; -} - static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx) { @@ -746,6 +741,8 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q, void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx) { + if (!hctx->queue->debugfs_dir) + return; debugfs_remove_recursive(hctx->debugfs_dir); hctx->sched_debugfs_dir = NULL; hctx->debugfs_dir = NULL; @@ -773,6 +770,8 @@ void blk_mq_debugfs_register_sched(struct request_queue *q) { struct elevator_type *e = q->elevator->type; + lockdep_assert_held(&q->debugfs_mutex); + /* * If the parent directory has not been created yet, return, we will be * called again later on and the directory/files will be created then. @@ -790,6 +789,8 @@ void blk_mq_debugfs_register_sched(struct request_queue *q) void blk_mq_debugfs_unregister_sched(struct request_queue *q) { + lockdep_assert_held(&q->debugfs_mutex); + debugfs_remove_recursive(q->sched_debugfs_dir); q->sched_debugfs_dir = NULL; } @@ -811,6 +812,10 @@ static const char *rq_qos_id_to_name(enum rq_qos_id id) void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos) { + lockdep_assert_held(&rqos->q->debugfs_mutex); + + if (!rqos->q->debugfs_dir) + return; debugfs_remove_recursive(rqos->debugfs_dir); rqos->debugfs_dir = NULL; } @@ -820,6 +825,8 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) struct request_queue *q = rqos->q; const char *dir_name = rq_qos_id_to_name(rqos->id); + lockdep_assert_held(&q->debugfs_mutex); + if (rqos->debugfs_dir || !rqos->ops->debugfs_attrs) return; @@ -835,6 +842,8 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q) { + lockdep_assert_held(&q->debugfs_mutex); + debugfs_remove_recursive(q->rqos_debugfs_dir); q->rqos_debugfs_dir = NULL; } @@ -844,6 +853,8 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, { struct elevator_type *e = q->elevator->type; + lockdep_assert_held(&q->debugfs_mutex); + /* * If the parent debugfs directory has not been created yet, return; * We will be called again later on with appropriate parent debugfs @@ -863,6 +874,10 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx) { + lockdep_assert_held(&hctx->queue->debugfs_mutex); + + if (!hctx->queue->debugfs_dir) + return; debugfs_remove_recursive(hctx->sched_debugfs_dir); hctx->sched_debugfs_dir = NULL; } diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h index 69918f4170d6..771d45832878 100644 --- a/block/blk-mq-debugfs.h +++ b/block/blk-mq-debugfs.h @@ -21,7 +21,6 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq); int blk_mq_debugfs_rq_show(struct seq_file *m, void *v); void blk_mq_debugfs_register(struct request_queue *q); -void blk_mq_debugfs_unregister(struct request_queue *q); void blk_mq_debugfs_register_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx); void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx); @@ -42,10 +41,6 @@ static inline void blk_mq_debugfs_register(struct request_queue *q) { } -static inline void blk_mq_debugfs_unregister(struct request_queue *q) -{ -} - static inline void blk_mq_debugfs_register_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx) { diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index eb3c65a21362..a4f7c101b53b 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -594,7 +594,9 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) if (ret) goto err_free_map_and_rqs; + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_register_sched(q); + mutex_unlock(&q->debugfs_mutex); queue_for_each_hw_ctx(q, hctx, i) { if (e->ops.init_hctx) { @@ -607,7 +609,9 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) return ret; } } + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_register_sched_hctx(q, hctx); + mutex_unlock(&q->debugfs_mutex); } return 0; @@ -648,14 +652,21 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e) unsigned int flags = 0; queue_for_each_hw_ctx(q, hctx, i) { + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_sched_hctx(hctx); + mutex_unlock(&q->debugfs_mutex); + if (e->type->ops.exit_hctx && hctx->sched_data) { e->type->ops.exit_hctx(hctx, i); hctx->sched_data = NULL; } flags = hctx->flags; } + + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_sched(q); + mutex_unlock(&q->debugfs_mutex); + if (e->type->ops.exit_sched) e->type->ops.exit_sched(e); blk_mq_sched_tags_teardown(q, flags); diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index e83af7bc7591..249a6f05dd3b 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -294,7 +294,9 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data, void rq_qos_exit(struct request_queue *q) { + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_queue_rqos(q); + mutex_unlock(&q->debugfs_mutex); while (q->rq_qos) { struct rq_qos *rqos = q->rq_qos; diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h index 68267007da1c..0e46052b018a 100644 --- a/block/blk-rq-qos.h +++ b/block/blk-rq-qos.h @@ -104,8 +104,11 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos) blk_mq_unfreeze_queue(q); - if (rqos->ops->debugfs_attrs) + if (rqos->ops->debugfs_attrs) { + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_register_rqos(rqos); + mutex_unlock(&q->debugfs_mutex); + } } static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos) @@ -129,7 +132,9 @@ static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos) blk_mq_unfreeze_queue(q); + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_rqos(rqos); + mutex_unlock(&q->debugfs_mutex); } typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 88bd41d4cb59..6e4801b217a7 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -779,14 +779,13 @@ static void blk_release_queue(struct kobject *kobj) if (queue_is_mq(q)) blk_mq_release(q); - blk_trace_shutdown(q); mutex_lock(&q->debugfs_mutex); + blk_trace_shutdown(q); debugfs_remove_recursive(q->debugfs_dir); + q->debugfs_dir = NULL; + q->sched_debugfs_dir = NULL; mutex_unlock(&q->debugfs_mutex); - if (queue_is_mq(q)) - blk_mq_debugfs_unregister(q); - bioset_exit(&q->bio_split); if (blk_queue_has_srcu(q)) @@ -836,17 +835,16 @@ int blk_register_queue(struct gendisk *disk) goto unlock; } + if (queue_is_mq(q)) + __blk_mq_register_dev(dev, q); + mutex_lock(&q->sysfs_lock); + mutex_lock(&q->debugfs_mutex); q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent), blk_debugfs_root); - mutex_unlock(&q->debugfs_mutex); - - if (queue_is_mq(q)) { - __blk_mq_register_dev(dev, q); + if (queue_is_mq(q)) blk_mq_debugfs_register(q); - } - - mutex_lock(&q->sysfs_lock); + mutex_unlock(&q->debugfs_mutex); ret = disk_register_independent_access_ranges(disk, NULL); if (ret) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bb6e3c31b3b7..73c886eba8e1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -482,7 +482,6 @@ struct request_queue { #endif /* CONFIG_BLK_DEV_ZONED */ int node; - struct mutex debugfs_mutex; #ifdef CONFIG_BLK_DEV_IO_TRACE struct blk_trace __rcu *blk_trace; #endif @@ -526,11 +525,12 @@ struct request_queue { struct bio_set bio_split; struct dentry *debugfs_dir; - -#ifdef CONFIG_BLK_DEBUG_FS struct dentry *sched_debugfs_dir; struct dentry *rqos_debugfs_dir; -#endif + /* + * Serializes all debugfs metadata operations using the above dentries. + */ + struct mutex debugfs_mutex; bool mq_sysfs_init_done; diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 10a32b0f2deb..fe04c6f96ca5 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -770,14 +770,11 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) **/ void blk_trace_shutdown(struct request_queue *q) { - mutex_lock(&q->debugfs_mutex); if (rcu_dereference_protected(q->blk_trace, lockdep_is_held(&q->debugfs_mutex))) { __blk_trace_startstop(q, 0); __blk_trace_remove(q); } - - mutex_unlock(&q->debugfs_mutex); } #ifdef CONFIG_BLK_CGROUP -- cgit v1.2.3 From c0f3bb4054ef036e5f67e27f2e3cad9e6512cf00 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Wed, 8 Jun 2022 01:11:12 +0900 Subject: rethook: Reject getting a rethook if RCU is not watching Since the rethook_recycle() will involve the call_rcu() for reclaiming the rethook_instance, the rethook must be set up at the RCU available context (non idle). This rethook_recycle() in the rethook trampoline handler is inevitable, thus the RCU available check must be done before setting the rethook trampoline. This adds a rcu_is_watching() check in the rethook_try_get() so that it will return NULL if it is called when !rcu_is_watching(). Fixes: 54ecbe6f1ed5 ("rethook: Add a generic return hook") Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Daniel Borkmann Acked-by: Steven Rostedt (Google) Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/165461827269.280167.7379263615545598958.stgit@devnote2 --- kernel/trace/rethook.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'kernel') diff --git a/kernel/trace/rethook.c b/kernel/trace/rethook.c index b56833700d23..c69d82273ce7 100644 --- a/kernel/trace/rethook.c +++ b/kernel/trace/rethook.c @@ -154,6 +154,15 @@ struct rethook_node *rethook_try_get(struct rethook *rh) if (unlikely(!handler)) return NULL; + /* + * This expects the caller will set up a rethook on a function entry. + * When the function returns, the rethook will eventually be reclaimed + * or released in the rethook_recycle() with call_rcu(). + * This means the caller must be run in the RCU-availabe context. + */ + if (unlikely(!rcu_is_watching())) + return NULL; + fn = freelist_try_get(&rh->pool); if (!fn) return NULL; -- cgit v1.2.3 From cc72b72073ac982a954d3b43519ca1c28f03c27c Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Sat, 28 May 2022 00:55:39 +0900 Subject: tracing/kprobes: Check whether get_kretprobe() returns NULL in kretprobe_dispatcher() There is a small chance that get_kretprobe(ri) returns NULL in kretprobe_dispatcher() when another CPU unregisters the kretprobe right after __kretprobe_trampoline_handler(). To avoid this issue, kretprobe_dispatcher() checks the get_kretprobe() return value again. And if it is NULL, it returns soon because that kretprobe is under unregistering process. This issue has been introduced when the kretprobe is decoupled from the struct kretprobe_instance by commit d741bf41d7c7 ("kprobes: Remove kretprobe hash"). Before that commit, the struct kretprob_instance::rp directly points the kretprobe and it is never be NULL. Link: https://lkml.kernel.org/r/165366693881.797669.16926184644089588731.stgit@devnote2 Reported-by: Yonghong Song Fixes: d741bf41d7c7 ("kprobes: Remove kretprobe hash") Cc: Peter Zijlstra Cc: Ingo Molnar Cc: bpf Cc: Kernel Team Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu (Google) Acked-by: Jiri Olsa Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_kprobe.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 93507330462c..a245ea673715 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1718,8 +1718,17 @@ static int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) { struct kretprobe *rp = get_kretprobe(ri); - struct trace_kprobe *tk = container_of(rp, struct trace_kprobe, rp); + struct trace_kprobe *tk; + + /* + * There is a small chance that get_kretprobe(ri) returns NULL when + * the kretprobe is unregister on another CPU between kretprobe's + * trampoline_handler and this function. + */ + if (unlikely(!rp)) + return 0; + tk = container_of(rp, struct trace_kprobe, rp); raw_cpu_inc(*tk->nhit); if (trace_probe_test_flag(&tk->tp, TP_FLAG_TRACE)) -- cgit v1.2.3 From f4b0d318097e45cbac5e14976f8bb56aa2cef504 Mon Sep 17 00:00:00 2001 From: sunliming Date: Thu, 2 Jun 2022 22:06:13 +0800 Subject: tracing: Simplify conditional compilation code in tracing_set_tracer() Two conditional compilation directives "#ifdef CONFIG_TRACER_MAX_TRACE" are used consecutively, and no other code in between. Simplify conditional the compilation code and only use one "#ifdef CONFIG_TRACER_MAX_TRACE". Link: https://lkml.kernel.org/r/20220602140613.545069-1-sunliming@kylinos.cn Signed-off-by: sunliming Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2c95992e2c71..a8cfac0611bc 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6424,9 +6424,7 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) synchronize_rcu(); free_snapshot(tr); } -#endif -#ifdef CONFIG_TRACER_MAX_TRACE if (t->use_max_tr && !had_max_tr) { ret = tracing_alloc_snapshot_instance(tr); if (ret < 0) -- cgit v1.2.3 From 12c3e0c92fd7cb3d3b698d84fdde7dccb6ba8822 Mon Sep 17 00:00:00 2001 From: Gautam Menghani Date: Sun, 12 Jun 2022 07:42:32 -0700 Subject: tracing/uprobes: Remove unwanted initialization in __trace_uprobe_create() Remove the unwanted initialization of variable 'ret'. This fixes the clang scan warning: Value stored to 'ret' is never read [deadcode.DeadStores] Link: https://lkml.kernel.org/r/20220612144232.145209-1-gautammenghani201@gmail.com Signed-off-by: Gautam Menghani Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_uprobe.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 9711589273cd..c3dc4f859a6b 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -546,7 +546,6 @@ static int __trace_uprobe_create(int argc, const char **argv) bool is_return = false; int i, ret; - ret = 0; ref_ctr_offset = 0; switch (argv[0][0]) { -- cgit v1.2.3 From 202773260023b56e868d09d13d3a417028f1ff5b Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 17 Jun 2022 15:24:02 +0300 Subject: PM: hibernate: Use kernel_can_power_off() Use new kernel_can_power_off() API instead of legacy pm_power_off global variable to fix regressed hibernation to disk where machine no longer powers off when it should because ACPI power driver transitioned to the new sys-off based API and it doesn't use pm_power_off anymore. Fixes: 98f30d0ecf79 ("ACPI: power: Switch to sys-off handler API") Tested-by: Ken Moffat Reported-by: Ken Moffat Signed-off-by: Dmitry Osipenko Signed-off-by: Rafael J. Wysocki --- kernel/power/hibernate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 20a66bf9f465..89c71fce225d 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -665,7 +665,7 @@ static void power_down(void) hibernation_platform_enter(); fallthrough; case HIBERNATION_SHUTDOWN: - if (pm_power_off) + if (kernel_can_power_off()) kernel_power_off(); break; } -- cgit v1.2.3 From 3be4562584bba603f33863a00c1c32eecf772ee6 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 22 Jun 2022 12:14:24 -0700 Subject: dma-direct: use the correct size for dma_set_encrypted() The third parameter of dma_set_encrypted() is a size in bytes rather than the number of pages. Fixes: 4d0564785bb0 ("dma-direct: factor out dma_set_{de,en}crypted helpers") Signed-off-by: Dexuan Cui Reviewed-by: Robin Murphy Signed-off-by: Christoph Hellwig --- kernel/dma/direct.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index e978f36e6be8..8d0b68a17042 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -357,7 +357,7 @@ void dma_direct_free(struct device *dev, size_t size, } else { if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_CLEAR_UNCACHED)) arch_dma_clear_uncached(cpu_addr, size); - if (dma_set_encrypted(dev, cpu_addr, 1 << page_order)) + if (dma_set_encrypted(dev, cpu_addr, size)) return; } @@ -392,7 +392,6 @@ void dma_direct_free_pages(struct device *dev, size_t size, struct page *page, dma_addr_t dma_addr, enum dma_data_direction dir) { - unsigned int page_order = get_order(size); void *vaddr = page_address(page); /* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */ @@ -400,7 +399,7 @@ void dma_direct_free_pages(struct device *dev, size_t size, dma_free_from_pool(dev, vaddr, size)) return; - if (dma_set_encrypted(dev, vaddr, 1 << page_order)) + if (dma_set_encrypted(dev, vaddr, size)) return; __dma_direct_free_pages(dev, page, size); } -- cgit v1.2.3 From 20fb0c8272bbb102d15bdd11aa64f828619dd7cc Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Thu, 23 Jun 2022 16:51:52 +0200 Subject: Revert "printk: Wait for the global console lock when the system is going down" This reverts commit b87f02307d3cfbda768520f0687c51ca77e14fc3. The testing of 5.19 release candidates revealed missing synchronization between early and regular console functionality. It would be possible to start the console kthreads later as a workaround. But it is clear that console lock serialized console drivers between each other. It opens a big area of possible problems that were not considered by people involved in the development and review. printk() is crucial for debugging kernel issues and console output is very important part of it. The number of consoles is huge and a proper review would take some time. As a result it need to be reverted for 5.19. Link: https://lore.kernel.org/r/YrBdjVwBOVgLfHyb@alley Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20220623145157.21938-2-pmladek@suse.com --- include/linux/printk.h | 5 ----- kernel/panic.c | 2 -- kernel/printk/internal.h | 2 -- kernel/printk/printk.c | 4 ---- kernel/printk/printk_safe.c | 32 -------------------------------- kernel/reboot.c | 2 -- 6 files changed, 47 deletions(-) (limited to 'kernel') diff --git a/include/linux/printk.h b/include/linux/printk.h index c1e07c0652c7..cd26aab0ab2a 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -174,7 +174,6 @@ extern void printk_prefer_direct_enter(void); extern void printk_prefer_direct_exit(void); extern bool pr_flush(int timeout_ms, bool reset_on_progress); -extern void try_block_console_kthreads(int timeout_ms); /* * Please don't use printk_ratelimit(), because it shares ratelimiting state @@ -239,10 +238,6 @@ static inline bool pr_flush(int timeout_ms, bool reset_on_progress) return true; } -static inline void try_block_console_kthreads(int timeout_ms) -{ -} - static inline int printk_ratelimit(void) { return 0; diff --git a/kernel/panic.c b/kernel/panic.c index fe73d18ecdf0..6737b2332275 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -273,7 +273,6 @@ void panic(const char *fmt, ...) * unfortunately means it may not be hardened to work in a * panic situation. */ - try_block_console_kthreads(10000); smp_send_stop(); } else { /* @@ -281,7 +280,6 @@ void panic(const char *fmt, ...) * kmsg_dump, we will need architecture dependent extra * works in addition to stopping other CPUs. */ - try_block_console_kthreads(10000); crash_smp_send_stop(); } diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index e7d8578860ad..d947ca6c84f9 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -20,8 +20,6 @@ enum printk_info_flags { LOG_CONT = 8, /* text is a fragment of a continuation line */ }; -extern bool block_console_kthreads; - __printf(4, 0) int vprintk_store(int facility, int level, const struct dev_printk_info *dev_info, diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index b095fb5f5f61..45c6c2b0b104 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -250,9 +250,6 @@ static atomic_t console_kthreads_active = ATOMIC_INIT(0); #define console_kthread_printing_exit() \ atomic_dec(&console_kthreads_active) -/* Block console kthreads to avoid processing new messages. */ -bool block_console_kthreads; - /* * Helper macros to handle lockdep when locking/unlocking console_sem. We use * macros instead of functions so that _RET_IP_ contains useful information. @@ -3733,7 +3730,6 @@ static bool printer_should_wake(struct console *con, u64 seq) if (con->blocked || console_kthreads_atomically_blocked() || - block_console_kthreads || system_state > SYSTEM_RUNNING || oops_in_progress) { return false; diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index caac4de1ea59..ef0f9a2044da 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -8,9 +8,7 @@ #include #include #include -#include #include -#include #include "internal.h" @@ -52,33 +50,3 @@ asmlinkage int vprintk(const char *fmt, va_list args) return vprintk_default(fmt, args); } EXPORT_SYMBOL(vprintk); - -/** - * try_block_console_kthreads() - Try to block console kthreads and - * make the global console_lock() avaialble - * - * @timeout_ms: The maximum time (in ms) to wait. - * - * Prevent console kthreads from starting processing new messages. Wait - * until the global console_lock() become available. - * - * Context: Can be called in any context. - */ -void try_block_console_kthreads(int timeout_ms) -{ - block_console_kthreads = true; - - /* Do not wait when the console lock could not be safely taken. */ - if (this_cpu_read(printk_context) || in_nmi()) - return; - - while (timeout_ms > 0) { - if (console_trylock()) { - console_unlock(); - return; - } - - udelay(1000); - timeout_ms -= 1; - } -} diff --git a/kernel/reboot.c b/kernel/reboot.c index 310363685502..4177645e74d6 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -74,7 +74,6 @@ void kernel_restart_prepare(char *cmd) { blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); system_state = SYSTEM_RESTART; - try_block_console_kthreads(10000); usermodehelper_disable(); device_shutdown(); } @@ -263,7 +262,6 @@ static void kernel_shutdown_prepare(enum system_states state) blocking_notifier_call_chain(&reboot_notifier_list, (state == SYSTEM_HALT) ? SYS_HALT : SYS_POWER_OFF, NULL); system_state = state; - try_block_console_kthreads(10000); usermodehelper_disable(); device_shutdown(); } -- cgit v1.2.3 From 05c96b3713aa2a406d0c4ef0505bf80a6748fedb Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Thu, 23 Jun 2022 16:51:53 +0200 Subject: Revert "printk: Block console kthreads when direct printing will be required" This reverts commit c3230283e2819a69dad2cf7a63143fde8bab8b5c. The testing of 5.19 release candidates revealed missing synchronization between early and regular console functionality. It would be possible to start the console kthreads later as a workaround. But it is clear that console lock serialized console drivers between each other. It opens a big area of possible problems that were not considered by people involved in the development and review. printk() is crucial for debugging kernel issues and console output is very important part of it. The number of consoles is huge and a proper review would take some time. As a result it need to be reverted for 5.19. Link: https://lore.kernel.org/r/YrBdjVwBOVgLfHyb@alley Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20220623145157.21938-3-pmladek@suse.com --- kernel/printk/printk.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 45c6c2b0b104..ea3dd55709e7 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3729,9 +3729,7 @@ static bool printer_should_wake(struct console *con, u64 seq) return true; if (con->blocked || - console_kthreads_atomically_blocked() || - system_state > SYSTEM_RUNNING || - oops_in_progress) { + console_kthreads_atomically_blocked()) { return false; } -- cgit v1.2.3 From 007eeab7e9f03a3108c300c03e11a6c151e430c9 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Thu, 23 Jun 2022 16:51:54 +0200 Subject: Revert "printk: remove @console_locked" This reverts commit ab406816fca009349b89cbde885daf68a8c77e33. The testing of 5.19 release candidates revealed missing synchronization between early and regular console functionality. It would be possible to start the console kthreads later as a workaround. But it is clear that console lock serialized console drivers between each other. It opens a big area of possible problems that were not considered by people involved in the development and review. printk() is crucial for debugging kernel issues and console output is very important part of it. The number of consoles is huge and a proper review would take some time. As a result it need to be reverted for 5.19. Link: https://lore.kernel.org/r/YrBdjVwBOVgLfHyb@alley Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20220623145157.21938-4-pmladek@suse.com --- kernel/printk/printk.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index ea3dd55709e7..dfd1a19b95d6 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -340,7 +340,15 @@ static void console_kthreads_unblock(void) console_kthreads_blocked = false; } -static int console_suspended; +/* + * This is used for debugging the mess that is the VT code by + * keeping track if we have the console semaphore held. It's + * definitely not the perfect debug tool (we don't know if _WE_ + * hold it and are racing, but it helps tracking those weird code + * paths in the console code where we end up in places I want + * locked without the console semaphore held). + */ +static int console_locked, console_suspended; /* * Array of consoles built from command line options (console=) @@ -2711,6 +2719,7 @@ void console_lock(void) if (console_suspended) return; console_kthreads_block(); + console_locked = 1; console_may_schedule = 1; } EXPORT_SYMBOL(console_lock); @@ -2735,26 +2744,15 @@ int console_trylock(void) up_console_sem(); return 0; } + console_locked = 1; console_may_schedule = 0; return 1; } EXPORT_SYMBOL(console_trylock); -/* - * This is used to help to make sure that certain paths within the VT code are - * running with the console lock held. It is definitely not the perfect debug - * tool (it is not known if the VT code is the task holding the console lock), - * but it helps tracking those weird code paths in the console code such as - * when the console is suspended: where the console is not locked but no - * console printing may occur. - * - * Note: This returns true when the console is suspended but is not locked. - * This is intentional because the VT code must consider that situation - * the same as if the console was locked. - */ int is_console_locked(void) { - return (console_kthreads_blocked || atomic_read(&console_kthreads_active)); + return (console_locked || atomic_read(&console_kthreads_active)); } EXPORT_SYMBOL(is_console_locked); @@ -2810,6 +2808,8 @@ static inline bool console_is_usable(struct console *con) static void __console_unlock(void) { + console_locked = 0; + /* * Depending on whether console_lock() or console_trylock() was used, * appropriately allow the kthread printers to continue. @@ -3127,6 +3127,7 @@ void console_unblank(void) } else console_lock(); + console_locked = 1; console_may_schedule = 0; for_each_console(c) if ((c->flags & CON_ENABLED) && c->unblank) -- cgit v1.2.3 From 2d9ef940f89e0ab4fde7ba6f769d82f2a450c35a Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Thu, 23 Jun 2022 16:51:55 +0200 Subject: Revert "printk: extend console_lock for per-console locking" This reverts commit 8e274732115f63c1d09136284431b3555bd5cc56. The testing of 5.19 release candidates revealed missing synchronization between early and regular console functionality. It would be possible to start the console kthreads later as a workaround. But it is clear that console lock serialized console drivers between each other. It opens a big area of possible problems that were not considered by people involved in the development and review. printk() is crucial for debugging kernel issues and console output is very important part of it. The number of consoles is huge and a proper review would take some time. As a result it need to be reverted for 5.19. Link: https://lore.kernel.org/r/YrBdjVwBOVgLfHyb@alley Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20220623145157.21938-5-pmladek@suse.com --- include/linux/console.h | 15 --- kernel/printk/printk.c | 261 +++++++++++------------------------------------- 2 files changed, 56 insertions(+), 220 deletions(-) (limited to 'kernel') diff --git a/include/linux/console.h b/include/linux/console.h index 143653090c48..9a251e70c090 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -16,7 +16,6 @@ #include #include -#include struct vc_data; struct console_font_op; @@ -155,20 +154,6 @@ struct console { u64 seq; unsigned long dropped; struct task_struct *thread; - bool blocked; - - /* - * The per-console lock is used by printing kthreads to synchronize - * this console with callers of console_lock(). This is necessary in - * order to allow printing kthreads to run in parallel to each other, - * while each safely accessing the @blocked field and synchronizing - * against direct printing via console_lock/console_unlock. - * - * Note: For synchronizing against direct printing via - * console_trylock/console_unlock, see the static global - * variable @console_kthreads_active. - */ - struct mutex lock; void *data; struct console *next; diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index dfd1a19b95d6..ae489dc685a1 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -223,33 +223,6 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, /* Number of registered extended console drivers. */ static int nr_ext_console_drivers; -/* - * Used to synchronize printing kthreads against direct printing via - * console_trylock/console_unlock. - * - * Values: - * -1 = console kthreads atomically blocked (via global trylock) - * 0 = no kthread printing, console not locked (via trylock) - * >0 = kthread(s) actively printing - * - * Note: For synchronizing against direct printing via - * console_lock/console_unlock, see the @lock variable in - * struct console. - */ -static atomic_t console_kthreads_active = ATOMIC_INIT(0); - -#define console_kthreads_atomic_tryblock() \ - (atomic_cmpxchg(&console_kthreads_active, 0, -1) == 0) -#define console_kthreads_atomic_unblock() \ - atomic_cmpxchg(&console_kthreads_active, -1, 0) -#define console_kthreads_atomically_blocked() \ - (atomic_read(&console_kthreads_active) == -1) - -#define console_kthread_printing_tryenter() \ - atomic_inc_unless_negative(&console_kthreads_active) -#define console_kthread_printing_exit() \ - atomic_dec(&console_kthreads_active) - /* * Helper macros to handle lockdep when locking/unlocking console_sem. We use * macros instead of functions so that _RET_IP_ contains useful information. @@ -297,49 +270,6 @@ static bool panic_in_progress(void) return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID); } -/* - * Tracks whether kthread printers are all blocked. A value of true implies - * that the console is locked via console_lock() or the console is suspended. - * Writing to this variable requires holding @console_sem. - */ -static bool console_kthreads_blocked; - -/* - * Block all kthread printers from a schedulable context. - * - * Requires holding @console_sem. - */ -static void console_kthreads_block(void) -{ - struct console *con; - - for_each_console(con) { - mutex_lock(&con->lock); - con->blocked = true; - mutex_unlock(&con->lock); - } - - console_kthreads_blocked = true; -} - -/* - * Unblock all kthread printers from a schedulable context. - * - * Requires holding @console_sem. - */ -static void console_kthreads_unblock(void) -{ - struct console *con; - - for_each_console(con) { - mutex_lock(&con->lock); - con->blocked = false; - mutex_unlock(&con->lock); - } - - console_kthreads_blocked = false; -} - /* * This is used for debugging the mess that is the VT code by * keeping track if we have the console semaphore held. It's @@ -2673,6 +2603,13 @@ void resume_console(void) down_console_sem(); console_suspended = 0; console_unlock(); + + /* + * While suspended, new records may have been added to the + * ringbuffer. Wake up the kthread printers to print them. + */ + wake_up_klogd(); + pr_flush(1000, true); } @@ -2691,14 +2628,9 @@ static int console_cpu_notify(unsigned int cpu) /* If trylock fails, someone else is doing the printing */ if (console_trylock()) console_unlock(); - else { - /* - * If a new CPU comes online, the conditions for - * printer_should_wake() may have changed for some - * kthread printer with !CON_ANYTIME. - */ - wake_up_klogd(); - } + + /* Wake kthread printers. Some may have become usable. */ + wake_up_klogd(); } return 0; } @@ -2718,7 +2650,6 @@ void console_lock(void) down_console_sem(); if (console_suspended) return; - console_kthreads_block(); console_locked = 1; console_may_schedule = 1; } @@ -2740,10 +2671,6 @@ int console_trylock(void) up_console_sem(); return 0; } - if (!console_kthreads_atomic_tryblock()) { - up_console_sem(); - return 0; - } console_locked = 1; console_may_schedule = 0; return 1; @@ -2752,7 +2679,7 @@ EXPORT_SYMBOL(console_trylock); int is_console_locked(void) { - return (console_locked || atomic_read(&console_kthreads_active)); + return console_locked; } EXPORT_SYMBOL(is_console_locked); @@ -2796,7 +2723,7 @@ static inline bool __console_is_usable(short flags) * Check if the given console is currently capable and allowed to print * records. * - * Requires holding the console_lock. + * Requires the console_lock. */ static inline bool console_is_usable(struct console *con) { @@ -2809,22 +2736,6 @@ static inline bool console_is_usable(struct console *con) static void __console_unlock(void) { console_locked = 0; - - /* - * Depending on whether console_lock() or console_trylock() was used, - * appropriately allow the kthread printers to continue. - */ - if (console_kthreads_blocked) - console_kthreads_unblock(); - else - console_kthreads_atomic_unblock(); - - /* - * New records may have arrived while the console was locked. - * Wake the kthread printers to print them. - */ - wake_up_klogd(); - up_console_sem(); } @@ -2842,19 +2753,17 @@ static void __console_unlock(void) * * @handover will be set to true if a printk waiter has taken over the * console_lock, in which case the caller is no longer holding the - * console_lock. Otherwise it is set to false. A NULL pointer may be provided - * to disable allowing the console_lock to be taken over by a printk waiter. + * console_lock. Otherwise it is set to false. * * Returns false if the given console has no next record to print, otherwise * true. * - * Requires the console_lock if @handover is non-NULL. - * Requires con->lock otherwise. + * Requires the console_lock. */ -static bool __console_emit_next_record(struct console *con, char *text, char *ext_text, - char *dropped_text, bool *handover) +static bool console_emit_next_record(struct console *con, char *text, char *ext_text, + char *dropped_text, bool *handover) { - static atomic_t panic_console_dropped = ATOMIC_INIT(0); + static int panic_console_dropped; struct printk_info info; struct printk_record r; unsigned long flags; @@ -2863,8 +2772,7 @@ static bool __console_emit_next_record(struct console *con, char *text, char *ex prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX); - if (handover) - *handover = false; + *handover = false; if (!prb_read_valid(prb, con->seq, &r)) return false; @@ -2872,8 +2780,7 @@ static bool __console_emit_next_record(struct console *con, char *text, char *ex if (con->seq != r.info->seq) { con->dropped += r.info->seq - con->seq; con->seq = r.info->seq; - if (panic_in_progress() && - atomic_fetch_inc_relaxed(&panic_console_dropped) > 10) { + if (panic_in_progress() && panic_console_dropped++ > 10) { suppress_panic_printk = 1; pr_warn_once("Too many dropped messages. Suppress messages on non-panic CPUs to prevent livelock.\n"); } @@ -2895,61 +2802,31 @@ static bool __console_emit_next_record(struct console *con, char *text, char *ex len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time); } - if (handover) { - /* - * While actively printing out messages, if another printk() - * were to occur on another CPU, it may wait for this one to - * finish. This task can not be preempted if there is a - * waiter waiting to take over. - * - * Interrupts are disabled because the hand over to a waiter - * must not be interrupted until the hand over is completed - * (@console_waiter is cleared). - */ - printk_safe_enter_irqsave(flags); - console_lock_spinning_enable(); - - /* don't trace irqsoff print latency */ - stop_critical_timings(); - } + /* + * While actively printing out messages, if another printk() + * were to occur on another CPU, it may wait for this one to + * finish. This task can not be preempted if there is a + * waiter waiting to take over. + * + * Interrupts are disabled because the hand over to a waiter + * must not be interrupted until the hand over is completed + * (@console_waiter is cleared). + */ + printk_safe_enter_irqsave(flags); + console_lock_spinning_enable(); + stop_critical_timings(); /* don't trace print latency */ call_console_driver(con, write_text, len, dropped_text); + start_critical_timings(); con->seq++; - if (handover) { - start_critical_timings(); - *handover = console_lock_spinning_disable_and_check(); - printk_safe_exit_irqrestore(flags); - } + *handover = console_lock_spinning_disable_and_check(); + printk_safe_exit_irqrestore(flags); skip: return true; } -/* - * Print a record for a given console, but allow another printk() caller to - * take over the console_lock and continue printing. - * - * Requires the console_lock, but depending on @handover after the call, the - * caller may no longer have the console_lock. - * - * See __console_emit_next_record() for argument and return details. - */ -static bool console_emit_next_record_transferable(struct console *con, char *text, char *ext_text, - char *dropped_text, bool *handover) -{ - /* - * Handovers are only supported if threaded printers are atomically - * blocked. The context taking over the console_lock may be atomic. - */ - if (!console_kthreads_atomically_blocked()) { - *handover = false; - handover = NULL; - } - - return __console_emit_next_record(con, text, ext_text, dropped_text, handover); -} - /* * Print out all remaining records to all consoles. * @@ -3001,11 +2878,13 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove if (con->flags & CON_EXTENDED) { /* Extended consoles do not print "dropped messages". */ - progress = console_emit_next_record_transferable(con, &text[0], - &ext_text[0], NULL, handover); + progress = console_emit_next_record(con, &text[0], + &ext_text[0], NULL, + handover); } else { - progress = console_emit_next_record_transferable(con, &text[0], - NULL, &dropped_text[0], handover); + progress = console_emit_next_record(con, &text[0], + NULL, &dropped_text[0], + handover); } if (*handover) return false; @@ -3120,10 +2999,6 @@ void console_unblank(void) if (oops_in_progress) { if (down_trylock_console_sem() != 0) return; - if (!console_kthreads_atomic_tryblock()) { - up_console_sem(); - return; - } } else console_lock(); @@ -3206,6 +3081,10 @@ void console_start(struct console *console) console_lock(); console->flags |= CON_ENABLED; console_unlock(); + + /* Wake the newly enabled kthread printer. */ + wake_up_klogd(); + __pr_flush(console, 1000, true); } EXPORT_SYMBOL(console_start); @@ -3407,8 +3286,6 @@ void register_console(struct console *newcon) newcon->dropped = 0; newcon->thread = NULL; - newcon->blocked = true; - mutex_init(&newcon->lock); if (newcon->flags & CON_PRINTBUFFER) { /* Get a consistent copy of @syslog_seq. */ @@ -3709,19 +3586,6 @@ static void printk_fallback_preferred_direct(void) console_unlock(); } -/* - * Print a record for a given console, not allowing another printk() caller - * to take over. This is appropriate for contexts that do not have the - * console_lock. - * - * See __console_emit_next_record() for argument and return details. - */ -static bool console_emit_next_record(struct console *con, char *text, char *ext_text, - char *dropped_text) -{ - return __console_emit_next_record(con, text, ext_text, dropped_text, NULL); -} - static bool printer_should_wake(struct console *con, u64 seq) { short flags; @@ -3729,10 +3593,8 @@ static bool printer_should_wake(struct console *con, u64 seq) if (kthread_should_stop() || !printk_kthreads_available) return true; - if (con->blocked || - console_kthreads_atomically_blocked()) { + if (console_suspended) return false; - } /* * This is an unsafe read from con->flags, but a false positive is @@ -3753,6 +3615,7 @@ static int printk_kthread_func(void *data) struct console *con = data; char *dropped_text = NULL; char *ext_text = NULL; + bool handover; u64 seq = 0; char *text; int error; @@ -3802,27 +3665,15 @@ static int printk_kthread_func(void *data) if (error) continue; - error = mutex_lock_interruptible(&con->lock); - if (error) - continue; + console_lock(); - if (con->blocked || - !console_kthread_printing_tryenter()) { - /* Another context has locked the console_lock. */ - mutex_unlock(&con->lock); + if (console_suspended) { + up_console_sem(); continue; } - /* - * Although this context has not locked the console_lock, it - * is known that the console_lock is not locked and it is not - * possible for any other context to lock the console_lock. - * Therefore it is safe to read con->flags. - */ - - if (!__console_is_usable(con->flags)) { - console_kthread_printing_exit(); - mutex_unlock(&con->lock); + if (!console_is_usable(con)) { + __console_unlock(); continue; } @@ -3835,13 +3686,13 @@ static int printk_kthread_func(void *data) * which can conditionally invoke cond_resched(). */ console_may_schedule = 0; - console_emit_next_record(con, text, ext_text, dropped_text); + console_emit_next_record(con, text, ext_text, dropped_text, &handover); + if (handover) + continue; seq = con->seq; - console_kthread_printing_exit(); - - mutex_unlock(&con->lock); + __console_unlock(); } con_printk(KERN_INFO, con, "printing thread stopped\n"); -- cgit v1.2.3 From 5831788afb17b89c5b531fb60cbd798613ccbb63 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Thu, 23 Jun 2022 16:51:56 +0200 Subject: Revert "printk: add kthread console printers" This reverts commit 09c5ba0aa2fcfdadb17d045c3ee6f86d69270df7. This reverts commit b87f02307d3cfbda768520f0687c51ca77e14fc3. The testing of 5.19 release candidates revealed missing synchronization between early and regular console functionality. It would be possible to start the console kthreads later as a workaround. But it is clear that console lock serialized console drivers between each other. It opens a big area of possible problems that were not considered by people involved in the development and review. printk() is crucial for debugging kernel issues and console output is very important part of it. The number of consoles is huge and a proper review would take some time. As a result it need to be reverted for 5.19. Link: https://lore.kernel.org/r/YrBdjVwBOVgLfHyb@alley Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20220623145157.21938-6-pmladek@suse.com --- include/linux/console.h | 2 - kernel/printk/printk.c | 329 ++++-------------------------------------------- 2 files changed, 22 insertions(+), 309 deletions(-) (limited to 'kernel') diff --git a/include/linux/console.h b/include/linux/console.h index 9a251e70c090..8c1686e2c233 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -153,8 +153,6 @@ struct console { uint ospeed; u64 seq; unsigned long dropped; - struct task_struct *thread; - void *data; struct console *next; }; diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index ae489dc685a1..5a6273e56b4e 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -361,13 +361,6 @@ static int console_msg_format = MSG_FORMAT_DEFAULT; /* syslog_lock protects syslog_* variables and write access to clear_seq. */ static DEFINE_MUTEX(syslog_lock); -/* - * A flag to signify if printk_activate_kthreads() has already started the - * kthread printers. If true, any later registered consoles must start their - * own kthread directly. The flag is write protected by the console_lock. - */ -static bool printk_kthreads_available; - #ifdef CONFIG_PRINTK static atomic_t printk_prefer_direct = ATOMIC_INIT(0); @@ -397,39 +390,6 @@ void printk_prefer_direct_exit(void) WARN_ON(atomic_dec_if_positive(&printk_prefer_direct) < 0); } -/* - * Calling printk() always wakes kthread printers so that they can - * flush the new message to their respective consoles. Also, if direct - * printing is allowed, printk() tries to flush the messages directly. - * - * Direct printing is allowed in situations when the kthreads - * are not available or the system is in a problematic state. - * - * See the implementation about possible races. - */ -static inline bool allow_direct_printing(void) -{ - /* - * Checking kthread availability is a possible race because the - * kthread printers can become permanently disabled during runtime. - * However, doing that requires holding the console_lock, so any - * pending messages will be direct printed by console_unlock(). - */ - if (!printk_kthreads_available) - return true; - - /* - * Prefer direct printing when the system is in a problematic state. - * The context that sets this state will always see the updated value. - * The other contexts do not care. Anyway, direct printing is just a - * best effort. The direct output is only possible when console_lock - * is not already taken and no kthread printers are actively printing. - */ - return (system_state > SYSTEM_RUNNING || - oops_in_progress || - atomic_read(&printk_prefer_direct)); -} - DECLARE_WAIT_QUEUE_HEAD(log_wait); /* All 3 protected by @syslog_lock. */ /* the next printk record to read by syslog(READ) or /proc/kmsg */ @@ -2320,10 +2280,10 @@ asmlinkage int vprintk_emit(int facility, int level, printed_len = vprintk_store(facility, level, dev_info, fmt, args); /* If called from the scheduler, we can not call up(). */ - if (!in_sched && allow_direct_printing()) { + if (!in_sched) { /* * The caller may be holding system-critical or - * timing-sensitive locks. Disable preemption during direct + * timing-sensitive locks. Disable preemption during * printing of all remaining records to all consoles so that * this context can return as soon as possible. Hopefully * another printk() caller will take over the printing. @@ -2366,8 +2326,6 @@ EXPORT_SYMBOL(_printk); static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress); -static void printk_start_kthread(struct console *con); - #else /* CONFIG_PRINTK */ #define CONSOLE_LOG_MAX 0 @@ -2401,8 +2359,6 @@ static void call_console_driver(struct console *con, const char *text, size_t le } static bool suppress_message_printing(int level) { return false; } static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { return true; } -static void printk_start_kthread(struct console *con) { } -static bool allow_direct_printing(void) { return true; } #endif /* CONFIG_PRINTK */ @@ -2603,13 +2559,6 @@ void resume_console(void) down_console_sem(); console_suspended = 0; console_unlock(); - - /* - * While suspended, new records may have been added to the - * ringbuffer. Wake up the kthread printers to print them. - */ - wake_up_klogd(); - pr_flush(1000, true); } @@ -2628,9 +2577,6 @@ static int console_cpu_notify(unsigned int cpu) /* If trylock fails, someone else is doing the printing */ if (console_trylock()) console_unlock(); - - /* Wake kthread printers. Some may have become usable. */ - wake_up_klogd(); } return 0; } @@ -2702,9 +2648,18 @@ static bool abandon_console_lock_in_panic(void) return atomic_read(&panic_cpu) != raw_smp_processor_id(); } -static inline bool __console_is_usable(short flags) +/* + * Check if the given console is currently capable and allowed to print + * records. + * + * Requires the console_lock. + */ +static inline bool console_is_usable(struct console *con) { - if (!(flags & CON_ENABLED)) + if (!(con->flags & CON_ENABLED)) + return false; + + if (!con->write) return false; /* @@ -2713,26 +2668,12 @@ static inline bool __console_is_usable(short flags) * cope (CON_ANYTIME) don't call them until this CPU is officially up. */ if (!cpu_online(raw_smp_processor_id()) && - !(flags & CON_ANYTIME)) + !(con->flags & CON_ANYTIME)) return false; return true; } -/* - * Check if the given console is currently capable and allowed to print - * records. - * - * Requires the console_lock. - */ -static inline bool console_is_usable(struct console *con) -{ - if (!con->write) - return false; - - return __console_is_usable(con->flags); -} - static void __console_unlock(void) { console_locked = 0; @@ -2845,8 +2786,8 @@ skip: * were flushed to all usable consoles. A returned false informs the caller * that everything was not flushed (either there were no usable consoles or * another context has taken over printing or it is a panic situation and this - * is not the panic CPU or direct printing is not preferred). Regardless the - * reason, the caller should assume it is not useful to immediately try again. + * is not the panic CPU). Regardless the reason, the caller should assume it + * is not useful to immediately try again. * * Requires the console_lock. */ @@ -2863,10 +2804,6 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove *handover = false; do { - /* Let the kthread printers do the work if they can. */ - if (!allow_direct_printing()) - return false; - any_progress = false; for_each_console(con) { @@ -3081,10 +3018,6 @@ void console_start(struct console *console) console_lock(); console->flags |= CON_ENABLED; console_unlock(); - - /* Wake the newly enabled kthread printer. */ - wake_up_klogd(); - __pr_flush(console, 1000, true); } EXPORT_SYMBOL(console_start); @@ -3285,8 +3218,6 @@ void register_console(struct console *newcon) nr_ext_console_drivers++; newcon->dropped = 0; - newcon->thread = NULL; - if (newcon->flags & CON_PRINTBUFFER) { /* Get a consistent copy of @syslog_seq. */ mutex_lock(&syslog_lock); @@ -3296,10 +3227,6 @@ void register_console(struct console *newcon) /* Begin with next message. */ newcon->seq = prb_next_seq(prb); } - - if (printk_kthreads_available) - printk_start_kthread(newcon); - console_unlock(); console_sysfs_notify(); @@ -3326,7 +3253,6 @@ EXPORT_SYMBOL(register_console); int unregister_console(struct console *console) { - struct task_struct *thd; struct console *con; int res; @@ -3367,20 +3293,7 @@ int unregister_console(struct console *console) console_drivers->flags |= CON_CONSDEV; console->flags &= ~CON_ENABLED; - - /* - * console->thread can only be cleared under the console lock. But - * stopping the thread must be done without the console lock. The - * task that clears @thread is the task that stops the kthread. - */ - thd = console->thread; - console->thread = NULL; - console_unlock(); - - if (thd) - kthread_stop(thd); - console_sysfs_notify(); if (console->exit) @@ -3476,20 +3389,6 @@ static int __init printk_late_init(void) } late_initcall(printk_late_init); -static int __init printk_activate_kthreads(void) -{ - struct console *con; - - console_lock(); - printk_kthreads_available = true; - for_each_console(con) - printk_start_kthread(con); - console_unlock(); - - return 0; -} -early_initcall(printk_activate_kthreads); - #if defined CONFIG_PRINTK /* If @con is specified, only wait for that console. Otherwise wait for all. */ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) @@ -3564,180 +3463,11 @@ bool pr_flush(int timeout_ms, bool reset_on_progress) } EXPORT_SYMBOL(pr_flush); -static void __printk_fallback_preferred_direct(void) -{ - printk_prefer_direct_enter(); - pr_err("falling back to preferred direct printing\n"); - printk_kthreads_available = false; -} - -/* - * Enter preferred direct printing, but never exit. Mark console threads as - * unavailable. The system is then forever in preferred direct printing and - * any printing threads will exit. - * - * Must *not* be called under console_lock. Use - * __printk_fallback_preferred_direct() if already holding console_lock. - */ -static void printk_fallback_preferred_direct(void) -{ - console_lock(); - __printk_fallback_preferred_direct(); - console_unlock(); -} - -static bool printer_should_wake(struct console *con, u64 seq) -{ - short flags; - - if (kthread_should_stop() || !printk_kthreads_available) - return true; - - if (console_suspended) - return false; - - /* - * This is an unsafe read from con->flags, but a false positive is - * not a problem. Worst case it would allow the printer to wake up - * although it is disabled. But the printer will notice that when - * attempting to print and instead go back to sleep. - */ - flags = data_race(READ_ONCE(con->flags)); - - if (!__console_is_usable(flags)) - return false; - - return prb_read_valid(prb, seq, NULL); -} - -static int printk_kthread_func(void *data) -{ - struct console *con = data; - char *dropped_text = NULL; - char *ext_text = NULL; - bool handover; - u64 seq = 0; - char *text; - int error; - - text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL); - if (!text) { - con_printk(KERN_ERR, con, "failed to allocate text buffer\n"); - printk_fallback_preferred_direct(); - goto out; - } - - if (con->flags & CON_EXTENDED) { - ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL); - if (!ext_text) { - con_printk(KERN_ERR, con, "failed to allocate ext_text buffer\n"); - printk_fallback_preferred_direct(); - goto out; - } - } else { - dropped_text = kmalloc(DROPPED_TEXT_MAX, GFP_KERNEL); - if (!dropped_text) { - con_printk(KERN_ERR, con, "failed to allocate dropped_text buffer\n"); - printk_fallback_preferred_direct(); - goto out; - } - } - - con_printk(KERN_INFO, con, "printing thread started\n"); - - for (;;) { - /* - * Guarantee this task is visible on the waitqueue before - * checking the wake condition. - * - * The full memory barrier within set_current_state() of - * prepare_to_wait_event() pairs with the full memory barrier - * within wq_has_sleeper(). - * - * This pairs with __wake_up_klogd:A. - */ - error = wait_event_interruptible(log_wait, - printer_should_wake(con, seq)); /* LMM(printk_kthread_func:A) */ - - if (kthread_should_stop() || !printk_kthreads_available) - break; - - if (error) - continue; - - console_lock(); - - if (console_suspended) { - up_console_sem(); - continue; - } - - if (!console_is_usable(con)) { - __console_unlock(); - continue; - } - - /* - * Even though the printk kthread is always preemptible, it is - * still not allowed to call cond_resched() from within - * console drivers. The task may become non-preemptible in the - * console driver call chain. For example, vt_console_print() - * takes a spinlock and then can call into fbcon_redraw(), - * which can conditionally invoke cond_resched(). - */ - console_may_schedule = 0; - console_emit_next_record(con, text, ext_text, dropped_text, &handover); - if (handover) - continue; - - seq = con->seq; - - __console_unlock(); - } - - con_printk(KERN_INFO, con, "printing thread stopped\n"); -out: - kfree(dropped_text); - kfree(ext_text); - kfree(text); - - console_lock(); - /* - * If this kthread is being stopped by another task, con->thread will - * already be NULL. That is fine. The important thing is that it is - * NULL after the kthread exits. - */ - con->thread = NULL; - console_unlock(); - - return 0; -} - -/* Must be called under console_lock. */ -static void printk_start_kthread(struct console *con) -{ - /* - * Do not start a kthread if there is no write() callback. The - * kthreads assume the write() callback exists. - */ - if (!con->write) - return; - - con->thread = kthread_run(printk_kthread_func, con, - "pr/%s%d", con->name, con->index); - if (IS_ERR(con->thread)) { - con->thread = NULL; - con_printk(KERN_ERR, con, "unable to start printing thread\n"); - __printk_fallback_preferred_direct(); - return; - } -} - /* * Delayed printk version, for scheduler-internal messages: */ -#define PRINTK_PENDING_WAKEUP 0x01 -#define PRINTK_PENDING_DIRECT_OUTPUT 0x02 +#define PRINTK_PENDING_WAKEUP 0x01 +#define PRINTK_PENDING_OUTPUT 0x02 static DEFINE_PER_CPU(int, printk_pending); @@ -3745,14 +3475,10 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work) { int pending = this_cpu_xchg(printk_pending, 0); - if (pending & PRINTK_PENDING_DIRECT_OUTPUT) { - printk_prefer_direct_enter(); - + if (pending & PRINTK_PENDING_OUTPUT) { /* If trylock fails, someone else is doing the printing */ if (console_trylock()) console_unlock(); - - printk_prefer_direct_exit(); } if (pending & PRINTK_PENDING_WAKEUP) @@ -3777,11 +3503,10 @@ static void __wake_up_klogd(int val) * prepare_to_wait_event(), which is called after ___wait_event() adds * the waiter but before it has checked the wait condition. * - * This pairs with devkmsg_read:A, syslog_print:A, and - * printk_kthread_func:A. + * This pairs with devkmsg_read:A and syslog_print:A. */ if (wq_has_sleeper(&log_wait) || /* LMM(__wake_up_klogd:A) */ - (val & PRINTK_PENDING_DIRECT_OUTPUT)) { + (val & PRINTK_PENDING_OUTPUT)) { this_cpu_or(printk_pending, val); irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); } @@ -3799,17 +3524,7 @@ void defer_console_output(void) * New messages may have been added directly to the ringbuffer * using vprintk_store(), so wake any waiters as well. */ - int val = PRINTK_PENDING_WAKEUP; - - /* - * Make sure that some context will print the messages when direct - * printing is allowed. This happens in situations when the kthreads - * may not be as reliable or perhaps unusable. - */ - if (allow_direct_printing()) - val |= PRINTK_PENDING_DIRECT_OUTPUT; - - __wake_up_klogd(val); + __wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT); } void printk_trigger_flush(void) -- cgit v1.2.3 From 07a22b61946f0b80065b0ddcc703b715f84355f5 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Thu, 23 Jun 2022 16:51:57 +0200 Subject: Revert "printk: add functions to prefer direct printing" This reverts commit 2bb2b7b57f81255c13f4395ea911d6bdc70c9fe2. The testing of 5.19 release candidates revealed missing synchronization between early and regular console functionality. It would be possible to start the console kthreads later as a workaround. But it is clear that console lock serialized console drivers between each other. It opens a big area of possible problems that were not considered by people involved in the development and review. printk() is crucial for debugging kernel issues and console output is very important part of it. The number of consoles is huge and a proper review would take some time. As a result it need to be reverted for 5.19. Link: https://lore.kernel.org/r/YrBdjVwBOVgLfHyb@alley Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20220623145157.21938-7-pmladek@suse.com --- drivers/tty/sysrq.c | 2 -- include/linux/printk.h | 11 ----------- kernel/hung_task.c | 11 +---------- kernel/panic.c | 4 ---- kernel/printk/printk.c | 28 ---------------------------- kernel/rcu/tree_stall.h | 2 -- kernel/reboot.c | 14 +------------- kernel/watchdog.c | 4 ---- kernel/watchdog_hld.c | 4 ---- 9 files changed, 2 insertions(+), 78 deletions(-) (limited to 'kernel') diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 2884cd638d64..bbfd004449b5 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -578,7 +578,6 @@ void __handle_sysrq(int key, bool check_mask) rcu_sysrq_start(); rcu_read_lock(); - printk_prefer_direct_enter(); /* * Raise the apparent loglevel to maximum so that the sysrq header * is shown to provide the user with positive feedback. We do not @@ -620,7 +619,6 @@ void __handle_sysrq(int key, bool check_mask) pr_cont("\n"); console_loglevel = orig_log_level; } - printk_prefer_direct_exit(); rcu_read_unlock(); rcu_sysrq_end(); diff --git a/include/linux/printk.h b/include/linux/printk.h index cd26aab0ab2a..091fba7283e1 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -170,9 +170,6 @@ extern void __printk_safe_exit(void); #define printk_deferred_enter __printk_safe_enter #define printk_deferred_exit __printk_safe_exit -extern void printk_prefer_direct_enter(void); -extern void printk_prefer_direct_exit(void); - extern bool pr_flush(int timeout_ms, bool reset_on_progress); /* @@ -225,14 +222,6 @@ static inline void printk_deferred_exit(void) { } -static inline void printk_prefer_direct_enter(void) -{ -} - -static inline void printk_prefer_direct_exit(void) -{ -} - static inline bool pr_flush(int timeout_ms, bool reset_on_progress) { return true; diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 02a65d554340..52501e5f7655 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -127,8 +127,6 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) * complain: */ if (sysctl_hung_task_warnings) { - printk_prefer_direct_enter(); - if (sysctl_hung_task_warnings > 0) sysctl_hung_task_warnings--; pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n", @@ -144,8 +142,6 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) if (sysctl_hung_task_all_cpu_backtrace) hung_task_show_all_bt = true; - - printk_prefer_direct_exit(); } touch_nmi_watchdog(); @@ -208,17 +204,12 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) } unlock: rcu_read_unlock(); - if (hung_task_show_lock) { - printk_prefer_direct_enter(); + if (hung_task_show_lock) debug_show_all_locks(); - printk_prefer_direct_exit(); - } if (hung_task_show_all_bt) { hung_task_show_all_bt = false; - printk_prefer_direct_enter(); trigger_all_cpu_backtrace(); - printk_prefer_direct_exit(); } if (hung_task_call_panic) diff --git a/kernel/panic.c b/kernel/panic.c index 6737b2332275..8355b19676f8 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -579,8 +579,6 @@ void __warn(const char *file, int line, void *caller, unsigned taint, { disable_trace_on_warning(); - printk_prefer_direct_enter(); - if (file) pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS\n", raw_smp_processor_id(), current->pid, file, line, @@ -610,8 +608,6 @@ void __warn(const char *file, int line, void *caller, unsigned taint, /* Just a warning, don't kill lockdep. */ add_taint(taint, LOCKDEP_STILL_OK); - - printk_prefer_direct_exit(); } #ifndef __WARN_FLAGS diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 5a6273e56b4e..b49c6ff6dca0 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -362,34 +362,6 @@ static int console_msg_format = MSG_FORMAT_DEFAULT; static DEFINE_MUTEX(syslog_lock); #ifdef CONFIG_PRINTK -static atomic_t printk_prefer_direct = ATOMIC_INIT(0); - -/** - * printk_prefer_direct_enter - cause printk() calls to attempt direct - * printing to all enabled consoles - * - * Since it is not possible to call into the console printing code from any - * context, there is no guarantee that direct printing will occur. - * - * This globally effects all printk() callers. - * - * Context: Any context. - */ -void printk_prefer_direct_enter(void) -{ - atomic_inc(&printk_prefer_direct); -} - -/** - * printk_prefer_direct_exit - restore printk() behavior - * - * Context: Any context. - */ -void printk_prefer_direct_exit(void) -{ - WARN_ON(atomic_dec_if_positive(&printk_prefer_direct) < 0); -} - DECLARE_WAIT_QUEUE_HEAD(log_wait); /* All 3 protected by @syslog_lock. */ /* the next printk record to read by syslog(READ) or /proc/kmsg */ diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 4995c078cff9..a001e1e7a992 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -647,7 +647,6 @@ static void print_cpu_stall(unsigned long gps) * See Documentation/RCU/stallwarn.rst for info on how to debug * RCU CPU stall warnings. */ - printk_prefer_direct_enter(); trace_rcu_stall_warning(rcu_state.name, TPS("SelfDetected")); pr_err("INFO: %s self-detected stall on CPU\n", rcu_state.name); raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags); @@ -685,7 +684,6 @@ static void print_cpu_stall(unsigned long gps) */ set_tsk_need_resched(current); set_preempt_need_resched(); - printk_prefer_direct_exit(); } static void check_cpu_stall(struct rcu_data *rdp) diff --git a/kernel/reboot.c b/kernel/reboot.c index 4177645e74d6..6bcc5d6a6572 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -447,11 +447,9 @@ static int __orderly_reboot(void) ret = run_cmd(reboot_cmd); if (ret) { - printk_prefer_direct_enter(); pr_warn("Failed to start orderly reboot: forcing the issue\n"); emergency_sync(); kernel_restart(NULL); - printk_prefer_direct_exit(); } return ret; @@ -464,7 +462,6 @@ static int __orderly_poweroff(bool force) ret = run_cmd(poweroff_cmd); if (ret && force) { - printk_prefer_direct_enter(); pr_warn("Failed to start orderly shutdown: forcing the issue\n"); /* @@ -474,7 +471,6 @@ static int __orderly_poweroff(bool force) */ emergency_sync(); kernel_power_off(); - printk_prefer_direct_exit(); } return ret; @@ -532,8 +528,6 @@ EXPORT_SYMBOL_GPL(orderly_reboot); */ static void hw_failure_emergency_poweroff_func(struct work_struct *work) { - printk_prefer_direct_enter(); - /* * We have reached here after the emergency shutdown waiting period has * expired. This means orderly_poweroff has not been able to shut off @@ -550,8 +544,6 @@ static void hw_failure_emergency_poweroff_func(struct work_struct *work) */ pr_emerg("Hardware protection shutdown failed. Trying emergency restart\n"); emergency_restart(); - - printk_prefer_direct_exit(); } static DECLARE_DELAYED_WORK(hw_failure_emergency_poweroff_work, @@ -590,13 +582,11 @@ void hw_protection_shutdown(const char *reason, int ms_until_forced) { static atomic_t allow_proceed = ATOMIC_INIT(1); - printk_prefer_direct_enter(); - pr_emerg("HARDWARE PROTECTION shutdown (%s)\n", reason); /* Shutdown should be initiated only once. */ if (!atomic_dec_and_test(&allow_proceed)) - goto out; + return; /* * Queue a backup emergency shutdown in the event of @@ -604,8 +594,6 @@ void hw_protection_shutdown(const char *reason, int ms_until_forced) */ hw_failure_emergency_poweroff(ms_until_forced); orderly_poweroff(true); -out: - printk_prefer_direct_exit(); } EXPORT_SYMBOL_GPL(hw_protection_shutdown); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 40024e03d422..9166220457bc 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -424,8 +424,6 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) /* Start period for the next softlockup warning. */ update_report_ts(); - printk_prefer_direct_enter(); - pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", smp_processor_id(), duration, current->comm, task_pid_nr(current)); @@ -444,8 +442,6 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK); if (softlockup_panic) panic("softlockup: hung tasks"); - - printk_prefer_direct_exit(); } return HRTIMER_RESTART; diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 701f35f0e2d4..247bf0b1582c 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -135,8 +135,6 @@ static void watchdog_overflow_callback(struct perf_event *event, if (__this_cpu_read(hard_watchdog_warn) == true) return; - printk_prefer_direct_enter(); - pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", this_cpu); print_modules(); @@ -157,8 +155,6 @@ static void watchdog_overflow_callback(struct perf_event *event, if (hardlockup_panic) nmi_panic(regs, "Hard LOCKUP"); - printk_prefer_direct_exit(); - __this_cpu_write(hard_watchdog_warn, true); return; } -- cgit v1.2.3