From 22b886dd1018093920c4250dee2a9a3cb7cff7b8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 4 Nov 2015 12:15:33 -0500 Subject: timers: Use proper base migration in add_timer_on() Regardless of the previous CPU a timer was on, add_timer_on() currently simply sets timer->flags to the new CPU. As the caller must be seeing the timer as idle, this is locally fine, but the timer leaving the old base while unlocked can lead to race conditions as follows. Let's say timer was on cpu 0. cpu 0 cpu 1 ----------------------------------------------------------------------------- del_timer(timer) succeeds del_timer(timer) lock_timer_base(timer) locks cpu_0_base add_timer_on(timer, 1) spin_lock(&cpu_1_base->lock) timer->flags set to cpu_1_base operates on @timer operates on @timer This triggered with mod_delayed_work_on() which contains "if (del_timer()) add_timer_on()" sequence eventually leading to the following oops. BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] detach_if_pending+0x69/0x1a0 ... Workqueue: wqthrash wqthrash_workfunc [wqthrash] task: ffff8800172ca680 ti: ffff8800172d0000 task.ti: ffff8800172d0000 RIP: 0010:[] [] detach_if_pending+0x69/0x1a0 ... Call Trace: [] del_timer+0x44/0x60 [] try_to_grab_pending+0xb6/0x160 [] mod_delayed_work_on+0x33/0x80 [] wqthrash_workfunc+0x61/0x90 [wqthrash] [] process_one_work+0x1e8/0x650 [] worker_thread+0x4e/0x450 [] kthread+0xef/0x110 [] ret_from_fork+0x3f/0x70 Fix it by updating add_timer_on() to perform proper migration as __mod_timer() does. 
Reported-and-tested-by: Jeff Layton Signed-off-by: Tejun Heo Cc: Chris Worley Cc: bfields@fieldses.org Cc: Michael Skralivetsky Cc: Trond Myklebust Cc: Shaohua Li Cc: Jeff Layton Cc: kernel-team@fb.com Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20151029103113.2f893924@tlielax.poochiereds.net Link: http://lkml.kernel.org/r/20151104171533.GI5749@mtj.duckdns.org Signed-off-by: Thomas Gleixner --- kernel/time/timer.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 74591ba9474f..bbc5d1114583 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -977,13 +977,29 @@ EXPORT_SYMBOL(add_timer); */ void add_timer_on(struct timer_list *timer, int cpu) { - struct tvec_base *base = per_cpu_ptr(&tvec_bases, cpu); + struct tvec_base *new_base = per_cpu_ptr(&tvec_bases, cpu); + struct tvec_base *base; unsigned long flags; timer_stats_timer_set_start_info(timer); BUG_ON(timer_pending(timer) || !timer->function); - spin_lock_irqsave(&base->lock, flags); - timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu; + + /* + * If @timer was on a different CPU, it should be migrated with the + * old base locked to prevent other operations proceeding with the + * wrong base locked. See lock_timer_base(). 
+ */ + base = lock_timer_base(timer, &flags); + if (base != new_base) { + timer->flags |= TIMER_MIGRATING; + + spin_unlock(&base->lock); + base = new_base; + spin_lock(&base->lock); + WRITE_ONCE(timer->flags, + (timer->flags & ~TIMER_BASEMASK) | cpu); + } + debug_activate(timer, timer->expires); internal_add_timer(base, timer); spin_unlock_irqrestore(&base->lock, flags); -- cgit v1.2.3 From 8b1291994d8e5e621a8af7e165b106e50d04bbf1 Mon Sep 17 00:00:00 2001 From: Jiaxing Wang Date: Fri, 6 Nov 2015 16:04:16 +0800 Subject: tracing: Make tracing work when debugfs is not configured in Currently tracing_init_dentry() returns -ENODEV when debugfs is not configured in, which causes tracefs not populated with tracing files and directories, so we will get an empty directory even after we manually mount tracefs. We can make tracing_init_dentry() return NULL if debugfs is not configured in and can manually mount tracefs. But return -ENODEV if debugfs is configured in but not initialized or failed to create automount point as that would break backward compatibility with older tools. 
Link: http://lkml.kernel.org/r/1446797056-11683-1-git-send-email-hello.wjx@gmail.com Signed-off-by: Jiaxing Wang Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2198a630ef58..08af79c106e1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6847,7 +6847,9 @@ struct dentry *tracing_init_dentry(void) if (tr->dir) return NULL; - if (WARN_ON(!debugfs_initialized())) + if (WARN_ON(!tracefs_initialized()) || + (IS_ENABLED(CONFIG_DEBUG_FS) && + WARN_ON(!debugfs_initialized()))) return ERR_PTR(-ENODEV); /* -- cgit v1.2.3 From 03e88ae6b369da2a26a6e09ad165e57d210789cd Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Fri, 6 Nov 2015 22:07:26 +0300 Subject: tracing: Remove unused ftrace_cpu_disabled per cpu variable Since the ring buffer is lockless, there is no need to disable ftrace on CPU. And no one doing so: after commit 68179686ac67cb ("tracing: Remove ftrace_disable/enable_cpu()") ftrace_cpu_disabled stays the same after initialization, nothing changes it. ftrace_cpu_disabled shouldn't be used by any external module since it disables only function and graph_function tracers but not any other tracer. 
Link: http://lkml.kernel.org/r/1446836846-22239-1-git-send-email-0x7f454c46@gmail.com Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 6 ------ kernel/trace/trace.h | 1 - kernel/trace/trace_functions_graph.c | 6 ------ 3 files changed, 13 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 08af79c106e1..b11582618991 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -100,8 +100,6 @@ static DEFINE_PER_CPU(bool, trace_cmdline_save); */ static int tracing_disabled = 1; -DEFINE_PER_CPU(int, ftrace_cpu_disabled); - cpumask_var_t __read_mostly tracing_buffer_mask; /* @@ -1775,10 +1773,6 @@ trace_function(struct trace_array *tr, struct ring_buffer_event *event; struct ftrace_entry *entry; - /* If we are reading the ring buffer, don't trace */ - if (unlikely(__this_cpu_read(ftrace_cpu_disabled))) - return; - event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), flags, pc); if (!event) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index dd7620802e72..919d9d07686f 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -667,7 +667,6 @@ extern int DYN_FTRACE_TEST_NAME2(void); extern bool ring_buffer_expanded; extern bool tracing_selftest_disabled; -DECLARE_PER_CPU(int, ftrace_cpu_disabled); #ifdef CONFIG_FTRACE_STARTUP_TEST extern int trace_selftest_startup_function(struct tracer *trace, diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 92382af7a213..a663cbb84107 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -288,9 +288,6 @@ int __trace_graph_entry(struct trace_array *tr, struct ring_buffer *buffer = tr->trace_buffer.buffer; struct ftrace_graph_ent_entry *entry; - if (unlikely(__this_cpu_read(ftrace_cpu_disabled))) - return 0; - event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, sizeof(*entry), flags, pc); if (!event) @@ 
-403,9 +400,6 @@ void __trace_graph_return(struct trace_array *tr, struct ring_buffer *buffer = tr->trace_buffer.buffer; struct ftrace_graph_ret_entry *entry; - if (unlikely(__this_cpu_read(ftrace_cpu_disabled))) - return; - event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, sizeof(*entry), flags, pc); if (!event) -- cgit v1.2.3 From 2fd59077755c44dbbd9b2fa89cf988235a3a6a2b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 4 Nov 2015 05:48:38 -0800 Subject: perf: Disable IRQs across RCU RS CS that acquires scheduler lock The perf_lock_task_context() function disables preemption across its RCU read-side critical section because that critical section acquires a scheduler lock. If there was a preemption during that RCU read-side critical section, the rcu_read_unlock() could attempt to acquire scheduler locks, resulting in deadlock. However, recent optimizations to expedited grace periods mean that IPI handlers that execute during preemptible RCU read-side critical sections can now cause the subsequent rcu_read_unlock() to acquire scheduler locks. Disabling preemption does nothing to prevent these IPI handlers from executing, so these optimizations introduced a deadlock. In theory, this deadlock could be avoided by pulling all wakeups and printk()s out from rnp->lock critical sections, but in practice this would re-introduce some RCU CPU stall warning bugs. Given that acquiring scheduler locks entails disabling interrupts, these deadlocks can be avoided by disabling interrupts (instead of disabling preemption) across any RCU read-side critical section that acquires scheduler locks and holds them across the rcu_read_unlock(). This commit therefore makes this change for perf_lock_task_context(). Reported-by: Dave Jones Reported-by: Peter Zijlstra Signed-off-by: Paul E. 
McKenney Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Stephane Eranian Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20151104134838.GR29027@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/events/core.c b/kernel/events/core.c index ea02109aee77..f8e5c443d74e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1050,13 +1050,13 @@ retry: /* * One of the few rules of preemptible RCU is that one cannot do * rcu_read_unlock() while holding a scheduler (or nested) lock when - * part of the read side critical section was preemptible -- see + * part of the read side critical section was irqs-enabled -- see * rcu_read_unlock_special(). * * Since ctx->lock nests under rq->lock we must ensure the entire read - * side critical section is non-preemptible. + * side critical section has interrupts disabled. */ - preempt_disable(); + local_irq_save(*flags); rcu_read_lock(); ctx = rcu_dereference(task->perf_event_ctxp[ctxn]); if (ctx) { @@ -1070,21 +1070,22 @@ retry: * if so. If we locked the right context, then it * can't get swapped on us any more. */ - raw_spin_lock_irqsave(&ctx->lock, *flags); + raw_spin_lock(&ctx->lock); if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) { - raw_spin_unlock_irqrestore(&ctx->lock, *flags); + raw_spin_unlock(&ctx->lock); rcu_read_unlock(); - preempt_enable(); + local_irq_restore(*flags); goto retry; } if (!atomic_inc_not_zero(&ctx->refcount)) { - raw_spin_unlock_irqrestore(&ctx->lock, *flags); + raw_spin_unlock(&ctx->lock); ctx = NULL; } } rcu_read_unlock(); - preempt_enable(); + if (!ctx) + local_irq_restore(*flags); return ctx; } -- cgit v1.2.3 From b71b437eedaed985062492565d9d421d975ae845 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 2 Nov 2015 10:50:51 +0100 Subject: perf: Fix inherited events vs. 
tracepoint filters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Arnaldo reported that tracepoint filters seem to misbehave (ie. not apply) on inherited events. The fix is obvious; filters are only set on the actual (parent) event, use the normal pattern of using this parent event for filters. This is safe because each child event has a reference to it. Reported-by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Signed-off-by: Peter Zijlstra (Intel) Cc: Adrian Hunter Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Frédéric Weisbecker Cc: Jiri Olsa Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Wang Nan Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20151102095051.GN17308@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/events/core.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel') diff --git a/kernel/events/core.c b/kernel/events/core.c index f8e5c443d74e..98a4b9db7f37 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6909,6 +6909,10 @@ static int perf_tp_filter_match(struct perf_event *event, { void *record = data->raw->data; + /* only top level events have filters set */ + if (event->parent) + event = event->parent; + if (likely(!event->filter) || filter_match_preds(event->filter, record)) return 1; return 0; -- cgit v1.2.3 From 25b3e5a3344e1f700c1efec5b6f0199f04707fb1 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Thu, 5 Nov 2015 15:56:22 -0500 Subject: sched/numa: Fix math underflow in task_tick_numa() The NUMA balancing code implements delays in scanning by advancing curr->node_stamp beyond curr->se.sum_exec_runtime. With unsigned math, that creates an underflow, which results in task_numa_work being queued all the time, even when we don't want to. Avoiding the math underflow makes it possible to reduce CPU overhead in the NUMA balancing code. 
Reported-and-tested-by: Jan Stancek Signed-off-by: Rik van Riel Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: mgorman@suse.de Link: http://lkml.kernel.org/r/1446756983-28173-2-git-send-email-riel@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 824aa9f501a3..f04fda8f669c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2302,7 +2302,7 @@ void task_tick_numa(struct rq *rq, struct task_struct *curr) now = curr->se.sum_exec_runtime; period = (u64)curr->numa_scan_period * NSEC_PER_MSEC; - if (now - curr->node_stamp > period) { + if (now > curr->node_stamp + period) { if (!curr->node_stamp) curr->numa_scan_period = task_scan_min(curr); curr->node_stamp += period; -- cgit v1.2.3 From f70cd6b07e629f367bb9b1ac9d0e3e669eb325c0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 28 Oct 2015 02:39:55 +0100 Subject: context_tracking: remove duplicate enabled check All calls to context_tracking_enter and context_tracking_exit are already checking context_tracking_is_enabled, except the context_tracking_user_enter and context_tracking_user_exit functions left in for the benefit of assembly calls. Pull the check up to those functions, by making them simple wrappers around the user_enter and user_exit inline functions. 
Cc: Frederic Weisbecker Cc: Paul McKenney Reviewed-by: Rik van Riel Tested-by: Rik van Riel Acked-by: Andy Lutomirski Signed-off-by: Paolo Bonzini --- include/linux/context_tracking.h | 4 ++-- kernel/context_tracking.c | 16 ++-------------- 2 files changed, 4 insertions(+), 16 deletions(-) (limited to 'kernel') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 008fc67d0d96..6ef136ff0897 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -18,13 +18,13 @@ extern void context_tracking_user_exit(void); static inline void user_enter(void) { if (context_tracking_is_enabled()) - context_tracking_user_enter(); + context_tracking_enter(CONTEXT_USER); } static inline void user_exit(void) { if (context_tracking_is_enabled()) - context_tracking_user_exit(); + context_tracking_exit(CONTEXT_USER); } static inline enum ctx_state exception_enter(void) diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index 0a495ab35bc7..6d4c6ce21275 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -62,15 +62,6 @@ void context_tracking_enter(enum ctx_state state) { unsigned long flags; - /* - * Repeat the user_enter() check here because some archs may be calling - * this from asm and if no CPU needs context tracking, they shouldn't - * go further. Repeat the check here until they support the inline static - * key check. 
- */ - if (!context_tracking_is_enabled()) - return; - /* * Some contexts may involve an exception occuring in an irq, * leading to that nesting: @@ -128,7 +119,7 @@ EXPORT_SYMBOL_GPL(context_tracking_enter); void context_tracking_user_enter(void) { - context_tracking_enter(CONTEXT_USER); + user_enter(); } NOKPROBE_SYMBOL(context_tracking_user_enter); @@ -148,9 +139,6 @@ void context_tracking_exit(enum ctx_state state) { unsigned long flags; - if (!context_tracking_is_enabled()) - return; - if (in_interrupt()) return; @@ -181,7 +169,7 @@ EXPORT_SYMBOL_GPL(context_tracking_exit); void context_tracking_user_exit(void) { - context_tracking_exit(CONTEXT_USER); + user_exit(); } NOKPROBE_SYMBOL(context_tracking_user_exit); -- cgit v1.2.3 From d0e536d89395ecd8ab78fe999dc4d6f5d140ce46 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 28 Oct 2015 02:39:56 +0100 Subject: context_tracking: avoid irq_save/irq_restore on guest entry and exit guest_enter and guest_exit must be called with interrupts disabled, since they take the vtime_seqlock with write_seq{lock,unlock}. Therefore, it is not necessary to check for exceptions, nor to save/restore the IRQ state, when context tracking functions are called by guest_enter and guest_exit. Split the body of context_tracking_enter and context_tracking_exit out to __-prefixed functions, and use them from KVM. Rik van Riel has measured this to speed up a tight vmentry/vmexit loop by about 2%. 
Cc: Andy Lutomirski Cc: Frederic Weisbecker Cc: Paul McKenney Reviewed-by: Rik van Riel Tested-by: Rik van Riel Signed-off-by: Paolo Bonzini --- include/linux/context_tracking.h | 8 +++-- kernel/context_tracking.c | 64 ++++++++++++++++++++++++---------------- 2 files changed, 44 insertions(+), 28 deletions(-) (limited to 'kernel') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 6ef136ff0897..68b575afe5f5 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -10,6 +10,10 @@ #ifdef CONFIG_CONTEXT_TRACKING extern void context_tracking_cpu_set(int cpu); +/* Called with interrupts disabled. */ +extern void __context_tracking_enter(enum ctx_state state); +extern void __context_tracking_exit(enum ctx_state state); + extern void context_tracking_enter(enum ctx_state state); extern void context_tracking_exit(enum ctx_state state); extern void context_tracking_user_enter(void); @@ -88,13 +92,13 @@ static inline void guest_enter(void) current->flags |= PF_VCPU; if (context_tracking_is_enabled()) - context_tracking_enter(CONTEXT_GUEST); + __context_tracking_enter(CONTEXT_GUEST); } static inline void guest_exit(void) { if (context_tracking_is_enabled()) - context_tracking_exit(CONTEXT_GUEST); + __context_tracking_exit(CONTEXT_GUEST); if (vtime_accounting_enabled()) vtime_guest_exit(current); diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index 6d4c6ce21275..d8560ee3bab7 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -58,27 +58,13 @@ static void context_tracking_recursion_exit(void) * instructions to execute won't use any RCU read side critical section * because this function sets RCU in extended quiescent state. 
*/ -void context_tracking_enter(enum ctx_state state) +void __context_tracking_enter(enum ctx_state state) { - unsigned long flags; - - /* - * Some contexts may involve an exception occuring in an irq, - * leading to that nesting: - * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit() - * This would mess up the dyntick_nesting count though. And rcu_irq_*() - * helpers are enough to protect RCU uses inside the exception. So - * just return immediately if we detect we are in an IRQ. - */ - if (in_interrupt()) - return; - /* Kernel threads aren't supposed to go to userspace */ WARN_ON_ONCE(!current->mm); - local_irq_save(flags); if (!context_tracking_recursion_enter()) - goto out_irq_restore; + return; if ( __this_cpu_read(context_tracking.state) != state) { if (__this_cpu_read(context_tracking.active)) { @@ -111,7 +97,27 @@ void context_tracking_enter(enum ctx_state state) __this_cpu_write(context_tracking.state, state); } context_tracking_recursion_exit(); -out_irq_restore: +} +NOKPROBE_SYMBOL(__context_tracking_enter); +EXPORT_SYMBOL_GPL(__context_tracking_enter); + +void context_tracking_enter(enum ctx_state state) +{ + unsigned long flags; + + /* + * Some contexts may involve an exception occuring in an irq, + * leading to that nesting: + * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit() + * This would mess up the dyntick_nesting count though. And rcu_irq_*() + * helpers are enough to protect RCU uses inside the exception. So + * just return immediately if we detect we are in an IRQ. + */ + if (in_interrupt()) + return; + + local_irq_save(flags); + __context_tracking_enter(state); local_irq_restore(flags); } NOKPROBE_SYMBOL(context_tracking_enter); @@ -135,16 +141,10 @@ NOKPROBE_SYMBOL(context_tracking_user_enter); * This call supports re-entrancy. This way it can be called from any exception * handler without needing to know if we came from userspace or not. 
*/ -void context_tracking_exit(enum ctx_state state) +void __context_tracking_exit(enum ctx_state state) { - unsigned long flags; - - if (in_interrupt()) - return; - - local_irq_save(flags); if (!context_tracking_recursion_enter()) - goto out_irq_restore; + return; if (__this_cpu_read(context_tracking.state) == state) { if (__this_cpu_read(context_tracking.active)) { @@ -161,7 +161,19 @@ void context_tracking_exit(enum ctx_state state) __this_cpu_write(context_tracking.state, CONTEXT_KERNEL); } context_tracking_recursion_exit(); -out_irq_restore: +} +NOKPROBE_SYMBOL(__context_tracking_exit); +EXPORT_SYMBOL_GPL(__context_tracking_exit); + +void context_tracking_exit(enum ctx_state state) +{ + unsigned long flags; + + if (in_interrupt()) + return; + + local_irq_save(flags); + __context_tracking_exit(state); local_irq_restore(flags); } NOKPROBE_SYMBOL(context_tracking_exit); -- cgit v1.2.3 From 4717f133736dec10605da9e29e707144c8d486df Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Tue, 10 Nov 2015 11:58:12 +0200 Subject: genirq/PM: Restore system wake up from chained interrupts Commit e509bd7da149 ("genirq: Allow migration of chained interrupts by installing default action") breaks PCS wake up IRQ behaviour on TI OMAP based platforms (dra7-evm). TI OMAP IRQ wake up configuration: GIC-irqchip->PCM_IRQ |- omap_prcm_register_chain_handler |- PRCM-irqchip -> PRCM_IO_IRQ |- pcs_irq_chain_handler |- pinctrl-irqchip -> PCS_uart1_wakeup_irq This happens because IRQ PM code (irq/pm.c) is expected to ignore chained interrupts by default: static bool suspend_device_irq(struct irq_desc *desc) { if (!desc->action || desc->no_suspend_depth) return false; - it's expected !desc->action = true for chained interrupts; but, after above change, all chained interrupt descriptors will have default action handler installed - chained_action. As result, chained interrupts will be silently disabled during system suspend. 
Hence, fix it by introducing helper function irq_desc_is_chained() and use it in suspend_device_irq() for chained interrupts identification and skip them, once detected. Fixes: e509bd7da149 ("genirq: Allow migration of chained interrupts..") Signed-off-by: Grygorii Strashko Reviewed-by: Mika Westerberg Cc: Tony Lindgren Cc: Cc: Cc: Tony Lindgren Link: http://lkml.kernel.org/r/1447149492-20699-1-git-send-email-grygorii.strashko@ti.com Signed-off-by: Thomas Gleixner --- kernel/irq/internals.h | 5 +++++ kernel/irq/pm.c | 3 ++- kernel/irq/proc.c | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 05c2188271b8..fcab63c66905 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -199,6 +199,11 @@ static inline int irq_desc_get_node(struct irq_desc *desc) return irq_common_data_get_node(&desc->irq_common_data); } +static inline int irq_desc_is_chained(struct irq_desc *desc) +{ + return (desc->action && desc->action == &chained_action); +} + #ifdef CONFIG_PM_SLEEP bool irq_pm_check_wakeup(struct irq_desc *desc); void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action); diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c index 21c62617a35a..84ab239a00e2 100644 --- a/kernel/irq/pm.c +++ b/kernel/irq/pm.c @@ -70,7 +70,8 @@ void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action) static bool suspend_device_irq(struct irq_desc *desc) { - if (!desc->action || desc->no_suspend_depth) + if (!desc->action || irq_desc_is_chained(desc) || + desc->no_suspend_depth) return false; if (irqd_is_wakeup_set(&desc->irq_data)) { diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index a916cf144b65..a2c02fd5d6d0 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -475,7 +475,7 @@ int show_interrupts(struct seq_file *p, void *v) for_each_online_cpu(j) any_count |= kstat_irqs_cpu(i, j); action = desc->action; - if ((!action || action == &chained_action) 
&& !any_count) + if ((!action || irq_desc_is_chained(desc)) && !any_count) goto out; seq_printf(p, "%*d: ", prec, i); -- cgit v1.2.3 From e428abbbf616cd8fdd1162e4a624ad1d47b47544 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Tue, 10 Nov 2015 05:15:15 +0800 Subject: tracing: #ifdef out uses of max trace when CONFIG_TRACER_MAX_TRACE is not set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit tracing_max_lat_fops is used only when TRACER_MAX_TRACE is enabled, so also switch the related code. The related warning with defconfig under x86_64: CC kernel/trace/trace.o kernel/trace/trace.c:5466:37: warning: ‘tracing_max_lat_fops’ defined but not used [-Wunused-const-variable] static const struct file_operations tracing_max_lat_fops = { Signed-off-by: Chen Gang Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b11582618991..87fb9801bd9e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4548,6 +4548,8 @@ out: return ret; } +#ifdef CONFIG_TRACER_MAX_TRACE + static ssize_t tracing_max_lat_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) @@ -4562,6 +4564,8 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf, return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos); } +#endif + static int tracing_open_pipe(struct inode *inode, struct file *filp) { struct trace_array *tr = inode->i_private; @@ -5463,12 +5467,14 @@ static const struct file_operations tracing_thresh_fops = { .llseek = generic_file_llseek, }; +#ifdef CONFIG_TRACER_MAX_TRACE static const struct file_operations tracing_max_lat_fops = { .open = tracing_open_generic, .read = tracing_max_lat_read, .write = tracing_max_lat_write, .llseek = generic_file_llseek, }; +#endif static const struct file_operations set_tracer_fops = { .open = tracing_open_generic, -- cgit v1.2.3 From 
f99bf205dab026ef434520198af2fcb7dae0efdb Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 19 Nov 2015 11:56:22 +0100 Subject: bpf: add show_fdinfo handler for maps Add a handler for show_fdinfo() to be used by the anon-inodes backend for eBPF maps, and dump the map specification there. Not only useful for admins, but also it provides a minimal way to compare specs from ELF vs pinned object. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- kernel/bpf/syscall.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0d3313d02a7e..6d1407bc1531 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -93,6 +93,23 @@ void bpf_map_put(struct bpf_map *map) } } +#ifdef CONFIG_PROC_FS +static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) +{ + const struct bpf_map *map = filp->private_data; + + seq_printf(m, + "map_type:\t%u\n" + "key_size:\t%u\n" + "value_size:\t%u\n" + "max_entries:\t%u\n", + map->map_type, + map->key_size, + map->value_size, + map->max_entries); +} +#endif + static int bpf_map_release(struct inode *inode, struct file *filp) { struct bpf_map *map = filp->private_data; @@ -108,7 +125,10 @@ static int bpf_map_release(struct inode *inode, struct file *filp) } static const struct file_operations bpf_map_fops = { - .release = bpf_map_release, +#ifdef CONFIG_PROC_FS + .show_fdinfo = bpf_map_show_fdinfo, +#endif + .release = bpf_map_release, }; int bpf_map_new_fd(struct bpf_map *map) -- cgit v1.2.3