From 22b886dd1018093920c4250dee2a9a3cb7cff7b8 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 4 Nov 2015 12:15:33 -0500
Subject: timers: Use proper base migration in add_timer_on()

Regardless of the previous CPU a timer was on, add_timer_on()
currently simply sets timer->flags to the new CPU.  As the caller must
be seeing the timer as idle, this is locally fine, but the timer
leaving the old base while unlocked can lead to race conditions as
follows.

Let's say timer was on cpu 0.

  cpu 0					cpu 1
  -----------------------------------------------------------------------------
  del_timer(timer) succeeds
					del_timer(timer)
					  lock_timer_base(timer) locks cpu_0_base
  add_timer_on(timer, 1)
    spin_lock(&cpu_1_base->lock)
    timer->flags set to cpu_1_base
    operates on @timer			  operates on @timer

This triggered with mod_delayed_work_on() which contains
"if (del_timer()) add_timer_on()" sequence eventually leading to the
following oops.

  BUG: unable to handle kernel NULL pointer dereference at           (null)
  IP: [<ffffffff810ca6e9>] detach_if_pending+0x69/0x1a0
  ...
  Workqueue: wqthrash wqthrash_workfunc [wqthrash]
  task: ffff8800172ca680 ti: ffff8800172d0000 task.ti: ffff8800172d0000
  RIP: 0010:[<ffffffff810ca6e9>]  [<ffffffff810ca6e9>] detach_if_pending+0x69/0x1a0
  ...
  Call Trace:
   [<ffffffff810cb0b4>] del_timer+0x44/0x60
   [<ffffffff8106e836>] try_to_grab_pending+0xb6/0x160
   [<ffffffff8106e913>] mod_delayed_work_on+0x33/0x80
   [<ffffffffa0000081>] wqthrash_workfunc+0x61/0x90 [wqthrash]
   [<ffffffff8106dba8>] process_one_work+0x1e8/0x650
   [<ffffffff8106e05e>] worker_thread+0x4e/0x450
   [<ffffffff810746af>] kthread+0xef/0x110
   [<ffffffff8185980f>] ret_from_fork+0x3f/0x70

Fix it by updating add_timer_on() to perform proper migration as
__mod_timer() does.

Reported-and-tested-by: Jeff Layton <jlayton@poochiereds.net>
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Chris Worley <chris.worley@primarydata.com>
Cc: bfields@fieldses.org
Cc: Michael Skralivetsky <michael.skralivetsky@primarydata.com>
Cc: Trond Myklebust <trond.myklebust@primarydata.com>
Cc: Shaohua Li <shli@fb.com>
Cc: Jeff Layton <jlayton@poochiereds.net>
Cc: kernel-team@fb.com
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20151029103113.2f893924@tlielax.poochiereds.net
Link: http://lkml.kernel.org/r/20151104171533.GI5749@mtj.duckdns.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/timer.c | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

(limited to 'kernel')
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 74591ba9474f..bbc5d1114583 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -977,13 +977,29 @@ EXPORT_SYMBOL(add_timer);
  */
 void add_timer_on(struct timer_list *timer, int cpu)
 {
-	struct tvec_base *base = per_cpu_ptr(&tvec_bases, cpu);
+	struct tvec_base *new_base = per_cpu_ptr(&tvec_bases, cpu);
+	struct tvec_base *base;
 	unsigned long flags;
 
 	timer_stats_timer_set_start_info(timer);
 	BUG_ON(timer_pending(timer) || !timer->function);
-	spin_lock_irqsave(&base->lock, flags);
-	timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu;
+
+	/*
+	 * If @timer was on a different CPU, it should be migrated with the
+	 * old base locked to prevent other operations proceeding with the
+	 * wrong base locked.  See lock_timer_base().
+	 */
+	base = lock_timer_base(timer, &flags);
+	if (base != new_base) {
+		timer->flags |= TIMER_MIGRATING;
+
+		spin_unlock(&base->lock);
+		base = new_base;
+		spin_lock(&base->lock);
+		WRITE_ONCE(timer->flags,
+			   (timer->flags & ~TIMER_BASEMASK) | cpu);
+	}
+
 	debug_activate(timer, timer->expires);
 	internal_add_timer(base, timer);
 	spin_unlock_irqrestore(&base->lock, flags);
-- 
cgit v1.2.3


From 8b1291994d8e5e621a8af7e165b106e50d04bbf1 Mon Sep 17 00:00:00 2001
From: Jiaxing Wang <hello.wjx@gmail.com>
Date: Fri, 6 Nov 2015 16:04:16 +0800
Subject: tracing: Make tracing work when debugfs is not configured in

Currently tracing_init_dentry() returns -ENODEV when debugfs is not
configured in, which causes tracefs not populated with tracing files and
directories, so we will get an empty directory even after we manually
mount tracefs.

We can make tracing_init_dentry() return NULL if debugfs is not
configured in and can manually mount tracefs. But return -ENODEV
if debugfs is configured in but not initialized or failed to create
automount point as that would break backward compatibility with older
tools.

Link: http://lkml.kernel.org/r/1446797056-11683-1-git-send-email-hello.wjx@gmail.com

Signed-off-by: Jiaxing Wang <hello.wjx@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 2198a630ef58..08af79c106e1 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6847,7 +6847,9 @@ struct dentry *tracing_init_dentry(void)
 	if (tr->dir)
 		return NULL;
 
-	if (WARN_ON(!debugfs_initialized()))
+	if (WARN_ON(!tracefs_initialized()) ||
+		(IS_ENABLED(CONFIG_DEBUG_FS) &&
+		 WARN_ON(!debugfs_initialized())))
 		return ERR_PTR(-ENODEV);
 
 	/*
-- 
cgit v1.2.3


From 03e88ae6b369da2a26a6e09ad165e57d210789cd Mon Sep 17 00:00:00 2001
From: Dmitry Safonov <0x7f454c46@gmail.com>
Date: Fri, 6 Nov 2015 22:07:26 +0300
Subject: tracing: Remove unused ftrace_cpu_disabled per cpu variable

Since the ring buffer is lockless, there is no need to disable ftrace on
CPU. And no one doing so: after commit 68179686ac67cb ("tracing: Remove
ftrace_disable/enable_cpu()") ftrace_cpu_disabled stays the same after
initialization, nothing changes it.
ftrace_cpu_disabled shouldn't be used by any external module since it
disables only function and graph_function tracers but not any other
tracer.

Link: http://lkml.kernel.org/r/1446836846-22239-1-git-send-email-0x7f454c46@gmail.com

Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c                 | 6 ------
 kernel/trace/trace.h                 | 1 -
 kernel/trace/trace_functions_graph.c | 6 ------
 3 files changed, 13 deletions(-)

(limited to 'kernel')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 08af79c106e1..b11582618991 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -100,8 +100,6 @@ static DEFINE_PER_CPU(bool, trace_cmdline_save);
  */
 static int tracing_disabled = 1;
 
-DEFINE_PER_CPU(int, ftrace_cpu_disabled);
-
 cpumask_var_t __read_mostly	tracing_buffer_mask;
 
 /*
@@ -1775,10 +1773,6 @@ trace_function(struct trace_array *tr,
 	struct ring_buffer_event *event;
 	struct ftrace_entry *entry;
 
-	/* If we are reading the ring buffer, don't trace */
-	if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
-		return;
-
 	event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
 					  flags, pc);
 	if (!event)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index dd7620802e72..919d9d07686f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -667,7 +667,6 @@ extern int DYN_FTRACE_TEST_NAME2(void);
 
 extern bool ring_buffer_expanded;
 extern bool tracing_selftest_disabled;
-DECLARE_PER_CPU(int, ftrace_cpu_disabled);
 
 #ifdef CONFIG_FTRACE_STARTUP_TEST
 extern int trace_selftest_startup_function(struct tracer *trace,
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 92382af7a213..a663cbb84107 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -288,9 +288,6 @@ int __trace_graph_entry(struct trace_array *tr,
 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
 	struct ftrace_graph_ent_entry *entry;
 
-	if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
-		return 0;
-
 	event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT,
 					  sizeof(*entry), flags, pc);
 	if (!event)
@@ -403,9 +400,6 @@ void __trace_graph_return(struct trace_array *tr,
 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
 	struct ftrace_graph_ret_entry *entry;
 
-	if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
-		return;
-
 	event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET,
 					  sizeof(*entry), flags, pc);
 	if (!event)
-- 
cgit v1.2.3


From 2fd59077755c44dbbd9b2fa89cf988235a3a6a2b Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 4 Nov 2015 05:48:38 -0800
Subject: perf: Disable IRQs across RCU RS CS that acquires scheduler lock

The perf_lock_task_context() function disables preemption across its
RCU read-side critical section because that critical section acquires
a scheduler lock.  If there was a preemption during that RCU read-side
critical section, the rcu_read_unlock() could attempt to acquire scheduler
locks, resulting in deadlock.

However, recent optimizations to expedited grace periods mean that IPI
handlers that execute during preemptible RCU read-side critical sections
can now cause the subsequent rcu_read_unlock() to acquire scheduler locks.
Disabling preemption does nothiing to prevent these IPI handlers from
executing, so these optimizations introduced a deadlock.  In theory,
this deadlock could be avoided by pulling all wakeups and printk()s out
from rnp->lock critical sections, but in practice this would re-introduce
some RCU CPU stall warning bugs.

Given that acquiring scheduler locks entails disabling interrupts, these
deadlocks can be avoided by disabling interrupts (instead of disabling
preemption) across any RCU read-side critical that acquires scheduler
locks and holds them across the rcu_read_unlock().  This commit therefore
makes this change for perf_lock_task_context().

Reported-by: Dave Jones <davej@codemonkey.org.uk>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Stephane Eranian <eranian@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20151104134838.GR29027@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'kernel')

diff --git a/kernel/events/core.c b/kernel/events/core.c
index ea02109aee77..f8e5c443d74e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1050,13 +1050,13 @@ retry:
 	/*
 	 * One of the few rules of preemptible RCU is that one cannot do
 	 * rcu_read_unlock() while holding a scheduler (or nested) lock when
-	 * part of the read side critical section was preemptible -- see
+	 * part of the read side critical section was irqs-enabled -- see
 	 * rcu_read_unlock_special().
 	 *
 	 * Since ctx->lock nests under rq->lock we must ensure the entire read
-	 * side critical section is non-preemptible.
+	 * side critical section has interrupts disabled.
 	 */
-	preempt_disable();
+	local_irq_save(*flags);
 	rcu_read_lock();
 	ctx = rcu_dereference(task->perf_event_ctxp[ctxn]);
 	if (ctx) {
@@ -1070,21 +1070,22 @@ retry:
 		 * if so.  If we locked the right context, then it
 		 * can't get swapped on us any more.
 		 */
-		raw_spin_lock_irqsave(&ctx->lock, *flags);
+		raw_spin_lock(&ctx->lock);
 		if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) {
-			raw_spin_unlock_irqrestore(&ctx->lock, *flags);
+			raw_spin_unlock(&ctx->lock);
 			rcu_read_unlock();
-			preempt_enable();
+			local_irq_restore(*flags);
 			goto retry;
 		}
 
 		if (!atomic_inc_not_zero(&ctx->refcount)) {
-			raw_spin_unlock_irqrestore(&ctx->lock, *flags);
+			raw_spin_unlock(&ctx->lock);
 			ctx = NULL;
 		}
 	}
 	rcu_read_unlock();
-	preempt_enable();
+	if (!ctx)
+		local_irq_restore(*flags);
 	return ctx;
 }
 
-- 
cgit v1.2.3


From b71b437eedaed985062492565d9d421d975ae845 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 2 Nov 2015 10:50:51 +0100
Subject: perf: Fix inherited events vs. tracepoint filters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Arnaldo reported that tracepoint filters seem to misbehave (ie. not
apply) on inherited events.

The fix is obvious; filters are only set on the actual (parent)
event, use the normal pattern of using this parent event for filters.
This is safe because each child event has a reference to it.

Reported-by: Arnaldo Carvalho de Melo <acme@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20151102095051.GN17308@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'kernel')

diff --git a/kernel/events/core.c b/kernel/events/core.c
index f8e5c443d74e..98a4b9db7f37 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6909,6 +6909,10 @@ static int perf_tp_filter_match(struct perf_event *event,
 {
 	void *record = data->raw->data;
 
+	/* only top level events have filters set */
+	if (event->parent)
+		event = event->parent;
+
 	if (likely(!event->filter) || filter_match_preds(event->filter, record))
 		return 1;
 	return 0;
-- 
cgit v1.2.3


From 25b3e5a3344e1f700c1efec5b6f0199f04707fb1 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Thu, 5 Nov 2015 15:56:22 -0500
Subject: sched/numa: Fix math underflow in task_tick_numa()

The NUMA balancing code implements delays in scanning by
advancing curr->node_stamp beyond curr->se.sum_exec_runtime.

With unsigned math, that creates an underflow, which results
in task_numa_work being queued all the time, even when we
don't want to.

Avoiding the math underflow makes it possible to reduce CPU
overhead in the NUMA balancing code.

Reported-and-tested-by: Jan Stancek <jstancek@redhat.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: mgorman@suse.de
Link: http://lkml.kernel.org/r/1446756983-28173-2-git-send-email-riel@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/fair.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 824aa9f501a3..f04fda8f669c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2302,7 +2302,7 @@ void task_tick_numa(struct rq *rq, struct task_struct *curr)
 	now = curr->se.sum_exec_runtime;
 	period = (u64)curr->numa_scan_period * NSEC_PER_MSEC;
 
-	if (now - curr->node_stamp > period) {
+	if (now > curr->node_stamp + period) {
 		if (!curr->node_stamp)
 			curr->numa_scan_period = task_scan_min(curr);
 		curr->node_stamp += period;
-- 
cgit v1.2.3


From f70cd6b07e629f367bb9b1ac9d0e3e669eb325c0 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 28 Oct 2015 02:39:55 +0100
Subject: context_tracking: remove duplicate enabled check

All calls to context_tracking_enter and context_tracking_exit
are already checking context_tracking_is_enabled, except the
context_tracking_user_enter and context_tracking_user_exit
functions left in for the benefit of assembly calls.

Pull the check up to those functions, by making them simple
wrappers around the user_enter and user_exit inline functions.

Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Tested-by: Rik van Riel <riel@redhat.com>
Acked-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/context_tracking.h |  4 ++--
 kernel/context_tracking.c        | 16 ++--------------
 2 files changed, 4 insertions(+), 16 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index 008fc67d0d96..6ef136ff0897 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -18,13 +18,13 @@ extern void context_tracking_user_exit(void);
 static inline void user_enter(void)
 {
 	if (context_tracking_is_enabled())
-		context_tracking_user_enter();
+		context_tracking_enter(CONTEXT_USER);
 
 }
 static inline void user_exit(void)
 {
 	if (context_tracking_is_enabled())
-		context_tracking_user_exit();
+		context_tracking_exit(CONTEXT_USER);
 }
 
 static inline enum ctx_state exception_enter(void)
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 0a495ab35bc7..6d4c6ce21275 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -62,15 +62,6 @@ void context_tracking_enter(enum ctx_state state)
 {
 	unsigned long flags;
 
-	/*
-	 * Repeat the user_enter() check here because some archs may be calling
-	 * this from asm and if no CPU needs context tracking, they shouldn't
-	 * go further. Repeat the check here until they support the inline static
-	 * key check.
-	 */
-	if (!context_tracking_is_enabled())
-		return;
-
 	/*
 	 * Some contexts may involve an exception occuring in an irq,
 	 * leading to that nesting:
@@ -128,7 +119,7 @@ EXPORT_SYMBOL_GPL(context_tracking_enter);
 
 void context_tracking_user_enter(void)
 {
-	context_tracking_enter(CONTEXT_USER);
+	user_enter();
 }
 NOKPROBE_SYMBOL(context_tracking_user_enter);
 
@@ -148,9 +139,6 @@ void context_tracking_exit(enum ctx_state state)
 {
 	unsigned long flags;
 
-	if (!context_tracking_is_enabled())
-		return;
-
 	if (in_interrupt())
 		return;
 
@@ -181,7 +169,7 @@ EXPORT_SYMBOL_GPL(context_tracking_exit);
 
 void context_tracking_user_exit(void)
 {
-	context_tracking_exit(CONTEXT_USER);
+	user_exit();
 }
 NOKPROBE_SYMBOL(context_tracking_user_exit);
 
-- 
cgit v1.2.3


From d0e536d89395ecd8ab78fe999dc4d6f5d140ce46 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 28 Oct 2015 02:39:56 +0100
Subject: context_tracking: avoid irq_save/irq_restore on guest entry and exit

guest_enter and guest_exit must be called with interrupts disabled,
since they take the vtime_seqlock with write_seq{lock,unlock}.
Therefore, it is not necessary to check for exceptions, nor to
save/restore the IRQ state, when context tracking functions are
called by guest_enter and guest_exit.

Split the body of context_tracking_entry and context_tracking_exit
out to __-prefixed functions, and use them from KVM.

Rik van Riel has measured this to speed up a tight vmentry/vmexit
loop by about 2%.

Cc: Andy Lutomirski <luto@kernel.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Tested-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/context_tracking.h |  8 +++--
 kernel/context_tracking.c        | 64 ++++++++++++++++++++++++----------------
 2 files changed, 44 insertions(+), 28 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index 6ef136ff0897..68b575afe5f5 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -10,6 +10,10 @@
 #ifdef CONFIG_CONTEXT_TRACKING
 extern void context_tracking_cpu_set(int cpu);
 
+/* Called with interrupts disabled.  */
+extern void __context_tracking_enter(enum ctx_state state);
+extern void __context_tracking_exit(enum ctx_state state);
+
 extern void context_tracking_enter(enum ctx_state state);
 extern void context_tracking_exit(enum ctx_state state);
 extern void context_tracking_user_enter(void);
@@ -88,13 +92,13 @@ static inline void guest_enter(void)
 		current->flags |= PF_VCPU;
 
 	if (context_tracking_is_enabled())
-		context_tracking_enter(CONTEXT_GUEST);
+		__context_tracking_enter(CONTEXT_GUEST);
 }
 
 static inline void guest_exit(void)
 {
 	if (context_tracking_is_enabled())
-		context_tracking_exit(CONTEXT_GUEST);
+		__context_tracking_exit(CONTEXT_GUEST);
 
 	if (vtime_accounting_enabled())
 		vtime_guest_exit(current);
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 6d4c6ce21275..d8560ee3bab7 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -58,27 +58,13 @@ static void context_tracking_recursion_exit(void)
  * instructions to execute won't use any RCU read side critical section
  * because this function sets RCU in extended quiescent state.
  */
-void context_tracking_enter(enum ctx_state state)
+void __context_tracking_enter(enum ctx_state state)
 {
-	unsigned long flags;
-
-	/*
-	 * Some contexts may involve an exception occuring in an irq,
-	 * leading to that nesting:
-	 * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
-	 * This would mess up the dyntick_nesting count though. And rcu_irq_*()
-	 * helpers are enough to protect RCU uses inside the exception. So
-	 * just return immediately if we detect we are in an IRQ.
-	 */
-	if (in_interrupt())
-		return;
-
 	/* Kernel threads aren't supposed to go to userspace */
 	WARN_ON_ONCE(!current->mm);
 
-	local_irq_save(flags);
 	if (!context_tracking_recursion_enter())
-		goto out_irq_restore;
+		return;
 
 	if ( __this_cpu_read(context_tracking.state) != state) {
 		if (__this_cpu_read(context_tracking.active)) {
@@ -111,7 +97,27 @@ void context_tracking_enter(enum ctx_state state)
 		__this_cpu_write(context_tracking.state, state);
 	}
 	context_tracking_recursion_exit();
-out_irq_restore:
+}
+NOKPROBE_SYMBOL(__context_tracking_enter);
+EXPORT_SYMBOL_GPL(__context_tracking_enter);
+
+void context_tracking_enter(enum ctx_state state)
+{
+	unsigned long flags;
+
+	/*
+	 * Some contexts may involve an exception occuring in an irq,
+	 * leading to that nesting:
+	 * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
+	 * This would mess up the dyntick_nesting count though. And rcu_irq_*()
+	 * helpers are enough to protect RCU uses inside the exception. So
+	 * just return immediately if we detect we are in an IRQ.
+	 */
+	if (in_interrupt())
+		return;
+
+	local_irq_save(flags);
+	__context_tracking_enter(state);
 	local_irq_restore(flags);
 }
 NOKPROBE_SYMBOL(context_tracking_enter);
@@ -135,16 +141,10 @@ NOKPROBE_SYMBOL(context_tracking_user_enter);
  * This call supports re-entrancy. This way it can be called from any exception
  * handler without needing to know if we came from userspace or not.
  */
-void context_tracking_exit(enum ctx_state state)
+void __context_tracking_exit(enum ctx_state state)
 {
-	unsigned long flags;
-
-	if (in_interrupt())
-		return;
-
-	local_irq_save(flags);
 	if (!context_tracking_recursion_enter())
-		goto out_irq_restore;
+		return;
 
 	if (__this_cpu_read(context_tracking.state) == state) {
 		if (__this_cpu_read(context_tracking.active)) {
@@ -161,7 +161,19 @@ void context_tracking_exit(enum ctx_state state)
 		__this_cpu_write(context_tracking.state, CONTEXT_KERNEL);
 	}
 	context_tracking_recursion_exit();
-out_irq_restore:
+}
+NOKPROBE_SYMBOL(__context_tracking_exit);
+EXPORT_SYMBOL_GPL(__context_tracking_exit);
+
+void context_tracking_exit(enum ctx_state state)
+{
+	unsigned long flags;
+
+	if (in_interrupt())
+		return;
+
+	local_irq_save(flags);
+	__context_tracking_exit(state);
 	local_irq_restore(flags);
 }
 NOKPROBE_SYMBOL(context_tracking_exit);
-- 
cgit v1.2.3


From 4717f133736dec10605da9e29e707144c8d486df Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Tue, 10 Nov 2015 11:58:12 +0200
Subject: genirq/PM: Restore system wake up from chained interrupts

Commit e509bd7da149 ("genirq: Allow migration of chained interrupts
by installing default action") breaks PCS wake up IRQ behaviour on
TI OMAP based platforms (dra7-evm).

TI OMAP IRQ wake up configuration:
GIC-irqchip->PCM_IRQ
  |- omap_prcm_register_chain_handler
     |- PRCM-irqchip -> PRCM_IO_IRQ
        |- pcs_irq_chain_handler
           |- pinctrl-irqchip -> PCS_uart1_wakeup_irq

This happens because IRQ PM code (irq/pm.c) is expected to ignore
chained interrupts by default:
  static bool suspend_device_irq(struct irq_desc *desc)
  {
	if (!desc->action || desc->no_suspend_depth)
		return false;
 - it's expected !desc->action = true for chained interrupts;

but, after above change, all chained interrupt descriptors will
have default action handler installed - chained_action.
As result, chained interrupts will be silently disabled during system
suspend.

Hence, fix it by introducing helper function irq_desc_is_chained() and
use it in suspend_device_irq() for chained interrupts identification
and skip them, once detected.

Fixes: e509bd7da149 ("genirq: Allow migration of chained interrupts..")
Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: Tony Lindgren <tony@atomide.com>
Cc: <nsekhar@ti.com>
Cc: <linux-arm-kernel@lists.infradead.org>
Cc: Tony Lindgren <tony@atomide.com>
Link: http://lkml.kernel.org/r/1447149492-20699-1-git-send-email-grygorii.strashko@ti.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/irq/internals.h | 5 +++++
 kernel/irq/pm.c        | 3 ++-
 kernel/irq/proc.c      | 2 +-
 3 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 05c2188271b8..fcab63c66905 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -199,6 +199,11 @@ static inline int irq_desc_get_node(struct irq_desc *desc)
 	return irq_common_data_get_node(&desc->irq_common_data);
 }
 
+static inline int irq_desc_is_chained(struct irq_desc *desc)
+{
+	return (desc->action && desc->action == &chained_action);
+}
+
 #ifdef CONFIG_PM_SLEEP
 bool irq_pm_check_wakeup(struct irq_desc *desc);
 void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action);
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index 21c62617a35a..84ab239a00e2 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -70,7 +70,8 @@ void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action)
 
 static bool suspend_device_irq(struct irq_desc *desc)
 {
-	if (!desc->action || desc->no_suspend_depth)
+	if (!desc->action || irq_desc_is_chained(desc) ||
+	    desc->no_suspend_depth)
 		return false;
 
 	if (irqd_is_wakeup_set(&desc->irq_data)) {
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index a916cf144b65..a2c02fd5d6d0 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -475,7 +475,7 @@ int show_interrupts(struct seq_file *p, void *v)
 	for_each_online_cpu(j)
 		any_count |= kstat_irqs_cpu(i, j);
 	action = desc->action;
-	if ((!action || action == &chained_action) && !any_count)
+	if ((!action || irq_desc_is_chained(desc)) && !any_count)
 		goto out;
 
 	seq_printf(p, "%*d: ", prec, i);
-- 
cgit v1.2.3


From e428abbbf616cd8fdd1162e4a624ad1d47b47544 Mon Sep 17 00:00:00 2001
From: Chen Gang <gang.chen.5i5j@gmail.com>
Date: Tue, 10 Nov 2015 05:15:15 +0800
Subject: tracing: #ifdef out uses of max trace when CONFIG_TRACER_MAX_TRACE is
 not set
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

tracing_max_lat_fops is used only when TRACER_MAX_TRACE enabled, so also
swith the related code. The related warning with defconfig under x86_64:

    CC      kernel/trace/trace.o
  kernel/trace/trace.c:5466:37: warning: ‘tracing_max_lat_fops’ defined but not used [-Wunused-const-variable]
   static const struct file_operations tracing_max_lat_fops = {

Signed-off-by: Chen Gang <gang.chen.5i5j@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'kernel')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index b11582618991..87fb9801bd9e 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4548,6 +4548,8 @@ out:
 	return ret;
 }
 
+#ifdef CONFIG_TRACER_MAX_TRACE
+
 static ssize_t
 tracing_max_lat_read(struct file *filp, char __user *ubuf,
 		     size_t cnt, loff_t *ppos)
@@ -4562,6 +4564,8 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf,
 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
 }
 
+#endif
+
 static int tracing_open_pipe(struct inode *inode, struct file *filp)
 {
 	struct trace_array *tr = inode->i_private;
@@ -5463,12 +5467,14 @@ static const struct file_operations tracing_thresh_fops = {
 	.llseek		= generic_file_llseek,
 };
 
+#ifdef CONFIG_TRACER_MAX_TRACE
 static const struct file_operations tracing_max_lat_fops = {
 	.open		= tracing_open_generic,
 	.read		= tracing_max_lat_read,
 	.write		= tracing_max_lat_write,
 	.llseek		= generic_file_llseek,
 };
+#endif
 
 static const struct file_operations set_tracer_fops = {
 	.open		= tracing_open_generic,
-- 
cgit v1.2.3


From f99bf205dab026ef434520198af2fcb7dae0efdb Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 19 Nov 2015 11:56:22 +0100
Subject: bpf: add show_fdinfo handler for maps

Add a handler for show_fdinfo() to be used by the anon-inodes
backend for eBPF maps, and dump the map specification there. Not
only useful for admins, but also it provides a minimal way to
compare specs from ELF vs pinned object.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/syscall.c | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0d3313d02a7e..6d1407bc1531 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -93,6 +93,23 @@ void bpf_map_put(struct bpf_map *map)
 	}
 }
 
+#ifdef CONFIG_PROC_FS
+static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
+{
+	const struct bpf_map *map = filp->private_data;
+
+	seq_printf(m,
+		   "map_type:\t%u\n"
+		   "key_size:\t%u\n"
+		   "value_size:\t%u\n"
+		   "max_entries:\t%u\n",
+		   map->map_type,
+		   map->key_size,
+		   map->value_size,
+		   map->max_entries);
+}
+#endif
+
 static int bpf_map_release(struct inode *inode, struct file *filp)
 {
 	struct bpf_map *map = filp->private_data;
@@ -108,7 +125,10 @@ static int bpf_map_release(struct inode *inode, struct file *filp)
 }
 
 static const struct file_operations bpf_map_fops = {
-	.release = bpf_map_release,
+#ifdef CONFIG_PROC_FS
+	.show_fdinfo	= bpf_map_show_fdinfo,
+#endif
+	.release	= bpf_map_release,
 };
 
 int bpf_map_new_fd(struct bpf_map *map)
-- 
cgit v1.2.3