From 9642d18eee2cd169b60c6ac0f20bda745b5a3d1e Mon Sep 17 00:00:00 2001
From: Vatika Harlalka <vatikaharlalka@gmail.com>
Date: Tue, 1 Sep 2015 16:50:59 +0200
Subject: nohz: Affine unpinned timers to housekeepers

The problem addressed in this patch is about affining unpinned
timers. Adaptive or Full Dynticks CPUs are currently disturbed
by unnecessary jitter due to firing of such timers on them.

This patch will affine timers to online CPUs which are not full
dynticks in NOHZ_FULL configured systems. It should not
introduce overhead in nohz full off case due to static keys.

Signed-off-by: Vatika Harlalka <vatikaharlalka@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1441119060-2230-2-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/core.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8b864ecee0e1..0902e4d72671 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -623,18 +623,21 @@ int get_nohz_timer_target(void)
 	int i, cpu = smp_processor_id();
 	struct sched_domain *sd;
 
-	if (!idle_cpu(cpu))
+	if (!idle_cpu(cpu) && is_housekeeping_cpu(cpu))
 		return cpu;
 
 	rcu_read_lock();
 	for_each_domain(cpu, sd) {
 		for_each_cpu(i, sched_domain_span(sd)) {
-			if (!idle_cpu(i)) {
+			if (!idle_cpu(i) && is_housekeeping_cpu(cpu)) {
 				cpu = i;
 				goto unlock;
 			}
 		}
 	}
+
+	if (!is_housekeeping_cpu(cpu))
+		cpu = housekeeping_any_cpu();
 unlock:
 	rcu_read_unlock();
 	return cpu;
-- 
cgit v1.2.3


From 7c8bb6cb95061b3143759459ed6c6b0c73bcfecb Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 1 Sep 2015 16:51:00 +0200
Subject: nohz: Assert existing housekeepers when nohz full enabled

The code ensures that when nohz full is running, at least the
boot CPU serves as a housekeeper and it can't be later offlined.

Let's assert this assumption to make sure that we have CPUs to
handle unbound jobs like workqueues and timers while nohz full
CPUs run undisturbed.

Also improve the comments on housekeeper offlining prevention.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Cc: Vatika Harlalka <vatikaharlalka@gmail.com>
Link: http://lkml.kernel.org/r/1441119060-2230-3-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/time/tick-sched.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 3319e16f31e5..7c7ec4515983 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -290,16 +290,17 @@ static int __init tick_nohz_full_setup(char *str)
 __setup("nohz_full=", tick_nohz_full_setup);
 
 static int tick_nohz_cpu_down_callback(struct notifier_block *nfb,
-						 unsigned long action,
-						 void *hcpu)
+				       unsigned long action,
+				       void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
 
 	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_DOWN_PREPARE:
 		/*
-		 * If we handle the timekeeping duty for full dynticks CPUs,
-		 * we can't safely shutdown that CPU.
+		 * The boot CPU handles housekeeping duty (unbound timers,
+		 * workqueues, timekeeping, ...) on behalf of full dynticks
+		 * CPUs. It must remain online when nohz full is enabled.
 		 */
 		if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
 			return NOTIFY_BAD;
@@ -370,6 +371,12 @@ void __init tick_nohz_init(void)
 	cpu_notifier(tick_nohz_cpu_down_callback, 0);
 	pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n",
 		cpumask_pr_args(tick_nohz_full_mask));
+
+	/*
+	 * We need at least one CPU to handle housekeeping work such
+	 * as timekeeping, unbound timers, workqueues, ...
+	 */
+	WARN_ON_ONCE(cpumask_empty(housekeeping_mask));
 }
 #endif
 
-- 
cgit v1.2.3


From 21dd33b09c61597df603c654589adffd7955491a Mon Sep 17 00:00:00 2001
From: Lina Iyer <lina.iyer@linaro.org>
Date: Wed, 2 Sep 2015 16:18:57 -0600
Subject: kernel/cpu_pm: fix cpu_cluster_pm_exit comment

cpu_cluster_pm_exit() must be sent after cpu_cluster_pm_enter() has been
sent for the cluster and before any cpu_pm_exit() notifications are sent
for any CPU.

Cc: Nicolas Pitre <nicolas.pitre@linaro.org>
Acked-by: Kevin Hilman <khilman@linaro.org>
Signed-off-by: Lina Iyer <lina.iyer@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/cpu_pm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/cpu_pm.c b/kernel/cpu_pm.c
index 9656a3c36503..009cc9a17d95 100644
--- a/kernel/cpu_pm.c
+++ b/kernel/cpu_pm.c
@@ -180,7 +180,7 @@ EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter);
  * low power state that may have caused some blocks in the same power domain
  * to reset.
  *
- * Must be called after cpu_pm_exit has been called on all cpus in the power
+ * Must be called after cpu_cluster_pm_enter has been called for the power
  * domain, and before cpu_pm_exit has been called on any cpu in the power
  * domain. Notified drivers can include VFP co-processor, interrupt controller
  * and its PM extensions, local CPU timers context save/restore which
-- 
cgit v1.2.3


From 43b3f02899f74ae9914a39547cc5492156f0027a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 4 Sep 2015 17:25:23 +0200
Subject: locking/qspinlock/x86: Fix performance regression under unaccelerated
 VMs

Dave ran into horrible performance on a VM without PARAVIRT_SPINLOCKS
set and Linus noted that the test-and-set implementation was retarded.

One should spin on the variable with a load, not a RMW.

While there, remove 'queued' from the name, as the lock isn't queued
at all, but a simple test-and-set.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Reported-by: Dave Chinner <david@fromorbit.com>
Tested-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <Waiman.Long@hp.com>
Cc: stable@vger.kernel.org # v4.2+
Link: http://lkml.kernel.org/r/20150904152523.GR18673@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/qspinlock.h | 16 ++++++++++++----
 include/asm-generic/qspinlock.h  |  4 ++--
 kernel/locking/qspinlock.c       |  2 +-
 3 files changed, 15 insertions(+), 7 deletions(-)

(limited to 'kernel')

diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index 9d51fae1cba3..8dde3bdc4a05 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -39,15 +39,23 @@ static inline void queued_spin_unlock(struct qspinlock *lock)
 }
 #endif
 
-#define virt_queued_spin_lock virt_queued_spin_lock
+#define virt_spin_lock virt_spin_lock
 
-static inline bool virt_queued_spin_lock(struct qspinlock *lock)
+static inline bool virt_spin_lock(struct qspinlock *lock)
 {
 	if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
 		return false;
 
-	while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0)
-		cpu_relax();
+	/*
+	 * On hypervisors without PARAVIRT_SPINLOCKS support we fall
+	 * back to a Test-and-Set spinlock, because fair locks have
+	 * horrible lock 'holder' preemption issues.
+	 */
+
+	do {
+		while (atomic_read(&lock->val) != 0)
+			cpu_relax();
+	} while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0);
 
 	return true;
 }
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index 83bfb87f5bf1..e2aadbc7151f 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -111,8 +111,8 @@ static inline void queued_spin_unlock_wait(struct qspinlock *lock)
 		cpu_relax();
 }
 
-#ifndef virt_queued_spin_lock
-static __always_inline bool virt_queued_spin_lock(struct qspinlock *lock)
+#ifndef virt_spin_lock
+static __always_inline bool virt_spin_lock(struct qspinlock *lock)
 {
 	return false;
 }
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 337c8818541d..87e9ce6a63c5 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -289,7 +289,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	if (pv_enabled())
 		goto queue;
 
-	if (virt_queued_spin_lock(lock))
+	if (virt_spin_lock(lock))
 		return;
 
 	/*
-- 
cgit v1.2.3


From 5473e0cc37c03c576adbda7591a6cc8e37c1bb7f Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@hotmail.com>
Date: Fri, 28 Aug 2015 14:55:56 +0800
Subject: sched: 'Annotate' migrate_tasks()

Kernel testing triggered this warning:

| WARNING: CPU: 0 PID: 13 at kernel/sched/core.c:1156 do_set_cpus_allowed+0x7e/0x80()
| Modules linked in:
| CPU: 0 PID: 13 Comm: migration/0 Not tainted 4.2.0-rc1-00049-g25834c7 #2
| Call Trace:
|   dump_stack+0x4b/0x75
|   warn_slowpath_common+0x8b/0xc0
|   warn_slowpath_null+0x22/0x30
|   do_set_cpus_allowed+0x7e/0x80
|   cpuset_cpus_allowed_fallback+0x7c/0x170
|   select_fallback_rq+0x221/0x280
|   migration_call+0xe3/0x250
|   notifier_call_chain+0x53/0x70
|   __raw_notifier_call_chain+0x1e/0x30
|   cpu_notify+0x28/0x50
|   take_cpu_down+0x22/0x40
|   multi_cpu_stop+0xd5/0x140
|   cpu_stopper_thread+0xbc/0x170
|   smpboot_thread_fn+0x174/0x2f0
|   kthread+0xc4/0xe0
|   ret_from_kernel_thread+0x21/0x30

As Peterz pointed out:

| So the normal rules for changing task_struct::cpus_allowed are holding
| both pi_lock and rq->lock, such that holding either stabilizes the mask.
|
| This is so that wakeup can happen without rq->lock and load-balance
| without pi_lock.
|
| From this we already get the relaxation that we can omit acquiring
| rq->lock if the task is not on the rq, because in that case
| load-balancing will not apply to it.
|
| ** these are the rules currently tested in do_set_cpus_allowed() **
|
| Now, since __set_cpus_allowed_ptr() uses task_rq_lock() which
| unconditionally acquires both locks, we could get away with holding just
| rq->lock when on_rq for modification because that'd still exclude
| __set_cpus_allowed_ptr(), it would also work against
| __kthread_bind_mask() because that assumes !on_rq.
|
| That said, this is all somewhat fragile.
|
| Now, I don't think dropping rq->lock is quite as disastrous as it
| usually is because !cpu_active at this point, which means load-balance
| will not interfere, but that too is somewhat fragile.
|
| So we end up with a choice of two fragile..

This patch fixes it by following the rules for changing
task_struct::cpus_allowed with both pi_lock and rq->lock held.

Reported-by: kernel test robot <ying.huang@intel.com>
Reported-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
[ Modified changelog and patch. ]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/BLU436-SMTP1660820490DE202E3934ED3806E0@phx.gbl
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/core.c | 29 ++++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0902e4d72671..9b786704d34b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5183,24 +5183,47 @@ static void migrate_tasks(struct rq *dead_rq)
 			break;
 
 		/*
-		 * Ensure rq->lock covers the entire task selection
-		 * until the migration.
+		 * pick_next_task assumes pinned rq->lock.
 		 */
 		lockdep_pin_lock(&rq->lock);
 		next = pick_next_task(rq, &fake_task);
 		BUG_ON(!next);
 		next->sched_class->put_prev_task(rq, next);
 
+		/*
+		 * Rules for changing task_struct::cpus_allowed are holding
+		 * both pi_lock and rq->lock, such that holding either
+		 * stabilizes the mask.
+		 *
+		 * Drop rq->lock is not quite as disastrous as it usually is
+		 * because !cpu_active at this point, which means load-balance
+		 * will not interfere. Also, stop-machine.
+		 */
+		lockdep_unpin_lock(&rq->lock);
+		raw_spin_unlock(&rq->lock);
+		raw_spin_lock(&next->pi_lock);
+		raw_spin_lock(&rq->lock);
+
+		/*
+		 * Since we're inside stop-machine, _nothing_ should have
+		 * changed the task, WARN if weird stuff happened, because in
+		 * that case the above rq->lock drop is a fail too.
+		 */
+		if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) {
+			raw_spin_unlock(&next->pi_lock);
+			continue;
+		}
+
 		/* Find suitable destination for @next, with force if needed. */
 		dest_cpu = select_fallback_rq(dead_rq->cpu, next);
 
-		lockdep_unpin_lock(&rq->lock);
 		rq = __migrate_task(rq, next, dest_cpu);
 		if (rq != dead_rq) {
 			raw_spin_unlock(&rq->lock);
 			rq = dead_rq;
 			raw_spin_lock(&rq->lock);
 		}
+		raw_spin_unlock(&next->pi_lock);
 	}
 
 	rq->stop = stop;
-- 
cgit v1.2.3


From 5b25b13ab08f616efd566347d809b4ece54570d1 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Fri, 11 Sep 2015 13:07:39 -0700
Subject: sys_membarrier(): system-wide memory barrier (generic, x86)

Here is an implementation of a new system call, sys_membarrier(), which
executes a memory barrier on all threads running on the system.  It is
implemented by calling synchronize_sched().  It can be used to
distribute the cost of user-space memory barriers asymmetrically by
transforming pairs of memory barriers into pairs consisting of
sys_membarrier() and a compiler barrier.  For synchronization primitives
that distinguish between read-side and write-side (e.g.  userspace RCU
[1], rwlocks), the read-side can be accelerated significantly by moving
the bulk of the memory barrier overhead to the write-side.

The existing applications of which I am aware that would be improved by
this system call are as follows:

* Through Userspace RCU library (http://urcu.so)
  - DNS server (Knot DNS) https://www.knot-dns.cz/
  - Network sniffer (http://netsniff-ng.org/)
  - Distributed object storage (https://sheepdog.github.io/sheepdog/)
  - User-space tracing (http://lttng.org)
  - Network storage system (https://www.gluster.org/)
  - Virtual routers (https://events.linuxfoundation.org/sites/events/files/slides/DPDK_RCU_0MQ.pdf)
  - Financial software (https://lkml.org/lkml/2015/3/23/189)

Those projects use RCU in userspace to increase read-side speed and
scalability compared to locking.  Especially in the case of RCU used by
libraries, sys_membarrier can speed up the read-side by moving the bulk of
the memory barrier cost to synchronize_rcu().

* Direct users of sys_membarrier
  - core dotnet garbage collector (https://github.com/dotnet/coreclr/issues/198)

Microsoft core dotnet GC developers are planning to use the mprotect()
side-effect of issuing memory barriers through IPIs as a way to implement
Windows FlushProcessWriteBuffers() on Linux.  They are referring to
sys_membarrier in their github thread, specifically stating that
sys_membarrier() is what they are looking for.

To explain the benefit of this scheme, let's introduce two example threads:

Thread A (non-frequent, e.g. executing liburcu synchronize_rcu())
Thread B (frequent, e.g. executing liburcu
rcu_read_lock()/rcu_read_unlock())

In a scheme where all smp_mb() in thread A are ordering memory accesses
with respect to smp_mb() present in Thread B, we can change each
smp_mb() within Thread A into calls to sys_membarrier() and each
smp_mb() within Thread B into compiler barriers "barrier()".

Before the change, we had, for each smp_mb() pairs:

Thread A                    Thread B
previous mem accesses       previous mem accesses
smp_mb()                    smp_mb()
following mem accesses      following mem accesses

After the change, these pairs become:

Thread A                    Thread B
prev mem accesses           prev mem accesses
sys_membarrier()            barrier()
follow mem accesses         follow mem accesses

As we can see, there are two possible scenarios: either Thread B memory
accesses do not happen concurrently with Thread A accesses (1), or they
do (2).

1) Non-concurrent Thread A vs Thread B accesses:

Thread A                    Thread B
prev mem accesses
sys_membarrier()
follow mem accesses
                            prev mem accesses
                            barrier()
                            follow mem accesses

In this case, thread B accesses will be weakly ordered. This is OK,
because at that point, thread A is not particularly interested in
ordering them with respect to its own accesses.

2) Concurrent Thread A vs Thread B accesses

Thread A                    Thread B
prev mem accesses           prev mem accesses
sys_membarrier()            barrier()
follow mem accesses         follow mem accesses

In this case, thread B accesses, which are ensured to be in program
order thanks to the compiler barrier, will be "upgraded" to full
smp_mb() by synchronize_sched().

* Benchmarks

On Intel Xeon E5405 (8 cores)
(one thread is calling sys_membarrier, the other 7 threads are busy
looping)

1000 non-expedited sys_membarrier calls in 33s =3D 33 milliseconds/call.

* User-space user of this system call: Userspace RCU library

Both the signal-based and the sys_membarrier userspace RCU schemes
permit us to remove the memory barrier from the userspace RCU
rcu_read_lock() and rcu_read_unlock() primitives, thus significantly
accelerating them. These memory barriers are replaced by compiler
barriers on the read-side, and all matching memory barriers on the
write-side are turned into an invocation of a memory barrier on all
active threads in the process. By letting the kernel perform this
synchronization rather than dumbly sending a signal to every process
threads (as we currently do), we diminish the number of unnecessary wake
ups and only issue the memory barriers on active threads. Non-running
threads do not need to execute such barrier anyway, because these are
implied by the scheduler context switches.

Results in liburcu:

Operations in 10s, 6 readers, 2 writers:

memory barriers in reader:    1701557485 reads, 2202847 writes
signal-based scheme:          9830061167 reads,    6700 writes
sys_membarrier:               9952759104 reads,     425 writes
sys_membarrier (dyn. check):  7970328887 reads,     425 writes

The dynamic sys_membarrier availability check adds some overhead to
the read-side compared to the signal-based scheme, but besides that,
sys_membarrier slightly outperforms the signal-based scheme. However,
this non-expedited sys_membarrier implementation has a much slower grace
period than signal and memory barrier schemes.

Besides diminishing the number of wake-ups, one major advantage of the
membarrier system call over the signal-based scheme is that it does not
need to reserve a signal. This plays much more nicely with libraries,
and with processes injected into for tracing purposes, for which we
cannot expect that signals will be unused by the application.

An expedited version of this system call can be added later on to speed
up the grace period. Its implementation will likely depend on reading
the cpu_curr()->mm without holding each CPU's rq lock.

This patch adds the system call to x86 and to asm-generic.

[1] http://urcu.so

membarrier(2) man page:

MEMBARRIER(2)              Linux Programmer's Manual             MEMBARRIER(2)

NAME
       membarrier - issue memory barriers on a set of threads

SYNOPSIS
       #include <linux/membarrier.h>

       int membarrier(int cmd, int flags);

DESCRIPTION
       The cmd argument is one of the following:

       MEMBARRIER_CMD_QUERY
              Query  the  set  of  supported commands. It returns a bitmask of
              supported commands.

       MEMBARRIER_CMD_SHARED
              Execute a memory barrier on all threads running on  the  system.
              Upon  return from system call, the caller thread is ensured that
              all running threads have passed through a state where all memory
              accesses  to  user-space  addresses  match program order between
              entry to and return from the system  call  (non-running  threads
              are de facto in such a state). This covers threads from all pro=E2=80=90
              cesses running on the system.  This command returns 0.

       The flags argument needs to be 0. For future extensions.

       All memory accesses performed  in  program  order  from  each  targeted
       thread is guaranteed to be ordered with respect to sys_membarrier(). If
       we use the semantic "barrier()" to represent a compiler barrier forcing
       memory  accesses  to  be performed in program order across the barrier,
       and smp_mb() to represent explicit memory barriers forcing full  memory
       ordering  across  the barrier, we have the following ordering table for
       each pair of barrier(), sys_membarrier() and smp_mb():

       The pair ordering is detailed as (O: ordered, X: not ordered):

                              barrier()   smp_mb() sys_membarrier()
              barrier()          X           X            O
              smp_mb()           X           O            O
              sys_membarrier()   O           O            O

RETURN VALUE
       On success, these system calls return zero.  On error, -1 is  returned,
       and errno is set appropriately. For a given command, with flags
       argument set to 0, this system call is guaranteed to always return the
       same value until reboot.

ERRORS
       ENOSYS System call is not implemented.

       EINVAL Invalid arguments.

Linux                             2015-04-15                     MEMBARRIER(2)

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Nicholas Miell <nmiell@comcast.net>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Pranith Kumar <bobby.prani@gmail.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Shuah Khan <shuahkh@osg.samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS                            |  8 +++++
 arch/x86/entry/syscalls/syscall_32.tbl |  1 +
 arch/x86/entry/syscalls/syscall_64.tbl |  1 +
 include/linux/syscalls.h               |  2 ++
 include/uapi/asm-generic/unistd.h      |  4 ++-
 include/uapi/linux/Kbuild              |  1 +
 include/uapi/linux/membarrier.h        | 53 +++++++++++++++++++++++++++
 init/Kconfig                           | 12 +++++++
 kernel/Makefile                        |  1 +
 kernel/membarrier.c                    | 66 ++++++++++++++++++++++++++++++++++
 kernel/sys_ni.c                        |  3 ++
 11 files changed, 151 insertions(+), 1 deletion(-)
 create mode 100644 include/uapi/linux/membarrier.h
 create mode 100644 kernel/membarrier.c

(limited to 'kernel')

diff --git a/MAINTAINERS b/MAINTAINERS
index 310da4295c70..e77bc84dc580 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6789,6 +6789,14 @@ W:	http://www.mellanox.com
 Q:	http://patchwork.ozlabs.org/project/netdev/list/
 F:	drivers/net/ethernet/mellanox/mlxsw/
 
+MEMBARRIER SUPPORT
+M:	Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+M:	"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
+L:	linux-kernel@vger.kernel.org
+S:	Supported
+F:	kernel/membarrier.c
+F:	include/uapi/linux/membarrier.h
+
 MEMORY MANAGEMENT
 L:	linux-mm@kvack.org
 W:	http://www.linux-mm.org
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 477bfa6db370..7663c455b9f6 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -381,3 +381,4 @@
 372	i386	recvmsg			sys_recvmsg			compat_sys_recvmsg
 373	i386	shutdown		sys_shutdown
 374	i386	userfaultfd		sys_userfaultfd
+375	i386	membarrier		sys_membarrier
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 81c490634db9..278842fdf1f6 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -330,6 +330,7 @@
 321	common	bpf			sys_bpf
 322	64	execveat		stub_execveat
 323	common	userfaultfd		sys_userfaultfd
+324	common	membarrier		sys_membarrier
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 08001317aee7..a460e2ef2843 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -885,4 +885,6 @@ asmlinkage long sys_execveat(int dfd, const char __user *filename,
 			const char __user *const __user *argv,
 			const char __user *const __user *envp, int flags);
 
+asmlinkage long sys_membarrier(int cmd, int flags);
+
 #endif
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index e016bd9b1a04..8da542a2874d 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -709,9 +709,11 @@ __SYSCALL(__NR_memfd_create, sys_memfd_create)
 __SYSCALL(__NR_bpf, sys_bpf)
 #define __NR_execveat 281
 __SC_COMP(__NR_execveat, sys_execveat, compat_sys_execveat)
+#define __NR_membarrier 282
+__SYSCALL(__NR_membarrier, sys_membarrier)
 
 #undef __NR_syscalls
-#define __NR_syscalls 282
+#define __NR_syscalls 283
 
 /*
  * All syscalls below here should go away really,
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 70ff1d9abf0d..f7b2db44eb4b 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -252,6 +252,7 @@ header-y += mdio.h
 header-y += media.h
 header-y += media-bus-format.h
 header-y += mei.h
+header-y += membarrier.h
 header-y += memfd.h
 header-y += mempolicy.h
 header-y += meye.h
diff --git a/include/uapi/linux/membarrier.h b/include/uapi/linux/membarrier.h
new file mode 100644
index 000000000000..e0b108bd2624
--- /dev/null
+++ b/include/uapi/linux/membarrier.h
@@ -0,0 +1,53 @@
+#ifndef _UAPI_LINUX_MEMBARRIER_H
+#define _UAPI_LINUX_MEMBARRIER_H
+
+/*
+ * linux/membarrier.h
+ *
+ * membarrier system call API
+ *
+ * Copyright (c) 2010, 2015 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/**
+ * enum membarrier_cmd - membarrier system call command
+ * @MEMBARRIER_CMD_QUERY:   Query the set of supported commands. It returns
+ *                          a bitmask of valid commands.
+ * @MEMBARRIER_CMD_SHARED:  Execute a memory barrier on all running threads.
+ *                          Upon return from system call, the caller thread
+ *                          is ensured that all running threads have passed
+ *                          through a state where all memory accesses to
+ *                          user-space addresses match program order between
+ *                          entry to and return from the system call
+ *                          (non-running threads are de facto in such a
+ *                          state). This covers threads from all processes
+ *                          running on the system. This command returns 0.
+ *
+ * Command to be passed to the membarrier system call. The commands need to
+ * be a single bit each, except for MEMBARRIER_CMD_QUERY which is assigned to
+ * the value 0.
+ */
+enum membarrier_cmd {
+	MEMBARRIER_CMD_QUERY = 0,
+	MEMBARRIER_CMD_SHARED = (1 << 0),
+};
+
+#endif /* _UAPI_LINUX_MEMBARRIER_H */
diff --git a/init/Kconfig b/init/Kconfig
index 02da9f1fd9df..c24b6f767bf0 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1602,6 +1602,18 @@ config PCI_QUIRKS
 	  bugs/quirks. Disable this only if your target machine is
 	  unaffected by PCI quirks.
 
+config MEMBARRIER
+	bool "Enable membarrier() system call" if EXPERT
+	default y
+	help
+	  Enable the membarrier() system call that allows issuing memory
+	  barriers across all running threads, which can be used to distribute
+	  the cost of user-space memory barriers asymmetrically by transforming
+	  pairs of memory barriers into pairs consisting of membarrier() and a
+	  compiler barrier.
+
+	  If unsure, say Y.
+
 config EMBEDDED
 	bool "Embedded system"
 	option allnoconfig_y
diff --git a/kernel/Makefile b/kernel/Makefile
index d4988410b410..53abf008ecb3 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -100,6 +100,7 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
 obj-$(CONFIG_JUMP_LABEL) += jump_label.o
 obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o
 obj-$(CONFIG_TORTURE_TEST) += torture.o
+obj-$(CONFIG_MEMBARRIER) += membarrier.o
 
 obj-$(CONFIG_HAS_IOMEM) += memremap.o
 
diff --git a/kernel/membarrier.c b/kernel/membarrier.c
new file mode 100644
index 000000000000..536c727a56e9
--- /dev/null
+++ b/kernel/membarrier.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2010, 2015 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * membarrier system call
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/syscalls.h>
+#include <linux/membarrier.h>
+
+/*
+ * Bitmask made from a "or" of all commands within enum membarrier_cmd,
+ * except MEMBARRIER_CMD_QUERY.
+ */
+#define MEMBARRIER_CMD_BITMASK	(MEMBARRIER_CMD_SHARED)
+
+/**
+ * sys_membarrier - issue memory barriers on a set of threads
+ * @cmd:   Takes command values defined in enum membarrier_cmd.
+ * @flags: Currently needs to be 0. For future extensions.
+ *
+ * If this system call is not implemented, -ENOSYS is returned. If the
+ * command specified does not exist, or if the command argument is invalid,
+ * this system call returns -EINVAL. For a given command, with flags argument
+ * set to 0, this system call is guaranteed to always return the same value
+ * until reboot.
+ *
+ * All memory accesses performed in program order from each targeted thread
+ * is guaranteed to be ordered with respect to sys_membarrier(). If we use
+ * the semantic "barrier()" to represent a compiler barrier forcing memory
+ * accesses to be performed in program order across the barrier, and
+ * smp_mb() to represent explicit memory barriers forcing full memory
+ * ordering across the barrier, we have the following ordering table for
+ * each pair of barrier(), sys_membarrier() and smp_mb():
+ *
+ * The pair ordering is detailed as (O: ordered, X: not ordered):
+ *
+ *                        barrier()   smp_mb() sys_membarrier()
+ *        barrier()          X           X            O
+ *        smp_mb()           X           O            O
+ *        sys_membarrier()   O           O            O
+ */
+SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
+{
+	if (unlikely(flags))
+		return -EINVAL;
+	switch (cmd) {
+	case MEMBARRIER_CMD_QUERY:
+		return MEMBARRIER_CMD_BITMASK;
+	case MEMBARRIER_CMD_SHARED:
+		if (num_online_cpus() > 1)
+			synchronize_sched();
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 03c3875d9958..a02decf15583 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -245,3 +245,6 @@ cond_syscall(sys_bpf);
 
 /* execveat */
 cond_syscall(sys_execveat);
+
+/* membarrier */
+cond_syscall(sys_membarrier);
-- 
cgit v1.2.3


From 2619d7e9c92d524cb155ec89fd72875321512e5b Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Wed, 9 Sep 2015 16:07:30 -0700
Subject: time: Fix timekeeping_freqadjust()'s incorrect use of abs() instead
 of abs64()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The internal clocksteering done for fine-grained error
correction uses a logarithmic approximation, so any time
adjtimex() adjusts the clock steering, timekeeping_freqadjust()
quickly approximates the correct clock frequency over a series
of ticks.

Unfortunately, the logic in timekeeping_freqadjust(), introduced
in commit:

  dc491596f639 ("timekeeping: Rework frequency adjustments to work better w/ nohz")

used the abs() function with a s64 error value to calculate the
size of the approximated adjustment to be made.

Per include/linux/kernel.h:

  "abs() should not be used for 64-bit types (s64, u64, long long) - use abs64()".

Thus on 32-bit platforms, this resulted in the clocksteering to
take a quite dampended random walk trying to converge on the
proper frequency, which caused the adjustments to be made much
slower then intended (most easily observed when large
adjustments are made).

This patch fixes the issue by using abs64() instead.

Reported-by: Nuno Gonçalves <nunojpg@gmail.com>
Tested-by: Nuno Goncalves <nunojpg@gmail.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Cc: <stable@vger.kernel.org> # v3.17+
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Miroslav Lichvar <mlichvar@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1441840051-20244-1-git-send-email-john.stultz@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/time/timekeeping.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index f6ee2e6b6f5d..3739ac6aa473 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1614,7 +1614,7 @@ static __always_inline void timekeeping_freqadjust(struct timekeeper *tk,
 	negative = (tick_error < 0);
 
 	/* Sort out the magnitude of the correction */
-	tick_error = abs(tick_error);
+	tick_error = abs64(tick_error);
 	for (adj = 0; tick_error > interval; adj++)
 		tick_error >>= 1;
 
-- 
cgit v1.2.3


From eef7635a22f6b144206b5ca2f1398f637acffc4d Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 11 Sep 2015 09:34:26 +0530
Subject: clockevents: Remove unused set_mode() callback

All users are migrated to the per-state callbacks, get rid of the
unused interface and the core support code.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linaro-kernel@lists.linaro.org
Cc: John Stultz <john.stultz@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/fd60de14cf6d125489c031207567bb255ad946f6.1441943991.git.viresh.kumar@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/clockchips.h | 29 +++++--------------------
 kernel/time/clockevents.c  | 42 +-----------------------------------
 kernel/time/tick-common.c  |  1 -
 kernel/time/timer_list.c   | 54 +++++++++++++++++++++-------------------------
 4 files changed, 30 insertions(+), 96 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 31ce435981fe..bdcf358dfce2 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -18,15 +18,6 @@
 struct clock_event_device;
 struct module;
 
-/* Clock event mode commands for legacy ->set_mode(): OBSOLETE */
-enum clock_event_mode {
-	CLOCK_EVT_MODE_UNUSED,
-	CLOCK_EVT_MODE_SHUTDOWN,
-	CLOCK_EVT_MODE_PERIODIC,
-	CLOCK_EVT_MODE_ONESHOT,
-	CLOCK_EVT_MODE_RESUME,
-};
-
 /*
  * Possible states of a clock event device.
  *
@@ -86,16 +77,14 @@ enum clock_event_state {
  * @min_delta_ns:	minimum delta value in ns
  * @mult:		nanosecond to cycles multiplier
  * @shift:		nanoseconds to cycles divisor (power of two)
- * @mode:		operating mode, relevant only to ->set_mode(), OBSOLETE
  * @state_use_accessors:current state of the device, assigned by the core code
  * @features:		features
  * @retries:		number of forced programming retries
- * @set_mode:		legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME.
- * @set_state_periodic:	switch state to periodic, if !set_mode
- * @set_state_oneshot:	switch state to oneshot, if !set_mode
- * @set_state_oneshot_stopped: switch state to oneshot_stopped, if !set_mode
- * @set_state_shutdown:	switch state to shutdown, if !set_mode
- * @tick_resume:	resume clkevt device, if !set_mode
+ * @set_state_periodic:	switch state to periodic
+ * @set_state_oneshot:	switch state to oneshot
+ * @set_state_oneshot_stopped: switch state to oneshot_stopped
+ * @set_state_shutdown:	switch state to shutdown
+ * @tick_resume:	resume clkevt device
  * @broadcast:		function to broadcast events
  * @min_delta_ticks:	minimum delta value in ticks stored for reconfiguration
  * @max_delta_ticks:	maximum delta value in ticks stored for reconfiguration
@@ -116,18 +105,10 @@ struct clock_event_device {
 	u64			min_delta_ns;
 	u32			mult;
 	u32			shift;
-	enum clock_event_mode	mode;
 	enum clock_event_state	state_use_accessors;
 	unsigned int		features;
 	unsigned long		retries;
 
-	/*
-	 * State transition callback(s): Only one of the two groups should be
-	 * defined:
-	 * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME.
-	 * - set_state_{shutdown|periodic|oneshot|oneshot_stopped}(), tick_resume().
-	 */
-	void			(*set_mode)(enum clock_event_mode mode, struct clock_event_device *);
 	int			(*set_state_periodic)(struct clock_event_device *);
 	int			(*set_state_oneshot)(struct clock_event_device *);
 	int			(*set_state_oneshot_stopped)(struct clock_event_device *);
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 50eb107f1198..a9b76a40319e 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -97,20 +97,6 @@ EXPORT_SYMBOL_GPL(clockevent_delta2ns);
 static int __clockevents_switch_state(struct clock_event_device *dev,
 				      enum clock_event_state state)
 {
-	/* Transition with legacy set_mode() callback */
-	if (dev->set_mode) {
-		/* Legacy callback doesn't support new modes */
-		if (state > CLOCK_EVT_STATE_ONESHOT)
-			return -ENOSYS;
-		/*
-		 * 'clock_event_state' and 'clock_event_mode' have 1-to-1
-		 * mapping until *_ONESHOT, and so a simple cast will work.
-		 */
-		dev->set_mode((enum clock_event_mode)state, dev);
-		dev->mode = (enum clock_event_mode)state;
-		return 0;
-	}
-
 	if (dev->features & CLOCK_EVT_FEAT_DUMMY)
 		return 0;
 
@@ -204,12 +190,8 @@ int clockevents_tick_resume(struct clock_event_device *dev)
 {
 	int ret = 0;
 
-	if (dev->set_mode) {
-		dev->set_mode(CLOCK_EVT_MODE_RESUME, dev);
-		dev->mode = CLOCK_EVT_MODE_RESUME;
-	} else if (dev->tick_resume) {
+	if (dev->tick_resume)
 		ret = dev->tick_resume(dev);
-	}
 
 	return ret;
 }
@@ -460,26 +442,6 @@ int clockevents_unbind_device(struct clock_event_device *ced, int cpu)
 }
 EXPORT_SYMBOL_GPL(clockevents_unbind_device);
 
-/* Sanity check of state transition callbacks */
-static int clockevents_sanity_check(struct clock_event_device *dev)
-{
-	/* Legacy set_mode() callback */
-	if (dev->set_mode) {
-		/* We shouldn't be supporting new modes now */
-		WARN_ON(dev->set_state_periodic || dev->set_state_oneshot ||
-			dev->set_state_shutdown || dev->tick_resume ||
-			dev->set_state_oneshot_stopped);
-
-		BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
-		return 0;
-	}
-
-	if (dev->features & CLOCK_EVT_FEAT_DUMMY)
-		return 0;
-
-	return 0;
-}
-
 /**
  * clockevents_register_device - register a clock event device
  * @dev:	device to register
@@ -488,8 +450,6 @@ void clockevents_register_device(struct clock_event_device *dev)
 {
 	unsigned long flags;
 
-	BUG_ON(clockevents_sanity_check(dev));
-
 	/* Initialize state to DETACHED */
 	clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED);
 
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index d11c55b6ab7d..4fcd99e12aa0 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -398,7 +398,6 @@ void tick_shutdown(unsigned int cpu)
 		 * the set mode function!
 		 */
 		clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED);
-		dev->mode = CLOCK_EVT_MODE_UNUSED;
 		clockevents_exchange_device(dev, NULL);
 		dev->event_handler = clockevents_handle_noop;
 		td->evtdev = NULL;
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 129c96033e46..f75e35b60149 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -225,7 +225,7 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
 		   (unsigned long long) dev->min_delta_ns);
 	SEQ_printf(m, " mult:           %u\n", dev->mult);
 	SEQ_printf(m, " shift:          %u\n", dev->shift);
-	SEQ_printf(m, " mode:           %d\n", dev->mode);
+	SEQ_printf(m, " mode:           %d\n", clockevent_get_state(dev));
 	SEQ_printf(m, " next_event:     %Ld nsecs\n",
 		   (unsigned long long) ktime_to_ns(dev->next_event));
 
@@ -233,40 +233,34 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
 	print_name_offset(m, dev->set_next_event);
 	SEQ_printf(m, "\n");
 
-	if (dev->set_mode) {
-		SEQ_printf(m, " set_mode:       ");
-		print_name_offset(m, dev->set_mode);
+	if (dev->set_state_shutdown) {
+		SEQ_printf(m, " shutdown: ");
+		print_name_offset(m, dev->set_state_shutdown);
 		SEQ_printf(m, "\n");
-	} else {
-		if (dev->set_state_shutdown) {
-			SEQ_printf(m, " shutdown: ");
-			print_name_offset(m, dev->set_state_shutdown);
-			SEQ_printf(m, "\n");
-		}
+	}
 
-		if (dev->set_state_periodic) {
-			SEQ_printf(m, " periodic: ");
-			print_name_offset(m, dev->set_state_periodic);
-			SEQ_printf(m, "\n");
-		}
+	if (dev->set_state_periodic) {
+		SEQ_printf(m, " periodic: ");
+		print_name_offset(m, dev->set_state_periodic);
+		SEQ_printf(m, "\n");
+	}
 
-		if (dev->set_state_oneshot) {
-			SEQ_printf(m, " oneshot:  ");
-			print_name_offset(m, dev->set_state_oneshot);
-			SEQ_printf(m, "\n");
-		}
+	if (dev->set_state_oneshot) {
+		SEQ_printf(m, " oneshot:  ");
+		print_name_offset(m, dev->set_state_oneshot);
+		SEQ_printf(m, "\n");
+	}
 
-		if (dev->set_state_oneshot_stopped) {
-			SEQ_printf(m, " oneshot stopped: ");
-			print_name_offset(m, dev->set_state_oneshot_stopped);
-			SEQ_printf(m, "\n");
-		}
+	if (dev->set_state_oneshot_stopped) {
+		SEQ_printf(m, " oneshot stopped: ");
+		print_name_offset(m, dev->set_state_oneshot_stopped);
+		SEQ_printf(m, "\n");
+	}
 
-		if (dev->tick_resume) {
-			SEQ_printf(m, " resume:   ");
-			print_name_offset(m, dev->tick_resume);
-			SEQ_printf(m, "\n");
-		}
+	if (dev->tick_resume) {
+		SEQ_printf(m, " resume:   ");
+		print_name_offset(m, dev->tick_resume);
+		SEQ_printf(m, "\n");
 	}
 
 	SEQ_printf(m, " event_handler:  ");
-- 
cgit v1.2.3


From 449e9cae58b06be1293858ec8e5d8cb728238baa Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Mon, 1 Jun 2015 16:05:16 +0800
Subject: genirq: Move field 'node' from irq_data into irq_common_data

NUMA node information is per-irq instead of per-irqchip, so move it into
struct irq_common_data. Also use CONFIG_NUMA to guard irq_common_data.node.

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Kevin Cernekee <cernekee@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Link: http://lkml.kernel.org/r/1433145945-789-8-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h    | 17 ++++++++++++++---
 kernel/irq/internals.h |  2 +-
 kernel/irq/irqdesc.c   |  4 +++-
 kernel/irq/irqdomain.c |  1 -
 4 files changed, 18 insertions(+), 6 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index e54ae8295460..ebcc5c6745eb 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -129,9 +129,13 @@ struct irq_domain;
  * struct irq_common_data - per irq data shared by all irqchips
  * @state_use_accessors: status information for irq chip functions.
  *			Use accessor functions to deal with it
+ * @node:		node index useful for balancing
  */
 struct irq_common_data {
 	unsigned int		state_use_accessors;
+#ifdef CONFIG_NUMA
+	unsigned int		node;
+#endif
 };
 
 /**
@@ -139,7 +143,6 @@ struct irq_common_data {
  * @mask:		precomputed bitmask for accessing the chip registers
  * @irq:		interrupt number
  * @hwirq:		hardware interrupt number, local to the interrupt domain
- * @node:		node index useful for balancing
  * @common:		point to data shared by all irqchips
  * @chip:		low level interrupt hardware access
  * @domain:		Interrupt translation domain; responsible for mapping
@@ -156,7 +159,6 @@ struct irq_data {
 	u32			mask;
 	unsigned int		irq;
 	unsigned long		hwirq;
-	unsigned int		node;
 	struct irq_common_data	*common;
 	struct irq_chip		*chip;
 	struct irq_domain	*domain;
@@ -664,9 +666,18 @@ static inline u32 irq_get_trigger_type(unsigned int irq)
 	return d ? irqd_get_trigger_type(d) : 0;
 }
 
-static inline int irq_data_get_node(struct irq_data *d)
+static inline int irq_common_data_get_node(struct irq_common_data *d)
 {
+#ifdef CONFIG_NUMA
 	return d->node;
+#else
+	return 0;
+#endif
+}
+
+static inline int irq_data_get_node(struct irq_data *d)
+{
+	return irq_common_data_get_node(d->common);
 }
 
 static inline struct cpumask *irq_get_affinity_mask(int irq)
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index eee4b385cffb..5ef0c2dbe930 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -194,7 +194,7 @@ static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc)
 
 static inline int irq_desc_get_node(struct irq_desc *desc)
 {
-	return irq_data_get_node(&desc->irq_data);
+	return irq_common_data_get_node(&desc->irq_common_data);
 }
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 0a2a4b697bcb..7f3e9faa6e4d 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -52,11 +52,13 @@ static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node)
 
 static void desc_smp_init(struct irq_desc *desc, int node)
 {
-	desc->irq_data.node = node;
 	cpumask_copy(desc->irq_data.affinity, irq_default_affinity);
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	cpumask_clear(desc->pending_mask);
 #endif
+#ifdef CONFIG_NUMA
+	desc->irq_common_data.node = node;
+#endif
 }
 
 #else
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 79baaf8a7813..dc9d27c0c158 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -844,7 +844,6 @@ static struct irq_data *irq_domain_insert_irq_data(struct irq_domain *domain,
 		child->parent_data = irq_data;
 		irq_data->irq = child->irq;
 		irq_data->common = child->common;
-		irq_data->node = child->node;
 		irq_data->domain = domain;
 	}
 
-- 
cgit v1.2.3


From af7080e040d223b5e7d0a8de28f7cea24ef017c4 Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Mon, 1 Jun 2015 16:05:21 +0800
Subject: genirq: Move field 'handler_data' from irq_data into irq_common_data

Handler data (handler_data) is per-irq instead of per irqchip, so move
it into struct irq_common_data.

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Kevin Cernekee <cernekee@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Link: http://lkml.kernel.org/r/1433145945-789-13-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h     | 8 ++++----
 include/linux/irqdesc.h | 2 +-
 kernel/irq/chip.c       | 4 ++--
 kernel/irq/irqdesc.c    | 3 ++-
 4 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index ebcc5c6745eb..516aadbfc072 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -130,12 +130,14 @@ struct irq_domain;
  * @state_use_accessors: status information for irq chip functions.
  *			Use accessor functions to deal with it
  * @node:		node index useful for balancing
+ * @handler_data:	per-IRQ data for the irq_chip methods
  */
 struct irq_common_data {
 	unsigned int		state_use_accessors;
 #ifdef CONFIG_NUMA
 	unsigned int		node;
 #endif
+	void			*handler_data;
 };
 
 /**
@@ -149,7 +151,6 @@ struct irq_common_data {
  *			between hwirq number and linux irq number.
  * @parent_data:	pointer to parent struct irq_data to support hierarchy
  *			irq_domain
- * @handler_data:	per-IRQ data for the irq_chip methods
  * @chip_data:		platform-specific per-chip private data for the chip
  *			methods, to allow shared chip implementations
  * @msi_desc:		MSI descriptor
@@ -165,7 +166,6 @@ struct irq_data {
 #ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
 	struct irq_data		*parent_data;
 #endif
-	void			*handler_data;
 	void			*chip_data;
 	struct msi_desc		*msi_desc;
 	cpumask_var_t		affinity;
@@ -641,12 +641,12 @@ static inline void *irq_data_get_irq_chip_data(struct irq_data *d)
 static inline void *irq_get_handler_data(unsigned int irq)
 {
 	struct irq_data *d = irq_get_irq_data(irq);
-	return d ? d->handler_data : NULL;
+	return d ? d->common->handler_data : NULL;
 }
 
 static inline void *irq_data_get_irq_handler_data(struct irq_data *d)
 {
-	return d->handler_data;
+	return d->common->handler_data;
 }
 
 static inline struct msi_desc *irq_get_msi_desc(unsigned int irq)
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 1fc5304641a1..c7b3e1cc6d59 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -123,7 +123,7 @@ static inline void *irq_desc_get_chip_data(struct irq_desc *desc)
 
 static inline void *irq_desc_get_handler_data(struct irq_desc *desc)
 {
-	return desc->irq_data.handler_data;
+	return desc->irq_common_data.handler_data;
 }
 
 static inline struct msi_desc *irq_desc_get_msi_desc(struct irq_desc *desc)
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 6e40a9539763..a48e00e345d7 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -83,7 +83,7 @@ int irq_set_handler_data(unsigned int irq, void *data)
 
 	if (!desc)
 		return -EINVAL;
-	desc->irq_data.handler_data = data;
+	desc->irq_common_data.handler_data = data;
 	irq_put_desc_unlock(desc, flags);
 	return 0;
 }
@@ -796,7 +796,7 @@ irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle,
 		return;
 
 	__irq_do_set_handler(desc, handle, 1, NULL);
-	desc->irq_data.handler_data = data;
+	desc->irq_common_data.handler_data = data;
 
 	irq_put_desc_busunlock(desc, flags);
 }
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 7f3e9faa6e4d..594b3e349aac 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -72,11 +72,12 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node,
 {
 	int cpu;
 
+	desc->irq_common_data.handler_data = NULL;
+
 	desc->irq_data.common = &desc->irq_common_data;
 	desc->irq_data.irq = irq;
 	desc->irq_data.chip = &no_irq_chip;
 	desc->irq_data.chip_data = NULL;
-	desc->irq_data.handler_data = NULL;
 	desc->irq_data.msi_desc = NULL;
 	irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS);
 	irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED);
-- 
cgit v1.2.3


From 9df872faa7e1619e9278bec00ceaed2236533530 Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Wed, 3 Jun 2015 11:47:50 +0800
Subject: genirq: Move field 'affinity' from irq_data into irq_common_data

Irq affinity mask is per-irq instead of per irqchip, so move it into
struct irq_common_data.

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Kevin Cernekee <cernekee@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Link: http://lkml.kernel.org/r/1433303281-27688-1-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c |  4 +---
 include/linux/irq.h           | 12 ++++++------
 kernel/irq/irqdesc.c          |  9 +++++----
 kernel/irq/manage.c           | 12 ++++++------
 kernel/irq/proc.c             |  2 +-
 5 files changed, 19 insertions(+), 20 deletions(-)

(limited to 'kernel')

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 1bbd0fe2c806..836d11b92811 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -489,10 +489,8 @@ static int apic_set_affinity(struct irq_data *irq_data,
 
 	err = assign_irq_vector(irq, data, dest);
 	if (err) {
-		struct irq_data *top = irq_get_irq_data(irq);
-
 		if (assign_irq_vector(irq, data,
-				      irq_data_get_affinity_mask(top)))
+				      irq_data_get_affinity_mask(irq_data)))
 			pr_err("Failed to recover vector for irq %d\n", irq);
 		return err;
 	}
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 516aadbfc072..75d50544a18f 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -110,8 +110,8 @@ enum {
 /*
  * Return value for chip->irq_set_affinity()
  *
- * IRQ_SET_MASK_OK	- OK, core updates irq_data.affinity
- * IRQ_SET_MASK_NOCPY	- OK, chip did update irq_data.affinity
+ * IRQ_SET_MASK_OK	- OK, core updates irq_common_data.affinity
+ * IRQ_SET_MASK_NOCPY	- OK, chip did update irq_common_data.affinity
  * IRQ_SET_MASK_OK_DONE	- Same as IRQ_SET_MASK_OK for core. Special code to
  *			  support stacked irqchips, which indicates skipping
  *			  all descendent irqchips.
@@ -131,6 +131,7 @@ struct irq_domain;
  *			Use accessor functions to deal with it
  * @node:		node index useful for balancing
  * @handler_data:	per-IRQ data for the irq_chip methods
+ * @affinity:		IRQ affinity on SMP
  */
 struct irq_common_data {
 	unsigned int		state_use_accessors;
@@ -138,6 +139,7 @@ struct irq_common_data {
 	unsigned int		node;
 #endif
 	void			*handler_data;
+	cpumask_var_t		affinity;
 };
 
 /**
@@ -154,7 +156,6 @@ struct irq_common_data {
  * @chip_data:		platform-specific per-chip private data for the chip
  *			methods, to allow shared chip implementations
  * @msi_desc:		MSI descriptor
- * @affinity:		IRQ affinity on SMP
  */
 struct irq_data {
 	u32			mask;
@@ -168,7 +169,6 @@ struct irq_data {
 #endif
 	void			*chip_data;
 	struct msi_desc		*msi_desc;
-	cpumask_var_t		affinity;
 };
 
 /*
@@ -684,12 +684,12 @@ static inline struct cpumask *irq_get_affinity_mask(int irq)
 {
 	struct irq_data *d = irq_get_irq_data(irq);
 
-	return d ? d->affinity : NULL;
+	return d ? d->common->affinity : NULL;
 }
 
 static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d)
 {
-	return d->affinity;
+	return d->common->affinity;
 }
 
 unsigned int arch_dynirq_lower_bound(unsigned int from);
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 594b3e349aac..bb48a5c1964e 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -38,12 +38,13 @@ static void __init init_irq_default_affinity(void)
 #ifdef CONFIG_SMP
 static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node)
 {
-	if (!zalloc_cpumask_var_node(&desc->irq_data.affinity, gfp, node))
+	if (!zalloc_cpumask_var_node(&desc->irq_common_data.affinity,
+				     gfp, node))
 		return -ENOMEM;
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	if (!zalloc_cpumask_var_node(&desc->pending_mask, gfp, node)) {
-		free_cpumask_var(desc->irq_data.affinity);
+		free_cpumask_var(desc->irq_common_data.affinity);
 		return -ENOMEM;
 	}
 #endif
@@ -52,7 +53,7 @@ static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node)
 
 static void desc_smp_init(struct irq_desc *desc, int node)
 {
-	cpumask_copy(desc->irq_data.affinity, irq_default_affinity);
+	cpumask_copy(desc->irq_common_data.affinity, irq_default_affinity);
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	cpumask_clear(desc->pending_mask);
 #endif
@@ -124,7 +125,7 @@ static void free_masks(struct irq_desc *desc)
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	free_cpumask_var(desc->pending_mask);
 #endif
-	free_cpumask_var(desc->irq_data.affinity);
+	free_cpumask_var(desc->irq_common_data.affinity);
 }
 #else
 static inline void free_masks(struct irq_desc *desc) { }
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index ad1b064f94fe..f9a59f6cabd2 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -192,7 +192,7 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
 	switch (ret) {
 	case IRQ_SET_MASK_OK:
 	case IRQ_SET_MASK_OK_DONE:
-		cpumask_copy(data->affinity, mask);
+		cpumask_copy(desc->irq_common_data.affinity, mask);
 	case IRQ_SET_MASK_OK_NOCOPY:
 		irq_set_thread_affinity(desc);
 		ret = 0;
@@ -304,7 +304,7 @@ static void irq_affinity_notify(struct work_struct *work)
 	if (irq_move_pending(&desc->irq_data))
 		irq_get_pending(cpumask, desc);
 	else
-		cpumask_copy(cpumask, desc->irq_data.affinity);
+		cpumask_copy(cpumask, desc->irq_common_data.affinity);
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
 
 	notify->notify(notify, cpumask);
@@ -375,9 +375,9 @@ static int setup_affinity(struct irq_desc *desc, struct cpumask *mask)
 	 * one of the targets is online.
 	 */
 	if (irqd_has_set(&desc->irq_data, IRQD_AFFINITY_SET)) {
-		if (cpumask_intersects(desc->irq_data.affinity,
+		if (cpumask_intersects(desc->irq_common_data.affinity,
 				       cpu_online_mask))
-			set = desc->irq_data.affinity;
+			set = desc->irq_common_data.affinity;
 		else
 			irqd_clear(&desc->irq_data, IRQD_AFFINITY_SET);
 	}
@@ -829,8 +829,8 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action)
 	 * This code is triggered unconditionally. Check the affinity
 	 * mask pointer. For CPU_MASK_OFFSTACK=n this is optimized out.
 	 */
-	if (desc->irq_data.affinity)
-		cpumask_copy(mask, desc->irq_data.affinity);
+	if (desc->irq_common_data.affinity)
+		cpumask_copy(mask, desc->irq_common_data.affinity);
 	else
 		valid = false;
 	raw_spin_unlock_irq(&desc->lock);
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 0e97c142ce40..e3a8c9577ba6 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -39,7 +39,7 @@ static struct proc_dir_entry *root_irq_dir;
 static int show_irq_affinity(int type, struct seq_file *m, void *v)
 {
 	struct irq_desc *desc = irq_to_desc((long)m->private);
-	const struct cpumask *mask = desc->irq_data.affinity;
+	const struct cpumask *mask = desc->irq_common_data.affinity;
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	if (irqd_is_setaffinity_pending(&desc->irq_data))
-- 
cgit v1.2.3


From b237721c5d95082a803c0be686f56d2dd1de995b Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Mon, 1 Jun 2015 16:05:43 +0800
Subject: genirq: Move field 'msi_desc' from irq_data into irq_common_data

MSI descriptors are per-irq instead of per irqchip, so move it into
struct irq_common_data.

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Kevin Cernekee <cernekee@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Link: http://lkml.kernel.org/r/1433145945-789-35-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h     | 8 ++++----
 include/linux/irqdesc.h | 2 +-
 kernel/irq/chip.c       | 2 +-
 kernel/irq/irqdesc.c    | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 75d50544a18f..4913c32db942 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -132,6 +132,7 @@ struct irq_domain;
  * @node:		node index useful for balancing
  * @handler_data:	per-IRQ data for the irq_chip methods
  * @affinity:		IRQ affinity on SMP
+ * @msi_desc:		MSI descriptor
  */
 struct irq_common_data {
 	unsigned int		state_use_accessors;
@@ -139,6 +140,7 @@ struct irq_common_data {
 	unsigned int		node;
 #endif
 	void			*handler_data;
+	struct msi_desc		*msi_desc;
 	cpumask_var_t		affinity;
 };
 
@@ -155,7 +157,6 @@ struct irq_common_data {
  *			irq_domain
  * @chip_data:		platform-specific per-chip private data for the chip
  *			methods, to allow shared chip implementations
- * @msi_desc:		MSI descriptor
  */
 struct irq_data {
 	u32			mask;
@@ -168,7 +169,6 @@ struct irq_data {
 	struct irq_data		*parent_data;
 #endif
 	void			*chip_data;
-	struct msi_desc		*msi_desc;
 };
 
 /*
@@ -652,12 +652,12 @@ static inline void *irq_data_get_irq_handler_data(struct irq_data *d)
 static inline struct msi_desc *irq_get_msi_desc(unsigned int irq)
 {
 	struct irq_data *d = irq_get_irq_data(irq);
-	return d ? d->msi_desc : NULL;
+	return d ? d->common->msi_desc : NULL;
 }
 
 static inline struct msi_desc *irq_data_get_msi_desc(struct irq_data *d)
 {
-	return d->msi_desc;
+	return d->common->msi_desc;
 }
 
 static inline u32 irq_get_trigger_type(unsigned int irq)
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index c7b3e1cc6d59..fbb4d5afc32b 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -128,7 +128,7 @@ static inline void *irq_desc_get_handler_data(struct irq_desc *desc)
 
 static inline struct msi_desc *irq_desc_get_msi_desc(struct irq_desc *desc)
 {
-	return desc->irq_data.msi_desc;
+	return desc->irq_common_data.msi_desc;
 }
 
 /*
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index a48e00e345d7..8c55d545558f 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -105,7 +105,7 @@ int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset,
 
 	if (!desc)
 		return -EINVAL;
-	desc->irq_data.msi_desc = entry;
+	desc->irq_common_data.msi_desc = entry;
 	if (entry && !irq_offset)
 		entry->irq = irq_base;
 	irq_put_desc_unlock(desc, flags);
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index bb48a5c1964e..596669436f7a 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -74,12 +74,12 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node,
 	int cpu;
 
 	desc->irq_common_data.handler_data = NULL;
+	desc->irq_common_data.msi_desc = NULL;
 
 	desc->irq_data.common = &desc->irq_common_data;
 	desc->irq_data.irq = irq;
 	desc->irq_data.chip = &no_irq_chip;
 	desc->irq_data.chip_data = NULL;
-	desc->irq_data.msi_desc = NULL;
 	irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS);
 	irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED);
 	desc->handle_irq = handle_bad_irq;
-- 
cgit v1.2.3


From bd0b9ac405e1794d72533c3d487aa65b6b955a0c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 14 Sep 2015 10:42:37 +0200
Subject: genirq: Remove irq argument from irq flow handlers

Most interrupt flow handlers do not use the irq argument. Those few
which use it can retrieve the irq number from the irq descriptor.

Remove the argument.

Search and replace was done with coccinelle and some extra helper
scripts around it. Thanks to Julia for her help!

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Julia Lawall <Julia.Lawall@lip6.fr>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
---
 arch/alpha/kernel/irq.c                         |  2 +-
 arch/arc/kernel/mcip.c                          |  2 +-
 arch/arm/common/it8152.c                        |  2 +-
 arch/arm/common/locomo.c                        |  2 +-
 arch/arm/common/sa1111.c                        |  6 ++----
 arch/arm/include/asm/hardware/it8152.h          |  2 +-
 arch/arm/include/asm/mach/irq.h                 |  4 ++--
 arch/arm/mach-dove/irq.c                        |  6 +++---
 arch/arm/mach-footbridge/isa-irq.c              |  5 ++---
 arch/arm/mach-gemini/gpio.c                     |  2 +-
 arch/arm/mach-imx/3ds_debugboard.c              |  2 +-
 arch/arm/mach-imx/mach-mx31ads.c                |  2 +-
 arch/arm/mach-iop13xx/msi.c                     |  2 +-
 arch/arm/mach-lpc32xx/irq.c                     |  4 ++--
 arch/arm/mach-netx/generic.c                    |  3 +--
 arch/arm/mach-omap1/fpga.c                      |  2 +-
 arch/arm/mach-omap2/prm_common.c                |  2 +-
 arch/arm/mach-pxa/balloon3.c                    |  2 +-
 arch/arm/mach-pxa/cm-x2xx-pci.c                 |  5 ++---
 arch/arm/mach-pxa/lpd270.c                      |  2 +-
 arch/arm/mach-pxa/pcm990-baseboard.c            |  2 +-
 arch/arm/mach-pxa/viper.c                       |  2 +-
 arch/arm/mach-pxa/zeus.c                        |  2 +-
 arch/arm/mach-rpc/ecard.c                       |  3 +--
 arch/arm/mach-s3c24xx/bast-irq.c                |  4 +---
 arch/arm/mach-s3c64xx/common.c                  |  8 ++++----
 arch/arm/mach-sa1100/neponset.c                 |  2 +-
 arch/arm/plat-orion/gpio.c                      |  2 +-
 arch/avr32/mach-at32ap/extint.c                 |  2 +-
 arch/avr32/mach-at32ap/pio.c                    |  2 +-
 arch/blackfin/include/asm/irq_handler.h         |  4 ++--
 arch/blackfin/kernel/irqchip.c                  |  2 +-
 arch/blackfin/mach-bf537/ints-priority.c        | 10 +++------
 arch/blackfin/mach-common/ints-priority.c       |  5 ++---
 arch/c6x/platforms/megamod-pic.c                |  2 +-
 arch/m68k/amiga/amiints.c                       |  8 ++++----
 arch/m68k/coldfire/intc-5272.c                  |  6 ++----
 arch/m68k/include/asm/irq.h                     |  3 +--
 arch/m68k/include/asm/mac_via.h                 |  2 +-
 arch/m68k/mac/baboon.c                          |  2 +-
 arch/m68k/mac/oss.c                             |  4 ++--
 arch/m68k/mac/psc.c                             |  2 +-
 arch/m68k/mac/via.c                             |  6 +++---
 arch/metag/kernel/irq.c                         |  4 +---
 arch/mips/alchemy/common/irq.c                  |  4 ++--
 arch/mips/alchemy/devboards/bcsr.c              |  2 +-
 arch/mips/ath25/ar2315.c                        |  2 +-
 arch/mips/ath25/ar5312.c                        |  2 +-
 arch/mips/ath79/irq.c                           |  8 ++++----
 arch/mips/cavium-octeon/octeon-irq.c            |  2 +-
 arch/mips/include/asm/netlogic/common.h         |  4 ++--
 arch/mips/jz4740/gpio.c                         |  2 +-
 arch/mips/netlogic/common/smp.c                 |  4 ++--
 arch/mips/pci/pci-ar2315.c                      |  2 +-
 arch/mips/pci/pci-ar71xx.c                      |  2 +-
 arch/mips/pci/pci-ar724x.c                      |  2 +-
 arch/mips/pci/pci-rt3883.c                      |  2 +-
 arch/mips/ralink/irq.c                          |  2 +-
 arch/powerpc/include/asm/qe_ic.h                | 23 +++++++++------------
 arch/powerpc/include/asm/tsi108_pci.h           |  2 +-
 arch/powerpc/platforms/512x/mpc5121_ads_cpld.c  |  3 +--
 arch/powerpc/platforms/52xx/media5200.c         |  2 +-
 arch/powerpc/platforms/52xx/mpc52xx_gpt.c       |  2 +-
 arch/powerpc/platforms/82xx/pq2ads-pci-pic.c    |  2 +-
 arch/powerpc/platforms/85xx/common.c            |  2 +-
 arch/powerpc/platforms/85xx/mpc85xx_cds.c       |  6 ++----
 arch/powerpc/platforms/85xx/mpc85xx_ds.c        |  2 +-
 arch/powerpc/platforms/85xx/socrates_fpga_pic.c |  2 +-
 arch/powerpc/platforms/86xx/pic.c               |  2 +-
 arch/powerpc/platforms/8xx/m8xx_setup.c         |  2 +-
 arch/powerpc/platforms/cell/axon_msi.c          |  2 +-
 arch/powerpc/platforms/cell/interrupt.c         |  2 +-
 arch/powerpc/platforms/cell/spider-pic.c        |  2 +-
 arch/powerpc/platforms/chrp/setup.c             |  2 +-
 arch/powerpc/platforms/embedded6xx/hlwd-pic.c   |  3 +--
 arch/powerpc/platforms/embedded6xx/mvme5100.c   |  2 +-
 arch/powerpc/platforms/pseries/setup.c          |  2 +-
 arch/powerpc/sysdev/ge/ge_pic.c                 |  2 +-
 arch/powerpc/sysdev/ge/ge_pic.h                 |  2 --
 arch/powerpc/sysdev/mpic.c                      |  2 +-
 arch/powerpc/sysdev/qe_lib/qe_ic.c              |  4 ++--
 arch/powerpc/sysdev/tsi108_pci.c                |  2 +-
 arch/powerpc/sysdev/uic.c                       |  2 +-
 arch/powerpc/sysdev/xilinx_intc.c               |  2 +-
 arch/sh/boards/mach-se/7343/irq.c               |  2 +-
 arch/sh/boards/mach-se/7722/irq.c               |  2 +-
 arch/sh/boards/mach-se/7724/irq.c               |  2 +-
 arch/sh/boards/mach-x3proto/gpio.c              |  2 +-
 arch/sh/cchips/hd6446x/hd64461.c                |  2 +-
 arch/sparc/kernel/leon_kernel.c                 |  2 +-
 arch/sparc/kernel/leon_pci_grpci1.c             |  2 +-
 arch/sparc/kernel/leon_pci_grpci2.c             |  2 +-
 arch/tile/kernel/pci_gx.c                       |  5 ++---
 arch/unicore32/kernel/irq.c                     |  2 +-
 arch/x86/kernel/irq_32.c                        | 19 +++++++----------
 arch/x86/kernel/irq_64.c                        |  2 +-
 arch/x86/lguest/boot.c                          |  2 +-
 drivers/dma/ipu/ipu_irq.c                       |  2 +-
 drivers/gpio/gpio-altera.c                      |  6 ++----
 drivers/gpio/gpio-bcm-kona.c                    |  2 +-
 drivers/gpio/gpio-brcmstb.c                     |  2 +-
 drivers/gpio/gpio-davinci.c                     |  3 +--
 drivers/gpio/gpio-dwapb.c                       |  2 +-
 drivers/gpio/gpio-ep93xx.c                      |  5 ++---
 drivers/gpio/gpio-intel-mid.c                   |  2 +-
 drivers/gpio/gpio-lynxpoint.c                   |  2 +-
 drivers/gpio/gpio-mpc8xxx.c                     |  2 +-
 drivers/gpio/gpio-msic.c                        |  2 +-
 drivers/gpio/gpio-msm-v2.c                      |  2 +-
 drivers/gpio/gpio-mvebu.c                       |  2 +-
 drivers/gpio/gpio-mxc.c                         |  4 ++--
 drivers/gpio/gpio-mxs.c                         |  2 +-
 drivers/gpio/gpio-omap.c                        |  2 +-
 drivers/gpio/gpio-pl061.c                       |  2 +-
 drivers/gpio/gpio-pxa.c                         |  2 +-
 drivers/gpio/gpio-sa1100.c                      |  3 +--
 drivers/gpio/gpio-tegra.c                       |  2 +-
 drivers/gpio/gpio-timberdale.c                  |  2 +-
 drivers/gpio/gpio-tz1090.c                      |  4 ++--
 drivers/gpio/gpio-vf610.c                       |  2 +-
 drivers/gpio/gpio-zx.c                          |  2 +-
 drivers/gpio/gpio-zynq.c                        |  2 +-
 drivers/gpu/ipu-v3/ipu-common.c                 |  4 ++--
 drivers/irqchip/exynos-combiner.c               |  6 ++----
 drivers/irqchip/irq-armada-370-xp.c             |  3 +--
 drivers/irqchip/irq-bcm2835.c                   |  4 ++--
 drivers/irqchip/irq-bcm7038-l1.c                |  2 +-
 drivers/irqchip/irq-bcm7120-l2.c                |  2 +-
 drivers/irqchip/irq-brcmstb-l2.c                |  7 +++----
 drivers/irqchip/irq-dw-apb-ictl.c               |  2 +-
 drivers/irqchip/irq-gic.c                       |  4 ++--
 drivers/irqchip/irq-i8259.c                     |  2 +-
 drivers/irqchip/irq-imgpdc.c                    |  4 ++--
 drivers/irqchip/irq-keystone.c                  |  2 +-
 drivers/irqchip/irq-metag-ext.c                 |  2 +-
 drivers/irqchip/irq-metag.c                     |  2 +-
 drivers/irqchip/irq-mips-gic.c                  |  2 +-
 drivers/irqchip/irq-mmp.c                       |  2 +-
 drivers/irqchip/irq-orion.c                     |  2 +-
 drivers/irqchip/irq-s3c24xx.c                   |  2 +-
 drivers/irqchip/irq-sunxi-nmi.c                 |  2 +-
 drivers/irqchip/irq-tb10x.c                     |  2 +-
 drivers/irqchip/irq-versatile-fpga.c            |  8 ++++----
 drivers/irqchip/irq-vic.c                       |  2 +-
 drivers/irqchip/spear-shirq.c                   |  2 +-
 drivers/mfd/asic3.c                             |  2 +-
 drivers/mfd/ezx-pcap.c                          |  2 +-
 drivers/mfd/htc-egpio.c                         |  2 +-
 drivers/mfd/jz4740-adc.c                        |  2 +-
 drivers/mfd/pm8921-core.c                       |  2 +-
 drivers/mfd/t7l66xb.c                           |  2 +-
 drivers/mfd/tc6393xb.c                          |  3 +--
 drivers/mfd/ucb1x00-core.c                      |  2 +-
 drivers/pci/host/pci-keystone.c                 |  5 ++---
 drivers/pci/host/pci-xgene-msi.c                |  2 +-
 drivers/pinctrl/bcm/pinctrl-cygnus-gpio.c       |  2 +-
 drivers/pinctrl/intel/pinctrl-baytrail.c        |  2 +-
 drivers/pinctrl/intel/pinctrl-cherryview.c      |  2 +-
 drivers/pinctrl/intel/pinctrl-intel.c           |  2 +-
 drivers/pinctrl/mediatek/pinctrl-mtk-common.c   |  2 +-
 drivers/pinctrl/nomadik/pinctrl-nomadik.c       |  4 ++--
 drivers/pinctrl/pinctrl-adi2.c                  |  3 +--
 drivers/pinctrl/pinctrl-amd.c                   |  6 +++---
 drivers/pinctrl/pinctrl-at91.c                  |  2 +-
 drivers/pinctrl/pinctrl-coh901.c                |  2 +-
 drivers/pinctrl/pinctrl-pistachio.c             |  6 ++----
 drivers/pinctrl/pinctrl-rockchip.c              |  2 +-
 drivers/pinctrl/pinctrl-single.c                |  2 +-
 drivers/pinctrl/pinctrl-st.c                    |  4 ++--
 drivers/pinctrl/qcom/pinctrl-msm.c              |  5 ++---
 drivers/pinctrl/samsung/pinctrl-exynos.c        |  4 ++--
 drivers/pinctrl/samsung/pinctrl-s3c24xx.c       |  8 ++++----
 drivers/pinctrl/samsung/pinctrl-s3c64xx.c       | 10 ++++-----
 drivers/pinctrl/sirf/pinctrl-atlas7.c           |  4 ++--
 drivers/pinctrl/sirf/pinctrl-sirf.c             |  4 ++--
 drivers/pinctrl/spear/pinctrl-plgpio.c          |  2 +-
 drivers/pinctrl/sunxi/pinctrl-sunxi.c           |  2 +-
 drivers/sh/intc/core.c                          |  2 +-
 drivers/sh/intc/virq.c                          |  4 ++--
 drivers/soc/dove/pmu.c                          |  4 ++--
 drivers/spmi/spmi-pmic-arb.c                    |  2 +-
 include/linux/irq.h                             | 16 +++++++--------
 include/linux/irqdesc.h                         |  4 ++--
 include/linux/irqhandler.h                      |  2 +-
 kernel/irq/chip.c                               | 27 ++++++++-----------------
 kernel/irq/handle.c                             |  4 +++-
 kernel/irq/irqdesc.c                            |  2 +-
 kernel/irq/resend.c                             |  2 +-
 188 files changed, 281 insertions(+), 340 deletions(-)

(limited to 'kernel')

diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c
index 2804648c8ff4..2d6efcff3bf3 100644
--- a/arch/alpha/kernel/irq.c
+++ b/arch/alpha/kernel/irq.c
@@ -117,6 +117,6 @@ handle_irq(int irq)
 	}
 
 	irq_enter();
-	generic_handle_irq_desc(irq, desc);
+	generic_handle_irq_desc(desc);
 	irq_exit();
 }
diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c
index d9e44b62df05..4ffd1855f1bd 100644
--- a/arch/arc/kernel/mcip.c
+++ b/arch/arc/kernel/mcip.c
@@ -252,7 +252,7 @@ static struct irq_chip idu_irq_chip = {
 
 static int idu_first_irq;
 
-static void idu_cascade_isr(unsigned int __core_irq, struct irq_desc *desc)
+static void idu_cascade_isr(struct irq_desc *desc)
 {
 	struct irq_domain *domain = irq_desc_get_handler_data(desc);
 	unsigned int core_irq = irq_desc_get_irq(desc);
diff --git a/arch/arm/common/it8152.c b/arch/arm/common/it8152.c
index 96dabcb6c621..996aed3b4eee 100644
--- a/arch/arm/common/it8152.c
+++ b/arch/arm/common/it8152.c
@@ -95,7 +95,7 @@ void it8152_init_irq(void)
 	}
 }
 
-void it8152_irq_demux(unsigned int irq, struct irq_desc *desc)
+void it8152_irq_demux(struct irq_desc *desc)
 {
        int bits_pd, bits_lp, bits_ld;
        int i;
diff --git a/arch/arm/common/locomo.c b/arch/arm/common/locomo.c
index 304adea4bc52..0e97b4b871f9 100644
--- a/arch/arm/common/locomo.c
+++ b/arch/arm/common/locomo.c
@@ -138,7 +138,7 @@ static struct locomo_dev_info locomo_devices[] = {
 	},
 };
 
-static void locomo_handler(unsigned int __irq, struct irq_desc *desc)
+static void locomo_handler(struct irq_desc *desc)
 {
 	struct locomo *lchip = irq_desc_get_chip_data(desc);
 	int req, i;
diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c
index 4f290250fa93..3d224941b541 100644
--- a/arch/arm/common/sa1111.c
+++ b/arch/arm/common/sa1111.c
@@ -196,10 +196,8 @@ static struct sa1111_dev_info sa1111_devices[] = {
  * active IRQs causes the interrupt output to pulse, the upper levels
  * will call us again if there are more interrupts to process.
  */
-static void
-sa1111_irq_handler(unsigned int __irq, struct irq_desc *desc)
+static void sa1111_irq_handler(struct irq_desc *desc)
 {
-	unsigned int irq = irq_desc_get_irq(desc);
 	unsigned int stat0, stat1, i;
 	struct sa1111 *sachip = irq_desc_get_handler_data(desc);
 	void __iomem *mapbase = sachip->base + SA1111_INTC;
@@ -214,7 +212,7 @@ sa1111_irq_handler(unsigned int __irq, struct irq_desc *desc)
 	sa1111_writel(stat1, mapbase + SA1111_INTSTATCLR1);
 
 	if (stat0 == 0 && stat1 == 0) {
-		do_bad_IRQ(irq, desc);
+		do_bad_IRQ(desc);
 		return;
 	}
 
diff --git a/arch/arm/include/asm/hardware/it8152.h b/arch/arm/include/asm/hardware/it8152.h
index d36a73d7c0e8..076777ff3daa 100644
--- a/arch/arm/include/asm/hardware/it8152.h
+++ b/arch/arm/include/asm/hardware/it8152.h
@@ -106,7 +106,7 @@ extern void __iomem *it8152_base_address;
 struct pci_dev;
 struct pci_sys_data;
 
-extern void it8152_irq_demux(unsigned int irq, struct irq_desc *desc);
+extern void it8152_irq_demux(struct irq_desc *desc);
 extern void it8152_init_irq(void);
 extern int it8152_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin);
 extern int it8152_pci_setup(int nr, struct pci_sys_data *sys);
diff --git a/arch/arm/include/asm/mach/irq.h b/arch/arm/include/asm/mach/irq.h
index 2092ee1e1300..de4634b51456 100644
--- a/arch/arm/include/asm/mach/irq.h
+++ b/arch/arm/include/asm/mach/irq.h
@@ -23,10 +23,10 @@ extern int show_fiq_list(struct seq_file *, int);
 /*
  * This is for easy migration, but should be changed in the source
  */
-#define do_bad_IRQ(irq,desc)				\
+#define do_bad_IRQ(desc)				\
 do {							\
 	raw_spin_lock(&desc->lock);			\
-	handle_bad_irq(irq, desc);			\
+	handle_bad_irq(desc);				\
 	raw_spin_unlock(&desc->lock);			\
 } while(0)
 
diff --git a/arch/arm/mach-dove/irq.c b/arch/arm/mach-dove/irq.c
index 305d7c6242bb..bfb3703357c5 100644
--- a/arch/arm/mach-dove/irq.c
+++ b/arch/arm/mach-dove/irq.c
@@ -69,14 +69,14 @@ static struct irq_chip pmu_irq_chip = {
 	.irq_ack	= pmu_irq_ack,
 };
 
-static void pmu_irq_handler(unsigned int __irq, struct irq_desc *desc)
+static void pmu_irq_handler(struct irq_desc *desc)
 {
-	unsigned int irq = irq_desc_get_irq(desc);
 	unsigned long cause = readl(PMU_INTERRUPT_CAUSE);
+	unsigned int irq;
 
 	cause &= readl(PMU_INTERRUPT_MASK);
 	if (cause == 0) {
-		do_bad_IRQ(irq, desc);
+		do_bad_IRQ(desc);
 		return;
 	}
 
diff --git a/arch/arm/mach-footbridge/isa-irq.c b/arch/arm/mach-footbridge/isa-irq.c
index fcd79bc3a3e1..c01fca11b224 100644
--- a/arch/arm/mach-footbridge/isa-irq.c
+++ b/arch/arm/mach-footbridge/isa-irq.c
@@ -87,13 +87,12 @@ static struct irq_chip isa_hi_chip = {
 	.irq_unmask	= isa_unmask_pic_hi_irq,
 };
 
-static void
-isa_irq_handler(unsigned int irq, struct irq_desc *desc)
+static void isa_irq_handler(struct irq_desc *desc)
 {
 	unsigned int isa_irq = *(unsigned char *)PCIIACK_BASE;
 
 	if (isa_irq < _ISA_IRQ(0) || isa_irq >= _ISA_IRQ(16)) {
-		do_bad_IRQ(isa_irq, desc);
+		do_bad_IRQ(desc);
 		return;
 	}
 
diff --git a/arch/arm/mach-gemini/gpio.c b/arch/arm/mach-gemini/gpio.c
index 220333ed741d..2478d9f4d92d 100644
--- a/arch/arm/mach-gemini/gpio.c
+++ b/arch/arm/mach-gemini/gpio.c
@@ -126,7 +126,7 @@ static int gpio_set_irq_type(struct irq_data *d, unsigned int type)
 	return 0;
 }
 
-static void gpio_irq_handler(unsigned int irq, struct irq_desc *desc)
+static void gpio_irq_handler(struct irq_desc *desc)
 {
 	unsigned int port = (unsigned int)irq_desc_get_handler_data(desc);
 	unsigned int gpio_irq_no, irq_stat;
diff --git a/arch/arm/mach-imx/3ds_debugboard.c b/arch/arm/mach-imx/3ds_debugboard.c
index 45903be6e7b3..16496a071ecb 100644
--- a/arch/arm/mach-imx/3ds_debugboard.c
+++ b/arch/arm/mach-imx/3ds_debugboard.c
@@ -85,7 +85,7 @@ static struct platform_device smsc_lan9217_device = {
 	.resource = smsc911x_resources,
 };
 
-static void mxc_expio_irq_handler(u32 irq, struct irq_desc *desc)
+static void mxc_expio_irq_handler(struct irq_desc *desc)
 {
 	u32 imr_val;
 	u32 int_valid;
diff --git a/arch/arm/mach-imx/mach-mx31ads.c b/arch/arm/mach-imx/mach-mx31ads.c
index 2c0853560bd2..2b147e4bf9c9 100644
--- a/arch/arm/mach-imx/mach-mx31ads.c
+++ b/arch/arm/mach-imx/mach-mx31ads.c
@@ -154,7 +154,7 @@ static inline void mxc_init_imx_uart(void)
 	imx31_add_imx_uart0(&uart_pdata);
 }
 
-static void mx31ads_expio_irq_handler(u32 irq, struct irq_desc *desc)
+static void mx31ads_expio_irq_handler(struct irq_desc *desc)
 {
 	u32 imr_val;
 	u32 int_valid;
diff --git a/arch/arm/mach-iop13xx/msi.c b/arch/arm/mach-iop13xx/msi.c
index 9f89e76dfbb9..f6235b28578c 100644
--- a/arch/arm/mach-iop13xx/msi.c
+++ b/arch/arm/mach-iop13xx/msi.c
@@ -91,7 +91,7 @@ static void (*write_imipr[])(u32) = {
 	write_imipr_3,
 };
 
-static void iop13xx_msi_handler(unsigned int irq, struct irq_desc *desc)
+static void iop13xx_msi_handler(struct irq_desc *desc)
 {
 	int i, j;
 	unsigned long status;
diff --git a/arch/arm/mach-lpc32xx/irq.c b/arch/arm/mach-lpc32xx/irq.c
index cce4cef12b6e..2ae431e8bc1b 100644
--- a/arch/arm/mach-lpc32xx/irq.c
+++ b/arch/arm/mach-lpc32xx/irq.c
@@ -370,7 +370,7 @@ static struct irq_chip lpc32xx_irq_chip = {
 	.irq_set_wake = lpc32xx_irq_wake
 };
 
-static void lpc32xx_sic1_handler(unsigned int irq, struct irq_desc *desc)
+static void lpc32xx_sic1_handler(struct irq_desc *desc)
 {
 	unsigned long ints = __raw_readl(LPC32XX_INTC_STAT(LPC32XX_SIC1_BASE));
 
@@ -383,7 +383,7 @@ static void lpc32xx_sic1_handler(unsigned int irq, struct irq_desc *desc)
 	}
 }
 
-static void lpc32xx_sic2_handler(unsigned int irq, struct irq_desc *desc)
+static void lpc32xx_sic2_handler(struct irq_desc *desc)
 {
 	unsigned long ints = __raw_readl(LPC32XX_INTC_STAT(LPC32XX_SIC2_BASE));
 
diff --git a/arch/arm/mach-netx/generic.c b/arch/arm/mach-netx/generic.c
index 6373e2bff203..842302df99c1 100644
--- a/arch/arm/mach-netx/generic.c
+++ b/arch/arm/mach-netx/generic.c
@@ -69,8 +69,7 @@ static struct platform_device *devices[] __initdata = {
 #define DEBUG_IRQ(fmt...)	while (0) {}
 #endif
 
-static void
-netx_hif_demux_handler(unsigned int irq_unused, struct irq_desc *desc)
+static void netx_hif_demux_handler(struct irq_desc *desc)
 {
 	unsigned int irq = NETX_IRQ_HIF_CHAINED(0);
 	unsigned int stat;
diff --git a/arch/arm/mach-omap1/fpga.c b/arch/arm/mach-omap1/fpga.c
index dfec671b1639..39e20d0ead08 100644
--- a/arch/arm/mach-omap1/fpga.c
+++ b/arch/arm/mach-omap1/fpga.c
@@ -87,7 +87,7 @@ static void fpga_mask_ack_irq(struct irq_data *d)
 	fpga_ack_irq(d);
 }
 
-static void innovator_fpga_IRQ_demux(unsigned int irq, struct irq_desc *desc)
+static void innovator_fpga_IRQ_demux(struct irq_desc *desc)
 {
 	u32 stat;
 	int fpga_irq;
diff --git a/arch/arm/mach-omap2/prm_common.c b/arch/arm/mach-omap2/prm_common.c
index 257e98c26618..3fc2cbe52113 100644
--- a/arch/arm/mach-omap2/prm_common.c
+++ b/arch/arm/mach-omap2/prm_common.c
@@ -102,7 +102,7 @@ static void omap_prcm_events_filter_priority(unsigned long *events,
  * dispatched accordingly. Clearing of the wakeup events should be
  * done by the SoC specific individual handlers.
  */
-static void omap_prcm_irq_handler(unsigned int irq, struct irq_desc *desc)
+static void omap_prcm_irq_handler(struct irq_desc *desc)
 {
 	unsigned long pending[OMAP_PRCM_MAX_NR_PENDING_REG];
 	unsigned long priority_pending[OMAP_PRCM_MAX_NR_PENDING_REG];
diff --git a/arch/arm/mach-pxa/balloon3.c b/arch/arm/mach-pxa/balloon3.c
index 70366b35d299..a3ebb517cca1 100644
--- a/arch/arm/mach-pxa/balloon3.c
+++ b/arch/arm/mach-pxa/balloon3.c
@@ -496,7 +496,7 @@ static struct irq_chip balloon3_irq_chip = {
 	.irq_unmask	= balloon3_unmask_irq,
 };
 
-static void balloon3_irq_handler(unsigned int __irq, struct irq_desc *desc)
+static void balloon3_irq_handler(struct irq_desc *desc)
 {
 	unsigned long pending = __raw_readl(BALLOON3_INT_CONTROL_REG) &
 					balloon3_irq_enabled;
diff --git a/arch/arm/mach-pxa/cm-x2xx-pci.c b/arch/arm/mach-pxa/cm-x2xx-pci.c
index 1fa79f1f832d..3221ae15bef7 100644
--- a/arch/arm/mach-pxa/cm-x2xx-pci.c
+++ b/arch/arm/mach-pxa/cm-x2xx-pci.c
@@ -29,13 +29,12 @@
 void __iomem *it8152_base_address;
 static int cmx2xx_it8152_irq_gpio;
 
-static void cmx2xx_it8152_irq_demux(unsigned int __irq, struct irq_desc *desc)
+static void cmx2xx_it8152_irq_demux(struct irq_desc *desc)
 {
-	unsigned int irq = irq_desc_get_irq(desc);
 	/* clear our parent irq */
 	desc->irq_data.chip->irq_ack(&desc->irq_data);
 
-	it8152_irq_demux(irq, desc);
+	it8152_irq_demux(desc);
 }
 
 void __cmx2xx_pci_init_irq(int irq_gpio)
diff --git a/arch/arm/mach-pxa/lpd270.c b/arch/arm/mach-pxa/lpd270.c
index b070167deef2..4823d972e647 100644
--- a/arch/arm/mach-pxa/lpd270.c
+++ b/arch/arm/mach-pxa/lpd270.c
@@ -120,7 +120,7 @@ static struct irq_chip lpd270_irq_chip = {
 	.irq_unmask	= lpd270_unmask_irq,
 };
 
-static void lpd270_irq_handler(unsigned int __irq, struct irq_desc *desc)
+static void lpd270_irq_handler(struct irq_desc *desc)
 {
 	unsigned int irq;
 	unsigned long pending;
diff --git a/arch/arm/mach-pxa/pcm990-baseboard.c b/arch/arm/mach-pxa/pcm990-baseboard.c
index 9a0c8affdadb..d8319b54299a 100644
--- a/arch/arm/mach-pxa/pcm990-baseboard.c
+++ b/arch/arm/mach-pxa/pcm990-baseboard.c
@@ -284,7 +284,7 @@ static struct irq_chip pcm990_irq_chip = {
 	.irq_unmask	= pcm990_unmask_irq,
 };
 
-static void pcm990_irq_handler(unsigned int __irq, struct irq_desc *desc)
+static void pcm990_irq_handler(struct irq_desc *desc)
 {
 	unsigned int irq;
 	unsigned long pending;
diff --git a/arch/arm/mach-pxa/viper.c b/arch/arm/mach-pxa/viper.c
index 4841d6cefe76..8ab26370107e 100644
--- a/arch/arm/mach-pxa/viper.c
+++ b/arch/arm/mach-pxa/viper.c
@@ -276,7 +276,7 @@ static inline unsigned long viper_irq_pending(void)
 			viper_irq_enabled_mask;
 }
 
-static void viper_irq_handler(unsigned int __irq, struct irq_desc *desc)
+static void viper_irq_handler(struct irq_desc *desc)
 {
 	unsigned int irq;
 	unsigned long pending;
diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c
index 6f94dd7b4dee..30e62a3f0701 100644
--- a/arch/arm/mach-pxa/zeus.c
+++ b/arch/arm/mach-pxa/zeus.c
@@ -105,7 +105,7 @@ static inline unsigned long zeus_irq_pending(void)
 	return __raw_readw(ZEUS_CPLD_ISA_IRQ) & zeus_irq_enabled_mask;
 }
 
-static void zeus_irq_handler(unsigned int __irq, struct irq_desc *desc)
+static void zeus_irq_handler(struct irq_desc *desc)
 {
 	unsigned int irq;
 	unsigned long pending;
diff --git a/arch/arm/mach-rpc/ecard.c b/arch/arm/mach-rpc/ecard.c
index f726d4c4e6dd..dc67a7fb3831 100644
--- a/arch/arm/mach-rpc/ecard.c
+++ b/arch/arm/mach-rpc/ecard.c
@@ -551,8 +551,7 @@ static void ecard_check_lockup(struct irq_desc *desc)
 	}
 }
 
-static void
-ecard_irq_handler(unsigned int irq, struct irq_desc *desc)
+static void ecard_irq_handler(struct irq_desc *desc)
 {
 	ecard_t *ec;
 	int called = 0;
diff --git a/arch/arm/mach-s3c24xx/bast-irq.c b/arch/arm/mach-s3c24xx/bast-irq.c
index ced1ab86ac83..2bb08961e934 100644
--- a/arch/arm/mach-s3c24xx/bast-irq.c
+++ b/arch/arm/mach-s3c24xx/bast-irq.c
@@ -100,9 +100,7 @@ static struct irq_chip  bast_pc104_chip = {
 	.irq_ack	= bast_pc104_maskack
 };
 
-static void
-bast_irq_pc104_demux(unsigned int irq,
-		     struct irq_desc *desc)
+static void bast_irq_pc104_demux(struct irq_desc *desc)
 {
 	unsigned int stat;
 	unsigned int irqno;
diff --git a/arch/arm/mach-s3c64xx/common.c b/arch/arm/mach-s3c64xx/common.c
index fd63ecfb2f81..ddb30b8434c5 100644
--- a/arch/arm/mach-s3c64xx/common.c
+++ b/arch/arm/mach-s3c64xx/common.c
@@ -388,22 +388,22 @@ static inline void s3c_irq_demux_eint(unsigned int start, unsigned int end)
 	}
 }
 
-static void s3c_irq_demux_eint0_3(unsigned int irq, struct irq_desc *desc)
+static void s3c_irq_demux_eint0_3(struct irq_desc *desc)
 {
 	s3c_irq_demux_eint(0, 3);
 }
 
-static void s3c_irq_demux_eint4_11(unsigned int irq, struct irq_desc *desc)
+static void s3c_irq_demux_eint4_11(struct irq_desc *desc)
 {
 	s3c_irq_demux_eint(4, 11);
 }
 
-static void s3c_irq_demux_eint12_19(unsigned int irq, struct irq_desc *desc)
+static void s3c_irq_demux_eint12_19(struct irq_desc *desc)
 {
 	s3c_irq_demux_eint(12, 19);
 }
 
-static void s3c_irq_demux_eint20_27(unsigned int irq, struct irq_desc *desc)
+static void s3c_irq_demux_eint20_27(struct irq_desc *desc)
 {
 	s3c_irq_demux_eint(20, 27);
 }
diff --git a/arch/arm/mach-sa1100/neponset.c b/arch/arm/mach-sa1100/neponset.c
index 6d237b4f7a8e..8411985af9ff 100644
--- a/arch/arm/mach-sa1100/neponset.c
+++ b/arch/arm/mach-sa1100/neponset.c
@@ -166,7 +166,7 @@ static struct sa1100_port_fns neponset_port_fns = {
  * ensure that the IRQ signal is deasserted before returning.  This
  * is rather unfortunate.
  */
-static void neponset_irq_handler(unsigned int irq, struct irq_desc *desc)
+static void neponset_irq_handler(struct irq_desc *desc)
 {
 	struct neponset_drvdata *d = irq_desc_get_handler_data(desc);
 	unsigned int irr;
diff --git a/arch/arm/plat-orion/gpio.c b/arch/arm/plat-orion/gpio.c
index 79c33eca09a3..7bd22d8e5b11 100644
--- a/arch/arm/plat-orion/gpio.c
+++ b/arch/arm/plat-orion/gpio.c
@@ -407,7 +407,7 @@ static int gpio_irq_set_type(struct irq_data *d, u32 type)
 	return 0;
 }
 
-static void gpio_irq_handler(unsigned __irq, struct irq_desc *desc)
+static void gpio_irq_handler(struct irq_desc *desc)
 {
 	struct orion_gpio_chip *ochip = irq_desc_get_handler_data(desc);
 	u32 cause, type;
diff --git a/arch/avr32/mach-at32ap/extint.c b/arch/avr32/mach-at32ap/extint.c
index d51ff8f1c541..96cabad68489 100644
--- a/arch/avr32/mach-at32ap/extint.c
+++ b/arch/avr32/mach-at32ap/extint.c
@@ -144,7 +144,7 @@ static struct irq_chip eic_chip = {
 	.irq_set_type	= eic_set_irq_type,
 };
 
-static void demux_eic_irq(unsigned int irq, struct irq_desc *desc)
+static void demux_eic_irq(struct irq_desc *desc)
 {
 	struct eic *eic = irq_desc_get_handler_data(desc);
 	unsigned long status, pending;
diff --git a/arch/avr32/mach-at32ap/pio.c b/arch/avr32/mach-at32ap/pio.c
index 157a5e0e789f..4f61378c3453 100644
--- a/arch/avr32/mach-at32ap/pio.c
+++ b/arch/avr32/mach-at32ap/pio.c
@@ -281,7 +281,7 @@ static struct irq_chip gpio_irqchip = {
 	.irq_set_type	= gpio_irq_type,
 };
 
-static void gpio_irq_handler(unsigned irq, struct irq_desc *desc)
+static void gpio_irq_handler(struct irq_desc *desc)
 {
 	struct pio_device	*pio = irq_desc_get_chip_data(desc);
 	unsigned		gpio_irq;
diff --git a/arch/blackfin/include/asm/irq_handler.h b/arch/blackfin/include/asm/irq_handler.h
index 4b2a992794d7..d2f90c72378e 100644
--- a/arch/blackfin/include/asm/irq_handler.h
+++ b/arch/blackfin/include/asm/irq_handler.h
@@ -60,7 +60,7 @@ extern void bfin_internal_mask_irq(unsigned int irq);
 extern void bfin_internal_unmask_irq(unsigned int irq);
 
 struct irq_desc;
-extern void bfin_demux_mac_status_irq(unsigned int, struct irq_desc *);
-extern void bfin_demux_gpio_irq(unsigned int, struct irq_desc *);
+extern void bfin_demux_mac_status_irq(struct irq_desc *);
+extern void bfin_demux_gpio_irq(struct irq_desc *);
 
 #endif
diff --git a/arch/blackfin/kernel/irqchip.c b/arch/blackfin/kernel/irqchip.c
index 0ba25764b8c0..052cde5ed2e4 100644
--- a/arch/blackfin/kernel/irqchip.c
+++ b/arch/blackfin/kernel/irqchip.c
@@ -107,7 +107,7 @@ asmlinkage void asm_do_IRQ(unsigned int irq, struct pt_regs *regs)
 	 * than crashing, do something sensible.
 	 */
 	if (irq >= NR_IRQS)
-		handle_bad_irq(irq, &bad_irq_desc);
+		handle_bad_irq(&bad_irq_desc);
 	else
 		generic_handle_irq(irq);
 
diff --git a/arch/blackfin/mach-bf537/ints-priority.c b/arch/blackfin/mach-bf537/ints-priority.c
index 14b2f74554dc..a48baae4384d 100644
--- a/arch/blackfin/mach-bf537/ints-priority.c
+++ b/arch/blackfin/mach-bf537/ints-priority.c
@@ -89,8 +89,7 @@ static struct irq_chip bf537_generic_error_irqchip = {
 	.irq_unmask = bf537_generic_error_unmask_irq,
 };
 
-static void bf537_demux_error_irq(unsigned int int_err_irq,
-				  struct irq_desc *inta_desc)
+static void bf537_demux_error_irq(struct irq_desc *inta_desc)
 {
 	int irq = 0;
 
@@ -182,15 +181,12 @@ static struct irq_chip bf537_mac_rx_irqchip = {
 	.irq_unmask = bf537_mac_rx_unmask_irq,
 };
 
-static void bf537_demux_mac_rx_irq(unsigned int __int_irq,
-				   struct irq_desc *desc)
+static void bf537_demux_mac_rx_irq(struct irq_desc *desc)
 {
-	unsigned int int_irq = irq_desc_get_irq(desc);
-
 	if (bfin_read_DMA1_IRQ_STATUS() & (DMA_DONE | DMA_ERR))
 		bfin_handle_irq(IRQ_MAC_RX);
 	else
-		bfin_demux_gpio_irq(int_irq, desc);
+		bfin_demux_gpio_irq(desc);
 }
 #endif
 
diff --git a/arch/blackfin/mach-common/ints-priority.c b/arch/blackfin/mach-common/ints-priority.c
index a6d1b03cdf36..e8d4d748d0fd 100644
--- a/arch/blackfin/mach-common/ints-priority.c
+++ b/arch/blackfin/mach-common/ints-priority.c
@@ -656,8 +656,7 @@ static struct irq_chip bfin_mac_status_irqchip = {
 	.irq_set_wake = bfin_mac_status_set_wake,
 };
 
-void bfin_demux_mac_status_irq(unsigned int int_err_irq,
-			       struct irq_desc *inta_desc)
+void bfin_demux_mac_status_irq(struct irq_desc *inta_desc)
 {
 	int i, irq = 0;
 	u32 status = bfin_read_EMAC_SYSTAT();
@@ -825,7 +824,7 @@ static void bfin_demux_gpio_block(unsigned int irq)
 	}
 }
 
-void bfin_demux_gpio_irq(unsigned int __inta_irq, struct irq_desc *desc)
+void bfin_demux_gpio_irq(struct irq_desc *desc)
 {
 	unsigned int inta_irq = irq_desc_get_irq(desc);
 	unsigned int irq;
diff --git a/arch/c6x/platforms/megamod-pic.c b/arch/c6x/platforms/megamod-pic.c
index d487698e978a..ddcb45d7dfa7 100644
--- a/arch/c6x/platforms/megamod-pic.c
+++ b/arch/c6x/platforms/megamod-pic.c
@@ -93,7 +93,7 @@ static struct irq_chip megamod_chip = {
 	.irq_unmask	= unmask_megamod,
 };
 
-static void megamod_irq_cascade(unsigned int __irq, struct irq_desc *desc)
+static void megamod_irq_cascade(struct irq_desc *desc)
 {
 	struct megamod_cascade_data *cascade;
 	struct megamod_pic *pic;
diff --git a/arch/m68k/amiga/amiints.c b/arch/m68k/amiga/amiints.c
index 47b5f90002ab..7ff739e94896 100644
--- a/arch/m68k/amiga/amiints.c
+++ b/arch/m68k/amiga/amiints.c
@@ -46,7 +46,7 @@ static struct irq_chip amiga_irq_chip = {
  * The builtin Amiga hardware interrupt handlers.
  */
 
-static void ami_int1(unsigned int irq, struct irq_desc *desc)
+static void ami_int1(struct irq_desc *desc)
 {
 	unsigned short ints = amiga_custom.intreqr & amiga_custom.intenar;
 
@@ -69,7 +69,7 @@ static void ami_int1(unsigned int irq, struct irq_desc *desc)
 	}
 }
 
-static void ami_int3(unsigned int irq, struct irq_desc *desc)
+static void ami_int3(struct irq_desc *desc)
 {
 	unsigned short ints = amiga_custom.intreqr & amiga_custom.intenar;
 
@@ -92,7 +92,7 @@ static void ami_int3(unsigned int irq, struct irq_desc *desc)
 	}
 }
 
-static void ami_int4(unsigned int irq, struct irq_desc *desc)
+static void ami_int4(struct irq_desc *desc)
 {
 	unsigned short ints = amiga_custom.intreqr & amiga_custom.intenar;
 
@@ -121,7 +121,7 @@ static void ami_int4(unsigned int irq, struct irq_desc *desc)
 	}
 }
 
-static void ami_int5(unsigned int irq, struct irq_desc *desc)
+static void ami_int5(struct irq_desc *desc)
 {
 	unsigned short ints = amiga_custom.intreqr & amiga_custom.intenar;
 
diff --git a/arch/m68k/coldfire/intc-5272.c b/arch/m68k/coldfire/intc-5272.c
index 47371de60427..b0a19e207a63 100644
--- a/arch/m68k/coldfire/intc-5272.c
+++ b/arch/m68k/coldfire/intc-5272.c
@@ -143,12 +143,10 @@ static int intc_irq_set_type(struct irq_data *d, unsigned int type)
  * We need to be careful with the masking/acking due to the side effects
  * of masking an interrupt.
  */
-static void intc_external_irq(unsigned int __irq, struct irq_desc *desc)
+static void intc_external_irq(struct irq_desc *desc)
 {
-	unsigned int irq = irq_desc_get_irq(desc);
-
 	irq_desc_get_chip(desc)->irq_ack(&desc->irq_data);
-	handle_simple_irq(irq, desc);
+	handle_simple_irq(desc);
 }
 
 static struct irq_chip intc_irq_chip = {
diff --git a/arch/m68k/include/asm/irq.h b/arch/m68k/include/asm/irq.h
index 81ca118d58af..a644f4a53b94 100644
--- a/arch/m68k/include/asm/irq.h
+++ b/arch/m68k/include/asm/irq.h
@@ -64,8 +64,7 @@ extern void m68k_setup_auto_interrupt(void (*handler)(unsigned int,
 						      struct pt_regs *));
 extern void m68k_setup_user_interrupt(unsigned int vec, unsigned int cnt);
 extern void m68k_setup_irq_controller(struct irq_chip *,
-				      void (*handle)(unsigned int irq,
-						     struct irq_desc *desc),
+				      void (*handle)(struct irq_desc *desc),
 				      unsigned int irq, unsigned int cnt);
 
 extern unsigned int irq_canonicalize(unsigned int irq);
diff --git a/arch/m68k/include/asm/mac_via.h b/arch/m68k/include/asm/mac_via.h
index fe3fc9ae1b69..53c632c85b03 100644
--- a/arch/m68k/include/asm/mac_via.h
+++ b/arch/m68k/include/asm/mac_via.h
@@ -261,7 +261,7 @@ extern void via_irq_enable(int);
 extern void via_irq_disable(int);
 extern void via_nubus_irq_startup(int irq);
 extern void via_nubus_irq_shutdown(int irq);
-extern void via1_irq(unsigned int irq, struct irq_desc *desc);
+extern void via1_irq(struct irq_desc *desc);
 extern void via1_set_head(int);
 extern int via2_scsi_drq_pending(void);
 
diff --git a/arch/m68k/mac/baboon.c b/arch/m68k/mac/baboon.c
index 3fe0e43d44f6..f6f7d42713ec 100644
--- a/arch/m68k/mac/baboon.c
+++ b/arch/m68k/mac/baboon.c
@@ -45,7 +45,7 @@ void __init baboon_init(void)
  * Baboon interrupt handler. This works a lot like a VIA.
  */
 
-static void baboon_irq(unsigned int irq, struct irq_desc *desc)
+static void baboon_irq(struct irq_desc *desc)
 {
 	int irq_bit, irq_num;
 	unsigned char events;
diff --git a/arch/m68k/mac/oss.c b/arch/m68k/mac/oss.c
index 191610d97689..55d6592783f5 100644
--- a/arch/m68k/mac/oss.c
+++ b/arch/m68k/mac/oss.c
@@ -63,7 +63,7 @@ void __init oss_nubus_init(void)
  * Handle miscellaneous OSS interrupts.
  */
 
-static void oss_irq(unsigned int __irq, struct irq_desc *desc)
+static void oss_irq(struct irq_desc *desc)
 {
 	int events = oss->irq_pending &
 		(OSS_IP_IOPSCC | OSS_IP_SCSI | OSS_IP_IOPISM);
@@ -99,7 +99,7 @@ static void oss_irq(unsigned int __irq, struct irq_desc *desc)
  * Unlike the VIA/RBV this is on its own autovector interrupt level.
  */
 
-static void oss_nubus_irq(unsigned int irq, struct irq_desc *desc)
+static void oss_nubus_irq(struct irq_desc *desc)
 {
 	int events, irq_bit, i;
 
diff --git a/arch/m68k/mac/psc.c b/arch/m68k/mac/psc.c
index 3b9e302e7a37..cd38f29955c8 100644
--- a/arch/m68k/mac/psc.c
+++ b/arch/m68k/mac/psc.c
@@ -113,7 +113,7 @@ void __init psc_init(void)
  * PSC interrupt handler. It's a lot like the VIA interrupt handler.
  */
 
-static void psc_irq(unsigned int __irq, struct irq_desc *desc)
+static void psc_irq(struct irq_desc *desc)
 {
 	unsigned int offset = (unsigned int)irq_desc_get_handler_data(desc);
 	unsigned int irq = irq_desc_get_irq(desc);
diff --git a/arch/m68k/mac/via.c b/arch/m68k/mac/via.c
index e198dec868e4..ce56e04386e7 100644
--- a/arch/m68k/mac/via.c
+++ b/arch/m68k/mac/via.c
@@ -446,7 +446,7 @@ void via_nubus_irq_shutdown(int irq)
  * via6522.c :-), disable/pending masks added.
  */
 
-void via1_irq(unsigned int irq, struct irq_desc *desc)
+void via1_irq(struct irq_desc *desc)
 {
 	int irq_num;
 	unsigned char irq_bit, events;
@@ -467,7 +467,7 @@ void via1_irq(unsigned int irq, struct irq_desc *desc)
 	} while (events >= irq_bit);
 }
 
-static void via2_irq(unsigned int irq, struct irq_desc *desc)
+static void via2_irq(struct irq_desc *desc)
 {
 	int irq_num;
 	unsigned char irq_bit, events;
@@ -493,7 +493,7 @@ static void via2_irq(unsigned int irq, struct irq_desc *desc)
  * VIA2 dispatcher as a fast interrupt handler.
  */
 
-void via_nubus_irq(unsigned int irq, struct irq_desc *desc)
+static void via_nubus_irq(struct irq_desc *desc)
 {
 	int slot_irq;
 	unsigned char slot_bit, events;
diff --git a/arch/metag/kernel/irq.c b/arch/metag/kernel/irq.c
index a336094a7a6c..3074b64793e6 100644
--- a/arch/metag/kernel/irq.c
+++ b/arch/metag/kernel/irq.c
@@ -94,13 +94,11 @@ void do_IRQ(int irq, struct pt_regs *regs)
 			"MOV   D0.5,%0\n"
 			"MOV   D1Ar1,%1\n"
 			"MOV   D1RtP,%2\n"
-			"MOV   D0Ar2,%3\n"
 			"SWAP  A0StP,D0.5\n"
 			"SWAP  PC,D1RtP\n"
 			"MOV   A0StP,D0.5\n"
 			:
-			: "r" (isp), "r" (irq), "r" (desc->handle_irq),
-			  "r" (desc)
+			: "r" (isp), "r" (desc), "r" (desc->handle_irq)
 			: "memory", "cc", "D1Ar1", "D0Ar2", "D1Ar3", "D0Ar4",
 			  "D1Ar5", "D0Ar6", "D0Re0", "D1Re0", "D0.4", "D1RtP",
 			  "D0.5"
diff --git a/arch/mips/alchemy/common/irq.c b/arch/mips/alchemy/common/irq.c
index 4c496c50edf6..da9f9220048f 100644
--- a/arch/mips/alchemy/common/irq.c
+++ b/arch/mips/alchemy/common/irq.c
@@ -851,7 +851,7 @@ static struct syscore_ops alchemy_gpic_pmops = {
 
 /* create chained handlers for the 4 IC requests to the MIPS IRQ ctrl */
 #define DISP(name, base, addr)						      \
-static void au1000_##name##_dispatch(unsigned int irq, struct irq_desc *d)    \
+static void au1000_##name##_dispatch(struct irq_desc *d)		      \
 {									      \
 	unsigned long r = __raw_readl((void __iomem *)KSEG1ADDR(addr));	      \
 	if (likely(r))							      \
@@ -865,7 +865,7 @@ DISP(ic0r1, AU1000_INTC0_INT_BASE, AU1000_IC0_PHYS_ADDR + IC_REQ1INT)
 DISP(ic1r0, AU1000_INTC1_INT_BASE, AU1000_IC1_PHYS_ADDR + IC_REQ0INT)
 DISP(ic1r1, AU1000_INTC1_INT_BASE, AU1000_IC1_PHYS_ADDR + IC_REQ1INT)
 
-static void alchemy_gpic_dispatch(unsigned int irq, struct irq_desc *d)
+static void alchemy_gpic_dispatch(struct irq_desc *d)
 {
 	int i = __raw_readl(AU1300_GPIC_ADDR + AU1300_GPIC_PRIENC);
 	generic_handle_irq(ALCHEMY_GPIC_INT_BASE + i);
diff --git a/arch/mips/alchemy/devboards/bcsr.c b/arch/mips/alchemy/devboards/bcsr.c
index 324ad72d7c36..faeddf119fd4 100644
--- a/arch/mips/alchemy/devboards/bcsr.c
+++ b/arch/mips/alchemy/devboards/bcsr.c
@@ -86,7 +86,7 @@ EXPORT_SYMBOL_GPL(bcsr_mod);
 /*
  * DB1200/PB1200 CPLD IRQ muxer
  */
-static void bcsr_csc_handler(unsigned int irq, struct irq_desc *d)
+static void bcsr_csc_handler(struct irq_desc *d)
 {
 	unsigned short bisr = __raw_readw(bcsr_virt + BCSR_REG_INTSTAT);
 	struct irq_chip *chip = irq_desc_get_chip(d);
diff --git a/arch/mips/ath25/ar2315.c b/arch/mips/ath25/ar2315.c
index ec9a371f1e62..8da996142d6a 100644
--- a/arch/mips/ath25/ar2315.c
+++ b/arch/mips/ath25/ar2315.c
@@ -69,7 +69,7 @@ static struct irqaction ar2315_ahb_err_interrupt  = {
 	.name		= "ar2315-ahb-error",
 };
 
-static void ar2315_misc_irq_handler(unsigned irq, struct irq_desc *desc)
+static void ar2315_misc_irq_handler(struct irq_desc *desc)
 {
 	u32 pending = ar2315_rst_reg_read(AR2315_ISR) &
 		      ar2315_rst_reg_read(AR2315_IMR);
diff --git a/arch/mips/ath25/ar5312.c b/arch/mips/ath25/ar5312.c
index e63e38fa4880..acd55a9cffe3 100644
--- a/arch/mips/ath25/ar5312.c
+++ b/arch/mips/ath25/ar5312.c
@@ -73,7 +73,7 @@ static struct irqaction ar5312_ahb_err_interrupt  = {
 	.name    = "ar5312-ahb-error",
 };
 
-static void ar5312_misc_irq_handler(unsigned irq, struct irq_desc *desc)
+static void ar5312_misc_irq_handler(struct irq_desc *desc)
 {
 	u32 pending = ar5312_rst_reg_read(AR5312_ISR) &
 		      ar5312_rst_reg_read(AR5312_IMR);
diff --git a/arch/mips/ath79/irq.c b/arch/mips/ath79/irq.c
index 807132b838b2..15ecb4831e12 100644
--- a/arch/mips/ath79/irq.c
+++ b/arch/mips/ath79/irq.c
@@ -26,7 +26,7 @@
 #include "common.h"
 #include "machtypes.h"
 
-static void ath79_misc_irq_handler(unsigned int irq, struct irq_desc *desc)
+static void ath79_misc_irq_handler(struct irq_desc *desc)
 {
 	void __iomem *base = ath79_reset_base;
 	u32 pending;
@@ -119,7 +119,7 @@ static void __init ath79_misc_irq_init(void)
 	irq_set_chained_handler(ATH79_CPU_IRQ(6), ath79_misc_irq_handler);
 }
 
-static void ar934x_ip2_irq_dispatch(unsigned int irq, struct irq_desc *desc)
+static void ar934x_ip2_irq_dispatch(struct irq_desc *desc)
 {
 	u32 status;
 
@@ -148,7 +148,7 @@ static void ar934x_ip2_irq_init(void)
 	irq_set_chained_handler(ATH79_CPU_IRQ(2), ar934x_ip2_irq_dispatch);
 }
 
-static void qca955x_ip2_irq_dispatch(unsigned int irq, struct irq_desc *desc)
+static void qca955x_ip2_irq_dispatch(struct irq_desc *desc)
 {
 	u32 status;
 
@@ -171,7 +171,7 @@ static void qca955x_ip2_irq_dispatch(unsigned int irq, struct irq_desc *desc)
 	}
 }
 
-static void qca955x_ip3_irq_dispatch(unsigned int irq, struct irq_desc *desc)
+static void qca955x_ip3_irq_dispatch(struct irq_desc *desc)
 {
 	u32 status;
 
diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c
index f26c3c661cca..0352bc8d56b3 100644
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@@ -2221,7 +2221,7 @@ static irqreturn_t octeon_irq_cib_handler(int my_irq, void *data)
 			if (irqd_get_trigger_type(irq_data) &
 				IRQ_TYPE_EDGE_BOTH)
 				cvmx_write_csr(host_data->raw_reg, 1ull << i);
-			generic_handle_irq_desc(irq, desc);
+			generic_handle_irq_desc(desc);
 		}
 	}
 
diff --git a/arch/mips/include/asm/netlogic/common.h b/arch/mips/include/asm/netlogic/common.h
index 2a4c128277e4..be52c2125d71 100644
--- a/arch/mips/include/asm/netlogic/common.h
+++ b/arch/mips/include/asm/netlogic/common.h
@@ -57,8 +57,8 @@
 #include <asm/mach-netlogic/multi-node.h>
 
 struct irq_desc;
-void nlm_smp_function_ipi_handler(unsigned int irq, struct irq_desc *desc);
-void nlm_smp_resched_ipi_handler(unsigned int irq, struct irq_desc *desc);
+void nlm_smp_function_ipi_handler(struct irq_desc *desc);
+void nlm_smp_resched_ipi_handler(struct irq_desc *desc);
 void nlm_smp_irq_init(int hwcpuid);
 void nlm_boot_secondary_cpus(void);
 int nlm_wakeup_secondary_cpus(void);
diff --git a/arch/mips/jz4740/gpio.c b/arch/mips/jz4740/gpio.c
index 6cd69fdaa1c5..a74e181058b0 100644
--- a/arch/mips/jz4740/gpio.c
+++ b/arch/mips/jz4740/gpio.c
@@ -291,7 +291,7 @@ static void jz_gpio_check_trigger_both(struct jz_gpio_chip *chip, unsigned int i
 	writel(mask, reg);
 }
 
-static void jz_gpio_irq_demux_handler(unsigned int irq, struct irq_desc *desc)
+static void jz_gpio_irq_demux_handler(struct irq_desc *desc)
 {
 	uint32_t flag;
 	unsigned int gpio_irq;
diff --git a/arch/mips/netlogic/common/smp.c b/arch/mips/netlogic/common/smp.c
index 0136b4f9c9cd..10d86d54880a 100644
--- a/arch/mips/netlogic/common/smp.c
+++ b/arch/mips/netlogic/common/smp.c
@@ -82,7 +82,7 @@ void nlm_send_ipi_mask(const struct cpumask *mask, unsigned int action)
 }
 
 /* IRQ_IPI_SMP_FUNCTION Handler */
-void nlm_smp_function_ipi_handler(unsigned int __irq, struct irq_desc *desc)
+void nlm_smp_function_ipi_handler(struct irq_desc *desc)
 {
 	unsigned int irq = irq_desc_get_irq(desc);
 	clear_c0_eimr(irq);
@@ -92,7 +92,7 @@ void nlm_smp_function_ipi_handler(unsigned int __irq, struct irq_desc *desc)
 }
 
 /* IRQ_IPI_SMP_RESCHEDULE  handler */
-void nlm_smp_resched_ipi_handler(unsigned int __irq, struct irq_desc *desc)
+void nlm_smp_resched_ipi_handler(struct irq_desc *desc)
 {
 	unsigned int irq = irq_desc_get_irq(desc);
 	clear_c0_eimr(irq);
diff --git a/arch/mips/pci/pci-ar2315.c b/arch/mips/pci/pci-ar2315.c
index f8d0acb4f973..b4fa6413c4e5 100644
--- a/arch/mips/pci/pci-ar2315.c
+++ b/arch/mips/pci/pci-ar2315.c
@@ -318,7 +318,7 @@ static int ar2315_pci_host_setup(struct ar2315_pci_ctrl *apc)
 	return 0;
 }
 
-static void ar2315_pci_irq_handler(unsigned irq, struct irq_desc *desc)
+static void ar2315_pci_irq_handler(struct irq_desc *desc)
 {
 	struct ar2315_pci_ctrl *apc = irq_desc_get_handler_data(desc);
 	u32 pending = ar2315_pci_reg_read(apc, AR2315_PCI_ISR) &
diff --git a/arch/mips/pci/pci-ar71xx.c b/arch/mips/pci/pci-ar71xx.c
index ad35a5e6a56c..7db963deec73 100644
--- a/arch/mips/pci/pci-ar71xx.c
+++ b/arch/mips/pci/pci-ar71xx.c
@@ -226,7 +226,7 @@ static struct pci_ops ar71xx_pci_ops = {
 	.write	= ar71xx_pci_write_config,
 };
 
-static void ar71xx_pci_irq_handler(unsigned int irq, struct irq_desc *desc)
+static void ar71xx_pci_irq_handler(struct irq_desc *desc)
 {
 	struct ar71xx_pci_controller *apc;
 	void __iomem *base = ath79_reset_base;
diff --git a/arch/mips/pci/pci-ar724x.c b/arch/mips/pci/pci-ar724x.c
index 907d11dd921b..2013dad700df 100644
--- a/arch/mips/pci/pci-ar724x.c
+++ b/arch/mips/pci/pci-ar724x.c
@@ -225,7 +225,7 @@ static struct pci_ops ar724x_pci_ops = {
 	.write	= ar724x_pci_write,
 };
 
-static void ar724x_pci_irq_handler(unsigned int irq, struct irq_desc *desc)
+static void ar724x_pci_irq_handler(struct irq_desc *desc)
 {
 	struct ar724x_pci_controller *apc;
 	void __iomem *base;
diff --git a/arch/mips/pci/pci-rt3883.c b/arch/mips/pci/pci-rt3883.c
index 53c8efaf1572..ed6732f9aa87 100644
--- a/arch/mips/pci/pci-rt3883.c
+++ b/arch/mips/pci/pci-rt3883.c
@@ -129,7 +129,7 @@ static void rt3883_pci_write_cfg32(struct rt3883_pci_controller *rpc,
 	rt3883_pci_w32(rpc, val, RT3883_PCI_REG_CFGDATA);
 }
 
-static void rt3883_pci_irq_handler(unsigned int __irq, struct irq_desc *desc)
+static void rt3883_pci_irq_handler(struct irq_desc *desc)
 {
 	struct rt3883_pci_controller *rpc;
 	u32 pending;
diff --git a/arch/mips/ralink/irq.c b/arch/mips/ralink/irq.c
index 8c624a8b9ea2..4cf77f358395 100644
--- a/arch/mips/ralink/irq.c
+++ b/arch/mips/ralink/irq.c
@@ -96,7 +96,7 @@ unsigned int get_c0_compare_int(void)
 	return CP0_LEGACY_COMPARE_IRQ;
 }
 
-static void ralink_intc_irq_handler(unsigned int irq, struct irq_desc *desc)
+static void ralink_intc_irq_handler(struct irq_desc *desc)
 {
 	u32 pending = rt_intc_r32(INTC_REG_STATUS0);
 
diff --git a/arch/powerpc/include/asm/qe_ic.h b/arch/powerpc/include/asm/qe_ic.h
index 25784cc959a0..1e155ca6d33c 100644
--- a/arch/powerpc/include/asm/qe_ic.h
+++ b/arch/powerpc/include/asm/qe_ic.h
@@ -59,14 +59,14 @@ enum qe_ic_grp_id {
 
 #ifdef CONFIG_QUICC_ENGINE
 void qe_ic_init(struct device_node *node, unsigned int flags,
-		void (*low_handler)(unsigned int irq, struct irq_desc *desc),
-		void (*high_handler)(unsigned int irq, struct irq_desc *desc));
+		void (*low_handler)(struct irq_desc *desc),
+		void (*high_handler)(struct irq_desc *desc));
 unsigned int qe_ic_get_low_irq(struct qe_ic *qe_ic);
 unsigned int qe_ic_get_high_irq(struct qe_ic *qe_ic);
 #else
 static inline void qe_ic_init(struct device_node *node, unsigned int flags,
-		void (*low_handler)(unsigned int irq, struct irq_desc *desc),
-		void (*high_handler)(unsigned int irq, struct irq_desc *desc))
+		void (*low_handler)(struct irq_desc *desc),
+		void (*high_handler)(struct irq_desc *desc))
 {}
 static inline unsigned int qe_ic_get_low_irq(struct qe_ic *qe_ic)
 { return 0; }
@@ -78,8 +78,7 @@ void qe_ic_set_highest_priority(unsigned int virq, int high);
 int qe_ic_set_priority(unsigned int virq, unsigned int priority);
 int qe_ic_set_high_priority(unsigned int virq, unsigned int priority, int high);
 
-static inline void qe_ic_cascade_low_ipic(unsigned int irq,
-					  struct irq_desc *desc)
+static inline void qe_ic_cascade_low_ipic(struct irq_desc *desc)
 {
 	struct qe_ic *qe_ic = irq_desc_get_handler_data(desc);
 	unsigned int cascade_irq = qe_ic_get_low_irq(qe_ic);
@@ -88,8 +87,7 @@ static inline void qe_ic_cascade_low_ipic(unsigned int irq,
 		generic_handle_irq(cascade_irq);
 }
 
-static inline void qe_ic_cascade_high_ipic(unsigned int irq,
-					   struct irq_desc *desc)
+static inline void qe_ic_cascade_high_ipic(struct irq_desc *desc)
 {
 	struct qe_ic *qe_ic = irq_desc_get_handler_data(desc);
 	unsigned int cascade_irq = qe_ic_get_high_irq(qe_ic);
@@ -98,8 +96,7 @@ static inline void qe_ic_cascade_high_ipic(unsigned int irq,
 		generic_handle_irq(cascade_irq);
 }
 
-static inline void qe_ic_cascade_low_mpic(unsigned int irq,
-					  struct irq_desc *desc)
+static inline void qe_ic_cascade_low_mpic(struct irq_desc *desc)
 {
 	struct qe_ic *qe_ic = irq_desc_get_handler_data(desc);
 	unsigned int cascade_irq = qe_ic_get_low_irq(qe_ic);
@@ -111,8 +108,7 @@ static inline void qe_ic_cascade_low_mpic(unsigned int irq,
 	chip->irq_eoi(&desc->irq_data);
 }
 
-static inline void qe_ic_cascade_high_mpic(unsigned int irq,
-					   struct irq_desc *desc)
+static inline void qe_ic_cascade_high_mpic(struct irq_desc *desc)
 {
 	struct qe_ic *qe_ic = irq_desc_get_handler_data(desc);
 	unsigned int cascade_irq = qe_ic_get_high_irq(qe_ic);
@@ -124,8 +120,7 @@ static inline void qe_ic_cascade_high_mpic(unsigned int irq,
 	chip->irq_eoi(&desc->irq_data);
 }
 
-static inline void qe_ic_cascade_muxed_mpic(unsigned int irq,
-					    struct irq_desc *desc)
+static inline void qe_ic_cascade_muxed_mpic(struct irq_desc *desc)
 {
 	struct qe_ic *qe_ic = irq_desc_get_handler_data(desc);
 	unsigned int cascade_irq;
diff --git a/arch/powerpc/include/asm/tsi108_pci.h b/arch/powerpc/include/asm/tsi108_pci.h
index 5653d7cc3e24..ae59d5b672b0 100644
--- a/arch/powerpc/include/asm/tsi108_pci.h
+++ b/arch/powerpc/include/asm/tsi108_pci.h
@@ -39,7 +39,7 @@
 
 extern int tsi108_setup_pci(struct device_node *dev, u32 cfg_phys, int primary);
 extern void tsi108_pci_int_init(struct device_node *node);
-extern void tsi108_irq_cascade(unsigned int irq, struct irq_desc *desc);
+extern void tsi108_irq_cascade(struct irq_desc *desc);
 extern void tsi108_clear_pci_cfg_error(void);
 
 #endif				/*  _ASM_POWERPC_TSI108_PCI_H */
diff --git a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
index cf8287487157..0035d146df73 100644
--- a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
+++ b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
@@ -104,8 +104,7 @@ cpld_pic_get_irq(int offset, u8 ignore, u8 __iomem *statusp,
 	return irq_linear_revmap(cpld_pic_host, cpld_irq);
 }
 
-static void
-cpld_pic_cascade(unsigned int __irq, struct irq_desc *desc)
+static void cpld_pic_cascade(struct irq_desc *desc)
 {
 	unsigned int irq;
 
diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c
index 32cae33c4266..8fb95480fd73 100644
--- a/arch/powerpc/platforms/52xx/media5200.c
+++ b/arch/powerpc/platforms/52xx/media5200.c
@@ -80,7 +80,7 @@ static struct irq_chip media5200_irq_chip = {
 	.irq_mask_ack = media5200_irq_mask,
 };
 
-void media5200_irq_cascade(unsigned int virq, struct irq_desc *desc)
+static void media5200_irq_cascade(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	int sub_virq, val;
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
index 63016621aff8..78ac19aefa4d 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
@@ -191,7 +191,7 @@ static struct irq_chip mpc52xx_gpt_irq_chip = {
 	.irq_set_type = mpc52xx_gpt_irq_set_type,
 };
 
-void mpc52xx_gpt_irq_cascade(unsigned int virq, struct irq_desc *desc)
+static void mpc52xx_gpt_irq_cascade(struct irq_desc *desc)
 {
 	struct mpc52xx_gpt_priv *gpt = irq_desc_get_handler_data(desc);
 	int sub_virq;
diff --git a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
index 74861a7fb807..60e89fc9c753 100644
--- a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
+++ b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
@@ -78,7 +78,7 @@ static struct irq_chip pq2ads_pci_ic = {
 	.irq_disable = pq2ads_pci_mask_irq
 };
 
-static void pq2ads_pci_irq_demux(unsigned int irq, struct irq_desc *desc)
+static void pq2ads_pci_irq_demux(struct irq_desc *desc)
 {
 	struct pq2ads_pci_pic *priv = irq_desc_get_handler_data(desc);
 	u32 stat, mask, pend;
diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c
index 7bfb9b184dd4..23791de7b688 100644
--- a/arch/powerpc/platforms/85xx/common.c
+++ b/arch/powerpc/platforms/85xx/common.c
@@ -49,7 +49,7 @@ int __init mpc85xx_common_publish_devices(void)
 	return of_platform_bus_probe(NULL, mpc85xx_common_ids, NULL);
 }
 #ifdef CONFIG_CPM2
-static void cpm2_cascade(unsigned int irq, struct irq_desc *desc)
+static void cpm2_cascade(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	int cascade_irq;
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
index 13a8d1a3d55c..5ac70de3e48a 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
@@ -192,10 +192,8 @@ void mpc85xx_cds_fixup_bus(struct pci_bus *bus)
 }
 
 #ifdef CONFIG_PPC_I8259
-static void mpc85xx_8259_cascade_handler(unsigned int __irq,
-					 struct irq_desc *desc)
+static void mpc85xx_8259_cascade_handler(struct irq_desc *desc)
 {
-	unsigned int irq = irq_desc_get_irq(desc);
 	unsigned int cascade_irq = i8259_irq();
 
 	if (cascade_irq != NO_IRQ)
@@ -203,7 +201,7 @@ static void mpc85xx_8259_cascade_handler(unsigned int __irq,
 		generic_handle_irq(cascade_irq);
 
 	/* check for any interrupts from the shared IRQ line */
-	handle_fasteoi_irq(irq, desc);
+	handle_fasteoi_irq(desc);
 }
 
 static irqreturn_t mpc85xx_8259_cascade_action(int irq, void *dev_id)
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
index ffdf02121a7c..f858306dba6a 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
@@ -46,7 +46,7 @@
 #endif
 
 #ifdef CONFIG_PPC_I8259
-static void mpc85xx_8259_cascade(unsigned int irq, struct irq_desc *desc)
+static void mpc85xx_8259_cascade(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	unsigned int cascade_irq = i8259_irq();
diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
index d78fda892e8b..b02d6a5bb035 100644
--- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
+++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
@@ -91,7 +91,7 @@ static inline unsigned int socrates_fpga_pic_get_irq(unsigned int irq)
 			(irq_hw_number_t)i);
 }
 
-void socrates_fpga_pic_cascade(unsigned int __irq, struct irq_desc *desc)
+static void socrates_fpga_pic_cascade(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	unsigned int irq = irq_desc_get_irq(desc);
diff --git a/arch/powerpc/platforms/86xx/pic.c b/arch/powerpc/platforms/86xx/pic.c
index d5b98c0f958a..845defa1fd19 100644
--- a/arch/powerpc/platforms/86xx/pic.c
+++ b/arch/powerpc/platforms/86xx/pic.c
@@ -17,7 +17,7 @@
 #include <asm/i8259.h>
 
 #ifdef CONFIG_PPC_I8259
-static void mpc86xx_8259_cascade(unsigned int irq, struct irq_desc *desc)
+static void mpc86xx_8259_cascade(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	unsigned int cascade_irq = i8259_irq();
diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c
index d3037747031d..c289fc77b4ba 100644
--- a/arch/powerpc/platforms/8xx/m8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/m8xx_setup.c
@@ -214,7 +214,7 @@ void mpc8xx_restart(char *cmd)
 	panic("Restart failed\n");
 }
 
-static void cpm_cascade(unsigned int irq, struct irq_desc *desc)
+static void cpm_cascade(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	int cascade_irq = cpm_get_irq();
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
index 306888acb737..e0e68a1c0d3c 100644
--- a/arch/powerpc/platforms/cell/axon_msi.c
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -93,7 +93,7 @@ static void msic_dcr_write(struct axon_msic *msic, unsigned int dcr_n, u32 val)
 	dcr_write(msic->dcr_host, dcr_n, val);
 }
 
-static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc)
+static void axon_msi_cascade(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	struct axon_msic *msic = irq_desc_get_handler_data(desc);
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index 6558e7e8ee50..9f609fc8d331 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -99,7 +99,7 @@ static void iic_ioexc_eoi(struct irq_data *d)
 {
 }
 
-static void iic_ioexc_cascade(unsigned int __irq, struct irq_desc *desc)
+static void iic_ioexc_cascade(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	struct cbe_iic_regs __iomem *node_iic =
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
index 1f72f4ab6353..9d27de62dc62 100644
--- a/arch/powerpc/platforms/cell/spider-pic.c
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -199,7 +199,7 @@ static const struct irq_domain_ops spider_host_ops = {
 	.xlate = spider_host_xlate,
 };
 
-static void spider_irq_cascade(unsigned int irq, struct irq_desc *desc)
+static void spider_irq_cascade(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	struct spider_pic *pic = irq_desc_get_handler_data(desc);
diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c
index 15ebc4e8a151..987d1b8d68e3 100644
--- a/arch/powerpc/platforms/chrp/setup.c
+++ b/arch/powerpc/platforms/chrp/setup.c
@@ -363,7 +363,7 @@ void __init chrp_setup_arch(void)
 	if (ppc_md.progress) ppc_md.progress("Linux/PPC "UTS_RELEASE"\n", 0x0);
 }
 
-static void chrp_8259_cascade(unsigned int irq, struct irq_desc *desc)
+static void chrp_8259_cascade(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	unsigned int cascade_irq = i8259_irq();
diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
index 9dd154d6f89a..9b7975706bfc 100644
--- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
@@ -120,8 +120,7 @@ static unsigned int __hlwd_pic_get_irq(struct irq_domain *h)
 	return irq_linear_revmap(h, irq);
 }
 
-static void hlwd_pic_irq_cascade(unsigned int cascade_virq,
-				      struct irq_desc *desc)
+static void hlwd_pic_irq_cascade(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	struct irq_domain *irq_domain = irq_desc_get_handler_data(desc);
diff --git a/arch/powerpc/platforms/embedded6xx/mvme5100.c b/arch/powerpc/platforms/embedded6xx/mvme5100.c
index 1613303177e6..8f65aa3747f5 100644
--- a/arch/powerpc/platforms/embedded6xx/mvme5100.c
+++ b/arch/powerpc/platforms/embedded6xx/mvme5100.c
@@ -42,7 +42,7 @@
 static phys_addr_t pci_membase;
 static u_char *restart;
 
-static void mvme5100_8259_cascade(unsigned int irq, struct irq_desc *desc)
+static void mvme5100_8259_cascade(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	unsigned int cascade_irq = i8259_irq();
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 39a74fad3e04..9a83eb71b030 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -111,7 +111,7 @@ static void __init fwnmi_init(void)
 		fwnmi_active = 1;
 }
 
-static void pseries_8259_cascade(unsigned int irq, struct irq_desc *desc)
+static void pseries_8259_cascade(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	unsigned int cascade_irq = i8259_irq();
diff --git a/arch/powerpc/sysdev/ge/ge_pic.c b/arch/powerpc/sysdev/ge/ge_pic.c
index 2bcb78bb3a15..d57b77573068 100644
--- a/arch/powerpc/sysdev/ge/ge_pic.c
+++ b/arch/powerpc/sysdev/ge/ge_pic.c
@@ -91,7 +91,7 @@ static int gef_pic_cascade_irq;
  * should be masked out.
  */
 
-void gef_pic_cascade(unsigned int irq, struct irq_desc *desc)
+static void gef_pic_cascade(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	unsigned int cascade_irq;
diff --git a/arch/powerpc/sysdev/ge/ge_pic.h b/arch/powerpc/sysdev/ge/ge_pic.h
index 908dbd9826b6..5bf7e4b81e36 100644
--- a/arch/powerpc/sysdev/ge/ge_pic.h
+++ b/arch/powerpc/sysdev/ge/ge_pic.h
@@ -1,8 +1,6 @@
 #ifndef __GEF_PIC_H__
 #define __GEF_PIC_H__
 
-
-void gef_pic_cascade(unsigned int, struct irq_desc *);
 unsigned int gef_pic_get_irq(void);
 void gef_pic_init(struct device_node *);
 
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 97a8ae8f94dd..537e5db85a06 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -1181,7 +1181,7 @@ static int mpic_host_xlate(struct irq_domain *h, struct device_node *ct,
 }
 
 /* IRQ handler for a secondary MPIC cascaded from another IRQ controller */
-static void mpic_cascade(unsigned int irq, struct irq_desc *desc)
+static void mpic_cascade(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	struct mpic *mpic = irq_desc_get_handler_data(desc);
diff --git a/arch/powerpc/sysdev/qe_lib/qe_ic.c b/arch/powerpc/sysdev/qe_lib/qe_ic.c
index 47b352e4bc74..fbcc1f855a7f 100644
--- a/arch/powerpc/sysdev/qe_lib/qe_ic.c
+++ b/arch/powerpc/sysdev/qe_lib/qe_ic.c
@@ -311,8 +311,8 @@ unsigned int qe_ic_get_high_irq(struct qe_ic *qe_ic)
 }
 
 void __init qe_ic_init(struct device_node *node, unsigned int flags,
-		void (*low_handler)(unsigned int irq, struct irq_desc *desc),
-		void (*high_handler)(unsigned int irq, struct irq_desc *desc))
+		       void (*low_handler)(struct irq_desc *desc),
+		       void (*high_handler)(struct irq_desc *desc))
 {
 	struct qe_ic *qe_ic;
 	struct resource res;
diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c
index 57b54476e747..379de955aae3 100644
--- a/arch/powerpc/sysdev/tsi108_pci.c
+++ b/arch/powerpc/sysdev/tsi108_pci.c
@@ -428,7 +428,7 @@ void __init tsi108_pci_int_init(struct device_node *node)
 	init_pci_source();
 }
 
-void tsi108_irq_cascade(unsigned int irq, struct irq_desc *desc)
+void tsi108_irq_cascade(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	unsigned int cascade_irq = get_pci_source();
diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/sysdev/uic.c
index d77345338671..6893d8f236df 100644
--- a/arch/powerpc/sysdev/uic.c
+++ b/arch/powerpc/sysdev/uic.c
@@ -194,7 +194,7 @@ static const struct irq_domain_ops uic_host_ops = {
 	.xlate	= irq_domain_xlate_twocell,
 };
 
-void uic_irq_cascade(unsigned int virq, struct irq_desc *desc)
+static void uic_irq_cascade(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	struct irq_data *idata = irq_desc_get_irq_data(desc);
diff --git a/arch/powerpc/sysdev/xilinx_intc.c b/arch/powerpc/sysdev/xilinx_intc.c
index 43b8b275bc5c..0f52d7955796 100644
--- a/arch/powerpc/sysdev/xilinx_intc.c
+++ b/arch/powerpc/sysdev/xilinx_intc.c
@@ -222,7 +222,7 @@ int xilinx_intc_get_irq(void)
 /*
  * Support code for cascading to 8259 interrupt controllers
  */
-static void xilinx_i8259_cascade(unsigned int irq, struct irq_desc *desc)
+static void xilinx_i8259_cascade(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	unsigned int cascade_irq = i8259_irq();
diff --git a/arch/sh/boards/mach-se/7343/irq.c b/arch/sh/boards/mach-se/7343/irq.c
index 6f97a8f0d0d6..6129aef6db76 100644
--- a/arch/sh/boards/mach-se/7343/irq.c
+++ b/arch/sh/boards/mach-se/7343/irq.c
@@ -29,7 +29,7 @@
 static void __iomem *se7343_irq_regs;
 struct irq_domain *se7343_irq_domain;
 
-static void se7343_irq_demux(unsigned int irq, struct irq_desc *desc)
+static void se7343_irq_demux(struct irq_desc *desc)
 {
 	struct irq_data *data = irq_desc_get_irq_data(desc);
 	struct irq_chip *chip = irq_data_get_irq_chip(data);
diff --git a/arch/sh/boards/mach-se/7722/irq.c b/arch/sh/boards/mach-se/7722/irq.c
index 60aebd14ccf8..24c74a88290c 100644
--- a/arch/sh/boards/mach-se/7722/irq.c
+++ b/arch/sh/boards/mach-se/7722/irq.c
@@ -28,7 +28,7 @@
 static void __iomem *se7722_irq_regs;
 struct irq_domain *se7722_irq_domain;
 
-static void se7722_irq_demux(unsigned int irq, struct irq_desc *desc)
+static void se7722_irq_demux(struct irq_desc *desc)
 {
 	struct irq_data *data = irq_desc_get_irq_data(desc);
 	struct irq_chip *chip = irq_data_get_irq_chip(data);
diff --git a/arch/sh/boards/mach-se/7724/irq.c b/arch/sh/boards/mach-se/7724/irq.c
index 9f2033898652..64e681e66c57 100644
--- a/arch/sh/boards/mach-se/7724/irq.c
+++ b/arch/sh/boards/mach-se/7724/irq.c
@@ -92,7 +92,7 @@ static struct irq_chip se7724_irq_chip __read_mostly = {
 	.irq_unmask	= enable_se7724_irq,
 };
 
-static void se7724_irq_demux(unsigned int __irq, struct irq_desc *desc)
+static void se7724_irq_demux(struct irq_desc *desc)
 {
 	unsigned int irq = irq_desc_get_irq(desc);
 	struct fpga_irq set = get_fpga_irq(irq);
diff --git a/arch/sh/boards/mach-x3proto/gpio.c b/arch/sh/boards/mach-x3proto/gpio.c
index 24555c364d5b..1fb2cbee25f2 100644
--- a/arch/sh/boards/mach-x3proto/gpio.c
+++ b/arch/sh/boards/mach-x3proto/gpio.c
@@ -60,7 +60,7 @@ static int x3proto_gpio_to_irq(struct gpio_chip *chip, unsigned gpio)
 	return virq;
 }
 
-static void x3proto_gpio_irq_handler(unsigned int irq, struct irq_desc *desc)
+static void x3proto_gpio_irq_handler(struct irq_desc *desc)
 {
 	struct irq_data *data = irq_desc_get_irq_data(desc);
 	struct irq_chip *chip = irq_data_get_irq_chip(data);
diff --git a/arch/sh/cchips/hd6446x/hd64461.c b/arch/sh/cchips/hd6446x/hd64461.c
index e9735616bdc8..8180092502f7 100644
--- a/arch/sh/cchips/hd6446x/hd64461.c
+++ b/arch/sh/cchips/hd6446x/hd64461.c
@@ -56,7 +56,7 @@ static struct irq_chip hd64461_irq_chip = {
 	.irq_unmask	= hd64461_unmask_irq,
 };
 
-static void hd64461_irq_demux(unsigned int irq, struct irq_desc *desc)
+static void hd64461_irq_demux(struct irq_desc *desc)
 {
 	unsigned short intv = __raw_readw(HD64461_NIRR);
 	unsigned int ext_irq = HD64461_IRQBASE;
diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c
index 0299f052a2ef..42efcf85f721 100644
--- a/arch/sparc/kernel/leon_kernel.c
+++ b/arch/sparc/kernel/leon_kernel.c
@@ -53,7 +53,7 @@ static inline unsigned int leon_eirq_get(int cpu)
 }
 
 /* Handle one or multiple IRQs from the extended interrupt controller */
-static void leon_handle_ext_irq(unsigned int irq, struct irq_desc *desc)
+static void leon_handle_ext_irq(struct irq_desc *desc)
 {
 	unsigned int eirq;
 	struct irq_bucket *p;
diff --git a/arch/sparc/kernel/leon_pci_grpci1.c b/arch/sparc/kernel/leon_pci_grpci1.c
index 3382f7b3eeef..1e77128a8f88 100644
--- a/arch/sparc/kernel/leon_pci_grpci1.c
+++ b/arch/sparc/kernel/leon_pci_grpci1.c
@@ -357,7 +357,7 @@ static struct irq_chip grpci1_irq = {
 };
 
 /* Handle one or multiple IRQs from the PCI core */
-static void grpci1_pci_flow_irq(unsigned int irq, struct irq_desc *desc)
+static void grpci1_pci_flow_irq(struct irq_desc *desc)
 {
 	struct grpci1_priv *priv = grpci1priv;
 	int i, ack = 0;
diff --git a/arch/sparc/kernel/leon_pci_grpci2.c b/arch/sparc/kernel/leon_pci_grpci2.c
index 814fb1729b12..f727c4de1316 100644
--- a/arch/sparc/kernel/leon_pci_grpci2.c
+++ b/arch/sparc/kernel/leon_pci_grpci2.c
@@ -498,7 +498,7 @@ static struct irq_chip grpci2_irq = {
 };
 
 /* Handle one or multiple IRQs from the PCI core */
-static void grpci2_pci_flow_irq(unsigned int irq, struct irq_desc *desc)
+static void grpci2_pci_flow_irq(struct irq_desc *desc)
 {
 	struct grpci2_priv *priv = grpci2priv;
 	int i, ack = 0;
diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c
index b3f73fd764a3..4c017d0d2de8 100644
--- a/arch/tile/kernel/pci_gx.c
+++ b/arch/tile/kernel/pci_gx.c
@@ -304,17 +304,16 @@ static struct irq_chip tilegx_legacy_irq_chip = {
  * to Linux which just calls handle_level_irq() after clearing the
  * MAC INTx Assert status bit associated with this interrupt.
  */
-static void trio_handle_level_irq(unsigned int __irq, struct irq_desc *desc)
+static void trio_handle_level_irq(struct irq_desc *desc)
 {
 	struct pci_controller *controller = irq_desc_get_handler_data(desc);
 	gxio_trio_context_t *trio_context = controller->trio;
 	uint64_t intx = (uint64_t)irq_desc_get_chip_data(desc);
-	unsigned int irq = irq_desc_get_irq(desc);
 	int mac = controller->mac;
 	unsigned int reg_offset;
 	uint64_t level_mask;
 
-	handle_level_irq(irq, desc);
+	handle_level_irq(desc);
 
 	/*
 	 * Clear the INTx Level status, otherwise future interrupts are
diff --git a/arch/unicore32/kernel/irq.c b/arch/unicore32/kernel/irq.c
index c53729d92e8d..eb1fd0030359 100644
--- a/arch/unicore32/kernel/irq.c
+++ b/arch/unicore32/kernel/irq.c
@@ -112,7 +112,7 @@ static struct irq_chip puv3_low_gpio_chip = {
  * irq_controller_lock held, and IRQs disabled.  Decode the IRQ
  * and call the handler.
  */
-static void puv3_gpio_handler(unsigned int __irq, struct irq_desc *desc)
+static void puv3_gpio_handler(struct irq_desc *desc)
 {
 	unsigned int mask, irq;
 
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index c80cf6699678..38da8f29a9c8 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -68,11 +68,10 @@ static inline void *current_stack(void)
 	return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1));
 }
 
-static inline int
-execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
+static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
 {
 	struct irq_stack *curstk, *irqstk;
-	u32 *isp, *prev_esp, arg1, arg2;
+	u32 *isp, *prev_esp, arg1;
 
 	curstk = (struct irq_stack *) current_stack();
 	irqstk = __this_cpu_read(hardirq_stack);
@@ -98,8 +97,8 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
 	asm volatile("xchgl	%%ebx,%%esp	\n"
 		     "call	*%%edi		\n"
 		     "movl	%%ebx,%%esp	\n"
-		     : "=a" (arg1), "=d" (arg2), "=b" (isp)
-		     :  "0" (irq),   "1" (desc),  "2" (isp),
+		     : "=a" (arg1), "=b" (isp)
+		     :  "0" (desc),   "1" (isp),
 			"D" (desc->handle_irq)
 		     : "memory", "cc", "ecx");
 	return 1;
@@ -150,19 +149,15 @@ void do_softirq_own_stack(void)
 
 bool handle_irq(struct irq_desc *desc, struct pt_regs *regs)
 {
-	unsigned int irq;
-	int overflow;
-
-	overflow = check_stack_overflow();
+	int overflow = check_stack_overflow();
 
 	if (IS_ERR_OR_NULL(desc))
 		return false;
 
-	irq = irq_desc_get_irq(desc);
-	if (user_mode(regs) || !execute_on_irq_stack(overflow, desc, irq)) {
+	if (user_mode(regs) || !execute_on_irq_stack(overflow, desc)) {
 		if (unlikely(overflow))
 			print_stack_overflow();
-		generic_handle_irq_desc(irq, desc);
+		generic_handle_irq_desc(desc);
 	}
 
 	return true;
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index ff16ccb918f2..c767cf2bc80a 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -75,6 +75,6 @@ bool handle_irq(struct irq_desc *desc, struct pt_regs *regs)
 	if (unlikely(IS_ERR_OR_NULL(desc)))
 		return false;
 
-	generic_handle_irq_desc(irq_desc_get_irq(desc), desc);
+	generic_handle_irq_desc(desc);
 	return true;
 }
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 161804de124a..a0d09f6c6533 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1015,7 +1015,7 @@ static struct clock_event_device lguest_clockevent = {
  * This is the Guest timer interrupt handler (hardware interrupt 0).  We just
  * call the clockevent infrastructure and it does whatever needs doing.
  */
-static void lguest_time_irq(unsigned int irq, struct irq_desc *desc)
+static void lguest_time_irq(struct irq_desc *desc)
 {
 	unsigned long flags;
 
diff --git a/drivers/dma/ipu/ipu_irq.c b/drivers/dma/ipu/ipu_irq.c
index 4768a829253a..2bf37e68ad0f 100644
--- a/drivers/dma/ipu/ipu_irq.c
+++ b/drivers/dma/ipu/ipu_irq.c
@@ -266,7 +266,7 @@ int ipu_irq_unmap(unsigned int source)
 }
 
 /* Chained IRQ handler for IPU function and error interrupt */
-static void ipu_irq_handler(unsigned int __irq, struct irq_desc *desc)
+static void ipu_irq_handler(struct irq_desc *desc)
 {
 	struct ipu *ipu = irq_desc_get_handler_data(desc);
 	u32 status;
diff --git a/drivers/gpio/gpio-altera.c b/drivers/gpio/gpio-altera.c
index 9b7e0b3db387..1b44941574fa 100644
--- a/drivers/gpio/gpio-altera.c
+++ b/drivers/gpio/gpio-altera.c
@@ -201,8 +201,7 @@ static int altera_gpio_direction_output(struct gpio_chip *gc,
 	return 0;
 }
 
-static void altera_gpio_irq_edge_handler(unsigned int irq,
-					struct irq_desc *desc)
+static void altera_gpio_irq_edge_handler(struct irq_desc *desc)
 {
 	struct altera_gpio_chip *altera_gc;
 	struct irq_chip *chip;
@@ -231,8 +230,7 @@ static void altera_gpio_irq_edge_handler(unsigned int irq,
 }
 
 
-static void altera_gpio_irq_leveL_high_handler(unsigned int irq,
-					      struct irq_desc *desc)
+static void altera_gpio_irq_leveL_high_handler(struct irq_desc *desc)
 {
 	struct altera_gpio_chip *altera_gc;
 	struct irq_chip *chip;
diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c
index 31b90ac15204..33a1f9779b86 100644
--- a/drivers/gpio/gpio-bcm-kona.c
+++ b/drivers/gpio/gpio-bcm-kona.c
@@ -433,7 +433,7 @@ static int bcm_kona_gpio_irq_set_type(struct irq_data *d, unsigned int type)
 	return 0;
 }
 
-static void bcm_kona_gpio_irq_handler(unsigned int irq, struct irq_desc *desc)
+static void bcm_kona_gpio_irq_handler(struct irq_desc *desc)
 {
 	void __iomem *reg_base;
 	int bit, bank_id;
diff --git a/drivers/gpio/gpio-brcmstb.c b/drivers/gpio/gpio-brcmstb.c
index 9ea86d2ac054..4c64627c6bb5 100644
--- a/drivers/gpio/gpio-brcmstb.c
+++ b/drivers/gpio/gpio-brcmstb.c
@@ -236,7 +236,7 @@ static void brcmstb_gpio_irq_bank_handler(struct brcmstb_gpio_bank *bank)
 }
 
 /* Each UPG GIO block has one IRQ for all banks */
-static void brcmstb_gpio_irq_handler(unsigned int irq, struct irq_desc *desc)
+static void brcmstb_gpio_irq_handler(struct irq_desc *desc)
 {
 	struct gpio_chip *gc = irq_desc_get_handler_data(desc);
 	struct brcmstb_gpio_priv *priv = brcmstb_gpio_gc_to_priv(gc);
diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c
index 94b0ab709721..5e715388803d 100644
--- a/drivers/gpio/gpio-davinci.c
+++ b/drivers/gpio/gpio-davinci.c
@@ -326,8 +326,7 @@ static struct irq_chip gpio_irqchip = {
 	.flags		= IRQCHIP_SET_TYPE_MASKED,
 };
 
-static void
-gpio_irq_handler(unsigned __irq, struct irq_desc *desc)
+static void gpio_irq_handler(struct irq_desc *desc)
 {
 	unsigned int irq = irq_desc_get_irq(desc);
 	struct davinci_gpio_regs __iomem *g;
diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c
index c5be4b9b8baf..fcd5b0acfc72 100644
--- a/drivers/gpio/gpio-dwapb.c
+++ b/drivers/gpio/gpio-dwapb.c
@@ -147,7 +147,7 @@ static u32 dwapb_do_irq(struct dwapb_gpio *gpio)
 	return ret;
 }
 
-static void dwapb_irq_handler(u32 irq, struct irq_desc *desc)
+static void dwapb_irq_handler(struct irq_desc *desc)
 {
 	struct dwapb_gpio *gpio = irq_desc_get_handler_data(desc);
 	struct irq_chip *chip = irq_desc_get_chip(desc);
diff --git a/drivers/gpio/gpio-ep93xx.c b/drivers/gpio/gpio-ep93xx.c
index 9d90366ea259..3e3947b35c83 100644
--- a/drivers/gpio/gpio-ep93xx.c
+++ b/drivers/gpio/gpio-ep93xx.c
@@ -78,7 +78,7 @@ static void ep93xx_gpio_int_debounce(unsigned int irq, bool enable)
 		EP93XX_GPIO_REG(int_debounce_register_offset[port]));
 }
 
-static void ep93xx_gpio_ab_irq_handler(unsigned int irq, struct irq_desc *desc)
+static void ep93xx_gpio_ab_irq_handler(struct irq_desc *desc)
 {
 	unsigned char status;
 	int i;
@@ -100,8 +100,7 @@ static void ep93xx_gpio_ab_irq_handler(unsigned int irq, struct irq_desc *desc)
 	}
 }
 
-static void ep93xx_gpio_f_irq_handler(unsigned int __irq,
-				      struct irq_desc *desc)
+static void ep93xx_gpio_f_irq_handler(struct irq_desc *desc)
 {
 	/*
 	 * map discontiguous hw irq range to continuous sw irq range:
diff --git a/drivers/gpio/gpio-intel-mid.c b/drivers/gpio/gpio-intel-mid.c
index aa28c65eb6b4..70097472b02c 100644
--- a/drivers/gpio/gpio-intel-mid.c
+++ b/drivers/gpio/gpio-intel-mid.c
@@ -301,7 +301,7 @@ static const struct pci_device_id intel_gpio_ids[] = {
 };
 MODULE_DEVICE_TABLE(pci, intel_gpio_ids);
 
-static void intel_mid_irq_handler(unsigned irq, struct irq_desc *desc)
+static void intel_mid_irq_handler(struct irq_desc *desc)
 {
 	struct gpio_chip *gc = irq_desc_get_handler_data(desc);
 	struct intel_mid_gpio *priv = to_intel_gpio_priv(gc);
diff --git a/drivers/gpio/gpio-lynxpoint.c b/drivers/gpio/gpio-lynxpoint.c
index 153af464c7a7..127c37b380ae 100644
--- a/drivers/gpio/gpio-lynxpoint.c
+++ b/drivers/gpio/gpio-lynxpoint.c
@@ -234,7 +234,7 @@ static int lp_gpio_direction_output(struct gpio_chip *chip,
 	return 0;
 }
 
-static void lp_gpio_irq_handler(unsigned hwirq, struct irq_desc *desc)
+static void lp_gpio_irq_handler(struct irq_desc *desc)
 {
 	struct irq_data *data = irq_desc_get_irq_data(desc);
 	struct gpio_chip *gc = irq_desc_get_handler_data(desc);
diff --git a/drivers/gpio/gpio-mpc8xxx.c b/drivers/gpio/gpio-mpc8xxx.c
index 8ef7a12de983..48ef368347ab 100644
--- a/drivers/gpio/gpio-mpc8xxx.c
+++ b/drivers/gpio/gpio-mpc8xxx.c
@@ -194,7 +194,7 @@ static int mpc8xxx_gpio_to_irq(struct gpio_chip *gc, unsigned offset)
 		return -ENXIO;
 }
 
-static void mpc8xxx_gpio_irq_cascade(unsigned int irq, struct irq_desc *desc)
+static void mpc8xxx_gpio_irq_cascade(struct irq_desc *desc)
 {
 	struct mpc8xxx_gpio_chip *mpc8xxx_gc = irq_desc_get_handler_data(desc);
 	struct irq_chip *chip = irq_desc_get_chip(desc);
diff --git a/drivers/gpio/gpio-msic.c b/drivers/gpio/gpio-msic.c
index 7bcfb87a5fa6..22523aae8abe 100644
--- a/drivers/gpio/gpio-msic.c
+++ b/drivers/gpio/gpio-msic.c
@@ -232,7 +232,7 @@ static struct irq_chip msic_irqchip = {
 	.irq_bus_sync_unlock	= msic_bus_sync_unlock,
 };
 
-static void msic_gpio_irq_handler(unsigned irq, struct irq_desc *desc)
+static void msic_gpio_irq_handler(struct irq_desc *desc)
 {
 	struct irq_data *data = irq_desc_get_irq_data(desc);
 	struct msic_gpio *mg = irq_data_get_irq_handler_data(data);
diff --git a/drivers/gpio/gpio-msm-v2.c b/drivers/gpio/gpio-msm-v2.c
index d2012cfb5571..4b4222145f10 100644
--- a/drivers/gpio/gpio-msm-v2.c
+++ b/drivers/gpio/gpio-msm-v2.c
@@ -305,7 +305,7 @@ static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int flow_type)
  * which have been set as summary IRQ lines and which are triggered,
  * and to call their interrupt handlers.
  */
-static void msm_summary_irq_handler(unsigned int irq, struct irq_desc *desc)
+static void msm_summary_irq_handler(struct irq_desc *desc)
 {
 	unsigned long i;
 	struct irq_chip *chip = irq_desc_get_chip(desc);
diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c
index b396bf3bf294..df418b81456d 100644
--- a/drivers/gpio/gpio-mvebu.c
+++ b/drivers/gpio/gpio-mvebu.c
@@ -458,7 +458,7 @@ static int mvebu_gpio_irq_set_type(struct irq_data *d, unsigned int type)
 	return 0;
 }
 
-static void mvebu_gpio_irq_handler(unsigned int __irq, struct irq_desc *desc)
+static void mvebu_gpio_irq_handler(struct irq_desc *desc)
 {
 	struct mvebu_gpio_chip *mvchip = irq_desc_get_handler_data(desc);
 	struct irq_chip *chip = irq_desc_get_chip(desc);
diff --git a/drivers/gpio/gpio-mxc.c b/drivers/gpio/gpio-mxc.c
index b752b560126e..5e3235a73bf9 100644
--- a/drivers/gpio/gpio-mxc.c
+++ b/drivers/gpio/gpio-mxc.c
@@ -272,7 +272,7 @@ static void mxc_gpio_irq_handler(struct mxc_gpio_port *port, u32 irq_stat)
 }
 
 /* MX1 and MX3 has one interrupt *per* gpio port */
-static void mx3_gpio_irq_handler(u32 irq, struct irq_desc *desc)
+static void mx3_gpio_irq_handler(struct irq_desc *desc)
 {
 	u32 irq_stat;
 	struct mxc_gpio_port *port = irq_desc_get_handler_data(desc);
@@ -288,7 +288,7 @@ static void mx3_gpio_irq_handler(u32 irq, struct irq_desc *desc)
 }
 
 /* MX2 has one interrupt *for all* gpio ports */
-static void mx2_gpio_irq_handler(u32 irq, struct irq_desc *desc)
+static void mx2_gpio_irq_handler(struct irq_desc *desc)
 {
 	u32 irq_msk, irq_stat;
 	struct mxc_gpio_port *port;
diff --git a/drivers/gpio/gpio-mxs.c b/drivers/gpio/gpio-mxs.c
index b7f383eb18d9..b7763f078b23 100644
--- a/drivers/gpio/gpio-mxs.c
+++ b/drivers/gpio/gpio-mxs.c
@@ -154,7 +154,7 @@ static void mxs_flip_edge(struct mxs_gpio_port *port, u32 gpio)
 }
 
 /* MXS has one interrupt *per* gpio port */
-static void mxs_gpio_irq_handler(u32 irq, struct irq_desc *desc)
+static void mxs_gpio_irq_handler(struct irq_desc *desc)
 {
 	u32 irq_stat;
 	struct mxs_gpio_port *port = irq_desc_get_handler_data(desc);
diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c
index 2ae0d47e9554..9df014c0e3e4 100644
--- a/drivers/gpio/gpio-omap.c
+++ b/drivers/gpio/gpio-omap.c
@@ -709,7 +709,7 @@ static void omap_gpio_free(struct gpio_chip *chip, unsigned offset)
  * line's interrupt handler has been run, we may miss some nested
  * interrupts.
  */
-static void omap_gpio_irq_handler(unsigned int irq, struct irq_desc *desc)
+static void omap_gpio_irq_handler(struct irq_desc *desc)
 {
 	void __iomem *isr_reg = NULL;
 	u32 isr;
diff --git a/drivers/gpio/gpio-pl061.c b/drivers/gpio/gpio-pl061.c
index 04756130437f..229ef653e0f8 100644
--- a/drivers/gpio/gpio-pl061.c
+++ b/drivers/gpio/gpio-pl061.c
@@ -187,7 +187,7 @@ static int pl061_irq_type(struct irq_data *d, unsigned trigger)
 	return 0;
 }
 
-static void pl061_irq_handler(unsigned irq, struct irq_desc *desc)
+static void pl061_irq_handler(struct irq_desc *desc)
 {
 	unsigned long pending;
 	int offset;
diff --git a/drivers/gpio/gpio-pxa.c b/drivers/gpio/gpio-pxa.c
index 55a11de3d5b7..df2ce550f309 100644
--- a/drivers/gpio/gpio-pxa.c
+++ b/drivers/gpio/gpio-pxa.c
@@ -401,7 +401,7 @@ static int pxa_gpio_irq_type(struct irq_data *d, unsigned int type)
 	return 0;
 }
 
-static void pxa_gpio_demux_handler(unsigned int irq, struct irq_desc *desc)
+static void pxa_gpio_demux_handler(struct irq_desc *desc)
 {
 	struct pxa_gpio_chip *c;
 	int loop, gpio, gpio_base, n;
diff --git a/drivers/gpio/gpio-sa1100.c b/drivers/gpio/gpio-sa1100.c
index 67bd2f5d89e8..990fa9023e22 100644
--- a/drivers/gpio/gpio-sa1100.c
+++ b/drivers/gpio/gpio-sa1100.c
@@ -172,8 +172,7 @@ static struct irq_domain *sa1100_gpio_irqdomain;
  * irq_controller_lock held, and IRQs disabled.  Decode the IRQ
  * and call the handler.
  */
-static void
-sa1100_gpio_handler(unsigned int __irq, struct irq_desc *desc)
+static void sa1100_gpio_handler(struct irq_desc *desc)
 {
 	unsigned int irq, mask;
 
diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c
index 9b14aafb576d..027e5f47dd28 100644
--- a/drivers/gpio/gpio-tegra.c
+++ b/drivers/gpio/gpio-tegra.c
@@ -266,7 +266,7 @@ static void tegra_gpio_irq_shutdown(struct irq_data *d)
 	gpiochip_unlock_as_irq(&tegra_gpio_chip, gpio);
 }
 
-static void tegra_gpio_irq_handler(unsigned int irq, struct irq_desc *desc)
+static void tegra_gpio_irq_handler(struct irq_desc *desc)
 {
 	int port;
 	int pin;
diff --git a/drivers/gpio/gpio-timberdale.c b/drivers/gpio/gpio-timberdale.c
index 5a492054589f..30653e6319e9 100644
--- a/drivers/gpio/gpio-timberdale.c
+++ b/drivers/gpio/gpio-timberdale.c
@@ -192,7 +192,7 @@ out:
 	return ret;
 }
 
-static void timbgpio_irq(unsigned int irq, struct irq_desc *desc)
+static void timbgpio_irq(struct irq_desc *desc)
 {
 	struct timbgpio *tgpio = irq_desc_get_handler_data(desc);
 	struct irq_data *data = irq_desc_get_irq_data(desc);
diff --git a/drivers/gpio/gpio-tz1090.c b/drivers/gpio/gpio-tz1090.c
index bbac92ae4c32..87bb1b1eee8d 100644
--- a/drivers/gpio/gpio-tz1090.c
+++ b/drivers/gpio/gpio-tz1090.c
@@ -375,7 +375,7 @@ static int gpio_set_irq_wake(struct irq_data *data, unsigned int on)
 #define gpio_set_irq_wake NULL
 #endif
 
-static void tz1090_gpio_irq_handler(unsigned int irq, struct irq_desc *desc)
+static void tz1090_gpio_irq_handler(struct irq_desc *desc)
 {
 	irq_hw_number_t hw;
 	unsigned int irq_stat, irq_no;
@@ -400,7 +400,7 @@ static void tz1090_gpio_irq_handler(unsigned int irq, struct irq_desc *desc)
 				== IRQ_TYPE_EDGE_BOTH)
 			tz1090_gpio_irq_next_edge(bank, hw);
 
-		generic_handle_irq_desc(irq_no, child_desc);
+		generic_handle_irq_desc(child_desc);
 	}
 }
 
diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c
index 7a6640b51911..069f9e4b7daa 100644
--- a/drivers/gpio/gpio-vf610.c
+++ b/drivers/gpio/gpio-vf610.c
@@ -120,7 +120,7 @@ static int vf610_gpio_direction_output(struct gpio_chip *chip, unsigned gpio,
 	return pinctrl_gpio_direction_output(chip->base + gpio);
 }
 
-static void vf610_gpio_irq_handler(u32 irq, struct irq_desc *desc)
+static void vf610_gpio_irq_handler(struct irq_desc *desc)
 {
 	struct vf610_gpio_port *port = irq_desc_get_handler_data(desc);
 	struct irq_chip *chip = irq_desc_get_chip(desc);
diff --git a/drivers/gpio/gpio-zx.c b/drivers/gpio/gpio-zx.c
index 12ee1969298c..4b8a26910705 100644
--- a/drivers/gpio/gpio-zx.c
+++ b/drivers/gpio/gpio-zx.c
@@ -177,7 +177,7 @@ static int zx_irq_type(struct irq_data *d, unsigned trigger)
 	return 0;
 }
 
-static void zx_irq_handler(unsigned irq, struct irq_desc *desc)
+static void zx_irq_handler(struct irq_desc *desc)
 {
 	unsigned long pending;
 	int offset;
diff --git a/drivers/gpio/gpio-zynq.c b/drivers/gpio/gpio-zynq.c
index 27348e7cb705..1d1a5865ede9 100644
--- a/drivers/gpio/gpio-zynq.c
+++ b/drivers/gpio/gpio-zynq.c
@@ -514,7 +514,7 @@ static void zynq_gpio_handle_bank_irq(struct zynq_gpio *gpio,
  * application for that pin.
  * Note: A bug is reported if no handler is set for the gpio pin.
  */
-static void zynq_gpio_irqhandler(unsigned int irq, struct irq_desc *desc)
+static void zynq_gpio_irqhandler(struct irq_desc *desc)
 {
 	u32 int_sts, int_enb;
 	unsigned int bank_num;
diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c
index 243f99a80253..649d03be2f38 100644
--- a/drivers/gpu/ipu-v3/ipu-common.c
+++ b/drivers/gpu/ipu-v3/ipu-common.c
@@ -912,7 +912,7 @@ static void ipu_irq_handle(struct ipu_soc *ipu, const int *regs, int num_regs)
 	}
 }
 
-static void ipu_irq_handler(unsigned int irq, struct irq_desc *desc)
+static void ipu_irq_handler(struct irq_desc *desc)
 {
 	struct ipu_soc *ipu = irq_desc_get_handler_data(desc);
 	struct irq_chip *chip = irq_desc_get_chip(desc);
@@ -925,7 +925,7 @@ static void ipu_irq_handler(unsigned int irq, struct irq_desc *desc)
 	chained_irq_exit(chip, desc);
 }
 
-static void ipu_err_irq_handler(unsigned int irq, struct irq_desc *desc)
+static void ipu_err_irq_handler(struct irq_desc *desc)
 {
 	struct ipu_soc *ipu = irq_desc_get_handler_data(desc);
 	struct irq_chip *chip = irq_desc_get_chip(desc);
diff --git a/drivers/irqchip/exynos-combiner.c b/drivers/irqchip/exynos-combiner.c
index e9c6f2a5b52d..94ddc96f2f7d 100644
--- a/drivers/irqchip/exynos-combiner.c
+++ b/drivers/irqchip/exynos-combiner.c
@@ -65,12 +65,10 @@ static void combiner_unmask_irq(struct irq_data *data)
 	__raw_writel(mask, combiner_base(data) + COMBINER_ENABLE_SET);
 }
 
-static void combiner_handle_cascade_irq(unsigned int __irq,
-					struct irq_desc *desc)
+static void combiner_handle_cascade_irq(struct irq_desc *desc)
 {
 	struct combiner_chip_data *chip_data = irq_desc_get_handler_data(desc);
 	struct irq_chip *chip = irq_desc_get_chip(desc);
-	unsigned int irq = irq_desc_get_irq(desc);
 	unsigned int cascade_irq, combiner_irq;
 	unsigned long status;
 
@@ -88,7 +86,7 @@ static void combiner_handle_cascade_irq(unsigned int __irq,
 	cascade_irq = irq_find_mapping(combiner_irq_domain, combiner_irq);
 
 	if (unlikely(!cascade_irq))
-		handle_bad_irq(irq, desc);
+		handle_bad_irq(desc);
 	else
 		generic_handle_irq(cascade_irq);
 
diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c
index 39b72da0c143..693b9fb879bd 100644
--- a/drivers/irqchip/irq-armada-370-xp.c
+++ b/drivers/irqchip/irq-armada-370-xp.c
@@ -447,8 +447,7 @@ static void armada_370_xp_handle_msi_irq(struct pt_regs *regs, bool is_chained)
 static void armada_370_xp_handle_msi_irq(struct pt_regs *r, bool b) {}
 #endif
 
-static void armada_370_xp_mpic_handle_cascade_irq(unsigned int irq,
-						  struct irq_desc *desc)
+static void armada_370_xp_mpic_handle_cascade_irq(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	unsigned long irqmap, irqn, irqsrc, cpuid;
diff --git a/drivers/irqchip/irq-bcm2835.c b/drivers/irqchip/irq-bcm2835.c
index ed4ca9deca70..56c9cf488e6c 100644
--- a/drivers/irqchip/irq-bcm2835.c
+++ b/drivers/irqchip/irq-bcm2835.c
@@ -96,7 +96,7 @@ struct armctrl_ic {
 static struct armctrl_ic intc __read_mostly;
 static void __exception_irq_entry bcm2835_handle_irq(
 	struct pt_regs *regs);
-static void bcm2836_chained_handle_irq(unsigned int irq, struct irq_desc *desc);
+static void bcm2836_chained_handle_irq(struct irq_desc *desc);
 
 static void armctrl_mask_irq(struct irq_data *d)
 {
@@ -245,7 +245,7 @@ static void __exception_irq_entry bcm2835_handle_irq(
 		handle_IRQ(irq_linear_revmap(intc.domain, hwirq), regs);
 }
 
-static void bcm2836_chained_handle_irq(unsigned int irq, struct irq_desc *desc)
+static void bcm2836_chained_handle_irq(struct irq_desc *desc)
 {
 	u32 hwirq;
 
diff --git a/drivers/irqchip/irq-bcm7038-l1.c b/drivers/irqchip/irq-bcm7038-l1.c
index 409bdc6366c2..0fea985ef1dc 100644
--- a/drivers/irqchip/irq-bcm7038-l1.c
+++ b/drivers/irqchip/irq-bcm7038-l1.c
@@ -115,7 +115,7 @@ static inline void l1_writel(u32 val, void __iomem *reg)
 		writel(val, reg);
 }
 
-static void bcm7038_l1_irq_handle(unsigned int irq, struct irq_desc *desc)
+static void bcm7038_l1_irq_handle(struct irq_desc *desc)
 {
 	struct bcm7038_l1_chip *intc = irq_desc_get_handler_data(desc);
 	struct bcm7038_l1_cpu *cpu;
diff --git a/drivers/irqchip/irq-bcm7120-l2.c b/drivers/irqchip/irq-bcm7120-l2.c
index d3f976913a6f..61b18ab33ad9 100644
--- a/drivers/irqchip/irq-bcm7120-l2.c
+++ b/drivers/irqchip/irq-bcm7120-l2.c
@@ -56,7 +56,7 @@ struct bcm7120_l2_intc_data {
 	const __be32 *map_mask_prop;
 };
 
-static void bcm7120_l2_intc_irq_handle(unsigned int irq, struct irq_desc *desc)
+static void bcm7120_l2_intc_irq_handle(struct irq_desc *desc)
 {
 	struct bcm7120_l1_intc_data *data = irq_desc_get_handler_data(desc);
 	struct bcm7120_l2_intc_data *b = data->b;
diff --git a/drivers/irqchip/irq-brcmstb-l2.c b/drivers/irqchip/irq-brcmstb-l2.c
index aedda06191eb..65cd341f331a 100644
--- a/drivers/irqchip/irq-brcmstb-l2.c
+++ b/drivers/irqchip/irq-brcmstb-l2.c
@@ -49,13 +49,12 @@ struct brcmstb_l2_intc_data {
 	u32 saved_mask; /* for suspend/resume */
 };
 
-static void brcmstb_l2_intc_irq_handle(unsigned int __irq,
-				       struct irq_desc *desc)
+static void brcmstb_l2_intc_irq_handle(struct irq_desc *desc)
 {
 	struct brcmstb_l2_intc_data *b = irq_desc_get_handler_data(desc);
 	struct irq_chip_generic *gc = irq_get_domain_generic_chip(b->domain, 0);
 	struct irq_chip *chip = irq_desc_get_chip(desc);
-	unsigned int irq = irq_desc_get_irq(desc);
+	unsigned int irq;
 	u32 status;
 
 	chained_irq_enter(chip, desc);
@@ -65,7 +64,7 @@ static void brcmstb_l2_intc_irq_handle(unsigned int __irq,
 
 	if (status == 0) {
 		raw_spin_lock(&desc->lock);
-		handle_bad_irq(irq, desc);
+		handle_bad_irq(desc);
 		raw_spin_unlock(&desc->lock);
 		goto out;
 	}
diff --git a/drivers/irqchip/irq-dw-apb-ictl.c b/drivers/irqchip/irq-dw-apb-ictl.c
index efd95d9955e7..052f266364c0 100644
--- a/drivers/irqchip/irq-dw-apb-ictl.c
+++ b/drivers/irqchip/irq-dw-apb-ictl.c
@@ -26,7 +26,7 @@
 #define APB_INT_FINALSTATUS_H	0x34
 #define APB_INT_BASE_OFFSET	0x04
 
-static void dw_apb_ictl_handler(unsigned int irq, struct irq_desc *desc)
+static void dw_apb_ictl_handler(struct irq_desc *desc)
 {
 	struct irq_domain *d = irq_desc_get_handler_data(desc);
 	struct irq_chip *chip = irq_desc_get_chip(desc);
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 9bccdd295769..00bb7c05a55e 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -341,7 +341,7 @@ static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
 	} while (1);
 }
 
-static void gic_handle_cascade_irq(unsigned int irq, struct irq_desc *desc)
+static void gic_handle_cascade_irq(struct irq_desc *desc)
 {
 	struct gic_chip_data *chip_data = irq_desc_get_handler_data(desc);
 	struct irq_chip *chip = irq_desc_get_chip(desc);
@@ -360,7 +360,7 @@ static void gic_handle_cascade_irq(unsigned int irq, struct irq_desc *desc)
 
 	cascade_irq = irq_find_mapping(chip_data->domain, gic_irq);
 	if (unlikely(gic_irq < 32 || gic_irq > 1020))
-		handle_bad_irq(cascade_irq, desc);
+		handle_bad_irq(desc);
 	else
 		generic_handle_irq(cascade_irq);
 
diff --git a/drivers/irqchip/irq-i8259.c b/drivers/irqchip/irq-i8259.c
index 4836102ba312..e484fd255321 100644
--- a/drivers/irqchip/irq-i8259.c
+++ b/drivers/irqchip/irq-i8259.c
@@ -352,7 +352,7 @@ void __init init_i8259_irqs(void)
 	__init_i8259_irqs(NULL);
 }
 
-static void i8259_irq_dispatch(unsigned int __irq, struct irq_desc *desc)
+static void i8259_irq_dispatch(struct irq_desc *desc)
 {
 	struct irq_domain *domain = irq_desc_get_handler_data(desc);
 	int hwirq = i8259_irq();
diff --git a/drivers/irqchip/irq-imgpdc.c b/drivers/irqchip/irq-imgpdc.c
index 841604b81004..c02d29c9dc05 100644
--- a/drivers/irqchip/irq-imgpdc.c
+++ b/drivers/irqchip/irq-imgpdc.c
@@ -218,7 +218,7 @@ static int pdc_irq_set_wake(struct irq_data *data, unsigned int on)
 	return 0;
 }
 
-static void pdc_intc_perip_isr(unsigned int __irq, struct irq_desc *desc)
+static void pdc_intc_perip_isr(struct irq_desc *desc)
 {
 	unsigned int irq = irq_desc_get_irq(desc);
 	struct pdc_intc_priv *priv;
@@ -240,7 +240,7 @@ found:
 	generic_handle_irq(irq_no);
 }
 
-static void pdc_intc_syswake_isr(unsigned int irq, struct irq_desc *desc)
+static void pdc_intc_syswake_isr(struct irq_desc *desc)
 {
 	struct pdc_intc_priv *priv;
 	unsigned int syswake, irq_no;
diff --git a/drivers/irqchip/irq-keystone.c b/drivers/irqchip/irq-keystone.c
index c1517267b5db..7b784b692f3c 100644
--- a/drivers/irqchip/irq-keystone.c
+++ b/drivers/irqchip/irq-keystone.c
@@ -83,7 +83,7 @@ static void keystone_irq_ack(struct irq_data *d)
 	/* nothing to do here */
 }
 
-static void keystone_irq_handler(unsigned __irq, struct irq_desc *desc)
+static void keystone_irq_handler(struct irq_desc *desc)
 {
 	unsigned int irq = irq_desc_get_irq(desc);
 	struct keystone_irq_device *kirq = irq_desc_get_handler_data(desc);
diff --git a/drivers/irqchip/irq-metag-ext.c b/drivers/irqchip/irq-metag-ext.c
index 5f4c52928d16..8c38b3d92e1c 100644
--- a/drivers/irqchip/irq-metag-ext.c
+++ b/drivers/irqchip/irq-metag-ext.c
@@ -446,7 +446,7 @@ static int meta_intc_irq_set_type(struct irq_data *data, unsigned int flow_type)
  * Whilst using TR2 to detect external interrupts is a software convention it is
  * (hopefully) unlikely to change.
  */
-static void meta_intc_irq_demux(unsigned int irq, struct irq_desc *desc)
+static void meta_intc_irq_demux(struct irq_desc *desc)
 {
 	struct meta_intc_priv *priv = &meta_intc_priv;
 	irq_hw_number_t hw;
diff --git a/drivers/irqchip/irq-metag.c b/drivers/irqchip/irq-metag.c
index 3d23ce3edb5c..a5f053bd2f44 100644
--- a/drivers/irqchip/irq-metag.c
+++ b/drivers/irqchip/irq-metag.c
@@ -220,7 +220,7 @@ static int metag_internal_irq_set_affinity(struct irq_data *data,
  *	occurred. It is this function's job to demux this irq and
  *	figure out exactly which trigger needs servicing.
  */
-static void metag_internal_irq_demux(unsigned int irq, struct irq_desc *desc)
+static void metag_internal_irq_demux(struct irq_desc *desc)
 {
 	struct metag_internal_irq_priv *priv = irq_desc_get_handler_data(desc);
 	irq_hw_number_t hw;
diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index 1764bcf8ee6b..af2f16bb8a94 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -546,7 +546,7 @@ static void __gic_irq_dispatch(void)
 	gic_handle_shared_int(false);
 }
 
-static void gic_irq_dispatch(unsigned int irq, struct irq_desc *desc)
+static void gic_irq_dispatch(struct irq_desc *desc)
 {
 	gic_handle_local_int(true);
 	gic_handle_shared_int(true);
diff --git a/drivers/irqchip/irq-mmp.c b/drivers/irqchip/irq-mmp.c
index 781ed6e71dbb..ea6e3a96f3b3 100644
--- a/drivers/irqchip/irq-mmp.c
+++ b/drivers/irqchip/irq-mmp.c
@@ -129,7 +129,7 @@ struct irq_chip icu_irq_chip = {
 	.irq_unmask	= icu_unmask_irq,
 };
 
-static void icu_mux_irq_demux(unsigned int __irq, struct irq_desc *desc)
+static void icu_mux_irq_demux(struct irq_desc *desc)
 {
 	unsigned int irq = irq_desc_get_irq(desc);
 	struct irq_domain *domain;
diff --git a/drivers/irqchip/irq-orion.c b/drivers/irqchip/irq-orion.c
index 5ea999a724b5..be4c5a8c9659 100644
--- a/drivers/irqchip/irq-orion.c
+++ b/drivers/irqchip/irq-orion.c
@@ -106,7 +106,7 @@ IRQCHIP_DECLARE(orion_intc, "marvell,orion-intc", orion_irq_init);
 #define ORION_BRIDGE_IRQ_CAUSE	0x00
 #define ORION_BRIDGE_IRQ_MASK	0x04
 
-static void orion_bridge_irq_handler(unsigned int irq, struct irq_desc *desc)
+static void orion_bridge_irq_handler(struct irq_desc *desc)
 {
 	struct irq_domain *d = irq_desc_get_handler_data(desc);
 
diff --git a/drivers/irqchip/irq-s3c24xx.c b/drivers/irqchip/irq-s3c24xx.c
index 506d9f20ca51..e47572a9bbb2 100644
--- a/drivers/irqchip/irq-s3c24xx.c
+++ b/drivers/irqchip/irq-s3c24xx.c
@@ -298,7 +298,7 @@ static struct irq_chip s3c_irq_eint0t4 = {
 	.irq_set_type	= s3c_irqext0_type,
 };
 
-static void s3c_irq_demux(unsigned int __irq, struct irq_desc *desc)
+static void s3c_irq_demux(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	struct s3c_irq_data *irq_data = irq_desc_get_chip_data(desc);
diff --git a/drivers/irqchip/irq-sunxi-nmi.c b/drivers/irqchip/irq-sunxi-nmi.c
index 772a82cacbf7..c143dd58410c 100644
--- a/drivers/irqchip/irq-sunxi-nmi.c
+++ b/drivers/irqchip/irq-sunxi-nmi.c
@@ -58,7 +58,7 @@ static inline u32 sunxi_sc_nmi_read(struct irq_chip_generic *gc, u32 off)
 	return irq_reg_readl(gc, off);
 }
 
-static void sunxi_sc_nmi_handle_irq(unsigned int irq, struct irq_desc *desc)
+static void sunxi_sc_nmi_handle_irq(struct irq_desc *desc)
 {
 	struct irq_domain *domain = irq_desc_get_handler_data(desc);
 	struct irq_chip *chip = irq_desc_get_chip(desc);
diff --git a/drivers/irqchip/irq-tb10x.c b/drivers/irqchip/irq-tb10x.c
index 331829661366..848d782a2a3b 100644
--- a/drivers/irqchip/irq-tb10x.c
+++ b/drivers/irqchip/irq-tb10x.c
@@ -97,7 +97,7 @@ static int tb10x_irq_set_type(struct irq_data *data, unsigned int flow_type)
 	return IRQ_SET_MASK_OK;
 }
 
-static void tb10x_irq_cascade(unsigned int __irq, struct irq_desc *desc)
+static void tb10x_irq_cascade(struct irq_desc *desc)
 {
 	struct irq_domain *domain = irq_desc_get_handler_data(desc);
 	unsigned int irq = irq_desc_get_irq(desc);
diff --git a/drivers/irqchip/irq-versatile-fpga.c b/drivers/irqchip/irq-versatile-fpga.c
index 16123f688768..1b1c63e8d249 100644
--- a/drivers/irqchip/irq-versatile-fpga.c
+++ b/drivers/irqchip/irq-versatile-fpga.c
@@ -65,19 +65,19 @@ static void fpga_irq_unmask(struct irq_data *d)
 	writel(mask, f->base + IRQ_ENABLE_SET);
 }
 
-static void fpga_irq_handle(unsigned int __irq, struct irq_desc *desc)
+static void fpga_irq_handle(struct irq_desc *desc)
 {
 	struct fpga_irq_data *f = irq_desc_get_handler_data(desc);
-	unsigned int irq = irq_desc_get_irq(desc);
 	u32 status = readl(f->base + IRQ_STATUS);
 
 	if (status == 0) {
-		do_bad_IRQ(irq, desc);
+		do_bad_IRQ(desc);
 		return;
 	}
 
 	do {
-		irq = ffs(status) - 1;
+		unsigned int irq = ffs(status) - 1;
+
 		status &= ~(1 << irq);
 		generic_handle_irq(irq_find_mapping(f->domain, irq));
 	} while (status);
diff --git a/drivers/irqchip/irq-vic.c b/drivers/irqchip/irq-vic.c
index 03846dff4212..cb85504c2ff1 100644
--- a/drivers/irqchip/irq-vic.c
+++ b/drivers/irqchip/irq-vic.c
@@ -225,7 +225,7 @@ static int handle_one_vic(struct vic_device *vic, struct pt_regs *regs)
 	return handled;
 }
 
-static void vic_handle_irq_cascaded(unsigned int irq, struct irq_desc *desc)
+static void vic_handle_irq_cascaded(struct irq_desc *desc)
 {
 	u32 stat, hwirq;
 	struct irq_chip *host_chip = irq_desc_get_chip(desc);
diff --git a/drivers/irqchip/spear-shirq.c b/drivers/irqchip/spear-shirq.c
index 4cbd9c5dc1e6..c838c9286ea5 100644
--- a/drivers/irqchip/spear-shirq.c
+++ b/drivers/irqchip/spear-shirq.c
@@ -182,7 +182,7 @@ static struct spear_shirq *spear320_shirq_blocks[] = {
 	&spear320_shirq_intrcomm_ras,
 };
 
-static void shirq_handler(unsigned __irq, struct irq_desc *desc)
+static void shirq_handler(struct irq_desc *desc)
 {
 	struct spear_shirq *shirq = irq_desc_get_handler_data(desc);
 	u32 pend;
diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c
index 4b54128bc78e..a726f01e3b02 100644
--- a/drivers/mfd/asic3.c
+++ b/drivers/mfd/asic3.c
@@ -138,7 +138,7 @@ static void asic3_irq_flip_edge(struct asic3 *asic,
 	spin_unlock_irqrestore(&asic->lock, flags);
 }
 
-static void asic3_irq_demux(unsigned int irq, struct irq_desc *desc)
+static void asic3_irq_demux(struct irq_desc *desc)
 {
 	struct asic3 *asic = irq_desc_get_handler_data(desc);
 	struct irq_data *data = irq_desc_get_irq_data(desc);
diff --git a/drivers/mfd/ezx-pcap.c b/drivers/mfd/ezx-pcap.c
index a76eb6ef47a0..b279205659a4 100644
--- a/drivers/mfd/ezx-pcap.c
+++ b/drivers/mfd/ezx-pcap.c
@@ -205,7 +205,7 @@ static void pcap_isr_work(struct work_struct *work)
 	} while (gpio_get_value(pdata->gpio));
 }
 
-static void pcap_irq_handler(unsigned int irq, struct irq_desc *desc)
+static void pcap_irq_handler(struct irq_desc *desc)
 {
 	struct pcap_chip *pcap = irq_desc_get_handler_data(desc);
 
diff --git a/drivers/mfd/htc-egpio.c b/drivers/mfd/htc-egpio.c
index 9131cdcdc64a..6ccaf90d98fd 100644
--- a/drivers/mfd/htc-egpio.c
+++ b/drivers/mfd/htc-egpio.c
@@ -98,7 +98,7 @@ static struct irq_chip egpio_muxed_chip = {
 	.irq_unmask	= egpio_unmask,
 };
 
-static void egpio_handler(unsigned int irq, struct irq_desc *desc)
+static void egpio_handler(struct irq_desc *desc)
 {
 	struct egpio_info *ei = irq_desc_get_handler_data(desc);
 	int irqpin;
diff --git a/drivers/mfd/jz4740-adc.c b/drivers/mfd/jz4740-adc.c
index 5bb49f08955d..798e44306382 100644
--- a/drivers/mfd/jz4740-adc.c
+++ b/drivers/mfd/jz4740-adc.c
@@ -65,7 +65,7 @@ struct jz4740_adc {
 	spinlock_t lock;
 };
 
-static void jz4740_adc_irq_demux(unsigned int irq, struct irq_desc *desc)
+static void jz4740_adc_irq_demux(struct irq_desc *desc)
 {
 	struct irq_chip_generic *gc = irq_desc_get_handler_data(desc);
 	uint8_t status;
diff --git a/drivers/mfd/pm8921-core.c b/drivers/mfd/pm8921-core.c
index 59502d02cd15..1b7ec0870c2a 100644
--- a/drivers/mfd/pm8921-core.c
+++ b/drivers/mfd/pm8921-core.c
@@ -156,7 +156,7 @@ static int pm8xxx_irq_master_handler(struct pm_irq_chip *chip, int master)
 	return ret;
 }
 
-static void pm8xxx_irq_handler(unsigned int irq, struct irq_desc *desc)
+static void pm8xxx_irq_handler(struct irq_desc *desc)
 {
 	struct pm_irq_chip *chip = irq_desc_get_handler_data(desc);
 	struct irq_chip *irq_chip = irq_desc_get_chip(desc);
diff --git a/drivers/mfd/t7l66xb.c b/drivers/mfd/t7l66xb.c
index 16fc1adc4fa3..94bd89cb1f06 100644
--- a/drivers/mfd/t7l66xb.c
+++ b/drivers/mfd/t7l66xb.c
@@ -185,7 +185,7 @@ static struct mfd_cell t7l66xb_cells[] = {
 /*--------------------------------------------------------------------------*/
 
 /* Handle the T7L66XB interrupt mux */
-static void t7l66xb_irq(unsigned int irq, struct irq_desc *desc)
+static void t7l66xb_irq(struct irq_desc *desc)
 {
 	struct t7l66xb *t7l66xb = irq_desc_get_handler_data(desc);
 	unsigned int isr;
diff --git a/drivers/mfd/tc6393xb.c b/drivers/mfd/tc6393xb.c
index 775b9aca871a..8c84a513016b 100644
--- a/drivers/mfd/tc6393xb.c
+++ b/drivers/mfd/tc6393xb.c
@@ -522,8 +522,7 @@ static int tc6393xb_register_gpio(struct tc6393xb *tc6393xb, int gpio_base)
 
 /*--------------------------------------------------------------------------*/
 
-static void
-tc6393xb_irq(unsigned int irq, struct irq_desc *desc)
+static void tc6393xb_irq(struct irq_desc *desc)
 {
 	struct tc6393xb *tc6393xb = irq_desc_get_handler_data(desc);
 	unsigned int isr;
diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c
index 9a2302129711..f691d7ecad52 100644
--- a/drivers/mfd/ucb1x00-core.c
+++ b/drivers/mfd/ucb1x00-core.c
@@ -282,7 +282,7 @@ void ucb1x00_adc_disable(struct ucb1x00 *ucb)
  * SIBCLK to talk to the chip.  We leave the clock running until
  * we have finished processing all interrupts from the chip.
  */
-static void ucb1x00_irq(unsigned int __irq, struct irq_desc *desc)
+static void ucb1x00_irq(struct irq_desc *desc)
 {
 	struct ucb1x00 *ucb = irq_desc_get_handler_data(desc);
 	unsigned int isr, i;
diff --git a/drivers/pci/host/pci-keystone.c b/drivers/pci/host/pci-keystone.c
index 81253e70b1c5..0aa81bd3de12 100644
--- a/drivers/pci/host/pci-keystone.c
+++ b/drivers/pci/host/pci-keystone.c
@@ -110,7 +110,7 @@ static int ks_pcie_establish_link(struct keystone_pcie *ks_pcie)
 	return -EINVAL;
 }
 
-static void ks_pcie_msi_irq_handler(unsigned int __irq, struct irq_desc *desc)
+static void ks_pcie_msi_irq_handler(struct irq_desc *desc)
 {
 	unsigned int irq = irq_desc_get_irq(desc);
 	struct keystone_pcie *ks_pcie = irq_desc_get_handler_data(desc);
@@ -138,8 +138,7 @@ static void ks_pcie_msi_irq_handler(unsigned int __irq, struct irq_desc *desc)
  * Traverse through pending legacy interrupts and invoke handler for each. Also
  * takes care of interrupt controller level mask/ack operation.
  */
-static void ks_pcie_legacy_irq_handler(unsigned int __irq,
-				       struct irq_desc *desc)
+static void ks_pcie_legacy_irq_handler(struct irq_desc *desc)
 {
 	unsigned int irq = irq_desc_get_irq(desc);
 	struct keystone_pcie *ks_pcie = irq_desc_get_handler_data(desc);
diff --git a/drivers/pci/host/pci-xgene-msi.c b/drivers/pci/host/pci-xgene-msi.c
index 996327cfa1e1..e491681daf22 100644
--- a/drivers/pci/host/pci-xgene-msi.c
+++ b/drivers/pci/host/pci-xgene-msi.c
@@ -295,7 +295,7 @@ static int xgene_msi_init_allocator(struct xgene_msi *xgene_msi)
 	return 0;
 }
 
-static void xgene_msi_isr(unsigned int irq, struct irq_desc *desc)
+static void xgene_msi_isr(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	struct xgene_msi_group *msi_groups;
diff --git a/drivers/pinctrl/bcm/pinctrl-cygnus-gpio.c b/drivers/pinctrl/bcm/pinctrl-cygnus-gpio.c
index 7d9482bf8252..1ca783098e47 100644
--- a/drivers/pinctrl/bcm/pinctrl-cygnus-gpio.c
+++ b/drivers/pinctrl/bcm/pinctrl-cygnus-gpio.c
@@ -143,7 +143,7 @@ static inline bool cygnus_get_bit(struct cygnus_gpio *chip, unsigned int reg,
 	return !!(readl(chip->base + offset) & BIT(shift));
 }
 
-static void cygnus_gpio_irq_handler(unsigned int irq, struct irq_desc *desc)
+static void cygnus_gpio_irq_handler(struct irq_desc *desc)
 {
 	struct gpio_chip *gc = irq_desc_get_handler_data(desc);
 	struct cygnus_gpio *chip = to_cygnus_gpio(gc);
diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c
index dac4865f3203..f79ea430f651 100644
--- a/drivers/pinctrl/intel/pinctrl-baytrail.c
+++ b/drivers/pinctrl/intel/pinctrl-baytrail.c
@@ -425,7 +425,7 @@ static void byt_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip)
 	}
 }
 
-static void byt_gpio_irq_handler(unsigned irq, struct irq_desc *desc)
+static void byt_gpio_irq_handler(struct irq_desc *desc)
 {
 	struct irq_data *data = irq_desc_get_irq_data(desc);
 	struct byt_gpio *vg = to_byt_gpio(irq_desc_get_handler_data(desc));
diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c
index 2d5d3ddc36e5..270c127e03ea 100644
--- a/drivers/pinctrl/intel/pinctrl-cherryview.c
+++ b/drivers/pinctrl/intel/pinctrl-cherryview.c
@@ -1414,7 +1414,7 @@ static struct irq_chip chv_gpio_irqchip = {
 	.flags = IRQCHIP_SKIP_SET_WAKE,
 };
 
-static void chv_gpio_irq_handler(unsigned irq, struct irq_desc *desc)
+static void chv_gpio_irq_handler(struct irq_desc *desc)
 {
 	struct gpio_chip *gc = irq_desc_get_handler_data(desc);
 	struct chv_pinctrl *pctrl = gpiochip_to_pinctrl(gc);
diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c
index bb377c110541..54848b8decef 100644
--- a/drivers/pinctrl/intel/pinctrl-intel.c
+++ b/drivers/pinctrl/intel/pinctrl-intel.c
@@ -836,7 +836,7 @@ static void intel_gpio_community_irq_handler(struct gpio_chip *gc,
 	}
 }
 
-static void intel_gpio_irq_handler(unsigned irq, struct irq_desc *desc)
+static void intel_gpio_irq_handler(struct irq_desc *desc)
 {
 	struct gpio_chip *gc = irq_desc_get_handler_data(desc);
 	struct intel_pinctrl *pctrl = gpiochip_to_pinctrl(gc);
diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
index 7726c6caaf83..1b22f96ba839 100644
--- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
+++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
@@ -1190,7 +1190,7 @@ mtk_eint_debounce_process(struct mtk_pinctrl *pctl, int index)
 	}
 }
 
-static void mtk_eint_irq_handler(unsigned irq, struct irq_desc *desc)
+static void mtk_eint_irq_handler(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	struct mtk_pinctrl *pctl = irq_desc_get_handler_data(desc);
diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik.c b/drivers/pinctrl/nomadik/pinctrl-nomadik.c
index 352ede13a9e9..96cf03908e93 100644
--- a/drivers/pinctrl/nomadik/pinctrl-nomadik.c
+++ b/drivers/pinctrl/nomadik/pinctrl-nomadik.c
@@ -860,7 +860,7 @@ static void __nmk_gpio_irq_handler(struct irq_desc *desc, u32 status)
 	chained_irq_exit(host_chip, desc);
 }
 
-static void nmk_gpio_irq_handler(unsigned int irq, struct irq_desc *desc)
+static void nmk_gpio_irq_handler(struct irq_desc *desc)
 {
 	struct gpio_chip *chip = irq_desc_get_handler_data(desc);
 	struct nmk_gpio_chip *nmk_chip = container_of(chip, struct nmk_gpio_chip, chip);
@@ -873,7 +873,7 @@ static void nmk_gpio_irq_handler(unsigned int irq, struct irq_desc *desc)
 	__nmk_gpio_irq_handler(desc, status);
 }
 
-static void nmk_gpio_latent_irq_handler(unsigned int irq, struct irq_desc *desc)
+static void nmk_gpio_latent_irq_handler(struct irq_desc *desc)
 {
 	struct gpio_chip *chip = irq_desc_get_handler_data(desc);
 	struct nmk_gpio_chip *nmk_chip = container_of(chip, struct nmk_gpio_chip, chip);
diff --git a/drivers/pinctrl/pinctrl-adi2.c b/drivers/pinctrl/pinctrl-adi2.c
index a5976ebc4482..f6be68518c87 100644
--- a/drivers/pinctrl/pinctrl-adi2.c
+++ b/drivers/pinctrl/pinctrl-adi2.c
@@ -530,8 +530,7 @@ static inline void preflow_handler(struct irq_desc *desc)
 static inline void preflow_handler(struct irq_desc *desc) { }
 #endif
 
-static void adi_gpio_handle_pint_irq(unsigned int inta_irq,
-			struct irq_desc *desc)
+static void adi_gpio_handle_pint_irq(struct irq_desc *desc)
 {
 	u32 request;
 	u32 level_mask, hwirq;
diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c
index 5e86bb8ca80e..3318f1d6193c 100644
--- a/drivers/pinctrl/pinctrl-amd.c
+++ b/drivers/pinctrl/pinctrl-amd.c
@@ -492,15 +492,15 @@ static struct irq_chip amd_gpio_irqchip = {
 	.irq_set_type = amd_gpio_irq_set_type,
 };
 
-static void amd_gpio_irq_handler(unsigned int __irq, struct irq_desc *desc)
+static void amd_gpio_irq_handler(struct irq_desc *desc)
 {
-	unsigned int irq = irq_desc_get_irq(desc);
 	u32 i;
 	u32 off;
 	u32 reg;
 	u32 pin_reg;
 	u64 reg64;
 	int handled = 0;
+	unsigned int irq;
 	unsigned long flags;
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	struct gpio_chip *gc = irq_desc_get_handler_data(desc);
@@ -541,7 +541,7 @@ static void amd_gpio_irq_handler(unsigned int __irq, struct irq_desc *desc)
 	}
 
 	if (handled == 0)
-		handle_bad_irq(irq, desc);
+		handle_bad_irq(desc);
 
 	spin_lock_irqsave(&gpio_dev->lock, flags);
 	reg = readl(gpio_dev->base + WAKE_INT_MASTER_REG);
diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c
index bae0012ee356..b0fde0f385e6 100644
--- a/drivers/pinctrl/pinctrl-at91.c
+++ b/drivers/pinctrl/pinctrl-at91.c
@@ -1585,7 +1585,7 @@ static struct irq_chip gpio_irqchip = {
 	.irq_set_wake	= gpio_irq_set_wake,
 };
 
-static void gpio_irq_handler(unsigned irq, struct irq_desc *desc)
+static void gpio_irq_handler(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	struct gpio_chip *gpio_chip = irq_desc_get_handler_data(desc);
diff --git a/drivers/pinctrl/pinctrl-coh901.c b/drivers/pinctrl/pinctrl-coh901.c
index 3731cc67a88b..9c9b88934bcc 100644
--- a/drivers/pinctrl/pinctrl-coh901.c
+++ b/drivers/pinctrl/pinctrl-coh901.c
@@ -519,7 +519,7 @@ static struct irq_chip u300_gpio_irqchip = {
 	.irq_set_type		= u300_gpio_irq_type,
 };
 
-static void u300_gpio_irq_handler(unsigned __irq, struct irq_desc *desc)
+static void u300_gpio_irq_handler(struct irq_desc *desc)
 {
 	unsigned int irq = irq_desc_get_irq(desc);
 	struct irq_chip *parent_chip = irq_desc_get_chip(desc);
diff --git a/drivers/pinctrl/pinctrl-pistachio.c b/drivers/pinctrl/pinctrl-pistachio.c
index f22d585d9300..952b1c623887 100644
--- a/drivers/pinctrl/pinctrl-pistachio.c
+++ b/drivers/pinctrl/pinctrl-pistachio.c
@@ -1310,13 +1310,11 @@ static int pistachio_gpio_irq_set_type(struct irq_data *data, unsigned int type)
 	return 0;
 }
 
-static void pistachio_gpio_irq_handler(unsigned int __irq,
-				       struct irq_desc *desc)
+static void pistachio_gpio_irq_handler(struct irq_desc *desc)
 {
-	unsigned int irq = irq_desc_get_irq(desc);
 	struct gpio_chip *gc = irq_desc_get_handler_data(desc);
 	struct pistachio_gpio_bank *bank = gc_to_bank(gc);
-	struct irq_chip *chip = irq_get_chip(irq);
+	struct irq_chip *chip = irq_desc_get_chip(desc);
 	unsigned long pending;
 	unsigned int pin;
 
diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c
index c5246c05f70c..88bb707e107a 100644
--- a/drivers/pinctrl/pinctrl-rockchip.c
+++ b/drivers/pinctrl/pinctrl-rockchip.c
@@ -1475,7 +1475,7 @@ static const struct gpio_chip rockchip_gpiolib_chip = {
  * Interrupt handling
  */
 
-static void rockchip_irq_demux(unsigned int __irq, struct irq_desc *desc)
+static void rockchip_irq_demux(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	struct rockchip_pin_bank *bank = irq_desc_get_handler_data(desc);
diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c
index bf548c2a7a9d..ef04b962c3d5 100644
--- a/drivers/pinctrl/pinctrl-single.c
+++ b/drivers/pinctrl/pinctrl-single.c
@@ -1679,7 +1679,7 @@ static irqreturn_t pcs_irq_handler(int irq, void *d)
  * Use this if you have a separate interrupt for each
  * pinctrl-single instance.
  */
-static void pcs_irq_chain_handler(unsigned int irq, struct irq_desc *desc)
+static void pcs_irq_chain_handler(struct irq_desc *desc)
 {
 	struct pcs_soc_data *pcs_soc = irq_desc_get_handler_data(desc);
 	struct irq_chip *chip;
diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c
index f8338d2e6b6b..389526e704fb 100644
--- a/drivers/pinctrl/pinctrl-st.c
+++ b/drivers/pinctrl/pinctrl-st.c
@@ -1460,7 +1460,7 @@ static void __gpio_irq_handler(struct st_gpio_bank *bank)
 	}
 }
 
-static void st_gpio_irq_handler(unsigned irq, struct irq_desc *desc)
+static void st_gpio_irq_handler(struct irq_desc *desc)
 {
 	/* interrupt dedicated per bank */
 	struct irq_chip *chip = irq_desc_get_chip(desc);
@@ -1472,7 +1472,7 @@ static void st_gpio_irq_handler(unsigned irq, struct irq_desc *desc)
 	chained_irq_exit(chip, desc);
 }
 
-static void st_gpio_irqmux_handler(unsigned irq, struct irq_desc *desc)
+static void st_gpio_irqmux_handler(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	struct st_pinctrl *info = irq_desc_get_handler_data(desc);
diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c
index 492cdd51dc5c..a0c7407c1cac 100644
--- a/drivers/pinctrl/qcom/pinctrl-msm.c
+++ b/drivers/pinctrl/qcom/pinctrl-msm.c
@@ -765,9 +765,8 @@ static struct irq_chip msm_gpio_irq_chip = {
 	.irq_set_wake   = msm_gpio_irq_set_wake,
 };
 
-static void msm_gpio_irq_handler(unsigned int __irq, struct irq_desc *desc)
+static void msm_gpio_irq_handler(struct irq_desc *desc)
 {
-	unsigned int irq = irq_desc_get_irq(desc);
 	struct gpio_chip *gc = irq_desc_get_handler_data(desc);
 	const struct msm_pingroup *g;
 	struct msm_pinctrl *pctrl = to_msm_pinctrl(gc);
@@ -795,7 +794,7 @@ static void msm_gpio_irq_handler(unsigned int __irq, struct irq_desc *desc)
 
 	/* No interrupts were flagged */
 	if (handled == 0)
-		handle_bad_irq(irq, desc);
+		handle_bad_irq(desc);
 
 	chained_irq_exit(chip, desc);
 }
diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.c b/drivers/pinctrl/samsung/pinctrl-exynos.c
index 5f45caaef46d..71ccf6a90b22 100644
--- a/drivers/pinctrl/samsung/pinctrl-exynos.c
+++ b/drivers/pinctrl/samsung/pinctrl-exynos.c
@@ -419,7 +419,7 @@ static const struct of_device_id exynos_wkup_irq_ids[] = {
 };
 
 /* interrupt handler for wakeup interrupts 0..15 */
-static void exynos_irq_eint0_15(unsigned int irq, struct irq_desc *desc)
+static void exynos_irq_eint0_15(struct irq_desc *desc)
 {
 	struct exynos_weint_data *eintd = irq_desc_get_handler_data(desc);
 	struct samsung_pin_bank *bank = eintd->bank;
@@ -451,7 +451,7 @@ static inline void exynos_irq_demux_eint(unsigned long pend,
 }
 
 /* interrupt handler for wakeup interrupt 16 */
-static void exynos_irq_demux_eint16_31(unsigned int irq, struct irq_desc *desc)
+static void exynos_irq_demux_eint16_31(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	struct exynos_muxed_weint_data *eintd = irq_desc_get_handler_data(desc);
diff --git a/drivers/pinctrl/samsung/pinctrl-s3c24xx.c b/drivers/pinctrl/samsung/pinctrl-s3c24xx.c
index 019844d479bb..b3cd9ae3f4a2 100644
--- a/drivers/pinctrl/samsung/pinctrl-s3c24xx.c
+++ b/drivers/pinctrl/samsung/pinctrl-s3c24xx.c
@@ -240,7 +240,7 @@ static struct irq_chip s3c2410_eint0_3_chip = {
 	.irq_set_type	= s3c24xx_eint_type,
 };
 
-static void s3c2410_demux_eint0_3(unsigned int irq, struct irq_desc *desc)
+static void s3c2410_demux_eint0_3(struct irq_desc *desc)
 {
 	struct irq_data *data = irq_desc_get_irq_data(desc);
 	struct s3c24xx_eint_data *eint_data = irq_desc_get_handler_data(desc);
@@ -295,7 +295,7 @@ static struct irq_chip s3c2412_eint0_3_chip = {
 	.irq_set_type	= s3c24xx_eint_type,
 };
 
-static void s3c2412_demux_eint0_3(unsigned int irq, struct irq_desc *desc)
+static void s3c2412_demux_eint0_3(struct irq_desc *desc)
 {
 	struct s3c24xx_eint_data *eint_data = irq_desc_get_handler_data(desc);
 	struct irq_data *data = irq_desc_get_irq_data(desc);
@@ -388,12 +388,12 @@ static inline void s3c24xx_demux_eint(struct irq_desc *desc,
 	chained_irq_exit(chip, desc);
 }
 
-static void s3c24xx_demux_eint4_7(unsigned int irq, struct irq_desc *desc)
+static void s3c24xx_demux_eint4_7(struct irq_desc *desc)
 {
 	s3c24xx_demux_eint(desc, 0, 0xf0);
 }
 
-static void s3c24xx_demux_eint8_23(unsigned int irq, struct irq_desc *desc)
+static void s3c24xx_demux_eint8_23(struct irq_desc *desc)
 {
 	s3c24xx_demux_eint(desc, 8, 0xffff00);
 }
diff --git a/drivers/pinctrl/samsung/pinctrl-s3c64xx.c b/drivers/pinctrl/samsung/pinctrl-s3c64xx.c
index f5ea40a69711..43407ab248f5 100644
--- a/drivers/pinctrl/samsung/pinctrl-s3c64xx.c
+++ b/drivers/pinctrl/samsung/pinctrl-s3c64xx.c
@@ -407,7 +407,7 @@ static const struct irq_domain_ops s3c64xx_gpio_irqd_ops = {
 	.xlate	= irq_domain_xlate_twocell,
 };
 
-static void s3c64xx_eint_gpio_irq(unsigned int irq, struct irq_desc *desc)
+static void s3c64xx_eint_gpio_irq(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	struct s3c64xx_eint_gpio_data *data = irq_desc_get_handler_data(desc);
@@ -631,22 +631,22 @@ static inline void s3c64xx_irq_demux_eint(struct irq_desc *desc, u32 range)
 	chained_irq_exit(chip, desc);
 }
 
-static void s3c64xx_demux_eint0_3(unsigned int irq, struct irq_desc *desc)
+static void s3c64xx_demux_eint0_3(struct irq_desc *desc)
 {
 	s3c64xx_irq_demux_eint(desc, 0xf);
 }
 
-static void s3c64xx_demux_eint4_11(unsigned int irq, struct irq_desc *desc)
+static void s3c64xx_demux_eint4_11(struct irq_desc *desc)
 {
 	s3c64xx_irq_demux_eint(desc, 0xff0);
 }
 
-static void s3c64xx_demux_eint12_19(unsigned int irq, struct irq_desc *desc)
+static void s3c64xx_demux_eint12_19(struct irq_desc *desc)
 {
 	s3c64xx_irq_demux_eint(desc, 0xff000);
 }
 
-static void s3c64xx_demux_eint20_27(unsigned int irq, struct irq_desc *desc)
+static void s3c64xx_demux_eint20_27(struct irq_desc *desc)
 {
 	s3c64xx_irq_demux_eint(desc, 0xff00000);
 }
diff --git a/drivers/pinctrl/sirf/pinctrl-atlas7.c b/drivers/pinctrl/sirf/pinctrl-atlas7.c
index 9df0c5f25824..0d24d9e4b70c 100644
--- a/drivers/pinctrl/sirf/pinctrl-atlas7.c
+++ b/drivers/pinctrl/sirf/pinctrl-atlas7.c
@@ -4489,7 +4489,7 @@ static struct irq_chip atlas7_gpio_irq_chip = {
 	.irq_set_type = atlas7_gpio_irq_type,
 };
 
-static void atlas7_gpio_handle_irq(unsigned int __irq, struct irq_desc *desc)
+static void atlas7_gpio_handle_irq(struct irq_desc *desc)
 {
 	struct gpio_chip *gc = irq_desc_get_handler_data(desc);
 	struct atlas7_gpio_chip *a7gc = to_atlas7_gpio(gc);
@@ -4512,7 +4512,7 @@ static void atlas7_gpio_handle_irq(unsigned int __irq, struct irq_desc *desc)
 	if (!status) {
 		pr_warn("%s: gpio [%s] status %#x no interrupt is flaged\n",
 			__func__, gc->label, status);
-		handle_bad_irq(irq, desc);
+		handle_bad_irq(desc);
 		return;
 	}
 
diff --git a/drivers/pinctrl/sirf/pinctrl-sirf.c b/drivers/pinctrl/sirf/pinctrl-sirf.c
index f8bd9fb52033..2a8d69725de8 100644
--- a/drivers/pinctrl/sirf/pinctrl-sirf.c
+++ b/drivers/pinctrl/sirf/pinctrl-sirf.c
@@ -545,7 +545,7 @@ static struct irq_chip sirfsoc_irq_chip = {
 	.irq_set_type = sirfsoc_gpio_irq_type,
 };
 
-static void sirfsoc_gpio_handle_irq(unsigned int __irq, struct irq_desc *desc)
+static void sirfsoc_gpio_handle_irq(struct irq_desc *desc)
 {
 	unsigned int irq = irq_desc_get_irq(desc);
 	struct gpio_chip *gc = irq_desc_get_handler_data(desc);
@@ -570,7 +570,7 @@ static void sirfsoc_gpio_handle_irq(unsigned int __irq, struct irq_desc *desc)
 		printk(KERN_WARNING
 			"%s: gpio id %d status %#x no interrupt is flagged\n",
 			__func__, bank->id, status);
-		handle_bad_irq(irq, desc);
+		handle_bad_irq(desc);
 		return;
 	}
 
diff --git a/drivers/pinctrl/spear/pinctrl-plgpio.c b/drivers/pinctrl/spear/pinctrl-plgpio.c
index ae8f29fb5536..1f0af250dbb5 100644
--- a/drivers/pinctrl/spear/pinctrl-plgpio.c
+++ b/drivers/pinctrl/spear/pinctrl-plgpio.c
@@ -356,7 +356,7 @@ static struct irq_chip plgpio_irqchip = {
 	.irq_set_type	= plgpio_irq_set_type,
 };
 
-static void plgpio_irq_handler(unsigned irq, struct irq_desc *desc)
+static void plgpio_irq_handler(struct irq_desc *desc)
 {
 	struct gpio_chip *gc = irq_desc_get_handler_data(desc);
 	struct plgpio *plgpio = container_of(gc, struct plgpio, chip);
diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c
index 31af97d89272..38e0c7bdd2ac 100644
--- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c
+++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c
@@ -740,7 +740,7 @@ static struct irq_domain_ops sunxi_pinctrl_irq_domain_ops = {
 	.xlate		= sunxi_pinctrl_irq_of_xlate,
 };
 
-static void sunxi_pinctrl_irq_handler(unsigned __irq, struct irq_desc *desc)
+static void sunxi_pinctrl_irq_handler(struct irq_desc *desc)
 {
 	unsigned int irq = irq_desc_get_irq(desc);
 	struct irq_chip *chip = irq_desc_get_chip(desc);
diff --git a/drivers/sh/intc/core.c b/drivers/sh/intc/core.c
index 043419dcee92..8e72bcbd3d6d 100644
--- a/drivers/sh/intc/core.c
+++ b/drivers/sh/intc/core.c
@@ -65,7 +65,7 @@ void intc_set_prio_level(unsigned int irq, unsigned int level)
 	raw_spin_unlock_irqrestore(&intc_big_lock, flags);
 }
 
-static void intc_redirect_irq(unsigned int irq, struct irq_desc *desc)
+static void intc_redirect_irq(struct irq_desc *desc)
 {
 	generic_handle_irq((unsigned int)irq_desc_get_handler_data(desc));
 }
diff --git a/drivers/sh/intc/virq.c b/drivers/sh/intc/virq.c
index bafc51c6f0ba..e7899624aa0b 100644
--- a/drivers/sh/intc/virq.c
+++ b/drivers/sh/intc/virq.c
@@ -109,7 +109,7 @@ static int add_virq_to_pirq(unsigned int irq, unsigned int virq)
 	return 0;
 }
 
-static void intc_virq_handler(unsigned int __irq, struct irq_desc *desc)
+static void intc_virq_handler(struct irq_desc *desc)
 {
 	unsigned int irq = irq_desc_get_irq(desc);
 	struct irq_data *data = irq_desc_get_irq_data(desc);
@@ -127,7 +127,7 @@ static void intc_virq_handler(unsigned int __irq, struct irq_desc *desc)
 			handle = (unsigned long)irq_desc_get_handler_data(vdesc);
 			addr = INTC_REG(d, _INTC_ADDR_E(handle), 0);
 			if (intc_reg_fns[_INTC_FN(handle)](addr, handle, 0))
-				generic_handle_irq_desc(entry->irq, vdesc);
+				generic_handle_irq_desc(vdesc);
 		}
 	}
 
diff --git a/drivers/soc/dove/pmu.c b/drivers/soc/dove/pmu.c
index 6bc13f99489a..052aecf29893 100644
--- a/drivers/soc/dove/pmu.c
+++ b/drivers/soc/dove/pmu.c
@@ -222,7 +222,7 @@ static void __pmu_domain_register(struct pmu_domain *domain,
 }
 
 /* PMU IRQ controller */
-static void pmu_irq_handler(unsigned int irq, struct irq_desc *desc)
+static void pmu_irq_handler(struct irq_desc *desc)
 {
 	struct pmu_data *pmu = irq_desc_get_handler_data(desc);
 	struct irq_chip_generic *gc = pmu->irq_gc;
@@ -232,7 +232,7 @@ static void pmu_irq_handler(unsigned int irq, struct irq_desc *desc)
 	u32 done = ~0;
 
 	if (stat == 0) {
-		handle_bad_irq(irq_desc_get_irq(desc), desc);
+		handle_bad_irq(desc);
 		return;
 	}
 
diff --git a/drivers/spmi/spmi-pmic-arb.c b/drivers/spmi/spmi-pmic-arb.c
index bdfb3c84c3cb..4a3cf9ba152f 100644
--- a/drivers/spmi/spmi-pmic-arb.c
+++ b/drivers/spmi/spmi-pmic-arb.c
@@ -451,7 +451,7 @@ static void periph_interrupt(struct spmi_pmic_arb_dev *pa, u8 apid)
 	}
 }
 
-static void pmic_arb_chained_irq(unsigned int irq, struct irq_desc *desc)
+static void pmic_arb_chained_irq(struct irq_desc *desc)
 {
 	struct spmi_pmic_arb_dev *pa = irq_desc_get_handler_data(desc);
 	struct irq_chip *chip = irq_desc_get_chip(desc);
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 4913c32db942..11bf09288ddb 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -475,14 +475,14 @@ static inline int irq_set_parent(int irq, int parent_irq)
  * Built-in IRQ handlers for various IRQ types,
  * callable via desc->handle_irq()
  */
-extern void handle_level_irq(unsigned int irq, struct irq_desc *desc);
-extern void handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc);
-extern void handle_edge_irq(unsigned int irq, struct irq_desc *desc);
-extern void handle_edge_eoi_irq(unsigned int irq, struct irq_desc *desc);
-extern void handle_simple_irq(unsigned int irq, struct irq_desc *desc);
-extern void handle_percpu_irq(unsigned int irq, struct irq_desc *desc);
-extern void handle_percpu_devid_irq(unsigned int irq, struct irq_desc *desc);
-extern void handle_bad_irq(unsigned int irq, struct irq_desc *desc);
+extern void handle_level_irq(struct irq_desc *desc);
+extern void handle_fasteoi_irq(struct irq_desc *desc);
+extern void handle_edge_irq(struct irq_desc *desc);
+extern void handle_edge_eoi_irq(struct irq_desc *desc);
+extern void handle_simple_irq(struct irq_desc *desc);
+extern void handle_percpu_irq(struct irq_desc *desc);
+extern void handle_percpu_devid_irq(struct irq_desc *desc);
+extern void handle_bad_irq(struct irq_desc *desc);
 extern void handle_nested_irq(unsigned int irq);
 
 extern int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg);
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index fbb4d5afc32b..a587a33363c7 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -135,9 +135,9 @@ static inline struct msi_desc *irq_desc_get_msi_desc(struct irq_desc *desc)
  * Architectures call this to let the generic IRQ layer
  * handle an interrupt.
  */
-static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *desc)
+static inline void generic_handle_irq_desc(struct irq_desc *desc)
 {
-	desc->handle_irq(irq, desc);
+	desc->handle_irq(desc);
 }
 
 int generic_handle_irq(unsigned int irq);
diff --git a/include/linux/irqhandler.h b/include/linux/irqhandler.h
index 62d543004197..661bed0ed1f3 100644
--- a/include/linux/irqhandler.h
+++ b/include/linux/irqhandler.h
@@ -8,7 +8,7 @@
 
 struct irq_desc;
 struct irq_data;
-typedef	void (*irq_flow_handler_t)(unsigned int irq, struct irq_desc *desc);
+typedef	void (*irq_flow_handler_t)(struct irq_desc *desc);
 typedef	void (*irq_preflow_handler_t)(struct irq_data *data);
 
 #endif
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 8c55d545558f..e28169dd1c36 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -372,7 +372,6 @@ static bool irq_may_run(struct irq_desc *desc)
 
 /**
  *	handle_simple_irq - Simple and software-decoded IRQs.
- *	@irq:	the interrupt number
  *	@desc:	the interrupt description structure for this irq
  *
  *	Simple interrupts are either sent from a demultiplexing interrupt
@@ -382,8 +381,7 @@ static bool irq_may_run(struct irq_desc *desc)
  *	Note: The caller is expected to handle the ack, clear, mask and
  *	unmask issues if necessary.
  */
-void
-handle_simple_irq(unsigned int irq, struct irq_desc *desc)
+void handle_simple_irq(struct irq_desc *desc)
 {
 	raw_spin_lock(&desc->lock);
 
@@ -425,7 +423,6 @@ static void cond_unmask_irq(struct irq_desc *desc)
 
 /**
  *	handle_level_irq - Level type irq handler
- *	@irq:	the interrupt number
  *	@desc:	the interrupt description structure for this irq
  *
  *	Level type interrupts are active as long as the hardware line has
@@ -433,8 +430,7 @@ static void cond_unmask_irq(struct irq_desc *desc)
  *	it after the associated handler has acknowledged the device, so the
  *	interrupt line is back to inactive.
  */
-void
-handle_level_irq(unsigned int irq, struct irq_desc *desc)
+void handle_level_irq(struct irq_desc *desc)
 {
 	raw_spin_lock(&desc->lock);
 	mask_ack_irq(desc);
@@ -496,7 +492,6 @@ static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip)
 
 /**
  *	handle_fasteoi_irq - irq handler for transparent controllers
- *	@irq:	the interrupt number
  *	@desc:	the interrupt description structure for this irq
  *
  *	Only a single callback will be issued to the chip: an ->eoi()
@@ -504,8 +499,7 @@ static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip)
  *	for modern forms of interrupt handlers, which handle the flow
  *	details in hardware, transparently.
  */
-void
-handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
+void handle_fasteoi_irq(struct irq_desc *desc)
 {
 	struct irq_chip *chip = desc->irq_data.chip;
 
@@ -546,7 +540,6 @@ EXPORT_SYMBOL_GPL(handle_fasteoi_irq);
 
 /**
  *	handle_edge_irq - edge type IRQ handler
- *	@irq:	the interrupt number
  *	@desc:	the interrupt description structure for this irq
  *
  *	Interrupt occures on the falling and/or rising edge of a hardware
@@ -560,8 +553,7 @@ EXPORT_SYMBOL_GPL(handle_fasteoi_irq);
  *	the handler was running. If all pending interrupts are handled, the
  *	loop is left.
  */
-void
-handle_edge_irq(unsigned int irq, struct irq_desc *desc)
+void handle_edge_irq(struct irq_desc *desc)
 {
 	raw_spin_lock(&desc->lock);
 
@@ -618,13 +610,12 @@ EXPORT_SYMBOL(handle_edge_irq);
 #ifdef CONFIG_IRQ_EDGE_EOI_HANDLER
 /**
  *	handle_edge_eoi_irq - edge eoi type IRQ handler
- *	@irq:	the interrupt number
  *	@desc:	the interrupt description structure for this irq
  *
  * Similar as the above handle_edge_irq, but using eoi and w/o the
  * mask/unmask logic.
  */
-void handle_edge_eoi_irq(unsigned int irq, struct irq_desc *desc)
+void handle_edge_eoi_irq(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 
@@ -665,13 +656,11 @@ out_eoi:
 
 /**
  *	handle_percpu_irq - Per CPU local irq handler
- *	@irq:	the interrupt number
  *	@desc:	the interrupt description structure for this irq
  *
  *	Per CPU interrupts on SMP machines without locking requirements
  */
-void
-handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
+void handle_percpu_irq(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 
@@ -688,7 +677,6 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
 
 /**
  * handle_percpu_devid_irq - Per CPU local irq handler with per cpu dev ids
- * @irq:	the interrupt number
  * @desc:	the interrupt description structure for this irq
  *
  * Per CPU interrupts on SMP machines without locking requirements. Same as
@@ -698,11 +686,12 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
  * contain the real device id for the cpu on which this handler is
  * called
  */
-void handle_percpu_devid_irq(unsigned int irq, struct irq_desc *desc)
+void handle_percpu_devid_irq(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	struct irqaction *action = desc->action;
 	void *dev_id = raw_cpu_ptr(action->percpu_dev_id);
+	unsigned int irq = irq_desc_get_irq(desc);
 	irqreturn_t res;
 
 	kstat_incr_irqs_this_cpu(desc);
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index b6eeea8a80c5..de41a68fc038 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -27,8 +27,10 @@
  *
  * Handles spurious and unhandled IRQ's. It also prints a debugmessage.
  */
-void handle_bad_irq(unsigned int irq, struct irq_desc *desc)
+void handle_bad_irq(struct irq_desc *desc)
 {
+	unsigned int irq = irq_desc_get_irq(desc);
+
 	print_irq_desc(irq, desc);
 	kstat_incr_irqs_this_cpu(desc);
 	ack_bad_irq(irq);
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 596669436f7a..239e2ae2c947 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -347,7 +347,7 @@ int generic_handle_irq(unsigned int irq)
 
 	if (!desc)
 		return -EINVAL;
-	generic_handle_irq_desc(irq, desc);
+	generic_handle_irq_desc(desc);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(generic_handle_irq);
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index dd95f44f99b2..b86886beee4f 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -38,7 +38,7 @@ static void resend_irqs(unsigned long arg)
 		clear_bit(irq, irqs_resend);
 		desc = irq_to_desc(irq);
 		local_irq_disable();
-		desc->handle_irq(irq, desc);
+		desc->handle_irq(desc);
 		local_irq_enable();
 	}
 }
-- 
cgit v1.2.3


From f9f9e7b776142fb1c0782cade004cc8e0147a199 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 16 Sep 2015 11:51:12 -0400
Subject: Revert "cgroup: simplify threadgroup locking"

This reverts commit b5ba75b5fc0e8404e2c50cb68f39bb6a53fc916f.

d59cfc09c32a ("sched, cgroup: replace signal_struct->group_rwsem with
a global percpu_rwsem") and b5ba75b5fc0e ("cgroup: simplify
threadgroup locking") changed how cgroup synchronizes against task
fork and exits so that it uses global percpu_rwsem instead of
per-process rwsem; unfortunately, the write [un]lock paths of
percpu_rwsem always involve synchronize_rcu_expedited() which turned
out to be too expensive.

Improvements for percpu_rwsem are scheduled to be merged in the coming
v4.4-rc1 merge window which alleviates this issue.  For now, revert
the two commits to restore per-process rwsem.  They will be re-applied
for the v4.4-rc1 merge window.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/g/55F8097A.7000206@de.ibm.com
Reported-by: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: stable@vger.kernel.org # v4.2+
---
 kernel/cgroup.c | 45 +++++++++++++++++++++++++++++++++------------
 1 file changed, 33 insertions(+), 12 deletions(-)

(limited to 'kernel')

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2cf0f79f1fc9..115091efa889 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2460,13 +2460,14 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
 	if (!cgrp)
 		return -ENODEV;
 
-	percpu_down_write(&cgroup_threadgroup_rwsem);
+retry_find_task:
 	rcu_read_lock();
 	if (pid) {
 		tsk = find_task_by_vpid(pid);
 		if (!tsk) {
+			rcu_read_unlock();
 			ret = -ESRCH;
-			goto out_unlock_rcu;
+			goto out_unlock_cgroup;
 		}
 	} else {
 		tsk = current;
@@ -2482,23 +2483,37 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
 	 */
 	if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
 		ret = -EINVAL;
-		goto out_unlock_rcu;
+		rcu_read_unlock();
+		goto out_unlock_cgroup;
 	}
 
 	get_task_struct(tsk);
 	rcu_read_unlock();
 
+	percpu_down_write(&cgroup_threadgroup_rwsem);
+	if (threadgroup) {
+		if (!thread_group_leader(tsk)) {
+			/*
+			 * a race with de_thread from another thread's exec()
+			 * may strip us of our leadership, if this happens,
+			 * there is no choice but to throw this task away and
+			 * try again; this is
+			 * "double-double-toil-and-trouble-check locking".
+			 */
+			percpu_up_write(&cgroup_threadgroup_rwsem);
+			put_task_struct(tsk);
+			goto retry_find_task;
+		}
+	}
+
 	ret = cgroup_procs_write_permission(tsk, cgrp, of);
 	if (!ret)
 		ret = cgroup_attach_task(cgrp, tsk, threadgroup);
 
-	put_task_struct(tsk);
-	goto out_unlock_threadgroup;
-
-out_unlock_rcu:
-	rcu_read_unlock();
-out_unlock_threadgroup:
 	percpu_up_write(&cgroup_threadgroup_rwsem);
+
+	put_task_struct(tsk);
+out_unlock_cgroup:
 	cgroup_kn_unlock(of->kn);
 	return ret ?: nbytes;
 }
@@ -2643,8 +2658,6 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
 
 	lockdep_assert_held(&cgroup_mutex);
 
-	percpu_down_write(&cgroup_threadgroup_rwsem);
-
 	/* look up all csses currently attached to @cgrp's subtree */
 	down_read(&css_set_rwsem);
 	css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) {
@@ -2700,8 +2713,17 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
 				goto out_finish;
 			last_task = task;
 
+			percpu_down_write(&cgroup_threadgroup_rwsem);
+			/* raced against de_thread() from another thread? */
+			if (!thread_group_leader(task)) {
+				percpu_up_write(&cgroup_threadgroup_rwsem);
+				put_task_struct(task);
+				continue;
+			}
+
 			ret = cgroup_migrate(src_cset->dfl_cgrp, task, true);
 
+			percpu_up_write(&cgroup_threadgroup_rwsem);
 			put_task_struct(task);
 
 			if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret))
@@ -2711,7 +2733,6 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
 
 out_finish:
 	cgroup_migrate_finish(&preloaded_csets);
-	percpu_up_write(&cgroup_threadgroup_rwsem);
 	return ret;
 }
 
-- 
cgit v1.2.3


From 0c986253b939cc14c69d4adbe2b4121bdf4aa220 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 16 Sep 2015 11:51:12 -0400
Subject: Revert "sched, cgroup: replace signal_struct->group_rwsem with a
 global percpu_rwsem"

This reverts commit d59cfc09c32a2ae31f1c3bc2983a0cd79afb3f14.

d59cfc09c32a ("sched, cgroup: replace signal_struct->group_rwsem with
a global percpu_rwsem") and b5ba75b5fc0e ("cgroup: simplify
threadgroup locking") changed how cgroup synchronizes against task
fork and exits so that it uses global percpu_rwsem instead of
per-process rwsem; unfortunately, the write [un]lock paths of
percpu_rwsem always involve synchronize_rcu_expedited() which turned
out to be too expensive.

Improvements for percpu_rwsem are scheduled to be merged in the coming
v4.4-rc1 merge window which alleviates this issue.  For now, revert
the two commits to restore per-process rwsem.  They will be re-applied
for the v4.4-rc1 merge window.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/g/55F8097A.7000206@de.ibm.com
Reported-by: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: stable@vger.kernel.org # v4.2+
---
 include/linux/cgroup-defs.h | 27 ++--------------
 include/linux/init_task.h   |  8 +++++
 include/linux/sched.h       | 12 +++++++
 kernel/cgroup.c             | 77 +++++++++++++++++++++++++++++++++------------
 kernel/fork.c               |  4 +++
 5 files changed, 83 insertions(+), 45 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 4d8fcf2187dc..8492721b39be 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -473,31 +473,8 @@ struct cgroup_subsys {
 	unsigned int depends_on;
 };
 
-extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem;
-
-/**
- * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups
- * @tsk: target task
- *
- * Called from threadgroup_change_begin() and allows cgroup operations to
- * synchronize against threadgroup changes using a percpu_rw_semaphore.
- */
-static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk)
-{
-	percpu_down_read(&cgroup_threadgroup_rwsem);
-}
-
-/**
- * cgroup_threadgroup_change_end - threadgroup exclusion for cgroups
- * @tsk: target task
- *
- * Called from threadgroup_change_end().  Counterpart of
- * cgroup_threadcgroup_change_begin().
- */
-static inline void cgroup_threadgroup_change_end(struct task_struct *tsk)
-{
-	percpu_up_read(&cgroup_threadgroup_rwsem);
-}
+void cgroup_threadgroup_change_begin(struct task_struct *tsk);
+void cgroup_threadgroup_change_end(struct task_struct *tsk);
 
 #else	/* CONFIG_CGROUPS */
 
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index d0b380ee7d67..e38681f4912d 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -25,6 +25,13 @@
 extern struct files_struct init_files;
 extern struct fs_struct init_fs;
 
+#ifdef CONFIG_CGROUPS
+#define INIT_GROUP_RWSEM(sig)						\
+	.group_rwsem = __RWSEM_INITIALIZER(sig.group_rwsem),
+#else
+#define INIT_GROUP_RWSEM(sig)
+#endif
+
 #ifdef CONFIG_CPUSETS
 #define INIT_CPUSET_SEQ(tsk)							\
 	.mems_allowed_seq = SEQCNT_ZERO(tsk.mems_allowed_seq),
@@ -57,6 +64,7 @@ extern struct fs_struct init_fs;
 	INIT_PREV_CPUTIME(sig)						\
 	.cred_guard_mutex =						\
 		 __MUTEX_INITIALIZER(sig.cred_guard_mutex),		\
+	INIT_GROUP_RWSEM(sig)						\
 }
 
 extern struct nsproxy init_nsproxy;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a4ab9daa387c..b7b9501b41af 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -762,6 +762,18 @@ struct signal_struct {
 	unsigned audit_tty_log_passwd;
 	struct tty_audit_buf *tty_audit_buf;
 #endif
+#ifdef CONFIG_CGROUPS
+	/*
+	 * group_rwsem prevents new tasks from entering the threadgroup and
+	 * member tasks from exiting,a more specifically, setting of
+	 * PF_EXITING.  fork and exit paths are protected with this rwsem
+	 * using threadgroup_change_begin/end().  Users which require
+	 * threadgroup to remain stable should use threadgroup_[un]lock()
+	 * which also takes care of exec path.  Currently, cgroup is the
+	 * only user.
+	 */
+	struct rw_semaphore group_rwsem;
+#endif
 
 	oom_flags_t oom_flags;
 	short oom_score_adj;		/* OOM kill score adjustment */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 115091efa889..2c9eae6ad970 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -46,7 +46,6 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/rwsem.h>
-#include <linux/percpu-rwsem.h>
 #include <linux/string.h>
 #include <linux/sort.h>
 #include <linux/kmod.h>
@@ -104,8 +103,6 @@ static DEFINE_SPINLOCK(cgroup_idr_lock);
  */
 static DEFINE_SPINLOCK(release_agent_path_lock);
 
-struct percpu_rw_semaphore cgroup_threadgroup_rwsem;
-
 #define cgroup_assert_mutex_or_rcu_locked()				\
 	RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&			\
 			   !lockdep_is_held(&cgroup_mutex),		\
@@ -874,6 +871,48 @@ static struct css_set *find_css_set(struct css_set *old_cset,
 	return cset;
 }
 
+void cgroup_threadgroup_change_begin(struct task_struct *tsk)
+{
+	down_read(&tsk->signal->group_rwsem);
+}
+
+void cgroup_threadgroup_change_end(struct task_struct *tsk)
+{
+	up_read(&tsk->signal->group_rwsem);
+}
+
+/**
+ * threadgroup_lock - lock threadgroup
+ * @tsk: member task of the threadgroup to lock
+ *
+ * Lock the threadgroup @tsk belongs to.  No new task is allowed to enter
+ * and member tasks aren't allowed to exit (as indicated by PF_EXITING) or
+ * change ->group_leader/pid.  This is useful for cases where the threadgroup
+ * needs to stay stable across blockable operations.
+ *
+ * fork and exit explicitly call threadgroup_change_{begin|end}() for
+ * synchronization.  While held, no new task will be added to threadgroup
+ * and no existing live task will have its PF_EXITING set.
+ *
+ * de_thread() does threadgroup_change_{begin|end}() when a non-leader
+ * sub-thread becomes a new leader.
+ */
+static void threadgroup_lock(struct task_struct *tsk)
+{
+	down_write(&tsk->signal->group_rwsem);
+}
+
+/**
+ * threadgroup_unlock - unlock threadgroup
+ * @tsk: member task of the threadgroup to unlock
+ *
+ * Reverse threadgroup_lock().
+ */
+static inline void threadgroup_unlock(struct task_struct *tsk)
+{
+	up_write(&tsk->signal->group_rwsem);
+}
+
 static struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root)
 {
 	struct cgroup *root_cgrp = kf_root->kn->priv;
@@ -2074,9 +2113,9 @@ static void cgroup_task_migrate(struct cgroup *old_cgrp,
 	lockdep_assert_held(&css_set_rwsem);
 
 	/*
-	 * We are synchronized through cgroup_threadgroup_rwsem against
-	 * PF_EXITING setting such that we can't race against cgroup_exit()
-	 * changing the css_set to init_css_set and dropping the old one.
+	 * We are synchronized through threadgroup_lock() against PF_EXITING
+	 * setting such that we can't race against cgroup_exit() changing the
+	 * css_set to init_css_set and dropping the old one.
 	 */
 	WARN_ON_ONCE(tsk->flags & PF_EXITING);
 	old_cset = task_css_set(tsk);
@@ -2133,11 +2172,10 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets)
  * @src_cset and add it to @preloaded_csets, which should later be cleaned
  * up by cgroup_migrate_finish().
  *
- * This function may be called without holding cgroup_threadgroup_rwsem
- * even if the target is a process.  Threads may be created and destroyed
- * but as long as cgroup_mutex is not dropped, no new css_set can be put
- * into play and the preloaded css_sets are guaranteed to cover all
- * migrations.
+ * This function may be called without holding threadgroup_lock even if the
+ * target is a process.  Threads may be created and destroyed but as long
+ * as cgroup_mutex is not dropped, no new css_set can be put into play and
+ * the preloaded css_sets are guaranteed to cover all migrations.
  */
 static void cgroup_migrate_add_src(struct css_set *src_cset,
 				   struct cgroup *dst_cgrp,
@@ -2240,7 +2278,7 @@ err:
  * @threadgroup: whether @leader points to the whole process or a single task
  *
  * Migrate a process or task denoted by @leader to @cgrp.  If migrating a
- * process, the caller must be holding cgroup_threadgroup_rwsem.  The
+ * process, the caller must be holding threadgroup_lock of @leader.  The
  * caller is also responsible for invoking cgroup_migrate_add_src() and
  * cgroup_migrate_prepare_dst() on the targets before invoking this
  * function and following up with cgroup_migrate_finish().
@@ -2368,7 +2406,7 @@ out_release_tset:
  * @leader: the task or the leader of the threadgroup to be attached
  * @threadgroup: attach the whole threadgroup?
  *
- * Call holding cgroup_mutex and cgroup_threadgroup_rwsem.
+ * Call holding cgroup_mutex and threadgroup_lock of @leader.
  */
 static int cgroup_attach_task(struct cgroup *dst_cgrp,
 			      struct task_struct *leader, bool threadgroup)
@@ -2490,7 +2528,7 @@ retry_find_task:
 	get_task_struct(tsk);
 	rcu_read_unlock();
 
-	percpu_down_write(&cgroup_threadgroup_rwsem);
+	threadgroup_lock(tsk);
 	if (threadgroup) {
 		if (!thread_group_leader(tsk)) {
 			/*
@@ -2500,7 +2538,7 @@ retry_find_task:
 			 * try again; this is
 			 * "double-double-toil-and-trouble-check locking".
 			 */
-			percpu_up_write(&cgroup_threadgroup_rwsem);
+			threadgroup_unlock(tsk);
 			put_task_struct(tsk);
 			goto retry_find_task;
 		}
@@ -2510,7 +2548,7 @@ retry_find_task:
 	if (!ret)
 		ret = cgroup_attach_task(cgrp, tsk, threadgroup);
 
-	percpu_up_write(&cgroup_threadgroup_rwsem);
+	threadgroup_unlock(tsk);
 
 	put_task_struct(tsk);
 out_unlock_cgroup:
@@ -2713,17 +2751,17 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
 				goto out_finish;
 			last_task = task;
 
-			percpu_down_write(&cgroup_threadgroup_rwsem);
+			threadgroup_lock(task);
 			/* raced against de_thread() from another thread? */
 			if (!thread_group_leader(task)) {
-				percpu_up_write(&cgroup_threadgroup_rwsem);
+				threadgroup_unlock(task);
 				put_task_struct(task);
 				continue;
 			}
 
 			ret = cgroup_migrate(src_cset->dfl_cgrp, task, true);
 
-			percpu_up_write(&cgroup_threadgroup_rwsem);
+			threadgroup_unlock(task);
 			put_task_struct(task);
 
 			if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret))
@@ -5045,7 +5083,6 @@ int __init cgroup_init(void)
 	unsigned long key;
 	int ssid, err;
 
-	BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem));
 	BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files));
 	BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files));
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 7d5f0f118a63..2845623fb582 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1149,6 +1149,10 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	tty_audit_fork(sig);
 	sched_autogroup_fork(sig);
 
+#ifdef CONFIG_CGROUPS
+	init_rwsem(&sig->group_rwsem);
+#endif
+
 	sig->oom_score_adj = current->signal->oom_score_adj;
 	sig->oom_score_adj_min = current->signal->oom_score_adj_min;
 
-- 
cgit v1.2.3


From de9b8f5dcbd94bfb1d249907a635f1fb1968e19c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 13 Aug 2015 23:09:29 +0200
Subject: sched: Fix crash trying to dequeue/enqueue the idle thread

Sasha reports that his virtual machine tries to schedule the idle
thread since commit 6c37067e2786 ("sched: Change the
sched_class::set_cpus_allowed() calling context").

Hit trace shows this happening from idle_thread_get()->init_idle(),
which is the _second_ init_idle() invocation on that task_struct, the
first being done through idle_init()->fork_idle(). (this code is
insane...)

Because we call init_idle() twice in a row, its ->sched_class ==
&idle_sched_class and ->on_rq = TASK_ON_RQ_QUEUED. This means
do_set_cpus_allowed() think we're queued and will call dequeue_task(),
which is implemented with BUG() for the idle class, seeing how
dequeueing the idle task is a daft thing.

Aside of the whole insanity of calling init_idle() _twice_, change the
code to call set_cpus_allowed_common() instead as this is 'obviously'
before the idle task gets ran etc..

Reported-by: Sasha Levin <sasha.levin@oracle.com>
Tested-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 6c37067e2786 ("sched: Change the sched_class::set_cpus_allowed() calling context")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/core.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 97d276ff1edb..f0d043ec0182 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4927,7 +4927,15 @@ void init_idle(struct task_struct *idle, int cpu)
 	idle->state = TASK_RUNNING;
 	idle->se.exec_start = sched_clock();
 
-	do_set_cpus_allowed(idle, cpumask_of(cpu));
+#ifdef CONFIG_SMP
+	/*
+	 * Its possible that init_idle() gets called multiple times on a task,
+	 * in that case do_set_cpus_allowed() will not do the right thing.
+	 *
+	 * And since this is boot we can forgo the serialization.
+	 */
+	set_cpus_allowed_common(idle, cpumask_of(cpu));
+#endif
 	/*
 	 * We're having a chicken and egg problem, even though we are
 	 * holding rq->lock, the cpu isn't yet set to this cpu so the
@@ -4944,7 +4952,7 @@ void init_idle(struct task_struct *idle, int cpu)
 
 	rq->curr = rq->idle = idle;
 	idle->on_rq = TASK_ON_RQ_QUEUED;
-#if defined(CONFIG_SMP)
+#ifdef CONFIG_SMP
 	idle->on_cpu = 1;
 #endif
 	raw_spin_unlock(&rq->lock);
@@ -4959,7 +4967,7 @@ void init_idle(struct task_struct *idle, int cpu)
 	idle->sched_class = &idle_sched_class;
 	ftrace_graph_init_idle_task(idle, cpu);
 	vtime_init_idle(idle, cpu);
-#if defined(CONFIG_SMP)
+#ifdef CONFIG_SMP
 	sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
 #endif
 }
-- 
cgit v1.2.3


From f55fc2a57cc9ca3b1bb4fb8eb25b6e1989e5b993 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 9 Sep 2015 19:06:33 +0200
Subject: perf: Restructure perf syscall point of no return

The exclusive_event_installable() stuff only works because its
exclusive with the grouping bits.

Rework the code such that there is a sane place to error out before we
go do things we cannot undo.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 49 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 32 insertions(+), 17 deletions(-)

(limited to 'kernel')

diff --git a/kernel/events/core.c b/kernel/events/core.c
index f548f69c4299..39679f749500 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8297,13 +8297,30 @@ SYSCALL_DEFINE5(perf_event_open,
 
 	if (move_group) {
 		gctx = group_leader->ctx;
+		mutex_lock_double(&gctx->mutex, &ctx->mutex);
+	} else {
+		mutex_lock(&ctx->mutex);
+	}
+
+	/*
+	 * Must be under the same ctx::mutex as perf_install_in_context(),
+	 * because we need to serialize with concurrent event creation.
+	 */
+	if (!exclusive_event_installable(event, ctx)) {
+		/* exclusive and group stuff are assumed mutually exclusive */
+		WARN_ON_ONCE(move_group);
+
+		err = -EBUSY;
+		goto err_locked;
+	}
 
+	WARN_ON_ONCE(ctx->parent_ctx);
+
+	if (move_group) {
 		/*
 		 * See perf_event_ctx_lock() for comments on the details
 		 * of swizzling perf_event::ctx.
 		 */
-		mutex_lock_double(&gctx->mutex, &ctx->mutex);
-
 		perf_remove_from_context(group_leader, false);
 
 		list_for_each_entry(sibling, &group_leader->sibling_list,
@@ -8311,13 +8328,7 @@ SYSCALL_DEFINE5(perf_event_open,
 			perf_remove_from_context(sibling, false);
 			put_ctx(gctx);
 		}
-	} else {
-		mutex_lock(&ctx->mutex);
-	}
-
-	WARN_ON_ONCE(ctx->parent_ctx);
 
-	if (move_group) {
 		/*
 		 * Wait for everybody to stop referencing the events through
 		 * the old lists, before installing it on new lists.
@@ -8349,22 +8360,20 @@ SYSCALL_DEFINE5(perf_event_open,
 		perf_event__state_init(group_leader);
 		perf_install_in_context(ctx, group_leader, group_leader->cpu);
 		get_ctx(ctx);
-	}
 
-	if (!exclusive_event_installable(event, ctx)) {
-		err = -EBUSY;
-		mutex_unlock(&ctx->mutex);
-		fput(event_file);
-		goto err_context;
+		/*
+		 * Now that all events are installed in @ctx, nothing
+		 * references @gctx anymore, so drop the last reference we have
+		 * on it.
+		 */
+		put_ctx(gctx);
 	}
 
 	perf_install_in_context(ctx, event, event->cpu);
 	perf_unpin_context(ctx);
 
-	if (move_group) {
+	if (move_group)
 		mutex_unlock(&gctx->mutex);
-		put_ctx(gctx);
-	}
 	mutex_unlock(&ctx->mutex);
 
 	put_online_cpus();
@@ -8391,6 +8400,12 @@ SYSCALL_DEFINE5(perf_event_open,
 	fd_install(event_fd, event_file);
 	return event_fd;
 
+err_locked:
+	if (move_group)
+		mutex_unlock(&gctx->mutex);
+	mutex_unlock(&ctx->mutex);
+/* err_file: */
+	fput(event_file);
 err_context:
 	perf_unpin_context(ctx);
 	put_ctx(ctx);
-- 
cgit v1.2.3


From a723968c0ed36db676478c3d26078f13484fe01c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 9 Sep 2015 19:06:33 +0200
Subject: perf: Fix u16 overflows

Vince reported that its possible to overflow the various size fields
and get weird stuff if you stick too many events in a group.

Put a lid on this by requiring the fixed record size not exceed 16k.
This is still a fair amount of events (silly amount really) and leaves
plenty room for callchains and stack dwarves while also avoiding
overflowing the u16 variables.

Reported-by: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 50 ++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 40 insertions(+), 10 deletions(-)

(limited to 'kernel')

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 39679f749500..dbb5329b6a3a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1243,11 +1243,7 @@ static inline void perf_event__state_init(struct perf_event *event)
 					      PERF_EVENT_STATE_INACTIVE;
 }
 
-/*
- * Called at perf_event creation and when events are attached/detached from a
- * group.
- */
-static void perf_event__read_size(struct perf_event *event)
+static void __perf_event_read_size(struct perf_event *event, int nr_siblings)
 {
 	int entry = sizeof(u64); /* value */
 	int size = 0;
@@ -1263,7 +1259,7 @@ static void perf_event__read_size(struct perf_event *event)
 		entry += sizeof(u64);
 
 	if (event->attr.read_format & PERF_FORMAT_GROUP) {
-		nr += event->group_leader->nr_siblings;
+		nr += nr_siblings;
 		size += sizeof(u64);
 	}
 
@@ -1271,14 +1267,11 @@ static void perf_event__read_size(struct perf_event *event)
 	event->read_size = size;
 }
 
-static void perf_event__header_size(struct perf_event *event)
+static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
 {
 	struct perf_sample_data *data;
-	u64 sample_type = event->attr.sample_type;
 	u16 size = 0;
 
-	perf_event__read_size(event);
-
 	if (sample_type & PERF_SAMPLE_IP)
 		size += sizeof(data->ip);
 
@@ -1303,6 +1296,17 @@ static void perf_event__header_size(struct perf_event *event)
 	event->header_size = size;
 }
 
+/*
+ * Called at perf_event creation and when events are attached/detached from a
+ * group.
+ */
+static void perf_event__header_size(struct perf_event *event)
+{
+	__perf_event_read_size(event,
+			       event->group_leader->nr_siblings);
+	__perf_event_header_size(event, event->attr.sample_type);
+}
+
 static void perf_event__id_header_size(struct perf_event *event)
 {
 	struct perf_sample_data *data;
@@ -1330,6 +1334,27 @@ static void perf_event__id_header_size(struct perf_event *event)
 	event->id_header_size = size;
 }
 
+static bool perf_event_validate_size(struct perf_event *event)
+{
+	/*
+	 * The values computed here will be over-written when we actually
+	 * attach the event.
+	 */
+	__perf_event_read_size(event, event->group_leader->nr_siblings + 1);
+	__perf_event_header_size(event, event->attr.sample_type & ~PERF_SAMPLE_READ);
+	perf_event__id_header_size(event);
+
+	/*
+	 * Sum the lot; should not exceed the 64k limit we have on records.
+	 * Conservative limit to allow for callchains and other variable fields.
+	 */
+	if (event->read_size + event->header_size +
+	    event->id_header_size + sizeof(struct perf_event_header) >= 16*1024)
+		return false;
+
+	return true;
+}
+
 static void perf_group_attach(struct perf_event *event)
 {
 	struct perf_event *group_leader = event->group_leader, *pos;
@@ -8302,6 +8327,11 @@ SYSCALL_DEFINE5(perf_event_open,
 		mutex_lock(&ctx->mutex);
 	}
 
+	if (!perf_event_validate_size(event)) {
+		err = -E2BIG;
+		goto err_locked;
+	}
+
 	/*
 	 * Must be under the same ctx::mutex as perf_install_in_context(),
 	 * because we need to serialize with concurrent event creation.
-- 
cgit v1.2.3


From f73e22ab450140830005581c2c7ec389791a1b8d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 9 Sep 2015 20:48:22 +0200
Subject: perf: Fix races in computing the header sizes

There are two races with the current code:

 - Another event can join the group and compute a larger header_size
   concurrently, if the smaller store wins we'll have an incorrect
   header_size set.

 - We compute the header_size after the event becomes active,
   therefore its possible to use the size before its computed.

Remedy the first by moving the computation inside the ctx::mutex lock,
and the second by placing it _before_ perf_install_in_context().

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'kernel')

diff --git a/kernel/events/core.c b/kernel/events/core.c
index dbb5329b6a3a..b11756f9b6dc 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8399,6 +8399,15 @@ SYSCALL_DEFINE5(perf_event_open,
 		put_ctx(gctx);
 	}
 
+	/*
+	 * Precalculate sample_data sizes; do while holding ctx::mutex such
+	 * that we're serialized against further additions and before
+	 * perf_install_in_context() which is the point the event is active and
+	 * can use these values.
+	 */
+	perf_event__header_size(event);
+	perf_event__id_header_size(event);
+
 	perf_install_in_context(ctx, event, event->cpu);
 	perf_unpin_context(ctx);
 
@@ -8414,12 +8423,6 @@ SYSCALL_DEFINE5(perf_event_open,
 	list_add_tail(&event->owner_entry, &current->perf_event_list);
 	mutex_unlock(&current->perf_event_mutex);
 
-	/*
-	 * Precalculate sample_data sizes
-	 */
-	perf_event__header_size(event);
-	perf_event__id_header_size(event);
-
 	/*
 	 * Drop the reference on the group_event after placing the
 	 * new event on the sibling_list. This ensures destruction
-- 
cgit v1.2.3


From 00cc1633816de8c95f337608a1ea64e228faf771 Mon Sep 17 00:00:00 2001
From: Dominik Dingel <dingel@linux.vnet.ibm.com>
Date: Fri, 18 Sep 2015 11:27:45 +0200
Subject: sched: access local runqueue directly in single_task_running

Commit 2ee507c47293 ("sched: Add function single_task_running to let a task
check if it is the only task running on a cpu") referenced the current
runqueue with the smp_processor_id.  When CONFIG_DEBUG_PREEMPT is enabled,
that is only allowed if preemption is disabled or the currrent task is
bound to the local cpu (e.g. kernel worker).

With commit f78195129963 ("kvm: add halt_poll_ns module parameter") KVM
calls single_task_running. If CONFIG_DEBUG_PREEMPT is enabled that
generates a lot of kernel messages.

To avoid adding preemption in that cases, as it would limit the usefulness,
we change single_task_running to access directly the cpu local runqueue.

Cc: Tim Chen <tim.c.chen@linux.intel.com>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: <stable@vger.kernel.org>
Fixes: 2ee507c472939db4b146d545352b8a7c79ef47f8
Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 kernel/sched/core.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3595403921bd..4064f794ab8c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2666,13 +2666,20 @@ unsigned long nr_running(void)
 
 /*
  * Check if only the current task is running on the cpu.
+ *
+ * Caution: this function does not check that the caller has disabled
+ * preemption, thus the result might have a time-of-check-to-time-of-use
+ * race.  The caller is responsible to use it correctly, for example:
+ *
+ * - from a non-preemptable section (of course)
+ *
+ * - from a thread that is bound to a single CPU
+ *
+ * - in a loop with very short iterations (e.g. a polling loop)
  */
 bool single_task_running(void)
 {
-	if (cpu_rq(smp_processor_id())->nr_running == 1)
-		return true;
-	else
-		return false;
+	return raw_rq()->nr_running == 1;
 }
 EXPORT_SYMBOL(single_task_running);
 
-- 
cgit v1.2.3


From 19a5ecde086a6a5287978b12ae948fa691b197b7 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sun, 20 Sep 2015 21:01:22 -0700
Subject: rcu: Suppress lockdep false positive for rcp->exp_funnel_mutex

In kernels built with CONFIG_PREEMPT=y, synchronize_rcu_expedited()
invokes synchronize_sched_expedited() while holding RCU-preempt's
root rcu_node structure's ->exp_funnel_mutex, which is acquired after
the rcu_data structure's ->exp_funnel_mutex.  The first thing that
synchronize_sched_expedited() will do is acquire RCU-sched's rcu_data
structure's ->exp_funnel_mutex.   There is no danger of an actual deadlock
because the locking order is always from RCU-preempt's expedited mutexes
to those of RCU-sched.  Unfortunately, lockdep considers both rcu_data
structures' ->exp_funnel_mutex to be in the same lock class and therefore
reports a deadlock cycle.

This commit silences this false positive by placing RCU-sched's rcu_data
structures' ->exp_funnel_mutex locks into their own lock class.

Reported-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/tree.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'kernel')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 9f75f25cc5d9..775d36cc0050 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3868,6 +3868,7 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf)
 static void __init
 rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
 {
+	static struct lock_class_key rcu_exp_sched_rdp_class;
 	unsigned long flags;
 	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
 	struct rcu_node *rnp = rcu_get_root(rsp);
@@ -3883,6 +3884,10 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
 	mutex_init(&rdp->exp_funnel_mutex);
 	rcu_boot_init_nocb_percpu_data(rdp);
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	if (rsp == &rcu_sched_state)
+		lockdep_set_class_and_name(&rdp->exp_funnel_mutex,
+					   &rcu_exp_sched_rdp_class,
+					   "rcu_data_exp_sched");
 }
 
 /*
-- 
cgit v1.2.3


From ac5be6b47e8bd25b62bed2c82cda7398999f59e9 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Tue, 22 Sep 2015 14:58:49 -0700
Subject: userfaultfd: revert "userfaultfd: waitqueue: add nr wake parameter to
 __wake_up_locked_key"

This reverts commit 51360155eccb907ff8635bd10fc7de876408c2e0 and adapts
fs/userfaultfd.c to use the old version of that function.

It didn't look robust to call __wake_up_common with "nr == 1" when we
absolutely require wakeall semantics, but we've full control of what we
insert in the two waitqueue heads of the blocked userfaults.  No
exclusive waitqueue risks to be inserted into those two waitqueue heads
so we can as well stick to "nr == 1" of the old code and we can rely
purely on the fact no waitqueue inserted in one of the two waitqueue
heads we must enforce as wakeall, has wait->flags WQ_FLAG_EXCLUSIVE set.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Shuah Khan <shuahkh@osg.samsung.com>
Cc: Thierry Reding <treding@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/userfaultfd.c     | 8 ++++----
 include/linux/wait.h | 5 ++---
 kernel/sched/wait.c  | 7 +++----
 net/sunrpc/sched.c   | 2 +-
 4 files changed, 10 insertions(+), 12 deletions(-)

(limited to 'kernel')

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index f9aeb40a7197..50311703135b 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -467,8 +467,8 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
 	 * the fault_*wqh.
 	 */
 	spin_lock(&ctx->fault_pending_wqh.lock);
-	__wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, 0, &range);
-	__wake_up_locked_key(&ctx->fault_wqh, TASK_NORMAL, 0, &range);
+	__wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, &range);
+	__wake_up_locked_key(&ctx->fault_wqh, TASK_NORMAL, &range);
 	spin_unlock(&ctx->fault_pending_wqh.lock);
 
 	wake_up_poll(&ctx->fd_wqh, POLLHUP);
@@ -650,10 +650,10 @@ static void __wake_userfault(struct userfaultfd_ctx *ctx,
 	spin_lock(&ctx->fault_pending_wqh.lock);
 	/* wake all in the range and autoremove */
 	if (waitqueue_active(&ctx->fault_pending_wqh))
-		__wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, 0,
+		__wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL,
 				     range);
 	if (waitqueue_active(&ctx->fault_wqh))
-		__wake_up_locked_key(&ctx->fault_wqh, TASK_NORMAL, 0, range);
+		__wake_up_locked_key(&ctx->fault_wqh, TASK_NORMAL, range);
 	spin_unlock(&ctx->fault_pending_wqh.lock);
 }
 
diff --git a/include/linux/wait.h b/include/linux/wait.h
index d3d077228d4c..1e1bf9f963a9 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -147,8 +147,7 @@ __remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old)
 
 typedef int wait_bit_action_f(struct wait_bit_key *);
 void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
-void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, int nr,
-			  void *key);
+void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key);
 void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
 void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr);
 void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr);
@@ -180,7 +179,7 @@ wait_queue_head_t *bit_waitqueue(void *, int);
 #define wake_up_poll(x, m)						\
 	__wake_up(x, TASK_NORMAL, 1, (void *) (m))
 #define wake_up_locked_poll(x, m)					\
-	__wake_up_locked_key((x), TASK_NORMAL, 1, (void *) (m))
+	__wake_up_locked_key((x), TASK_NORMAL, (void *) (m))
 #define wake_up_interruptible_poll(x, m)				\
 	__wake_up(x, TASK_INTERRUPTIBLE, 1, (void *) (m))
 #define wake_up_interruptible_sync_poll(x, m)				\
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 272d9322bc5d..052e02672d12 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -106,10 +106,9 @@ void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr)
 }
 EXPORT_SYMBOL_GPL(__wake_up_locked);
 
-void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, int nr,
-			  void *key)
+void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
 {
-	__wake_up_common(q, mode, nr, 0, key);
+	__wake_up_common(q, mode, 1, 0, key);
 }
 EXPORT_SYMBOL_GPL(__wake_up_locked_key);
 
@@ -284,7 +283,7 @@ void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
 	if (!list_empty(&wait->task_list))
 		list_del_init(&wait->task_list);
 	else if (waitqueue_active(q))
-		__wake_up_locked_key(q, mode, 1, key);
+		__wake_up_locked_key(q, mode, key);
 	spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL(abort_exclusive_wait);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index b140c092d226..337ca851a350 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -297,7 +297,7 @@ static int rpc_complete_task(struct rpc_task *task)
 	clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
 	ret = atomic_dec_and_test(&task->tk_count);
 	if (waitqueue_active(wq))
-		__wake_up_locked_key(wq, TASK_NORMAL, 1, &k);
+		__wake_up_locked_key(wq, TASK_NORMAL, &k);
 	spin_unlock_irqrestore(&wq->lock, flags);
 	return ret;
 }
-- 
cgit v1.2.3


From 21199f27b430576552b26210b3194a363d7f05cd Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 16 Sep 2015 16:10:40 +0200
Subject: locking/lockdep: Fix hlock->pin_count reset on lock stack rebuilds

Various people reported hitting the "unpinning an unpinned lock"
warning. As it turns out there are 2 places where we take a lock out
of the middle of a stack, and in those cases it would fail to preserve
the pin_count when rebuilding the lock stack.

Reported-by: Sasha Levin <sasha.levin@oracle.com>
Reported-by: Tim Spriggs <tspriggs@apple.com>
Tested-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: davej@codemonkey.org.uk
Link: http://lkml.kernel.org/r/20150916141040.GA11639@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/locking/lockdep.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 8acfbf773e06..4e49cc4c9952 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -3068,7 +3068,7 @@ static int __lock_is_held(struct lockdep_map *lock);
 static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 			  int trylock, int read, int check, int hardirqs_off,
 			  struct lockdep_map *nest_lock, unsigned long ip,
-			  int references)
+			  int references, int pin_count)
 {
 	struct task_struct *curr = current;
 	struct lock_class *class = NULL;
@@ -3157,7 +3157,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 	hlock->waittime_stamp = 0;
 	hlock->holdtime_stamp = lockstat_clock();
 #endif
-	hlock->pin_count = 0;
+	hlock->pin_count = pin_count;
 
 	if (check && !mark_irqflags(curr, hlock))
 		return 0;
@@ -3343,7 +3343,7 @@ found_it:
 			hlock_class(hlock)->subclass, hlock->trylock,
 				hlock->read, hlock->check, hlock->hardirqs_off,
 				hlock->nest_lock, hlock->acquire_ip,
-				hlock->references))
+				hlock->references, hlock->pin_count))
 			return 0;
 	}
 
@@ -3433,7 +3433,7 @@ found_it:
 			hlock_class(hlock)->subclass, hlock->trylock,
 				hlock->read, hlock->check, hlock->hardirqs_off,
 				hlock->nest_lock, hlock->acquire_ip,
-				hlock->references))
+				hlock->references, hlock->pin_count))
 			return 0;
 	}
 
@@ -3583,7 +3583,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 	current->lockdep_recursion = 1;
 	trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip);
 	__lock_acquire(lock, subclass, trylock, read, check,
-		       irqs_disabled_flags(flags), nest_lock, ip, 0);
+		       irqs_disabled_flags(flags), nest_lock, ip, 0, 0);
 	current->lockdep_recursion = 0;
 	raw_local_irq_restore(flags);
 }
-- 
cgit v1.2.3


From a1b7febd725a2cdfc8ac245b7b7437ce4b91aecb Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Fri, 25 Sep 2015 18:09:32 +0200
Subject: genirq: Fix the documentation of request_percpu_irq

The documentation of request_percpu_irq is confusing and suggest that the
interrupt is not enabled at all, while it is actually enabled on the local
CPU.

Clarify that.

Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/irq/manage.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index f9a59f6cabd2..a4360f0f62a5 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1790,9 +1790,10 @@ int setup_percpu_irq(unsigned int irq, struct irqaction *act)
  *	@devname: An ascii name for the claiming device
  *	@dev_id: A percpu cookie passed back to the handler function
  *
- *	This call allocates interrupt resources, but doesn't
- *	automatically enable the interrupt. It has to be done on each
- *	CPU using enable_percpu_irq().
+ *	This call allocates interrupt resources and enables the
+ *	interrupt on the local CPU. If the interrupt is supposed to be
+ *	enabled on other CPUs, it has to be done on each CPU using
+ *	enable_percpu_irq().
  *
  *	Dev_id must be globally unique. It is a per-cpu variable, and
  *	the handler gets called with the interrupted CPU's instance of
-- 
cgit v1.2.3


From aec2e2ad1786eb07814ae60988e0e306cd24a6cc Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Fri, 25 Sep 2015 18:09:33 +0200
Subject: irq: Export per-cpu irq allocation and de-allocation functions

Some drivers might use the per-cpu interrupts and still might be built as a
module. Export request_percpu_irq an free_percpu_irq to these user, which
also make it consistent with enable/disable_percpu_irq that were exported.

Reported-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/irq/manage.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel')

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index a4360f0f62a5..4c213864ec45 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1761,6 +1761,7 @@ void free_percpu_irq(unsigned int irq, void __percpu *dev_id)
 	kfree(__free_percpu_irq(irq, dev_id));
 	chip_bus_sync_unlock(desc);
 }
+EXPORT_SYMBOL_GPL(free_percpu_irq);
 
 /**
  *	setup_percpu_irq - setup a per-cpu interrupt
@@ -1832,6 +1833,7 @@ int request_percpu_irq(unsigned int irq, irq_handler_t handler,
 
 	return retval;
 }
+EXPORT_SYMBOL_GPL(request_percpu_irq);
 
 /**
  *	irq_get_irqchip_state - returns the irqchip state of a interrupt.
-- 
cgit v1.2.3


From a91263d520246b63c63e75ddfb072ee6a853fe15 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 30 Sep 2015 01:41:50 +0200
Subject: ebpf: migrate bpf_prog's flags to bitfield

As we need to add further flags to the bpf_prog structure, lets migrate
both bools to a bitfield representation. The size of the base structure
(excluding insns) remains unchanged at 40 bytes.

Add also tags for the kmemchecker, so that it doesn't throw false
positives. Even in case gcc would generate suboptimal code, it's not
being accessed in performance critical paths.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm/net/bpf_jit_32.c       | 2 +-
 arch/arm64/net/bpf_jit_comp.c   | 2 +-
 arch/mips/net/bpf_jit.c         | 2 +-
 arch/powerpc/net/bpf_jit_comp.c | 2 +-
 arch/s390/net/bpf_jit_comp.c    | 2 +-
 arch/sparc/net/bpf_jit_comp.c   | 2 +-
 arch/x86/net/bpf_jit_comp.c     | 2 +-
 include/linux/filter.h          | 6 ++++--
 kernel/bpf/core.c               | 4 ++++
 kernel/bpf/syscall.c            | 4 ++--
 net/core/filter.c               | 2 +-
 11 files changed, 18 insertions(+), 12 deletions(-)

(limited to 'kernel')

diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 876060bcceeb..0df5fd561513 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -1047,7 +1047,7 @@ void bpf_jit_compile(struct bpf_prog *fp)
 
 	set_memory_ro((unsigned long)header, header->pages);
 	fp->bpf_func = (void *)ctx.target;
-	fp->jited = true;
+	fp->jited = 1;
 out:
 	kfree(ctx.offsets);
 	return;
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index c047598b09e0..a44e5293c6f5 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -744,7 +744,7 @@ void bpf_int_jit_compile(struct bpf_prog *prog)
 
 	set_memory_ro((unsigned long)header, header->pages);
 	prog->bpf_func = (void *)ctx.image;
-	prog->jited = true;
+	prog->jited = 1;
 out:
 	kfree(ctx.offset);
 }
diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 0c4a133f6216..77cb27309db2 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -1251,7 +1251,7 @@ void bpf_jit_compile(struct bpf_prog *fp)
 		bpf_jit_dump(fp->len, alloc_size, 2, ctx.target);
 
 	fp->bpf_func = (void *)ctx.target;
-	fp->jited = true;
+	fp->jited = 1;
 
 out:
 	kfree(ctx.offsets);
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 17cea18a09d3..04782164ee67 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -679,7 +679,7 @@ void bpf_jit_compile(struct bpf_prog *fp)
 		((u64 *)image)[1] = local_paca->kernel_toc;
 #endif
 		fp->bpf_func = (void *)image;
-		fp->jited = true;
+		fp->jited = 1;
 	}
 out:
 	kfree(addrs);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index eeda051442c3..9a0c4c22e536 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -1310,7 +1310,7 @@ void bpf_int_jit_compile(struct bpf_prog *fp)
 	if (jit.prg_buf) {
 		set_memory_ro((unsigned long)header, header->pages);
 		fp->bpf_func = (void *) jit.prg_buf;
-		fp->jited = true;
+		fp->jited = 1;
 	}
 free_addrs:
 	kfree(jit.addrs);
diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c
index f8b9f71b9a2b..22564f5f2364 100644
--- a/arch/sparc/net/bpf_jit_comp.c
+++ b/arch/sparc/net/bpf_jit_comp.c
@@ -812,7 +812,7 @@ cond_branch:			f_offset = addrs[i + filter[i].jf];
 	if (image) {
 		bpf_flush_icache(image, image + proglen);
 		fp->bpf_func = (void *)image;
-		fp->jited = true;
+		fp->jited = 1;
 	}
 out:
 	kfree(addrs);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 70efcd0940f9..75991979f667 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1109,7 +1109,7 @@ void bpf_int_jit_compile(struct bpf_prog *prog)
 		bpf_flush_icache(header, image + proglen);
 		set_memory_ro((unsigned long)header, header->pages);
 		prog->bpf_func = (void *)image;
-		prog->jited = true;
+		prog->jited = 1;
 	}
 out:
 	kfree(addrs);
diff --git a/include/linux/filter.h b/include/linux/filter.h
index fa2cab985e57..bad618f316d7 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -326,8 +326,10 @@ struct bpf_binary_header {
 
 struct bpf_prog {
 	u16			pages;		/* Number of allocated pages */
-	bool			jited;		/* Is our filter JIT'ed? */
-	bool			gpl_compatible;	/* Is our filter GPL compatible? */
+	kmemcheck_bitfield_begin(meta);
+	u16			jited:1,	/* Is our filter JIT'ed? */
+				gpl_compatible:1; /* Is filter GPL compatible? */
+	kmemcheck_bitfield_end(meta);
 	u32			len;		/* Number of filter blocks */
 	enum bpf_prog_type	type;		/* Type of BPF program */
 	struct bpf_prog_aux	*aux;		/* Auxiliary fields */
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 67c380cfa9ca..c8855c2a7a48 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -82,6 +82,8 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
 	if (fp == NULL)
 		return NULL;
 
+	kmemcheck_annotate_bitfield(fp, meta);
+
 	aux = kzalloc(sizeof(*aux), GFP_KERNEL | gfp_extra_flags);
 	if (aux == NULL) {
 		vfree(fp);
@@ -110,6 +112,8 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
 
 	fp = __vmalloc(size, gfp_flags, PAGE_KERNEL);
 	if (fp != NULL) {
+		kmemcheck_annotate_bitfield(fp, meta);
+
 		memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
 		fp->pages = size / PAGE_SIZE;
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 35bac8e8b071..2190ab14b763 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -553,10 +553,10 @@ static int bpf_prog_load(union bpf_attr *attr)
 		goto free_prog;
 
 	prog->orig_prog = NULL;
-	prog->jited = false;
+	prog->jited = 0;
 
 	atomic_set(&prog->aux->refcnt, 1);
-	prog->gpl_compatible = is_gpl;
+	prog->gpl_compatible = is_gpl ? 1 : 0;
 
 	/* find program type: socket_filter vs tracing_filter */
 	err = find_prog_type(type, prog);
diff --git a/net/core/filter.c b/net/core/filter.c
index 60e3fe7c59c0..04664acb86ce 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1001,7 +1001,7 @@ static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp,
 	int err;
 
 	fp->bpf_func = NULL;
-	fp->jited = false;
+	fp->jited = 0;
 
 	err = bpf_check_classic(fp->insns, fp->len);
 	if (err) {
-- 
cgit v1.2.3


From c46646d0484f5d08e2bede9b45034ba5b8b489cc Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 30 Sep 2015 01:41:51 +0200
Subject: sched, bpf: add helper for retrieving routing realms

Using routing realms as part of the classifier is quite useful, it
can be viewed as a tag for one or multiple routing entries (think of
an analogy to net_cls cgroup for processes), set by user space routing
daemons or via iproute2 as an indicator for traffic classifiers and
later on processed in the eBPF program.

Unlike actions, the classifier can inspect device flags and enable
netif_keep_dst() if necessary. tc actions don't have that possibility,
but in case people know what they are doing, it can be used from there
as well (e.g. via devs that must keep dsts by design anyway).

If a realm is set, the handler returns the non-zero realm. User space
can set the full 32bit realm for the dst.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h   |  3 ++-
 include/uapi/linux/bpf.h |  7 +++++++
 kernel/bpf/syscall.c     |  2 ++
 net/core/filter.c        | 22 ++++++++++++++++++++++
 net/sched/cls_bpf.c      |  8 ++++++--
 5 files changed, 39 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index bad618f316d7..3d5fd24b321b 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -328,7 +328,8 @@ struct bpf_prog {
 	u16			pages;		/* Number of allocated pages */
 	kmemcheck_bitfield_begin(meta);
 	u16			jited:1,	/* Is our filter JIT'ed? */
-				gpl_compatible:1; /* Is filter GPL compatible? */
+				gpl_compatible:1, /* Is filter GPL compatible? */
+				dst_needed:1;	/* Do we need dst entry? */
 	kmemcheck_bitfield_end(meta);
 	u32			len;		/* Number of filter blocks */
 	enum bpf_prog_type	type;		/* Type of BPF program */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4ec0b5488294..564f1f091991 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -280,6 +280,13 @@ enum bpf_func_id {
 	 * Return: TC_ACT_REDIRECT
 	 */
 	BPF_FUNC_redirect,
+
+	/**
+	 * bpf_get_route_realm(skb) - retrieve a dst's tclassid
+	 * @skb: pointer to skb
+	 * Return: realm if != 0
+	 */
+	BPF_FUNC_get_route_realm,
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2190ab14b763..5f35f420c12f 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -402,6 +402,8 @@ static void fixup_bpf_calls(struct bpf_prog *prog)
 			 */
 			BUG_ON(!prog->aux->ops->get_func_proto);
 
+			if (insn->imm == BPF_FUNC_get_route_realm)
+				prog->dst_needed = 1;
 			if (insn->imm == BPF_FUNC_tail_call) {
 				/* mark bpf_tail_call as different opcode
 				 * to avoid conditional branch in
diff --git a/net/core/filter.c b/net/core/filter.c
index 04664acb86ce..45c69ce4c847 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -49,6 +49,7 @@
 #include <net/sch_generic.h>
 #include <net/cls_cgroup.h>
 #include <net/dst_metadata.h>
+#include <net/dst.h>
 
 /**
  *	sk_filter - run a packet through a socket filter
@@ -1478,6 +1479,25 @@ static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
 	.arg1_type      = ARG_PTR_TO_CTX,
 };
 
+static u64 bpf_get_route_realm(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+#ifdef CONFIG_IP_ROUTE_CLASSID
+	const struct dst_entry *dst;
+
+	dst = skb_dst((struct sk_buff *) (unsigned long) r1);
+	if (dst)
+		return dst->tclassid;
+#endif
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_get_route_realm_proto = {
+	.func           = bpf_get_route_realm,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_PTR_TO_CTX,
+};
+
 static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5)
 {
 	struct sk_buff *skb = (struct sk_buff *) (long) r1;
@@ -1648,6 +1668,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 		return bpf_get_skb_set_tunnel_key_proto();
 	case BPF_FUNC_redirect:
 		return &bpf_redirect_proto;
+	case BPF_FUNC_get_route_realm:
+		return &bpf_get_route_realm_proto;
 	default:
 		return sk_filter_func_proto(func_id);
 	}
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 7eeffaf69c75..5faaa5425f7b 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -262,7 +262,8 @@ static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog)
 	return 0;
 }
 
-static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog)
+static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
+				 const struct tcf_proto *tp)
 {
 	struct bpf_prog *fp;
 	char *name = NULL;
@@ -294,6 +295,9 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog)
 	prog->bpf_name = name;
 	prog->filter = fp;
 
+	if (fp->dst_needed)
+		netif_keep_dst(qdisc_dev(tp->q));
+
 	return 0;
 }
 
@@ -330,7 +334,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
 	prog->exts_integrated = have_exts;
 
 	ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
-		       cls_bpf_prog_from_efd(tb, prog);
+		       cls_bpf_prog_from_efd(tb, prog, tp);
 	if (ret < 0) {
 		tcf_exts_destroy(&exts);
 		return ret;
-- 
cgit v1.2.3


From bab18991871545dfbd10c931eb0fe8f7637156a9 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 2 Oct 2015 15:17:33 +0200
Subject: bpf, seccomp: prepare for upcoming criu support

The current ongoing effort to dump existing cBPF seccomp filters back
to user space requires to hold the pre-transformed instructions like
we do in case of socket filters from sk_attach_filter() side, so they
can be reloaded in original form at a later point in time by utilities
such as criu.

To prepare for this, simply extend the bpf_prog_create_from_user()
API to hold a flag that tells whether we should store the original
or not. Also, fanout filters could make use of that in future for
things like diag. While fanout filters already use bpf_prog_destroy(),
move seccomp over to them as well to handle original programs when
present.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Tycho Andersen <tycho.andersen@canonical.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Tested-by: Tycho Andersen <tycho.andersen@canonical.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h |  2 +-
 kernel/seccomp.c       |  4 ++--
 net/core/filter.c      | 16 +++++++++++-----
 net/packet/af_packet.c |  2 +-
 4 files changed, 15 insertions(+), 9 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 3d5fd24b321b..1bbce14bcf17 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -411,7 +411,7 @@ typedef int (*bpf_aux_classic_check_t)(struct sock_filter *filter,
 
 int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog);
 int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
-			      bpf_aux_classic_check_t trans);
+			      bpf_aux_classic_check_t trans, bool save_orig);
 void bpf_prog_destroy(struct bpf_prog *fp);
 
 int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 5bd4779282df..06858a74bb9c 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -370,7 +370,7 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
 		return ERR_PTR(-ENOMEM);
 
 	ret = bpf_prog_create_from_user(&sfilter->prog, fprog,
-					seccomp_check_filter);
+					seccomp_check_filter, false);
 	if (ret < 0) {
 		kfree(sfilter);
 		return ERR_PTR(ret);
@@ -469,7 +469,7 @@ void get_seccomp_filter(struct task_struct *tsk)
 static inline void seccomp_filter_free(struct seccomp_filter *filter)
 {
 	if (filter) {
-		bpf_prog_free(filter->prog);
+		bpf_prog_destroy(filter->prog);
 		kfree(filter);
 	}
 }
diff --git a/net/core/filter.c b/net/core/filter.c
index 53a5036fb32d..da3e5357f138 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1084,16 +1084,18 @@ EXPORT_SYMBOL_GPL(bpf_prog_create);
  *	@pfp: the unattached filter that is created
  *	@fprog: the filter program
  *	@trans: post-classic verifier transformation handler
+ *	@save_orig: save classic BPF program
  *
  * This function effectively does the same as bpf_prog_create(), only
  * that it builds up its insns buffer from user space provided buffer.
  * It also allows for passing a bpf_aux_classic_check_t handler.
  */
 int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
-			      bpf_aux_classic_check_t trans)
+			      bpf_aux_classic_check_t trans, bool save_orig)
 {
 	unsigned int fsize = bpf_classic_proglen(fprog);
 	struct bpf_prog *fp;
+	int err;
 
 	/* Make sure new filter is there and in the right amounts. */
 	if (fprog->filter == NULL)
@@ -1109,12 +1111,16 @@ int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
 	}
 
 	fp->len = fprog->len;
-	/* Since unattached filters are not copied back to user
-	 * space through sk_get_filter(), we do not need to hold
-	 * a copy here, and can spare us the work.
-	 */
 	fp->orig_prog = NULL;
 
+	if (save_orig) {
+		err = bpf_prog_store_orig_filter(fp, fprog);
+		if (err) {
+			__bpf_prog_free(fp);
+			return -ENOMEM;
+		}
+	}
+
 	/* bpf_prepare_filter() already takes care of freeing
 	 * memory in case something goes wrong.
 	 */
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index aa4b15c35884..81c900fbc4a4 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1567,7 +1567,7 @@ static int fanout_set_data_cbpf(struct packet_sock *po, char __user *data,
 	if (copy_from_user(&fprog, data, len))
 		return -EFAULT;
 
-	ret = bpf_prog_create_from_user(&new, &fprog, NULL);
+	ret = bpf_prog_create_from_user(&new, &fprog, NULL, false);
 	if (ret)
 		return ret;
 
-- 
cgit v1.2.3


From 0cdf5640e4f6940bdbbefee4bb0adb7dffb185ec Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 2 Oct 2015 18:42:00 +0200
Subject: ebpf: include perf_event only where really needed

Commit ea317b267e9d ("bpf: Add new bpf map type to store the pointer
to struct perf_event") added perf_event.h to the main eBPF header, so
it gets included for all users. perf_event.h is actually only needed
from array map side, so lets sanitize this a bit.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Kaixu Xia <xiakaixu@huawei.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h   | 1 -
 kernel/bpf/arraymap.c | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f57d7fed9ec3..c915a6b54570 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -10,7 +10,6 @@
 #include <uapi/linux/bpf.h>
 #include <linux/workqueue.h>
 #include <linux/file.h>
-#include <linux/perf_event.h>
 
 struct bpf_map;
 
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 29ace107f236..2fecc4aed119 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -15,6 +15,7 @@
 #include <linux/slab.h>
 #include <linux/mm.h>
 #include <linux/filter.h>
+#include <linux/perf_event.h>
 
 /* Called from syscall */
 static struct bpf_map *array_map_alloc(union bpf_attr *attr)
-- 
cgit v1.2.3


From 3ad0040573b0c00f88488bc31958acd07a55ee2e Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 8 Oct 2015 01:20:39 +0200
Subject: bpf: split state from prandom_u32() and consolidate {c, e}BPF prngs

While recently arguing on a seccomp discussion that raw prandom_u32()
access shouldn't be exposed to unpriviledged user space, I forgot the
fact that SKF_AD_RANDOM extension actually already does it for some time
in cBPF via commit 4cd3675ebf74 ("filter: added BPF random opcode").

Since prandom_u32() is being used in a lot of critical networking code,
lets be more conservative and split their states. Furthermore, consolidate
eBPF and cBPF prandom handlers to use the new internal PRNG. For eBPF,
bpf_get_prandom_u32() was only accessible for priviledged users, but
should that change one day, we also don't want to leak raw sequences
through things like eBPF maps.

One thought was also to have own per bpf_prog states, but due to ABI
reasons this is not easily possible, i.e. the program code currently
cannot access bpf_prog itself, and copying the rnd_state to/from the
stack scratch space whenever a program uses the prng seems not really
worth the trouble and seems too hacky. If needed, taus113 could in such
cases be implemented within eBPF using a map entry to keep the state
space, or get_random_bytes() could become a second helper in cases where
performance would not be critical.

Both sides can trigger a one-time late init via prandom_init_once() on
the shared state. Performance-wise, there should even be a tiny gain
as bpf_user_rnd_u32() saves one function call. The PRNG needs to live
inside the BPF core since kernels could have a NET-less config as well.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Cc: Chema Gonzalez <chema@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h  |  4 ++++
 kernel/bpf/core.c    | 26 ++++++++++++++++++++++++++
 kernel/bpf/helpers.c |  7 +------
 kernel/bpf/syscall.c |  2 ++
 net/core/filter.c    |  9 ++-------
 5 files changed, 35 insertions(+), 13 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c915a6b54570..3697ad563899 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -200,4 +200,8 @@ extern const struct bpf_func_proto bpf_get_current_comm_proto;
 extern const struct bpf_func_proto bpf_skb_vlan_push_proto;
 extern const struct bpf_func_proto bpf_skb_vlan_pop_proto;
 
+/* Shared helpers among cBPF and eBPF. */
+void bpf_user_rnd_init_once(void);
+u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
+
 #endif /* _LINUX_BPF_H */
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index c8855c2a7a48..80864712d2c4 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -731,6 +731,32 @@ void bpf_prog_free(struct bpf_prog *fp)
 }
 EXPORT_SYMBOL_GPL(bpf_prog_free);
 
+/* RNG for unpriviledged user space with separated state from prandom_u32(). */
+static DEFINE_PER_CPU(struct rnd_state, bpf_user_rnd_state);
+
+void bpf_user_rnd_init_once(void)
+{
+	prandom_init_once(&bpf_user_rnd_state);
+}
+
+u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	/* Should someone ever have the rather unwise idea to use some
+	 * of the registers passed into this function, then note that
+	 * this function is called from native eBPF and classic-to-eBPF
+	 * transformations. Register assignments from both sides are
+	 * different, f.e. classic always sets fn(ctx, A, X) here.
+	 */
+	struct rnd_state *state;
+	u32 res;
+
+	state = &get_cpu_var(bpf_user_rnd_state);
+	res = prandom_u32_state(state);
+	put_cpu_var(state);
+
+	return res;
+}
+
 /* Weak definitions of helper functions in case we don't have bpf syscall. */
 const struct bpf_func_proto bpf_map_lookup_elem_proto __weak;
 const struct bpf_func_proto bpf_map_update_elem_proto __weak;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 1447ec09421e..4504ca66118d 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -93,13 +93,8 @@ const struct bpf_func_proto bpf_map_delete_elem_proto = {
 	.arg2_type	= ARG_PTR_TO_MAP_KEY,
 };
 
-static u64 bpf_get_prandom_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
-{
-	return prandom_u32();
-}
-
 const struct bpf_func_proto bpf_get_prandom_u32_proto = {
-	.func		= bpf_get_prandom_u32,
+	.func		= bpf_user_rnd_u32,
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
 };
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 5f35f420c12f..c868cafbc00c 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -404,6 +404,8 @@ static void fixup_bpf_calls(struct bpf_prog *prog)
 
 			if (insn->imm == BPF_FUNC_get_route_realm)
 				prog->dst_needed = 1;
+			if (insn->imm == BPF_FUNC_get_prandom_u32)
+				bpf_user_rnd_init_once();
 			if (insn->imm == BPF_FUNC_tail_call) {
 				/* mark bpf_tail_call as different opcode
 				 * to avoid conditional branch in
diff --git a/net/core/filter.c b/net/core/filter.c
index 8f4603c712cd..342e6c8fc415 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -149,12 +149,6 @@ static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
 	return raw_smp_processor_id();
 }
 
-/* note that this only generates 32-bit random numbers */
-static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
-{
-	return prandom_u32();
-}
-
 static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
 			      struct bpf_insn *insn_buf)
 {
@@ -313,7 +307,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 			*insn = BPF_EMIT_CALL(__get_raw_cpu_id);
 			break;
 		case SKF_AD_OFF + SKF_AD_RANDOM:
-			*insn = BPF_EMIT_CALL(__get_random_u32);
+			*insn = BPF_EMIT_CALL(bpf_user_rnd_u32);
+			bpf_user_rnd_init_once();
 			break;
 		}
 		break;
-- 
cgit v1.2.3


From ff936a04e5f28b7e0455be0e7fa91334f89e4b44 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Wed, 7 Oct 2015 10:55:41 -0700
Subject: bpf: fix cb access in socket filter programs

eBPF socket filter programs may see junk in 'u32 cb[5]' area,
since it could have been used by protocol layers earlier.

For socket filter programs used in af_packet we need to clean
20 bytes of skb->cb area if it could be used by the program.
For programs attached to TCP/UDP sockets we need to save/restore
these 20 bytes, since it's used by protocol layers.

Remove SK_RUN_FILTER macro, since it's no longer used.

Long term we may move this bpf cb area to per-cpu scratch, but that
requires addition of new 'per-cpu load/store' instructions,
so not suitable as a short term fix.

Fixes: d691f9e8d440 ("bpf: allow programs to write to certain skb fields")
Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h    |  6 +++---
 include/linux/filter.h | 39 +++++++++++++++++++++++++++++++++++----
 kernel/bpf/verifier.c  |  2 +-
 net/core/filter.c      | 12 +++++++-----
 net/packet/af_packet.c | 10 +++++-----
 5 files changed, 51 insertions(+), 18 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 3697ad563899..b4fdee6cb686 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -100,6 +100,8 @@ enum bpf_access_type {
 	BPF_WRITE = 2
 };
 
+struct bpf_prog;
+
 struct bpf_verifier_ops {
 	/* return eBPF function prototype for verification */
 	const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
@@ -111,7 +113,7 @@ struct bpf_verifier_ops {
 
 	u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg,
 				  int src_reg, int ctx_off,
-				  struct bpf_insn *insn);
+				  struct bpf_insn *insn, struct bpf_prog *prog);
 };
 
 struct bpf_prog_type_list {
@@ -120,8 +122,6 @@ struct bpf_prog_type_list {
 	enum bpf_prog_type type;
 };
 
-struct bpf_prog;
-
 struct bpf_prog_aux {
 	atomic_t refcnt;
 	u32 used_map_cnt;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 1bbce14bcf17..4165e9ac9e36 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -13,6 +13,7 @@
 #include <linux/printk.h>
 #include <linux/workqueue.h>
 #include <linux/sched.h>
+#include <net/sch_generic.h>
 
 #include <asm/cacheflush.h>
 
@@ -302,10 +303,6 @@ struct bpf_prog_aux;
 	bpf_size;						\
 })
 
-/* Macro to invoke filter function. */
-#define SK_RUN_FILTER(filter, ctx) \
-	(*filter->prog->bpf_func)(ctx, filter->prog->insnsi)
-
 #ifdef CONFIG_COMPAT
 /* A struct sock_filter is architecture independent. */
 struct compat_sock_fprog {
@@ -329,6 +326,7 @@ struct bpf_prog {
 	kmemcheck_bitfield_begin(meta);
 	u16			jited:1,	/* Is our filter JIT'ed? */
 				gpl_compatible:1, /* Is filter GPL compatible? */
+				cb_access:1,	/* Is control block accessed? */
 				dst_needed:1;	/* Do we need dst entry? */
 	kmemcheck_bitfield_end(meta);
 	u32			len;		/* Number of filter blocks */
@@ -352,6 +350,39 @@ struct sk_filter {
 
 #define BPF_PROG_RUN(filter, ctx)  (*filter->bpf_func)(ctx, filter->insnsi)
 
+static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog,
+				       struct sk_buff *skb)
+{
+	u8 *cb_data = qdisc_skb_cb(skb)->data;
+	u8 saved_cb[QDISC_CB_PRIV_LEN];
+	u32 res;
+
+	BUILD_BUG_ON(FIELD_SIZEOF(struct __sk_buff, cb) !=
+		     QDISC_CB_PRIV_LEN);
+
+	if (unlikely(prog->cb_access)) {
+		memcpy(saved_cb, cb_data, sizeof(saved_cb));
+		memset(cb_data, 0, sizeof(saved_cb));
+	}
+
+	res = BPF_PROG_RUN(prog, skb);
+
+	if (unlikely(prog->cb_access))
+		memcpy(cb_data, saved_cb, sizeof(saved_cb));
+
+	return res;
+}
+
+static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog,
+					struct sk_buff *skb)
+{
+	u8 *cb_data = qdisc_skb_cb(skb)->data;
+
+	if (unlikely(prog->cb_access))
+		memset(cb_data, 0, QDISC_CB_PRIV_LEN);
+	return BPF_PROG_RUN(prog, skb);
+}
+
 static inline unsigned int bpf_prog_size(unsigned int proglen)
 {
 	return max(sizeof(struct bpf_prog),
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index b074b23000d6..f8da034c2258 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2024,7 +2024,7 @@ static int convert_ctx_accesses(struct verifier_env *env)
 
 		cnt = env->prog->aux->ops->
 			convert_ctx_access(type, insn->dst_reg, insn->src_reg,
-					   insn->off, insn_buf);
+					   insn->off, insn_buf, env->prog);
 		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
 			verbose("bpf verifier is misconfigured\n");
 			return -EINVAL;
diff --git a/net/core/filter.c b/net/core/filter.c
index 342e6c8fc415..5f4cf1cffed3 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -56,10 +56,10 @@
  *	@sk: sock associated with &sk_buff
  *	@skb: buffer to filter
  *
- * Run the filter code and then cut skb->data to correct size returned by
- * SK_RUN_FILTER. If pkt_len is 0 we toss packet. If skb->len is smaller
+ * Run the eBPF program and then cut skb->data to correct size returned by
+ * the program. If pkt_len is 0 we toss packet. If skb->len is smaller
  * than pkt_len we keep whole skb->data. This is the socket level
- * wrapper to SK_RUN_FILTER. It returns 0 if the packet should
+ * wrapper to BPF_PROG_RUN. It returns 0 if the packet should
  * be accepted or -EPERM if the packet should be tossed.
  *
  */
@@ -83,7 +83,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
 	rcu_read_lock();
 	filter = rcu_dereference(sk->sk_filter);
 	if (filter) {
-		unsigned int pkt_len = SK_RUN_FILTER(filter, skb);
+		unsigned int pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
 
 		err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
 	}
@@ -1736,7 +1736,8 @@ static bool tc_cls_act_is_valid_access(int off, int size,
 
 static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
 				      int src_reg, int ctx_off,
-				      struct bpf_insn *insn_buf)
+				      struct bpf_insn *insn_buf,
+				      struct bpf_prog *prog)
 {
 	struct bpf_insn *insn = insn_buf;
 
@@ -1827,6 +1828,7 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
 		offsetof(struct __sk_buff, cb[4]):
 		BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
 
+		prog->cb_access = 1;
 		ctx_off -= offsetof(struct __sk_buff, cb[0]);
 		ctx_off += offsetof(struct sk_buff, cb);
 		ctx_off += offsetof(struct qdisc_skb_cb, data);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 81c900fbc4a4..104910f7d1fb 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1423,7 +1423,7 @@ static unsigned int fanout_demux_bpf(struct packet_fanout *f,
 	rcu_read_lock();
 	prog = rcu_dereference(f->bpf_prog);
 	if (prog)
-		ret = BPF_PROG_RUN(prog, skb) % num;
+		ret = bpf_prog_run_clear_cb(prog, skb) % num;
 	rcu_read_unlock();
 
 	return ret;
@@ -1939,16 +1939,16 @@ out_free:
 	return err;
 }
 
-static unsigned int run_filter(const struct sk_buff *skb,
-				      const struct sock *sk,
-				      unsigned int res)
+static unsigned int run_filter(struct sk_buff *skb,
+			       const struct sock *sk,
+			       unsigned int res)
 {
 	struct sk_filter *filter;
 
 	rcu_read_lock();
 	filter = rcu_dereference(sk->sk_filter);
 	if (filter != NULL)
-		res = SK_RUN_FILTER(filter, skb);
+		res = bpf_prog_run_clear_cb(filter->prog, skb);
 	rcu_read_unlock();
 
 	return res;
-- 
cgit v1.2.3


From 1be7f75d1668d6296b80bf35dcf6762393530afc Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Wed, 7 Oct 2015 22:23:21 -0700
Subject: bpf: enable non-root eBPF programs

In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
  (except R10+Imm which is used to compute stack addresses)
- comparison of pointers
  (except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps

Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.

Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.

Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.

For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
  u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
  u64 *value = bpf_map_lookup_elem(&my_map, &index);

  if (value)
	*value += skb->len;
  return 0;
}

but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
  u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
  u64 *value = bpf_map_lookup_elem(&my_map, &index);

  if (value)
	*value += (u64) skb;
  return 0;
}
since it would leak the kernel address into the map.

Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns

The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1).  Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h   |   2 +
 kernel/bpf/syscall.c  |  11 +++---
 kernel/bpf/verifier.c | 106 +++++++++++++++++++++++++++++++++++++++++++++-----
 kernel/sysctl.c       |  13 +++++++
 net/core/filter.c     |   3 +-
 5 files changed, 120 insertions(+), 15 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index b4fdee6cb686..02fa3db3c1ec 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -167,6 +167,8 @@ void bpf_prog_put_rcu(struct bpf_prog *prog);
 struct bpf_map *bpf_map_get(struct fd f);
 void bpf_map_put(struct bpf_map *map);
 
+extern int sysctl_unprivileged_bpf_disabled;
+
 /* verify correctness of eBPF program */
 int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
 #else
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index c868cafbc00c..83697bc8e574 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -18,6 +18,8 @@
 #include <linux/filter.h>
 #include <linux/version.h>
 
+int sysctl_unprivileged_bpf_disabled __read_mostly;
+
 static LIST_HEAD(bpf_map_types);
 
 static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
@@ -544,6 +546,9 @@ static int bpf_prog_load(union bpf_attr *attr)
 	    attr->kern_version != LINUX_VERSION_CODE)
 		return -EINVAL;
 
+	if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
 	/* plain bpf_prog allocation */
 	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
 	if (!prog)
@@ -599,11 +604,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	union bpf_attr attr = {};
 	int err;
 
-	/* the syscall is limited to root temporarily. This restriction will be
-	 * lifted when security audit is clean. Note that eBPF+tracing must have
-	 * this restriction, since it may pass kernel data to user space
-	 */
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
 		return -EPERM;
 
 	if (!access_ok(VERIFY_READ, uattr, 1))
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f8da034c2258..1d6b97be79e1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -199,6 +199,7 @@ struct verifier_env {
 	struct verifier_state_list **explored_states; /* search pruning optimization */
 	struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
 	u32 used_map_cnt;		/* number of used maps */
+	bool allow_ptr_leaks;
 };
 
 /* verbose verifier prints what it's seeing
@@ -538,6 +539,21 @@ static int bpf_size_to_bytes(int bpf_size)
 		return -EINVAL;
 }
 
+static bool is_spillable_regtype(enum bpf_reg_type type)
+{
+	switch (type) {
+	case PTR_TO_MAP_VALUE:
+	case PTR_TO_MAP_VALUE_OR_NULL:
+	case PTR_TO_STACK:
+	case PTR_TO_CTX:
+	case FRAME_PTR:
+	case CONST_PTR_TO_MAP:
+		return true;
+	default:
+		return false;
+	}
+}
+
 /* check_stack_read/write functions track spill/fill of registers,
  * stack boundary and alignment are checked in check_mem_access()
  */
@@ -550,9 +566,7 @@ static int check_stack_write(struct verifier_state *state, int off, int size,
 	 */
 
 	if (value_regno >= 0 &&
-	    (state->regs[value_regno].type == PTR_TO_MAP_VALUE ||
-	     state->regs[value_regno].type == PTR_TO_STACK ||
-	     state->regs[value_regno].type == PTR_TO_CTX)) {
+	    is_spillable_regtype(state->regs[value_regno].type)) {
 
 		/* register containing pointer is being spilled into stack */
 		if (size != BPF_REG_SIZE) {
@@ -643,6 +657,20 @@ static int check_ctx_access(struct verifier_env *env, int off, int size,
 	return -EACCES;
 }
 
+static bool is_pointer_value(struct verifier_env *env, int regno)
+{
+	if (env->allow_ptr_leaks)
+		return false;
+
+	switch (env->cur_state.regs[regno].type) {
+	case UNKNOWN_VALUE:
+	case CONST_IMM:
+		return false;
+	default:
+		return true;
+	}
+}
+
 /* check whether memory at (regno + off) is accessible for t = (read | write)
  * if t==write, value_regno is a register which value is stored into memory
  * if t==read, value_regno is a register which will receive the value from memory
@@ -669,11 +697,21 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
 	}
 
 	if (state->regs[regno].type == PTR_TO_MAP_VALUE) {
+		if (t == BPF_WRITE && value_regno >= 0 &&
+		    is_pointer_value(env, value_regno)) {
+			verbose("R%d leaks addr into map\n", value_regno);
+			return -EACCES;
+		}
 		err = check_map_access(env, regno, off, size);
 		if (!err && t == BPF_READ && value_regno >= 0)
 			mark_reg_unknown_value(state->regs, value_regno);
 
 	} else if (state->regs[regno].type == PTR_TO_CTX) {
+		if (t == BPF_WRITE && value_regno >= 0 &&
+		    is_pointer_value(env, value_regno)) {
+			verbose("R%d leaks addr into ctx\n", value_regno);
+			return -EACCES;
+		}
 		err = check_ctx_access(env, off, size, t);
 		if (!err && t == BPF_READ && value_regno >= 0)
 			mark_reg_unknown_value(state->regs, value_regno);
@@ -684,10 +722,17 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
 			verbose("invalid stack off=%d size=%d\n", off, size);
 			return -EACCES;
 		}
-		if (t == BPF_WRITE)
+		if (t == BPF_WRITE) {
+			if (!env->allow_ptr_leaks &&
+			    state->stack_slot_type[MAX_BPF_STACK + off] == STACK_SPILL &&
+			    size != BPF_REG_SIZE) {
+				verbose("attempt to corrupt spilled pointer on stack\n");
+				return -EACCES;
+			}
 			err = check_stack_write(state, off, size, value_regno);
-		else
+		} else {
 			err = check_stack_read(state, off, size, value_regno);
+		}
 	} else {
 		verbose("R%d invalid mem access '%s'\n",
 			regno, reg_type_str[state->regs[regno].type]);
@@ -775,8 +820,13 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
 		return -EACCES;
 	}
 
-	if (arg_type == ARG_ANYTHING)
+	if (arg_type == ARG_ANYTHING) {
+		if (is_pointer_value(env, regno)) {
+			verbose("R%d leaks addr into helper function\n", regno);
+			return -EACCES;
+		}
 		return 0;
+	}
 
 	if (arg_type == ARG_PTR_TO_STACK || arg_type == ARG_PTR_TO_MAP_KEY ||
 	    arg_type == ARG_PTR_TO_MAP_VALUE) {
@@ -950,8 +1000,9 @@ static int check_call(struct verifier_env *env, int func_id)
 }
 
 /* check validity of 32-bit and 64-bit arithmetic operations */
-static int check_alu_op(struct reg_state *regs, struct bpf_insn *insn)
+static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)
 {
+	struct reg_state *regs = env->cur_state.regs;
 	u8 opcode = BPF_OP(insn->code);
 	int err;
 
@@ -976,6 +1027,12 @@ static int check_alu_op(struct reg_state *regs, struct bpf_insn *insn)
 		if (err)
 			return err;
 
+		if (is_pointer_value(env, insn->dst_reg)) {
+			verbose("R%d pointer arithmetic prohibited\n",
+				insn->dst_reg);
+			return -EACCES;
+		}
+
 		/* check dest operand */
 		err = check_reg_arg(regs, insn->dst_reg, DST_OP);
 		if (err)
@@ -1012,6 +1069,11 @@ static int check_alu_op(struct reg_state *regs, struct bpf_insn *insn)
 				 */
 				regs[insn->dst_reg] = regs[insn->src_reg];
 			} else {
+				if (is_pointer_value(env, insn->src_reg)) {
+					verbose("R%d partial copy of pointer\n",
+						insn->src_reg);
+					return -EACCES;
+				}
 				regs[insn->dst_reg].type = UNKNOWN_VALUE;
 				regs[insn->dst_reg].map_ptr = NULL;
 			}
@@ -1061,8 +1123,18 @@ static int check_alu_op(struct reg_state *regs, struct bpf_insn *insn)
 		/* pattern match 'bpf_add Rx, imm' instruction */
 		if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 &&
 		    regs[insn->dst_reg].type == FRAME_PTR &&
-		    BPF_SRC(insn->code) == BPF_K)
+		    BPF_SRC(insn->code) == BPF_K) {
 			stack_relative = true;
+		} else if (is_pointer_value(env, insn->dst_reg)) {
+			verbose("R%d pointer arithmetic prohibited\n",
+				insn->dst_reg);
+			return -EACCES;
+		} else if (BPF_SRC(insn->code) == BPF_X &&
+			   is_pointer_value(env, insn->src_reg)) {
+			verbose("R%d pointer arithmetic prohibited\n",
+				insn->src_reg);
+			return -EACCES;
+		}
 
 		/* check dest operand */
 		err = check_reg_arg(regs, insn->dst_reg, DST_OP);
@@ -1101,6 +1173,12 @@ static int check_cond_jmp_op(struct verifier_env *env,
 		err = check_reg_arg(regs, insn->src_reg, SRC_OP);
 		if (err)
 			return err;
+
+		if (is_pointer_value(env, insn->src_reg)) {
+			verbose("R%d pointer comparison prohibited\n",
+				insn->src_reg);
+			return -EACCES;
+		}
 	} else {
 		if (insn->src_reg != BPF_REG_0) {
 			verbose("BPF_JMP uses reserved fields\n");
@@ -1155,6 +1233,9 @@ static int check_cond_jmp_op(struct verifier_env *env,
 			regs[insn->dst_reg].type = CONST_IMM;
 			regs[insn->dst_reg].imm = 0;
 		}
+	} else if (is_pointer_value(env, insn->dst_reg)) {
+		verbose("R%d pointer comparison prohibited\n", insn->dst_reg);
+		return -EACCES;
 	} else if (BPF_SRC(insn->code) == BPF_K &&
 		   (opcode == BPF_JEQ || opcode == BPF_JNE)) {
 
@@ -1658,7 +1739,7 @@ static int do_check(struct verifier_env *env)
 		}
 
 		if (class == BPF_ALU || class == BPF_ALU64) {
-			err = check_alu_op(regs, insn);
+			err = check_alu_op(env, insn);
 			if (err)
 				return err;
 
@@ -1816,6 +1897,11 @@ static int do_check(struct verifier_env *env)
 				if (err)
 					return err;
 
+				if (is_pointer_value(env, BPF_REG_0)) {
+					verbose("R0 leaks addr as return value\n");
+					return -EACCES;
+				}
+
 process_bpf_exit:
 				insn_idx = pop_stack(env, &prev_insn_idx);
 				if (insn_idx < 0) {
@@ -2144,6 +2230,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
 	if (ret < 0)
 		goto skip_full_check;
 
+	env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
+
 	ret = do_check(env);
 
 skip_full_check:
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index e69201d8094e..96c856b04081 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -64,6 +64,7 @@
 #include <linux/binfmts.h>
 #include <linux/sched/sysctl.h>
 #include <linux/kexec.h>
+#include <linux/bpf.h>
 
 #include <asm/uaccess.h>
 #include <asm/processor.h>
@@ -1138,6 +1139,18 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= timer_migration_handler,
 	},
+#endif
+#ifdef CONFIG_BPF_SYSCALL
+	{
+		.procname	= "unprivileged_bpf_disabled",
+		.data		= &sysctl_unprivileged_bpf_disabled,
+		.maxlen		= sizeof(sysctl_unprivileged_bpf_disabled),
+		.mode		= 0644,
+		/* only handle a transition from default "0" to "1" */
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
+		.extra2		= &one,
+	},
 #endif
 	{ }
 };
diff --git a/net/core/filter.c b/net/core/filter.c
index 5f4cf1cffed3..0b00094932ab 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1640,7 +1640,8 @@ sk_filter_func_proto(enum bpf_func_id func_id)
 	case BPF_FUNC_ktime_get_ns:
 		return &bpf_ktime_get_ns_proto;
 	case BPF_FUNC_trace_printk:
-		return bpf_get_trace_printk_proto();
+		if (capable(CAP_SYS_ADMIN))
+			return bpf_get_trace_printk_proto();
 	default:
 		return NULL;
 	}
-- 
cgit v1.2.3


From aaac3ba95e4c8b496d22f68bd1bc01cfbf525eca Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Wed, 7 Oct 2015 22:23:22 -0700
Subject: bpf: charge user for creation of BPF maps and programs

since eBPF programs and maps use kernel memory consider it 'locked' memory
from user accounting point of view and charge it against RLIMIT_MEMLOCK limit.
This limit is typically set to 64Kbytes by distros, so almost all
bpf+tracing programs would need to increase it, since they use maps,
but kernel charges maximum map size upfront.
For example the hash map of 1024 elements will be charged as 64Kbyte.
It's inconvenient for current users and changes current behavior for root,
but probably worth doing to be consistent root vs non-root.

Similar accounting logic is done by mmap of perf_event.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h   |  3 +++
 include/linux/sched.h |  2 +-
 kernel/bpf/arraymap.c |  2 +-
 kernel/bpf/hashtab.c  |  4 ++++
 kernel/bpf/syscall.c  | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 72 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 02fa3db3c1ec..e3a51b74e275 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -36,6 +36,8 @@ struct bpf_map {
 	u32 key_size;
 	u32 value_size;
 	u32 max_entries;
+	u32 pages;
+	struct user_struct *user;
 	const struct bpf_map_ops *ops;
 	struct work_struct work;
 };
@@ -128,6 +130,7 @@ struct bpf_prog_aux {
 	const struct bpf_verifier_ops *ops;
 	struct bpf_map **used_maps;
 	struct bpf_prog *prog;
+	struct user_struct *user;
 	union {
 		struct work_struct work;
 		struct rcu_head	rcu;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b7b9501b41af..4817df5fffae 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -840,7 +840,7 @@ struct user_struct {
 	struct hlist_node uidhash_node;
 	kuid_t uid;
 
-#ifdef CONFIG_PERF_EVENTS
+#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL)
 	atomic_long_t locked_vm;
 #endif
 };
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 2fecc4aed119..f2d9e698c753 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -49,7 +49,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	array->map.key_size = attr->key_size;
 	array->map.value_size = attr->value_size;
 	array->map.max_entries = attr->max_entries;
-
+	array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT;
 	array->elem_size = elem_size;
 
 	return &array->map;
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 83c209d9b17a..28592d79502b 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -88,6 +88,10 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	htab->elem_size = sizeof(struct htab_elem) +
 			  round_up(htab->map.key_size, 8) +
 			  htab->map.value_size;
+
+	htab->map.pages = round_up(htab->n_buckets * sizeof(struct hlist_head) +
+				   htab->elem_size * htab->map.max_entries,
+				   PAGE_SIZE) >> PAGE_SHIFT;
 	return &htab->map;
 
 free_htab:
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 83697bc8e574..f640e5f7afbd 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -46,11 +46,38 @@ void bpf_register_map_type(struct bpf_map_type_list *tl)
 	list_add(&tl->list_node, &bpf_map_types);
 }
 
+static int bpf_map_charge_memlock(struct bpf_map *map)
+{
+	struct user_struct *user = get_current_user();
+	unsigned long memlock_limit;
+
+	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+
+	atomic_long_add(map->pages, &user->locked_vm);
+
+	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
+		atomic_long_sub(map->pages, &user->locked_vm);
+		free_uid(user);
+		return -EPERM;
+	}
+	map->user = user;
+	return 0;
+}
+
+static void bpf_map_uncharge_memlock(struct bpf_map *map)
+{
+	struct user_struct *user = map->user;
+
+	atomic_long_sub(map->pages, &user->locked_vm);
+	free_uid(user);
+}
+
 /* called from workqueue */
 static void bpf_map_free_deferred(struct work_struct *work)
 {
 	struct bpf_map *map = container_of(work, struct bpf_map, work);
 
+	bpf_map_uncharge_memlock(map);
 	/* implementation dependent freeing */
 	map->ops->map_free(map);
 }
@@ -110,6 +137,10 @@ static int map_create(union bpf_attr *attr)
 
 	atomic_set(&map->refcnt, 1);
 
+	err = bpf_map_charge_memlock(map);
+	if (err)
+		goto free_map;
+
 	err = anon_inode_getfd("bpf-map", &bpf_map_fops, map, O_RDWR | O_CLOEXEC);
 
 	if (err < 0)
@@ -442,11 +473,37 @@ static void free_used_maps(struct bpf_prog_aux *aux)
 	kfree(aux->used_maps);
 }
 
+static int bpf_prog_charge_memlock(struct bpf_prog *prog)
+{
+	struct user_struct *user = get_current_user();
+	unsigned long memlock_limit;
+
+	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+
+	atomic_long_add(prog->pages, &user->locked_vm);
+	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
+		atomic_long_sub(prog->pages, &user->locked_vm);
+		free_uid(user);
+		return -EPERM;
+	}
+	prog->aux->user = user;
+	return 0;
+}
+
+static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
+{
+	struct user_struct *user = prog->aux->user;
+
+	atomic_long_sub(prog->pages, &user->locked_vm);
+	free_uid(user);
+}
+
 static void __prog_put_rcu(struct rcu_head *rcu)
 {
 	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
 
 	free_used_maps(aux);
+	bpf_prog_uncharge_memlock(aux->prog);
 	bpf_prog_free(aux->prog);
 }
 
@@ -554,6 +611,10 @@ static int bpf_prog_load(union bpf_attr *attr)
 	if (!prog)
 		return -ENOMEM;
 
+	err = bpf_prog_charge_memlock(prog);
+	if (err)
+		goto free_prog_nouncharge;
+
 	prog->len = attr->insn_cnt;
 
 	err = -EFAULT;
@@ -595,6 +656,8 @@ static int bpf_prog_load(union bpf_attr *attr)
 free_used_maps:
 	free_used_maps(prog->aux);
 free_prog:
+	bpf_prog_uncharge_memlock(prog);
+free_prog_nouncharge:
 	bpf_prog_free(prog);
 	return err;
 }
-- 
cgit v1.2.3