From 01341fbd0d8d4e717fc1231cdffe00343088ce0b Mon Sep 17 00:00:00 2001
From: Yunfeng Ye <yeyunfeng@huawei.com>
Date: Thu, 19 Nov 2020 14:21:25 +0800
Subject: workqueue: Kick a worker based on the actual activation of delayed
 works

In realtime scenario, We do not want to have interference on the
isolated cpu cores. but when invoking alloc_workqueue() for percpu wq
on the housekeeping cpu, it kick a kworker on the isolated cpu.

  alloc_workqueue
    pwq_adjust_max_active
      wake_up_worker

The comment in pwq_adjust_max_active() said:
  "Need to kick a worker after thawed or an unbound wq's
   max_active is bumped"

So it is unnecessary to kick a kworker for percpu's wq when invoking
alloc_workqueue(). this patch only kick a worker based on the actual
activation of delayed works.

Signed-off-by: Yunfeng Ye <yeyunfeng@huawei.com>
Reviewed-by: Lai Jiangshan <jiangshanlai@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 437935e7a199..0695c7895c89 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3728,17 +3728,24 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq)
 	 * is updated and visible.
 	 */
 	if (!freezable || !workqueue_freezing) {
+		bool kick = false;
+
 		pwq->max_active = wq->saved_max_active;
 
 		while (!list_empty(&pwq->delayed_works) &&
-		       pwq->nr_active < pwq->max_active)
+		       pwq->nr_active < pwq->max_active) {
 			pwq_activate_first_delayed(pwq);
+			kick = true;
+		}
 
 		/*
 		 * Need to kick a worker after thawed or an unbound wq's
-		 * max_active is bumped.  It's a slow path.  Do it always.
+		 * max_active is bumped. In realtime scenarios, always kicking a
+		 * worker will cause interference on the isolated cpu cores, so
+		 * let's kick iff work items were activated.
 		 */
-		wake_up_worker(pwq->pool);
+		if (kick)
+			wake_up_worker(pwq->pool);
 	} else {
 		pwq->max_active = 0;
 	}
-- 
cgit v1.2.3


From 58315c96651152b9f438e5e56c910994234e2c7a Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Mon, 9 Nov 2020 16:01:11 +0530
Subject: kernel: cgroup: Mundane spelling fixes throughout the file

Few spelling fixes throughout the file.

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/cgroup/cgroup.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'kernel')

diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index e41c21819ba0..690f477d537c 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -244,7 +244,7 @@ bool cgroup_ssid_enabled(int ssid)
  *
  * The default hierarchy is the v2 interface of cgroup and this function
  * can be used to test whether a cgroup is on the default hierarchy for
- * cases where a subsystem should behave differnetly depending on the
+ * cases where a subsystem should behave differently depending on the
  * interface version.
  *
  * List of changed behaviors:
@@ -262,7 +262,7 @@ bool cgroup_ssid_enabled(int ssid)
  *   "cgroup.procs" instead.
  *
  * - "cgroup.procs" is not sorted.  pids will be unique unless they got
- *   recycled inbetween reads.
+ *   recycled in-between reads.
  *
  * - "release_agent" and "notify_on_release" are removed.  Replacement
  *   notification mechanism will be implemented.
@@ -345,7 +345,7 @@ static bool cgroup_is_mixable(struct cgroup *cgrp)
 	return !cgroup_parent(cgrp);
 }
 
-/* can @cgrp become a thread root? should always be true for a thread root */
+/* can @cgrp become a thread root? Should always be true for a thread root */
 static bool cgroup_can_be_thread_root(struct cgroup *cgrp)
 {
 	/* mixables don't care */
@@ -530,7 +530,7 @@ static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp,
  * the root css is returned, so this function always returns a valid css.
  *
  * The returned css is not guaranteed to be online, and therefore it is the
- * callers responsiblity to tryget a reference for it.
+ * callers responsibility to try get a reference for it.
  */
 struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
 					 struct cgroup_subsys *ss)
@@ -702,7 +702,7 @@ EXPORT_SYMBOL_GPL(of_css);
 			;						\
 		else
 
-/* walk live descendants in preorder */
+/* walk live descendants in pre order */
 #define cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp)		\
 	css_for_each_descendant_pre((d_css), cgroup_css((cgrp), NULL))	\
 		if (({ lockdep_assert_held(&cgroup_mutex);		\
@@ -936,7 +936,7 @@ void put_css_set_locked(struct css_set *cset)
 
 	WARN_ON_ONCE(!list_empty(&cset->threaded_csets));
 
-	/* This css_set is dead. unlink it and release cgroup and css refs */
+	/* This css_set is dead. Unlink it and release cgroup and css refs */
 	for_each_subsys(ss, ssid) {
 		list_del(&cset->e_cset_node[ssid]);
 		css_put(cset->subsys[ssid]);
@@ -1061,7 +1061,7 @@ static struct css_set *find_existing_css_set(struct css_set *old_cset,
 
 	/*
 	 * Build the set of subsystem state objects that we want to see in the
-	 * new css_set. while subsystems can change globally, the entries here
+	 * new css_set. While subsystems can change globally, the entries here
 	 * won't change, so no need for locking.
 	 */
 	for_each_subsys(ss, i) {
@@ -1151,7 +1151,7 @@ static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
 
 	/*
 	 * Always add links to the tail of the lists so that the lists are
-	 * in choronological order.
+	 * in chronological order.
 	 */
 	list_move_tail(&link->cset_link, &cgrp->cset_links);
 	list_add_tail(&link->cgrp_link, &cset->cgrp_links);
@@ -4137,7 +4137,7 @@ struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
 	 * implies that if we observe !CSS_RELEASED on @pos in this RCU
 	 * critical section, the one pointed to by its next pointer is
 	 * guaranteed to not have finished its RCU grace period even if we
-	 * have dropped rcu_read_lock() inbetween iterations.
+	 * have dropped rcu_read_lock() in-between iterations.
 	 *
 	 * If @pos has CSS_RELEASED set, its next pointer can't be
 	 * dereferenced; however, as each css is given a monotonically
@@ -4385,7 +4385,7 @@ static struct css_set *css_task_iter_next_css_set(struct css_task_iter *it)
 }
 
 /**
- * css_task_iter_advance_css_set - advance a task itererator to the next css_set
+ * css_task_iter_advance_css_set - advance a task iterator to the next css_set
  * @it: the iterator to advance
  *
  * Advance @it to the next css_set to walk.
@@ -6320,7 +6320,7 @@ struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss)
  *
  * Find the cgroup at @path on the default hierarchy, increment its
  * reference count and return it.  Returns pointer to the found cgroup on
- * success, ERR_PTR(-ENOENT) if @path doens't exist and ERR_PTR(-ENOTDIR)
+ * success, ERR_PTR(-ENOENT) if @path doesn't exist and ERR_PTR(-ENOTDIR)
  * if @path points to a non-directory.
  */
 struct cgroup *cgroup_get_from_path(const char *path)
-- 
cgit v1.2.3


From 5a7b5f32c5aa628841502d19a813c633ff6ecbe4 Mon Sep 17 00:00:00 2001
From: Hui Su <sh_def@163.com>
Date: Fri, 6 Nov 2020 22:47:40 +0800
Subject: cgroup/cgroup.c: replace 'of->kn->priv' with of_cft()

we have supplied the inline function: of_cft() in cgroup.h.

So replace the direct use 'of->kn->priv' with inline func
of_cft(), which is more readable.

Signed-off-by: Hui Su <sh_def@163.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/cgroup/cgroup.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 690f477d537c..385f80cf7d0c 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3657,7 +3657,7 @@ static ssize_t cgroup_freeze_write(struct kernfs_open_file *of,
 
 static int cgroup_file_open(struct kernfs_open_file *of)
 {
-	struct cftype *cft = of->kn->priv;
+	struct cftype *cft = of_cft(of);
 
 	if (cft->open)
 		return cft->open(of);
@@ -3666,7 +3666,7 @@ static int cgroup_file_open(struct kernfs_open_file *of)
 
 static void cgroup_file_release(struct kernfs_open_file *of)
 {
-	struct cftype *cft = of->kn->priv;
+	struct cftype *cft = of_cft(of);
 
 	if (cft->release)
 		cft->release(of);
@@ -3677,7 +3677,7 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
 {
 	struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
 	struct cgroup *cgrp = of->kn->parent->priv;
-	struct cftype *cft = of->kn->priv;
+	struct cftype *cft = of_cft(of);
 	struct cgroup_subsys_state *css;
 	int ret;
 
@@ -3727,7 +3727,7 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
 
 static __poll_t cgroup_file_poll(struct kernfs_open_file *of, poll_table *pt)
 {
-	struct cftype *cft = of->kn->priv;
+	struct cftype *cft = of_cft(of);
 
 	if (cft->poll)
 		return cft->poll(of, pt);
-- 
cgit v1.2.3


From 1b04fa9900263b4e217ca2509fd778b32c2b4eb2 Mon Sep 17 00:00:00 2001
From: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
Date: Wed, 9 Dec 2020 21:27:31 +0100
Subject: rcu-tasks: Move RCU-tasks initialization to before early_initcall()

PowerPC testing encountered boot failures due to RCU Tasks not being
fully initialized until core_initcall() time.  This commit therefore
initializes RCU Tasks (along with Rude RCU and RCU Tasks Trace) just
before early_initcall() time, thus allowing waiting on RCU Tasks grace
periods from early_initcall() handlers.

Link: https://lore.kernel.org/rcu/87eekfh80a.fsf@dja-thinkpad.axtens.net/
Fixes: 36dadef23fcc ("kprobes: Init kprobes in early_initcall")
Tested-by: Daniel Axtens <dja@axtens.net>
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/rcupdate.h |  6 ++++++
 init/main.c              |  1 +
 kernel/rcu/tasks.h       | 25 +++++++++++++++++++++----
 3 files changed, 28 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 6cdd0152c253..5c119d6cecf1 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -86,6 +86,12 @@ void rcu_sched_clock_irq(int user);
 void rcu_report_dead(unsigned int cpu);
 void rcutree_migrate_callbacks(int cpu);
 
+#ifdef CONFIG_TASKS_RCU_GENERIC
+void rcu_init_tasks_generic(void);
+#else
+static inline void rcu_init_tasks_generic(void) { }
+#endif
+
 #ifdef CONFIG_RCU_STALL_COMMON
 void rcu_sysrq_start(void);
 void rcu_sysrq_end(void);
diff --git a/init/main.c b/init/main.c
index 32b2a8affafd..9d964511fe0c 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1512,6 +1512,7 @@ static noinline void __init kernel_init_freeable(void)
 
 	init_mm_internals();
 
+	rcu_init_tasks_generic();
 	do_pre_smp_initcalls();
 	lockup_detector_init();
 
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index d5d9f2d03e8a..73bbe792fe1e 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -241,7 +241,7 @@ static int __noreturn rcu_tasks_kthread(void *arg)
 	}
 }
 
-/* Spawn RCU-tasks grace-period kthread, e.g., at core_initcall() time. */
+/* Spawn RCU-tasks grace-period kthread. */
 static void __init rcu_spawn_tasks_kthread_generic(struct rcu_tasks *rtp)
 {
 	struct task_struct *t;
@@ -569,7 +569,6 @@ static int __init rcu_spawn_tasks_kthread(void)
 	rcu_spawn_tasks_kthread_generic(&rcu_tasks);
 	return 0;
 }
-core_initcall(rcu_spawn_tasks_kthread);
 
 #ifndef CONFIG_TINY_RCU
 static void show_rcu_tasks_classic_gp_kthread(void)
@@ -697,7 +696,6 @@ static int __init rcu_spawn_tasks_rude_kthread(void)
 	rcu_spawn_tasks_kthread_generic(&rcu_tasks_rude);
 	return 0;
 }
-core_initcall(rcu_spawn_tasks_rude_kthread);
 
 #ifndef CONFIG_TINY_RCU
 static void show_rcu_tasks_rude_gp_kthread(void)
@@ -975,6 +973,11 @@ static void rcu_tasks_trace_pregp_step(void)
 static void rcu_tasks_trace_pertask(struct task_struct *t,
 				    struct list_head *hop)
 {
+	// During early boot when there is only the one boot CPU, there
+	// is no idle task for the other CPUs. Just return.
+	if (unlikely(t == NULL))
+		return;
+
 	WRITE_ONCE(t->trc_reader_special.b.need_qs, false);
 	WRITE_ONCE(t->trc_reader_checked, false);
 	t->trc_ipi_to_cpu = -1;
@@ -1200,7 +1203,6 @@ static int __init rcu_spawn_tasks_trace_kthread(void)
 	rcu_spawn_tasks_kthread_generic(&rcu_tasks_trace);
 	return 0;
 }
-core_initcall(rcu_spawn_tasks_trace_kthread);
 
 #ifndef CONFIG_TINY_RCU
 static void show_rcu_tasks_trace_gp_kthread(void)
@@ -1229,6 +1231,21 @@ void show_rcu_tasks_gp_kthreads(void)
 }
 #endif /* #ifndef CONFIG_TINY_RCU */
 
+void __init rcu_init_tasks_generic(void)
+{
+#ifdef CONFIG_TASKS_RCU
+	rcu_spawn_tasks_kthread();
+#endif
+
+#ifdef CONFIG_TASKS_RUDE_RCU
+	rcu_spawn_tasks_rude_kthread();
+#endif
+
+#ifdef CONFIG_TASKS_TRACE_RCU
+	rcu_spawn_tasks_trace_kthread();
+#endif
+}
+
 #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
 static inline void rcu_tasks_bootup_oddness(void) {}
 void show_rcu_tasks_gp_kthreads(void) {}
-- 
cgit v1.2.3


From 2d18e54dd8662442ef5898c6bdadeaf90b3cebbc Mon Sep 17 00:00:00 2001
From: Qinglang Miao <miaoqinglang@huawei.com>
Date: Thu, 10 Dec 2020 09:29:43 +0800
Subject: cgroup: Fix memory leak when parsing multiple source parameters

A memory leak is found in cgroup1_parse_param() when multiple source
parameters overwrite fc->source in the fs_context struct without free.

unreferenced object 0xffff888100d930e0 (size 16):
  comm "mount", pid 520, jiffies 4303326831 (age 152.783s)
  hex dump (first 16 bytes):
    74 65 73 74 6c 65 61 6b 00 00 00 00 00 00 00 00  testleak........
  backtrace:
    [<000000003e5023ec>] kmemdup_nul+0x2d/0xa0
    [<00000000377dbdaa>] vfs_parse_fs_string+0xc0/0x150
    [<00000000cb2b4882>] generic_parse_monolithic+0x15a/0x1d0
    [<000000000f750198>] path_mount+0xee1/0x1820
    [<0000000004756de2>] do_mount+0xea/0x100
    [<0000000094cafb0a>] __x64_sys_mount+0x14b/0x1f0

Fix this bug by permitting a single source parameter and rejecting with
an error all subsequent ones.

Fixes: 8d2451f4994f ("cgroup1: switch to option-by-option parsing")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Qinglang Miao <miaoqinglang@huawei.com>
Reviewed-by: Zefan Li <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/cgroup/cgroup-v1.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel')

diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 191c329e482a..32596fdbcd5b 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -908,6 +908,8 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param)
 	opt = fs_parse(fc, cgroup1_fs_parameters, param, &result);
 	if (opt == -ENOPARAM) {
 		if (strcmp(param->key, "source") == 0) {
+			if (fc->source)
+				return invalf(fc, "Multiple sources not supported");
 			fc->source = param->string;
 			param->string = NULL;
 			return 0;
-- 
cgit v1.2.3


From d467d80dc399ba77875d647f2f37b7d1a70d94c2 Mon Sep 17 00:00:00 2001
From: Tian Tao <tiantao6@hisilicon.com>
Date: Wed, 16 Dec 2020 10:47:15 +0800
Subject: bpf: Remove unused including <linux/version.h>

Remove including <linux/version.h> that don't need it.

Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/1608086835-54523-1-git-send-email-tiantao6@hisilicon.com
---
 kernel/bpf/syscall.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 287be337d5f6..bb2700ec5bf3 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -17,7 +17,6 @@
 #include <linux/fs.h>
 #include <linux/license.h>
 #include <linux/filter.h>
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/idr.h>
 #include <linux/cred.h>
-- 
cgit v1.2.3


From e7e518053c267bb6be3799520d9f4a34c7264a2e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 21 Dec 2020 11:25:06 -0800
Subject: bpf: Add schedule point in htab_init_buckets()

We noticed that with a LOCKDEP enabled kernel,
allocating a hash table with 65536 buckets would
use more than 60ms.

htab_init_buckets() runs from process context,
it is safe to schedule to avoid latency spikes.

Fixes: c50eb518e262 ("bpf: Use separate lockdep class for each hashtab")
Reported-by: John Sperbeck <jsperbeck@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201221192506.707584-1-eric.dumazet@gmail.com
---
 kernel/bpf/hashtab.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel')

diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 7e848200cd26..c1ac7f964bc9 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -152,6 +152,7 @@ static void htab_init_buckets(struct bpf_htab *htab)
 			lockdep_set_class(&htab->buckets[i].lock,
 					  &htab->lockdep_key);
 		}
+		cond_resched();
 	}
 }
 
-- 
cgit v1.2.3


From 69ca310f34168eae0ada434796bfc22fb4a0fa26 Mon Sep 17 00:00:00 2001
From: Jonathan Lemon <bsd@fb.com>
Date: Fri, 18 Dec 2020 10:50:30 -0800
Subject: bpf: Save correct stopping point in file seq iteration

On some systems, some variant of the following splat is
repeatedly seen.  The common factor in all traces seems
to be the entry point to task_file_seq_next().  With the
patch, all warnings go away.

    rcu: INFO: rcu_sched self-detected stall on CPU
    rcu: \x0926-....: (20992 ticks this GP) idle=d7e/1/0x4000000000000002 softirq=81556231/81556231 fqs=4876
    \x09(t=21033 jiffies g=159148529 q=223125)
    NMI backtrace for cpu 26
    CPU: 26 PID: 2015853 Comm: bpftool Kdump: loaded Not tainted 5.6.13-0_fbk4_3876_gd8d1f9bf80bb #1
    Hardware name: Quanta Twin Lakes MP/Twin Lakes Passive MP, BIOS F09_3A12 10/08/2018
    Call Trace:
     <IRQ>
     dump_stack+0x50/0x70
     nmi_cpu_backtrace.cold.6+0x13/0x50
     ? lapic_can_unplug_cpu.cold.30+0x40/0x40
     nmi_trigger_cpumask_backtrace+0xba/0xca
     rcu_dump_cpu_stacks+0x99/0xc7
     rcu_sched_clock_irq.cold.90+0x1b4/0x3aa
     ? tick_sched_do_timer+0x60/0x60
     update_process_times+0x24/0x50
     tick_sched_timer+0x37/0x70
     __hrtimer_run_queues+0xfe/0x270
     hrtimer_interrupt+0xf4/0x210
     smp_apic_timer_interrupt+0x5e/0x120
     apic_timer_interrupt+0xf/0x20
     </IRQ>
    RIP: 0010:get_pid_task+0x38/0x80
    Code: 89 f6 48 8d 44 f7 08 48 8b 00 48 85 c0 74 2b 48 83 c6 55 48 c1 e6 04 48 29 f0 74 19 48 8d 78 20 ba 01 00 00 00 f0 0f c1 50 20 <85> d2 74 27 78 11 83 c2 01 78 0c 48 83 c4 08 c3 31 c0 48 83 c4 08
    RSP: 0018:ffffc9000d293dc8 EFLAGS: 00000202 ORIG_RAX: ffffffffffffff13
    RAX: ffff888637c05600 RBX: ffffc9000d293e0c RCX: 0000000000000000
    RDX: 0000000000000001 RSI: 0000000000000550 RDI: ffff888637c05620
    RBP: ffffffff8284eb80 R08: ffff88831341d300 R09: ffff88822ffd8248
    R10: ffff88822ffd82d0 R11: 00000000003a93c0 R12: 0000000000000001
    R13: 00000000ffffffff R14: ffff88831341d300 R15: 0000000000000000
     ? find_ge_pid+0x1b/0x20
     task_seq_get_next+0x52/0xc0
     task_file_seq_get_next+0x159/0x220
     task_file_seq_next+0x4f/0xa0
     bpf_seq_read+0x159/0x390
     vfs_read+0x8a/0x140
     ksys_read+0x59/0xd0
     do_syscall_64+0x42/0x110
     entry_SYSCALL_64_after_hwframe+0x44/0xa9
    RIP: 0033:0x7f95ae73e76e
    Code: Bad RIP value.
    RSP: 002b:00007ffc02c1dbf8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
    RAX: ffffffffffffffda RBX: 000000000170faa0 RCX: 00007f95ae73e76e
    RDX: 0000000000001000 RSI: 00007ffc02c1dc30 RDI: 0000000000000007
    RBP: 00007ffc02c1ec70 R08: 0000000000000005 R09: 0000000000000006
    R10: fffffffffffff20b R11: 0000000000000246 R12: 00000000019112a0
    R13: 0000000000000000 R14: 0000000000000007 R15: 00000000004283c0

If unable to obtain the file structure for the current task,
proceed to the next task number after the one returned from
task_seq_get_next(), instead of the next task number from the
original iterator.

Also, save the stopping task number from task_seq_get_next()
on failure in case of restarts.

Fixes: eaaacd23910f ("bpf: Add task and task/file iterator targets")
Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201218185032.2464558-2-jonathan.lemon@gmail.com
---
 kernel/bpf/task_iter.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 0458a40edf10..8033ab19138a 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -158,13 +158,14 @@ again:
 		if (!curr_task) {
 			info->task = NULL;
 			info->files = NULL;
+			info->tid = curr_tid;
 			return NULL;
 		}
 
 		curr_files = get_files_struct(curr_task);
 		if (!curr_files) {
 			put_task_struct(curr_task);
-			curr_tid = ++(info->tid);
+			curr_tid = curr_tid + 1;
 			info->fd = 0;
 			goto again;
 		}
-- 
cgit v1.2.3


From a61daaf351da7c8493f2586437617d60c24350b0 Mon Sep 17 00:00:00 2001
From: Jonathan Lemon <bsd@fb.com>
Date: Fri, 18 Dec 2020 10:50:31 -0800
Subject: bpf: Use thread_group_leader()

Instead of directly comparing task->tgid and task->pid, use the
thread_group_leader() helper.  This helps with readability, and
there should be no functional change.

Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201218185032.2464558-3-jonathan.lemon@gmail.com
---
 kernel/bpf/task_iter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 8033ab19138a..dc4007f1843b 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -37,7 +37,7 @@ retry:
 		if (!task) {
 			++*tid;
 			goto retry;
-		} else if (skip_if_dup_files && task->tgid != task->pid &&
+		} else if (skip_if_dup_files && !thread_group_leader(task) &&
 			   task->files == task->group_leader->files) {
 			put_task_struct(task);
 			task = NULL;
-- 
cgit v1.2.3


From b1b6b5a30dce872f500dc43f067cba8e7f86fc7d Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Wed, 30 Dec 2020 21:34:16 +0000
Subject: kernel/io_uring: cancel io_uring before task works

For cancelling io_uring requests it needs either to be able to run
currently enqueued task_works or having it shut down by that moment.
Otherwise io_uring_cancel_files() may be waiting for requests that won't
ever complete.

Go with the first way and do cancellations before setting PF_EXITING and
so before putting the task_work infrastructure into a transition state
where task_work_run() would better not be called.

Cc: stable@vger.kernel.org # 5.5+
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/file.c     | 2 --
 kernel/exit.c | 2 ++
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/fs/file.c b/fs/file.c
index c0b60961c672..dab120b71e44 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -21,7 +21,6 @@
 #include <linux/rcupdate.h>
 #include <linux/close_range.h>
 #include <net/sock.h>
-#include <linux/io_uring.h>
 
 unsigned int sysctl_nr_open __read_mostly = 1024*1024;
 unsigned int sysctl_nr_open_min = BITS_PER_LONG;
@@ -428,7 +427,6 @@ void exit_files(struct task_struct *tsk)
 	struct files_struct * files = tsk->files;
 
 	if (files) {
-		io_uring_files_cancel(files);
 		task_lock(tsk);
 		tsk->files = NULL;
 		task_unlock(tsk);
diff --git a/kernel/exit.c b/kernel/exit.c
index 3594291a8542..04029e35e69a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -63,6 +63,7 @@
 #include <linux/random.h>
 #include <linux/rcuwait.h>
 #include <linux/compat.h>
+#include <linux/io_uring.h>
 
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
@@ -776,6 +777,7 @@ void __noreturn do_exit(long code)
 		schedule();
 	}
 
+	io_uring_files_cancel(tsk->files);
 	exit_signals(tsk);  /* sets PF_EXITING */
 
 	/* sync mm's RSS info before statistics gathering */
-- 
cgit v1.2.3


From 04901aab40ea3779f6fc6383ef74d8e130e817bf Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 30 Dec 2020 21:24:18 -0800
Subject: bpf: Fix a task_iter bug caused by a merge conflict resolution

Latest bpf tree has a bug for bpf_iter selftest:

  $ ./test_progs -n 4/25
  test_bpf_sk_storage_get:PASS:bpf_iter_bpf_sk_storage_helpers__open_and_load 0 nsec
  test_bpf_sk_storage_get:PASS:socket 0 nsec
  ...
  do_dummy_read:PASS:read 0 nsec
  test_bpf_sk_storage_get:FAIL:bpf_map_lookup_elem map value wasn't set correctly
                          (expected 1792, got -1, err=0)
  #4/25 bpf_sk_storage_get:FAIL
  #4 bpf_iter:FAIL
  Summary: 0/0 PASSED, 0 SKIPPED, 2 FAILED

When doing merge conflict resolution, Commit 4bfc4714849d missed to
save curr_task to seq_file private data. The task pointer in seq_file
private data is passed to bpf program. This caused NULL-pointer task
passed to bpf program which will immediately return upon checking
whether task pointer is NULL.

This patch added back the assignment of curr_task to seq_file private
data and fixed the issue.

Fixes: 4bfc4714849d ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf")
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20201231052418.577024-1-yhs@fb.com
---
 kernel/bpf/task_iter.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel')

diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 3efe38191d1c..175b7b42bfc4 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -159,6 +159,7 @@ again:
                 }
 
                 /* set info->task and info->tid */
+		info->task = curr_task;
 		if (curr_tid == info->tid) {
 			curr_fd = info->fd;
 		} else {
-- 
cgit v1.2.3


From 00b8c557d096f0930d5c07df618223d3d06902d6 Mon Sep 17 00:00:00 2001
From: Matthias Maennich <maennich@google.com>
Date: Wed, 6 Jan 2021 15:52:01 +0000
Subject: staging: ION: remove some references to CONFIG_ION

With commit e722a295cf49 ("staging: ion: remove from the tree"), ION and
its corresponding config CONFIG_ION is gone. Remove stale references
from drivers/staging/media/atomisp/pci and from the recommended Android
kernel config.

Fixes: e722a295cf49 ("staging: ion: remove from the tree")
Cc: Hridya Valsaraju <hridya@google.com>
Cc: Rob Herring <robh@kernel.org>
Cc: linux-media@vger.kernel.org
Cc: devel@driverdev.osuosl.org
Signed-off-by: Matthias Maennich <maennich@google.com>
Link: https://lore.kernel.org/r/20210106155201.2845319-1-maennich@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/media/atomisp/pci/atomisp_subdev.c | 20 --------------------
 kernel/configs/android-recommended.config          |  1 -
 2 files changed, 21 deletions(-)

(limited to 'kernel')

diff --git a/drivers/staging/media/atomisp/pci/atomisp_subdev.c b/drivers/staging/media/atomisp/pci/atomisp_subdev.c
index 52b9fb18c87f..b666cb23e5ca 100644
--- a/drivers/staging/media/atomisp/pci/atomisp_subdev.c
+++ b/drivers/staging/media/atomisp/pci/atomisp_subdev.c
@@ -1062,26 +1062,6 @@ static const struct v4l2_ctrl_config ctrl_select_isp_version = {
 	.def = 0,
 };
 
-#if 0 /* #ifdef CONFIG_ION */
-/*
- * Control for ISP ion device fd
- *
- * userspace will open ion device and pass the fd to kernel.
- * this fd will be used to map shared fd to buffer.
- */
-/* V4L2_CID_ATOMISP_ION_DEVICE_FD is not defined */
-static const struct v4l2_ctrl_config ctrl_ion_dev_fd = {
-	.ops = &ctrl_ops,
-	.id = V4L2_CID_ATOMISP_ION_DEVICE_FD,
-	.type = V4L2_CTRL_TYPE_INTEGER,
-	.name = "Ion Device Fd",
-	.min = -1,
-	.max = 1024,
-	.step = 1,
-	.def = ION_FD_UNSET
-};
-#endif
-
 static void atomisp_init_subdev_pipe(struct atomisp_sub_device *asd,
 				     struct atomisp_video_pipe *pipe, enum v4l2_buf_type buf_type)
 {
diff --git a/kernel/configs/android-recommended.config b/kernel/configs/android-recommended.config
index 53d688bdd894..eb0029c9a6a6 100644
--- a/kernel/configs/android-recommended.config
+++ b/kernel/configs/android-recommended.config
@@ -81,7 +81,6 @@ CONFIG_INPUT_JOYSTICK=y
 CONFIG_INPUT_MISC=y
 CONFIG_INPUT_TABLET=y
 CONFIG_INPUT_UINPUT=y
-CONFIG_ION=y
 CONFIG_JOYSTICK_XPAD=y
 CONFIG_JOYSTICK_XPAD_FF=y
 CONFIG_JOYSTICK_XPAD_LEDS=y
-- 
cgit v1.2.3


From 35d0b389f3b23439ad15b610d6e43fc72fc75779 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 5 Jan 2021 11:32:43 -0700
Subject: task_work: unconditionally run task_work from get_signal()

Song reported a boot regression in a kvm image with 5.11-rc, and bisected
it down to the below patch. Debugging this issue, turns out that the boot
stalled when a task is waiting on a pipe being released. As we no longer
run task_work from get_signal() unless it's queued with TWA_SIGNAL, the
task goes idle without running the task_work. This prevents ->release()
from being called on the pipe, which another boot task is waiting on.

For now, re-instate the unconditional task_work run from get_signal().
For 5.12, we'll collapse TWA_RESUME and TWA_SIGNAL, as it no longer
makes sense to have a distinction between the two. This will turn
task_work notification into a simple boolean, whether to notify or not.

Fixes: 98b89b649fce ("signal: kill JOBCTL_TASK_WORK")
Reported-by: Song Liu <songliubraving@fb.com>
Tested-by: John Stultz <john.stultz@linaro.org>
Tested-by: Douglas Anderson <dianders@chromium.org>
Tested-by: Sedat Dilek <sedat.dilek@gmail.com> # LLVM/Clang version 11.0.1
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 kernel/signal.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'kernel')

diff --git a/kernel/signal.c b/kernel/signal.c
index 5736c55aaa1a..6b9c431da08f 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2550,6 +2550,9 @@ bool get_signal(struct ksignal *ksig)
 	struct signal_struct *signal = current->signal;
 	int signr;
 
+	if (unlikely(current->task_works))
+		task_work_run();
+
 	/*
 	 * For non-generic architectures, check for TIF_NOTIFY_SIGNAL so
 	 * that the arch handlers don't all have to do it. If we get here
-- 
cgit v1.2.3


From 7bb83f6fc4ee84e95d0ac0d14452c2619fb3fe70 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Fri, 8 Jan 2021 13:19:38 +0900
Subject: tracing/kprobes: Do the notrace functions check without kprobes on
 ftrace

Enable the notrace function check on the architecture which doesn't
support kprobes on ftrace but support dynamic ftrace. This notrace
function check is not only for the kprobes on ftrace but also
sw-breakpoint based kprobes.
Thus there is no reason to limit this check for the arch which
supports kprobes on ftrace.

This also changes the dependency of Kconfig. Because kprobe event
uses the function tracer's address list for identifying notrace
function, if the CONFIG_DYNAMIC_FTRACE=n, it can not check whether
the target function is notrace or not.

Link: https://lkml.kernel.org/r/20210105065730.2634785-1-naveen.n.rao@linux.vnet.ibm.com
Link: https://lkml.kernel.org/r/161007957862.114704.4512260007555399463.stgit@devnote2

Cc: stable@vger.kernel.org
Fixes: 45408c4f92506 ("tracing: kprobes: Prohibit probing on notrace function")
Acked-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/Kconfig        | 2 +-
 kernel/trace/trace_kprobe.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index d5a19413d4f8..c1a62ae7e812 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -538,7 +538,7 @@ config KPROBE_EVENTS
 config KPROBE_EVENTS_ON_NOTRACE
 	bool "Do NOT protect notrace function from kprobe events"
 	depends on KPROBE_EVENTS
-	depends on KPROBES_ON_FTRACE
+	depends on DYNAMIC_FTRACE
 	default n
 	help
 	  This is only for the developers who want to debug ftrace itself
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 9c31f42245e9..e6fba1798771 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -434,7 +434,7 @@ static int disable_trace_kprobe(struct trace_event_call *call,
 	return 0;
 }
 
-#if defined(CONFIG_KPROBES_ON_FTRACE) && \
+#if defined(CONFIG_DYNAMIC_FTRACE) && \
 	!defined(CONFIG_KPROBE_EVENTS_ON_NOTRACE)
 static bool __within_notrace_func(unsigned long addr)
 {
-- 
cgit v1.2.3