aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/sched
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched')
-rw-r--r--kernel/sched/core.c52
-rw-r--r--kernel/sched/debug.c2
-rw-r--r--kernel/sched/idle.c10
-rw-r--r--kernel/sched/psi.c27
-rw-r--r--kernel/sched/sched.h8
-rw-r--r--kernel/sched/wait_bit.c2
6 files changed, 72 insertions, 29 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5555e49c4e12..ee28253c9ac0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -91,7 +91,7 @@
#include "stats.h"
#include "../workqueue_internal.h"
-#include "../../fs/io-wq.h"
+#include "../../io_uring/io-wq.h"
#include "../smpboot.h"
/*
@@ -3802,7 +3802,7 @@ bool cpus_share_cache(int this_cpu, int that_cpu)
return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
}
-static inline bool ttwu_queue_cond(int cpu)
+static inline bool ttwu_queue_cond(struct task_struct *p, int cpu)
{
/*
* Do not complicate things with the async wake_list while the CPU is
@@ -3811,6 +3811,10 @@ static inline bool ttwu_queue_cond(int cpu)
if (!cpu_active(cpu))
return false;
+ /* Ensure the task will still be allowed to run on the CPU. */
+ if (!cpumask_test_cpu(cpu, p->cpus_ptr))
+ return false;
+
/*
* If the CPU does not share cache, then queue the task on the
* remote rqs wakelist to avoid accessing remote data.
@@ -3840,7 +3844,7 @@ static inline bool ttwu_queue_cond(int cpu)
static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags)
{
- if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(cpu)) {
+ if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(p, cpu)) {
sched_clock_cpu(cpu); /* Sync clocks across CPUs */
__ttwu_queue_wakelist(p, cpu, wake_flags);
return true;
@@ -4263,6 +4267,38 @@ int task_call_func(struct task_struct *p, task_call_f func, void *arg)
}
/**
+ * cpu_curr_snapshot - Return a snapshot of the currently running task
+ * @cpu: The CPU on which to snapshot the task.
+ *
+ * Returns the task_struct pointer of the task "currently" running on
+ * the specified CPU. If the same task is running on that CPU throughout,
+ * the return value will be a pointer to that task's task_struct structure.
+ * If the CPU did any context switches even vaguely concurrently with the
+ * execution of this function, the return value will be a pointer to the
+ * task_struct structure of a randomly chosen task that was running on
+ * that CPU somewhere around the time that this function was executing.
+ *
+ * If the specified CPU was offline, the return value is whatever it
+ * is, perhaps a pointer to the task_struct structure of that CPU's idle
+ * task, but there is no guarantee. Callers wishing a useful return
+ * value must take some action to ensure that the specified CPU remains
+ * online throughout.
+ *
+ * This function executes full memory barriers before and after fetching
+ * the pointer, which permits the caller to confine this function's fetch
+ * with respect to the caller's accesses to other shared variables.
+ */
+struct task_struct *cpu_curr_snapshot(int cpu)
+{
+ struct task_struct *t;
+
+ smp_mb(); /* Pairing determined by caller's synchronization design. */
+ t = rcu_dereference(cpu_curr(cpu));
+ smp_mb(); /* Pairing determined by caller's synchronization design. */
+ return t;
+}
+
+/**
* wake_up_process - Wake up a specific process
* @p: The process to be woken up.
*
@@ -6563,7 +6599,7 @@ void __sched schedule_idle(void)
} while (need_resched());
}
-#if defined(CONFIG_CONTEXT_TRACKING) && !defined(CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK)
+#if defined(CONFIG_CONTEXT_TRACKING_USER) && !defined(CONFIG_HAVE_CONTEXT_TRACKING_USER_OFFSTACK)
asmlinkage __visible void __sched schedule_user(void)
{
/*
@@ -8980,7 +9016,7 @@ int cpuset_cpumask_can_shrink(const struct cpumask *cur,
}
int task_can_attach(struct task_struct *p,
- const struct cpumask *cs_cpus_allowed)
+ const struct cpumask *cs_effective_cpus)
{
int ret = 0;
@@ -8999,9 +9035,11 @@ int task_can_attach(struct task_struct *p,
}
if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span,
- cs_cpus_allowed)) {
- int cpu = cpumask_any_and(cpu_active_mask, cs_cpus_allowed);
+ cs_effective_cpus)) {
+ int cpu = cpumask_any_and(cpu_active_mask, cs_effective_cpus);
+ if (unlikely(cpu >= nr_cpu_ids))
+ return -EINVAL;
ret = dl_cpu_busy(cpu, p);
}
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index bb3d63bdf4ae..667876da8382 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -416,7 +416,7 @@ void update_sched_domain_debugfs(void)
char buf[32];
snprintf(buf, sizeof(buf), "cpu%d", cpu);
- debugfs_remove(debugfs_lookup(buf, sd_dentry));
+ debugfs_lookup_and_remove(buf, sd_dentry);
d_cpu = debugfs_create_dir(buf, sd_dentry);
i = 0;
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 328cccbee444..f26ab2675f7d 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -53,14 +53,14 @@ static noinline int __cpuidle cpu_idle_poll(void)
{
trace_cpu_idle(0, smp_processor_id());
stop_critical_timings();
- rcu_idle_enter();
+ ct_idle_enter();
local_irq_enable();
while (!tif_need_resched() &&
(cpu_idle_force_poll || tick_check_broadcast_expired()))
cpu_relax();
- rcu_idle_exit();
+ ct_idle_exit();
start_critical_timings();
trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
@@ -98,12 +98,12 @@ void __cpuidle default_idle_call(void)
*
* Trace IRQs enable here, then switch off RCU, and have
* arch_cpu_idle() use raw_local_irq_enable(). Note that
- * rcu_idle_enter() relies on lockdep IRQ state, so switch that
+ * ct_idle_enter() relies on lockdep IRQ state, so switch that
* last -- this is very similar to the entry code.
*/
trace_hardirqs_on_prepare();
lockdep_hardirqs_on_prepare();
- rcu_idle_enter();
+ ct_idle_enter();
lockdep_hardirqs_on(_THIS_IP_);
arch_cpu_idle();
@@ -116,7 +116,7 @@ void __cpuidle default_idle_call(void)
*/
raw_local_irq_disable();
lockdep_hardirqs_off(_THIS_IP_);
- rcu_idle_exit();
+ ct_idle_exit();
lockdep_hardirqs_on(_THIS_IP_);
raw_local_irq_enable();
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index a337f3e35997..ecb4b4ff4ce0 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -190,12 +190,8 @@ static void group_init(struct psi_group *group)
/* Init trigger-related members */
mutex_init(&group->trigger_lock);
INIT_LIST_HEAD(&group->triggers);
- memset(group->nr_triggers, 0, sizeof(group->nr_triggers));
- group->poll_states = 0;
group->poll_min_period = U32_MAX;
- memset(group->polling_total, 0, sizeof(group->polling_total));
group->polling_next_update = ULLONG_MAX;
- group->polling_until = 0;
init_waitqueue_head(&group->poll_wait);
timer_setup(&group->poll_timer, poll_timer_fn, 0);
rcu_assign_pointer(group->poll_task, NULL);
@@ -957,10 +953,16 @@ int psi_cgroup_alloc(struct cgroup *cgroup)
if (static_branch_likely(&psi_disabled))
return 0;
- cgroup->psi.pcpu = alloc_percpu(struct psi_group_cpu);
- if (!cgroup->psi.pcpu)
+ cgroup->psi = kzalloc(sizeof(struct psi_group), GFP_KERNEL);
+ if (!cgroup->psi)
return -ENOMEM;
- group_init(&cgroup->psi);
+
+ cgroup->psi->pcpu = alloc_percpu(struct psi_group_cpu);
+ if (!cgroup->psi->pcpu) {
+ kfree(cgroup->psi);
+ return -ENOMEM;
+ }
+ group_init(cgroup->psi);
return 0;
}
@@ -969,10 +971,11 @@ void psi_cgroup_free(struct cgroup *cgroup)
if (static_branch_likely(&psi_disabled))
return;
- cancel_delayed_work_sync(&cgroup->psi.avgs_work);
- free_percpu(cgroup->psi.pcpu);
+ cancel_delayed_work_sync(&cgroup->psi->avgs_work);
+ free_percpu(cgroup->psi->pcpu);
/* All triggers must be removed by now */
- WARN_ONCE(cgroup->psi.poll_states, "psi: trigger leak\n");
+ WARN_ONCE(cgroup->psi->poll_states, "psi: trigger leak\n");
+ kfree(cgroup->psi);
}
/**
@@ -1084,7 +1087,7 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
}
struct psi_trigger *psi_trigger_create(struct psi_group *group,
- char *buf, size_t nbytes, enum psi_res res)
+ char *buf, enum psi_res res)
{
struct psi_trigger *t;
enum psi_states state;
@@ -1313,7 +1316,7 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
return -EBUSY;
}
- new = psi_trigger_create(&psi_system, buf, nbytes, res);
+ new = psi_trigger_create(&psi_system, buf, res);
if (IS_ERR(new)) {
mutex_unlock(&seq->lock);
return PTR_ERR(new);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index aad7f5ee9666..e26688d387ae 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -27,6 +27,7 @@
#include <linux/capability.h>
#include <linux/cgroup_api.h>
#include <linux/cgroup.h>
+#include <linux/context_tracking.h>
#include <linux/cpufreq.h>
#include <linux/cpumask_api.h>
#include <linux/ctype.h>
@@ -480,9 +481,6 @@ extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);
-extern void unregister_rt_sched_group(struct task_group *tg);
-extern void free_rt_sched_group(struct task_group *tg);
-extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent);
extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
struct sched_rt_entity *rt_se, int cpu,
struct sched_rt_entity *parent);
@@ -520,6 +518,10 @@ struct cfs_bandwidth { };
#endif /* CONFIG_CGROUP_SCHED */
+extern void unregister_rt_sched_group(struct task_group *tg);
+extern void free_rt_sched_group(struct task_group *tg);
+extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent);
+
/*
* u64_u32_load/u64_u32_store
*
diff --git a/kernel/sched/wait_bit.c b/kernel/sched/wait_bit.c
index d4788f810b55..0b1cd985dc27 100644
--- a/kernel/sched/wait_bit.c
+++ b/kernel/sched/wait_bit.c
@@ -47,7 +47,7 @@ __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_
prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode);
if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags))
ret = (*action)(&wbq_entry->key, mode);
- } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret);
+ } while (test_bit_acquire(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret);
finish_wait(wq_head, &wbq_entry->wq_entry);