diff options
| author | Tejun Heo <tj@kernel.org> | 2025-06-13 15:34:22 -1000 |
|---|---|---|
| committer | Tejun Heo <tj@kernel.org> | 2025-06-20 17:03:51 -1000 |
| commit | ddceadce63d9cb752c2472e220ded05cabaf7971 (patch) | |
| tree | a011e5b43cd6a369b2442009851a97dcc5e47e92 /kernel/sched/ext.c | |
| parent | sched_ext, sched/core: Factor out struct scx_task_group (diff) | |
| download | linux-ddceadce63d9cb752c2472e220ded05cabaf7971.tar.gz linux-ddceadce63d9cb752c2472e220ded05cabaf7971.zip | |
sched_ext: Add support for cgroup bandwidth control interface
From 077814f57f8acce13f91dc34bbd2b7e4911fbf25 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 13 Jun 2025 15:06:47 -1000
- Add CONFIG_GROUP_SCHED_BANDWIDTH which is selected by both
CONFIG_CFS_BANDWIDTH and EXT_GROUP_SCHED.
- Put bandwidth control interface files for both cgroup v1 and v2 under
CONFIG_GROUP_SCHED_BANDWIDTH.
- Update tg_bandwidth() to fetch configuration parameters from fair if
CONFIG_CFS_BANDWIDTH, SCX otherwise.
- Update tg_set_bandwidth() to update the parameters for both fair and SCX.
- Add bandwidth control parameters to struct scx_cgroup_init_args.
- Add sched_ext_ops.cgroup_set_bandwidth() which is invoked on bandwidth
control parameter updates.
- Update scx_qmap and maximal selftest to test the new feature.
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'kernel/sched/ext.c')
| -rw-r--r-- | kernel/sched/ext.c | 66 |
1 files changed, 63 insertions, 3 deletions
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 6732e50e0679..39cba11688a9 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -203,6 +203,11 @@ struct scx_exit_task_args { struct scx_cgroup_init_args { /* the weight of the cgroup [1..10000] */ u32 weight; + + /* bandwidth control parameters from cpu.max and cpu.max.burst */ + u64 bw_period_us; + u64 bw_quota_us; + u64 bw_burst_us; }; enum scx_cpu_preempt_reason { @@ -664,9 +669,31 @@ struct sched_ext_ops { * @cgrp: cgroup whose weight is being updated * @weight: new weight [1..10000] * - * Update @tg's weight to @weight. + * Update @cgrp's weight to @weight. */ void (*cgroup_set_weight)(struct cgroup *cgrp, u32 weight); + + /** + * @cgroup_set_bandwidth: A cgroup's bandwidth is being changed + * @cgrp: cgroup whose bandwidth is being updated + * @period_us: bandwidth control period + * @quota_us: bandwidth control quota + * @burst_us: bandwidth control burst + * + * Update @cgrp's bandwidth control parameters. This is from the cpu.max + * cgroup interface. + * + * @quota_us / @period_us determines the CPU bandwidth @cgrp is entitled + * to. For example, if @period_us is 1_000_000 and @quota_us is + * 2_500_000. @cgrp is entitled to 2.5 CPUs. @burst_us can be + * interpreted in the same fashion and specifies how much @cgrp can + * burst temporarily. The specific control mechanism and thus the + * interpretation of @period_us and burstiness is upto to the BPF + * scheduler. + */ + void (*cgroup_set_bandwidth)(struct cgroup *cgrp, + u64 period_us, u64 quota_us, u64 burst_us); + #endif /* CONFIG_EXT_GROUP_SCHED */ /* @@ -4059,6 +4086,8 @@ static bool scx_cgroup_enabled; void scx_tg_init(struct task_group *tg) { tg->scx.weight = CGROUP_WEIGHT_DFL; + tg->scx.bw_period_us = default_bw_period_us(); + tg->scx.bw_quota_us = RUNTIME_INF; } int scx_tg_online(struct task_group *tg) @@ -4073,7 +4102,10 @@ int scx_tg_online(struct task_group *tg) if (scx_cgroup_enabled) { if (SCX_HAS_OP(sch, cgroup_init)) { struct scx_cgroup_init_args args = - { .weight = tg->scx.weight }; + { .weight = tg->scx.weight, + .bw_period_us = tg->scx.bw_period_us, + .bw_quota_us = tg->scx.bw_quota_us, + .bw_burst_us = tg->scx.bw_burst_us }; ret = SCX_CALL_OP_RET(sch, SCX_KF_UNLOCKED, cgroup_init, NULL, tg->css.cgroup, &args); @@ -4225,6 +4257,27 @@ void scx_group_set_idle(struct task_group *tg, bool idle) /* TODO: Implement ops->cgroup_set_idle() */ } +void scx_group_set_bandwidth(struct task_group *tg, + u64 period_us, u64 quota_us, u64 burst_us) +{ + struct scx_sched *sch = scx_root; + + percpu_down_read(&scx_cgroup_rwsem); + + if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_set_bandwidth) && + (tg->scx.bw_period_us != period_us || + tg->scx.bw_quota_us != quota_us || + tg->scx.bw_burst_us != burst_us)) + SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_set_bandwidth, NULL, + tg_cgrp(tg), period_us, quota_us, burst_us); + + tg->scx.bw_period_us = period_us; + tg->scx.bw_quota_us = quota_us; + tg->scx.bw_burst_us = burst_us; + + percpu_up_read(&scx_cgroup_rwsem); +} + static void scx_cgroup_lock(void) { percpu_down_write(&scx_cgroup_rwsem); @@ -4400,7 +4453,12 @@ static int scx_cgroup_init(struct scx_sched *sch) rcu_read_lock(); css_for_each_descendant_pre(css, &root_task_group.css) { struct task_group *tg = css_tg(css); - struct scx_cgroup_init_args args = { .weight = tg->scx.weight }; + struct scx_cgroup_init_args args = { + .weight = tg->scx.weight, + .bw_period_us = tg->scx.bw_period_us, + .bw_quota_us = tg->scx.bw_quota_us, + .bw_burst_us = tg->scx.bw_burst_us, + }; if ((tg->scx.flags & (SCX_TG_ONLINE | SCX_TG_INITED)) != SCX_TG_ONLINE) @@ -5902,6 +5960,7 @@ static s32 sched_ext_ops__cgroup_prep_move(struct task_struct *p, struct cgroup static void sched_ext_ops__cgroup_move(struct task_struct *p, struct cgroup *from, struct cgroup *to) {} static void sched_ext_ops__cgroup_cancel_move(struct task_struct *p, struct cgroup *from, struct cgroup *to) {} static void sched_ext_ops__cgroup_set_weight(struct cgroup *cgrp, u32 weight) {} +static void sched_ext_ops__cgroup_set_bandwidth(struct cgroup *cgrp, u64 period_us, u64 quota_us, u64 burst_us) {} #endif static void sched_ext_ops__cpu_online(s32 cpu) {} static void sched_ext_ops__cpu_offline(s32 cpu) {} @@ -5939,6 +5998,7 @@ static struct sched_ext_ops __bpf_ops_sched_ext_ops = { .cgroup_move = sched_ext_ops__cgroup_move, .cgroup_cancel_move = sched_ext_ops__cgroup_cancel_move, .cgroup_set_weight = sched_ext_ops__cgroup_set_weight, + .cgroup_set_bandwidth = sched_ext_ops__cgroup_set_bandwidth, #endif .cpu_online = sched_ext_ops__cpu_online, .cpu_offline = sched_ext_ops__cpu_offline, |
