Mirror of https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git (synced 2026-03-03 18:28:01 +01:00)
sched/core: Rework sched_class::wakeup_preempt() and rq_modified_*()
Change sched_class::wakeup_preempt() to also get called for cross-class
wakeups, specifically those where the woken task is of a higher class
than the previous highest class.

In order to do this, track the current highest class of the runqueue in
rq::next_class and have wakeup_preempt() track this upwards for each new
wakeup. Additionally have schedule() re-set the value on pick.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://patch.msgid.link/20251127154725.901391274@infradead.org
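In outline: rq::next_class caches the highest sched class with queued work since the last pick; wakeup_preempt() only ratchets it upwards, and __schedule() re-sets it to the class of the freshly picked task. The userspace sketch below models that bookkeeping; the class ranks, helper names and printed trace are illustrative only, not kernel code:

#include <stdio.h>

/* Illustrative class ranks, lowest to highest (mirrors idle..stop). */
enum sched_class_rank { IDLE, EXT, FAIR, RT, DL, STOP };

static const char *name[] = { "idle", "ext", "fair", "rt", "dl", "stop" };

/* Stand-in for rq::next_class: highest class seen since the last pick. */
static enum sched_class_rank next_class = IDLE;

/*
 * Sketch of the reworked core wakeup_preempt(): same-class wakeups are
 * delegated as before; higher-class wakeups now ALSO invoke the current
 * highest class's hook (so it can react to being preempted), then the
 * woken class becomes the new baseline.
 */
static void wakeup_preempt(enum sched_class_rank woken)
{
	if (woken == next_class) {
		printf("%s->wakeup_preempt(%s): same-class check\n",
		       name[next_class], name[woken]);
	} else if (woken > next_class) {
		printf("%s->wakeup_preempt(%s): cross-class, resched\n",
		       name[next_class], name[woken]);
		next_class = woken;	/* track upwards */
	}
	/* lower-class wakeup: nothing to preempt */
}

/* Sketch of __schedule(): the pick re-sets next_class. */
static void schedule_pick(enum sched_class_rank picked)
{
	next_class = picked;
	printf("picked %s task, next_class reset\n", name[picked]);
}

int main(void)
{
	schedule_pick(FAIR);	/* CPU runs a fair task */
	wakeup_preempt(FAIR);	/* fair wakeup: fair decides internally */
	wakeup_preempt(RT);	/* rt wakeup: fair is told, then rt is baseline */
	wakeup_preempt(FAIR);	/* now below next_class: ignored */
	schedule_pick(RT);	/* pick the rt task */
	return 0;
}

Note the asymmetry this buys: a burst of lower-class wakeups under an rt task never touches next_class, while the first higher-class wakeup notifies the incumbent class and then becomes the new baseline.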
kernel/sched/core.c

@@ -2090,7 +2090,6 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
 	 */
 	uclamp_rq_inc(rq, p, flags);
 
-	rq->queue_mask |= p->sched_class->queue_mask;
 	p->sched_class->enqueue_task(rq, p, flags);
 
 	psi_enqueue(p, flags);
@@ -2123,7 +2122,6 @@ inline bool dequeue_task(struct rq *rq, struct task_struct *p, int flags)
 	 * and mark the task ->sched_delayed.
 	 */
 	uclamp_rq_dec(rq, p);
-	rq->queue_mask |= p->sched_class->queue_mask;
 	return p->sched_class->dequeue_task(rq, p, flags);
 }
 
@@ -2174,10 +2172,14 @@ void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags)
 {
 	struct task_struct *donor = rq->donor;
 
-	if (p->sched_class == donor->sched_class)
-		donor->sched_class->wakeup_preempt(rq, p, flags);
-	else if (sched_class_above(p->sched_class, donor->sched_class))
+	if (p->sched_class == rq->next_class) {
+		rq->next_class->wakeup_preempt(rq, p, flags);
+
+	} else if (sched_class_above(p->sched_class, rq->next_class)) {
+		rq->next_class->wakeup_preempt(rq, p, flags);
 		resched_curr(rq);
+		rq->next_class = p->sched_class;
+	}
 
 	/*
 	 * A queue event has occurred, and we're going to schedule. In
@@ -6804,6 +6806,7 @@ static void __sched notrace __schedule(int sched_mode)
 pick_again:
 	next = pick_next_task(rq, rq->donor, &rf);
 	rq_set_donor(rq, next);
+	rq->next_class = next->sched_class;
 	if (unlikely(task_is_blocked(next))) {
 		next = find_proxy_task(rq, next, &rf);
 		if (!next)
@@ -8650,6 +8653,8 @@ void __init sched_init(void)
 		rq->rt.rt_runtime = global_rt_runtime();
 		init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL);
 #endif
+		rq->next_class = &idle_sched_class;
+
 		rq->sd = NULL;
 		rq->rd = NULL;
 		rq->cpu_capacity = SCHED_CAPACITY_SCALE;
@@ -10775,10 +10780,8 @@ struct sched_change_ctx *sched_change_begin(struct task_struct *p, unsigned int
 		flags |= DEQUEUE_NOCLOCK;
 	}
 
-	if (flags & DEQUEUE_CLASS) {
-		if (p->sched_class->switching_from)
-			p->sched_class->switching_from(rq, p);
-	}
+	if ((flags & DEQUEUE_CLASS) && p->sched_class->switching_from)
+		p->sched_class->switching_from(rq, p);
 
 	*ctx = (struct sched_change_ctx){
 		.p = p,
@@ -10830,6 +10833,17 @@ void sched_change_end(struct sched_change_ctx *ctx)
 	if (p->sched_class->switched_to)
 		p->sched_class->switched_to(rq, p);
 
+	/*
+	 * If this was a class promotion; let the old class know it
+	 * got preempted. Note that none of the switch*_from() methods
+	 * know the new class and none of the switch*_to() methods
+	 * know the old class.
+	 */
+	if (ctx->running && sched_class_above(p->sched_class, ctx->class)) {
+		rq->next_class->wakeup_preempt(rq, p, 0);
+		rq->next_class = p->sched_class;
+	}
+
 	/*
 	 * If this was a degradation in class someone should have set
 	 * need_resched by now.
kernel/sched/deadline.c

@@ -2499,9 +2499,16 @@ static int balance_dl(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
  * Only called when both the current and waking task are -deadline
  * tasks.
  */
-static void wakeup_preempt_dl(struct rq *rq, struct task_struct *p,
-			      int flags)
+static void wakeup_preempt_dl(struct rq *rq, struct task_struct *p, int flags)
 {
+	/*
+	 * Can only get preempted by stop-class, and those should be
+	 * few and short lived, doesn't really make sense to push
+	 * anything away for that.
+	 */
+	if (p->sched_class != &dl_sched_class)
+		return;
+
 	if (dl_entity_preempt(&p->dl, &rq->donor->dl)) {
 		resched_curr(rq);
 		return;
@@ -3346,9 +3353,6 @@ static int task_is_throttled_dl(struct task_struct *p, int cpu)
 #endif
 
 DEFINE_SCHED_CLASS(dl) = {
-
-	.queue_mask		= 8,
-
 	.enqueue_task		= enqueue_task_dl,
 	.dequeue_task		= dequeue_task_dl,
 	.yield_task		= yield_task_dl,
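Note why the new guard is needed: wakeup_preempt_dl() used to be reachable only for dl-vs-dl wakeups, but with cross-class delivery the core now also invokes it when a stop-class task (the only class above dl) wakes up. A tiny standalone sketch of the filter follows; the types, ranks and the EDF-style comparison are illustrative, not the kernel's:

#include <stdbool.h>
#include <stdio.h>

enum rank { FAIR, RT, DL, STOP };	/* illustrative subset */

struct task { enum rank rank; int deadline; };

/*
 * Sketch of the guarded hook: only same-class tasks reach the
 * dl-specific preemption test; anything else (stop) returns early,
 * since the core already reschedules for higher-class wakeups.
 */
static bool wakeup_preempt_dl(const struct task *curr, const struct task *p)
{
	if (p->rank != DL)
		return false;	/* cross-class: nothing dl-specific to do */
	return p->deadline < curr->deadline;	/* EDF-style check */
}

int main(void)
{
	struct task curr = { DL, 100 };
	struct task stop = { STOP, 0 }, urgent = { DL, 50 };

	printf("stop wakeup preempts via dl logic? %d\n",
	       wakeup_preempt_dl(&curr, &stop));	/* 0: filtered */
	printf("earlier-deadline dl wakeup?        %d\n",
	       wakeup_preempt_dl(&curr, &urgent));	/* 1 */
	return 0;
}

The same filter pattern appears in wakeup_preempt_rt() and wakeup_preempt_fair() below, each leaving an XXX for a smarter cross-class reaction.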
kernel/sched/ext.c

@@ -2431,7 +2431,7 @@ do_pick_task_scx(struct rq *rq, struct rq_flags *rf, bool force_scx)
 		/* see kick_cpus_irq_workfn() */
 		smp_store_release(&rq->scx.kick_sync, rq->scx.kick_sync + 1);
 
-		rq_modified_clear(rq);
+		rq->next_class = &ext_sched_class;
 
 		rq_unpin_lock(rq, rf);
 		balance_one(rq, prev);
@@ -2446,7 +2446,7 @@ do_pick_task_scx(struct rq *rq, struct rq_flags *rf, bool force_scx)
 	 * If @force_scx is true, always try to pick a SCHED_EXT task,
 	 * regardless of any higher-priority sched classes activity.
 	 */
-	if (!force_scx && rq_modified_above(rq, &ext_sched_class))
+	if (!force_scx && sched_class_above(rq->next_class, &ext_sched_class))
 		return RETRY_TASK;
 
 	keep_prev = rq->scx.flags & SCX_RQ_BAL_KEEP;
@@ -3075,7 +3075,8 @@ static void switched_from_scx(struct rq *rq, struct task_struct *p)
 	scx_disable_task(p);
 }
 
-static void wakeup_preempt_scx(struct rq *rq, struct task_struct *p,int wake_flags) {}
+static void wakeup_preempt_scx(struct rq *rq, struct task_struct *p, int wake_flags) {}
+
 static void switched_to_scx(struct rq *rq, struct task_struct *p) {}
 
 int scx_check_setscheduler(struct task_struct *p, int policy)
@@ -3336,8 +3337,6 @@ static void scx_cgroup_unlock(void) {}
  * their current sched_class. Call them directly from sched core instead.
  */
 DEFINE_SCHED_CLASS(ext) = {
-	.queue_mask		= 1,
-
 	.enqueue_task		= enqueue_task_scx,
 	.dequeue_task		= dequeue_task_scx,
 	.yield_task		= yield_task_scx,
kernel/sched/fair.c

@@ -8736,7 +8736,7 @@ preempt_sync(struct rq *rq, int wake_flags,
 /*
  * Preempt the current task with a newly woken task if needed:
  */
-static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int wake_flags)
+static void wakeup_preempt_fair(struct rq *rq, struct task_struct *p, int wake_flags)
 {
 	enum preempt_wakeup_action preempt_action = PREEMPT_WAKEUP_PICK;
 	struct task_struct *donor = rq->donor;
@@ -8744,6 +8744,12 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
 	struct cfs_rq *cfs_rq = task_cfs_rq(donor);
 	int cse_is_idle, pse_is_idle;
 
+	/*
+	 * XXX Getting preempted by higher class, try and find idle CPU?
+	 */
+	if (p->sched_class != &fair_sched_class)
+		return;
+
 	if (unlikely(se == pse))
 		return;
 
@@ -12911,7 +12917,7 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf)
 	t0 = sched_clock_cpu(this_cpu);
 	__sched_balance_update_blocked_averages(this_rq);
 
-	rq_modified_clear(this_rq);
+	this_rq->next_class = &fair_sched_class;
 	raw_spin_rq_unlock(this_rq);
 
 	for_each_domain(this_cpu, sd) {
@@ -12978,7 +12984,7 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf)
 		pulled_task = 1;
 
 	/* If a higher prio class was modified, restart the pick */
-	if (rq_modified_above(this_rq, &fair_sched_class))
+	if (sched_class_above(this_rq->next_class, &fair_sched_class))
 		pulled_task = -1;
 
 out:
@@ -13882,15 +13888,12 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task
  * All the scheduling class methods:
  */
 DEFINE_SCHED_CLASS(fair) = {
-
-	.queue_mask		= 2,
-
 	.enqueue_task		= enqueue_task_fair,
 	.dequeue_task		= dequeue_task_fair,
 	.yield_task		= yield_task_fair,
 	.yield_to_task		= yield_to_task_fair,
 
-	.wakeup_preempt		= check_preempt_wakeup_fair,
+	.wakeup_preempt		= wakeup_preempt_fair,
 
 	.pick_task		= pick_task_fair,
 	.pick_next_task		= pick_next_task_fair,
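Both do_pick_task_scx() above and this newidle path follow the same lock-break protocol: pre-set next_class to the picking class (where rq_modified_clear() used to reset the mask), drop the rq lock, and on return treat a promoted next_class as "a higher class arrived, redo the pick". A condensed standalone model of that protocol; the names and the sequential stand-in for the concurrent wakeup are illustrative:

#include <stdio.h>

enum rank { IDLE, EXT, FAIR, RT, DL, STOP };
static enum rank next_class;

/*
 * What a concurrent wakeup does to the tracked top class (see the
 * reworked wakeup_preempt() in core.c): ratchet upwards only.
 */
static void remote_wakeup(enum rank r)
{
	if (r > next_class)
		next_class = r;
}

/*
 * Lock-break pick protocol shared by newidle balance and scx's pick:
 * returns 1 if the caller's pick is still valid, -1 to redo it.
 */
static int pick_after_lock_break(enum rank mine, enum rank woken)
{
	next_class = mine;	/* was rq_modified_clear() */
	/* ... rq lock dropped: pull work; wakeups land meanwhile ... */
	remote_wakeup(woken);
	/* ... rq lock retaken ... */
	if (next_class > mine)	/* was rq_modified_above() */
		return -1;
	return 1;
}

int main(void)
{
	printf("fair pick, fair wakeup meanwhile: %d\n",
	       pick_after_lock_break(FAIR, FAIR));	/* 1: keep pick */
	printf("fair pick, rt wakeup meanwhile:   %d\n",
	       pick_after_lock_break(FAIR, RT));	/* -1: retry */
	return 0;
}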
kernel/sched/idle.c

@@ -536,9 +536,6 @@ static void update_curr_idle(struct rq *rq)
  * Simple, special scheduling class for the per-CPU idle tasks:
  */
 DEFINE_SCHED_CLASS(idle) = {
-
-	.queue_mask		= 0,
-
 	/* no enqueue/yield_task for idle tasks */
 
 	/* dequeue is not valid, we print a debug message there: */
kernel/sched/rt.c

@@ -1615,6 +1615,12 @@ static void wakeup_preempt_rt(struct rq *rq, struct task_struct *p, int flags)
 {
 	struct task_struct *donor = rq->donor;
 
+	/*
+	 * XXX If we're preempted by DL, queue a push?
+	 */
+	if (p->sched_class != &rt_sched_class)
+		return;
+
 	if (p->prio < donor->prio) {
 		resched_curr(rq);
 		return;
@@ -2568,9 +2574,6 @@ static int task_is_throttled_rt(struct task_struct *p, int cpu)
 #endif /* CONFIG_SCHED_CORE */
 
 DEFINE_SCHED_CLASS(rt) = {
-
-	.queue_mask		= 4,
-
 	.enqueue_task		= enqueue_task_rt,
 	.dequeue_task		= dequeue_task_rt,
 	.yield_task		= yield_task_rt,
kernel/sched/sched.h

@@ -1118,8 +1118,6 @@ struct rq {
 	/* runqueue lock: */
 	raw_spinlock_t		__lock;
 
-	/* Per class runqueue modification mask; bits in class order. */
-	unsigned int		queue_mask;
 	unsigned int		nr_running;
 #ifdef CONFIG_NUMA_BALANCING
 	unsigned int		nr_numa_running;
@@ -1179,6 +1177,7 @@ struct rq {
 	struct sched_dl_entity	*dl_server;
 	struct task_struct	*idle;
 	struct task_struct	*stop;
+	const struct sched_class *next_class;
 	unsigned long		next_balance;
 	struct mm_struct	*prev_mm;
 
@@ -2426,15 +2425,6 @@ struct sched_class {
 #ifdef CONFIG_UCLAMP_TASK
 	int uclamp_enabled;
 #endif
-	/*
-	 * idle: 0
-	 * ext:  1
-	 * fair: 2
-	 * rt:   4
-	 * dl:   8
-	 * stop: 16
-	 */
-	unsigned int		queue_mask;
 
 	/*
 	 * move_queued_task/activate_task/enqueue_task: rq->lock
@@ -2593,20 +2583,6 @@ struct sched_class {
 #endif
 };
 
-/*
- * Does not nest; only used around sched_class::pick_task() rq-lock-breaks.
- */
-static inline void rq_modified_clear(struct rq *rq)
-{
-	rq->queue_mask = 0;
-}
-
-static inline bool rq_modified_above(struct rq *rq, const struct sched_class *class)
-{
-	unsigned int mask = class->queue_mask;
-	return rq->queue_mask & ~((mask << 1) - 1);
-}
-
 static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
 {
 	WARN_ON_ONCE(rq->donor != prev);
@@ -3899,6 +3875,7 @@ void move_queued_task_locked(struct rq *src_rq, struct rq *dst_rq, struct task_s
 	deactivate_task(src_rq, task, 0);
 	set_task_cpu(task, dst_rq->cpu);
 	activate_task(dst_rq, task, 0);
+	wakeup_preempt(dst_rq, task, 0);
 }
 
 static inline
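For reference, the removed helpers worked off the per-class queue_mask bits documented in the deleted struct sched_class comment (idle 0, ext 1, fair 2, rt 4, dl 8, stop 16): ~((mask << 1) - 1) clears a class's own bit and everything below it, so any surviving bit means a higher class enqueued or dequeued during the lock break. A standalone illustration of that arithmetic; the macro names are made up, the mask math matches the deleted rq_modified_above():

#include <assert.h>
#include <stdio.h>

/* The per-class modification bits the patch removes; idle has no bit. */
#define QM_EXT   1u
#define QM_FAIR  2u
#define QM_RT    4u
#define QM_DL    8u
#define QM_STOP 16u

/*
 * Mirror of the removed rq_modified_above(): nonzero if any class with
 * a bit strictly above @mask touched its queue.
 */
static unsigned int modified_above(unsigned int queue_mask, unsigned int mask)
{
	return queue_mask & ~((mask << 1) - 1);
}

int main(void)
{
	/* An rt enqueue during a fair lock break is "above" fair... */
	assert(modified_above(QM_FAIR | QM_RT, QM_FAIR));
	/* ...but fair and ext activity is not. */
	assert(!modified_above(QM_FAIR | QM_EXT, QM_FAIR));
	/* ~((2 << 1) - 1) == ~3: keeps only the rt/dl/stop bits. */
	printf("bits above fair: %#x\n", ~((QM_FAIR << 1) - 1) & 0x1fu);
	return 0;
}

The new code asks the same question with a single sched_class_above() comparison against the tracked rq->next_class, so enqueue/dequeue no longer need to update a mask at all.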
kernel/sched/stop_task.c

@@ -97,9 +97,6 @@ static void update_curr_stop(struct rq *rq)
  * Simple, special scheduling class for the per-CPU stop tasks:
  */
 DEFINE_SCHED_CLASS(stop) = {
-
-	.queue_mask		= 16,
-
 	.enqueue_task		= enqueue_task_stop,
 	.dequeue_task		= dequeue_task_stop,
 	.yield_task		= yield_task_stop,