Files
linux-stable-mirror/kernel/sched/stop_task.c
T
John Stultz 7a3a6bfbd6 sched: Rework prev_balance() to avoid stale prev references
Historically, the prev value from __schedule() was the rq->curr.
This prev value is passed down through numerous functions, and
used in the class scheduler implementations. The fact that
prev was on_cpu until the end of __schedule(), meant it was
stable across the rq lock drops that the class->balance()
implementations often do.

However, with proxy-exec, the prev passed to functions called
by __schedule() is rq->donor, which may not be the same as
rq->curr and may not be on_cpu. This makes the prev value
potentially unstable across rq lock drops.

A recently found issue with proxy-exec is that when we begin doing
return migration from try_to_wake_up(), it's possible we may be
waking up the rq->donor.  When we do this, we call proxy_resched_idle()
to put_prev_set_next(), setting the rq->donor to rq->idle and allowing
the rq->donor to be return migrated and allowed to run.

This, however, runs into trouble, as on another cpu we might be in
the middle of calling __schedule(). Conceptually the rq lock is
held for the majority of the time, but in calling prev_balance()
it's possible the class->balance() handler call may briefly drop the rq lock.
This opens a window for try_to_wake_up() to wake and return migrate the
rq->donor before the class logic reacquires the rq lock.

Unfortunately, prev_balance() takes a prev argument, to which we pass
rq->donor. However, this prev value can now become stale and incorrect across
an rq lock drop.

So, to correct this, rework the prev_balance() call so that it does not take a
"prev" argument.

Signed-off-by: John Stultz <jstultz@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260512025635.2840817-2-jstultz@google.com
2026-06-02 12:26:06 +02:00

120 lines
2.6 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* stop-task scheduling class.
*
* The stop task is the highest priority task in the system, it preempts
* everything and will be preempted by nothing.
*
* See kernel/stop_machine.c
*/
#include "sched.h"
/*
 * Pick the CPU for a stop task.
 *
 * Stop tasks never migrate, so the result is always whatever task_cpu()
 * reports for @p. The @cpu and @flags arguments are ignored.
 */
static int select_task_rq_stop(struct task_struct *p, int cpu, int flags)
{
	return task_cpu(p);
}
/*
 * ->balance() hook for the stop class.
 *
 * Simply forwards the result of sched_stop_runnable() for @rq.
 * @rf is not used by this implementation.
 */
static int balance_stop(struct rq *rq, struct rq_flags *rf)
{
	return sched_stop_runnable(rq);
}
/*
 * ->wakeup_preempt() hook for the stop class.
 *
 * Deliberately empty: the stop task is never preempted, so there is
 * nothing to decide here. All arguments are ignored.
 */
static void wakeup_preempt_stop(struct rq *rq, struct task_struct *p, int flags)
{
}
/*
 * ->set_next_task() hook for the stop class.
 *
 * Stamps @stop's se.exec_start with the value returned by
 * rq_clock_task() for @rq. @first is not used.
 */
static void
set_next_task_stop(struct rq *rq, struct task_struct *stop, bool first)
{
	stop->se.exec_start = rq_clock_task(rq);
}
/*
 * ->pick_task() hook for the stop class.
 *
 * Returns @rq->stop when sched_stop_runnable() says so for @rq, and
 * NULL otherwise. @rf is not used.
 */
static struct task_struct *
pick_task_stop(struct rq *rq, struct rq_flags *rf)
{
	return sched_stop_runnable(rq) ? rq->stop : NULL;
}
/*
 * ->enqueue_task() hook for the stop class.
 *
 * The only work done is add_nr_running(rq, 1); @p and @flags are
 * not consulted.
 */
static void enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags)
{
	add_nr_running(rq, 1);
}
/*
 * ->dequeue_task() hook for the stop class.
 *
 * Calls sub_nr_running(rq, 1) and unconditionally returns true.
 * @p and @flags are not consulted.
 */
static bool dequeue_task_stop(struct rq *rq, struct task_struct *p, int flags)
{
	sub_nr_running(rq, 1);

	return true;
}
/*
 * ->yield_task() hook for the stop class.
 *
 * The stop task should never yield (it would be pointless), so
 * reaching this function is treated as a bug.
 */
static void
yield_task_stop(struct rq *rq)
{
	BUG();
}
/*
 * ->put_prev_task() hook for the stop class.
 *
 * Delegates to update_curr_common() for @rq. Neither @prev nor @next
 * is used by this implementation.
 */
static void
put_prev_task_stop(struct rq *rq, struct task_struct *prev,
		   struct task_struct *next)
{
	update_curr_common(rq);
}
/*
 * Scheduler tick handler for a task of the stop class.
 *
 * The stop class has no per-tick work, so the body is empty.
 *
 * NOTE: with full dynticks the tick offload may invoke this function
 * remotely. No assumption about running locally may therefore be made;
 * all state must be reached through the @rq and @curr arguments.
 */
static void
task_tick_stop(struct rq *rq, struct task_struct *curr, int queued)
{
}
/*
 * ->switching_to() hook for the stop class.
 *
 * It is impossible to change a task to this class, so getting here
 * is treated as a bug.
 */
static void
switching_to_stop(struct rq *rq, struct task_struct *p)
{
	BUG();
}
/*
 * ->prio_changed() hook for the stop class.
 *
 * Nothing happens when @p->prio still equals @oldprio. Any actual
 * difference is treated as a bug. @rq is not used.
 */
static void prio_changed_stop(struct rq *rq, struct task_struct *p, u64 oldprio)
{
	if (p->prio != oldprio)
		BUG(); /* how!?, what priority? */
}
/*
 * ->update_curr() hook for the stop class.
 *
 * Intentionally a no-op; @rq is not touched.
 */
static void
update_curr_stop(struct rq *rq)
{
}
/*
 * The stop scheduling class: a simple, special-purpose class used for
 * the per-CPU stop tasks. Each hook below is wired to the matching
 * *_stop() handler in this file, except ->set_cpus_allowed, which uses
 * the shared set_cpus_allowed_common().
 */
DEFINE_SCHED_CLASS(stop) = {
	.enqueue_task		= enqueue_task_stop,
	.dequeue_task		= dequeue_task_stop,
	.yield_task		= yield_task_stop,

	.wakeup_preempt		= wakeup_preempt_stop,

	.pick_task		= pick_task_stop,
	.put_prev_task		= put_prev_task_stop,
	.set_next_task		= set_next_task_stop,

	.balance		= balance_stop,
	.select_task_rq		= select_task_rq_stop,
	.set_cpus_allowed	= set_cpus_allowed_common,

	.task_tick		= task_tick_stop,

	.prio_changed		= prio_changed_stop,
	.switching_to		= switching_to_stop,
	.update_curr		= update_curr_stop,
};