mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2026-05-26 11:40:24 +02:00
sched_ext: idle: Accept an arbitrary cpumask in scx_select_cpu_dfl()
Many scx schedulers implement their own hard or soft-affinity rules to support topology characteristics, such as heterogeneous architectures (e.g., big.LITTLE, P-cores/E-cores), or to categorize tasks based on specific properties (e.g., running certain tasks only in a subset of CPUs).

Currently, there is no mechanism that allows applying the built-in idle CPU selection policy to an arbitrary subset of CPUs. As a result, schedulers often implement their own idle CPU selection policies, which are typically similar to one another, leading to a lot of code duplication.

To address this, modify scx_select_cpu_dfl() to accept an arbitrary cpumask that can be used by the BPF schedulers to apply the existing built-in idle CPU selection policy to a subset of allowed CPUs.

With this concept the idle CPU selection policy becomes the following:
 - always prioritize CPUs from fully idle SMT cores (if SMT is enabled),
 - select the same CPU if it's idle and in the allowed CPUs,
 - select an idle CPU within the same LLC, if the LLC cpumask is a subset of the allowed CPUs,
 - select an idle CPU within the same node, if the node cpumask is a subset of the allowed CPUs,
 - select an idle CPU within the allowed CPUs.

This functionality will be exposed through a dedicated kfunc in a separate patch.

Signed-off-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
This commit is contained in:
+44
-4
@@ -49,6 +49,7 @@ static struct scx_idle_cpus **scx_idle_node_masks;
|
||||
/*
|
||||
* Local per-CPU cpumasks (used to generate temporary idle cpumasks).
|
||||
*/
|
||||
static DEFINE_PER_CPU(cpumask_var_t, local_idle_cpumask);
|
||||
static DEFINE_PER_CPU(cpumask_var_t, local_llc_idle_cpumask);
|
||||
static DEFINE_PER_CPU(cpumask_var_t, local_numa_idle_cpumask);
|
||||
|
||||
@@ -417,13 +418,15 @@ static inline bool task_affinity_all(const struct task_struct *p)
|
||||
* branch prediction optimizations.
|
||||
*
|
||||
* 3. Pick a CPU within the same LLC (Last-Level Cache):
|
||||
* - if the above conditions aren't met, pick a CPU that shares the same LLC
|
||||
* to maintain cache locality.
|
||||
* - if the above conditions aren't met, pick a CPU that shares the same
|
||||
* LLC, if the LLC domain is a subset of @cpus_allowed, to maintain
|
||||
* cache locality.
|
||||
*
|
||||
* 4. Pick a CPU within the same NUMA node, if enabled:
|
||||
* - choose a CPU from the same NUMA node to reduce memory access latency.
|
||||
* - choose a CPU from the same NUMA node, if the node cpumask is a
|
||||
* subset of @cpus_allowed, to reduce memory access latency.
|
||||
*
|
||||
* 5. Pick any idle CPU usable by the task.
|
||||
* 5. Pick any idle CPU within the @cpus_allowed domain.
|
||||
*
|
||||
* Step 3 and 4 are performed only if the system has, respectively,
|
||||
* multiple LLCs / multiple NUMA nodes (see scx_selcpu_topo_llc and
|
||||
@@ -446,6 +449,39 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
|
||||
int node = scx_cpu_node_if_enabled(prev_cpu);
|
||||
s32 cpu;
|
||||
|
||||
preempt_disable();
|
||||
|
||||
/*
|
||||
* Determine the subset of CPUs usable by @p within @cpus_allowed.
|
||||
*/
|
||||
if (allowed != p->cpus_ptr) {
|
||||
struct cpumask *local_cpus = this_cpu_cpumask_var_ptr(local_idle_cpumask);
|
||||
|
||||
if (task_affinity_all(p)) {
|
||||
allowed = cpus_allowed;
|
||||
} else if (cpumask_and(local_cpus, cpus_allowed, p->cpus_ptr)) {
|
||||
allowed = local_cpus;
|
||||
} else {
|
||||
cpu = -EBUSY;
|
||||
goto out_enable;
|
||||
}
|
||||
|
||||
/*
|
||||
* If @prev_cpu is not in the allowed CPUs, skip topology
|
||||
* optimizations and try to pick any idle CPU usable by the
|
||||
* task.
|
||||
*
|
||||
* If %SCX_OPS_BUILTIN_IDLE_PER_NODE is enabled, prioritize
|
||||
* the current node, as it may optimize some waker->wakee
|
||||
* workloads.
|
||||
*/
|
||||
if (!cpumask_test_cpu(prev_cpu, allowed)) {
|
||||
node = scx_cpu_node_if_enabled(smp_processor_id());
|
||||
cpu = scx_pick_idle_cpu(allowed, node, flags);
|
||||
goto out_enable;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This is necessary to protect llc_cpus.
|
||||
*/
|
||||
@@ -610,6 +646,8 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
|
||||
|
||||
out_unlock:
|
||||
rcu_read_unlock();
|
||||
out_enable:
|
||||
preempt_enable();
|
||||
|
||||
return cpu;
|
||||
}
|
||||
@@ -641,6 +679,8 @@ void scx_idle_init_masks(void)
|
||||
|
||||
/* Allocate local per-cpu idle cpumasks */
|
||||
for_each_possible_cpu(i) {
|
||||
BUG_ON(!alloc_cpumask_var_node(&per_cpu(local_idle_cpumask, i),
|
||||
GFP_KERNEL, cpu_to_node(i)));
|
||||
BUG_ON(!alloc_cpumask_var_node(&per_cpu(local_llc_idle_cpumask, i),
|
||||
GFP_KERNEL, cpu_to_node(i)));
|
||||
BUG_ON(!alloc_cpumask_var_node(&per_cpu(local_numa_idle_cpumask, i),
|
||||
|
||||
Reference in New Issue
Block a user