mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2026-06-21 15:43:21 +02:00
6b1c66c9cc
The dumpable flag captured at execve() is consulted by __ptrace_may_access() and several /proc owner / visibility checks. It lives on mm_struct today, which exit_mm() clears from the task long before the task itself is reaped. exec_state is anchored to the execve() that established the current privilege domain. CLONE_VM siblings refcount-share the parent's exec_state via copy_exec_state(); non-CLONE_VM clones allocate a fresh exec_state inheriting the parent's dumpable mode and user_ns reference via task_exec_state_copy(). execve() allocates a fresh instance (via alloc_task_exec_state() in begin_new_exec()) and installs it under task_lock + exec_update_lock with task_exec_state_replace(). init_task uses a static instance. The dumpable mode now lives on task->exec_state->dumpable. task->mm->flags no longer carries dumpability; MMF_DUMPABLE_MASK is removed, but MMF_DUMPABLE_BITS is reserved so MMF_DUMP_FILTER_* bit positions remain stable for the /proc/<pid>/coredump_filter ABI. The task->user_dumpable cache bit and its assignment in exit_mm() are removed; readers go through get_dumpable(task) directly. coredump_params gains a snapshot field cprm.dumpable, populated from get_dumpable(current) at vfs_coredump() entry, replacing the previous __get_dumpable(cprm->mm_flags) consumers in fs/coredump.c and fs/pidfs.c. The user namespace recorded at execve() is consulted by __ptrace_may_access() and by /proc/PID/* owner derivation. Move the captured user_ns onto task_exec_state, which stays attached to the task past exit_mm() and across exit_files(). bprm grows a user_ns field staged in bprm_mm_init() with the caller's user_ns, narrowed by would_dump() to the closest privileged ancestor, and consumed by exec_mmap() via alloc_task_exec_state(bprm->user_ns). free_bprm() releases the staging reference. mm_struct loses ->user_ns entirely. 
Initializers in init-mm, efi_mm, and the implicit one in mm_init()/dup_mm()/mm_alloc() are removed; __mmdrop() drops the matching put_user_ns(). The kthread_use_mm() WARN_ON_ONCE(!mm->user_ns) is no longer meaningful and goes too. Reviewed-by: Jann Horn <jannh@google.com> Link: https://patch.msgid.link/20260520-work-task_exec_state-v3-4-69f895bc1385@kernel.org Signed-off-by: Christian Brauner (Amutable) <brauner@kernel.org>
120 lines
3.4 KiB
C
120 lines
3.4 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/* Copyright (c) 2026 Christian Brauner <brauner@kernel.org> */
|
|
#include <linux/init.h>
|
|
#include <linux/rcupdate.h>
|
|
#include <linux/refcount.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/sched/coredump.h>
|
|
#include <linux/sched/exec_state.h>
|
|
#include <linux/sched/signal.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/user_namespace.h>
|
|
|
|
/* Slab cache for struct task_exec_state objects; created in exec_state_init(). */
static struct kmem_cache *task_exec_state_cachep;
|
|
|
|
static void __free_task_exec_state(struct rcu_head *rcu)
|
|
{
|
|
struct task_exec_state *exec_state = container_of(rcu, struct task_exec_state, rcu);
|
|
|
|
put_user_ns(exec_state->user_ns);
|
|
kmem_cache_free(task_exec_state_cachep, exec_state);
|
|
}
|
|
|
|
void put_task_exec_state(struct task_exec_state *exec_state)
|
|
{
|
|
if (exec_state && refcount_dec_and_test(&exec_state->count))
|
|
call_rcu(&exec_state->rcu, __free_task_exec_state);
|
|
}
|
|
|
|
struct task_exec_state *alloc_task_exec_state(struct user_namespace *user_ns)
|
|
{
|
|
struct task_exec_state *exec_state;
|
|
|
|
exec_state = kmem_cache_alloc(task_exec_state_cachep, GFP_KERNEL);
|
|
if (!exec_state)
|
|
return NULL;
|
|
refcount_set(&exec_state->count, 1);
|
|
exec_state->dumpable = TASK_DUMPABLE_OFF;
|
|
exec_state->user_ns = get_user_ns(user_ns);
|
|
return exec_state;
|
|
}
|
|
|
|
struct task_exec_state *task_exec_state_rcu(const struct task_struct *tsk)
|
|
{
|
|
struct task_exec_state *exec_state;
|
|
|
|
exec_state = rcu_dereference_check(tsk->exec_state,
|
|
lockdep_is_held(&tsk->alloc_lock));
|
|
WARN_ON_ONCE(!exec_state);
|
|
return exec_state;
|
|
}
|
|
|
|
struct task_exec_state *task_exec_state_replace(struct task_struct *tsk,
|
|
struct task_exec_state *exec_state)
|
|
{
|
|
/*
|
|
* Updates must hold both locks so callers needing a consistent
|
|
* snapshot of mm + dumpability are covered.
|
|
*/
|
|
lockdep_assert_held(&tsk->alloc_lock);
|
|
lockdep_assert_held_write(&tsk->signal->exec_update_lock);
|
|
|
|
return rcu_replace_pointer(tsk->exec_state, exec_state, true);
|
|
}
|
|
|
|
/*
|
|
* The non-CLONE_VM clone path: allocate a fresh exec_state and
|
|
* inherit the parent's dumpable mode and user_ns reference. CLONE_VM
|
|
* siblings refcount-share via copy_exec_state() in fork.c; only this
|
|
* path and execve() ever allocate.
|
|
*/
|
|
int task_exec_state_copy(struct task_struct *tsk)
|
|
{
|
|
struct task_exec_state *src, *dst;
|
|
|
|
src = rcu_dereference_protected(current->exec_state, true);
|
|
dst = alloc_task_exec_state(src->user_ns);
|
|
if (!dst)
|
|
return -ENOMEM;
|
|
dst->dumpable = READ_ONCE(src->dumpable);
|
|
rcu_assign_pointer(tsk->exec_state, dst);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Store TASK_DUMPABLE_* on current->exec_state. All callers
|
|
* (commit_creds, begin_new_exec, prctl(PR_SET_DUMPABLE)) act on the
|
|
* running task, which guarantees ->exec_state is allocated and cannot
|
|
* be replaced under us.
|
|
*/
|
|
void task_exec_state_set_dumpable(enum task_dumpable value)
|
|
{
|
|
struct task_exec_state *exec_state;
|
|
|
|
if (WARN_ON_ONCE(value > TASK_DUMPABLE_ROOT))
|
|
value = TASK_DUMPABLE_OFF;
|
|
|
|
exec_state = rcu_dereference_protected(current->exec_state, true);
|
|
/* mm-less tasks share init_task's exec_state; never mutate it */
|
|
if (WARN_ON_ONCE(exec_state == &init_task_exec_state))
|
|
return;
|
|
WRITE_ONCE(exec_state->dumpable, value);
|
|
}
|
|
|
|
enum task_dumpable task_exec_state_get_dumpable(struct task_struct *task)
|
|
{
|
|
struct task_exec_state *exec_state;
|
|
|
|
guard(rcu)();
|
|
exec_state = rcu_dereference(task->exec_state);
|
|
return READ_ONCE(exec_state->dumpable);
|
|
}
|
|
|
|
/*
 * Boot-time setup: create the "task_exec_state" slab cache used by
 * alloc_task_exec_state(). SLAB_PANIC is passed, so the result is not
 * checked here.
 */
void __init exec_state_init(void)
{
	const slab_flags_t flags = SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT;

	task_exec_state_cachep = kmem_cache_create("task_exec_state",
						   sizeof(struct task_exec_state),
						   0, flags, NULL);
}
|