Files
linux-stable-mirror/include/linux/coredump.h
T
Christian Brauner (Amutable) 6b1c66c9cc exec_state: relocate dumpable information
The dumpable flag captured at execve() is consulted by
__ptrace_may_access() and several /proc owner / visibility checks.
It lives on mm_struct today, which exit_mm() clears from the task
long before the task itself is reaped.

exec_state is anchored to the execve() that established the current
privilege domain.  CLONE_VM siblings refcount-share the parent's
exec_state via copy_exec_state(); non-CLONE_VM clones allocate a
fresh exec_state inheriting the parent's dumpable mode and user_ns
reference via task_exec_state_copy().  execve() allocates a fresh
instance (via alloc_task_exec_state() in begin_new_exec()) and
installs it under task_lock + exec_update_lock with
task_exec_state_replace().  init_task uses a static instance.

The dumpable mode now lives on task->exec_state->dumpable.
task->mm->flags no longer carries dumpability; MMF_DUMPABLE_MASK is
removed, but MMF_DUMPABLE_BITS is reserved so MMF_DUMP_FILTER_* bit
positions remain stable for the /proc/<pid>/coredump_filter ABI. The
task->user_dumpable cache bit and its assignment in exit_mm() are
removed; readers go through get_dumpable(task) directly.

coredump_params gains a snapshot field cprm.dumpable, populated from
get_dumpable(current) at vfs_coredump() entry, replacing the previous
__get_dumpable(cprm->mm_flags) consumers in fs/coredump.c and
fs/pidfs.c.

The user namespace recorded at execve() is consulted by
__ptrace_may_access() and by /proc/PID/* owner derivation. Move the
captured user_ns onto task_exec_state, which stays attached to the task
past exit_mm() and across exit_files().

bprm grows a user_ns field staged in bprm_mm_init() with the caller's
user_ns, narrowed by would_dump() to the closest privileged ancestor,
and consumed by exec_mmap() via alloc_task_exec_state(bprm->user_ns).
free_bprm() releases the staging reference.

mm_struct loses ->user_ns entirely.  Initializers in init-mm, efi_mm,
and the implicit one in mm_init()/dup_mm()/mm_alloc() are removed;
__mmdrop() drops the matching put_user_ns(). The kthread_use_mm()
WARN_ON_ONCE(!mm->user_ns) is no longer meaningful and goes too.

Reviewed-by: Jann Horn <jannh@google.com>
Link: https://patch.msgid.link/20260520-work-task_exec_state-v3-4-69f895bc1385@kernel.org
Signed-off-by: Christian Brauner (Amutable) <brauner@kernel.org>
2026-05-26 11:02:01 +02:00

84 lines
2.3 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_COREDUMP_H
#define _LINUX_COREDUMP_H
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/sched/coredump.h>
#include <asm/siginfo.h>
#ifdef CONFIG_COREDUMP
struct core_vma_metadata {
unsigned long start, end;
vm_flags_t flags;
unsigned long dump_size;
unsigned long pgoff;
struct file *file;
};
struct coredump_params {
const kernel_siginfo_t *siginfo;
struct file *file;
unsigned long limit;
/* MMF_DUMP_FILTER_* bits, snapshot of mm->flags at dump start. */
unsigned long mm_flags;
/* Snapshot of dumpable at dump start. */
enum task_dumpable dumpable;
int cpu;
loff_t written;
loff_t pos;
loff_t to_skip;
int vma_count;
size_t vma_data_size;
struct core_vma_metadata *vma_meta;
struct pid *pid;
};
extern unsigned int core_file_note_size_limit;
/*
* These are the only things you should do on a core-file: use only these
* functions to write out all the necessary info.
*/
extern void dump_skip_to(struct coredump_params *cprm, unsigned long to);
extern void dump_skip(struct coredump_params *cprm, size_t nr);
extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr);
extern int dump_align(struct coredump_params *cprm, int align);
int dump_user_range(struct coredump_params *cprm, unsigned long start,
unsigned long len);
extern void vfs_coredump(const kernel_siginfo_t *siginfo);
/*
* Logging for the coredump code, ratelimited.
* The TGID and comm fields are added to the message.
*/
#define __COREDUMP_PRINTK(Level, Format, ...) \
do { \
char comm[TASK_COMM_LEN]; \
/* This will always be NUL terminated. */ \
memcpy(comm, current->comm, sizeof(comm)); \
printk_ratelimited(Level "coredump: %d(%*pE): " Format "\n", \
task_tgid_vnr(current), (int)strlen(comm), comm, ##__VA_ARGS__); \
} while (0) \
#define coredump_report(fmt, ...) __COREDUMP_PRINTK(KERN_INFO, fmt, ##__VA_ARGS__)
#define coredump_report_failure(fmt, ...) __COREDUMP_PRINTK(KERN_WARNING, fmt, ##__VA_ARGS__)
#else
static inline void vfs_coredump(const kernel_siginfo_t *siginfo) {}
#define coredump_report(...)
#define coredump_report_failure(...)
#endif
#if defined(CONFIG_COREDUMP) && defined(CONFIG_SYSCTL)
extern void validate_coredump_safety(void);
#else
static inline void validate_coredump_safety(void) {}
#endif
#endif /* _LINUX_COREDUMP_H */