mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2026-03-03 18:28:01 +01:00
Fix PROCMAP_QUERY to fetch optional build ID only after dropping mmap_lock or
per-VMA lock, whichever was used to lock the VMA in question, to avoid the
deadlock reported by syzbot:

 -> #1 (&mm->mmap_lock){++++}-{4:4}:
        __might_fault+0xed/0x170
        _copy_to_iter+0x118/0x1720
        copy_page_to_iter+0x12d/0x1e0
        filemap_read+0x720/0x10a0
        blkdev_read_iter+0x2b5/0x4e0
        vfs_read+0x7f4/0xae0
        ksys_read+0x12a/0x250
        do_syscall_64+0xcb/0xf80
        entry_SYSCALL_64_after_hwframe+0x77/0x7f

 -> #0 (&sb->s_type->i_mutex_key#8){++++}-{4:4}:
        __lock_acquire+0x1509/0x26d0
        lock_acquire+0x185/0x340
        down_read+0x98/0x490
        blkdev_read_iter+0x2a7/0x4e0
        __kernel_read+0x39a/0xa90
        freader_fetch+0x1d5/0xa80
        __build_id_parse.isra.0+0xea/0x6a0
        do_procmap_query+0xd75/0x1050
        procfs_procmap_ioctl+0x7a/0xb0
        __x64_sys_ioctl+0x18e/0x210
        do_syscall_64+0xcb/0xf80
        entry_SYSCALL_64_after_hwframe+0x77/0x7f

 other info that might help us debug this:

  Possible unsafe locking scenario:

        CPU0                    CPU1
        ----                    ----
   rlock(&mm->mmap_lock);
                                lock(&sb->s_type->i_mutex_key#8);
                                lock(&mm->mmap_lock);
   rlock(&sb->s_type->i_mutex_key#8);

  *** DEADLOCK ***

This seems to be exacerbated (as we haven't seen these syzbot reports before
that) by the recent commit 777a8560fd ("lib/buildid: use __kernel_read() for
sleepable context").

To make this safe, we need to grab file refcount while VMA is still locked,
but other than that everything is pretty straightforward. Internal
build_id_parse() API assumes VMA is passed, but it only needs the underlying
file reference, so just add another variant build_id_parse_file() that
expects file passed directly.

[akpm@linux-foundation.org: fix up kerneldoc]
Link: https://lkml.kernel.org/r/20260129215340.3742283-1-andrii@kernel.org
Fixes: ed5d583a88 ("fs/procfs: implement efficient VMA querying API for /proc/<pid>/maps")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Reported-by: <syzbot+4e70c8e0a2017b432f7a@syzkaller.appspotmail.com>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Tested-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Eduard Zingerman <eddyz87@gmail.com>
Cc: Hao Luo <haoluo@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: KP Singh <kpsingh@kernel.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Song Liu <song@kernel.org>
Cc: Stanislav Fomichev <sdf@fomichev.me>
Cc: Yonghong Song <yonghong.song@linux.dev>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
408 lines
10 KiB
C
408 lines
10 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
|
|
#include <linux/buildid.h>
|
|
#include <linux/cache.h>
|
|
#include <linux/elf.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/pagemap.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/secretmem.h>
|
|
|
|
#define BUILD_ID 3
|
|
|
|
/* upper bound on the number of program headers scanned per ELF file; larger e_phnum values are clamped to this */
#define MAX_PHDR_CNT 256
|
|
|
|
/*
 * Prepare @r for reading from @file.
 *
 * The reader is zeroed in full, after which the caller-provided scratch
 * buffer (@buf of @buf_sz bytes), the file and the @may_fault flag are
 * recorded. freader_fetch() treats a non-NULL buffer as "file mode" and
 * uses @may_fault to choose between __kernel_read() and page cache lookups.
 */
void freader_init_from_file(struct freader *r, void *buf, u32 buf_sz,
			    struct file *file, bool may_fault)
{
	memset(r, 0, sizeof(*r));

	r->file = file;
	r->may_fault = may_fault;
	r->buf_sz = buf_sz;
	r->buf = buf;
}
|
|
|
|
/*
 * Prepare @r for reading from an in-memory region of @data_sz bytes at @data.
 *
 * The reader is zeroed in full, so its scratch buffer pointer stays NULL;
 * freader_fetch() and freader_cleanup() use that to recognise memory mode.
 */
void freader_init_from_mem(struct freader *r, const char *data, u64 data_sz)
{
	memset(r, 0, sizeof(*r));

	r->data_sz = data_sz;
	r->data = data;
}
|
|
|
|
static void freader_put_folio(struct freader *r)
|
|
{
|
|
if (!r->folio)
|
|
return;
|
|
kunmap_local(r->addr);
|
|
folio_put(r->folio);
|
|
r->folio = NULL;
|
|
}
|
|
|
|
/*
 * Make @r hold (and have mapped) the folio that contains @file_off.
 *
 * Returns 0 on success. Returns -EFAULT, with no folio held, when the
 * lookup fails or the folio found is not up to date.
 */
static int freader_get_folio(struct freader *r, loff_t file_off)
{
	struct folio *folio = r->folio;

	/* nothing to do when the folio we already hold covers file_off */
	if (folio && file_off >= r->folio_off &&
	    file_off < r->folio_off + folio_size(folio))
		return 0;

	freader_put_folio(r);

	/* page cache lookup only - a miss is reported as a failure */
	folio = filemap_get_folio(r->file->f_mapping, file_off >> PAGE_SHIFT);
	if (IS_ERR(folio))
		return -EFAULT;

	if (!folio_test_uptodate(folio)) {
		folio_put(folio);
		return -EFAULT;
	}

	r->folio = folio;
	r->folio_off = folio_pos(folio);
	r->addr = kmap_local_folio(folio, 0);

	return 0;
}
|
|
|
|
/*
 * Return a pointer to @sz bytes of data starting at @file_off.
 *
 * In memory mode (no scratch buffer) the pointer refers directly into the
 * caller's data. In file mode the data is either read with __kernel_read()
 * into the scratch buffer (may_fault), or taken from the page cache: directly
 * from the mapped folio when the range fits in one folio, otherwise copied
 * piecewise into the scratch buffer.
 *
 * Returns NULL on failure with the reason stored in r->err.
 */
const void *freader_fetch(struct freader *r, loff_t file_off, size_t sz)
{
	size_t cur_sz;
	u64 chunk, copied;

	/* provided internal temporary buffer should be sized correctly */
	if (WARN_ON(r->buf && sz > r->buf_sz)) {
		r->err = -E2BIG;
		return NULL;
	}

	if (unlikely(file_off + sz < file_off)) {
		r->err = -EOVERFLOW;
		return NULL;
	}

	/* memory mode: bounds-check and point straight into the data */
	if (!r->buf) {
		if (file_off + sz > r->data_sz) {
			r->err = -ERANGE;
			return NULL;
		}
		return r->data + file_off;
	}

	/* reject secretmem folios created with memfd_secret() */
	if (secretmem_mapping(r->file->f_mapping)) {
		r->err = -EFAULT;
		return NULL;
	}

	/* sleepable context: let __kernel_read() fill the scratch buffer */
	if (r->may_fault) {
		ssize_t nread = __kernel_read(r->file, r->buf, sz, &file_off);

		if (nread != sz) {
			r->err = (nread < 0) ? nread : -EIO;
			return NULL;
		}
		return r->buf;
	}

	/* fetch or reuse folio for given file offset */
	r->err = freader_get_folio(r, file_off);
	if (r->err)
		return NULL;

	cur_sz = folio_size(r->folio);

	/* data fits in a single folio: hand out a direct pointer */
	if (file_off + sz <= r->folio_off + cur_sz)
		return r->addr + (file_off - r->folio_off);

	/*
	 * The request crosses a folio boundary, so gather the pieces into the
	 * scratch buffer to keep a simple linear memory access interface.
	 * Start with the tail of the current folio.
	 */
	chunk = r->folio_off + cur_sz - file_off;
	memcpy(r->buf, r->addr + file_off - r->folio_off, chunk);
	copied = chunk;

	while (copied < sz) {
		/* move on to the folio that follows the current one */
		r->err = freader_get_folio(r, r->folio_off + cur_sz);
		if (r->err)
			return NULL;

		cur_sz = folio_size(r->folio);
		chunk = min_t(u64, sz - copied, cur_sz);
		memcpy(r->buf + copied, r->addr, chunk);
		copied += chunk;
	}

	return r->buf;
}
|
|
|
|
void freader_cleanup(struct freader *r)
|
|
{
|
|
if (!r->buf)
|
|
return; /* non-file-backed mode */
|
|
|
|
freader_put_folio(r);
|
|
}
|
|
|
|
/*
|
|
* Parse build id from the note segment. This logic can be shared between
|
|
* 32-bit and 64-bit system, because Elf32_Nhdr and Elf64_Nhdr are
|
|
* identical.
|
|
*/
|
|
static int parse_build_id(struct freader *r, unsigned char *build_id, __u32 *size,
|
|
loff_t note_off, Elf32_Word note_size)
|
|
{
|
|
const char note_name[] = "GNU";
|
|
const size_t note_name_sz = sizeof(note_name);
|
|
u32 build_id_off, new_off, note_end, name_sz, desc_sz;
|
|
const Elf32_Nhdr *nhdr;
|
|
const char *data;
|
|
|
|
if (check_add_overflow(note_off, note_size, ¬e_end))
|
|
return -EINVAL;
|
|
|
|
while (note_end - note_off > sizeof(Elf32_Nhdr) + note_name_sz) {
|
|
nhdr = freader_fetch(r, note_off, sizeof(Elf32_Nhdr) + note_name_sz);
|
|
if (!nhdr)
|
|
return r->err;
|
|
|
|
name_sz = READ_ONCE(nhdr->n_namesz);
|
|
desc_sz = READ_ONCE(nhdr->n_descsz);
|
|
|
|
new_off = note_off + sizeof(Elf32_Nhdr);
|
|
if (check_add_overflow(new_off, ALIGN(name_sz, 4), &new_off) ||
|
|
check_add_overflow(new_off, ALIGN(desc_sz, 4), &new_off) ||
|
|
new_off > note_end)
|
|
break;
|
|
|
|
if (nhdr->n_type == BUILD_ID &&
|
|
name_sz == note_name_sz &&
|
|
memcmp(nhdr + 1, note_name, note_name_sz) == 0 &&
|
|
desc_sz > 0 && desc_sz <= BUILD_ID_SIZE_MAX) {
|
|
build_id_off = note_off + sizeof(Elf32_Nhdr) + ALIGN(note_name_sz, 4);
|
|
|
|
/* freader_fetch() will invalidate nhdr pointer */
|
|
data = freader_fetch(r, build_id_off, desc_sz);
|
|
if (!data)
|
|
return r->err;
|
|
|
|
memcpy(build_id, data, desc_sz);
|
|
memset(build_id + desc_sz, 0, BUILD_ID_SIZE_MAX - desc_sz);
|
|
if (size)
|
|
*size = desc_sz;
|
|
return 0;
|
|
}
|
|
|
|
note_off = new_off;
|
|
}
|
|
|
|
return -EINVAL;
|
|
}
|
|
|
|
/* Parse build ID from 32-bit ELF */
|
|
static int get_build_id_32(struct freader *r, unsigned char *build_id, __u32 *size)
|
|
{
|
|
const Elf32_Ehdr *ehdr;
|
|
const Elf32_Phdr *phdr;
|
|
__u32 phnum, phoff, i;
|
|
|
|
ehdr = freader_fetch(r, 0, sizeof(Elf32_Ehdr));
|
|
if (!ehdr)
|
|
return r->err;
|
|
|
|
/* subsequent freader_fetch() calls invalidate pointers, so remember locally */
|
|
phnum = READ_ONCE(ehdr->e_phnum);
|
|
phoff = READ_ONCE(ehdr->e_phoff);
|
|
|
|
/* set upper bound on amount of segments (phdrs) we iterate */
|
|
if (phnum > MAX_PHDR_CNT)
|
|
phnum = MAX_PHDR_CNT;
|
|
|
|
/* check that phoff is not large enough to cause an overflow */
|
|
if (phoff + phnum * sizeof(Elf32_Phdr) < phoff)
|
|
return -EINVAL;
|
|
|
|
for (i = 0; i < phnum; ++i) {
|
|
phdr = freader_fetch(r, phoff + i * sizeof(Elf32_Phdr), sizeof(Elf32_Phdr));
|
|
if (!phdr)
|
|
return r->err;
|
|
|
|
if (phdr->p_type == PT_NOTE &&
|
|
!parse_build_id(r, build_id, size, READ_ONCE(phdr->p_offset),
|
|
READ_ONCE(phdr->p_filesz)))
|
|
return 0;
|
|
}
|
|
return -EINVAL;
|
|
}
|
|
|
|
/* Parse build ID from 64-bit ELF */
|
|
static int get_build_id_64(struct freader *r, unsigned char *build_id, __u32 *size)
|
|
{
|
|
const Elf64_Ehdr *ehdr;
|
|
const Elf64_Phdr *phdr;
|
|
__u32 phnum, i;
|
|
__u64 phoff;
|
|
|
|
ehdr = freader_fetch(r, 0, sizeof(Elf64_Ehdr));
|
|
if (!ehdr)
|
|
return r->err;
|
|
|
|
/* subsequent freader_fetch() calls invalidate pointers, so remember locally */
|
|
phnum = READ_ONCE(ehdr->e_phnum);
|
|
phoff = READ_ONCE(ehdr->e_phoff);
|
|
|
|
/* set upper bound on amount of segments (phdrs) we iterate */
|
|
if (phnum > MAX_PHDR_CNT)
|
|
phnum = MAX_PHDR_CNT;
|
|
|
|
/* check that phoff is not large enough to cause an overflow */
|
|
if (phoff + phnum * sizeof(Elf64_Phdr) < phoff)
|
|
return -EINVAL;
|
|
|
|
for (i = 0; i < phnum; ++i) {
|
|
phdr = freader_fetch(r, phoff + i * sizeof(Elf64_Phdr), sizeof(Elf64_Phdr));
|
|
if (!phdr)
|
|
return r->err;
|
|
|
|
if (phdr->p_type == PT_NOTE &&
|
|
!parse_build_id(r, build_id, size, READ_ONCE(phdr->p_offset),
|
|
READ_ONCE(phdr->p_filesz)))
|
|
return 0;
|
|
}
|
|
|
|
return -EINVAL;
|
|
}
|
|
|
|
/* enough for Elf64_Ehdr, Elf64_Phdr, and all the smaller requests */
|
|
#define MAX_FREADER_BUF_SZ 64
|
|
|
|
static int __build_id_parse(struct file *file, unsigned char *build_id,
|
|
__u32 *size, bool may_fault)
|
|
{
|
|
const Elf32_Ehdr *ehdr;
|
|
struct freader r;
|
|
char buf[MAX_FREADER_BUF_SZ];
|
|
int ret;
|
|
|
|
freader_init_from_file(&r, buf, sizeof(buf), file, may_fault);
|
|
|
|
/* fetch first 18 bytes of ELF header for checks */
|
|
ehdr = freader_fetch(&r, 0, offsetofend(Elf32_Ehdr, e_type));
|
|
if (!ehdr) {
|
|
ret = r.err;
|
|
goto out;
|
|
}
|
|
|
|
ret = -EINVAL;
|
|
|
|
/* compare magic x7f "ELF" */
|
|
if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0)
|
|
goto out;
|
|
|
|
/* only support executable file and shared object file */
|
|
if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN)
|
|
goto out;
|
|
|
|
if (ehdr->e_ident[EI_CLASS] == ELFCLASS32)
|
|
ret = get_build_id_32(&r, build_id, size);
|
|
else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64)
|
|
ret = get_build_id_64(&r, build_id, size);
|
|
out:
|
|
freader_cleanup(&r);
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* build_id_parse_nofault() - Parse build ID of ELF file mapped to vma
|
|
* @vma: vma object
|
|
* @build_id: buffer to store build id, at least BUILD_ID_SIZE long
|
|
* @size: returns actual build id size in case of success
|
|
*
|
|
* Assumes no page fault can be taken, so if relevant portions of ELF file are
|
|
* not already paged in, fetching of build ID fails.
|
|
*
|
|
* Return: 0 on success; negative error, otherwise
|
|
*/
|
|
int build_id_parse_nofault(struct vm_area_struct *vma, unsigned char *build_id, __u32 *size)
|
|
{
|
|
if (!vma->vm_file)
|
|
return -EINVAL;
|
|
|
|
return __build_id_parse(vma->vm_file, build_id, size, false /* !may_fault */);
|
|
}
|
|
|
|
/**
|
|
* build_id_parse() - Parse build ID of ELF file mapped to VMA
|
|
* @vma: vma object
|
|
* @build_id: buffer to store build id, at least BUILD_ID_SIZE long
|
|
* @size: returns actual build id size in case of success
|
|
*
|
|
* Assumes faultable context and can cause page faults to bring in file data
|
|
* into page cache.
|
|
*
|
|
* Return: 0 on success; negative error, otherwise
|
|
*/
|
|
int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id, __u32 *size)
|
|
{
|
|
if (!vma->vm_file)
|
|
return -EINVAL;
|
|
|
|
return __build_id_parse(vma->vm_file, build_id, size, true /* may_fault */);
|
|
}
|
|
|
|
/**
|
|
* build_id_parse_file() - Parse build ID of ELF file
|
|
* @file: file object
|
|
* @build_id: buffer to store build id, at least BUILD_ID_SIZE long
|
|
* @size: returns actual build id size in case of success
|
|
*
|
|
* Assumes faultable context and can cause page faults to bring in file data
|
|
* into page cache.
|
|
*
|
|
* Return: 0 on success; negative error, otherwise
|
|
*/
|
|
int build_id_parse_file(struct file *file, unsigned char *build_id, __u32 *size)
|
|
{
|
|
return __build_id_parse(file, build_id, size, true /* may_fault */);
|
|
}
|
|
|
|
/**
|
|
* build_id_parse_buf - Get build ID from a buffer
|
|
* @buf: ELF note section(s) to parse
|
|
* @buf_size: Size of @buf in bytes
|
|
* @build_id: Build ID parsed from @buf, at least BUILD_ID_SIZE_MAX long
|
|
*
|
|
* Return: 0 on success, -EINVAL otherwise
|
|
*/
|
|
int build_id_parse_buf(const void *buf, unsigned char *build_id, u32 buf_size)
|
|
{
|
|
struct freader r;
|
|
int err;
|
|
|
|
freader_init_from_mem(&r, buf, buf_size);
|
|
|
|
err = parse_build_id(&r, build_id, NULL, 0, buf_size);
|
|
|
|
freader_cleanup(&r);
|
|
return err;
|
|
}
|
|
|
|
#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) || IS_ENABLED(CONFIG_VMCORE_INFO)
/* filled in by init_vmlinux_build_id() */
unsigned char vmlinux_build_id[BUILD_ID_SIZE_MAX] __ro_after_init;

/**
 * init_vmlinux_build_id - Compute and stash the running kernel's build ID
 *
 * Parses the notes found between __start_notes and __stop_notes and stores
 * the result in vmlinux_build_id. The parser's return value is not checked.
 */
void __init init_vmlinux_build_id(void)
{
	/* NOTE(review): presumably linker-provided bounds of the notes section - confirm */
	extern const void __start_notes;
	extern const void __stop_notes;
	unsigned int notes_sz = &__stop_notes - &__start_notes;

	build_id_parse_buf(&__start_notes, vmlinux_build_id, notes_sz);
}
#endif
|