mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2026-03-08 18:26:12 +01:00
There is a race window where BPF hash map elements can leak special fields if the program with access to the map value recreates these special fields between the check_and_free_fields done on the map value and its eventual return to the memory allocator. Several ways were explored prior to this patch, most notably [0] tried to use a poison value to reject attempts to recreate special fields for map values that have been logically deleted but still accessible to BPF programs (either while sitting in the free list or when reused). While this approach works well for task work, timers, wq, etc., it is harder to apply the idea to kptrs, which have a similar race and failure mode. Instead, we change bpf_mem_alloc to allow registering a destructor for allocated elements, such that when they are returned to the allocator, any special fields created while they were accessible to programs in the meantime will be freed. If these values get reused, we do not free the fields again before handing the element back. The special fields thus may remain initialized while the map value sits in a free list. When bpf_mem_alloc is retired in the future, a similar concept can be introduced to kmalloc_nolock-backed kmem_cache, paired with the existing idea of a constructor. Note that the destructor registration happens in map_check_btf, after the BTF record is populated and (at that point) available for inspection and duplication. Duplication is necessary since the freeing of embedded bpf_mem_alloc can be decoupled from actual map lifetime due to logic introduced to reduce the cost of rcu_barrier()s in the mem alloc free path in commit 9f2c6e96c6 ("bpf: Optimize rcu_barrier usage between hash map and bpf_mem_alloc."). As such, once all callbacks are done, we must also free the duplicated record. To remove the dependency on the bpf_map itself, also stash the key size of the map to obtain the value from htab_elem long after the map is gone.
[0]: https://lore.kernel.org/bpf/20260216131341.1285427-1-mykyta.yatsenko5@gmail.com Fixes: 14a324f6a6 ("bpf: Wire up freeing of referenced kptr") Fixes: 1bfbc267ec ("bpf: Enable bpf_timer and bpf_wq in any context") Reported-by: Alexei Starovoitov <ast@kernel.org> Tested-by: syzbot@syzkaller.appspotmail.com Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Link: https://lore.kernel.org/r/20260227224806.646888-2-memxor@gmail.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
58 lines
2.3 KiB
C
58 lines
2.3 KiB
C
/* SPDX-License-Identifier: GPL-2.0-only */
|
|
/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
|
|
#ifndef _BPF_MEM_ALLOC_H
|
|
#define _BPF_MEM_ALLOC_H
|
|
#include <linux/compiler_types.h>
|
|
#include <linux/workqueue.h>
|
|
|
|
struct bpf_mem_cache;
|
|
struct bpf_mem_caches;
|
|
|
|
/* Per-map (or per-subsystem) BPF memory allocator instance. Depending on how
 * it was initialized, it either manages a single fixed object size ('cache')
 * or a range of sizes ('caches'); see the comment above bpf_mem_alloc_init()
 * below.
 */
struct bpf_mem_alloc {
	/* Per-CPU set of caches covering multiple object sizes (size == 0 mode). */
	struct bpf_mem_caches __percpu *caches;
	/* Per-CPU cache for a single fixed object size (size != 0 mode). */
	struct bpf_mem_cache __percpu *cache;
	/* Memory cgroup to charge allocations to; NULL if uncharged.
	 * NOTE(review): charging semantics are implemented in memalloc.c,
	 * not visible here.
	 */
	struct obj_cgroup *objcg;
	/* True if this allocator hands out per-cpu objects. */
	bool percpu;
	/* Deferred-destruction work; allocator teardown can be decoupled from
	 * map lifetime (see commit message: rcu_barrier() cost optimization).
	 */
	struct work_struct work;
	/* Frees 'dtor_ctx' once all element destructor callbacks are done
	 * (e.g. a BTF record duplicated at registration time).
	 */
	void (*dtor_ctx_free)(void *ctx);
	/* Opaque context passed to the per-element destructor registered via
	 * bpf_mem_alloc_set_dtor().
	 */
	void *dtor_ctx;
};
|
|
|
|
/* 'size != 0' is for bpf_mem_alloc which manages fixed-size objects.
 * Alloc and free are done with bpf_mem_cache_{alloc,free}().
 *
 * 'size = 0' is for bpf_mem_alloc which manages many fixed-size objects.
 * Alloc and free are done with bpf_mem_{alloc,free}() and the size of
 * the returned object is given by the size argument of bpf_mem_alloc().
 * If percpu equals true, error will be returned in order to avoid
 * large memory consumption and the below bpf_mem_alloc_percpu_unit_init()
 * should be used to do on-demand per-cpu allocation for each size.
 */
int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu);

/* Initialize a non-fix-size percpu memory allocator */
int bpf_mem_alloc_percpu_init(struct bpf_mem_alloc *ma, struct obj_cgroup *objcg);

/* The percpu allocation with a specific unit size. */
int bpf_mem_alloc_percpu_unit_init(struct bpf_mem_alloc *ma, int size);

/* Tear down the allocator and release its caches. */
void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma);

/* Register a per-element destructor: 'dtor' is invoked with (obj, ctx) when
 * an element is returned to the allocator, so that special fields recreated
 * by programs while the element was still reachable get freed.
 * 'dtor_ctx_free' releases 'ctx' once all destructor callbacks are done.
 * NOTE(review): exact invocation points are in memalloc.c, not visible here.
 */
void bpf_mem_alloc_set_dtor(struct bpf_mem_alloc *ma,
			    void (*dtor)(void *obj, void *ctx),
			    void (*dtor_ctx_free)(void *ctx),
			    void *ctx);
|
|
|
|
/* Check the allocation size for kmalloc equivalent allocator */
int bpf_mem_alloc_check_size(bool percpu, size_t size);

/* kmalloc/kfree equivalent (for allocators initialized with size == 0): */
void *bpf_mem_alloc(struct bpf_mem_alloc *ma, size_t size);
void bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr);
/* As bpf_mem_free(), but defers the actual free past an RCU grace period. */
void bpf_mem_free_rcu(struct bpf_mem_alloc *ma, void *ptr);

/* kmem_cache_alloc/free equivalent (for allocators initialized with
 * size != 0):
 */
void *bpf_mem_cache_alloc(struct bpf_mem_alloc *ma);
void bpf_mem_cache_free(struct bpf_mem_alloc *ma, void *ptr);
/* As bpf_mem_cache_free(), but defers the free past an RCU grace period. */
void bpf_mem_cache_free_rcu(struct bpf_mem_alloc *ma, void *ptr);
/* Free an object without an allocator handle.
 * NOTE(review): implementation details are in memalloc.c, not visible here.
 */
void bpf_mem_cache_raw_free(void *ptr);
/* Like bpf_mem_cache_alloc(), but with caller-supplied gfp flags. */
void *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags);
|
|
|
|
#endif /* _BPF_MEM_ALLOC_H */
|