mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2026-06-21 15:43:21 +02:00
35b535db69
gpu_buddy APIs are expected to be called with the driver-provided lock held, but there is no runtime enforcement of this contract. Add lockdep annotations to catch locking violations early.

Introduce gpu_buddy_driver_set_lock() for the driver to register the lock that protects the buddy manager. Add gpu_buddy_driver_lock_held() assertions to all exported gpu_buddy and drm_buddy APIs that access/modify the manager state.

The lock_dep_map field is only compiled in when CONFIG_LOCKDEP is enabled, adding zero overhead to production builds.

Wire up xe_ttm_vram_mgr to register its mutex with the buddy manager after initialization.

Assisted-by: Copilot:claude-opus-4.6
Suggested-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Link: https://patch.msgid.link/20260508065544.4049240-2-tejas.upadhyay@intel.com
283 lines
8.5 KiB
C
283 lines
8.5 KiB
C
/* SPDX-License-Identifier: MIT */
|
|
/*
|
|
* Copyright © 2021 Intel Corporation
|
|
*/
|
|
|
|
#ifndef __GPU_BUDDY_H__
|
|
#define __GPU_BUDDY_H__
|
|
|
|
#include <linux/bitops.h>
|
|
#include <linux/list.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/rbtree.h>
|
|
#include <linux/rbtree_augmented.h>
|
|
|
|
/**
 * GPU_BUDDY_RANGE_ALLOCATION - Allocate within a specific address range
 *
 * When set, allocation is restricted to the range [start, end) specified
 * in gpu_buddy_alloc_blocks(). Without this flag, start/end are ignored
 * and allocation can use any free space.
 */
#define GPU_BUDDY_RANGE_ALLOCATION		BIT(0)
|
|
|
|
/**
 * GPU_BUDDY_TOPDOWN_ALLOCATION - Allocate from top of address space
 *
 * Allocate starting from high addresses and working down. Useful for
 * separating different allocation types (e.g., kernel vs userspace)
 * to reduce fragmentation.
 */
#define GPU_BUDDY_TOPDOWN_ALLOCATION		BIT(1)
|
|
|
|
/**
 * GPU_BUDDY_CONTIGUOUS_ALLOCATION - Require physically contiguous blocks
 *
 * The allocation must be satisfied with a single contiguous block.
 * If the requested size cannot be allocated contiguously, the
 * allocation fails with -ENOSPC.
 */
#define GPU_BUDDY_CONTIGUOUS_ALLOCATION		BIT(2)
|
|
|
|
/**
 * GPU_BUDDY_CLEAR_ALLOCATION - Prefer pre-cleared (zeroed) memory
 *
 * Attempt to allocate from the clear tree first. If insufficient clear
 * memory is available, falls back to dirty memory. Useful when the
 * caller needs zeroed memory and wants to avoid GPU clear operations.
 */
#define GPU_BUDDY_CLEAR_ALLOCATION		BIT(3)
|
|
|
|
/**
 * GPU_BUDDY_CLEARED - Mark returned blocks as cleared
 *
 * Used with gpu_buddy_free_list() to indicate that the memory being
 * freed has been cleared (zeroed). The blocks will be placed in the
 * clear tree for future GPU_BUDDY_CLEAR_ALLOCATION requests.
 */
#define GPU_BUDDY_CLEARED			BIT(4)
|
|
|
|
/**
 * GPU_BUDDY_TRIM_DISABLE - Disable automatic block trimming
 *
 * By default, if an allocation is smaller than the allocated block,
 * excess memory is trimmed and returned to the free pool. This flag
 * disables trimming, keeping the full power-of-two block size.
 */
#define GPU_BUDDY_TRIM_DISABLE			BIT(5)
|
|
|
|
/*
 * Selects one of the allocator's free trees:
 * - GPU_BUDDY_CLEAR_TREE: free blocks with zeroed content
 * - GPU_BUDDY_DIRTY_TREE: free blocks with unknown content
 * GPU_BUDDY_MAX_FREE_TREES is the number of trees, not a tree itself; it is
 * the loop bound of for_each_free_tree() and must stay the last enumerator.
 */
enum gpu_buddy_free_tree {
	GPU_BUDDY_CLEAR_TREE = 0,
	GPU_BUDDY_DIRTY_TREE,
	GPU_BUDDY_MAX_FREE_TREES,
};
|
|
|
|
/*
 * for_each_free_tree - iterate over every free-tree index
 * @tree: integer lvalue used as the loop cursor
 *
 * Expands to a bare for-statement header that runs @tree from 0 up to, but
 * not including, GPU_BUDDY_MAX_FREE_TREES. The statement following the macro
 * is the loop body.
 */
#define for_each_free_tree(tree) \
	for ((tree) = 0; (tree) < GPU_BUDDY_MAX_FREE_TREES; (tree)++)
|
|
|
|
/**
|
|
* struct gpu_buddy_block - Block within a buddy allocator
|
|
*
|
|
* Each block in the buddy allocator is represented by this structure.
|
|
* Blocks are organized in a binary tree where each parent block can be
|
|
* split into two children (left and right buddies). The allocator manages
|
|
* blocks at various orders (power-of-2 sizes) from chunk_size up to the
|
|
* largest contiguous region.
|
|
*
|
|
* @private: Private data owned by the allocator user (e.g., driver-specific data)
|
|
* @link: List node for user ownership while block is allocated
|
|
*/
|
|
struct gpu_buddy_block {
|
|
/* private: */
|
|
/*
|
|
* Header bit layout:
|
|
* - Bits 63:12: block offset within the address space
|
|
* - Bits 11:10: state (ALLOCATED, FREE, or SPLIT)
|
|
* - Bit 9: clear bit (1 if memory is zeroed)
|
|
* - Bits 8:6: reserved
|
|
* - Bits 5:0: order (log2 of size relative to chunk_size)
|
|
*/
|
|
#define GPU_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
|
|
#define GPU_BUDDY_HEADER_STATE GENMASK_ULL(11, 10)
|
|
#define GPU_BUDDY_ALLOCATED (1 << 10)
|
|
#define GPU_BUDDY_FREE (2 << 10)
|
|
#define GPU_BUDDY_SPLIT (3 << 10)
|
|
#define GPU_BUDDY_HEADER_CLEAR GENMASK_ULL(9, 9)
|
|
/* Free to be used, if needed in the future */
|
|
#define GPU_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6)
|
|
#define GPU_BUDDY_HEADER_ORDER GENMASK_ULL(5, 0)
|
|
u64 header;
|
|
|
|
struct gpu_buddy_block *left;
|
|
struct gpu_buddy_block *right;
|
|
struct gpu_buddy_block *parent;
|
|
/* public: */
|
|
void *private; /* owned by creator */
|
|
|
|
/*
|
|
* While the block is allocated by the user through gpu_buddy_alloc*,
|
|
* the user has ownership of the link, for example to maintain within
|
|
* a list, if so desired. As soon as the block is freed with
|
|
* gpu_buddy_free* ownership is given back to the mm.
|
|
*/
|
|
union {
|
|
/* private: */
|
|
struct rb_node rb;
|
|
/* public: */
|
|
struct list_head link;
|
|
};
|
|
/* private: */
|
|
struct list_head tmp_link;
|
|
unsigned int subtree_max_alignment;
|
|
};
|
|
|
|
/*
 * Order-zero must be at least SZ_4K: the block header keeps the offset in
 * bits 63:12 and uses the low 12 bits for state, clear and order, which
 * leaves 63 - 12 usable orders above the minimum chunk.
 */
#define GPU_BUDDY_MAX_ORDER (63 - 12)
|
|
|
|
/**
|
|
* struct gpu_buddy - GPU binary buddy allocator
|
|
*
|
|
* The buddy allocator provides efficient power-of-two memory allocation
|
|
* with fast allocation and free operations. It is commonly used for GPU
|
|
* memory management where allocations can be split into power-of-two
|
|
* block sizes.
|
|
*
|
|
* Locking should be handled by the user; a simple mutex around
|
|
* gpu_buddy_alloc_blocks() and gpu_buddy_free_block()/gpu_buddy_free_list()
|
|
* should suffice.
|
|
*
|
|
* @n_roots: Number of root blocks in the roots array.
|
|
* @max_order: Maximum block order (log2 of largest block size / chunk_size).
|
|
* @chunk_size: Minimum allocation granularity in bytes. Must be at least SZ_4K.
|
|
* @size: Total size of the address space managed by this allocator in bytes.
|
|
* @avail: Total free space currently available for allocation in bytes.
|
|
* @clear_avail: Free space available in the clear tree (zeroed memory) in bytes.
|
|
* This is a subset of @avail.
|
|
* @lock_dep_map: Annotates gpu_buddy API with a driver provided lock.
|
|
*/
|
|
struct gpu_buddy {
|
|
/* private: */
|
|
/*
|
|
* Array of red-black trees for free block management.
|
|
* Indexed as free_trees[clear/dirty][order] where:
|
|
* - Index 0 (GPU_BUDDY_CLEAR_TREE): blocks with zeroed content
|
|
* - Index 1 (GPU_BUDDY_DIRTY_TREE): blocks with unknown content
|
|
* Each tree holds free blocks of the corresponding order.
|
|
*/
|
|
struct rb_root **free_trees;
|
|
/*
|
|
* Array of root blocks representing the top-level blocks of the
|
|
* binary tree(s). Multiple roots exist when the total size is not
|
|
* a power of two, with each root being the largest power-of-two
|
|
* that fits in the remaining space.
|
|
*/
|
|
struct gpu_buddy_block **roots;
|
|
/* public: */
|
|
unsigned int n_roots;
|
|
unsigned int max_order;
|
|
u64 chunk_size;
|
|
u64 size;
|
|
u64 avail;
|
|
u64 clear_avail;
|
|
#ifdef CONFIG_LOCKDEP
|
|
struct lockdep_map *lock_dep_map;
|
|
#endif
|
|
};
|
|
|
|
#ifdef CONFIG_LOCKDEP
/**
 * gpu_buddy_driver_set_lock() - Set the lock protecting accesses to GPU BUDDY
 * @mm: Pointer to GPU buddy structure.
 * @lock: the lock used to protect the gpu buddy. The locking primitive
 *        must contain a dep_map field.
 *
 * Call this to annotate gpu_buddy APIs which access/modify gpu_buddy manager.
 * The lock may be registered only once: a second call triggers a WARN and
 * leaves the previously registered lock in place. Without CONFIG_LOCKDEP the
 * macro only evaluates its arguments and does nothing else.
 */
#define gpu_buddy_driver_set_lock(mm, lock) \
	do { \
		struct gpu_buddy *__mm = (mm); \
		if (!WARN(__mm->lock_dep_map, "GPU BUDDY MM lock should be set only once.")) \
			__mm->lock_dep_map = &(lock)->dep_map; \
	} while (0)
#else
#define gpu_buddy_driver_set_lock(mm, lock) do { (void)(mm); (void)(lock); } while (0)
#endif
|
|
|
|
#ifdef CONFIG_LOCKDEP
/**
 * gpu_buddy_driver_lock_held() - Assert GPU BUDDY manager lock is held
 * @mm: Pointer to the GPU BUDDY structure.
 *
 * Ensure driver lock is held. Nothing is checked when no lock has been
 * registered through gpu_buddy_driver_set_lock(), and the function is an
 * empty stub when CONFIG_LOCKDEP is disabled.
 */
static inline void gpu_buddy_driver_lock_held(const struct gpu_buddy *mm)
{
	if (!mm->lock_dep_map)
		return;

	/* 0 requests the same "held for write" check as lockdep_assert_held_write(). */
	lockdep_assert(lock_is_held_type(mm->lock_dep_map, 0));
}
#else
static inline void gpu_buddy_driver_lock_held(const struct gpu_buddy *mm)
{
}
#endif
|
|
|
|
static inline u64
|
|
gpu_buddy_block_offset(const struct gpu_buddy_block *block)
|
|
{
|
|
return block->header & GPU_BUDDY_HEADER_OFFSET;
|
|
}
|
|
|
|
static inline unsigned int
|
|
gpu_buddy_block_order(struct gpu_buddy_block *block)
|
|
{
|
|
return block->header & GPU_BUDDY_HEADER_ORDER;
|
|
}
|
|
|
|
static inline bool
|
|
gpu_buddy_block_is_free(struct gpu_buddy_block *block)
|
|
{
|
|
return (block->header & GPU_BUDDY_HEADER_STATE) == GPU_BUDDY_FREE;
|
|
}
|
|
|
|
static inline bool
|
|
gpu_buddy_block_is_clear(struct gpu_buddy_block *block)
|
|
{
|
|
return block->header & GPU_BUDDY_HEADER_CLEAR;
|
|
}
|
|
|
|
static inline u64
|
|
gpu_buddy_block_size(struct gpu_buddy *mm,
|
|
struct gpu_buddy_block *block)
|
|
{
|
|
return mm->chunk_size << gpu_buddy_block_order(block);
|
|
}
|
|
|
|
int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size);
|
|
|
|
void gpu_buddy_fini(struct gpu_buddy *mm);
|
|
|
|
int gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
|
|
u64 start, u64 end, u64 size,
|
|
u64 min_page_size,
|
|
struct list_head *blocks,
|
|
unsigned long flags);
|
|
|
|
int gpu_buddy_block_trim(struct gpu_buddy *mm,
|
|
u64 *start,
|
|
u64 new_size,
|
|
struct list_head *blocks);
|
|
|
|
void gpu_buddy_reset_clear(struct gpu_buddy *mm, bool is_clear);
|
|
|
|
void gpu_buddy_free_block(struct gpu_buddy *mm, struct gpu_buddy_block *block);
|
|
|
|
void gpu_buddy_free_list(struct gpu_buddy *mm,
|
|
struct list_head *objects,
|
|
unsigned int flags);
|
|
|
|
void gpu_buddy_print(struct gpu_buddy *mm);
|
|
void gpu_buddy_block_print(struct gpu_buddy *mm,
|
|
struct gpu_buddy_block *block);
|
|
#endif
|