mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2026-05-05 09:57:21 +02:00
493740d790
Large alignment requests previously forced the buddy allocator to search by alignment order, which often caused higher-order free blocks to be split even when a suitably aligned smaller region already existed within them. This led to excessive fragmentation, especially for workloads requesting small sizes with large alignment constraints. This change prioritizes the requested allocation size during the search and uses an augmented RB-tree field (subtree_max_alignment) to efficiently locate free blocks that satisfy both size and offset-alignment requirements. As a result, the allocator can directly select an aligned sub-region without splitting larger blocks unnecessarily. A practical example is the VKCTS test dEQP-VK.memory.allocation.basic.size_8KiB.reverse.count_4000, which repeatedly allocates 8 KiB buffers with a 256 KiB alignment. Previously, such allocations caused large blocks to be split aggressively, despite smaller aligned regions being sufficient. With this change, those aligned regions are reused directly, significantly reducing fragmentation. This improvement is visible in the amdgpu VRAM buddy allocator state (/sys/kernel/debug/dri/1/amdgpu_vram_mm). After the change, higher-order blocks are preserved and the number of low-order fragments is substantially reduced. Before: order- 5 free: 1936 MiB, blocks: 15490 order- 4 free: 967 MiB, blocks: 15486 order- 3 free: 483 MiB, blocks: 15485 order- 2 free: 241 MiB, blocks: 15486 order- 1 free: 241 MiB, blocks: 30948 After: order- 5 free: 493 MiB, blocks: 3941 order- 4 free: 246 MiB, blocks: 3943 order- 3 free: 123 MiB, blocks: 4101 order- 2 free: 61 MiB, blocks: 4101 order- 1 free: 61 MiB, blocks: 8018 By avoiding unnecessary splits, this change improves allocator efficiency and helps maintain larger contiguous free regions under heavy offset-aligned allocation workloads. v2:(Matthew) - Update augmented information along the path to the inserted node. v3: - Move the patch to gpu/buddy.c file. 
v4:(Matthew) - Use the helper instead of calling _ffs directly - Remove gpu_buddy_block_order(block) >= order check and drop order - Drop !node check as all callers handle this already - Return larger than any other possible alignment for __ffs64(0) - Replace __ffs with __ffs64 v5:(Matthew) - Drop subtree_max_alignment initialization at gpu_block_alloc() Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com> Suggested-by: Christian König <christian.koenig@amd.com> Reviewed-by: Matthew Auld <matthew.auld@intel.com> Link: https://patch.msgid.link/20260306060155.2114-1-Arunpravin.PaneerSelvam@amd.com
242 lines
7.4 KiB
C
242 lines
7.4 KiB
C
/* SPDX-License-Identifier: MIT */
|
|
/*
|
|
* Copyright © 2021 Intel Corporation
|
|
*/
|
|
|
|
#ifndef __GPU_BUDDY_H__
|
|
#define __GPU_BUDDY_H__
|
|
|
|
#include <linux/bitops.h>
|
|
#include <linux/list.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/rbtree.h>
|
|
#include <linux/rbtree_augmented.h>
|
|
|
|
/**
 * GPU_BUDDY_RANGE_ALLOCATION - Allocate within a specific address range
 *
 * When set, allocation is restricted to the range [start, end) specified
 * in gpu_buddy_alloc_blocks(). Without this flag, start/end are ignored
 * and allocation can use any free space.
 */
#define GPU_BUDDY_RANGE_ALLOCATION BIT(0)

/**
 * GPU_BUDDY_TOPDOWN_ALLOCATION - Allocate from top of address space
 *
 * Allocate starting from high addresses and working down. Useful for
 * separating different allocation types (e.g., kernel vs userspace)
 * to reduce fragmentation.
 */
#define GPU_BUDDY_TOPDOWN_ALLOCATION BIT(1)

/**
 * GPU_BUDDY_CONTIGUOUS_ALLOCATION - Require physically contiguous blocks
 *
 * The allocation must be satisfied with a single contiguous block.
 * If the requested size cannot be allocated contiguously, the
 * allocation fails with -ENOSPC.
 */
#define GPU_BUDDY_CONTIGUOUS_ALLOCATION BIT(2)

/**
 * GPU_BUDDY_CLEAR_ALLOCATION - Prefer pre-cleared (zeroed) memory
 *
 * Attempt to allocate from the clear tree first. If insufficient clear
 * memory is available, falls back to dirty memory. Useful when the
 * caller needs zeroed memory and wants to avoid GPU clear operations.
 */
#define GPU_BUDDY_CLEAR_ALLOCATION BIT(3)

/**
 * GPU_BUDDY_CLEARED - Mark returned blocks as cleared
 *
 * Used with gpu_buddy_free_list() to indicate that the memory being
 * freed has been cleared (zeroed). The blocks will be placed in the
 * clear tree for future GPU_BUDDY_CLEAR_ALLOCATION requests.
 */
#define GPU_BUDDY_CLEARED BIT(4)

/**
 * GPU_BUDDY_TRIM_DISABLE - Disable automatic block trimming
 *
 * By default, if an allocation is smaller than the allocated block,
 * excess memory is trimmed and returned to the free pool. This flag
 * disables trimming, keeping the full power-of-two block size.
 */
#define GPU_BUDDY_TRIM_DISABLE BIT(5)
|
|
|
|
/*
 * Index of the per-order free trees: one tree set for blocks known to be
 * zeroed, one for blocks with unknown contents. GPU_BUDDY_MAX_FREE_TREES
 * is the number of valid tree indices, used for array sizing/iteration.
 */
enum gpu_buddy_free_tree {
	GPU_BUDDY_CLEAR_TREE = 0,
	GPU_BUDDY_DIRTY_TREE = 1,
	GPU_BUDDY_MAX_FREE_TREES = 2,
};
|
|
|
|
/* Iterate @tree over every free-tree index (clear, then dirty). */
#define for_each_free_tree(tree) \
	for ((tree) = 0; (tree) < GPU_BUDDY_MAX_FREE_TREES; (tree)++)
|
|
|
|
/**
 * struct gpu_buddy_block - Block within a buddy allocator
 *
 * Each block in the buddy allocator is represented by this structure.
 * Blocks are organized in a binary tree where each parent block can be
 * split into two children (left and right buddies). The allocator manages
 * blocks at various orders (power-of-2 sizes) from chunk_size up to the
 * largest contiguous region.
 *
 * @private: Private data owned by the allocator user (e.g., driver-specific data)
 * @link: List node for user ownership while block is allocated
 */
struct gpu_buddy_block {
	/* private: */
	/*
	 * Header bit layout:
	 * - Bits 63:12: block offset within the address space
	 * - Bits 11:10: state (ALLOCATED, FREE, or SPLIT)
	 * - Bit 9: clear bit (1 if memory is zeroed)
	 * - Bits 8:6: reserved
	 * - Bits 5:0: order (log2 of size relative to chunk_size)
	 */
#define GPU_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
#define GPU_BUDDY_HEADER_STATE  GENMASK_ULL(11, 10)
/* State values, pre-shifted into bits 11:10 of the header. */
#define   GPU_BUDDY_ALLOCATED	   (1 << 10)
#define   GPU_BUDDY_FREE	   (2 << 10)
#define   GPU_BUDDY_SPLIT	   (3 << 10)
#define GPU_BUDDY_HEADER_CLEAR  GENMASK_ULL(9, 9)
	/* Free to be used, if needed in the future */
#define GPU_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6)
#define GPU_BUDDY_HEADER_ORDER  GENMASK_ULL(5, 0)
	u64 header;

	/* Children after a split; parent link back up the binary tree. */
	struct gpu_buddy_block *left;
	struct gpu_buddy_block *right;
	struct gpu_buddy_block *parent;

	/* public: */
	void *private; /* owned by creator */

	/*
	 * While the block is allocated by the user through gpu_buddy_alloc*,
	 * the user has ownership of the link, for example to maintain within
	 * a list, if so desired. As soon as the block is freed with
	 * gpu_buddy_free* ownership is given back to the mm.
	 */
	union {
		/* private: */
		/* rb node in the per-order free tree while the block is free. */
		struct rb_node rb;
		/* public: */
		struct list_head link;
	};

	/* private: */
	/* Scratch list node used internally by the allocator. */
	struct list_head tmp_link;
	/*
	 * Augmented rbtree field: max offset alignment available in this
	 * node's subtree, letting the allocator find a block satisfying a
	 * size + alignment request without splitting larger blocks.
	 */
	unsigned int subtree_max_alignment;
};
|
|
|
|
/* Order-zero must be at least SZ_4K (matches the 12-bit offset shift above). */
#define GPU_BUDDY_MAX_ORDER (63 - 12)

/**
 * struct gpu_buddy - GPU binary buddy allocator
 *
 * The buddy allocator provides efficient power-of-two memory allocation
 * with fast allocation and free operations. It is commonly used for GPU
 * memory management where allocations can be split into power-of-two
 * block sizes.
 *
 * Locking should be handled by the user; a simple mutex around
 * gpu_buddy_alloc_blocks() and gpu_buddy_free_block()/gpu_buddy_free_list()
 * should suffice.
 *
 * @n_roots: Number of root blocks in the roots array.
 * @max_order: Maximum block order (log2 of largest block size / chunk_size).
 * @chunk_size: Minimum allocation granularity in bytes. Must be at least SZ_4K.
 * @size: Total size of the address space managed by this allocator in bytes.
 * @avail: Total free space currently available for allocation in bytes.
 * @clear_avail: Free space available in the clear tree (zeroed memory) in bytes.
 *               This is a subset of @avail.
 */
struct gpu_buddy {
	/* private: */
	/*
	 * Array of red-black trees for free block management.
	 * Indexed as free_trees[clear/dirty][order] where:
	 * - Index 0 (GPU_BUDDY_CLEAR_TREE): blocks with zeroed content
	 * - Index 1 (GPU_BUDDY_DIRTY_TREE): blocks with unknown content
	 * Each tree holds free blocks of the corresponding order.
	 */
	struct rb_root **free_trees;
	/*
	 * Array of root blocks representing the top-level blocks of the
	 * binary tree(s). Multiple roots exist when the total size is not
	 * a power of two, with each root being the largest power-of-two
	 * that fits in the remaining space.
	 */
	struct gpu_buddy_block **roots;
	/* public: */
	unsigned int n_roots;
	unsigned int max_order;
	u64 chunk_size;
	u64 size;
	u64 avail;
	u64 clear_avail;
};
|
|
|
|
static inline u64
|
|
gpu_buddy_block_offset(const struct gpu_buddy_block *block)
|
|
{
|
|
return block->header & GPU_BUDDY_HEADER_OFFSET;
|
|
}
|
|
|
|
static inline unsigned int
|
|
gpu_buddy_block_order(struct gpu_buddy_block *block)
|
|
{
|
|
return block->header & GPU_BUDDY_HEADER_ORDER;
|
|
}
|
|
|
|
static inline bool
|
|
gpu_buddy_block_is_free(struct gpu_buddy_block *block)
|
|
{
|
|
return (block->header & GPU_BUDDY_HEADER_STATE) == GPU_BUDDY_FREE;
|
|
}
|
|
|
|
static inline bool
|
|
gpu_buddy_block_is_clear(struct gpu_buddy_block *block)
|
|
{
|
|
return block->header & GPU_BUDDY_HEADER_CLEAR;
|
|
}
|
|
|
|
static inline u64
|
|
gpu_buddy_block_size(struct gpu_buddy *mm,
|
|
struct gpu_buddy_block *block)
|
|
{
|
|
return mm->chunk_size << gpu_buddy_block_order(block);
|
|
}
|
|
|
|
/* Initialize @mm to manage @size bytes with @chunk_size granularity. */
int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size);

/* Tear down @mm and release allocator-internal state. */
void gpu_buddy_fini(struct gpu_buddy *mm);

/*
 * Allocate @size bytes (granularity @min_page_size) into @blocks.
 * @start/@end bound the search only with GPU_BUDDY_RANGE_ALLOCATION;
 * see the GPU_BUDDY_* flag descriptions above for @flags semantics.
 */
int gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
			   u64 start, u64 end, u64 size,
			   u64 min_page_size,
			   struct list_head *blocks,
			   unsigned long flags);

/*
 * Trim an allocation down to @new_size, returning the excess to the
 * free pool. NOTE(review): exact @start in/out semantics are defined in
 * the .c implementation — confirm there before relying on them.
 */
int gpu_buddy_block_trim(struct gpu_buddy *mm,
			 u64 *start,
			 u64 new_size,
			 struct list_head *blocks);

/* Reset the clear/dirty classification of free space; see implementation. */
void gpu_buddy_reset_clear(struct gpu_buddy *mm, bool is_clear);

/* Return a single @block to the allocator. */
void gpu_buddy_free_block(struct gpu_buddy *mm, struct gpu_buddy_block *block);

/*
 * Return every block on @objects to the allocator. Pass GPU_BUDDY_CLEARED
 * in @flags if the memory has been zeroed (see flag description above).
 */
void gpu_buddy_free_list(struct gpu_buddy *mm,
			 struct list_head *objects,
			 unsigned int flags);

/* Debug helpers: print allocator / per-block state. */
void gpu_buddy_print(struct gpu_buddy *mm);
void gpu_buddy_block_print(struct gpu_buddy *mm,
			   struct gpu_buddy_block *block);

#endif
|