mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2026-05-26 11:40:24 +02:00
drm/amdgpu/mes_v12_1: add mes self test
Add mes self test to ensure that mes user queue work. V2: add pasid on amdgpu_vm_init. V3: Squash in fix non-SPX modes (Mukul) Signed-off-by: Jack Xiao <Jack.Xiao@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
@@ -31,6 +31,8 @@
|
||||
#include "gc/gc_11_0_0_default.h"
|
||||
#include "v12_structs.h"
|
||||
#include "mes_v12_api_def.h"
|
||||
#include "gfx_v12_1_pkt.h"
|
||||
#include "sdma_v7_1_0_pkt_open.h"
|
||||
|
||||
MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes.bin");
|
||||
MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes1.bin");
|
||||
@@ -41,6 +43,7 @@ static int mes_v12_1_xcc_hw_init(struct amdgpu_ip_block *ip_block, int xcc_id);
|
||||
static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block);
|
||||
static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id);
|
||||
static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id);
|
||||
static int mes_v12_1_self_test(struct amdgpu_device *adev, int xcc_id);
|
||||
|
||||
#define MES_EOP_SIZE 2048
|
||||
|
||||
@@ -1949,10 +1952,31 @@ static int mes_v12_1_early_init(struct amdgpu_ip_block *ip_block)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mes_v12_1_late_init(struct amdgpu_ip_block *ip_block)
|
||||
{
|
||||
struct amdgpu_device *adev = ip_block->adev;
|
||||
int xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask);
|
||||
|
||||
/* TODO: remove it if issue fixed. */
|
||||
if (adev->mes.enable_coop_mode)
|
||||
return 0;
|
||||
|
||||
for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
|
||||
/* for COOP mode, only test master xcc. */
|
||||
if (adev->mes.enable_coop_mode &&
|
||||
adev->mes.master_xcc_ids[xcc_id] != xcc_id)
|
||||
continue;
|
||||
|
||||
mes_v12_1_self_test(adev, xcc_id);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct amd_ip_funcs mes_v12_1_ip_funcs = {
|
||||
.name = "mes_v12_1",
|
||||
.early_init = mes_v12_1_early_init,
|
||||
.late_init = NULL,
|
||||
.late_init = mes_v12_1_late_init,
|
||||
.sw_init = mes_v12_1_sw_init,
|
||||
.sw_fini = mes_v12_1_sw_fini,
|
||||
.hw_init = mes_v12_1_hw_init,
|
||||
@@ -1968,3 +1992,312 @@ const struct amdgpu_ip_block_version mes_v12_1_ip_block = {
|
||||
.rev = 0,
|
||||
.funcs = &mes_v12_1_ip_funcs,
|
||||
};
|
||||
|
||||
static int mes_v12_1_alloc_test_buf(struct amdgpu_device *adev,
|
||||
struct amdgpu_bo **bo, uint64_t *addr,
|
||||
void **ptr, int size)
|
||||
{
|
||||
amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
|
||||
bo, addr, ptr);
|
||||
if (!*bo) {
|
||||
dev_err(adev->dev, "failed to allocate test buffer bo\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
memset(*ptr, 0, size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mes_v12_1_map_test_bo(struct amdgpu_device *adev,
|
||||
struct amdgpu_bo *bo, struct amdgpu_vm *vm,
|
||||
struct amdgpu_bo_va **bo_va, u64 va, int size)
|
||||
{
|
||||
struct amdgpu_sync sync;
|
||||
int r;
|
||||
|
||||
r = amdgpu_map_static_csa(adev, vm, bo, bo_va, va, size);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
amdgpu_sync_create(&sync);
|
||||
|
||||
r = amdgpu_vm_bo_update(adev, *bo_va, false);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "failed to do vm_bo_update on meta data\n");
|
||||
goto error;
|
||||
}
|
||||
amdgpu_sync_fence(&sync, (*bo_va)->last_pt_update, GFP_KERNEL);
|
||||
|
||||
r = amdgpu_vm_update_pdes(adev, vm, false);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "failed to update pdes on meta data\n");
|
||||
goto error;
|
||||
}
|
||||
amdgpu_sync_fence(&sync, vm->last_update, GFP_KERNEL);
|
||||
amdgpu_sync_wait(&sync, false);
|
||||
|
||||
error:
|
||||
amdgpu_sync_free(&sync);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mes_v12_1_test_ring(struct amdgpu_device *adev, int xcc_id,
|
||||
u32 *queue_ptr, u64 fence_gpu_addr,
|
||||
void *fence_cpu_ptr, void *wptr_cpu_addr,
|
||||
u64 doorbell_idx, int queue_type)
|
||||
{
|
||||
volatile uint32_t *cpu_ptr = fence_cpu_ptr;
|
||||
int num_xcc = NUM_XCC(adev->gfx.xcc_mask);
|
||||
int sdma_ring_align = 0x10, compute_ring_align = 0x100;
|
||||
uint32_t tmp, xcc_offset;
|
||||
int r = 0, i, wptr = 0;
|
||||
|
||||
if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
|
||||
if (!adev->mes.enable_coop_mode) {
|
||||
WREG32_SOC15(GC, GET_INST(GC, xcc_id),
|
||||
regSCRATCH_REG0, 0xCAFEDEAD);
|
||||
} else {
|
||||
for (i = 0; i < num_xcc; i++) {
|
||||
if (adev->mes.master_xcc_ids[i] == xcc_id)
|
||||
WREG32_SOC15(GC, GET_INST(GC, i),
|
||||
regSCRATCH_REG0, 0xCAFEDEAD);
|
||||
}
|
||||
}
|
||||
|
||||
xcc_offset = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
|
||||
queue_ptr[wptr++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
|
||||
queue_ptr[wptr++] = xcc_offset - PACKET3_SET_UCONFIG_REG_START;
|
||||
queue_ptr[wptr++] = 0xDEADBEEF;
|
||||
|
||||
for (i = wptr; i < compute_ring_align; i++)
|
||||
queue_ptr[wptr++] = PACKET3(PACKET3_NOP, 0x3FFF);
|
||||
|
||||
} else if (queue_type == AMDGPU_RING_TYPE_SDMA) {
|
||||
*cpu_ptr = 0xCAFEDEAD;
|
||||
|
||||
queue_ptr[wptr++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
|
||||
SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
|
||||
queue_ptr[wptr++] = lower_32_bits(fence_gpu_addr);
|
||||
queue_ptr[wptr++] = upper_32_bits(fence_gpu_addr);
|
||||
queue_ptr[wptr++] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
|
||||
queue_ptr[wptr++] = 0xDEADBEEF;
|
||||
|
||||
for (i = wptr; i < sdma_ring_align; i++)
|
||||
queue_ptr[wptr++] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
|
||||
|
||||
wptr <<= 2;
|
||||
}
|
||||
|
||||
atomic64_set((atomic64_t *)wptr_cpu_addr, wptr);
|
||||
WDOORBELL64(doorbell_idx, wptr);
|
||||
|
||||
for (i = 0; i < adev->usec_timeout; i++) {
|
||||
if (queue_type == AMDGPU_RING_TYPE_SDMA) {
|
||||
tmp = le32_to_cpu(*cpu_ptr);
|
||||
} else {
|
||||
if (!adev->mes.enable_coop_mode) {
|
||||
tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
|
||||
regSCRATCH_REG0);
|
||||
} else {
|
||||
for (i = 0; i < num_xcc; i++) {
|
||||
if (xcc_id != adev->mes.master_xcc_ids[i])
|
||||
continue;
|
||||
|
||||
tmp = RREG32_SOC15(GC, GET_INST(GC, i),
|
||||
regSCRATCH_REG0);
|
||||
if (tmp != 0xDEADBEEF)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (tmp == 0xDEADBEEF)
|
||||
break;
|
||||
|
||||
if (amdgpu_emu_mode == 1)
|
||||
msleep(1);
|
||||
else
|
||||
udelay(1);
|
||||
}
|
||||
|
||||
if (i >= adev->usec_timeout) {
|
||||
dev_err(adev->dev, "xcc%d: mes self test (%s) failed\n", xcc_id,
|
||||
queue_type == AMDGPU_RING_TYPE_SDMA ? "sdma" : "compute");
|
||||
|
||||
while (halt_if_hws_hang)
|
||||
schedule();
|
||||
|
||||
r = -ETIMEDOUT;
|
||||
} else {
|
||||
dev_info(adev->dev, "xcc%d: mes self test (%s) pass\n", xcc_id,
|
||||
queue_type == AMDGPU_RING_TYPE_SDMA ? "sdma" : "compute");
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
#define USER_CTX_SIZE (PAGE_SIZE * 2)
|
||||
#define USER_CTX_VA AMDGPU_VA_RESERVED_BOTTOM
|
||||
#define RING_OFFSET(addr) ((addr))
|
||||
#define EOP_OFFSET(addr) ((addr) + PAGE_SIZE)
|
||||
#define WPTR_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64))
|
||||
#define RPTR_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64) * 2)
|
||||
#define FENCE_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64) * 3)
|
||||
|
||||
static int mes_v12_1_test_queue(struct amdgpu_device *adev, int xcc_id,
|
||||
int pasid, struct amdgpu_vm *vm, u64 meta_gpu_addr,
|
||||
u64 queue_gpu_addr, void *ctx_ptr, int queue_type)
|
||||
{
|
||||
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
|
||||
struct amdgpu_mqd *mqd_mgr = &adev->mqds[queue_type];
|
||||
struct amdgpu_mqd_prop mqd_prop = {0};
|
||||
struct mes_add_queue_input add_queue = {0};
|
||||
struct mes_remove_queue_input remove_queue = {0};
|
||||
struct amdgpu_bo *mqd_bo = NULL;
|
||||
int num_xcc = NUM_XCC(adev->gfx.xcc_mask);
|
||||
int i, r, off, mqd_size, mqd_count = 1;
|
||||
void *mqd_ptr = NULL;
|
||||
u64 mqd_gpu_addr, doorbell_idx;
|
||||
|
||||
/* extra one page size padding for mes fw */
|
||||
mqd_size = mqd_mgr->mqd_size + PAGE_SIZE;
|
||||
|
||||
if (queue_type == AMDGPU_RING_TYPE_SDMA) {
|
||||
doorbell_idx = adev->mes.db_start_dw_offset + \
|
||||
adev->doorbell_index.sdma_engine[0];
|
||||
} else {
|
||||
doorbell_idx = adev->mes.db_start_dw_offset + \
|
||||
adev->doorbell_index.userqueue_start;
|
||||
}
|
||||
|
||||
if (adev->mes.enable_coop_mode &&
|
||||
queue_type == AMDGPU_RING_TYPE_COMPUTE) {
|
||||
for (i = 0, mqd_count = 0; i < num_xcc; i++) {
|
||||
if (adev->mes.master_xcc_ids[i] == xcc_id)
|
||||
mqd_count++;
|
||||
}
|
||||
mqd_size *= mqd_count;
|
||||
}
|
||||
|
||||
r = mes_v12_1_alloc_test_buf(adev, &mqd_bo, &mqd_gpu_addr,
|
||||
&mqd_ptr, mqd_size * mqd_count);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
mqd_prop.mqd_gpu_addr = mqd_gpu_addr;
|
||||
mqd_prop.hqd_base_gpu_addr = RING_OFFSET(USER_CTX_VA);
|
||||
mqd_prop.eop_gpu_addr = EOP_OFFSET(USER_CTX_VA);
|
||||
mqd_prop.wptr_gpu_addr = WPTR_OFFSET(USER_CTX_VA);
|
||||
mqd_prop.rptr_gpu_addr = RPTR_OFFSET(USER_CTX_VA);
|
||||
mqd_prop.doorbell_index = doorbell_idx;
|
||||
mqd_prop.queue_size = PAGE_SIZE;
|
||||
mqd_prop.mqd_stride_size = mqd_size;
|
||||
mqd_prop.use_doorbell = true;
|
||||
mqd_prop.hqd_active = false;
|
||||
|
||||
mqd_mgr->init_mqd(adev, mqd_ptr, &mqd_prop);
|
||||
if (mqd_count > 1) {
|
||||
for (i = 1; i < mqd_count; i++) {
|
||||
off = mqd_size * i;
|
||||
mqd_prop.mqd_gpu_addr = mqd_gpu_addr + off;
|
||||
mqd_mgr->init_mqd(adev, (char *)mqd_ptr + off,
|
||||
&mqd_prop);
|
||||
}
|
||||
}
|
||||
|
||||
add_queue.xcc_id = xcc_id;
|
||||
add_queue.process_id = pasid;
|
||||
add_queue.page_table_base_addr = adev->vm_manager.vram_base_offset +
|
||||
amdgpu_bo_gpu_offset(vm->root.bo) - adev->gmc.vram_start;
|
||||
add_queue.process_va_start = 0;
|
||||
add_queue.process_va_end = adev->vm_manager.max_pfn - 1;
|
||||
add_queue.process_context_addr = meta_gpu_addr;
|
||||
add_queue.gang_context_addr = meta_gpu_addr + AMDGPU_MES_PROC_CTX_SIZE;
|
||||
add_queue.doorbell_offset = doorbell_idx;
|
||||
add_queue.mqd_addr = mqd_gpu_addr;
|
||||
add_queue.wptr_addr = mqd_prop.wptr_gpu_addr;
|
||||
add_queue.wptr_mc_addr = WPTR_OFFSET(queue_gpu_addr);
|
||||
add_queue.queue_type = queue_type;
|
||||
add_queue.vm_cntx_cntl = hub->vm_cntx_cntl;
|
||||
|
||||
r = mes_v12_1_add_hw_queue(&adev->mes, &add_queue);
|
||||
if (r)
|
||||
goto error;
|
||||
|
||||
mes_v12_1_test_ring(adev, xcc_id, (u32 *)RING_OFFSET((char *)ctx_ptr),
|
||||
FENCE_OFFSET(USER_CTX_VA),
|
||||
FENCE_OFFSET((char *)ctx_ptr),
|
||||
WPTR_OFFSET((char *)ctx_ptr),
|
||||
doorbell_idx, queue_type);
|
||||
|
||||
remove_queue.xcc_id = xcc_id;
|
||||
remove_queue.doorbell_offset = doorbell_idx;
|
||||
remove_queue.gang_context_addr = add_queue.gang_context_addr;
|
||||
r = mes_v12_1_remove_hw_queue(&adev->mes, &remove_queue);
|
||||
|
||||
error:
|
||||
amdgpu_bo_free_kernel(&mqd_bo, &mqd_gpu_addr, &mqd_ptr);
|
||||
return r;
|
||||
}
|
||||
|
||||
static int mes_v12_1_self_test(struct amdgpu_device *adev, int xcc_id)
|
||||
{
|
||||
int queue_types[] = { AMDGPU_RING_TYPE_COMPUTE,
|
||||
/* AMDGPU_RING_TYPE_SDMA */ };
|
||||
struct amdgpu_bo_va *bo_va = NULL;
|
||||
struct amdgpu_vm *vm = NULL;
|
||||
struct amdgpu_bo *meta_bo = NULL, *ctx_bo = NULL;
|
||||
void *meta_ptr = NULL, *ctx_ptr = NULL;
|
||||
u64 meta_gpu_addr, ctx_gpu_addr;
|
||||
int size, i, r, pasid;;
|
||||
|
||||
pasid = amdgpu_pasid_alloc(16);
|
||||
if (pasid < 0)
|
||||
pasid = 0;
|
||||
|
||||
size = AMDGPU_MES_PROC_CTX_SIZE + AMDGPU_MES_GANG_CTX_SIZE;
|
||||
r = mes_v12_1_alloc_test_buf(adev, &meta_bo, &meta_gpu_addr,
|
||||
&meta_ptr, size);
|
||||
if (r < 0)
|
||||
goto err2;
|
||||
|
||||
r = mes_v12_1_alloc_test_buf(adev, &ctx_bo, &ctx_gpu_addr,
|
||||
&ctx_ptr, USER_CTX_SIZE);
|
||||
if (r < 0)
|
||||
goto err2;
|
||||
|
||||
vm = kzalloc(sizeof(*vm), GFP_KERNEL);
|
||||
if (!vm) {
|
||||
r = -ENOMEM;
|
||||
goto err2;
|
||||
}
|
||||
|
||||
r = amdgpu_vm_init(adev, vm, -1, pasid);
|
||||
if (r)
|
||||
goto err1;
|
||||
|
||||
r = mes_v12_1_map_test_bo(adev, ctx_bo, vm, &bo_va,
|
||||
USER_CTX_VA, USER_CTX_SIZE);
|
||||
if (r)
|
||||
goto err0;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(queue_types); i++) {
|
||||
memset(ctx_ptr, 0, USER_CTX_SIZE);
|
||||
|
||||
r = mes_v12_1_test_queue(adev, xcc_id, pasid, vm, meta_gpu_addr,
|
||||
ctx_gpu_addr, ctx_ptr, queue_types[i]);
|
||||
if (r)
|
||||
break;
|
||||
}
|
||||
|
||||
amdgpu_unmap_static_csa(adev, vm, ctx_bo, bo_va, USER_CTX_VA);
|
||||
err0:
|
||||
amdgpu_vm_fini(adev, vm);
|
||||
err1:
|
||||
kfree(vm);
|
||||
err2:
|
||||
amdgpu_bo_free_kernel(&meta_bo, &meta_gpu_addr, &meta_ptr);
|
||||
amdgpu_bo_free_kernel(&ctx_bo, &ctx_gpu_addr, &ctx_ptr);
|
||||
amdgpu_pasid_free(pasid);
|
||||
return r;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user