Files
Srinivasan Shanmugam e1fb16cef0 drm/amdgpu/mes_v12_1: Fix iterator reuse in mes_v12_1_test_ring()
This code waits for the MES self-test to complete by repeatedly checking
a register or memory value until it becomes valid or a timeout occurs.
The fix ensures the timeout counter works correctly by not reusing the
same variable inside another loop.

mes_v12_1_test_ring() uses 'i' as the outer timeout loop counter, but
reuses the same variable for the inner XCC scan in cooperative mode.

This makes the timeout counter ambiguous and can lead to incorrect
timeout handling. It also triggers a Smatch warning about reusing the
outer loop iterator.

Fix this by introducing a separate iterator for the inner XCC loop so
that 'i' continues to represent only the timeout wait duration.

drivers/gpu/drm/amd/amdgpu/mes_v12_1.c:2080 mes_v12_1_test_ring()
warn: reusing outside iterator: 'i'

drivers/gpu/drm/amd/amdgpu/mes_v12_1.c
    2069         atomic64_set((atomic64_t *)wptr_cpu_addr, wptr);
    2070         WDOORBELL64(doorbell_idx, wptr);
    2071
    2072         for (i = 0; i < adev->usec_timeout; i++) {

Here 'i' counts the elapsed timeout iterations (usec).

    2073                 if (queue_type == AMDGPU_RING_TYPE_SDMA) {
    2074                         tmp = le32_to_cpu(*cpu_ptr);
    2075                 } else {
    2076                         if (!adev->mes.enable_coop_mode) {
    2077                                 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
    2078                                                    regSCRATCH_REG0);
    2079                         } else {
--> 2080                                 for (i = 0; i < num_xcc; i++) {

and then 'i' is reused as the inner XCC loop index, overwriting the timeout count.

Fixes: 44e5195fa3 ("drm/amdgpu/mes_v12_1: add mes self test")
Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
Cc: Jack Xiao <Jack.Xiao@amd.com>
Cc: Hawking Zhang <Hawking.Zhang@amd.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Reviewed-by: Jack Xiao <Jack.Xiao@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
2026-04-17 14:53:10 -04:00

2283 lines
69 KiB
C

/*
* Copyright 2025 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <linux/firmware.h>
#include <linux/module.h>
#include "amdgpu.h"
#include "soc15_common.h"
#include "soc_v1_0.h"
#include "gc/gc_12_1_0_offset.h"
#include "gc/gc_12_1_0_sh_mask.h"
#include "gc/gc_11_0_0_default.h"
#include "v12_structs.h"
#include "mes_v12_api_def.h"
#include "gfx_v12_1_pkt.h"
#include "sdma_v7_1_0_pkt_open.h"
/* Firmware image names advertised for this module via MODULE_FIRMWARE(). */
MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes.bin");
MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes1.bin");
MODULE_FIRMWARE("amdgpu/gc_12_1_0_uni_mes.bin");
/* Forward declarations of static functions referenced before their definitions. */
static int mes_v12_1_hw_init(struct amdgpu_ip_block *ip_block);
static int mes_v12_1_xcc_hw_init(struct amdgpu_ip_block *ip_block, int xcc_id);
static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block);
static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id);
static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id);
static int mes_v12_1_self_test(struct amdgpu_device *adev, int xcc_id);
/* NOTE(review): presumably the MES EOP buffer size in bytes - confirm at the use site. */
#define MES_EOP_SIZE 2048
/* NOTE(review): the meaning of the two values below is not evident from this part of the file - confirm. */
#define regCP_HQD_IB_CONTROL_MES_12_1_DEFAULT 0x100000
#define XCC_MID_MASK 0x41000000
/*
 * Publish the ring's current write pointer: store ring->wptr into the
 * CPU-visible wptr slot, then ring the doorbell with the same value.
 * A ring without a doorbell is treated as a driver bug.
 */
static void mes_v12_1_ring_set_wptr(struct amdgpu_ring *ring)
{
	/*
	 * 'adev' is not referenced directly below; presumably it is picked
	 * up by the WDOORBELL64() macro, so the name must stay as is.
	 */
	struct amdgpu_device *adev = ring->adev;

	if (!ring->use_doorbell) {
		BUG();
		return;
	}

	atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
	WDOORBELL64(ring->doorbell_index, ring->wptr);
}
/* Return the value currently stored in the ring's CPU-visible rptr slot. */
static u64 mes_v12_1_ring_get_rptr(struct amdgpu_ring *ring)
{
	u64 rptr = *ring->rptr_cpu_addr;

	return rptr;
}
/*
 * Return the write pointer read atomically from the ring's CPU-visible
 * wptr slot. A ring without a doorbell is treated as a driver bug.
 */
static u64 mes_v12_1_ring_get_wptr(struct amdgpu_ring *ring)
{
	if (!ring->use_doorbell)
		BUG();

	return atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
}
/*
 * Ring callbacks for the MES ring type: rptr/wptr accessors come from this
 * file, NOP insertion uses the generic amdgpu helper.
 */
static const struct amdgpu_ring_funcs mes_v12_1_ring_funcs = {
	.type = AMDGPU_RING_TYPE_MES,
	.align_mask = 1,
	.nop = 0,
	.support_64bit_ptrs = true,
	.get_rptr = mes_v12_1_ring_get_rptr,
	.get_wptr = mes_v12_1_ring_get_wptr,
	.set_wptr = mes_v12_1_ring_set_wptr,
	.insert_nop = amdgpu_ring_insert_nop,
};
/*
 * Human-readable names for MES scheduler API opcodes, indexed by
 * header.opcode (see mes_v12_1_get_op_string()). Used only for log
 * messages; the spelling of each entry is kept as-is because the strings
 * are emitted at runtime.
 *
 * Both the strings and the pointer array itself are const so the table
 * cannot be modified.
 */
static const char * const mes_v12_1_opcodes[] = {
	"SET_HW_RSRC",
	"SET_SCHEDULING_CONFIG",
	"ADD_QUEUE",
	"REMOVE_QUEUE",
	"PERFORM_YIELD",
	"SET_GANG_PRIORITY_LEVEL",
	"SUSPEND",
	"RESUME",
	"RESET",
	"SET_LOG_BUFFER",
	"CHANGE_GANG_PRORITY",
	"QUERY_SCHEDULER_STATUS",
	"unused",
	"SET_DEBUG_VMID",
	"MISC",
	"UPDATE_ROOT_PAGE_TABLE",
	"AMD_LOG",
	"SET_SE_MODE",
	"SET_GANG_SUBMIT",
	"SET_HW_RSRC_1",
	"INVALIDATE_TLBS",
};
/*
 * Human-readable names for the sub-opcodes of a MES_SCH_API_MISC packet,
 * indexed by the packet's opcode field (see
 * mes_v12_1_get_misc_op_string()). Used only for log messages.
 *
 * Both the strings and the pointer array itself are const so the table
 * cannot be modified.
 */
static const char * const mes_v12_1_misc_opcodes[] = {
	"WRITE_REG",
	"INV_GART",
	"QUERY_STATUS",
	"READ_REG",
	"WAIT_REG_MEM",
	"SET_SHADER_DEBUGGER",
	"NOTIFY_WORK_ON_UNMAPPED_QUEUE",
	"NOTIFY_TO_UNMAP_PROCESSES",
};
/*
 * Look up the printable name of a packet's scheduler API opcode.
 * Returns NULL when header.opcode lies beyond mes_v12_1_opcodes[].
 */
static const char *mes_v12_1_get_op_string(union MESAPI__MISC *x_pkt)
{
	if (x_pkt->header.opcode >= ARRAY_SIZE(mes_v12_1_opcodes))
		return NULL;

	return mes_v12_1_opcodes[x_pkt->header.opcode];
}
/*
 * Look up the printable name of a MISC packet's sub-opcode.
 * Returns NULL when the packet is not a MES_SCH_API_MISC packet or its
 * sub-opcode lies beyond mes_v12_1_misc_opcodes[].
 */
static const char *mes_v12_1_get_misc_op_string(union MESAPI__MISC *x_pkt)
{
	if (x_pkt->header.opcode != MES_SCH_API_MISC)
		return NULL;

	if (x_pkt->opcode >= ARRAY_SIZE(mes_v12_1_misc_opcodes))
		return NULL;

	return mes_v12_1_misc_opcodes[x_pkt->opcode];
}
static int mes_v12_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
int xcc_id, int pipe, void *pkt,
int size, int api_status_off)
{
union MESAPI__QUERY_MES_STATUS mes_status_pkt;
signed long timeout = 2100000; /* 2100 ms */
struct amdgpu_device *adev = mes->adev;
struct amdgpu_ring *ring = &mes->ring[MES_PIPE_INST(xcc_id, pipe)];
spinlock_t *ring_lock = &mes->ring_lock[MES_PIPE_INST(xcc_id, pipe)];
struct MES_API_STATUS *api_status;
union MESAPI__MISC *x_pkt = pkt;
const char *op_str, *misc_op_str;
unsigned long flags;
u64 status_gpu_addr;
u32 seq, status_offset;
u64 *status_ptr;
signed long r;
int ret;
if (x_pkt->header.opcode >= MES_SCH_API_MAX)
return -EINVAL;
if (amdgpu_emu_mode) {
timeout *= 1000;
} else if (amdgpu_sriov_vf(adev)) {
/* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */
timeout = 15 * 600 * 1000;
}
ret = amdgpu_device_wb_get(adev, &status_offset);
if (ret)
return ret;
status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4);
status_ptr = (u64 *)&adev->wb.wb[status_offset];
*status_ptr = 0;
spin_lock_irqsave(ring_lock, flags);
r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4);
if (r)
goto error_unlock_free;
seq = ++ring->fence_drv.sync_seq;
r = amdgpu_fence_wait_polling(ring,
seq - ring->fence_drv.num_fences_mask,
timeout);
if (r < 1)
goto error_undo;
api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
api_status->api_completion_fence_addr = status_gpu_addr;
api_status->api_completion_fence_value = 1;
amdgpu_ring_write_multiple(ring, pkt, size / 4);
memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
mes_status_pkt.api_status.api_completion_fence_addr =
ring->fence_drv.gpu_addr;
mes_status_pkt.api_status.api_completion_fence_value = seq;
amdgpu_ring_write_multiple(ring, &mes_status_pkt,
sizeof(mes_status_pkt) / 4);
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(ring_lock, flags);
op_str = mes_v12_1_get_op_string(x_pkt);
misc_op_str = mes_v12_1_get_misc_op_string(x_pkt);
if (misc_op_str)
dev_dbg(adev->dev, "MES(%d, %d) msg=%s (%s) was emitted\n",
xcc_id, pipe, op_str, misc_op_str);
else if (op_str)
dev_dbg(adev->dev, "MES(%d, %d) msg=%s was emitted\n",
xcc_id, pipe, op_str);
else
dev_dbg(adev->dev, "MES(%d, %d) msg=%d was emitted\n",
xcc_id, pipe, x_pkt->header.opcode);
r = amdgpu_fence_wait_polling(ring, seq, timeout);
if (r < 1 || !*status_ptr) {
if (misc_op_str)
dev_err(adev->dev,
"MES(%d, %d) failed to respond to msg=%s (%s)\n",
xcc_id, pipe, op_str, misc_op_str);
else if (op_str)
dev_err(adev->dev,
"MES(%d, %d) failed to respond to msg=%s\n",
xcc_id, pipe, op_str);
else
dev_err(adev->dev,
"MES(%d, %d) failed to respond to msg=%d\n",
xcc_id, pipe, x_pkt->header.opcode);
while (halt_if_hws_hang)
schedule();
r = -ETIMEDOUT;
goto error_wb_free;
}
amdgpu_device_wb_free(adev, status_offset);
return 0;
error_undo:
dev_err(adev->dev, "MES(%d, %d) ring buffer is full.\n", xcc_id, pipe);
amdgpu_ring_undo(ring);
error_unlock_free:
spin_unlock_irqrestore(ring_lock, flags);
error_wb_free:
amdgpu_device_wb_free(adev, status_offset);
return r;
}
/*
 * Translate an AMDGPU_RING_TYPE_* value into the matching MES_QUEUE_TYPE_*
 * value. Any other ring type is treated as a driver bug.
 */
static int convert_to_mes_queue_type(int queue_type)
{
	switch (queue_type) {
	case AMDGPU_RING_TYPE_GFX:
		return MES_QUEUE_TYPE_GFX;
	case AMDGPU_RING_TYPE_COMPUTE:
		return MES_QUEUE_TYPE_COMPUTE;
	case AMDGPU_RING_TYPE_SDMA:
		return MES_QUEUE_TYPE_SDMA;
	case AMDGPU_RING_TYPE_MES:
		return MES_QUEUE_TYPE_SCHQ;
	default:
		BUG();
	}

	return -1;
}
/*
 * Build an ADD_QUEUE packet from @input and submit it on the scheduler
 * pipe. In cooperative mode the packet goes to the master XCC recorded for
 * the input's XCC instead of the input's XCC itself.
 *
 * Returns the result of mes_v12_1_submit_pkt_and_poll_completion().
 */
static int mes_v12_1_add_hw_queue(struct amdgpu_mes *mes,
				  struct mes_add_queue_input *input)
{
	union MESAPI__ADD_QUEUE mes_add_queue_pkt;
	int xcc_id = input->xcc_id;
	int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE);

	if (mes->enable_coop_mode)
		xcc_id = mes->master_xcc_ids[inst];

	memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));

	mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
	mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_add_queue_pkt.process_id = input->process_id;
	mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr;
	mes_add_queue_pkt.process_va_start = input->process_va_start;
	mes_add_queue_pkt.process_va_end = input->process_va_end;
	mes_add_queue_pkt.process_quantum = input->process_quantum;
	mes_add_queue_pkt.process_context_addr = input->process_context_addr;
	mes_add_queue_pkt.gang_quantum = input->gang_quantum;
	mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
	mes_add_queue_pkt.inprocess_gang_priority =
		input->inprocess_gang_priority;
	mes_add_queue_pkt.gang_global_priority_level =
		input->gang_global_priority_level;
	mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
	mes_add_queue_pkt.mqd_addr = input->mqd_addr;

	mes_add_queue_pkt.wptr_addr = input->wptr_mc_addr;

	mes_add_queue_pkt.queue_type =
		convert_to_mes_queue_type(input->queue_type);
	mes_add_queue_pkt.paging = input->paging;
	mes_add_queue_pkt.vm_context_cntl = input->vm_cntx_cntl;
	mes_add_queue_pkt.gws_base = input->gws_base;
	mes_add_queue_pkt.gws_size = input->gws_size;
	mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
	mes_add_queue_pkt.tma_addr = input->tma_addr;
	mes_add_queue_pkt.trap_en = input->trap_en;
	mes_add_queue_pkt.skip_process_ctx_clear = input->skip_process_ctx_clear;
	mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;

	/* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */
	mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
	mes_add_queue_pkt.gds_size = input->queue_size;

	mes_add_queue_pkt.full_sh_mem_config_data = input->sh_mem_config_data;

	return mes_v12_1_submit_pkt_and_poll_completion(mes,
			xcc_id, AMDGPU_MES_SCHED_PIPE,
			&mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
			offsetof(union MESAPI__ADD_QUEUE, api_status));
}
static int mes_v12_1_remove_hw_queue(struct amdgpu_mes *mes,
struct mes_remove_queue_input *input)
{
union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
int xcc_id = input->xcc_id;
int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE);
if (mes->enable_coop_mode)
xcc_id = mes->master_xcc_ids[inst];
memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;
return mes_v12_1_submit_pkt_and_poll_completion(mes,
xcc_id, AMDGPU_MES_SCHED_PIPE,
&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
offsetof(union MESAPI__REMOVE_QUEUE, api_status));
}
/*
 * Build a RESET packet identifying the queue by its doorbell offset and
 * submit it on the KIQ pipe when unified MES is enabled, otherwise on the
 * scheduler pipe.
 *
 * gang_context_addr and reset_queue_only are intentionally left at zero.
 *
 * Returns the result of mes_v12_1_submit_pkt_and_poll_completion().
 */
static int mes_v12_1_reset_hw_queue(struct amdgpu_mes *mes,
				    struct mes_reset_queue_input *input)
{
	union MESAPI__RESET mes_reset_queue_pkt;
	int pipe;

	memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt));

	mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET;
	mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset;

	if (mes->adev->enable_uni_mes)
		pipe = AMDGPU_MES_KIQ_PIPE;
	else
		pipe = AMDGPU_MES_SCHED_PIPE;

	/*
	 * The api_status offset must be taken from the union that is
	 * actually submitted, not from an unrelated packet layout.
	 */
	return mes_v12_1_submit_pkt_and_poll_completion(mes,
			input->xcc_id, pipe,
			&mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt),
			offsetof(union MESAPI__RESET, api_status));
}
static int mes_v12_1_map_legacy_queue(struct amdgpu_mes *mes,
struct mes_map_legacy_queue_input *input)
{
union MESAPI__ADD_QUEUE mes_add_queue_pkt;
int pipe;
memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
mes_add_queue_pkt.pipe_id = input->pipe_id;
mes_add_queue_pkt.queue_id = input->queue_id;
mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
mes_add_queue_pkt.mqd_addr = input->mqd_addr;
mes_add_queue_pkt.wptr_addr = input->wptr_addr;
mes_add_queue_pkt.queue_type =
convert_to_mes_queue_type(input->queue_type);
mes_add_queue_pkt.map_legacy_kq = 1;
if (mes->adev->enable_uni_mes)
pipe = AMDGPU_MES_KIQ_PIPE;
else
pipe = AMDGPU_MES_SCHED_PIPE;
return mes_v12_1_submit_pkt_and_poll_completion(mes,
input->xcc_id, pipe,
&mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
offsetof(union MESAPI__ADD_QUEUE, api_status));
}
static int mes_v12_1_unmap_legacy_queue(struct amdgpu_mes *mes,
struct mes_unmap_legacy_queue_input *input)
{
union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
int pipe;
memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
mes_remove_queue_pkt.gang_context_addr = 0;
mes_remove_queue_pkt.pipe_id = input->pipe_id;
mes_remove_queue_pkt.queue_id = input->queue_id;
if (input->action == PREEMPT_QUEUES_NO_UNMAP) {
mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1;
mes_remove_queue_pkt.tf_addr = input->trail_fence_addr;
mes_remove_queue_pkt.tf_data =
lower_32_bits(input->trail_fence_data);
} else {
mes_remove_queue_pkt.unmap_legacy_queue = 1;
mes_remove_queue_pkt.queue_type =
convert_to_mes_queue_type(input->queue_type);
}
if (mes->adev->enable_uni_mes)
pipe = AMDGPU_MES_KIQ_PIPE;
else
pipe = AMDGPU_MES_SCHED_PIPE;
return mes_v12_1_submit_pkt_and_poll_completion(mes,
input->xcc_id, pipe,
&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
offsetof(union MESAPI__REMOVE_QUEUE, api_status));
}
/*
 * suspend_gang callback: currently a stub that ignores its arguments and
 * always reports success (0).
 */
static int mes_v12_1_suspend_gang(struct amdgpu_mes *mes,
				  struct mes_suspend_gang_input *input)
{
	return 0;
}
/*
 * resume_gang callback: currently a stub that ignores its arguments and
 * always reports success (0).
 */
static int mes_v12_1_resume_gang(struct amdgpu_mes *mes,
				 struct mes_resume_gang_input *input)
{
	return 0;
}
static int mes_v12_1_query_sched_status(struct amdgpu_mes *mes,
int pipe, int xcc_id)
{
union MESAPI__QUERY_MES_STATUS mes_status_pkt;
memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe,
&mes_status_pkt, sizeof(mes_status_pkt),
offsetof(union MESAPI__QUERY_MES_STATUS, api_status));
}
/* Extract bits [18:16] of a register offset; callers use it as an XCC id. */
static uint32_t mes_v12_1_get_xcc_from_reg(uint32_t reg_offset)
{
	const uint32_t xcc_shift = 16;
	const uint32_t xcc_mask = 0x7;

	return (reg_offset >> xcc_shift) & xcc_mask;
}
/*
 * Fill in the RRMT option and the normalized register offset for a register
 * access issued through MES.
 *
 * @reg:      register offset as supplied by the caller
 * @xcc_id:   XCC instance the request is issued from
 * @rrmt_opt: option block to fill in (fields not touched here keep the
 *            caller's values)
 * @out_reg:  receives soc_v1_0_normalize_reg_offset(@reg)
 *
 * When the XCC-normalized offset falls in the XCC register range, the
 * target die is taken from @reg and the mode is LOCAL_XCD if it equals
 * @xcc_id, REMOTE_XCD otherwise. For all other registers the mode is
 * REMOTE_MID, and mid_die_id is set to 1 for registers in the MID1 range.
 */
static void mes_v12_1_get_rrmt(uint32_t reg, uint32_t xcc_id,
			       struct RRMT_OPTION *rrmt_opt,
			       uint32_t *out_reg)
{
	uint32_t xcc_reg = soc_v1_0_normalize_xcc_reg_offset(reg);

	if (!soc_v1_0_normalize_xcc_reg_range(xcc_reg)) {
		rrmt_opt->mode = MES_RRMT_MODE_REMOTE_MID;
		if (soc_v1_0_mid1_reg_range(reg))
			rrmt_opt->mid_die_id = 1;
	} else {
		rrmt_opt->xcd_die_id = mes_v12_1_get_xcc_from_reg(reg);
		if (xcc_id == rrmt_opt->xcd_die_id)
			rrmt_opt->mode = MES_RRMT_MODE_LOCAL_XCD;
		else
			rrmt_opt->mode = MES_RRMT_MODE_REMOTE_XCD;
	}

	*out_reg = soc_v1_0_normalize_reg_offset(reg);
}
static int mes_v12_1_misc_op(struct amdgpu_mes *mes,
struct mes_misc_op_input *input)
{
struct amdgpu_device *adev = mes->adev;
union MESAPI__MISC misc_pkt;
int pipe;
if (mes->adev->enable_uni_mes)
pipe = AMDGPU_MES_KIQ_PIPE;
else
pipe = AMDGPU_MES_SCHED_PIPE;
memset(&misc_pkt, 0, sizeof(misc_pkt));
misc_pkt.header.type = MES_API_TYPE_SCHEDULER;
misc_pkt.header.opcode = MES_SCH_API_MISC;
misc_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
switch (input->op) {
case MES_MISC_OP_READ_REG:
misc_pkt.opcode = MESAPI_MISC__READ_REG;
misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr;
mes_v12_1_get_rrmt(input->read_reg.reg_offset,
GET_INST(GC, input->xcc_id),
&misc_pkt.read_reg.rrmt_opt,
&misc_pkt.read_reg.reg_offset);
break;
case MES_MISC_OP_WRITE_REG:
misc_pkt.opcode = MESAPI_MISC__WRITE_REG;
misc_pkt.write_reg.reg_value = input->write_reg.reg_value;
mes_v12_1_get_rrmt(input->write_reg.reg_offset,
GET_INST(GC, input->xcc_id),
&misc_pkt.write_reg.rrmt_opt,
&misc_pkt.write_reg.reg_offset);
break;
case MES_MISC_OP_WRM_REG_WAIT:
misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM;
misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
misc_pkt.wait_reg_mem.reg_offset2 = 0;
mes_v12_1_get_rrmt(input->wrm_reg.reg0,
GET_INST(GC, input->xcc_id),
&misc_pkt.wait_reg_mem.rrmt_opt1,
&misc_pkt.wait_reg_mem.reg_offset1);
break;
case MES_MISC_OP_WRM_REG_WR_WAIT:
misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG;
misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
mes_v12_1_get_rrmt(input->wrm_reg.reg0,
GET_INST(GC, input->xcc_id),
&misc_pkt.wait_reg_mem.rrmt_opt1,
&misc_pkt.wait_reg_mem.reg_offset1);
mes_v12_1_get_rrmt(input->wrm_reg.reg1,
GET_INST(GC, input->xcc_id),
&misc_pkt.wait_reg_mem.rrmt_opt2,
&misc_pkt.wait_reg_mem.reg_offset2);
break;
case MES_MISC_OP_SET_SHADER_DEBUGGER:
pipe = AMDGPU_MES_SCHED_PIPE;
misc_pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER;
misc_pkt.set_shader_debugger.process_context_addr =
input->set_shader_debugger.process_context_addr;
misc_pkt.set_shader_debugger.flags.u32all =
input->set_shader_debugger.flags.u32all;
misc_pkt.set_shader_debugger.spi_gdbg_per_vmid_cntl =
input->set_shader_debugger.spi_gdbg_per_vmid_cntl;
memcpy(misc_pkt.set_shader_debugger.tcp_watch_cntl,
input->set_shader_debugger.tcp_watch_cntl,
sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl));
misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en;
break;
case MES_MISC_OP_CHANGE_CONFIG:
misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG;
misc_pkt.change_config.opcode =
MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS;
misc_pkt.change_config.option.bits.limit_single_process =
input->change_config.option.limit_single_process;
break;
default:
DRM_ERROR("unsupported misc op (%d) \n", input->op);
return -EINVAL;
}
return mes_v12_1_submit_pkt_and_poll_completion(mes,
input->xcc_id, pipe,
&misc_pkt, sizeof(misc_pkt),
offsetof(union MESAPI__MISC, api_status));
}
static int mes_v12_1_set_hw_resources_1(struct amdgpu_mes *mes,
int pipe, int xcc_id)
{
union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_1_pkt;
int master_xcc_id, inst = MES_PIPE_INST(xcc_id, pipe);
memset(&mes_set_hw_res_1_pkt, 0, sizeof(mes_set_hw_res_1_pkt));
mes_set_hw_res_1_pkt.header.type = MES_API_TYPE_SCHEDULER;
mes_set_hw_res_1_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1;
mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 100;
if (mes->enable_coop_mode && pipe == AMDGPU_MES_SCHED_PIPE) {
master_xcc_id = mes->master_xcc_ids[inst];
mes_set_hw_res_1_pkt.mes_coop_mode = 1;
mes_set_hw_res_1_pkt.coop_sch_shared_mc_addr =
mes->shared_cmd_buf_gpu_addr[master_xcc_id];
}
return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe,
&mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt),
offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status));
}
/*
 * Program the GFX HQD mask of a SET_HW_RESOURCES packet.
 *
 * GFX V12 has a single GFX pipe with 8 queues. Queue 0 of pipe 0 belongs to
 * the kernel queue, so only queues 1-7 are handed to MES:
 * all eight queue bits minus bit 0, i.e. 1111 1110b (0xFE).
 */
static void mes_v12_1_set_gfx_hqd_mask(union MESAPI_SET_HW_RESOURCES *pkt)
{
	pkt->gfx_hqd_mask[0] = 0xFF & ~0x01;
}
static int mes_v12_1_set_hw_resources(struct amdgpu_mes *mes,
int pipe, int xcc_id)
{
int i;
struct amdgpu_device *adev = mes->adev;
union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt;
memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));
mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
if (pipe == AMDGPU_MES_SCHED_PIPE) {
mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
mes_set_hw_res_pkt.paging_vmid = 0;
for (i = 0; i < MAX_COMPUTE_PIPES; i++)
mes_set_hw_res_pkt.compute_hqd_mask[i] =
mes->compute_hqd_mask[i];
mes_v12_1_set_gfx_hqd_mask(&mes_set_hw_res_pkt);
for (i = 0; i < MAX_SDMA_PIPES; i++)
mes_set_hw_res_pkt.sdma_hqd_mask[i] =
mes->sdma_hqd_mask[i];
for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
mes_set_hw_res_pkt.aggregated_doorbells[i] =
mes->aggregated_doorbells[i];
}
mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr =
mes->sch_ctx_gpu_addr[pipe];
mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
mes->query_status_fence_gpu_addr[pipe];
for (i = 0; i < 5; i++) {
mes_set_hw_res_pkt.gc_base[i] =
adev->reg_offset[GC_HWIP][0][i];
mes_set_hw_res_pkt.mmhub_base[i] =
adev->reg_offset[MMHUB_HWIP][0][i];
mes_set_hw_res_pkt.osssys_base[i] =
adev->reg_offset[OSSSYS_HWIP][0][i];
}
mes_set_hw_res_pkt.disable_reset = 1;
mes_set_hw_res_pkt.disable_mes_log = 1;
mes_set_hw_res_pkt.use_different_vmid_compute = 1;
mes_set_hw_res_pkt.enable_reg_active_poll = 1;
mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;
/*
* Keep oversubscribe timer for sdma . When we have unmapped doorbell
* handling support, other queue will not use the oversubscribe timer.
* handling mode - 0: disabled; 1: basic version; 2: basic+ version
*/
mes_set_hw_res_pkt.oversubscription_timer = 50;
mes_set_hw_res_pkt.unmapped_doorbell_handling = 1;
if (amdgpu_mes_log_enable) {
mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr =
mes->event_log_gpu_addr + MES_PIPE_INST(xcc_id, pipe) * AMDGPU_MES_LOG_BUFFER_SIZE;
}
if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
mes_set_hw_res_pkt.limit_single_process = 1;
return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe,
&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
offsetof(union MESAPI_SET_HW_RESOURCES, api_status));
}
/*
 * Program the five CP_MES_DOORBELL_CONTROL registers of one XCC with the
 * aggregated doorbell offsets, one register per priority level
 * (CONTROL1 = LOW ... CONTROL5 = REALTIME).
 *
 * Each register is updated read-modify-write: the OFFSET, EN and HIT fields
 * are cleared, then the offset for that priority is written and EN is set.
 * Finally CP_HQD_GFX_CONTROL is written with only DB_UPDATED_MSG_EN set
 * (a plain write, not a read-modify-write).
 */
static void mes_v12_1_init_aggregated_doorbell(struct amdgpu_mes *mes,
					       int xcc_id)
{
	struct amdgpu_device *adev = mes->adev;
	uint32_t data;
	/* CONTROL1: low priority */
	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL1);
	data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK |
		  CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK |
		  CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK);
	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] <<
		CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT;
	data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT;
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL1, data);
	/* CONTROL2: normal priority */
	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL2);
	data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK |
		  CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK |
		  CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK);
	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] <<
		CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT;
	data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT;
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL2, data);
	/* CONTROL3: medium priority */
	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL3);
	data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK |
		  CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK |
		  CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK);
	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] <<
		CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT;
	data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT;
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL3, data);
	/* CONTROL4: high priority */
	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL4);
	data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK |
		  CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK |
		  CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK);
	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] <<
		CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT;
	data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT;
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL4, data);
	/* CONTROL5: realtime priority */
	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL5);
	data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK |
		  CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK |
		  CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK);
	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] <<
		CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT;
	data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT;
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL5, data);
	/* Overwrites the whole register with just DB_UPDATED_MSG_EN set. */
	data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT;
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_GFX_CONTROL, data);
}
/*
 * Configure CP_UNMAPPED_DOORBELL on one XCC: set PROC_LSB to 0xd and set or
 * clear the ENABLE field according to @enable.
 *
 * @mes:    MES state (provides the device)
 * @enable: true to enable unmapped doorbell handling, false to disable it
 * @xcc_id: logical XCC instance to program
 */
static void mes_v12_1_enable_unmapped_doorbell_handling(
	struct amdgpu_mes *mes, bool enable, int xcc_id)
{
	struct amdgpu_device *adev = mes->adev;
	uint32_t data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_UNMAPPED_DOORBELL);

	/*
	 * The default PROC_LSB setting is 0xc which means doorbell
	 * addr[16:12] gives the doorbell page number. For kfd, each
	 * process will use 2 pages of doorbell, we need to change the
	 * setting to 0xd
	 */
	data &= ~CP_UNMAPPED_DOORBELL__PROC_LSB_MASK;
	data |= 0xd << CP_UNMAPPED_DOORBELL__PROC_LSB__SHIFT;

	/*
	 * Replace the ENABLE field instead of OR-ing into it, so that
	 * enable == false really clears a bit that is already set.
	 */
	data = REG_SET_FIELD(data, CP_UNMAPPED_DOORBELL, ENABLE, enable ? 1 : 0);

	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_UNMAPPED_DOORBELL, data);
}
/*
 * Compiled out (#if 0): legacy-queue reset through a MES RESET packet.
 * It is not wired into mes_v12_1_funcs.
 */
#if 0
static int mes_v12_1_reset_legacy_queue(struct amdgpu_mes *mes,
					struct mes_reset_legacy_queue_input *input)
{
	union MESAPI__RESET mes_reset_queue_pkt;
	int pipe;
	memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt));
	mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET;
	mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
	mes_reset_queue_pkt.queue_type =
		convert_to_mes_queue_type(input->queue_type);
	/* GFX queues use the *_lp fields; other queue types are identified by doorbell only. */
	if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) {
		mes_reset_queue_pkt.reset_legacy_gfx = 1;
		mes_reset_queue_pkt.pipe_id_lp = input->pipe_id;
		mes_reset_queue_pkt.queue_id_lp = input->queue_id;
		mes_reset_queue_pkt.mqd_mc_addr_lp = input->mqd_addr;
		mes_reset_queue_pkt.doorbell_offset_lp = input->doorbell_offset;
		mes_reset_queue_pkt.wptr_addr_lp = input->wptr_addr;
		mes_reset_queue_pkt.vmid_id_lp = input->vmid;
	} else {
		mes_reset_queue_pkt.reset_queue_only = 1;
		mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset;
	}
	/* KIQ pipe when unified MES is enabled, scheduler pipe otherwise. */
	if (mes->adev->enable_uni_mes)
		pipe = AMDGPU_MES_KIQ_PIPE;
	else
		pipe = AMDGPU_MES_SCHED_PIPE;
	return mes_v12_1_submit_pkt_and_poll_completion(mes,
			input->xcc_id, pipe,
			&mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt),
			offsetof(union MESAPI__RESET, api_status));
}
#endif
/*
 * Convert an amdgpu hub id into the hub id used in MES INV_TLBS packets:
 * GFXHUB -> 0, MMHUB0 -> 1, MMHUB1 -> 2, anything else -> -EINVAL.
 *
 * MES doesn't support invalidate gc_hub on slave xcc individually;
 * master xcc will invalidate all gc_hub for the partition.
 */
static int mes_v12_inv_tlb_convert_hub_id(uint8_t id)
{
	int mes_hub_id = -EINVAL;

	if (AMDGPU_IS_GFXHUB(id))
		mes_hub_id = 0;
	else if (AMDGPU_IS_MMHUB0(id))
		mes_hub_id = 1;
	else if (AMDGPU_IS_MMHUB1(id))
		mes_hub_id = 2;

	return mes_hub_id;
}
static int mes_v12_1_inv_tlbs_pasid(struct amdgpu_mes *mes,
struct mes_inv_tlbs_pasid_input *input)
{
union MESAPI__INV_TLBS mes_inv_tlbs;
int xcc_id = input->xcc_id;
int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE);
int ret;
if (mes->enable_coop_mode)
xcc_id = mes->master_xcc_ids[inst];
memset(&mes_inv_tlbs, 0, sizeof(mes_inv_tlbs));
mes_inv_tlbs.header.type = MES_API_TYPE_SCHEDULER;
mes_inv_tlbs.header.opcode = MES_SCH_API_INV_TLBS;
mes_inv_tlbs.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
mes_inv_tlbs.invalidate_tlbs.inv_sel = 0;
mes_inv_tlbs.invalidate_tlbs.flush_type = input->flush_type;
mes_inv_tlbs.invalidate_tlbs.inv_sel_id = input->pasid;
/*convert amdgpu_mes_hub_id to mes expected hub_id */
ret = mes_v12_inv_tlb_convert_hub_id(input->hub_id);
if (ret < 0)
return -EINVAL;
mes_inv_tlbs.invalidate_tlbs.hub_id = ret;
return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, AMDGPU_MES_KIQ_PIPE,
&mes_inv_tlbs, sizeof(mes_inv_tlbs),
offsetof(union MESAPI__INV_TLBS, api_status));
}
/*
 * MES callback table for v12.1. suspend_gang and resume_gang point at stubs
 * that always return 0.
 */
static const struct amdgpu_mes_funcs mes_v12_1_funcs = {
	.add_hw_queue = mes_v12_1_add_hw_queue,
	.remove_hw_queue = mes_v12_1_remove_hw_queue,
	.map_legacy_queue = mes_v12_1_map_legacy_queue,
	.unmap_legacy_queue = mes_v12_1_unmap_legacy_queue,
	.suspend_gang = mes_v12_1_suspend_gang,
	.resume_gang = mes_v12_1_resume_gang,
	.misc_op = mes_v12_1_misc_op,
	.reset_hw_queue = mes_v12_1_reset_hw_queue,
	.invalidate_tlbs_pasid = mes_v12_1_inv_tlbs_pasid,
};
/*
 * Allocate a page-aligned VRAM buffer object for the MES ucode of the given
 * (xcc_id, pipe) instance and copy the ucode section of the pipe's firmware
 * image into it. The BO is unmapped and unreserved before returning.
 *
 * Returns 0 on success or the error from amdgpu_bo_create_reserved().
 */
static int mes_v12_1_allocate_ucode_buffer(struct amdgpu_device *adev,
					   enum amdgpu_mes_pipe pipe,
					   int xcc_id)
{
	const struct mes_firmware_header_v1_0 *hdr =
		(const struct mes_firmware_header_v1_0 *)adev->mes.fw[pipe]->data;
	int inst = MES_PIPE_INST(xcc_id, pipe);
	const __le32 *ucode;
	unsigned int ucode_size;
	int ret;

	/* Offset and size of the ucode section come from the firmware header. */
	ucode = (const __le32 *)(adev->mes.fw[pipe]->data +
				 le32_to_cpu(hdr->mes_ucode_offset_bytes));
	ucode_size = le32_to_cpu(hdr->mes_ucode_size_bytes);

	ret = amdgpu_bo_create_reserved(adev, ucode_size,
					PAGE_SIZE,
					AMDGPU_GEM_DOMAIN_VRAM,
					&adev->mes.ucode_fw_obj[inst],
					&adev->mes.ucode_fw_gpu_addr[inst],
					(void **)&adev->mes.ucode_fw_ptr[inst]);
	if (ret) {
		dev_err(adev->dev, "(%d) failed to create mes fw bo\n", ret);
		return ret;
	}

	memcpy(adev->mes.ucode_fw_ptr[inst], ucode, ucode_size);

	amdgpu_bo_kunmap(adev->mes.ucode_fw_obj[inst]);
	amdgpu_bo_unreserve(adev->mes.ucode_fw_obj[inst]);

	return 0;
}
/*
 * Allocate a 64 KiB-aligned VRAM buffer object for the MES ucode data of
 * the given (xcc_id, pipe) instance and copy the data section of the pipe's
 * firmware image into it. The BO is unmapped and unreserved before
 * returning.
 *
 * Returns 0 on success or the error from amdgpu_bo_create_reserved().
 */
static int mes_v12_1_allocate_ucode_data_buffer(struct amdgpu_device *adev,
						enum amdgpu_mes_pipe pipe,
						int xcc_id)
{
	const struct mes_firmware_header_v1_0 *hdr =
		(const struct mes_firmware_header_v1_0 *)adev->mes.fw[pipe]->data;
	int inst = MES_PIPE_INST(xcc_id, pipe);
	const __le32 *ucode_data;
	unsigned int data_size;
	int ret;

	/* Offset and size of the data section come from the firmware header. */
	ucode_data = (const __le32 *)(adev->mes.fw[pipe]->data +
				      le32_to_cpu(hdr->mes_ucode_data_offset_bytes));
	data_size = le32_to_cpu(hdr->mes_ucode_data_size_bytes);

	ret = amdgpu_bo_create_reserved(adev, data_size,
					64 * 1024,
					AMDGPU_GEM_DOMAIN_VRAM,
					&adev->mes.data_fw_obj[inst],
					&adev->mes.data_fw_gpu_addr[inst],
					(void **)&adev->mes.data_fw_ptr[inst]);
	if (ret) {
		dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", ret);
		return ret;
	}

	memcpy(adev->mes.data_fw_ptr[inst], ucode_data, data_size);

	amdgpu_bo_kunmap(adev->mes.data_fw_obj[inst]);
	amdgpu_bo_unreserve(adev->mes.data_fw_obj[inst]);

	return 0;
}
/*
 * Release the ucode data buffer and then the ucode instruction buffer that
 * belong to @pipe on @xcc_id.
 */
static void mes_v12_1_free_ucode_buffers(struct amdgpu_device *adev,
					 enum amdgpu_mes_pipe pipe,
					 int xcc_id)
{
	const int idx = MES_PIPE_INST(xcc_id, pipe);

	/* data segment first ... */
	amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[idx],
			      &adev->mes.data_fw_gpu_addr[idx],
			      (void **)&adev->mes.data_fw_ptr[idx]);

	/* ... then the instruction segment */
	amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj[idx],
			      &adev->mes.ucode_fw_gpu_addr[idx],
			      (void **)&adev->mes.ucode_fw_ptr[idx]);
}
/*
 * Start or stop the MES pipes of one XCC through CP_MES_CNTL.
 *
 * @enable == true: set both pipe reset bits, program every pipe's program
 * counter start from adev->mes.uc_start_addr[], then write CP_MES_CNTL with
 * only the two PIPEx_ACTIVE bits set and delay before returning.
 * @enable == false: clear both ACTIVE bits and set the icache-invalidate,
 * pipe reset and halt bits.
 */
static void mes_v12_1_enable(struct amdgpu_device *adev,
bool enable, int xcc_id)
{
uint64_t ucode_addr;
uint32_t pipe, data = 0;
if (enable) {
/* put both pipes into reset while the start addresses are programmed */
data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL);
data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1);
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data);
/* the per-pipe register select below is done under srbm_mutex */
mutex_lock(&adev->srbm_mutex);
for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
/* me=3, queue=0 */
soc_v1_0_grbm_select(adev, 3, pipe, 0, 0,
GET_INST(GC, xcc_id));
/* the start address is written shifted right by two bits */
ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
WREG32_SOC15(GC, GET_INST(GC, xcc_id),
regCP_MES_PRGRM_CNTR_START,
lower_32_bits(ucode_addr));
WREG32_SOC15(GC, GET_INST(GC, xcc_id),
regCP_MES_PRGRM_CNTR_START_HI,
upper_32_bits(ucode_addr));
}
/* restore the default select before dropping the mutex */
soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
mutex_unlock(&adev->srbm_mutex);
/*
 * unhalt MES and activate both pipes; the value is rebuilt from 0, so
 * only the two ACTIVE fields are set in what gets written
 */
data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1);
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data);
/* fixed settle delay: 500 ms in emulation, 500 us for unified MES, else 50 us */
if (amdgpu_emu_mode)
msleep(500);
else if (adev->enable_uni_mes)
udelay(500);
else
udelay(50);
} else {
/* deactivate, invalidate the icache, reset and halt in a single write */
data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL);
data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0);
data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 0);
data = REG_SET_FIELD(data, CP_MES_CNTL,
MES_INVALIDATE_ICACHE, 1);
data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1);
data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1);
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data);
}
}
/*
 * Disable MES on @xcc_id and program the program counter start address of
 * every MES pipe from adev->mes.uc_start_addr[]. MES is left disabled; the
 * callers in this file enable it afterwards.
 */
static void mes_v12_1_set_ucode_start_addr(struct amdgpu_device *adev,
int xcc_id)
{
uint64_t ucode_addr;
int pipe;
mes_v12_1_enable(adev, false, xcc_id);
/* the per-pipe register select below is done under srbm_mutex */
mutex_lock(&adev->srbm_mutex);
for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
/* me=3, queue=0 */
soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id));
/* set ucode start address (written shifted right by two bits) */
ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_PRGRM_CNTR_START,
lower_32_bits(ucode_addr));
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_PRGRM_CNTR_START_HI,
upper_32_bits(ucode_addr));
/* restore the default select after each pipe */
soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
}
mutex_unlock(&adev->srbm_mutex);
}
/*
 * This function is for backdoor MES firmware.
 *
 * Disables MES on @xcc_id, allocates and fills the ucode and ucode-data
 * buffers for @pipe, and programs the instruction-cache and data base/bound
 * registers of that pipe. When @prime_icache is set the instruction cache is
 * invalidated and then primed.
 *
 * Returns 0 on success, -EINVAL when no firmware is loaded for @pipe, or the
 * error from a buffer allocation. Note that MES has already been disabled
 * when -EINVAL is returned.
 */
static int mes_v12_1_load_microcode(struct amdgpu_device *adev,
enum amdgpu_mes_pipe pipe,
bool prime_icache, int xcc_id)
{
int r, inst = MES_PIPE_INST(xcc_id, pipe);
uint32_t data;
mes_v12_1_enable(adev, false, xcc_id);
if (!adev->mes.fw[pipe])
return -EINVAL;
r = mes_v12_1_allocate_ucode_buffer(adev, pipe, xcc_id);
if (r)
return r;
r = mes_v12_1_allocate_ucode_data_buffer(adev, pipe, xcc_id);
if (r) {
/* also releases the ucode buffer allocated just above */
mes_v12_1_free_ucode_buffers(adev, pipe, xcc_id);
return r;
}
mutex_lock(&adev->srbm_mutex);
/* me=3, pipe=<pipe>, queue=0 */
soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id));
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_CNTL, 0);
/* set ucode firmware address */
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_LO,
lower_32_bits(adev->mes.ucode_fw_gpu_addr[inst]));
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_HI,
upper_32_bits(adev->mes.ucode_fw_gpu_addr[inst]));
/* set ucode instruction cache boundary to 2M-1 */
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MIBOUND_LO, 0x1FFFFF);
/* set ucode data firmware address */
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBASE_LO,
lower_32_bits(adev->mes.data_fw_gpu_addr[inst]));
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBASE_HI,
upper_32_bits(adev->mes.data_fw_gpu_addr[inst]));
/* Set data cache boundary CP_MES_MDBOUND_LO */
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBOUND_LO, 0x7FFFF);
if (prime_icache) {
/* invalidate ICACHE */
data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL);
data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL, data);
/* prime the ICACHE. */
data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL);
data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL, data);
}
/* restore the default select before dropping the mutex */
soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
mutex_unlock(&adev->srbm_mutex);
return 0;
}
/*
 * Create a zero-filled, page-aligned GTT buffer of MES_EOP_SIZE bytes to be
 * used as the EOP buffer of @pipe on @xcc_id.
 *
 * Returns 0 on success, or the error reported by amdgpu_bo_create_reserved().
 */
static int mes_v12_1_allocate_eop_buf(struct amdgpu_device *adev,
				      enum amdgpu_mes_pipe pipe,
				      int xcc_id)
{
	int inst = MES_PIPE_INST(xcc_id, pipe);
	u32 *eop_cpu;
	int err;

	err = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE,
					AMDGPU_GEM_DOMAIN_GTT,
					&adev->mes.eop_gpu_obj[inst],
					&adev->mes.eop_gpu_addr[inst],
					(void **)&eop_cpu);
	if (err) {
		dev_warn(adev->dev, "(%d) create EOP bo failed\n", err);
		return err;
	}

	/* Clear the whole object, which may be larger than what was asked for. */
	memset(eop_cpu, 0, adev->mes.eop_gpu_obj[inst]->tbo.base.size);

	/* The CPU mapping was only needed for clearing. */
	amdgpu_bo_kunmap(adev->mes.eop_gpu_obj[inst]);
	amdgpu_bo_unreserve(adev->mes.eop_gpu_obj[inst]);

	return 0;
}
/*
 * Create the one-page VRAM shared command buffer for @pipe on @xcc_id.
 * Nothing is allocated for the KIQ pipe.
 *
 * Returns 0 on success (or when skipped), otherwise the error reported by
 * amdgpu_bo_create_kernel().
 */
static int mes_v12_1_allocate_shared_cmd_buf(struct amdgpu_device *adev,
					     enum amdgpu_mes_pipe pipe,
					     int xcc_id)
{
	int err, idx = MES_PIPE_INST(xcc_id, pipe);

	if (pipe == AMDGPU_MES_KIQ_PIPE)
		return 0;

	/* No CPU mapping is requested for this buffer. */
	err = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->mes.shared_cmd_buf_obj[idx],
				      &adev->mes.shared_cmd_buf_gpu_addr[idx],
				      NULL);
	if (!err)
		return 0;

	dev_err(adev->dev,
		"(%d) failed to create shared cmd buf bo\n", err);
	return err;
}
/*
 * Fill the memory queue descriptor (ring->mqd_ptr) for a MES ring from the
 * ring's addresses, size and doorbell settings. Only the MQD image in memory
 * is written here; no hardware register is touched. Also resets ring->wptr.
 *
 * Always returns 0.
 */
static int mes_v12_1_mqd_init(struct amdgpu_ring *ring)
{
struct v12_1_mes_mqd *mqd = ring->mqd_ptr;
uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
uint32_t tmp;
mqd->header = 0xC0310800;
mqd->compute_pipelinestat_enable = 0x00000001;
mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
mqd->compute_misc_reserved = 0x00000007;
/* the EOP base is stored shifted right by 8 bits */
eop_base_addr = ring->eop_gpu_addr >> 8;
/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
(order_base_2(MES_EOP_SIZE / 4) - 1));
mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_base_addr);
mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
mqd->cp_hqd_eop_control = tmp;
/* start from an empty queue: zero the read and write pointers */
ring->wptr = 0;
mqd->cp_hqd_pq_rptr = 0;
mqd->cp_hqd_pq_wptr_lo = 0;
mqd->cp_hqd_pq_wptr_hi = 0;
/* set the pointer to the MQD */
mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
/* set MQD vmid to 0 */
tmp = regCP_MQD_CONTROL_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
mqd->cp_mqd_control = tmp;
/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
hqd_gpu_addr = ring->gpu_addr >> 8;
mqd->cp_hqd_pq_base_lo = lower_32_bits(hqd_gpu_addr);
mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
/* set the wb address whether it's enabled or not */
wb_gpu_addr = ring->rptr_gpu_addr;
mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_rptr_report_addr_hi =
upper_32_bits(wb_gpu_addr) & 0xffff;
/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
wb_gpu_addr = ring->wptr_gpu_addr;
mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8;
mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
/* set up the HQD, this is similar to CP_RB0_CNTL */
tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
(order_base_2(ring->ring_size / 4) - 1));
/*
 * NOTE(review): the value handed to REG_SET_FIELD is already shifted left
 * by 8 here; confirm this matches the RPTR_BLOCK_SIZE field position.
 */
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, NO_UPDATE_RPTR, 1);
mqd->cp_hqd_pq_control = tmp;
/* enable doorbell */
tmp = 0;
if (ring->use_doorbell) {
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_OFFSET, ring->doorbell_index);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_EN, 1);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_SOURCE, 0);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_HIT, 0);
} else {
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_EN, 0);
}
mqd->cp_hqd_pq_doorbell_control = tmp;
mqd->cp_hqd_vmid = 0;
/* activate the queue */
mqd->cp_hqd_active = 1;
tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE,
PRELOAD_SIZE, 0x63);
mqd->cp_hqd_persistent_state = tmp;
mqd->cp_hqd_ib_control = regCP_HQD_IB_CONTROL_MES_12_1_DEFAULT;
mqd->cp_hqd_iq_timer = regCP_HQD_IQ_TIMER_DEFAULT;
mqd->cp_hqd_quantum = regCP_HQD_QUANTUM_DEFAULT;
/*
 * Set CP_HQD_GFX_CONTROL.DB_UPDATED_MSG_EN[15] to enable unmapped
 * doorbell handling. This is a reserved CP internal register that
 * cannot be accessed by others.
 */
mqd->cp_hqd_gfx_control = BIT(15);
return 0;
}
/*
 * Program the hardware queue descriptor registers of a MES ring directly
 * from the values stored in its MQD (ring->mqd_ptr), on the GC instance of
 * @xcc_id. The queue's doorbell is disabled first, and CP_HQD_ACTIVE is
 * written last. The whole sequence runs under srbm_mutex with me=3 and
 * ring->pipe selected.
 */
static void mes_v12_1_queue_init_register(struct amdgpu_ring *ring,
int xcc_id)
{
struct v12_1_mes_mqd *mqd = ring->mqd_ptr;
struct amdgpu_device *adev = ring->adev;
uint32_t data = 0;
mutex_lock(&adev->srbm_mutex);
soc_v1_0_grbm_select(adev, 3, ring->pipe, 0, 0, GET_INST(GC, xcc_id));
/* set CP_HQD_VMID.VMID = 0. */
data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID);
data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0);
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID, data);
/* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */
data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL);
data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_EN, 0);
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, data);
/* set CP_MQD_BASE_ADDR/HI with the MQD base address */
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
/* set CP_MQD_CONTROL.VMID=0 */
/*
 * NOTE(review): the read-modify-write result in 'data' is discarded and a
 * literal 0 is written, which clears every field of CP_MQD_CONTROL rather
 * than only VMID. Confirm this is intended before changing it.
 */
data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL);
data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0);
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL, 0);
/* set CP_HQD_PQ_BASE/HI with the ring buffer base address */
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
/* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR,
mqd->cp_hqd_pq_rptr_report_addr_lo);
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
mqd->cp_hqd_pq_rptr_report_addr_hi);
/* set CP_HQD_PQ_CONTROL */
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
/* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR,
mqd->cp_hqd_pq_wptr_poll_addr_lo);
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
mqd->cp_hqd_pq_wptr_poll_addr_hi);
/* set CP_HQD_PQ_DOORBELL_CONTROL */
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL,
mqd->cp_hqd_pq_doorbell_control);
/*
 * set CP_HQD_PERSISTENT_STATE from the MQD; mes_v12_1_mqd_init() programs
 * PRELOAD_SIZE=0x63 into that value
 */
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
/* set CP_HQD_ACTIVE.ACTIVE=1 */
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE, mqd->cp_hqd_active);
/* restore the default select before dropping the mutex */
soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
mutex_unlock(&adev->srbm_mutex);
}
/*
 * Map the MES scheduler ring of @xcc_id through that XCC's KIQ ring and run
 * a ring test on the KIQ ring to confirm the request was processed.
 *
 * Returns 0 on success, -EINVAL when the KIQ has no map-queues packet
 * function, or the error from ring allocation / the ring test. On a failed
 * ring test the KIQ ring is marked not ready.
 */
static int mes_v12_1_kiq_enable_queue(struct amdgpu_device *adev, int xcc_id)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	struct amdgpu_ring *kiq_ring = &kiq->ring;
	int sched_inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE);
	int ret;

	if (!(kiq->pmf && kiq->pmf->kiq_map_queues))
		return -EINVAL;

	/* Reserve room on the KIQ ring for one map-queues packet. */
	ret = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
	if (ret) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", ret);
		return ret;
	}

	kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[sched_inst]);

	ret = amdgpu_ring_test_ring(kiq_ring);
	if (!ret)
		return 0;

	DRM_ERROR("kfq enable failed\n");
	kiq_ring->sched.ready = false;
	return ret;
}
static int mes_v12_1_queue_init(struct amdgpu_device *adev,
enum amdgpu_mes_pipe pipe,
int xcc_id)
{
struct amdgpu_ring *ring;
int r;
if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE)
ring = &adev->gfx.kiq[xcc_id].ring;
else
ring = &adev->mes.ring[MES_PIPE_INST(xcc_id, pipe)];
if ((adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) &&
(amdgpu_in_reset(adev) || adev->in_suspend)) {
*(ring->wptr_cpu_addr) = 0;
*(ring->rptr_cpu_addr) = 0;
amdgpu_ring_clear_ring(ring);
}
r = mes_v12_1_mqd_init(ring);
if (r)
return r;
if (pipe == AMDGPU_MES_SCHED_PIPE) {
if (adev->enable_uni_mes)
r = amdgpu_mes_map_legacy_queue(adev, ring, xcc_id);
else
r = mes_v12_1_kiq_enable_queue(adev, xcc_id);
if (r)
return r;
} else {
mes_v12_1_queue_init_register(ring, xcc_id);
}
/* get MES scheduler/KIQ versions */
mutex_lock(&adev->srbm_mutex);
soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id));
if (pipe == AMDGPU_MES_SCHED_PIPE)
adev->mes.sched_version = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_GP3_LO);
else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq)
adev->mes.kiq_version = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_GP3_LO);
soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
mutex_unlock(&adev->srbm_mutex);
return 0;
}
/*
 * Fill in the software state of the MES ring for @pipe on @xcc_id (me 3,
 * queue 0, doorbell enabled, no scheduler) and hand it to amdgpu_ring_init().
 *
 * Returns the result of amdgpu_ring_init().
 */
static int mes_v12_1_ring_init(struct amdgpu_device *adev,
			       int xcc_id, int pipe)
{
	int idx = MES_PIPE_INST(xcc_id, pipe);
	struct amdgpu_ring *mes_ring = &adev->mes.ring[idx];

	mes_ring->funcs = &mes_v12_1_ring_funcs;

	/* hardware location of the queue */
	mes_ring->me = 3;
	mes_ring->pipe = pipe;
	mes_ring->queue = 0;
	mes_ring->xcc_id = xcc_id;
	mes_ring->vm_hub = AMDGPU_GFXHUB(xcc_id);

	mes_ring->ring_obj = NULL;
	mes_ring->use_doorbell = true;
	mes_ring->eop_gpu_addr = adev->mes.eop_gpu_addr[idx];
	mes_ring->no_scheduler = true;

	snprintf(mes_ring->name, sizeof(mes_ring->name),
		 "mes_%hhu.%hhu.%hhu.%hhu",
		 (unsigned char)xcc_id, (unsigned char)mes_ring->me,
		 (unsigned char)mes_ring->pipe, (unsigned char)mes_ring->queue);

	/*
	 * The scheduler pipe uses the mes_ring0 doorbell, any other pipe the
	 * mes_ring1 doorbell; each XCC is offset by its doorbell range and the
	 * sum is doubled.
	 */
	mes_ring->doorbell_index =
		((pipe == AMDGPU_MES_SCHED_PIPE ?
		  adev->doorbell_index.mes_ring0 :
		  adev->doorbell_index.mes_ring1) +
		 xcc_id * adev->doorbell_index.xcc_doorbell_range)
		<< 1;

	return amdgpu_ring_init(adev, mes_ring, 1024, NULL, 0,
				AMDGPU_RING_PRIO_DEFAULT, NULL);
}
/*
 * Fill in the software state of the gfx KIQ ring of @xcc_id so that it sits
 * on MES pipe 1 (me 3, queue 0) and hand it to amdgpu_ring_init().
 *
 * Returns the result of amdgpu_ring_init().
 */
static int mes_v12_1_kiq_ring_init(struct amdgpu_device *adev, int xcc_id)
{
	int kiq_inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_KIQ_PIPE);
	struct amdgpu_ring *kiq_ring;

	spin_lock_init(&adev->gfx.kiq[xcc_id].ring_lock);

	kiq_ring = &adev->gfx.kiq[xcc_id].ring;

	/* hardware location of the queue */
	kiq_ring->me = 3;
	kiq_ring->pipe = 1;
	kiq_ring->queue = 0;
	kiq_ring->xcc_id = xcc_id;
	kiq_ring->vm_hub = AMDGPU_GFXHUB(xcc_id);

	/* cleared here, before the ring is handed to amdgpu_ring_init() */
	kiq_ring->adev = NULL;
	kiq_ring->ring_obj = NULL;
	kiq_ring->use_doorbell = true;
	kiq_ring->eop_gpu_addr = adev->mes.eop_gpu_addr[kiq_inst];
	kiq_ring->no_scheduler = true;

	/* mes_ring1 doorbell, offset per XCC, doubled */
	kiq_ring->doorbell_index =
		(adev->doorbell_index.mes_ring1 +
		 xcc_id * adev->doorbell_index.xcc_doorbell_range)
		<< 1;

	snprintf(kiq_ring->name, sizeof(kiq_ring->name),
		 "mes_kiq_%hhu.%hhu.%hhu.%hhu",
		 (unsigned char)xcc_id, (unsigned char)kiq_ring->me,
		 (unsigned char)kiq_ring->pipe, (unsigned char)kiq_ring->queue);

	return amdgpu_ring_init(adev, kiq_ring, 1024, NULL, 0,
				AMDGPU_RING_PRIO_DEFAULT, NULL);
}
/*
 * Allocate and zero the MQD buffer of the ring behind @pipe on @xcc_id, and
 * try to allocate a CPU-side backup area of the same size. Does nothing if
 * the ring already owns an MQD object.
 *
 * Returns 0 on success or the error from amdgpu_bo_create_kernel(). Failing
 * to allocate the backup only produces a warning.
 */
static int mes_v12_1_mqd_sw_init(struct amdgpu_device *adev,
				 enum amdgpu_mes_pipe pipe,
				 int xcc_id)
{
	int mqd_bytes = sizeof(struct v12_1_mes_mqd);
	int idx = MES_PIPE_INST(xcc_id, pipe);
	struct amdgpu_ring *ring;
	int err;

	/* Without unified MES the KIQ pipe is served by the gfx KIQ ring. */
	if (pipe == AMDGPU_MES_KIQ_PIPE && !adev->enable_uni_mes)
		ring = &adev->gfx.kiq[xcc_id].ring;
	else
		ring = &adev->mes.ring[idx];

	if (ring->mqd_obj)
		return 0;

	err = amdgpu_bo_create_kernel(adev, mqd_bytes, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
				      &ring->mqd_gpu_addr, &ring->mqd_ptr);
	if (err) {
		dev_warn(adev->dev, "failed to create ring mqd bo (%d)", err);
		return err;
	}
	memset(ring->mqd_ptr, 0, mqd_bytes);

	/* prepare MQD backup */
	adev->mes.mqd_backup[idx] = kmalloc(mqd_bytes, GFP_KERNEL);
	if (adev->mes.mqd_backup[idx])
		return 0;

	dev_warn(adev->dev,
		 "no memory to create MQD backup for ring %s\n",
		 ring->name);
	return 0;
}
/*
 * Allocate the EOP buffer, MQD and ring for one pipe of one XCC, plus the
 * shared command buffer when unified MES runs on more than one XCC.
 * Returns 0 on success or the first error encountered.
 */
static int mes_v12_1_sw_init_pipe(struct amdgpu_device *adev, int xcc_id,
				  int pipe, int num_xcc)
{
	int ret;

	ret = mes_v12_1_allocate_eop_buf(adev, pipe, xcc_id);
	if (ret)
		return ret;

	ret = mes_v12_1_mqd_sw_init(adev, pipe, xcc_id);
	if (ret)
		return ret;

	if (pipe == AMDGPU_MES_KIQ_PIPE && !adev->enable_uni_mes)
		ret = mes_v12_1_kiq_ring_init(adev, xcc_id);
	else
		ret = mes_v12_1_ring_init(adev, xcc_id, pipe);
	if (ret)
		return ret;

	if (adev->enable_uni_mes && num_xcc > 1)
		return mes_v12_1_allocate_shared_cmd_buf(adev, pipe, xcc_id);

	return 0;
}

/*
 * IP-block sw_init hook: install the MES v12.1 callbacks, size the event log,
 * run the common amdgpu_mes_init() and then set up the per-XCC, per-pipe
 * software state.
 *
 * Returns 0 on success or the first error encountered; nothing is unwound
 * here on failure.
 */
static int mes_v12_1_sw_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int num_xcc = NUM_XCC(adev->gfx.xcc_mask);
	int xcc, pipe_idx, ret;

	adev->mes.funcs = &mes_v12_1_funcs;
	adev->mes.kiq_hw_init = &mes_v12_1_kiq_hw_init;
	adev->mes.kiq_hw_fini = &mes_v12_1_kiq_hw_fini;
	adev->mes.enable_legacy_queue_map = true;

	/* Unified MES gets one log buffer per pipe per XCC, otherwise one. */
	if (adev->enable_uni_mes)
		adev->mes.event_log_size = AMDGPU_MAX_MES_PIPES *
			AMDGPU_MES_LOG_BUFFER_SIZE * num_xcc;
	else
		adev->mes.event_log_size = AMDGPU_MES_LOG_BUFFER_SIZE;

	ret = amdgpu_mes_init(adev);
	if (ret)
		return ret;

	for (xcc = 0; xcc < num_xcc; xcc++) {
		for (pipe_idx = 0; pipe_idx < AMDGPU_MAX_MES_PIPES; pipe_idx++) {
			ret = mes_v12_1_sw_init_pipe(adev, xcc, pipe_idx,
						     num_xcc);
			if (ret)
				return ret;
		}
	}

	return 0;
}
/*
 * IP-block sw_fini hook: release the per-pipe buffers and rings of every
 * XCC, the firmware images, the gfx KIQ ring state (when unified MES is
 * off), the directly loaded ucode buffers, and finally the common MES state.
 *
 * Always returns 0.
 */
static int mes_v12_1_sw_fini(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int num_xcc = NUM_XCC(adev->gfx.xcc_mask);
	int xcc, p, idx;

	for (xcc = 0; xcc < num_xcc; xcc++) {
		for (p = 0; p < AMDGPU_MAX_MES_PIPES; p++) {
			idx = MES_PIPE_INST(xcc, p);

			amdgpu_bo_free_kernel(&adev->mes.shared_cmd_buf_obj[idx],
					      &adev->mes.shared_cmd_buf_gpu_addr[idx],
					      NULL);

			kfree(adev->mes.mqd_backup[idx]);

			amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[idx],
					      &adev->mes.eop_gpu_addr[idx],
					      NULL);

			/* Without unified MES only the scheduler pipe has a MES ring. */
			if (!adev->enable_uni_mes && p != AMDGPU_MES_SCHED_PIPE)
				continue;

			amdgpu_bo_free_kernel(&adev->mes.ring[idx].mqd_obj,
					      &adev->mes.ring[idx].mqd_gpu_addr,
					      &adev->mes.ring[idx].mqd_ptr);
			amdgpu_ring_fini(&adev->mes.ring[idx]);
		}
	}

	for (p = 0; p < AMDGPU_MAX_MES_PIPES; p++)
		amdgpu_ucode_release(&adev->mes.fw[p]);

	for (xcc = 0; xcc < num_xcc; xcc++) {
		/* Without unified MES the KIQ pipe used the gfx KIQ ring. */
		if (!adev->enable_uni_mes) {
			amdgpu_bo_free_kernel(&adev->gfx.kiq[xcc].ring.mqd_obj,
					      &adev->gfx.kiq[xcc].ring.mqd_gpu_addr,
					      &adev->gfx.kiq[xcc].ring.mqd_ptr);
			amdgpu_ring_fini(&adev->gfx.kiq[xcc].ring);
		}

		/* ucode buffers are released here only for direct firmware loading */
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
			mes_v12_1_free_ucode_buffers(adev,
						     AMDGPU_MES_KIQ_PIPE, xcc);
			mes_v12_1_free_ucode_buffers(adev,
						     AMDGPU_MES_SCHED_PIPE, xcc);
		}
	}

	amdgpu_mes_fini(adev);

	return 0;
}
/*
 * Tear down the MES scheduler pipe's hardware queue on @xcc_id through
 * direct register access: request a dequeue if the queue is active, poll for
 * it to go inactive for at most adev->usec_timeout iterations, then clear
 * the doorbell control and ring pointer registers. The scheduler ring is
 * marked not ready afterwards.
 *
 * A timeout of the dequeue poll is not reported; the function proceeds
 * regardless.
 */
static void mes_v12_1_kiq_dequeue_sched(struct amdgpu_device *adev,
int xcc_id)
{
uint32_t data;
int i;
mutex_lock(&adev->srbm_mutex);
/* me=3, scheduler pipe, queue=0 */
soc_v1_0_grbm_select(adev, 3, AMDGPU_MES_SCHED_PIPE, 0, 0,
GET_INST(GC, xcc_id));
/* disable the queue if it's active */
if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1) {
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 1);
/* poll once per microsecond until ACTIVE clears or the budget runs out */
for (i = 0; i < adev->usec_timeout; i++) {
if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1))
break;
udelay(1);
}
}
/* disable the doorbell with DOORBELL_HIT set, then zero the register */
data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL);
data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_EN, 0);
data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_HIT, 1);
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, data);
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 0);
/* reset the queue's write and read pointers */
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO, 0);
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI, 0);
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR, 0);
/* restore the default select before dropping the mutex */
soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
mutex_unlock(&adev->srbm_mutex);
adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready = false;
}
/*
 * Record in RLC_CP_SCHEDULERS which me/pipe/queue is the KIQ for @xcc_id.
 * The low byte is written twice: first with the queue location alone, then
 * again with bit 7 additionally set.
 */
static void mes_v12_1_kiq_setting(struct amdgpu_ring *ring, int xcc_id)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t sched;

	/* tell RLC which is KIQ queue */
	sched = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS);
	sched = (sched & 0xffffff00) |
		(ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, sched);

	sched |= 0x80;
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, sched);
}
/*
 * Hardware bring-up of the MES KIQ pipe on @xcc_id: register the KIQ with
 * the RLC, load firmware and/or program start addresses depending on the
 * firmware load type, enable MES, initialise the KIQ queue, and (when
 * enabled) set hardware resources and continue with the scheduler pipe
 * bring-up.
 *
 * Returns 0 on success. Firmware load errors and a missing MES IP block
 * return directly; later failures call mes_v12_1_hw_fini() first.
 */
static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id)
{
	int kiq_inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_KIQ_PIPE);
	struct amdgpu_ip_block *mes_block;
	int ret = 0;

	mes_v12_1_kiq_setting(adev->enable_uni_mes ?
			      &adev->mes.ring[kiq_inst] :
			      &adev->gfx.kiq[xcc_id].ring, xcc_id);

	switch (adev->firmware.load_type) {
	case AMDGPU_FW_LOAD_DIRECT:
		ret = mes_v12_1_load_microcode(adev, AMDGPU_MES_SCHED_PIPE,
					       false, xcc_id);
		if (ret) {
			DRM_ERROR("failed to load MES fw, r=%d\n", ret);
			return ret;
		}

		ret = mes_v12_1_load_microcode(adev, AMDGPU_MES_KIQ_PIPE,
					       true, xcc_id);
		if (ret) {
			DRM_ERROR("failed to load MES kiq fw, r=%d\n", ret);
			return ret;
		}

		mes_v12_1_set_ucode_start_addr(adev, xcc_id);
		break;
	case AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO:
		mes_v12_1_set_ucode_start_addr(adev, xcc_id);
		break;
	default:
		break;
	}

	mes_v12_1_enable(adev, true, xcc_id);

	mes_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_MES);
	if (unlikely(!mes_block)) {
		dev_err(adev->dev, "Failed to get MES handle\n");
		return -EINVAL;
	}

	ret = mes_v12_1_queue_init(adev, AMDGPU_MES_KIQ_PIPE, xcc_id);
	if (ret)
		goto fini;

	if (adev->enable_uni_mes) {
		ret = mes_v12_1_set_hw_resources(&adev->mes,
						 AMDGPU_MES_KIQ_PIPE, xcc_id);
		if (ret)
			goto fini;

		/* the result of this second call is not checked */
		mes_v12_1_set_hw_resources_1(&adev->mes,
					     AMDGPU_MES_KIQ_PIPE, xcc_id);
	}

	if (adev->mes.enable_legacy_queue_map) {
		ret = mes_v12_1_xcc_hw_init(mes_block, xcc_id);
		if (ret)
			goto fini;
	}

	return 0;

fini:
	mes_v12_1_hw_fini(mes_block);
	return ret;
}
/*
 * Shut down MES on @xcc_id: if the scheduler ring is still marked ready,
 * unmap it (through MES for unified MES, otherwise through direct register
 * dequeue) and mark it not ready; then disable MES.
 *
 * Always returns 0.
 */
static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id)
{
	int sched_inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE);

	if (!adev->mes.ring[sched_inst].sched.ready)
		goto disable;

	if (adev->enable_uni_mes)
		amdgpu_mes_unmap_legacy_queue(adev,
					      &adev->mes.ring[sched_inst],
					      RESET_QUEUES, 0, 0, xcc_id);
	else
		mes_v12_1_kiq_dequeue_sched(adev, xcc_id);

	adev->mes.ring[sched_inst].sched.ready = false;

disable:
	mes_v12_1_enable(adev, false, xcc_id);

	return 0;
}
/*
 * Decide, from the current partition mode, whether MES cooperative mode is
 * used and which XCC acts as master for @xcc_id.
 *
 * With a single XCC nothing is changed. SPX, DPX and QPX enable cooperative
 * mode and record the master XCC id; CPX disables it.
 *
 * Returns 0 on success, -EINVAL when the XCCs-per-XCP callback is missing or
 * the partition mode is not one of the four handled here.
 */
static int mes_v12_1_setup_coop_mode(struct amdgpu_device *adev, int xcc_id)
{
	u32 num_xcc = NUM_XCC(adev->gfx.xcc_mask);
	u32 xcc_per_xcp, partitions;

	if (num_xcc == 1)
		return 0;

	if (!adev->gfx.funcs || !adev->gfx.funcs->get_xccs_per_xcp)
		return -EINVAL;

	xcc_per_xcp = adev->gfx.funcs->get_xccs_per_xcp(adev);

	switch (adev->xcp_mgr->mode) {
	case AMDGPU_SPX_PARTITION_MODE:
		/* one partition: XCC 0 is the master of every XCC */
		adev->mes.enable_coop_mode = 1;
		adev->mes.master_xcc_ids[xcc_id] = 0;
		return 0;
	case AMDGPU_DPX_PARTITION_MODE:
		partitions = 2;
		break;
	case AMDGPU_QPX_PARTITION_MODE:
		partitions = 4;
		break;
	case AMDGPU_CPX_PARTITION_MODE:
		adev->mes.enable_coop_mode = 0;
		return 0;
	default:
		return -EINVAL;
	}

	/* DPX/QPX: master = partition index * XCCs per partition */
	adev->mes.enable_coop_mode = 1;
	adev->mes.master_xcc_ids[xcc_id] =
		(xcc_id / xcc_per_xcp) * (num_xcc / partitions);

	return 0;
}
/*
 * Hardware bring-up of the MES scheduler pipe on one XCC.
 *
 * If the scheduler ring is already marked ready, only the ready flags are
 * refreshed. Otherwise: when MES KIQ is not in use, firmware is loaded
 * and/or start addresses are programmed according to the firmware load type
 * and MES is enabled; then unmapped-doorbell handling is turned on, the
 * scheduler queue is initialised, hardware resources are set, cooperative
 * mode is configured (unified MES only), the aggregated doorbell is
 * initialised and the scheduler status is queried.
 *
 * Returns 0 on success. A firmware load error returns directly; later
 * failures call mes_v12_1_hw_fini() before returning the error.
 */
static int mes_v12_1_xcc_hw_init(struct amdgpu_ip_block *ip_block, int xcc_id)
{
int r;
struct amdgpu_device *adev = ip_block->adev;
/* already brought up: skip straight to updating the ready flags */
if (adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready)
goto out;
/* with MES KIQ enabled this firmware/enable step is skipped here */
if (!adev->enable_mes_kiq) {
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
r = mes_v12_1_load_microcode(adev,
AMDGPU_MES_SCHED_PIPE, true, xcc_id);
if (r) {
DRM_ERROR("failed to MES fw, r=%d\n", r);
return r;
}
mes_v12_1_set_ucode_start_addr(adev, xcc_id);
} else if (adev->firmware.load_type ==
AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
mes_v12_1_set_ucode_start_addr(adev, xcc_id);
}
mes_v12_1_enable(adev, true, xcc_id);
}
/* Enable the MES to handle doorbell ring on unmapped queue */
mes_v12_1_enable_unmapped_doorbell_handling(&adev->mes, true, xcc_id);
r = mes_v12_1_queue_init(adev, AMDGPU_MES_SCHED_PIPE, xcc_id);
if (r)
goto failure;
r = mes_v12_1_set_hw_resources(&adev->mes,
AMDGPU_MES_SCHED_PIPE, xcc_id);
if (r)
goto failure;
if (adev->enable_uni_mes) {
r = mes_v12_1_setup_coop_mode(adev, xcc_id);
if (r)
goto failure;
/* the result of this call is not checked */
mes_v12_1_set_hw_resources_1(&adev->mes,
AMDGPU_MES_SCHED_PIPE, xcc_id);
}
mes_v12_1_init_aggregated_doorbell(&adev->mes, xcc_id);
r = mes_v12_1_query_sched_status(&adev->mes,
AMDGPU_MES_SCHED_PIPE, xcc_id);
if (r) {
DRM_ERROR("MES is busy\n");
goto failure;
}
out:
/*
 * Disable KIQ ring usage from the driver once MES is enabled.
 * MES uses KIQ ring exclusively so driver cannot access KIQ ring
 * with MES enabled.
 */
adev->gfx.kiq[xcc_id].ring.sched.ready = false;
adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready = true;
return 0;
failure:
mes_v12_1_hw_fini(ip_block);
return r;
}
/*
 * IP-block hw_init hook: bring up the MES scheduler pipe on every XCC in
 * turn, stopping at the first failure.
 *
 * Returns 0 on success or the error from mes_v12_1_xcc_hw_init().
 */
static int mes_v12_1_hw_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int total_xcc = NUM_XCC(adev->gfx.xcc_mask);
	int xcc, err = 0;

	for (xcc = 0; xcc < total_xcc; xcc++) {
		err = mes_v12_1_xcc_hw_init(ip_block, xcc);
		if (err)
			break;
	}

	return err;
}
/*
 * IP-block hw_fini hook. No hardware teardown is performed here; the
 * function only reports success.
 */
static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block)
{
return 0;
}
/*
 * IP-block suspend hook: suspend MES through the common helper, then run the
 * hw_fini hook. Returns the first error, or the hw_fini result.
 */
static int mes_v12_1_suspend(struct amdgpu_ip_block *ip_block)
{
	int err = amdgpu_mes_suspend(ip_block->adev);

	return err ? err : mes_v12_1_hw_fini(ip_block);
}
/*
 * IP-block resume hook: re-run the hardware init, then resume MES through
 * the common helper. Returns the first error, or the resume result.
 */
static int mes_v12_1_resume(struct amdgpu_ip_block *ip_block)
{
	int err = mes_v12_1_hw_init(ip_block);

	return err ? err : amdgpu_mes_resume(ip_block->adev);
}
/*
 * IP-block early_init hook: load the MES firmware image for every pipe.
 * Returns 0 on success or the first error from amdgpu_mes_init_microcode().
 */
static int mes_v12_1_early_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int p, err = 0;

	for (p = 0; p < AMDGPU_MAX_MES_PIPES; p++) {
		err = amdgpu_mes_init_microcode(adev, p);
		if (err)
			break;
	}

	return err;
}
static int mes_v12_1_late_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
int xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask);
/* TODO: remove it if issue fixed. */
if (adev->mes.enable_coop_mode)
return 0;
for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
/* for COOP mode, only test master xcc. */
if (adev->mes.enable_coop_mode &&
adev->mes.master_xcc_ids[xcc_id] != xcc_id)
continue;
mes_v12_1_self_test(adev, xcc_id);
}
return 0;
}
/*
 * IP-block lifecycle hooks for MES v12.1; referenced by mes_v12_1_ip_block.
 */
static const struct amd_ip_funcs mes_v12_1_ip_funcs = {
.name = "mes_v12_1",
.early_init = mes_v12_1_early_init,
.late_init = mes_v12_1_late_init,
.sw_init = mes_v12_1_sw_init,
.sw_fini = mes_v12_1_sw_fini,
.hw_init = mes_v12_1_hw_init,
.hw_fini = mes_v12_1_hw_fini,
.suspend = mes_v12_1_suspend,
.resume = mes_v12_1_resume,
};
/*
 * IP block version descriptor for MES 12.1.0. This is the only non-static
 * object in this part of the file.
 */
const struct amdgpu_ip_block_version mes_v12_1_ip_block = {
.type = AMD_IP_BLOCK_TYPE_MES,
.major = 12,
.minor = 1,
.rev = 0,
.funcs = &mes_v12_1_ip_funcs,
};
/*
 * Allocate a zero-filled, page-aligned GTT buffer of @size bytes for the MES
 * self test.
 *
 * @bo, @addr and @ptr receive the buffer object, its GPU address and its CPU
 * mapping.
 *
 * Returns 0 on success, the error from amdgpu_bo_create_kernel() when the
 * allocation fails, or -ENOMEM when the call succeeds without producing a
 * buffer object.
 */
static int mes_v12_1_alloc_test_buf(struct amdgpu_device *adev,
				    struct amdgpu_bo **bo, uint64_t *addr,
				    void **ptr, int size)
{
	int r;

	r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
				    bo, addr, ptr);
	/*
	 * Check the return code as well as *bo: relying on *bo alone loses
	 * the real error and, if *bo was already set on entry, would let a
	 * failed call fall through to the memset() below.
	 */
	if (r || !*bo) {
		dev_err(adev->dev, "failed to allocate test buffer bo\n");
		return r ? r : -ENOMEM;
	}

	memset(*ptr, 0, size);

	return 0;
}
/*
 * Map a self-test buffer object into @vm at virtual address @va and wait for
 * the resulting page-table updates to finish.
 *
 * @bo_va receives the mapping created by amdgpu_map_static_csa().
 *
 * Returns 0 on success or the error from the mapping / page-table update
 * step that failed. The mapping itself is not undone here on failure.
 */
static int mes_v12_1_map_test_bo(struct amdgpu_device *adev,
				 struct amdgpu_bo *bo, struct amdgpu_vm *vm,
				 struct amdgpu_bo_va **bo_va, u64 va, int size)
{
	struct amdgpu_sync sync;
	int r;

	r = amdgpu_map_static_csa(adev, vm, bo, bo_va, va, size);
	if (r)
		return r;

	amdgpu_sync_create(&sync);

	r = amdgpu_vm_bo_update(adev, *bo_va, false);
	if (r) {
		dev_err(adev->dev, "failed to do vm_bo_update on meta data\n");
		goto error;
	}
	amdgpu_sync_fence(&sync, (*bo_va)->last_pt_update, GFP_KERNEL);

	r = amdgpu_vm_update_pdes(adev, vm, false);
	if (r) {
		dev_err(adev->dev, "failed to update pdes on meta data\n");
		goto error;
	}
	amdgpu_sync_fence(&sync, vm->last_update, GFP_KERNEL);

	/* Block until both page-table updates collected above have finished. */
	amdgpu_sync_wait(&sync, false);

error:
	amdgpu_sync_free(&sync);
	/*
	 * Propagate the failure: r is 0 here on the success path and holds
	 * the error code when one of the update steps jumped to this label.
	 */
	return r;
}
/*
 * Submit one test packet on a self-test queue and wait for its effect.
 *
 * For a compute queue, SCRATCH_REG0 is preset to 0xCAFEDEAD (on @xcc_id, or
 * in cooperative mode on every XCC whose master is @xcc_id) and the packet
 * writes 0xDEADBEEF to it. For an SDMA queue, the fence location is preset
 * to 0xCAFEDEAD and the packet writes 0xDEADBEEF there. The rest of the
 * submission is padded with NOPs, the write pointer is published and the
 * doorbell is rung.
 *
 * The result is then polled for at most adev->usec_timeout iterations. In
 * cooperative mode the test only passes once every XCC whose master is
 * @xcc_id shows 0xDEADBEEF.
 *
 * @queue_ptr:      CPU pointer to the ring buffer to fill
 * @fence_gpu_addr: GPU address the SDMA packet writes to
 * @fence_cpu_ptr:  CPU pointer to that same fence location
 * @wptr_cpu_addr:  CPU pointer to the queue's 64-bit write pointer
 * @doorbell_idx:   doorbell to ring
 * @queue_type:     AMDGPU_RING_TYPE_COMPUTE or AMDGPU_RING_TYPE_SDMA
 *
 * Returns 0 when the value was observed, -ETIMEDOUT otherwise.
 */
static int mes_v12_1_test_ring(struct amdgpu_device *adev, int xcc_id,
			       u32 *queue_ptr, u64 fence_gpu_addr,
			       void *fence_cpu_ptr, void *wptr_cpu_addr,
			       u64 doorbell_idx, int queue_type)
{
	volatile uint32_t *cpu_ptr = fence_cpu_ptr;
	int num_xcc = NUM_XCC(adev->gfx.xcc_mask);
	int sdma_ring_align = 0x10, compute_ring_align = 0x100;
	/*
	 * tmp starts at 0 so the poll loop never compares an indeterminate
	 * value: in cooperative mode nothing is read when no XCC lists
	 * xcc_id as its master, and the test then simply times out.
	 */
	uint32_t tmp = 0, xcc_offset;
	int r = 0, i, j, wptr = 0;

	if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
		if (!adev->mes.enable_coop_mode) {
			WREG32_SOC15(GC, GET_INST(GC, xcc_id),
				     regSCRATCH_REG0, 0xCAFEDEAD);
		} else {
			for (i = 0; i < num_xcc; i++) {
				if (adev->mes.master_xcc_ids[i] == xcc_id)
					WREG32_SOC15(GC, GET_INST(GC, i),
						     regSCRATCH_REG0, 0xCAFEDEAD);
			}
		}

		xcc_offset = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
		queue_ptr[wptr++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
		queue_ptr[wptr++] = xcc_offset - PACKET3_SET_UCONFIG_REG_START;
		queue_ptr[wptr++] = 0xDEADBEEF;

		/* pad the submission up to compute_ring_align dwords */
		for (i = wptr; i < compute_ring_align; i++)
			queue_ptr[wptr++] = PACKET3(PACKET3_NOP, 0x3FFF);
	} else if (queue_type == AMDGPU_RING_TYPE_SDMA) {
		*cpu_ptr = 0xCAFEDEAD;

		queue_ptr[wptr++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
			SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
		queue_ptr[wptr++] = lower_32_bits(fence_gpu_addr);
		queue_ptr[wptr++] = upper_32_bits(fence_gpu_addr);
		queue_ptr[wptr++] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
		queue_ptr[wptr++] = 0xDEADBEEF;

		/* pad the submission up to sdma_ring_align dwords */
		for (i = wptr; i < sdma_ring_align; i++)
			queue_ptr[wptr++] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);

		/* for SDMA the published write pointer is the dword count times four */
		wptr <<= 2;
	}

	atomic64_set((atomic64_t *)wptr_cpu_addr, wptr);
	WDOORBELL64(doorbell_idx, wptr);

	/* 'i' counts poll iterations only; the XCC scan uses its own 'j' */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (queue_type == AMDGPU_RING_TYPE_SDMA) {
			tmp = le32_to_cpu(*cpu_ptr);
		} else {
			if (!adev->mes.enable_coop_mode) {
				tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
						   regSCRATCH_REG0);
			} else {
				/*
				 * Stop at the first slave XCC that has not
				 * been updated yet, leaving its stale value
				 * in tmp.
				 */
				for (j = 0; j < num_xcc; j++) {
					if (xcc_id != adev->mes.master_xcc_ids[j])
						continue;

					tmp = RREG32_SOC15(GC, GET_INST(GC, j),
							   regSCRATCH_REG0);
					if (tmp != 0xDEADBEEF)
						break;
				}
			}
		}

		if (tmp == 0xDEADBEEF)
			break;

		if (amdgpu_emu_mode == 1)
			msleep(1);
		else
			udelay(1);
	}

	if (i >= adev->usec_timeout) {
		dev_err(adev->dev, "xcc%d: mes self test (%s) failed\n", xcc_id,
			queue_type == AMDGPU_RING_TYPE_SDMA ? "sdma" : "compute");

		/* debugging aid: spin here for as long as halt_if_hws_hang is set */
		while (halt_if_hws_hang)
			schedule();

		r = -ETIMEDOUT;
	} else {
		dev_info(adev->dev, "xcc%d: mes self test (%s) pass\n", xcc_id,
			 queue_type == AMDGPU_RING_TYPE_SDMA ? "sdma" : "compute");
	}

	return r;
}
/*
 * Layout of the two-page user context area used by the MES self-test queue,
 * expressed relative to its base address: the ring starts at the base, the
 * EOP area starts one page in, and the last three u64 slots hold (counting
 * back from the end) the write pointer, the read pointer and the fence.
 * USER_CTX_VA is the virtual address at which this area is placed.
 */
#define USER_CTX_SIZE (PAGE_SIZE * 2)
#define USER_CTX_VA AMDGPU_VA_RESERVED_BOTTOM
#define RING_OFFSET(addr) ((addr))
#define EOP_OFFSET(addr) ((addr) + PAGE_SIZE)
#define WPTR_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64))
#define RPTR_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64) * 2)
#define FENCE_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64) * 3)
/*
 * Create a temporary MES hardware queue of @queue_type on @xcc_id, run the
 * ring test on it and remove the queue again.
 *
 * @adev:           amdgpu device
 * @xcc_id:         XCC the queue is added to
 * @pasid:          process id passed to MES in the add-queue request
 * @vm:             VM whose root page table is handed to MES
 * @meta_gpu_addr:  GPU address of the process context; the gang context is
 *                  taken AMDGPU_MES_PROC_CTX_SIZE bytes past it
 * @queue_gpu_addr: GPU address of the user context buffer (used for the
 *                  write pointer MC address)
 * @ctx_ptr:        CPU mapping of the user context buffer
 * @queue_type:     AMDGPU_RING_TYPE_* of the queue to test
 *
 * Returns a negative error if the MQD buffer cannot be allocated, the error
 * from mes_v12_1_add_hw_queue() if adding the queue fails, and otherwise the
 * result of mes_v12_1_remove_hw_queue().
 *
 * NOTE(review): the return value of mes_v12_1_test_ring() is not captured,
 * so a ring test timeout does not change this function's return value.
 * Confirm that this is intended.
 */
static int mes_v12_1_test_queue(struct amdgpu_device *adev, int xcc_id,
				int pasid, struct amdgpu_vm *vm, u64 meta_gpu_addr,
				u64 queue_gpu_addr, void *ctx_ptr, int queue_type)
{
	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
	struct amdgpu_mqd *mqd_mgr = &adev->mqds[queue_type];
	struct mes_remove_queue_input remove_queue = {0};
	struct mes_add_queue_input add_queue = {0};
	struct amdgpu_mqd_prop mqd_prop = {0};
	struct amdgpu_bo *mqd_bo = NULL;
	int num_xcc = NUM_XCC(adev->gfx.xcc_mask);
	u64 mqd_gpu_addr, doorbell_idx;
	void *mqd_ptr = NULL;
	int mqd_count = 1;
	int mqd_size;
	int offset;
	int ret;
	int n;

	/* extra one page size padding for mes fw */
	mqd_size = mqd_mgr->mqd_size + PAGE_SIZE;

	/* SDMA queues use the first SDMA engine doorbell, all others the user queue one. */
	switch (queue_type) {
	case AMDGPU_RING_TYPE_SDMA:
		doorbell_idx = adev->mes.db_start_dw_offset +
			       adev->doorbell_index.sdma_engine[0];
		break;
	default:
		doorbell_idx = adev->mes.db_start_dw_offset +
			       adev->doorbell_index.userqueue_start;
		break;
	}

	/*
	 * In cooperative mode a compute queue gets one MQD per entry of
	 * master_xcc_ids[] that names this XCC.
	 */
	if (adev->mes.enable_coop_mode &&
	    queue_type == AMDGPU_RING_TYPE_COMPUTE) {
		mqd_count = 0;
		for (n = 0; n < num_xcc; n++) {
			if (adev->mes.master_xcc_ids[n] != xcc_id)
				continue;
			mqd_count++;
		}
		mqd_size *= mqd_count;
	}

	ret = mes_v12_1_alloc_test_buf(adev, &mqd_bo, &mqd_gpu_addr,
				       &mqd_ptr, mqd_size * mqd_count);
	if (ret < 0)
		return ret;

	/* All queue resources live inside the user context mapping. */
	mqd_prop.hqd_base_gpu_addr = RING_OFFSET(USER_CTX_VA);
	mqd_prop.eop_gpu_addr = EOP_OFFSET(USER_CTX_VA);
	mqd_prop.wptr_gpu_addr = WPTR_OFFSET(USER_CTX_VA);
	mqd_prop.rptr_gpu_addr = RPTR_OFFSET(USER_CTX_VA);
	mqd_prop.queue_size = PAGE_SIZE;
	mqd_prop.mqd_stride_size = mqd_size;
	mqd_prop.doorbell_index = doorbell_idx;
	mqd_prop.use_doorbell = true;
	mqd_prop.hqd_active = false;

	/* The first MQD sits at the start of the buffer ... */
	mqd_prop.mqd_gpu_addr = mqd_gpu_addr;
	mqd_mgr->init_mqd(adev, mqd_ptr, &mqd_prop);

	/* ... and any further ones follow at mqd_size strides. */
	for (n = 1; n < mqd_count; n++) {
		offset = mqd_size * n;
		mqd_prop.mqd_gpu_addr = mqd_gpu_addr + offset;
		mqd_mgr->init_mqd(adev, (char *)mqd_ptr + offset, &mqd_prop);
	}

	add_queue.xcc_id = xcc_id;
	add_queue.process_id = pasid;
	add_queue.queue_type = queue_type;
	add_queue.page_table_base_addr = adev->vm_manager.vram_base_offset +
		amdgpu_bo_gpu_offset(vm->root.bo) - adev->gmc.vram_start;
	add_queue.process_va_start = 0;
	add_queue.process_va_end = adev->vm_manager.max_pfn - 1;
	add_queue.process_context_addr = meta_gpu_addr;
	add_queue.gang_context_addr = meta_gpu_addr + AMDGPU_MES_PROC_CTX_SIZE;
	add_queue.doorbell_offset = doorbell_idx;
	add_queue.mqd_addr = mqd_gpu_addr;
	add_queue.wptr_addr = mqd_prop.wptr_gpu_addr;
	add_queue.wptr_mc_addr = WPTR_OFFSET(queue_gpu_addr);
	add_queue.vm_cntx_cntl = hub->vm_cntx_cntl;

	ret = mes_v12_1_add_hw_queue(&adev->mes, &add_queue);
	if (ret)
		goto out_free_mqd;

	/* Fence is addressed by GPU VA for the packet and by CPU pointer for polling. */
	mes_v12_1_test_ring(adev, xcc_id, (u32 *)RING_OFFSET((char *)ctx_ptr),
			    FENCE_OFFSET(USER_CTX_VA),
			    FENCE_OFFSET((char *)ctx_ptr),
			    WPTR_OFFSET((char *)ctx_ptr),
			    doorbell_idx, queue_type);

	remove_queue.xcc_id = xcc_id;
	remove_queue.doorbell_offset = doorbell_idx;
	remove_queue.gang_context_addr = add_queue.gang_context_addr;

	ret = mes_v12_1_remove_hw_queue(&adev->mes, &remove_queue);

out_free_mqd:
	amdgpu_bo_free_kernel(&mqd_bo, &mqd_gpu_addr, &mqd_ptr);
	return ret;
}
/*
 * Run the MES queue self test on one XCC.
 *
 * @adev:   amdgpu device
 * @xcc_id: XCC the test queues are added to
 *
 * Allocates a process/gang context buffer and a user context buffer, sets up
 * a temporary VM, maps the user context buffer at USER_CTX_VA and then calls
 * mes_v12_1_test_queue() once for every entry of queue_types[], stopping at
 * the first failure.  Everything that was set up is torn down again before
 * returning.
 *
 * Returns 0 when every step succeeded, otherwise the error of the first step
 * that failed.
 */
static int mes_v12_1_self_test(struct amdgpu_device *adev, int xcc_id)
{
	/* Only the compute queue is exercised; SDMA is currently left out. */
	int queue_types[] = { AMDGPU_RING_TYPE_COMPUTE,
			      /* AMDGPU_RING_TYPE_SDMA */ };
	struct amdgpu_bo_va *bo_va = NULL;
	struct amdgpu_vm *vm = NULL;
	struct amdgpu_bo *meta_bo = NULL, *ctx_bo = NULL;
	void *meta_ptr = NULL, *ctx_ptr = NULL;
	u64 meta_gpu_addr, ctx_gpu_addr;
	int size, i, r, pasid;

	/* A failed allocation is not fatal: the test falls back to pasid 0. */
	pasid = amdgpu_pasid_alloc(16);
	if (pasid < 0)
		pasid = 0;

	/* Process context followed by gang context in a single buffer. */
	size = AMDGPU_MES_PROC_CTX_SIZE + AMDGPU_MES_GANG_CTX_SIZE;
	r = mes_v12_1_alloc_test_buf(adev, &meta_bo, &meta_gpu_addr,
				     &meta_ptr, size);
	if (r < 0)
		goto err2;

	r = mes_v12_1_alloc_test_buf(adev, &ctx_bo, &ctx_gpu_addr,
				     &ctx_ptr, USER_CTX_SIZE);
	if (r < 0)
		goto err2;

	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
	if (!vm) {
		r = -ENOMEM;
		goto err2;
	}

	r = amdgpu_vm_init(adev, vm, -1, pasid);
	if (r)
		goto err1;

	r = mes_v12_1_map_test_bo(adev, ctx_bo, vm, &bo_va,
				  USER_CTX_VA, USER_CTX_SIZE);
	if (r)
		goto err0;

	for (i = 0; i < ARRAY_SIZE(queue_types); i++) {
		/* Start every queue type from a clean ring, pointers and fence. */
		memset(ctx_ptr, 0, USER_CTX_SIZE);

		r = mes_v12_1_test_queue(adev, xcc_id, pasid, vm, meta_gpu_addr,
					 ctx_gpu_addr, ctx_ptr, queue_types[i]);
		if (r)
			break;
	}

	amdgpu_unmap_static_csa(adev, vm, ctx_bo, bo_va, USER_CTX_VA);
err0:
	amdgpu_vm_fini(adev, vm);
err1:
	kfree(vm);
err2:
	amdgpu_bo_free_kernel(&meta_bo, &meta_gpu_addr, &meta_ptr);
	amdgpu_bo_free_kernel(&ctx_bo, &ctx_gpu_addr, &ctx_ptr);
	/*
	 * pasid 0 is the fallback used when amdgpu_pasid_alloc() failed; it was
	 * never handed out by the allocator, so it must not be released.
	 */
	if (pasid)
		amdgpu_pasid_free(pasid);
	return r;
}