Line data Source code
1 : /*
2 : * Copyright 2019 Advanced Micro Devices, Inc.
3 : *
4 : * Permission is hereby granted, free of charge, to any person obtaining a
5 : * copy of this software and associated documentation files (the "Software"),
6 : * to deal in the Software without restriction, including without limitation
7 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 : * and/or sell copies of the Software, and to permit persons to whom the
9 : * Software is furnished to do so, subject to the following conditions:
10 : *
11 : * The above copyright notice and this permission notice shall be included in
12 : * all copies or substantial portions of the Software.
13 : *
14 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 : * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 : * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 : * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 : * OTHER DEALINGS IN THE SOFTWARE.
21 : *
22 : */
23 :
24 : #include <linux/firmware.h>
25 : #include <linux/module.h>
26 : #include "amdgpu.h"
27 : #include "soc15_common.h"
28 : #include "soc21.h"
29 : #include "gc/gc_11_0_0_offset.h"
30 : #include "gc/gc_11_0_0_sh_mask.h"
31 : #include "gc/gc_11_0_0_default.h"
32 : #include "v11_structs.h"
33 : #include "mes_v11_api_def.h"
34 :
35 : MODULE_FIRMWARE("amdgpu/gc_11_0_0_mes.bin");
36 : MODULE_FIRMWARE("amdgpu/gc_11_0_0_mes1.bin");
37 : MODULE_FIRMWARE("amdgpu/gc_11_0_1_mes.bin");
38 : MODULE_FIRMWARE("amdgpu/gc_11_0_1_mes1.bin");
39 : MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes.bin");
40 : MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes1.bin");
41 : MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes.bin");
42 : MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes1.bin");
43 :
44 : static int mes_v11_0_hw_fini(void *handle);
45 : static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev);
46 : static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev);
47 :
48 : #define MES_EOP_SIZE 2048
49 :
50 0 : static void mes_v11_0_ring_set_wptr(struct amdgpu_ring *ring)
51 : {
52 0 : struct amdgpu_device *adev = ring->adev;
53 :
54 0 : if (ring->use_doorbell) {
55 0 : atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
56 0 : ring->wptr);
57 0 : WDOORBELL64(ring->doorbell_index, ring->wptr);
58 : } else {
59 0 : BUG();
60 : }
61 0 : }
62 :
63 0 : static u64 mes_v11_0_ring_get_rptr(struct amdgpu_ring *ring)
64 : {
65 0 : return *ring->rptr_cpu_addr;
66 : }
67 :
68 0 : static u64 mes_v11_0_ring_get_wptr(struct amdgpu_ring *ring)
69 : {
70 : u64 wptr;
71 :
72 0 : if (ring->use_doorbell)
73 0 : wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
74 : else
75 0 : BUG();
76 0 : return wptr;
77 : }
78 :
/* Ring callbacks for the MES scheduler ring: doorbell-driven, 64-bit
 * rptr/wptr, 8-byte (align_mask = 1 dword) alignment, NOP opcode 0. */
static const struct amdgpu_ring_funcs mes_v11_0_ring_funcs = {
	.type = AMDGPU_RING_TYPE_MES,
	.align_mask = 1,
	.nop = 0,
	.support_64bit_ptrs = true,
	.get_rptr = mes_v11_0_ring_get_rptr,
	.get_wptr = mes_v11_0_ring_get_wptr,
	.set_wptr = mes_v11_0_ring_set_wptr,
	.insert_nop = amdgpu_ring_insert_nop,
};
89 :
/*
 * Emit a MES API packet on the MES ring and poll for the firmware to
 * signal its completion fence.
 *
 * @mes: MES context; the packet goes out on mes->ring under mes->ring_lock.
 * @pkt: fully built API packet. Its embedded MES_API_STATUS (located
 *       @api_status_off bytes into the packet) is patched here with the
 *       completion fence address/value before emission.
 * @size: packet size in bytes; must be a multiple of 4 (BUG_ON otherwise).
 * @api_status_off: byte offset of the MES_API_STATUS field inside @pkt.
 *
 * Returns 0 on success, -ENOMEM if ring space cannot be allocated, or
 * -ETIMEDOUT if the fence is not signalled within the poll timeout.
 */
static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
						    void *pkt, int size,
						    int api_status_off)
{
	int ndw = size / 4;	/* packet length in dwords */
	signed long r;
	union MESAPI__ADD_QUEUE *x_pkt = pkt;	/* any packet; used for the common header */
	struct MES_API_STATUS *api_status;
	struct amdgpu_device *adev = mes->adev;
	struct amdgpu_ring *ring = &mes->ring;
	unsigned long flags;

	BUG_ON(size % 4 != 0);

	spin_lock_irqsave(&mes->ring_lock, flags);
	if (amdgpu_ring_alloc(ring, ndw)) {
		spin_unlock_irqrestore(&mes->ring_lock, flags);
		return -ENOMEM;
	}

	/* Point the packet's API status at the ring's fence slot; the new
	 * sync_seq is allocated under the lock so packets get unique,
	 * monotonically increasing completion values. */
	api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
	api_status->api_completion_fence_addr = mes->ring.fence_drv.gpu_addr;
	api_status->api_completion_fence_value = ++mes->ring.fence_drv.sync_seq;

	amdgpu_ring_write_multiple(ring, pkt, ndw);
	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&mes->ring_lock, flags);

	DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);

	/* Emulation is far slower than silicon: scale the timeout by 100. */
	r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
		      adev->usec_timeout * (amdgpu_emu_mode ? 100 : 1));
	if (r < 1) {
		DRM_ERROR("MES failed to response msg=%d\n",
			  x_pkt->header.opcode);
		return -ETIMEDOUT;
	}

	return 0;
}
130 :
131 0 : static int convert_to_mes_queue_type(int queue_type)
132 : {
133 0 : if (queue_type == AMDGPU_RING_TYPE_GFX)
134 : return MES_QUEUE_TYPE_GFX;
135 0 : else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
136 : return MES_QUEUE_TYPE_COMPUTE;
137 0 : else if (queue_type == AMDGPU_RING_TYPE_SDMA)
138 : return MES_QUEUE_TYPE_SDMA;
139 : else
140 0 : BUG();
141 : return -1;
142 : }
143 :
/*
 * Build and submit an ADD_QUEUE packet describing a new hardware queue
 * (process/gang context, MQD, doorbell, priorities, GWS, trap handler)
 * to the MES scheduler.
 *
 * Returns 0 on success or the error from packet submission.
 */
static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
				  struct mes_add_queue_input *input)
{
	struct amdgpu_device *adev = mes->adev;
	union MESAPI__ADD_QUEUE mes_add_queue_pkt;
	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
	uint32_t vm_cntx_cntl = hub->vm_cntx_cntl;

	memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));

	mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
	mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_add_queue_pkt.process_id = input->process_id;
	mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr;
	mes_add_queue_pkt.process_va_start = input->process_va_start;
	mes_add_queue_pkt.process_va_end = input->process_va_end;
	mes_add_queue_pkt.process_quantum = input->process_quantum;
	mes_add_queue_pkt.process_context_addr = input->process_context_addr;
	mes_add_queue_pkt.gang_quantum = input->gang_quantum;
	mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
	mes_add_queue_pkt.inprocess_gang_priority =
		input->inprocess_gang_priority;
	mes_add_queue_pkt.gang_global_priority_level =
		input->gang_global_priority_level;
	mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
	mes_add_queue_pkt.mqd_addr = input->mqd_addr;

	/* API v2+ firmware takes the wptr MC address, older firmware the
	 * plain wptr address (presumably CPU/GART-visible — confirm
	 * against the MES API definition). */
	if (((adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) >>
	      AMDGPU_MES_API_VERSION_SHIFT) >= 2)
		mes_add_queue_pkt.wptr_addr = input->wptr_mc_addr;
	else
		mes_add_queue_pkt.wptr_addr = input->wptr_addr;

	mes_add_queue_pkt.queue_type =
		convert_to_mes_queue_type(input->queue_type);
	mes_add_queue_pkt.paging = input->paging;
	mes_add_queue_pkt.vm_context_cntl = vm_cntx_cntl;
	mes_add_queue_pkt.gws_base = input->gws_base;
	mes_add_queue_pkt.gws_size = input->gws_size;
	mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
	mes_add_queue_pkt.tma_addr = input->tma_addr;
	mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
	/* Trap handling is always enabled for queues added this way. */
	mes_add_queue_pkt.trap_en = 1;

	return mes_v11_0_submit_pkt_and_poll_completion(mes,
			&mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
			offsetof(union MESAPI__ADD_QUEUE, api_status));
}
194 :
195 0 : static int mes_v11_0_remove_hw_queue(struct amdgpu_mes *mes,
196 : struct mes_remove_queue_input *input)
197 : {
198 : union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
199 :
200 0 : memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
201 :
202 0 : mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
203 0 : mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
204 0 : mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
205 :
206 0 : mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
207 0 : mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;
208 :
209 0 : return mes_v11_0_submit_pkt_and_poll_completion(mes,
210 : &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
211 : offsetof(union MESAPI__REMOVE_QUEUE, api_status));
212 : }
213 :
/*
 * Unmap a queue that was mapped directly by the driver (legacy path)
 * rather than created through ADD_QUEUE, so gang_context_addr is 0 and
 * the queue is identified by pipe/queue id and doorbell offset.
 *
 * For PREEMPT_QUEUES_NO_UNMAP the queue is only preempted: the
 * firmware writes the trailing-fence data to the given address instead
 * of tearing the queue down. Any other action fully unmaps the queue.
 */
static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes,
					struct mes_unmap_legacy_queue_input *input)
{
	union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;

	memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));

	mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
	mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
	mes_remove_queue_pkt.gang_context_addr = 0;

	mes_remove_queue_pkt.pipe_id = input->pipe_id;
	mes_remove_queue_pkt.queue_id = input->queue_id;

	if (input->action == PREEMPT_QUEUES_NO_UNMAP) {
		/* Preempt only: firmware writes tf_data to tf_addr as a
		 * trailing fence once the queue is off the hardware. */
		mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1;
		mes_remove_queue_pkt.tf_addr = input->trail_fence_addr;
		mes_remove_queue_pkt.tf_data =
			lower_32_bits(input->trail_fence_data);
	} else {
		mes_remove_queue_pkt.unmap_legacy_queue = 1;
		mes_remove_queue_pkt.queue_type =
			convert_to_mes_queue_type(input->queue_type);
	}

	return mes_v11_0_submit_pkt_and_poll_completion(mes,
			&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
			offsetof(union MESAPI__REMOVE_QUEUE, api_status));
}
246 :
/* Gang suspend is not implemented for MES v11; report success so the
 * common MES layer can proceed. */
static int mes_v11_0_suspend_gang(struct amdgpu_mes *mes,
				  struct mes_suspend_gang_input *input)
{
	return 0;
}
252 :
/* Gang resume is not implemented for MES v11; report success so the
 * common MES layer can proceed. */
static int mes_v11_0_resume_gang(struct amdgpu_mes *mes,
				 struct mes_resume_gang_input *input)
{
	return 0;
}
258 :
259 0 : static int mes_v11_0_query_sched_status(struct amdgpu_mes *mes)
260 : {
261 : union MESAPI__QUERY_MES_STATUS mes_status_pkt;
262 :
263 0 : memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
264 :
265 0 : mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
266 0 : mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
267 0 : mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
268 :
269 0 : return mes_v11_0_submit_pkt_and_poll_completion(mes,
270 : &mes_status_pkt, sizeof(mes_status_pkt),
271 : offsetof(union MESAPI__QUERY_MES_STATUS, api_status));
272 : }
273 :
/*
 * Translate a driver misc-op request into a MESAPI__MISC packet and
 * submit it. Supported ops: register read (into a buffer address),
 * register write, wait-reg-mem, and write-then-wait-then-write.
 *
 * Returns 0 on success, -EINVAL for an unknown op, or the error from
 * packet submission.
 */
static int mes_v11_0_misc_op(struct amdgpu_mes *mes,
			     struct mes_misc_op_input *input)
{
	union MESAPI__MISC misc_pkt;

	memset(&misc_pkt, 0, sizeof(misc_pkt));

	misc_pkt.header.type = MES_API_TYPE_SCHEDULER;
	misc_pkt.header.opcode = MES_SCH_API_MISC;
	misc_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	switch (input->op) {
	case MES_MISC_OP_READ_REG:
		/* Firmware reads reg_offset and stores the value at
		 * buffer_addr. */
		misc_pkt.opcode = MESAPI_MISC__READ_REG;
		misc_pkt.read_reg.reg_offset = input->read_reg.reg_offset;
		misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr;
		break;
	case MES_MISC_OP_WRITE_REG:
		misc_pkt.opcode = MESAPI_MISC__WRITE_REG;
		misc_pkt.write_reg.reg_offset = input->write_reg.reg_offset;
		misc_pkt.write_reg.reg_value = input->write_reg.reg_value;
		break;
	case MES_MISC_OP_WRM_REG_WAIT:
		/* Wait until (reg0 & mask) matches reference; only one
		 * register is involved, so reg_offset2 is unused. */
		misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
		misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM;
		misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
		misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
		misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
		misc_pkt.wait_reg_mem.reg_offset2 = 0;
		break;
	case MES_MISC_OP_WRM_REG_WR_WAIT:
		misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
		misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG;
		misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
		misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
		misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
		misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1;
		break;
	default:
		DRM_ERROR("unsupported misc op (%d) \n", input->op);
		return -EINVAL;
	}

	return mes_v11_0_submit_pkt_and_poll_completion(mes,
			&misc_pkt, sizeof(misc_pkt),
			offsetof(union MESAPI__MISC, api_status));
}
321 :
/*
 * Report to the MES firmware which hardware resources it owns: VMID
 * masks for both hubs, per-pipe HQD masks (compute/gfx/sdma),
 * aggregated doorbells per priority level, register aperture bases for
 * the GC/MMHUB/OSSSYS IP blocks, and the scheduler context / query
 * fence GPU addresses, plus a handful of policy flags.
 *
 * Returns 0 on success or the error from packet submission.
 */
static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
{
	int i;
	struct amdgpu_device *adev = mes->adev;
	union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt;

	memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));

	mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
	mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
	mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
	mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
	mes_set_hw_res_pkt.paging_vmid = 0;
	mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr;
	mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
		mes->query_status_fence_gpu_addr;

	for (i = 0; i < MAX_COMPUTE_PIPES; i++)
		mes_set_hw_res_pkt.compute_hqd_mask[i] =
			mes->compute_hqd_mask[i];

	for (i = 0; i < MAX_GFX_PIPES; i++)
		mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i];

	for (i = 0; i < MAX_SDMA_PIPES; i++)
		mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i];

	for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
		mes_set_hw_res_pkt.aggregated_doorbells[i] =
			mes->aggregated_doorbells[i];

	/* NOTE(review): 5 presumably matches the register aperture segment
	 * count in the MES API struct — confirm against mes_v11_api_def.h. */
	for (i = 0; i < 5; i++) {
		mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i];
		mes_set_hw_res_pkt.mmhub_base[i] =
			adev->reg_offset[MMHUB_HWIP][0][i];
		mes_set_hw_res_pkt.osssys_base[i] =
			adev->reg_offset[OSSSYS_HWIP][0][i];
	}

	mes_set_hw_res_pkt.disable_reset = 1;
	mes_set_hw_res_pkt.disable_mes_log = 1;
	mes_set_hw_res_pkt.use_different_vmid_compute = 1;
	mes_set_hw_res_pkt.oversubscription_timer = 50;

	return mes_v11_0_submit_pkt_and_poll_completion(mes,
			&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
			offsetof(union MESAPI_SET_HW_RESOURCES, api_status));
}
373 :
/*
 * Program the five CP_MES_DOORBELL_CONTROLn registers with the
 * aggregated doorbell offset for each priority level (CONTROL1=LOW …
 * CONTROL5=REALTIME), enabling each doorbell and clearing its HIT bit,
 * then enable doorbell-updated messages in CP_HQD_GFX_CONTROL.
 */
static void mes_v11_0_init_aggregated_doorbell(struct amdgpu_mes *mes)
{
	struct amdgpu_device *adev = mes->adev;
	uint32_t data;

	/* Priority LOW -> CONTROL1 */
	data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1);
	data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK |
		  CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK |
		  CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK);
	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] <<
		CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT;
	data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT;
	WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1, data);

	/* Priority NORMAL -> CONTROL2 */
	data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2);
	data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK |
		  CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK |
		  CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK);
	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] <<
		CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT;
	data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT;
	WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2, data);

	/* Priority MEDIUM -> CONTROL3 */
	data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3);
	data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK |
		  CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK |
		  CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK);
	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] <<
		CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT;
	data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT;
	WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3, data);

	/* Priority HIGH -> CONTROL4 */
	data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4);
	data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK |
		  CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK |
		  CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK);
	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] <<
		CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT;
	data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT;
	WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4, data);

	/* Priority REALTIME -> CONTROL5 */
	data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5);
	data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK |
		  CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK |
		  CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK);
	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] <<
		CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT;
	data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT;
	WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5, data);

	/* Let the HQD post doorbell-updated messages. */
	data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT;
	WREG32_SOC15(GC, 0, regCP_HQD_GFX_CONTROL, data);
}
427 :
/* MES v11 callbacks dispatched by the common amdgpu MES layer. */
static const struct amdgpu_mes_funcs mes_v11_0_funcs = {
	.add_hw_queue = mes_v11_0_add_hw_queue,
	.remove_hw_queue = mes_v11_0_remove_hw_queue,
	.unmap_legacy_queue = mes_v11_0_unmap_legacy_queue,
	.suspend_gang = mes_v11_0_suspend_gang,
	.resume_gang = mes_v11_0_resume_gang,
	.misc_op = mes_v11_0_misc_op,
};
436 :
437 0 : static int mes_v11_0_init_microcode(struct amdgpu_device *adev,
438 : enum admgpu_mes_pipe pipe)
439 : {
440 : char fw_name[30];
441 : char ucode_prefix[30];
442 : int err;
443 : const struct mes_firmware_header_v1_0 *mes_hdr;
444 : struct amdgpu_firmware_info *info;
445 :
446 0 : amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
447 :
448 0 : if (pipe == AMDGPU_MES_SCHED_PIPE)
449 0 : snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes.bin",
450 : ucode_prefix);
451 : else
452 0 : snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes1.bin",
453 : ucode_prefix);
454 :
455 0 : err = request_firmware(&adev->mes.fw[pipe], fw_name, adev->dev);
456 0 : if (err)
457 : return err;
458 :
459 0 : err = amdgpu_ucode_validate(adev->mes.fw[pipe]);
460 0 : if (err) {
461 0 : release_firmware(adev->mes.fw[pipe]);
462 0 : adev->mes.fw[pipe] = NULL;
463 0 : return err;
464 : }
465 :
466 0 : mes_hdr = (const struct mes_firmware_header_v1_0 *)
467 0 : adev->mes.fw[pipe]->data;
468 0 : adev->mes.ucode_fw_version[pipe] =
469 0 : le32_to_cpu(mes_hdr->mes_ucode_version);
470 0 : adev->mes.ucode_fw_version[pipe] =
471 0 : le32_to_cpu(mes_hdr->mes_ucode_data_version);
472 0 : adev->mes.uc_start_addr[pipe] =
473 0 : le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |
474 0 : ((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32);
475 0 : adev->mes.data_start_addr[pipe] =
476 0 : le32_to_cpu(mes_hdr->mes_data_start_addr_lo) |
477 0 : ((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32);
478 :
479 0 : if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
480 : int ucode, ucode_data;
481 :
482 0 : if (pipe == AMDGPU_MES_SCHED_PIPE) {
483 : ucode = AMDGPU_UCODE_ID_CP_MES;
484 : ucode_data = AMDGPU_UCODE_ID_CP_MES_DATA;
485 : } else {
486 0 : ucode = AMDGPU_UCODE_ID_CP_MES1;
487 0 : ucode_data = AMDGPU_UCODE_ID_CP_MES1_DATA;
488 : }
489 :
490 0 : info = &adev->firmware.ucode[ucode];
491 0 : info->ucode_id = ucode;
492 0 : info->fw = adev->mes.fw[pipe];
493 0 : adev->firmware.fw_size +=
494 0 : ALIGN(le32_to_cpu(mes_hdr->mes_ucode_size_bytes),
495 : PAGE_SIZE);
496 :
497 0 : info = &adev->firmware.ucode[ucode_data];
498 0 : info->ucode_id = ucode_data;
499 0 : info->fw = adev->mes.fw[pipe];
500 0 : adev->firmware.fw_size +=
501 0 : ALIGN(le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes),
502 : PAGE_SIZE);
503 : }
504 :
505 : return 0;
506 : }
507 :
/* Release the firmware blob held for @pipe and clear the cached pointer. */
static void mes_v11_0_free_microcode(struct amdgpu_device *adev,
				     enum admgpu_mes_pipe pipe)
{
	release_firmware(adev->mes.fw[pipe]);
	adev->mes.fw[pipe] = NULL;
}
514 :
/*
 * Copy the MES instruction ucode image out of the firmware blob into a
 * page-aligned VRAM buffer for backdoor loading.
 *
 * Returns 0 on success or the error from BO creation.
 */
static int mes_v11_0_allocate_ucode_buffer(struct amdgpu_device *adev,
					   enum admgpu_mes_pipe pipe)
{
	int r;
	const struct mes_firmware_header_v1_0 *mes_hdr;
	const __le32 *fw_data;
	unsigned fw_size;

	mes_hdr = (const struct mes_firmware_header_v1_0 *)
		adev->mes.fw[pipe]->data;

	/* Instruction image location/size come from the firmware header. */
	fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
		   le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
	fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);

	r = amdgpu_bo_create_reserved(adev, fw_size,
				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->mes.ucode_fw_obj[pipe],
				      &adev->mes.ucode_fw_gpu_addr[pipe],
				      (void **)&adev->mes.ucode_fw_ptr[pipe]);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r);
		return r;
	}

	memcpy(adev->mes.ucode_fw_ptr[pipe], fw_data, fw_size);

	amdgpu_bo_kunmap(adev->mes.ucode_fw_obj[pipe]);
	amdgpu_bo_unreserve(adev->mes.ucode_fw_obj[pipe]);

	return 0;
}
547 :
/*
 * Copy the MES ucode *data* image out of the firmware blob into a
 * 64KB-aligned VRAM buffer for backdoor loading.
 *
 * Returns 0 on success or the error from BO creation.
 */
static int mes_v11_0_allocate_ucode_data_buffer(struct amdgpu_device *adev,
						enum admgpu_mes_pipe pipe)
{
	int r;
	const struct mes_firmware_header_v1_0 *mes_hdr;
	const __le32 *fw_data;
	unsigned fw_size;

	mes_hdr = (const struct mes_firmware_header_v1_0 *)
		adev->mes.fw[pipe]->data;

	/* Data image location/size come from the firmware header. */
	fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
		   le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
	fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);

	r = amdgpu_bo_create_reserved(adev, fw_size,
				      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->mes.data_fw_obj[pipe],
				      &adev->mes.data_fw_gpu_addr[pipe],
				      (void **)&adev->mes.data_fw_ptr[pipe]);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r);
		return r;
	}

	memcpy(adev->mes.data_fw_ptr[pipe], fw_data, fw_size);

	amdgpu_bo_kunmap(adev->mes.data_fw_obj[pipe]);
	amdgpu_bo_unreserve(adev->mes.data_fw_obj[pipe]);

	return 0;
}
580 :
/* Free the VRAM buffers holding the backdoor-loaded ucode and data
 * images for @pipe (safe to call if they were never allocated). */
static void mes_v11_0_free_ucode_buffers(struct amdgpu_device *adev,
					 enum admgpu_mes_pipe pipe)
{
	amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[pipe],
			      &adev->mes.data_fw_gpu_addr[pipe],
			      (void **)&adev->mes.data_fw_ptr[pipe]);

	amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj[pipe],
			      &adev->mes.ucode_fw_gpu_addr[pipe],
			      (void **)&adev->mes.ucode_fw_ptr[pipe]);
}
592 :
/*
 * Start or halt the MES micro-engines.
 *
 * enable=true: assert reset on the pipes in use, program each active
 * pipe's ucode start address via GRBM (me=3), then clear reset and set
 * the pipes active; a short delay lets the engines come up. Pipe1
 * (KIQ) is only touched when adev->enable_mes_kiq is set.
 *
 * enable=false: deactivate both pipes, request an I-cache invalidate,
 * assert reset and halt.
 */
static void mes_v11_0_enable(struct amdgpu_device *adev, bool enable)
{
	uint64_t ucode_addr;
	uint32_t pipe, data = 0;

	if (enable) {
		data = RREG32_SOC15(GC, 0, regCP_MES_CNTL);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL,
			     MES_PIPE1_RESET, adev->enable_mes_kiq ? 1 : 0);
		WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);

		mutex_lock(&adev->srbm_mutex);
		for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
			if (!adev->enable_mes_kiq &&
			    pipe == AMDGPU_MES_KIQ_PIPE)
				continue;

			/* Select me=3 (MES), this pipe, queue 0. */
			soc21_grbm_select(adev, 3, pipe, 0, 0);

			/* Start address registers take a dword address. */
			ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
			WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START,
				     lower_32_bits(ucode_addr));
			WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START_HI,
				     upper_32_bits(ucode_addr));
		}
		soc21_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* unhalt MES and activate pipe0 */
		data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE,
				     adev->enable_mes_kiq ? 1 : 0);
		WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);

		/* Emulation runs much slower, so wait far longer there. */
		if (amdgpu_emu_mode)
			msleep(100);
		else
			udelay(50);
	} else {
		data = RREG32_SOC15(GC, 0, regCP_MES_CNTL);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 0);
		data = REG_SET_FIELD(data, CP_MES_CNTL,
				     MES_INVALIDATE_ICACHE, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET,
				     adev->enable_mes_kiq ? 1 : 0);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1);
		WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
	}
}
645 :
646 : /* This function is for backdoor MES firmware */
/*
 * This function is for backdoor MES firmware: copy the ucode and data
 * images into VRAM and program the CP_MES instruction/data cache base
 * registers directly (instead of loading through PSP), then optionally
 * invalidate and prime the instruction cache.
 *
 * Returns 0 on success, -EINVAL if the firmware was never fetched, or
 * the error from buffer allocation.
 */
static int mes_v11_0_load_microcode(struct amdgpu_device *adev,
				    enum admgpu_mes_pipe pipe, bool prime_icache)
{
	int r;
	uint32_t data;
	uint64_t ucode_addr;

	/* Engines must be halted while their caches are reprogrammed. */
	mes_v11_0_enable(adev, false);

	if (!adev->mes.fw[pipe])
		return -EINVAL;

	r = mes_v11_0_allocate_ucode_buffer(adev, pipe);
	if (r)
		return r;

	r = mes_v11_0_allocate_ucode_data_buffer(adev, pipe);
	if (r) {
		mes_v11_0_free_ucode_buffers(adev, pipe);
		return r;
	}

	mutex_lock(&adev->srbm_mutex);
	/* me=3, pipe=0, queue=0 */
	soc21_grbm_select(adev, 3, pipe, 0, 0);

	WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_CNTL, 0);

	/* set ucode start address (dword address) */
	ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
	WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START,
		     lower_32_bits(ucode_addr));
	WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START_HI,
		     upper_32_bits(ucode_addr));

	/* set ucode firmware address */
	WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_LO,
		     lower_32_bits(adev->mes.ucode_fw_gpu_addr[pipe]));
	WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_HI,
		     upper_32_bits(adev->mes.ucode_fw_gpu_addr[pipe]));

	/* set ucode instruction cache boundary to 2M-1 */
	WREG32_SOC15(GC, 0, regCP_MES_MIBOUND_LO, 0x1FFFFF);

	/* set ucode data firmware address */
	WREG32_SOC15(GC, 0, regCP_MES_MDBASE_LO,
		     lower_32_bits(adev->mes.data_fw_gpu_addr[pipe]));
	WREG32_SOC15(GC, 0, regCP_MES_MDBASE_HI,
		     upper_32_bits(adev->mes.data_fw_gpu_addr[pipe]));

	/* Set 0x3FFFF (256K-1) to CP_MES_MDBOUND_LO */
	WREG32_SOC15(GC, 0, regCP_MES_MDBOUND_LO, 0x3FFFF);

	if (prime_icache) {
		/* invalidate ICACHE */
		data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL);
		data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
		data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
		WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data);

		/* prime the ICACHE. */
		data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL);
		data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
		WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data);
	}

	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	return 0;
}
718 :
/*
 * Allocate and zero the EOP (end-of-pipe) buffer for @pipe in GTT;
 * the GPU address is stored in adev->mes.eop_gpu_addr[pipe].
 *
 * Returns 0 on success or the error from BO creation.
 */
static int mes_v11_0_allocate_eop_buf(struct amdgpu_device *adev,
				      enum admgpu_mes_pipe pipe)
{
	int r;
	u32 *eop;

	r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE,
			      AMDGPU_GEM_DOMAIN_GTT,
			      &adev->mes.eop_gpu_obj[pipe],
			      &adev->mes.eop_gpu_addr[pipe],
			      (void **)&eop);
	if (r) {
		dev_warn(adev->dev, "(%d) create EOP bo failed\n", r);
		return r;
	}

	/* Zero the whole BO (may be larger than MES_EOP_SIZE after
	 * alignment). */
	memset(eop, 0,
	       adev->mes.eop_gpu_obj[pipe]->tbo.base.size);

	amdgpu_bo_kunmap(adev->mes.eop_gpu_obj[pipe]);
	amdgpu_bo_unreserve(adev->mes.eop_gpu_obj[pipe]);

	return 0;
}
743 :
/*
 * Initialize the compute MQD (memory queue descriptor) backing the MES
 * ring: EOP buffer, MQD and ring-buffer base addresses, rptr/wptr
 * report addresses, queue control bits, doorbell settings, and
 * persistent state. The descriptor is committed to hardware elsewhere
 * (see mes_v11_0_queue_init_register()).
 *
 * Always returns 0.
 */
static int mes_v11_0_mqd_init(struct amdgpu_ring *ring)
{
	struct v11_compute_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	/* Enable static thread management on all SEs. */
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000007;

	/* EOP base is a 256-byte-aligned address (low 8 bits dropped). */
	eop_base_addr = ring->eop_gpu_addr >> 8;

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(MES_EOP_SIZE / 4) - 1));

	mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_base_addr);
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
	mqd->cp_hqd_eop_control = tmp;

	/* disable the queue if it's active */
	ring->wptr = 0;
	mqd->cp_hqd_pq_rptr = 0;
	mqd->cp_hqd_pq_wptr_lo = 0;
	mqd->cp_hqd_pq_wptr_hi = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = regCP_MQD_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = lower_32_bits(hqd_gpu_addr);
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = ring->rptr_gpu_addr;
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = ring->wptr_gpu_addr;
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			    ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, NO_UPDATE_RPTR, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* enable doorbell */
	tmp = 0;
	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
	}
	else
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 0);
	mqd->cp_hqd_pq_doorbell_control = tmp;

	mqd->cp_hqd_vmid = 0;
	/* activate the queue */
	mqd->cp_hqd_active = 1;

	tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE,
			    PRELOAD_SIZE, 0x55);
	mqd->cp_hqd_persistent_state = tmp;

	mqd->cp_hqd_ib_control = regCP_HQD_IB_CONTROL_DEFAULT;
	mqd->cp_hqd_iq_timer = regCP_HQD_IQ_TIMER_DEFAULT;
	mqd->cp_hqd_quantum = regCP_HQD_QUANTUM_DEFAULT;

	return 0;
}
845 :
846 0 : static void mes_v11_0_queue_init_register(struct amdgpu_ring *ring)
847 : {
848 0 : struct v11_compute_mqd *mqd = ring->mqd_ptr;
849 0 : struct amdgpu_device *adev = ring->adev;
850 0 : uint32_t data = 0;
851 :
852 0 : mutex_lock(&adev->srbm_mutex);
853 0 : soc21_grbm_select(adev, 3, ring->pipe, 0, 0);
854 :
855 : /* set CP_HQD_VMID.VMID = 0. */
856 0 : data = RREG32_SOC15(GC, 0, regCP_HQD_VMID);
857 0 : data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0);
858 0 : WREG32_SOC15(GC, 0, regCP_HQD_VMID, data);
859 :
860 : /* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */
861 0 : data = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
862 0 : data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
863 : DOORBELL_EN, 0);
864 0 : WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, data);
865 :
866 : /* set CP_MQD_BASE_ADDR/HI with the MQD base address */
867 0 : WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
868 0 : WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
869 :
870 : /* set CP_MQD_CONTROL.VMID=0 */
871 0 : data = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
872 0 : data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0);
873 0 : WREG32_SOC15(GC, 0, regCP_MQD_CONTROL, 0);
874 :
875 : /* set CP_HQD_PQ_BASE/HI with the ring buffer base address */
876 0 : WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
877 0 : WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
878 :
879 : /* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */
880 0 : WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
881 : mqd->cp_hqd_pq_rptr_report_addr_lo);
882 0 : WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
883 : mqd->cp_hqd_pq_rptr_report_addr_hi);
884 :
885 : /* set CP_HQD_PQ_CONTROL */
886 0 : WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
887 :
888 : /* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */
889 0 : WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
890 : mqd->cp_hqd_pq_wptr_poll_addr_lo);
891 0 : WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
892 : mqd->cp_hqd_pq_wptr_poll_addr_hi);
893 :
894 : /* set CP_HQD_PQ_DOORBELL_CONTROL */
895 0 : WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
896 : mqd->cp_hqd_pq_doorbell_control);
897 :
898 : /* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x53 */
899 0 : WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
900 :
901 : /* set CP_HQD_ACTIVE.ACTIVE=1 */
902 0 : WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, mqd->cp_hqd_active);
903 :
904 0 : soc21_grbm_select(adev, 0, 0, 0, 0);
905 0 : mutex_unlock(&adev->srbm_mutex);
906 0 : }
907 :
908 0 : static int mes_v11_0_kiq_enable_queue(struct amdgpu_device *adev)
909 : {
910 0 : struct amdgpu_kiq *kiq = &adev->gfx.kiq;
911 0 : struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
912 : int r;
913 :
914 0 : if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
915 : return -EINVAL;
916 :
917 0 : r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
918 0 : if (r) {
919 0 : DRM_ERROR("Failed to lock KIQ (%d).\n", r);
920 0 : return r;
921 : }
922 :
923 0 : kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring);
924 :
925 0 : r = amdgpu_ring_test_ring(kiq_ring);
926 0 : if (r) {
927 0 : DRM_ERROR("kfq enable failed\n");
928 0 : kiq_ring->sched.ready = false;
929 : }
930 : return r;
931 : }
932 :
933 0 : static int mes_v11_0_queue_init(struct amdgpu_device *adev,
934 : enum admgpu_mes_pipe pipe)
935 : {
936 : struct amdgpu_ring *ring;
937 : int r;
938 :
939 0 : if (pipe == AMDGPU_MES_KIQ_PIPE)
940 0 : ring = &adev->gfx.kiq.ring;
941 0 : else if (pipe == AMDGPU_MES_SCHED_PIPE)
942 0 : ring = &adev->mes.ring;
943 : else
944 0 : BUG();
945 :
946 0 : if ((pipe == AMDGPU_MES_SCHED_PIPE) &&
947 0 : (amdgpu_in_reset(adev) || adev->in_suspend)) {
948 0 : *(ring->wptr_cpu_addr) = 0;
949 0 : *(ring->rptr_cpu_addr) = 0;
950 : amdgpu_ring_clear_ring(ring);
951 : }
952 :
953 0 : r = mes_v11_0_mqd_init(ring);
954 0 : if (r)
955 : return r;
956 :
957 0 : if (pipe == AMDGPU_MES_SCHED_PIPE) {
958 0 : r = mes_v11_0_kiq_enable_queue(adev);
959 0 : if (r)
960 : return r;
961 : } else {
962 0 : mes_v11_0_queue_init_register(ring);
963 : }
964 :
965 : /* get MES scheduler/KIQ versions */
966 0 : mutex_lock(&adev->srbm_mutex);
967 0 : soc21_grbm_select(adev, 3, pipe, 0, 0);
968 :
969 0 : if (pipe == AMDGPU_MES_SCHED_PIPE)
970 0 : adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
971 0 : else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq)
972 0 : adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
973 :
974 0 : soc21_grbm_select(adev, 0, 0, 0, 0);
975 0 : mutex_unlock(&adev->srbm_mutex);
976 :
977 0 : return 0;
978 : }
979 :
980 0 : static int mes_v11_0_ring_init(struct amdgpu_device *adev)
981 : {
982 : struct amdgpu_ring *ring;
983 :
984 0 : ring = &adev->mes.ring;
985 :
986 0 : ring->funcs = &mes_v11_0_ring_funcs;
987 :
988 0 : ring->me = 3;
989 0 : ring->pipe = 0;
990 0 : ring->queue = 0;
991 :
992 0 : ring->ring_obj = NULL;
993 0 : ring->use_doorbell = true;
994 0 : ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1;
995 0 : ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_SCHED_PIPE];
996 0 : ring->no_scheduler = true;
997 0 : sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue);
998 :
999 0 : return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
1000 : AMDGPU_RING_PRIO_DEFAULT, NULL);
1001 : }
1002 :
1003 0 : static int mes_v11_0_kiq_ring_init(struct amdgpu_device *adev)
1004 : {
1005 : struct amdgpu_ring *ring;
1006 :
1007 0 : spin_lock_init(&adev->gfx.kiq.ring_lock);
1008 :
1009 0 : ring = &adev->gfx.kiq.ring;
1010 :
1011 0 : ring->me = 3;
1012 0 : ring->pipe = 1;
1013 0 : ring->queue = 0;
1014 :
1015 0 : ring->adev = NULL;
1016 0 : ring->ring_obj = NULL;
1017 0 : ring->use_doorbell = true;
1018 0 : ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1;
1019 0 : ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_KIQ_PIPE];
1020 0 : ring->no_scheduler = true;
1021 0 : sprintf(ring->name, "mes_kiq_%d.%d.%d",
1022 : ring->me, ring->pipe, ring->queue);
1023 :
1024 0 : return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
1025 : AMDGPU_RING_PRIO_DEFAULT, NULL);
1026 : }
1027 :
1028 0 : static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev,
1029 : enum admgpu_mes_pipe pipe)
1030 : {
1031 0 : int r, mqd_size = sizeof(struct v11_compute_mqd);
1032 : struct amdgpu_ring *ring;
1033 :
1034 0 : if (pipe == AMDGPU_MES_KIQ_PIPE)
1035 0 : ring = &adev->gfx.kiq.ring;
1036 0 : else if (pipe == AMDGPU_MES_SCHED_PIPE)
1037 0 : ring = &adev->mes.ring;
1038 : else
1039 0 : BUG();
1040 :
1041 0 : if (ring->mqd_obj)
1042 : return 0;
1043 :
1044 0 : r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
1045 : AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
1046 0 : &ring->mqd_gpu_addr, &ring->mqd_ptr);
1047 0 : if (r) {
1048 0 : dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
1049 0 : return r;
1050 : }
1051 :
1052 0 : memset(ring->mqd_ptr, 0, mqd_size);
1053 :
1054 : /* prepare MQD backup */
1055 0 : adev->mes.mqd_backup[pipe] = kmalloc(mqd_size, GFP_KERNEL);
1056 0 : if (!adev->mes.mqd_backup[pipe])
1057 0 : dev_warn(adev->dev,
1058 : "no memory to create MQD backup for ring %s\n",
1059 : ring->name);
1060 :
1061 : return 0;
1062 : }
1063 :
1064 0 : static int mes_v11_0_sw_init(void *handle)
1065 : {
1066 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1067 : int pipe, r;
1068 :
1069 0 : adev->mes.adev = adev;
1070 0 : adev->mes.funcs = &mes_v11_0_funcs;
1071 0 : adev->mes.kiq_hw_init = &mes_v11_0_kiq_hw_init;
1072 0 : adev->mes.kiq_hw_fini = &mes_v11_0_kiq_hw_fini;
1073 :
1074 0 : r = amdgpu_mes_init(adev);
1075 0 : if (r)
1076 : return r;
1077 :
1078 0 : for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
1079 0 : if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE)
1080 0 : continue;
1081 :
1082 0 : r = mes_v11_0_init_microcode(adev, pipe);
1083 0 : if (r)
1084 : return r;
1085 :
1086 0 : r = mes_v11_0_allocate_eop_buf(adev, pipe);
1087 0 : if (r)
1088 : return r;
1089 :
1090 0 : r = mes_v11_0_mqd_sw_init(adev, pipe);
1091 0 : if (r)
1092 : return r;
1093 : }
1094 :
1095 0 : if (adev->enable_mes_kiq) {
1096 0 : r = mes_v11_0_kiq_ring_init(adev);
1097 0 : if (r)
1098 : return r;
1099 : }
1100 :
1101 0 : r = mes_v11_0_ring_init(adev);
1102 0 : if (r)
1103 : return r;
1104 :
1105 0 : return 0;
1106 : }
1107 :
/*
 * mes_v11_0_sw_fini() - IP-block sw_fini callback for MES v11.
 * @handle: amdgpu device handle.
 *
 * Releases everything sw_init created, in reverse dependency order:
 * write-back slots, per-pipe MQD backups / EOP buffers / microcode,
 * the MQD BOs, then the rings themselves. Always returns 0.
 */
static int mes_v11_0_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int pipe;

	/* free the scheduler-context and query-status write-back slots */
	amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
	amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);

	for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
		/* kfree(NULL) is a no-op, so the disabled-KIQ case is safe */
		kfree(adev->mes.mqd_backup[pipe]);

		amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[pipe],
				      &adev->mes.eop_gpu_addr[pipe],
				      NULL);

		mes_v11_0_free_microcode(adev, pipe);
	}

	/* MQD BOs must go before the rings that reference them */
	amdgpu_bo_free_kernel(&adev->gfx.kiq.ring.mqd_obj,
			      &adev->gfx.kiq.ring.mqd_gpu_addr,
			      &adev->gfx.kiq.ring.mqd_ptr);

	amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj,
			      &adev->mes.ring.mqd_gpu_addr,
			      &adev->mes.ring.mqd_ptr);

	amdgpu_ring_fini(&adev->gfx.kiq.ring);
	amdgpu_ring_fini(&adev->mes.ring);

	/* ucode BOs exist only for the direct (non-PSP) firmware load path */
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		mes_v11_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE);
		mes_v11_0_free_ucode_buffers(adev, AMDGPU_MES_SCHED_PIPE);
	}

	amdgpu_mes_fini(adev);
	return 0;
}
1145 :
/*
 * mes_v11_0_kiq_setting() - tell the RLC which hardware queue is the KIQ.
 * @ring: the KIQ ring (its me/pipe/queue identify the hw queue).
 *
 * Packs me/pipe/queue into the low byte of RLC_CP_SCHEDULERS, then writes
 * the register a second time with bit 0x80 set — presumably an
 * "entry valid/active" flag that must be raised only after the queue id
 * is programmed (two-step sequence); TODO confirm against the
 * RLC_CP_SCHEDULERS register spec.
 */
static void mes_v11_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
	/* clear the low byte that holds the queue selector */
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
}
1159 :
1160 0 : static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev)
1161 : {
1162 0 : int r = 0;
1163 :
1164 0 : if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
1165 :
1166 0 : r = mes_v11_0_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, false);
1167 0 : if (r) {
1168 0 : DRM_ERROR("failed to load MES fw, r=%d\n", r);
1169 0 : return r;
1170 : }
1171 :
1172 0 : r = mes_v11_0_load_microcode(adev, AMDGPU_MES_KIQ_PIPE, true);
1173 0 : if (r) {
1174 0 : DRM_ERROR("failed to load MES kiq fw, r=%d\n", r);
1175 0 : return r;
1176 : }
1177 :
1178 : }
1179 :
1180 0 : mes_v11_0_enable(adev, true);
1181 :
1182 0 : mes_v11_0_kiq_setting(&adev->gfx.kiq.ring);
1183 :
1184 0 : r = mes_v11_0_queue_init(adev, AMDGPU_MES_KIQ_PIPE);
1185 0 : if (r)
1186 : goto failure;
1187 :
1188 : return r;
1189 :
1190 : failure:
1191 0 : mes_v11_0_hw_fini(adev);
1192 0 : return r;
1193 : }
1194 :
1195 0 : static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev)
1196 : {
1197 0 : mes_v11_0_enable(adev, false);
1198 0 : return 0;
1199 : }
1200 :
1201 0 : static int mes_v11_0_hw_init(void *handle)
1202 : {
1203 : int r;
1204 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1205 :
1206 0 : if (!adev->enable_mes_kiq) {
1207 0 : if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
1208 0 : r = mes_v11_0_load_microcode(adev,
1209 : AMDGPU_MES_SCHED_PIPE, true);
1210 0 : if (r) {
1211 0 : DRM_ERROR("failed to MES fw, r=%d\n", r);
1212 0 : return r;
1213 : }
1214 : }
1215 :
1216 0 : mes_v11_0_enable(adev, true);
1217 : }
1218 :
1219 0 : r = mes_v11_0_queue_init(adev, AMDGPU_MES_SCHED_PIPE);
1220 0 : if (r)
1221 : goto failure;
1222 :
1223 0 : r = mes_v11_0_set_hw_resources(&adev->mes);
1224 0 : if (r)
1225 : goto failure;
1226 :
1227 0 : mes_v11_0_init_aggregated_doorbell(&adev->mes);
1228 :
1229 0 : r = mes_v11_0_query_sched_status(&adev->mes);
1230 0 : if (r) {
1231 0 : DRM_ERROR("MES is busy\n");
1232 0 : goto failure;
1233 : }
1234 :
1235 : /*
1236 : * Disable KIQ ring usage from the driver once MES is enabled.
1237 : * MES uses KIQ ring exclusively so driver cannot access KIQ ring
1238 : * with MES enabled.
1239 : */
1240 0 : adev->gfx.kiq.ring.sched.ready = false;
1241 0 : adev->mes.ring.sched.ready = true;
1242 :
1243 0 : return 0;
1244 :
1245 : failure:
1246 0 : mes_v11_0_hw_fini(adev);
1247 0 : return r;
1248 : }
1249 :
1250 0 : static int mes_v11_0_hw_fini(void *handle)
1251 : {
1252 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1253 :
1254 0 : adev->mes.ring.sched.ready = false;
1255 0 : return 0;
1256 : }
1257 :
/*
 * mes_v11_0_suspend() - IP-block suspend callback for MES v11.
 * @handle: amdgpu device handle.
 *
 * Suspends the MES-managed queues first, then stops the scheduler ring
 * via hw_fini.
 *
 * Return: 0 on success, negative errno on failure.
 */
static int mes_v11_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_mes_suspend(adev);

	return r ? r : mes_v11_0_hw_fini(adev);
}
1269 :
/*
 * mes_v11_0_resume() - IP-block resume callback for MES v11.
 * @handle: amdgpu device handle.
 *
 * Brings the hardware back up first, then restores the previously
 * suspended MES-managed queues — the reverse of mes_v11_0_suspend().
 *
 * Return: 0 on success, negative errno on failure.
 */
static int mes_v11_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = mes_v11_0_hw_init(adev);

	return r ? r : amdgpu_mes_resume(adev);
}
1281 :
/*
 * mes_v11_0_late_init() - IP-block late_init callback for MES v11.
 * @handle: amdgpu device handle.
 *
 * Runs the MES self test on a fresh bring-up; skipped during GPU reset.
 * Always returns 0 (the self test result does not fail init).
 */
static int mes_v11_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!amdgpu_in_reset(adev))
		amdgpu_mes_self_test(adev);

	return 0;
}
1291 :
/* IP-block callback table for MES v11; early_init/soft_reset and the
 * clock/power-gating hooks are intentionally left unimplemented (NULL). */
static const struct amd_ip_funcs mes_v11_0_ip_funcs = {
	.name = "mes_v11_0",
	.late_init = mes_v11_0_late_init,
	.sw_init = mes_v11_0_sw_init,
	.sw_fini = mes_v11_0_sw_fini,
	.hw_init = mes_v11_0_hw_init,
	.hw_fini = mes_v11_0_hw_fini,
	.suspend = mes_v11_0_suspend,
	.resume = mes_v11_0_resume,
};
1302 :
/* Exported IP-block descriptor (MES 11.0.0) registered by the SoC code. */
const struct amdgpu_ip_block_version mes_v11_0_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_MES,
	.major = 11,
	.minor = 0,
	.rev = 0,
	.funcs = &mes_v11_0_ip_funcs,
};
|