Line data Source code
1 : /*
2 : * Copyright 2020 Advanced Micro Devices, Inc.
3 : *
4 : * Permission is hereby granted, free of charge, to any person obtaining a
5 : * copy of this software and associated documentation files (the "Software"),
6 : * to deal in the Software without restriction, including without limitation
7 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 : * and/or sell copies of the Software, and to permit persons to whom the
9 : * Software is furnished to do so, subject to the following conditions:
10 : *
11 : * The above copyright notice and this permission notice shall be included in
12 : * all copies or substantial portions of the Software.
13 : *
14 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 : * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 : * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 : * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 : * OTHER DEALINGS IN THE SOFTWARE.
21 : *
22 : */
23 :
24 : #include <linux/delay.h>
25 : #include <linux/firmware.h>
26 : #include <linux/module.h>
27 : #include <linux/pci.h>
28 :
29 : #include "amdgpu.h"
30 : #include "amdgpu_ucode.h"
31 : #include "amdgpu_trace.h"
32 :
33 : #include "gc/gc_11_0_0_offset.h"
34 : #include "gc/gc_11_0_0_sh_mask.h"
35 : #include "gc/gc_11_0_0_default.h"
36 : #include "hdp/hdp_6_0_0_offset.h"
37 : #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
38 :
39 : #include "soc15_common.h"
40 : #include "soc15.h"
41 : #include "sdma_v6_0_0_pkt_open.h"
42 : #include "nbio_v4_3.h"
43 : #include "sdma_common.h"
44 : #include "sdma_v6_0.h"
45 : #include "v11_structs.h"
46 :
47 : MODULE_FIRMWARE("amdgpu/sdma_6_0_0.bin");
48 : MODULE_FIRMWARE("amdgpu/sdma_6_0_1.bin");
49 : MODULE_FIRMWARE("amdgpu/sdma_6_0_2.bin");
50 : MODULE_FIRMWARE("amdgpu/sdma_6_0_3.bin");
51 :
52 : #define SDMA1_REG_OFFSET 0x600
53 : #define SDMA0_HYP_DEC_REG_START 0x5880
54 : #define SDMA0_HYP_DEC_REG_END 0x589a
55 : #define SDMA1_HYP_DEC_REG_OFFSET 0x20
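/*
 * Register layout used by sdma_v6_0_get_reg_offset() below: instance 1's
 * regular registers sit SDMA1_REG_OFFSET (0x600) dwords above instance 0's,
 * while offsets in the hypervisor-decoded range
 * [SDMA0_HYP_DEC_REG_START, SDMA0_HYP_DEC_REG_END] use a separate base and a
 * SDMA1_HYP_DEC_REG_OFFSET (0x20) stride per instance.
 */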
56 :
57 : static void sdma_v6_0_set_ring_funcs(struct amdgpu_device *adev);
58 : static void sdma_v6_0_set_buffer_funcs(struct amdgpu_device *adev);
59 : static void sdma_v6_0_set_vm_pte_funcs(struct amdgpu_device *adev);
60 : static void sdma_v6_0_set_irq_funcs(struct amdgpu_device *adev);
61 : static int sdma_v6_0_start(struct amdgpu_device *adev);
62 :
63 : static u32 sdma_v6_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
64 : {
65 : u32 base;
66 :
67 : if (internal_offset >= SDMA0_HYP_DEC_REG_START &&
68 : internal_offset <= SDMA0_HYP_DEC_REG_END) {
69 0 : base = adev->reg_offset[GC_HWIP][0][1];
70 0 : if (instance != 0)
71 0 : internal_offset += SDMA1_HYP_DEC_REG_OFFSET * instance;
72 : } else {
73 0 : base = adev->reg_offset[GC_HWIP][0][0];
74 0 : if (instance == 1)
75 0 : internal_offset += SDMA1_REG_OFFSET;
76 : }
77 :
78 0 : return base + internal_offset;
79 : }
80 :
81 0 : static int sdma_v6_0_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst)
82 : {
83 0 : int err = 0;
84 : const struct sdma_firmware_header_v2_0 *hdr;
85 :
86 0 : err = amdgpu_ucode_validate(sdma_inst->fw);
87 0 : if (err)
88 : return err;
89 :
90 0 : hdr = (const struct sdma_firmware_header_v2_0 *)sdma_inst->fw->data;
91 0 : sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version);
92 0 : sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version);
93 :
94 0 : if (sdma_inst->feature_version >= 20)
95 0 : sdma_inst->burst_nop = true;
96 :
97 : return 0;
98 : }
99 :
100 0 : static void sdma_v6_0_destroy_inst_ctx(struct amdgpu_device *adev)
101 : {
102 0 : release_firmware(adev->sdma.instance[0].fw);
103 :
104 0 : memset((void*)adev->sdma.instance, 0,
105 : sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES);
106 0 : }
107 :
108 : /**
109 : * sdma_v6_0_init_microcode - load ucode images from disk
110 : *
111 : * @adev: amdgpu_device pointer
112 : *
113 : * Use the firmware interface to load the ucode images into
114 : * the driver (not loaded into hw).
115 : * Returns 0 on success, error on failure.
116 : */
117 :
118 : // emulation only, won't work on a real chip
119 : // on real SDMA 6.0.0 silicon the firmware must be loaded through the PSP
120 0 : static int sdma_v6_0_init_microcode(struct amdgpu_device *adev)
121 : {
122 : char fw_name[30];
123 : char ucode_prefix[30];
124 0 : int err = 0, i;
125 0 : struct amdgpu_firmware_info *info = NULL;
126 : const struct sdma_firmware_header_v2_0 *sdma_hdr;
127 :
128 0 : DRM_DEBUG("\n");
129 :
130 0 : amdgpu_ucode_ip_version_decode(adev, SDMA0_HWIP, ucode_prefix, sizeof(ucode_prefix));
131 :
132 0 : snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix);
133 :
134 0 : err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev);
135 0 : if (err)
136 : goto out;
137 :
138 0 : err = sdma_v6_0_init_inst_ctx(&adev->sdma.instance[0]);
139 0 : if (err)
140 : goto out;
141 :
142 0 : for (i = 1; i < adev->sdma.num_instances; i++) {
143 0 : memcpy((void*)&adev->sdma.instance[i],
144 : (void*)&adev->sdma.instance[0],
145 : sizeof(struct amdgpu_sdma_instance));
146 : }
147 :
148 0 : DRM_DEBUG("psp_load == '%s'\n",
149 : adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
150 :
151 0 : if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
152 0 : sdma_hdr = (const struct sdma_firmware_header_v2_0 *)adev->sdma.instance[0].fw->data;
153 0 : info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA_UCODE_TH0];
154 0 : info->ucode_id = AMDGPU_UCODE_ID_SDMA_UCODE_TH0;
155 0 : info->fw = adev->sdma.instance[0].fw;
156 0 : adev->firmware.fw_size +=
157 0 : ALIGN(le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes), PAGE_SIZE);
158 0 : info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA_UCODE_TH1];
159 0 : info->ucode_id = AMDGPU_UCODE_ID_SDMA_UCODE_TH1;
160 0 : info->fw = adev->sdma.instance[0].fw;
161 0 : adev->firmware.fw_size +=
162 0 : ALIGN(le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes), PAGE_SIZE);
163 : }
164 :
165 : out:
166 0 : if (err) {
167 0 : DRM_ERROR("sdma_v6_0: Failed to load firmware \"%s\"\n", fw_name);
168 0 : sdma_v6_0_destroy_inst_ctx(adev);
169 : }
170 0 : return err;
171 : }
172 :
173 0 : static unsigned sdma_v6_0_ring_init_cond_exec(struct amdgpu_ring *ring)
174 : {
175 : unsigned ret;
176 :
177 0 : amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COND_EXE));
178 0 : amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
179 0 : amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
180 0 : amdgpu_ring_write(ring, 1);
181 0 : ret = ring->wptr & ring->buf_mask; /* this is the offset we need to patch later */
182 0 : amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
183 :
184 0 : return ret;
185 : }
186 :
187 0 : static void sdma_v6_0_ring_patch_cond_exec(struct amdgpu_ring *ring,
188 : unsigned offset)
189 : {
190 : unsigned cur;
191 :
192 0 : BUG_ON(offset > ring->buf_mask);
193 0 : BUG_ON(ring->ring[offset] != 0x55aa55aa);
194 :
195 0 : cur = (ring->wptr - 1) & ring->buf_mask;
196 0 : if (cur > offset)
197 0 : ring->ring[offset] = cur - offset;
198 : else
199 0 : ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
200 0 : }
201 :
202 : /**
203 : * sdma_v6_0_ring_get_rptr - get the current read pointer
204 : *
205 : * @ring: amdgpu ring pointer
206 : *
207 : * Get the current rptr from the hardware.
208 : */
209 0 : static uint64_t sdma_v6_0_ring_get_rptr(struct amdgpu_ring *ring)
210 : {
211 : u64 *rptr;
212 :
213 : /* XXX check if swapping is necessary on BE */
214 0 : rptr = (u64 *)ring->rptr_cpu_addr;
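/* the writeback value is kept shifted left by 2 relative to the driver's dword-based pointer, hence the >> 2 below */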
215 :
216 0 : DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
217 0 : return ((*rptr) >> 2);
218 : }
219 :
220 : /**
221 : * sdma_v6_0_ring_get_wptr - get the current write pointer
222 : *
223 : * @ring: amdgpu ring pointer
224 : *
225 : * Get the current wptr from the hardware.
226 : */
227 0 : static uint64_t sdma_v6_0_ring_get_wptr(struct amdgpu_ring *ring)
228 : {
229 0 : u64 wptr = 0;
230 :
231 0 : if (ring->use_doorbell) {
232 : /* XXX check if swapping is necessary on BE */
233 0 : wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
234 0 : DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
235 : }
236 :
237 0 : return wptr >> 2;
238 : }
239 :
240 : /**
241 : * sdma_v6_0_ring_set_wptr - commit the write pointer
242 : *
243 : * @ring: amdgpu ring pointer
244 : *
245 : * Write the wptr back to the hardware.
246 : */
247 0 : static void sdma_v6_0_ring_set_wptr(struct amdgpu_ring *ring)
248 : {
249 0 : struct amdgpu_device *adev = ring->adev;
250 : uint32_t *wptr_saved;
251 : uint32_t *is_queue_unmap;
252 : uint64_t aggregated_db_index;
253 0 : uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size;
254 :
255 0 : DRM_DEBUG("Setting write pointer\n");
256 :
257 0 : if (ring->is_mes_queue) {
258 0 : wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
259 0 : is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
260 : sizeof(uint32_t));
261 0 : aggregated_db_index =
262 0 : amdgpu_mes_get_aggregated_doorbell_index(adev,
263 0 : ring->hw_prio);
264 :
265 0 : atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
266 0 : ring->wptr << 2);
267 0 : *wptr_saved = ring->wptr << 2;
268 0 : if (*is_queue_unmap) {
269 0 : WDOORBELL64(aggregated_db_index, ring->wptr << 2);
270 0 : DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
271 : ring->doorbell_index, ring->wptr << 2);
272 0 : WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
273 : } else {
274 0 : DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
275 : ring->doorbell_index, ring->wptr << 2);
276 0 : WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
277 :
278 0 : if (*is_queue_unmap)
279 0 : WDOORBELL64(aggregated_db_index,
280 : ring->wptr << 2);
281 : }
282 : } else {
283 0 : if (ring->use_doorbell) {
284 0 : DRM_DEBUG("Using doorbell -- "
285 : "wptr_offs == 0x%08x "
286 : "lower_32_bits(ring->wptr) << 2 == 0x%08x "
287 : "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
288 : ring->wptr_offs,
289 : lower_32_bits(ring->wptr << 2),
290 : upper_32_bits(ring->wptr << 2));
291 : /* XXX check if swapping is necessary on BE */
292 0 : atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
293 0 : ring->wptr << 2);
294 0 : DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
295 : ring->doorbell_index, ring->wptr << 2);
296 0 : WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
297 : } else {
298 0 : DRM_DEBUG("Not using doorbell -- "
299 : "regSDMA%i_GFX_RB_WPTR == 0x%08x "
300 : "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
301 : ring->me,
302 : lower_32_bits(ring->wptr << 2),
303 : ring->me,
304 : upper_32_bits(ring->wptr << 2));
305 0 : WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev,
306 : ring->me, regSDMA0_QUEUE0_RB_WPTR),
307 : lower_32_bits(ring->wptr << 2));
308 0 : WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev,
309 : ring->me, regSDMA0_QUEUE0_RB_WPTR_HI),
310 : upper_32_bits(ring->wptr << 2));
311 : }
312 : }
313 0 : }
314 :
315 0 : static void sdma_v6_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
316 : {
317 0 : struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
318 : int i;
319 :
320 0 : for (i = 0; i < count; i++)
321 0 : if (sdma && sdma->burst_nop && (i == 0))
322 0 : amdgpu_ring_write(ring, ring->funcs->nop |
323 0 : SDMA_PKT_NOP_HEADER_COUNT(count - 1));
324 : else
325 0 : amdgpu_ring_write(ring, ring->funcs->nop);
326 0 : }
327 :
328 : /**
329 : * sdma_v6_0_ring_emit_ib - Schedule an IB on the DMA engine
330 : *
331 : * @ring: amdgpu ring pointer
332 : * @ib: IB object to schedule
333 : *
334 : * Schedule an IB in the DMA ring.
335 : */
336 0 : static void sdma_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
337 : struct amdgpu_job *job,
338 : struct amdgpu_ib *ib,
339 : uint32_t flags)
340 : {
341 0 : unsigned vmid = AMDGPU_JOB_GET_VMID(job);
342 0 : uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
343 :
344 : /* An IB packet must end on an 8-dword boundary: the next packet
345 : * must start at an 8-dword-aligned offset. Our IB packet below is 6
346 : * dwords long, so add x NOPs such that, in modular
347 : * arithmetic,
348 : * wptr + 6 + x = 8k, k >= 0, which in C is
349 : * (wptr + 6 + x) % 8 = 0.
350 : * The expression below is a solution for x.
351 : */
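/* e.g. if ring->wptr % 8 == 5, then x = (2 - 5) & 7 = 5 and 5 + 6 + 5 = 16, a multiple of 8 */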
352 0 : sdma_v6_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
353 :
354 0 : amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_INDIRECT) |
355 0 : SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
356 : /* base must be 32 byte aligned */
357 0 : amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
358 0 : amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
359 0 : amdgpu_ring_write(ring, ib->length_dw);
360 0 : amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr));
361 0 : amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
362 0 : }
363 :
364 : /**
365 : * sdma_v6_0_ring_emit_mem_sync - flush the IB by graphics cache rinse
366 : *
367 : * @ring: amdgpu ring pointer
370 : *
371 : * Flush the IB with a graphics cache rinse.
372 : */
373 0 : static void sdma_v6_0_ring_emit_mem_sync(struct amdgpu_ring *ring)
374 : {
375 0 : uint32_t gcr_cntl = SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB | SDMA_GCR_GLM_INV |
376 : SDMA_GCR_GL1_INV | SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV |
377 : SDMA_GCR_GLI_INV(1);
378 :
379 : /* flush the entire L0/L1/L2 cache; this can be optimized based on performance requirements */
380 0 : amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_GCR_REQ));
381 0 : amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0));
382 0 : amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(gcr_cntl) |
383 : SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(0));
384 0 : amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(0) |
385 : SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(gcr_cntl >> 16));
386 0 : amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(0) |
387 : SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(0));
388 0 : }
389 :
390 :
391 : /**
392 : * sdma_v6_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
393 : *
394 : * @ring: amdgpu ring pointer
395 : *
396 : * Emit an hdp flush packet on the requested DMA ring.
397 : */
398 0 : static void sdma_v6_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
399 : {
400 0 : struct amdgpu_device *adev = ring->adev;
401 0 : u32 ref_and_mask = 0;
402 0 : const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
403 :
404 0 : ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
405 :
406 0 : amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
407 : SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
408 : SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
409 0 : amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
410 0 : amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
411 0 : amdgpu_ring_write(ring, ref_and_mask); /* reference */
412 0 : amdgpu_ring_write(ring, ref_and_mask); /* mask */
413 0 : amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
414 : SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
415 0 : }
416 :
417 : /**
418 : * sdma_v6_0_ring_emit_fence - emit a fence on the DMA ring
419 : *
420 : * @ring: amdgpu ring pointer
421 : * @addr: address to write the fence value to, @seq: fence sequence number, @flags: fence flags
422 : *
423 : * Add a DMA fence packet to the ring to write
424 : * the fence seq number and DMA trap packet to generate
425 : * an interrupt if needed.
426 : */
427 0 : static void sdma_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
428 : unsigned flags)
429 : {
430 0 : bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
431 : /* write the fence */
432 0 : amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) |
433 : SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Uncached (UC) */
434 : /* zero in first two bits */
435 0 : BUG_ON(addr & 0x3);
436 0 : amdgpu_ring_write(ring, lower_32_bits(addr));
437 0 : amdgpu_ring_write(ring, upper_32_bits(addr));
438 0 : amdgpu_ring_write(ring, lower_32_bits(seq));
439 :
440 : /* optionally write high bits as well */
441 0 : if (write64bit) {
442 0 : addr += 4;
443 0 : amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) |
444 : SDMA_PKT_FENCE_HEADER_MTYPE(0x3));
445 : /* zero in first two bits */
446 0 : BUG_ON(addr & 0x3);
447 0 : amdgpu_ring_write(ring, lower_32_bits(addr));
448 0 : amdgpu_ring_write(ring, upper_32_bits(addr));
449 0 : amdgpu_ring_write(ring, upper_32_bits(seq));
450 : }
451 :
452 0 : if (flags & AMDGPU_FENCE_FLAG_INT) {
453 0 : uint32_t ctx = ring->is_mes_queue ?
454 0 : (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0;
455 : /* generate an interrupt */
456 0 : amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP));
457 0 : amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx));
458 : }
459 0 : }
460 :
461 : /**
462 : * sdma_v6_0_gfx_stop - stop the gfx async dma engines
463 : *
464 : * @adev: amdgpu_device pointer
465 : *
466 : * Stop the gfx async dma ring buffers.
467 : */
468 0 : static void sdma_v6_0_gfx_stop(struct amdgpu_device *adev)
469 : {
470 0 : struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
471 0 : struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
472 : u32 rb_cntl, ib_cntl;
473 : int i;
474 :
475 0 : if ((adev->mman.buffer_funcs_ring == sdma0) ||
476 : (adev->mman.buffer_funcs_ring == sdma1))
477 0 : amdgpu_ttm_set_buffer_funcs_status(adev, false);
478 :
479 0 : for (i = 0; i < adev->sdma.num_instances; i++) {
480 0 : rb_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL));
481 0 : rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 0);
482 0 : WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
483 0 : ib_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL));
484 0 : ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 0);
485 0 : WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl);
486 : }
487 :
488 0 : sdma0->sched.ready = false;
489 0 : sdma1->sched.ready = false;
490 0 : }
491 :
492 : /**
493 : * sdma_v6_0_rlc_stop - stop the compute async dma engines
494 : *
495 : * @adev: amdgpu_device pointer
496 : *
497 : * Stop the compute async dma queues.
498 : */
499 : static void sdma_v6_0_rlc_stop(struct amdgpu_device *adev)
500 : {
501 : /* XXX todo */
502 : }
503 :
504 : /**
505 : * sdma_v6_0_ctx_switch_enable - stop the async dma engines context switch
506 : *
507 : * @adev: amdgpu_device pointer
508 : * @enable: enable/disable the DMA MEs context switch.
509 : *
510 : * Halt or unhalt the async dma engines context switch.
511 : */
512 : static void sdma_v6_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
513 : {
514 : }
515 :
516 : /**
517 : * sdma_v6_0_enable - stop the async dma engines
518 : *
519 : * @adev: amdgpu_device pointer
520 : * @enable: enable/disable the DMA MEs.
521 : *
522 : * Halt or unhalt the async dma engines.
523 : */
524 0 : static void sdma_v6_0_enable(struct amdgpu_device *adev, bool enable)
525 : {
526 : u32 f32_cntl;
527 : int i;
528 :
529 0 : if (!enable) {
530 0 : sdma_v6_0_gfx_stop(adev);
531 0 : sdma_v6_0_rlc_stop(adev);
532 : }
533 :
534 0 : for (i = 0; i < adev->sdma.num_instances; i++) {
535 0 : f32_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL));
536 0 : f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
537 0 : WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL), f32_cntl);
538 : }
539 0 : }
540 :
541 : /**
542 : * sdma_v6_0_gfx_resume - setup and start the async dma engines
543 : *
544 : * @adev: amdgpu_device pointer
545 : *
546 : * Set up the gfx DMA ring buffers and enable them.
547 : * Returns 0 for success, error for failure.
548 : */
549 0 : static int sdma_v6_0_gfx_resume(struct amdgpu_device *adev)
550 : {
551 : struct amdgpu_ring *ring;
552 : u32 rb_cntl, ib_cntl;
553 : u32 rb_bufsz;
554 : u32 doorbell;
555 : u32 doorbell_offset;
556 : u32 temp;
557 : u64 wptr_gpu_addr;
558 : int i, r;
559 :
560 0 : for (i = 0; i < adev->sdma.num_instances; i++) {
561 0 : ring = &adev->sdma.instance[i].ring;
562 :
563 0 : if (!amdgpu_sriov_vf(adev))
564 0 : WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
565 :
566 : /* Set ring buffer size in dwords */
567 0 : rb_bufsz = order_base_2(ring->ring_size / 4);
568 0 : rb_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL));
569 0 : rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz);
570 : #ifdef __BIG_ENDIAN
571 : rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1);
572 : rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL,
573 : RPTR_WRITEBACK_SWAP_ENABLE, 1);
574 : #endif
575 0 : rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_PRIV, 1);
576 0 : WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
577 :
578 : /* Initialize the ring buffer's read and write pointers */
579 0 : WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR), 0);
580 0 : WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_HI), 0);
581 0 : WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), 0);
582 0 : WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), 0);
583 :
584 : /* setup the wptr shadow polling */
585 0 : wptr_gpu_addr = ring->wptr_gpu_addr;
586 0 : WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_LO),
587 : lower_32_bits(wptr_gpu_addr));
588 0 : WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_HI),
589 : upper_32_bits(wptr_gpu_addr));
590 :
591 : /* set the wb address whether it's enabled or not */
592 0 : WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_HI),
593 : upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
594 0 : WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_LO),
595 : lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
596 :
597 0 : rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
598 0 : if (amdgpu_sriov_vf(adev))
599 0 : rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 1);
600 : else
601 0 : rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0);
602 0 : rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, F32_WPTR_POLL_ENABLE, 1);
603 :
604 0 : WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE), ring->gpu_addr >> 8);
605 0 : WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40);
606 :
607 0 : ring->wptr = 0;
608 :
609 : /* before programming wptr to a smaller value, minor_ptr_update must be set first */
610 0 : WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 1);
611 :
612 0 : if (!amdgpu_sriov_vf(adev)) { /* only bare-metal uses a register write for wptr */
613 0 : WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2);
614 0 : WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
615 : }
616 :
617 0 : doorbell = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL));
618 0 : doorbell_offset = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET));
619 :
620 0 : if (ring->use_doorbell) {
621 0 : doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 1);
622 0 : doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_QUEUE0_DOORBELL_OFFSET,
623 : OFFSET, ring->doorbell_index);
624 : } else {
625 0 : doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 0);
626 : }
627 0 : WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL), doorbell);
628 0 : WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET), doorbell_offset);
629 :
630 0 : if (i == 0)
631 0 : adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
632 0 : ring->doorbell_index,
633 0 : adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances);
634 :
635 0 : if (amdgpu_sriov_vf(adev))
636 0 : sdma_v6_0_ring_set_wptr(ring);
637 :
638 : /* set minor_ptr_update to 0 after wptr is programmed */
639 0 : WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 0);
640 :
641 : /* Set up RESP_MODE to non-copy addresses */
642 0 : temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL));
643 0 : temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
644 0 : temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
645 0 : WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL), temp);
646 :
647 : /* program default cache read and write policy */
648 0 : temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE));
649 : /* clean read policy and write policy bits */
650 0 : temp &= 0xFF0FFF;
651 0 : temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
652 : (CACHE_WRITE_POLICY_L2__DEFAULT << 14) |
653 : SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK);
654 0 : WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE), temp);
655 :
656 0 : if (!amdgpu_sriov_vf(adev)) {
657 : /* unhalt engine */
658 0 : temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL));
659 0 : temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
660 0 : temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, TH1_RESET, 0);
661 0 : WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL), temp);
662 : }
663 :
664 : /* enable DMA RB */
665 0 : rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 1);
666 0 : WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
667 :
668 0 : ib_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL));
669 0 : ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 1);
670 : #ifdef __BIG_ENDIAN
671 : ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1);
672 : #endif
673 : /* enable DMA IBs */
674 0 : WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl);
675 :
676 0 : ring->sched.ready = true;
677 :
678 0 : if (amdgpu_sriov_vf(adev)) { /* the bare-metal sequence doesn't need the two lines below */
679 0 : sdma_v6_0_ctx_switch_enable(adev, true);
680 0 : sdma_v6_0_enable(adev, true);
681 : }
682 :
683 0 : r = amdgpu_ring_test_helper(ring);
684 0 : if (r) {
685 0 : ring->sched.ready = false;
686 0 : return r;
687 : }
688 :
689 0 : if (adev->mman.buffer_funcs_ring == ring)
690 0 : amdgpu_ttm_set_buffer_funcs_status(adev, true);
691 : }
692 :
693 : return 0;
694 : }
695 :
696 : /**
697 : * sdma_v6_0_rlc_resume - setup and start the async dma engines
698 : *
699 : * @adev: amdgpu_device pointer
700 : *
701 : * Set up the compute DMA queues and enable them.
702 : * Returns 0 for success, error for failure.
703 : */
704 : static int sdma_v6_0_rlc_resume(struct amdgpu_device *adev)
705 : {
706 : return 0;
707 : }
708 :
709 : /**
710 : * sdma_v6_0_load_microcode - load the sDMA ME ucode
711 : *
712 : * @adev: amdgpu_device pointer
713 : *
714 : * Loads the sDMA0/1 ucode.
715 : * Returns 0 for success, -EINVAL if the ucode is not available.
716 : */
717 0 : static int sdma_v6_0_load_microcode(struct amdgpu_device *adev)
718 : {
719 : const struct sdma_firmware_header_v2_0 *hdr;
720 : const __le32 *fw_data;
721 : u32 fw_size;
722 : int i, j;
723 : bool use_broadcast;
724 :
725 : /* halt the MEs */
726 0 : sdma_v6_0_enable(adev, false);
727 :
728 0 : if (!adev->sdma.instance[0].fw)
729 : return -EINVAL;
730 :
731 : /* use broadcast mode to load SDMA microcode by default */
732 0 : use_broadcast = true;
733 :
734 : if (use_broadcast) {
735 0 : dev_info(adev->dev, "Use broadcast method to load SDMA firmware\n");
736 : /* load Control Thread microcode */
737 0 : hdr = (const struct sdma_firmware_header_v2_0 *)adev->sdma.instance[0].fw->data;
738 0 : amdgpu_ucode_print_sdma_hdr(&hdr->header);
739 0 : fw_size = le32_to_cpu(hdr->ctx_jt_offset + hdr->ctx_jt_size) / 4;
740 :
741 0 : fw_data = (const __le32 *)
742 0 : (adev->sdma.instance[0].fw->data +
743 0 : le32_to_cpu(hdr->header.ucode_array_offset_bytes));
744 :
745 0 : WREG32(sdma_v6_0_get_reg_offset(adev, 0, regSDMA0_BROADCAST_UCODE_ADDR), 0);
746 :
747 0 : for (j = 0; j < fw_size; j++) {
748 0 : if (amdgpu_emu_mode == 1 && j % 500 == 0)
749 0 : msleep(1);
750 0 : WREG32(sdma_v6_0_get_reg_offset(adev, 0, regSDMA0_BROADCAST_UCODE_DATA), le32_to_cpup(fw_data++));
751 : }
752 :
753 : /* load Context Switch microcode */
754 0 : fw_size = le32_to_cpu(hdr->ctl_jt_offset + hdr->ctl_jt_size) / 4;
755 :
756 0 : fw_data = (const __le32 *)
757 0 : (adev->sdma.instance[0].fw->data +
758 0 : le32_to_cpu(hdr->ctl_ucode_offset));
759 :
760 0 : WREG32(sdma_v6_0_get_reg_offset(adev, 0, regSDMA0_BROADCAST_UCODE_ADDR), 0x8000);
761 :
762 0 : for (j = 0; j < fw_size; j++) {
763 0 : if (amdgpu_emu_mode == 1 && j % 500 == 0)
764 0 : msleep(1);
765 0 : WREG32(sdma_v6_0_get_reg_offset(adev, 0, regSDMA0_BROADCAST_UCODE_DATA), le32_to_cpup(fw_data++));
766 : }
767 : } else {
768 : dev_info(adev->dev, "Use legacy method to load SDMA firmware\n");
769 : for (i = 0; i < adev->sdma.num_instances; i++) {
770 : /* load Control Thread microcode */
771 : hdr = (const struct sdma_firmware_header_v2_0 *)adev->sdma.instance[0].fw->data;
772 : amdgpu_ucode_print_sdma_hdr(&hdr->header);
773 : fw_size = le32_to_cpu(hdr->ctx_jt_offset + hdr->ctx_jt_size) / 4;
774 :
775 : fw_data = (const __le32 *)
776 : (adev->sdma.instance[0].fw->data +
777 : le32_to_cpu(hdr->header.ucode_array_offset_bytes));
778 :
779 : WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UCODE_ADDR), 0);
780 :
781 : for (j = 0; j < fw_size; j++) {
782 : if (amdgpu_emu_mode == 1 && j % 500 == 0)
783 : msleep(1);
784 : WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UCODE_DATA), le32_to_cpup(fw_data++));
785 : }
786 :
787 : WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UCODE_ADDR), adev->sdma.instance[0].fw_version);
788 :
789 : /* load Context Switch microcode */
790 : fw_size = le32_to_cpu(hdr->ctl_jt_offset + hdr->ctl_jt_size) / 4;
791 :
792 : fw_data = (const __le32 *)
793 : (adev->sdma.instance[0].fw->data +
794 : le32_to_cpu(hdr->ctl_ucode_offset));
795 :
796 : WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UCODE_ADDR), 0x8000);
797 :
798 : for (j = 0; j < fw_size; j++) {
799 : if (amdgpu_emu_mode == 1 && j % 500 == 0)
800 : msleep(1);
801 : WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UCODE_DATA), le32_to_cpup(fw_data++));
802 : }
803 :
804 : WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UCODE_ADDR), adev->sdma.instance[0].fw_version);
805 : }
806 : }
807 :
808 : return 0;
809 : }
810 :
811 0 : static int sdma_v6_0_soft_reset(void *handle)
812 : {
813 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
814 : u32 tmp;
815 : int i;
816 :
817 0 : sdma_v6_0_gfx_stop(adev);
818 :
819 0 : for (i = 0; i < adev->sdma.num_instances; i++) {
820 0 : tmp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_FREEZE));
821 0 : tmp |= SDMA0_FREEZE__FREEZE_MASK;
822 0 : WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_FREEZE), tmp);
823 0 : tmp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL));
824 0 : tmp |= SDMA0_F32_CNTL__HALT_MASK;
825 0 : tmp |= SDMA0_F32_CNTL__TH1_RESET_MASK;
826 0 : WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL), tmp);
827 :
828 0 : WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_PREEMPT), 0);
829 :
830 0 : udelay(100);
831 :
832 0 : tmp = GRBM_SOFT_RESET__SOFT_RESET_SDMA0_MASK << i;
833 0 : WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
834 0 : tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
835 :
836 0 : udelay(100);
837 :
838 0 : WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, 0);
839 0 : tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
840 :
841 0 : udelay(100);
842 : }
843 :
844 0 : return sdma_v6_0_start(adev);
845 : }
846 :
847 0 : static bool sdma_v6_0_check_soft_reset(void *handle)
848 : {
849 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
850 : struct amdgpu_ring *ring;
851 : int i, r;
852 : long tmo = msecs_to_jiffies(1000);
853 :
854 0 : for (i = 0; i < adev->sdma.num_instances; i++) {
855 0 : ring = &adev->sdma.instance[i].ring;
856 0 : r = amdgpu_ring_test_ib(ring, tmo);
857 0 : if (r)
858 : return true;
859 : }
860 :
861 : return false;
862 : }
863 :
864 : /**
865 : * sdma_v6_0_start - setup and start the async dma engines
866 : *
867 : * @adev: amdgpu_device pointer
868 : *
869 : * Set up the DMA engines and enable them.
870 : * Returns 0 for success, error for failure.
871 : */
872 0 : static int sdma_v6_0_start(struct amdgpu_device *adev)
873 : {
874 0 : int r = 0;
875 :
876 0 : if (amdgpu_sriov_vf(adev)) {
877 0 : sdma_v6_0_ctx_switch_enable(adev, false);
878 0 : sdma_v6_0_enable(adev, false);
879 :
880 : /* set RB registers */
881 0 : r = sdma_v6_0_gfx_resume(adev);
882 0 : return r;
883 : }
884 :
885 0 : if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
886 0 : r = sdma_v6_0_load_microcode(adev);
887 0 : if (r)
888 : return r;
889 :
890 : /* The value of regSDMA_F32_CNTL is not valid immediately after loading the fw */
891 0 : if (amdgpu_emu_mode == 1)
892 0 : msleep(1000);
893 : }
894 :
895 : /* unhalt the MEs */
896 0 : sdma_v6_0_enable(adev, true);
897 : /* enable sdma ring preemption */
898 0 : sdma_v6_0_ctx_switch_enable(adev, true);
899 :
900 : /* start the gfx rings and rlc compute queues */
901 0 : r = sdma_v6_0_gfx_resume(adev);
902 0 : if (r)
903 : return r;
904 0 : r = sdma_v6_0_rlc_resume(adev);
905 :
906 0 : return r;
907 : }
908 :
909 0 : static int sdma_v6_0_mqd_init(struct amdgpu_device *adev, void *mqd,
910 : struct amdgpu_mqd_prop *prop)
911 : {
912 0 : struct v11_sdma_mqd *m = mqd;
913 : uint64_t wb_gpu_addr;
914 :
915 0 : m->sdmax_rlcx_rb_cntl =
916 0 : order_base_2(prop->queue_size / 4) << SDMA0_QUEUE0_RB_CNTL__RB_SIZE__SHIFT |
917 0 : 1 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
918 : 4 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;
919 :
920 0 : m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8);
921 0 : m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8);
922 :
923 0 : wb_gpu_addr = prop->wptr_gpu_addr;
924 0 : m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(wb_gpu_addr);
925 0 : m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr);
926 :
927 0 : wb_gpu_addr = prop->rptr_gpu_addr;
928 0 : m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(wb_gpu_addr);
929 0 : m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(wb_gpu_addr);
930 :
931 0 : m->sdmax_rlcx_ib_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, 0,
932 : regSDMA0_QUEUE0_IB_CNTL));
933 :
934 0 : m->sdmax_rlcx_doorbell_offset =
935 0 : prop->doorbell_index << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
936 :
937 0 : m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_QUEUE0_DOORBELL, ENABLE, 1);
938 :
939 0 : m->sdmax_rlcx_skip_cntl = 0;
940 0 : m->sdmax_rlcx_context_status = 0;
941 0 : m->sdmax_rlcx_doorbell_log = 0;
942 :
943 0 : m->sdmax_rlcx_rb_aql_cntl = regSDMA0_QUEUE0_RB_AQL_CNTL_DEFAULT;
944 0 : m->sdmax_rlcx_dummy_reg = regSDMA0_QUEUE0_DUMMY_REG_DEFAULT;
945 :
946 0 : return 0;
947 : }
948 :
949 : static void sdma_v6_0_set_mqd_funcs(struct amdgpu_device *adev)
950 : {
951 0 : adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v11_sdma_mqd);
952 0 : adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = sdma_v6_0_mqd_init;
953 : }
954 :
955 : /**
956 : * sdma_v6_0_ring_test_ring - simple async dma engine test
957 : *
958 : * @ring: amdgpu_ring structure holding ring information
959 : *
960 : * Test the DMA engine by using it to write a
961 : * value to memory.
962 : * Returns 0 for success, error for failure.
963 : */
964 0 : static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring)
965 : {
966 0 : struct amdgpu_device *adev = ring->adev;
967 : unsigned i;
968 : unsigned index;
969 : int r;
970 : u32 tmp;
971 : u64 gpu_addr;
972 0 : volatile uint32_t *cpu_ptr = NULL;
973 :
974 0 : tmp = 0xCAFEDEAD;
975 :
976 0 : if (ring->is_mes_queue) {
977 0 : uint32_t offset = 0;
978 0 : offset = amdgpu_mes_ctx_get_offs(ring,
979 : AMDGPU_MES_CTX_PADDING_OFFS);
980 0 : gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
981 0 : cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
982 0 : *cpu_ptr = tmp;
983 : } else {
984 0 : r = amdgpu_device_wb_get(adev, &index);
985 0 : if (r) {
986 0 : dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
987 0 : return r;
988 : }
989 :
990 0 : gpu_addr = adev->wb.gpu_addr + (index * 4);
991 0 : adev->wb.wb[index] = cpu_to_le32(tmp);
992 : }
993 :
994 0 : r = amdgpu_ring_alloc(ring, 5);
995 0 : if (r) {
996 0 : DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
997 0 : amdgpu_device_wb_free(adev, index);
998 0 : return r;
999 : }
1000 :
1001 0 : amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
1002 : SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
1003 0 : amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
1004 0 : amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
1005 0 : amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
1006 0 : amdgpu_ring_write(ring, 0xDEADBEEF);
1007 0 : amdgpu_ring_commit(ring);
1008 :
1009 0 : for (i = 0; i < adev->usec_timeout; i++) {
1010 0 : if (ring->is_mes_queue)
1011 0 : tmp = le32_to_cpu(*cpu_ptr);
1012 : else
1013 0 : tmp = le32_to_cpu(adev->wb.wb[index]);
1014 0 : if (tmp == 0xDEADBEEF)
1015 : break;
1016 0 : if (amdgpu_emu_mode == 1)
1017 0 : msleep(1);
1018 : else
1019 : udelay(1);
1020 : }
1021 :
1022 0 : if (i >= adev->usec_timeout)
1023 0 : r = -ETIMEDOUT;
1024 :
1025 0 : if (!ring->is_mes_queue)
1026 0 : amdgpu_device_wb_free(adev, index);
1027 :
1028 : return r;
1029 : }
1030 :
1031 : /**
1032 : * sdma_v6_0_ring_test_ib - test an IB on the DMA engine
1033 : *
1034 : * @ring: amdgpu_ring structure holding ring information
1035 : *
1036 : * Test a simple IB in the DMA ring.
1037 : * Returns 0 on success, error on failure.
1038 : */
1039 0 : static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1040 : {
1041 0 : struct amdgpu_device *adev = ring->adev;
1042 : struct amdgpu_ib ib;
1043 0 : struct dma_fence *f = NULL;
1044 : unsigned index;
1045 : long r;
1046 0 : u32 tmp = 0;
1047 : u64 gpu_addr;
1048 0 : volatile uint32_t *cpu_ptr = NULL;
1049 :
1050 0 : tmp = 0xCAFEDEAD;
1051 0 : memset(&ib, 0, sizeof(ib));
1052 :
1053 0 : if (ring->is_mes_queue) {
1054 0 : uint32_t offset = 0;
1055 0 : offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
1056 0 : ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
1057 0 : ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
1058 :
1059 0 : offset = amdgpu_mes_ctx_get_offs(ring,
1060 : AMDGPU_MES_CTX_PADDING_OFFS);
1061 0 : gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
1062 0 : cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
1063 0 : *cpu_ptr = tmp;
1064 : } else {
1065 0 : r = amdgpu_device_wb_get(adev, &index);
1066 0 : if (r) {
1067 0 : dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
1068 0 : return r;
1069 : }
1070 :
1071 0 : gpu_addr = adev->wb.gpu_addr + (index * 4);
1072 0 : adev->wb.wb[index] = cpu_to_le32(tmp);
1073 :
1074 0 : r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
1075 0 : if (r) {
1076 0 : DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
1077 0 : goto err0;
1078 : }
1079 : }
1080 :
1081 0 : ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
1082 : SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
1083 0 : ib.ptr[1] = lower_32_bits(gpu_addr);
1084 0 : ib.ptr[2] = upper_32_bits(gpu_addr);
1085 0 : ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
1086 0 : ib.ptr[4] = 0xDEADBEEF;
1087 0 : ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
1088 0 : ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
1089 0 : ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
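/* a 5-dword write packet padded with 3 NOPs so the IB length is a multiple of 8 dwords */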
1090 0 : ib.length_dw = 8;
1091 :
1092 0 : r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1093 0 : if (r)
1094 : goto err1;
1095 :
1096 0 : r = dma_fence_wait_timeout(f, false, timeout);
1097 0 : if (r == 0) {
1098 0 : DRM_ERROR("amdgpu: IB test timed out\n");
1099 0 : r = -ETIMEDOUT;
1100 0 : goto err1;
1101 0 : } else if (r < 0) {
1102 0 : DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
1103 0 : goto err1;
1104 : }
1105 :
1106 0 : if (ring->is_mes_queue)
1107 0 : tmp = le32_to_cpu(*cpu_ptr);
1108 : else
1109 0 : tmp = le32_to_cpu(adev->wb.wb[index]);
1110 :
1111 0 : if (tmp == 0xDEADBEEF)
1112 : r = 0;
1113 : else
1114 0 : r = -EINVAL;
1115 :
1116 : err1:
1117 0 : amdgpu_ib_free(adev, &ib, NULL);
1118 0 : dma_fence_put(f);
1119 : err0:
1120 0 : if (!ring->is_mes_queue)
1121 0 : amdgpu_device_wb_free(adev, index);
1122 0 : return r;
1123 : }
1124 :
1125 :
1126 : /**
1127 : * sdma_v6_0_vm_copy_pte - update PTEs by copying them from the GART
1128 : *
1129 : * @ib: indirect buffer to fill with commands
1130 : * @pe: addr of the page entry
1131 : * @src: src addr to copy from
1132 : * @count: number of page entries to update
1133 : *
1134 : * Update PTEs by copying them from the GART using sDMA.
1135 : */
1136 0 : static void sdma_v6_0_vm_copy_pte(struct amdgpu_ib *ib,
1137 : uint64_t pe, uint64_t src,
1138 : unsigned count)
1139 : {
1140 0 : unsigned bytes = count * 8;
1141 :
1142 0 : ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
1143 : SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
1144 0 : ib->ptr[ib->length_dw++] = bytes - 1;
1145 0 : ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
1146 0 : ib->ptr[ib->length_dw++] = lower_32_bits(src);
1147 0 : ib->ptr[ib->length_dw++] = upper_32_bits(src);
1148 0 : ib->ptr[ib->length_dw++] = lower_32_bits(pe);
1149 0 : ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1150 :
1151 0 : }
1152 :
1153 : /**
1154 : * sdma_v6_0_vm_write_pte - update PTEs by writing them manually
1155 : *
1156 : * @ib: indirect buffer to fill with commands
1157 : * @pe: addr of the page entry
1158 : * @value: value to write into the page table entries
1159 : * @count: number of page entries to update
1160 : * @incr: increase next addr by incr bytes
1162 : *
1163 : * Update PTEs by writing them manually using sDMA.
1164 : */
1165 0 : static void sdma_v6_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
1166 : uint64_t value, unsigned count,
1167 : uint32_t incr)
1168 : {
1169 0 : unsigned ndw = count * 2;
1170 :
1171 0 : ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
1172 : SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
1173 0 : ib->ptr[ib->length_dw++] = lower_32_bits(pe);
1174 0 : ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1175 0 : ib->ptr[ib->length_dw++] = ndw - 1;
1176 0 : for (; ndw > 0; ndw -= 2) {
1177 0 : ib->ptr[ib->length_dw++] = lower_32_bits(value);
1178 0 : ib->ptr[ib->length_dw++] = upper_32_bits(value);
1179 0 : value += incr;
1180 : }
1181 0 : }
1182 :
1183 : /**
1184 : * sdma_v6_0_vm_set_pte_pde - update the page tables using sDMA
1185 : *
1186 : * @ib: indirect buffer to fill with commands
1187 : * @pe: addr of the page entry
1188 : * @addr: dst addr to write into pe
1189 : * @count: number of page entries to update
1190 : * @incr: increase next addr by incr bytes
1191 : * @flags: access flags
1192 : *
1193 : * Update the page tables using sDMA.
1194 : */
1195 0 : static void sdma_v6_0_vm_set_pte_pde(struct amdgpu_ib *ib,
1196 : uint64_t pe,
1197 : uint64_t addr, unsigned count,
1198 : uint32_t incr, uint64_t flags)
1199 : {
1200 : /* for physically contiguous pages (vram) */
1201 0 : ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_PTEPDE);
1202 0 : ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
1203 0 : ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1204 0 : ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
1205 0 : ib->ptr[ib->length_dw++] = upper_32_bits(flags);
1206 0 : ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
1207 0 : ib->ptr[ib->length_dw++] = upper_32_bits(addr);
1208 0 : ib->ptr[ib->length_dw++] = incr; /* increment size */
1209 0 : ib->ptr[ib->length_dw++] = 0;
1210 0 : ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
1211 0 : }
1212 :
1213 : /**
1214 : * sdma_v6_0_ring_pad_ib - pad the IB
1215 : * @ib: indirect buffer to fill with padding
1216 : *
1217 : * Pad the IB with NOPs to a boundary multiple of 8.
1218 : */
1219 0 : static void sdma_v6_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
1220 : {
1221 0 : struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
1222 : u32 pad_count;
1223 : int i;
1224 :
1225 0 : pad_count = (-ib->length_dw) & 0x7;
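/* e.g. length_dw == 13 gives pad_count == 3, padding the IB to 16 dwords */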
1226 0 : for (i = 0; i < pad_count; i++)
1227 0 : if (sdma && sdma->burst_nop && (i == 0))
1228 0 : ib->ptr[ib->length_dw++] =
1229 0 : SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP) |
1230 0 : SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
1231 : else
1232 0 : ib->ptr[ib->length_dw++] =
1233 : SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP);
1234 0 : }
1235 :
1236 : /**
1237 : * sdma_v6_0_ring_emit_pipeline_sync - sync the pipeline
1238 : *
1239 : * @ring: amdgpu_ring pointer
1240 : *
1241 : * Make sure all previous operations are completed.
1242 : */
1243 0 : static void sdma_v6_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
1244 : {
1245 0 : uint32_t seq = ring->fence_drv.sync_seq;
1246 0 : uint64_t addr = ring->fence_drv.gpu_addr;
1247 :
1248 : /* wait for idle */
1249 0 : amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
1250 : SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
1251 : SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
1252 : SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
1253 0 : amdgpu_ring_write(ring, addr & 0xfffffffc);
1254 0 : amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
1255 0 : amdgpu_ring_write(ring, seq); /* reference */
1256 0 : amdgpu_ring_write(ring, 0xffffffff); /* mask */
1257 0 : amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
1258 : SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
1259 0 : }
1260 :
1261 : /**
1262 : * sdma_v6_0_ring_emit_vm_flush - vm flush using sDMA
1263 : *
1264 : * @ring: amdgpu_ring pointer
1265 : * @vmid: VMID to use for the flush, @pd_addr: page directory base address
1266 : *
1267 : * Update the page table base and flush the VM TLB
1268 : * using sDMA.
1269 : */
1270 0 : static void sdma_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
1271 : unsigned vmid, uint64_t pd_addr)
1272 : {
1273 0 : amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
1274 0 : }
1275 :
1276 0 : static void sdma_v6_0_ring_emit_wreg(struct amdgpu_ring *ring,
1277 : uint32_t reg, uint32_t val)
1278 : {
1279 0 : amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_SRBM_WRITE) |
1280 : SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
1281 0 : amdgpu_ring_write(ring, reg);
1282 0 : amdgpu_ring_write(ring, val);
1283 0 : }
1284 :
1285 0 : static void sdma_v6_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
1286 : uint32_t val, uint32_t mask)
1287 : {
1288 0 : amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
1289 : SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
1290 : SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
1291 0 : amdgpu_ring_write(ring, reg << 2);
1292 0 : amdgpu_ring_write(ring, 0);
1293 0 : amdgpu_ring_write(ring, val); /* reference */
1294 0 : amdgpu_ring_write(ring, mask); /* mask */
1295 0 : amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
1296 : SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
1297 0 : }
1298 :
1299 0 : static void sdma_v6_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
1300 : uint32_t reg0, uint32_t reg1,
1301 : uint32_t ref, uint32_t mask)
1302 : {
1303 0 : amdgpu_ring_emit_wreg(ring, reg0, ref);
1304 : /* wait for a cycle to reset vm_inv_eng*_ack */
1305 0 : amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
1306 0 : amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
1307 0 : }
1308 :
1309 0 : static int sdma_v6_0_early_init(void *handle)
1310 : {
1311 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1312 :
1313 0 : sdma_v6_0_set_ring_funcs(adev);
1314 0 : sdma_v6_0_set_buffer_funcs(adev);
1315 0 : sdma_v6_0_set_vm_pte_funcs(adev);
1316 0 : sdma_v6_0_set_irq_funcs(adev);
1317 0 : sdma_v6_0_set_mqd_funcs(adev);
1318 :
1319 0 : return 0;
1320 : }
1321 :
1322 0 : static int sdma_v6_0_sw_init(void *handle)
1323 : {
1324 : struct amdgpu_ring *ring;
1325 : int r, i;
1326 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1327 :
1328 : /* SDMA trap event */
1329 0 : r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
1330 : GFX_11_0_0__SRCID__SDMA_TRAP,
1331 : &adev->sdma.trap_irq);
1332 0 : if (r)
1333 : return r;
1334 :
1335 0 : r = sdma_v6_0_init_microcode(adev);
1336 0 : if (r) {
1337 0 : DRM_ERROR("Failed to load sdma firmware!\n");
1338 0 : return r;
1339 : }
1340 :
1341 0 : for (i = 0; i < adev->sdma.num_instances; i++) {
1342 0 : ring = &adev->sdma.instance[i].ring;
1343 0 : ring->ring_obj = NULL;
1344 0 : ring->use_doorbell = true;
1345 0 : ring->me = i;
1346 :
1347 0 : DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,
1348 : ring->use_doorbell?"true":"false");
1349 :
1350 0 : ring->doorbell_index =
1351 0 : (adev->doorbell_index.sdma_engine[i] << 1); // get DWORD offset
1352 :
1353 0 : sprintf(ring->name, "sdma%d", i);
1354 0 : r = amdgpu_ring_init(adev, ring, 1024,
1355 : &adev->sdma.trap_irq,
1356 : AMDGPU_SDMA_IRQ_INSTANCE0 + i,
1357 : AMDGPU_RING_PRIO_DEFAULT, NULL);
1358 0 : if (r)
1359 : return r;
1360 : }
1361 :
1362 : return r;
1363 : }
1364 :
1365 0 : static int sdma_v6_0_sw_fini(void *handle)
1366 : {
1367 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1368 : int i;
1369 :
1370 0 : for (i = 0; i < adev->sdma.num_instances; i++)
1371 0 : amdgpu_ring_fini(&adev->sdma.instance[i].ring);
1372 :
1373 0 : sdma_v6_0_destroy_inst_ctx(adev);
1374 :
1375 0 : return 0;
1376 : }
1377 :
1378 0 : static int sdma_v6_0_hw_init(void *handle)
1379 : {
1380 : int r;
1381 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1382 :
1383 0 : r = sdma_v6_0_start(adev);
1384 :
1385 0 : return r;
1386 : }
1387 :
1388 0 : static int sdma_v6_0_hw_fini(void *handle)
1389 : {
1390 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1391 :
1392 0 : if (amdgpu_sriov_vf(adev))
1393 : return 0;
1394 :
1395 0 : sdma_v6_0_ctx_switch_enable(adev, false);
1396 0 : sdma_v6_0_enable(adev, false);
1397 :
1398 0 : return 0;
1399 : }
1400 :
1401 0 : static int sdma_v6_0_suspend(void *handle)
1402 : {
1403 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1404 :
1405 0 : return sdma_v6_0_hw_fini(adev);
1406 : }
1407 :
1408 0 : static int sdma_v6_0_resume(void *handle)
1409 : {
1410 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1411 :
1412 0 : return sdma_v6_0_hw_init(adev);
1413 : }
1414 :
1415 0 : static bool sdma_v6_0_is_idle(void *handle)
1416 : {
1417 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1418 : u32 i;
1419 :
1420 0 : for (i = 0; i < adev->sdma.num_instances; i++) {
1421 0 : u32 tmp = RREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_STATUS_REG));
1422 :
1423 0 : if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
1424 : return false;
1425 : }
1426 :
1427 : return true;
1428 : }
1429 :
1430 0 : static int sdma_v6_0_wait_for_idle(void *handle)
1431 : {
1432 : unsigned i;
1433 : u32 sdma0, sdma1;
1434 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1435 :
1436 0 : for (i = 0; i < adev->usec_timeout; i++) {
1437 0 : sdma0 = RREG32(sdma_v6_0_get_reg_offset(adev, 0, regSDMA0_STATUS_REG));
1438 0 : sdma1 = RREG32(sdma_v6_0_get_reg_offset(adev, 1, regSDMA0_STATUS_REG));
1439 :
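/* the bitwise AND keeps the IDLE bit set only if both instances report idle */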
1440 0 : if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK)
1441 : return 0;
1442 0 : udelay(1);
1443 : }
1444 : return -ETIMEDOUT;
1445 : }
1446 :
1447 0 : static int sdma_v6_0_ring_preempt_ib(struct amdgpu_ring *ring)
1448 : {
1449 0 : int i, r = 0;
1450 0 : struct amdgpu_device *adev = ring->adev;
1451 0 : u32 index = 0;
1452 : u64 sdma_gfx_preempt;
1453 :
1454 0 : amdgpu_sdma_get_index_from_ring(ring, &index);
1455 0 : sdma_gfx_preempt =
1456 0 : sdma_v6_0_get_reg_offset(adev, index, regSDMA0_QUEUE0_PREEMPT);
1457 :
1458 : /* assert preemption condition */
1459 0 : amdgpu_ring_set_preempt_cond_exec(ring, false);
1460 :
1461 : /* emit the trailing fence */
1462 0 : ring->trail_seq += 1;
1463 0 : amdgpu_ring_alloc(ring, 10);
1464 0 : sdma_v6_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
1465 0 : ring->trail_seq, 0);
1466 0 : amdgpu_ring_commit(ring);
1467 :
1468 : /* assert IB preemption */
1469 0 : WREG32(sdma_gfx_preempt, 1);
1470 :
1471 : /* poll the trailing fence */
1472 0 : for (i = 0; i < adev->usec_timeout; i++) {
1473 0 : if (ring->trail_seq ==
1474 0 : le32_to_cpu(*(ring->trail_fence_cpu_addr)))
1475 : break;
1476 0 : udelay(1);
1477 : }
1478 :
1479 0 : if (i >= adev->usec_timeout) {
1480 0 : r = -EINVAL;
1481 0 : DRM_ERROR("ring %d failed to be preempted\n", ring->idx);
1482 : }
1483 :
1484 : /* deassert IB preemption */
1485 0 : WREG32(sdma_gfx_preempt, 0);
1486 :
1487 : /* deassert the preemption condition */
1488 0 : amdgpu_ring_set_preempt_cond_exec(ring, true);
1489 0 : return r;
1490 : }
1491 :
1492 0 : static int sdma_v6_0_set_trap_irq_state(struct amdgpu_device *adev,
1493 : struct amdgpu_irq_src *source,
1494 : unsigned type,
1495 : enum amdgpu_interrupt_state state)
1496 : {
1497 : u32 sdma_cntl;
1498 :
1499 0 : u32 reg_offset = sdma_v6_0_get_reg_offset(adev, type, regSDMA0_CNTL);
1500 :
1501 0 : sdma_cntl = RREG32(reg_offset);
1502 0 : sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
1503 : state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
1504 0 : WREG32(reg_offset, sdma_cntl);
1505 :
1506 0 : return 0;
1507 : }
1508 :
1509 0 : static int sdma_v6_0_process_trap_irq(struct amdgpu_device *adev,
1510 : struct amdgpu_irq_src *source,
1511 : struct amdgpu_iv_entry *entry)
1512 : {
1513 : int instances, queue;
1514 0 : uint32_t mes_queue_id = entry->src_data[0];
1515 :
1516 0 : DRM_DEBUG("IH: SDMA trap\n");
1517 :
1518 0 : if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
1519 : struct amdgpu_mes_queue *queue;
1520 :
1521 0 : mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
1522 :
1523 0 : spin_lock(&adev->mes.queue_id_lock);
1524 0 : queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
1525 0 : if (queue) {
1526 0 : DRM_DEBUG("process sdma queue id = %d\n", mes_queue_id);
1527 0 : amdgpu_fence_process(queue->ring);
1528 : }
1529 0 : spin_unlock(&adev->mes.queue_id_lock);
1530 0 : return 0;
1531 : }
1532 :
1533 0 : queue = entry->ring_id & 0xf;
1534 0 : instances = (entry->ring_id & 0xf0) >> 4;
1535 0 : if (instances > 1) {
1536 0 : DRM_ERROR("IH: wrong ring_ID detected, as wrong sdma instance\n");
1537 0 : return -EINVAL;
1538 : }
1539 :
1540 0 : switch (entry->client_id) {
1541 : case SOC21_IH_CLIENTID_GFX:
1542 0 : switch (queue) {
1543 : case 0:
1544 0 : amdgpu_fence_process(&adev->sdma.instance[instances].ring);
1545 0 : break;
1546 : default:
1547 : break;
1548 : }
1549 : break;
1550 : }
1551 : return 0;
1552 : }
1553 :
1554 0 : static int sdma_v6_0_process_illegal_inst_irq(struct amdgpu_device *adev,
1555 : struct amdgpu_irq_src *source,
1556 : struct amdgpu_iv_entry *entry)
1557 : {
1558 0 : return 0;
1559 : }
1560 :
1561 0 : static int sdma_v6_0_set_clockgating_state(void *handle,
1562 : enum amd_clockgating_state state)
1563 : {
1564 0 : return 0;
1565 : }
1566 :
1567 0 : static int sdma_v6_0_set_powergating_state(void *handle,
1568 : enum amd_powergating_state state)
1569 : {
1570 0 : return 0;
1571 : }
1572 :
1573 0 : static void sdma_v6_0_get_clockgating_state(void *handle, u64 *flags)
1574 : {
1575 0 : }
1576 :
1577 : const struct amd_ip_funcs sdma_v6_0_ip_funcs = {
1578 : .name = "sdma_v6_0",
1579 : .early_init = sdma_v6_0_early_init,
1580 : .late_init = NULL,
1581 : .sw_init = sdma_v6_0_sw_init,
1582 : .sw_fini = sdma_v6_0_sw_fini,
1583 : .hw_init = sdma_v6_0_hw_init,
1584 : .hw_fini = sdma_v6_0_hw_fini,
1585 : .suspend = sdma_v6_0_suspend,
1586 : .resume = sdma_v6_0_resume,
1587 : .is_idle = sdma_v6_0_is_idle,
1588 : .wait_for_idle = sdma_v6_0_wait_for_idle,
1589 : .soft_reset = sdma_v6_0_soft_reset,
1590 : .check_soft_reset = sdma_v6_0_check_soft_reset,
1591 : .set_clockgating_state = sdma_v6_0_set_clockgating_state,
1592 : .set_powergating_state = sdma_v6_0_set_powergating_state,
1593 : .get_clockgating_state = sdma_v6_0_get_clockgating_state,
1594 : };
1595 :
1596 : static const struct amdgpu_ring_funcs sdma_v6_0_ring_funcs = {
1597 : .type = AMDGPU_RING_TYPE_SDMA,
1598 : .align_mask = 0xf,
1599 : .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
1600 : .support_64bit_ptrs = true,
1601 : .vmhub = AMDGPU_GFXHUB_0,
1602 : .get_rptr = sdma_v6_0_ring_get_rptr,
1603 : .get_wptr = sdma_v6_0_ring_get_wptr,
1604 : .set_wptr = sdma_v6_0_ring_set_wptr,
1605 : .emit_frame_size =
1606 : 5 + /* sdma_v6_0_ring_init_cond_exec */
1607 : 6 + /* sdma_v6_0_ring_emit_hdp_flush */
1608 : 6 + /* sdma_v6_0_ring_emit_pipeline_sync */
1609 : /* sdma_v6_0_ring_emit_vm_flush */
1610 : SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1611 : SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
1612 : 10 + 10 + 10, /* sdma_v6_0_ring_emit_fence x3 for user fence, vm fence */
1613 : .emit_ib_size = 5 + 7 + 6, /* sdma_v6_0_ring_emit_ib */
1614 : .emit_ib = sdma_v6_0_ring_emit_ib,
1615 : .emit_mem_sync = sdma_v6_0_ring_emit_mem_sync,
1616 : .emit_fence = sdma_v6_0_ring_emit_fence,
1617 : .emit_pipeline_sync = sdma_v6_0_ring_emit_pipeline_sync,
1618 : .emit_vm_flush = sdma_v6_0_ring_emit_vm_flush,
1619 : .emit_hdp_flush = sdma_v6_0_ring_emit_hdp_flush,
1620 : .test_ring = sdma_v6_0_ring_test_ring,
1621 : .test_ib = sdma_v6_0_ring_test_ib,
1622 : .insert_nop = sdma_v6_0_ring_insert_nop,
1623 : .pad_ib = sdma_v6_0_ring_pad_ib,
1624 : .emit_wreg = sdma_v6_0_ring_emit_wreg,
1625 : .emit_reg_wait = sdma_v6_0_ring_emit_reg_wait,
1626 : .emit_reg_write_reg_wait = sdma_v6_0_ring_emit_reg_write_reg_wait,
1627 : .init_cond_exec = sdma_v6_0_ring_init_cond_exec,
1628 : .patch_cond_exec = sdma_v6_0_ring_patch_cond_exec,
1629 : .preempt_ib = sdma_v6_0_ring_preempt_ib,
1630 : };
1631 :
1632 : static void sdma_v6_0_set_ring_funcs(struct amdgpu_device *adev)
1633 : {
1634 : int i;
1635 :
1636 0 : for (i = 0; i < adev->sdma.num_instances; i++) {
1637 0 : adev->sdma.instance[i].ring.funcs = &sdma_v6_0_ring_funcs;
1638 0 : adev->sdma.instance[i].ring.me = i;
1639 : }
1640 : }
1641 :
1642 : static const struct amdgpu_irq_src_funcs sdma_v6_0_trap_irq_funcs = {
1643 : .set = sdma_v6_0_set_trap_irq_state,
1644 : .process = sdma_v6_0_process_trap_irq,
1645 : };
1646 :
1647 : static const struct amdgpu_irq_src_funcs sdma_v6_0_illegal_inst_irq_funcs = {
1648 : .process = sdma_v6_0_process_illegal_inst_irq,
1649 : };
1650 :
1651 : static void sdma_v6_0_set_irq_funcs(struct amdgpu_device *adev)
1652 : {
1653 0 : adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 +
1654 : adev->sdma.num_instances;
1655 0 : adev->sdma.trap_irq.funcs = &sdma_v6_0_trap_irq_funcs;
1656 0 : adev->sdma.illegal_inst_irq.funcs = &sdma_v6_0_illegal_inst_irq_funcs;
1657 : }
1658 :
1659 : /**
1660 : * sdma_v6_0_emit_copy_buffer - copy buffer using the sDMA engine
1661 : *
1662 : * @ib: indirect buffer to fill with commands
1663 : * @src_offset: src GPU address
1664 : * @dst_offset: dst GPU address
1665 : * @byte_count: number of bytes to xfer, @tmz: if a secure copy should be used
1666 : *
1667 : * Copy GPU buffers using the DMA engine.
1668 : * Used by the amdgpu ttm implementation to move pages if
1669 : * registered as the asic copy callback.
1670 : */
1671 0 : static void sdma_v6_0_emit_copy_buffer(struct amdgpu_ib *ib,
1672 : uint64_t src_offset,
1673 : uint64_t dst_offset,
1674 : uint32_t byte_count,
1675 : bool tmz)
1676 : {
1677 0 : ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
1678 0 : SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
1679 : SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
1680 0 : ib->ptr[ib->length_dw++] = byte_count - 1;
1681 0 : ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
1682 0 : ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
1683 0 : ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
1684 0 : ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
1685 0 : ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
1686 0 : }
1687 :
1688 : /**
1689 : * sdma_v6_0_emit_fill_buffer - fill buffer using the sDMA engine
1690 : *
1691 : * @ib: indirect buffer to fill
1692 : * @src_data: value to write to buffer
1693 : * @dst_offset: dst GPU address
1694 : * @byte_count: number of bytes to xfer
1695 : *
1696 : * Fill GPU buffers using the DMA engine.
1697 : */
1698 0 : static void sdma_v6_0_emit_fill_buffer(struct amdgpu_ib *ib,
1699 : uint32_t src_data,
1700 : uint64_t dst_offset,
1701 : uint32_t byte_count)
1702 : {
1703 0 : ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_CONST_FILL);
1704 0 : ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
1705 0 : ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
1706 0 : ib->ptr[ib->length_dw++] = src_data;
1707 0 : ib->ptr[ib->length_dw++] = byte_count - 1;
1708 0 : }
1709 :
1710 : static const struct amdgpu_buffer_funcs sdma_v6_0_buffer_funcs = {
1711 : .copy_max_bytes = 0x400000,
1712 : .copy_num_dw = 7,
1713 : .emit_copy_buffer = sdma_v6_0_emit_copy_buffer,
1714 :
1715 : .fill_max_bytes = 0x400000,
1716 : .fill_num_dw = 5,
1717 : .emit_fill_buffer = sdma_v6_0_emit_fill_buffer,
1718 : };
1719 :
1720 : static void sdma_v6_0_set_buffer_funcs(struct amdgpu_device *adev)
1721 : {
1722 0 : adev->mman.buffer_funcs = &sdma_v6_0_buffer_funcs;
1723 0 : adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
1724 : }
1725 :
1726 : static const struct amdgpu_vm_pte_funcs sdma_v6_0_vm_pte_funcs = {
1727 : .copy_pte_num_dw = 7,
1728 : .copy_pte = sdma_v6_0_vm_copy_pte,
1729 : .write_pte = sdma_v6_0_vm_write_pte,
1730 : .set_pte_pde = sdma_v6_0_vm_set_pte_pde,
1731 : };
1732 :
1733 : static void sdma_v6_0_set_vm_pte_funcs(struct amdgpu_device *adev)
1734 : {
1735 : unsigned i;
1736 :
1737 0 : adev->vm_manager.vm_pte_funcs = &sdma_v6_0_vm_pte_funcs;
1738 0 : for (i = 0; i < adev->sdma.num_instances; i++) {
1739 0 : adev->vm_manager.vm_pte_scheds[i] =
1740 0 : &adev->sdma.instance[i].ring.sched;
1741 : }
1742 0 : adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
1743 : }
1744 :
1745 : const struct amdgpu_ip_block_version sdma_v6_0_ip_block = {
1746 : .type = AMD_IP_BLOCK_TYPE_SDMA,
1747 : .major = 6,
1748 : .minor = 0,
1749 : .rev = 0,
1750 : .funcs = &sdma_v6_0_ip_funcs,
1751 : };
|