Line data Source code
1 : /*
2 : * Copyright 2016 Advanced Micro Devices, Inc.
3 : * All Rights Reserved.
4 : *
5 : * Permission is hereby granted, free of charge, to any person obtaining a
6 : * copy of this software and associated documentation files (the
7 : * "Software"), to deal in the Software without restriction, including
8 : * without limitation the rights to use, copy, modify, merge, publish,
9 : * distribute, sub license, and/or sell copies of the Software, and to
10 : * permit persons to whom the Software is furnished to do so, subject to
11 : * the following conditions:
12 : *
13 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 : * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16 : * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17 : * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18 : * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19 : * USE OR OTHER DEALINGS IN THE SOFTWARE.
20 : *
21 : * The above copyright notice and this permission notice (including the
22 : * next paragraph) shall be included in all copies or substantial portions
23 : * of the Software.
24 : *
25 : */
26 :
27 : #include <linux/firmware.h>
28 : #include <drm/drm_drv.h>
29 :
30 : #include "amdgpu.h"
31 : #include "amdgpu_vce.h"
32 : #include "soc15.h"
33 : #include "soc15d.h"
34 : #include "soc15_common.h"
35 : #include "mmsch_v1_0.h"
36 :
37 : #include "vce/vce_4_0_offset.h"
38 : #include "vce/vce_4_0_default.h"
39 : #include "vce/vce_4_0_sh_mask.h"
40 : #include "mmhub/mmhub_1_0_offset.h"
41 : #include "mmhub/mmhub_1_0_sh_mask.h"
42 :
43 : #include "ivsrcid/vce/irqsrcs_vce_4_0.h"
44 :
45 : #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02
46 :
47 : #define VCE_V4_0_FW_SIZE (384 * 1024)
48 : #define VCE_V4_0_STACK_SIZE (64 * 1024)
49 : #define VCE_V4_0_DATA_SIZE ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
50 :
51 : static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
52 : static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
53 : static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
54 :
55 : /**
56 : * vce_v4_0_ring_get_rptr - get read pointer
57 : *
58 : * @ring: amdgpu_ring pointer
59 : *
60 : * Returns the current hardware read pointer
61 : */
62 0 : static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
63 : {
64 0 : struct amdgpu_device *adev = ring->adev;
65 :
66 0 : if (ring->me == 0)
67 0 : return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
68 0 : else if (ring->me == 1)
69 0 : return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
70 : else
71 0 : return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
72 : }
73 :
74 : /**
75 : * vce_v4_0_ring_get_wptr - get write pointer
76 : *
77 : * @ring: amdgpu_ring pointer
78 : *
79 : * Returns the current hardware write pointer
80 : */
81 0 : static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
82 : {
83 0 : struct amdgpu_device *adev = ring->adev;
84 :
85 0 : if (ring->use_doorbell)
86 0 : return *ring->wptr_cpu_addr;
87 :
88 0 : if (ring->me == 0)
89 0 : return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
90 0 : else if (ring->me == 1)
91 0 : return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
92 : else
93 0 : return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
94 : }
95 :
96 : /**
97 : * vce_v4_0_ring_set_wptr - set write pointer
98 : *
99 : * @ring: amdgpu_ring pointer
100 : *
101 : * Commits the write pointer to the hardware
102 : */
103 0 : static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
104 : {
105 0 : struct amdgpu_device *adev = ring->adev;
106 :
107 0 : if (ring->use_doorbell) {
108 : /* XXX check if swapping is necessary on BE */
109 0 : *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
110 0 : WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
111 0 : return;
112 : }
113 :
114 0 : if (ring->me == 0)
115 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
116 : lower_32_bits(ring->wptr));
117 0 : else if (ring->me == 1)
118 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
119 : lower_32_bits(ring->wptr));
120 : else
121 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
122 : lower_32_bits(ring->wptr));
123 : }
124 :
125 0 : static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
126 : {
127 : int i, j;
128 :
129 0 : for (i = 0; i < 10; ++i) {
130 0 : for (j = 0; j < 100; ++j) {
131 0 : uint32_t status =
132 0 : RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
133 :
134 0 : if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
135 : return 0;
136 0 : mdelay(10);
137 : }
138 :
139 0 : DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
140 0 : WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
141 : VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
142 : ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
143 0 : mdelay(10);
144 0 : WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
145 : ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
146 0 : mdelay(10);
147 :
148 : }
149 :
150 : return -ETIMEDOUT;
151 : }
152 :
153 0 : static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
154 : struct amdgpu_mm_table *table)
155 : {
156 0 : uint32_t data = 0, loop;
157 0 : uint64_t addr = table->gpu_addr;
158 0 : struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
159 : uint32_t size;
160 :
161 0 : size = header->header_size + header->vce_table_size + header->uvd_table_size;
162 :
163 : /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
164 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
165 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
166 :
167 : /* 2, update vmid of descriptor */
168 0 : data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
169 0 : data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
170 0 : data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
171 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
172 :
173 : /* 3, notify mmsch about the size of this descriptor */
174 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
175 :
176 : /* 4, set resp to zero */
177 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
178 :
179 0 : WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
180 0 : *adev->vce.ring[0].wptr_cpu_addr = 0;
181 0 : adev->vce.ring[0].wptr = 0;
182 0 : adev->vce.ring[0].wptr_old = 0;
183 :
184 : /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
185 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
186 :
187 0 : data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
188 0 : loop = 1000;
189 0 : while ((data & 0x10000002) != 0x10000002) {
190 0 : udelay(10);
191 0 : data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
192 0 : loop--;
193 0 : if (!loop)
194 : break;
195 : }
196 :
197 0 : if (!loop) {
198 0 : dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
199 : return -EBUSY;
200 : }
201 :
202 : return 0;
203 : }
204 :
205 0 : static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
206 : {
207 : struct amdgpu_ring *ring;
208 : uint32_t offset, size;
209 0 : uint32_t table_size = 0;
210 0 : struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
211 0 : struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
212 0 : struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
213 0 : struct mmsch_v1_0_cmd_end end = { { 0 } };
214 0 : uint32_t *init_table = adev->virt.mm_table.cpu_addr;
215 0 : struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
216 :
217 : direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
218 0 : direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
219 0 : direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
220 0 : end.cmd_header.command_type = MMSCH_COMMAND__END;
221 :
222 0 : if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
223 0 : header->version = MMSCH_VERSION;
224 0 : header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;
225 :
226 0 : if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
227 0 : header->vce_table_offset = header->header_size;
228 : else
229 0 : header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;
230 :
231 0 : init_table += header->vce_table_offset;
232 :
233 0 : ring = &adev->vce.ring[0];
234 0 : MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
235 : lower_32_bits(ring->gpu_addr));
236 0 : MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
237 : upper_32_bits(ring->gpu_addr));
238 0 : MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
239 : ring->ring_size / 4);
240 :
241 : /* begin of MC_RESUME */
242 0 : MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
243 0 : MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
244 0 : MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
245 0 : MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
246 0 : MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
247 :
248 0 : offset = AMDGPU_VCE_FIRMWARE_OFFSET;
249 0 : if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
250 0 : uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
251 0 : uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
252 0 : uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;
253 :
254 0 : MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
255 : mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
256 0 : MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
257 : mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
258 : (tmr_mc_addr >> 40) & 0xff);
259 0 : MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
260 : } else {
261 0 : MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
262 : mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
263 : adev->vce.gpu_addr >> 8);
264 0 : MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
265 : mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
266 : (adev->vce.gpu_addr >> 40) & 0xff);
267 0 : MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
268 : offset & ~0x0f000000);
269 :
270 : }
271 0 : MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
272 : mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
273 : adev->vce.gpu_addr >> 8);
274 0 : MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
275 : mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
276 : (adev->vce.gpu_addr >> 40) & 0xff);
277 0 : MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
278 : mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
279 : adev->vce.gpu_addr >> 8);
280 0 : MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
281 : mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
282 : (adev->vce.gpu_addr >> 40) & 0xff);
283 :
284 0 : size = VCE_V4_0_FW_SIZE;
285 0 : MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
286 :
287 0 : offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
288 0 : size = VCE_V4_0_STACK_SIZE;
289 0 : MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
290 : (offset & ~0x0f000000) | (1 << 24));
291 0 : MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
292 :
293 0 : offset += size;
294 0 : size = VCE_V4_0_DATA_SIZE;
295 0 : MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
296 : (offset & ~0x0f000000) | (2 << 24));
297 0 : MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
298 :
299 0 : MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
300 0 : MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
301 : VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
302 : VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
303 :
304 : /* end of MC_RESUME */
305 0 : MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
306 : VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
307 0 : MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
308 : ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
309 0 : MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
310 : ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);
311 :
312 0 : MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
313 : VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
314 : VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);
315 :
316 : /* clear BUSY flag */
317 0 : MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
318 : ~VCE_STATUS__JOB_BUSY_MASK, 0);
319 :
320 : /* add end packet */
321 0 : memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
322 0 : table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
323 0 : header->vce_table_size = table_size;
324 : }
325 :
326 0 : return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
327 : }
328 :
329 : /**
330 : * vce_v4_0_start - start VCE block
331 : *
332 : * @adev: amdgpu_device pointer
333 : *
334 : * Set up and start the VCE block
335 : */
336 0 : static int vce_v4_0_start(struct amdgpu_device *adev)
337 : {
338 : struct amdgpu_ring *ring;
339 : int r;
340 :
341 0 : ring = &adev->vce.ring[0];
342 :
343 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
344 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
345 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
346 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
347 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
348 :
349 0 : ring = &adev->vce.ring[1];
350 :
351 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
352 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
353 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
354 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
355 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);
356 :
357 0 : ring = &adev->vce.ring[2];
358 :
359 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
360 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
361 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
362 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
363 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);
364 :
365 0 : vce_v4_0_mc_resume(adev);
366 0 : WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
367 : ~VCE_STATUS__JOB_BUSY_MASK);
368 :
369 0 : WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);
370 :
371 0 : WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
372 : ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
373 0 : mdelay(100);
374 :
375 0 : r = vce_v4_0_firmware_loaded(adev);
376 :
377 : /* clear BUSY flag */
378 0 : WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
379 :
380 0 : if (r) {
381 0 : DRM_ERROR("VCE not responding, giving up!!!\n");
382 0 : return r;
383 : }
384 :
385 : return 0;
386 : }
387 :
388 0 : static int vce_v4_0_stop(struct amdgpu_device *adev)
389 : {
390 :
391 : /* Disable VCPU */
392 0 : WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);
393 :
394 : /* hold on ECPU */
395 0 : WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
396 : VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
397 : ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
398 :
399 : /* clear VCE_STATUS */
400 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);
401 :
402 : /* Set Clock-Gating off */
403 : /* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
404 : vce_v4_0_set_vce_sw_clock_gating(adev, false);
405 : */
406 :
407 0 : return 0;
408 : }
409 :
410 0 : static int vce_v4_0_early_init(void *handle)
411 : {
412 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
413 :
414 0 : if (amdgpu_sriov_vf(adev)) /* currently only ring0 is supported under SRIOV */
415 0 : adev->vce.num_rings = 1;
416 : else
417 0 : adev->vce.num_rings = 3;
418 :
419 0 : vce_v4_0_set_ring_funcs(adev);
420 0 : vce_v4_0_set_irq_funcs(adev);
421 :
422 0 : return 0;
423 : }
424 :
425 0 : static int vce_v4_0_sw_init(void *handle)
426 : {
427 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
428 : struct amdgpu_ring *ring;
429 :
430 : unsigned size;
431 : int r, i;
432 :
433 0 : r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
434 0 : if (r)
435 : return r;
436 :
437 0 : size = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
438 0 : if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
439 0 : size += VCE_V4_0_FW_SIZE;
440 :
441 0 : r = amdgpu_vce_sw_init(adev, size);
442 0 : if (r)
443 : return r;
444 :
445 0 : if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
446 : const struct common_firmware_header *hdr;
447 0 : unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
448 :
449 0 : adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
450 0 : if (!adev->vce.saved_bo)
451 : return -ENOMEM;
452 :
453 0 : hdr = (const struct common_firmware_header *)adev->vce.fw->data;
454 0 : adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
455 0 : adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
456 0 : adev->firmware.fw_size +=
457 0 : ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
458 0 : DRM_INFO("PSP loading VCE firmware\n");
459 : } else {
460 0 : r = amdgpu_vce_resume(adev);
461 0 : if (r)
462 : return r;
463 : }
464 :
465 0 : for (i = 0; i < adev->vce.num_rings; i++) {
466 0 : enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);
467 :
468 0 : ring = &adev->vce.ring[i];
469 0 : sprintf(ring->name, "vce%d", i);
470 0 : if (amdgpu_sriov_vf(adev)) {
471 : /* DOORBELL only works under SRIOV */
472 0 : ring->use_doorbell = true;
473 :
474 : /* currently only the first encode ring is used under SRIOV,
475 : * so point the other rings at unused doorbell locations.
476 : */
477 0 : if (i == 0)
478 0 : ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
479 : else
480 0 : ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
481 : }
482 0 : r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
483 : hw_prio, NULL);
484 0 : if (r)
485 : return r;
486 : }
487 :
488 :
489 0 : r = amdgpu_vce_entity_init(adev);
490 0 : if (r)
491 : return r;
492 :
493 0 : r = amdgpu_virt_alloc_mm_table(adev);
494 : if (r)
495 : return r;
496 :
497 : return r;
498 : }
499 :
500 0 : static int vce_v4_0_sw_fini(void *handle)
501 : {
502 : int r;
503 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
504 :
505 : /* free MM table */
506 0 : amdgpu_virt_free_mm_table(adev);
507 :
508 0 : if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
509 0 : kvfree(adev->vce.saved_bo);
510 0 : adev->vce.saved_bo = NULL;
511 : }
512 :
513 0 : r = amdgpu_vce_suspend(adev);
514 0 : if (r)
515 : return r;
516 :
517 0 : return amdgpu_vce_sw_fini(adev);
518 : }
519 :
520 0 : static int vce_v4_0_hw_init(void *handle)
521 : {
522 : int r, i;
523 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
524 :
525 0 : if (amdgpu_sriov_vf(adev))
526 0 : r = vce_v4_0_sriov_start(adev);
527 : else
528 0 : r = vce_v4_0_start(adev);
529 0 : if (r)
530 : return r;
531 :
532 0 : for (i = 0; i < adev->vce.num_rings; i++) {
533 0 : r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
534 0 : if (r)
535 : return r;
536 : }
537 :
538 0 : DRM_INFO("VCE initialized successfully.\n");
539 :
540 0 : return 0;
541 : }
542 :
543 0 : static int vce_v4_0_hw_fini(void *handle)
544 : {
545 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
546 :
547 0 : cancel_delayed_work_sync(&adev->vce.idle_work);
548 :
549 0 : if (!amdgpu_sriov_vf(adev)) {
550 : /* vce_v4_0_wait_for_idle(handle); */
551 0 : vce_v4_0_stop(adev);
552 : } else {
553 : /* full access mode, so don't touch any VCE register */
554 0 : DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
555 : }
556 :
557 0 : return 0;
558 : }
559 :
560 0 : static int vce_v4_0_suspend(void *handle)
561 : {
562 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
563 : int r, idx;
564 :
565 0 : if (adev->vce.vcpu_bo == NULL)
566 : return 0;
567 :
568 0 : if (drm_dev_enter(adev_to_drm(adev), &idx)) {
569 0 : if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
570 0 : unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
571 0 : void *ptr = adev->vce.cpu_addr;
572 :
573 0 : memcpy_fromio(adev->vce.saved_bo, ptr, size);
574 : }
575 0 : drm_dev_exit(idx);
576 : }
577 :
578 : /*
579 : * Proper cleanups before halting the HW engine:
580 : * - cancel the delayed idle work
581 : * - enable powergating
582 : * - enable clockgating
583 : * - disable dpm
584 : *
585 : * TODO: to align with the VCN implementation, move the
586 : * jobs for clockgating/powergating/dpm setting to
587 : * ->set_powergating_state().
588 : */
589 0 : cancel_delayed_work_sync(&adev->vce.idle_work);
590 :
591 0 : if (adev->pm.dpm_enabled) {
592 0 : amdgpu_dpm_enable_vce(adev, false);
593 : } else {
594 0 : amdgpu_asic_set_vce_clocks(adev, 0, 0);
595 0 : amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
596 : AMD_PG_STATE_GATE);
597 0 : amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
598 : AMD_CG_STATE_GATE);
599 : }
600 :
601 0 : r = vce_v4_0_hw_fini(adev);
602 0 : if (r)
603 : return r;
604 :
605 0 : return amdgpu_vce_suspend(adev);
606 : }
607 :
608 0 : static int vce_v4_0_resume(void *handle)
609 : {
610 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
611 : int r, idx;
612 :
613 0 : if (adev->vce.vcpu_bo == NULL)
614 : return -EINVAL;
615 :
616 0 : if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
617 :
618 0 : if (drm_dev_enter(adev_to_drm(adev), &idx)) {
619 0 : unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
620 0 : void *ptr = adev->vce.cpu_addr;
621 :
622 0 : memcpy_toio(ptr, adev->vce.saved_bo, size);
623 0 : drm_dev_exit(idx);
624 : }
625 : } else {
626 0 : r = amdgpu_vce_resume(adev);
627 0 : if (r)
628 : return r;
629 : }
630 :
631 0 : return vce_v4_0_hw_init(adev);
632 : }
633 :
634 0 : static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
635 : {
636 : uint32_t offset, size;
637 : uint64_t tmr_mc_addr;
638 :
639 0 : WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
640 0 : WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
641 0 : WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
642 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);
643 :
644 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
645 0 : WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
646 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
647 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
648 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
649 :
650 0 : offset = AMDGPU_VCE_FIRMWARE_OFFSET;
651 :
652 0 : if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
653 0 : tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
654 0 : adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
655 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
656 : (tmr_mc_addr >> 8));
657 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
658 : (tmr_mc_addr >> 40) & 0xff);
659 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
660 : } else {
661 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
662 : (adev->vce.gpu_addr >> 8));
663 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
664 : (adev->vce.gpu_addr >> 40) & 0xff);
665 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
666 : }
667 :
668 0 : size = VCE_V4_0_FW_SIZE;
669 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
670 :
671 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
672 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
673 0 : offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
674 0 : size = VCE_V4_0_STACK_SIZE;
675 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
676 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
677 :
678 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
679 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
680 0 : offset += size;
681 0 : size = VCE_V4_0_DATA_SIZE;
682 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
683 0 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
684 :
685 0 : WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
686 0 : WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
687 : VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
688 : ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
689 0 : }
690 :
691 0 : static int vce_v4_0_set_clockgating_state(void *handle,
692 : enum amd_clockgating_state state)
693 : {
694 : /* needed for driver unload*/
695 0 : return 0;
696 : }
697 :
698 : #if 0
699 : static bool vce_v4_0_is_idle(void *handle)
700 : {
701 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
702 : u32 mask = 0;
703 :
704 : mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
705 : mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
706 :
707 : return !(RREG32(mmSRBM_STATUS2) & mask);
708 : }
709 :
710 : static int vce_v4_0_wait_for_idle(void *handle)
711 : {
712 : unsigned i;
713 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
714 :
715 : for (i = 0; i < adev->usec_timeout; i++)
716 : if (vce_v4_0_is_idle(handle))
717 : return 0;
718 :
719 : return -ETIMEDOUT;
720 : }
721 :
722 : #define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK 0x00000008L /* AUTO_BUSY */
723 : #define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK 0x00000010L /* RB0_BUSY */
724 : #define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK 0x00000020L /* RB1_BUSY */
725 : #define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
726 : VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
727 :
728 : static bool vce_v4_0_check_soft_reset(void *handle)
729 : {
730 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
731 : u32 srbm_soft_reset = 0;
732 :
733 : /* According to the VCE team, we should use VCE_STATUS instead of
734 : * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
735 : * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
736 : * instance's registers are accessed
737 : * (0 for the 1st instance, 0x10 for the 2nd instance).
738 : *
739 : *VCE_STATUS
740 : *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 | |FW_LOADED|JOB |
741 : *|----+----+-----------+----+----+----+----------+---------+----|
742 : *|bit8|bit7| bit6 |bit5|bit4|bit3| bit2 | bit1 |bit0|
743 : *
744 : * The VCE team suggests using bits 3 to 6 for the busy status check.
745 : */
746 : mutex_lock(&adev->grbm_idx_mutex);
747 : WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
748 : if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
749 : srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
750 : srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
751 : }
752 : WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
753 : if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
754 : srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
755 : srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
756 : }
757 : WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
758 : mutex_unlock(&adev->grbm_idx_mutex);
759 :
760 : if (srbm_soft_reset) {
761 : adev->vce.srbm_soft_reset = srbm_soft_reset;
762 : return true;
763 : } else {
764 : adev->vce.srbm_soft_reset = 0;
765 : return false;
766 : }
767 : }
768 :
769 : static int vce_v4_0_soft_reset(void *handle)
770 : {
771 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
772 : u32 srbm_soft_reset;
773 :
774 : if (!adev->vce.srbm_soft_reset)
775 : return 0;
776 : srbm_soft_reset = adev->vce.srbm_soft_reset;
777 :
778 : if (srbm_soft_reset) {
779 : u32 tmp;
780 :
781 : tmp = RREG32(mmSRBM_SOFT_RESET);
782 : tmp |= srbm_soft_reset;
783 : dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
784 : WREG32(mmSRBM_SOFT_RESET, tmp);
785 : tmp = RREG32(mmSRBM_SOFT_RESET);
786 :
787 : udelay(50);
788 :
789 : tmp &= ~srbm_soft_reset;
790 : WREG32(mmSRBM_SOFT_RESET, tmp);
791 : tmp = RREG32(mmSRBM_SOFT_RESET);
792 :
793 : /* Wait a little for things to settle down */
794 : udelay(50);
795 : }
796 :
797 : return 0;
798 : }
799 :
800 : static int vce_v4_0_pre_soft_reset(void *handle)
801 : {
802 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
803 :
804 : if (!adev->vce.srbm_soft_reset)
805 : return 0;
806 :
807 : mdelay(5);
808 :
809 : return vce_v4_0_suspend(adev);
810 : }
811 :
812 :
813 : static int vce_v4_0_post_soft_reset(void *handle)
814 : {
815 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
816 :
817 : if (!adev->vce.srbm_soft_reset)
818 : return 0;
819 :
820 : mdelay(5);
821 :
822 : return vce_v4_0_resume(adev);
823 : }
824 :
825 : static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
826 : {
827 : u32 tmp, data;
828 :
829 : tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
830 : if (override)
831 : data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
832 : else
833 : data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
834 :
835 : if (tmp != data)
836 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
837 : }
838 :
839 : static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
840 : bool gated)
841 : {
842 : u32 data;
843 :
844 : /* Set Override to disable Clock Gating */
845 : vce_v4_0_override_vce_clock_gating(adev, true);
846 :
847 : /* This function enables MGCG, which is controlled by firmware.
848 : * With the clocks in the gated state the core is still
849 : * accessible, but the firmware will throttle the clocks on the
850 : * fly as necessary.
851 : */
852 : if (gated) {
853 : data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
854 : data |= 0x1ff;
855 : data &= ~0xef0000;
856 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
857 :
858 : data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
859 : data |= 0x3ff000;
860 : data &= ~0xffc00000;
861 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
862 :
863 : data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
864 : data |= 0x2;
865 : data &= ~0x00010000;
866 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
867 :
868 : data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
869 : data |= 0x37f;
870 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
871 :
872 : data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
873 : data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
874 : VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
875 : VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
876 : 0x8;
877 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
878 : } else {
879 : data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
880 : data &= ~0x80010;
881 : data |= 0xe70008;
882 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
883 :
884 : data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
885 : data |= 0xffc00000;
886 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
887 :
888 : data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
889 : data |= 0x10000;
890 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
891 :
892 : data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
893 : data &= ~0xffc00000;
894 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
895 :
896 : data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
897 : data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
898 : VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
899 : VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
900 : 0x8);
901 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
902 : }
903 : vce_v4_0_override_vce_clock_gating(adev, false);
904 : }
905 :
906 : static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
907 : {
908 : u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
909 :
910 : if (enable)
911 : tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
912 : else
913 : tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
914 :
915 : WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
916 : }
917 :
918 : static int vce_v4_0_set_clockgating_state(void *handle,
919 : enum amd_clockgating_state state)
920 : {
921 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
922 : bool enable = (state == AMD_CG_STATE_GATE);
923 : int i;
924 :
925 : if ((adev->asic_type == CHIP_POLARIS10) ||
926 : (adev->asic_type == CHIP_TONGA) ||
927 : (adev->asic_type == CHIP_FIJI))
928 : vce_v4_0_set_bypass_mode(adev, enable);
929 :
930 : if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
931 : return 0;
932 :
933 : mutex_lock(&adev->grbm_idx_mutex);
934 : for (i = 0; i < 2; i++) {
935 : /* Program VCE Instance 0 or 1 if not harvested */
936 : if (adev->vce.harvest_config & (1 << i))
937 : continue;
938 :
939 : WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
940 :
941 : if (enable) {
942 : /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
943 : uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
944 : data &= ~(0xf | 0xff0);
945 : data |= ((0x0 << 0) | (0x04 << 4));
946 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);
947 :
948 : /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
949 : data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
950 : data &= ~(0xf | 0xff0);
951 : data |= ((0x0 << 0) | (0x04 << 4));
952 : WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
953 : }
954 :
955 : vce_v4_0_set_vce_sw_clock_gating(adev, enable);
956 : }
957 :
958 : WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
959 : mutex_unlock(&adev->grbm_idx_mutex);
960 :
961 : return 0;
962 : }
963 : #endif
964 :
965 0 : static int vce_v4_0_set_powergating_state(void *handle,
966 : enum amd_powergating_state state)
967 : {
968 : /* This doesn't actually powergate the VCE block.
969 : * That's done in the dpm code via the SMC. This
970 : * just re-inits the block as necessary. The actual
971 : * gating still happens in the dpm code. We should
972 : * revisit this when there is a cleaner line between
973 : * the smc and the hw blocks
974 : */
975 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
976 :
977 0 : if (state == AMD_PG_STATE_GATE)
978 0 : return vce_v4_0_stop(adev);
979 : else
980 0 : return vce_v4_0_start(adev);
981 : }
982 :
983 0 : static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
984 : struct amdgpu_ib *ib, uint32_t flags)
985 : {
986 0 : unsigned vmid = AMDGPU_JOB_GET_VMID(job);
987 :
988 0 : amdgpu_ring_write(ring, VCE_CMD_IB_VM);
989 0 : amdgpu_ring_write(ring, vmid);
990 0 : amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
991 0 : amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
992 0 : amdgpu_ring_write(ring, ib->length_dw);
993 0 : }
994 :
995 0 : static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
996 : u64 seq, unsigned flags)
997 : {
998 0 : WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
999 :
1000 0 : amdgpu_ring_write(ring, VCE_CMD_FENCE);
1001 0 : amdgpu_ring_write(ring, addr);
1002 0 : amdgpu_ring_write(ring, upper_32_bits(addr));
1003 0 : amdgpu_ring_write(ring, seq);
1004 0 : amdgpu_ring_write(ring, VCE_CMD_TRAP);
1005 0 : }
1006 :
1007 0 : static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
1008 : {
1009 0 : amdgpu_ring_write(ring, VCE_CMD_END);
1010 0 : }
1011 :
1012 0 : static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
1013 : uint32_t val, uint32_t mask)
1014 : {
1015 0 : amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
1016 0 : amdgpu_ring_write(ring, reg << 2);
1017 0 : amdgpu_ring_write(ring, mask);
1018 0 : amdgpu_ring_write(ring, val);
1019 0 : }
1020 :
1021 0 : static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
1022 : unsigned int vmid, uint64_t pd_addr)
1023 : {
1024 0 : struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
1025 :
1026 0 : pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
1027 :
1028 : /* wait for reg writes */
1029 0 : vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
1030 0 : vmid * hub->ctx_addr_distance,
1031 : lower_32_bits(pd_addr), 0xffffffff);
1032 0 : }
1033 :
1034 0 : static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
1035 : uint32_t reg, uint32_t val)
1036 : {
1037 0 : amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
1038 0 : amdgpu_ring_write(ring, reg << 2);
1039 0 : amdgpu_ring_write(ring, val);
1040 0 : }
1041 :
1042 0 : static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
1043 : struct amdgpu_irq_src *source,
1044 : unsigned type,
1045 : enum amdgpu_interrupt_state state)
1046 : {
1047 0 : uint32_t val = 0;
1048 :
1049 0 : if (!amdgpu_sriov_vf(adev)) {
1050 0 : if (state == AMDGPU_IRQ_STATE_ENABLE)
1051 0 : val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
1052 :
1053 0 : WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
1054 : ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
1055 : }
1056 0 : return 0;
1057 : }
1058 :
1059 0 : static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
1060 : struct amdgpu_irq_src *source,
1061 : struct amdgpu_iv_entry *entry)
1062 : {
1063 0 : DRM_DEBUG("IH: VCE\n");
1064 :
1065 0 : switch (entry->src_data[0]) {
1066 : case 0:
1067 : case 1:
1068 : case 2:
1069 0 : amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
1070 0 : break;
1071 : default:
1072 0 : DRM_ERROR("Unhandled interrupt: %d %d\n",
1073 : entry->src_id, entry->src_data[0]);
1074 0 : break;
1075 : }
1076 :
1077 0 : return 0;
1078 : }
1079 :
1080 : const struct amd_ip_funcs vce_v4_0_ip_funcs = {
1081 : .name = "vce_v4_0",
1082 : .early_init = vce_v4_0_early_init,
1083 : .late_init = NULL,
1084 : .sw_init = vce_v4_0_sw_init,
1085 : .sw_fini = vce_v4_0_sw_fini,
1086 : .hw_init = vce_v4_0_hw_init,
1087 : .hw_fini = vce_v4_0_hw_fini,
1088 : .suspend = vce_v4_0_suspend,
1089 : .resume = vce_v4_0_resume,
1090 : .is_idle = NULL /* vce_v4_0_is_idle */,
1091 : .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
1092 : .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
1093 : .pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
1094 : .soft_reset = NULL /* vce_v4_0_soft_reset */,
1095 : .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
1096 : .set_clockgating_state = vce_v4_0_set_clockgating_state,
1097 : .set_powergating_state = vce_v4_0_set_powergating_state,
1098 : };
1099 :
1100 : static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
1101 : .type = AMDGPU_RING_TYPE_VCE,
1102 : .align_mask = 0x3f,
1103 : .nop = VCE_CMD_NO_OP,
1104 : .support_64bit_ptrs = false,
1105 : .no_user_fence = true,
1106 : .vmhub = AMDGPU_MMHUB_0,
1107 : .get_rptr = vce_v4_0_ring_get_rptr,
1108 : .get_wptr = vce_v4_0_ring_get_wptr,
1109 : .set_wptr = vce_v4_0_ring_set_wptr,
1110 : .parse_cs = amdgpu_vce_ring_parse_cs_vm,
1111 : .emit_frame_size =
1112 : SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1113 : SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
1114 : 4 + /* vce_v4_0_emit_vm_flush */
1115 : 5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
1116 : 1, /* vce_v4_0_ring_insert_end */
1117 : .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
1118 : .emit_ib = vce_v4_0_ring_emit_ib,
1119 : .emit_vm_flush = vce_v4_0_emit_vm_flush,
1120 : .emit_fence = vce_v4_0_ring_emit_fence,
1121 : .test_ring = amdgpu_vce_ring_test_ring,
1122 : .test_ib = amdgpu_vce_ring_test_ib,
1123 : .insert_nop = amdgpu_ring_insert_nop,
1124 : .insert_end = vce_v4_0_ring_insert_end,
1125 : .pad_ib = amdgpu_ring_generic_pad_ib,
1126 : .begin_use = amdgpu_vce_ring_begin_use,
1127 : .end_use = amdgpu_vce_ring_end_use,
1128 : .emit_wreg = vce_v4_0_emit_wreg,
1129 : .emit_reg_wait = vce_v4_0_emit_reg_wait,
1130 : .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1131 : };
1132 :
1133 : static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1134 : {
1135 : int i;
1136 :
1137 0 : for (i = 0; i < adev->vce.num_rings; i++) {
1138 0 : adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1139 0 : adev->vce.ring[i].me = i;
1140 : }
1141 0 : DRM_INFO("VCE enabled in VM mode\n");
1142 : }
1143 :
1144 : static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
1145 : .set = vce_v4_0_set_interrupt_state,
1146 : .process = vce_v4_0_process_interrupt,
1147 : };
1148 :
1149 : static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1150 : {
1151 0 : adev->vce.irq.num_types = 1;
1152 0 : adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
1153 : }
1154 :
1155 : const struct amdgpu_ip_block_version vce_v4_0_ip_block =
1156 : {
1157 : .type = AMD_IP_BLOCK_TYPE_VCE,
1158 : .major = 4,
1159 : .minor = 0,
1160 : .rev = 0,
1161 : .funcs = &vce_v4_0_ip_funcs,
1162 : };