Line data Source code
1 : /*
2 : * Copyright 2019 Advanced Micro Devices, Inc.
3 : *
4 : * Permission is hereby granted, free of charge, to any person obtaining a
5 : * copy of this software and associated documentation files (the "Software"),
6 : * to deal in the Software without restriction, including without limitation
7 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 : * and/or sell copies of the Software, and to permit persons to whom the
9 : * Software is furnished to do so, subject to the following conditions:
10 : *
11 : * The above copyright notice and this permission notice shall be included in
12 : * all copies or substantial portions of the Software.
13 : *
14 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 : * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 : * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 : * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 : * OTHER DEALINGS IN THE SOFTWARE.
21 : *
22 : */
23 :
24 : #include <linux/delay.h>
25 : #include <linux/firmware.h>
26 : #include <linux/module.h>
27 : #include <linux/pci.h>
28 :
29 : #include "amdgpu.h"
30 : #include "amdgpu_ucode.h"
31 : #include "amdgpu_trace.h"
32 :
33 : #include "gc/gc_10_1_0_offset.h"
34 : #include "gc/gc_10_1_0_sh_mask.h"
35 : #include "ivsrcid/sdma0/irqsrcs_sdma0_5_0.h"
36 : #include "ivsrcid/sdma1/irqsrcs_sdma1_5_0.h"
37 :
38 : #include "soc15_common.h"
39 : #include "soc15.h"
40 : #include "navi10_sdma_pkt_open.h"
41 : #include "nbio_v2_3.h"
42 : #include "sdma_common.h"
43 : #include "sdma_v5_0.h"
44 :
45 : MODULE_FIRMWARE("amdgpu/navi10_sdma.bin");
46 : MODULE_FIRMWARE("amdgpu/navi10_sdma1.bin");
47 :
48 : MODULE_FIRMWARE("amdgpu/navi14_sdma.bin");
49 : MODULE_FIRMWARE("amdgpu/navi14_sdma1.bin");
50 :
51 : MODULE_FIRMWARE("amdgpu/navi12_sdma.bin");
52 : MODULE_FIRMWARE("amdgpu/navi12_sdma1.bin");
53 :
54 : MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma.bin");
55 : MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma1.bin");
56 :
57 : #define SDMA1_REG_OFFSET 0x600
58 : #define SDMA0_HYP_DEC_REG_START 0x5880
59 : #define SDMA0_HYP_DEC_REG_END 0x5893
60 : #define SDMA1_HYP_DEC_REG_OFFSET 0x20
61 :
62 : static void sdma_v5_0_set_ring_funcs(struct amdgpu_device *adev);
63 : static void sdma_v5_0_set_buffer_funcs(struct amdgpu_device *adev);
64 : static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev);
65 : static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev);
66 :
67 : static const struct soc15_reg_golden golden_settings_sdma_5[] = {
68 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107),
69 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
70 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
71 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
72 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
73 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
74 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
75 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
76 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
77 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
78 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
79 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_UTCL1_PAGE, 0x00ffffff, 0x000c5c00),
80 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107),
81 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
82 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
83 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
84 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
85 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
86 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
87 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
88 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
89 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
90 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
91 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_UTCL1_PAGE, 0x00ffffff, 0x000c5c00)
92 : };
93 :
94 : static const struct soc15_reg_golden golden_settings_sdma_5_sriov[] = {
95 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
96 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
97 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
98 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
99 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
100 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
101 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
102 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
103 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
104 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
105 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
106 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
107 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
108 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
109 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
110 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
111 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
112 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
113 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
114 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
115 : };
116 :
117 : static const struct soc15_reg_golden golden_settings_sdma_nv10[] = {
118 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
119 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
120 : };
121 :
122 : static const struct soc15_reg_golden golden_settings_sdma_nv14[] = {
123 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
124 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
125 : };
126 :
127 : static const struct soc15_reg_golden golden_settings_sdma_nv12[] = {
128 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
129 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GB_ADDR_CONFIG, 0x001877ff, 0x00000044),
130 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x001877ff, 0x00000044),
131 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GB_ADDR_CONFIG, 0x001877ff, 0x00000044),
132 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x001877ff, 0x00000044),
133 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
134 : };
135 :
136 : static const struct soc15_reg_golden golden_settings_sdma_cyan_skillfish[] = {
137 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107),
138 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GB_ADDR_CONFIG, 0x001877ff, 0x00000044),
139 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x001877ff, 0x00000044),
140 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
141 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
142 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
143 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
144 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
145 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
146 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
147 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
148 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
149 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
150 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_UTCL1_PAGE, 0x007fffff, 0x004c5c00),
151 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107),
152 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GB_ADDR_CONFIG, 0x001877ff, 0x00000044),
153 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x001877ff, 0x00000044),
154 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
155 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
156 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
157 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
158 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
159 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
160 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
161 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
162 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
163 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
164 : SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_UTCL1_PAGE, 0x007fffff, 0x004c5c00)
165 : };
166 :
167 : static u32 sdma_v5_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
168 : {
169 : u32 base;
170 :
171 : if (internal_offset >= SDMA0_HYP_DEC_REG_START &&
172 : internal_offset <= SDMA0_HYP_DEC_REG_END) {
173 0 : base = adev->reg_offset[GC_HWIP][0][1];
174 0 : if (instance == 1)
175 0 : internal_offset += SDMA1_HYP_DEC_REG_OFFSET;
176 : } else {
177 0 : base = adev->reg_offset[GC_HWIP][0][0];
178 0 : if (instance == 1)
179 0 : internal_offset += SDMA1_REG_OFFSET;
180 : }
181 :
182 0 : return base + internal_offset;
183 : }
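/*
 * Worked example (illustrative, based on the constants above): for SDMA
 * instance 1, a register inside the 0x5880..0x5893 HYP_DEC window is
 * addressed from the second GC segment base plus internal_offset + 0x20
 * (SDMA1_HYP_DEC_REG_OFFSET), while every other SDMA register uses the
 * first GC segment base plus internal_offset + 0x600 (SDMA1_REG_OFFSET).
 * Instance 0 uses the unmodified internal_offset in both cases.
 */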
184 :
185 0 : static void sdma_v5_0_init_golden_registers(struct amdgpu_device *adev)
186 : {
187 0 : switch (adev->ip_versions[SDMA0_HWIP][0]) {
188 : case IP_VERSION(5, 0, 0):
189 0 : soc15_program_register_sequence(adev,
190 : golden_settings_sdma_5,
191 : (const u32)ARRAY_SIZE(golden_settings_sdma_5));
192 0 : soc15_program_register_sequence(adev,
193 : golden_settings_sdma_nv10,
194 : (const u32)ARRAY_SIZE(golden_settings_sdma_nv10));
195 0 : break;
196 : case IP_VERSION(5, 0, 2):
197 0 : soc15_program_register_sequence(adev,
198 : golden_settings_sdma_5,
199 : (const u32)ARRAY_SIZE(golden_settings_sdma_5));
200 0 : soc15_program_register_sequence(adev,
201 : golden_settings_sdma_nv14,
202 : (const u32)ARRAY_SIZE(golden_settings_sdma_nv14));
203 0 : break;
204 : case IP_VERSION(5, 0, 5):
205 0 : if (amdgpu_sriov_vf(adev))
206 0 : soc15_program_register_sequence(adev,
207 : golden_settings_sdma_5_sriov,
208 : (const u32)ARRAY_SIZE(golden_settings_sdma_5_sriov));
209 : else
210 0 : soc15_program_register_sequence(adev,
211 : golden_settings_sdma_5,
212 : (const u32)ARRAY_SIZE(golden_settings_sdma_5));
213 0 : soc15_program_register_sequence(adev,
214 : golden_settings_sdma_nv12,
215 : (const u32)ARRAY_SIZE(golden_settings_sdma_nv12));
216 0 : break;
217 : case IP_VERSION(5, 0, 1):
218 0 : soc15_program_register_sequence(adev,
219 : golden_settings_sdma_cyan_skillfish,
220 : (const u32)ARRAY_SIZE(golden_settings_sdma_cyan_skillfish));
221 0 : break;
222 : default:
223 : break;
224 : }
225 0 : }
226 :
227 : /**
228 : * sdma_v5_0_init_microcode - load ucode images from disk
229 : *
230 : * @adev: amdgpu_device pointer
231 : *
232 : * Use the firmware interface to load the ucode images into
233 : * the driver (not loaded into hw).
234 : * Returns 0 on success, error on failure.
235 : */
236 :
237 : // Emulation only; this won't work on a real chip.
238 : // A real Navi10 chip needs to use the PSP to load the firmware.
239 0 : static int sdma_v5_0_init_microcode(struct amdgpu_device *adev)
240 : {
241 : const char *chip_name;
242 : char fw_name[40];
243 0 : int err = 0, i;
244 0 : struct amdgpu_firmware_info *info = NULL;
245 0 : const struct common_firmware_header *header = NULL;
246 : const struct sdma_firmware_header_v1_0 *hdr;
247 :
248 0 : if (amdgpu_sriov_vf(adev) && (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(5, 0, 5)))
249 : return 0;
250 :
251 0 : DRM_DEBUG("\n");
252 :
253 0 : switch (adev->ip_versions[SDMA0_HWIP][0]) {
254 : case IP_VERSION(5, 0, 0):
255 : chip_name = "navi10";
256 : break;
257 : case IP_VERSION(5, 0, 2):
258 0 : chip_name = "navi14";
259 0 : break;
260 : case IP_VERSION(5, 0, 5):
261 0 : chip_name = "navi12";
262 0 : break;
263 : case IP_VERSION(5, 0, 1):
264 0 : chip_name = "cyan_skillfish2";
265 0 : break;
266 : default:
267 0 : BUG();
268 : }
269 :
270 0 : for (i = 0; i < adev->sdma.num_instances; i++) {
271 0 : if (i == 0)
272 0 : snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
273 : else
274 0 : snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
275 0 : err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
276 0 : if (err)
277 : goto out;
278 0 : err = amdgpu_ucode_validate(adev->sdma.instance[i].fw);
279 0 : if (err)
280 : goto out;
281 0 : hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
282 0 : adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
283 0 : adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
284 0 : if (adev->sdma.instance[i].feature_version >= 20)
285 0 : adev->sdma.instance[i].burst_nop = true;
286 0 : DRM_DEBUG("psp_load == '%s'\n",
287 : adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
288 :
289 0 : if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
290 0 : info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
291 0 : info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
292 0 : info->fw = adev->sdma.instance[i].fw;
293 0 : header = (const struct common_firmware_header *)info->fw->data;
294 0 : adev->firmware.fw_size +=
295 0 : ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
296 : }
297 : }
298 : out:
299 0 : if (err) {
300 0 : DRM_ERROR("sdma_v5_0: Failed to load firmware \"%s\"\n", fw_name);
301 0 : for (i = 0; i < adev->sdma.num_instances; i++) {
302 0 : release_firmware(adev->sdma.instance[i].fw);
303 0 : adev->sdma.instance[i].fw = NULL;
304 : }
305 : }
306 : return err;
307 : }
308 :
309 0 : static unsigned sdma_v5_0_ring_init_cond_exec(struct amdgpu_ring *ring)
310 : {
311 : unsigned ret;
312 :
313 0 : amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE));
314 0 : amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
315 0 : amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
316 0 : amdgpu_ring_write(ring, 1);
317 0 : ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */
318 0 : amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
319 :
320 0 : return ret;
321 : }
322 :
323 0 : static void sdma_v5_0_ring_patch_cond_exec(struct amdgpu_ring *ring,
324 : unsigned offset)
325 : {
326 : unsigned cur;
327 :
328 0 : BUG_ON(offset > ring->buf_mask);
329 0 : BUG_ON(ring->ring[offset] != 0x55aa55aa);
330 :
331 0 : cur = (ring->wptr - 1) & ring->buf_mask;
332 0 : if (cur > offset)
333 0 : ring->ring[offset] = cur - offset;
334 : else
335 0 : ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
336 0 : }
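/*
 * Usage sketch (illustrative): a caller emits sdma_v5_0_ring_init_cond_exec()
 * and keeps the returned offset, which points at the 0x55aa55aa placeholder
 * dword; it then emits the packets that the COND_EXE packet guards, and
 * finally calls sdma_v5_0_ring_patch_cond_exec() with that offset so the
 * placeholder is rewritten with the actual number of dwords emitted in
 * between (wrapping around the ring buffer if necessary).
 */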
337 :
338 : /**
339 : * sdma_v5_0_ring_get_rptr - get the current read pointer
340 : *
341 : * @ring: amdgpu ring pointer
342 : *
343 : * Get the current rptr from the hardware (NAVI10+).
344 : */
345 0 : static uint64_t sdma_v5_0_ring_get_rptr(struct amdgpu_ring *ring)
346 : {
347 : u64 *rptr;
348 :
349 : /* XXX check if swapping is necessary on BE */
350 0 : rptr = (u64 *)ring->rptr_cpu_addr;
351 :
352 0 : DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
353 0 : return ((*rptr) >> 2);
354 : }
355 :
356 : /**
357 : * sdma_v5_0_ring_get_wptr - get the current write pointer
358 : *
359 : * @ring: amdgpu ring pointer
360 : *
361 : * Get the current wptr from the hardware (NAVI10+).
362 : */
363 0 : static uint64_t sdma_v5_0_ring_get_wptr(struct amdgpu_ring *ring)
364 : {
365 0 : struct amdgpu_device *adev = ring->adev;
366 : u64 wptr;
367 :
368 0 : if (ring->use_doorbell) {
369 : /* XXX check if swapping is necessary on BE */
370 0 : wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
371 0 : DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
372 : } else {
373 0 : wptr = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI));
374 0 : wptr = wptr << 32;
375 0 : wptr |= RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR));
376 0 : DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n", ring->me, wptr);
377 : }
378 :
379 0 : return wptr >> 2;
380 : }
381 :
382 : /**
383 : * sdma_v5_0_ring_set_wptr - commit the write pointer
384 : *
385 : * @ring: amdgpu ring pointer
386 : *
387 : * Write the wptr back to the hardware (NAVI10+).
388 : */
389 0 : static void sdma_v5_0_ring_set_wptr(struct amdgpu_ring *ring)
390 : {
391 0 : struct amdgpu_device *adev = ring->adev;
392 : uint32_t *wptr_saved;
393 : uint32_t *is_queue_unmap;
394 : uint64_t aggregated_db_index;
395 0 : uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size;
396 :
397 0 : DRM_DEBUG("Setting write pointer\n");
398 0 : if (ring->is_mes_queue) {
399 0 : wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
400 0 : is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
401 : sizeof(uint32_t));
402 0 : aggregated_db_index =
403 0 : amdgpu_mes_get_aggregated_doorbell_index(adev,
404 : AMDGPU_MES_PRIORITY_LEVEL_NORMAL);
405 :
406 0 : atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
407 0 : ring->wptr << 2);
408 0 : *wptr_saved = ring->wptr << 2;
409 0 : if (*is_queue_unmap) {
410 0 : WDOORBELL64(aggregated_db_index, ring->wptr << 2);
411 0 : DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
412 : ring->doorbell_index, ring->wptr << 2);
413 0 : WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
414 : } else {
415 0 : DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
416 : ring->doorbell_index, ring->wptr << 2);
417 0 : WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
418 :
419 0 : if (*is_queue_unmap)
420 0 : WDOORBELL64(aggregated_db_index,
421 : ring->wptr << 2);
422 : }
423 : } else {
424 0 : if (ring->use_doorbell) {
425 0 : DRM_DEBUG("Using doorbell -- "
426 : "wptr_offs == 0x%08x "
427 : "lower_32_bits(ring->wptr) << 2 == 0x%08x "
428 : "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
429 : ring->wptr_offs,
430 : lower_32_bits(ring->wptr << 2),
431 : upper_32_bits(ring->wptr << 2));
432 : /* XXX check if swapping is necessary on BE */
433 0 : atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
434 0 : ring->wptr << 2);
435 0 : DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
436 : ring->doorbell_index, ring->wptr << 2);
437 0 : WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
438 : } else {
439 0 : DRM_DEBUG("Not using doorbell -- "
440 : "mmSDMA%i_GFX_RB_WPTR == 0x%08x "
441 : "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
442 : ring->me,
443 : lower_32_bits(ring->wptr << 2),
444 : ring->me,
445 : upper_32_bits(ring->wptr << 2));
446 0 : WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev,
447 : ring->me, mmSDMA0_GFX_RB_WPTR),
448 : lower_32_bits(ring->wptr << 2));
449 0 : WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev,
450 : ring->me, mmSDMA0_GFX_RB_WPTR_HI),
451 : upper_32_bits(ring->wptr << 2));
452 : }
453 : }
454 0 : }
455 :
456 0 : static void sdma_v5_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
457 : {
458 0 : struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
459 : int i;
460 :
461 0 : for (i = 0; i < count; i++)
462 0 : if (sdma && sdma->burst_nop && (i == 0))
463 0 : amdgpu_ring_write(ring, ring->funcs->nop |
464 0 : SDMA_PKT_NOP_HEADER_COUNT(count - 1));
465 : else
466 0 : amdgpu_ring_write(ring, ring->funcs->nop);
467 0 : }
468 :
469 : /**
470 : * sdma_v5_0_ring_emit_ib - Schedule an IB on the DMA engine
471 : *
472 : * @ring: amdgpu ring pointer
473 : * @job: job to retrieve vmid from
474 : * @ib: IB object to schedule
475 : * @flags: unused
476 : *
477 : * Schedule an IB in the DMA ring (NAVI10).
478 : */
479 0 : static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
480 : struct amdgpu_job *job,
481 : struct amdgpu_ib *ib,
482 : uint32_t flags)
483 : {
484 0 : unsigned vmid = AMDGPU_JOB_GET_VMID(job);
485 0 : uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
486 :
487 : /* An IB packet must end on an 8-dword boundary, i.e. the next dword
488 : * must land on an 8-dword boundary. Our IB packet below is 6
489 : * dwords long, so add x NOPs such that, in
490 : * modular arithmetic,
491 : * wptr + 6 + x = 8k, k >= 0, which in C is
492 : * (wptr + 6 + x) % 8 == 0.
493 : * The expression below is a solution for x.
494 : */
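/* Numeric example (illustrative): if lower_32_bits(ring->wptr) == 5, then
 * x = (2 - 5) & 7 == 5, so five NOPs are inserted, the IB packet occupies
 * dwords 10..15, and 5 + 5 + 6 == 16 is the required multiple of 8.
 */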
495 0 : sdma_v5_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
496 :
497 0 : amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
498 0 : SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
499 : /* base must be 32 byte aligned */
500 0 : amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
501 0 : amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
502 0 : amdgpu_ring_write(ring, ib->length_dw);
503 0 : amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr));
504 0 : amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
505 0 : }
506 :
507 : /**
508 : * sdma_v5_0_ring_emit_mem_sync - flush the IB with a graphics cache rinse
509 : *
510 : * @ring: amdgpu ring pointer
511 : *
512 : * Flush the IB with a graphics cache rinse (GCR) request.
513 : */
514 0 : static void sdma_v5_0_ring_emit_mem_sync(struct amdgpu_ring *ring)
515 : {
516 0 : uint32_t gcr_cntl = SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB | SDMA_GCR_GLM_INV |
517 : SDMA_GCR_GL1_INV | SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV |
518 : SDMA_GCR_GLI_INV(1);
519 :
520 : /* flush the entire L0/L1/L2 cache; this could be narrowed based on performance requirements */
521 0 : amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ));
522 0 : amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0));
523 0 : amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(gcr_cntl) |
524 : SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(0));
525 0 : amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(0) |
526 : SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(gcr_cntl >> 16));
527 0 : amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(0) |
528 : SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(0));
529 0 : }
530 :
531 : /**
532 : * sdma_v5_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
533 : *
534 : * @ring: amdgpu ring pointer
535 : *
536 : * Emit an hdp flush packet on the requested DMA ring.
537 : */
538 0 : static void sdma_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
539 : {
540 0 : struct amdgpu_device *adev = ring->adev;
541 0 : u32 ref_and_mask = 0;
542 0 : const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
543 :
544 0 : if (ring->me == 0)
545 0 : ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0;
546 : else
547 0 : ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1;
548 :
549 0 : amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
550 : SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
551 : SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
552 0 : amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
553 0 : amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
554 0 : amdgpu_ring_write(ring, ref_and_mask); /* reference */
555 0 : amdgpu_ring_write(ring, ref_and_mask); /* mask */
556 0 : amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
557 : SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
558 0 : }
559 :
560 : /**
561 : * sdma_v5_0_ring_emit_fence - emit a fence on the DMA ring
562 : *
563 : * @ring: amdgpu ring pointer
564 : * @addr: address
565 : * @seq: sequence number
566 : * @flags: fence related flags
567 : *
568 : * Add a DMA fence packet to the ring to write
569 : * the fence seq number, and a DMA trap packet to generate
570 : * an interrupt if needed (NAVI10).
571 : */
572 0 : static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
573 : unsigned flags)
574 : {
575 0 : bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
576 : /* write the fence */
577 0 : amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) |
578 : SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Uncached (UC) */
579 : /* zero in first two bits */
580 0 : BUG_ON(addr & 0x3);
581 0 : amdgpu_ring_write(ring, lower_32_bits(addr));
582 0 : amdgpu_ring_write(ring, upper_32_bits(addr));
583 0 : amdgpu_ring_write(ring, lower_32_bits(seq));
584 :
585 : /* optionally write high bits as well */
586 0 : if (write64bit) {
587 0 : addr += 4;
588 0 : amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) |
589 : SDMA_PKT_FENCE_HEADER_MTYPE(0x3));
590 : /* zero in first two bits */
591 0 : BUG_ON(addr & 0x3);
592 0 : amdgpu_ring_write(ring, lower_32_bits(addr));
593 0 : amdgpu_ring_write(ring, upper_32_bits(addr));
594 0 : amdgpu_ring_write(ring, upper_32_bits(seq));
595 : }
596 :
597 0 : if (flags & AMDGPU_FENCE_FLAG_INT) {
598 0 : uint32_t ctx = ring->is_mes_queue ?
599 0 : (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0;
600 : /* generate an interrupt */
601 0 : amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
602 0 : amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx));
603 : }
604 0 : }
605 :
606 :
607 : /**
608 : * sdma_v5_0_gfx_stop - stop the gfx async dma engines
609 : *
610 : * @adev: amdgpu_device pointer
611 : *
612 : * Stop the gfx async dma ring buffers (NAVI10).
613 : */
614 0 : static void sdma_v5_0_gfx_stop(struct amdgpu_device *adev)
615 : {
616 0 : struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
617 0 : struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
618 : u32 rb_cntl, ib_cntl;
619 : int i;
620 :
621 0 : if ((adev->mman.buffer_funcs_ring == sdma0) ||
622 : (adev->mman.buffer_funcs_ring == sdma1))
623 0 : amdgpu_ttm_set_buffer_funcs_status(adev, false);
624 :
625 0 : for (i = 0; i < adev->sdma.num_instances; i++) {
626 0 : rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
627 0 : rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
628 0 : WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
629 0 : ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
630 0 : ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
631 0 : WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
632 : }
633 0 : }
634 :
635 : /**
636 : * sdma_v5_0_rlc_stop - stop the compute async dma engines
637 : *
638 : * @adev: amdgpu_device pointer
639 : *
640 : * Stop the compute async dma queues (NAVI10).
641 : */
642 : static void sdma_v5_0_rlc_stop(struct amdgpu_device *adev)
643 : {
644 : /* XXX todo */
645 : }
646 :
647 : /**
648 : * sdma_v5_0_ctx_switch_enable - enable/disable the async dma engines context switch
649 : *
650 : * @adev: amdgpu_device pointer
651 : * @enable: enable/disable the DMA MEs context switch.
652 : *
653 : * Halt or unhalt the async dma engines context switch (NAVI10).
654 : */
655 0 : static void sdma_v5_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
656 : {
657 0 : u32 f32_cntl = 0, phase_quantum = 0;
658 : int i;
659 :
660 0 : if (amdgpu_sdma_phase_quantum) {
661 : unsigned value = amdgpu_sdma_phase_quantum;
662 : unsigned unit = 0;
663 :
664 0 : while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
665 : SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
666 0 : value = (value + 1) >> 1;
667 0 : unit++;
668 : }
669 0 : if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
670 : SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
671 0 : value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
672 : SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
673 0 : unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
674 : SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
675 0 : WARN_ONCE(1,
676 : "clamping sdma_phase_quantum to %uK clock cycles\n",
677 : value << unit);
678 : }
679 0 : phase_quantum =
680 0 : value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
681 : unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
682 : }
683 :
684 0 : for (i = 0; i < adev->sdma.num_instances; i++) {
685 0 : if (!amdgpu_sriov_vf(adev)) {
686 0 : f32_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
687 0 : f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
688 : AUTO_CTXSW_ENABLE, enable ? 1 : 0);
689 : }
690 :
691 0 : if (enable && amdgpu_sdma_phase_quantum) {
692 0 : WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM),
693 : phase_quantum);
694 0 : WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM),
695 : phase_quantum);
696 0 : WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM),
697 : phase_quantum);
698 : }
699 0 : if (!amdgpu_sriov_vf(adev))
700 0 : WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
701 : }
702 :
703 0 : }
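/*
 * Encoding example (illustrative; the field width is an assumption, not
 * taken from this file): the loop above halves 'value' (rounding up) until
 * it fits the PHASE0_QUANTUM VALUE field and counts the halvings in 'unit',
 * so the programmed quantum is approximately value << unit clock cycles.
 * With an 8-bit VALUE field, a requested amdgpu_sdma_phase_quantum of 300
 * would be encoded as value == 150, unit == 1.
 */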
704 :
705 : /**
706 : * sdma_v5_0_enable - enable/disable the async dma engines
707 : *
708 : * @adev: amdgpu_device pointer
709 : * @enable: enable/disable the DMA MEs.
710 : *
711 : * Halt or unhalt the async dma engines (NAVI10).
712 : */
713 0 : static void sdma_v5_0_enable(struct amdgpu_device *adev, bool enable)
714 : {
715 : u32 f32_cntl;
716 : int i;
717 :
718 0 : if (!enable) {
719 0 : sdma_v5_0_gfx_stop(adev);
720 0 : sdma_v5_0_rlc_stop(adev);
721 : }
722 :
723 0 : if (amdgpu_sriov_vf(adev))
724 : return;
725 :
726 0 : for (i = 0; i < adev->sdma.num_instances; i++) {
727 0 : f32_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
728 0 : f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
729 0 : WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
730 : }
731 : }
732 :
733 : /**
734 : * sdma_v5_0_gfx_resume - setup and start the async dma engines
735 : *
736 : * @adev: amdgpu_device pointer
737 : *
738 : * Set up the gfx DMA ring buffers and enable them (NAVI10).
739 : * Returns 0 for success, error for failure.
740 : */
741 0 : static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
742 : {
743 : struct amdgpu_ring *ring;
744 : u32 rb_cntl, ib_cntl;
745 : u32 rb_bufsz;
746 : u32 doorbell;
747 : u32 doorbell_offset;
748 : u32 temp;
749 : u32 wptr_poll_cntl;
750 : u64 wptr_gpu_addr;
751 : int i, r;
752 :
753 0 : for (i = 0; i < adev->sdma.num_instances; i++) {
754 0 : ring = &adev->sdma.instance[i].ring;
755 :
756 0 : if (!amdgpu_sriov_vf(adev))
757 0 : WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
758 :
759 : /* Set ring buffer size in dwords */
760 0 : rb_bufsz = order_base_2(ring->ring_size / 4);
761 0 : rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
762 0 : rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
763 : #ifdef __BIG_ENDIAN
764 : rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
765 : rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
766 : RPTR_WRITEBACK_SWAP_ENABLE, 1);
767 : #endif
768 0 : WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
769 :
770 : /* Initialize the ring buffer's read and write pointers */
771 0 : WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
772 0 : WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
773 0 : WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
774 0 : WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
775 :
776 : /* setup the wptr shadow polling */
777 0 : wptr_gpu_addr = ring->wptr_gpu_addr;
778 0 : WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
779 : lower_32_bits(wptr_gpu_addr));
780 0 : WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
781 : upper_32_bits(wptr_gpu_addr));
782 0 : wptr_poll_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
783 : mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
784 0 : wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
785 : SDMA0_GFX_RB_WPTR_POLL_CNTL,
786 : F32_POLL_ENABLE, 1);
787 0 : WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL),
788 : wptr_poll_cntl);
789 :
790 : /* set the wb address whether it's enabled or not */
791 0 : WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
792 : upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
793 0 : WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
794 : lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
795 :
796 0 : rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
797 :
798 0 : WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE),
799 : ring->gpu_addr >> 8);
800 0 : WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI),
801 : ring->gpu_addr >> 40);
802 :
803 0 : ring->wptr = 0;
804 :
805 : /* before programming wptr to a smaller value, minor_ptr_update must be set first */
806 0 : WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
807 :
808 0 : if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
809 0 : WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
810 : lower_32_bits(ring->wptr << 2));
811 0 : WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
812 : upper_32_bits(ring->wptr << 2));
813 : }
814 :
815 0 : doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
816 0 : doorbell_offset = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
817 : mmSDMA0_GFX_DOORBELL_OFFSET));
818 :
819 0 : if (ring->use_doorbell) {
820 0 : doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
821 0 : doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
822 : OFFSET, ring->doorbell_index);
823 : } else {
824 0 : doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
825 : }
826 0 : WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
827 0 : WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET),
828 : doorbell_offset);
829 :
830 0 : adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
831 0 : ring->doorbell_index, 20);
832 :
833 0 : if (amdgpu_sriov_vf(adev))
834 0 : sdma_v5_0_ring_set_wptr(ring);
835 :
836 : /* set minor_ptr_update to 0 after wptr is programmed */
837 0 : WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
838 :
839 0 : if (!amdgpu_sriov_vf(adev)) {
840 : /* always set the UTC L1 enable flag to 1 */
841 0 : temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
842 0 : temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
843 :
844 : /* enable MCBP */
845 0 : temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1);
846 0 : WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
847 :
848 : /* Set up RESP_MODE to non-copy addresses */
849 0 : temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL));
850 0 : temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
851 0 : temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
852 0 : WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp);
853 :
854 : /* program default cache read and write policy */
855 0 : temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE));
856 : /* clean read policy and write policy bits */
857 0 : temp &= 0xFF0FFF;
858 0 : temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | (CACHE_WRITE_POLICY_L2__DEFAULT << 14));
859 0 : WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);
860 : }
861 :
862 0 : if (!amdgpu_sriov_vf(adev)) {
863 : /* unhalt engine */
864 0 : temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
865 0 : temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
866 0 : WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
867 : }
868 :
869 : /* enable DMA RB */
870 0 : rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
871 0 : WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
872 :
873 0 : ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
874 0 : ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
875 : #ifdef __BIG_ENDIAN
876 : ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
877 : #endif
878 : /* enable DMA IBs */
879 0 : WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
880 :
881 0 : ring->sched.ready = true;
882 :
883 0 : if (amdgpu_sriov_vf(adev)) { /* the bare-metal sequence doesn't need the two lines below */
884 0 : sdma_v5_0_ctx_switch_enable(adev, true);
885 0 : sdma_v5_0_enable(adev, true);
886 : }
887 :
888 0 : r = amdgpu_ring_test_helper(ring);
889 0 : if (r)
890 : return r;
891 :
892 0 : if (adev->mman.buffer_funcs_ring == ring)
893 0 : amdgpu_ttm_set_buffer_funcs_status(adev, true);
894 : }
895 :
896 : return 0;
897 : }
898 :
899 : /**
900 : * sdma_v5_0_rlc_resume - setup and start the async dma engines
901 : *
902 : * @adev: amdgpu_device pointer
903 : *
904 : * Set up the compute DMA queues and enable them (NAVI10).
905 : * Returns 0 for success, error for failure.
906 : */
907 : static int sdma_v5_0_rlc_resume(struct amdgpu_device *adev)
908 : {
909 : return 0;
910 : }
911 :
912 : /**
913 : * sdma_v5_0_load_microcode - load the sDMA ME ucode
914 : *
915 : * @adev: amdgpu_device pointer
916 : *
917 : * Loads the sDMA0/1 ucode.
918 : * Returns 0 for success, -EINVAL if the ucode is not available.
919 : */
920 0 : static int sdma_v5_0_load_microcode(struct amdgpu_device *adev)
921 : {
922 : const struct sdma_firmware_header_v1_0 *hdr;
923 : const __le32 *fw_data;
924 : u32 fw_size;
925 : int i, j;
926 :
927 : /* halt the MEs */
928 0 : sdma_v5_0_enable(adev, false);
929 :
930 0 : for (i = 0; i < adev->sdma.num_instances; i++) {
931 0 : if (!adev->sdma.instance[i].fw)
932 : return -EINVAL;
933 :
934 0 : hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
935 0 : amdgpu_ucode_print_sdma_hdr(&hdr->header);
936 0 : fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
937 :
938 0 : fw_data = (const __le32 *)
939 0 : (adev->sdma.instance[i].fw->data +
940 0 : le32_to_cpu(hdr->header.ucode_array_offset_bytes));
941 :
942 0 : WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), 0);
943 :
944 0 : for (j = 0; j < fw_size; j++) {
945 0 : if (amdgpu_emu_mode == 1 && j % 500 == 0)
946 0 : msleep(1);
947 0 : WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++));
948 : }
949 :
950 0 : WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version);
951 : }
952 :
953 : return 0;
954 : }
955 :
956 : /**
957 : * sdma_v5_0_start - setup and start the async dma engines
958 : *
959 : * @adev: amdgpu_device pointer
960 : *
961 : * Set up the DMA engines and enable them (NAVI10).
962 : * Returns 0 for success, error for failure.
963 : */
964 0 : static int sdma_v5_0_start(struct amdgpu_device *adev)
965 : {
966 0 : int r = 0;
967 :
968 0 : if (amdgpu_sriov_vf(adev)) {
969 0 : sdma_v5_0_ctx_switch_enable(adev, false);
970 0 : sdma_v5_0_enable(adev, false);
971 :
972 : /* set RB registers */
973 0 : r = sdma_v5_0_gfx_resume(adev);
974 0 : return r;
975 : }
976 :
977 0 : if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
978 0 : r = sdma_v5_0_load_microcode(adev);
979 0 : if (r)
980 : return r;
981 : }
982 :
983 : /* unhalt the MEs */
984 0 : sdma_v5_0_enable(adev, true);
985 : /* enable sdma ring preemption */
986 0 : sdma_v5_0_ctx_switch_enable(adev, true);
987 :
988 : /* start the gfx rings and rlc compute queues */
989 0 : r = sdma_v5_0_gfx_resume(adev);
990 0 : if (r)
991 : return r;
992 0 : r = sdma_v5_0_rlc_resume(adev);
993 :
994 0 : return r;
995 : }
996 :
997 0 : static int sdma_v5_0_mqd_init(struct amdgpu_device *adev, void *mqd,
998 : struct amdgpu_mqd_prop *prop)
999 : {
1000 0 : struct v10_sdma_mqd *m = mqd;
1001 : uint64_t wb_gpu_addr;
1002 :
1003 0 : m->sdmax_rlcx_rb_cntl =
1004 0 : order_base_2(prop->queue_size / 4) << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
1005 : 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
1006 0 : 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
1007 : 1 << SDMA0_RLC0_RB_CNTL__RB_PRIV__SHIFT;
1008 :
1009 0 : m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8);
1010 0 : m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8);
1011 :
1012 0 : m->sdmax_rlcx_rb_wptr_poll_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, 0,
1013 : mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
1014 :
1015 0 : wb_gpu_addr = prop->wptr_gpu_addr;
1016 0 : m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(wb_gpu_addr);
1017 0 : m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr);
1018 :
1019 0 : wb_gpu_addr = prop->rptr_gpu_addr;
1020 0 : m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(wb_gpu_addr);
1021 0 : m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(wb_gpu_addr);
1022 :
1023 0 : m->sdmax_rlcx_ib_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, 0,
1024 : mmSDMA0_GFX_IB_CNTL));
1025 :
1026 0 : m->sdmax_rlcx_doorbell_offset =
1027 0 : prop->doorbell_index << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
1028 :
1029 0 : m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_RLC0_DOORBELL, ENABLE, 1);
1030 :
1031 0 : return 0;
1032 : }
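/*
 * Sizing example (illustrative, assuming prop->queue_size is in bytes, as
 * with ring->ring_size in sdma_v5_0_gfx_resume() above): a 4 KiB queue
 * gives order_base_2(4096 / 4) == 10 in the RB_SIZE field of
 * sdmax_rlcx_rb_cntl, i.e. a ring of 2^10 == 1024 dwords.
 */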
1033 :
1034 : static void sdma_v5_0_set_mqd_funcs(struct amdgpu_device *adev)
1035 : {
1036 0 : adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v10_sdma_mqd);
1037 0 : adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = sdma_v5_0_mqd_init;
1038 : }
1039 :
1040 : /**
1041 : * sdma_v5_0_ring_test_ring - simple async dma engine test
1042 : *
1043 : * @ring: amdgpu_ring structure holding ring information
1044 : *
1045 : * Test the DMA engine by using it to write a
1046 : * value to memory (NAVI10).
1047 : * Returns 0 for success, error for failure.
1048 : */
1049 0 : static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring)
1050 : {
1051 0 : struct amdgpu_device *adev = ring->adev;
1052 : unsigned i;
1053 : unsigned index;
1054 : int r;
1055 : u32 tmp;
1056 : u64 gpu_addr;
1057 0 : volatile uint32_t *cpu_ptr = NULL;
1058 :
1059 0 : tmp = 0xCAFEDEAD;
1060 :
1061 0 : if (ring->is_mes_queue) {
1062 0 : uint32_t offset = 0;
1063 0 : offset = amdgpu_mes_ctx_get_offs(ring,
1064 : AMDGPU_MES_CTX_PADDING_OFFS);
1065 0 : gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
1066 0 : cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
1067 0 : *cpu_ptr = tmp;
1068 : } else {
1069 0 : r = amdgpu_device_wb_get(adev, &index);
1070 0 : if (r) {
1071 0 : dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
1072 0 : return r;
1073 : }
1074 :
1075 0 : gpu_addr = adev->wb.gpu_addr + (index * 4);
1076 0 : adev->wb.wb[index] = cpu_to_le32(tmp);
1077 : }
1078 :
1079 0 : r = amdgpu_ring_alloc(ring, 20);
1080 0 : if (r) {
1081 0 : DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
1082 0 : amdgpu_device_wb_free(adev, index);
1083 0 : return r;
1084 : }
1085 :
1086 0 : amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
1087 : SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
1088 0 : amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
1089 0 : amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
1090 0 : amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
1091 0 : amdgpu_ring_write(ring, 0xDEADBEEF);
1092 0 : amdgpu_ring_commit(ring);
1093 :
1094 0 : for (i = 0; i < adev->usec_timeout; i++) {
1095 0 : if (ring->is_mes_queue)
1096 0 : tmp = le32_to_cpu(*cpu_ptr);
1097 : else
1098 0 : tmp = le32_to_cpu(adev->wb.wb[index]);
1099 0 : if (tmp == 0xDEADBEEF)
1100 : break;
1101 0 : if (amdgpu_emu_mode == 1)
1102 0 : msleep(1);
1103 : else
1104 : udelay(1);
1105 : }
1106 :
1107 0 : if (i >= adev->usec_timeout)
1108 0 : r = -ETIMEDOUT;
1109 :
1110 0 : if (!ring->is_mes_queue)
1111 0 : amdgpu_device_wb_free(adev, index);
1112 :
1113 : return r;
1114 : }
1115 :
1116 : /**
1117 : * sdma_v5_0_ring_test_ib - test an IB on the DMA engine
1118 : *
1119 : * @ring: amdgpu_ring structure holding ring information
1120 : * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
1121 : *
1122 : * Test a simple IB in the DMA ring (NAVI10).
1123 : * Returns 0 on success, error on failure.
1124 : */
1125 0 : static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1126 : {
1127 0 : struct amdgpu_device *adev = ring->adev;
1128 : struct amdgpu_ib ib;
1129 0 : struct dma_fence *f = NULL;
1130 : unsigned index;
1131 : long r;
1132 0 : u32 tmp = 0;
1133 : u64 gpu_addr;
1134 0 : volatile uint32_t *cpu_ptr = NULL;
1135 :
1136 0 : tmp = 0xCAFEDEAD;
1137 0 : memset(&ib, 0, sizeof(ib));
1138 :
1139 0 : if (ring->is_mes_queue) {
1140 0 : uint32_t offset = 0;
1141 0 : offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
1142 0 : ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
1143 0 : ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
1144 :
1145 0 : offset = amdgpu_mes_ctx_get_offs(ring,
1146 : AMDGPU_MES_CTX_PADDING_OFFS);
1147 0 : gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
1148 0 : cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
1149 0 : *cpu_ptr = tmp;
1150 : } else {
1151 0 : r = amdgpu_device_wb_get(adev, &index);
1152 0 : if (r) {
1153 0 : dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
1154 0 : return r;
1155 : }
1156 :
1157 0 : gpu_addr = adev->wb.gpu_addr + (index * 4);
1158 0 : adev->wb.wb[index] = cpu_to_le32(tmp);
1159 :
1160 0 : r = amdgpu_ib_get(adev, NULL, 256,
1161 : AMDGPU_IB_POOL_DIRECT, &ib);
1162 0 : if (r) {
1163 0 : DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
1164 0 : goto err0;
1165 : }
1166 : }
1167 :
1168 0 : ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
1169 : SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
1170 0 : ib.ptr[1] = lower_32_bits(gpu_addr);
1171 0 : ib.ptr[2] = upper_32_bits(gpu_addr);
1172 0 : ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
1173 0 : ib.ptr[4] = 0xDEADBEEF;
1174 0 : ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
1175 0 : ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
1176 0 : ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
1177 0 : ib.length_dw = 8;
1178 :
1179 0 : r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1180 0 : if (r)
1181 : goto err1;
1182 :
1183 0 : r = dma_fence_wait_timeout(f, false, timeout);
1184 0 : if (r == 0) {
1185 0 : DRM_ERROR("amdgpu: IB test timed out\n");
1186 0 : r = -ETIMEDOUT;
1187 0 : goto err1;
1188 0 : } else if (r < 0) {
1189 0 : DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
1190 0 : goto err1;
1191 : }
1192 :
1193 0 : if (ring->is_mes_queue)
1194 0 : tmp = le32_to_cpu(*cpu_ptr);
1195 : else
1196 0 : tmp = le32_to_cpu(adev->wb.wb[index]);
1197 :
1198 0 : if (tmp == 0xDEADBEEF)
1199 : r = 0;
1200 : else
1201 0 : r = -EINVAL;
1202 :
1203 : err1:
1204 0 : amdgpu_ib_free(adev, &ib, NULL);
1205 0 : dma_fence_put(f);
1206 : err0:
1207 0 : if (!ring->is_mes_queue)
1208 0 : amdgpu_device_wb_free(adev, index);
1209 0 : return r;
1210 : }
1211 :
1212 :
1213 : /**
1214 : * sdma_v5_0_vm_copy_pte - update PTEs by copying them from the GART
1215 : *
1216 : * @ib: indirect buffer to fill with commands
1217 : * @pe: addr of the page entry
1218 : * @src: src addr to copy from
1219 : * @count: number of page entries to update
1220 : *
1221 : * Update PTEs by copying them from the GART using sDMA (NAVI10).
1222 : */
1223 0 : static void sdma_v5_0_vm_copy_pte(struct amdgpu_ib *ib,
1224 : uint64_t pe, uint64_t src,
1225 : unsigned count)
1226 : {
1227 0 : unsigned bytes = count * 8;
1228 :
1229 0 : ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
1230 : SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
1231 0 : ib->ptr[ib->length_dw++] = bytes - 1;
1232 0 : ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
1233 0 : ib->ptr[ib->length_dw++] = lower_32_bits(src);
1234 0 : ib->ptr[ib->length_dw++] = upper_32_bits(src);
1235 0 : ib->ptr[ib->length_dw++] = lower_32_bits(pe);
1236 0 : ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1237 :
1238 0 : }
1239 :
1240 : /**
1241 : * sdma_v5_0_vm_write_pte - update PTEs by writing them manually
1242 : *
1243 : * @ib: indirect buffer to fill with commands
1244 : * @pe: addr of the page entry
1245 : * @value: dst addr to write into pe
1246 : * @count: number of page entries to update
1247 : * @incr: increase next addr by incr bytes
1248 : *
1249 : * Update PTEs by writing them manually using sDMA (NAVI10).
1250 : */
1251 0 : static void sdma_v5_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
1252 : uint64_t value, unsigned count,
1253 : uint32_t incr)
1254 : {
1255 0 : unsigned ndw = count * 2;
1256 :
1257 0 : ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
1258 : SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
1259 0 : ib->ptr[ib->length_dw++] = lower_32_bits(pe);
1260 0 : ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1261 0 : ib->ptr[ib->length_dw++] = ndw - 1;
1262 0 : for (; ndw > 0; ndw -= 2) {
1263 0 : ib->ptr[ib->length_dw++] = lower_32_bits(value);
1264 0 : ib->ptr[ib->length_dw++] = upper_32_bits(value);
1265 0 : value += incr;
1266 : }
1267 0 : }
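/*
 * Packet-size example (illustrative): each 64-bit PTE value occupies two
 * dwords, so count == 3 gives ndw == 6 data dwords after the four header
 * dwords emitted above, i.e. a 10-dword WRITE_LINEAR packet.
 */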
1268 :
1269 : /**
1270 : * sdma_v5_0_vm_set_pte_pde - update the page tables using sDMA
1271 : *
1272 : * @ib: indirect buffer to fill with commands
1273 : * @pe: addr of the page entry
1274 : * @addr: dst addr to write into pe
1275 : * @count: number of page entries to update
1276 : * @incr: increase next addr by incr bytes
1277 : * @flags: access flags
1278 : *
1279 : * Update the page tables using sDMA (NAVI10).
1280 : */
1281 0 : static void sdma_v5_0_vm_set_pte_pde(struct amdgpu_ib *ib,
1282 : uint64_t pe,
1283 : uint64_t addr, unsigned count,
1284 : uint32_t incr, uint64_t flags)
1285 : {
1286 : /* for physically contiguous pages (vram) */
1287 0 : ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_PTEPDE);
1288 0 : ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
1289 0 : ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1290 0 : ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
1291 0 : ib->ptr[ib->length_dw++] = upper_32_bits(flags);
1292 0 : ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
1293 0 : ib->ptr[ib->length_dw++] = upper_32_bits(addr);
1294 0 : ib->ptr[ib->length_dw++] = incr; /* increment size */
1295 0 : ib->ptr[ib->length_dw++] = 0;
1296 0 : ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
1297 0 : }
1298 :
1299 : /**
1300 : * sdma_v5_0_ring_pad_ib - pad the IB
1301 : * @ring: amdgpu_ring structure holding ring information
1302 : * @ib: indirect buffer to fill with padding
1303 : *
1304 : * Pad the IB with NOPs so its length is a multiple of 8 dwords.
1305 : */
1306 0 : static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
1307 : {
1308 0 : struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
1309 : u32 pad_count;
1310 : int i;
1311 :
1312 0 : pad_count = (-ib->length_dw) & 0x7;
1313 0 : for (i = 0; i < pad_count; i++)
1314 0 : if (sdma && sdma->burst_nop && (i == 0))
1315 0 : ib->ptr[ib->length_dw++] =
1316 0 : SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
1317 0 : SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
1318 : else
1319 0 : ib->ptr[ib->length_dw++] =
1320 : SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
1321 0 : }
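/*
 * Padding example (illustrative): for ib->length_dw == 13,
 * pad_count = (-13) & 0x7 == 3, so three NOPs bring the IB to 16 dwords,
 * the next multiple of 8.
 */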
1322 :
1323 :
1324 : /**
1325 : * sdma_v5_0_ring_emit_pipeline_sync - sync the pipeline
1326 : *
1327 : * @ring: amdgpu_ring pointer
1328 : *
1329 : * Make sure all previous operations are completed (NAVI10).
1330 : */
1331 0 : static void sdma_v5_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
1332 : {
1333 0 : uint32_t seq = ring->fence_drv.sync_seq;
1334 0 : uint64_t addr = ring->fence_drv.gpu_addr;
1335 :
1336 : /* wait for idle */
1337 0 : amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
1338 : SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
1339 : SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
1340 : SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
1341 0 : amdgpu_ring_write(ring, addr & 0xfffffffc);
1342 0 : amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
1343 0 : amdgpu_ring_write(ring, seq); /* reference */
1344 0 : amdgpu_ring_write(ring, 0xffffffff); /* mask */
1345 0 : amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
1346 : SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
1347 0 : }
1348 :
1349 :
1350 : /**
1351 : * sdma_v5_0_ring_emit_vm_flush - vm flush using sDMA
1352 : *
1353 : * @ring: amdgpu_ring pointer
1354 : * @vmid: vmid number to use
1355 : * @pd_addr: address
1356 : *
1357 : * Update the page table base and flush the VM TLB
1358 : * using sDMA (NAVI10).
1359 : */
1360 0 : static void sdma_v5_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
1361 : unsigned vmid, uint64_t pd_addr)
1362 : {
1363 0 : amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
1364 0 : }
1365 :
1366 0 : static void sdma_v5_0_ring_emit_wreg(struct amdgpu_ring *ring,
1367 : uint32_t reg, uint32_t val)
1368 : {
1369 0 : amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
1370 : SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
1371 0 : amdgpu_ring_write(ring, reg);
1372 0 : amdgpu_ring_write(ring, val);
1373 0 : }
1374 :
1375 0 : static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
1376 : uint32_t val, uint32_t mask)
1377 : {
1378 0 : amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
1379 : SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
1380 : SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
1381 0 : amdgpu_ring_write(ring, reg << 2);
1382 0 : amdgpu_ring_write(ring, 0);
1383 0 : amdgpu_ring_write(ring, val); /* reference */
1384 0 : amdgpu_ring_write(ring, mask); /* mask */
1385 0 : amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
1386 : SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
1387 0 : }
1388 :
1389 0 : static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
1390 : uint32_t reg0, uint32_t reg1,
1391 : uint32_t ref, uint32_t mask)
1392 : {
1393 0 : amdgpu_ring_emit_wreg(ring, reg0, ref);
1394 : /* wait for a cycle to reset vm_inv_eng*_ack */
1395 0 : amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
1396 0 : amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
1397 0 : }
1398 :
1399 0 : static int sdma_v5_0_early_init(void *handle)
1400 : {
1401 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1402 :
1403 0 : sdma_v5_0_set_ring_funcs(adev);
1404 0 : sdma_v5_0_set_buffer_funcs(adev);
1405 0 : sdma_v5_0_set_vm_pte_funcs(adev);
1406 0 : sdma_v5_0_set_irq_funcs(adev);
1407 0 : sdma_v5_0_set_mqd_funcs(adev);
1408 :
1409 0 : return 0;
1410 : }
1411 :
1412 :
1413 0 : static int sdma_v5_0_sw_init(void *handle)
1414 : {
1415 : struct amdgpu_ring *ring;
1416 : int r, i;
1417 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1418 :
1419 : /* SDMA trap event */
1420 0 : r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0,
1421 : SDMA0_5_0__SRCID__SDMA_TRAP,
1422 : &adev->sdma.trap_irq);
1423 0 : if (r)
1424 : return r;
1425 :
1426 : /* SDMA1 trap event */
1427 0 : r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1,
1428 : SDMA1_5_0__SRCID__SDMA_TRAP,
1429 : &adev->sdma.trap_irq);
1430 0 : if (r)
1431 : return r;
1432 :
1433 0 : r = sdma_v5_0_init_microcode(adev);
1434 0 : if (r) {
1435 0 : DRM_ERROR("Failed to load sdma firmware!\n");
1436 0 : return r;
1437 : }
1438 :
1439 0 : for (i = 0; i < adev->sdma.num_instances; i++) {
1440 0 : ring = &adev->sdma.instance[i].ring;
1441 0 : ring->ring_obj = NULL;
1442 0 : ring->use_doorbell = true;
1443 :
1444 0 : DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,
1445 : ring->use_doorbell ? "true" : "false");
1446 :
1447 0 : ring->doorbell_index = (i == 0) ?
1448 0 : (adev->doorbell_index.sdma_engine[0] << 1) // get DWORD offset
1449 0 : : (adev->doorbell_index.sdma_engine[1] << 1); // get DWORD offset
1450 :
1451 0 : sprintf(ring->name, "sdma%d", i);
1452 0 : r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
1453 : (i == 0) ? AMDGPU_SDMA_IRQ_INSTANCE0 :
1454 : AMDGPU_SDMA_IRQ_INSTANCE1,
1455 : AMDGPU_RING_PRIO_DEFAULT, NULL);
1456 0 : if (r)
1457 : return r;
1458 : }
1459 :
1460 : return r;
1461 : }
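           : /*
           :  * In sw_init above, adev->doorbell_index.sdma_engine[] is shifted left
           :  * by one before being stored in ring->doorbell_index; per the in-line
           :  * "get DWORD offset" comments, the per-engine indices are presumably
           :  * kept in 64-bit doorbell units while the ring expects a 32-bit
           :  * (DWORD) doorbell offset. Note also that both IRQ registrations
           :  * (client IDs SDMA0 and SDMA1) feed the same &adev->sdma.trap_irq
           :  * source; the instance is distinguished by the
           :  * AMDGPU_SDMA_IRQ_INSTANCE0/1 type passed to amdgpu_ring_init and to
           :  * the trap IRQ state callback further down.
           :  */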
1462 :
1463 0 : static int sdma_v5_0_sw_fini(void *handle)
1464 : {
1465 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1466 : int i;
1467 :
1468 0 : for (i = 0; i < adev->sdma.num_instances; i++) {
1469 0 : release_firmware(adev->sdma.instance[i].fw);
1470 0 : adev->sdma.instance[i].fw = NULL;
1471 :
1472 0 : amdgpu_ring_fini(&adev->sdma.instance[i].ring);
1473 : }
1474 :
1475 0 : return 0;
1476 : }
1477 :
1478 0 : static int sdma_v5_0_hw_init(void *handle)
1479 : {
1480 : int r;
1481 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1482 :
1483 0 : sdma_v5_0_init_golden_registers(adev);
1484 :
1485 0 : r = sdma_v5_0_start(adev);
1486 :
1487 0 : return r;
1488 : }
1489 :
1490 0 : static int sdma_v5_0_hw_fini(void *handle)
1491 : {
1492 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1493 :
1494 0 : if (amdgpu_sriov_vf(adev))
1495 : return 0;
1496 :
1497 0 : sdma_v5_0_ctx_switch_enable(adev, false);
1498 0 : sdma_v5_0_enable(adev, false);
1499 :
1500 0 : return 0;
1501 : }
1502 :
1503 0 : static int sdma_v5_0_suspend(void *handle)
1504 : {
1505 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1506 :
1507 0 : return sdma_v5_0_hw_fini(adev);
1508 : }
1509 :
1510 0 : static int sdma_v5_0_resume(void *handle)
1511 : {
1512 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1513 :
1514 0 : return sdma_v5_0_hw_init(adev);
1515 : }
1516 :
1517 0 : static bool sdma_v5_0_is_idle(void *handle)
1518 : {
1519 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1520 : u32 i;
1521 :
1522 0 : for (i = 0; i < adev->sdma.num_instances; i++) {
1523 0 : u32 tmp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_STATUS_REG));
1524 :
1525 0 : if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
1526 : return false;
1527 : }
1528 :
1529 : return true;
1530 : }
1531 :
1532 0 : static int sdma_v5_0_wait_for_idle(void *handle)
1533 : {
1534 : unsigned i;
1535 : u32 sdma0, sdma1;
1536 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1537 :
1538 0 : for (i = 0; i < adev->usec_timeout; i++) {
1539 0 : sdma0 = RREG32(sdma_v5_0_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG));
1540 0 : sdma1 = RREG32(sdma_v5_0_get_reg_offset(adev, 1, mmSDMA0_STATUS_REG));
1541 :
1542 0 : if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK)
1543 : return 0;
1544 0 : udelay(1);
1545 : }
1546 : return -ETIMEDOUT;
1547 : }
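           : /*
           :  * sdma_v5_0_wait_for_idle checks both instances in one expression:
           :  * "sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK" is only non-zero when
           :  * the IDLE bit is set in both status registers, so the loop keeps
           :  * spinning (udelay(1), up to adev->usec_timeout iterations) until both
           :  * engines report idle or it gives up with -ETIMEDOUT.
           :  */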
1548 :
1549 0 : static int sdma_v5_0_soft_reset(void *handle)
1550 : {
1551 : /* todo */
1552 :
1553 0 : return 0;
1554 : }
1555 :
1556 0 : static int sdma_v5_0_ring_preempt_ib(struct amdgpu_ring *ring)
1557 : {
1558 0 : int i, r = 0;
1559 0 : struct amdgpu_device *adev = ring->adev;
1560 0 : u32 index = 0;
1561 : u64 sdma_gfx_preempt;
1562 :
1563 0 : amdgpu_sdma_get_index_from_ring(ring, &index);
1564 0 : if (index == 0)
1565 : sdma_gfx_preempt = mmSDMA0_GFX_PREEMPT;
1566 : else
1567 0 : sdma_gfx_preempt = mmSDMA1_GFX_PREEMPT;
1568 :
1569 : /* assert preemption condition */
1570 0 : amdgpu_ring_set_preempt_cond_exec(ring, false);
1571 :
1572 : /* emit the trailing fence */
1573 0 : ring->trail_seq += 1;
1574 0 : amdgpu_ring_alloc(ring, 10);
1575 0 : sdma_v5_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
1576 0 : ring->trail_seq, 0);
1577 0 : amdgpu_ring_commit(ring);
1578 :
1579 : /* assert IB preemption */
1580 0 : WREG32(sdma_gfx_preempt, 1);
1581 :
1582 : /* poll the trailing fence */
1583 0 : for (i = 0; i < adev->usec_timeout; i++) {
1584 0 : if (ring->trail_seq ==
1585 0 : le32_to_cpu(*(ring->trail_fence_cpu_addr)))
1586 : break;
1587 0 : udelay(1);
1588 : }
1589 :
1590 0 : if (i >= adev->usec_timeout) {
1591 0 : r = -EINVAL;
1592 0 : DRM_ERROR("ring %d failed to be preempted\n", ring->idx);
1593 : }
1594 :
1595 : /* deassert IB preemption */
1596 0 : WREG32(sdma_gfx_preempt, 0);
1597 :
1598 : /* deassert the preemption condition */
1599 0 : amdgpu_ring_set_preempt_cond_exec(ring, true);
1600 0 : return r;
1601 : }
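           : /*
           :  * Preemption sequence used above: the preempt cond-exec flag is
           :  * de-asserted, a trailing fence is queued with an incremented
           :  * trail_seq (the 10-dword allocation matches the per-fence size
           :  * budgeted in emit_frame_size), preemption is requested by writing 1
           :  * to the instance's GFX_PREEMPT register, and the CPU busy-waits for
           :  * trail_seq to appear at trail_fence_cpu_addr before de-asserting the
           :  * register and restoring the cond-exec flag.
           :  */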
1602 :
1603 0 : static int sdma_v5_0_set_trap_irq_state(struct amdgpu_device *adev,
1604 : struct amdgpu_irq_src *source,
1605 : unsigned type,
1606 : enum amdgpu_interrupt_state state)
1607 : {
1608 : u32 sdma_cntl;
1609 :
1610 0 : if (!amdgpu_sriov_vf(adev)) {
1611 0 : u32 reg_offset = (type == AMDGPU_SDMA_IRQ_INSTANCE0) ?
1612 0 : sdma_v5_0_get_reg_offset(adev, 0, mmSDMA0_CNTL) :
1613 : sdma_v5_0_get_reg_offset(adev, 1, mmSDMA0_CNTL);
1614 :
1615 0 : sdma_cntl = RREG32(reg_offset);
1616 0 : sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
1617 : state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
1618 0 : WREG32(reg_offset, sdma_cntl);
1619 : }
1620 :
1621 0 : return 0;
1622 : }
1623 :
1624 0 : static int sdma_v5_0_process_trap_irq(struct amdgpu_device *adev,
1625 : struct amdgpu_irq_src *source,
1626 : struct amdgpu_iv_entry *entry)
1627 : {
1628 0 : uint32_t mes_queue_id = entry->src_data[0];
1629 :
1630 0 : DRM_DEBUG("IH: SDMA trap\n");
1631 :
1632 0 : if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
1633 : struct amdgpu_mes_queue *queue;
1634 :
1635 0 : mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
1636 :
1637 0 : spin_lock(&adev->mes.queue_id_lock);
1638 0 : queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
1639 0 : if (queue) {
1640 0 : DRM_DEBUG("process sdma queue id = %d\n", mes_queue_id);
1641 0 : amdgpu_fence_process(queue->ring);
1642 : }
1643 0 : spin_unlock(&adev->mes.queue_id_lock);
1644 0 : return 0;
1645 : }
1646 :
1647 0 : switch (entry->client_id) {
1648 : case SOC15_IH_CLIENTID_SDMA0:
1649 0 : switch (entry->ring_id) {
1650 : case 0:
1651 0 : amdgpu_fence_process(&adev->sdma.instance[0].ring);
1652 0 : break;
1653 : case 1:
1654 : /* XXX compute */
1655 : break;
1656 : case 2:
1657 : /* XXX compute */
1658 : break;
1659 : case 3:
1660 : /* XXX page queue */
1661 : break;
1662 : }
1663 : break;
1664 : case SOC15_IH_CLIENTID_SDMA1:
1665 0 : switch (entry->ring_id) {
1666 : case 0:
1667 0 : amdgpu_fence_process(&adev->sdma.instance[1].ring);
1668 0 : break;
1669 : case 1:
1670 : /* XXX compute */
1671 : break;
1672 : case 2:
1673 : /* XXX compute */
1674 : break;
1675 : case 3:
1676 : /* XXX page queue */
1677 : break;
1678 : }
1679 : break;
1680 : }
1681 : return 0;
1682 : }
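           : /*
           :  * Trap handling above has two paths: when MES is enabled and
           :  * src_data[0] carries AMDGPU_FENCE_MES_QUEUE_FLAG, the queue id is
           :  * masked out and looked up in adev->mes.queue_id_idr (under
           :  * queue_id_lock) so the matching ring's fences can be processed;
           :  * otherwise the interrupt is dispatched by client id and ring id,
           :  * with only ring 0 of each instance handled and the compute/page
           :  * queue ring ids (1-3) left as XXX placeholders.
           :  */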
1683 :
1684 0 : static int sdma_v5_0_process_illegal_inst_irq(struct amdgpu_device *adev,
1685 : struct amdgpu_irq_src *source,
1686 : struct amdgpu_iv_entry *entry)
1687 : {
1688 0 : return 0;
1689 : }
1690 :
1691 0 : static void sdma_v5_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
1692 : bool enable)
1693 : {
1694 : uint32_t data, def;
1695 : int i;
1696 :
1697 0 : for (i = 0; i < adev->sdma.num_instances; i++) {
1698 0 : if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
1699 : /* Enable sdma clock gating */
1700 0 : def = data = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL));
1701 0 : data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
1702 : SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
1703 : SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
1704 : SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
1705 : SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
1706 : SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
1707 : SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
1708 : SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
1709 0 : if (def != data)
1710 0 : WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL), data);
1711 : } else {
1712 : /* Disable sdma clock gating */
1713 0 : def = data = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL));
1714 0 : data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
1715 : SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
1716 : SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
1717 : SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
1718 : SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
1719 : SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
1720 : SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
1721 : SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
1722 0 : if (def != data)
1723 0 : WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL), data);
1724 : }
1725 : }
1726 0 : }
1727 :
1728 0 : static void sdma_v5_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
1729 : bool enable)
1730 : {
1731 : uint32_t data, def;
1732 : int i;
1733 :
1734 0 : for (i = 0; i < adev->sdma.num_instances; i++) {
1735 0 : if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
1736 : /* Enable sdma mem light sleep */
1737 0 : def = data = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL));
1738 0 : data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
1739 0 : if (def != data)
1740 0 : WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL), data);
1741 :
1742 : } else {
1743 : /* Disable sdma mem light sleep */
1744 0 : def = data = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL));
1745 0 : data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
1746 0 : if (def != data)
1747 0 : WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL), data);
1748 :
1749 : }
1750 : }
1751 0 : }
1752 :
1753 0 : static int sdma_v5_0_set_clockgating_state(void *handle,
1754 : enum amd_clockgating_state state)
1755 : {
1756 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1757 :
1758 0 : if (amdgpu_sriov_vf(adev))
1759 : return 0;
1760 :
1761 0 : switch (adev->ip_versions[SDMA0_HWIP][0]) {
1762 : case IP_VERSION(5, 0, 0):
1763 : case IP_VERSION(5, 0, 2):
1764 : case IP_VERSION(5, 0, 5):
1765 0 : sdma_v5_0_update_medium_grain_clock_gating(adev,
1766 : state == AMD_CG_STATE_GATE);
1767 0 : sdma_v5_0_update_medium_grain_light_sleep(adev,
1768 : state == AMD_CG_STATE_GATE);
1769 0 : break;
1770 : default:
1771 : break;
1772 : }
1773 :
1774 : return 0;
1775 : }
1776 :
1777 0 : static int sdma_v5_0_set_powergating_state(void *handle,
1778 : enum amd_powergating_state state)
1779 : {
1780 0 : return 0;
1781 : }
1782 :
1783 0 : static void sdma_v5_0_get_clockgating_state(void *handle, u64 *flags)
1784 : {
1785 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1786 : int data;
1787 :
1788 0 : if (amdgpu_sriov_vf(adev))
1789 0 : *flags = 0;
1790 :
1791 : /* AMD_CG_SUPPORT_SDMA_MGCG */
1792 0 : data = RREG32(sdma_v5_0_get_reg_offset(adev, 0, mmSDMA0_CLK_CTRL));
1793 0 : if (!(data & SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK))
1794 0 : *flags |= AMD_CG_SUPPORT_SDMA_MGCG;
1795 :
1796 : /* AMD_CG_SUPPORT_SDMA_LS */
1797 0 : data = RREG32(sdma_v5_0_get_reg_offset(adev, 0, mmSDMA0_POWER_CNTL));
1798 0 : if (data & SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK)
1799 0 : *flags |= AMD_CG_SUPPORT_SDMA_LS;
1800 0 : }
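           : /*
           :  * The flags reported above mirror the two update helpers earlier in
           :  * the file: MGCG is reported when SOFT_OVERRIDE7 in SDMA0_CLK_CTRL is
           :  * clear (the enable path clears all SOFT_OVERRIDE bits), and LS is
           :  * reported when MEM_POWER_OVERRIDE is set in SDMA0_POWER_CNTL (the
           :  * enable path sets it). Only instance 0 is sampled here.
           :  */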
1801 :
1802 : const struct amd_ip_funcs sdma_v5_0_ip_funcs = {
1803 : .name = "sdma_v5_0",
1804 : .early_init = sdma_v5_0_early_init,
1805 : .late_init = NULL,
1806 : .sw_init = sdma_v5_0_sw_init,
1807 : .sw_fini = sdma_v5_0_sw_fini,
1808 : .hw_init = sdma_v5_0_hw_init,
1809 : .hw_fini = sdma_v5_0_hw_fini,
1810 : .suspend = sdma_v5_0_suspend,
1811 : .resume = sdma_v5_0_resume,
1812 : .is_idle = sdma_v5_0_is_idle,
1813 : .wait_for_idle = sdma_v5_0_wait_for_idle,
1814 : .soft_reset = sdma_v5_0_soft_reset,
1815 : .set_clockgating_state = sdma_v5_0_set_clockgating_state,
1816 : .set_powergating_state = sdma_v5_0_set_powergating_state,
1817 : .get_clockgating_state = sdma_v5_0_get_clockgating_state,
1818 : };
1819 :
1820 : static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
1821 : .type = AMDGPU_RING_TYPE_SDMA,
1822 : .align_mask = 0xf,
1823 : .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
1824 : .support_64bit_ptrs = true,
1825 : .secure_submission_supported = true,
1826 : .vmhub = AMDGPU_GFXHUB_0,
1827 : .get_rptr = sdma_v5_0_ring_get_rptr,
1828 : .get_wptr = sdma_v5_0_ring_get_wptr,
1829 : .set_wptr = sdma_v5_0_ring_set_wptr,
1830 : .emit_frame_size =
1831 : 5 + /* sdma_v5_0_ring_init_cond_exec */
1832 : 6 + /* sdma_v5_0_ring_emit_hdp_flush */
1833 : 3 + /* hdp_invalidate */
1834 : 6 + /* sdma_v5_0_ring_emit_pipeline_sync */
1835 : /* sdma_v5_0_ring_emit_vm_flush */
1836 : SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1837 : SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
1838 : 10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
1839 : .emit_ib_size = 5 + 7 + 6, /* sdma_v5_0_ring_emit_ib */
1840 : .emit_ib = sdma_v5_0_ring_emit_ib,
1841 : .emit_mem_sync = sdma_v5_0_ring_emit_mem_sync,
1842 : .emit_fence = sdma_v5_0_ring_emit_fence,
1843 : .emit_pipeline_sync = sdma_v5_0_ring_emit_pipeline_sync,
1844 : .emit_vm_flush = sdma_v5_0_ring_emit_vm_flush,
1845 : .emit_hdp_flush = sdma_v5_0_ring_emit_hdp_flush,
1846 : .test_ring = sdma_v5_0_ring_test_ring,
1847 : .test_ib = sdma_v5_0_ring_test_ib,
1848 : .insert_nop = sdma_v5_0_ring_insert_nop,
1849 : .pad_ib = sdma_v5_0_ring_pad_ib,
1850 : .emit_wreg = sdma_v5_0_ring_emit_wreg,
1851 : .emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
1852 : .emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
1853 : .init_cond_exec = sdma_v5_0_ring_init_cond_exec,
1854 : .patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
1855 : .preempt_ib = sdma_v5_0_ring_preempt_ib,
1856 : };
1857 :
1858 : static void sdma_v5_0_set_ring_funcs(struct amdgpu_device *adev)
1859 : {
1860 : int i;
1861 :
1862 0 : for (i = 0; i < adev->sdma.num_instances; i++) {
1863 0 : adev->sdma.instance[i].ring.funcs = &sdma_v5_0_ring_funcs;
1864 0 : adev->sdma.instance[i].ring.me = i;
1865 : }
1866 : }
1867 :
1868 : static const struct amdgpu_irq_src_funcs sdma_v5_0_trap_irq_funcs = {
1869 : .set = sdma_v5_0_set_trap_irq_state,
1870 : .process = sdma_v5_0_process_trap_irq,
1871 : };
1872 :
1873 : static const struct amdgpu_irq_src_funcs sdma_v5_0_illegal_inst_irq_funcs = {
1874 : .process = sdma_v5_0_process_illegal_inst_irq,
1875 : };
1876 :
1877 : static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev)
1878 : {
1879 0 : adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 +
1880 : adev->sdma.num_instances;
1881 0 : adev->sdma.trap_irq.funcs = &sdma_v5_0_trap_irq_funcs;
1882 0 : adev->sdma.illegal_inst_irq.funcs = &sdma_v5_0_illegal_inst_irq_funcs;
1883 : }
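           : /*
           :  * trap_irq.num_types is AMDGPU_SDMA_IRQ_INSTANCE0 + num_instances, so
           :  * the IRQ "type" doubles as the SDMA instance selector:
           :  * AMDGPU_SDMA_IRQ_INSTANCE0 maps to engine 0 and INSTANCE1 to engine 1
           :  * in sdma_v5_0_set_trap_irq_state above.
           :  */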
1884 :
1885 : /**
1886 : * sdma_v5_0_emit_copy_buffer - copy buffer using the sDMA engine
1887 : *
1888 : * @ib: indirect buffer to copy to
1889 : * @src_offset: src GPU address
1890 : * @dst_offset: dst GPU address
1891 : * @byte_count: number of bytes to xfer
1892 : * @tmz: whether a secure (TMZ) copy should be used
1893 : *
1894 : * Copy GPU buffers using the DMA engine (NAVI10).
1895 : * Used by the amdgpu ttm implementation to move pages if
1896 : * registered as the asic copy callback.
1897 : */
1898 0 : static void sdma_v5_0_emit_copy_buffer(struct amdgpu_ib *ib,
1899 : uint64_t src_offset,
1900 : uint64_t dst_offset,
1901 : uint32_t byte_count,
1902 : bool tmz)
1903 : {
1904 0 : ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
1905 0 : SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
1906 : SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
1907 0 : ib->ptr[ib->length_dw++] = byte_count - 1;
1908 0 : ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
1909 0 : ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
1910 0 : ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
1911 0 : ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
1912 0 : ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
1913 0 : }
1914 :
1915 : /**
1916 : * sdma_v5_0_emit_fill_buffer - fill buffer using the sDMA engine
1917 : *
1918 : * @ib: indirect buffer to fill
1919 : * @src_data: value to write to buffer
1920 : * @dst_offset: dst GPU address
1921 : * @byte_count: number of bytes to fill
1922 : *
1923 : * Fill GPU buffers using the DMA engine (NAVI10).
1924 : */
1925 0 : static void sdma_v5_0_emit_fill_buffer(struct amdgpu_ib *ib,
1926 : uint32_t src_data,
1927 : uint64_t dst_offset,
1928 : uint32_t byte_count)
1929 : {
1930 0 : ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
1931 0 : ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
1932 0 : ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
1933 0 : ib->ptr[ib->length_dw++] = src_data;
1934 0 : ib->ptr[ib->length_dw++] = byte_count - 1;
1935 0 : }
1936 :
1937 : static const struct amdgpu_buffer_funcs sdma_v5_0_buffer_funcs = {
1938 : .copy_max_bytes = 0x400000,
1939 : .copy_num_dw = 7,
1940 : .emit_copy_buffer = sdma_v5_0_emit_copy_buffer,
1941 :
1942 : .fill_max_bytes = 0x400000,
1943 : .fill_num_dw = 5,
1944 : .emit_fill_buffer = sdma_v5_0_emit_fill_buffer,
1945 : };
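           : /*
           :  * The dword budgets above match the emit helpers: the COPY_LINEAR
           :  * packet built by sdma_v5_0_emit_copy_buffer is 7 dwords and the
           :  * CONST_FILL packet from sdma_v5_0_emit_fill_buffer is 5 dwords, both
           :  * encoding the size as byte_count - 1. copy_max_bytes and
           :  * fill_max_bytes of 0x400000 cap a single packet at 4 MiB, so larger
           :  * operations are presumably split into multiple packets by the
           :  * generic buffer-funcs callers.
           :  */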
1946 :
1947 : static void sdma_v5_0_set_buffer_funcs(struct amdgpu_device *adev)
1948 : {
1949 0 : if (adev->mman.buffer_funcs == NULL) {
1950 0 : adev->mman.buffer_funcs = &sdma_v5_0_buffer_funcs;
1951 0 : adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
1952 : }
1953 : }
1954 :
1955 : static const struct amdgpu_vm_pte_funcs sdma_v5_0_vm_pte_funcs = {
1956 : .copy_pte_num_dw = 7,
1957 : .copy_pte = sdma_v5_0_vm_copy_pte,
1958 : .write_pte = sdma_v5_0_vm_write_pte,
1959 : .set_pte_pde = sdma_v5_0_vm_set_pte_pde,
1960 : };
1961 :
1962 : static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev)
1963 : {
1964 : unsigned i;
1965 :
1966 0 : if (adev->vm_manager.vm_pte_funcs == NULL) {
1967 0 : adev->vm_manager.vm_pte_funcs = &sdma_v5_0_vm_pte_funcs;
1968 0 : for (i = 0; i < adev->sdma.num_instances; i++) {
1969 0 : adev->vm_manager.vm_pte_scheds[i] =
1970 0 : &adev->sdma.instance[i].ring.sched;
1971 : }
1972 0 : adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
1973 : }
1974 : }
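           : /*
           :  * The buffer funcs and VM PTE funcs above are only installed when no
           :  * other IP block has claimed them (the NULL checks), pointing buffer
           :  * moves at SDMA instance 0 and exposing every SDMA ring's GPU
           :  * scheduler to the VM manager for page-table updates.
           :  */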
1975 :
1976 : const struct amdgpu_ip_block_version sdma_v5_0_ip_block = {
1977 : .type = AMD_IP_BLOCK_TYPE_SDMA,
1978 : .major = 5,
1979 : .minor = 0,
1980 : .rev = 0,
1981 : .funcs = &sdma_v5_0_ip_funcs,
1982 : };