Line data Source code
1 : /*
2 : * Copyright 2014 Advanced Micro Devices, Inc.
3 : *
4 : * Permission is hereby granted, free of charge, to any person obtaining a
5 : * copy of this software and associated documentation files (the "Software"),
6 : * to deal in the Software without restriction, including without limitation
7 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 : * and/or sell copies of the Software, and to permit persons to whom the
9 : * Software is furnished to do so, subject to the following conditions:
10 : *
11 : * The above copyright notice and this permission notice shall be included in
12 : * all copies or substantial portions of the Software.
13 : *
14 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 : * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 : * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 : * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 : * OTHER DEALINGS IN THE SOFTWARE.
21 : *
22 : */
23 :
24 : #include <linux/delay.h>
25 : #include <linux/kernel.h>
26 : #include <linux/firmware.h>
27 : #include <linux/module.h>
28 : #include <linux/pci.h>
29 :
30 : #include "amdgpu.h"
31 : #include "amdgpu_gfx.h"
32 : #include "amdgpu_ring.h"
33 : #include "vi.h"
34 : #include "vi_structs.h"
35 : #include "vid.h"
36 : #include "amdgpu_ucode.h"
37 : #include "amdgpu_atombios.h"
38 : #include "atombios_i2c.h"
39 : #include "clearstate_vi.h"
40 :
41 : #include "gmc/gmc_8_2_d.h"
42 : #include "gmc/gmc_8_2_sh_mask.h"
43 :
44 : #include "oss/oss_3_0_d.h"
45 : #include "oss/oss_3_0_sh_mask.h"
46 :
47 : #include "bif/bif_5_0_d.h"
48 : #include "bif/bif_5_0_sh_mask.h"
49 : #include "gca/gfx_8_0_d.h"
50 : #include "gca/gfx_8_0_enum.h"
51 : #include "gca/gfx_8_0_sh_mask.h"
52 :
53 : #include "dce/dce_10_0_d.h"
54 : #include "dce/dce_10_0_sh_mask.h"
55 :
56 : #include "smu/smu_7_1_3_d.h"
57 :
58 : #include "ivsrcid/ivsrcid_vislands30.h"
59 :
60 : #define GFX8_NUM_GFX_RINGS 1
61 : #define GFX8_MEC_HPD_SIZE 4096
62 :
63 : #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
64 : #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
65 : #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
66 : #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
67 :
68 : #define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
69 : #define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
70 : #define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
71 : #define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
72 : #define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
73 : #define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
74 : #define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
75 : #define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
76 : #define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
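
/*
 * Each macro above shifts a field value into place within the GB_TILE_MODEn
 * and GB_MACROTILE_MODEn registers.  A representative combination, using
 * field enums from gfx_8_0_enum.h the way the tiling tables later in this
 * file do (illustrative only):
 *
 *   mode = ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
 *          PIPE_CONFIG(ADDR_SURF_P2) |
 *          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
 *          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING);
 */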
77 :
78 : #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L
79 : #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L
80 : #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L
81 : #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L
82 : #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L
83 : #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L
84 :
85 : /* BPM SERDES CMD */
86 : #define SET_BPM_SERDES_CMD 1
87 : #define CLE_BPM_SERDES_CMD 0
88 :
90 : /* BPM Register Addresses */
90 : enum {
91 : BPM_REG_CGLS_EN = 0, /* Enable/Disable CGLS */
92 : BPM_REG_CGLS_ON, /* ON/OFF CGLS: shall be controlled by RLC FW */
93 : BPM_REG_CGCG_OVERRIDE, /* Set/Clear CGCG Override */
94 : BPM_REG_MGCG_OVERRIDE, /* Set/Clear MGCG Override */
95 : BPM_REG_FGCG_OVERRIDE, /* Set/Clear FGCG Override */
96 : BPM_REG_FGCG_MAX
97 : };
98 :
99 : #define RLC_FormatDirectRegListLength 14
100 :
101 : MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
102 : MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
103 : MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
104 : MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
105 : MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
106 : MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
107 :
108 : MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
109 : MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
110 : MODULE_FIRMWARE("amdgpu/stoney_me.bin");
111 : MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
112 : MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
113 :
114 : MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
115 : MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
116 : MODULE_FIRMWARE("amdgpu/tonga_me.bin");
117 : MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
118 : MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
119 : MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
120 :
121 : MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
122 : MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
123 : MODULE_FIRMWARE("amdgpu/topaz_me.bin");
124 : MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
125 : MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
126 :
127 : MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
128 : MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
129 : MODULE_FIRMWARE("amdgpu/fiji_me.bin");
130 : MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
131 : MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
132 : MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
133 :
134 : MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
135 : MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
136 : MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
137 : MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
138 : MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
139 : MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
140 : MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
141 : MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
142 : MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
143 : MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
144 : MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
145 :
146 : MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
147 : MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
148 : MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
149 : MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
150 : MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
151 : MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
152 : MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
153 : MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
154 : MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
155 : MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
156 : MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
157 :
158 : MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
159 : MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
160 : MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
161 : MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
162 : MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
163 : MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
164 : MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
165 : MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
166 : MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
167 : MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
168 : MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
169 :
170 : MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
171 : MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
172 : MODULE_FIRMWARE("amdgpu/vegam_me.bin");
173 : MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
174 : MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
175 : MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
176 :
177 : static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
178 : {
179 : {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
180 : {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
181 : {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
182 : {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
183 : {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
184 : {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
185 : {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
186 : {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
187 : {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
188 : {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
189 : {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
190 : {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
191 : {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
192 : {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
193 : {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
194 : {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
195 : };
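
/*
 * One {mem_base, mem_size, gws, oa} register tuple per VMID.  The GDS
 * switch code indexes this table by VMID when emitting the per-process
 * GDS/GWS/OA programming, e.g. amdgpu_gds_reg_offset[vmid].mem_base.
 */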
196 :
197 : static const u32 golden_settings_tonga_a11[] =
198 : {
199 : mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
200 : mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
201 : mmDB_DEBUG2, 0xf00fffff, 0x00000400,
202 : mmGB_GPU_ID, 0x0000000f, 0x00000000,
203 : mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
204 : mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
205 : mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
206 : mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
207 : mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
208 : mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
209 : mmTCC_CTRL, 0x00100000, 0xf31fff7f,
210 : mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
211 : mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
212 : mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
213 : mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
214 : mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
215 : };
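
/*
 * All of the golden-settings tables in this file are flat arrays of
 * {reg, and_mask, or_mask} triplets consumed by
 * amdgpu_device_program_register_sequence().  A minimal sketch of that
 * consumer (illustrative only; the canonical loop lives in amdgpu_device.c):
 *
 *   for (i = 0; i < array_size; i += 3) {
 *       reg      = regs[i + 0];
 *       and_mask = regs[i + 1];
 *       or_mask  = regs[i + 2];
 *       if (and_mask == 0xffffffff) {
 *           tmp = or_mask;                  full-register entries: direct write
 *       } else {
 *           tmp = RREG32(reg);
 *           tmp &= ~and_mask;               clear the masked field
 *           tmp |= (or_mask & and_mask);    install the new value
 *       }
 *       WREG32(reg, tmp);
 *   }
 */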
216 :
217 : static const u32 tonga_golden_common_all[] =
218 : {
219 : mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
220 : mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
221 : mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
222 : mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
223 : mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
224 : mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
225 : mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
226 : mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
227 : };
228 :
229 : static const u32 tonga_mgcg_cgcg_init[] =
230 : {
231 : mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
232 : mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
233 : mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
234 : mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
235 : mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
236 : mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
237 : mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
238 : mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
239 : mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
240 : mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
241 : mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
242 : mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
243 : mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
244 : mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
245 : mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
246 : mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
247 : mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
248 : mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
249 : mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
250 : mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
251 : mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
252 : mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
253 : mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
254 : mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
255 : mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
256 : mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
257 : mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
258 : mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
259 : mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
260 : mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
261 : mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
262 : mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
263 : mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
264 : mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
265 : mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
266 : mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
267 : mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
268 : mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
269 : mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
270 : mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
271 : mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
272 : mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
273 : mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
274 : mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
275 : mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
276 : mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
277 : mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
278 : mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
279 : mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
280 : mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
281 : mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
282 : mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
283 : mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
284 : mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
285 : mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
286 : mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
287 : mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
288 : mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
289 : mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
290 : mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
291 : mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
292 : mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
293 : mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
294 : mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
295 : mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
296 : mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
297 : mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
298 : mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
299 : mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
300 : mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
301 : mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
302 : mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
303 : mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
304 : mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
305 : mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
306 : };
307 :
308 : static const u32 golden_settings_vegam_a11[] =
309 : {
310 : mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
311 : mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
312 : mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
313 : mmDB_DEBUG2, 0xf00fffff, 0x00000400,
314 : mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
315 : mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
316 : mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
317 : mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
318 : mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
319 : mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
320 : mmSQ_CONFIG, 0x07f80000, 0x01180000,
321 : mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
322 : mmTCC_CTRL, 0x00100000, 0xf31fff7f,
323 : mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
324 : mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
325 : mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
326 : mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
327 : };
328 :
329 : static const u32 vegam_golden_common_all[] =
330 : {
331 : mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
332 : mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
333 : mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
334 : mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
335 : mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
336 : mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
337 : };
338 :
339 : static const u32 golden_settings_polaris11_a11[] =
340 : {
341 : mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
342 : mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
343 : mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
344 : mmDB_DEBUG2, 0xf00fffff, 0x00000400,
345 : mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
346 : mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
347 : mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
348 : mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
349 : mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
350 : mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
351 : mmSQ_CONFIG, 0x07f80000, 0x01180000,
352 : mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
353 : mmTCC_CTRL, 0x00100000, 0xf31fff7f,
354 : mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
355 : mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
356 : mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
357 : mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
358 : };
359 :
360 : static const u32 polaris11_golden_common_all[] =
361 : {
362 : mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
363 : mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
364 : mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
365 : mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
366 : mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
367 : mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
368 : };
369 :
370 : static const u32 golden_settings_polaris10_a11[] =
371 : {
372 : mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
373 : mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
374 : mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
375 : mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
376 : mmDB_DEBUG2, 0xf00fffff, 0x00000400,
377 : mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
378 : mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
379 : mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
380 : mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
381 : mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
382 : mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
383 : mmSQ_CONFIG, 0x07f80000, 0x07180000,
384 : mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
385 : mmTCC_CTRL, 0x00100000, 0xf31fff7f,
386 : mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
387 : mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
388 : mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
389 : };
390 :
391 : static const u32 polaris10_golden_common_all[] =
392 : {
393 : mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
394 : mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
395 : mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
396 : mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
397 : mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
398 : mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
399 : mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
400 : mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
401 : };
402 :
403 : static const u32 fiji_golden_common_all[] =
404 : {
405 : mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
406 : mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
407 : mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
408 : mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
409 : mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
410 : mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
411 : mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
412 : mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
413 : mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
414 : mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
415 : };
416 :
417 : static const u32 golden_settings_fiji_a10[] =
418 : {
419 : mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
420 : mmDB_DEBUG2, 0xf00fffff, 0x00000400,
421 : mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
422 : mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
423 : mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
424 : mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
425 : mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
426 : mmTCC_CTRL, 0x00100000, 0xf31fff7f,
427 : mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
428 : mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
429 : mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
430 : };
431 :
432 : static const u32 fiji_mgcg_cgcg_init[] =
433 : {
434 : mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
435 : mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
436 : mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
437 : mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
438 : mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
439 : mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
440 : mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
441 : mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
442 : mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
443 : mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
444 : mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
445 : mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
446 : mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
447 : mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
448 : mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
449 : mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
450 : mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
451 : mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
452 : mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
453 : mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
454 : mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
455 : mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
456 : mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
457 : mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
458 : mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
459 : mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
460 : mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
461 : mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
462 : mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
463 : mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
464 : mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
465 : mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
466 : mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
467 : mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
468 : mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
469 : };
470 :
471 : static const u32 golden_settings_iceland_a11[] =
472 : {
473 : mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
474 : mmDB_DEBUG2, 0xf00fffff, 0x00000400,
475 : mmDB_DEBUG3, 0xc0000000, 0xc0000000,
476 : mmGB_GPU_ID, 0x0000000f, 0x00000000,
477 : mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
478 : mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
479 : mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
480 : mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
481 : mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
482 : mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
483 : mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
484 : mmTCC_CTRL, 0x00100000, 0xf31fff7f,
485 : mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
486 : mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
487 : mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
488 : mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
489 : };
490 :
491 : static const u32 iceland_golden_common_all[] =
492 : {
493 : mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
494 : mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
495 : mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
496 : mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
497 : mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
498 : mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
499 : mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
500 : mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
501 : };
502 :
503 : static const u32 iceland_mgcg_cgcg_init[] =
504 : {
505 : mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
506 : mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
507 : mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
508 : mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
509 : mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
510 : mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
511 : mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
512 : mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
513 : mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
514 : mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
515 : mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
516 : mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
517 : mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
518 : mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
519 : mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
520 : mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
521 : mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
522 : mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
523 : mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
524 : mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
525 : mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
526 : mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
527 : mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
528 : mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
529 : mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
530 : mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
531 : mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
532 : mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
533 : mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
534 : mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
535 : mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
536 : mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
537 : mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
538 : mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
539 : mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
540 : mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
541 : mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
542 : mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
543 : mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
544 : mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
545 : mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
546 : mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
547 : mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
548 : mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
549 : mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
550 : mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
551 : mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
552 : mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
553 : mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
554 : mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
555 : mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
556 : mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
557 : mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
558 : mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
559 : mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
560 : mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
561 : mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
562 : mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
563 : mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
564 : mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
565 : mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
566 : mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
567 : mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
568 : mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
569 : };
570 :
571 : static const u32 cz_golden_settings_a11[] =
572 : {
573 : mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
574 : mmDB_DEBUG2, 0xf00fffff, 0x00000400,
575 : mmGB_GPU_ID, 0x0000000f, 0x00000000,
576 : mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
577 : mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
578 : mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
579 : mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
580 : mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
581 : mmTCC_CTRL, 0x00100000, 0xf31fff7f,
582 : mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
583 : mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
584 : mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
585 : };
586 :
587 : static const u32 cz_golden_common_all[] =
588 : {
589 : mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
590 : mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
591 : mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
592 : mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
593 : mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
594 : mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
595 : mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
596 : mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
597 : };
598 :
599 : static const u32 cz_mgcg_cgcg_init[] =
600 : {
601 : mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
602 : mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
603 : mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
604 : mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
605 : mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
606 : mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
607 : mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
608 : mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
609 : mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
610 : mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
611 : mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
612 : mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
613 : mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
614 : mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
615 : mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
616 : mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
617 : mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
618 : mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
619 : mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
620 : mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
621 : mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
622 : mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
623 : mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
624 : mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
625 : mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
626 : mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
627 : mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
628 : mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
629 : mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
630 : mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
631 : mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
632 : mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
633 : mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
634 : mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
635 : mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
636 : mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
637 : mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
638 : mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
639 : mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
640 : mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
641 : mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
642 : mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
643 : mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
644 : mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
645 : mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
646 : mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
647 : mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
648 : mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
649 : mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
650 : mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
651 : mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
652 : mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
653 : mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
654 : mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
655 : mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
656 : mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
657 : mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
658 : mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
659 : mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
660 : mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
661 : mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
662 : mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
663 : mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
664 : mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
665 : mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
666 : mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
667 : mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
668 : mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
669 : mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
670 : mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
671 : mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
672 : mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
673 : mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
674 : mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
675 : mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
676 : };
677 :
678 : static const u32 stoney_golden_settings_a11[] =
679 : {
680 : mmDB_DEBUG2, 0xf00fffff, 0x00000400,
681 : mmGB_GPU_ID, 0x0000000f, 0x00000000,
682 : mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
683 : mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
684 : mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
685 : mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
686 : mmTCC_CTRL, 0x00100000, 0xf31fff7f,
687 : mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
688 : mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
689 : mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
690 : };
691 :
692 : static const u32 stoney_golden_common_all[] =
693 : {
694 : mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
695 : mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
696 : mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
697 : mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
698 : mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
699 : mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
700 : mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
701 : mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
702 : };
703 :
704 : static const u32 stoney_mgcg_cgcg_init[] =
705 : {
706 : mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
707 : mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
708 : mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
709 : mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
710 : mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
711 : };
712 :
713 :
714 : static const char * const sq_edc_source_names[] = {
715 : "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
716 : "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
717 : "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
718 : "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
719 : "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
720 : "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
721 : "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
722 : };
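
/*
 * Human-readable decode of the SOURCE field delivered with SQ EDC (ECC)
 * interrupts; the SQ interrupt handler uses these strings when logging
 * which block produced the error.
 */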
723 :
724 : static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
725 : static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
726 : static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
727 : static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
728 : static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
729 : static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
730 : static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
731 : static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
732 :
733 : #define CG_ACLK_CNTL__ACLK_DIVIDER_MASK 0x0000007fL
734 : #define CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT 0x00000000L
735 :
736 0 : static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
737 : {
738 : uint32_t data;
739 :
740 0 : switch (adev->asic_type) {
741 : case CHIP_TOPAZ:
742 0 : amdgpu_device_program_register_sequence(adev,
743 : iceland_mgcg_cgcg_init,
744 : ARRAY_SIZE(iceland_mgcg_cgcg_init));
745 0 : amdgpu_device_program_register_sequence(adev,
746 : golden_settings_iceland_a11,
747 : ARRAY_SIZE(golden_settings_iceland_a11));
748 0 : amdgpu_device_program_register_sequence(adev,
749 : iceland_golden_common_all,
750 : ARRAY_SIZE(iceland_golden_common_all));
751 0 : break;
752 : case CHIP_FIJI:
753 0 : amdgpu_device_program_register_sequence(adev,
754 : fiji_mgcg_cgcg_init,
755 : ARRAY_SIZE(fiji_mgcg_cgcg_init));
756 0 : amdgpu_device_program_register_sequence(adev,
757 : golden_settings_fiji_a10,
758 : ARRAY_SIZE(golden_settings_fiji_a10));
759 0 : amdgpu_device_program_register_sequence(adev,
760 : fiji_golden_common_all,
761 : ARRAY_SIZE(fiji_golden_common_all));
762 0 : break;
763 :
764 : case CHIP_TONGA:
765 0 : amdgpu_device_program_register_sequence(adev,
766 : tonga_mgcg_cgcg_init,
767 : ARRAY_SIZE(tonga_mgcg_cgcg_init));
768 0 : amdgpu_device_program_register_sequence(adev,
769 : golden_settings_tonga_a11,
770 : ARRAY_SIZE(golden_settings_tonga_a11));
771 0 : amdgpu_device_program_register_sequence(adev,
772 : tonga_golden_common_all,
773 : ARRAY_SIZE(tonga_golden_common_all));
774 0 : break;
775 : case CHIP_VEGAM:
776 0 : amdgpu_device_program_register_sequence(adev,
777 : golden_settings_vegam_a11,
778 : ARRAY_SIZE(golden_settings_vegam_a11));
779 0 : amdgpu_device_program_register_sequence(adev,
780 : vegam_golden_common_all,
781 : ARRAY_SIZE(vegam_golden_common_all));
782 0 : break;
783 : case CHIP_POLARIS11:
784 : case CHIP_POLARIS12:
785 0 : amdgpu_device_program_register_sequence(adev,
786 : golden_settings_polaris11_a11,
787 : ARRAY_SIZE(golden_settings_polaris11_a11));
788 0 : amdgpu_device_program_register_sequence(adev,
789 : polaris11_golden_common_all,
790 : ARRAY_SIZE(polaris11_golden_common_all));
791 0 : break;
792 : case CHIP_POLARIS10:
793 0 : amdgpu_device_program_register_sequence(adev,
794 : golden_settings_polaris10_a11,
795 : ARRAY_SIZE(golden_settings_polaris10_a11));
796 0 : amdgpu_device_program_register_sequence(adev,
797 : polaris10_golden_common_all,
798 : ARRAY_SIZE(polaris10_golden_common_all));
799 0 : data = RREG32_SMC(ixCG_ACLK_CNTL);
800 0 : data &= ~CG_ACLK_CNTL__ACLK_DIVIDER_MASK;
801 0 : data |= 0x18 << CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT;
802 0 : WREG32_SMC(ixCG_ACLK_CNTL, data);
803 0 : if ((adev->pdev->device == 0x67DF) && (adev->pdev->revision == 0xc7) &&
804 0 : ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
805 0 : (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
806 : (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1680))) {
807 0 : amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
808 0 : amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
809 : }
810 : break;
811 : case CHIP_CARRIZO:
812 0 : amdgpu_device_program_register_sequence(adev,
813 : cz_mgcg_cgcg_init,
814 : ARRAY_SIZE(cz_mgcg_cgcg_init));
815 0 : amdgpu_device_program_register_sequence(adev,
816 : cz_golden_settings_a11,
817 : ARRAY_SIZE(cz_golden_settings_a11));
818 0 : amdgpu_device_program_register_sequence(adev,
819 : cz_golden_common_all,
820 : ARRAY_SIZE(cz_golden_common_all));
821 0 : break;
822 : case CHIP_STONEY:
823 0 : amdgpu_device_program_register_sequence(adev,
824 : stoney_mgcg_cgcg_init,
825 : ARRAY_SIZE(stoney_mgcg_cgcg_init));
826 0 : amdgpu_device_program_register_sequence(adev,
827 : stoney_golden_settings_a11,
828 : ARRAY_SIZE(stoney_golden_settings_a11));
829 0 : amdgpu_device_program_register_sequence(adev,
830 : stoney_golden_common_all,
831 : ARRAY_SIZE(stoney_golden_common_all));
832 0 : break;
833 : default:
834 : break;
835 : }
836 0 : }
837 :
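/*
 * Basic GFX ring smoke test: seed SCRATCH_REG0 with 0xCAFEDEAD, submit a
 * three-dword SET_UCONFIG_REG packet that writes 0xDEADBEEF to it, then
 * poll the register until the new value lands or adev->usec_timeout
 * microseconds have elapsed.
 */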
838 0 : static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
839 : {
840 0 : struct amdgpu_device *adev = ring->adev;
841 0 : uint32_t tmp = 0;
842 : unsigned i;
843 : int r;
844 :
845 0 : WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
846 0 : r = amdgpu_ring_alloc(ring, 3);
847 0 : if (r)
848 : return r;
849 :
850 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
851 0 : amdgpu_ring_write(ring, mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START);
852 0 : amdgpu_ring_write(ring, 0xDEADBEEF);
853 0 : amdgpu_ring_commit(ring);
854 :
855 0 : for (i = 0; i < adev->usec_timeout; i++) {
856 0 : tmp = RREG32(mmSCRATCH_REG0);
857 0 : if (tmp == 0xDEADBEEF)
858 : break;
859 0 : udelay(1);
860 : }
861 :
862 0 : if (i >= adev->usec_timeout)
863 0 : r = -ETIMEDOUT;
864 :
865 : return r;
866 : }
867 :
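/*
 * IB smoke test: seed a writeback slot with 0xCAFEDEAD, submit a small
 * indirect buffer whose WRITE_DATA packet (DST_SEL(5) targets memory)
 * stores 0xDEADBEEF to the slot's GPU address, then wait on the fence and
 * verify that the value arrived.
 */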
868 0 : static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
869 : {
870 0 : struct amdgpu_device *adev = ring->adev;
871 : struct amdgpu_ib ib;
872 0 : struct dma_fence *f = NULL;
873 :
874 : unsigned int index;
875 : uint64_t gpu_addr;
876 : uint32_t tmp;
877 : long r;
878 :
879 0 : r = amdgpu_device_wb_get(adev, &index);
880 0 : if (r)
881 : return r;
882 :
883 0 : gpu_addr = adev->wb.gpu_addr + (index * 4);
884 0 : adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
885 0 : memset(&ib, 0, sizeof(ib));
886 0 : r = amdgpu_ib_get(adev, NULL, 16,
887 : AMDGPU_IB_POOL_DIRECT, &ib);
888 0 : if (r)
889 : goto err1;
890 :
891 0 : ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
892 0 : ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
893 0 : ib.ptr[2] = lower_32_bits(gpu_addr);
894 0 : ib.ptr[3] = upper_32_bits(gpu_addr);
895 0 : ib.ptr[4] = 0xDEADBEEF;
896 0 : ib.length_dw = 5;
897 :
898 0 : r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
899 0 : if (r)
900 : goto err2;
901 :
902 0 : r = dma_fence_wait_timeout(f, false, timeout);
903 0 : if (r == 0) {
904 : r = -ETIMEDOUT;
905 : goto err2;
906 0 : } else if (r < 0) {
907 : goto err2;
908 : }
909 :
910 0 : tmp = adev->wb.wb[index];
911 0 : if (tmp == 0xDEADBEEF)
912 : r = 0;
913 : else
914 0 : r = -EINVAL;
915 :
916 : err2:
917 0 : amdgpu_ib_free(adev, &ib, NULL);
918 0 : dma_fence_put(f);
919 : err1:
920 0 : amdgpu_device_wb_free(adev, index);
921 0 : return r;
922 : }
923 :
924 :
925 0 : static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
926 : {
927 0 : release_firmware(adev->gfx.pfp_fw);
928 0 : adev->gfx.pfp_fw = NULL;
929 0 : release_firmware(adev->gfx.me_fw);
930 0 : adev->gfx.me_fw = NULL;
931 0 : release_firmware(adev->gfx.ce_fw);
932 0 : adev->gfx.ce_fw = NULL;
933 0 : release_firmware(adev->gfx.rlc_fw);
934 0 : adev->gfx.rlc_fw = NULL;
935 0 : release_firmware(adev->gfx.mec_fw);
936 0 : adev->gfx.mec_fw = NULL;
937 0 : if ((adev->asic_type != CHIP_STONEY) &&
938 : (adev->asic_type != CHIP_TOPAZ))
939 0 : release_firmware(adev->gfx.mec2_fw);
940 0 : adev->gfx.mec2_fw = NULL;
941 :
942 0 : kfree(adev->gfx.rlc.register_list_format);
943 0 : }
944 :
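/*
 * Fetch and validate all CP/RLC microcode for the detected ASIC.  Polaris
 * parts try the updated "_2" firmware images first and fall back to the
 * original names on -ENOENT; Stoney and Topaz ship no MEC2 firmware, and a
 * missing MEC2 image is treated as non-fatal on the remaining chips.
 */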
945 0 : static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
946 : {
947 : const char *chip_name;
948 : char fw_name[30];
949 : int err;
950 0 : struct amdgpu_firmware_info *info = NULL;
951 0 : const struct common_firmware_header *header = NULL;
952 : const struct gfx_firmware_header_v1_0 *cp_hdr;
953 : const struct rlc_firmware_header_v2_0 *rlc_hdr;
954 0 : unsigned int *tmp = NULL, i;
955 :
956 0 : DRM_DEBUG("\n");
957 :
958 0 : switch (adev->asic_type) {
959 : case CHIP_TOPAZ:
960 : chip_name = "topaz";
961 : break;
962 : case CHIP_TONGA:
963 0 : chip_name = "tonga";
964 0 : break;
965 : case CHIP_CARRIZO:
966 0 : chip_name = "carrizo";
967 0 : break;
968 : case CHIP_FIJI:
969 0 : chip_name = "fiji";
970 0 : break;
971 : case CHIP_STONEY:
972 0 : chip_name = "stoney";
973 0 : break;
974 : case CHIP_POLARIS10:
975 0 : chip_name = "polaris10";
976 0 : break;
977 : case CHIP_POLARIS11:
978 0 : chip_name = "polaris11";
979 0 : break;
980 : case CHIP_POLARIS12:
981 0 : chip_name = "polaris12";
982 0 : break;
983 : case CHIP_VEGAM:
984 0 : chip_name = "vegam";
985 0 : break;
986 : default:
987 0 : BUG();
988 : }
989 :
990 0 : if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
991 0 : snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
992 0 : err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
993 0 : if (err == -ENOENT) {
994 0 : snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
995 0 : err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
996 : }
997 : } else {
998 0 : snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
999 0 : err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1000 : }
1001 0 : if (err)
1002 : goto out;
1003 0 : err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1004 0 : if (err)
1005 : goto out;
1006 0 : cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1007 0 : adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1008 0 : adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1009 :
1010 0 : if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1011 0 : snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1012 0 : err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1013 0 : if (err == -ENOENT) {
1014 0 : snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1015 0 : err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1016 : }
1017 : } else {
1018 0 : snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1019 0 : err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1020 : }
1021 0 : if (err)
1022 : goto out;
1023 0 : err = amdgpu_ucode_validate(adev->gfx.me_fw);
1024 0 : if (err)
1025 : goto out;
1026 0 : cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1027 0 : adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1028 :
1029 0 : adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1030 :
1031 0 : if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1032 0 : snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1033 0 : err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1034 0 : if (err == -ENOENT) {
1035 0 : snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1036 0 : err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1037 : }
1038 : } else {
1039 0 : snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1040 0 : err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1041 : }
1042 0 : if (err)
1043 : goto out;
1044 0 : err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1045 0 : if (err)
1046 : goto out;
1047 0 : cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1048 0 : adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1049 0 : adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1050 :
1051 : /*
1052 : * Support for MCBP/virtualization in combination with chained IBs was
1053 : * formally released with feature version 46.
1054 : */
1055 0 : if (adev->gfx.ce_feature_version >= 46 &&
1056 0 : adev->gfx.pfp_feature_version >= 46) {
1057 0 : adev->virt.chained_ib_support = true;
1058 0 : DRM_INFO("Chained IB support enabled!\n");
1059 : } else
1060 0 : adev->virt.chained_ib_support = false;
1061 :
1062 0 : snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1063 0 : err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1064 0 : if (err)
1065 : goto out;
1066 0 : err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1067 0 : rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1068 0 : adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1069 0 : adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1070 :
1071 0 : adev->gfx.rlc.save_and_restore_offset =
1072 0 : le32_to_cpu(rlc_hdr->save_and_restore_offset);
1073 0 : adev->gfx.rlc.clear_state_descriptor_offset =
1074 0 : le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1075 0 : adev->gfx.rlc.avail_scratch_ram_locations =
1076 0 : le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1077 0 : adev->gfx.rlc.reg_restore_list_size =
1078 0 : le32_to_cpu(rlc_hdr->reg_restore_list_size);
1079 0 : adev->gfx.rlc.reg_list_format_start =
1080 0 : le32_to_cpu(rlc_hdr->reg_list_format_start);
1081 0 : adev->gfx.rlc.reg_list_format_separate_start =
1082 0 : le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1083 0 : adev->gfx.rlc.starting_offsets_start =
1084 0 : le32_to_cpu(rlc_hdr->starting_offsets_start);
1085 0 : adev->gfx.rlc.reg_list_format_size_bytes =
1086 0 : le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1087 0 : adev->gfx.rlc.reg_list_size_bytes =
1088 0 : le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1089 :
1090 0 : adev->gfx.rlc.register_list_format =
1091 0 : kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1092 : adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1093 :
1094 0 : if (!adev->gfx.rlc.register_list_format) {
1095 : err = -ENOMEM;
1096 : goto out;
1097 : }
1098 :
1099 0 : tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1100 0 : le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1101 0 : for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1102 0 : adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1103 :
1104 0 : adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1105 :
1106 0 : tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1107 0 : le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1108 0 : for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1109 0 : adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1110 :
1111 0 : if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1112 0 : snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1113 0 : err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1114 0 : if (err == -ENOENT) {
1115 0 : snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1116 0 : err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1117 : }
1118 : } else {
1119 0 : snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1120 0 : err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1121 : }
1122 0 : if (err)
1123 : goto out;
1124 0 : err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1125 0 : if (err)
1126 : goto out;
1127 0 : cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1128 0 : adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1129 0 : adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1130 :
1131 0 : if ((adev->asic_type != CHIP_STONEY) &&
1132 : (adev->asic_type != CHIP_TOPAZ)) {
1133 0 : if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1134 0 : snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1135 0 : err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1136 0 : if (err == -ENOENT) {
1137 0 : snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1138 0 : err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1139 : }
1140 : } else {
1141 0 : snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1142 0 : err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1143 : }
1144 0 : if (!err) {
1145 0 : err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1146 0 : if (err)
1147 : goto out;
1148 0 : cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1149 0 : adev->gfx.mec2_fw->data;
1150 0 : adev->gfx.mec2_fw_version =
1151 0 : le32_to_cpu(cp_hdr->header.ucode_version);
1152 0 : adev->gfx.mec2_feature_version =
1153 0 : le32_to_cpu(cp_hdr->ucode_feature_version);
1154 : } else {
1155 0 : err = 0;
1156 0 : adev->gfx.mec2_fw = NULL;
1157 : }
1158 : }
1159 :
1160 0 : info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1161 0 : info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1162 0 : info->fw = adev->gfx.pfp_fw;
1163 0 : header = (const struct common_firmware_header *)info->fw->data;
1164 0 : adev->firmware.fw_size +=
1165 0 : ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1166 :
1167 0 : info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1168 0 : info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1169 0 : info->fw = adev->gfx.me_fw;
1170 0 : header = (const struct common_firmware_header *)info->fw->data;
1171 0 : adev->firmware.fw_size +=
1172 0 : ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1173 :
1174 0 : info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1175 0 : info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1176 0 : info->fw = adev->gfx.ce_fw;
1177 0 : header = (const struct common_firmware_header *)info->fw->data;
1178 0 : adev->firmware.fw_size +=
1179 0 : ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1180 :
1181 0 : info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1182 0 : info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1183 0 : info->fw = adev->gfx.rlc_fw;
1184 0 : header = (const struct common_firmware_header *)info->fw->data;
1185 0 : adev->firmware.fw_size +=
1186 0 : ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1187 :
1188 0 : info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1189 0 : info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1190 0 : info->fw = adev->gfx.mec_fw;
1191 0 : header = (const struct common_firmware_header *)info->fw->data;
1192 0 : adev->firmware.fw_size +=
1193 0 : ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1194 :
1195 : /* we also need to account for the MEC jump table (JT) */
1196 0 : cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1197 0 : adev->firmware.fw_size +=
1198 0 : ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1199 :
1200 0 : if (amdgpu_sriov_vf(adev)) {
1201 0 : info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1202 0 : info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1203 0 : info->fw = adev->gfx.mec_fw;
1204 0 : adev->firmware.fw_size +=
1205 : ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
1206 : }
1207 :
1208 0 : if (adev->gfx.mec2_fw) {
1209 0 : info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1210 0 : info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1211 0 : info->fw = adev->gfx.mec2_fw;
1212 0 : header = (const struct common_firmware_header *)info->fw->data;
1213 0 : adev->firmware.fw_size +=
1214 0 : ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1215 : }
1216 :
1217 : out:
1218 0 : if (err) {
1219 0 : dev_err(adev->dev,
1220 : "gfx8: Failed to load firmware \"%s\"\n",
1221 : fw_name);
1222 0 : release_firmware(adev->gfx.pfp_fw);
1223 0 : adev->gfx.pfp_fw = NULL;
1224 0 : release_firmware(adev->gfx.me_fw);
1225 0 : adev->gfx.me_fw = NULL;
1226 0 : release_firmware(adev->gfx.ce_fw);
1227 0 : adev->gfx.ce_fw = NULL;
1228 0 : release_firmware(adev->gfx.rlc_fw);
1229 0 : adev->gfx.rlc_fw = NULL;
1230 0 : release_firmware(adev->gfx.mec_fw);
1231 0 : adev->gfx.mec_fw = NULL;
1232 0 : release_firmware(adev->gfx.mec2_fw);
1233 0 : adev->gfx.mec2_fw = NULL;
1234 : }
1235 0 : return err;
1236 : }
1237 :
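/*
 * Serialize the clear-state data (vi_cs_data) into a PM4 stream: PREAMBLE
 * begin, CONTEXT_CONTROL, one SET_CONTEXT_REG burst per extent, the
 * per-board raster config, PREAMBLE end, and a final CLEAR_STATE packet.
 */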
1238 0 : static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1239 : volatile u32 *buffer)
1240 : {
1241 0 : u32 count = 0, i;
1242 0 : const struct cs_section_def *sect = NULL;
1243 0 : const struct cs_extent_def *ext = NULL;
1244 :
1245 0 : if (adev->gfx.rlc.cs_data == NULL)
1246 : return;
1247 0 : if (buffer == NULL)
1248 : return;
1249 :
1250 0 : buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1251 0 : buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1252 :
1253 0 : buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1254 0 : buffer[count++] = cpu_to_le32(0x80000000);
1255 0 : buffer[count++] = cpu_to_le32(0x80000000);
1256 :
1257 0 : for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1258 0 : for (ext = sect->section; ext->extent != NULL; ++ext) {
1259 0 : if (sect->id == SECT_CONTEXT) {
1260 0 : buffer[count++] =
1261 0 : cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1262 0 : buffer[count++] = cpu_to_le32(ext->reg_index -
1263 : PACKET3_SET_CONTEXT_REG_START);
1264 0 : for (i = 0; i < ext->reg_count; i++)
1265 0 : buffer[count++] = cpu_to_le32(ext->extent[i]);
1266 : } else {
1267 : return;
1268 : }
1269 : }
1270 : }
1271 :
1272 0 : buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1273 0 : buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1274 : PACKET3_SET_CONTEXT_REG_START);
1275 0 : buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1276 0 : buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1277 :
1278 0 : buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1279 0 : buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1280 :
1281 0 : buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1282 0 : buffer[count++] = cpu_to_le32(0);
1283 : }
1284 :
1285 0 : static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1286 : {
1287 0 : if (adev->asic_type == CHIP_CARRIZO)
1288 : return 5;
1289 : else
1290 0 : return 4;
1291 : }
1292 :
1293 0 : static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1294 : {
1295 : const struct cs_section_def *cs_data;
1296 : int r;
1297 :
1298 0 : adev->gfx.rlc.cs_data = vi_cs_data;
1299 :
1300 0 : cs_data = adev->gfx.rlc.cs_data;
1301 :
1302 : if (cs_data) {
1303 : /* init clear state block */
1304 0 : r = amdgpu_gfx_rlc_init_csb(adev);
1305 0 : if (r)
1306 : return r;
1307 : }
1308 :
1309 0 : if ((adev->asic_type == CHIP_CARRIZO) ||
1310 : (adev->asic_type == CHIP_STONEY)) {
1311 0 : adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1312 0 : r = amdgpu_gfx_rlc_init_cpt(adev);
1313 0 : if (r)
1314 : return r;
1315 : }
1316 :
1317 : /* initialize the SPM VMID to 0xf */
1318 0 : if (adev->gfx.rlc.funcs->update_spm_vmid)
1319 0 : adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1320 :
1321 : return 0;
1322 : }
1323 :
1324 : static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1325 : {
1326 0 : amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1327 : }
1328 :
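/*
 * Allocate the MEC's HPD EOP backing store: one GFX8_MEC_HPD_SIZE (4 KiB)
 * region per acquired compute ring, placed in VRAM and zeroed before the
 * CPU mapping is dropped.
 */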
1329 0 : static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1330 : {
1331 : int r;
1332 : u32 *hpd;
1333 : size_t mec_hpd_size;
1334 :
1335 0 : bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1336 :
1337 : /* take ownership of the relevant compute queues */
1338 0 : amdgpu_gfx_compute_queue_acquire(adev);
1339 :
1340 0 : mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1341 0 : if (mec_hpd_size) {
1342 0 : r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1343 : AMDGPU_GEM_DOMAIN_VRAM,
1344 : &adev->gfx.mec.hpd_eop_obj,
1345 : &adev->gfx.mec.hpd_eop_gpu_addr,
1346 : (void **)&hpd);
1347 0 : if (r) {
1348 0 : dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1349 0 : return r;
1350 : }
1351 :
1352 0 : memset(hpd, 0, mec_hpd_size);
1353 :
1354 0 : amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1355 0 : amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1356 : }
1357 :
1358 : return 0;
1359 : }
1360 :
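     : /*
     :  * Raw GCN3 shader dwords for the EDC workaround below; the bulk are
     :  * register moves cycling data through the GPR files, terminated by
     :  * s_barrier (0xbf8a0000) and s_endpgm (0xbf810000).
     :  */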
1361 : static const u32 vgpr_init_compute_shader[] =
1362 : {
1363 : 0x7e000209, 0x7e020208,
1364 : 0x7e040207, 0x7e060206,
1365 : 0x7e080205, 0x7e0a0204,
1366 : 0x7e0c0203, 0x7e0e0202,
1367 : 0x7e100201, 0x7e120200,
1368 : 0x7e140209, 0x7e160208,
1369 : 0x7e180207, 0x7e1a0206,
1370 : 0x7e1c0205, 0x7e1e0204,
1371 : 0x7e200203, 0x7e220202,
1372 : 0x7e240201, 0x7e260200,
1373 : 0x7e280209, 0x7e2a0208,
1374 : 0x7e2c0207, 0x7e2e0206,
1375 : 0x7e300205, 0x7e320204,
1376 : 0x7e340203, 0x7e360202,
1377 : 0x7e380201, 0x7e3a0200,
1378 : 0x7e3c0209, 0x7e3e0208,
1379 : 0x7e400207, 0x7e420206,
1380 : 0x7e440205, 0x7e460204,
1381 : 0x7e480203, 0x7e4a0202,
1382 : 0x7e4c0201, 0x7e4e0200,
1383 : 0x7e500209, 0x7e520208,
1384 : 0x7e540207, 0x7e560206,
1385 : 0x7e580205, 0x7e5a0204,
1386 : 0x7e5c0203, 0x7e5e0202,
1387 : 0x7e600201, 0x7e620200,
1388 : 0x7e640209, 0x7e660208,
1389 : 0x7e680207, 0x7e6a0206,
1390 : 0x7e6c0205, 0x7e6e0204,
1391 : 0x7e700203, 0x7e720202,
1392 : 0x7e740201, 0x7e760200,
1393 : 0x7e780209, 0x7e7a0208,
1394 : 0x7e7c0207, 0x7e7e0206,
1395 : 0xbf8a0000, 0xbf810000,
1396 : };
1397 :
1398 : static const u32 sgpr_init_compute_shader[] =
1399 : {
1400 : 0xbe8a0100, 0xbe8c0102,
1401 : 0xbe8e0104, 0xbe900106,
1402 : 0xbe920108, 0xbe940100,
1403 : 0xbe960102, 0xbe980104,
1404 : 0xbe9a0106, 0xbe9c0108,
1405 : 0xbe9e0100, 0xbea00102,
1406 : 0xbea20104, 0xbea40106,
1407 : 0xbea60108, 0xbea80100,
1408 : 0xbeaa0102, 0xbeac0104,
1409 : 0xbeae0106, 0xbeb00108,
1410 : 0xbeb20100, 0xbeb40102,
1411 : 0xbeb60104, 0xbeb80106,
1412 : 0xbeba0108, 0xbebc0100,
1413 : 0xbebe0102, 0xbec00104,
1414 : 0xbec20106, 0xbec40108,
1415 : 0xbec60100, 0xbec80102,
1416 : 0xbee60004, 0xbee70005,
1417 : 0xbeea0006, 0xbeeb0007,
1418 : 0xbee80008, 0xbee90009,
1419 : 0xbefc0000, 0xbf8a0000,
1420 : 0xbf810000, 0x00000000,
1421 : };
1422 :
1423 : static const u32 vgpr_init_regs[] =
1424 : {
1425 : mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1426 : mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1427 : mmCOMPUTE_NUM_THREAD_X, 256*4,
1428 : mmCOMPUTE_NUM_THREAD_Y, 1,
1429 : mmCOMPUTE_NUM_THREAD_Z, 1,
1430 : mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1431 : mmCOMPUTE_PGM_RSRC2, 20,
1432 : mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1433 : mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1434 : mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1435 : mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1436 : mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1437 : mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1438 : mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1439 : mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1440 : mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1441 : mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1442 : };
1443 :
1444 : static const u32 sgpr1_init_regs[] =
1445 : {
1446 : mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1447 : mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1448 : mmCOMPUTE_NUM_THREAD_X, 256*5,
1449 : mmCOMPUTE_NUM_THREAD_Y, 1,
1450 : mmCOMPUTE_NUM_THREAD_Z, 1,
1451 : mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 SGPRs) */
1452 : mmCOMPUTE_PGM_RSRC2, 20,
1453 : mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1454 : mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1455 : mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1456 : mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1457 : mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1458 : mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1459 : mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1460 : mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1461 : mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1462 : mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1463 : };
1464 :
1465 : static const u32 sgpr2_init_regs[] =
1466 : {
1467 : mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1468 : mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1469 : mmCOMPUTE_NUM_THREAD_X, 256*5,
1470 : mmCOMPUTE_NUM_THREAD_Y, 1,
1471 : mmCOMPUTE_NUM_THREAD_Z, 1,
1472 : mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 SGPRs) */
1473 : mmCOMPUTE_PGM_RSRC2, 20,
1474 : mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1475 : mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1476 : mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1477 : mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1478 : mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1479 : mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1480 : mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1481 : mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1482 : mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1483 : mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1484 : };
1485 :
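     : /*
     :  * SEC/DED (single-error-correct/double-error-detect) ECC counters;
     :  * the workaround reads them back at the end to clear the counts.
     :  */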
1486 : static const u32 sec_ded_counter_registers[] =
1487 : {
1488 : mmCPC_EDC_ATC_CNT,
1489 : mmCPC_EDC_SCRATCH_CNT,
1490 : mmCPC_EDC_UCODE_CNT,
1491 : mmCPF_EDC_ATC_CNT,
1492 : mmCPF_EDC_ROQ_CNT,
1493 : mmCPF_EDC_TAG_CNT,
1494 : mmCPG_EDC_ATC_CNT,
1495 : mmCPG_EDC_DMA_CNT,
1496 : mmCPG_EDC_TAG_CNT,
1497 : mmDC_EDC_CSINVOC_CNT,
1498 : mmDC_EDC_RESTORE_CNT,
1499 : mmDC_EDC_STATE_CNT,
1500 : mmGDS_EDC_CNT,
1501 : mmGDS_EDC_GRBM_CNT,
1502 : mmGDS_EDC_OA_DED,
1503 : mmSPI_EDC_CNT,
1504 : mmSQC_ATC_EDC_GATCL1_CNT,
1505 : mmSQC_EDC_CNT,
1506 : mmSQ_EDC_DED_CNT,
1507 : mmSQ_EDC_INFO,
1508 : mmSQ_EDC_SEC_CNT,
1509 : mmTCC_EDC_CNT,
1510 : mmTCP_ATC_EDC_GATCL1_CNT,
1511 : mmTCP_EDC_CNT,
1512 : mmTD_EDC_CNT
1513 : };
1514 :
1515 0 : static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1516 : {
1517 0 : struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1518 : struct amdgpu_ib ib;
1519 0 : struct dma_fence *f = NULL;
1520 : int r, i;
1521 : u32 tmp;
1522 : unsigned total_size, vgpr_offset, sgpr_offset;
1523 : u64 gpu_addr;
1524 :
1525 : /* only supported on CZ (Carrizo) */
1526 0 : if (adev->asic_type != CHIP_CARRIZO)
1527 : return 0;
1528 :
1529 : /* bail if the compute ring is not ready */
1530 0 : if (!ring->sched.ready)
1531 : return 0;
1532 :
1533 0 : tmp = RREG32(mmGB_EDC_MODE);
1534 0 : WREG32(mmGB_EDC_MODE, 0);
1535 :
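     : 	/*
     : 	 * IB size per dispatch: each register pair is a 3-dword
     : 	 * SET_SH_REG packet, +4 dwords for COMPUTE_PGM_LO/HI, +5 for
     : 	 * DISPATCH_DIRECT, +2 for the EVENT_WRITE flush; x4 for bytes.
     : 	 */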
1536 0 : total_size =
1537 : (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1538 0 : total_size +=
1539 : (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1540 0 : total_size +=
1541 : (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1542 0 : total_size = ALIGN(total_size, 256);
1543 0 : vgpr_offset = total_size;
1544 0 : total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1545 0 : sgpr_offset = total_size;
1546 0 : total_size += sizeof(sgpr_init_compute_shader);
1547 :
1548 : /* allocate an indirect buffer to put the commands in */
1549 0 : memset(&ib, 0, sizeof(ib));
1550 0 : r = amdgpu_ib_get(adev, NULL, total_size,
1551 : AMDGPU_IB_POOL_DIRECT, &ib);
1552 0 : if (r) {
1553 0 : DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1554 0 : return r;
1555 : }
1556 :
1557 : /* load the compute shaders */
1558 0 : for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1559 0 : ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1560 :
1561 0 : for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1562 0 : ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1563 :
1564 : /* init the ib length to 0 */
1565 0 : ib.length_dw = 0;
1566 :
1567 : /* VGPR */
1568 : /* write the register state for the compute dispatch */
1569 0 : for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1570 0 : ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1571 0 : ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1572 0 : ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1573 : }
1574 : /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1575 0 : gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1576 0 : ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1577 0 : ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1578 0 : ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1579 0 : ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1580 :
1581 : /* write dispatch packet */
1582 0 : ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1583 0 : ib.ptr[ib.length_dw++] = 8; /* x */
1584 0 : ib.ptr[ib.length_dw++] = 1; /* y */
1585 0 : ib.ptr[ib.length_dw++] = 1; /* z */
1586 0 : ib.ptr[ib.length_dw++] =
1587 : REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1588 :
1589 : /* write CS partial flush packet */
1590 0 : ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1591 0 : ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1592 :
1593 : /* SGPR1 */
1594 : /* write the register state for the compute dispatch */
1595 0 : for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1596 0 : ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1597 0 : ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1598 0 : ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1599 : }
1600 : /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1601 0 : gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1602 0 : ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1603 0 : ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1604 0 : ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1605 0 : ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1606 :
1607 : /* write dispatch packet */
1608 0 : ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1609 0 : ib.ptr[ib.length_dw++] = 8; /* x */
1610 0 : ib.ptr[ib.length_dw++] = 1; /* y */
1611 0 : ib.ptr[ib.length_dw++] = 1; /* z */
1612 0 : ib.ptr[ib.length_dw++] =
1613 : REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1614 :
1615 : /* write CS partial flush packet */
1616 0 : ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1617 0 : ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1618 :
1619 : /* SGPR2 */
1620 : /* write the register state for the compute dispatch */
1621 0 : for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1622 0 : ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1623 0 : ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1624 0 : ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1625 : }
1626 : /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1627 0 : gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1628 0 : ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1629 0 : ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1630 0 : ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1631 0 : ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1632 :
1633 : /* write dispatch packet */
1634 0 : ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1635 0 : ib.ptr[ib.length_dw++] = 8; /* x */
1636 0 : ib.ptr[ib.length_dw++] = 1; /* y */
1637 0 : ib.ptr[ib.length_dw++] = 1; /* z */
1638 0 : ib.ptr[ib.length_dw++] =
1639 : REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1640 :
1641 : /* write CS partial flush packet */
1642 0 : ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1643 0 : ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1644 :
1645 : /* schedule the IB on the ring */
1646 0 : r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1647 0 : if (r) {
1648 0 : DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1649 0 : goto fail;
1650 : }
1651 :
1652 : /* wait for the GPU to finish processing the IB */
1653 0 : r = dma_fence_wait(f, false);
1654 0 : if (r) {
1655 0 : DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1656 0 : goto fail;
1657 : }
1658 :
1659 0 : tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1660 0 : tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1661 0 : WREG32(mmGB_EDC_MODE, tmp);
1662 :
1663 0 : tmp = RREG32(mmCC_GC_EDC_CONFIG);
1664 0 : tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1665 0 : WREG32(mmCC_GC_EDC_CONFIG, tmp);
1666 :
1668 : /* read back registers to clear the counters */
1669 0 : for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1670 0 : RREG32(sec_ded_counter_registers[i]);
1671 :
1672 : fail:
1673 0 : amdgpu_ib_free(adev, &ib, NULL);
1674 0 : dma_fence_put(f);
1675 :
1676 : return r;
1677 : }
1678 :
1679 0 : static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1680 : {
1681 : u32 gb_addr_config;
1682 : u32 mc_arb_ramcfg;
1683 : u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1684 : u32 tmp;
1685 : int ret;
1686 :
1687 0 : switch (adev->asic_type) {
1688 : case CHIP_TOPAZ:
1689 0 : adev->gfx.config.max_shader_engines = 1;
1690 0 : adev->gfx.config.max_tile_pipes = 2;
1691 0 : adev->gfx.config.max_cu_per_sh = 6;
1692 0 : adev->gfx.config.max_sh_per_se = 1;
1693 0 : adev->gfx.config.max_backends_per_se = 2;
1694 0 : adev->gfx.config.max_texture_channel_caches = 2;
1695 0 : adev->gfx.config.max_gprs = 256;
1696 0 : adev->gfx.config.max_gs_threads = 32;
1697 0 : adev->gfx.config.max_hw_contexts = 8;
1698 :
1699 0 : adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1700 0 : adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1701 0 : adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1702 0 : adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1703 0 : gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1704 0 : break;
1705 : case CHIP_FIJI:
1706 0 : adev->gfx.config.max_shader_engines = 4;
1707 0 : adev->gfx.config.max_tile_pipes = 16;
1708 0 : adev->gfx.config.max_cu_per_sh = 16;
1709 0 : adev->gfx.config.max_sh_per_se = 1;
1710 0 : adev->gfx.config.max_backends_per_se = 4;
1711 0 : adev->gfx.config.max_texture_channel_caches = 16;
1712 0 : adev->gfx.config.max_gprs = 256;
1713 0 : adev->gfx.config.max_gs_threads = 32;
1714 0 : adev->gfx.config.max_hw_contexts = 8;
1715 :
1716 0 : adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1717 0 : adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1718 0 : adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1719 0 : adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1720 0 : gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1721 0 : break;
1722 : case CHIP_POLARIS11:
1723 : case CHIP_POLARIS12:
1724 0 : ret = amdgpu_atombios_get_gfx_info(adev);
1725 0 : if (ret)
1726 : return ret;
1727 0 : adev->gfx.config.max_gprs = 256;
1728 0 : adev->gfx.config.max_gs_threads = 32;
1729 0 : adev->gfx.config.max_hw_contexts = 8;
1730 :
1731 0 : adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1732 0 : adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1733 0 : adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1734 0 : adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1735 0 : gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1736 0 : break;
1737 : case CHIP_POLARIS10:
1738 : case CHIP_VEGAM:
1739 0 : ret = amdgpu_atombios_get_gfx_info(adev);
1740 0 : if (ret)
1741 : return ret;
1742 0 : adev->gfx.config.max_gprs = 256;
1743 0 : adev->gfx.config.max_gs_threads = 32;
1744 0 : adev->gfx.config.max_hw_contexts = 8;
1745 :
1746 0 : adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1747 0 : adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1748 0 : adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1749 0 : adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1750 0 : gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1751 0 : break;
1752 : case CHIP_TONGA:
1753 0 : adev->gfx.config.max_shader_engines = 4;
1754 0 : adev->gfx.config.max_tile_pipes = 8;
1755 0 : adev->gfx.config.max_cu_per_sh = 8;
1756 0 : adev->gfx.config.max_sh_per_se = 1;
1757 0 : adev->gfx.config.max_backends_per_se = 2;
1758 0 : adev->gfx.config.max_texture_channel_caches = 8;
1759 0 : adev->gfx.config.max_gprs = 256;
1760 0 : adev->gfx.config.max_gs_threads = 32;
1761 0 : adev->gfx.config.max_hw_contexts = 8;
1762 :
1763 0 : adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1764 0 : adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1765 0 : adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1766 0 : adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1767 0 : gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1768 0 : break;
1769 : case CHIP_CARRIZO:
1770 0 : adev->gfx.config.max_shader_engines = 1;
1771 0 : adev->gfx.config.max_tile_pipes = 2;
1772 0 : adev->gfx.config.max_sh_per_se = 1;
1773 0 : adev->gfx.config.max_backends_per_se = 2;
1774 0 : adev->gfx.config.max_cu_per_sh = 8;
1775 0 : adev->gfx.config.max_texture_channel_caches = 2;
1776 0 : adev->gfx.config.max_gprs = 256;
1777 0 : adev->gfx.config.max_gs_threads = 32;
1778 0 : adev->gfx.config.max_hw_contexts = 8;
1779 :
1780 0 : adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1781 0 : adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1782 0 : adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1783 0 : adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1784 0 : gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1785 0 : break;
1786 : case CHIP_STONEY:
1787 0 : adev->gfx.config.max_shader_engines = 1;
1788 0 : adev->gfx.config.max_tile_pipes = 2;
1789 0 : adev->gfx.config.max_sh_per_se = 1;
1790 0 : adev->gfx.config.max_backends_per_se = 1;
1791 0 : adev->gfx.config.max_cu_per_sh = 3;
1792 0 : adev->gfx.config.max_texture_channel_caches = 2;
1793 0 : adev->gfx.config.max_gprs = 256;
1794 0 : adev->gfx.config.max_gs_threads = 16;
1795 0 : adev->gfx.config.max_hw_contexts = 8;
1796 :
1797 0 : adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1798 0 : adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1799 0 : adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1800 0 : adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1801 0 : gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1802 0 : break;
1803 : default:
1804 0 : adev->gfx.config.max_shader_engines = 2;
1805 0 : adev->gfx.config.max_tile_pipes = 4;
1806 0 : adev->gfx.config.max_cu_per_sh = 2;
1807 0 : adev->gfx.config.max_sh_per_se = 1;
1808 0 : adev->gfx.config.max_backends_per_se = 2;
1809 0 : adev->gfx.config.max_texture_channel_caches = 4;
1810 0 : adev->gfx.config.max_gprs = 256;
1811 0 : adev->gfx.config.max_gs_threads = 32;
1812 0 : adev->gfx.config.max_hw_contexts = 8;
1813 :
1814 0 : adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1815 0 : adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1816 0 : adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1817 0 : adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1818 0 : gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1819 0 : break;
1820 : }
1821 :
1822 0 : adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1823 0 : mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1824 :
1825 0 : adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
1826 : MC_ARB_RAMCFG, NOOFBANK);
1827 0 : adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
1828 : MC_ARB_RAMCFG, NOOFRANKS);
1829 :
1830 0 : adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1831 0 : adev->gfx.config.mem_max_burst_length_bytes = 256;
1832 0 : if (adev->flags & AMD_IS_APU) {
1833 : /* Get memory bank mapping mode. */
1834 0 : tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1835 0 : dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1836 0 : dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1837 :
1838 0 : tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1839 0 : dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1840 0 : dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1841 :
1842 : /* Validate settings in case only one DIMM is installed. */
1843 0 : if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1844 0 : dimm00_addr_map = 0;
1845 0 : if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1846 0 : dimm01_addr_map = 0;
1847 0 : if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1848 0 : dimm10_addr_map = 0;
1849 0 : if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1850 0 : dimm11_addr_map = 0;
1851 :
1852 : /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1853 : /* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
1854 0 : if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1855 0 : adev->gfx.config.mem_row_size_in_kb = 2;
1856 : else
1857 0 : adev->gfx.config.mem_row_size_in_kb = 1;
1858 : } else {
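     : 		/*
     : 		 * Dedicated memory: NOOFCOLS encodes 2^(8 + n) columns;
     : 		 * at 4 bytes per column that gives the row size in KB,
     : 		 * clamped to 4KB below.
     : 		 */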
1859 0 : tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1860 0 : adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1861 0 : if (adev->gfx.config.mem_row_size_in_kb > 4)
1862 0 : adev->gfx.config.mem_row_size_in_kb = 4;
1863 : }
1864 :
1865 0 : adev->gfx.config.shader_engine_tile_size = 32;
1866 0 : adev->gfx.config.num_gpus = 1;
1867 0 : adev->gfx.config.multi_gpu_tile_size = 64;
1868 :
1869 : /* fix up row size */
1870 0 : switch (adev->gfx.config.mem_row_size_in_kb) {
1871 : case 1:
1872 : default:
1873 0 : gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1874 0 : break;
1875 : case 2:
1876 0 : gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1877 0 : break;
1878 : case 4:
1879 0 : gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1880 0 : break;
1881 : }
1882 0 : adev->gfx.config.gb_addr_config = gb_addr_config;
1883 :
1884 0 : return 0;
1885 : }
1886 :
1887 0 : static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1888 : int mec, int pipe, int queue)
1889 : {
1890 : int r;
1891 : unsigned irq_type;
1892 0 : struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1893 : unsigned int hw_prio;
1894 :
1897 : /* mec0 is me1 */
1898 0 : ring->me = mec + 1;
1899 0 : ring->pipe = pipe;
1900 0 : ring->queue = queue;
1901 :
1902 0 : ring->ring_obj = NULL;
1903 0 : ring->use_doorbell = true;
1904 0 : ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1905 0 : ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1906 0 : + (ring_id * GFX8_MEC_HPD_SIZE);
1907 0 : sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1908 :
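     : 	/*
     : 	 * Each MEC pipe has its own EOP interrupt source; index it
     : 	 * relative to MEC1 PIPE0 using the ring's me/pipe pair.
     : 	 */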
1909 0 : irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1910 0 : + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1911 0 : + ring->pipe;
1912 :
1913 0 : hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
1914 0 : AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
1915 : /* type-2 packets are deprecated on MEC, use type-3 instead */
1916 0 : r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
1917 : hw_prio, NULL);
1918 0 : if (r)
1919 : return r;
1920 :
1922 0 : return 0;
1923 : }
1924 :
1925 : static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1926 :
1927 0 : static int gfx_v8_0_sw_init(void *handle)
1928 : {
1929 : int i, j, k, r, ring_id;
1930 : struct amdgpu_ring *ring;
1931 : struct amdgpu_kiq *kiq;
1932 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1933 :
1934 0 : switch (adev->asic_type) {
1935 : case CHIP_TONGA:
1936 : case CHIP_CARRIZO:
1937 : case CHIP_FIJI:
1938 : case CHIP_POLARIS10:
1939 : case CHIP_POLARIS11:
1940 : case CHIP_POLARIS12:
1941 : case CHIP_VEGAM:
1942 0 : adev->gfx.mec.num_mec = 2;
1943 0 : break;
1944 : case CHIP_TOPAZ:
1945 : case CHIP_STONEY:
1946 : default:
1947 0 : adev->gfx.mec.num_mec = 1;
1948 0 : break;
1949 : }
1950 :
1951 0 : adev->gfx.mec.num_pipe_per_mec = 4;
1952 0 : adev->gfx.mec.num_queue_per_pipe = 8;
1953 :
1954 : /* EOP Event */
1955 0 : r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1956 0 : if (r)
1957 : return r;
1958 :
1959 : /* Privileged reg */
1960 0 : r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1961 : &adev->gfx.priv_reg_irq);
1962 0 : if (r)
1963 : return r;
1964 :
1965 : /* Privileged inst */
1966 0 : r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1967 : &adev->gfx.priv_inst_irq);
1968 0 : if (r)
1969 : return r;
1970 :
1971 : /* Add CP EDC/ECC irq */
1972 0 : r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1973 : &adev->gfx.cp_ecc_error_irq);
1974 0 : if (r)
1975 : return r;
1976 :
1977 : /* SQ interrupts. */
1978 0 : r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
1979 : &adev->gfx.sq_irq);
1980 0 : if (r) {
1981 0 : DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
1982 0 : return r;
1983 : }
1984 :
1985 0 : INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
1986 :
1987 0 : adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1988 :
1989 0 : r = gfx_v8_0_init_microcode(adev);
1990 0 : if (r) {
1991 0 : DRM_ERROR("Failed to load gfx firmware!\n");
1992 0 : return r;
1993 : }
1994 :
1995 0 : r = adev->gfx.rlc.funcs->init(adev);
1996 0 : if (r) {
1997 0 : DRM_ERROR("Failed to init rlc BOs!\n");
1998 0 : return r;
1999 : }
2000 :
2001 0 : r = gfx_v8_0_mec_init(adev);
2002 0 : if (r) {
2003 0 : DRM_ERROR("Failed to init MEC BOs!\n");
2004 0 : return r;
2005 : }
2006 :
2007 : /* set up the gfx ring */
2008 0 : for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2009 0 : ring = &adev->gfx.gfx_ring[i];
2010 0 : ring->ring_obj = NULL;
2011 0 : sprintf(ring->name, "gfx");
2012 : /* no gfx doorbells on iceland */
2013 0 : if (adev->asic_type != CHIP_TOPAZ) {
2014 0 : ring->use_doorbell = true;
2015 0 : ring->doorbell_index = adev->doorbell_index.gfx_ring0;
2016 : }
2017 :
2018 0 : r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2019 : AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2020 : AMDGPU_RING_PRIO_DEFAULT, NULL);
2021 0 : if (r)
2022 : return r;
2023 : }
2024 :
2026 : /* set up the compute queues - allocate horizontally across pipes */
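     : 	/*
     : 	 * The pipe index varies fastest below, so consecutive ring ids
     : 	 * land on different pipes before a pipe's next queue is used.
     : 	 */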
2027 : ring_id = 0;
2028 0 : for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2029 0 : for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2030 0 : for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2031 0 : if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2032 0 : continue;
2033 :
2034 0 : r = gfx_v8_0_compute_ring_init(adev,
2035 : ring_id,
2036 : i, k, j);
2037 0 : if (r)
2038 : return r;
2039 :
2040 0 : ring_id++;
2041 : }
2042 : }
2043 : }
2044 :
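     : 	/*
     : 	 * The KIQ (kernel interface queue) gets its own MQD-backed ring;
     : 	 * it is used to map and unmap the other compute queues on the MEC.
     : 	 */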
2045 0 : r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2046 0 : if (r) {
2047 0 : DRM_ERROR("Failed to init KIQ BOs!\n");
2048 0 : return r;
2049 : }
2050 :
2051 0 : kiq = &adev->gfx.kiq;
2052 0 : r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2053 0 : if (r)
2054 : return r;
2055 :
2056 : /* create MQDs for all compute queues, as well as the KIQ for the SRIOV case */
2057 0 : r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2058 0 : if (r)
2059 : return r;
2060 :
2061 0 : adev->gfx.ce_ram_size = 0x8000;
2062 :
2063 0 : r = gfx_v8_0_gpu_early_init(adev);
2064 0 : if (r)
2065 : return r;
2066 :
2067 0 : return 0;
2068 : }
2069 :
2070 0 : static int gfx_v8_0_sw_fini(void *handle)
2071 : {
2072 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2073 : int i;
2074 :
2075 0 : for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2076 0 : amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2077 0 : for (i = 0; i < adev->gfx.num_compute_rings; i++)
2078 0 : amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2079 :
2080 0 : amdgpu_gfx_mqd_sw_fini(adev);
2081 0 : amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2082 0 : amdgpu_gfx_kiq_fini(adev);
2083 :
2084 0 : gfx_v8_0_mec_fini(adev);
2085 0 : amdgpu_gfx_rlc_fini(adev);
2086 0 : amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2087 0 : &adev->gfx.rlc.clear_state_gpu_addr,
2088 0 : (void **)&adev->gfx.rlc.cs_ptr);
2089 0 : if ((adev->asic_type == CHIP_CARRIZO) ||
2090 : (adev->asic_type == CHIP_STONEY)) {
2091 0 : amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2092 0 : &adev->gfx.rlc.cp_table_gpu_addr,
2093 0 : (void **)&adev->gfx.rlc.cp_table_ptr);
2094 : }
2095 0 : gfx_v8_0_free_microcode(adev);
2096 :
2097 0 : return 0;
2098 : }
2099 :
2100 0 : static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2101 : {
2102 : uint32_t *modearray, *mod2array;
2103 0 : const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2104 0 : const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2105 : u32 reg_offset;
2106 :
2107 0 : modearray = adev->gfx.config.tile_mode_array;
2108 0 : mod2array = adev->gfx.config.macrotile_mode_array;
2109 :
2110 0 : for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2111 0 : modearray[reg_offset] = 0;
2112 :
2113 0 : for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2114 0 : mod2array[reg_offset] = 0;
2115 :
2116 0 : switch (adev->asic_type) {
2117 : case CHIP_TOPAZ:
2118 0 : modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2119 : PIPE_CONFIG(ADDR_SURF_P2) |
2120 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2121 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2122 0 : modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2123 : PIPE_CONFIG(ADDR_SURF_P2) |
2124 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2125 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2126 0 : modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2127 : PIPE_CONFIG(ADDR_SURF_P2) |
2128 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2129 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2130 0 : modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2131 : PIPE_CONFIG(ADDR_SURF_P2) |
2132 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2133 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2134 0 : modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2135 : PIPE_CONFIG(ADDR_SURF_P2) |
2136 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2137 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2138 0 : modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2139 : PIPE_CONFIG(ADDR_SURF_P2) |
2140 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2141 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2142 0 : modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2143 : PIPE_CONFIG(ADDR_SURF_P2) |
2144 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2145 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2146 0 : modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2147 : PIPE_CONFIG(ADDR_SURF_P2));
2148 0 : modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2149 : PIPE_CONFIG(ADDR_SURF_P2) |
2150 : MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2151 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2152 0 : modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2153 : PIPE_CONFIG(ADDR_SURF_P2) |
2154 : MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2155 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2156 0 : modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2157 : PIPE_CONFIG(ADDR_SURF_P2) |
2158 : MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2159 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2160 0 : modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2161 : PIPE_CONFIG(ADDR_SURF_P2) |
2162 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2163 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2164 0 : modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2165 : PIPE_CONFIG(ADDR_SURF_P2) |
2166 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2167 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2168 0 : modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2169 : PIPE_CONFIG(ADDR_SURF_P2) |
2170 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2171 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2172 0 : modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2173 : PIPE_CONFIG(ADDR_SURF_P2) |
2174 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2175 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2176 0 : modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2177 : PIPE_CONFIG(ADDR_SURF_P2) |
2178 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2179 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2180 0 : modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2181 : PIPE_CONFIG(ADDR_SURF_P2) |
2182 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2183 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2184 0 : modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2185 : PIPE_CONFIG(ADDR_SURF_P2) |
2186 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2187 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2188 0 : modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2189 : PIPE_CONFIG(ADDR_SURF_P2) |
2190 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2191 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2192 0 : modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2193 : PIPE_CONFIG(ADDR_SURF_P2) |
2194 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2195 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2196 0 : modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2197 : PIPE_CONFIG(ADDR_SURF_P2) |
2198 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2199 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2200 0 : modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2201 : PIPE_CONFIG(ADDR_SURF_P2) |
2202 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2203 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2204 0 : modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2205 : PIPE_CONFIG(ADDR_SURF_P2) |
2206 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2207 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2208 0 : modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2209 : PIPE_CONFIG(ADDR_SURF_P2) |
2210 : MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2211 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2212 0 : modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2213 : PIPE_CONFIG(ADDR_SURF_P2) |
2214 : MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2215 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2216 0 : modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2217 : PIPE_CONFIG(ADDR_SURF_P2) |
2218 : MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2219 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2220 :
2221 0 : mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2222 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2223 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2224 : NUM_BANKS(ADDR_SURF_8_BANK));
2225 0 : mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2226 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2227 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2228 : NUM_BANKS(ADDR_SURF_8_BANK));
2229 0 : mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2230 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2231 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2232 : NUM_BANKS(ADDR_SURF_8_BANK));
2233 0 : mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2234 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2235 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2236 : NUM_BANKS(ADDR_SURF_8_BANK));
2237 0 : mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2238 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2239 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2240 : NUM_BANKS(ADDR_SURF_8_BANK));
2241 0 : mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2242 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2243 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2244 : NUM_BANKS(ADDR_SURF_8_BANK));
2245 0 : mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2246 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2247 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2248 : NUM_BANKS(ADDR_SURF_8_BANK));
2249 0 : mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2250 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2251 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2252 : NUM_BANKS(ADDR_SURF_16_BANK));
2253 0 : mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2254 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2255 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2256 : NUM_BANKS(ADDR_SURF_16_BANK));
2257 0 : mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2258 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2259 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2260 : NUM_BANKS(ADDR_SURF_16_BANK));
2261 0 : mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2262 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2263 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2264 : NUM_BANKS(ADDR_SURF_16_BANK));
2265 0 : mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2266 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2267 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2268 : NUM_BANKS(ADDR_SURF_16_BANK));
2269 0 : mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2270 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2271 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2272 : NUM_BANKS(ADDR_SURF_16_BANK));
2273 0 : mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2274 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2275 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2276 : NUM_BANKS(ADDR_SURF_8_BANK));
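     : 		/*
     : 		 * GB_TILE_MODE0..31 and GB_MACROTILE_MODE0..15 are contiguous,
     : 		 * so they can be programmed by offset; slots 7, 12, 17 and 23
     : 		 * (and macrotile slot 7) are skipped here.
     : 		 */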
2277 :
2278 0 : for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2279 0 : if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2280 0 : reg_offset != 23)
2281 0 : WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2282 :
2283 0 : for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2284 0 : if (reg_offset != 7)
2285 0 : WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2286 :
2287 : break;
2288 : case CHIP_FIJI:
2289 : case CHIP_VEGAM:
2290 0 : modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2291 : PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2292 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2293 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2294 0 : modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2295 : PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2296 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2297 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2298 0 : modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2299 : PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2300 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2301 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2302 0 : modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2303 : PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2304 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2305 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2306 0 : modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2307 : PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2308 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2309 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2310 0 : modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2311 : PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2312 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2313 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2314 0 : modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2315 : PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2316 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2317 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2318 0 : modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2319 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2320 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2321 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2322 0 : modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2323 : PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2324 0 : modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2325 : PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2326 : MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2327 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2328 0 : modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2329 : PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2330 : MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2331 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2332 0 : modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2333 : PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2334 : MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2335 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2336 0 : modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2337 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2338 : MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2339 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2340 0 : modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2341 : PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2342 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2343 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2344 0 : modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2345 : PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2346 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2347 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2348 0 : modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2349 : PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2350 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2351 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2352 0 : modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2353 : PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2355 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2356 0 : modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2357 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2358 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2359 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2360 0 : modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2361 : PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2362 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2363 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2364 0 : modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2365 : PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2366 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2367 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2368 0 : modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2369 : PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2371 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2372 0 : modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2373 : PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2374 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2375 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2376 0 : modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2377 : PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2378 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2379 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2380 0 : modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2381 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2382 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2383 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2384 0 : modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2385 : PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2387 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2388 0 : modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2389 : PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2390 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2391 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2392 0 : modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2393 : PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2394 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2395 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2396 0 : modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2397 : PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2398 : MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2399 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2400 0 : modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2401 : PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2402 : MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2403 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2404 0 : modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2405 : PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2406 : MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2407 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2408 0 : modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2409 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2410 : MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2411 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2412 :
2413 0 : mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2414 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2415 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2416 : NUM_BANKS(ADDR_SURF_8_BANK));
2417 0 : mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2418 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2419 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2420 : NUM_BANKS(ADDR_SURF_8_BANK));
2421 0 : mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2422 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2423 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2424 : NUM_BANKS(ADDR_SURF_8_BANK));
2425 0 : mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2426 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2427 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2428 : NUM_BANKS(ADDR_SURF_8_BANK));
2429 0 : mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2430 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2431 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2432 : NUM_BANKS(ADDR_SURF_8_BANK));
2433 0 : mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2434 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2435 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2436 : NUM_BANKS(ADDR_SURF_8_BANK));
2437 0 : mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2439 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2440 : NUM_BANKS(ADDR_SURF_8_BANK));
2441 0 : mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2442 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2443 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2444 : NUM_BANKS(ADDR_SURF_8_BANK));
2445 0 : mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2447 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2448 : NUM_BANKS(ADDR_SURF_8_BANK));
2449 0 : mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2451 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2452 : NUM_BANKS(ADDR_SURF_8_BANK));
2453 0 : mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2455 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2456 : NUM_BANKS(ADDR_SURF_8_BANK));
2457 0 : mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2459 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2460 : NUM_BANKS(ADDR_SURF_8_BANK));
2461 0 : mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2464 : NUM_BANKS(ADDR_SURF_8_BANK));
2465 0 : mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2467 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468 : NUM_BANKS(ADDR_SURF_4_BANK));
2469 :
2470 0 : for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2471 0 : WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2472 :
2473 0 : for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2474 0 : if (reg_offset != 7)
2475 0 : WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2476 :
2477 : break;
2478 : case CHIP_TONGA:
2479 0 : modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2480 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2481 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2482 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2483 0 : modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2484 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2485 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2486 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2487 0 : modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2488 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2489 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2490 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2491 0 : modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2492 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2493 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2494 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2495 0 : modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2496 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2497 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2498 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2499 0 : modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2500 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2501 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2502 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2503 0 : modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2504 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2505 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2506 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2507 0 : modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2508 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2509 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2510 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2511 0 : modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2512 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2513 0 : modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2514 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2515 : MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2516 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2517 0 : modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2518 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2519 : MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2520 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2521 0 : modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2522 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2523 : MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2524 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2525 0 : modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2526 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2527 : MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2528 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2529 0 : modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2530 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2532 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2533 0 : modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2534 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2536 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2537 0 : modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2538 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2540 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2541 0 : modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2542 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2544 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2545 0 : modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2546 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2547 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2548 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2549 0 : modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2550 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2552 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2553 0 : modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2554 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2555 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2556 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2557 0 : modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2558 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2560 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2561 0 : modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2562 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2564 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2565 0 : modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2566 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2568 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2569 0 : modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2570 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2571 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2572 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2573 0 : modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2574 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2576 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2577 0 : modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2578 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2579 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2580 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2581 0 : modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2582 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2583 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2584 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2585 0 : modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2586 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2587 : MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2588 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2589 0 : modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2590 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591 : MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2592 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2593 0 : modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2594 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2595 : MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2596 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2597 0 : modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2598 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2599 : MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2600 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2601 :
2602 0 : mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2603 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2604 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2605 : NUM_BANKS(ADDR_SURF_16_BANK));
2606 0 : mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2607 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2608 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2609 : NUM_BANKS(ADDR_SURF_16_BANK));
2610 0 : mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2611 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2612 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2613 : NUM_BANKS(ADDR_SURF_16_BANK));
2614 0 : mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2615 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2616 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2617 : NUM_BANKS(ADDR_SURF_16_BANK));
2618 0 : mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2619 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2620 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2621 : NUM_BANKS(ADDR_SURF_16_BANK));
2622 0 : mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2623 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2624 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2625 : NUM_BANKS(ADDR_SURF_16_BANK));
2626 0 : mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2627 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2628 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2629 : NUM_BANKS(ADDR_SURF_16_BANK));
2630 0 : mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2631 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2632 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2633 : NUM_BANKS(ADDR_SURF_16_BANK));
2634 0 : mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2635 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2636 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2637 : NUM_BANKS(ADDR_SURF_16_BANK));
2638 0 : mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2640 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2641 : NUM_BANKS(ADDR_SURF_16_BANK));
2642 0 : mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2643 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2644 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2645 : NUM_BANKS(ADDR_SURF_16_BANK));
2646 0 : mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2648 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2649 : NUM_BANKS(ADDR_SURF_8_BANK));
2650 0 : mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2651 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2652 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2653 : NUM_BANKS(ADDR_SURF_4_BANK));
2654 0 : mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2655 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2656 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2657 : NUM_BANKS(ADDR_SURF_4_BANK));
2658 :
2659 0 : for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2660 0 : WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2661 :
2662 0 : for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2663 0 : if (reg_offset != 7)
2664 0 : WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2665 :
2666 : break;
2667 : case CHIP_POLARIS11:
2668 : case CHIP_POLARIS12:
2669 0 : modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2671 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2672 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2673 0 : modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2676 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2677 0 : modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2678 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2680 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2681 0 : modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2682 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2684 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2685 0 : modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2688 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2689 0 : modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2690 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2692 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2693 0 : modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2694 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2696 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2697 0 : modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2698 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2699 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2700 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2701 0 : modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2702 : PIPE_CONFIG(ADDR_SURF_P4_16x16));
2703 0 : modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2704 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2705 : MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2706 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2707 0 : modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2708 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2709 : MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2710 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2711 0 : modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2712 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2713 : MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2714 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2715 0 : modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2716 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2717 : MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2718 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2719 0 : modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2720 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2722 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2723 0 : modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2724 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2726 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727 0 : modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2728 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2729 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2730 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731 0 : modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2732 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2734 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2735 0 : modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2736 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2738 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2739 0 : modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2740 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2742 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2743 0 : modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2744 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2746 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2747 0 : modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2748 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2750 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2751 0 : modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2752 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2753 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2754 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2755 0 : modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2756 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2758 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2759 0 : modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2760 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2761 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2762 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2763 0 : modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2764 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2766 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2767 0 : modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2768 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2769 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2770 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2771 0 : modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2772 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2773 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2774 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2775 0 : modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2776 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2777 : MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2778 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2779 0 : modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2780 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2781 : MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2782 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2783 0 : modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2784 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2785 : MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2786 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2787 0 : modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2788 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2789 : MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2790 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2791 :
2792 0 : mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2793 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2794 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2795 : NUM_BANKS(ADDR_SURF_16_BANK));
2796 :
2797 0 : mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2798 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2799 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2800 : NUM_BANKS(ADDR_SURF_16_BANK));
2801 :
2802 0 : mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2803 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2804 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2805 : NUM_BANKS(ADDR_SURF_16_BANK));
2806 :
2807 0 : mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2808 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2809 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2810 : NUM_BANKS(ADDR_SURF_16_BANK));
2811 :
2812 0 : mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2813 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2814 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2815 : NUM_BANKS(ADDR_SURF_16_BANK));
2816 :
2817 0 : mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2819 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2820 : NUM_BANKS(ADDR_SURF_16_BANK));
2821 :
2822 0 : mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2823 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2824 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2825 : NUM_BANKS(ADDR_SURF_16_BANK));
2826 :
2827 0 : mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2828 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2829 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2830 : NUM_BANKS(ADDR_SURF_16_BANK));
2831 :
2832 0 : mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2833 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2834 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835 : NUM_BANKS(ADDR_SURF_16_BANK));
2836 :
2837 0 : mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2839 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2840 : NUM_BANKS(ADDR_SURF_16_BANK));
2841 :
2842 0 : mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2843 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2844 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2845 : NUM_BANKS(ADDR_SURF_16_BANK));
2846 :
2847 0 : mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2848 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2849 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2850 : NUM_BANKS(ADDR_SURF_16_BANK));
2851 :
2852 0 : mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2855 : NUM_BANKS(ADDR_SURF_8_BANK));
2856 :
2857 0 : mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2859 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2860 : NUM_BANKS(ADDR_SURF_4_BANK));
2861 :
2862 0 : for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2863 0 : WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2864 :
2865 0 : for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2866 0 : if (reg_offset != 7)
2867 0 : WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2868 :
2869 : break;
2870 : case CHIP_POLARIS10:
2871 0 : modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2872 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2873 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2874 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2875 0 : modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2876 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2877 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2878 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2879 0 : modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2880 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2881 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2882 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2883 0 : modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2884 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2885 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2886 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2887 0 : modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2888 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2889 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2890 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2891 0 : modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2892 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2893 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2894 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2895 0 : modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2896 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2897 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2898 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2899 0 : modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2900 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2901 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2902 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2903 0 : modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2904 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2905 0 : modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2906 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2907 : MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2908 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2909 0 : modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2910 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2911 : MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2912 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2913 0 : modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2914 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2915 : MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2916 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2917 0 : modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2918 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2919 : MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2920 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2921 0 : modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2922 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2923 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2924 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925 0 : modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2926 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2927 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2928 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2929 0 : modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2930 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2931 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2932 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2933 0 : modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2934 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2935 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2936 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2937 0 : modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2938 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2939 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2940 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2941 0 : modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2942 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2944 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2945 0 : modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2946 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2947 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2948 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2949 0 : modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2950 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2951 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2952 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2953 0 : modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2954 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2955 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2956 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2957 0 : modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2958 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2959 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2960 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2961 0 : modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2962 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2963 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2964 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2965 0 : modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2966 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2967 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2968 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2969 0 : modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2970 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2971 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2972 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2973 0 : modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2974 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2975 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2976 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2977 0 : modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2978 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2979 : MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2980 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2981 0 : modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2982 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2983 : MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2984 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2985 0 : modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2986 : PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2987 : MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2988 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2989 0 : modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2990 : PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2991 : MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2992 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2993 :
2994 0 : mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2995 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2996 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2997 : NUM_BANKS(ADDR_SURF_16_BANK));
2998 :
2999 0 : mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3000 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3001 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3002 : NUM_BANKS(ADDR_SURF_16_BANK));
3003 :
3004 0 : mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3005 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3006 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3007 : NUM_BANKS(ADDR_SURF_16_BANK));
3008 :
3009 0 : mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3010 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3011 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3012 : NUM_BANKS(ADDR_SURF_16_BANK));
3013 :
3014 0 : mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3015 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3016 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3017 : NUM_BANKS(ADDR_SURF_16_BANK));
3018 :
3019 0 : mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3020 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3021 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3022 : NUM_BANKS(ADDR_SURF_16_BANK));
3023 :
3024 0 : mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3025 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3026 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3027 : NUM_BANKS(ADDR_SURF_16_BANK));
3028 :
3029 0 : mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3030 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3031 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3032 : NUM_BANKS(ADDR_SURF_16_BANK));
3033 :
3034 0 : mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3035 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3036 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3037 : NUM_BANKS(ADDR_SURF_16_BANK));
3038 :
3039 0 : mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3040 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3041 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3042 : NUM_BANKS(ADDR_SURF_16_BANK));
3043 :
3044 0 : mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3045 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3046 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3047 : NUM_BANKS(ADDR_SURF_16_BANK));
3048 :
3049 0 : mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3050 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3051 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3052 : NUM_BANKS(ADDR_SURF_8_BANK));
3053 :
3054 0 : mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3055 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3056 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3057 : NUM_BANKS(ADDR_SURF_4_BANK));
3058 :
3059 0 : mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3060 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3061 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3062 : NUM_BANKS(ADDR_SURF_4_BANK));
3063 :
3064 0 : for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3065 0 : WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3066 :
3067 0 : for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3068 0 : if (reg_offset != 7)
3069 0 : WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3070 :
3071 : break;
3072 : case CHIP_STONEY:
3073 0 : modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3074 : PIPE_CONFIG(ADDR_SURF_P2) |
3075 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3076 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3077 0 : modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3078 : PIPE_CONFIG(ADDR_SURF_P2) |
3079 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3080 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3081 0 : modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3082 : PIPE_CONFIG(ADDR_SURF_P2) |
3083 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3084 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3085 0 : modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3086 : PIPE_CONFIG(ADDR_SURF_P2) |
3087 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3088 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3089 0 : modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3090 : PIPE_CONFIG(ADDR_SURF_P2) |
3091 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3092 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3093 0 : modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3094 : PIPE_CONFIG(ADDR_SURF_P2) |
3095 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3096 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3097 0 : modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3098 : PIPE_CONFIG(ADDR_SURF_P2) |
3099 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3100 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3101 0 : modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3102 : PIPE_CONFIG(ADDR_SURF_P2));
3103 0 : modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3104 : PIPE_CONFIG(ADDR_SURF_P2) |
3105 : MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3106 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3107 0 : modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3108 : PIPE_CONFIG(ADDR_SURF_P2) |
3109 : MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3110 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3111 0 : modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3112 : PIPE_CONFIG(ADDR_SURF_P2) |
3113 : MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3114 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3115 0 : modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3116 : PIPE_CONFIG(ADDR_SURF_P2) |
3117 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3118 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3119 0 : modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3120 : PIPE_CONFIG(ADDR_SURF_P2) |
3121 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3122 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3123 0 : modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3124 : PIPE_CONFIG(ADDR_SURF_P2) |
3125 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3126 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3127 0 : modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3128 : PIPE_CONFIG(ADDR_SURF_P2) |
3129 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3130 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3131 0 : modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3132 : PIPE_CONFIG(ADDR_SURF_P2) |
3133 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3134 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3135 0 : modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3136 : PIPE_CONFIG(ADDR_SURF_P2) |
3137 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3138 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3139 0 : modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3140 : PIPE_CONFIG(ADDR_SURF_P2) |
3141 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3142 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3143 0 : modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3144 : PIPE_CONFIG(ADDR_SURF_P2) |
3145 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3146 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3147 0 : modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3148 : PIPE_CONFIG(ADDR_SURF_P2) |
3149 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3150 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3151 0 : modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3152 : PIPE_CONFIG(ADDR_SURF_P2) |
3153 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3154 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3155 0 : modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3156 : PIPE_CONFIG(ADDR_SURF_P2) |
3157 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3158 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3159 0 : modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3160 : PIPE_CONFIG(ADDR_SURF_P2) |
3161 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3162 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3163 0 : modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3164 : PIPE_CONFIG(ADDR_SURF_P2) |
3165 : MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3166 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3167 0 : modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3168 : PIPE_CONFIG(ADDR_SURF_P2) |
3169 : MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3170 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3171 0 : modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3172 : PIPE_CONFIG(ADDR_SURF_P2) |
3173 : MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3174 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3175 :
3176 0 : mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3177 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3178 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3179 : NUM_BANKS(ADDR_SURF_8_BANK));
3180 0 : mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3181 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3182 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3183 : NUM_BANKS(ADDR_SURF_8_BANK));
3184 0 : mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3185 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3186 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3187 : NUM_BANKS(ADDR_SURF_8_BANK));
3188 0 : mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3189 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3190 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3191 : NUM_BANKS(ADDR_SURF_8_BANK));
3192 0 : mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3193 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3194 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3195 : NUM_BANKS(ADDR_SURF_8_BANK));
3196 0 : mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3197 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3198 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3199 : NUM_BANKS(ADDR_SURF_8_BANK));
3200 0 : mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3201 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3202 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3203 : NUM_BANKS(ADDR_SURF_8_BANK));
3204 0 : mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3205 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3206 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3207 : NUM_BANKS(ADDR_SURF_16_BANK));
3208 0 : mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3209 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3210 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3211 : NUM_BANKS(ADDR_SURF_16_BANK));
3212 0 : mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3213 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3214 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3215 : NUM_BANKS(ADDR_SURF_16_BANK));
3216 0 : mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3217 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3218 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3219 : NUM_BANKS(ADDR_SURF_16_BANK));
3220 0 : mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3221 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3222 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3223 : NUM_BANKS(ADDR_SURF_16_BANK));
3224 0 : mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3225 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3226 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3227 : NUM_BANKS(ADDR_SURF_16_BANK));
3228 0 : mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3229 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3230 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3231 : NUM_BANKS(ADDR_SURF_8_BANK));
3232 :
3233 0 : for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3234 0 : if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3235 0 : reg_offset != 23)
3236 0 : WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3237 :
3238 0 : for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3239 0 : if (reg_offset != 7)
3240 0 : WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3241 :
3242 : break;
3243 : default:
3244 0 : dev_warn(adev->dev,
3245 : 			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init(), falling through to CHIP_CARRIZO\n",
3246 : adev->asic_type);
3247 : fallthrough;
3248 :
3249 : case CHIP_CARRIZO:
3250 0 : modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3251 : PIPE_CONFIG(ADDR_SURF_P2) |
3252 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3253 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3254 0 : modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3255 : PIPE_CONFIG(ADDR_SURF_P2) |
3256 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3257 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3258 0 : modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3259 : PIPE_CONFIG(ADDR_SURF_P2) |
3260 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3261 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3262 0 : modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3263 : PIPE_CONFIG(ADDR_SURF_P2) |
3264 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3265 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3266 0 : modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3267 : PIPE_CONFIG(ADDR_SURF_P2) |
3268 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3269 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3270 0 : modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3271 : PIPE_CONFIG(ADDR_SURF_P2) |
3272 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3273 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3274 0 : modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3275 : PIPE_CONFIG(ADDR_SURF_P2) |
3276 : TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3277 : MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3278 0 : modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3279 : PIPE_CONFIG(ADDR_SURF_P2));
3280 0 : modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3281 : PIPE_CONFIG(ADDR_SURF_P2) |
3282 : MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3283 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3284 0 : modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3285 : PIPE_CONFIG(ADDR_SURF_P2) |
3286 : MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3287 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3288 0 : modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3289 : PIPE_CONFIG(ADDR_SURF_P2) |
3290 : MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3291 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3292 0 : modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3293 : PIPE_CONFIG(ADDR_SURF_P2) |
3294 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3295 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3296 0 : modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3297 : PIPE_CONFIG(ADDR_SURF_P2) |
3298 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3299 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3300 0 : modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3301 : PIPE_CONFIG(ADDR_SURF_P2) |
3302 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3303 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3304 0 : modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3305 : PIPE_CONFIG(ADDR_SURF_P2) |
3306 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3307 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3308 0 : modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3309 : PIPE_CONFIG(ADDR_SURF_P2) |
3310 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3311 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3312 0 : modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3313 : PIPE_CONFIG(ADDR_SURF_P2) |
3314 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3315 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3316 0 : modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3317 : PIPE_CONFIG(ADDR_SURF_P2) |
3318 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3319 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3320 0 : modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3321 : PIPE_CONFIG(ADDR_SURF_P2) |
3322 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3323 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3324 0 : modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3325 : PIPE_CONFIG(ADDR_SURF_P2) |
3326 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3327 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3328 0 : modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3329 : PIPE_CONFIG(ADDR_SURF_P2) |
3330 : MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3331 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3332 0 : modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3333 : PIPE_CONFIG(ADDR_SURF_P2) |
3334 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3335 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3336 0 : modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3337 : PIPE_CONFIG(ADDR_SURF_P2) |
3338 : MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3339 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3340 0 : modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3341 : PIPE_CONFIG(ADDR_SURF_P2) |
3342 : MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3343 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3344 0 : modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3345 : PIPE_CONFIG(ADDR_SURF_P2) |
3346 : MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3347 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3348 0 : modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3349 : PIPE_CONFIG(ADDR_SURF_P2) |
3350 : MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3351 : SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3352 :
3353 0 : mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3354 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3355 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3356 : NUM_BANKS(ADDR_SURF_8_BANK));
3357 0 : mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3358 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3359 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3360 : NUM_BANKS(ADDR_SURF_8_BANK));
3361 0 : mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3362 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3363 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3364 : NUM_BANKS(ADDR_SURF_8_BANK));
3365 0 : mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3366 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3367 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3368 : NUM_BANKS(ADDR_SURF_8_BANK));
3369 0 : mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3370 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3371 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3372 : NUM_BANKS(ADDR_SURF_8_BANK));
3373 0 : mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3374 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3375 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3376 : NUM_BANKS(ADDR_SURF_8_BANK));
3377 0 : mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3378 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3379 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3380 : NUM_BANKS(ADDR_SURF_8_BANK));
3381 0 : mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3382 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3383 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3384 : NUM_BANKS(ADDR_SURF_16_BANK));
3385 0 : mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3386 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3387 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3388 : NUM_BANKS(ADDR_SURF_16_BANK));
3389 0 : mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3390 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3391 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3392 : NUM_BANKS(ADDR_SURF_16_BANK));
3393 0 : mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3394 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3395 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3396 : NUM_BANKS(ADDR_SURF_16_BANK));
3397 0 : mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3398 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3399 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3400 : NUM_BANKS(ADDR_SURF_16_BANK));
3401 0 : mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3402 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3403 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3404 : NUM_BANKS(ADDR_SURF_16_BANK));
3405 0 : mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3406 : BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3407 : MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3408 : NUM_BANKS(ADDR_SURF_8_BANK));
3409 :
3410 0 : for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3411 0 : if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3412 0 : reg_offset != 23)
3413 0 : WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3414 :
3415 0 : for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3416 0 : if (reg_offset != 7)
3417 0 : WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3418 :
3419 : break;
3420 : }
3421 0 : }
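
/*
 * Editor's sketch (not driver code): the modearray[]/mod2array[] values
 * above are packed with the shift macros defined at the top of this file,
 * e.g. ARRAY_MODE(x) = (x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT.  The
 * standalone example below shows the same shift-and-OR packing with
 * hypothetical shift/width values; the real ones live in
 * gca/gfx_8_0_sh_mask.h.
 */
#include <assert.h>
#include <stdint.h>

#define EX_ARRAY_MODE__SHIFT	2	/* hypothetical placeholder */
#define EX_PIPE_CONFIG__SHIFT	6	/* hypothetical placeholder */

static uint32_t ex_pack_tile_mode(uint32_t array_mode, uint32_t pipe_config)
{
	return (array_mode << EX_ARRAY_MODE__SHIFT) |
	       (pipe_config << EX_PIPE_CONFIG__SHIFT);
}

int main(void)
{
	/* fields can be unpacked again by shifting back and masking */
	uint32_t word = ex_pack_tile_mode(0x4, 0x2);

	assert(((word >> EX_ARRAY_MODE__SHIFT) & 0xf) == 0x4);
	assert(((word >> EX_PIPE_CONFIG__SHIFT) & 0x1f) == 0x2);
	return 0;
}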
3422 :
3423 0 : static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3424 : u32 se_num, u32 sh_num, u32 instance)
3425 : {
3426 : u32 data;
3427 :
3428 0 : if (instance == 0xffffffff)
3429 : data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3430 : else
3431 0 : data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3432 :
3433 0 : if (se_num == 0xffffffff)
3434 0 : data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3435 : else
3436 0 : data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3437 :
3438 0 : if (sh_num == 0xffffffff)
3439 0 : data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3440 : else
3441 0 : data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3442 :
3443 0 : WREG32(mmGRBM_GFX_INDEX, data);
3444 0 : }
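
/*
 * Editor's sketch: REG_SET_FIELD() above is amdgpu's read-modify-write
 * field helper.  A generic equivalent with caller-supplied mask/shift
 * (hypothetical values, for illustration only) looks like this:
 */
#include <assert.h>
#include <stdint.h>

static uint32_t ex_set_field(uint32_t reg, uint32_t mask, unsigned int shift,
			     uint32_t val)
{
	return (reg & ~mask) | ((val << shift) & mask);
}

int main(void)
{
	/* set a 2-bit field at bit 4 to 0x3 without touching other bits */
	uint32_t reg = ex_set_field(0xff00, 0x30, 4, 0x3);

	assert(reg == 0xff30);
	return 0;
}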
3445 :
3446 0 : static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3447 : u32 me, u32 pipe, u32 q, u32 vm)
3448 : {
3449 0 : vi_srbm_select(adev, me, pipe, q, vm);
3450 0 : }
3451 :
3452 0 : static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3453 : {
3454 : u32 data, mask;
3455 :
3456 0 : data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3457 0 : RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3458 :
3459 0 : data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3460 :
3461 0 : mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3462 0 : adev->gfx.config.max_sh_per_se);
3463 :
3464 0 : return (~data) & mask;
3465 : }
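
/*
 * Editor's sketch of the bitmap math above, assuming
 * amdgpu_gfx_create_bitmask(n) returns a mask of the n low bits:
 * inverting the harvested-backend field and masking to the per-SH
 * width yields the active-RB bitmap.
 */
#include <assert.h>
#include <stdint.h>

static uint32_t ex_create_bitmask(unsigned int width)
{
	return (uint32_t)((1ULL << width) - 1);
}

int main(void)
{
	uint32_t backend_disable = 0x2;		/* RB1 fused off */
	uint32_t mask = ex_create_bitmask(4);	/* 4 backends per SH */

	assert(((~backend_disable) & mask) == 0xd); /* RBs 0, 2, 3 active */
	return 0;
}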
3466 :
3467 : static void
3468 0 : gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3469 : {
3470 0 : switch (adev->asic_type) {
3471 : case CHIP_FIJI:
3472 : case CHIP_VEGAM:
3473 0 : *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3474 : RB_XSEL2(1) | PKR_MAP(2) |
3475 : PKR_XSEL(1) | PKR_YSEL(1) |
3476 : SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3477 0 : *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3478 : SE_PAIR_YSEL(2);
3479 : break;
3480 : case CHIP_TONGA:
3481 : case CHIP_POLARIS10:
3482 0 : *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3483 : SE_XSEL(1) | SE_YSEL(1);
3484 0 : *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3485 : SE_PAIR_YSEL(2);
3486 : break;
3487 : case CHIP_TOPAZ:
3488 : case CHIP_CARRIZO:
3489 0 : *rconf |= RB_MAP_PKR0(2);
3490 : *rconf1 |= 0x0;
3491 : break;
3492 : case CHIP_POLARIS11:
3493 : case CHIP_POLARIS12:
3494 0 : *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3495 : SE_XSEL(1) | SE_YSEL(1);
3496 : *rconf1 |= 0x0;
3497 : break;
3498 : case CHIP_STONEY:
3499 : *rconf |= 0x0;
3500 : *rconf1 |= 0x0;
3501 : break;
3502 : default:
3503 0 : DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3504 : break;
3505 : }
3506 0 : }
3507 :
3508 : static void
3509 0 : gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3510 : u32 raster_config, u32 raster_config_1,
3511 : unsigned rb_mask, unsigned num_rb)
3512 : {
3513 0 : unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3514 0 : unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3515 0 : unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3516 0 : unsigned rb_per_se = num_rb / num_se;
3517 : unsigned se_mask[4];
3518 : unsigned se;
3519 :
3520 0 : se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3521 0 : se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3522 0 : se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3523 0 : se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3524 :
3525 0 : WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3526 0 : WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3527 0 : WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3528 :
3529 0 : if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3530 0 : (!se_mask[2] && !se_mask[3]))) {
3531 0 : raster_config_1 &= ~SE_PAIR_MAP_MASK;
3532 :
3533 0 : if (!se_mask[0] && !se_mask[1]) {
3534 0 : raster_config_1 |=
3535 : SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3536 : } else {
3537 : raster_config_1 |=
3538 : SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3539 : }
3540 : }
3541 :
3542 0 : for (se = 0; se < num_se; se++) {
3543 0 : unsigned raster_config_se = raster_config;
3544 0 : unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3545 0 : unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3546 0 : int idx = (se / 2) * 2;
3547 :
3548 0 : if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3549 0 : raster_config_se &= ~SE_MAP_MASK;
3550 :
3551 0 : if (!se_mask[idx]) {
3552 0 : raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3553 : } else {
3554 : raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3555 : }
3556 : }
3557 :
3558 0 : pkr0_mask &= rb_mask;
3559 0 : pkr1_mask &= rb_mask;
3560 0 : if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3561 0 : raster_config_se &= ~PKR_MAP_MASK;
3562 :
3563 0 : if (!pkr0_mask) {
3564 0 : raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3565 : } else {
3566 : raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3567 : }
3568 : }
3569 :
3570 0 : if (rb_per_se >= 2) {
3571 0 : unsigned rb0_mask = 1 << (se * rb_per_se);
3572 0 : unsigned rb1_mask = rb0_mask << 1;
3573 :
3574 0 : rb0_mask &= rb_mask;
3575 0 : rb1_mask &= rb_mask;
3576 0 : if (!rb0_mask || !rb1_mask) {
3577 0 : raster_config_se &= ~RB_MAP_PKR0_MASK;
3578 :
3579 0 : if (!rb0_mask) {
3580 0 : raster_config_se |=
3581 : RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3582 : } else {
3583 : raster_config_se |=
3584 : RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3585 : }
3586 : }
3587 :
3588 0 : if (rb_per_se > 2) {
3589 0 : rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3590 0 : rb1_mask = rb0_mask << 1;
3591 0 : rb0_mask &= rb_mask;
3592 0 : rb1_mask &= rb_mask;
3593 0 : if (!rb0_mask || !rb1_mask) {
3594 0 : raster_config_se &= ~RB_MAP_PKR1_MASK;
3595 :
3596 0 : if (!rb0_mask) {
3597 0 : raster_config_se |=
3598 : RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3599 : } else {
3600 : raster_config_se |=
3601 : RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3602 : }
3603 : }
3604 : }
3605 : }
3606 :
3607 : /* GRBM_GFX_INDEX has a different offset on VI */
3608 0 : gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3609 0 : WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3610 0 : WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3611 : }
3612 :
3613 : /* GRBM_GFX_INDEX has a different offset on VI */
3614 0 : gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3615 0 : }
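
/*
 * Editor's sketch of the per-SE mask slicing at the top of
 * gfx_v8_0_write_harvested_raster_configs(): the global RB mask is cut
 * into rb_per_se-wide windows, one per shader engine.  Example values
 * are made up (8 RBs, 4 SEs, RB7 harvested).
 */
#include <assert.h>

int main(void)
{
	unsigned int rb_mask = 0x7f;	/* 8 RBs, RB7 harvested */
	unsigned int rb_per_se = 2;	/* 8 RBs / 4 SEs */
	unsigned int se_mask[4];

	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	assert(se_mask[0] == 0x03 && se_mask[1] == 0x0c);
	assert(se_mask[2] == 0x30 && se_mask[3] == 0x40);
	return 0;
}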
3616 :
3617 0 : static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3618 : {
3619 : int i, j;
3620 : u32 data;
3621 0 : u32 raster_config = 0, raster_config_1 = 0;
3622 0 : u32 active_rbs = 0;
3623 0 : u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3624 0 : adev->gfx.config.max_sh_per_se;
3625 : unsigned num_rb_pipes;
3626 :
3627 0 : mutex_lock(&adev->grbm_idx_mutex);
3628 0 : for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3629 0 : for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3630 0 : gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3631 0 : data = gfx_v8_0_get_rb_active_bitmap(adev);
3632 0 : active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3633 : rb_bitmap_width_per_sh);
3634 : }
3635 : }
3636 0 : gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3637 :
3638 0 : adev->gfx.config.backend_enable_mask = active_rbs;
3639 0 : adev->gfx.config.num_rbs = hweight32(active_rbs);
3640 :
3641 0 : num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3642 : adev->gfx.config.max_shader_engines, 16);
3643 :
3644 0 : gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3645 :
3646 0 : if (!adev->gfx.config.backend_enable_mask ||
3647 0 : adev->gfx.config.num_rbs >= num_rb_pipes) {
3648 0 : WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3649 0 : WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3650 : } else {
3651 0 : gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3652 : adev->gfx.config.backend_enable_mask,
3653 : num_rb_pipes);
3654 : }
3655 :
3656 : /* cache the values for userspace */
3657 0 : for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3658 0 : for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3659 0 : gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3660 0 : adev->gfx.config.rb_config[i][j].rb_backend_disable =
3661 0 : RREG32(mmCC_RB_BACKEND_DISABLE);
3662 0 : adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3663 0 : RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3664 0 : adev->gfx.config.rb_config[i][j].raster_config =
3665 0 : RREG32(mmPA_SC_RASTER_CONFIG);
3666 0 : adev->gfx.config.rb_config[i][j].raster_config_1 =
3667 0 : RREG32(mmPA_SC_RASTER_CONFIG_1);
3668 : }
3669 : }
3670 0 : gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3671 0 : mutex_unlock(&adev->grbm_idx_mutex);
3672 0 : }
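
/*
 * Editor's sketch of the active_rbs packing in gfx_v8_0_setup_rb():
 * each SH contributes a rb_bitmap_width_per_sh-wide slice, placed by
 * its (se, sh) position.  Made-up topology: 2 SEs x 2 SHs, 2 RBs each.
 */
#include <assert.h>

int main(void)
{
	unsigned int max_sh_per_se = 2, width = 2;
	unsigned int per_sh[2][2] = { { 0x3, 0x3 }, { 0x3, 0x1 } };
	unsigned int active_rbs = 0, i, j;

	for (i = 0; i < 2; i++)
		for (j = 0; j < 2; j++)
			active_rbs |= per_sh[i][j] <<
				((i * max_sh_per_se + j) * width);

	assert(active_rbs == 0x7f);	/* top RB of SE1/SH1 harvested */
	return 0;
}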
3673 :
3674 : #define DEFAULT_SH_MEM_BASES (0x6000)
3675 : /**
3676 :  * gfx_v8_0_init_compute_vmid - initialize compute VMID SH_MEM registers
3677 : *
3678 : * @adev: amdgpu_device pointer
3679 : *
3680 : * Initialize compute vmid sh_mem registers
3681 : *
3682 : */
3683 0 : static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3684 : {
3685 : int i;
3686 : uint32_t sh_mem_config;
3687 : uint32_t sh_mem_bases;
3688 :
3689 : /*
3690 : * Configure apertures:
3691 : * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3692 : * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3693 : 	 * GPUVM:   0x60010000'00000000 - 0x60020000'00000000 (256TB)
3694 : */
3695 0 : sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3696 :
3697 0 : sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3698 : SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3699 : SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3700 : SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3701 : MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3702 : SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3703 :
3704 0 : mutex_lock(&adev->srbm_mutex);
3705 0 : for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3706 0 : vi_srbm_select(adev, 0, 0, 0, i);
3707 : /* CP and shaders */
3708 0 : WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3709 0 : WREG32(mmSH_MEM_APE1_BASE, 1);
3710 0 : WREG32(mmSH_MEM_APE1_LIMIT, 0);
3711 0 : WREG32(mmSH_MEM_BASES, sh_mem_bases);
3712 : }
3713 0 : vi_srbm_select(adev, 0, 0, 0, 0);
3714 0 : mutex_unlock(&adev->srbm_mutex);
3715 :
3716 : 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
3717 : 	 * access. These should be enabled by FW for target VMIDs. */
3718 0 : for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3719 0 : WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
3720 0 : WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
3721 0 : WREG32(amdgpu_gds_reg_offset[i].gws, 0);
3722 0 : WREG32(amdgpu_gds_reg_offset[i].oa, 0);
3723 : }
3724 0 : }
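
/*
 * Editor's sketch: DEFAULT_SH_MEM_BASES (0x6000) is duplicated into the
 * two 16-bit halves of SH_MEM_BASES above.  Treating each half as bits
 * 63:48 of an aperture base (an assumption consistent with the
 * 0x60000000'00000000 addresses in the comment) gives:
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t sh_mem_bases = 0x6000 | (0x6000 << 16);
	uint64_t aperture_base = (uint64_t)(sh_mem_bases & 0xffff) << 48;

	assert(sh_mem_bases == 0x60006000);
	assert(aperture_base == 0x6000000000000000ULL);
	return 0;
}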
3725 :
3726 0 : static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
3727 : {
3728 : int vmid;
3729 :
3730 : /*
3731 : * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
3732 : 	 * access. Compute VMIDs should be enabled by FW for target VMIDs;
3733 : 	 * the driver can enable them for graphics. VMID0 should maintain
3734 : * access so that HWS firmware can save/restore entries.
3735 : */
3736 0 : for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
3737 0 : WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
3738 0 : WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
3739 0 : WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
3740 0 : WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
3741 : }
3742 0 : }
3743 :
3744 : static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3745 : {
3746 0 : switch (adev->asic_type) {
3747 : default:
3748 0 : adev->gfx.config.double_offchip_lds_buf = 1;
3749 : break;
3750 : case CHIP_CARRIZO:
3751 : case CHIP_STONEY:
3752 0 : adev->gfx.config.double_offchip_lds_buf = 0;
3753 : break;
3754 : }
3755 : }
3756 :
3757 0 : static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
3758 : {
3759 : u32 tmp, sh_static_mem_cfg;
3760 : int i;
3761 :
3762 0 : WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3763 0 : WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3764 0 : WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3765 0 : WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3766 :
3767 0 : gfx_v8_0_tiling_mode_table_init(adev);
3768 0 : gfx_v8_0_setup_rb(adev);
3769 0 : gfx_v8_0_get_cu_info(adev);
3770 0 : gfx_v8_0_config_init(adev);
3771 :
3772 : /* XXX SH_MEM regs */
3773 : /* where to put LDS, scratch, GPUVM in FSA64 space */
3774 0 : sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3775 : SWIZZLE_ENABLE, 1);
3776 0 : sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3777 : ELEMENT_SIZE, 1);
3778 0 : sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3779 : INDEX_STRIDE, 3);
3780 0 : WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3781 :
3782 0 : mutex_lock(&adev->srbm_mutex);
3783 0 : for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3784 0 : vi_srbm_select(adev, 0, 0, 0, i);
3785 : /* CP and shaders */
3786 0 : if (i == 0) {
3787 0 : tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3788 0 : tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3789 0 : tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3790 : SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3791 0 : WREG32(mmSH_MEM_CONFIG, tmp);
3792 0 : WREG32(mmSH_MEM_BASES, 0);
3793 : } else {
3794 0 : tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3795 0 : tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3796 0 : tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3797 : SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3798 0 : WREG32(mmSH_MEM_CONFIG, tmp);
3799 0 : tmp = adev->gmc.shared_aperture_start >> 48;
3800 0 : WREG32(mmSH_MEM_BASES, tmp);
3801 : }
3802 :
3803 0 : WREG32(mmSH_MEM_APE1_BASE, 1);
3804 0 : WREG32(mmSH_MEM_APE1_LIMIT, 0);
3805 : }
3806 0 : vi_srbm_select(adev, 0, 0, 0, 0);
3807 0 : mutex_unlock(&adev->srbm_mutex);
3808 :
3809 0 : gfx_v8_0_init_compute_vmid(adev);
3810 0 : gfx_v8_0_init_gds_vmid(adev);
3811 :
3812 0 : mutex_lock(&adev->grbm_idx_mutex);
3813 : /*
3814 : 	 * making sure that the following register writes will be broadcast
3815 : 	 * to all the shaders
3816 : */
3817 0 : gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3818 :
3819 0 : WREG32(mmPA_SC_FIFO_SIZE,
3820 : (adev->gfx.config.sc_prim_fifo_size_frontend <<
3821 : PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3822 : (adev->gfx.config.sc_prim_fifo_size_backend <<
3823 : PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3824 : (adev->gfx.config.sc_hiz_tile_fifo_size <<
3825 : PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3826 : (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3827 : PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3828 :
3829 0 : tmp = RREG32(mmSPI_ARB_PRIORITY);
3830 0 : tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3831 0 : tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3832 0 : tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3833 0 : tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3834 0 : WREG32(mmSPI_ARB_PRIORITY, tmp);
3835 :
3836 0 : mutex_unlock(&adev->grbm_idx_mutex);
3837 :
3838 0 : }
3839 :
3840 0 : static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3841 : {
3842 : u32 i, j, k;
3843 : u32 mask;
3844 :
3845 0 : mutex_lock(&adev->grbm_idx_mutex);
3846 0 : for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3847 0 : for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3848 0 : gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3849 0 : for (k = 0; k < adev->usec_timeout; k++) {
3850 0 : if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3851 : break;
3852 0 : udelay(1);
3853 : }
3854 0 : if (k == adev->usec_timeout) {
3855 0 : gfx_v8_0_select_se_sh(adev, 0xffffffff,
3856 : 0xffffffff, 0xffffffff);
3857 0 : mutex_unlock(&adev->grbm_idx_mutex);
3858 0 : 				DRM_INFO("Timed out waiting for RLC serdes %u,%u\n",
3859 : i, j);
3860 0 : return;
3861 : }
3862 : }
3863 : }
3864 0 : gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3865 0 : mutex_unlock(&adev->grbm_idx_mutex);
3866 :
3867 0 : mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3868 : RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3869 : RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3870 : RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3871 0 : for (k = 0; k < adev->usec_timeout; k++) {
3872 0 : if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3873 : break;
3874 0 : udelay(1);
3875 : }
3876 : }
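
/*
 * Editor's sketch of the bounded-poll pattern used above: spin reading
 * a busy indicator until it reports idle or the timeout budget (in 1 us
 * steps) is exhausted.  ex_read_busy() is a stand-in for the
 * RLC_SERDES_*_BUSY register reads.
 */
#include <assert.h>

static unsigned int busy_reads = 3;	/* pretend hw goes idle after 3 polls */

static unsigned int ex_read_busy(void)
{
	return busy_reads ? busy_reads-- : 0;
}

int main(void)
{
	unsigned int k, timeout = 100;

	for (k = 0; k < timeout; k++) {
		if (ex_read_busy() == 0)
			break;
		/* udelay(1) would sleep here in the driver */
	}
	assert(k < timeout);	/* did not time out */
	return 0;
}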
3877 :
3878 0 : static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3879 : bool enable)
3880 : {
3881 0 : u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3882 :
3883 0 : tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3884 0 : tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3885 0 : tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3886 0 : tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3887 :
3888 0 : WREG32(mmCP_INT_CNTL_RING0, tmp);
3889 0 : }
3890 :
3891 0 : static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3892 : {
3893 0 : adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
3894 : /* csib */
3895 0 : WREG32(mmRLC_CSIB_ADDR_HI,
3896 : adev->gfx.rlc.clear_state_gpu_addr >> 32);
3897 0 : WREG32(mmRLC_CSIB_ADDR_LO,
3898 : adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3899 0 : WREG32(mmRLC_CSIB_LENGTH,
3900 : adev->gfx.rlc.clear_state_size);
3901 0 : }
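
/*
 * Editor's sketch of the HI/LO split used for the CSIB address above:
 * the upper dword goes to the _HI register and the lower dword is
 * masked to dword alignment.
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t gpu_addr = 0x123456789abcdef0ULL;	/* made-up address */
	uint32_t hi = gpu_addr >> 32;
	uint32_t lo = gpu_addr & 0xfffffffc;

	assert(hi == 0x12345678);
	assert(lo == 0x9abcdef0);
	return 0;
}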
3902 :
3903 0 : static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3904 : int ind_offset,
3905 : int list_size,
3906 : int *unique_indices,
3907 : int *indices_count,
3908 : int max_indices,
3909 : int *ind_start_offsets,
3910 : int *offset_count,
3911 : int max_offset)
3912 : {
3913 : int indices;
3914 0 : bool new_entry = true;
3915 :
3916 0 : for (; ind_offset < list_size; ind_offset++) {
3917 :
3918 0 : if (new_entry) {
3919 0 : new_entry = false;
3920 0 : ind_start_offsets[*offset_count] = ind_offset;
3921 0 : *offset_count = *offset_count + 1;
3922 0 : BUG_ON(*offset_count >= max_offset);
3923 : }
3924 :
3925 0 : if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3926 0 : new_entry = true;
3927 0 : continue;
3928 : }
3929 :
3930 0 : ind_offset += 2;
3931 :
3932 : 		/* look for the matching index */
3933 0 : for (indices = 0;
3934 0 : indices < *indices_count;
3935 0 : indices++) {
3936 0 : if (unique_indices[indices] ==
3937 0 : register_list_format[ind_offset])
3938 : break;
3939 : }
3940 :
3941 0 : if (indices >= *indices_count) {
3942 0 : unique_indices[*indices_count] =
3943 0 : register_list_format[ind_offset];
3944 0 : indices = *indices_count;
3945 0 : *indices_count = *indices_count + 1;
3946 0 : BUG_ON(*indices_count >= max_indices);
3947 : }
3948 :
3949 0 : register_list_format[ind_offset] = indices;
3950 : }
3951 0 : }
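
/*
 * Editor's sketch of the rewrite done by gfx_v8_0_parse_ind_reg_list():
 * the indirect list is a series of 3-word records whose third word is a
 * register index, with 0xFFFFFFFF separating entries.  Each index is
 * replaced by its position in a deduplicated table (record layout is
 * inferred from the "ind_offset += 2" stride above, not from RLC
 * documentation).
 */
#include <assert.h>

int main(void)
{
	unsigned int fmt[] = { 0xa, 0xb, 0x2004, 0xc, 0xd, 0x2004,
			       0xe, 0xf, 0x3008, 0xffffffff };
	unsigned int unique[4], nunique = 0, i, j;

	for (i = 0; fmt[i] != 0xffffffff; i += 3) {
		for (j = 0; j < nunique; j++)
			if (unique[j] == fmt[i + 2])
				break;
		if (j == nunique)
			unique[nunique++] = fmt[i + 2];
		fmt[i + 2] = j;			/* compact index */
	}
	assert(nunique == 2);
	assert(fmt[2] == 0 && fmt[5] == 0 && fmt[8] == 1);
	return 0;
}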
3952 :
3953 0 : static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3954 : {
3955 : int i, temp, data;
3956 0 : int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3957 0 : int indices_count = 0;
3958 0 : int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3959 0 : int offset_count = 0;
3960 :
3961 : int list_size;
3962 0 : unsigned int *register_list_format =
3963 0 : kmemdup(adev->gfx.rlc.register_list_format,
3964 0 : adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3965 0 : if (!register_list_format)
3966 : return -ENOMEM;
3967 :
3968 0 : gfx_v8_0_parse_ind_reg_list(register_list_format,
3969 : RLC_FormatDirectRegListLength,
3970 0 : adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3971 : unique_indices,
3972 : &indices_count,
3973 : ARRAY_SIZE(unique_indices),
3974 : indirect_start_offsets,
3975 : &offset_count,
3976 : ARRAY_SIZE(indirect_start_offsets));
3977 :
3978 : /* save and restore list */
3979 0 : WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3980 :
3981 0 : WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3982 0 : for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3983 0 : WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3984 :
3985 : /* indirect list */
3986 0 : WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3987 0 : for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3988 0 : WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3989 :
3990 0 : list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3991 0 : list_size = list_size >> 1;
3992 0 : WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3993 0 : WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3994 :
3995 :         /* starting offsets */
3996 0 : WREG32(mmRLC_GPM_SCRATCH_ADDR,
3997 : adev->gfx.rlc.starting_offsets_start);
3998 0 : for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
3999 0 : WREG32(mmRLC_GPM_SCRATCH_DATA,
4000 : indirect_start_offsets[i]);
4001 :
4002 : /* unique indices */
4003 : temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4004 : data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4005 0 : for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4006 0 : if (unique_indices[i] != 0) {
4007 0 : WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4008 0 : WREG32(data + i, unique_indices[i] >> 20);
4009 : }
4010 : }
4011 0 : kfree(register_list_format);
4012 :
4013 0 : return 0;
4014 : }
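/*
 * Each unique index above is split across an ADDR/DATA register pair:
 * the low 18 bits go to RLC_SRM_INDEX_CNTL_ADDR_n (the 0x3FFFF mask) and
 * bits 31..20 to RLC_SRM_INDEX_CNTL_DATA_n (the >> 20); as written, bits
 * 18-19 do not survive the split. A quick host-side check with a made-up
 * value:
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t idx  = 0x00A1234F;
	uint32_t addr = idx & 0x3FFFF;	/* low 18 bits  -> 0x1234F */
	uint32_t data = idx >> 20;	/* bits 31..20  -> 0xA */

	assert(addr == 0x1234F && data == 0xA);
	return 0;
}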
4015 :
4016 0 : static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4017 : {
4018 0 : WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4019 0 : }
4020 :
4021 0 : static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4022 : {
4023 : uint32_t data;
4024 :
4025 0 : WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4026 :
4027 0 : data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4028 0 : data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4029 0 : data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4030 0 : data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4031 0 : WREG32(mmRLC_PG_DELAY, data);
4032 :
4033 0 : WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4034 0 : WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4035 :
4036 0 : }
4037 :
4038 0 : static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4039 : bool enable)
4040 : {
4041 0 : WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4042 0 : }
4043 :
4044 0 : static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4045 : bool enable)
4046 : {
4047 0 : WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4048 0 : }
4049 :
4050 0 : static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4051 : {
4052 0 : WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4053 0 : }
4054 :
4055 0 : static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4056 : {
4057 0 : if ((adev->asic_type == CHIP_CARRIZO) ||
4058 : (adev->asic_type == CHIP_STONEY)) {
4059 0 : gfx_v8_0_init_csb(adev);
4060 0 : gfx_v8_0_init_save_restore_list(adev);
4061 0 : gfx_v8_0_enable_save_restore_machine(adev);
4062 0 : WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4063 0 : gfx_v8_0_init_power_gating(adev);
4064 0 : WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4065 0 : } else if ((adev->asic_type == CHIP_POLARIS11) ||
4066 0 : (adev->asic_type == CHIP_POLARIS12) ||
4067 : (adev->asic_type == CHIP_VEGAM)) {
4068 0 : gfx_v8_0_init_csb(adev);
4069 0 : gfx_v8_0_init_save_restore_list(adev);
4070 0 : gfx_v8_0_enable_save_restore_machine(adev);
4071 0 : gfx_v8_0_init_power_gating(adev);
4072 : }
4073 :
4074 0 : }
4075 :
4076 0 : static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4077 : {
4078 0 : WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4079 :
4080 0 : gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4081 0 : gfx_v8_0_wait_for_rlc_serdes(adev);
4082 0 : }
4083 :
4084 0 : static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4085 : {
4086 0 : WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4087 0 : udelay(50);
4088 :
4089 0 : WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4090 0 : udelay(50);
4091 0 : }
4092 :
4093 0 : static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4094 : {
4095 0 : WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4096 :
4097 :         /* on APUs such as carrizo, the cp interrupt is enabled only after the cp is initialized */
4098 0 : if (!(adev->flags & AMD_IS_APU))
4099 0 : gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4100 :
4101 0 : udelay(50);
4102 0 : }
4103 :
4104 0 : static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4105 : {
4106 0 : if (amdgpu_sriov_vf(adev)) {
4107 0 : gfx_v8_0_init_csb(adev);
4108 0 : return 0;
4109 : }
4110 :
4111 0 : adev->gfx.rlc.funcs->stop(adev);
4112 0 : adev->gfx.rlc.funcs->reset(adev);
4113 0 : gfx_v8_0_init_pg(adev);
4114 0 : adev->gfx.rlc.funcs->start(adev);
4115 :
4116 0 : return 0;
4117 : }
4118 :
4119 0 : static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4120 : {
4121 0 : u32 tmp = RREG32(mmCP_ME_CNTL);
4122 :
4123 0 : if (enable) {
4124 0 : tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4125 0 : tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4126 0 : tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4127 : } else {
4128 0 : tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4129 0 : tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4130 0 : tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4131 : }
4132 0 : WREG32(mmCP_ME_CNTL, tmp);
4133 0 : udelay(50);
4134 0 : }
4135 :
4136 0 : static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4137 : {
4138 0 : u32 count = 0;
4139 0 : const struct cs_section_def *sect = NULL;
4140 0 : const struct cs_extent_def *ext = NULL;
4141 :
4142 : /* begin clear state */
4143 0 : count += 2;
4144 : /* context control state */
4145 0 : count += 3;
4146 :
4147 0 : for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4148 0 : for (ext = sect->section; ext->extent != NULL; ++ext) {
4149 0 : if (sect->id == SECT_CONTEXT)
4150 0 : count += 2 + ext->reg_count;
4151 : else
4152 : return 0;
4153 : }
4154 : }
4155 : /* pa_sc_raster_config/pa_sc_raster_config1 */
4156 0 : count += 4;
4157 : /* end clear state */
4158 0 : count += 2;
4159 : /* clear state */
4160 0 : count += 2;
4161 :
4162 0 : return count;
4163 : }
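/*
 * The size above is counted in dwords: every PACKET3 costs one header
 * dword plus its payload, so a SET_CONTEXT_REG extent costs 2 + reg_count
 * (header + start offset + data). A sketch totaling a toy clear-state
 * image the same way; the extent sizes are invented:
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const uint32_t extents[] = { 4, 7, 1 };	/* reg_count per extent */
	uint32_t count = 2 + 3;	/* begin-clear-state + context control */

	for (unsigned i = 0; i < sizeof(extents) / sizeof(extents[0]); i++)
		count += 2 + extents[i];
	count += 4 + 2 + 2;	/* raster config + end-clear-state + clear state */
	printf("csb dwords: %u\n", count);	/* 31 */
	return 0;
}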
4164 :
4165 0 : static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4166 : {
4167 0 : struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4168 0 : const struct cs_section_def *sect = NULL;
4169 0 : const struct cs_extent_def *ext = NULL;
4170 : int r, i;
4171 :
4172 : /* init the CP */
4173 0 : WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4174 0 : WREG32(mmCP_ENDIAN_SWAP, 0);
4175 0 : WREG32(mmCP_DEVICE_ID, 1);
4176 :
4177 0 : gfx_v8_0_cp_gfx_enable(adev, true);
4178 :
4179 0 : r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4180 0 : if (r) {
4181 0 : DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4182 0 : return r;
4183 : }
4184 :
4185 : /* clear state buffer */
4186 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4187 0 : amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4188 :
4189 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4190 0 : amdgpu_ring_write(ring, 0x80000000);
4191 0 : amdgpu_ring_write(ring, 0x80000000);
4192 :
4193 0 : for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4194 0 : for (ext = sect->section; ext->extent != NULL; ++ext) {
4195 0 : if (sect->id == SECT_CONTEXT) {
4196 0 : amdgpu_ring_write(ring,
4197 0 : PACKET3(PACKET3_SET_CONTEXT_REG,
4198 : ext->reg_count));
4199 0 : amdgpu_ring_write(ring,
4200 0 : ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4201 0 : for (i = 0; i < ext->reg_count; i++)
4202 0 : amdgpu_ring_write(ring, ext->extent[i]);
4203 : }
4204 : }
4205 : }
4206 :
4207 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4208 0 : amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4209 0 : amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4210 0 : amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4211 :
4212 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4213 0 : amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4214 :
4215 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4216 0 : amdgpu_ring_write(ring, 0);
4217 :
4218 : /* init the CE partitions */
4219 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4220 0 : amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4221 0 : amdgpu_ring_write(ring, 0x8000);
4222 0 : amdgpu_ring_write(ring, 0x8000);
4223 :
4224 0 : amdgpu_ring_commit(ring);
4225 :
4226 0 : return 0;
4227 : }
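/*
 * All the ring writes above are type-3 PM4 packets. As encoded by the
 * PACKET3() macro (layout recalled from vid.h; treat as illustrative):
 * packet type in bits [31:30], body dword count minus one in [29:16],
 * opcode in [15:8]. So PACKET3(PACKET3_SET_CONTEXT_REG, 2) announces
 * three body dwords: the register offset plus two data values.
 */
#include <stdint.h>

static inline uint32_t pm4_type3_header(uint32_t opcode, uint32_t ndw_m1)
{
	/* ndw_m1 = number of dwords following the header, minus one */
	return (3u << 30) | ((ndw_m1 & 0x3FFFu) << 16) | ((opcode & 0xFFu) << 8);
}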
4228 0 : static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4229 : {
4230 : u32 tmp;
4231 : /* no gfx doorbells on iceland */
4232 0 : if (adev->asic_type == CHIP_TOPAZ)
4233 : return;
4234 :
4235 0 : tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4236 :
4237 0 : if (ring->use_doorbell) {
4238 0 : tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4239 : DOORBELL_OFFSET, ring->doorbell_index);
4240 0 : tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4241 : DOORBELL_HIT, 0);
4242 0 : tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4243 : DOORBELL_EN, 1);
4244 : } else {
4245 0 : tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4246 : }
4247 :
4248 0 : WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4249 :
4250 0 : if (adev->flags & AMD_IS_APU)
4251 : return;
4252 :
4253 0 : tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4254 : DOORBELL_RANGE_LOWER,
4255 : adev->doorbell_index.gfx_ring0);
4256 0 : WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4257 :
4258 0 : WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4259 : CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4260 : }
4261 :
4262 0 : static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4263 : {
4264 : struct amdgpu_ring *ring;
4265 : u32 tmp;
4266 : u32 rb_bufsz;
4267 : u64 rb_addr, rptr_addr, wptr_gpu_addr;
4268 :
4269 : /* Set the write pointer delay */
4270 0 : WREG32(mmCP_RB_WPTR_DELAY, 0);
4271 :
4272 : /* set the RB to use vmid 0 */
4273 0 : WREG32(mmCP_RB_VMID, 0);
4274 :
4275 : /* Set ring buffer size */
4276 0 : ring = &adev->gfx.gfx_ring[0];
4277 0 : rb_bufsz = order_base_2(ring->ring_size / 8);
4278 0 : tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4279 0 : tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4280 0 : tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4281 0 : tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4282 : #ifdef __BIG_ENDIAN
4283 : tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4284 : #endif
4285 0 : WREG32(mmCP_RB0_CNTL, tmp);
4286 :
4287 : /* Initialize the ring buffer's read and write pointers */
4288 0 : WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4289 0 : ring->wptr = 0;
4290 0 : WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4291 :
4292 :         /* set the wb address whether it's enabled or not */
4293 0 : rptr_addr = ring->rptr_gpu_addr;
4294 0 : WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4295 0 : WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4296 :
4297 0 : wptr_gpu_addr = ring->wptr_gpu_addr;
4298 0 : WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4299 0 : WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4300 0 : mdelay(1);
4301 0 : WREG32(mmCP_RB0_CNTL, tmp);
4302 :
4303 0 : rb_addr = ring->gpu_addr >> 8;
4304 0 : WREG32(mmCP_RB0_BASE, rb_addr);
4305 0 : WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4306 :
4307 0 : gfx_v8_0_set_cpg_door_bell(adev, ring);
4308 : /* start the ring */
4309 0 : amdgpu_ring_clear_ring(ring);
4310 0 : gfx_v8_0_cp_gfx_start(adev);
4311 0 : ring->sched.ready = true;
4312 :
4313 0 : return 0;
4314 : }
4315 :
4316 0 : static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4317 : {
4318 0 : if (enable) {
4319 0 : WREG32(mmCP_MEC_CNTL, 0);
4320 : } else {
4321 0 : WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4322 0 : adev->gfx.kiq.ring.sched.ready = false;
4323 : }
4324 0 : udelay(50);
4325 0 : }
4326 :
4327 : /* KIQ functions */
4328 0 : static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4329 : {
4330 : uint32_t tmp;
4331 0 : struct amdgpu_device *adev = ring->adev;
4332 :
4333 :         /* tell the RLC which queue is the KIQ */
4334 0 : tmp = RREG32(mmRLC_CP_SCHEDULERS);
4335 0 : tmp &= 0xffffff00;
4336 0 : tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4337 0 : WREG32(mmRLC_CP_SCHEDULERS, tmp);
4338 0 : tmp |= 0x80;
4339 0 : WREG32(mmRLC_CP_SCHEDULERS, tmp);
4340 0 : }
4341 :
4342 0 : static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4343 : {
4344 0 : struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4345 0 : uint64_t queue_mask = 0;
4346 : int r, i;
4347 :
4348 0 : for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4349 0 : if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4350 0 : continue;
4351 :
4352 : /* This situation may be hit in the future if a new HW
4353 : * generation exposes more than 64 queues. If so, the
4354 : * definition of queue_mask needs updating */
4355 0 : if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4356 0 : DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4357 0 : break;
4358 : }
4359 :
4360 0 : queue_mask |= (1ull << i);
4361 : }
4362 :
4363 0 : r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
4364 0 : if (r) {
4365 0 : DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4366 0 : return r;
4367 : }
4368 : /* set resources */
4369 0 : amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4370 0 : amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4371 0 : amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4372 0 : amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4373 0 : amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4374 0 : amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4375 0 : amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4376 0 : amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4377 0 : for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4378 0 : struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4379 0 : uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4380 0 : uint64_t wptr_addr = ring->wptr_gpu_addr;
4381 :
4382 : /* map queues */
4383 0 : amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4384 :                 /* Q_sel:0, vmid:0, vidmem:1, engine:0, num_Q:1 */
4385 0 : amdgpu_ring_write(kiq_ring,
4386 : PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4387 0 : amdgpu_ring_write(kiq_ring,
4388 0 : PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4389 0 : PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4390 0 : PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4391 0 : PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4392 0 : amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4393 0 : amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4394 0 : amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4395 0 : amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4396 : }
4397 :
4398 0 : amdgpu_ring_commit(kiq_ring);
4399 :
4400 0 : return 0;
4401 : }
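/*
 * queue_mask above packs one bit per usable compute queue from the MEC
 * queue bitmap, giving up past 64 queues since the SET_RESOURCES packet
 * carries a 64-bit mask. A host-side sketch of the packing; the queue
 * indices are made up:
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const int queues[] = { 0, 1, 8, 9 };	/* usable queue indices */
	uint64_t queue_mask = 0;

	for (unsigned i = 0; i < sizeof(queues) / sizeof(queues[0]); i++) {
		if (queues[i] >= 64)		/* would overflow the mask */
			break;
		queue_mask |= 1ull << queues[i];
	}
	printf("mask=0x%llx\n", (unsigned long long)queue_mask); /* 0x303 */
	return 0;
}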
4402 :
4403 0 : static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4404 : {
4405 0 : int i, r = 0;
4406 :
4407 0 : if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4408 0 : WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4409 0 : for (i = 0; i < adev->usec_timeout; i++) {
4410 0 : if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4411 : break;
4412 0 : udelay(1);
4413 : }
4414 0 : if (i == adev->usec_timeout)
4415 0 : r = -ETIMEDOUT;
4416 : }
4417 0 : WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4418 0 : WREG32(mmCP_HQD_PQ_RPTR, 0);
4419 0 : WREG32(mmCP_HQD_PQ_WPTR, 0);
4420 :
4421 0 : return r;
4422 : }
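/*
 * The dequeue above is the driver's usual request-then-poll idiom: write
 * the request, spin with udelay(1) for up to adev->usec_timeout
 * iterations, and return -ETIMEDOUT if the ACTIVE bit never clears. The
 * idiom in isolation; read_status and the timeout are stand-ins:
 */
#include <stdint.h>

static int poll_until_clear(uint32_t (*read_status)(void), uint32_t mask,
			    int usec_timeout)
{
	for (int i = 0; i < usec_timeout; i++) {
		if (!(read_status() & mask))
			return 0;	/* bit cleared in time */
		/* udelay(1) here in kernel context */
	}
	return -110;	/* -ETIMEDOUT on Linux */
}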
4423 :
4424 : static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
4425 : {
4426 0 : struct amdgpu_device *adev = ring->adev;
4427 :
4428 0 : if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4429 0 : if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
4430 0 : mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
4431 0 : mqd->cp_hqd_queue_priority =
4432 : AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
4433 : }
4434 : }
4435 : }
4436 :
4437 0 : static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4438 : {
4439 0 : struct amdgpu_device *adev = ring->adev;
4440 0 : struct vi_mqd *mqd = ring->mqd_ptr;
4441 : uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4442 : uint32_t tmp;
4443 :
4444 0 : mqd->header = 0xC0310800;
4445 0 : mqd->compute_pipelinestat_enable = 0x00000001;
4446 0 : mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4447 0 : mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4448 0 : mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4449 0 : mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4450 0 : mqd->compute_misc_reserved = 0x00000003;
4451 0 : mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4452 : + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4453 0 : mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4454 : + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4455 0 : eop_base_addr = ring->eop_gpu_addr >> 8;
4456 0 : mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4457 0 : mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4458 :
4459 :         /* set the EOP size; the register value is 2^(EOP_SIZE+1) dwords */
4460 0 : tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4461 0 : tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4462 : (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4463 :
4464 0 : mqd->cp_hqd_eop_control = tmp;
4465 :
4466 : /* enable doorbell? */
4467 0 : tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4468 : CP_HQD_PQ_DOORBELL_CONTROL,
4469 : DOORBELL_EN,
4470 : ring->use_doorbell ? 1 : 0);
4471 :
4472 0 : mqd->cp_hqd_pq_doorbell_control = tmp;
4473 :
4474 : /* set the pointer to the MQD */
4475 0 : mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4476 0 : mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4477 :
4478 : /* set MQD vmid to 0 */
4479 0 : tmp = RREG32(mmCP_MQD_CONTROL);
4480 0 : tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4481 0 : mqd->cp_mqd_control = tmp;
4482 :
4483 :         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4484 0 : hqd_gpu_addr = ring->gpu_addr >> 8;
4485 0 : mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4486 0 : mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4487 :
4488 : /* set up the HQD, this is similar to CP_RB0_CNTL */
4489 0 : tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4490 0 : tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4491 : (order_base_2(ring->ring_size / 4) - 1));
4492 0 : tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4493 : (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
4494 : #ifdef __BIG_ENDIAN
4495 : tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4496 : #endif
4497 0 : tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4498 0 : tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4499 0 : tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4500 0 : tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4501 0 : mqd->cp_hqd_pq_control = tmp;
4502 :
4503 : /* set the wb address whether it's enabled or not */
4504 0 : wb_gpu_addr = ring->rptr_gpu_addr;
4505 0 : mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4506 0 : mqd->cp_hqd_pq_rptr_report_addr_hi =
4507 0 : upper_32_bits(wb_gpu_addr) & 0xffff;
4508 :
4509 : /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4510 0 : wb_gpu_addr = ring->wptr_gpu_addr;
4511 0 : mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4512 0 : mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4513 :
4514 0 : tmp = 0;
4515 : /* enable the doorbell if requested */
4516 0 : if (ring->use_doorbell) {
4517 0 : tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4518 0 : tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4519 : DOORBELL_OFFSET, ring->doorbell_index);
4520 :
4521 0 : tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4522 : DOORBELL_EN, 1);
4523 0 : tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4524 : DOORBELL_SOURCE, 0);
4525 0 : tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4526 : DOORBELL_HIT, 0);
4527 : }
4528 :
4529 0 : mqd->cp_hqd_pq_doorbell_control = tmp;
4530 :
4531 : /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4532 0 : ring->wptr = 0;
4533 0 : mqd->cp_hqd_pq_wptr = ring->wptr;
4534 0 : mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4535 :
4536 : /* set the vmid for the queue */
4537 0 : mqd->cp_hqd_vmid = 0;
4538 :
4539 0 : tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4540 0 : tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4541 0 : mqd->cp_hqd_persistent_state = tmp;
4542 :
4543 : /* set MTYPE */
4544 0 : tmp = RREG32(mmCP_HQD_IB_CONTROL);
4545 0 : tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4546 0 : tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4547 0 : mqd->cp_hqd_ib_control = tmp;
4548 :
4549 0 : tmp = RREG32(mmCP_HQD_IQ_TIMER);
4550 0 : tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4551 0 : mqd->cp_hqd_iq_timer = tmp;
4552 :
4553 0 : tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4554 0 : tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4555 0 : mqd->cp_hqd_ctx_save_control = tmp;
4556 :
4557 : /* defaults */
4558 0 : mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4559 0 : mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4560 0 : mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4561 0 : mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4562 0 : mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4563 0 : mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4564 0 : mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4565 0 : mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4566 0 : mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4567 0 : mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4568 0 : mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4569 0 : mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4570 :
4571 : /* set static priority for a queue/ring */
4572 0 : gfx_v8_0_mqd_set_priority(ring, mqd);
4573 0 : mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4574 :
4575 :         /* the map_queues packet doesn't need to activate the queue,
4576 :          * so only the kiq needs to set this field.
4577 : */
4578 0 : if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
4579 0 : mqd->cp_hqd_active = 1;
4580 :
4581 0 : return 0;
4582 : }
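/*
 * The EOP_SIZE and QUEUE_SIZE fields programmed above are log2-minus-one
 * encodings: with GFX8_MEC_HPD_SIZE = 4096 bytes (1024 dwords),
 * order_base_2(1024) = 10 gives a field value of 9, and the hardware's
 * 2^(EOP_SIZE+1) decode recovers 1024 dwords. A quick check, using a
 * plain loop in place of the kernel's order_base_2():
 */
#include <assert.h>

int main(void)
{
	unsigned int hpd_dwords = 4096 / 4;	/* GFX8_MEC_HPD_SIZE in dwords */
	unsigned int field = 0;

	while ((1u << (field + 1)) < hpd_dwords)
		field++;			/* order_base_2(n) - 1 */
	assert(field == 9);
	assert((1u << (field + 1)) == hpd_dwords); /* the 2^(EOP_SIZE+1) decode */
	return 0;
}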
4583 :
4584 0 : static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4585 : struct vi_mqd *mqd)
4586 : {
4587 : uint32_t mqd_reg;
4588 : uint32_t *mqd_data;
4589 :
4590 : /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4591 0 : mqd_data = &mqd->cp_mqd_base_addr_lo;
4592 :
4593 : /* disable wptr polling */
4594 0 : WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4595 :
4596 : /* program all HQD registers */
4597 0 : for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4598 0 : WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4599 :
4600 : /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4601 : * This is safe since EOP RPTR==WPTR for any inactive HQD
4602 : * on ASICs that do not support context-save.
4603 : * EOP writes/reads can start anywhere in the ring.
4604 : */
4605 0 : if (adev->asic_type != CHIP_TONGA) {
4606 0 : WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4607 0 : WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4608 0 : WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4609 : }
4610 :
4611 0 : for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4612 0 : WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4613 :
4614 : /* activate the HQD */
4615 0 : for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4616 0 : WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4617 :
4618 0 : return 0;
4619 : }
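/*
 * The commit loops above depend on struct vi_mqd mirroring the HQD
 * register file: register mmX is written from
 * mqd_data[mmX - mmCP_MQD_BASE_ADDR], i.e. the struct member with the
 * same ordinal as the register. The offset-indexed idea with a toy
 * register block; names and offsets are invented:
 */
#include <stdint.h>

#define REG_BLOCK_BASE	0x3000u
#define REG_A		0x3000u
#define REG_C		0x3002u

static uint32_t fake_mmio[0x4000];	/* simulated register file */

static void commit_block(const uint32_t *shadow, uint32_t first, uint32_t last)
{
	for (uint32_t reg = first; reg <= last; reg++)
		fake_mmio[reg] = shadow[reg - REG_BLOCK_BASE];
}
/* commit_block(shadow, REG_A, REG_C) writes shadow[0..2] to 0x3000..0x3002 */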
4620 :
4621 0 : static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4622 : {
4623 0 : struct amdgpu_device *adev = ring->adev;
4624 0 : struct vi_mqd *mqd = ring->mqd_ptr;
4625 0 : int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4626 :
4627 0 : gfx_v8_0_kiq_setting(ring);
4628 :
4629 0 : if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4630 : /* reset MQD to a clean status */
4631 0 : if (adev->gfx.mec.mqd_backup[mqd_idx])
4632 0 : memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4633 :
4634 : /* reset ring buffer */
4635 0 : ring->wptr = 0;
4636 0 : amdgpu_ring_clear_ring(ring);
4637 0 : mutex_lock(&adev->srbm_mutex);
4638 0 : vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4639 0 : gfx_v8_0_mqd_commit(adev, mqd);
4640 0 : vi_srbm_select(adev, 0, 0, 0, 0);
4641 0 : mutex_unlock(&adev->srbm_mutex);
4642 : } else {
4643 0 : memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4644 0 : ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4645 0 : ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4646 0 : mutex_lock(&adev->srbm_mutex);
4647 0 : vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4648 0 : gfx_v8_0_mqd_init(ring);
4649 0 : gfx_v8_0_mqd_commit(adev, mqd);
4650 0 : vi_srbm_select(adev, 0, 0, 0, 0);
4651 0 : mutex_unlock(&adev->srbm_mutex);
4652 :
4653 0 : if (adev->gfx.mec.mqd_backup[mqd_idx])
4654 0 : memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4655 : }
4656 :
4657 0 : return 0;
4658 : }
4659 :
4660 0 : static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4661 : {
4662 0 : struct amdgpu_device *adev = ring->adev;
4663 0 : struct vi_mqd *mqd = ring->mqd_ptr;
4664 0 : int mqd_idx = ring - &adev->gfx.compute_ring[0];
4665 :
4666 0 : if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4667 0 : memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4668 0 : ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4669 0 : ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4670 0 : mutex_lock(&adev->srbm_mutex);
4671 0 : vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4672 0 : gfx_v8_0_mqd_init(ring);
4673 0 : vi_srbm_select(adev, 0, 0, 0, 0);
4674 0 : mutex_unlock(&adev->srbm_mutex);
4675 :
4676 0 : if (adev->gfx.mec.mqd_backup[mqd_idx])
4677 0 : memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4678 0 : } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4679 : /* reset MQD to a clean status */
4680 0 : if (adev->gfx.mec.mqd_backup[mqd_idx])
4681 0 : memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4682 : /* reset ring buffer */
4683 0 : ring->wptr = 0;
4684 : amdgpu_ring_clear_ring(ring);
4685 : } else {
4686 : amdgpu_ring_clear_ring(ring);
4687 : }
4688 0 : return 0;
4689 : }
4690 :
4691 0 : static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4692 : {
4693 0 : if (adev->asic_type > CHIP_TONGA) {
4694 0 : WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
4695 0 : WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
4696 : }
4697 : /* enable doorbells */
4698 0 : WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4699 0 : }
4700 :
4701 0 : static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4702 : {
4703 : struct amdgpu_ring *ring;
4704 : int r;
4705 :
4706 0 : ring = &adev->gfx.kiq.ring;
4707 :
4708 0 : r = amdgpu_bo_reserve(ring->mqd_obj, false);
4709 0 : if (unlikely(r != 0))
4710 : return r;
4711 :
4712 0 : r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4713 0 : if (unlikely(r != 0))
4714 : return r;
4715 :
4716 0 : gfx_v8_0_kiq_init_queue(ring);
4717 0 : amdgpu_bo_kunmap(ring->mqd_obj);
4718 0 : ring->mqd_ptr = NULL;
4719 0 : amdgpu_bo_unreserve(ring->mqd_obj);
4720 0 : ring->sched.ready = true;
4721 0 : return 0;
4722 : }
4723 :
4724 0 : static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4725 : {
4726 0 : struct amdgpu_ring *ring = NULL;
4727 0 : int r = 0, i;
4728 :
4729 0 : gfx_v8_0_cp_compute_enable(adev, true);
4730 :
4731 0 : for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4732 0 : ring = &adev->gfx.compute_ring[i];
4733 :
4734 0 : r = amdgpu_bo_reserve(ring->mqd_obj, false);
4735 0 : if (unlikely(r != 0))
4736 : goto done;
4737 0 : r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4738 0 : if (!r) {
4739 0 : r = gfx_v8_0_kcq_init_queue(ring);
4740 0 : amdgpu_bo_kunmap(ring->mqd_obj);
4741 0 : ring->mqd_ptr = NULL;
4742 : }
4743 0 : amdgpu_bo_unreserve(ring->mqd_obj);
4744 0 : if (r)
4745 : goto done;
4746 : }
4747 :
4748 0 : gfx_v8_0_set_mec_doorbell_range(adev);
4749 :
4750 0 : r = gfx_v8_0_kiq_kcq_enable(adev);
4751 : if (r)
4752 : goto done;
4753 :
4754 : done:
4755 0 : return r;
4756 : }
4757 :
4758 0 : static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
4759 : {
4760 : int r, i;
4761 : struct amdgpu_ring *ring;
4762 :
4763 :         /* collect all the ring tests here: gfx, kiq, compute */
4764 0 : ring = &adev->gfx.gfx_ring[0];
4765 0 : r = amdgpu_ring_test_helper(ring);
4766 0 : if (r)
4767 : return r;
4768 :
4769 0 : ring = &adev->gfx.kiq.ring;
4770 0 : r = amdgpu_ring_test_helper(ring);
4771 0 : if (r)
4772 : return r;
4773 :
4774 0 : for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4775 0 : ring = &adev->gfx.compute_ring[i];
4776 0 : amdgpu_ring_test_helper(ring);
4777 : }
4778 :
4779 : return 0;
4780 : }
4781 :
4782 0 : static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4783 : {
4784 : int r;
4785 :
4786 0 : if (!(adev->flags & AMD_IS_APU))
4787 0 : gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4788 :
4789 0 : r = gfx_v8_0_kiq_resume(adev);
4790 0 : if (r)
4791 : return r;
4792 :
4793 0 : r = gfx_v8_0_cp_gfx_resume(adev);
4794 0 : if (r)
4795 : return r;
4796 :
4797 0 : r = gfx_v8_0_kcq_resume(adev);
4798 0 : if (r)
4799 : return r;
4800 :
4801 0 : r = gfx_v8_0_cp_test_all_rings(adev);
4802 0 : if (r)
4803 : return r;
4804 :
4805 0 : gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4806 :
4807 0 : return 0;
4808 : }
4809 :
4810 : static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4811 : {
4812 0 : gfx_v8_0_cp_gfx_enable(adev, enable);
4813 0 : gfx_v8_0_cp_compute_enable(adev, enable);
4814 : }
4815 :
4816 0 : static int gfx_v8_0_hw_init(void *handle)
4817 : {
4818 : int r;
4819 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4820 :
4821 0 : gfx_v8_0_init_golden_registers(adev);
4822 0 : gfx_v8_0_constants_init(adev);
4823 :
4824 0 : r = adev->gfx.rlc.funcs->resume(adev);
4825 0 : if (r)
4826 : return r;
4827 :
4828 0 : r = gfx_v8_0_cp_resume(adev);
4829 :
4830 0 : return r;
4831 : }
4832 :
4833 0 : static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4834 : {
4835 : int r, i;
4836 0 : struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4837 :
4838 0 : r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4839 0 : if (r)
4840 0 : DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4841 :
4842 0 : for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4843 0 : struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4844 :
4845 0 : amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4846 0 : amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4847 : PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4848 : PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4849 : PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4850 : PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4851 0 : amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4852 0 : amdgpu_ring_write(kiq_ring, 0);
4853 0 : amdgpu_ring_write(kiq_ring, 0);
4854 0 : amdgpu_ring_write(kiq_ring, 0);
4855 : }
4856 0 : r = amdgpu_ring_test_helper(kiq_ring);
4857 0 : if (r)
4858 0 : DRM_ERROR("KCQ disable failed\n");
4859 :
4860 0 : return r;
4861 : }
4862 :
4863 0 : static bool gfx_v8_0_is_idle(void *handle)
4864 : {
4865 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4866 :
4867 0 : if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4868 0 : || RREG32(mmGRBM_STATUS2) != 0x8)
4869 : return false;
4870 : else
4871 : return true;
4872 : }
4873 :
4874 : static bool gfx_v8_0_rlc_is_idle(void *handle)
4875 : {
4876 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4877 :
4878 0 : if (RREG32(mmGRBM_STATUS2) != 0x8)
4879 : return false;
4880 : else
4881 : return true;
4882 : }
4883 :
4884 0 : static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4885 : {
4886 : unsigned int i;
4887 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4888 :
4889 0 : for (i = 0; i < adev->usec_timeout; i++) {
4890 0 : if (gfx_v8_0_rlc_is_idle(handle))
4891 : return 0;
4892 :
4893 0 : udelay(1);
4894 : }
4895 : return -ETIMEDOUT;
4896 : }
4897 :
4898 0 : static int gfx_v8_0_wait_for_idle(void *handle)
4899 : {
4900 : unsigned int i;
4901 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4902 :
4903 0 : for (i = 0; i < adev->usec_timeout; i++) {
4904 0 : if (gfx_v8_0_is_idle(handle))
4905 : return 0;
4906 :
4907 0 : udelay(1);
4908 : }
4909 : return -ETIMEDOUT;
4910 : }
4911 :
4912 0 : static int gfx_v8_0_hw_fini(void *handle)
4913 : {
4914 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4915 :
4916 0 : amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4917 0 : amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4918 :
4919 0 : amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4920 :
4921 0 : amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
4922 :         /* disable the KCQs so the CPC stops touching memory that will no longer be valid */
4923 : /* disable KCQ to avoid CPC touch memory not valid anymore */
4924 0 : gfx_v8_0_kcq_disable(adev);
4925 :
4926 0 : if (amdgpu_sriov_vf(adev)) {
4927 : pr_debug("For SRIOV client, shouldn't do anything.\n");
4928 : return 0;
4929 : }
4930 0 : amdgpu_gfx_rlc_enter_safe_mode(adev);
4931 0 : if (!gfx_v8_0_wait_for_idle(adev))
4932 : gfx_v8_0_cp_enable(adev, false);
4933 : else
4934 0 :                 pr_err("cp is busy, skipping cp halt\n");
4935 0 : if (!gfx_v8_0_wait_for_rlc_idle(adev))
4936 0 : adev->gfx.rlc.funcs->stop(adev);
4937 : else
4938 0 :                 pr_err("rlc is busy, skipping rlc halt\n");
4939 0 : amdgpu_gfx_rlc_exit_safe_mode(adev);
4940 :
4941 0 : return 0;
4942 : }
4943 :
4944 0 : static int gfx_v8_0_suspend(void *handle)
4945 : {
4946 0 : return gfx_v8_0_hw_fini(handle);
4947 : }
4948 :
4949 0 : static int gfx_v8_0_resume(void *handle)
4950 : {
4951 0 : return gfx_v8_0_hw_init(handle);
4952 : }
4953 :
4954 0 : static bool gfx_v8_0_check_soft_reset(void *handle)
4955 : {
4956 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4957 0 : u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4958 : u32 tmp;
4959 :
4960 : /* GRBM_STATUS */
4961 0 : tmp = RREG32(mmGRBM_STATUS);
4962 0 : if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4963 : GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4964 : GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4965 : GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4966 : GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4967 : GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4968 : GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4969 0 : grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4970 : GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4971 0 : grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4972 : GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4973 0 : srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4974 : SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4975 : }
4976 :
4977 : /* GRBM_STATUS2 */
4978 0 : tmp = RREG32(mmGRBM_STATUS2);
4979 0 : if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4980 0 : grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4981 : GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4982 :
4983 0 : if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
4984 0 : REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
4985 : REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
4986 0 : grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4987 : SOFT_RESET_CPF, 1);
4988 0 : grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4989 : SOFT_RESET_CPC, 1);
4990 0 : grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4991 : SOFT_RESET_CPG, 1);
4992 0 : srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
4993 : SOFT_RESET_GRBM, 1);
4994 : }
4995 :
4996 : /* SRBM_STATUS */
4997 0 : tmp = RREG32(mmSRBM_STATUS);
4998 0 : if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4999 0 : srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5000 : SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5001 0 : if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5002 0 : srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5003 : SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5004 :
5005 0 : if (grbm_soft_reset || srbm_soft_reset) {
5006 0 : adev->gfx.grbm_soft_reset = grbm_soft_reset;
5007 0 : adev->gfx.srbm_soft_reset = srbm_soft_reset;
5008 0 : return true;
5009 : } else {
5010 0 : adev->gfx.grbm_soft_reset = 0;
5011 0 : adev->gfx.srbm_soft_reset = 0;
5012 0 : return false;
5013 : }
5014 : }
5015 :
5016 0 : static int gfx_v8_0_pre_soft_reset(void *handle)
5017 : {
5018 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5019 0 : u32 grbm_soft_reset = 0;
5020 :
5021 0 : if ((!adev->gfx.grbm_soft_reset) &&
5022 : (!adev->gfx.srbm_soft_reset))
5023 : return 0;
5024 :
5025 0 : grbm_soft_reset = adev->gfx.grbm_soft_reset;
5026 :
5027 : /* stop the rlc */
5028 0 : adev->gfx.rlc.funcs->stop(adev);
5029 :
5030 0 : if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5031 : REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5032 : /* Disable GFX parsing/prefetching */
5033 0 : gfx_v8_0_cp_gfx_enable(adev, false);
5034 :
5035 0 : if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5036 : REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5037 0 : REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5038 : REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5039 : int i;
5040 :
5041 0 : for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5042 0 : struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5043 :
5044 0 : mutex_lock(&adev->srbm_mutex);
5045 0 : vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5046 0 : gfx_v8_0_deactivate_hqd(adev, 2);
5047 0 : vi_srbm_select(adev, 0, 0, 0, 0);
5048 0 : mutex_unlock(&adev->srbm_mutex);
5049 : }
5050 : /* Disable MEC parsing/prefetching */
5051 0 : gfx_v8_0_cp_compute_enable(adev, false);
5052 : }
5053 :
5054 : return 0;
5055 : }
5056 :
5057 0 : static int gfx_v8_0_soft_reset(void *handle)
5058 : {
5059 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5060 0 : u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5061 : u32 tmp;
5062 :
5063 0 : if ((!adev->gfx.grbm_soft_reset) &&
5064 : (!adev->gfx.srbm_soft_reset))
5065 : return 0;
5066 :
5067 0 : grbm_soft_reset = adev->gfx.grbm_soft_reset;
5068 0 : srbm_soft_reset = adev->gfx.srbm_soft_reset;
5069 :
5070 0 : if (grbm_soft_reset || srbm_soft_reset) {
5071 0 : tmp = RREG32(mmGMCON_DEBUG);
5072 0 : tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5073 0 : tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5074 0 : WREG32(mmGMCON_DEBUG, tmp);
5075 : udelay(50);
5076 : }
5077 :
5078 0 : if (grbm_soft_reset) {
5079 0 : tmp = RREG32(mmGRBM_SOFT_RESET);
5080 0 : tmp |= grbm_soft_reset;
5081 0 : dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5082 0 : WREG32(mmGRBM_SOFT_RESET, tmp);
5083 0 : tmp = RREG32(mmGRBM_SOFT_RESET);
5084 :
5085 0 : udelay(50);
5086 :
5087 0 : tmp &= ~grbm_soft_reset;
5088 0 : WREG32(mmGRBM_SOFT_RESET, tmp);
5089 0 : tmp = RREG32(mmGRBM_SOFT_RESET);
5090 : }
5091 :
5092 0 : if (srbm_soft_reset) {
5093 0 : tmp = RREG32(mmSRBM_SOFT_RESET);
5094 0 : tmp |= srbm_soft_reset;
5095 0 : dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5096 0 : WREG32(mmSRBM_SOFT_RESET, tmp);
5097 0 : tmp = RREG32(mmSRBM_SOFT_RESET);
5098 :
5099 0 : udelay(50);
5100 :
5101 0 : tmp &= ~srbm_soft_reset;
5102 0 : WREG32(mmSRBM_SOFT_RESET, tmp);
5103 0 : tmp = RREG32(mmSRBM_SOFT_RESET);
5104 : }
5105 :
5106 0 : if (grbm_soft_reset || srbm_soft_reset) {
5107 0 : tmp = RREG32(mmGMCON_DEBUG);
5108 0 : tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5109 0 : tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5110 0 : WREG32(mmGMCON_DEBUG, tmp);
5111 : }
5112 :
5113 : /* Wait a little for things to settle down */
5114 0 : udelay(50);
5115 :
5116 0 : return 0;
5117 : }
5118 :
5119 0 : static int gfx_v8_0_post_soft_reset(void *handle)
5120 : {
5121 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5122 0 : u32 grbm_soft_reset = 0;
5123 :
5124 0 : if ((!adev->gfx.grbm_soft_reset) &&
5125 : (!adev->gfx.srbm_soft_reset))
5126 : return 0;
5127 :
5128 0 : grbm_soft_reset = adev->gfx.grbm_soft_reset;
5129 :
5130 0 : if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5131 : REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5132 0 : REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5133 : REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5134 : int i;
5135 :
5136 0 : for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5137 0 : struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5138 :
5139 0 : mutex_lock(&adev->srbm_mutex);
5140 0 : vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5141 0 : gfx_v8_0_deactivate_hqd(adev, 2);
5142 0 : vi_srbm_select(adev, 0, 0, 0, 0);
5143 0 : mutex_unlock(&adev->srbm_mutex);
5144 : }
5145 0 : gfx_v8_0_kiq_resume(adev);
5146 0 : gfx_v8_0_kcq_resume(adev);
5147 : }
5148 :
5149 0 : if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5150 : REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5151 0 : gfx_v8_0_cp_gfx_resume(adev);
5152 :
5153 0 : gfx_v8_0_cp_test_all_rings(adev);
5154 :
5155 0 : adev->gfx.rlc.funcs->start(adev);
5156 :
5157 0 : return 0;
5158 : }
5159 :
5160 : /**
5161 : * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5162 : *
5163 : * @adev: amdgpu_device pointer
5164 : *
5165 : * Fetches a GPU clock counter snapshot.
5166 :  * Returns the 64-bit clock counter snapshot.
5167 : */
5168 0 : static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5169 : {
5170 : uint64_t clock;
5171 :
5172 0 : mutex_lock(&adev->gfx.gpu_clock_mutex);
5173 0 : WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5174 0 : clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5175 0 : ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5176 0 : mutex_unlock(&adev->gfx.gpu_clock_mutex);
5177 0 : return clock;
5178 : }
5179 :
5180 0 : static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5181 : uint32_t vmid,
5182 : uint32_t gds_base, uint32_t gds_size,
5183 : uint32_t gws_base, uint32_t gws_size,
5184 : uint32_t oa_base, uint32_t oa_size)
5185 : {
5186 : /* GDS Base */
5187 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5188 0 : amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5189 : WRITE_DATA_DST_SEL(0)));
5190 0 : amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5191 0 : amdgpu_ring_write(ring, 0);
5192 0 : amdgpu_ring_write(ring, gds_base);
5193 :
5194 : /* GDS Size */
5195 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5196 0 : amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5197 : WRITE_DATA_DST_SEL(0)));
5198 0 : amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5199 0 : amdgpu_ring_write(ring, 0);
5200 0 : amdgpu_ring_write(ring, gds_size);
5201 :
5202 : /* GWS */
5203 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5204 0 : amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5205 : WRITE_DATA_DST_SEL(0)));
5206 0 : amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5207 0 : amdgpu_ring_write(ring, 0);
5208 0 : amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5209 :
5210 : /* OA */
5211 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5212 0 : amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5213 : WRITE_DATA_DST_SEL(0)));
5214 0 : amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5215 0 : amdgpu_ring_write(ring, 0);
5216 0 : amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5217 0 : }
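/*
 * The OA value written last is a contiguous bitmask covering
 * [oa_base, oa_base + oa_size): (1 << (size + base)) - (1 << base).
 * For base = 4, size = 3 that is 0x70, i.e. bits 4..6. A quick check:
 */
#include <assert.h>

int main(void)
{
	unsigned int oa_base = 4, oa_size = 3;
	unsigned int mask = (1u << (oa_size + oa_base)) - (1u << oa_base);

	assert(mask == 0x70);
	return 0;
}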
5218 :
5219 0 : static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5220 : {
5221 0 : WREG32(mmSQ_IND_INDEX,
5222 : (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5223 : (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5224 : (address << SQ_IND_INDEX__INDEX__SHIFT) |
5225 : (SQ_IND_INDEX__FORCE_READ_MASK));
5226 0 : return RREG32(mmSQ_IND_DATA);
5227 : }
5228 :
5229 0 : static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5230 : uint32_t wave, uint32_t thread,
5231 : uint32_t regno, uint32_t num, uint32_t *out)
5232 : {
5233 0 : WREG32(mmSQ_IND_INDEX,
5234 : (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5235 : (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5236 : (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5237 : (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5238 : (SQ_IND_INDEX__FORCE_READ_MASK) |
5239 : (SQ_IND_INDEX__AUTO_INCR_MASK));
5240 0 : while (num--)
5241 0 : *(out++) = RREG32(mmSQ_IND_DATA);
5242 0 : }
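/*
 * Both helpers use a classic index/data register pair: program
 * SQ_IND_INDEX with the wave/simd/register selector, then read
 * SQ_IND_DATA; AUTO_INCR lets consecutive data reads walk successive
 * registers, which is how wave_read_regs() pulls `num` values from a
 * single index write. The pattern in isolation, with a simulated
 * register model:
 */
#include <stdint.h>

static uint32_t regs[256];	/* simulated indirect register space */
static uint32_t index_reg;	/* simulated index register */

static uint32_t read_data_autoincr(void)
{
	return regs[index_reg++ & 0xFF];	/* read, then advance */
}

static void read_block(uint32_t start, uint32_t num, uint32_t *out)
{
	index_reg = start;			/* one index write ... */
	while (num--)
		*out++ = read_data_autoincr();	/* ... then N data reads */
}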
5243 :
5244 0 : static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5245 : {
5246 : /* type 0 wave data */
5247 0 : dst[(*no_fields)++] = 0;
5248 0 : dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5249 0 : dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5250 0 : dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5251 0 : dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5252 0 : dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5253 0 : dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5254 0 : dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5255 0 : dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5256 0 : dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5257 0 : dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5258 0 : dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5259 0 : dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5260 0 : dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5261 0 : dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5262 0 : dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5263 0 : dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5264 0 : dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5265 0 : dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5266 0 : dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
5267 0 : }
5268 :
5269 0 : static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5270 : uint32_t wave, uint32_t start,
5271 : uint32_t size, uint32_t *dst)
5272 : {
5273 0 : wave_read_regs(
5274 : adev, simd, wave, 0,
5275 : start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5276 0 : }
5277 :
5278 :
5279 : static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5280 : .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5281 : .select_se_sh = &gfx_v8_0_select_se_sh,
5282 : .read_wave_data = &gfx_v8_0_read_wave_data,
5283 : .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5284 : .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5285 : };
5286 :
5287 0 : static int gfx_v8_0_early_init(void *handle)
5288 : {
5289 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5290 :
5291 0 : adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5292 0 : adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
5293 : AMDGPU_MAX_COMPUTE_RINGS);
5294 0 : adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5295 0 : gfx_v8_0_set_ring_funcs(adev);
5296 0 : gfx_v8_0_set_irq_funcs(adev);
5297 0 : gfx_v8_0_set_gds_init(adev);
5298 0 : gfx_v8_0_set_rlc_funcs(adev);
5299 :
5300 0 : return 0;
5301 : }
5302 :
5303 0 : static int gfx_v8_0_late_init(void *handle)
5304 : {
5305 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5306 : int r;
5307 :
5308 0 : r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5309 0 : if (r)
5310 : return r;
5311 :
5312 0 : r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5313 0 : if (r)
5314 : return r;
5315 :
5316 : /* requires IBs so do in late init after IB pool is initialized */
5317 0 : r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5318 0 : if (r)
5319 : return r;
5320 :
5321 0 : r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5322 0 : if (r) {
5323 0 : DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5324 0 : return r;
5325 : }
5326 :
5327 0 : r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5328 0 : if (r) {
5329 0 : DRM_ERROR(
5330 : "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5331 : r);
5332 0 : return r;
5333 : }
5334 :
5335 : return 0;
5336 : }
5337 :
5338 0 : static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5339 : bool enable)
5340 : {
5341 0 : if ((adev->asic_type == CHIP_POLARIS11) ||
5342 0 : (adev->asic_type == CHIP_POLARIS12) ||
5343 : (adev->asic_type == CHIP_VEGAM))
5344 : /* Send msg to SMU via Powerplay */
5345 0 : amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5346 :
5347 0 : WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5348 0 : }
5349 :
5350 0 : static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5351 : bool enable)
5352 : {
5353 0 : WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5354 0 : }
5355 :
5356 0 : static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5357 : bool enable)
5358 : {
5359 0 : WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5360 0 : }
5361 :
5362 0 : static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5363 : bool enable)
5364 : {
5365 0 : WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5366 0 : }
5367 :
5368 0 : static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5369 : bool enable)
5370 : {
5371 0 : WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5372 :
5373 : /* Read any GFX register to wake up GFX. */
5374 0 : if (!enable)
5375 0 : RREG32(mmDB_RENDER_CONTROL);
5376 0 : }
5377 :
5378 0 : static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5379 : bool enable)
5380 : {
5381 0 : if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5382 0 : cz_enable_gfx_cg_power_gating(adev, true);
5383 0 : if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5384 0 : cz_enable_gfx_pipeline_power_gating(adev, true);
5385 : } else {
5386 0 : cz_enable_gfx_cg_power_gating(adev, false);
5387 0 : cz_enable_gfx_pipeline_power_gating(adev, false);
5388 : }
5389 0 : }
5390 :
5391 0 : static int gfx_v8_0_set_powergating_state(void *handle,
5392 : enum amd_powergating_state state)
5393 : {
5394 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5395 0 : bool enable = (state == AMD_PG_STATE_GATE);
5396 :
5397 0 : if (amdgpu_sriov_vf(adev))
5398 : return 0;
5399 :
5400 0 : if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5401 : AMD_PG_SUPPORT_RLC_SMU_HS |
5402 : AMD_PG_SUPPORT_CP |
5403 : AMD_PG_SUPPORT_GFX_DMG))
5404 0 : amdgpu_gfx_rlc_enter_safe_mode(adev);
5405 0 : switch (adev->asic_type) {
5406 : case CHIP_CARRIZO:
5407 : case CHIP_STONEY:
5408 :
5409 0 : if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5410 0 : cz_enable_sck_slow_down_on_power_up(adev, true);
5411 0 : cz_enable_sck_slow_down_on_power_down(adev, true);
5412 : } else {
5413 0 : cz_enable_sck_slow_down_on_power_up(adev, false);
5414 0 : cz_enable_sck_slow_down_on_power_down(adev, false);
5415 : }
5416 0 : if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5417 0 : cz_enable_cp_power_gating(adev, true);
5418 : else
5419 0 : cz_enable_cp_power_gating(adev, false);
5420 :
5421 0 : cz_update_gfx_cg_power_gating(adev, enable);
5422 :
5423 0 : if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5424 0 : gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5425 : else
5426 0 : gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5427 :
5428 0 : if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5429 0 : gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5430 : else
5431 0 : gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5432 : break;
5433 : case CHIP_POLARIS11:
5434 : case CHIP_POLARIS12:
5435 : case CHIP_VEGAM:
5436 0 : if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5437 0 : gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5438 : else
5439 0 : gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5440 :
5441 0 : if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5442 0 : gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5443 : else
5444 0 : gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5445 :
5446 0 : if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5447 0 : polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5448 : else
5449 0 : polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5450 : break;
5451 : default:
5452 : break;
5453 : }
5454 0 : if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5455 : AMD_PG_SUPPORT_RLC_SMU_HS |
5456 : AMD_PG_SUPPORT_CP |
5457 : AMD_PG_SUPPORT_GFX_DMG))
5458 0 : amdgpu_gfx_rlc_exit_safe_mode(adev);
5459 : return 0;
5460 : }
5461 :
5462 0 : static void gfx_v8_0_get_clockgating_state(void *handle, u64 *flags)
5463 : {
5464 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5465 : int data;
5466 :
5467 0 : if (amdgpu_sriov_vf(adev))
5468 0 : *flags = 0;
5469 :
5470 : /* AMD_CG_SUPPORT_GFX_MGCG */
5471 0 : data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5472 0 : if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5473 0 : *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5474 :
5475 :         /* AMD_CG_SUPPORT_GFX_CGCG */
5476 0 : data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5477 0 : if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5478 0 : *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5479 :
5480 : /* AMD_CG_SUPPORT_GFX_CGLS */
5481 0 : if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5482 0 : *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5483 :
5484 : /* AMD_CG_SUPPORT_GFX_CGTS */
5485 0 : data = RREG32(mmCGTS_SM_CTRL_REG);
5486 0 : if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5487 0 : *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5488 :
5489 : /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5490 0 : if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5491 0 : *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5492 :
5493 : /* AMD_CG_SUPPORT_GFX_RLC_LS */
5494 0 : data = RREG32(mmRLC_MEM_SLP_CNTL);
5495 0 : if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5496 0 : *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5497 :
5498 : /* AMD_CG_SUPPORT_GFX_CP_LS */
5499 0 : data = RREG32(mmCP_MEM_SLP_CNTL);
5500 0 : if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5501 0 : *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5502 0 : }
5503 :
5504 0 : static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5505 : uint32_t reg_addr, uint32_t cmd)
5506 : {
5507 : uint32_t data;
5508 :
5509 0 : gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5510 :
5511 0 : WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5512 0 : WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5513 :
5514 0 : data = RREG32(mmRLC_SERDES_WR_CTRL);
5515 0 : if (adev->asic_type == CHIP_STONEY)
5516 0 : data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5517 : RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5518 : RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5519 : RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5520 : RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5521 : RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5522 : RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5523 : RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5524 : RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5525 : else
5526 0 : data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5527 : RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5528 : RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5529 : RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5530 : RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5531 : RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5532 : RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5533 : RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5534 : RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5535 : RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5536 : RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5537 0 : data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5538 0 : (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5539 0 : (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5540 : (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5541 :
5542 0 : WREG32(mmRLC_SERDES_WR_CTRL, data);
5543 0 : }
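/*
 * Usage sketch: a BPM SERDES command pairs a BPM register with a
 * set/clear opcode and is broadcast to every CU, since the function
 * above first selects all SEs/SHs and programs both master masks to
 * 0xffffffff. The clock-gating paths below use it, e.g. to drop the
 * MGCG override:
 *
 *	gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE,
 *				 CLE_BPM_SERDES_CMD);
 */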
5544 :
5545 : #define MSG_ENTER_RLC_SAFE_MODE 1
5546 : #define MSG_EXIT_RLC_SAFE_MODE 0
5547 : #define RLC_GPR_REG2__REQ_MASK 0x00000001
5548 : #define RLC_GPR_REG2__REQ__SHIFT 0
5549 : #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5550 : #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5551 :
5552 0 : static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5553 : {
5554 : uint32_t rlc_setting;
5555 :
5556 0 : rlc_setting = RREG32(mmRLC_CNTL);
5557 0 : if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5558 : return false;
5559 :
5560 0 : return true;
5561 : }
5562 :
5563 0 : static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
5564 : {
5565 : uint32_t data;
5566 : unsigned i;
5567 0 : data = RREG32(mmRLC_CNTL);
5568 0 : data |= RLC_SAFE_MODE__CMD_MASK;
5569 0 : data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5570 0 : data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5571 0 : WREG32(mmRLC_SAFE_MODE, data);
5572 :
5573 : /* wait for RLC_SAFE_MODE */
5574 0 : for (i = 0; i < adev->usec_timeout; i++) {
5575 0 : if ((RREG32(mmRLC_GPM_STAT) &
5576 : (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5577 : RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5578 : (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5579 : RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5580 : break;
5581 0 : udelay(1);
5582 : }
5583 0 : for (i = 0; i < adev->usec_timeout; i++) {
5584 0 : if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5585 : break;
5586 0 : udelay(1);
5587 : }
5588 0 : }
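/*
 * Note on the handshake above: writing CMD=1 with MESSAGE=1 requests
 * safe mode from the RLC. The first poll waits for RLC_GPM_STAT to
 * report both gfx clocks and gfx power on; the second waits for the
 * RLC to acknowledge by clearing the CMD bit. gfx_v8_0_unset_safe_mode()
 * below sends the same request with MESSAGE=0 and only polls for the ack.
 */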
5589 :
5590 0 : static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
5591 : {
5592 : uint32_t data;
5593 : unsigned i;
5594 :
5595 0 : data = RREG32(mmRLC_CNTL);
5596 0 : data |= RLC_SAFE_MODE__CMD_MASK;
5597 0 : data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5598 0 : WREG32(mmRLC_SAFE_MODE, data);
5599 :
5600 0 : for (i = 0; i < adev->usec_timeout; i++) {
5601 0 : if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5602 : break;
5603 0 : udelay(1);
5604 : }
5605 0 : }
5606 :
5607 0 : static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5608 : {
5609 : u32 data;
5610 :
5611 0 : amdgpu_gfx_off_ctrl(adev, false);
5612 :
5613 0 : if (amdgpu_sriov_is_pp_one_vf(adev))
5614 0 : data = RREG32_NO_KIQ(mmRLC_SPM_VMID);
5615 : else
5616 0 : data = RREG32(mmRLC_SPM_VMID);
5617 :
5618 0 : data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
5619 0 : data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
5620 :
5621 0 : if (amdgpu_sriov_is_pp_one_vf(adev))
5622 0 : WREG32_NO_KIQ(mmRLC_SPM_VMID, data);
5623 : else
5624 0 : WREG32(mmRLC_SPM_VMID, data);
5625 :
5626 0 : amdgpu_gfx_off_ctrl(adev, true);
5627 0 : }
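/*
 * The amdgpu_gfx_off_ctrl() bracket above presumably keeps the gfx
 * block out of GFXOFF while mmRLC_SPM_VMID is read-modify-written,
 * since MMIO access to a powered-down gfx block would not be reliable.
 * Under SR-IOV "pp one VF" mode the access bypasses the KIQ, hence the
 * RREG32_NO_KIQ/WREG32_NO_KIQ variants.
 */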
5628 :
5629 : static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5630 : .is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
5631 : .set_safe_mode = gfx_v8_0_set_safe_mode,
5632 : .unset_safe_mode = gfx_v8_0_unset_safe_mode,
5633 : .init = gfx_v8_0_rlc_init,
5634 : .get_csb_size = gfx_v8_0_get_csb_size,
5635 : .get_csb_buffer = gfx_v8_0_get_csb_buffer,
5636 : .get_cp_table_num = gfx_v8_0_cp_jump_table_num,
5637 : .resume = gfx_v8_0_rlc_resume,
5638 : .stop = gfx_v8_0_rlc_stop,
5639 : .reset = gfx_v8_0_rlc_reset,
5640 : .start = gfx_v8_0_rlc_start,
5641 : .update_spm_vmid = gfx_v8_0_update_spm_vmid
5642 : };
5643 :
5644 0 : static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5645 : bool enable)
5646 : {
5647 : uint32_t temp, data;
5648 :
5649 0 : amdgpu_gfx_rlc_enter_safe_mode(adev);
5650 :
5651 : /* It is disabled by HW by default */
5652 0 : if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5653 0 : if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5654 0 : if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5655 : /* 1 - RLC memory Light sleep */
5656 0 : WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5657 :
5658 0 : if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5659 0 : WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5660 : }
5661 :
5662 : /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5663 0 : temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5664 0 : if (adev->flags & AMD_IS_APU)
5665 0 : data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5666 : RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5667 : RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5668 : else
5669 0 : data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5670 : RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5671 : RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5672 : RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5673 :
5674 0 : if (temp != data)
5675 0 : WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5676 :
5677 : /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5678 0 : gfx_v8_0_wait_for_rlc_serdes(adev);
5679 :
5680 : /* 5 - clear mgcg override */
5681 0 : gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5682 :
5683 0 : if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5684 : /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5685 0 : temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5686 0 : data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5687 0 : data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5688 0 : data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5689 0 : data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5690 0 : if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5691 : (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5692 0 : data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5693 0 : data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5694 0 : data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5695 0 : if (temp != data)
5696 0 : WREG32(mmCGTS_SM_CTRL_REG, data);
5697 : }
5698 0 : udelay(50);
5699 :
5700 : /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5701 0 : gfx_v8_0_wait_for_rlc_serdes(adev);
5702 : } else {
5703 : /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5704 0 : temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5705 0 : data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5706 : RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5707 : RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5708 : RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5709 0 : if (temp != data)
5710 0 : WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5711 :
5712 : /* 2 - disable MGLS in RLC */
5713 0 : data = RREG32(mmRLC_MEM_SLP_CNTL);
5714 0 : if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5715 0 : data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5716 0 : WREG32(mmRLC_MEM_SLP_CNTL, data);
5717 : }
5718 :
5719 : /* 3 - disable MGLS in CP */
5720 0 : data = RREG32(mmCP_MEM_SLP_CNTL);
5721 0 : if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5722 0 : data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5723 0 : WREG32(mmCP_MEM_SLP_CNTL, data);
5724 : }
5725 :
5726 : /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5727 0 : temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5728 0 : data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5729 : CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5730 0 : if (temp != data)
5731 0 : WREG32(mmCGTS_SM_CTRL_REG, data);
5732 :
5733 : /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5734 0 : gfx_v8_0_wait_for_rlc_serdes(adev);
5735 :
5736 : /* 6 - set mgcg override */
5737 0 : gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5738 :
5739 0 : udelay(50);
5740 :
5741 : /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5742 0 : gfx_v8_0_wait_for_rlc_serdes(adev);
5743 : }
5744 :
5745 0 : amdgpu_gfx_rlc_exit_safe_mode(adev);
5746 0 : }
5747 :
5748 0 : static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5749 : bool enable)
5750 : {
5751 : uint32_t temp, temp1, data, data1;
5752 :
5753 0 : temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5754 :
5755 0 : amdgpu_gfx_rlc_enter_safe_mode(adev);
5756 :
5757 0 : if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5758 0 : temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5759 0 : data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5760 0 : if (temp1 != data1)
5761 0 : WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5762 :
5763 : /* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5764 0 : gfx_v8_0_wait_for_rlc_serdes(adev);
5765 :
5766 : /* 2 - clear cgcg override */
5767 0 : gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5768 :
5769 : /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5770 0 : gfx_v8_0_wait_for_rlc_serdes(adev);
5771 :
5772 : /* 3 - write cmd to set CGLS */
5773 0 : gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5774 :
5775 : /* 4 - enable cgcg */
5776 0 : data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5777 :
5778 0 : if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5779 : /* enable cgls*/
5780 0 : data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5781 :
5782 0 : temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5783 0 : data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5784 :
5785 0 : if (temp1 != data1)
5786 0 : WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5787 : } else {
5788 0 : data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5789 : }
5790 :
5791 0 : if (temp != data)
5792 0 : WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5793 :
5794 : /* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
5795 : * Cmp_busy/GFX_Idle interrupts
5796 : */
5797 0 : gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5798 : } else {
5799 : /* disable cntx_empty_int_enable & GFX Idle interrupt */
5800 0 : gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5801 :
5802 : /* TEST CGCG */
5803 0 : temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5804 0 : data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5805 : RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5806 0 : if (temp1 != data1)
5807 0 : WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5808 :
5809 : /* read gfx register to wake up cgcg */
5810 0 : RREG32(mmCB_CGTT_SCLK_CTRL);
5811 0 : RREG32(mmCB_CGTT_SCLK_CTRL);
5812 0 : RREG32(mmCB_CGTT_SCLK_CTRL);
5813 0 : RREG32(mmCB_CGTT_SCLK_CTRL);
5814 :
5815 : /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5816 0 : gfx_v8_0_wait_for_rlc_serdes(adev);
5817 :
5818 : /* write cmd to Set CGCG Override */
5819 0 : gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5820 :
5821 : /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5822 0 : gfx_v8_0_wait_for_rlc_serdes(adev);
5823 :
5824 : /* write cmd to Clear CGLS */
5825 0 : gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5826 :
5827 : /* disable cgcg, cgls should be disabled too. */
5828 0 : data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5829 : RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5830 0 : if (temp != data)
5831 0 : WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5832 : /* enable interrupts again for PG */
5833 0 : gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5834 : }
5835 :
5836 0 : gfx_v8_0_wait_for_rlc_serdes(adev);
5837 :
5838 0 : amdgpu_gfx_rlc_exit_safe_mode(adev);
5839 0 : }
5840 0 : static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5841 : bool enable)
5842 : {
5843 0 : if (enable) {
5844 : /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5845 : * === MGCG + MGLS + TS(CG/LS) ===
5846 : */
5847 0 : gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5848 0 : gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5849 : } else {
5850 : /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5851 : * === CGCG + CGLS ===
5852 : */
5853 0 : gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5854 0 : gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5855 : }
5856 0 : return 0;
5857 : }
5858 :
5859 0 : static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5860 : enum amd_clockgating_state state)
5861 : {
5862 0 : uint32_t msg_id, pp_state = 0;
5863 0 : uint32_t pp_support_state = 0;
5864 :
5865 0 : if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5866 0 : if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5867 0 : pp_support_state = PP_STATE_SUPPORT_LS;
5868 0 : pp_state = PP_STATE_LS;
5869 : }
5870 0 : if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5871 0 : pp_support_state |= PP_STATE_SUPPORT_CG;
5872 0 : pp_state |= PP_STATE_CG;
5873 : }
5874 0 : if (state == AMD_CG_STATE_UNGATE)
5875 0 : pp_state = 0;
5876 :
5877 0 : msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5878 : PP_BLOCK_GFX_CG,
5879 : pp_support_state,
5880 : pp_state);
5881 0 : amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5882 : }
5883 :
5884 0 : if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5885 0 : if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5886 0 : pp_support_state = PP_STATE_SUPPORT_LS;
5887 0 : pp_state = PP_STATE_LS;
5888 : }
5889 :
5890 0 : if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5891 0 : pp_support_state |= PP_STATE_SUPPORT_CG;
5892 0 : pp_state |= PP_STATE_CG;
5893 : }
5894 :
5895 0 : if (state == AMD_CG_STATE_UNGATE)
5896 0 : pp_state = 0;
5897 :
5898 0 : msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5899 : PP_BLOCK_GFX_MG,
5900 : pp_support_state,
5901 : pp_state);
5902 0 : amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5903 : }
5904 :
5905 0 : return 0;
5906 : }
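/*
 * PP_CG_MSG_ID() packs four fields - group, block, which states are
 * supported (CG and/or LS), and the requested state - into one message
 * word for the SMU. For example, when both CGCG and CGLS are set in
 * cg_flags, the CG block above effectively sends:
 *
 *	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, PP_BLOCK_GFX_CG,
 *			      PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
 *			      PP_STATE_CG | PP_STATE_LS);
 */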
5907 :
5908 0 : static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5909 : enum amd_clockgating_state state)
5910 : {
5911 :
5912 0 : uint32_t msg_id, pp_state = 0;
5913 0 : uint32_t pp_support_state = 0;
5914 :
5915 0 : if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5916 0 : if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5917 0 : pp_support_state = PP_STATE_SUPPORT_LS;
5918 0 : pp_state = PP_STATE_LS;
5919 : }
5920 0 : if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5921 0 : pp_support_state |= PP_STATE_SUPPORT_CG;
5922 0 : pp_state |= PP_STATE_CG;
5923 : }
5924 0 : if (state == AMD_CG_STATE_UNGATE)
5925 0 : pp_state = 0;
5926 :
5927 0 : msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5928 : PP_BLOCK_GFX_CG,
5929 : pp_support_state,
5930 : pp_state);
5931 0 : amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5932 : }
5933 :
5934 0 : if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5935 0 : if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5936 0 : pp_support_state = PP_STATE_SUPPORT_LS;
5937 0 : pp_state = PP_STATE_LS;
5938 : }
5939 0 : if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5940 0 : pp_support_state |= PP_STATE_SUPPORT_CG;
5941 0 : pp_state |= PP_STATE_CG;
5942 : }
5943 0 : if (state == AMD_CG_STATE_UNGATE)
5944 0 : pp_state = 0;
5945 :
5946 0 : msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5947 : PP_BLOCK_GFX_3D,
5948 : pp_support_state,
5949 : pp_state);
5950 0 : amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5951 : }
5952 :
5953 0 : if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5954 0 : if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5955 0 : pp_support_state = PP_STATE_SUPPORT_LS;
5956 0 : pp_state = PP_STATE_LS;
5957 : }
5958 :
5959 0 : if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5960 0 : pp_support_state |= PP_STATE_SUPPORT_CG;
5961 0 : pp_state |= PP_STATE_CG;
5962 : }
5963 :
5964 0 : if (state == AMD_CG_STATE_UNGATE)
5965 0 : pp_state = 0;
5966 :
5967 0 : msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5968 : PP_BLOCK_GFX_MG,
5969 : pp_support_state,
5970 : pp_state);
5971 0 : amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5972 : }
5973 :
5974 0 : if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5975 0 : pp_support_state = PP_STATE_SUPPORT_LS;
5976 :
5977 0 : if (state == AMD_CG_STATE_UNGATE)
5978 : pp_state = 0;
5979 : else
5980 0 : pp_state = PP_STATE_LS;
5981 :
5982 0 : msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5983 : PP_BLOCK_GFX_RLC,
5984 : pp_support_state,
5985 : pp_state);
5986 0 : amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5987 : }
5988 :
5989 0 : if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5990 0 : pp_support_state = PP_STATE_SUPPORT_LS;
5991 :
5992 0 : if (state == AMD_CG_STATE_UNGATE)
5993 : pp_state = 0;
5994 : else
5995 0 : pp_state = PP_STATE_LS;
5996 0 : msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5997 : PP_BLOCK_GFX_CP,
5998 : pp_support_state,
5999 : pp_state);
6000 0 : amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6001 : }
6002 :
6003 0 : return 0;
6004 : }
6005 :
6006 0 : static int gfx_v8_0_set_clockgating_state(void *handle,
6007 : enum amd_clockgating_state state)
6008 : {
6009 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6010 :
6011 0 : if (amdgpu_sriov_vf(adev))
6012 : return 0;
6013 :
6014 0 : switch (adev->asic_type) {
6015 : case CHIP_FIJI:
6016 : case CHIP_CARRIZO:
6017 : case CHIP_STONEY:
6018 0 : gfx_v8_0_update_gfx_clock_gating(adev,
6019 : state == AMD_CG_STATE_GATE);
6020 0 : break;
6021 : case CHIP_TONGA:
6022 0 : gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6023 0 : break;
6024 : case CHIP_POLARIS10:
6025 : case CHIP_POLARIS11:
6026 : case CHIP_POLARIS12:
6027 : case CHIP_VEGAM:
6028 0 : gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6029 0 : break;
6030 : default:
6031 : break;
6032 : }
6033 : return 0;
6034 : }
6035 :
6036 0 : static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6037 : {
6038 0 : return *ring->rptr_cpu_addr;
6039 : }
6040 :
6041 0 : static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6042 : {
6043 0 : struct amdgpu_device *adev = ring->adev;
6044 :
6045 0 : if (ring->use_doorbell)
6046 : /* XXX check if swapping is necessary on BE */
6047 0 : return *ring->wptr_cpu_addr;
6048 : else
6049 0 : return RREG32(mmCP_RB0_WPTR);
6050 : }
6051 :
6052 0 : static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6053 : {
6054 0 : struct amdgpu_device *adev = ring->adev;
6055 :
6056 0 : if (ring->use_doorbell) {
6057 : /* XXX check if swapping is necessary on BE */
6058 0 : *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
6059 0 : WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6060 : } else {
6061 0 : WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6062 0 : (void)RREG32(mmCP_RB0_WPTR);
6063 : }
6064 0 : }
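/*
 * The read-back of mmCP_RB0_WPTR in the non-doorbell path above acts
 * as a flush of the posted MMIO write: it guarantees the new write
 * pointer has reached the device before the caller proceeds, a common
 * PCI posted-write idiom.
 */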
6065 :
6066 0 : static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6067 : {
6068 : u32 ref_and_mask, reg_mem_engine;
6069 :
6070 0 : if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6071 : (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6072 0 : switch (ring->me) {
6073 : case 1:
6074 0 : ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6075 0 : break;
6076 : case 2:
6077 0 : ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6078 0 : break;
6079 : default:
6080 : return;
6081 : }
6082 : reg_mem_engine = 0;
6083 : } else {
6084 : ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6085 : reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6086 : }
6087 :
6088 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6089 0 : amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6090 : WAIT_REG_MEM_FUNCTION(3) | /* == */
6091 : reg_mem_engine));
6092 0 : amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6093 0 : amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6094 0 : amdgpu_ring_write(ring, ref_and_mask);
6095 0 : amdgpu_ring_write(ring, ref_and_mask);
6096 0 : amdgpu_ring_write(ring, 0x20); /* poll interval */
6097 : }
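/*
 * Layout of the 7-DW WAIT_REG_MEM packet emitted above:
 *	DW0: header (PACKET3_WAIT_REG_MEM, count 5)
 *	DW1: operation (write then wait) | function (==) | engine
 *	DW2: register to write (mmGPU_HDP_FLUSH_REQ)
 *	DW3: register to poll  (mmGPU_HDP_FLUSH_DONE)
 *	DW4: reference value written to REQ
 *	DW5: mask applied when polling DONE
 *	DW6: poll interval
 * The CP sets the per-client bit in REQ and spins until the same bit
 * appears in DONE, i.e. until HDP has flushed for this ring.
 */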
6098 :
6099 0 : static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6100 : {
6101 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6102 0 : amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6103 : EVENT_INDEX(4));
6104 :
6105 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6106 0 : amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6107 : EVENT_INDEX(0));
6108 0 : }
6109 :
6110 0 : static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6111 : struct amdgpu_job *job,
6112 : struct amdgpu_ib *ib,
6113 : uint32_t flags)
6114 : {
6115 0 : unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6116 0 : u32 header, control = 0;
6117 :
6118 0 : if (ib->flags & AMDGPU_IB_FLAG_CE)
6119 : header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6120 : else
6121 0 : header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6122 :
6123 0 : control |= ib->length_dw | (vmid << 24);
6124 :
6125 0 : if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6126 0 : control |= INDIRECT_BUFFER_PRE_ENB(1);
6127 :
6128 0 : if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
6129 0 : gfx_v8_0_ring_emit_de_meta(ring);
6130 : }
6131 :
6132 0 : amdgpu_ring_write(ring, header);
6133 0 : amdgpu_ring_write(ring,
6134 : #ifdef __BIG_ENDIAN
6135 : (2 << 0) |
6136 : #endif
6137 0 : (ib->gpu_addr & 0xFFFFFFFC));
6138 0 : amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6139 0 : amdgpu_ring_write(ring, control);
6140 0 : }
6141 :
6142 0 : static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6143 : struct amdgpu_job *job,
6144 : struct amdgpu_ib *ib,
6145 : uint32_t flags)
6146 : {
6147 0 : unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6148 0 : u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6149 :
6150 : /* Currently there is a high likelihood of a wave ID mismatch
6151 : * between ME and GDS, leading to a hw deadlock, because ME generates
6152 : * different wave IDs than the GDS expects. This situation happens
6153 : * randomly when at least 5 compute pipes use GDS ordered append.
6154 : * The wave IDs generated by ME are also wrong after suspend/resume.
6155 : * Those are probably bugs somewhere else in the kernel driver.
6156 : *
6157 : * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
6158 : * GDS to 0 for this ring (me/pipe).
6159 : */
6160 0 : if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
6161 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
6162 0 : amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
6163 0 : amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
6164 : }
6165 :
6166 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6167 0 : amdgpu_ring_write(ring,
6168 : #ifdef __BIG_ENDIAN
6169 : (2 << 0) |
6170 : #endif
6171 0 : (ib->gpu_addr & 0xFFFFFFFC));
6172 0 : amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6173 0 : amdgpu_ring_write(ring, control);
6174 0 : }
6175 :
6176 0 : static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6177 : u64 seq, unsigned flags)
6178 : {
6179 0 : bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6180 0 : bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6181 :
6182 : /* Workaround for cache flush problems: first send a dummy EOP
6183 : * event down the pipe with a sequence number one below the real one.
6184 : */
6185 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6186 0 : amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6187 : EOP_TC_ACTION_EN |
6188 : EOP_TC_WB_ACTION_EN |
6189 : EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6190 : EVENT_INDEX(5)));
6191 0 : amdgpu_ring_write(ring, addr & 0xfffffffc);
6192 0 : amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6193 : DATA_SEL(1) | INT_SEL(0));
6194 0 : amdgpu_ring_write(ring, lower_32_bits(seq - 1));
6195 0 : amdgpu_ring_write(ring, upper_32_bits(seq - 1));
6196 :
6197 : /* Then send the real EOP event down the pipe:
6198 : * EVENT_WRITE_EOP - flush caches, send int */
6199 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6200 0 : amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6201 : EOP_TC_ACTION_EN |
6202 : EOP_TC_WB_ACTION_EN |
6203 : EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6204 : EVENT_INDEX(5)));
6205 0 : amdgpu_ring_write(ring, addr & 0xfffffffc);
6206 0 : amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6207 0 : DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6208 0 : amdgpu_ring_write(ring, lower_32_bits(seq));
6209 0 : amdgpu_ring_write(ring, upper_32_bits(seq));
6210 :
6211 0 : }
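/*
 * The dummy EOP with (seq - 1) never satisfies anyone waiting on seq,
 * so its only effect is to trigger the cache flush early; the second
 * EOP then publishes the real sequence number and, if requested, the
 * interrupt. This keeps the fence from becoming visible before the
 * flush has actually run.
 */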
6212 :
6213 0 : static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6214 : {
6215 0 : int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6216 0 : uint32_t seq = ring->fence_drv.sync_seq;
6217 0 : uint64_t addr = ring->fence_drv.gpu_addr;
6218 :
6219 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6220 0 : amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6221 0 : WAIT_REG_MEM_FUNCTION(3) | /* equal */
6222 0 : WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6223 0 : amdgpu_ring_write(ring, addr & 0xfffffffc);
6224 0 : amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6225 0 : amdgpu_ring_write(ring, seq);
6226 0 : amdgpu_ring_write(ring, 0xffffffff);
6227 0 : amdgpu_ring_write(ring, 4); /* poll interval */
6228 0 : }
6229 :
6230 0 : static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6231 : unsigned vmid, uint64_t pd_addr)
6232 : {
6233 0 : int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6234 :
6235 0 : amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6236 :
6237 : /* wait for the invalidate to complete */
6238 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6239 0 : amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6240 : WAIT_REG_MEM_FUNCTION(0) | /* always */
6241 : WAIT_REG_MEM_ENGINE(0))); /* me */
6242 0 : amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6243 0 : amdgpu_ring_write(ring, 0);
6244 0 : amdgpu_ring_write(ring, 0); /* ref */
6245 0 : amdgpu_ring_write(ring, 0); /* mask */
6246 0 : amdgpu_ring_write(ring, 0x20); /* poll interval */
6247 :
6248 : /* compute doesn't have PFP */
6249 0 : if (usepfp) {
6250 : /* sync PFP to ME, otherwise we might get invalid PFP reads */
6251 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6252 0 : amdgpu_ring_write(ring, 0x0);
6253 : }
6254 0 : }
6255 :
6256 0 : static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6257 : {
6258 0 : return *ring->wptr_cpu_addr;
6259 : }
6260 :
6261 0 : static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6262 : {
6263 0 : struct amdgpu_device *adev = ring->adev;
6264 :
6265 : /* XXX check if swapping is necessary on BE */
6266 0 : *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
6267 0 : WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6268 0 : }
6269 :
6270 0 : static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6271 : u64 addr, u64 seq,
6272 : unsigned flags)
6273 : {
6274 0 : bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6275 0 : bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6276 :
6277 : /* RELEASE_MEM - flush caches, send int */
6278 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6279 0 : amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6280 : EOP_TC_ACTION_EN |
6281 : EOP_TC_WB_ACTION_EN |
6282 : EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6283 : EVENT_INDEX(5)));
6284 0 : amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6285 0 : amdgpu_ring_write(ring, addr & 0xfffffffc);
6286 0 : amdgpu_ring_write(ring, upper_32_bits(addr));
6287 0 : amdgpu_ring_write(ring, lower_32_bits(seq));
6288 0 : amdgpu_ring_write(ring, upper_32_bits(seq));
6289 0 : }
6290 :
6291 0 : static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6292 : u64 seq, unsigned int flags)
6293 : {
6294 : /* we only allocate 32bit for each seq wb address */
6295 0 : BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6296 :
6297 : /* write fence seq to the "addr" */
6298 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6299 0 : amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6300 : WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6301 0 : amdgpu_ring_write(ring, lower_32_bits(addr));
6302 0 : amdgpu_ring_write(ring, upper_32_bits(addr));
6303 0 : amdgpu_ring_write(ring, lower_32_bits(seq));
6304 :
6305 0 : if (flags & AMDGPU_FENCE_FLAG_INT) {
6306 : /* set register to trigger INT */
6307 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6308 0 : amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6309 : WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6310 0 : amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6311 0 : amdgpu_ring_write(ring, 0);
6312 0 : amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6313 : }
6314 0 : }
6315 :
6316 0 : static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6317 : {
6318 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6319 0 : amdgpu_ring_write(ring, 0);
6320 0 : }
6321 :
6322 0 : static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6323 : {
6324 0 : uint32_t dw2 = 0;
6325 :
6326 0 : if (amdgpu_sriov_vf(ring->adev))
6327 0 : gfx_v8_0_ring_emit_ce_meta(ring);
6328 :
6329 0 : dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6330 0 : if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6331 0 : gfx_v8_0_ring_emit_vgt_flush(ring);
6332 : /* set load_global_config & load_global_uconfig */
6333 0 : dw2 |= 0x8001;
6334 : /* set load_cs_sh_regs */
6335 0 : dw2 |= 0x01000000;
6336 : /* set load_per_context_state & load_gfx_sh_regs for GFX */
6337 0 : dw2 |= 0x10002;
6338 :
6339 : /* set load_ce_ram if a preamble is present */
6340 0 : if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6341 0 : dw2 |= 0x10000000;
6342 : } else {
6343 : /* still load_ce_ram if this is the first time a preamble is
6344 : * presented, even though no context switch happens.
6345 : */
6346 0 : if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6347 0 : dw2 |= 0x10000000;
6348 : }
6349 :
6350 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6351 0 : amdgpu_ring_write(ring, dw2);
6352 0 : amdgpu_ring_write(ring, 0);
6353 0 : }
6354 :
6355 0 : static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6356 : {
6357 : unsigned ret;
6358 :
6359 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6360 0 : amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6361 0 : amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6362 0 : amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
6363 0 : ret = ring->wptr & ring->buf_mask;
6364 0 : amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6365 0 : return ret;
6366 : }
6367 :
6368 0 : static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6369 : {
6370 : unsigned cur;
6371 :
6372 0 : BUG_ON(offset > ring->buf_mask);
6373 0 : BUG_ON(ring->ring[offset] != 0x55aa55aa);
6374 :
6375 0 : cur = (ring->wptr & ring->buf_mask) - 1;
6376 0 : if (likely(cur > offset))
6377 0 : ring->ring[offset] = cur - offset;
6378 : else
6379 0 : ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6380 0 : }
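/*
 * Worked example for the patch math above, assuming a 256-DW ring
 * (buf_mask == 0xff): if the COND_EXEC placeholder sits at offset 0x20
 * and emission ends with wptr at 0x30, then cur = 0x2f and the patched
 * count is 0x2f - 0x20 = 0xf DWs to skip. If wptr has wrapped, the
 * else branch adds the full ring length in DWs (ring_size >> 2) to
 * account for the part before the wrap.
 */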
6381 :
6382 0 : static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
6383 : uint32_t reg_val_offs)
6384 : {
6385 0 : struct amdgpu_device *adev = ring->adev;
6386 :
6387 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6388 0 : amdgpu_ring_write(ring, 0 | /* src: register*/
6389 : (5 << 8) | /* dst: memory */
6390 : (1 << 20)); /* write confirm */
6391 0 : amdgpu_ring_write(ring, reg);
6392 0 : amdgpu_ring_write(ring, 0);
6393 0 : amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6394 : reg_val_offs * 4));
6395 0 : amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6396 : reg_val_offs * 4));
6397 0 : }
6398 :
6399 0 : static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6400 : uint32_t val)
6401 : {
6402 : uint32_t cmd;
6403 :
6404 0 : switch (ring->funcs->type) {
6405 : case AMDGPU_RING_TYPE_GFX:
6406 : cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6407 : break;
6408 : case AMDGPU_RING_TYPE_KIQ:
6409 0 : cmd = 1 << 16; /* no inc addr */
6410 0 : break;
6411 : default:
6412 0 : cmd = WR_CONFIRM;
6413 0 : break;
6414 : }
6415 :
6416 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6417 0 : amdgpu_ring_write(ring, cmd);
6418 0 : amdgpu_ring_write(ring, reg);
6419 0 : amdgpu_ring_write(ring, 0);
6420 0 : amdgpu_ring_write(ring, val);
6421 0 : }
6422 :
6423 0 : static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6424 : {
6425 0 : struct amdgpu_device *adev = ring->adev;
6426 0 : uint32_t value = 0;
6427 :
6428 0 : value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6429 0 : value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6430 0 : value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6431 0 : value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6432 0 : WREG32(mmSQ_CMD, value);
6433 0 : }
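/*
 * Hedged reading of the SQ_CMD write above: CMD 0x3 with MODE 0x1 and
 * CHECK_VMID=1 appears to direct the SQ to act only on waves owned by
 * the given VMID, which is what makes this a per-job "soft" recovery
 * rather than a full GPU reset.
 */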
6434 :
6435 0 : static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6436 : enum amdgpu_interrupt_state state)
6437 : {
6438 0 : WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6439 : state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6440 0 : }
6441 :
6442 0 : static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6443 : int me, int pipe,
6444 : enum amdgpu_interrupt_state state)
6445 : {
6446 : u32 mec_int_cntl, mec_int_cntl_reg;
6447 :
6448 : /*
6449 : * amdgpu controls only the first MEC. That's why this function only
6450 : * handles the setting of interrupts for this specific MEC. All other
6451 : * pipes' interrupts are set by amdkfd.
6452 : */
6453 :
6454 0 : if (me == 1) {
6455 0 : switch (pipe) {
6456 : case 0:
6457 : mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6458 : break;
6459 : case 1:
6460 0 : mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6461 0 : break;
6462 : case 2:
6463 0 : mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6464 0 : break;
6465 : case 3:
6466 0 : mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6467 0 : break;
6468 : default:
6469 0 : DRM_DEBUG("invalid pipe %d\n", pipe);
6470 0 : return;
6471 : }
6472 : } else {
6473 0 : DRM_DEBUG("invalid me %d\n", me);
6474 0 : return;
6475 : }
6476 :
6477 0 : switch (state) {
6478 : case AMDGPU_IRQ_STATE_DISABLE:
6479 0 : mec_int_cntl = RREG32(mec_int_cntl_reg);
6480 0 : mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6481 0 : WREG32(mec_int_cntl_reg, mec_int_cntl);
6482 0 : break;
6483 : case AMDGPU_IRQ_STATE_ENABLE:
6484 0 : mec_int_cntl = RREG32(mec_int_cntl_reg);
6485 0 : mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6486 0 : WREG32(mec_int_cntl_reg, mec_int_cntl);
6487 0 : break;
6488 : default:
6489 : break;
6490 : }
6491 : }
6492 :
6493 0 : static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6494 : struct amdgpu_irq_src *source,
6495 : unsigned type,
6496 : enum amdgpu_interrupt_state state)
6497 : {
6498 0 : WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6499 : state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6500 :
6501 0 : return 0;
6502 : }
6503 :
6504 0 : static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6505 : struct amdgpu_irq_src *source,
6506 : unsigned type,
6507 : enum amdgpu_interrupt_state state)
6508 : {
6509 0 : WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6510 : state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6511 :
6512 0 : return 0;
6513 : }
6514 :
6515 0 : static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6516 : struct amdgpu_irq_src *src,
6517 : unsigned type,
6518 : enum amdgpu_interrupt_state state)
6519 : {
6520 0 : switch (type) {
6521 : case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6522 0 : gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6523 0 : break;
6524 : case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6525 0 : gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6526 0 : break;
6527 : case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6528 0 : gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6529 0 : break;
6530 : case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6531 0 : gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6532 0 : break;
6533 : case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6534 0 : gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6535 0 : break;
6536 : case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6537 : gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6538 : break;
6539 : case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6540 : gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6541 : break;
6542 : case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6543 : gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6544 : break;
6545 : case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6546 : gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6547 : break;
6548 : default:
6549 : break;
6550 : }
6551 0 : return 0;
6552 : }
6553 :
6554 0 : static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6555 : struct amdgpu_irq_src *source,
6556 : unsigned int type,
6557 : enum amdgpu_interrupt_state state)
6558 : {
6559 : int enable_flag;
6560 :
6561 0 : switch (state) {
6562 : case AMDGPU_IRQ_STATE_DISABLE:
6563 : enable_flag = 0;
6564 : break;
6565 :
6566 : case AMDGPU_IRQ_STATE_ENABLE:
6567 0 : enable_flag = 1;
6568 0 : break;
6569 :
6570 : default:
6571 : return -EINVAL;
6572 : }
6573 :
6574 0 : WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6575 0 : WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6576 0 : WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6577 0 : WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6578 0 : WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6579 0 : WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6580 : enable_flag);
6581 0 : WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6582 : enable_flag);
6583 0 : WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6584 : enable_flag);
6585 0 : WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6586 : enable_flag);
6587 0 : WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6588 : enable_flag);
6589 0 : WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6590 : enable_flag);
6591 0 : WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6592 : enable_flag);
6593 0 : WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6594 : enable_flag);
6595 :
6596 0 : return 0;
6597 : }
6598 :
6599 0 : static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6600 : struct amdgpu_irq_src *source,
6601 : unsigned int type,
6602 : enum amdgpu_interrupt_state state)
6603 : {
6604 : int enable_flag;
6605 :
6606 0 : switch (state) {
6607 : case AMDGPU_IRQ_STATE_DISABLE:
6608 : enable_flag = 1;
6609 : break;
6610 :
6611 : case AMDGPU_IRQ_STATE_ENABLE:
6612 0 : enable_flag = 0;
6613 0 : break;
6614 :
6615 : default:
6616 : return -EINVAL;
6617 : }
6618 :
6619 0 : WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6620 : enable_flag);
6621 :
6622 0 : return 0;
6623 : }
6624 :
6625 0 : static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6626 : struct amdgpu_irq_src *source,
6627 : struct amdgpu_iv_entry *entry)
6628 : {
6629 : int i;
6630 : u8 me_id, pipe_id, queue_id;
6631 : struct amdgpu_ring *ring;
6632 :
6633 0 : DRM_DEBUG("IH: CP EOP\n");
6634 0 : me_id = (entry->ring_id & 0x0c) >> 2;
6635 0 : pipe_id = (entry->ring_id & 0x03) >> 0;
6636 0 : queue_id = (entry->ring_id & 0x70) >> 4;
6637 :
6638 0 : switch (me_id) {
6639 : case 0:
6640 0 : amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6641 0 : break;
6642 : case 1:
6643 : case 2:
6644 0 : for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6645 0 : ring = &adev->gfx.compute_ring[i];
6646 : /* Per-queue interrupts are supported for the MEC starting from VI.
6647 : * The interrupt can only be enabled/disabled per pipe, not per queue.
6648 : */
6649 0 : if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6650 0 : amdgpu_fence_process(ring);
6651 : }
6652 : break;
6653 : }
6654 0 : return 0;
6655 : }
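/*
 * IV ring_id layout used by the decode above (and again in
 * gfx_v8_0_fault() below):
 *	bits [1:0]: pipe
 *	bits [3:2]: me   (0 = gfx, 1/2 = compute MECs)
 *	bits [6:4]: queue
 * e.g. ring_id 0x25 decodes to me 1, pipe 1, queue 2.
 */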
6656 :
6657 0 : static void gfx_v8_0_fault(struct amdgpu_device *adev,
6658 : struct amdgpu_iv_entry *entry)
6659 : {
6660 : u8 me_id, pipe_id, queue_id;
6661 : struct amdgpu_ring *ring;
6662 : int i;
6663 :
6664 0 : me_id = (entry->ring_id & 0x0c) >> 2;
6665 0 : pipe_id = (entry->ring_id & 0x03) >> 0;
6666 0 : queue_id = (entry->ring_id & 0x70) >> 4;
6667 :
6668 0 : switch (me_id) {
6669 : case 0:
6670 0 : drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6671 : break;
6672 : case 1:
6673 : case 2:
6674 0 : for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6675 0 : ring = &adev->gfx.compute_ring[i];
6676 0 : if (ring->me == me_id && ring->pipe == pipe_id &&
6677 0 : ring->queue == queue_id)
6678 0 : drm_sched_fault(&ring->sched);
6679 : }
6680 : break;
6681 : }
6682 0 : }
6683 :
6684 0 : static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6685 : struct amdgpu_irq_src *source,
6686 : struct amdgpu_iv_entry *entry)
6687 : {
6688 0 : DRM_ERROR("Illegal register access in command stream\n");
6689 0 : gfx_v8_0_fault(adev, entry);
6690 0 : return 0;
6691 : }
6692 :
6693 0 : static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6694 : struct amdgpu_irq_src *source,
6695 : struct amdgpu_iv_entry *entry)
6696 : {
6697 0 : DRM_ERROR("Illegal instruction in command stream\n");
6698 0 : gfx_v8_0_fault(adev, entry);
6699 0 : return 0;
6700 : }
6701 :
6702 0 : static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
6703 : struct amdgpu_irq_src *source,
6704 : struct amdgpu_iv_entry *entry)
6705 : {
6706 0 : DRM_ERROR("CP EDC/ECC error detected.");
6707 0 : return 0;
6708 : }
6709 :
6710 0 : static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data,
6711 : bool from_wq)
6712 : {
6713 : u32 enc, se_id, sh_id, cu_id;
6714 : char type[20];
6715 0 : int sq_edc_source = -1;
6716 :
6717 0 : enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6718 0 : se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6719 :
6720 0 : switch (enc) {
6721 : case 0:
6722 0 : DRM_INFO("SQ general purpose intr detected:"
6723 : "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6724 : "host_cmd_overflow %d, cmd_timestamp %d,"
6725 : "reg_timestamp %d, thread_trace_buff_full %d,"
6726 : "wlt %d, thread_trace %d.\n",
6727 : se_id,
6728 : REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6729 : REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6730 : REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6731 : REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6732 : REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6733 : REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6734 : REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6735 : REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6736 : );
6737 0 : break;
6738 : case 1:
6739 : case 2:
6740 :
6741 0 : cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6742 0 : sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6743 :
6744 : /*
6745 : * This function can be called either directly from the ISR
6746 : * or from a bottom half (workqueue); only in the latter case
6747 : * is it safe to lock grbm_idx and read the SQ_EDC_INFO instance.
6748 : */
6749 0 : if (from_wq) {
6750 0 : mutex_lock(&adev->grbm_idx_mutex);
6751 0 : gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6752 :
6753 0 : sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6754 :
6755 0 : gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6756 0 : mutex_unlock(&adev->grbm_idx_mutex);
6757 : }
6758 :
6759 0 : if (enc == 1)
6760 0 : sprintf(type, "instruction intr");
6761 : else
6762 0 : sprintf(type, "EDC/ECC error");
6763 :
6764 0 : DRM_INFO(
6765 : "SQ %s detected: "
6766 : "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6767 : "trap %s, sq_ed_info.source %s.\n",
6768 : type, se_id, sh_id, cu_id,
6769 : REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6770 : REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6771 : REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6772 : REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6773 : (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6774 : );
6775 0 : break;
6776 : default:
6777 0 : DRM_ERROR("SQ invalid encoding type\n.");
6778 : }
6779 0 : }
6780 :
6781 0 : static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6782 : {
6783 :
6784 0 : struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6785 0 : struct sq_work *sq_work = container_of(work, struct sq_work, work);
6786 :
6787 0 : gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data, true);
6788 0 : }
6789 :
6790 0 : static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6791 : struct amdgpu_irq_src *source,
6792 : struct amdgpu_iv_entry *entry)
6793 : {
6794 0 : unsigned ih_data = entry->src_data[0];
6795 :
6796 : /*
6797 : * Try to submit work so SQ_EDC_INFO can be accessed from the
6798 : * bottom half. If the previous work submission hasn't finished
6799 : * yet, just print whatever info is possible directly from the ISR.
6800 : */
6801 0 : if (work_pending(&adev->gfx.sq_work.work)) {
6802 0 : gfx_v8_0_parse_sq_irq(adev, ih_data, false);
6803 : } else {
6804 0 : adev->gfx.sq_work.ih_data = ih_data;
6805 0 : schedule_work(&adev->gfx.sq_work.work);
6806 : }
6807 :
6808 0 : return 0;
6809 : }
6810 :
6811 0 : static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring)
6812 : {
6813 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
6814 0 : amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6815 : PACKET3_TC_ACTION_ENA |
6816 : PACKET3_SH_KCACHE_ACTION_ENA |
6817 : PACKET3_SH_ICACHE_ACTION_ENA |
6818 : PACKET3_TC_WB_ACTION_ENA); /* CP_COHER_CNTL */
6819 0 : amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
6820 0 : amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6821 0 : amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
6822 0 : }
6823 :
6824 0 : static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
6825 : {
6826 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6827 0 : amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6828 : PACKET3_TC_ACTION_ENA |
6829 : PACKET3_SH_KCACHE_ACTION_ENA |
6830 : PACKET3_SH_ICACHE_ACTION_ENA |
6831 : PACKET3_TC_WB_ACTION_ENA); /* CP_COHER_CNTL */
6832 0 : amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
6833 0 : amdgpu_ring_write(ring, 0xff); /* CP_COHER_SIZE_HI */
6834 0 : amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6835 0 : amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
6836 0 : amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
6837 0 : }
6838 :
6839 :
6840 : /* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are the same */
6841 : #define mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT 0x0000007f
6842 0 : static void gfx_v8_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6843 : uint32_t pipe, bool enable)
6844 : {
6845 : uint32_t val;
6846 : uint32_t wcl_cs_reg;
6847 :
6848 0 : val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT;
6849 :
6850 0 : switch (pipe) {
6851 : case 0:
6852 : wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS0;
6853 : break;
6854 : case 1:
6855 0 : wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS1;
6856 0 : break;
6857 : case 2:
6858 0 : wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS2;
6859 0 : break;
6860 : case 3:
6861 0 : wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS3;
6862 0 : break;
6863 : default:
6864 0 : DRM_DEBUG("invalid pipe %d\n", pipe);
6865 0 : return;
6866 : }
6867 :
6868 0 : amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6869 :
6870 : }
6871 :
6872 : #define mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT 0x07ffffff
6873 0 : static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6874 : {
6875 0 : struct amdgpu_device *adev = ring->adev;
6876 : uint32_t val;
6877 : int i;
6878 :
6879 : /* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register that
6880 : * limits the number of gfx waves. Setting the low 5 bits makes sure
6881 : * gfx only gets around 25% of the GPU's wave resources.
6882 : */
6883 0 : val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6884 0 : amdgpu_ring_emit_wreg(ring, mmSPI_WCL_PIPE_PERCENT_GFX, val);
6885 :
6886 : /* Restrict waves for normal/low-priority compute queues as well,
6887 : * to get the best QoS for high-priority compute jobs.
6888 : *
6889 : * amdgpu controls only the first ME (CS pipes 0-3).
6890 : */
6891 0 : for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6892 0 : if (i != ring->pipe)
6893 0 : gfx_v8_0_emit_wave_limit_cs(ring, i, enable);
6894 :
6895 : }
6896 :
6897 0 : }
6898 :
6899 : static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6900 : .name = "gfx_v8_0",
6901 : .early_init = gfx_v8_0_early_init,
6902 : .late_init = gfx_v8_0_late_init,
6903 : .sw_init = gfx_v8_0_sw_init,
6904 : .sw_fini = gfx_v8_0_sw_fini,
6905 : .hw_init = gfx_v8_0_hw_init,
6906 : .hw_fini = gfx_v8_0_hw_fini,
6907 : .suspend = gfx_v8_0_suspend,
6908 : .resume = gfx_v8_0_resume,
6909 : .is_idle = gfx_v8_0_is_idle,
6910 : .wait_for_idle = gfx_v8_0_wait_for_idle,
6911 : .check_soft_reset = gfx_v8_0_check_soft_reset,
6912 : .pre_soft_reset = gfx_v8_0_pre_soft_reset,
6913 : .soft_reset = gfx_v8_0_soft_reset,
6914 : .post_soft_reset = gfx_v8_0_post_soft_reset,
6915 : .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6916 : .set_powergating_state = gfx_v8_0_set_powergating_state,
6917 : .get_clockgating_state = gfx_v8_0_get_clockgating_state,
6918 : };
6919 :
6920 : static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6921 : .type = AMDGPU_RING_TYPE_GFX,
6922 : .align_mask = 0xff,
6923 : .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6924 : .support_64bit_ptrs = false,
6925 : .get_rptr = gfx_v8_0_ring_get_rptr,
6926 : .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6927 : .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6928 : .emit_frame_size = /* maximum 215 DWs when counting 16 IBs in */
6929 : 5 + /* COND_EXEC */
6930 : 7 + /* PIPELINE_SYNC */
6931 : VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
6932 : 12 + /* FENCE for VM_FLUSH */
6933 : 20 + /* GDS switch */
6934 : 4 + /* double SWITCH_BUFFER,
6935 : the first COND_EXEC jumps to the place just
6936 : prior to this double SWITCH_BUFFER */
6937 : 5 + /* COND_EXEC */
6938 : 7 + /* HDP_flush */
6939 : 4 + /* VGT_flush */
6940 : 14 + /* CE_META */
6941 : 31 + /* DE_META */
6942 : 3 + /* CNTX_CTRL */
6943 : 5 + /* HDP_INVL */
6944 : 12 + 12 + /* FENCE x2 */
6945 : 2 + /* SWITCH_BUFFER */
6946 : 5, /* SURFACE_SYNC */
6947 : .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
6948 : .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6949 : .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6950 : .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6951 : .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6952 : .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6953 : .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6954 : .test_ring = gfx_v8_0_ring_test_ring,
6955 : .test_ib = gfx_v8_0_ring_test_ib,
6956 : .insert_nop = amdgpu_ring_insert_nop,
6957 : .pad_ib = amdgpu_ring_generic_pad_ib,
6958 : .emit_switch_buffer = gfx_v8_ring_emit_sb,
6959 : .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6960 : .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6961 : .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6962 : .emit_wreg = gfx_v8_0_ring_emit_wreg,
6963 : .soft_recovery = gfx_v8_0_ring_soft_recovery,
6964 : .emit_mem_sync = gfx_v8_0_emit_mem_sync,
6965 : };
6966 :
6967 : static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6968 : .type = AMDGPU_RING_TYPE_COMPUTE,
6969 : .align_mask = 0xff,
6970 : .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6971 : .support_64bit_ptrs = false,
6972 : .get_rptr = gfx_v8_0_ring_get_rptr,
6973 : .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6974 : .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6975 : .emit_frame_size =
6976 : 20 + /* gfx_v8_0_ring_emit_gds_switch */
6977 : 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6978 : 5 + /* hdp_invalidate */
6979 : 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6980 : VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
6981 : 7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6982 : 7 + /* gfx_v8_0_emit_mem_sync_compute */
6983 : 5 + /* gfx_v8_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
6984 : 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
6985 : .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
6986 : .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6987 : .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6988 : .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6989 : .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6990 : .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6991 : .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6992 : .test_ring = gfx_v8_0_ring_test_ring,
6993 : .test_ib = gfx_v8_0_ring_test_ib,
6994 : .insert_nop = amdgpu_ring_insert_nop,
6995 : .pad_ib = amdgpu_ring_generic_pad_ib,
6996 : .emit_wreg = gfx_v8_0_ring_emit_wreg,
6997 : .emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
6998 : .emit_wave_limit = gfx_v8_0_emit_wave_limit,
6999 : };
7000 :
7001 : static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
7002 : .type = AMDGPU_RING_TYPE_KIQ,
7003 : .align_mask = 0xff,
7004 : .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7005 : .support_64bit_ptrs = false,
7006 : .get_rptr = gfx_v8_0_ring_get_rptr,
7007 : .get_wptr = gfx_v8_0_ring_get_wptr_compute,
7008 : .set_wptr = gfx_v8_0_ring_set_wptr_compute,
7009 : .emit_frame_size =
7010 : 20 + /* gfx_v8_0_ring_emit_gds_switch */
7011 : 7 + /* gfx_v8_0_ring_emit_hdp_flush */
7012 : 5 + /* hdp_invalidate */
7013 : 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7014 : 17 + /* gfx_v8_0_ring_emit_vm_flush */
7015 : 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7016 : .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
7017 : .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
7018 : .test_ring = gfx_v8_0_ring_test_ring,
7019 : .insert_nop = amdgpu_ring_insert_nop,
7020 : .pad_ib = amdgpu_ring_generic_pad_ib,
7021 : .emit_rreg = gfx_v8_0_ring_emit_rreg,
7022 : .emit_wreg = gfx_v8_0_ring_emit_wreg,
7023 : };
7024 :
7025 : static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7026 : {
7027 : int i;
7028 :
7029 0 : adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7030 :
7031 0 : for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7032 0 : adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7033 :
7034 0 : for (i = 0; i < adev->gfx.num_compute_rings; i++)
7035 0 : adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7036 : }
7037 :
7038 : static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7039 : .set = gfx_v8_0_set_eop_interrupt_state,
7040 : .process = gfx_v8_0_eop_irq,
7041 : };
7042 :
7043 : static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7044 : .set = gfx_v8_0_set_priv_reg_fault_state,
7045 : .process = gfx_v8_0_priv_reg_irq,
7046 : };
7047 :
7048 : static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7049 : .set = gfx_v8_0_set_priv_inst_fault_state,
7050 : .process = gfx_v8_0_priv_inst_irq,
7051 : };
7052 :
7053 : static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
7054 : .set = gfx_v8_0_set_cp_ecc_int_state,
7055 : .process = gfx_v8_0_cp_ecc_error_irq,
7056 : };
7057 :
7058 : static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
7059 : .set = gfx_v8_0_set_sq_int_state,
7060 : .process = gfx_v8_0_sq_irq,
7061 : };
7062 :
7063 : static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7064 : {
7065 0 : adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7066 0 : adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7067 :
7068 0 : adev->gfx.priv_reg_irq.num_types = 1;
7069 0 : adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7070 :
7071 0 : adev->gfx.priv_inst_irq.num_types = 1;
7072 0 : adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7073 :
7074 0 : adev->gfx.cp_ecc_error_irq.num_types = 1;
7075 0 : adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
7076 :
7077 0 : adev->gfx.sq_irq.num_types = 1;
7078 0 : adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
7079 : }
7080 :
7081 : static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7082 : {
7083 0 : adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7084 : }
7085 :
7086 0 : static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7087 : {
7088 : /* init ASIC GDS info */
7089 0 : adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
7090 0 : adev->gds.gws_size = 64;
7091 0 : adev->gds.oa_size = 16;
7092 0 : adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
7093 0 : }
7094 :
7095 : static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7096 : u32 bitmap)
7097 : {
7098 : u32 data;
7099 :
7100 0 : if (!bitmap)
7101 : return;
7102 :
7103 0 : data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7104 0 : data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7105 :
7106 0 : WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7107 : }
7108 :
7109 0 : static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7110 : {
7111 : u32 data, mask;
7112 :
7113 0 : data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7114 0 : RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7115 :
7116 0 : mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7117 :
7118 0 : return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7119 : }
7120 :
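/*
 * Walk every shader engine/array to fill adev->gfx.cu_info: per-SE/SH
 * active CU bitmaps, the total active CU count and the always-on (AO)
 * CU mask. The first ao_cu_num active CUs in each SH are marked AO:
 * at most two per SH on APUs, all of them on dGPUs.
 */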
7121 0 : static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7122 : {
7123 0 : int i, j, k, counter, active_cu_number = 0;
7124 0 : u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7125 0 : struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7126 : unsigned disable_masks[4 * 2];
7127 : u32 ao_cu_num;
7128 :
7129 0 : memset(cu_info, 0, sizeof(*cu_info));
7130 :
7131 0 : if (adev->flags & AMD_IS_APU)
7132 : ao_cu_num = 2;
7133 : else
7134 0 : ao_cu_num = adev->gfx.config.max_cu_per_sh;
7135 :
7136 0 : amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7137 :
7138 0 : mutex_lock(&adev->grbm_idx_mutex);
7139 0 : for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7140 0 : for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7141 0 : mask = 1;
7142 0 : ao_bitmap = 0;
7143 0 : counter = 0;
7144 0 : gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7145 0 : if (i < 4 && j < 2)
7146 0 : gfx_v8_0_set_user_cu_inactive_bitmap(
7147 0 : adev, disable_masks[i * 2 + j]);
7148 0 : bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7149 0 : cu_info->bitmap[i][j] = bitmap;
7150 :
7151 0 : for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7152 0 : if (bitmap & mask) {
7153 0 : if (counter < ao_cu_num)
7154 0 : ao_bitmap |= mask;
7155 0 : counter++;
7156 : }
7157 0 : mask <<= 1;
7158 : }
7159 0 : active_cu_number += counter;
7160 0 : if (i < 2 && j < 2)
7161 0 : ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7162 0 : cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7163 : }
7164 : }
7165 0 : gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7166 0 : mutex_unlock(&adev->grbm_idx_mutex);
7167 :
7168 0 : cu_info->number = active_cu_number;
7169 0 : cu_info->ao_cu_mask = ao_cu_mask;
7170 0 : cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7171 0 : cu_info->max_waves_per_simd = 10;
7172 0 : cu_info->max_scratch_slots_per_cu = 32;
7173 0 : cu_info->wave_front_size = 64;
7174 0 : cu_info->lds_size = 64;
7175 0 : }
7176 :
7177 : const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7178 : {
7179 : .type = AMD_IP_BLOCK_TYPE_GFX,
7180 : .major = 8,
7181 : .minor = 0,
7182 : .rev = 0,
7183 : .funcs = &gfx_v8_0_ip_funcs,
7184 : };
7185 :
7186 : const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7187 : {
7188 : .type = AMD_IP_BLOCK_TYPE_GFX,
7189 : .major = 8,
7190 : .minor = 1,
7191 : .rev = 0,
7192 : .funcs = &gfx_v8_0_ip_funcs,
7193 : };
7194 :
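/*
 * Emit a WRITE_DATA packet that initializes the (zeroed) CE metadata in
 * the CSA (context save area); the payload layout and size depend on
 * whether the virtual function supports chained IBs.
 */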
7195 0 : static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7196 : {
7197 : uint64_t ce_payload_addr;
7198 : int cnt_ce;
7199 : union {
7200 : struct vi_ce_ib_state regular;
7201 : struct vi_ce_ib_state_chained_ib chained;
7202 0 : } ce_payload = {};
7203 :
7204 0 : if (ring->adev->virt.chained_ib_support) {
7205 0 : ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7206 : offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7207 0 : cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7208 : } else {
7209 0 : ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7210 : offsetof(struct vi_gfx_meta_data, ce_payload);
7211 0 : cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7212 : }
7213 :
7214 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7215 0 : amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7216 : WRITE_DATA_DST_SEL(8) |
7217 : WR_CONFIRM) |
7218 : WRITE_DATA_CACHE_POLICY(0));
7219 0 : amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7220 0 : amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7221 0 : amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7222 0 : }
7223 :
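/*
 * Emit the DE metadata into the CSA via a WRITE_DATA packet, recording
 * the GDS backup address (one page past the CSA base) in the payload so
 * GDS contents can be saved and restored.
 */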
7224 0 : static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7225 : {
7226 : uint64_t de_payload_addr, gds_addr, csa_addr;
7227 : int cnt_de;
7228 : union {
7229 : struct vi_de_ib_state regular;
7230 : struct vi_de_ib_state_chained_ib chained;
7231 0 : } de_payload = {};
7232 :
7233 0 : csa_addr = amdgpu_csa_vaddr(ring->adev);
7234 0 : gds_addr = csa_addr + 4096;
7235 0 : if (ring->adev->virt.chained_ib_support) {
7236 0 : de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7237 0 : de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7238 0 : de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7239 0 : cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7240 : } else {
7241 0 : de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7242 0 : de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7243 0 : de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7244 0 : cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7245 : }
7246 :
7247 0 : amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7248 0 : amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7249 : WRITE_DATA_DST_SEL(8) |
7250 : WR_CONFIRM) |
7251 : WRITE_DATA_CACHE_POLICY(0));
7252 0 : amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7253 0 : amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7254 0 : amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7255 0 : }
|