/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_rlc.h"
#include "amdgpu_ras.h"

/* delay 0.1 second to enable gfx off feature */
#define GFX_OFF_DELAY_ENABLE		msecs_to_jiffies(100)

#define GFX_OFF_NO_DELAY 0

/*
 * GPU GFX IP block helper functions.
 */

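/*
 * Compute (MEC) queues are tracked in a flat bitmap; a (mec, pipe, queue)
 * triple maps to bit
 *	(mec * num_pipe_per_mec + pipe) * num_queue_per_pipe + queue
 * and the helpers below convert between the two representations.  The same
 * scheme is used for the gfx (ME) queues further down.
 */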
int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
				int pipe, int queue)
{
	int bit = 0;

	bit += mec * adev->gfx.mec.num_pipe_per_mec
		* adev->gfx.mec.num_queue_per_pipe;
	bit += pipe * adev->gfx.mec.num_queue_per_pipe;
	bit += queue;

	return bit;
}

void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
					int *mec, int *pipe, int *queue)
{
	*queue = bit % adev->gfx.mec.num_queue_per_pipe;
	*pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
		% adev->gfx.mec.num_pipe_per_mec;
	*mec = (bit / adev->gfx.mec.num_queue_per_pipe)
		/ adev->gfx.mec.num_pipe_per_mec;
}

bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
				     int mec, int pipe, int queue)
{
	return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
			adev->gfx.mec.queue_bitmap);
}

int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
			       int me, int pipe, int queue)
{
	int bit = 0;

	bit += me * adev->gfx.me.num_pipe_per_me
		* adev->gfx.me.num_queue_per_pipe;
	bit += pipe * adev->gfx.me.num_queue_per_pipe;
	bit += queue;

	return bit;
}

void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
				int *me, int *pipe, int *queue)
{
	*queue = bit % adev->gfx.me.num_queue_per_pipe;
	*pipe = (bit / adev->gfx.me.num_queue_per_pipe)
		% adev->gfx.me.num_pipe_per_me;
	*me = (bit / adev->gfx.me.num_queue_per_pipe)
		/ adev->gfx.me.num_pipe_per_me;
}

bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
				    int me, int pipe, int queue)
{
	return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue),
			adev->gfx.me.queue_bitmap);
}

/**
 * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
 *
 * @mask: array in which the per-shader-array disable masks will be stored
 * @max_se: number of SEs
 * @max_sh: number of SHs
 *
 * The bitmask of CUs to be disabled in the shader array determined by se and
 * sh is stored in mask[se * max_sh + sh].
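 *
 * The module parameter is a comma-separated list of SE.SH.CU triples; for
 * example, amdgpu.disable_cu=0.0.4,1.0.10 masks off CU 4 in SE0/SH0 and
 * CU 10 in SE1/SH0.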
 */
void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh)
{
	unsigned se, sh, cu;
	const char *p;

	memset(mask, 0, sizeof(*mask) * max_se * max_sh);

	if (!amdgpu_disable_cu || !*amdgpu_disable_cu)
		return;

	p = amdgpu_disable_cu;
	for (;;) {
		char *next;
		int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);
		if (ret < 3) {
			DRM_ERROR("amdgpu: could not parse disable_cu\n");
			return;
		}

		if (se < max_se && sh < max_sh && cu < 16) {
			DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
			mask[se * max_sh + sh] |= 1u << cu;
		} else {
			DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
				  se, sh, cu);
		}

		next = strchr(p, ',');
		if (!next)
			break;
		p = next + 1;
	}
}

static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev)
{
	return amdgpu_async_gfx_ring && adev->gfx.me.num_pipe_per_me > 1;
}

static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev)
{
	if (amdgpu_compute_multipipe != -1) {
		DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
			 amdgpu_compute_multipipe);
		return amdgpu_compute_multipipe == 1;
	}

	/* FIXME: spreading the queues across pipes causes perf regressions
	 * on POLARIS11 compute workloads */
	if (adev->asic_type == CHIP_POLARIS11)
		return false;

	return adev->gfx.mec.num_mec > 1;
}

bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
						struct amdgpu_ring *ring)
{
	int queue = ring->queue;
	int pipe = ring->pipe;

	/* Policy: use pipe1 queue0 as high priority graphics queue if we
	 * have more than one gfx pipe.
	 */
	if (amdgpu_gfx_is_graphics_multipipe_capable(adev) &&
	    adev->gfx.num_gfx_rings > 1 && pipe == 1 && queue == 0) {
		int me = ring->me;
		int bit;

		bit = amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue);
		if (ring == &adev->gfx.gfx_ring[bit])
			return true;
	}

	return false;
}

bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
					       struct amdgpu_ring *ring)
{
	/* Policy: use 1st queue as high priority compute queue if we
	 * have more than one compute queue.
	 */
	if (adev->gfx.num_compute_rings > 1 &&
	    ring == &adev->gfx.compute_ring[0])
		return true;

	return false;
}

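/*
 * With the multipipe policy the compute rings are spread round-robin across
 * the MEC1 pipes: e.g. with 4 pipes and 8 queues per pipe, ring i lands on
 * pipe (i % 4), queue ((i / 4) % 8).  Without it, the first
 * max_queues_per_mec bits are taken consecutively, filling pipe 0 first.
 */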
void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
{
	int i, queue, pipe;
	bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev);
	int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
				     adev->gfx.mec.num_queue_per_pipe,
				     adev->gfx.num_compute_rings);

	if (multipipe_policy) {
		/* policy: spread queues evenly across all pipes on MEC1 only */
		for (i = 0; i < max_queues_per_mec; i++) {
			pipe = i % adev->gfx.mec.num_pipe_per_mec;
			queue = (i / adev->gfx.mec.num_pipe_per_mec) %
				adev->gfx.mec.num_queue_per_pipe;

			set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
				adev->gfx.mec.queue_bitmap);
		}
	} else {
		/* policy: amdgpu owns all queues in the given pipe */
		for (i = 0; i < max_queues_per_mec; ++i)
			set_bit(i, adev->gfx.mec.queue_bitmap);
	}

	dev_dbg(adev->dev, "mec queue bitmap weight=%d\n",
		bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
}

void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
{
	int i, queue, pipe;
	bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev);
	int max_queues_per_me = adev->gfx.me.num_pipe_per_me *
				adev->gfx.me.num_queue_per_pipe;

	if (multipipe_policy) {
		/* policy: amdgpu owns the first queue per pipe at this stage;
		 * this will be extended to multiple queues per pipe later */
		for (i = 0; i < max_queues_per_me; i++) {
			pipe = i % adev->gfx.me.num_pipe_per_me;
			queue = (i / adev->gfx.me.num_pipe_per_me) %
				adev->gfx.me.num_queue_per_pipe;

			set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue,
				adev->gfx.me.queue_bitmap);
		}
	} else {
		for (i = 0; i < max_queues_per_me; ++i)
			set_bit(i, adev->gfx.me.queue_bitmap);
	}

	/* update the number of active graphics rings */
	adev->gfx.num_gfx_rings =
		bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
}

static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring)
{
	int queue_bit;
	int mec, pipe, queue;

	queue_bit = adev->gfx.mec.num_mec
		    * adev->gfx.mec.num_pipe_per_mec
		    * adev->gfx.mec.num_queue_per_pipe;

	while (--queue_bit >= 0) {
		if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap))
			continue;

		amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);

		/*
		 * 1. Using pipes 2/3 from MEC 2 seems to cause problems.
		 * 2. It must use queue id 0, because CGPG_IDLE/SAVE/LOAD/RUN
		 *    can only be issued on queue 0.
		 */
		if ((mec == 1 && pipe > 1) || queue != 0)
			continue;

		ring->me = mec + 1;
		ring->pipe = pipe;
		ring->queue = queue;

		return 0;
	}

	dev_err(adev->dev, "Failed to find a queue for KIQ\n");
	return -EINVAL;
}

int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
			     struct amdgpu_ring *ring,
			     struct amdgpu_irq_src *irq)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	int r = 0;

	spin_lock_init(&kiq->ring_lock);

	ring->adev = NULL;
	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = adev->doorbell_index.kiq;

	r = amdgpu_gfx_kiq_acquire(adev, ring);
	if (r)
		return r;

	ring->eop_gpu_addr = kiq->eop_gpu_addr;
	ring->no_scheduler = true;
	sprintf(ring->name, "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue);
	r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
			     AMDGPU_RING_PRIO_DEFAULT, NULL);
	if (r)
		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);

	return r;
}

void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
{
	amdgpu_ring_fini(ring);
}

void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
}

int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
			unsigned hpd_size)
{
	int r;
	u32 *hpd;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
				    &kiq->eop_gpu_addr, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
		return r;
	}

	memset(hpd, 0, hpd_size);

	r = amdgpu_bo_reserve(kiq->eop_obj, true);
	if (unlikely(r != 0))
		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
	amdgpu_bo_kunmap(kiq->eop_obj);
	amdgpu_bo_unreserve(kiq->eop_obj);

	return 0;
}

/* create MQD for each compute/gfx queue */
int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
			   unsigned mqd_size)
{
	struct amdgpu_ring *ring = NULL;
	int r, i;

	/* create MQD for KIQ */
	ring = &adev->gfx.kiq.ring;
	if (!adev->enable_mes_kiq && !ring->mqd_obj) {
		/* Originally the KIQ MQD was placed in the GTT domain, but for
		 * SRIOV the VRAM domain is a must, otherwise the hypervisor
		 * triggers a SAVE_VF failure after the driver is unloaded
		 * (at that point the MQD has been freed and gart_unbind has
		 * run).  To avoid diverging between the two paths, use the
		 * VRAM domain for the KIQ MQD on both SRIOV and bare metal.
		 */
		r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_VRAM, &ring->mqd_obj,
					    &ring->mqd_gpu_addr, &ring->mqd_ptr);
		if (r) {
			dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
			return r;
		}

		/* prepare MQD backup */
		adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(mqd_size, GFP_KERNEL);
		if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
			dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
	}

	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
		/* create MQD for each KGQ */
		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
			ring = &adev->gfx.gfx_ring[i];
			if (!ring->mqd_obj) {
				r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
							    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
							    &ring->mqd_gpu_addr, &ring->mqd_ptr);
				if (r) {
					dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
					return r;
				}

				/* prepare MQD backup */
				adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
				if (!adev->gfx.me.mqd_backup[i])
					dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
			}
		}
	}

	/* create MQD for each KCQ */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		if (!ring->mqd_obj) {
			r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
						    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
						    &ring->mqd_gpu_addr, &ring->mqd_ptr);
			if (r) {
				dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
				return r;
			}

			/* prepare MQD backup */
			adev->gfx.mec.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
			if (!adev->gfx.mec.mqd_backup[i])
				dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
		}
	}

	return 0;
}

void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int i;

	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
			ring = &adev->gfx.gfx_ring[i];
			kfree(adev->gfx.me.mqd_backup[i]);
			amdgpu_bo_free_kernel(&ring->mqd_obj,
					      &ring->mqd_gpu_addr,
					      &ring->mqd_ptr);
		}
	}

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		kfree(adev->gfx.mec.mqd_backup[i]);
		amdgpu_bo_free_kernel(&ring->mqd_obj,
				      &ring->mqd_gpu_addr,
				      &ring->mqd_ptr);
	}

	ring = &adev->gfx.kiq.ring;
	kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
	amdgpu_bo_free_kernel(&ring->mqd_obj,
			      &ring->mqd_gpu_addr,
			      &ring->mqd_ptr);
}

int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *kiq_ring = &kiq->ring;
	int i, r = 0;

	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
		return -EINVAL;

	spin_lock(&adev->gfx.kiq.ring_lock);
	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
			      adev->gfx.num_compute_rings)) {
		spin_unlock(&adev->gfx.kiq.ring_lock);
		return -ENOMEM;
	}

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i],
					   RESET_QUEUES, 0, 0);

	if (adev->gfx.kiq.ring.sched.ready && !adev->job_hang)
		r = amdgpu_ring_test_helper(kiq_ring);
	spin_unlock(&adev->gfx.kiq.ring_lock);

	return r;
}

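/*
 * The queue mask consumed by kiq_set_resources() uses a fixed layout of
 * 4 pipes per MEC and 8 queues per pipe, so a bit from the driver's queue
 * bitmap has to be remapped before it is ORed into that mask.
 */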
int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
					      int queue_bit)
{
	int mec, pipe, queue;
	int set_resource_bit = 0;

	amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);

	set_resource_bit = mec * 4 * 8 + pipe * 8 + queue;

	return set_resource_bit;
}

int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint64_t queue_mask = 0;
	int r, i;

	if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
		return -EINVAL;

	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues.  If so, the
		 * definition of queue_mask needs updating.
		 */
		if (WARN_ON(i >= (sizeof(queue_mask) * 8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i));
	}

	DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
		 kiq_ring->queue);
	spin_lock(&adev->gfx.kiq.ring_lock);
	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
			      adev->gfx.num_compute_rings +
			      kiq->pmf->set_resources_size);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		spin_unlock(&adev->gfx.kiq.ring_lock);
		return r;
	}

	if (adev->enable_mes)
		queue_mask = ~0ULL;

	kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.compute_ring[i]);

	r = amdgpu_ring_test_helper(kiq_ring);
	spin_unlock(&adev->gfx.kiq.ring_lock);
	if (r)
		DRM_ERROR("KCQ enable failed\n");

	return r;
}

/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
 *
 * @adev: amdgpu_device pointer
 * @enable: true to enable the gfx off feature, false to disable it
 *
 * 1. The gfx off feature will be enabled by the gfx IP after gfx CG/PG is enabled.
 * 2. Other clients can request that the gfx off feature be disabled; such requests should be honored.
 * 3. Other clients can cancel their request to disable the gfx off feature.
 * 4. Other clients should not request to enable the gfx off feature before having requested to disable it.
 */

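/*
 * A typical caller brackets direct GFX register access like this:
 *
 *	amdgpu_gfx_off_ctrl(adev, false);	// keep GFX powered up
 *	... access GFX registers ...
 *	amdgpu_gfx_off_ctrl(adev, true);	// drop the request again
 *
 * Requests are counted in gfx_off_req_count, so disable/enable pairs from
 * different clients nest safely.
 */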
void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
{
	unsigned long delay = GFX_OFF_DELAY_ENABLE;

	if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
		return;

	mutex_lock(&adev->gfx.gfx_off_mutex);

	if (enable) {
		/* If the count is already 0, it means there's an imbalance bug somewhere.
		 * Note that the bug may be in a different caller than the one which triggers the
		 * WARN_ON_ONCE.
		 */
		if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
			goto unlock;

		adev->gfx.gfx_off_req_count--;

		if (adev->gfx.gfx_off_req_count == 0 &&
		    !adev->gfx.gfx_off_state) {
			/* If going to s2idle, no need to wait */
			if (adev->in_s0ix)
				delay = GFX_OFF_NO_DELAY;
			schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
					      delay);
		}
	} else {
		if (adev->gfx.gfx_off_req_count == 0) {
			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);

			if (adev->gfx.gfx_off_state &&
			    !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) {
				adev->gfx.gfx_off_state = false;

				if (adev->gfx.funcs->init_spm_golden) {
					dev_dbg(adev->dev,
						"GFXOFF is disabled, re-init SPM golden settings\n");
					amdgpu_gfx_init_spm_golden(adev);
				}
			}
		}

		adev->gfx.gfx_off_req_count++;
	}

unlock:
	mutex_unlock(&adev->gfx.gfx_off_mutex);
}

int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value)
{
	int r = 0;

	mutex_lock(&adev->gfx.gfx_off_mutex);

	r = amdgpu_dpm_set_residency_gfxoff(adev, value);

	mutex_unlock(&adev->gfx.gfx_off_mutex);

	return r;
}

int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *value)
{
	int r = 0;

	mutex_lock(&adev->gfx.gfx_off_mutex);

	r = amdgpu_dpm_get_residency_gfxoff(adev, value);

	mutex_unlock(&adev->gfx.gfx_off_mutex);

	return r;
}

int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value)
{
	int r = 0;

	mutex_lock(&adev->gfx.gfx_off_mutex);

	r = amdgpu_dpm_get_entrycount_gfxoff(adev, value);

	mutex_unlock(&adev->gfx.gfx_off_mutex);

	return r;
}

int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value)
{
	int r = 0;

	mutex_lock(&adev->gfx.gfx_off_mutex);

	r = amdgpu_dpm_get_status_gfxoff(adev, value);

	mutex_unlock(&adev->gfx.gfx_off_mutex);

	return r;
}

int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
{
	int r;

	if (amdgpu_ras_is_supported(adev, ras_block->block)) {
		if (!amdgpu_persistent_edc_harvesting_supported(adev))
			amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);

		r = amdgpu_ras_block_late_init(adev, ras_block);
		if (r)
			return r;

		r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
		if (r)
			goto late_fini;
	} else {
		amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
	}

	return 0;
late_fini:
	amdgpu_ras_block_late_fini(adev, ras_block);
	return r;
}

int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
				   void *err_data,
				   struct amdgpu_iv_entry *entry)
{
	/* TODO: a UE will trigger an interrupt.
	 *
	 * When "Full RAS" is enabled, the per-IP interrupt sources should
	 * be disabled and the driver should only look for the aggregated
	 * interrupt via sync flood.
	 */
	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
		kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
		if (adev->gfx.ras && adev->gfx.ras->ras_block.hw_ops &&
		    adev->gfx.ras->ras_block.hw_ops->query_ras_error_count)
			adev->gfx.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
		amdgpu_ras_reset_gpu(adev);
	}
	return AMDGPU_RAS_SUCCESS;
}

int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
				struct amdgpu_irq_src *source,
				struct amdgpu_iv_entry *entry)
{
	struct ras_common_if *ras_if = adev->gfx.ras_if;
	struct ras_dispatch_if ih_data = {
		.entry = entry,
	};

	if (!ras_if)
		return 0;

	ih_data.head = *ras_if;

	DRM_ERROR("CP ECC ERROR IRQ\n");
	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
	return 0;
}

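/*
 * Register access through the KIQ: a read/write packet is submitted on the
 * KIQ ring and a polling fence is used to wait for it to execute; reads land
 * in a writeback slot.  When the MES ring is up, the access is routed through
 * MES instead.  Typically used where direct MMIO access is not possible,
 * e.g. under SR-IOV.
 */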
uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	signed long r, cnt = 0;
	unsigned long flags;
	uint32_t seq, reg_val_offs = 0, value = 0;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *ring = &kiq->ring;

	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if (adev->mes.ring.sched.ready)
		return amdgpu_mes_rreg(adev, reg);

	BUG_ON(!ring->funcs->emit_rreg);

	spin_lock_irqsave(&kiq->ring_lock, flags);
	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
		pr_err("critical bug! too many kiq readers\n");
		goto failed_unlock;
	}
	amdgpu_ring_alloc(ring, 32);
	amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
	if (r)
		goto failed_undo;

	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

	/* Don't wait any longer in the GPU reset case, because doing so may
	 * block the gpu_recover() routine forever: e.g. this KIQ read can be
	 * triggered from TTM, and ttm_bo_lock_delayed_workqueue() will never
	 * return if we keep waiting here, which makes gpu_recover() hang.
	 *
	 * Also don't wait any longer when called from IRQ context.
	 */
	if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
		goto failed_kiq_read;

	might_sleep();
	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	}

	if (cnt > MAX_KIQ_REG_TRY)
		goto failed_kiq_read;

	mb();
	value = adev->wb.wb[reg_val_offs];
	amdgpu_device_wb_free(adev, reg_val_offs);
	return value;

failed_undo:
	amdgpu_ring_undo(ring);
failed_unlock:
	spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_read:
	if (reg_val_offs)
		amdgpu_device_wb_free(adev, reg_val_offs);
	dev_err(adev->dev, "failed to read reg:%x\n", reg);
	return ~0;
}

void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	signed long r, cnt = 0;
	unsigned long flags;
	uint32_t seq;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *ring = &kiq->ring;

	BUG_ON(!ring->funcs->emit_wreg);

	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (adev->mes.ring.sched.ready) {
		amdgpu_mes_wreg(adev, reg, v);
		return;
	}

	spin_lock_irqsave(&kiq->ring_lock, flags);
	amdgpu_ring_alloc(ring, 32);
	amdgpu_ring_emit_wreg(ring, reg, v);
	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
	if (r)
		goto failed_undo;

	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

	/* Don't wait any longer in the GPU reset case, because doing so may
	 * block the gpu_recover() routine forever: e.g. this KIQ write can be
	 * triggered from TTM, and ttm_bo_lock_delayed_workqueue() will never
	 * return if we keep waiting here, which makes gpu_recover() hang.
	 *
	 * Also don't wait any longer when called from IRQ context.
	 */
	if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
		goto failed_kiq_write;

	might_sleep();
	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	}

	if (cnt > MAX_KIQ_REG_TRY)
		goto failed_kiq_write;

	return;

failed_undo:
	amdgpu_ring_undo(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_write:
	dev_err(adev->dev, "failed to write reg:%x\n", reg);
}

int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
{
	if (amdgpu_num_kcq == -1) {
		return 8;
	} else if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
		dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n");
		return 8;
	}
	return amdgpu_num_kcq;
}