Line data Source code
1 : /*
2 : * Copyright 2015 Advanced Micro Devices, Inc.
3 : *
4 : * Permission is hereby granted, free of charge, to any person obtaining a
5 : * copy of this software and associated documentation files (the "Software"),
6 : * to deal in the Software without restriction, including without limitation
7 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 : * and/or sell copies of the Software, and to permit persons to whom the
9 : * Software is furnished to do so, subject to the following conditions:
10 : *
11 : * The above copyright notice and this permission notice shall be included in
12 : * all copies or substantial portions of the Software.
13 : *
14 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 : * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 : * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 : * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 : * OTHER DEALINGS IN THE SOFTWARE.
21 : *
22 : * Authors: monk liu <monk.liu@amd.com>
23 : */
24 :
25 : #include <drm/drm_auth.h>
26 : #include <drm/drm_drv.h>
27 : #include "amdgpu.h"
28 : #include "amdgpu_sched.h"
29 : #include "amdgpu_ras.h"
30 : #include <linux/nospec.h>
31 :
32 : #define to_amdgpu_ctx_entity(e) \
33 : container_of((e), struct amdgpu_ctx_entity, entity)
34 :
35 : const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
36 : [AMDGPU_HW_IP_GFX] = 1,
37 : [AMDGPU_HW_IP_COMPUTE] = 4,
38 : [AMDGPU_HW_IP_DMA] = 2,
39 : [AMDGPU_HW_IP_UVD] = 1,
40 : [AMDGPU_HW_IP_VCE] = 1,
41 : [AMDGPU_HW_IP_UVD_ENC] = 1,
42 : [AMDGPU_HW_IP_VCN_DEC] = 1,
43 : [AMDGPU_HW_IP_VCN_ENC] = 1,
44 : [AMDGPU_HW_IP_VCN_JPEG] = 1,
45 : };
46 :
47 0 : bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio)
48 : {
49 0 : switch (ctx_prio) {
50 : case AMDGPU_CTX_PRIORITY_UNSET:
51 : case AMDGPU_CTX_PRIORITY_VERY_LOW:
52 : case AMDGPU_CTX_PRIORITY_LOW:
53 : case AMDGPU_CTX_PRIORITY_NORMAL:
54 : case AMDGPU_CTX_PRIORITY_HIGH:
55 : case AMDGPU_CTX_PRIORITY_VERY_HIGH:
56 : return true;
57 : default:
58 0 : return false;
59 : }
60 : }
61 :
62 : static enum drm_sched_priority
63 0 : amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio)
64 : {
65 0 : switch (ctx_prio) {
66 : case AMDGPU_CTX_PRIORITY_UNSET:
67 : return DRM_SCHED_PRIORITY_UNSET;
68 :
69 : case AMDGPU_CTX_PRIORITY_VERY_LOW:
70 0 : return DRM_SCHED_PRIORITY_MIN;
71 :
72 : case AMDGPU_CTX_PRIORITY_LOW:
73 0 : return DRM_SCHED_PRIORITY_MIN;
74 :
75 : case AMDGPU_CTX_PRIORITY_NORMAL:
76 0 : return DRM_SCHED_PRIORITY_NORMAL;
77 :
78 : case AMDGPU_CTX_PRIORITY_HIGH:
79 0 : return DRM_SCHED_PRIORITY_HIGH;
80 :
81 : case AMDGPU_CTX_PRIORITY_VERY_HIGH:
82 0 : return DRM_SCHED_PRIORITY_HIGH;
83 :
84 : /* This should not happen as we already sanitized the userspace-provided
85 : * priority; WARN if it does.
86 : */
87 : default:
88 0 : WARN(1, "Invalid context priority %d\n", ctx_prio);
89 0 : return DRM_SCHED_PRIORITY_NORMAL;
90 : }
91 :
92 : }
93 :
94 0 : static int amdgpu_ctx_priority_permit(struct drm_file *filp,
95 : int32_t priority)
96 : {
97 0 : if (!amdgpu_ctx_priority_is_valid(priority))
98 : return -EINVAL;
99 :
100 : /* NORMAL and below are accessible by everyone */
101 0 : if (priority <= AMDGPU_CTX_PRIORITY_NORMAL)
102 : return 0;
103 :
104 0 : if (capable(CAP_SYS_NICE))
105 : return 0;
106 :
107 0 : if (drm_is_current_master(filp))
108 : return 0;
109 :
110 0 : return -EACCES;
111 : }
112 :
113 : static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_gfx_pipe_prio(int32_t prio)
114 : {
115 0 : switch (prio) {
116 : case AMDGPU_CTX_PRIORITY_HIGH:
117 : case AMDGPU_CTX_PRIORITY_VERY_HIGH:
118 : return AMDGPU_GFX_PIPE_PRIO_HIGH;
119 : default:
120 : return AMDGPU_GFX_PIPE_PRIO_NORMAL;
121 : }
122 : }
123 :
124 : static enum amdgpu_ring_priority_level amdgpu_ctx_sched_prio_to_ring_prio(int32_t prio)
125 : {
126 0 : switch (prio) {
127 : case AMDGPU_CTX_PRIORITY_HIGH:
128 : return AMDGPU_RING_PRIO_1;
129 : case AMDGPU_CTX_PRIORITY_VERY_HIGH:
130 : return AMDGPU_RING_PRIO_2;
131 : default:
132 : return AMDGPU_RING_PRIO_0;
133 : }
134 : }
135 :
136 0 : static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
137 : {
138 0 : struct amdgpu_device *adev = ctx->mgr->adev;
139 : unsigned int hw_prio;
140 : int32_t ctx_prio;
141 :
142 0 : ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
143 0 : ctx->init_priority : ctx->override_priority;
144 :
145 0 : switch (hw_ip) {
146 : case AMDGPU_HW_IP_GFX:
147 : case AMDGPU_HW_IP_COMPUTE:
148 : hw_prio = amdgpu_ctx_prio_to_gfx_pipe_prio(ctx_prio);
149 : break;
150 : case AMDGPU_HW_IP_VCE:
151 : case AMDGPU_HW_IP_VCN_ENC:
152 : hw_prio = amdgpu_ctx_sched_prio_to_ring_prio(ctx_prio);
153 : break;
154 : default:
155 : hw_prio = AMDGPU_RING_PRIO_DEFAULT;
156 : break;
157 : }
158 :
159 0 : hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
160 0 : if (adev->gpu_sched[hw_ip][hw_prio].num_scheds == 0)
161 0 : hw_prio = AMDGPU_RING_PRIO_DEFAULT;
162 :
163 0 : return hw_prio;
164 : }
165 :
166 : /* Calculate the time spent on the hw */
167 0 : static ktime_t amdgpu_ctx_fence_time(struct dma_fence *fence)
168 : {
169 : struct drm_sched_fence *s_fence;
170 :
171 0 : if (!fence)
172 : return ns_to_ktime(0);
173 :
174 : /* When the fence is not even scheduled it can't have spent any time */
175 0 : s_fence = to_drm_sched_fence(fence);
176 0 : if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->scheduled.flags))
177 : return ns_to_ktime(0);
178 :
179 : /* When it is still running, account how much time was already spent */
180 0 : if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->finished.flags))
181 0 : return ktime_sub(ktime_get(), s_fence->scheduled.timestamp);
182 :
183 0 : return ktime_sub(s_fence->finished.timestamp,
184 : s_fence->scheduled.timestamp);
185 : }
186 :
187 : static ktime_t amdgpu_ctx_entity_time(struct amdgpu_ctx *ctx,
188 : struct amdgpu_ctx_entity *centity)
189 : {
190 0 : ktime_t res = ns_to_ktime(0);
191 : uint32_t i;
192 :
193 0 : spin_lock(&ctx->ring_lock);
194 0 : for (i = 0; i < amdgpu_sched_jobs; i++) {
195 0 : res = ktime_add(res, amdgpu_ctx_fence_time(centity->fences[i]));
196 : }
197 0 : spin_unlock(&ctx->ring_lock);
198 : return res;
199 : }
200 :
201 0 : static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
202 : const u32 ring)
203 : {
204 0 : struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
205 0 : struct amdgpu_device *adev = ctx->mgr->adev;
206 : struct amdgpu_ctx_entity *entity;
207 : enum drm_sched_priority drm_prio;
208 : unsigned int hw_prio, num_scheds;
209 : int32_t ctx_prio;
210 : int r;
211 :
212 0 : entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
213 : GFP_KERNEL);
214 0 : if (!entity)
215 : return -ENOMEM;
216 :
217 0 : ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
218 0 : ctx->init_priority : ctx->override_priority;
219 0 : entity->hw_ip = hw_ip;
220 0 : entity->sequence = 1;
221 0 : hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
222 0 : drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio);
223 :
224 0 : hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
225 0 : scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
226 0 : num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
227 :
228 : /* disable load balancing if the hw engine retains context among dependent jobs */
229 0 : if (hw_ip == AMDGPU_HW_IP_VCN_ENC ||
230 0 : hw_ip == AMDGPU_HW_IP_VCN_DEC ||
231 0 : hw_ip == AMDGPU_HW_IP_UVD_ENC ||
232 0 : hw_ip == AMDGPU_HW_IP_UVD) {
233 0 : sched = drm_sched_pick_best(scheds, num_scheds);
234 0 : scheds = &sched;
235 0 : num_scheds = 1;
236 : }
237 :
238 0 : r = drm_sched_entity_init(&entity->entity, drm_prio, scheds, num_scheds,
239 : &ctx->guilty);
240 0 : if (r)
241 : goto error_free_entity;
242 :
243 : /* It's not an error if we fail to install the new entity */
244 0 : if (cmpxchg(&ctx->entities[hw_ip][ring], NULL, entity))
245 : goto cleanup_entity;
246 :
247 : return 0;
248 :
249 : cleanup_entity:
250 0 : drm_sched_entity_fini(&entity->entity);
251 :
252 : error_free_entity:
253 0 : kfree(entity);
254 :
255 0 : return r;
256 : }
257 :
258 0 : static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
259 : {
260 0 : ktime_t res = ns_to_ktime(0);
261 : int i;
262 :
263 0 : if (!entity)
264 : return res;
265 :
266 0 : for (i = 0; i < amdgpu_sched_jobs; ++i) {
267 0 : res = ktime_add(res, amdgpu_ctx_fence_time(entity->fences[i]));
268 0 : dma_fence_put(entity->fences[i]);
269 : }
270 :
271 0 : kfree(entity);
272 0 : return res;
273 : }
274 :
275 0 : static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
276 : u32 *stable_pstate)
277 : {
278 0 : struct amdgpu_device *adev = ctx->mgr->adev;
279 : enum amd_dpm_forced_level current_level;
280 :
281 0 : current_level = amdgpu_dpm_get_performance_level(adev);
282 :
283 0 : switch (current_level) {
284 : case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
285 0 : *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_STANDARD;
286 : break;
287 : case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
288 0 : *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK;
289 : break;
290 : case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
291 0 : *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK;
292 : break;
293 : case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
294 0 : *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_PEAK;
295 : break;
296 : default:
297 0 : *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE;
298 : break;
299 : }
300 0 : return 0;
301 : }
302 :
303 0 : static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
304 : struct drm_file *filp, struct amdgpu_ctx *ctx)
305 : {
306 : u32 current_stable_pstate;
307 : int r;
308 :
309 0 : r = amdgpu_ctx_priority_permit(filp, priority);
310 0 : if (r)
311 : return r;
312 :
313 0 : memset(ctx, 0, sizeof(*ctx));
314 :
315 0 : kref_init(&ctx->refcount);
316 0 : ctx->mgr = mgr;
317 0 : spin_lock_init(&ctx->ring_lock);
318 0 : mutex_init(&ctx->lock);
319 :
320 0 : ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter);
321 0 : ctx->reset_counter_query = ctx->reset_counter;
322 0 : ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter);
323 0 : ctx->init_priority = priority;
324 0 : ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;
325 :
326 0 : r = amdgpu_ctx_get_stable_pstate(ctx, ¤t_stable_pstate);
327 0 : if (r)
328 : return r;
329 :
330 0 : ctx->stable_pstate = current_stable_pstate;
331 :
332 0 : return 0;
333 : }
334 :
335 0 : static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
336 : u32 stable_pstate)
337 : {
338 0 : struct amdgpu_device *adev = ctx->mgr->adev;
339 : enum amd_dpm_forced_level level;
340 : u32 current_stable_pstate;
341 : int r;
342 :
343 0 : mutex_lock(&adev->pm.stable_pstate_ctx_lock);
344 0 : if (adev->pm.stable_pstate_ctx && adev->pm.stable_pstate_ctx != ctx) {
345 : r = -EBUSY;
346 : goto done;
347 : }
348 :
349 0 : r = amdgpu_ctx_get_stable_pstate(ctx, ¤t_stable_pstate);
350 0 : if (r || (stable_pstate == current_stable_pstate))
351 : goto done;
352 :
353 0 : switch (stable_pstate) {
354 : case AMDGPU_CTX_STABLE_PSTATE_NONE:
355 : level = AMD_DPM_FORCED_LEVEL_AUTO;
356 : break;
357 : case AMDGPU_CTX_STABLE_PSTATE_STANDARD:
358 0 : level = AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD;
359 0 : break;
360 : case AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK:
361 0 : level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK;
362 0 : break;
363 : case AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK:
364 0 : level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK;
365 0 : break;
366 : case AMDGPU_CTX_STABLE_PSTATE_PEAK:
367 0 : level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK;
368 0 : break;
369 : default:
370 : r = -EINVAL;
371 : goto done;
372 : }
373 :
374 0 : r = amdgpu_dpm_force_performance_level(adev, level);
375 :
376 0 : if (level == AMD_DPM_FORCED_LEVEL_AUTO)
377 0 : adev->pm.stable_pstate_ctx = NULL;
378 : else
379 0 : adev->pm.stable_pstate_ctx = ctx;
380 : done:
381 0 : mutex_unlock(&adev->pm.stable_pstate_ctx_lock);
382 :
383 0 : return r;
384 : }
385 :
386 0 : static void amdgpu_ctx_fini(struct kref *ref)
387 : {
388 0 : struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
389 0 : struct amdgpu_ctx_mgr *mgr = ctx->mgr;
390 0 : struct amdgpu_device *adev = mgr->adev;
391 : unsigned i, j, idx;
392 :
393 0 : if (!adev)
394 0 : return;
395 :
396 0 : for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
397 0 : for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
398 : ktime_t spend;
399 :
400 0 : spend = amdgpu_ctx_fini_entity(ctx->entities[i][j]);
401 0 : atomic64_add(ktime_to_ns(spend), &mgr->time_spend[i]);
402 : }
403 : }
404 :
405 0 : if (drm_dev_enter(adev_to_drm(adev), &idx)) {
406 0 : amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate);
407 0 : drm_dev_exit(idx);
408 : }
409 :
410 0 : mutex_destroy(&ctx->lock);
411 0 : kfree(ctx);
412 : }
413 :
414 0 : int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
415 : u32 ring, struct drm_sched_entity **entity)
416 : {
417 : int r;
418 :
419 0 : if (hw_ip >= AMDGPU_HW_IP_NUM) {
420 0 : DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
421 0 : return -EINVAL;
422 : }
423 :
424 : /* Right now all IPs have only one instance each, with multiple rings. */
425 0 : if (instance != 0) {
426 0 : DRM_DEBUG("invalid ip instance: %d\n", instance);
427 0 : return -EINVAL;
428 : }
429 :
430 0 : if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
431 0 : DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
432 0 : return -EINVAL;
433 : }
434 :
435 0 : if (ctx->entities[hw_ip][ring] == NULL) {
436 0 : r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
437 0 : if (r)
438 : return r;
439 : }
440 :
441 0 : *entity = &ctx->entities[hw_ip][ring]->entity;
442 0 : return 0;
443 : }
444 :
445 0 : static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
446 : struct amdgpu_fpriv *fpriv,
447 : struct drm_file *filp,
448 : int32_t priority,
449 : uint32_t *id)
450 : {
451 0 : struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
452 : struct amdgpu_ctx *ctx;
453 : int r;
454 :
455 0 : ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
456 0 : if (!ctx)
457 : return -ENOMEM;
458 :
459 0 : mutex_lock(&mgr->lock);
460 0 : r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
461 0 : if (r < 0) {
462 0 : mutex_unlock(&mgr->lock);
463 0 : kfree(ctx);
464 : return r;
465 : }
466 :
467 0 : *id = (uint32_t)r;
468 0 : r = amdgpu_ctx_init(mgr, priority, filp, ctx);
469 0 : if (r) {
470 0 : idr_remove(&mgr->ctx_handles, *id);
471 0 : *id = 0;
472 0 : kfree(ctx);
473 : }
474 0 : mutex_unlock(&mgr->lock);
475 : return r;
476 : }
477 :
478 0 : static void amdgpu_ctx_do_release(struct kref *ref)
479 : {
480 : struct amdgpu_ctx *ctx;
481 : u32 i, j;
482 :
483 0 : ctx = container_of(ref, struct amdgpu_ctx, refcount);
484 0 : for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
485 0 : for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
486 0 : if (!ctx->entities[i][j])
487 0 : continue;
488 :
489 0 : drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
490 : }
491 : }
492 :
493 0 : amdgpu_ctx_fini(ref);
494 0 : }
495 :
496 0 : static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
497 : {
498 0 : struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
499 : struct amdgpu_ctx *ctx;
500 :
501 0 : mutex_lock(&mgr->lock);
502 0 : ctx = idr_remove(&mgr->ctx_handles, id);
503 0 : if (ctx)
504 0 : kref_put(&ctx->refcount, amdgpu_ctx_do_release);
505 0 : mutex_unlock(&mgr->lock);
506 0 : return ctx ? 0 : -EINVAL;
507 : }
508 :
509 0 : static int amdgpu_ctx_query(struct amdgpu_device *adev,
510 : struct amdgpu_fpriv *fpriv, uint32_t id,
511 : union drm_amdgpu_ctx_out *out)
512 : {
513 : struct amdgpu_ctx *ctx;
514 : struct amdgpu_ctx_mgr *mgr;
515 : unsigned reset_counter;
516 :
517 0 : if (!fpriv)
518 : return -EINVAL;
519 :
520 0 : mgr = &fpriv->ctx_mgr;
521 0 : mutex_lock(&mgr->lock);
522 0 : ctx = idr_find(&mgr->ctx_handles, id);
523 0 : if (!ctx) {
524 0 : mutex_unlock(&mgr->lock);
525 0 : return -EINVAL;
526 : }
527 :
528 : /* TODO: these two are always zero */
529 0 : out->state.flags = 0x0;
530 0 : out->state.hangs = 0x0;
531 :
532 : /* determine if a GPU reset has occurred since the last call */
533 0 : reset_counter = atomic_read(&adev->gpu_reset_counter);
534 : /* TODO: this should ideally return NO, GUILTY, or INNOCENT. */
535 0 : if (ctx->reset_counter_query == reset_counter)
536 0 : out->state.reset_status = AMDGPU_CTX_NO_RESET;
537 : else
538 0 : out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET;
539 0 : ctx->reset_counter_query = reset_counter;
540 :
541 0 : mutex_unlock(&mgr->lock);
542 0 : return 0;
543 : }
544 :
545 : #define AMDGPU_RAS_COUNTE_DELAY_MS 3000
546 :
547 0 : static int amdgpu_ctx_query2(struct amdgpu_device *adev,
548 : struct amdgpu_fpriv *fpriv, uint32_t id,
549 : union drm_amdgpu_ctx_out *out)
550 : {
551 0 : struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
552 : struct amdgpu_ctx *ctx;
553 : struct amdgpu_ctx_mgr *mgr;
554 :
555 0 : if (!fpriv)
556 : return -EINVAL;
557 :
558 0 : mgr = &fpriv->ctx_mgr;
559 0 : mutex_lock(&mgr->lock);
560 0 : ctx = idr_find(&mgr->ctx_handles, id);
561 0 : if (!ctx) {
562 0 : mutex_unlock(&mgr->lock);
563 0 : return -EINVAL;
564 : }
565 :
566 0 : out->state.flags = 0x0;
567 0 : out->state.hangs = 0x0;
568 :
569 0 : if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter))
570 0 : out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET;
571 :
572 0 : if (ctx->vram_lost_counter != atomic_read(&adev->vram_lost_counter))
573 0 : out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;
574 :
575 0 : if (atomic_read(&ctx->guilty))
576 0 : out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;
577 :
578 0 : if (adev->ras_enabled && con) {
579 : /* Return the cached values in O(1),
580 : * and schedule delayed work to cache
581 : * new values.
582 : */
583 : int ce_count, ue_count;
584 :
585 0 : ce_count = atomic_read(&con->ras_ce_count);
586 0 : ue_count = atomic_read(&con->ras_ue_count);
587 :
588 0 : if (ce_count != ctx->ras_counter_ce) {
589 0 : ctx->ras_counter_ce = ce_count;
590 0 : out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
591 : }
592 :
593 0 : if (ue_count != ctx->ras_counter_ue) {
594 0 : ctx->ras_counter_ue = ue_count;
595 0 : out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
596 : }
597 :
598 0 : schedule_delayed_work(&con->ras_counte_delay_work,
599 : msecs_to_jiffies(AMDGPU_RAS_COUNTE_DELAY_MS));
600 : }
601 :
602 0 : mutex_unlock(&mgr->lock);
603 0 : return 0;
604 : }
605 :
606 :
607 :
608 0 : static int amdgpu_ctx_stable_pstate(struct amdgpu_device *adev,
609 : struct amdgpu_fpriv *fpriv, uint32_t id,
610 : bool set, u32 *stable_pstate)
611 : {
612 : struct amdgpu_ctx *ctx;
613 : struct amdgpu_ctx_mgr *mgr;
614 : int r;
615 :
616 0 : if (!fpriv)
617 : return -EINVAL;
618 :
619 0 : mgr = &fpriv->ctx_mgr;
620 0 : mutex_lock(&mgr->lock);
621 0 : ctx = idr_find(&mgr->ctx_handles, id);
622 0 : if (!ctx) {
623 0 : mutex_unlock(&mgr->lock);
624 0 : return -EINVAL;
625 : }
626 :
627 0 : if (set)
628 0 : r = amdgpu_ctx_set_stable_pstate(ctx, *stable_pstate);
629 : else
630 0 : r = amdgpu_ctx_get_stable_pstate(ctx, stable_pstate);
631 :
632 0 : mutex_unlock(&mgr->lock);
633 0 : return r;
634 : }
635 :
636 0 : int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
637 : struct drm_file *filp)
638 : {
639 : int r;
640 : uint32_t id, stable_pstate;
641 : int32_t priority;
642 :
643 0 : union drm_amdgpu_ctx *args = data;
644 0 : struct amdgpu_device *adev = drm_to_adev(dev);
645 0 : struct amdgpu_fpriv *fpriv = filp->driver_priv;
646 :
647 0 : id = args->in.ctx_id;
648 0 : priority = args->in.priority;
649 :
650 : /* For backwards compatibility reasons, we need to accept
651 : * ioctls with garbage in the priority field */
652 0 : if (!amdgpu_ctx_priority_is_valid(priority))
653 0 : priority = AMDGPU_CTX_PRIORITY_NORMAL;
654 :
655 0 : switch (args->in.op) {
656 : case AMDGPU_CTX_OP_ALLOC_CTX:
657 0 : r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
658 0 : args->out.alloc.ctx_id = id;
659 0 : break;
660 : case AMDGPU_CTX_OP_FREE_CTX:
661 0 : r = amdgpu_ctx_free(fpriv, id);
662 0 : break;
663 : case AMDGPU_CTX_OP_QUERY_STATE:
664 0 : r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
665 0 : break;
666 : case AMDGPU_CTX_OP_QUERY_STATE2:
667 0 : r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
668 0 : break;
669 : case AMDGPU_CTX_OP_GET_STABLE_PSTATE:
670 0 : if (args->in.flags)
671 : return -EINVAL;
672 0 : r = amdgpu_ctx_stable_pstate(adev, fpriv, id, false, &stable_pstate);
673 0 : if (!r)
674 0 : args->out.pstate.flags = stable_pstate;
675 : break;
676 : case AMDGPU_CTX_OP_SET_STABLE_PSTATE:
677 0 : if (args->in.flags & ~AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK)
678 : return -EINVAL;
679 0 : stable_pstate = args->in.flags & AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK;
680 0 : if (stable_pstate > AMDGPU_CTX_STABLE_PSTATE_PEAK)
681 : return -EINVAL;
682 0 : r = amdgpu_ctx_stable_pstate(adev, fpriv, id, true, &stable_pstate);
683 0 : break;
684 : default:
685 : return -EINVAL;
686 : }
687 :
688 : return r;
689 : }
690 :
691 0 : struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
692 : {
693 : struct amdgpu_ctx *ctx;
694 : struct amdgpu_ctx_mgr *mgr;
695 :
696 0 : if (!fpriv)
697 : return NULL;
698 :
699 0 : mgr = &fpriv->ctx_mgr;
700 :
701 0 : mutex_lock(&mgr->lock);
702 0 : ctx = idr_find(&mgr->ctx_handles, id);
703 0 : if (ctx)
704 0 : kref_get(&ctx->refcount);
705 0 : mutex_unlock(&mgr->lock);
706 0 : return ctx;
707 : }
708 :
709 0 : int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
710 : {
711 0 : if (ctx == NULL)
712 : return -EINVAL;
713 :
714 0 : kref_put(&ctx->refcount, amdgpu_ctx_do_release);
715 0 : return 0;
716 : }
717 :
718 0 : uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
719 : struct drm_sched_entity *entity,
720 : struct dma_fence *fence)
721 : {
722 0 : struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
723 0 : uint64_t seq = centity->sequence;
724 0 : struct dma_fence *other = NULL;
725 0 : unsigned idx = 0;
726 :
727 0 : idx = seq & (amdgpu_sched_jobs - 1);
728 0 : other = centity->fences[idx];
729 0 : WARN_ON(other && !dma_fence_is_signaled(other));
730 :
731 0 : dma_fence_get(fence);
732 :
733 0 : spin_lock(&ctx->ring_lock);
734 0 : centity->fences[idx] = fence;
735 0 : centity->sequence++;
736 0 : spin_unlock(&ctx->ring_lock);
737 :
738 0 : atomic64_add(ktime_to_ns(amdgpu_ctx_fence_time(other)),
739 0 : &ctx->mgr->time_spend[centity->hw_ip]);
740 :
741 0 : dma_fence_put(other);
742 0 : return seq;
743 : }
744 :
745 0 : struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
746 : struct drm_sched_entity *entity,
747 : uint64_t seq)
748 : {
749 0 : struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
750 : struct dma_fence *fence;
751 :
752 0 : spin_lock(&ctx->ring_lock);
753 :
754 0 : if (seq == ~0ull)
755 0 : seq = centity->sequence - 1;
756 :
757 0 : if (seq >= centity->sequence) {
758 0 : spin_unlock(&ctx->ring_lock);
759 0 : return ERR_PTR(-EINVAL);
760 : }
761 :
762 :
763 0 : if (seq + amdgpu_sched_jobs < centity->sequence) {
764 0 : spin_unlock(&ctx->ring_lock);
765 0 : return NULL;
766 : }
767 :
768 0 : fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
769 0 : spin_unlock(&ctx->ring_lock);
770 :
771 0 : return fence;
772 : }
773 :
774 0 : static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
775 : struct amdgpu_ctx_entity *aentity,
776 : int hw_ip,
777 : int32_t priority)
778 : {
779 0 : struct amdgpu_device *adev = ctx->mgr->adev;
780 : unsigned int hw_prio;
781 0 : struct drm_gpu_scheduler **scheds = NULL;
782 : unsigned num_scheds;
783 :
784 : /* set sw priority */
785 0 : drm_sched_entity_set_priority(&aentity->entity,
786 : amdgpu_ctx_to_drm_sched_prio(priority));
787 :
788 : /* set hw priority */
789 0 : if (hw_ip == AMDGPU_HW_IP_COMPUTE || hw_ip == AMDGPU_HW_IP_GFX) {
790 0 : hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
791 0 : hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);
792 0 : scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
793 0 : num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
794 0 : drm_sched_entity_modify_sched(&aentity->entity, scheds,
795 : num_scheds);
796 : }
797 0 : }
798 :
799 0 : void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
800 : int32_t priority)
801 : {
802 : int32_t ctx_prio;
803 : unsigned i, j;
804 :
805 0 : ctx->override_priority = priority;
806 :
807 0 : ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
808 0 : ctx->init_priority : ctx->override_priority;
809 0 : for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
810 0 : for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
811 0 : if (!ctx->entities[i][j])
812 0 : continue;
813 :
814 0 : amdgpu_ctx_set_entity_priority(ctx, ctx->entities[i][j],
815 : i, ctx_prio);
816 : }
817 : }
818 0 : }
819 :
820 0 : int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
821 : struct drm_sched_entity *entity)
822 : {
823 0 : struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
824 : struct dma_fence *other;
825 : unsigned idx;
826 : long r;
827 :
828 0 : spin_lock(&ctx->ring_lock);
829 0 : idx = centity->sequence & (amdgpu_sched_jobs - 1);
830 0 : other = dma_fence_get(centity->fences[idx]);
831 0 : spin_unlock(&ctx->ring_lock);
832 :
833 0 : if (!other)
834 : return 0;
835 :
836 0 : r = dma_fence_wait(other, true);
837 0 : if (r < 0 && r != -ERESTARTSYS)
838 0 : DRM_ERROR("Error (%ld) waiting for fence!\n", r);
839 :
840 0 : dma_fence_put(other);
841 0 : return r;
842 : }
843 :
844 0 : void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
845 : struct amdgpu_device *adev)
846 : {
847 : unsigned int i;
848 :
849 0 : mgr->adev = adev;
850 0 : mutex_init(&mgr->lock);
851 0 : idr_init(&mgr->ctx_handles);
852 :
853 0 : for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
854 0 : atomic64_set(&mgr->time_spend[i], 0);
855 0 : }
856 :
857 0 : long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
858 : {
859 : struct amdgpu_ctx *ctx;
860 : struct idr *idp;
861 : uint32_t id, i, j;
862 :
863 0 : idp = &mgr->ctx_handles;
864 :
865 0 : mutex_lock(&mgr->lock);
866 0 : idr_for_each_entry(idp, ctx, id) {
867 0 : for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
868 0 : for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
869 : struct drm_sched_entity *entity;
870 :
871 0 : if (!ctx->entities[i][j])
872 0 : continue;
873 :
874 0 : entity = &ctx->entities[i][j]->entity;
875 0 : timeout = drm_sched_entity_flush(entity, timeout);
876 : }
877 : }
878 : }
879 0 : mutex_unlock(&mgr->lock);
880 0 : return timeout;
881 : }
882 :
883 0 : void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
884 : {
885 : struct amdgpu_ctx *ctx;
886 : struct idr *idp;
887 : uint32_t id, i, j;
888 :
889 0 : idp = &mgr->ctx_handles;
890 :
891 0 : idr_for_each_entry(idp, ctx, id) {
892 0 : if (kref_read(&ctx->refcount) != 1) {
893 0 : DRM_ERROR("ctx %p is still alive\n", ctx);
894 0 : continue;
895 : }
896 :
897 0 : for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
898 0 : for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
899 : struct drm_sched_entity *entity;
900 :
901 0 : if (!ctx->entities[i][j])
902 0 : continue;
903 :
904 0 : entity = &ctx->entities[i][j]->entity;
905 0 : drm_sched_entity_fini(entity);
906 : }
907 : }
908 : }
909 0 : }
910 :
911 0 : void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
912 : {
913 : struct amdgpu_ctx *ctx;
914 : struct idr *idp;
915 : uint32_t id;
916 :
917 0 : amdgpu_ctx_mgr_entity_fini(mgr);
918 :
919 0 : idp = &mgr->ctx_handles;
920 :
921 0 : idr_for_each_entry(idp, ctx, id) {
922 0 : if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
923 0 : DRM_ERROR("ctx %p is still alive\n", ctx);
924 : }
925 :
926 0 : idr_destroy(&mgr->ctx_handles);
927 0 : mutex_destroy(&mgr->lock);
928 0 : }
929 :
930 0 : void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
931 : ktime_t usage[AMDGPU_HW_IP_NUM])
932 : {
933 : struct amdgpu_ctx *ctx;
934 : unsigned int hw_ip, i;
935 : uint32_t id;
936 :
937 : /*
938 : * This is a bit racy because a ctx or a fence can be destroyed at the
939 : * very moment we try to account for them. But that is ok since exactly
940 : * that case is explicitly allowed by the interface.
941 : */
942 0 : mutex_lock(&mgr->lock);
943 0 : for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
944 0 : uint64_t ns = atomic64_read(&mgr->time_spend[hw_ip]);
945 :
946 0 : usage[hw_ip] = ns_to_ktime(ns);
947 : }
948 :
949 0 : idr_for_each_entry(&mgr->ctx_handles, ctx, id) {
950 0 : for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
951 0 : for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) {
952 : struct amdgpu_ctx_entity *centity;
953 : ktime_t spend;
954 :
955 0 : centity = ctx->entities[hw_ip][i];
956 0 : if (!centity)
957 0 : continue;
958 0 : spend = amdgpu_ctx_entity_time(ctx, centity);
959 0 : usage[hw_ip] = ktime_add(usage[hw_ip], spend);
960 : }
961 : }
962 : }
963 0 : mutex_unlock(&mgr->lock);
964 0 : }
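
For context, the sketch below is not part of the driver or of this coverage listing; it is a minimal userspace illustration of how the ioctl dispatched by amdgpu_ctx_ioctl() above is commonly driven through the uapi in drm/amdgpu_drm.h. The device-node path and the exact include path are assumptions and may differ per system (libdrm typically installs the header under /usr/include/libdrm); the op codes and union fields used are the ones referenced in the source above.

/*
 * Illustrative sketch only: allocate a context with NORMAL priority,
 * query its reset/VRAM-lost/guilty state, then free it again.
 * Assumes a render node at /dev/dri/renderD128 (example path).
 */
#include <drm/amdgpu_drm.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	union drm_amdgpu_ctx args;
	uint32_t ctx_id;
	int fd, ret;

	fd = open("/dev/dri/renderD128", O_RDWR);
	if (fd < 0)
		return 1;

	/* AMDGPU_CTX_OP_ALLOC_CTX: handled by amdgpu_ctx_alloc() above. */
	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
	args.in.priority = AMDGPU_CTX_PRIORITY_NORMAL;
	ret = ioctl(fd, DRM_IOCTL_AMDGPU_CTX, &args);
	if (ret) {
		close(fd);
		return 1;
	}
	ctx_id = args.out.alloc.ctx_id;

	/* AMDGPU_CTX_OP_QUERY_STATE2: reset/VRAM-lost/guilty flags. */
	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_QUERY_STATE2;
	args.in.ctx_id = ctx_id;
	ret = ioctl(fd, DRM_IOCTL_AMDGPU_CTX, &args);
	if (!ret)
		printf("ctx %u state flags: 0x%llx\n", ctx_id,
		       (unsigned long long)args.out.state.flags);

	/* AMDGPU_CTX_OP_FREE_CTX: drops the handle and its entities. */
	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_FREE_CTX;
	args.in.ctx_id = ctx_id;
	ioctl(fd, DRM_IOCTL_AMDGPU_CTX, &args);

	close(fd);
	return 0;
}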