/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Christian König <christian.koenig@amd.com>
 */

#include <linux/dma-fence-chain.h>

#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"

struct amdgpu_sync_entry {
        struct hlist_node node;
        struct dma_fence *fence;
};

static struct kmem_cache *amdgpu_sync_slab;

/**
 * amdgpu_sync_create - zero init sync object
 *
 * @sync: sync object to initialize
 *
 * Just clear the sync object for now.
 */
void amdgpu_sync_create(struct amdgpu_sync *sync)
{
        hash_init(sync->fences);
}

/**
 * amdgpu_sync_same_dev - test if a fence belongs to us
 *
 * @adev: amdgpu device to use for the test
 * @f: fence to test
 *
 * Test if the fence was issued by us.
 */
static bool amdgpu_sync_same_dev(struct amdgpu_device *adev,
                                 struct dma_fence *f)
{
        struct drm_sched_fence *s_fence = to_drm_sched_fence(f);

        if (s_fence) {
                struct amdgpu_ring *ring;

                ring = container_of(s_fence->sched, struct amdgpu_ring, sched);
                return ring->adev == adev;
        }

        return false;
}

/**
 * amdgpu_sync_get_owner - extract the owner of a fence
 *
 * @f: fence to get the owner from
 *
 * Extract who originally created the fence.
 */
static void *amdgpu_sync_get_owner(struct dma_fence *f)
{
        struct drm_sched_fence *s_fence;
        struct amdgpu_amdkfd_fence *kfd_fence;

        if (!f)
                return AMDGPU_FENCE_OWNER_UNDEFINED;

        s_fence = to_drm_sched_fence(f);
        if (s_fence)
                return s_fence->owner;

        kfd_fence = to_amdgpu_amdkfd_fence(f);
        if (kfd_fence)
                return AMDGPU_FENCE_OWNER_KFD;

        return AMDGPU_FENCE_OWNER_UNDEFINED;
}

/**
 * amdgpu_sync_keep_later - Keep the later fence
 *
 * @keep: existing fence to test
 * @fence: new fence
 *
 * Either keep the existing fence or the new one, depending on which one is
 * later.
 */
static void amdgpu_sync_keep_later(struct dma_fence **keep,
                                   struct dma_fence *fence)
{
        if (*keep && dma_fence_is_later(*keep, fence))
                return;

        dma_fence_put(*keep);
        *keep = dma_fence_get(fence);
}

/**
 * amdgpu_sync_add_later - add the fence to the hash
 *
 * @sync: sync object to add the fence to
 * @f: fence to add
 *
 * Tries to add the fence to an existing hash entry. Returns true when an entry
 * was found, false otherwise.
 */
static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
{
        struct amdgpu_sync_entry *e;

        hash_for_each_possible(sync->fences, e, node, f->context) {
                if (unlikely(e->fence->context != f->context))
                        continue;

                amdgpu_sync_keep_later(&e->fence, f);
                return true;
        }
        return false;
}

/**
 * amdgpu_sync_fence - remember to sync to this fence
 *
 * @sync: sync object to add fence to
 * @f: fence to sync to
 *
 * Add the fence to the sync object.
 */
int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
{
        struct amdgpu_sync_entry *e;

        if (!f)
                return 0;

        if (amdgpu_sync_add_later(sync, f))
                return 0;

        e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
        if (!e)
                return -ENOMEM;

        hash_add(sync->fences, &e->node, f->context);
        e->fence = dma_fence_get(f);
        return 0;
}
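
/*
 * Usage sketch (illustrative only, not part of this file): fences are hashed
 * by their context, so adding a second fence from the same context does not
 * create a new entry, it just keeps the later of the two. The fence names
 * below are hypothetical.
 *
 *      struct amdgpu_sync sync;
 *      int r;
 *
 *      amdgpu_sync_create(&sync);
 *      r = amdgpu_sync_fence(&sync, first_fence);
 *      if (!r)
 *              r = amdgpu_sync_fence(&sync, later_fence_same_context);
 *
 * After the second call the sync object still holds a single entry for that
 * context, now referencing later_fence_same_context.
 */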

/* Determine based on the owner and mode if we should sync to a fence or not */
static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
                                   enum amdgpu_sync_mode mode,
                                   void *owner, struct dma_fence *f)
{
        void *fence_owner = amdgpu_sync_get_owner(f);

        /* Always sync to moves, no matter what */
        if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED)
                return true;

        /* We only want to trigger KFD eviction fences on
         * evict or move jobs. Skip KFD fences otherwise.
         */
        if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
            owner != AMDGPU_FENCE_OWNER_UNDEFINED)
                return false;

        /* Never sync to VM updates either. */
        if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
            owner != AMDGPU_FENCE_OWNER_UNDEFINED)
                return false;

        /* Ignore fences depending on the sync mode */
        switch (mode) {
        case AMDGPU_SYNC_ALWAYS:
                return true;

        case AMDGPU_SYNC_NE_OWNER:
                if (amdgpu_sync_same_dev(adev, f) &&
                    fence_owner == owner)
                        return false;
                break;

        case AMDGPU_SYNC_EQ_OWNER:
                if (amdgpu_sync_same_dev(adev, f) &&
                    fence_owner != owner)
                        return false;
                break;

        case AMDGPU_SYNC_EXPLICIT:
                return false;
        }

        WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
             "Adding eviction fence to sync obj");
        return true;
}

/**
 * amdgpu_sync_resv - sync to a reservation object
 *
 * @adev: amdgpu device
 * @sync: sync object to add fences from reservation object to
 * @resv: reservation object with embedded fence
 * @mode: how owner affects which fences we sync to
 * @owner: owner of the planned job submission
 *
 * Sync to all fences in the reservation object that are relevant for the
 * given mode and owner.
 */
int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
                     struct dma_resv *resv, enum amdgpu_sync_mode mode,
                     void *owner)
{
        struct dma_resv_iter cursor;
        struct dma_fence *f;
        int r;

        if (resv == NULL)
                return -EINVAL;

        /* TODO: Use DMA_RESV_USAGE_READ here */
        dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, f) {
                dma_fence_chain_for_each(f, f) {
                        struct dma_fence *tmp = dma_fence_chain_contained(f);

                        if (amdgpu_sync_test_fence(adev, mode, owner, tmp)) {
                                r = amdgpu_sync_fence(sync, f);
                                dma_fence_put(f);
                                if (r)
                                        return r;
                                break;
                        }
                }
        }
        return 0;
}
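
/*
 * Usage sketch (illustrative only, not part of this file): before submitting
 * work that touches a buffer object, a caller can collect the relevant
 * fences from the buffer's reservation object. The bo, owner, adev, sync and
 * r variables are hypothetical; bo->tbo.base.resv is how an amdgpu_bo
 * normally exposes its dma_resv.
 *
 *      r = amdgpu_sync_resv(adev, &sync, bo->tbo.base.resv,
 *                           AMDGPU_SYNC_NE_OWNER, owner);
 *      if (r)
 *              return r;
 *
 * With AMDGPU_SYNC_NE_OWNER, fences issued by the same owner on the same
 * device are skipped, while moves and fences with an undefined owner are
 * always added.
 */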

/**
 * amdgpu_sync_peek_fence - get the next fence not signaled yet
 *
 * @sync: the sync object
 * @ring: optional ring to use for test
 *
 * Returns the next fence not signaled yet without removing it from the sync
 * object.
 */
struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
                                         struct amdgpu_ring *ring)
{
        struct amdgpu_sync_entry *e;
        struct hlist_node *tmp;
        int i;

        hash_for_each_safe(sync->fences, i, tmp, e, node) {
                struct dma_fence *f = e->fence;
                struct drm_sched_fence *s_fence = to_drm_sched_fence(f);

                if (dma_fence_is_signaled(f)) {
                        hash_del(&e->node);
                        dma_fence_put(f);
                        kmem_cache_free(amdgpu_sync_slab, e);
                        continue;
                }
                if (ring && s_fence) {
                        /* For fences from the same ring it is sufficient
                         * when they are scheduled.
                         */
                        if (s_fence->sched == &ring->sched) {
                                if (dma_fence_is_signaled(&s_fence->scheduled))
                                        continue;

                                return &s_fence->scheduled;
                        }
                }

                return f;
        }

        return NULL;
}
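
/*
 * Usage sketch (illustrative only, not part of this file): peeking does not
 * remove the fence, so the sync object keeps its reference and the caller
 * must not drop it. The sync, ring and r variables are hypothetical.
 *
 *      struct dma_fence *fence;
 *
 *      fence = amdgpu_sync_peek_fence(&sync, ring);
 *      if (fence)
 *              r = dma_fence_wait(fence, false);
 *
 * When a ring is given, fences from that ring only need to be scheduled, so
 * the scheduled fence, which signals earlier, is returned for them instead.
 */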

/**
 * amdgpu_sync_get_fence - get the next fence from the sync object
 *
 * @sync: sync object to use
 *
 * Get and remove the next not yet signaled fence from the sync object.
 */
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
{
        struct amdgpu_sync_entry *e;
        struct hlist_node *tmp;
        struct dma_fence *f;
        int i;

        hash_for_each_safe(sync->fences, i, tmp, e, node) {
                f = e->fence;

                hash_del(&e->node);
                kmem_cache_free(amdgpu_sync_slab, e);

                if (!dma_fence_is_signaled(f))
                        return f;

                dma_fence_put(f);
        }
        return NULL;
}
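
/*
 * Usage sketch (illustrative only, not part of this file): unlike peeking,
 * amdgpu_sync_get_fence() hands the sync object's reference over to the
 * caller, who must drop it with dma_fence_put() when done. The sync and r
 * variables are hypothetical.
 *
 *      struct dma_fence *fence;
 *
 *      while ((fence = amdgpu_sync_get_fence(&sync))) {
 *              r = dma_fence_wait(fence, false);
 *              dma_fence_put(fence);
 *              if (r)
 *                      break;
 *      }
 */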

/**
 * amdgpu_sync_clone - clone a sync object
 *
 * @source: sync object to clone
 * @clone: pointer to destination sync object
 *
 * Adds references to all unsignaled fences in @source to @clone. Also
 * removes signaled fences from @source while at it.
 */
int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
{
        struct amdgpu_sync_entry *e;
        struct hlist_node *tmp;
        struct dma_fence *f;
        int i, r;

        hash_for_each_safe(source->fences, i, tmp, e, node) {
                f = e->fence;
                if (!dma_fence_is_signaled(f)) {
                        r = amdgpu_sync_fence(clone, f);
                        if (r)
                                return r;
                } else {
                        hash_del(&e->node);
                        dma_fence_put(f);
                        kmem_cache_free(amdgpu_sync_slab, e);
                }
        }

        return 0;
}

/**
 * amdgpu_sync_wait - wait for all fences in the sync object
 *
 * @sync: sync object to wait on
 * @intr: if true, the wait can be interrupted by a signal
 *
 * Wait for every fence in the sync object to signal, dropping the entries as
 * they complete. Returns 0 on success or a negative error code if a wait
 * fails or is interrupted.
 */
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr)
{
        struct amdgpu_sync_entry *e;
        struct hlist_node *tmp;
        int i, r;

        hash_for_each_safe(sync->fences, i, tmp, e, node) {
                r = dma_fence_wait(e->fence, intr);
                if (r)
                        return r;

                hash_del(&e->node);
                dma_fence_put(e->fence);
                kmem_cache_free(amdgpu_sync_slab, e);
        }

        return 0;
}

/**
 * amdgpu_sync_free - free the sync object
 *
 * @sync: sync object to use
 *
 * Free the sync object.
 */
void amdgpu_sync_free(struct amdgpu_sync *sync)
{
        struct amdgpu_sync_entry *e;
        struct hlist_node *tmp;
        unsigned i;

        hash_for_each_safe(sync->fences, i, tmp, e, node) {
                hash_del(&e->node);
                dma_fence_put(e->fence);
                kmem_cache_free(amdgpu_sync_slab, e);
        }
}
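
/*
 * Typical lifecycle sketch (illustrative only, not part of this file): create
 * the sync object, fill it, wait and always free it, even on the error path,
 * so that any remaining fence references are dropped. The fence variable is
 * hypothetical.
 *
 *      struct amdgpu_sync sync;
 *      int r;
 *
 *      amdgpu_sync_create(&sync);
 *      r = amdgpu_sync_fence(&sync, fence);
 *      if (!r)
 *              r = amdgpu_sync_wait(&sync, true);
 *      amdgpu_sync_free(&sync);
 *      return r;
 */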

/**
 * amdgpu_sync_init - init sync object subsystem
 *
 * Allocate the slab allocator.
 */
int amdgpu_sync_init(void)
{
        amdgpu_sync_slab = kmem_cache_create(
                "amdgpu_sync", sizeof(struct amdgpu_sync_entry), 0,
                SLAB_HWCACHE_ALIGN, NULL);
        if (!amdgpu_sync_slab)
                return -ENOMEM;

        return 0;
}
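
/*
 * Usage sketch (illustrative only, not part of this file): the slab is
 * created once when the module loads and destroyed when it unloads,
 * typically from the driver's module init and exit paths. The r variable is
 * hypothetical.
 *
 *      r = amdgpu_sync_init();
 *      if (r)
 *              return r;
 *
 *      ... on module exit ...
 *      amdgpu_sync_fini();
 */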

/**
 * amdgpu_sync_fini - fini sync object subsystem
 *
 * Free the slab allocator.
 */
void amdgpu_sync_fini(void)
{
        kmem_cache_destroy(amdgpu_sync_slab);
}