// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2024, Advanced Micro Devices, Inc.
 */

#include <drm/amdxdna_accel.h>
#include <drm/drm_device.h>
#include <drm/drm_gem.h>
#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_print.h>
#include <drm/drm_syncobj.h>
#include <linux/hmm.h>
#include <linux/types.h>
#include <linux/xarray.h>
#include <trace/events/amdxdna.h>

#include "aie2_msg_priv.h"
#include "aie2_pci.h"
#include "aie2_solver.h"
#include "amdxdna_ctx.h"
#include "amdxdna_gem.h"
#include "amdxdna_mailbox.h"
#include "amdxdna_pci_drv.h"

static bool force_cmdlist;
module_param(force_cmdlist, bool, 0600);
MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default false)");

#define HWCTX_MAX_TIMEOUT	60000 /* milliseconds */

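/*
 * Job reference counting: aie2_job_put() drops a reference and, on the last
 * put, aie2_job_release() cleans the job up, accounts it as freed and wakes
 * any waiter in aie2_hwctx_fini().
 */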
static void aie2_job_release(struct kref *ref)
{
	struct amdxdna_sched_job *job;

	job = container_of(ref, struct amdxdna_sched_job, refcnt);
	amdxdna_sched_job_cleanup(job);
	atomic64_inc(&job->hwctx->job_free_cnt);
	wake_up(&job->hwctx->priv->job_free_wq);
	if (job->out_fence)
		dma_fence_put(job->out_fence);
	kfree(job);
}

static void aie2_job_put(struct amdxdna_sched_job *job)
{
	kref_put(&job->refcnt, aie2_job_release);
}

/* bad_job is only used by aie2_sched_job_timedout(); otherwise, pass NULL */
static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx,
			    struct drm_sched_job *bad_job)
{
	drm_sched_stop(&hwctx->priv->sched, bad_job);
	aie2_destroy_context(xdna->dev_handle, hwctx);
}

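/*
 * Recreate the firmware context, remap the heap buffer and, if the context
 * had already been configured, reapply the CU configuration before letting
 * the DRM scheduler run again.
 */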
static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_gem_obj *heap = hwctx->priv->heap;
	int ret;

	ret = aie2_create_context(xdna->dev_handle, hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Create hwctx failed, ret %d", ret);
		goto out;
	}

	ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
				heap->mem.userptr, heap->mem.size);
	if (ret) {
		XDNA_ERR(xdna, "Map host buf failed, ret %d", ret);
		goto out;
	}

	if (hwctx->status != HWCTX_STAT_READY) {
		XDNA_DBG(xdna, "hwctx is not ready, status %d", hwctx->status);
		goto out;
	}

	ret = aie2_config_cu(hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
		goto out;
	}

out:
	drm_sched_start(&hwctx->priv->sched, 0);
	XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret);
	return ret;
}

void aie2_restart_ctx(struct amdxdna_client *client)
{
	struct amdxdna_dev *xdna = client->xdna;
	struct amdxdna_hwctx *hwctx;
	unsigned long hwctx_id;

	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
	mutex_lock(&client->hwctx_lock);
	amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
		if (hwctx->status != HWCTX_STAT_STOP)
			continue;

		hwctx->status = hwctx->old_status;
		XDNA_DBG(xdna, "Resetting %s", hwctx->name);
		aie2_hwctx_restart(xdna, hwctx);
	}
	mutex_unlock(&client->hwctx_lock);
}

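/*
 * Find the fence for sequence number @seq on the hardware context's syncobj
 * timeline, so callers can wait for that particular command to complete.
 */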
static struct dma_fence *aie2_cmd_get_out_fence(struct amdxdna_hwctx *hwctx, u64 seq)
{
	struct dma_fence *fence, *out_fence = NULL;
	int ret;

	fence = drm_syncobj_fence_get(hwctx->priv->syncobj);
	if (!fence)
		return NULL;

	ret = dma_fence_chain_find_seqno(&fence, seq);
	if (ret)
		goto out;

	out_fence = dma_fence_get(dma_fence_chain_contained(fence));

out:
	dma_fence_put(fence);
	return out_fence;
}

static void aie2_hwctx_wait_for_idle(struct amdxdna_hwctx *hwctx)
{
	struct dma_fence *fence;

	fence = aie2_cmd_get_out_fence(hwctx, hwctx->priv->seq - 1);
	if (!fence)
		return;

	/* Wait up to 2 seconds for fw to finish all pending requests */
	dma_fence_wait_timeout(fence, false, msecs_to_jiffies(2000));
	dma_fence_put(fence);
}

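/*
 * Suspend: drain outstanding commands, then stop the scheduler and tear down
 * the firmware context. The previous status is saved so resume can restore it.
 */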
void aie2_hwctx_suspend(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;

	/*
	 * Command timeout is unlikely. But if it happens, it doesn't
	 * break the system. aie2_hwctx_stop() will destroy the mailbox
	 * channel and abort all commands.
	 */
	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
	aie2_hwctx_wait_for_idle(hwctx);
	aie2_hwctx_stop(xdna, hwctx, NULL);
	hwctx->old_status = hwctx->status;
	hwctx->status = HWCTX_STAT_STOP;
}

void aie2_hwctx_resume(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;

	/*
	 * The resume path cannot guarantee that the mailbox channel can be
	 * regenerated. If that happens, submitting a message to this
	 * mailbox channel will return an error.
	 */
	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
	hwctx->status = hwctx->old_status;
	aie2_hwctx_restart(xdna, hwctx);
}

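/*
 * Common completion path for all response handlers: signal the job fence,
 * release the job slot in the semaphore and drop the references taken in
 * aie2_sched_job_run().
 */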
static void
aie2_sched_notify(struct amdxdna_sched_job *job)
{
	struct dma_fence *fence = job->fence;

	trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq);
	job->hwctx->priv->completed++;
	dma_fence_signal(fence);

	up(&job->hwctx->priv->job_sem);
	job->job_done = true;
	dma_fence_put(fence);
	mmput_async(job->mm);
	aie2_job_put(job);
}

static int
aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size)
{
	struct amdxdna_sched_job *job = handle;
	struct amdxdna_gem_obj *cmd_abo;
	u32 ret = 0;
	u32 status;

	cmd_abo = job->cmd_bo;

	if (unlikely(!data))
		goto out;

	if (unlikely(size != sizeof(u32))) {
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
		ret = -EINVAL;
		goto out;
	}

	status = readl(data);
	XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
	if (status == AIE2_STATUS_SUCCESS)
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
	else
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR);

out:
	aie2_sched_notify(job);
	return ret;
}

static int
aie2_sched_nocmd_resp_handler(void *handle, void __iomem *data, size_t size)
{
	struct amdxdna_sched_job *job = handle;
	u32 ret = 0;
	u32 status;

	if (unlikely(!data))
		goto out;

	if (unlikely(size != sizeof(u32))) {
		ret = -EINVAL;
		goto out;
	}

	status = readl(data);
	XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);

out:
	aie2_sched_notify(job);
	return ret;
}

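/*
 * Response handler for command-list submissions. On success the whole chain
 * completed; on failure the response carries the index and status of the
 * failing command, which is propagated back into the command BO.
 */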
static int
aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size)
{
	struct amdxdna_sched_job *job = handle;
	struct amdxdna_gem_obj *cmd_abo;
	struct amdxdna_dev *xdna;
	u32 fail_cmd_status;
	u32 fail_cmd_idx;
	u32 cmd_status;
	u32 ret = 0;

	cmd_abo = job->cmd_bo;
	if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) {
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
		ret = -EINVAL;
		goto out;
	}

	cmd_status = readl(data + offsetof(struct cmd_chain_resp, status));
	xdna = job->hwctx->client->xdna;
	XDNA_DBG(xdna, "Status 0x%x", cmd_status);
	if (cmd_status == AIE2_STATUS_SUCCESS) {
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
		goto out;
	}

	/* Slow path to handle error, read from ringbuf on BAR */
	fail_cmd_idx = readl(data + offsetof(struct cmd_chain_resp, fail_cmd_idx));
	fail_cmd_status = readl(data + offsetof(struct cmd_chain_resp, fail_cmd_status));
	XDNA_DBG(xdna, "Failed cmd idx %d, status 0x%x",
		 fail_cmd_idx, fail_cmd_status);

	if (fail_cmd_status == AIE2_STATUS_SUCCESS) {
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
		ret = -EINVAL;
		goto out;
	}
	amdxdna_cmd_set_state(cmd_abo, fail_cmd_status);

	if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) {
		struct amdxdna_cmd_chain *cc = amdxdna_cmd_get_payload(cmd_abo, NULL);

		cc->error_index = fail_cmd_idx;
		if (cc->error_index >= cc->command_count)
			cc->error_index = 0;
	}
out:
	aie2_sched_notify(job);
	return ret;
}

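/*
 * drm_sched run_job callback: take references that are dropped on completion
 * in aie2_sched_notify(), then hand the command off to the firmware through
 * the mailbox. On submit failure the references are dropped immediately and
 * an error pointer is returned in place of the fence.
 */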
static struct dma_fence *
aie2_sched_job_run(struct drm_sched_job *sched_job)
{
	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
	struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
	struct amdxdna_hwctx *hwctx = job->hwctx;
	struct dma_fence *fence;
	int ret;

	if (!mmget_not_zero(job->mm))
		return ERR_PTR(-ESRCH);

	kref_get(&job->refcnt);
	fence = dma_fence_get(job->fence);

	if (unlikely(!cmd_abo)) {
		ret = aie2_sync_bo(hwctx, job, aie2_sched_nocmd_resp_handler);
		goto out;
	}

	amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_NEW);

	if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN)
		ret = aie2_cmdlist_multi_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
	else if (force_cmdlist)
		ret = aie2_cmdlist_single_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
	else
		ret = aie2_execbuf(hwctx, job, aie2_sched_resp_handler);

out:
	if (ret) {
		dma_fence_put(job->fence);
		aie2_job_put(job);
		mmput(job->mm);
		fence = ERR_PTR(ret);
	}
	trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq);

	return fence;
}

static void aie2_sched_job_free(struct drm_sched_job *sched_job)
{
	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
	struct amdxdna_hwctx *hwctx = job->hwctx;

	trace_xdna_job(sched_job, hwctx->name, "job free", job->seq);
	if (!job->job_done)
		up(&hwctx->priv->job_sem);

	drm_sched_job_cleanup(sched_job);
	aie2_job_put(job);
}

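/*
 * drm_sched timeout callback: stop the scheduler and destroy the firmware
 * context, then recreate it and restart the queue so later jobs can proceed.
 */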
static enum drm_gpu_sched_stat
aie2_sched_job_timedout(struct drm_sched_job *sched_job)
{
	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
	struct amdxdna_hwctx *hwctx = job->hwctx;
	struct amdxdna_dev *xdna;

	xdna = hwctx->client->xdna;
	trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
	mutex_lock(&xdna->dev_lock);
	aie2_hwctx_stop(xdna, hwctx, sched_job);

	aie2_hwctx_restart(xdna, hwctx);
	mutex_unlock(&xdna->dev_lock);

	return DRM_GPU_SCHED_STAT_NOMINAL;
}

static const struct drm_sched_backend_ops sched_ops = {
	.run_job = aie2_sched_job_run,
	.free_job = aie2_sched_job_free,
	.timedout_job = aie2_sched_job_timedout,
};

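/*
 * Build the list of candidate start columns for this context: num_col is
 * derived from the tile count, and every column in [start, end] that meets
 * the alignment requirement becomes a possible placement for the solver.
 */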
static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct amdxdna_dev_hdl *ndev;
	int start, end, first, last;
	u32 width = 1, entries = 0;
	int i;

	if (!hwctx->num_tiles) {
		XDNA_ERR(xdna, "Number of tiles is zero");
		return -EINVAL;
	}

	ndev = xdna->dev_handle;
	if (unlikely(!ndev->metadata.core.row_count)) {
		XDNA_WARN(xdna, "Core tile row count is zero");
		return -EINVAL;
	}

	hwctx->num_col = hwctx->num_tiles / ndev->metadata.core.row_count;
	if (!hwctx->num_col || hwctx->num_col > ndev->total_col) {
		XDNA_ERR(xdna, "Invalid num_col %d", hwctx->num_col);
		return -EINVAL;
	}

	if (ndev->priv->col_align == COL_ALIGN_NATURE)
		width = hwctx->num_col;

	/*
	 * In the range [start, end], find the columns that are multiples of width.
	 * 'first' is the first such column,
	 * 'last' is the last such column,
	 * 'entries' is the total number of such columns.
	 */
	start = xdna->dev_info->first_col;
	end = ndev->total_col - hwctx->num_col;
	if (start > 0 && end == 0) {
		XDNA_DBG(xdna, "Force start from col 0");
		start = 0;
	}
	first = start + (width - start % width) % width;
	last = end - end % width;
	if (last >= first)
		entries = (last - first) / width + 1;
	XDNA_DBG(xdna, "start %d end %d first %d last %d",
		 start, end, first, last);

	if (unlikely(!entries)) {
		XDNA_ERR(xdna, "Start %d end %d width %d",
			 start, end, width);
		return -EINVAL;
	}

	hwctx->col_list = kmalloc_array(entries, sizeof(*hwctx->col_list), GFP_KERNEL);
	if (!hwctx->col_list)
		return -ENOMEM;

	hwctx->col_list_len = entries;
	hwctx->col_list[0] = first;
	for (i = 1; i < entries; i++)
		hwctx->col_list[i] = hwctx->col_list[i - 1] + width;

	print_hex_dump_debug("col_list: ", DUMP_PREFIX_OFFSET, 16, 4, hwctx->col_list,
			     entries * sizeof(*hwctx->col_list), false);
	return 0;
}

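/*
 * Ask the resource solver to pick a placement for this context based on the
 * column candidates and the context's QoS parameters.
 */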
static int aie2_alloc_resource(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct alloc_requests *xrs_req;
	int ret;

	xrs_req = kzalloc(sizeof(*xrs_req), GFP_KERNEL);
	if (!xrs_req)
		return -ENOMEM;

	xrs_req->cdo.start_cols = hwctx->col_list;
	xrs_req->cdo.cols_len = hwctx->col_list_len;
	xrs_req->cdo.ncols = hwctx->num_col;
	xrs_req->cdo.qos_cap.opc = hwctx->max_opc;

	xrs_req->rqos.gops = hwctx->qos.gops;
	xrs_req->rqos.fps = hwctx->qos.fps;
	xrs_req->rqos.dma_bw = hwctx->qos.dma_bandwidth;
	xrs_req->rqos.latency = hwctx->qos.latency;
	xrs_req->rqos.exec_time = hwctx->qos.frame_exec_time;
	xrs_req->rqos.priority = hwctx->qos.priority;

	xrs_req->rid = (uintptr_t)hwctx;

	ret = xrs_allocate_resource(xdna->xrs_hdl, xrs_req, hwctx);
	if (ret)
		XDNA_ERR(xdna, "Allocate AIE resource failed, ret %d", ret);

	kfree(xrs_req);
	return ret;
}

static void aie2_release_resource(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	int ret;

	ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx);
	if (ret)
		XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret);
}

static int aie2_ctx_syncobj_create(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct drm_file *filp = hwctx->client->filp;
	struct drm_syncobj *syncobj;
	u32 hdl;
	int ret;

	hwctx->syncobj_hdl = AMDXDNA_INVALID_FENCE_HANDLE;

	ret = drm_syncobj_create(&syncobj, 0, NULL);
	if (ret) {
		XDNA_ERR(xdna, "Create ctx syncobj failed, ret %d", ret);
		return ret;
	}
	ret = drm_syncobj_get_handle(filp, syncobj, &hdl);
	if (ret) {
		drm_syncobj_put(syncobj);
		XDNA_ERR(xdna, "Create ctx syncobj handle failed, ret %d", ret);
		return ret;
	}
	hwctx->priv->syncobj = syncobj;
	hwctx->syncobj_hdl = hdl;

	return 0;
}

static void aie2_ctx_syncobj_destroy(struct amdxdna_hwctx *hwctx)
{
	/*
	 * The syncobj_hdl is owned by user space and will be cleaned up
	 * separately.
	 */
	drm_syncobj_put(hwctx->priv->syncobj);
}

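/*
 * Set up everything a hardware context needs before it can accept commands:
 * pin the client's heap, allocate DEV BOs for chained command buffers,
 * initialize the DRM scheduler and entity, reserve columns through the
 * solver, map the heap into the firmware context and create the syncobj
 * used to export completion fences.
 */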
int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_client *client = hwctx->client;
	struct amdxdna_dev *xdna = client->xdna;
	const struct drm_sched_init_args args = {
		.ops = &sched_ops,
		.num_rqs = DRM_SCHED_PRIORITY_COUNT,
		.credit_limit = HWCTX_MAX_CMDS,
		.timeout = msecs_to_jiffies(HWCTX_MAX_TIMEOUT),
		.name = hwctx->name,
		.dev = xdna->ddev.dev,
	};
	struct drm_gpu_scheduler *sched;
	struct amdxdna_hwctx_priv *priv;
	struct amdxdna_gem_obj *heap;
	struct amdxdna_dev_hdl *ndev;
	int i, ret;

	priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;
	hwctx->priv = priv;

	mutex_lock(&client->mm_lock);
	heap = client->dev_heap;
	if (!heap) {
		XDNA_ERR(xdna, "The client dev heap object does not exist");
		mutex_unlock(&client->mm_lock);
		ret = -ENOENT;
		goto free_priv;
	}
	drm_gem_object_get(to_gobj(heap));
	mutex_unlock(&client->mm_lock);
	priv->heap = heap;
	sema_init(&priv->job_sem, HWCTX_MAX_CMDS);

	ret = amdxdna_gem_pin(heap);
	if (ret) {
		XDNA_ERR(xdna, "Dev heap pin failed, ret %d", ret);
		goto put_heap;
	}

	for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
		struct amdxdna_gem_obj *abo;
		struct amdxdna_drm_create_bo args = {
			.flags = 0,
			.type = AMDXDNA_BO_DEV,
			.vaddr = 0,
			.size = MAX_CHAIN_CMDBUF_SIZE,
		};

		abo = amdxdna_drm_alloc_dev_bo(&xdna->ddev, &args, client->filp, true);
		if (IS_ERR(abo)) {
			ret = PTR_ERR(abo);
			goto free_cmd_bufs;
		}

		XDNA_DBG(xdna, "Command buf %d addr 0x%llx size 0x%lx",
			 i, abo->mem.dev_addr, abo->mem.size);
		priv->cmd_buf[i] = abo;
	}

	sched = &priv->sched;
	mutex_init(&priv->io_lock);

	fs_reclaim_acquire(GFP_KERNEL);
	might_lock(&priv->io_lock);
	fs_reclaim_release(GFP_KERNEL);

	ret = drm_sched_init(sched, &args);
	if (ret) {
		XDNA_ERR(xdna, "Failed to init DRM scheduler. ret %d", ret);
		goto free_cmd_bufs;
	}

	ret = drm_sched_entity_init(&priv->entity, DRM_SCHED_PRIORITY_NORMAL,
				    &sched, 1, NULL);
	if (ret) {
		XDNA_ERR(xdna, "Failed to init sched entity. ret %d", ret);
		goto free_sched;
	}

	ret = aie2_hwctx_col_list(hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Create col list failed, ret %d", ret);
		goto free_entity;
	}

	ret = aie2_alloc_resource(hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Alloc hw resource failed, ret %d", ret);
		goto free_col_list;
	}

	ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
				heap->mem.userptr, heap->mem.size);
	if (ret) {
		XDNA_ERR(xdna, "Map host buffer failed, ret %d", ret);
		goto release_resource;
	}

	ret = aie2_ctx_syncobj_create(hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret);
		goto release_resource;
	}

	hwctx->status = HWCTX_STAT_INIT;
	ndev = xdna->dev_handle;
	ndev->hwctx_num++;
	init_waitqueue_head(&priv->job_free_wq);

	XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);

	return 0;

release_resource:
	aie2_release_resource(hwctx);
free_col_list:
	kfree(hwctx->col_list);
free_entity:
	drm_sched_entity_destroy(&priv->entity);
free_sched:
	drm_sched_fini(&priv->sched);
free_cmd_bufs:
	for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
		if (!priv->cmd_buf[i])
			continue;
		drm_gem_object_put(to_gobj(priv->cmd_buf[i]));
	}
	amdxdna_gem_unpin(heap);
put_heap:
	drm_gem_object_put(to_gobj(heap));
free_priv:
	kfree(priv);
	return ret;
}

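/*
 * Tear down a hardware context in the reverse order of init: stop accepting
 * new jobs, wait for submitted jobs to complete or be canceled, then release
 * the scheduler, syncobj, command buffers and heap.
 */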
void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev_hdl *ndev;
	struct amdxdna_dev *xdna;
	int idx;

	xdna = hwctx->client->xdna;
	ndev = xdna->dev_handle;
	ndev->hwctx_num--;

	XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq);
	drm_sched_entity_destroy(&hwctx->priv->entity);

	aie2_hwctx_wait_for_idle(hwctx);

	/* Request fw to destroy hwctx and cancel the remaining pending requests */
	aie2_release_resource(hwctx);

	/* Wait for all submitted jobs to be completed or canceled */
	wait_event(hwctx->priv->job_free_wq,
		   atomic64_read(&hwctx->job_submit_cnt) ==
		   atomic64_read(&hwctx->job_free_cnt));

	drm_sched_fini(&hwctx->priv->sched);
	aie2_ctx_syncobj_destroy(hwctx);

	for (idx = 0; idx < ARRAY_SIZE(hwctx->priv->cmd_buf); idx++)
		drm_gem_object_put(to_gobj(hwctx->priv->cmd_buf[idx]));
	amdxdna_gem_unpin(hwctx->priv->heap);
	drm_gem_object_put(to_gobj(hwctx->priv->heap));

	mutex_destroy(&hwctx->priv->io_lock);
	kfree(hwctx->col_list);
	kfree(hwctx->priv);
	kfree(hwctx->cus);
}

static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size)
{
	struct amdxdna_hwctx_param_config_cu *config = buf;
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	u32 total_size;
	int ret;

	XDNA_DBG(xdna, "Config %d CU to %s", config->num_cus, hwctx->name);
	if (XDNA_MBZ_DBG(xdna, config->pad, sizeof(config->pad)))
		return -EINVAL;

	if (hwctx->status != HWCTX_STAT_INIT) {
		XDNA_ERR(xdna, "Not support re-config CU");
		return -EINVAL;
	}

	if (!config->num_cus) {
		XDNA_ERR(xdna, "Number of CU is zero");
		return -EINVAL;
	}

	total_size = struct_size(config, cu_configs, config->num_cus);
	if (total_size > size) {
		XDNA_ERR(xdna, "CU config larger than size");
		return -EINVAL;
	}

	hwctx->cus = kmemdup(config, total_size, GFP_KERNEL);
	if (!hwctx->cus)
		return -ENOMEM;

	ret = aie2_config_cu(hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Config CU to firmware failed, ret %d", ret);
		goto free_cus;
	}

	wmb(); /* To avoid locking in command submit when checking status */
	hwctx->status = HWCTX_STAT_READY;

	return 0;

free_cus:
	kfree(hwctx->cus);
	hwctx->cus = NULL;
	return ret;
}

int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;

	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
	switch (type) {
	case DRM_AMDXDNA_HWCTX_CONFIG_CU:
		return aie2_hwctx_cu_config(hwctx, buf, size);
	case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF:
	case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF:
		return -EOPNOTSUPP;
	default:
		XDNA_DBG(xdna, "Not supported type %d", type);
		return -EOPNOTSUPP;
	}
}

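/*
 * Fault in the user pages backing a userptr BO with hmm_range_fault(),
 * retrying until the mmu_interval notifier sequence is stable or the
 * HMM_RANGE_DEFAULT_TIMEOUT deadline expires.
 */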
static int aie2_populate_range(struct amdxdna_gem_obj *abo)
{
	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
	struct mm_struct *mm = abo->mem.notifier.mm;
	struct hmm_range range = { 0 };
	unsigned long timeout;
	int ret;

	XDNA_INFO_ONCE(xdna, "populate memory range %llx size %lx",
		       abo->mem.userptr, abo->mem.size);
	range.notifier = &abo->mem.notifier;
	range.start = abo->mem.userptr;
	range.end = abo->mem.userptr + abo->mem.size;
	range.hmm_pfns = abo->mem.pfns;
	range.default_flags = HMM_PFN_REQ_FAULT;

	if (!mmget_not_zero(mm))
		return -EFAULT;

	timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
again:
	range.notifier_seq = mmu_interval_read_begin(&abo->mem.notifier);
	mmap_read_lock(mm);
	ret = hmm_range_fault(&range);
	mmap_read_unlock(mm);
	if (ret) {
		if (time_after(jiffies, timeout)) {
			ret = -ETIME;
			goto put_mm;
		}

		if (ret == -EBUSY)
			goto again;

		goto put_mm;
	}

	down_read(&xdna->notifier_lock);
	if (mmu_interval_read_retry(&abo->mem.notifier, range.notifier_seq)) {
		up_read(&xdna->notifier_lock);
		goto again;
	}
	abo->mem.map_invalid = false;
	up_read(&xdna->notifier_lock);

put_mm:
	mmput(mm);
	return ret;
}

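/*
 * Submit a job to the hardware context: take a slot from the job semaphore,
 * lock and reserve fences on all BOs, make sure userptr mappings are still
 * valid (repopulating and retrying if they were invalidated), then arm and
 * push the scheduler job and publish its fence on the syncobj timeline.
 */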
int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct ww_acquire_ctx acquire_ctx;
	struct dma_fence_chain *chain;
	struct amdxdna_gem_obj *abo;
	unsigned long timeout = 0;
	int ret, i;

	ret = down_interruptible(&hwctx->priv->job_sem);
	if (ret) {
		XDNA_ERR(xdna, "Grab job sem failed, ret %d", ret);
		return ret;
	}

	chain = dma_fence_chain_alloc();
	if (!chain) {
		XDNA_ERR(xdna, "Alloc fence chain failed");
		ret = -ENOMEM;
		goto up_sem;
	}

	ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx);
	if (ret) {
		XDNA_ERR(xdna, "DRM job init failed, ret %d", ret);
		goto free_chain;
	}

retry:
	ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
	if (ret) {
		XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret);
		goto cleanup_job;
	}

	for (i = 0; i < job->bo_cnt; i++) {
		ret = dma_resv_reserve_fences(job->bos[i]->resv, 1);
		if (ret) {
			XDNA_WARN(xdna, "Failed to reserve fences %d", ret);
			drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
			goto cleanup_job;
		}
	}

	down_read(&xdna->notifier_lock);
	for (i = 0; i < job->bo_cnt; i++) {
		abo = to_xdna_obj(job->bos[i]);
		if (abo->mem.map_invalid) {
			up_read(&xdna->notifier_lock);
			drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
			if (!timeout) {
				timeout = jiffies +
					msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
			} else if (time_after(jiffies, timeout)) {
				ret = -ETIME;
				goto cleanup_job;
			}

			ret = aie2_populate_range(abo);
			if (ret)
				goto cleanup_job;
			goto retry;
		}
	}

	mutex_lock(&hwctx->priv->io_lock);
	drm_sched_job_arm(&job->base);
	job->out_fence = dma_fence_get(&job->base.s_fence->finished);
	for (i = 0; i < job->bo_cnt; i++)
		dma_resv_add_fence(job->bos[i]->resv, job->out_fence, DMA_RESV_USAGE_WRITE);
	job->seq = hwctx->priv->seq++;
	kref_get(&job->refcnt);
	drm_sched_entity_push_job(&job->base);

	*seq = job->seq;
	drm_syncobj_add_point(hwctx->priv->syncobj, chain, job->out_fence, *seq);
	mutex_unlock(&hwctx->priv->io_lock);

	up_read(&xdna->notifier_lock);
	drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);

	aie2_job_put(job);
	atomic64_inc(&hwctx->job_submit_cnt);

	return 0;

cleanup_job:
	drm_sched_job_cleanup(&job->base);
free_chain:
	dma_fence_chain_free(chain);
up_sem:
	up(&hwctx->priv->job_sem);
	job->job_done = true;
	return ret;
}

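/*
 * mmu_interval notifier invalidation: mark the mapping invalid so the next
 * submit repopulates it, then wait for in-flight work on the BO to finish.
 */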
void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo,
			 unsigned long cur_seq)
{
	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
	struct drm_gem_object *gobj = to_gobj(abo);
	long ret;

	down_write(&xdna->notifier_lock);
	abo->mem.map_invalid = true;
	mmu_interval_set_seq(&abo->mem.notifier, cur_seq);
	up_write(&xdna->notifier_lock);
	ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP,
				    true, MAX_SCHEDULE_TIMEOUT);
	if (!ret || ret == -ERESTARTSYS)
		XDNA_ERR(xdna, "Failed to wait for bo, ret %ld", ret);
}