// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2014-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/slab.h>
#include <linux/list.h>
#include "kfd_device_queue_manager.h"
#include "kfd_priv.h"
#include "kfd_kernel_queue.h"
#include "amdgpu_amdkfd.h"

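/*
 * Look up the process queue node that owns queue ID @qid. Both user queues
 * (pqn->q) and kernel queues (pqn->kq) are matched.
 */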
static inline struct process_queue_node *get_queue_by_qid(
			struct process_queue_manager *pqm, unsigned int qid)
{
	struct process_queue_node *pqn;

	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
		if ((pqn->q && pqn->q->properties.queue_id == qid) ||
		    (pqn->kq && pqn->kq->queue->properties.queue_id == qid))
			return pqn;
	}

	return NULL;
}

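/*
 * Claim a specific queue slot. Used on the CRIU restore path, where the
 * queue ID is dictated by the checkpoint data.
 */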
static int assign_queue_slot_by_qid(struct process_queue_manager *pqm,
				    unsigned int qid)
{
	if (qid >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
		return -EINVAL;

	if (__test_and_set_bit(qid, pqm->queue_slot_bitmap)) {
		pr_err("Cannot create new queue because requested qid(%u) is in use\n", qid);
		return -ENOSPC;
	}

	return 0;
}

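/* Find and claim the first free slot in the per-process queue bitmap. */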
static int find_available_queue_slot(struct process_queue_manager *pqm,
					unsigned int *qid)
{
	unsigned long found;

	found = find_first_zero_bit(pqm->queue_slot_bitmap,
			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);

	pr_debug("The new slot id %lu\n", found);

	if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
		pr_info("Cannot open more queues for process with pasid 0x%x\n",
				pqm->process->pasid);
		return -ENOMEM;
	}

	set_bit(found, pqm->queue_slot_bitmap);
	*qid = found;

	return 0;
}

void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
{
	struct kfd_node *dev = pdd->dev;

	if (pdd->already_dequeued)
		return;

	dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
	pdd->already_dequeued = true;
}

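/*
 * pqm_set_gws - Attach or detach a GWS (global wave sync) allocation for the
 * queue identified by @qid. A non-NULL @gws attaches the device GWS to the
 * queue; a NULL @gws releases it. Only one queue per process may have GWS
 * assigned at a time.
 */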
int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
			void *gws)
{
	struct kfd_node *dev = NULL;
	struct process_queue_node *pqn;
	struct kfd_process_device *pdd;
	struct kgd_mem *mem = NULL;
	int ret;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_err("Queue id does not match any known queue\n");
		return -EINVAL;
	}

	if (pqn->q)
		dev = pqn->q->device;
	if (WARN_ON(!dev))
		return -ENODEV;

	pdd = kfd_get_process_device_data(dev, pqm->process);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return -EINVAL;
	}

	/* Only one queue per process may have GWS assigned */
	if (gws && pdd->qpd.num_gws)
		return -EBUSY;

	if (!gws && pdd->qpd.num_gws == 0)
		return -EINVAL;

	if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) && !dev->kfd->shared_resources.enable_mes) {
		if (gws)
			ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
				gws, &mem);
		else
			ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info,
				pqn->q->gws);
		if (unlikely(ret))
			return ret;
		pqn->q->gws = mem;
	} else {
		/*
		 * Intentionally set GWS to a non-NULL value
		 * for devices that do not use GWS for global wave
		 * synchronization but require the formality
		 * of setting GWS for cooperative groups.
		 */
		pqn->q->gws = gws ? ERR_PTR(-ENOMEM) : NULL;
	}

	pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0;

	return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
							pqn->q, NULL);
}

void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
{
	int i;

	for (i = 0; i < p->n_pdds; i++)
		kfd_process_dequeue_from_device(p->pdds[i]);
}

int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
{
	INIT_LIST_HEAD(&pqm->queues);
	pqm->queue_slot_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
					       GFP_KERNEL);
	if (!pqm->queue_slot_bitmap)
		return -ENOMEM;
	pqm->process = p;

	return 0;
}

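/*
 * Release every queue still on the process queue list, drop any remaining
 * GWS attachments, and free the queue slot bitmap.
 */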
void pqm_uninit(struct process_queue_manager *pqm)
{
	struct process_queue_node *pqn, *next;

	list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
		if (pqn->q && pqn->q->gws &&
		    KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
		    !pqn->q->device->kfd->shared_resources.enable_mes)
			amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
				pqn->q->gws);
		kfd_procfs_del_queue(pqn->q);
		uninit_queue(pqn->q);
		list_del(&pqn->process_queue_list);
		kfree(pqn);
	}

	bitmap_free(pqm->queue_slot_bitmap);
	pqm->queue_slot_bitmap = NULL;
}

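/*
 * Allocate and initialize a user-mode queue structure. On MES-enabled
 * devices this also allocates and zeroes the gang context buffer in GTT.
 */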
static int init_user_queue(struct process_queue_manager *pqm,
				struct kfd_node *dev, struct queue **q,
				struct queue_properties *q_properties,
				struct file *f, struct amdgpu_bo *wptr_bo,
				unsigned int qid)
{
	int retval;

	/* Doorbell initialized in user space */
	q_properties->doorbell_ptr = NULL;
	q_properties->exception_status = KFD_EC_MASK(EC_QUEUE_NEW);

	/* let DQM handle it */
	q_properties->vmid = 0;
	q_properties->queue_id = qid;

	retval = init_queue(q, q_properties);
	if (retval != 0)
		return retval;

	(*q)->device = dev;
	(*q)->process = pqm->process;

	if (dev->kfd->shared_resources.enable_mes) {
		retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
						AMDGPU_MES_GANG_CTX_SIZE,
						&(*q)->gang_ctx_bo,
						&(*q)->gang_ctx_gpu_addr,
						&(*q)->gang_ctx_cpu_ptr,
						false);
		if (retval) {
			pr_err("failed to allocate gang context bo\n");
			goto cleanup;
		}
		memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
		(*q)->wptr_bo = wptr_bo;
	}

	pr_debug("PQM After init queue\n");
	return 0;

cleanup:
	uninit_queue(*q);
	*q = NULL;
	return retval;
}

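/*
 * pqm_create_queue - Create a compute, SDMA or DIQ queue for this process
 * and register it with the device queue manager. On success *qid holds the
 * assigned queue slot; for user queues, *p_doorbell_offset_in_process (if
 * provided) returns the doorbell offset in bytes within the process doorbell
 * page. @q_data, @restore_mqd and @restore_ctl_stack are only used when
 * restoring a queue from a CRIU checkpoint.
 */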
int pqm_create_queue(struct process_queue_manager *pqm,
			    struct kfd_node *dev,
			    struct file *f,
			    struct queue_properties *properties,
			    unsigned int *qid,
			    struct amdgpu_bo *wptr_bo,
			    const struct kfd_criu_queue_priv_data *q_data,
			    const void *restore_mqd,
			    const void *restore_ctl_stack,
			    uint32_t *p_doorbell_offset_in_process)
{
	int retval;
	struct kfd_process_device *pdd;
	struct queue *q;
	struct process_queue_node *pqn;
	struct kernel_queue *kq;
	enum kfd_queue_type type = properties->type;
	unsigned int max_queues = 127; /* HWS limit */

	/*
	 * On GFX 9.4.3, increase the number of queues that
	 * can be created to 255. No HWS limit on GFX 9.4.3.
	 */
	if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3))
		max_queues = 255;

	q = NULL;
	kq = NULL;

	pdd = kfd_get_process_device_data(dev, pqm->process);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return -1;
	}

	/*
	 * For a debug process, verify that it is within the static queues
	 * limit; the limit is currently set to half of the total available
	 * HQD slots. If we are just about to create a DIQ, the is_debug flag
	 * is not set yet, hence we also check the queue type.
	 */
	if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ))
		max_queues = dev->kfd->device_info.max_no_of_hqd/2;

	if (pdd->qpd.queue_count >= max_queues)
		return -ENOSPC;

	if (q_data) {
		retval = assign_queue_slot_by_qid(pqm, q_data->q_id);
		*qid = q_data->q_id;
	} else
		retval = find_available_queue_slot(pqm, qid);

	if (retval != 0)
		return retval;

	if (list_empty(&pdd->qpd.queues_list) &&
	    list_empty(&pdd->qpd.priv_queue_list))
		dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);

	pqn = kzalloc(sizeof(*pqn), GFP_KERNEL);
	if (!pqn) {
		retval = -ENOMEM;
		goto err_allocate_pqn;
	}

	switch (type) {
	case KFD_QUEUE_TYPE_SDMA:
	case KFD_QUEUE_TYPE_SDMA_XGMI:
		/* SDMA queues are always allocated statically no matter
		 * which scheduler mode is used. We also do not need to
		 * check whether an SDMA queue can be allocated here, because
		 * allocate_sdma_queue() in create_queue() has the
		 * corresponding check logic.
		 */
		retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
		if (retval != 0)
			goto err_create_queue;
		pqn->q = q;
		pqn->kq = NULL;
		retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data,
						    restore_mqd, restore_ctl_stack);
		print_queue(q);
		break;

	case KFD_QUEUE_TYPE_COMPUTE:
		/* check if there is oversubscription */
		if ((dev->dqm->sched_policy ==
		     KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
		((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) ||
		(dev->dqm->active_queue_count >= get_cp_queues_num(dev->dqm)))) {
			pr_debug("Over-subscription is not allowed when amdkfd.sched_policy == 1\n");
			retval = -EPERM;
			goto err_create_queue;
		}

		retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
		if (retval != 0)
			goto err_create_queue;
		pqn->q = q;
		pqn->kq = NULL;
		retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data,
						    restore_mqd, restore_ctl_stack);
		print_queue(q);
		break;
	case KFD_QUEUE_TYPE_DIQ:
		kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_DIQ);
		if (!kq) {
			retval = -ENOMEM;
			goto err_create_queue;
		}
		kq->queue->properties.queue_id = *qid;
		pqn->kq = kq;
		pqn->q = NULL;
		retval = kfd_process_drain_interrupts(pdd);
		if (retval)
			break;

		retval = dev->dqm->ops.create_kernel_queue(dev->dqm,
							   kq, &pdd->qpd);
		break;
	default:
		WARN(1, "Invalid queue type %d", type);
		retval = -EINVAL;
	}

	if (retval != 0) {
		pr_err("Pasid 0x%x DQM create queue type %d failed. ret %d\n",
		       pqm->process->pasid, type, retval);
		goto err_create_queue;
	}

	if (q && p_doorbell_offset_in_process) {
		/* Return the doorbell offset (in bytes) within the doorbell
		 * page to the caller so it can be passed up to user mode.
		 * relative doorbell index = absolute doorbell index -
		 * absolute index of the first doorbell in the page.
		 */
		uint32_t first_db_index = amdgpu_doorbell_index_on_bar(pdd->dev->adev,
								       pdd->qpd.proc_doorbells,
								       0);

		*p_doorbell_offset_in_process = (q->properties.doorbell_off
						- first_db_index) * sizeof(uint32_t);
	}

	pr_debug("PQM After DQM create queue\n");

	list_add(&pqn->process_queue_list, &pqm->queues);

	if (q) {
		pr_debug("PQM done creating queue\n");
		kfd_procfs_add_queue(q);
		print_queue_properties(&q->properties);
	}

	return retval;

err_create_queue:
	uninit_queue(q);
	if (kq)
		kernel_queue_uninit(kq, false);
	kfree(pqn);
err_allocate_pqn:
	/* If the queues list is empty, unregister the process from the device */
	clear_bit(*qid, pqm->queue_slot_bitmap);
	if (list_empty(&pdd->qpd.queues_list) &&
	    list_empty(&pdd->qpd.priv_queue_list))
		dev->dqm->ops.unregister_process(dev->dqm, &pdd->qpd);
	return retval;
}

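/*
 * pqm_destroy_queue - Tear down the queue identified by @qid: destroy it in
 * the device queue manager, release its GWS and MES buffers if present, and
 * free its queue slot.
 */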
int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
{
	struct process_queue_node *pqn;
	struct kfd_process_device *pdd;
	struct device_queue_manager *dqm;
	struct kfd_node *dev;
	int retval;

	dqm = NULL;

	retval = 0;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_err("Queue id does not match any known queue\n");
		return -EINVAL;
	}

	dev = NULL;
	if (pqn->kq)
		dev = pqn->kq->dev;
	if (pqn->q)
		dev = pqn->q->device;
	if (WARN_ON(!dev))
		return -ENODEV;

	pdd = kfd_get_process_device_data(dev, pqm->process);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return -1;
	}

	if (pqn->kq) {
		/* destroy kernel queue (DIQ) */
		dqm = pqn->kq->dev->dqm;
		dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd);
		kernel_queue_uninit(pqn->kq, false);
	}

	if (pqn->q) {
		kfd_procfs_del_queue(pqn->q);
		dqm = pqn->q->device->dqm;
		retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
		if (retval) {
			pr_err("Pasid 0x%x destroy queue %d failed, ret %d\n",
			       pqm->process->pasid,
			       pqn->q->properties.queue_id, retval);
			if (retval != -ETIME)
				goto err_destroy_queue;
		}

		if (pqn->q->gws) {
			if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
			    !dev->kfd->shared_resources.enable_mes)
				amdgpu_amdkfd_remove_gws_from_process(
						pqm->process->kgd_process_info,
						pqn->q->gws);
			pdd->qpd.num_gws = 0;
		}

		if (dev->kfd->shared_resources.enable_mes) {
			amdgpu_amdkfd_free_gtt_mem(dev->adev,
						   pqn->q->gang_ctx_bo);
			if (pqn->q->wptr_bo)
				amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo);

		}
		uninit_queue(pqn->q);
	}

	list_del(&pqn->process_queue_list);
	kfree(pqn);
	clear_bit(qid, pqm->queue_slot_bitmap);

	if (list_empty(&pdd->qpd.queues_list) &&
	    list_empty(&pdd->qpd.priv_queue_list))
		dqm->ops.unregister_process(dqm, &pdd->qpd);

err_destroy_queue:
	return retval;
}

int pqm_update_queue_properties(struct process_queue_manager *pqm,
				unsigned int qid, struct queue_properties *p)
{
	int retval;
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_debug("No queue %d exists for update operation\n", qid);
		return -EFAULT;
	}

	pqn->q->properties.queue_address = p->queue_address;
	pqn->q->properties.queue_size = p->queue_size;
	pqn->q->properties.queue_percent = p->queue_percent;
	pqn->q->properties.priority = p->priority;
	pqn->q->properties.pm4_target_xcc = p->pm4_target_xcc;

	retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
							pqn->q, NULL);
	if (retval != 0)
		return retval;

	return 0;
}

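/*
 * Update the MQD of a user queue, e.g. to apply a CU mask. A user mask is
 * rejected while the debugger workaround owns the CU mask, and on GFX10 and
 * later (WGP-based ASICs) CUs must be enabled in adjacent pairs.
 */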
int pqm_update_mqd(struct process_queue_manager *pqm,
				unsigned int qid, struct mqd_update_info *minfo)
{
	int retval;
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_debug("No queue %d exists for update operation\n", qid);
		return -EFAULT;
	}

	/* CUs are masked for debugger requirements so deny user mask */
	if (pqn->q->properties.is_dbg_wa && minfo && minfo->cu_mask.ptr)
		return -EBUSY;

	/* ASICs that have WGPs must enforce pairwise enabled mask checks. */
	if (minfo && minfo->cu_mask.ptr &&
	    KFD_GC_VERSION(pqn->q->device) >= IP_VERSION(10, 0, 0)) {
		int i;

		for (i = 0; i < minfo->cu_mask.count; i += 2) {
			uint32_t cu_pair = (minfo->cu_mask.ptr[i / 32] >> (i % 32)) & 0x3;

			if (cu_pair && cu_pair != 0x3) {
				pr_debug("CUs must be adjacent pairwise enabled.\n");
				return -EINVAL;
			}
		}
	}

	retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
							pqn->q, minfo);
	if (retval != 0)
		return retval;

	if (minfo && minfo->cu_mask.ptr)
		pqn->q->properties.is_user_cu_masked = true;

	return 0;
}

struct kernel_queue *pqm_get_kernel_queue(
					struct process_queue_manager *pqm,
					unsigned int qid)
{
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	if (pqn && pqn->kq)
		return pqn->kq;

	return NULL;
}

struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
					unsigned int qid)
{
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	return pqn ? pqn->q : NULL;
}

int pqm_get_wave_state(struct process_queue_manager *pqm,
		       unsigned int qid,
		       void __user *ctl_stack,
		       u32 *ctl_stack_used_size,
		       u32 *save_area_used_size)
{
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_debug("amdkfd: No queue %d exists for operation\n",
			 qid);
		return -EFAULT;
	}

	return pqn->q->device->dqm->ops.get_wave_state(pqn->q->device->dqm,
						       pqn->q,
						       ctl_stack,
						       ctl_stack_used_size,
						       save_area_used_size);
}

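/*
 * Copy one snapshot entry per user queue into the debugger's buffer, up to
 * the caller-supplied capacity, and report the total number of user queues
 * through *num_qss_entries.
 */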
int pqm_get_queue_snapshot(struct process_queue_manager *pqm,
			   uint64_t exception_clear_mask,
			   void __user *buf,
			   int *num_qss_entries,
			   uint32_t *entry_size)
{
	struct process_queue_node *pqn;
	struct kfd_queue_snapshot_entry src;
	uint32_t tmp_entry_size = *entry_size, tmp_qss_entries = *num_qss_entries;
	int r = 0;

	*num_qss_entries = 0;
	if (!(*entry_size))
		return -EINVAL;

	*entry_size = min_t(size_t, *entry_size, sizeof(struct kfd_queue_snapshot_entry));
	mutex_lock(&pqm->process->event_mutex);

	memset(&src, 0, sizeof(src));

	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
		if (!pqn->q)
			continue;

		if (*num_qss_entries < tmp_qss_entries) {
			set_queue_snapshot_entry(pqn->q, exception_clear_mask, &src);

			if (copy_to_user(buf, &src, *entry_size)) {
				r = -EFAULT;
				break;
			}
			buf += tmp_entry_size;
		}
		*num_qss_entries += 1;
	}

	mutex_unlock(&pqm->process->event_mutex);
	return r;
}

static int get_queue_data_sizes(struct kfd_process_device *pdd,
				struct queue *q,
				uint32_t *mqd_size,
				uint32_t *ctl_stack_size)
{
	int ret;

	ret = pqm_get_queue_checkpoint_info(&pdd->process->pqm,
					    q->properties.queue_id,
					    mqd_size,
					    ctl_stack_size);
	if (ret)
		pr_err("Failed to get queue dump info (%d)\n", ret);

	return ret;
}

int kfd_process_get_queue_info(struct kfd_process *p,
			       uint32_t *num_queues,
			       uint64_t *priv_data_sizes)
{
	uint32_t extra_data_sizes = 0;
	struct queue *q;
	int i;
	int ret;

	*num_queues = 0;

	/* Run over all PDDs of the process */
	for (i = 0; i < p->n_pdds; i++) {
		struct kfd_process_device *pdd = p->pdds[i];

		list_for_each_entry(q, &pdd->qpd.queues_list, list) {
			if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
				q->properties.type == KFD_QUEUE_TYPE_SDMA ||
				q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
				uint32_t mqd_size, ctl_stack_size;

				*num_queues = *num_queues + 1;

				ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
				if (ret)
					return ret;

				extra_data_sizes += mqd_size + ctl_stack_size;
			} else {
				pr_err("Unsupported queue type (%d)\n", q->properties.type);
				return -EOPNOTSUPP;
			}
		}
	}
	*priv_data_sizes = extra_data_sizes +
				(*num_queues * sizeof(struct kfd_criu_queue_priv_data));

	return 0;
}

static int pqm_checkpoint_mqd(struct process_queue_manager *pqm,
			      unsigned int qid,
			      void *mqd,
			      void *ctl_stack)
{
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_debug("amdkfd: No queue %d exists for operation\n", qid);
		return -EFAULT;
	}

	if (!pqn->q->device->dqm->ops.checkpoint_mqd) {
		pr_err("amdkfd: queue dumping not supported on this device\n");
		return -EOPNOTSUPP;
	}

	return pqn->q->device->dqm->ops.checkpoint_mqd(pqn->q->device->dqm,
						       pqn->q, mqd, ctl_stack);
}

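/*
 * Fill one CRIU private-data record for @q, including the queue properties
 * and a checkpoint of its MQD and control stack.
 */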
static int criu_checkpoint_queue(struct kfd_process_device *pdd,
				 struct queue *q,
				 struct kfd_criu_queue_priv_data *q_data)
{
	uint8_t *mqd, *ctl_stack;
	int ret;

	mqd = (void *)(q_data + 1);
	ctl_stack = mqd + q_data->mqd_size;

	q_data->gpu_id = pdd->user_gpu_id;
	q_data->type = q->properties.type;
	q_data->format = q->properties.format;
	q_data->q_id = q->properties.queue_id;
	q_data->q_address = q->properties.queue_address;
	q_data->q_size = q->properties.queue_size;
	q_data->priority = q->properties.priority;
	q_data->q_percent = q->properties.queue_percent;
	q_data->read_ptr_addr = (uint64_t)q->properties.read_ptr;
	q_data->write_ptr_addr = (uint64_t)q->properties.write_ptr;
	q_data->doorbell_id = q->doorbell_id;

	q_data->sdma_id = q->sdma_id;

	q_data->eop_ring_buffer_address =
		q->properties.eop_ring_buffer_address;

	q_data->eop_ring_buffer_size = q->properties.eop_ring_buffer_size;

	q_data->ctx_save_restore_area_address =
		q->properties.ctx_save_restore_area_address;

	q_data->ctx_save_restore_area_size =
		q->properties.ctx_save_restore_area_size;

	q_data->gws = !!q->gws;

	ret = pqm_checkpoint_mqd(&pdd->process->pqm, q->properties.queue_id, mqd, ctl_stack);
	if (ret) {
		pr_err("Failed checkpoint queue_mqd (%d)\n", ret);
		return ret;
	}

	pr_debug("Dumping Queue: gpu_id:%x queue_id:%u\n", q_data->gpu_id, q_data->q_id);
	return ret;
}

static int criu_checkpoint_queues_device(struct kfd_process_device *pdd,
					 uint8_t __user *user_priv,
					 unsigned int *q_index,
					 uint64_t *queues_priv_data_offset)
{
	unsigned int q_private_data_size = 0;
	uint8_t *q_private_data = NULL; /* Local buffer to store individual queue private data */
	struct queue *q;
	int ret = 0;

	list_for_each_entry(q, &pdd->qpd.queues_list, list) {
		struct kfd_criu_queue_priv_data *q_data;
		uint64_t q_data_size;
		uint32_t mqd_size;
		uint32_t ctl_stack_size;

		if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE &&
			q->properties.type != KFD_QUEUE_TYPE_SDMA &&
			q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI) {

			pr_err("Unsupported queue type (%d)\n", q->properties.type);
			ret = -EOPNOTSUPP;
			break;
		}

		ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
		if (ret)
			break;

		q_data_size = sizeof(*q_data) + mqd_size + ctl_stack_size;

		/* Increase local buffer space if needed */
		if (q_private_data_size < q_data_size) {
			kfree(q_private_data);

			q_private_data = kzalloc(q_data_size, GFP_KERNEL);
			if (!q_private_data) {
				ret = -ENOMEM;
				break;
			}
			q_private_data_size = q_data_size;
		}

		q_data = (struct kfd_criu_queue_priv_data *)q_private_data;

		/* data stored in this order: priv_data, mqd, ctl_stack */
		q_data->mqd_size = mqd_size;
		q_data->ctl_stack_size = ctl_stack_size;

		ret = criu_checkpoint_queue(pdd, q, q_data);
		if (ret)
			break;

		q_data->object_type = KFD_CRIU_OBJECT_TYPE_QUEUE;

		ret = copy_to_user(user_priv + *queues_priv_data_offset,
				q_data, q_data_size);
		if (ret) {
			ret = -EFAULT;
			break;
		}
		*queues_priv_data_offset += q_data_size;
		*q_index = *q_index + 1;
	}

	kfree(q_private_data);

	return ret;
}

int kfd_criu_checkpoint_queues(struct kfd_process *p,
			       uint8_t __user *user_priv_data,
			       uint64_t *priv_data_offset)
{
	int ret = 0, pdd_index, q_index = 0;

	for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
		struct kfd_process_device *pdd = p->pdds[pdd_index];

		/*
		 * criu_checkpoint_queues_device will copy data to user and update q_index and
		 * queues_priv_data_offset
		 */
		ret = criu_checkpoint_queues_device(pdd, user_priv_data, &q_index,
						    priv_data_offset);

		if (ret)
			break;
	}

	return ret;
}

static void set_queue_properties_from_criu(struct queue_properties *qp,
					   struct kfd_criu_queue_priv_data *q_data)
{
	qp->is_interop = false;
	qp->queue_percent = q_data->q_percent;
	qp->priority = q_data->priority;
	qp->queue_address = q_data->q_address;
	qp->queue_size = q_data->q_size;
	qp->read_ptr = (uint32_t *) q_data->read_ptr_addr;
	qp->write_ptr = (uint32_t *) q_data->write_ptr_addr;
	qp->eop_ring_buffer_address = q_data->eop_ring_buffer_address;
	qp->eop_ring_buffer_size = q_data->eop_ring_buffer_size;
	qp->ctx_save_restore_area_address = q_data->ctx_save_restore_area_address;
	qp->ctx_save_restore_area_size = q_data->ctx_save_restore_area_size;
	qp->ctl_stack_size = q_data->ctl_stack_size;
	qp->type = q_data->type;
	qp->format = q_data->format;
}

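/*
 * Re-create a queue from CRIU checkpoint data: copy the private record and
 * the saved MQD/control stack from the user buffer, create the queue with
 * its original properties, and restore its GWS assignment if it had one.
 */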
int kfd_criu_restore_queue(struct kfd_process *p,
			   uint8_t __user *user_priv_ptr,
			   uint64_t *priv_data_offset,
			   uint64_t max_priv_data_size)
{
	uint8_t *mqd, *ctl_stack, *q_extra_data = NULL;
	struct kfd_criu_queue_priv_data *q_data;
	struct kfd_process_device *pdd;
	uint64_t q_extra_data_size;
	struct queue_properties qp;
	unsigned int queue_id;
	int ret = 0;

	if (*priv_data_offset + sizeof(*q_data) > max_priv_data_size)
		return -EINVAL;

	q_data = kmalloc(sizeof(*q_data), GFP_KERNEL);
	if (!q_data)
		return -ENOMEM;

	ret = copy_from_user(q_data, user_priv_ptr + *priv_data_offset, sizeof(*q_data));
	if (ret) {
		ret = -EFAULT;
		goto exit;
	}

	*priv_data_offset += sizeof(*q_data);
	q_extra_data_size = (uint64_t)q_data->ctl_stack_size + q_data->mqd_size;

	if (*priv_data_offset + q_extra_data_size > max_priv_data_size) {
		ret = -EINVAL;
		goto exit;
	}

	q_extra_data = kmalloc(q_extra_data_size, GFP_KERNEL);
	if (!q_extra_data) {
		ret = -ENOMEM;
		goto exit;
	}

	ret = copy_from_user(q_extra_data, user_priv_ptr + *priv_data_offset, q_extra_data_size);
	if (ret) {
		ret = -EFAULT;
		goto exit;
	}

	*priv_data_offset += q_extra_data_size;

	pdd = kfd_process_device_data_by_id(p, q_data->gpu_id);
	if (!pdd) {
		pr_err("Failed to get pdd\n");
		ret = -EINVAL;
		goto exit;
	}

	/* data stored in this order: mqd, ctl_stack */
	mqd = q_extra_data;
	ctl_stack = mqd + q_data->mqd_size;

	memset(&qp, 0, sizeof(qp));
	set_queue_properties_from_criu(&qp, q_data);

	print_queue_properties(&qp);

	ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, NULL, q_data, mqd, ctl_stack,
				NULL);
	if (ret) {
		pr_err("Failed to create new queue err:%d\n", ret);
		goto exit;
	}

	if (q_data->gws)
		ret = pqm_set_gws(&p->pqm, q_data->q_id, pdd->dev->gws);

exit:
	if (ret)
		pr_err("Failed to restore queue (%d)\n", ret);
	else
		pr_debug("Queue id %d was restored successfully\n", queue_id);

	kfree(q_data);

	return ret;
}

int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm,
				  unsigned int qid,
				  uint32_t *mqd_size,
				  uint32_t *ctl_stack_size)
{
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_debug("amdkfd: No queue %d exists for operation\n", qid);
		return -EFAULT;
	}

	if (!pqn->q->device->dqm->ops.get_queue_checkpoint_info) {
		pr_err("amdkfd: queue dumping not supported on this device\n");
		return -EOPNOTSUPP;
	}

	pqn->q->device->dqm->ops.get_queue_checkpoint_info(pqn->q->device->dqm,
							   pqn->q, mqd_size,
							   ctl_stack_size);
	return 0;
}

#if defined(CONFIG_DEBUG_FS)

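/* Dump the MQDs of all queues belonging to this process to debugfs. */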
int pqm_debugfs_mqds(struct seq_file *m, void *data)
{
	struct process_queue_manager *pqm = data;
	struct process_queue_node *pqn;
	struct queue *q;
	enum KFD_MQD_TYPE mqd_type;
	struct mqd_manager *mqd_mgr;
	int r = 0, xcc, num_xccs = 1;
	void *mqd;
	uint64_t size = 0;

	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
		if (pqn->q) {
			q = pqn->q;
			switch (q->properties.type) {
			case KFD_QUEUE_TYPE_SDMA:
			case KFD_QUEUE_TYPE_SDMA_XGMI:
				seq_printf(m, "  SDMA queue on device %x\n",
					   q->device->id);
				mqd_type = KFD_MQD_TYPE_SDMA;
				break;
			case KFD_QUEUE_TYPE_COMPUTE:
				seq_printf(m, "  Compute queue on device %x\n",
					   q->device->id);
				mqd_type = KFD_MQD_TYPE_CP;
				num_xccs = NUM_XCC(q->device->xcc_mask);
				break;
			default:
				seq_printf(m,
				"  Bad user queue type %d on device %x\n",
					   q->properties.type, q->device->id);
				continue;
			}
			mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type];
			size = mqd_mgr->mqd_stride(mqd_mgr,
							&q->properties);
		} else if (pqn->kq) {
			q = pqn->kq->queue;
			mqd_mgr = pqn->kq->mqd_mgr;
			switch (q->properties.type) {
			case KFD_QUEUE_TYPE_DIQ:
				seq_printf(m, "  DIQ on device %x\n",
					   pqn->kq->dev->id);
				break;
			default:
				seq_printf(m,
				"  Bad kernel queue type %d on device %x\n",
					   q->properties.type,
					   pqn->kq->dev->id);
				continue;
			}
		} else {
			seq_printf(m,
			"  Weird: Queue node with neither kernel nor user queue\n");
			continue;
		}

		for (xcc = 0; xcc < num_xccs; xcc++) {
			mqd = q->mqd + size * xcc;
			r = mqd_mgr->debugfs_show_mqd(m, mqd);
			if (r != 0)
				break;
		}
	}

	return r;
}

#endif