xref: /linux/drivers/gpu/drm/amd/amdkfd/kfd_queue.c (revision ab93e0dd72c37d378dd936f031ffb83ff2bd87ce)
1d87f36a0SRajneesh Bhardwaj // SPDX-License-Identifier: GPL-2.0 OR MIT
2ed8aab45SBen Goz /*
3d87f36a0SRajneesh Bhardwaj  * Copyright 2014-2022 Advanced Micro Devices, Inc.
4ed8aab45SBen Goz  *
5ed8aab45SBen Goz  * Permission is hereby granted, free of charge, to any person obtaining a
6ed8aab45SBen Goz  * copy of this software and associated documentation files (the "Software"),
7ed8aab45SBen Goz  * to deal in the Software without restriction, including without limitation
8ed8aab45SBen Goz  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9ed8aab45SBen Goz  * and/or sell copies of the Software, and to permit persons to whom the
10ed8aab45SBen Goz  * Software is furnished to do so, subject to the following conditions:
11ed8aab45SBen Goz  *
12ed8aab45SBen Goz  * The above copyright notice and this permission notice shall be included in
13ed8aab45SBen Goz  * all copies or substantial portions of the Software.
14ed8aab45SBen Goz  *
15ed8aab45SBen Goz  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16ed8aab45SBen Goz  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17ed8aab45SBen Goz  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18ed8aab45SBen Goz  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19ed8aab45SBen Goz  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20ed8aab45SBen Goz  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21ed8aab45SBen Goz  * OTHER DEALINGS IN THE SOFTWARE.
22ed8aab45SBen Goz  *
23ed8aab45SBen Goz  */
24ed8aab45SBen Goz 
25ed8aab45SBen Goz #include <linux/slab.h>
26ed8aab45SBen Goz #include "kfd_priv.h"
27517fff22SPhilip Yang #include "kfd_topology.h"
28b049504eSPhilip Yang #include "kfd_svm.h"
29ed8aab45SBen Goz 
/* Dump every field of @q at debug log level; silently ignores a NULL @q. */
void print_queue_properties(struct queue_properties *q)
{
	if (!q)
		return;

	pr_debug("Printing queue properties:\n");
	pr_debug("Queue Type: %u\n", q->type);
	pr_debug("Queue Size: %llu\n", q->queue_size);
	pr_debug("Queue percent: %u\n", q->queue_percent);
	pr_debug("Queue Address: 0x%llX\n", q->queue_address);
	pr_debug("Queue Id: %u\n", q->queue_id);
	pr_debug("Queue Process Vmid: %u\n", q->vmid);
	/* %px deliberately prints the unhashed pointer value (debug-only) */
	pr_debug("Queue Read Pointer: 0x%px\n", q->read_ptr);
	pr_debug("Queue Write Pointer: 0x%px\n", q->write_ptr);
	pr_debug("Queue Doorbell Pointer: 0x%p\n", q->doorbell_ptr);
	pr_debug("Queue Doorbell Offset: %u\n", q->doorbell_off);
}
47ed8aab45SBen Goz 
/*
 * Dump a queue's properties plus its MQD/GART/process/device bookkeeping
 * at debug log level; silently ignores a NULL @q.
 */
void print_queue(struct queue *q)
{
	if (!q)
		return;
	pr_debug("Printing queue:\n");
	pr_debug("Queue Type: %u\n", q->properties.type);
	pr_debug("Queue Size: %llu\n", q->properties.queue_size);
	pr_debug("Queue percent: %u\n", q->properties.queue_percent);
	pr_debug("Queue Address: 0x%llX\n", q->properties.queue_address);
	pr_debug("Queue Id: %u\n", q->properties.queue_id);
	pr_debug("Queue Process Vmid: %u\n", q->properties.vmid);
	/* %px deliberately prints the unhashed pointer value (debug-only) */
	pr_debug("Queue Read Pointer: 0x%px\n", q->properties.read_ptr);
	pr_debug("Queue Write Pointer: 0x%px\n", q->properties.write_ptr);
	pr_debug("Queue Doorbell Pointer: 0x%p\n", q->properties.doorbell_ptr);
	pr_debug("Queue Doorbell Offset: %u\n", q->properties.doorbell_off);
	pr_debug("Queue MQD Address: 0x%p\n", q->mqd);
	pr_debug("Queue MQD Gart: 0x%llX\n", q->gart_mqd_addr);
	pr_debug("Queue Process Address: 0x%p\n", q->process);
	pr_debug("Queue Device Address: 0x%p\n", q->device);
}
68ed8aab45SBen Goz 
init_queue(struct queue ** q,const struct queue_properties * properties)69e88a614cSEdward O'Callaghan int init_queue(struct queue **q, const struct queue_properties *properties)
70ed8aab45SBen Goz {
71dbf56ab1SKent Russell 	struct queue *tmp_q;
72ed8aab45SBen Goz 
73dbf56ab1SKent Russell 	tmp_q = kzalloc(sizeof(*tmp_q), GFP_KERNEL);
74dbf56ab1SKent Russell 	if (!tmp_q)
75ed8aab45SBen Goz 		return -ENOMEM;
76ed8aab45SBen Goz 
77dbf56ab1SKent Russell 	memcpy(&tmp_q->properties, properties, sizeof(*properties));
78ed8aab45SBen Goz 
79dbf56ab1SKent Russell 	*q = tmp_q;
80ed8aab45SBen Goz 	return 0;
81ed8aab45SBen Goz }
82ed8aab45SBen Goz 
/* Free a queue allocated by init_queue(); kfree(NULL) is a no-op. */
void uninit_queue(struct queue *q)
{
	kfree(q);
}
87fb910658SPhilip Yang 
881cb62da0SPhilip Yang #if IS_ENABLED(CONFIG_HSA_AMD_SVM)
891cb62da0SPhilip Yang 
/*
 * kfd_queue_buffer_svm_get - pin queue references on the SVM ranges backing
 * the user buffer [@addr, @addr + @size).
 *
 * Walks the process SVM range set under svms.lock.  Each covering range must
 * be mapped to the GPU, accessible (or accessible-in-place) from @pdd's node,
 * and registered with KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED; otherwise the
 * buffer is rejected.  Only after the whole span is validated are the
 * queue_refcount counters bumped, so a failure takes no references.
 *
 * Return: 0 on success, -EINVAL if any part of the span is not backed by a
 * suitable SVM range.  Undone by kfd_queue_buffer_svm_put().
 */
static int kfd_queue_buffer_svm_get(struct kfd_process_device *pdd, u64 addr, u64 size)
{
	struct kfd_process *p = pdd->process;
	struct list_head update_list;
	struct svm_range *prange;
	int ret = -EINVAL;

	INIT_LIST_HEAD(&update_list);
	/* work in page-number space from here on */
	addr >>= PAGE_SHIFT;
	size >>= PAGE_SHIFT;

	mutex_lock(&p->svms.lock);

	/*
	 * The span may be split across multiple svm pranges aligned to the
	 * granularity boundary, so iterate until the whole span is covered.
	 */
	while (size) {
		uint32_t gpuid, gpuidx;
		int r;

		prange = svm_range_from_addr(&p->svms, addr, NULL);
		if (!prange)
			break;

		if (!prange->mapped_to_gpu)
			break;

		r = kfd_process_gpuid_from_node(p, pdd->dev, &gpuid, &gpuidx);
		if (r < 0)
			break;
		/* queue's device must have (in-place) access to the range */
		if (!test_bit(gpuidx, prange->bitmap_access) &&
		    !test_bit(gpuidx, prange->bitmap_aip))
			break;

		/* range must stay GPU-mapped for the queue's lifetime */
		if (!(prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED))
			break;

		/* remember the range; refcounts are only taken on success */
		list_add(&prange->update_list, &update_list);

		if (prange->last - prange->start + 1 >= size) {
			size = 0;
			break;
		}

		size -= prange->last - prange->start + 1;
		addr += prange->last - prange->start + 1;
	}
	if (size) {
		/* note: addr/size are page numbers here, not byte addresses */
		pr_debug("[0x%llx 0x%llx] not registered\n", addr, addr + size - 1);
		goto out_unlock;
	}

	list_for_each_entry(prange, &update_list, update_list)
		atomic_inc(&prange->queue_refcount);
	ret = 0;

out_unlock:
	mutex_unlock(&p->svms.lock);
	return ret;
}
150b049504eSPhilip Yang 
/*
 * kfd_queue_buffer_svm_put - drop the queue references taken by
 * kfd_queue_buffer_svm_get() on [@addr, @addr + @size).
 *
 * Iterates the interval tree under svms.lock.  atomic_add_unless(.., -1, 0)
 * decrements but never goes below zero, making the put safe against ranges
 * that never had a reference; child ranges (from splits) are dropped along
 * with their parent.
 */
static void kfd_queue_buffer_svm_put(struct kfd_process_device *pdd, u64 addr, u64 size)
{
	struct kfd_process *p = pdd->process;
	struct svm_range *prange, *pchild;
	struct interval_tree_node *node;
	unsigned long last;

	/* convert byte span to an inclusive page-number interval */
	addr >>= PAGE_SHIFT;
	last = addr + (size >> PAGE_SHIFT) - 1;

	mutex_lock(&p->svms.lock);

	node = interval_tree_iter_first(&p->svms.objects, addr, last);
	while (node) {
		struct interval_tree_node *next_node;
		unsigned long next_start;

		prange = container_of(node, struct svm_range, it_node);
		/* fetch successor before touching prange */
		next_node = interval_tree_iter_next(node, addr, last);
		next_start = min(node->last, last) + 1;

		if (atomic_add_unless(&prange->queue_refcount, -1, 0)) {
			list_for_each_entry(pchild, &prange->child_list, child_list)
				atomic_add_unless(&pchild->queue_refcount, -1, 0);
		}

		node = next_node;
		addr = next_start;
	}

	mutex_unlock(&p->svms.lock);
}
1831cb62da0SPhilip Yang #else
1841cb62da0SPhilip Yang 
/* CONFIG_HSA_AMD_SVM disabled: SVM-backed CWSR areas cannot be acquired. */
static int kfd_queue_buffer_svm_get(struct kfd_process_device *pdd, u64 addr, u64 size)
{
	return -EINVAL;
}
1891cb62da0SPhilip Yang 
/* CONFIG_HSA_AMD_SVM disabled: nothing was acquired, nothing to release. */
static void kfd_queue_buffer_svm_put(struct kfd_process_device *pdd, u64 addr, u64 size)
{
}
1931cb62da0SPhilip Yang 
1941cb62da0SPhilip Yang #endif
195b049504eSPhilip Yang 
/*
 * kfd_queue_buffer_get - resolve a queue buffer's GPU VA to its backing BO.
 *
 * Looks up the VM mapping at @addr and verifies it starts exactly at @addr
 * and (when @expected_size is non-zero) spans exactly @expected_size bytes.
 * On success a BO reference is taken for the caller and the bo_va's
 * queue_refcount is raised to keep the mapping alive for the queue.
 *
 * Caller must hold the VM root BO reservation.
 *
 * Return: 0 and *@pbo set on success; -EINVAL and *@pbo = NULL otherwise.
 * Undo with kfd_queue_buffer_put() plus kfd_queue_unref_bo_va().
 */
int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_bo **pbo,
			 u64 expected_size)
{
	struct amdgpu_bo_va_mapping *mapping;
	u64 user_addr;
	u64 size;

	/* mappings are tracked in GPU-page units */
	user_addr = (u64)addr >> AMDGPU_GPU_PAGE_SHIFT;
	size = expected_size >> AMDGPU_GPU_PAGE_SHIFT;

	mapping = amdgpu_vm_bo_lookup_mapping(vm, user_addr);
	if (!mapping)
		goto out_err;

	/* expected_size == 0 means "any size", only the start must match */
	if (user_addr != mapping->start ||
	    (size != 0 && user_addr + size - 1 != mapping->last)) {
		pr_debug("expected size 0x%llx not equal to mapping addr 0x%llx size 0x%llx\n",
			expected_size, mapping->start << AMDGPU_GPU_PAGE_SHIFT,
			(mapping->last - mapping->start + 1) << AMDGPU_GPU_PAGE_SHIFT);
		goto out_err;
	}

	*pbo = amdgpu_bo_ref(mapping->bo_va->base.bo);
	/* pin the VA mapping so userspace cannot unmap it under the queue */
	mapping->bo_va->queue_refcount++;
	return 0;

out_err:
	*pbo = NULL;
	return -EINVAL;
}
226fb910658SPhilip Yang 
/* FIXME: remove this function, just call amdgpu_bo_unref directly */
/* Drop the BO reference taken by kfd_queue_buffer_get(); NULLs *@bo. */
void kfd_queue_buffer_put(struct amdgpu_bo **bo)
{
	amdgpu_bo_unref(bo);
}
232834368eaSPhilip Yang 
/*
 * kfd_queue_acquire_buffers - validate and pin every user buffer a queue
 * needs: write/read pointers, the ring, and (for compute queues) the EOP
 * buffer and the CWSR save/restore area.
 *
 * Sizes are checked against the node's topology properties.  The CWSR area
 * may be backed either by a regular BO mapping or by SVM memory; the BO
 * lookup is tried first and only on its failure is the SVM path attempted
 * (after dropping the VM reservation, which kfd_queue_buffer_svm_get must
 * not hold).
 *
 * Return: 0 on success with all *_bo fields of @properties populated;
 * negative errno on failure with every partially taken reference undone.
 */
int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties)
{
	struct kfd_topology_device *topo_dev;
	u64 expected_queue_size;
	struct amdgpu_vm *vm;
	u32 total_cwsr_size;
	int err;

	topo_dev = kfd_topology_device_by_id(pdd->dev->id);
	if (!topo_dev)
		return -EINVAL;

	/* AQL queues on GFX7 and GFX8 appear twice their actual size */
	if (properties->type == KFD_QUEUE_TYPE_COMPUTE &&
	    properties->format == KFD_QUEUE_FORMAT_AQL &&
	    topo_dev->node_props.gfx_target_version >= 70000 &&
	    topo_dev->node_props.gfx_target_version < 90000)
		expected_queue_size = properties->queue_size / 2;
	else
		expected_queue_size = properties->queue_size;

	vm = drm_priv_to_vm(pdd->drm_priv);
	err = amdgpu_bo_reserve(vm->root.bo, false);
	if (err)
		return err;

	err = kfd_queue_buffer_get(vm, properties->write_ptr, &properties->wptr_bo, PAGE_SIZE);
	if (err)
		goto out_err_unreserve;

	err = kfd_queue_buffer_get(vm, properties->read_ptr, &properties->rptr_bo, PAGE_SIZE);
	if (err)
		goto out_err_unreserve;

	err = kfd_queue_buffer_get(vm, (void *)properties->queue_address,
				   &properties->ring_bo, expected_queue_size);
	if (err)
		goto out_err_unreserve;

	/* only compute queue requires EOP buffer and CWSR area */
	if (properties->type != KFD_QUEUE_TYPE_COMPUTE)
		goto out_unreserve;

	/* EOP buffer is not required for all ASICs */
	if (properties->eop_ring_buffer_address) {
		if (properties->eop_ring_buffer_size != topo_dev->node_props.eop_buffer_size) {
			pr_debug("queue eop bo size 0x%x not equal to node eop buf size 0x%x\n",
				properties->eop_ring_buffer_size,
				topo_dev->node_props.eop_buffer_size);
			err = -EINVAL;
			goto out_err_unreserve;
		}
		err = kfd_queue_buffer_get(vm, (void *)properties->eop_ring_buffer_address,
					   &properties->eop_buf_bo,
					   properties->eop_ring_buffer_size);
		if (err)
			goto out_err_unreserve;
	}

	if (properties->ctl_stack_size != topo_dev->node_props.ctl_stack_size) {
		pr_debug("queue ctl stack size 0x%x not equal to node ctl stack size 0x%x\n",
			properties->ctl_stack_size,
			topo_dev->node_props.ctl_stack_size);
		err = -EINVAL;
		goto out_err_unreserve;
	}

	if (properties->ctx_save_restore_area_size != topo_dev->node_props.cwsr_size) {
		pr_debug("queue cwsr size 0x%x not equal to node cwsr size 0x%x\n",
			properties->ctx_save_restore_area_size,
			topo_dev->node_props.cwsr_size);
		err = -EINVAL;
		goto out_err_unreserve;
	}

	/* one CWSR + debugger slice per XCC, rounded up to page size */
	total_cwsr_size = (topo_dev->node_props.cwsr_size + topo_dev->node_props.debug_memory_size)
			  * NUM_XCC(pdd->dev->xcc_mask);
	total_cwsr_size = ALIGN(total_cwsr_size, PAGE_SIZE);

	err = kfd_queue_buffer_get(vm, (void *)properties->ctx_save_restore_area_address,
				   &properties->cwsr_bo, total_cwsr_size);
	/* note inverted test: success of the BO path means we are done */
	if (!err)
		goto out_unreserve;

	/* BO lookup failed; fall back to SVM, which takes svms.lock itself */
	amdgpu_bo_unreserve(vm->root.bo);

	err = kfd_queue_buffer_svm_get(pdd, properties->ctx_save_restore_area_address,
				       total_cwsr_size);
	if (err)
		goto out_err_release;

	return 0;

out_unreserve:
	amdgpu_bo_unreserve(vm->root.bo);
	return 0;

out_err_unreserve:
	amdgpu_bo_unreserve(vm->root.bo);
out_err_release:
	/* FIXME: make a _locked version of this that can be called before
	 * dropping the VM reservation.
	 */
	kfd_queue_unref_bo_vas(pdd, properties);
	kfd_queue_release_buffers(pdd, properties);
	return err;
}
340fb910658SPhilip Yang 
/*
 * kfd_queue_release_buffers - drop the BO and SVM references taken by
 * kfd_queue_acquire_buffers().
 *
 * Buffer puts are unconditional (each put is a no-op on a NULL BO pointer);
 * the SVM put recomputes the same total_cwsr_size the acquire path used.
 *
 * NOTE(review): if the topology lookup fails here, the function returns
 * -EINVAL after the BO puts but before kfd_queue_buffer_svm_put(), leaving
 * any SVM queue_refcounts taken at acquire time undropped — confirm this is
 * acceptable (lookup is not expected to fail for a live device).
 *
 * Return: 0 on success, -EINVAL if the node's topology entry is missing.
 */
int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties)
{
	struct kfd_topology_device *topo_dev;
	u32 total_cwsr_size;

	kfd_queue_buffer_put(&properties->wptr_bo);
	kfd_queue_buffer_put(&properties->rptr_bo);
	kfd_queue_buffer_put(&properties->ring_bo);
	kfd_queue_buffer_put(&properties->eop_buf_bo);
	kfd_queue_buffer_put(&properties->cwsr_bo);

	topo_dev = kfd_topology_device_by_id(pdd->dev->id);
	if (!topo_dev)
		return -EINVAL;
	/* must match the size computed in kfd_queue_acquire_buffers() */
	total_cwsr_size = (topo_dev->node_props.cwsr_size + topo_dev->node_props.debug_memory_size)
			  * NUM_XCC(pdd->dev->xcc_mask);
	total_cwsr_size = ALIGN(total_cwsr_size, PAGE_SIZE);

	kfd_queue_buffer_svm_put(pdd, properties->ctx_save_restore_area_address, total_cwsr_size);
	return 0;
}
362517fff22SPhilip Yang 
kfd_queue_unref_bo_va(struct amdgpu_vm * vm,struct amdgpu_bo ** bo)363a1fc9f58SPhilip Yang void kfd_queue_unref_bo_va(struct amdgpu_vm *vm, struct amdgpu_bo **bo)
364a1fc9f58SPhilip Yang {
365a1fc9f58SPhilip Yang 	if (*bo) {
366a1fc9f58SPhilip Yang 		struct amdgpu_bo_va *bo_va;
367a1fc9f58SPhilip Yang 
368a1fc9f58SPhilip Yang 		bo_va = amdgpu_vm_bo_find(vm, *bo);
369a1fc9f58SPhilip Yang 		if (bo_va && bo_va->queue_refcount)
370a1fc9f58SPhilip Yang 			bo_va->queue_refcount--;
371a1fc9f58SPhilip Yang 	}
372a1fc9f58SPhilip Yang }
373a1fc9f58SPhilip Yang 
kfd_queue_unref_bo_vas(struct kfd_process_device * pdd,struct queue_properties * properties)374a1fc9f58SPhilip Yang int kfd_queue_unref_bo_vas(struct kfd_process_device *pdd,
375a1fc9f58SPhilip Yang 			   struct queue_properties *properties)
376a1fc9f58SPhilip Yang {
377a1fc9f58SPhilip Yang 	struct amdgpu_vm *vm;
378a1fc9f58SPhilip Yang 	int err;
379a1fc9f58SPhilip Yang 
380a1fc9f58SPhilip Yang 	vm = drm_priv_to_vm(pdd->drm_priv);
381a1fc9f58SPhilip Yang 	err = amdgpu_bo_reserve(vm->root.bo, false);
382a1fc9f58SPhilip Yang 	if (err)
383a1fc9f58SPhilip Yang 		return err;
384a1fc9f58SPhilip Yang 
385a1fc9f58SPhilip Yang 	kfd_queue_unref_bo_va(vm, &properties->wptr_bo);
386a1fc9f58SPhilip Yang 	kfd_queue_unref_bo_va(vm, &properties->rptr_bo);
387a1fc9f58SPhilip Yang 	kfd_queue_unref_bo_va(vm, &properties->ring_bo);
388a1fc9f58SPhilip Yang 	kfd_queue_unref_bo_va(vm, &properties->eop_buf_bo);
389a1fc9f58SPhilip Yang 	kfd_queue_unref_bo_va(vm, &properties->cwsr_bo);
390a1fc9f58SPhilip Yang 
391a1fc9f58SPhilip Yang 	amdgpu_bo_unreserve(vm->root.bo);
392a1fc9f58SPhilip Yang 	return 0;
393a1fc9f58SPhilip Yang }
394a1fc9f58SPhilip Yang 
/* Per-CU context-save sizes used by kfd_queue_ctx_save_restore_size() */
#define SGPR_SIZE_PER_CU	0x4000
#define LDS_SIZE_PER_CU		0x10000
#define HWREG_SIZE_PER_CU	0x1000
/* Debugger scratch: bytes reserved per wave, aligned to 64 bytes */
#define DEBUGGER_BYTES_ALIGN	64
#define DEBUGGER_BYTES_PER_WAVE	32
400517fff22SPhilip Yang 
/*
 * Per-CU VGPR file size in bytes for a given GFX target version.
 * Defaults to 0x40000 for versions not listed below.
 */
static u32 kfd_get_vgpr_size_per_cu(u32 gfxv)
{
	switch (gfxv) {
	case 90402:		/* GFX_VERSION_AQUA_VANJARAM */
	case 90010:		/* GFX_VERSION_ALDEBARAN */
	case 90008:		/* GFX_VERSION_ARCTURUS */
	case 90500:
		return 0x80000;
	case 110000:		/* GFX_VERSION_PLUM_BONITO */
	case 110001:		/* GFX_VERSION_WHEAT_NAS */
	case 120000:		/* GFX_VERSION_GFX1200 */
	case 120001:		/* GFX_VERSION_GFX1201 */
		return 0x60000;
	default:
		return 0x40000;
	}
}
418517fff22SPhilip Yang 
/* Per-CU workgroup context save size: VGPRs + SGPRs + LDS + HW registers.
 * GFX 9.5.0 (gfxv 90500) uses the LDS size reported in topology props
 * instead of the fixed LDS_SIZE_PER_CU.
 */
#define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props)	\
	(kfd_get_vgpr_size_per_cu(gfxv) + SGPR_SIZE_PER_CU +\
	 (((gfxv) == 90500) ? (props->lds_size_in_kb << 10) : LDS_SIZE_PER_CU) +\
	 HWREG_SIZE_PER_CU)

/* Control-stack entry size per wave: 12 bytes from GFX10.1 on, else 8 */
#define CNTL_STACK_BYTES_PER_WAVE(gfxv)	\
	((gfxv) >= 100100 ? 12 : 8)	/* GFX_VERSION_NAVI10*/

#define SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER 40
428517fff22SPhilip Yang 
/*
 * kfd_queue_ctx_save_restore_size - compute and store per-node CWSR sizing.
 *
 * Fills dev->node_props.ctl_stack_size, debug_memory_size, cwsr_size and
 * eop_buffer_size from the node's CU/wave counts and GFX target version.
 * These values are later enforced against user-supplied queue properties in
 * kfd_queue_acquire_buffers().  Pre-Carrizo parts (< 80001) have no CWSR
 * and are left untouched.
 */
void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev)
{
	struct kfd_node_properties *props = &dev->node_props;
	u32 gfxv = props->gfx_target_version;
	u32 ctl_stack_size;
	u32 wg_data_size;
	u32 wave_num;
	u32 cu_num;

	if (gfxv < 80001)	/* GFX_VERSION_CARRIZO */
		return;

	/* CUs per XCC; wave capacity differs pre/post Navi10 */
	cu_num = props->simd_count / props->simd_per_cu / NUM_XCC(dev->gpu->xcc_mask);
	wave_num = (gfxv < 100100) ?	/* GFX_VERSION_NAVI10 */
		    min(cu_num * 40, props->array_count / props->simd_arrays_per_engine * 512)
		    : cu_num * 32;

	wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props), PAGE_SIZE);
	/* +8 trailing bytes, header prepended, then page-aligned */
	ctl_stack_size = wave_num * CNTL_STACK_BYTES_PER_WAVE(gfxv) + 8;
	ctl_stack_size = ALIGN(SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER + ctl_stack_size,
			       PAGE_SIZE);

	if ((gfxv / 10000 * 10000) == 100000) {
		/* HW design limits control stack size to 0x7000.
		 * This is insufficient for theoretical PM4 cases
		 * but sufficient for AQL, limited by SPI events.
		 */
		ctl_stack_size = min(ctl_stack_size, 0x7000);
	}

	props->ctl_stack_size = ctl_stack_size;
	props->debug_memory_size = ALIGN(wave_num * DEBUGGER_BYTES_PER_WAVE, DEBUGGER_BYTES_ALIGN);
	props->cwsr_size = ctl_stack_size + wg_data_size;

	if (gfxv == 80002)	/* GFX_VERSION_TONGA */
		props->eop_buffer_size = 0x8000;
	else if (gfxv == 90402)	/* GFX_VERSION_AQUA_VANJARAM */
		props->eop_buffer_size = 4096;
	else if (gfxv >= 80000)
		props->eop_buffer_size = 4096;
}
470