// SPDX-License-Identifier: MIT
/*
 * Copyright © 2024 Intel Corporation
 */

#include "xe_bo.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_migrate.h"
#include "xe_module.h"
#include "xe_pt.h"
#include "xe_svm.h"
#include "xe_ttm_vram_mgr.h"
#include "xe_vm.h"
#include "xe_vm_types.h"

static bool xe_svm_range_in_vram(struct xe_svm_range *range)
{
	/*
	 * Advisory-only check of whether the range is currently backed by
	 * VRAM memory.
	 */

	struct drm_gpusvm_range_flags flags = {
		/* Pairs with WRITE_ONCE in drm_gpusvm.c */
		.__flags = READ_ONCE(range->base.flags.__flags),
	};

	return flags.has_devmem_pages;
}

static bool xe_svm_range_has_vram_binding(struct xe_svm_range *range)
{
	/* Not reliable without notifier lock */
	return xe_svm_range_in_vram(range) && range->tile_present;
}

static struct xe_vm *gpusvm_to_vm(struct drm_gpusvm *gpusvm)
{
	return container_of(gpusvm, struct xe_vm, svm.gpusvm);
}

static struct xe_vm *range_to_vm(struct drm_gpusvm_range *r)
{
	return gpusvm_to_vm(r->gpusvm);
}

static unsigned long xe_svm_range_start(struct xe_svm_range *range)
{
	return drm_gpusvm_range_start(&range->base);
}

static unsigned long xe_svm_range_end(struct xe_svm_range *range)
{
	return drm_gpusvm_range_end(&range->base);
}

static unsigned long xe_svm_range_size(struct xe_svm_range *range)
{
	return drm_gpusvm_range_size(&range->base);
}

#define range_debug(r__, operation__)					\
	vm_dbg(&range_to_vm(&(r__)->base)->xe->drm,			\
	       "%s: asid=%u, gpusvm=%p, vram=%d,%d, seqno=%lu, "	\
	       "start=0x%014lx, end=0x%014lx, size=%lu",		\
	       (operation__), range_to_vm(&(r__)->base)->usm.asid,	\
	       (r__)->base.gpusvm,					\
	       xe_svm_range_in_vram((r__)) ? 1 : 0,			\
	       xe_svm_range_has_vram_binding((r__)) ? 1 : 0,		\
	       (r__)->base.notifier_seq,				\
	       xe_svm_range_start((r__)), xe_svm_range_end((r__)),	\
	       xe_svm_range_size((r__)))

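/**
 * xe_svm_range_debug() - SVM range debug
 * @range: SVM range
 * @operation: The operation being logged for the range
 *
 * Print SVM range debug information via the range_debug() macro.
 */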
void xe_svm_range_debug(struct xe_svm_range *range, const char *operation)
{
	range_debug(range, operation);
}

static void *xe_svm_devm_owner(struct xe_device *xe)
{
	return xe;
}

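/*
 * GPU SVM range allocation hook: back each drm_gpusvm_range with an
 * xe_svm_range and take a VM reference, dropped again in xe_svm_range_free().
 */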
static struct drm_gpusvm_range *
xe_svm_range_alloc(struct drm_gpusvm *gpusvm)
{
	struct xe_svm_range *range;

	range = kzalloc(sizeof(*range), GFP_KERNEL);
	if (!range)
		return NULL;

	INIT_LIST_HEAD(&range->garbage_collector_link);
	xe_vm_get(gpusvm_to_vm(gpusvm));

	return &range->base;
}

static void xe_svm_range_free(struct drm_gpusvm_range *range)
{
	xe_vm_put(range_to_vm(range));
	kfree(range);
}

static struct xe_svm_range *to_xe_range(struct drm_gpusvm_range *r)
{
	return container_of(r, struct xe_svm_range, base);
}

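/*
 * Mark @range as unmapped and queue it on the VM's garbage-collector list;
 * the page-fault workqueue then unbinds and removes it asynchronously.
 */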
static void
xe_svm_garbage_collector_add_range(struct xe_vm *vm, struct xe_svm_range *range,
				   const struct mmu_notifier_range *mmu_range)
{
	struct xe_device *xe = vm->xe;

	range_debug(range, "GARBAGE COLLECTOR ADD");

	drm_gpusvm_range_set_unmapped(&range->base, mmu_range);

	spin_lock(&vm->svm.garbage_collector.lock);
	if (list_empty(&range->garbage_collector_link))
		list_add_tail(&range->garbage_collector_link,
			      &vm->svm.garbage_collector.range_list);
	spin_unlock(&vm->svm.garbage_collector.lock);

	queue_work(xe_device_get_root_tile(xe)->primary_gt->usm.pf_wq,
		   &vm->svm.garbage_collector.work);
}

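/*
 * First half of per-range notifier processing: widen the invalidation bounds
 * to the range boundaries and zap the range's PTEs on each tile, returning a
 * mask of tiles that need a TLB invalidation.
 */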
static u8
xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r,
				  const struct mmu_notifier_range *mmu_range,
				  u64 *adj_start, u64 *adj_end)
{
	struct xe_svm_range *range = to_xe_range(r);
	struct xe_device *xe = vm->xe;
	struct xe_tile *tile;
	u8 tile_mask = 0;
	u8 id;

	xe_svm_assert_in_notifier(vm);

	range_debug(range, "NOTIFIER");

	/* Skip if already unmapped or if no bindings exist */
	if (range->base.flags.unmapped || !range->tile_present)
		return 0;

	range_debug(range, "NOTIFIER - EXECUTE");

	/* Adjust invalidation to range boundaries */
	*adj_start = min(xe_svm_range_start(range), mmu_range->start);
	*adj_end = max(xe_svm_range_end(range), mmu_range->end);

	/*
	 * XXX: Ideally would zap PTEs in one shot in xe_svm_invalidate but the
	 * invalidation code can't correctly cope with sparse ranges or
	 * invalidations spanning multiple ranges.
	 */
	for_each_tile(tile, xe, id)
		if (xe_pt_zap_ptes_range(tile, vm, range)) {
			tile_mask |= BIT(id);
			range->tile_invalidated |= BIT(id);
		}

	return tile_mask;
}

static void
xe_svm_range_notifier_event_end(struct xe_vm *vm, struct drm_gpusvm_range *r,
				const struct mmu_notifier_range *mmu_range)
{
	struct drm_gpusvm_ctx ctx = { .in_notifier = true, };

	xe_svm_assert_in_notifier(vm);

	drm_gpusvm_range_unmap_pages(&vm->svm.gpusvm, r, &ctx);
	if (!xe_vm_is_closed(vm) && mmu_range->event == MMU_NOTIFY_UNMAP)
		xe_svm_garbage_collector_add_range(vm, to_xe_range(r),
						   mmu_range);
}

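/*
 * GPU SVM invalidation callback, run from the MMU notifier under the notifier
 * lock: zap PTEs for all ranges overlapping the event, issue and wait on TLB
 * invalidations for the affected GTs, then unmap the ranges' DMA mappings and
 * queue unmapped ranges for garbage collection.
 */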
static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,
			      struct drm_gpusvm_notifier *notifier,
			      const struct mmu_notifier_range *mmu_range)
{
	struct xe_vm *vm = gpusvm_to_vm(gpusvm);
	struct xe_device *xe = vm->xe;
	struct xe_tile *tile;
	struct drm_gpusvm_range *r, *first;
	struct xe_gt_tlb_invalidation_fence
		fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
	u64 adj_start = mmu_range->start, adj_end = mmu_range->end;
	u8 tile_mask = 0;
	u8 id;
	u32 fence_id = 0;
	long err;

	xe_svm_assert_in_notifier(vm);

	vm_dbg(&gpusvm_to_vm(gpusvm)->xe->drm,
	       "INVALIDATE: asid=%u, gpusvm=%p, seqno=%lu, start=0x%016lx, end=0x%016lx, event=%d",
	       vm->usm.asid, gpusvm, notifier->notifier.invalidate_seq,
	       mmu_range->start, mmu_range->end, mmu_range->event);

	/* Adjust invalidation to notifier boundaries */
	adj_start = max(drm_gpusvm_notifier_start(notifier), adj_start);
	adj_end = min(drm_gpusvm_notifier_end(notifier), adj_end);

	first = drm_gpusvm_range_find(notifier, adj_start, adj_end);
	if (!first)
		return;

	/*
	 * PTs may be getting destroyed so it is not safe to touch them, but
	 * the PTs should already be invalidated at this point in time.
	 * Regardless, we still need to ensure any DMA mappings are unmapped
	 * here.
	 */
	if (xe_vm_is_closed(vm))
		goto range_notifier_event_end;

	/*
	 * XXX: Less than ideal to always wait on the VM's resv slots if an
	 * invalidation is not required. Could walk the range list twice to
	 * figure out if an invalidation is needed, but that is also not ideal.
	 */
	err = dma_resv_wait_timeout(xe_vm_resv(vm),
				    DMA_RESV_USAGE_BOOKKEEP,
				    false, MAX_SCHEDULE_TIMEOUT);
	XE_WARN_ON(err <= 0);

	r = first;
	drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
		tile_mask |= xe_svm_range_notifier_event_begin(vm, r, mmu_range,
							       &adj_start,
							       &adj_end);
	if (!tile_mask)
		goto range_notifier_event_end;

	xe_device_wmb(xe);

	for_each_tile(tile, xe, id) {
		if (tile_mask & BIT(id)) {
			int err;

			xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
							  &fence[fence_id], true);

			err = xe_gt_tlb_invalidation_range(tile->primary_gt,
							   &fence[fence_id],
							   adj_start,
							   adj_end,
							   vm->usm.asid);
			if (WARN_ON_ONCE(err < 0))
				goto wait;
			++fence_id;

			if (!tile->media_gt)
				continue;

			xe_gt_tlb_invalidation_fence_init(tile->media_gt,
							  &fence[fence_id], true);

			err = xe_gt_tlb_invalidation_range(tile->media_gt,
							   &fence[fence_id],
							   adj_start,
							   adj_end,
							   vm->usm.asid);
			if (WARN_ON_ONCE(err < 0))
				goto wait;
			++fence_id;
		}
	}

wait:
	for (id = 0; id < fence_id; ++id)
		xe_gt_tlb_invalidation_fence_wait(&fence[id]);

range_notifier_event_end:
	r = first;
	drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
		xe_svm_range_notifier_event_end(vm, r, mmu_range);
}

static int __xe_svm_garbage_collector(struct xe_vm *vm,
				      struct xe_svm_range *range)
{
	struct dma_fence *fence;

	range_debug(range, "GARBAGE COLLECTOR");

	xe_vm_lock(vm, false);
	fence = xe_vm_range_unbind(vm, range);
	xe_vm_unlock(vm);
	if (IS_ERR(fence))
		return PTR_ERR(fence);
	dma_fence_put(fence);

	drm_gpusvm_range_remove(&vm->svm.gpusvm, &range->base);

	return 0;
}

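/*
 * Drain the VM's garbage-collector list, unbinding and removing each queued
 * range. Requires vm->lock held for write; the VM is killed if an unbind
 * fails.
 */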
static int xe_svm_garbage_collector(struct xe_vm *vm)
{
	struct xe_svm_range *range;
	int err;

	lockdep_assert_held_write(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm))
		return -ENOENT;

	spin_lock(&vm->svm.garbage_collector.lock);
	for (;;) {
		range = list_first_entry_or_null(&vm->svm.garbage_collector.range_list,
						 typeof(*range),
						 garbage_collector_link);
		if (!range)
			break;

		list_del(&range->garbage_collector_link);
		spin_unlock(&vm->svm.garbage_collector.lock);

		err = __xe_svm_garbage_collector(vm, range);
		if (err) {
			drm_warn(&vm->xe->drm,
				 "Garbage collection failed: %pe\n",
				 ERR_PTR(err));
			xe_vm_kill(vm, true);
			return err;
		}

		spin_lock(&vm->svm.garbage_collector.lock);
	}
	spin_unlock(&vm->svm.garbage_collector.lock);

	return 0;
}

static void xe_svm_garbage_collector_work_func(struct work_struct *w)
{
	struct xe_vm *vm = container_of(w, struct xe_vm,
					svm.garbage_collector.work);

	down_write(&vm->lock);
	xe_svm_garbage_collector(vm);
	up_write(&vm->lock);
}

static struct xe_vram_region *page_to_vr(struct page *page)
{
	return container_of(page_pgmap(page), struct xe_vram_region, pagemap);
}

static struct xe_tile *vr_to_tile(struct xe_vram_region *vr)
{
	return container_of(vr, struct xe_tile, mem.vram);
}

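/*
 * Translate a device-private page into a device physical address (DPA) by
 * offsetting the page's host physical address from the VRAM region's base.
 */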
static u64 xe_vram_region_page_to_dpa(struct xe_vram_region *vr,
				      struct page *page)
{
	u64 dpa;
	struct xe_tile *tile = vr_to_tile(vr);
	u64 pfn = page_to_pfn(page);
	u64 offset;

	xe_tile_assert(tile, is_device_private_page(page));
	xe_tile_assert(tile, (pfn << PAGE_SHIFT) >= vr->hpa_base);

	offset = (pfn << PAGE_SHIFT) - vr->hpa_base;
	dpa = vr->dpa_base + offset;

	return dpa;
}

enum xe_svm_copy_dir {
	XE_SVM_COPY_TO_VRAM,
	XE_SVM_COPY_TO_SRAM,
};

static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr,
		       unsigned long npages, const enum xe_svm_copy_dir dir)
{
	struct xe_vram_region *vr = NULL;
	struct xe_tile *tile;
	struct dma_fence *fence = NULL;
	unsigned long i;
#define XE_VRAM_ADDR_INVALID	~0x0ull
	u64 vram_addr = XE_VRAM_ADDR_INVALID;
	int err = 0, pos = 0;
	bool sram = dir == XE_SVM_COPY_TO_SRAM;

	/*
	 * This flow is complex: it locates physically contiguous device pages,
	 * derives the starting physical address, and performs a single GPU
	 * copy for every 8M chunk in a DMA address array. Both device pages
	 * and DMA addresses may be sparsely populated. If either is NULL, a
	 * copy is triggered based on the current search state. The last GPU
	 * copy is waited on to ensure all copies are complete.
	 */

	for (i = 0; i < npages; ++i) {
		struct page *spage = pages[i];
		struct dma_fence *__fence;
		u64 __vram_addr;
		bool match = false, chunk, last;

#define XE_MIGRATE_CHUNK_SIZE	SZ_8M
		chunk = (i - pos) == (XE_MIGRATE_CHUNK_SIZE / PAGE_SIZE);
		last = (i + 1) == npages;

		/* No CPU page and no device pages queued to copy */
		if (!dma_addr[i] && vram_addr == XE_VRAM_ADDR_INVALID)
			continue;

		if (!vr && spage) {
			vr = page_to_vr(spage);
			tile = vr_to_tile(vr);
		}
		XE_WARN_ON(spage && page_to_vr(spage) != vr);

		/*
		 * CPU page and device page valid, capture physical address on
		 * first device page, check if physically contiguous on
		 * subsequent device pages.
		 */
		if (dma_addr[i] && spage) {
			__vram_addr = xe_vram_region_page_to_dpa(vr, spage);
			if (vram_addr == XE_VRAM_ADDR_INVALID) {
				vram_addr = __vram_addr;
				pos = i;
			}

			match = vram_addr + PAGE_SIZE * (i - pos) == __vram_addr;
		}

		/*
		 * Mismatched physical address, 8M copy chunk, or last page -
		 * trigger a copy.
		 */
		if (!match || chunk || last) {
			/*
			 * Extra page for first copy if last page and matching
			 * physical address.
			 */
			int incr = (match && last) ? 1 : 0;

			if (vram_addr != XE_VRAM_ADDR_INVALID) {
				if (sram) {
					vm_dbg(&tile->xe->drm,
					       "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld",
					       vram_addr, (u64)dma_addr[pos], i - pos + incr);
					__fence = xe_migrate_from_vram(tile->migrate,
								       i - pos + incr,
								       vram_addr,
								       dma_addr + pos);
				} else {
					vm_dbg(&tile->xe->drm,
					       "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld",
					       (u64)dma_addr[pos], vram_addr, i - pos + incr);
					__fence = xe_migrate_to_vram(tile->migrate,
								     i - pos + incr,
								     dma_addr + pos,
								     vram_addr);
				}
				if (IS_ERR(__fence)) {
					err = PTR_ERR(__fence);
					goto err_out;
				}

				dma_fence_put(fence);
				fence = __fence;
			}

			/* Setup physical address of next device page */
			if (dma_addr[i] && spage) {
				vram_addr = __vram_addr;
				pos = i;
			} else {
				vram_addr = XE_VRAM_ADDR_INVALID;
			}

			/* Extra mismatched device page, copy it */
			if (!match && last && vram_addr != XE_VRAM_ADDR_INVALID) {
				if (sram) {
					vm_dbg(&tile->xe->drm,
					       "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%d",
					       vram_addr, (u64)dma_addr[pos], 1);
					__fence = xe_migrate_from_vram(tile->migrate, 1,
								       vram_addr,
								       dma_addr + pos);
				} else {
					vm_dbg(&tile->xe->drm,
					       "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%d",
					       (u64)dma_addr[pos], vram_addr, 1);
					__fence = xe_migrate_to_vram(tile->migrate, 1,
								     dma_addr + pos,
								     vram_addr);
				}
				if (IS_ERR(__fence)) {
					err = PTR_ERR(__fence);
					goto err_out;
				}

				dma_fence_put(fence);
				fence = __fence;
			}
		}
	}

err_out:
	/* Wait for all copies to complete */
	if (fence) {
		dma_fence_wait(fence, false);
		dma_fence_put(fence);
	}

	return err;
#undef XE_MIGRATE_CHUNK_SIZE
#undef XE_VRAM_ADDR_INVALID
}

static int xe_svm_copy_to_devmem(struct page **pages, dma_addr_t *dma_addr,
				 unsigned long npages)
{
	return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_VRAM);
}

static int xe_svm_copy_to_ram(struct page **pages, dma_addr_t *dma_addr,
			      unsigned long npages)
{
	return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_SRAM);
}

static struct xe_bo *to_xe_bo(struct drm_gpusvm_devmem *devmem_allocation)
{
	return container_of(devmem_allocation, struct xe_bo, devmem_allocation);
}

static void xe_svm_devmem_release(struct drm_gpusvm_devmem *devmem_allocation)
{
	struct xe_bo *bo = to_xe_bo(devmem_allocation);

	xe_bo_put_async(bo);
}

static u64 block_offset_to_pfn(struct xe_vram_region *vr, u64 offset)
{
	return PHYS_PFN(offset + vr->hpa_base);
}

static struct drm_buddy *tile_to_buddy(struct xe_tile *tile)
{
	return &tile->mem.vram.ttm.mm;
}

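/*
 * Populate @pfn with one PFN per page of the devmem allocation by walking the
 * buddy blocks backing the BO's VRAM resource.
 */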
static int xe_svm_populate_devmem_pfn(struct drm_gpusvm_devmem *devmem_allocation,
				      unsigned long npages, unsigned long *pfn)
{
	struct xe_bo *bo = to_xe_bo(devmem_allocation);
	struct ttm_resource *res = bo->ttm.resource;
	struct list_head *blocks = &to_xe_ttm_vram_mgr_resource(res)->blocks;
	struct drm_buddy_block *block;
	int j = 0;

	list_for_each_entry(block, blocks, link) {
		struct xe_vram_region *vr = block->private;
		struct xe_tile *tile = vr_to_tile(vr);
		struct drm_buddy *buddy = tile_to_buddy(tile);
		u64 block_pfn = block_offset_to_pfn(vr, drm_buddy_block_offset(block));
		int i;

		for (i = 0; i < drm_buddy_block_size(buddy, block) >> PAGE_SHIFT; ++i)
			pfn[j++] = block_pfn + i;
	}

	return 0;
}

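/*
 * Device-memory operations GPU SVM uses for BO release, PFN population, and
 * migration copies in either direction.
 */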
static const struct drm_gpusvm_devmem_ops gpusvm_devmem_ops = {
	.devmem_release = xe_svm_devmem_release,
	.populate_devmem_pfn = xe_svm_populate_devmem_pfn,
	.copy_to_devmem = xe_svm_copy_to_devmem,
	.copy_to_ram = xe_svm_copy_to_ram,
};

static const struct drm_gpusvm_ops gpusvm_ops = {
	.range_alloc = xe_svm_range_alloc,
	.range_free = xe_svm_range_free,
	.invalidate = xe_svm_invalidate,
};

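/* Candidate SVM range (chunk) sizes used at fault time, ordered from largest to smallest */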
static const unsigned long fault_chunk_sizes[] = {
	SZ_2M,
	SZ_64K,
	SZ_4K,
};

/**
 * xe_svm_init() - SVM initialize
 * @vm: The VM.
 *
 * Initialize SVM state which is embedded within the VM.
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_svm_init(struct xe_vm *vm)
{
	int err;

	spin_lock_init(&vm->svm.garbage_collector.lock);
	INIT_LIST_HEAD(&vm->svm.garbage_collector.range_list);
	INIT_WORK(&vm->svm.garbage_collector.work,
		  xe_svm_garbage_collector_work_func);

	err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm,
			      current->mm, xe_svm_devm_owner(vm->xe), 0,
			      vm->size, xe_modparam.svm_notifier_size * SZ_1M,
			      &gpusvm_ops, fault_chunk_sizes,
			      ARRAY_SIZE(fault_chunk_sizes));
	if (err)
		return err;

	drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock);

	return 0;
}

/**
 * xe_svm_close() - SVM close
 * @vm: The VM.
 *
 * Close SVM state (i.e., stop and flush all SVM actions).
 */
void xe_svm_close(struct xe_vm *vm)
{
	xe_assert(vm->xe, xe_vm_is_closed(vm));
	flush_work(&vm->svm.garbage_collector.work);
}

/**
 * xe_svm_fini() - SVM finalize
 * @vm: The VM.
 *
 * Finalize SVM state which is embedded within the VM.
 */
void xe_svm_fini(struct xe_vm *vm)
{
	xe_assert(vm->xe, xe_vm_is_closed(vm));

	drm_gpusvm_fini(&vm->svm.gpusvm);
}

static bool xe_svm_range_is_valid(struct xe_svm_range *range,
				  struct xe_tile *tile,
				  bool devmem_only)
{
	/*
	 * Advisory-only check of whether the range currently has a valid
	 * mapping. READ_ONCE pairs with WRITE_ONCE in xe_pt.c.
	 */
	return ((READ_ONCE(range->tile_present) &
		 ~READ_ONCE(range->tile_invalidated)) & BIT(tile->id)) &&
		(!devmem_only || xe_svm_range_in_vram(range));
}

static struct xe_vram_region *tile_to_vr(struct xe_tile *tile)
{
	return &tile->mem.vram;
}

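/*
 * Allocate a VRAM BO covering @range and migrate the range's pages into it
 * via drm_gpusvm_migrate_to_devmem(). Requires the mm backing the SVM to
 * still be alive; runs under the mmap read lock.
 */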
static int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile,
			     struct xe_svm_range *range,
			     const struct drm_gpusvm_ctx *ctx)
{
	struct mm_struct *mm = vm->svm.gpusvm.mm;
	struct xe_vram_region *vr = tile_to_vr(tile);
	struct drm_buddy_block *block;
	struct list_head *blocks;
	struct xe_bo *bo;
	ktime_t end = 0;
	int err;

	range_debug(range, "ALLOCATE VRAM");

	if (!mmget_not_zero(mm))
		return -EFAULT;
	mmap_read_lock(mm);

retry:
	bo = xe_bo_create_locked(tile_to_xe(tile), NULL, NULL,
				 xe_svm_range_size(range),
				 ttm_bo_type_device,
				 XE_BO_FLAG_VRAM_IF_DGFX(tile) |
				 XE_BO_FLAG_CPU_ADDR_MIRROR);
	if (IS_ERR(bo)) {
		err = PTR_ERR(bo);
		if (xe_vm_validate_should_retry(NULL, err, &end))
			goto retry;
		goto unlock;
	}

	drm_gpusvm_devmem_init(&bo->devmem_allocation,
			       vm->xe->drm.dev, mm,
			       &gpusvm_devmem_ops,
			       &tile->mem.vram.dpagemap,
			       xe_svm_range_size(range));

	blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks;
	list_for_each_entry(block, blocks, link)
		block->private = vr;

	xe_bo_get(bo);
	err = drm_gpusvm_migrate_to_devmem(&vm->svm.gpusvm, &range->base,
					   &bo->devmem_allocation, ctx);
	if (err)
		xe_svm_devmem_release(&bo->devmem_allocation);

	xe_bo_unlock(bo);
	xe_bo_put(bo);

unlock:
	mmap_read_unlock(mm);
	mmput(mm);

	return err;
}

static bool supports_4K_migration(struct xe_device *xe)
{
	if (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
		return false;

	return true;
}

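/*
 * Decide whether a faulting range should be migrated to VRAM: devmem
 * migration must be possible for the range, the range must not already be in
 * VRAM, and small (<= 64K) ranges are skipped on platforms without 4K VRAM
 * migration support.
 */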
static bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range,
					       struct xe_vma *vma)
{
	struct xe_vm *vm = range_to_vm(&range->base);
	u64 range_size = xe_svm_range_size(range);

	if (!range->base.flags.migrate_devmem)
		return false;

	if (xe_svm_range_in_vram(range)) {
		drm_dbg(&vm->xe->drm, "Range is already in VRAM\n");
		return false;
	}

	if (range_size <= SZ_64K && !supports_4K_migration(vm->xe)) {
		drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n");
		return false;
	}

	return true;
}

/**
 * xe_svm_handle_pagefault() - SVM handle page fault
 * @vm: The VM.
 * @vma: The CPU address mirror VMA.
 * @tile: The tile upon which the fault occurred.
 * @fault_addr: The GPU fault address.
 * @atomic: The fault atomic access bit.
 *
 * Create GPU bindings for a SVM page fault. Optionally migrate to device
 * memory.
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
			    struct xe_tile *tile, u64 fault_addr,
			    bool atomic)
{
	struct drm_gpusvm_ctx ctx = {
		.read_only = xe_vma_read_only(vma),
		.devmem_possible = IS_DGFX(vm->xe) &&
			IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
		.check_pages_threshold = IS_DGFX(vm->xe) &&
			IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0,
		.devmem_only = atomic && IS_DGFX(vm->xe) &&
			IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
		.timeslice_ms = atomic && IS_DGFX(vm->xe) &&
			IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? 5 : 0,
	};
	struct xe_svm_range *range;
	struct drm_gpusvm_range *r;
	struct drm_exec exec;
	struct dma_fence *fence;
	int migrate_try_count = ctx.devmem_only ? 3 : 1;
	ktime_t end = 0;
	int err;

	lockdep_assert_held_write(&vm->lock);
	xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));

retry:
	/* Always process UNMAPs first so the view of SVM ranges is current */
	err = xe_svm_garbage_collector(vm);
	if (err)
		return err;

	r = drm_gpusvm_range_find_or_insert(&vm->svm.gpusvm, fault_addr,
					    xe_vma_start(vma), xe_vma_end(vma),
					    &ctx);
	if (IS_ERR(r))
		return PTR_ERR(r);

	if (ctx.devmem_only && !r->flags.migrate_devmem)
		return -EACCES;

	range = to_xe_range(r);
	if (xe_svm_range_is_valid(range, tile, ctx.devmem_only))
		return 0;

	range_debug(range, "PAGE FAULT");

	if (--migrate_try_count >= 0 &&
	    xe_svm_range_needs_migrate_to_vram(range, vma)) {
		err = xe_svm_alloc_vram(vm, tile, range, &ctx);
		ctx.timeslice_ms <<= 1;	/* Double timeslice if we have to retry */
		if (err) {
			if (migrate_try_count || !ctx.devmem_only) {
				drm_dbg(&vm->xe->drm,
					"VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n",
					vm->usm.asid, ERR_PTR(err));
				goto retry;
			} else {
				drm_err(&vm->xe->drm,
					"VRAM allocation failed, retry count exceeded, asid=%u, errno=%pe\n",
					vm->usm.asid, ERR_PTR(err));
				return err;
			}
		}
	}

	range_debug(range, "GET PAGES");
	err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, r, &ctx);
	/* Corner case where CPU mappings have changed */
	if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {
		ctx.timeslice_ms <<= 1;	/* Double timeslice if we have to retry */
		if (migrate_try_count > 0 || !ctx.devmem_only) {
			if (err == -EOPNOTSUPP) {
				range_debug(range, "PAGE FAULT - EVICT PAGES");
				drm_gpusvm_range_evict(&vm->svm.gpusvm,
						       &range->base);
			}
			drm_dbg(&vm->xe->drm,
				"Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n",
				vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
			range_debug(range, "PAGE FAULT - RETRY PAGES");
			goto retry;
		} else {
			drm_err(&vm->xe->drm,
				"Get pages failed, retry count exceeded, asid=%u, gpusvm=%p, errno=%pe\n",
				vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
		}
	}
	if (err) {
		range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT");
		goto err_out;
	}

	range_debug(range, "PAGE FAULT - BIND");

retry_bind:
	drm_exec_init(&exec, 0, 0);
	drm_exec_until_all_locked(&exec) {
		err = drm_exec_lock_obj(&exec, vm->gpuvm.r_obj);
		drm_exec_retry_on_contention(&exec);
		if (err) {
			drm_exec_fini(&exec);
			goto err_out;
		}

		fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id));
		if (IS_ERR(fence)) {
			drm_exec_fini(&exec);
			err = PTR_ERR(fence);
			if (err == -EAGAIN) {
				ctx.timeslice_ms <<= 1;	/* Double timeslice if we have to retry */
				range_debug(range, "PAGE FAULT - RETRY BIND");
				goto retry;
			}
			if (xe_vm_validate_should_retry(&exec, err, &end))
				goto retry_bind;
			goto err_out;
		}
	}
	drm_exec_fini(&exec);

	dma_fence_wait(fence, false);
	dma_fence_put(fence);

err_out:

	return err;
}

/**
 * xe_svm_has_mapping() - SVM has mappings
 * @vm: The VM.
 * @start: Start address.
 * @end: End address.
 *
 * Check if an address range has SVM mappings.
 *
 * Return: True if address range has a SVM mapping, False otherwise
 */
bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end)
{
	return drm_gpusvm_has_mapping(&vm->svm.gpusvm, start, end);
}

/**
 * xe_svm_bo_evict() - SVM evict BO to system memory
 * @bo: BO to evict
 *
 * SVM evict BO to system memory. GPU SVM layer ensures all device pages
 * are evicted before returning.
 *
 * Return: 0 on success, standard error code otherwise
 */
int xe_svm_bo_evict(struct xe_bo *bo)
{
	return drm_gpusvm_evict_to_ram(&bo->devmem_allocation);
}

#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)
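/*
 * drm_pagemap device-mapping hook: when @dev is the device owning the page,
 * return its device physical address over the local VRAM interconnect;
 * otherwise report a mapping error.
 */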
static struct drm_pagemap_device_addr
xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap,
			  struct device *dev,
			  struct page *page,
			  unsigned int order,
			  enum dma_data_direction dir)
{
	struct device *pgmap_dev = dpagemap->dev;
	enum drm_interconnect_protocol prot;
	dma_addr_t addr;

	if (pgmap_dev == dev) {
		addr = xe_vram_region_page_to_dpa(page_to_vr(page), page);
		prot = XE_INTERCONNECT_VRAM;
	} else {
		addr = DMA_MAPPING_ERROR;
		prot = 0;
	}

	return drm_pagemap_device_addr_encode(addr, prot, order, dir);
}

static const struct drm_pagemap_ops xe_drm_pagemap_ops = {
	.device_map = xe_drm_pagemap_device_map,
};

/**
 * xe_devm_add() - Remap and provide memmap backing for device memory
 * @tile: tile that the memory region belongs to
 * @vr: vram memory region to remap
 *
 * This remaps device memory to the host physical address space and creates
 * struct pages to back the device memory.
 *
 * Return: 0 on success, standard error code otherwise
 */
int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
{
	struct xe_device *xe = tile_to_xe(tile);
	struct device *dev = &to_pci_dev(xe->drm.dev)->dev;
	struct resource *res;
	void *addr;
	int ret;

	res = devm_request_free_mem_region(dev, &iomem_resource,
					   vr->usable_size);
	if (IS_ERR(res)) {
		ret = PTR_ERR(res);
		return ret;
	}

	vr->pagemap.type = MEMORY_DEVICE_PRIVATE;
	vr->pagemap.range.start = res->start;
	vr->pagemap.range.end = res->end;
	vr->pagemap.nr_range = 1;
	vr->pagemap.ops = drm_gpusvm_pagemap_ops_get();
	vr->pagemap.owner = xe_svm_devm_owner(xe);
	addr = devm_memremap_pages(dev, &vr->pagemap);

	vr->dpagemap.dev = dev;
	vr->dpagemap.ops = &xe_drm_pagemap_ops;

	if (IS_ERR(addr)) {
		devm_release_mem_region(dev, res->start, resource_size(res));
		ret = PTR_ERR(addr);
		drm_err(&xe->drm, "Failed to remap tile %d memory, errno %pe\n",
			tile->id, ERR_PTR(ret));
		return ret;
	}
	vr->hpa_base = res->start;

	drm_dbg(&xe->drm, "Added tile %d memory [%llx-%llx] to devm, remapped to %pr\n",
		tile->id, vr->io_start, vr->io_start + vr->usable_size, res);
	return 0;
}
#else
int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
{
	return 0;
}
#endif

/**
 * xe_svm_flush() - SVM flush
 * @vm: The VM.
 *
 * Flush all SVM actions.
 */
void xe_svm_flush(struct xe_vm *vm)
{
	if (xe_vm_in_fault_mode(vm))
		flush_work(&vm->svm.garbage_collector.work);
}