/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include "display/intel_display.h"
#include "gt/intel_gt.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_domain.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"
#include "i915_gem_object.h"
#include "i915_gem_object_frontbuffer.h"
#include "i915_vma.h"

static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (IS_DGFX(i915))
		return false;

	/*
	 * For objects created by userspace through GEM_CREATE with pat_index
	 * set by the set_pat extension, i915_gem_object_has_cache_level() will
	 * always return true, because the coherency of such an object is
	 * managed by userspace. Otherwise the call here would fall back to
	 * checking whether the object is un-cached or write-through.
	 */
	return !(i915_gem_object_has_cache_level(obj, I915_CACHE_NONE) ||
		 i915_gem_object_has_cache_level(obj, I915_CACHE_WT));
}

bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (obj->cache_dirty)
		return false;

	if (IS_DGFX(i915))
		return false;

	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
		return true;

	/* Currently in use by HW (display engine)? Keep flushed. */
	return i915_gem_object_is_framebuffer(obj);
}

static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
	struct i915_vma *vma;

	assert_object_held(obj);

	if (!(obj->write_domain & flush_domains))
		return;

	switch (obj->write_domain) {
	case I915_GEM_DOMAIN_GTT:
		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			i915_vma_flush_writes(vma);
		spin_unlock(&obj->vma.lock);

		i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
		break;

	case I915_GEM_DOMAIN_WC:
		wmb();
		break;

	case I915_GEM_DOMAIN_CPU:
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		break;

	case I915_GEM_DOMAIN_RENDER:
		if (gpu_write_needs_clflush(obj))
			obj->cache_dirty = true;
		break;
	}

	obj->write_domain = 0;
}

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asynchronously.
	 */
	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
	if (obj->cache_dirty)
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
	obj->write_domain = 0;
}

void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_is_framebuffer(obj))
		return;

	i915_gem_object_lock(obj, NULL);
	__i915_gem_object_flush_for_display(obj);
	i915_gem_object_unlock(obj);
}

void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
{
	if (i915_gem_object_is_framebuffer(obj))
		__i915_gem_object_flush_for_display(obj);
}

/**
 * i915_gem_object_set_to_wc_domain - Moves a single object to the WC read, and
 *                                    possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_WC)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * WC domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_WC;
	if (write) {
		obj->read_domains = I915_GEM_DOMAIN_WC;
		obj->write_domain = I915_GEM_DOMAIN_WC;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * i915_gem_object_set_to_gtt_domain - Moves a single object to the GTT read,
 *                                     and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		struct i915_vma *vma;

		obj->read_domains = I915_GEM_DOMAIN_GTT;
		obj->write_domain = I915_GEM_DOMAIN_GTT;
		obj->mm.dirty = true;

		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
				i915_vma_set_ggtt_write(vma);
		spin_unlock(&obj->vma.lock);
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}
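
/*
 * Note for readers: the snippet below is a sketch, not driver code. In-kernel
 * callers move an object into the GTT (or WC) domain while already holding
 * the object lock, since the helpers above only assert_object_held(), e.g.:
 *
 *	i915_gem_object_lock(obj, NULL);
 *	err = i915_gem_object_set_to_gtt_domain(obj, true);
 *	i915_gem_object_unlock(obj);
 */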

/**
 * i915_gem_object_set_cache_level - Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	int ret;

	/*
	 * For objects created by userspace through GEM_CREATE with pat_index
	 * set by the set_pat extension, simply return 0 here without touching
	 * the cache setting, because such objects should have an immutable
	 * cache setting by design and are always managed by userspace.
	 */
	if (i915_gem_object_has_cache_level(obj, cache_level))
		return 0;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	/* Always invalidate stale cachelines */
	i915_gem_object_set_cache_coherency(obj, cache_level);
	obj->cache_dirty = true;

	/* The cache-level will be applied when each vma is rebound. */
	return i915_gem_object_unbind(obj,
				      I915_GEM_OBJECT_UNBIND_ACTIVE |
				      I915_GEM_OBJECT_UNBIND_BARRIER);
}

int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	int err = 0;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj) {
		err = -ENOENT;
		goto out;
	}

	/*
	 * This ioctl should be disabled for objects with a pat_index
	 * set by user space.
	 */
	if (obj->pat_set_by_user) {
		err = -EOPNOTSUPP;
		goto out;
	}

	if (i915_gem_object_has_cache_level(obj, I915_CACHE_LLC) ||
	    i915_gem_object_has_cache_level(obj, I915_CACHE_L3_LLC))
		args->caching = I915_CACHING_CACHED;
	else if (i915_gem_object_has_cache_level(obj, I915_CACHE_WT))
		args->caching = I915_CACHING_DISPLAY;
	else
		args->caching = I915_CACHING_NONE;
out:
	rcu_read_unlock();
	return err;
}

int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret = 0;

	if (IS_DGFX(i915))
		return -ENODEV;

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
		return -EOPNOTSUPP;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * This ioctl should be disabled for objects with a pat_index
	 * set by user space.
	 */
	if (obj->pat_set_by_user) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	/*
	 * The caching mode of a proxy object is handled by its generator, and
	 * is not allowed to be changed by userspace.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		/*
		 * Silently allow cached for userptr; the Vulkan driver
		 * sets all objects to cached
		 */
		if (!i915_gem_object_is_userptr(obj) ||
		    args->caching != I915_CACHING_CACHED)
			ret = -ENXIO;

		goto out;
	}

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		goto out;

	ret = i915_gem_object_set_cache_level(obj, level);
	i915_gem_object_unlock(obj);

out:
	i915_gem_object_put(obj);
	return ret;
}
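
/*
 * For reference only (not kernel code): a minimal, hypothetical userspace
 * sketch of the caching uAPI above, assuming an open DRM fd and a valid GEM
 * handle. On discrete GPUs this ioctl returns -ENODEV, and on graphics IP
 * version 12.70 and later it returns -EOPNOTSUPP, as handled above.
 *
 *	struct drm_i915_gem_caching arg = {
 *		.handle = handle,
 *		.caching = I915_CACHING_CACHED,
 *	};
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg))
 *		... handle errno ...
 */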

/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display; the callers are responsible for the frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     struct i915_gem_ww_ctx *ww,
				     u32 alignment, unsigned int guard,
				     const struct i915_gtt_view *view,
				     unsigned int flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;
	int ret;

	/* Frame buffer must be in LMEM */
	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
		return ERR_PTR(-EINVAL);

	/*
	 * The display engine is not coherent with the LLC cache on gen6.  As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is the lowest common denominator for
	 * all chipsets.
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(i915) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret)
		return ERR_PTR(ret);

	/* VT-d may overfetch before/after the vma, so pad with scratch */
	if (guard)
		flags |= PIN_OFFSET_GUARD | (guard * I915_GTT_PAGE_SIZE);

	/*
	 * As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers. However,
	 * it may simply be too big to fit into mappable, in which case
	 * put it anyway and hope that userspace can cope (but always first
	 * try to preserve the existing ABI).
	 */
	vma = ERR_PTR(-ENOSPC);
	if ((flags & PIN_MAPPABLE) == 0 &&
	    (!view || view->type == I915_GTT_VIEW_NORMAL))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment,
						  flags | PIN_MAPPABLE |
						  PIN_NONBLOCK);
	if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0,
						  alignment, flags);
	if (IS_ERR(vma))
		return vma;

	vma->display_alignment = max(vma->display_alignment, alignment);
	i915_vma_mark_scanout(vma);

	i915_gem_object_flush_if_display_locked(obj);

	return vma;
}
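
/*
 * Sketch only, not driver code: callers on the display side typically invoke
 * the pin helper above from inside a ww transaction (using the same
 * parameters as above), so that -EDEADLK is handled by the usual
 * backoff-and-retry loop, roughly:
 *
 *	for_i915_gem_ww(&ww, ret, true) {
 *		ret = i915_gem_object_lock(obj, &ww);
 *		if (ret)
 *			continue;
 *
 *		vma = i915_gem_object_pin_to_display_plane(obj, &ww, alignment,
 *							   guard, view, flags);
 *		if (IS_ERR(vma))
 *			ret = PTR_ERR(vma);
 *	}
 *
 * See the framebuffer pinning code under display/ for the real call site.
 */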

/**
 * i915_gem_object_set_to_cpu_domain - Moves a single object to the CPU read,
 *                                     and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		obj->read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write)
		__start_cpu_write(obj);

	return 0;
}

/**
 * i915_gem_set_domain_ioctl - Called when user space prepares to use an
 *                             object with the CPU, either through the mmap
 *                             ioctl's mapping or a GTT mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	u32 read_domains = args->read_domains;
	u32 write_domain = args->write_domain;
	int err;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/*
	 * Having something in the write domain implies it's in the read
	 * domain, and only that read domain.  Enforce that in the request.
	 */
	if (write_domain && read_domains != write_domain)
		return -EINVAL;

	if (!read_domains)
		return 0;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (err)
		goto out;

	if (i915_gem_object_is_userptr(obj)) {
		/*
		 * Try to grab userptr pages, iris uses set_domain to check
		 * userptr validity
		 */
		err = i915_gem_object_userptr_validate(obj);
		if (!err)
			err = i915_gem_object_wait(obj,
						   I915_WAIT_INTERRUPTIBLE |
						   I915_WAIT_PRIORITY |
						   (write_domain ? I915_WAIT_ALL : 0),
						   MAX_SCHEDULE_TIMEOUT);
		goto out;
	}

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (err)
		goto out;

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out_unlock;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the request read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

out_unpin:
	i915_gem_object_unpin_pages(obj);

out_unlock:
	i915_gem_object_unlock(obj);

	if (!err && write_domain)
		i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out:
	i915_gem_object_put(obj);
	return err;
}
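
/*
 * For reference only (not kernel code): a minimal, hypothetical userspace
 * sketch of the set_domain uAPI above, preparing a buffer for CPU writes
 * through an mmap. fd and handle are assumed to be an open DRM fd and a
 * valid GEM handle:
 *
 *	struct drm_i915_gem_set_domain arg = {
 *		.handle = handle,
 *		.read_domains = I915_GEM_DOMAIN_CPU,
 *		.write_domain = I915_GEM_DOMAIN_CPU,
 *	};
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg))
 *		... handle errno ...
 */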

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
				 unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the CPU read domain, set ourselves into the GTT
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the GPU will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}
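
/*
 * Sketch only (not driver code): pread/pwrite style callers use the
 * prepare/finish pair roughly as below, under the object lock, with
 * needs_clflush a local unsigned int:
 *
 *	err = i915_gem_object_prepare_read(obj, &needs_clflush);
 *	if (err)
 *		return err;
 *
 *	// for each page: flush via drm_clflush_virt_range() first when
 *	// (needs_clflush & CLFLUSH_BEFORE), then copy out of the mapping
 *
 *	i915_gem_object_finish_access(obj);	// unpins the pages again
 */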

int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
				  unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the CPU write domain, set ourselves into the
	 * GTT write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the GPU will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}