/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include "display/intel_display.h"
#include "gt/intel_gt.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_domain.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"
#include "i915_gem_object.h"
#include "i915_gem_object_frontbuffer.h"
#include "i915_vma.h"

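/*
 * Report whether data written by the GPU may be stale in the CPU cache,
 * i.e. whether the object needs a clflush before it is next read through
 * a CPU mapping. Discrete parts never need the flush, nor do uncached or
 * write-through objects.
 */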
static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
        struct drm_i915_private *i915 = to_i915(obj->base.dev);

        if (IS_DGFX(i915))
                return false;

        /*
         * For objects created by userspace through GEM_CREATE with pat_index
         * set by set_pat extension, i915_gem_object_has_cache_level() will
         * always return true, because the coherency of such objects is managed
         * by userspace. Otherwise the call here would fall back to checking
         * whether the object is un-cached or write-through.
         */
        return !(i915_gem_object_has_cache_level(obj, I915_CACHE_NONE) ||
                 i915_gem_object_has_cache_level(obj, I915_CACHE_WT));
}

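/*
 * Report whether a CPU write to the object must be followed by a clflush:
 * not if the object is already tracked as cache dirty, never on discrete
 * parts, always if the object is not coherent for CPU writes, and for
 * framebuffers so that the display engine keeps seeing up-to-date data.
 */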
bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
        struct drm_i915_private *i915 = to_i915(obj->base.dev);

        if (obj->cache_dirty)
                return false;

        if (IS_DGFX(i915))
                return false;

        if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
                return true;

        /* Currently in use by HW (display engine)? Keep flushed. */
        return i915_gem_object_is_framebuffer(obj);
}

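/*
 * Flush any outstanding CPU-visible writes for the given write domains and
 * clear obj->write_domain: GTT writes are flushed via each bound GGTT vma
 * and signalled to the frontbuffer tracker, WC writes only need a memory
 * barrier, CPU writes are clflushed, and GPU (render) writes merely mark
 * the object as cache dirty if a later clflush will be required.
 */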
static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
        struct i915_vma *vma;

        assert_object_held(obj);

        if (!(obj->write_domain & flush_domains))
                return;

        switch (obj->write_domain) {
        case I915_GEM_DOMAIN_GTT:
                spin_lock(&obj->vma.lock);
                for_each_ggtt_vma(vma, obj)
                        i915_vma_flush_writes(vma);
                spin_unlock(&obj->vma.lock);

                i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
                break;

        case I915_GEM_DOMAIN_WC:
                wmb();
                break;

        case I915_GEM_DOMAIN_CPU:
                i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
                break;

        case I915_GEM_DOMAIN_RENDER:
                if (gpu_write_needs_clflush(obj))
                        obj->cache_dirty = true;
                break;
        }

        obj->write_domain = 0;
}

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
        /*
         * We manually flush the CPU domain so that we can override and
         * force the flush for the display, and perform it asynchronously.
         */
        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
        if (obj->cache_dirty)
                i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
        obj->write_domain = 0;
}

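/*
 * If the object is a framebuffer, take the object lock and flush any
 * pending CPU writes so that the display engine sees coherent data.
 */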
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
        if (!i915_gem_object_is_framebuffer(obj))
                return;

        i915_gem_object_lock(obj, NULL);
        __i915_gem_object_flush_for_display(obj);
        i915_gem_object_unlock(obj);
}

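/* As i915_gem_object_flush_if_display(), but with the object lock already held. */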
void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
{
        if (i915_gem_object_is_framebuffer(obj))
                __i915_gem_object_flush_for_display(obj);
}

/**
 * i915_gem_object_set_to_wc_domain - Moves a single object to the WC read, and
 * possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
        int ret;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   (write ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        if (obj->write_domain == I915_GEM_DOMAIN_WC)
                return 0;

        /* Flush and acquire obj->pages so that we are coherent through
         * direct access in memory with previous cached writes through
         * shmemfs and that our cache domain tracking remains valid.
         * For example, if the obj->filp was moved to swap without us
         * being notified and releasing the pages, we would mistakenly
         * continue to assume that the obj remained out of the CPU cached
         * domain.
         */
        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

        /* Serialise direct access to this object with the barriers for
         * coherent writes from the GPU, by effectively invalidating the
         * WC domain upon first access.
         */
        if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
                mb();

        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
        GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
        obj->read_domains |= I915_GEM_DOMAIN_WC;
        if (write) {
                obj->read_domains = I915_GEM_DOMAIN_WC;
                obj->write_domain = I915_GEM_DOMAIN_WC;
                obj->mm.dirty = true;
        }

        i915_gem_object_unpin_pages(obj);
        return 0;
}

/**
 * i915_gem_object_set_to_gtt_domain - Moves a single object to the GTT read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
        int ret;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   (write ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        if (obj->write_domain == I915_GEM_DOMAIN_GTT)
                return 0;

        /* Flush and acquire obj->pages so that we are coherent through
         * direct access in memory with previous cached writes through
         * shmemfs and that our cache domain tracking remains valid.
         * For example, if the obj->filp was moved to swap without us
         * being notified and releasing the pages, we would mistakenly
         * continue to assume that the obj remained out of the CPU cached
         * domain.
         */
        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

        /* Serialise direct access to this object with the barriers for
         * coherent writes from the GPU, by effectively invalidating the
         * GTT domain upon first access.
         */
        if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
                mb();

        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
        GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
        obj->read_domains |= I915_GEM_DOMAIN_GTT;
        if (write) {
                struct i915_vma *vma;

                obj->read_domains = I915_GEM_DOMAIN_GTT;
                obj->write_domain = I915_GEM_DOMAIN_GTT;
                obj->mm.dirty = true;

                spin_lock(&obj->vma.lock);
                for_each_ggtt_vma(vma, obj)
                        if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
                                i915_vma_set_ggtt_write(vma);
                spin_unlock(&obj->vma.lock);
        }

        i915_gem_object_unpin_pages(obj);
        return 0;
}

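/*
 * Illustrative use of the two domain setters above (a sketch, not code
 * taken from this file): callers must hold the object lock, e.g.
 *
 *        ret = i915_gem_object_lock_interruptible(obj, NULL);
 *        if (ret)
 *                return ret;
 *        ret = i915_gem_object_set_to_gtt_domain(obj, true);
 *        i915_gem_object_unlock(obj);
 *
 * i915_gem_object_set_to_wc_domain() follows the same pattern.
 */
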
/**
 * i915_gem_object_set_cache_level - Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent,
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
                                    enum i915_cache_level cache_level)
{
        int ret;

        /*
         * For objects created by userspace through GEM_CREATE with pat_index
         * set by set_pat extension, simply return 0 here without touching
         * the cache setting, because such objects should have an immutable
         * cache setting by design and are always managed by userspace.
         */
        if (i915_gem_object_has_cache_level(obj, cache_level))
                return 0;

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_ALL,
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        /* Always invalidate stale cachelines */
        i915_gem_object_set_cache_coherency(obj, cache_level);
        obj->cache_dirty = true;

        /* The cache-level will be applied when each vma is rebound. */
        return i915_gem_object_unbind(obj,
                                      I915_GEM_OBJECT_UNBIND_ACTIVE |
                                      I915_GEM_OBJECT_UNBIND_BARRIER);
}

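/*
 * I915_GEM_GET_CACHING ioctl: report the current caching mode of an object
 * to userspace. Not available on discrete parts, nor for objects whose PAT
 * index was set directly by userspace.
 */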
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
                               struct drm_file *file)
{
        struct drm_i915_gem_caching *args = data;
        struct drm_i915_gem_object *obj;
        int err = 0;

        if (IS_DGFX(to_i915(dev)))
                return -ENODEV;

        rcu_read_lock();
        obj = i915_gem_object_lookup_rcu(file, args->handle);
        if (!obj) {
                err = -ENOENT;
                goto out;
        }

        /*
         * This ioctl should be disabled for the objects with pat_index
         * set by user space.
         */
        if (obj->pat_set_by_user) {
                err = -EOPNOTSUPP;
                goto out;
        }

        if (i915_gem_object_has_cache_level(obj, I915_CACHE_LLC) ||
            i915_gem_object_has_cache_level(obj, I915_CACHE_L3_LLC))
                args->caching = I915_CACHING_CACHED;
        else if (i915_gem_object_has_cache_level(obj, I915_CACHE_WT))
                args->caching = I915_CACHING_DISPLAY;
        else
                args->caching = I915_CACHING_NONE;
out:
        rcu_read_unlock();
        return err;
}

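/*
 * I915_GEM_SET_CACHING ioctl: let userspace select the caching mode of an
 * object. Rejected on discrete parts, on graphics version 12.70 and newer,
 * and for objects with a userspace-set PAT index or proxy backing (except
 * that userptr objects silently accept I915_CACHING_CACHED).
 */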
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
                               struct drm_file *file)
{
        struct drm_i915_private *i915 = to_i915(dev);
        struct drm_i915_gem_caching *args = data;
        struct drm_i915_gem_object *obj;
        enum i915_cache_level level;
        int ret = 0;

        if (IS_DGFX(i915))
                return -ENODEV;

        if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
                return -EOPNOTSUPP;

        switch (args->caching) {
        case I915_CACHING_NONE:
                level = I915_CACHE_NONE;
                break;
        case I915_CACHING_CACHED:
                /*
                 * Due to a HW issue on BXT A stepping, GPU stores via a
                 * snooped mapping may leave stale data in a corresponding CPU
                 * cacheline, whereas normally such cachelines would get
                 * invalidated.
                 */
                if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
                        return -ENODEV;

                level = I915_CACHE_LLC;
                break;
        case I915_CACHING_DISPLAY:
                level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
                break;
        default:
                return -EINVAL;
        }

        obj = i915_gem_object_lookup(file, args->handle);
        if (!obj)
                return -ENOENT;

        /*
         * This ioctl should be disabled for the objects with pat_index
         * set by user space.
         */
        if (obj->pat_set_by_user) {
                ret = -EOPNOTSUPP;
                goto out;
        }

        /*
         * The caching mode of a proxy object is handled by its generator and
         * is not allowed to be changed by userspace.
         */
        if (i915_gem_object_is_proxy(obj)) {
                /*
                 * Silently allow cached for userptr; the vulkan driver
                 * sets all objects to cached
                 */
                if (!i915_gem_object_is_userptr(obj) ||
                    args->caching != I915_CACHING_CACHED)
                        ret = -ENXIO;

                goto out;
        }

        ret = i915_gem_object_lock_interruptible(obj, NULL);
        if (ret)
                goto out;

        ret = i915_gem_object_set_cache_level(obj, level);
        i915_gem_object_unlock(obj);

out:
        i915_gem_object_put(obj);
        return ret;
}

/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display, the callers are responsible for frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
                                     struct i915_gem_ww_ctx *ww,
                                     u32 alignment, unsigned int guard,
                                     const struct i915_gtt_view *view,
                                     unsigned int flags)
{
        struct drm_i915_private *i915 = to_i915(obj->base.dev);
        struct i915_vma *vma;
        int ret;

        /* Frame buffer must be in LMEM */
        if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
                return ERR_PTR(-EINVAL);

        /*
         * The display engine is not coherent with the LLC cache on gen6. As
         * a result, we make sure that the pinning that is about to occur is
         * done with uncached PTEs. This is the lowest common denominator for
         * all chipsets.
         *
         * However for gen6+, we could do better by using the GFDT bit instead
         * of uncaching, which would allow us to flush all the LLC-cached data
         * with that bit in the PTE to main memory with just one PIPE_CONTROL.
         */
        ret = i915_gem_object_set_cache_level(obj,
                                              HAS_WT(i915) ?
                                              I915_CACHE_WT : I915_CACHE_NONE);
        if (ret)
                return ERR_PTR(ret);

        /* VT-d may overfetch before/after the vma, so pad with scratch */
        if (guard)
                flags |= PIN_OFFSET_GUARD | (guard * I915_GTT_PAGE_SIZE);

        /*
         * As the user may map the buffer once pinned in the display plane
         * (e.g. libkms for the bootup splash), we have to ensure that we
         * always use map_and_fenceable for all scanout buffers. However,
         * it may simply be too big to fit into mappable, in which case
         * put it anyway and hope that userspace can cope (but always first
         * try to preserve the existing ABI).
         */
        vma = ERR_PTR(-ENOSPC);
        if ((flags & PIN_MAPPABLE) == 0 &&
            (!view || view->type == I915_GTT_VIEW_NORMAL))
                vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment,
                                                  flags | PIN_MAPPABLE |
                                                  PIN_NONBLOCK);
        if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
                vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0,
                                                  alignment, flags);
        if (IS_ERR(vma))
                return vma;

        vma->display_alignment = max(vma->display_alignment, alignment);
        i915_vma_mark_scanout(vma);

        i915_gem_object_flush_if_display_locked(obj);

        return vma;
}

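/*
 * Illustrative caller of i915_gem_object_pin_to_display_plane() (a sketch
 * with assumed local names, not code from this file): display code
 * typically runs the pin inside a ww transaction so that -EDEADLK backoff
 * is retried, e.g.
 *
 *        struct i915_gem_ww_ctx ww;
 *        struct i915_vma *vma;
 *        int ret;
 *
 *        for_i915_gem_ww(&ww, ret, true) {
 *                ret = i915_gem_object_lock(obj, &ww);
 *                if (ret)
 *                        continue;
 *                vma = i915_gem_object_pin_to_display_plane(obj, &ww,
 *                                                           alignment, 0,
 *                                                           view, flags);
 *                if (IS_ERR(vma))
 *                        ret = PTR_ERR(vma);
 *        }
 */
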
/**
 * i915_gem_object_set_to_cpu_domain - Moves a single object to the CPU read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
        int ret;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   (write ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

        /* Flush the CPU cache if it's still invalid. */
        if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
                i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
                obj->read_domains |= I915_GEM_DOMAIN_CPU;
        }

        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
        GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

        /* If we're writing through the CPU, then the GPU read domains will
         * need to be invalidated at next use.
         */
        if (write)
                __start_cpu_write(obj);

        return 0;
}

/**
 * i915_gem_set_domain_ioctl - Called when user space prepares to use an
 * object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
                          struct drm_file *file)
{
        struct drm_i915_gem_set_domain *args = data;
        struct drm_i915_gem_object *obj;
        u32 read_domains = args->read_domains;
        u32 write_domain = args->write_domain;
        int err;

        if (IS_DGFX(to_i915(dev)))
                return -ENODEV;

        /* Only handle setting domains to types used by the CPU. */
        if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
                return -EINVAL;

        /*
         * Having something in the write domain implies it's in the read
         * domain, and only that read domain. Enforce that in the request.
         */
        if (write_domain && read_domains != write_domain)
                return -EINVAL;

        if (!read_domains)
                return 0;

        obj = i915_gem_object_lookup(file, args->handle);
        if (!obj)
                return -ENOENT;

        /*
         * Try to flush the object off the GPU without holding the lock.
         * We will repeat the flush holding the lock in the normal manner
         * to catch cases where we are gazumped.
         */
        err = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_PRIORITY |
                                   (write_domain ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (err)
                goto out;

        if (i915_gem_object_is_userptr(obj)) {
                /*
                 * Try to grab userptr pages, iris uses set_domain to check
                 * userptr validity
                 */
                err = i915_gem_object_userptr_validate(obj);
                if (!err)
                        err = i915_gem_object_wait(obj,
                                                   I915_WAIT_INTERRUPTIBLE |
                                                   I915_WAIT_PRIORITY |
                                                   (write_domain ? I915_WAIT_ALL : 0),
                                                   MAX_SCHEDULE_TIMEOUT);
                goto out;
        }

        /*
         * Proxy objects do not control access to the backing storage, ergo
         * they cannot be used as a means to manipulate the cache domain
         * tracking for that backing storage. The proxy object is always
         * considered to be outside of any cache domain.
         */
        if (i915_gem_object_is_proxy(obj)) {
                err = -ENXIO;
                goto out;
        }

        err = i915_gem_object_lock_interruptible(obj, NULL);
        if (err)
                goto out;

        /*
         * Flush and acquire obj->pages so that we are coherent through
         * direct access in memory with previous cached writes through
         * shmemfs and that our cache domain tracking remains valid.
         * For example, if the obj->filp was moved to swap without us
         * being notified and releasing the pages, we would mistakenly
         * continue to assume that the obj remained out of the CPU cached
         * domain.
         */
        err = i915_gem_object_pin_pages(obj);
        if (err)
                goto out_unlock;

        /*
         * Already in the desired write domain? Nothing for us to do!
         *
         * We apply a little bit of cunning here to catch a broader set of
         * no-ops. If obj->write_domain is set, we must be in the same
         * obj->read_domains, and only that domain. Therefore, if that
         * obj->write_domain matches the request read_domains, we are
         * already in the same read/write domain and can skip the operation,
         * without having to further check the requested write_domain.
         */
        if (READ_ONCE(obj->write_domain) == read_domains)
                goto out_unpin;

        if (read_domains & I915_GEM_DOMAIN_WC)
                err = i915_gem_object_set_to_wc_domain(obj, write_domain);
        else if (read_domains & I915_GEM_DOMAIN_GTT)
                err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
        else
                err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

out_unpin:
        i915_gem_object_unpin_pages(obj);

out_unlock:
        i915_gem_object_unlock(obj);

        if (!err && write_domain)
                i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out:
        i915_gem_object_put(obj);
        return err;
}

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
                                 unsigned int *needs_clflush)
{
        int ret;

        *needs_clflush = 0;
        if (!i915_gem_object_has_struct_page(obj))
                return -ENODEV;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE,
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
            !static_cpu_has(X86_FEATURE_CLFLUSH)) {
                ret = i915_gem_object_set_to_cpu_domain(obj, false);
                if (ret)
                        goto err_unpin;
                else
                        goto out;
        }

        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

        /* If we're not in the cpu read domain, set ourself into the gtt
         * read domain and manually flush cachelines (if required). This
         * optimizes for the case when the gpu will dirty the data
         * anyway again before the next pread happens.
         */
        if (!obj->cache_dirty &&
            !(obj->read_domains & I915_GEM_DOMAIN_CPU))
                *needs_clflush = CLFLUSH_BEFORE;

out:
        /* return with the pages pinned */
        return 0;

err_unpin:
        i915_gem_object_unpin_pages(obj);
        return ret;
}

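/*
 * As i915_gem_object_prepare_read(), but for a CPU write: waits for GPU
 * writes as well, reports via CLFLUSH_BEFORE/CLFLUSH_AFTER which flushes
 * the caller must perform around its access, invalidates the frontbuffer
 * and marks the pages dirty. Returns with the pages pinned.
 */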
int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
                                  unsigned int *needs_clflush)
{
        int ret;

        *needs_clflush = 0;
        if (!i915_gem_object_has_struct_page(obj))
                return -ENODEV;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_ALL,
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
            !static_cpu_has(X86_FEATURE_CLFLUSH)) {
                ret = i915_gem_object_set_to_cpu_domain(obj, true);
                if (ret)
                        goto err_unpin;
                else
                        goto out;
        }

        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

        /* If we're not in the cpu write domain, set ourself into the
         * gtt write domain and manually flush cachelines (as required).
         * This optimizes for the case when the gpu will use the data
         * right away and we therefore have to clflush anyway.
         */
        if (!obj->cache_dirty) {
                *needs_clflush |= CLFLUSH_AFTER;

                /*
                 * Same trick applies to invalidate partially written
                 * cachelines read before writing.
                 */
                if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
                        *needs_clflush |= CLFLUSH_BEFORE;
        }

out:
        i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
        obj->mm.dirty = true;
        /* return with the pages pinned */
        return 0;

err_unpin:
        i915_gem_object_unpin_pages(obj);
        return ret;
}