// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014-2016 Intel Corporation
 */

#include "gt/intel_gt.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_domain.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"
#include "i915_gem_object.h"
#include "i915_gem_object_frontbuffer.h"
#include "i915_vma.h"

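/*
 * Report whether a GPU (render) write may land in the CPU cache and thus
 * requires a clflush before the data is visible to non-snooping readers
 * (e.g. the display engine). Discrete parts and uncached or write-through
 * objects never need this.
 */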
static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
        struct drm_i915_private *i915 = to_i915(obj->base.dev);

        if (IS_DGFX(i915))
                return false;

        /*
         * For objects created by userspace through GEM_CREATE with pat_index
         * set by the set_pat extension, i915_gem_object_has_cache_level() will
         * always return true, because the coherency of such objects is managed
         * by userspace. Otherwise the call here would fall back to checking
         * whether the object is un-cached or write-through.
         */
        return !(i915_gem_object_has_cache_level(obj, I915_CACHE_NONE) ||
                 i915_gem_object_has_cache_level(obj, I915_CACHE_WT));
}

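/*
 * Report whether a CPU write to this object must be followed by a clflush to
 * stay coherent: not needed if the cachelines are already tracked as dirty or
 * on discrete parts, always needed if the object is not marked coherent for
 * CPU writes, and otherwise only for framebuffers that the display engine may
 * be scanning out.
 */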
bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
        struct drm_i915_private *i915 = to_i915(obj->base.dev);

        if (obj->cache_dirty)
                return false;

        if (IS_DGFX(i915))
                return false;

        if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
                return true;

        /* Currently in use by HW (display engine)? Keep flushed. */
        return i915_gem_object_is_framebuffer(obj);
}

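/*
 * Flush the object's pending write domain if it intersects @flush_domains:
 * drain GGTT write-combining buffers and signal the frontbuffer for GTT
 * writes, issue a write memory barrier for WC writes, clflush for CPU
 * writes, and mark the cachelines dirty after GPU rendering when needed.
 * The write domain is cleared on return.
 */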
static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
        struct i915_vma *vma;

        assert_object_held(obj);

        if (!(obj->write_domain & flush_domains))
                return;

        switch (obj->write_domain) {
        case I915_GEM_DOMAIN_GTT:
                spin_lock(&obj->vma.lock);
                for_each_ggtt_vma(vma, obj)
                        i915_vma_flush_writes(vma);
                spin_unlock(&obj->vma.lock);

                i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
                break;

        case I915_GEM_DOMAIN_WC:
                wmb();
                break;

        case I915_GEM_DOMAIN_CPU:
                i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
                break;

        case I915_GEM_DOMAIN_RENDER:
                if (gpu_write_needs_clflush(obj))
                        obj->cache_dirty = true;
                break;
        }

        obj->write_domain = 0;
}

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
        /*
         * We manually flush the CPU domain so that we can override and
         * force the flush for the display, and perform it asynchronously.
         */
        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
        if (obj->cache_dirty)
                i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
        obj->write_domain = 0;
}

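/*
 * Flush an object out to the display engine, but only if it is in active use
 * as a framebuffer. The _locked variant expects the caller to already hold
 * the object lock.
 */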
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
        if (!i915_gem_object_is_framebuffer(obj))
                return;

        i915_gem_object_lock(obj, NULL);
        __i915_gem_object_flush_for_display(obj);
        i915_gem_object_unlock(obj);
}

void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
{
        if (i915_gem_object_is_framebuffer(obj))
                __i915_gem_object_flush_for_display(obj);
}

/**
 * i915_gem_object_set_to_wc_domain - Moves a single object to the WC read, and
 * possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
        int ret;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   (write ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        if (obj->write_domain == I915_GEM_DOMAIN_WC)
                return 0;

        /* Flush and acquire obj->pages so that we are coherent through
         * direct access in memory with previous cached writes through
         * shmemfs and that our cache domain tracking remains valid.
         * For example, if the obj->filp was moved to swap without us
         * being notified and releasing the pages, we would mistakenly
         * continue to assume that the obj remained out of the CPU cached
         * domain.
         */
        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

        /* Serialise direct access to this object with the barriers for
         * coherent writes from the GPU, by effectively invalidating the
         * WC domain upon first access.
         */
        if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
                mb();

        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
        GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
        obj->read_domains |= I915_GEM_DOMAIN_WC;
        if (write) {
                obj->read_domains = I915_GEM_DOMAIN_WC;
                obj->write_domain = I915_GEM_DOMAIN_WC;
                obj->mm.dirty = true;
        }

        i915_gem_object_unpin_pages(obj);
        return 0;
}

/**
 * i915_gem_object_set_to_gtt_domain - Moves a single object to the GTT read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
        int ret;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   (write ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        if (obj->write_domain == I915_GEM_DOMAIN_GTT)
                return 0;

        /* Flush and acquire obj->pages so that we are coherent through
         * direct access in memory with previous cached writes through
         * shmemfs and that our cache domain tracking remains valid.
         * For example, if the obj->filp was moved to swap without us
         * being notified and releasing the pages, we would mistakenly
         * continue to assume that the obj remained out of the CPU cached
         * domain.
         */
        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

        /* Serialise direct access to this object with the barriers for
         * coherent writes from the GPU, by effectively invalidating the
         * GTT domain upon first access.
         */
        if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
                mb();

        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
        GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
        obj->read_domains |= I915_GEM_DOMAIN_GTT;
        if (write) {
                struct i915_vma *vma;

                obj->read_domains = I915_GEM_DOMAIN_GTT;
                obj->write_domain = I915_GEM_DOMAIN_GTT;
                obj->mm.dirty = true;

                spin_lock(&obj->vma.lock);
                for_each_ggtt_vma(vma, obj)
                        if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
                                i915_vma_set_ggtt_write(vma);
                spin_unlock(&obj->vma.lock);
        }

        i915_gem_object_unpin_pages(obj);
        return 0;
}

/**
 * i915_gem_object_set_cache_level - Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent,
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
                                    enum i915_cache_level cache_level)
{
        int ret;

        /*
         * For objects created by userspace through GEM_CREATE with pat_index
         * set by the set_pat extension, simply return 0 here without touching
         * the cache setting, because such objects should have an immutable
         * cache setting by design and are always managed by userspace.
         */
        if (i915_gem_object_has_cache_level(obj, cache_level))
                return 0;

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_ALL,
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        /* Always invalidate stale cachelines */
        i915_gem_object_set_cache_coherency(obj, cache_level);
        obj->cache_dirty = true;

        /* The cache-level will be applied when each vma is rebound. */
        return i915_gem_object_unbind(obj,
                                      I915_GEM_OBJECT_UNBIND_ACTIVE |
                                      I915_GEM_OBJECT_UNBIND_BARRIER);
}

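/*
 * GET_CACHING ioctl: report the current caching mode of an object back to
 * userspace. Not available on discrete parts, and rejected for objects whose
 * PAT index was set directly by userspace.
 */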
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
                               struct drm_file *file)
{
        struct drm_i915_gem_caching *args = data;
        struct drm_i915_gem_object *obj;
        int err = 0;

        if (IS_DGFX(to_i915(dev)))
                return -ENODEV;

        rcu_read_lock();
        obj = i915_gem_object_lookup_rcu(file, args->handle);
        if (!obj) {
                err = -ENOENT;
                goto out;
        }

        /*
         * This ioctl should be disabled for objects with a pat_index set by
         * user space.
         */
        if (obj->pat_set_by_user) {
                err = -EOPNOTSUPP;
                goto out;
        }

        if (i915_gem_object_has_cache_level(obj, I915_CACHE_LLC) ||
            i915_gem_object_has_cache_level(obj, I915_CACHE_L3_LLC))
                args->caching = I915_CACHING_CACHED;
        else if (i915_gem_object_has_cache_level(obj, I915_CACHE_WT))
                args->caching = I915_CACHING_DISPLAY;
        else
                args->caching = I915_CACHING_NONE;
out:
        rcu_read_unlock();
        return err;
}

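/*
 * SET_CACHING ioctl: change the caching mode of an object on behalf of
 * userspace. Not available on discrete parts or on graphics IP version 12.70
 * and newer, and rejected for objects whose PAT index was set directly by
 * userspace, as well as for proxy objects (with the userptr exception noted
 * below).
 */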
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
                               struct drm_file *file)
{
        struct drm_i915_private *i915 = to_i915(dev);
        struct drm_i915_gem_caching *args = data;
        struct drm_i915_gem_object *obj;
        enum i915_cache_level level;
        int ret = 0;

        if (IS_DGFX(i915))
                return -ENODEV;

        if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
                return -EOPNOTSUPP;

        switch (args->caching) {
        case I915_CACHING_NONE:
                level = I915_CACHE_NONE;
                break;
        case I915_CACHING_CACHED:
                /*
                 * Due to a HW issue on BXT A stepping, GPU stores via a
                 * snooped mapping may leave stale data in a corresponding CPU
                 * cacheline, whereas normally such cachelines would get
                 * invalidated.
                 */
                if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
                        return -ENODEV;

                level = I915_CACHE_LLC;
                break;
        case I915_CACHING_DISPLAY:
                level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
                break;
        default:
                return -EINVAL;
        }

        obj = i915_gem_object_lookup(file, args->handle);
        if (!obj)
                return -ENOENT;

        /*
         * This ioctl should be disabled for objects with a pat_index set by
         * user space.
         */
        if (obj->pat_set_by_user) {
                ret = -EOPNOTSUPP;
                goto out;
        }

        /*
         * The caching mode of a proxy object is handled by its generator, and
         * is not allowed to be changed by userspace.
         */
        if (i915_gem_object_is_proxy(obj)) {
                /*
                 * Silently allow cached for userptr; the vulkan driver
                 * sets all objects to cached.
                 */
                if (!i915_gem_object_is_userptr(obj) ||
                    args->caching != I915_CACHING_CACHED)
                        ret = -ENXIO;

                goto out;
        }

        ret = i915_gem_object_lock_interruptible(obj, NULL);
        if (ret)
                goto out;

        ret = i915_gem_object_set_cache_level(obj, level);
        i915_gem_object_unlock(obj);

out:
        i915_gem_object_put(obj);
        return ret;
}

/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display, the callers are responsible for frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
                                     struct i915_gem_ww_ctx *ww,
                                     u32 alignment, unsigned int guard,
                                     const struct i915_gtt_view *view,
                                     unsigned int flags)
{
        struct drm_i915_private *i915 = to_i915(obj->base.dev);
        struct i915_vma *vma;
        int ret;

        /* Frame buffer must be in LMEM */
        if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
                return ERR_PTR(-EINVAL);

        /*
         * The display engine is not coherent with the LLC cache on gen6. As
         * a result, we make sure that the pinning that is about to occur is
         * done with uncached PTEs. This is lowest common denominator for all
         * chipsets.
         *
         * However for gen6+, we could do better by using the GFDT bit instead
         * of uncaching, which would allow us to flush all the LLC-cached data
         * with that bit in the PTE to main memory with just one PIPE_CONTROL.
         */
        ret = i915_gem_object_set_cache_level(obj,
                                              HAS_WT(i915) ?
                                              I915_CACHE_WT : I915_CACHE_NONE);
        if (ret)
                return ERR_PTR(ret);

        /* VT-d may overfetch before/after the vma, so pad with scratch */
        if (guard)
                flags |= PIN_OFFSET_GUARD | (guard * I915_GTT_PAGE_SIZE);

        /*
         * As the user may map the buffer once pinned in the display plane
         * (e.g. libkms for the bootup splash), we have to ensure that we
         * always use map_and_fenceable for all scanout buffers. However,
         * it may simply be too big to fit into mappable, in which case
         * put it anyway and hope that userspace can cope (but always first
         * try to preserve the existing ABI).
         */
        vma = ERR_PTR(-ENOSPC);
        if ((flags & PIN_MAPPABLE) == 0 &&
            (!view || view->type == I915_GTT_VIEW_NORMAL))
                vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment,
                                                  flags | PIN_MAPPABLE |
                                                  PIN_NONBLOCK);
        if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
                vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0,
                                                  alignment, flags);
        if (IS_ERR(vma))
                return vma;

        vma->display_alignment = max(vma->display_alignment, alignment);
        i915_vma_mark_scanout(vma);

        i915_gem_object_flush_if_display_locked(obj);

        return vma;
}
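
/*
 * Illustrative sketch only (not taken from the driver): a display path that
 * already runs inside an i915_gem_ww_ctx loop might pin a framebuffer object
 * roughly like so, with the alignment and pin flags chosen by the caller:
 *
 *	vma = i915_gem_object_pin_to_display_plane(obj, &ww, alignment, 0,
 *						    NULL, pin_flags);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 *	... program the plane using i915_ggtt_offset(vma) ...
 *	i915_vma_unpin(vma);
 */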

/**
 * i915_gem_object_set_to_cpu_domain - Moves a single object to the CPU read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
        int ret;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   (write ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

        /* Flush the CPU cache if it's still invalid. */
        if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
                i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
                obj->read_domains |= I915_GEM_DOMAIN_CPU;
        }

        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
        GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

        /* If we're writing through the CPU, then the GPU read domains will
         * need to be invalidated at next use.
         */
        if (write)
                __start_cpu_write(obj);

        return 0;
}

/**
 * i915_gem_set_domain_ioctl - Called when user space prepares to use an
 * object with the CPU, either through the mmap ioctl's mapping or a GTT
 * mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
                          struct drm_file *file)
{
        struct drm_i915_gem_set_domain *args = data;
        struct drm_i915_gem_object *obj;
        u32 read_domains = args->read_domains;
        u32 write_domain = args->write_domain;
        int err;

        if (IS_DGFX(to_i915(dev)))
                return -ENODEV;

        /* Only handle setting domains to types used by the CPU. */
        if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
                return -EINVAL;

        /*
         * Having something in the write domain implies it's in the read
         * domain, and only that read domain. Enforce that in the request.
         */
        if (write_domain && read_domains != write_domain)
                return -EINVAL;

        if (!read_domains)
                return 0;

        obj = i915_gem_object_lookup(file, args->handle);
        if (!obj)
                return -ENOENT;

        /*
         * Try to flush the object off the GPU without holding the lock.
         * We will repeat the flush holding the lock in the normal manner
         * to catch cases where we are gazumped.
         */
        err = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_PRIORITY |
                                   (write_domain ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (err)
                goto out;

        if (i915_gem_object_is_userptr(obj)) {
                /*
                 * Try to grab userptr pages, iris uses set_domain to check
                 * userptr validity
                 */
                err = i915_gem_object_userptr_validate(obj);
                if (!err)
                        err = i915_gem_object_wait(obj,
                                                   I915_WAIT_INTERRUPTIBLE |
                                                   I915_WAIT_PRIORITY |
                                                   (write_domain ? I915_WAIT_ALL : 0),
                                                   MAX_SCHEDULE_TIMEOUT);
                goto out;
        }

        /*
         * Proxy objects do not control access to the backing storage, ergo
         * they cannot be used as a means to manipulate the cache domain
         * tracking for that backing storage. The proxy object is always
         * considered to be outside of any cache domain.
         */
        if (i915_gem_object_is_proxy(obj)) {
                err = -ENXIO;
                goto out;
        }

        err = i915_gem_object_lock_interruptible(obj, NULL);
        if (err)
                goto out;

        /*
         * Flush and acquire obj->pages so that we are coherent through
         * direct access in memory with previous cached writes through
         * shmemfs and that our cache domain tracking remains valid.
         * For example, if the obj->filp was moved to swap without us
         * being notified and releasing the pages, we would mistakenly
         * continue to assume that the obj remained out of the CPU cached
         * domain.
         */
        err = i915_gem_object_pin_pages(obj);
        if (err)
                goto out_unlock;

        /*
         * Already in the desired write domain? Nothing for us to do!
         *
         * We apply a little bit of cunning here to catch a broader set of
         * no-ops. If obj->write_domain is set, we must be in the same
         * obj->read_domains, and only that domain. Therefore, if that
         * obj->write_domain matches the request read_domains, we are
         * already in the same read/write domain and can skip the operation,
         * without having to further check the requested write_domain.
         */
        if (READ_ONCE(obj->write_domain) == read_domains)
                goto out_unpin;

        if (read_domains & I915_GEM_DOMAIN_WC)
                err = i915_gem_object_set_to_wc_domain(obj, write_domain);
        else if (read_domains & I915_GEM_DOMAIN_GTT)
                err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
        else
                err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

out_unpin:
        i915_gem_object_unpin_pages(obj);

out_unlock:
        i915_gem_object_unlock(obj);

        if (!err && write_domain)
                i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out:
        i915_gem_object_put(obj);
        return err;
}

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
                                 unsigned int *needs_clflush)
{
        int ret;

        *needs_clflush = 0;
        if (!i915_gem_object_has_struct_page(obj))
                return -ENODEV;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE,
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
            !static_cpu_has(X86_FEATURE_CLFLUSH)) {
                ret = i915_gem_object_set_to_cpu_domain(obj, false);
                if (ret)
                        goto err_unpin;
                else
                        goto out;
        }

        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

        /* If we're not in the cpu read domain, set ourself into the gtt
         * read domain and manually flush cachelines (if required). This
         * optimizes for the case when the gpu will dirty the data
         * anyway again before the next pread happens.
         */
        if (!obj->cache_dirty &&
            !(obj->read_domains & I915_GEM_DOMAIN_CPU))
                *needs_clflush = CLFLUSH_BEFORE;

out:
        /* return with the pages pinned */
        return 0;

err_unpin:
        i915_gem_object_unpin_pages(obj);
        return ret;
}

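/*
 * As i915_gem_object_prepare_read() above, but for CPU writes: pins the
 * object's pages, waits for all GPU access (reads and writes) and reports
 * via needs_clflush whether the caller must flush cachelines before and/or
 * after writing. The frontbuffer is invalidated and the pages are marked
 * dirty before returning.
 */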
int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
                                  unsigned int *needs_clflush)
{
        int ret;

        *needs_clflush = 0;
        if (!i915_gem_object_has_struct_page(obj))
                return -ENODEV;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_ALL,
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
            !static_cpu_has(X86_FEATURE_CLFLUSH)) {
                ret = i915_gem_object_set_to_cpu_domain(obj, true);
                if (ret)
                        goto err_unpin;
                else
                        goto out;
        }

        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

        /* If we're not in the cpu write domain, set ourself into the
         * gtt write domain and manually flush cachelines (as required).
         * This optimizes for the case when the gpu will use the data
         * right away and we therefore have to clflush anyway.
         */
        if (!obj->cache_dirty) {
                *needs_clflush |= CLFLUSH_AFTER;

                /*
                 * Same trick applies to invalidate partially written
                 * cachelines read before writing.
                 */
                if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
                        *needs_clflush |= CLFLUSH_BEFORE;
        }

out:
        i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
        obj->mm.dirty = true;
        /* return with the pages pinned */
        return 0;

err_unpin:
        i915_gem_object_unpin_pages(obj);
        return ret;
}