// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014-2016 Intel Corporation
 */

#include "display/intel_display.h"
#include "gt/intel_gt.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_domain.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"
#include "i915_gem_object.h"
#include "i915_gem_object_frontbuffer.h"
#include "i915_vma.h"

static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (IS_DGFX(i915))
		return false;

	/*
	 * For objects created by userspace through GEM_CREATE with pat_index
	 * set by set_pat extension, i915_gem_object_has_cache_level() will
	 * always return true, because the coherency of such object is managed
	 * by userspace. Otherwise the call here would fall back to checking
	 * whether the object is un-cached or write-through.
	 */
	return !(i915_gem_object_has_cache_level(obj, I915_CACHE_NONE) ||
		 i915_gem_object_has_cache_level(obj, I915_CACHE_WT));
}

bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (obj->cache_dirty)
		return false;

	if (IS_DGFX(i915))
		return false;

	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
		return true;

	/* Currently in use by HW (display engine)? Keep flushed. */
	return i915_gem_object_is_framebuffer(obj);
}

static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
	struct i915_vma *vma;

	assert_object_held(obj);

	if (!(obj->write_domain & flush_domains))
		return;

	switch (obj->write_domain) {
	case I915_GEM_DOMAIN_GTT:
		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			i915_vma_flush_writes(vma);
		spin_unlock(&obj->vma.lock);

		i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
		break;

	case I915_GEM_DOMAIN_WC:
		wmb();
		break;

	case I915_GEM_DOMAIN_CPU:
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		break;

	case I915_GEM_DOMAIN_RENDER:
		if (gpu_write_needs_clflush(obj))
			obj->cache_dirty = true;
		break;
	}

	obj->write_domain = 0;
}

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asynchronously.
	 */
	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
	if (obj->cache_dirty)
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
	obj->write_domain = 0;
}

void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_is_framebuffer(obj))
		return;

	i915_gem_object_lock(obj, NULL);
	__i915_gem_object_flush_for_display(obj);
	i915_gem_object_unlock(obj);
}

void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
{
	if (i915_gem_object_is_framebuffer(obj))
		__i915_gem_object_flush_for_display(obj);
}
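/*
 * Usage sketch (illustrative only, not called anywhere in this file): callers
 * that already hold the object ww lock use the _locked variant above, while
 * the plain variant takes and drops the lock itself, e.g.
 *
 *	i915_gem_object_lock(obj, NULL);
 *	... write to the object ...
 *	i915_gem_object_flush_if_display_locked(obj);
 *	i915_gem_object_unlock(obj);
 */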
/**
 * i915_gem_object_set_to_wc_domain - Moves a single object to the WC read, and
 * possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_WC)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * WC domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_WC;
	if (write) {
		obj->read_domains = I915_GEM_DOMAIN_WC;
		obj->write_domain = I915_GEM_DOMAIN_WC;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * i915_gem_object_set_to_gtt_domain - Moves a single object to the GTT read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		struct i915_vma *vma;

		obj->read_domains = I915_GEM_DOMAIN_GTT;
		obj->write_domain = I915_GEM_DOMAIN_GTT;
		obj->mm.dirty = true;

		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
				i915_vma_set_ggtt_write(vma);
		spin_unlock(&obj->vma.lock);
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * i915_gem_object_set_cache_level - Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent,
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	int ret;

	/*
	 * For objects created by userspace through GEM_CREATE with pat_index
	 * set by set_pat extension, simply return 0 here without touching
	 * the cache setting, because such objects should have an immutable
	 * cache setting by design and always managed by userspace.
	 */
	if (i915_gem_object_has_cache_level(obj, cache_level))
		return 0;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	/* Always invalidate stale cachelines */
	i915_gem_object_set_cache_coherency(obj, cache_level);
	obj->cache_dirty = true;

	/* The cache-level will be applied when each vma is rebound. */
	return i915_gem_object_unbind(obj,
				      I915_GEM_OBJECT_UNBIND_ACTIVE |
				      I915_GEM_OBJECT_UNBIND_BARRIER);
}

int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	int err = 0;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj) {
		err = -ENOENT;
		goto out;
	}

	/*
	 * This ioctl should be disabled for the objects with pat_index
	 * set by user space.
	 */
	if (obj->pat_set_by_user) {
		err = -EOPNOTSUPP;
		goto out;
	}

	if (i915_gem_object_has_cache_level(obj, I915_CACHE_LLC) ||
	    i915_gem_object_has_cache_level(obj, I915_CACHE_L3_LLC))
		args->caching = I915_CACHING_CACHED;
	else if (i915_gem_object_has_cache_level(obj, I915_CACHE_WT))
		args->caching = I915_CACHING_DISPLAY;
	else
		args->caching = I915_CACHING_NONE;
out:
	rcu_read_unlock();
	return err;
}

int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret = 0;

	if (IS_DGFX(i915))
		return -ENODEV;

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
		return -EOPNOTSUPP;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * This ioctl should be disabled for the objects with pat_index
	 * set by user space.
	 */
	if (obj->pat_set_by_user) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	/*
	 * The caching mode of proxy object is handled by its generator, and
	 * not allowed to be changed by userspace.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		/*
		 * Silently allow cached for userptr; the vulkan driver
		 * sets all objects to cached
		 */
		if (!i915_gem_object_is_userptr(obj) ||
		    args->caching != I915_CACHING_CACHED)
			ret = -ENXIO;

		goto out;
	}

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		goto out;

	ret = i915_gem_object_set_cache_level(obj, level);
	i915_gem_object_unlock(obj);

out:
	i915_gem_object_put(obj);
	return ret;
}
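/*
 * Userspace view (illustrative sketch only, not kernel code): on integrated
 * parts that still honour these ioctls, the caching mode is set through the
 * uapi roughly as follows, where drmIoctl() comes from libdrm and "handle"
 * is an existing GEM handle:
 *
 *	struct drm_i915_gem_caching arg = {
 *		.handle = handle,
 *		.caching = I915_CACHING_CACHED,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
 */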
/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display, the callers are responsible for frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     struct i915_gem_ww_ctx *ww,
				     u32 alignment, unsigned int guard,
				     const struct i915_gtt_view *view,
				     unsigned int flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;
	int ret;

	/* Frame buffer must be in LMEM */
	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
		return ERR_PTR(-EINVAL);

	/*
	 * The display engine is not coherent with the LLC cache on gen6. As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is lowest common denominator for all
	 * chipsets.
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(i915) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret)
		return ERR_PTR(ret);

	/* VT-d may overfetch before/after the vma, so pad with scratch */
	if (guard)
		flags |= PIN_OFFSET_GUARD | (guard * I915_GTT_PAGE_SIZE);

	/*
	 * As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers. However,
	 * it may simply be too big to fit into mappable, in which case
	 * put it anyway and hope that userspace can cope (but always first
	 * try to preserve the existing ABI).
	 */
	vma = ERR_PTR(-ENOSPC);
	if ((flags & PIN_MAPPABLE) == 0 &&
	    (!view || view->type == I915_GTT_VIEW_NORMAL))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment,
						  flags | PIN_MAPPABLE |
						  PIN_NONBLOCK);
	if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0,
						  alignment, flags);
	if (IS_ERR(vma))
		return vma;

	vma->display_alignment = max(vma->display_alignment, alignment);
	i915_vma_mark_scanout(vma);

	i915_gem_object_flush_if_display_locked(obj);

	return vma;
}

/**
 * i915_gem_object_set_to_cpu_domain - Moves a single object to the CPU read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		obj->read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write)
		__start_cpu_write(obj);

	return 0;
}
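/*
 * Usage sketch (illustrative only): in-kernel callers move an object into the
 * CPU domain under the object lock before touching its backing storage
 * directly, along the lines of
 *
 *	ret = i915_gem_object_lock_interruptible(obj, NULL);
 *	if (ret)
 *		return ret;
 *	ret = i915_gem_object_set_to_cpu_domain(obj, true);
 *	i915_gem_object_unlock(obj);
 */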
/**
 * i915_gem_set_domain_ioctl - Called when user space prepares to use an
 *			       object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	u32 read_domains = args->read_domains;
	u32 write_domain = args->write_domain;
	int err;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/*
	 * Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain && read_domains != write_domain)
		return -EINVAL;

	if (!read_domains)
		return 0;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (err)
		goto out;

	if (i915_gem_object_is_userptr(obj)) {
		/*
		 * Try to grab userptr pages, iris uses set_domain to check
		 * userptr validity
		 */
		err = i915_gem_object_userptr_validate(obj);
		if (!err)
			err = i915_gem_object_wait(obj,
						   I915_WAIT_INTERRUPTIBLE |
						   I915_WAIT_PRIORITY |
						   (write_domain ? I915_WAIT_ALL : 0),
						   MAX_SCHEDULE_TIMEOUT);
		goto out;
	}

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (err)
		goto out;

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out_unlock;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the request read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

out_unpin:
	i915_gem_object_unpin_pages(obj);

out_unlock:
	i915_gem_object_unlock(obj);

	if (!err && write_domain)
		i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out:
	i915_gem_object_put(obj);
	return err;
}
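/*
 * Userspace view (illustrative sketch only, not kernel code): the ioctl above
 * is normally reached via libdrm with the uapi domain flags, e.g. before
 * writing through a CPU mmap:
 *
 *	struct drm_i915_gem_set_domain arg = {
 *		.handle = handle,
 *		.read_domains = I915_GEM_DOMAIN_CPU,
 *		.write_domain = I915_GEM_DOMAIN_CPU,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg);
 */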
/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
				 unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourself into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}

int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
				  unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourself into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}
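/*
 * Usage sketch (illustrative only): pread/pwrite-style paths pair the prepare
 * helpers above with i915_gem_object_finish_access(), assumed here to be the
 * unpinning counterpart declared elsewhere in the driver:
 *
 *	unsigned int needs_clflush;
 *
 *	ret = i915_gem_object_prepare_write(obj, &needs_clflush);
 *	if (ret)
 *		return ret;
 *	... copy into the pages, honouring CLFLUSH_BEFORE/CLFLUSH_AFTER ...
 *	i915_gem_object_finish_access(obj);
 */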