1 /*
2  * Copyright © 2008,2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Chris Wilson <chris@chris-wilson.co.uk>
26  *
27  */
28 
29 #include "drmP.h"
30 #include "drm.h"
31 #include "i915_drm.h"
32 #include "i915_drv.h"
33 #include "i915_trace.h"
34 #include "intel_drv.h"
35 #include <linux/dma_remapping.h>
36 
37 struct change_domains {
38 	uint32_t invalidate_domains;
39 	uint32_t flush_domains;
40 	uint32_t flush_rings;
41 	uint32_t flips;
42 };
43 
44 /*
45  * Set the next domain for the specified object. This
46  * may not actually perform the necessary flushing/invalidating though,
47  * as that may want to be batched with other set_domain operations
48  *
49  * This is (we hope) the only really tricky part of gem. The goal
50  * is fairly simple -- track which caches hold bits of the object
51  * and make sure they remain coherent. A few concrete examples may
52  * help to explain how it works. For shorthand, we use the notation
53  * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
54  * a pair of read and write domain masks.
55  *
56  * Case 1: the batch buffer
57  *
58  *	1. Allocated
59  *	2. Written by CPU
60  *	3. Mapped to GTT
61  *	4. Read by GPU
62  *	5. Unmapped from GTT
63  *	6. Freed
64  *
65  *	Let's take these a step at a time
66  *
67  *	1. Allocated
68  *		Pages allocated from the kernel may still have
69  *		cache contents, so we set them to (CPU, CPU) always.
70  *	2. Written by CPU (using pwrite)
71  *		The pwrite function calls set_domain (CPU, CPU) and
72  *		this function does nothing (as nothing changes)
73  *	3. Mapped to GTT
74  *		This function asserts that the object is not
75  *		currently in any GPU-based read or write domains
76  *	4. Read by GPU
77  *		i915_gem_execbuffer calls set_domain (COMMAND, 0).
78  *		As write_domain is zero, this function adds in the
79  *		current read domains (CPU+COMMAND, 0).
80  *		flush_domains is set to CPU.
81  *		invalidate_domains is set to COMMAND
82  *		clflush is run to get data out of the CPU caches
83  *		then i915_dev_set_domain calls i915_gem_flush to
84  *		emit an MI_FLUSH and drm_agp_chipset_flush
85  *	5. Unmapped from GTT
86  *		i915_gem_object_unbind calls set_domain (CPU, CPU)
87  *		flush_domains and invalidate_domains end up both zero
88  *		so no flushing/invalidating happens
89  *	6. Freed
90  *		yay, done
91  *
92  * Case 2: The shared render buffer
93  *
94  *	1. Allocated
95  *	2. Mapped to GTT
96  *	3. Read/written by GPU
97  *	4. set_domain to (CPU,CPU)
98  *	5. Read/written by CPU
99  *	6. Read/written by GPU
100  *
101  *	1. Allocated
102  *		Same as last example, (CPU, CPU)
103  *	2. Mapped to GTT
104  *		Nothing changes (assertions find that it is not in the GPU)
105  *	3. Read/written by GPU
106  *		execbuffer calls set_domain (RENDER, RENDER)
107  *		flush_domains gets CPU
108  *		invalidate_domains gets GPU
109  *		clflush (obj)
110  *		MI_FLUSH and drm_agp_chipset_flush
111  *	4. set_domain (CPU, CPU)
112  *		flush_domains gets GPU
113  *		invalidate_domains gets CPU
114  *		wait_rendering (obj) to make sure all drawing is complete.
115  *		This will include an MI_FLUSH to get the data from GPU
116  *		to memory
117  *		clflush (obj) to invalidate the CPU cache
118  *		Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
119  *	5. Read/written by CPU
120  *		cache lines are loaded and dirtied
121  *	6. Read/written by GPU
122  *		Same as last GPU access
123  *
124  * Case 3: The constant buffer
125  *
126  *	1. Allocated
127  *	2. Written by CPU
128  *	3. Read by GPU
129  *	4. Updated (written) by CPU again
130  *	5. Read by GPU
131  *
132  *	1. Allocated
133  *		(CPU, CPU)
134  *	2. Written by CPU
135  *		(CPU, CPU)
136  *	3. Read by GPU
137  *		(CPU+RENDER, 0)
138  *		flush_domains = CPU
139  *		invalidate_domains = RENDER
140  *		clflush (obj)
141  *		MI_FLUSH
142  *		drm_agp_chipset_flush
143  *	4. Updated (written) by CPU again
144  *		(CPU, CPU)
145  *		flush_domains = 0 (no previous write domain)
146  *		invalidate_domains = 0 (no new read domains)
147  *	5. Read by GPU
148  *		(CPU+RENDER, 0)
149  *		flush_domains = CPU
150  *		invalidate_domains = RENDER
151  *		clflush (obj)
152  *		MI_FLUSH
153  *		drm_agp_chipset_flush
154  */
155 static void
156 i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
157 				  struct intel_ring_buffer *ring,
158 				  struct change_domains *cd)
159 {
160 	uint32_t invalidate_domains = 0, flush_domains = 0;
161 
162 	/*
163 	 * If the object isn't moving to a new write domain,
164 	 * let the object stay in multiple read domains
165 	 */
166 	if (obj->base.pending_write_domain == 0)
167 		obj->base.pending_read_domains |= obj->base.read_domains;
168 
169 	/*
170 	 * Flush the current write domain if
171 	 * the new read domains don't match. Invalidate
172 	 * any read domains which differ from the old
173 	 * write domain
174 	 */
175 	if (obj->base.write_domain &&
176 	    (((obj->base.write_domain != obj->base.pending_read_domains ||
177 	       obj->ring != ring)) ||
178 	     (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
179 		flush_domains |= obj->base.write_domain;
180 		invalidate_domains |=
181 			obj->base.pending_read_domains & ~obj->base.write_domain;
182 	}
183 	/*
184 	 * Invalidate any read caches which may have
185 	 * stale data. That is, any new read domains.
186 	 */
187 	invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
188 	if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
189 		i915_gem_clflush_object(obj);
190 
191 	if (obj->base.pending_write_domain)
192 		cd->flips |= atomic_read(&obj->pending_flip);
193 
194 	/* The actual obj->write_domain will be updated with
195 	 * pending_write_domain after we emit the accumulated flush for all
196 	 * of our domain changes in execbuffers (which clears objects'
197 	 * write_domains).  So if we have a current write domain that we
198 	 * aren't changing, set pending_write_domain to that.
199 	 */
200 	if (flush_domains == 0 && obj->base.pending_write_domain == 0)
201 		obj->base.pending_write_domain = obj->base.write_domain;
202 
203 	cd->invalidate_domains |= invalidate_domains;
204 	cd->flush_domains |= flush_domains;
205 	if (flush_domains & I915_GEM_GPU_DOMAINS)
206 		cd->flush_rings |= obj->ring->id;
207 	if (invalidate_domains & I915_GEM_GPU_DOMAINS)
208 		cd->flush_rings |= ring->id;
209 }
210 
211 struct eb_objects {
212 	int and;
213 	struct hlist_head buckets[0];
214 };
215 
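/*
 * Per-execbuffer hash table mapping exec handles to objects, so that
 * relocation targets can be found without walking the whole object
 * list.  eb_create() starts from a bucket count that keeps the table
 * at about half a page and halves it until it no longer exceeds the
 * number of objects; "and" is the resulting power-of-two mask used
 * for bucketing.
 */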
216 static struct eb_objects *
217 eb_create(int size)
218 {
219 	struct eb_objects *eb;
220 	int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
221 	while (count > size)
222 		count >>= 1;
223 	eb = kzalloc(count*sizeof(struct hlist_head) +
224 		     sizeof(struct eb_objects),
225 		     GFP_KERNEL);
226 	if (eb == NULL)
227 		return eb;
228 
229 	eb->and = count - 1;
230 	return eb;
231 }
232 
233 static void
234 eb_reset(struct eb_objects *eb)
235 {
236 	memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
237 }
238 
239 static void
240 eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj)
241 {
242 	hlist_add_head(&obj->exec_node,
243 		       &eb->buckets[obj->exec_handle & eb->and]);
244 }
245 
246 static struct drm_i915_gem_object *
247 eb_get_object(struct eb_objects *eb, unsigned long handle)
248 {
249 	struct hlist_head *head;
250 	struct hlist_node *node;
251 	struct drm_i915_gem_object *obj;
252 
253 	head = &eb->buckets[handle & eb->and];
254 	hlist_for_each(node, head) {
255 		obj = hlist_entry(node, struct drm_i915_gem_object, exec_node);
256 		if (obj->exec_handle == handle)
257 			return obj;
258 	}
259 
260 	return NULL;
261 }
262 
263 static void
264 eb_destroy(struct eb_objects *eb)
265 {
266 	kfree(eb);
267 }
268 
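/*
 * Apply a single relocation: look up the target object in the handle
 * hash, sanity check the requested domains and the relocation offset,
 * then write the target's GTT offset (plus the supplied delta) into
 * the object, either through the CPU map or through the GTT aperture.
 * The updated presumed_offset is reported back so the write can be
 * skipped next time round if nothing has moved.
 */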
269 static int
270 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
271 				   struct eb_objects *eb,
272 				   struct drm_i915_gem_relocation_entry *reloc)
273 {
274 	struct drm_device *dev = obj->base.dev;
275 	struct drm_gem_object *target_obj;
276 	uint32_t target_offset;
277 	int ret = -EINVAL;
278 
279 	/* we already hold a reference to all valid objects */
280 	target_obj = &eb_get_object(eb, reloc->target_handle)->base;
281 	if (unlikely(target_obj == NULL))
282 		return -ENOENT;
283 
284 	target_offset = to_intel_bo(target_obj)->gtt_offset;
285 
286 	/* The target buffer should have appeared before us in the
287 	 * exec_object list, so it should have a GTT space bound by now.
288 	 */
289 	if (unlikely(target_offset == 0)) {
290 		DRM_ERROR("No GTT space found for object %d\n",
291 			  reloc->target_handle);
292 		return ret;
293 	}
294 
295 	/* Validate that the target is in a valid r/w GPU domain */
296 	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
297 		DRM_ERROR("reloc with multiple write domains: "
298 			  "obj %p target %d offset %d "
299 			  "read %08x write %08x",
300 			  obj, reloc->target_handle,
301 			  (int) reloc->offset,
302 			  reloc->read_domains,
303 			  reloc->write_domain);
304 		return ret;
305 	}
306 	if (unlikely((reloc->write_domain | reloc->read_domains) & I915_GEM_DOMAIN_CPU)) {
307 		DRM_ERROR("reloc with read/write CPU domains: "
308 			  "obj %p target %d offset %d "
309 			  "read %08x write %08x",
310 			  obj, reloc->target_handle,
311 			  (int) reloc->offset,
312 			  reloc->read_domains,
313 			  reloc->write_domain);
314 		return ret;
315 	}
316 	if (unlikely(reloc->write_domain && target_obj->pending_write_domain &&
317 		     reloc->write_domain != target_obj->pending_write_domain)) {
318 		DRM_ERROR("Write domain conflict: "
319 			  "obj %p target %d offset %d "
320 			  "new %08x old %08x\n",
321 			  obj, reloc->target_handle,
322 			  (int) reloc->offset,
323 			  reloc->write_domain,
324 			  target_obj->pending_write_domain);
325 		return ret;
326 	}
327 
328 	target_obj->pending_read_domains |= reloc->read_domains;
329 	target_obj->pending_write_domain |= reloc->write_domain;
330 
331 	/* If the relocation already has the right value in it, no
332 	 * more work needs to be done.
333 	 */
334 	if (target_offset == reloc->presumed_offset)
335 		return 0;
336 
337 	/* Check that the relocation address is valid... */
338 	if (unlikely(reloc->offset > obj->base.size - 4)) {
339 		DRM_ERROR("Relocation beyond object bounds: "
340 			  "obj %p target %d offset %d size %d.\n",
341 			  obj, reloc->target_handle,
342 			  (int) reloc->offset,
343 			  (int) obj->base.size);
344 		return ret;
345 	}
346 	if (unlikely(reloc->offset & 3)) {
347 		DRM_ERROR("Relocation not 4-byte aligned: "
348 			  "obj %p target %d offset %d.\n",
349 			  obj, reloc->target_handle,
350 			  (int) reloc->offset);
351 		return ret;
352 	}
353 
354 	reloc->delta += target_offset;
355 	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
356 		uint32_t page_offset = reloc->offset & ~PAGE_MASK;
357 		char *vaddr;
358 
359 		vaddr = kmap_atomic(obj->pages[reloc->offset >> PAGE_SHIFT]);
360 		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
361 		kunmap_atomic(vaddr);
362 	} else {
363 		struct drm_i915_private *dev_priv = dev->dev_private;
364 		uint32_t __iomem *reloc_entry;
365 		void __iomem *reloc_page;
366 
367 		/* We can't wait for rendering with pagefaults disabled */
368 		if (obj->active && in_atomic())
369 			return -EFAULT;
370 
371 		ret = i915_gem_object_set_to_gtt_domain(obj, 1);
372 		if (ret)
373 			return ret;
374 
375 		/* Map the page containing the relocation we're going to perform.  */
376 		reloc->offset += obj->gtt_offset;
377 		reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
378 						      reloc->offset & PAGE_MASK);
379 		reloc_entry = (uint32_t __iomem *)
380 			(reloc_page + (reloc->offset & ~PAGE_MASK));
381 		iowrite32(reloc->delta, reloc_entry);
382 		io_mapping_unmap_atomic(reloc_page);
383 	}
384 
385 	/* and update the user's relocation entry */
386 	reloc->presumed_offset = target_offset;
387 
388 	return 0;
389 }
390 
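/*
 * Fast-path relocation for one object: each relocation entry is copied
 * in from user space with the inatomic variants (pagefaults are
 * disabled by the caller), applied, and the presumed offset written
 * straight back to the user's array.
 */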
391 static int
392 i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
393 				    struct eb_objects *eb)
394 {
395 	struct drm_i915_gem_relocation_entry __user *user_relocs;
396 	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
397 	int i, ret;
398 
399 	user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;
400 	for (i = 0; i < entry->relocation_count; i++) {
401 		struct drm_i915_gem_relocation_entry reloc;
402 
403 		if (__copy_from_user_inatomic(&reloc,
404 					      user_relocs+i,
405 					      sizeof(reloc)))
406 			return -EFAULT;
407 
408 		ret = i915_gem_execbuffer_relocate_entry(obj, eb, &reloc);
409 		if (ret)
410 			return ret;
411 
412 		if (__copy_to_user_inatomic(&user_relocs[i].presumed_offset,
413 					    &reloc.presumed_offset,
414 					    sizeof(reloc.presumed_offset)))
415 			return -EFAULT;
416 	}
417 
418 	return 0;
419 }
420 
421 static int
422 i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
423 					 struct eb_objects *eb,
424 					 struct drm_i915_gem_relocation_entry *relocs)
425 {
426 	const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
427 	int i, ret;
428 
429 	for (i = 0; i < entry->relocation_count; i++) {
430 		ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]);
431 		if (ret)
432 			return ret;
433 	}
434 
435 	return 0;
436 }
437 
438 static int
439 i915_gem_execbuffer_relocate(struct drm_device *dev,
440 			     struct eb_objects *eb,
441 			     struct list_head *objects)
442 {
443 	struct drm_i915_gem_object *obj;
444 	int ret = 0;
445 
446 	/* This is the fast path and we cannot handle a pagefault whilst
447 	 * holding the struct mutex lest the user pass in the relocations
448 	 * contained within a mmaped bo. In such a case the page
449 	 * fault handler would call i915_gem_fault() and we would try to
450 	 * acquire the struct mutex again. Obviously this is bad and so
451 	 * lockdep complains vehemently.
452 	 */
453 	pagefault_disable();
454 	list_for_each_entry(obj, objects, exec_list) {
455 		ret = i915_gem_execbuffer_relocate_object(obj, eb);
456 		if (ret)
457 			break;
458 	}
459 	pagefault_enable();
460 
461 	return ret;
462 }
463 
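/*
 * Reserve GTT space for every object in the execbuffer.  Objects that
 * need to be mappable (those with relocations to process, or that need
 * a fence on pre-gen4 hardware) are moved to the front of the list so
 * they are bound first and are more likely to land in the mappable
 * aperture; the three-phase pin/unbind loop below then deals with any
 * ill-fitting objects.
 */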
464 static int
465 i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
466 			    struct drm_file *file,
467 			    struct list_head *objects)
468 {
469 	struct drm_i915_gem_object *obj;
470 	int ret, retry;
471 	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
472 	struct list_head ordered_objects;
473 
474 	INIT_LIST_HEAD(&ordered_objects);
475 	while (!list_empty(objects)) {
476 		struct drm_i915_gem_exec_object2 *entry;
477 		bool need_fence, need_mappable;
478 
479 		obj = list_first_entry(objects,
480 				       struct drm_i915_gem_object,
481 				       exec_list);
482 		entry = obj->exec_entry;
483 
484 		need_fence =
485 			has_fenced_gpu_access &&
486 			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
487 			obj->tiling_mode != I915_TILING_NONE;
488 		need_mappable =
489 			entry->relocation_count ? true : need_fence;
490 
491 		if (need_mappable)
492 			list_move(&obj->exec_list, &ordered_objects);
493 		else
494 			list_move_tail(&obj->exec_list, &ordered_objects);
495 
496 		obj->base.pending_read_domains = 0;
497 		obj->base.pending_write_domain = 0;
498 	}
499 	list_splice(&ordered_objects, objects);
500 
501 	/* Attempt to pin all of the buffers into the GTT.
502 	 * This is done in 3 phases:
503 	 *
504 	 * 1a. Unbind all objects that do not match the GTT constraints for
505 	 *     the execbuffer (fenceable, mappable, alignment etc).
506 	 * 1b. Increment pin count for already bound objects.
507 	 * 2.  Bind new objects.
508 	 * 3.  Decrement pin count.
509 	 *
510 	 * This avoids unnecessary unbinding of later objects in order to make
511 	 * room for the earlier objects *unless* we need to defragment.
512 	 */
513 	retry = 0;
514 	do {
515 		ret = 0;
516 
517 		/* Unbind any ill-fitting objects or pin. */
518 		list_for_each_entry(obj, objects, exec_list) {
519 			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
520 			bool need_fence, need_mappable;
521 			if (!obj->gtt_space)
522 				continue;
523 
524 			need_fence =
525 				has_fenced_gpu_access &&
526 				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
527 				obj->tiling_mode != I915_TILING_NONE;
528 			need_mappable =
529 				entry->relocation_count ? true : need_fence;
530 
531 			if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
532 			    (need_mappable && !obj->map_and_fenceable))
533 				ret = i915_gem_object_unbind(obj);
534 			else
535 				ret = i915_gem_object_pin(obj,
536 							  entry->alignment,
537 							  need_mappable);
538 			if (ret)
539 				goto err;
540 
541 			entry++;
542 		}
543 
544 		/* Bind fresh objects */
545 		list_for_each_entry(obj, objects, exec_list) {
546 			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
547 			bool need_fence;
548 
549 			need_fence =
550 				has_fenced_gpu_access &&
551 				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
552 				obj->tiling_mode != I915_TILING_NONE;
553 
554 			if (!obj->gtt_space) {
555 				bool need_mappable =
556 					entry->relocation_count ? true : need_fence;
557 
558 				ret = i915_gem_object_pin(obj,
559 							  entry->alignment,
560 							  need_mappable);
561 				if (ret)
562 					break;
563 			}
564 
565 			if (has_fenced_gpu_access) {
566 				if (need_fence) {
567 					ret = i915_gem_object_get_fence(obj, ring);
568 					if (ret)
569 						break;
570 				} else if (entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
571 					   obj->tiling_mode == I915_TILING_NONE) {
572 					/* XXX pipelined! */
573 					ret = i915_gem_object_put_fence(obj);
574 					if (ret)
575 						break;
576 				}
577 				obj->pending_fenced_gpu_access = need_fence;
578 			}
579 
580 			entry->offset = obj->gtt_offset;
581 		}
582 
583 		/* Decrement pin count for bound objects */
584 		list_for_each_entry(obj, objects, exec_list) {
585 			if (obj->gtt_space)
586 				i915_gem_object_unpin(obj);
587 		}
588 
589 		if (ret != -ENOSPC || retry > 1)
590 			return ret;
591 
592 		/* First attempt, just clear anything that is purgeable.
593 		 * Second attempt, clear the entire GTT.
594 		 */
595 		ret = i915_gem_evict_everything(ring->dev, retry == 0);
596 		if (ret)
597 			return ret;
598 
599 		retry++;
600 	} while (1);
601 
602 err:
603 	obj = list_entry(obj->exec_list.prev,
604 			 struct drm_i915_gem_object,
605 			 exec_list);
606 	while (objects != &obj->exec_list) {
607 		if (obj->gtt_space)
608 			i915_gem_object_unpin(obj);
609 
610 		obj = list_entry(obj->exec_list.prev,
611 				 struct drm_i915_gem_object,
612 				 exec_list);
613 	}
614 
615 	return ret;
616 }
617 
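/*
 * Slow-path relocation: the fast path hit a fault it could not service
 * with pagefaults disabled, so drop every object reference and the
 * struct mutex, copy all relocation entries with faulting allowed,
 * then retake the lock, re-look-up the objects, re-reserve them and
 * apply the relocations from the kernel copy.
 */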
618 static int
619 i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
620 				  struct drm_file *file,
621 				  struct intel_ring_buffer *ring,
622 				  struct list_head *objects,
623 				  struct eb_objects *eb,
624 				  struct drm_i915_gem_exec_object2 *exec,
625 				  int count)
626 {
627 	struct drm_i915_gem_relocation_entry *reloc;
628 	struct drm_i915_gem_object *obj;
629 	int *reloc_offset;
630 	int i, total, ret;
631 
632 	/* We may process another execbuffer during the unlock... */
633 	while (!list_empty(objects)) {
634 		obj = list_first_entry(objects,
635 				       struct drm_i915_gem_object,
636 				       exec_list);
637 		list_del_init(&obj->exec_list);
638 		drm_gem_object_unreference(&obj->base);
639 	}
640 
641 	mutex_unlock(&dev->struct_mutex);
642 
643 	total = 0;
644 	for (i = 0; i < count; i++)
645 		total += exec[i].relocation_count;
646 
647 	reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
648 	reloc = drm_malloc_ab(total, sizeof(*reloc));
649 	if (reloc == NULL || reloc_offset == NULL) {
650 		drm_free_large(reloc);
651 		drm_free_large(reloc_offset);
652 		mutex_lock(&dev->struct_mutex);
653 		return -ENOMEM;
654 	}
655 
656 	total = 0;
657 	for (i = 0; i < count; i++) {
658 		struct drm_i915_gem_relocation_entry __user *user_relocs;
659 
660 		user_relocs = (void __user *)(uintptr_t)exec[i].relocs_ptr;
661 
662 		if (copy_from_user(reloc+total, user_relocs,
663 				   exec[i].relocation_count * sizeof(*reloc))) {
664 			ret = -EFAULT;
665 			mutex_lock(&dev->struct_mutex);
666 			goto err;
667 		}
668 
669 		reloc_offset[i] = total;
670 		total += exec[i].relocation_count;
671 	}
672 
673 	ret = i915_mutex_lock_interruptible(dev);
674 	if (ret) {
675 		mutex_lock(&dev->struct_mutex);
676 		goto err;
677 	}
678 
679 	/* reacquire the objects */
680 	eb_reset(eb);
681 	for (i = 0; i < count; i++) {
682 		obj = to_intel_bo(drm_gem_object_lookup(dev, file,
683 							exec[i].handle));
684 		if (&obj->base == NULL) {
685 			DRM_ERROR("Invalid object handle %d at index %d\n",
686 				   exec[i].handle, i);
687 			ret = -ENOENT;
688 			goto err;
689 		}
690 
691 		list_add_tail(&obj->exec_list, objects);
692 		obj->exec_handle = exec[i].handle;
693 		obj->exec_entry = &exec[i];
694 		eb_add_object(eb, obj);
695 	}
696 
697 	ret = i915_gem_execbuffer_reserve(ring, file, objects);
698 	if (ret)
699 		goto err;
700 
701 	list_for_each_entry(obj, objects, exec_list) {
702 		int offset = obj->exec_entry - exec;
703 		ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
704 							       reloc + reloc_offset[offset]);
705 		if (ret)
706 			goto err;
707 	}
708 
709 	/* Leave the user relocations as they are; this is the painfully slow path,
710 	 * and we want to avoid the complication of dropping the lock whilst
711 	 * having buffers reserved in the aperture and so causing spurious
712 	 * ENOSPC for random operations.
713 	 */
714 
715 err:
716 	drm_free_large(reloc);
717 	drm_free_large(reloc_offset);
718 	return ret;
719 }
720 
721 static int
722 i915_gem_execbuffer_flush(struct drm_device *dev,
723 			  uint32_t invalidate_domains,
724 			  uint32_t flush_domains,
725 			  uint32_t flush_rings)
726 {
727 	drm_i915_private_t *dev_priv = dev->dev_private;
728 	int i, ret;
729 
730 	if (flush_domains & I915_GEM_DOMAIN_CPU)
731 		intel_gtt_chipset_flush();
732 
733 	if (flush_domains & I915_GEM_DOMAIN_GTT)
734 		wmb();
735 
736 	if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
737 		for (i = 0; i < I915_NUM_RINGS; i++)
738 			if (flush_rings & (1 << i)) {
739 				ret = i915_gem_flush_ring(&dev_priv->ring[i],
740 							  invalidate_domains,
741 							  flush_domains);
742 				if (ret)
743 					return ret;
744 			}
745 	}
746 
747 	return 0;
748 }
749 
750 static bool
751 intel_enable_semaphores(struct drm_device *dev)
752 {
753 	if (INTEL_INFO(dev)->gen < 6)
754 		return 0;
755 
756 	if (i915_semaphores >= 0)
757 		return i915_semaphores;
758 
759 	/* Disable semaphores on SNB */
760 	if (INTEL_INFO(dev)->gen == 6)
761 		return 0;
762 
763 	return 1;
764 }
765 
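/*
 * Order execution against a buffer's previous rendering on another
 * ring.  With semaphores enabled this emits a GPU-side wait on the
 * destination ring for the object's last seqno (flushing out a lazy
 * request first if needed); otherwise it falls back to blocking on the
 * CPU until the previous rendering has retired.
 */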
766 static int
767 i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
768 			       struct intel_ring_buffer *to)
769 {
770 	struct intel_ring_buffer *from = obj->ring;
771 	u32 seqno;
772 	int ret, idx;
773 
774 	if (from == NULL || to == from)
775 		return 0;
776 
777 	/* XXX gpu semaphores are implicated in various hard hangs on SNB */
778 	if (!intel_enable_semaphores(obj->base.dev))
779 		return i915_gem_object_wait_rendering(obj);
780 
781 	idx = intel_ring_sync_index(from, to);
782 
783 	seqno = obj->last_rendering_seqno;
784 	if (seqno <= from->sync_seqno[idx])
785 		return 0;
786 
787 	if (seqno == from->outstanding_lazy_request) {
788 		struct drm_i915_gem_request *request;
789 
790 		request = kzalloc(sizeof(*request), GFP_KERNEL);
791 		if (request == NULL)
792 			return -ENOMEM;
793 
794 		ret = i915_add_request(from, NULL, request);
795 		if (ret) {
796 			kfree(request);
797 			return ret;
798 		}
799 
800 		seqno = request->seqno;
801 	}
802 
803 	from->sync_seqno[idx] = seqno;
804 
805 	return to->sync_to(to, from, seqno - 1);
806 }
807 
808 static int
809 i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
810 {
811 	u32 plane, flip_mask;
812 	int ret;
813 
814 	/* Check for any pending flips. As we only maintain a flip queue depth
815 	 * of 1, we can simply insert a WAIT for the next display flip prior
816 	 * to executing the batch and avoid stalling the CPU.
817 	 */
818 
819 	for (plane = 0; flips >> plane; plane++) {
820 		if (((flips >> plane) & 1) == 0)
821 			continue;
822 
823 		if (plane)
824 			flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
825 		else
826 			flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
827 
828 		ret = intel_ring_begin(ring, 2);
829 		if (ret)
830 			return ret;
831 
832 		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
833 		intel_ring_emit(ring, MI_NOOP);
834 		intel_ring_advance(ring);
835 	}
836 
837 	return 0;
838 }
839 
840 
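/*
 * Flush and invalidate the accumulated domain changes for the whole
 * execbuffer: a single chipset/ring flush for everything computed by
 * i915_gem_object_set_to_gpu_domain(), a wait for any pending page
 * flips, and a ring-to-ring sync for objects still busy elsewhere.
 */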
841 static int
842 i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
843 				struct list_head *objects)
844 {
845 	struct drm_i915_gem_object *obj;
846 	struct change_domains cd;
847 	int ret;
848 
849 	memset(&cd, 0, sizeof(cd));
850 	list_for_each_entry(obj, objects, exec_list)
851 		i915_gem_object_set_to_gpu_domain(obj, ring, &cd);
852 
853 	if (cd.invalidate_domains | cd.flush_domains) {
854 		ret = i915_gem_execbuffer_flush(ring->dev,
855 						cd.invalidate_domains,
856 						cd.flush_domains,
857 						cd.flush_rings);
858 		if (ret)
859 			return ret;
860 	}
861 
862 	if (cd.flips) {
863 		ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
864 		if (ret)
865 			return ret;
866 	}
867 
868 	list_for_each_entry(obj, objects, exec_list) {
869 		ret = i915_gem_execbuffer_sync_rings(obj, ring);
870 		if (ret)
871 			return ret;
872 	}
873 
874 	return 0;
875 }
876 
877 static bool
878 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
879 {
880 	return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
881 }
882 
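/*
 * Check each relocation list pointer up front: reject counts that
 * would overflow the length calculation, verify the user range is
 * both readable and writable, and pre-fault it so the later inatomic
 * copies have a fighting chance of succeeding.
 */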
883 static int
884 validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
885 		   int count)
886 {
887 	int i;
888 
889 	for (i = 0; i < count; i++) {
890 		char __user *ptr = (char __user *)(uintptr_t)exec[i].relocs_ptr;
891 		int length; /* limited by fault_in_pages_readable() */
892 
893 		/* First check for malicious input causing overflow */
894 		if (exec[i].relocation_count >
895 		    INT_MAX / sizeof(struct drm_i915_gem_relocation_entry))
896 			return -EINVAL;
897 
898 		length = exec[i].relocation_count *
899 			sizeof(struct drm_i915_gem_relocation_entry);
900 		if (!access_ok(VERIFY_READ, ptr, length))
901 			return -EFAULT;
902 
903 		/* we may also need to update the presumed offsets */
904 		if (!access_ok(VERIFY_WRITE, ptr, length))
905 			return -EFAULT;
906 
907 		if (fault_in_pages_readable(ptr, length))
908 			return -EFAULT;
909 	}
910 
911 	return 0;
912 }
913 
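/*
 * Commit the pending domain changes now that the batch has been
 * dispatched: mark every object as active on the ring, and put
 * anything with a GPU write domain onto the ring's gpu_write_list so
 * the eventual flush retires it correctly.
 */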
914 static void
915 i915_gem_execbuffer_move_to_active(struct list_head *objects,
916 				   struct intel_ring_buffer *ring,
917 				   u32 seqno)
918 {
919 	struct drm_i915_gem_object *obj;
920 
921 	list_for_each_entry(obj, objects, exec_list) {
922 		u32 old_read = obj->base.read_domains;
923 		u32 old_write = obj->base.write_domain;
924 
926 		obj->base.read_domains = obj->base.pending_read_domains;
927 		obj->base.write_domain = obj->base.pending_write_domain;
928 		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
929 
930 		i915_gem_object_move_to_active(obj, ring, seqno);
931 		if (obj->base.write_domain) {
932 			obj->dirty = 1;
933 			obj->pending_gpu_write = true;
934 			list_move_tail(&obj->gpu_write_list,
935 				       &ring->gpu_write_list);
936 			intel_mark_busy(ring->dev, obj);
937 		}
938 
939 		trace_i915_gem_object_change_domain(obj, old_read, old_write);
940 	}
941 }
942 
943 static void
944 i915_gem_execbuffer_retire_commands(struct drm_device *dev,
945 				    struct drm_file *file,
946 				    struct intel_ring_buffer *ring)
947 {
948 	struct drm_i915_gem_request *request;
949 	u32 invalidate;
950 
951 	/*
952 	 * Ensure that the commands in the batch buffer are
953 	 * finished before the interrupt fires.
954 	 *
955 	 * The sampler always gets flushed on i965 (sigh).
956 	 */
957 	invalidate = I915_GEM_DOMAIN_COMMAND;
958 	if (INTEL_INFO(dev)->gen >= 4)
959 		invalidate |= I915_GEM_DOMAIN_SAMPLER;
960 	if (ring->flush(ring, invalidate, 0)) {
961 		i915_gem_next_request_seqno(ring);
962 		return;
963 	}
964 
965 	/* Add a breadcrumb for the completion of the batch buffer */
966 	request = kzalloc(sizeof(*request), GFP_KERNEL);
967 	if (request == NULL || i915_add_request(ring, file, request)) {
968 		i915_gem_next_request_seqno(ring);
969 		kfree(request);
970 	}
971 }
972 
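/*
 * Gen7 stream-output (SOL) writes accumulate an offset in the
 * GEN7_SO_WRITE_OFFSET registers; when userspace passes
 * I915_EXEC_GEN7_SOL_RESET we zero all four from the render ring
 * before the batch runs.
 */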
973 static int
974 i915_reset_gen7_sol_offsets(struct drm_device *dev,
975 			    struct intel_ring_buffer *ring)
976 {
977 	drm_i915_private_t *dev_priv = dev->dev_private;
978 	int ret, i;
979 
980 	if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS])
981 		return 0;
982 
983 	ret = intel_ring_begin(ring, 4 * 3);
984 	if (ret)
985 		return ret;
986 
987 	for (i = 0; i < 4; i++) {
988 		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
989 		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
990 		intel_ring_emit(ring, 0);
991 	}
992 
993 	intel_ring_advance(ring);
994 
995 	return 0;
996 }
997 
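/*
 * Core of the execbuffer path shared by both ioctl entry points:
 * validate the request and pick a ring, look up and reserve every
 * object, apply relocations (falling back to the slow path on a
 * fault), flush/invalidate caches and sync rings, then dispatch the
 * batch and queue a request so the objects are retired correctly.
 */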
998 static int
999 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1000 		       struct drm_file *file,
1001 		       struct drm_i915_gem_execbuffer2 *args,
1002 		       struct drm_i915_gem_exec_object2 *exec)
1003 {
1004 	drm_i915_private_t *dev_priv = dev->dev_private;
1005 	struct list_head objects;
1006 	struct eb_objects *eb;
1007 	struct drm_i915_gem_object *batch_obj;
1008 	struct drm_clip_rect *cliprects = NULL;
1009 	struct intel_ring_buffer *ring;
1010 	u32 exec_start, exec_len;
1011 	u32 seqno;
1012 	u32 mask;
1013 	int ret, mode, i;
1014 
1015 	if (!i915_gem_check_execbuffer(args)) {
1016 		DRM_ERROR("execbuf with invalid offset/length\n");
1017 		return -EINVAL;
1018 	}
1019 
1020 	ret = validate_exec_list(exec, args->buffer_count);
1021 	if (ret)
1022 		return ret;
1023 
1024 	switch (args->flags & I915_EXEC_RING_MASK) {
1025 	case I915_EXEC_DEFAULT:
1026 	case I915_EXEC_RENDER:
1027 		ring = &dev_priv->ring[RCS];
1028 		break;
1029 	case I915_EXEC_BSD:
1030 		if (!HAS_BSD(dev)) {
1031 			DRM_ERROR("execbuf with invalid ring (BSD)\n");
1032 			return -EINVAL;
1033 		}
1034 		ring = &dev_priv->ring[VCS];
1035 		break;
1036 	case I915_EXEC_BLT:
1037 		if (!HAS_BLT(dev)) {
1038 			DRM_ERROR("execbuf with invalid ring (BLT)\n");
1039 			return -EINVAL;
1040 		}
1041 		ring = &dev_priv->ring[BCS];
1042 		break;
1043 	default:
1044 		DRM_ERROR("execbuf with unknown ring: %d\n",
1045 			  (int)(args->flags & I915_EXEC_RING_MASK));
1046 		return -EINVAL;
1047 	}
1048 
1049 	mode = args->flags & I915_EXEC_CONSTANTS_MASK;
1050 	mask = I915_EXEC_CONSTANTS_MASK;
1051 	switch (mode) {
1052 	case I915_EXEC_CONSTANTS_REL_GENERAL:
1053 	case I915_EXEC_CONSTANTS_ABSOLUTE:
1054 	case I915_EXEC_CONSTANTS_REL_SURFACE:
1055 		if (ring == &dev_priv->ring[RCS] &&
1056 		    mode != dev_priv->relative_constants_mode) {
1057 			if (INTEL_INFO(dev)->gen < 4)
1058 				return -EINVAL;
1059 
1060 			if (INTEL_INFO(dev)->gen > 5 &&
1061 			    mode == I915_EXEC_CONSTANTS_REL_SURFACE)
1062 				return -EINVAL;
1063 
1064 			/* The HW changed the meaning of this bit on gen6 */
1065 			if (INTEL_INFO(dev)->gen >= 6)
1066 				mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
1067 		}
1068 		break;
1069 	default:
1070 		DRM_ERROR("execbuf with unknown constants: %d\n", mode);
1071 		return -EINVAL;
1072 	}
1073 
1074 	if (args->buffer_count < 1) {
1075 		DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
1076 		return -EINVAL;
1077 	}
1078 
1079 	if (args->num_cliprects != 0) {
1080 		if (ring != &dev_priv->ring[RCS]) {
1081 			DRM_ERROR("clip rectangles are only valid with the render ring\n");
1082 			return -EINVAL;
1083 		}
1084 
1085 		cliprects = kmalloc(args->num_cliprects * sizeof(*cliprects),
1086 				    GFP_KERNEL);
1087 		if (cliprects == NULL) {
1088 			ret = -ENOMEM;
1089 			goto pre_mutex_err;
1090 		}
1091 
1092 		if (copy_from_user(cliprects,
1093 				     (struct drm_clip_rect __user *)(uintptr_t)
1094 				     args->cliprects_ptr,
1095 				     sizeof(*cliprects)*args->num_cliprects)) {
1096 			ret = -EFAULT;
1097 			goto pre_mutex_err;
1098 		}
1099 	}
1100 
1101 	ret = i915_mutex_lock_interruptible(dev);
1102 	if (ret)
1103 		goto pre_mutex_err;
1104 
1105 	if (dev_priv->mm.suspended) {
1106 		mutex_unlock(&dev->struct_mutex);
1107 		ret = -EBUSY;
1108 		goto pre_mutex_err;
1109 	}
1110 
1111 	eb = eb_create(args->buffer_count);
1112 	if (eb == NULL) {
1113 		mutex_unlock(&dev->struct_mutex);
1114 		ret = -ENOMEM;
1115 		goto pre_mutex_err;
1116 	}
1117 
1118 	/* Look up object handles */
1119 	INIT_LIST_HEAD(&objects);
1120 	for (i = 0; i < args->buffer_count; i++) {
1121 		struct drm_i915_gem_object *obj;
1122 
1123 		obj = to_intel_bo(drm_gem_object_lookup(dev, file,
1124 							exec[i].handle));
1125 		if (&obj->base == NULL) {
1126 			DRM_ERROR("Invalid object handle %d at index %d\n",
1127 				   exec[i].handle, i);
1128 			/* prevent error path from reading uninitialized data */
1129 			ret = -ENOENT;
1130 			goto err;
1131 		}
1132 
1133 		if (!list_empty(&obj->exec_list)) {
1134 			DRM_ERROR("Object %p [handle %d, index %d] appears more than once in object list\n",
1135 				   obj, exec[i].handle, i);
1136 			ret = -EINVAL;
1137 			goto err;
1138 		}
1139 
1140 		list_add_tail(&obj->exec_list, &objects);
1141 		obj->exec_handle = exec[i].handle;
1142 		obj->exec_entry = &exec[i];
1143 		eb_add_object(eb, obj);
1144 	}
1145 
1146 	/* take note of the batch buffer before we might reorder the lists */
1147 	batch_obj = list_entry(objects.prev,
1148 			       struct drm_i915_gem_object,
1149 			       exec_list);
1150 
1151 	/* Move the objects en-masse into the GTT, evicting if necessary. */
1152 	ret = i915_gem_execbuffer_reserve(ring, file, &objects);
1153 	if (ret)
1154 		goto err;
1155 
1156 	/* The objects are in their final locations, apply the relocations. */
1157 	ret = i915_gem_execbuffer_relocate(dev, eb, &objects);
1158 	if (ret) {
1159 		if (ret == -EFAULT) {
1160 			ret = i915_gem_execbuffer_relocate_slow(dev, file, ring,
1161 								&objects, eb,
1162 								exec,
1163 								args->buffer_count);
1164 			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1165 		}
1166 		if (ret)
1167 			goto err;
1168 	}
1169 
1170 	/* Set the pending read domains for the batch buffer to COMMAND */
1171 	if (batch_obj->base.pending_write_domain) {
1172 		DRM_ERROR("Attempting to use self-modifying batch buffer\n");
1173 		ret = -EINVAL;
1174 		goto err;
1175 	}
1176 	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
1177 
1178 	ret = i915_gem_execbuffer_move_to_gpu(ring, &objects);
1179 	if (ret)
1180 		goto err;
1181 
1182 	seqno = i915_gem_next_request_seqno(ring);
1183 	for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++) {
1184 		if (seqno < ring->sync_seqno[i]) {
1185 			/* The GPU can not handle its semaphore value wrapping,
1186 			 * so every billion or so execbuffers, we need to stall
1187 			 * the GPU in order to reset the counters.
1188 			 */
1189 			ret = i915_gpu_idle(dev);
1190 			if (ret)
1191 				goto err;
1192 
1193 			BUG_ON(ring->sync_seqno[i]);
1194 		}
1195 	}
1196 
1197 	if (ring == &dev_priv->ring[RCS] &&
1198 	    mode != dev_priv->relative_constants_mode) {
1199 		ret = intel_ring_begin(ring, 4);
1200 		if (ret)
1201 			goto err;
1202 
1203 		intel_ring_emit(ring, MI_NOOP);
1204 		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1205 		intel_ring_emit(ring, INSTPM);
1206 		intel_ring_emit(ring, mask << 16 | mode);
1207 		intel_ring_advance(ring);
1208 
1209 		dev_priv->relative_constants_mode = mode;
1210 	}
1211 
1212 	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
1213 		ret = i915_reset_gen7_sol_offsets(dev, ring);
1214 		if (ret)
1215 			goto err;
1216 	}
1217 
1218 	trace_i915_gem_ring_dispatch(ring, seqno);
1219 
1220 	exec_start = batch_obj->gtt_offset + args->batch_start_offset;
1221 	exec_len = args->batch_len;
1222 	if (cliprects) {
1223 		for (i = 0; i < args->num_cliprects; i++) {
1224 			ret = i915_emit_box(dev, &cliprects[i],
1225 					    args->DR1, args->DR4);
1226 			if (ret)
1227 				goto err;
1228 
1229 			ret = ring->dispatch_execbuffer(ring,
1230 							exec_start, exec_len);
1231 			if (ret)
1232 				goto err;
1233 		}
1234 	} else {
1235 		ret = ring->dispatch_execbuffer(ring, exec_start, exec_len);
1236 		if (ret)
1237 			goto err;
1238 	}
1239 
1240 	i915_gem_execbuffer_move_to_active(&objects, ring, seqno);
1241 	i915_gem_execbuffer_retire_commands(dev, file, ring);
1242 
1243 err:
1244 	eb_destroy(eb);
1245 	while (!list_empty(&objects)) {
1246 		struct drm_i915_gem_object *obj;
1247 
1248 		obj = list_first_entry(&objects,
1249 				       struct drm_i915_gem_object,
1250 				       exec_list);
1251 		list_del_init(&obj->exec_list);
1252 		drm_gem_object_unreference(&obj->base);
1253 	}
1254 
1255 	mutex_unlock(&dev->struct_mutex);
1256 
1257 pre_mutex_err:
1258 	kfree(cliprects);
1259 	return ret;
1260 }
1261 
1262 /*
1263  * Legacy execbuffer just creates an exec2 list from the original exec object
1264  * list array and passes it to the real function.
1265  */
1266 int
1267 i915_gem_execbuffer(struct drm_device *dev, void *data,
1268 		    struct drm_file *file)
1269 {
1270 	struct drm_i915_gem_execbuffer *args = data;
1271 	struct drm_i915_gem_execbuffer2 exec2;
1272 	struct drm_i915_gem_exec_object *exec_list = NULL;
1273 	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1274 	int ret, i;
1275 
1276 	if (args->buffer_count < 1) {
1277 		DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
1278 		return -EINVAL;
1279 	}
1280 
1281 	/* Copy in the exec list from userland */
1282 	exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
1283 	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
1284 	if (exec_list == NULL || exec2_list == NULL) {
1285 		DRM_ERROR("Failed to allocate exec list for %d buffers\n",
1286 			  args->buffer_count);
1287 		drm_free_large(exec_list);
1288 		drm_free_large(exec2_list);
1289 		return -ENOMEM;
1290 	}
1291 	ret = copy_from_user(exec_list,
1292 			     (struct drm_i915_relocation_entry __user *)
1293 			     (uintptr_t) args->buffers_ptr,
1294 			     sizeof(*exec_list) * args->buffer_count);
1295 	if (ret != 0) {
1296 		DRM_ERROR("copy %d exec entries failed %d\n",
1297 			  args->buffer_count, ret);
1298 		drm_free_large(exec_list);
1299 		drm_free_large(exec2_list);
1300 		return -EFAULT;
1301 	}
1302 
1303 	for (i = 0; i < args->buffer_count; i++) {
1304 		exec2_list[i].handle = exec_list[i].handle;
1305 		exec2_list[i].relocation_count = exec_list[i].relocation_count;
1306 		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
1307 		exec2_list[i].alignment = exec_list[i].alignment;
1308 		exec2_list[i].offset = exec_list[i].offset;
1309 		if (INTEL_INFO(dev)->gen < 4)
1310 			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
1311 		else
1312 			exec2_list[i].flags = 0;
1313 	}
1314 
1315 	exec2.buffers_ptr = args->buffers_ptr;
1316 	exec2.buffer_count = args->buffer_count;
1317 	exec2.batch_start_offset = args->batch_start_offset;
1318 	exec2.batch_len = args->batch_len;
1319 	exec2.DR1 = args->DR1;
1320 	exec2.DR4 = args->DR4;
1321 	exec2.num_cliprects = args->num_cliprects;
1322 	exec2.cliprects_ptr = args->cliprects_ptr;
1323 	exec2.flags = I915_EXEC_RENDER;
1324 
1325 	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
1326 	if (!ret) {
1327 		/* Copy the new buffer offsets back to the user's exec list. */
1328 		for (i = 0; i < args->buffer_count; i++)
1329 			exec_list[i].offset = exec2_list[i].offset;
1330 		/* ... and back out to userspace */
1331 		ret = copy_to_user((struct drm_i915_relocation_entry __user *)
1332 				   (uintptr_t) args->buffers_ptr,
1333 				   exec_list,
1334 				   sizeof(*exec_list) * args->buffer_count);
1335 		if (ret) {
1336 			ret = -EFAULT;
1337 			DRM_ERROR("failed to copy %d exec entries "
1338 				  "back to user (%d)\n",
1339 				  args->buffer_count, ret);
1340 		}
1341 	}
1342 
1343 	drm_free_large(exec_list);
1344 	drm_free_large(exec2_list);
1345 	return ret;
1346 }
1347 
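/*
 * Rough sketch of how a userspace driver might fill in the structures
 * and invoke this ioctl (struct and flag names come from i915_drm.h,
 * drmIoctl() is libdrm's EINTR-safe wrapper; the handles, counts and
 * sizes below are placeholders):
 *
 *	struct drm_i915_gem_exec_object2 obj[2] = {
 *		{ .handle = target_handle },
 *		{ .handle = batch_handle,
 *		  .relocation_count = nreloc,
 *		  .relocs_ptr = (uintptr_t)relocs },
 *	};
 *	struct drm_i915_gem_execbuffer2 execbuf = {
 *		.buffers_ptr = (uintptr_t)obj,
 *		.buffer_count = 2,
 *		.batch_len = batch_bytes,
 *		.flags = I915_EXEC_RENDER,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
 *
 * The last entry in buffers_ptr is treated as the batch buffer (see
 * i915_gem_do_execbuffer() above).
 */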
1348 int
1349 i915_gem_execbuffer2(struct drm_device *dev, void *data,
1350 		     struct drm_file *file)
1351 {
1352 	struct drm_i915_gem_execbuffer2 *args = data;
1353 	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1354 	int ret;
1355 
1356 	if (args->buffer_count < 1) {
1357 		DRM_ERROR("execbuf2 with %d buffers\n", args->buffer_count);
1358 		return -EINVAL;
1359 	}
1360 
1361 	exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
1362 			     GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
1363 	if (exec2_list == NULL)
1364 		exec2_list = drm_malloc_ab(sizeof(*exec2_list),
1365 					   args->buffer_count);
1366 	if (exec2_list == NULL) {
1367 		DRM_ERROR("Failed to allocate exec list for %d buffers\n",
1368 			  args->buffer_count);
1369 		return -ENOMEM;
1370 	}
1371 	ret = copy_from_user(exec2_list,
1372 			     (struct drm_i915_relocation_entry __user *)
1373 			     (uintptr_t) args->buffers_ptr,
1374 			     sizeof(*exec2_list) * args->buffer_count);
1375 	if (ret != 0) {
1376 		DRM_ERROR("copy %d exec entries failed %d\n",
1377 			  args->buffer_count, ret);
1378 		drm_free_large(exec2_list);
1379 		return -EFAULT;
1380 	}
1381 
1382 	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
1383 	if (!ret) {
1384 		/* Copy the new buffer offsets back to the user's exec list. */
1385 		ret = copy_to_user((struct drm_i915_relocation_entry __user *)
1386 				   (uintptr_t) args->buffers_ptr,
1387 				   exec2_list,
1388 				   sizeof(*exec2_list) * args->buffer_count);
1389 		if (ret) {
1390 			ret = -EFAULT;
1391 			DRM_ERROR("failed to copy %d exec entries "
1392 				  "back to user (%d)\n",
1393 				  args->buffer_count, ret);
1394 		}
1395 	}
1396 
1397 	drm_free_large(exec2_list);
1398 	return ret;
1399 }
1400