Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c')
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 920
1 file changed, 562 insertions, 358 deletions
| diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 1978633e7549..7adb4c77cc7f 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -26,22 +26,42 @@   *   */ +#include <linux/dma_remapping.h> +#include <linux/reservation.h> +#include <linux/uaccess.h> +  #include <drm/drmP.h>  #include <drm/i915_drm.h> +  #include "i915_drv.h" +#include "i915_gem_dmabuf.h"  #include "i915_trace.h"  #include "intel_drv.h" -#include <linux/dma_remapping.h> -#include <linux/uaccess.h> +#include "intel_frontbuffer.h" + +#define DBG_USE_CPU_RELOC 0 /* -1 force GTT relocs; 1 force CPU relocs */ -#define  __EXEC_OBJECT_HAS_PIN (1<<31) -#define  __EXEC_OBJECT_HAS_FENCE (1<<30) -#define  __EXEC_OBJECT_NEEDS_MAP (1<<29) -#define  __EXEC_OBJECT_NEEDS_BIAS (1<<28) +#define  __EXEC_OBJECT_HAS_PIN		(1<<31) +#define  __EXEC_OBJECT_HAS_FENCE	(1<<30) +#define  __EXEC_OBJECT_NEEDS_MAP	(1<<29) +#define  __EXEC_OBJECT_NEEDS_BIAS	(1<<28) +#define  __EXEC_OBJECT_INTERNAL_FLAGS (0xf<<28) /* all of the above */  #define BATCH_OFFSET_BIAS (256*1024) +struct i915_execbuffer_params { +	struct drm_device               *dev; +	struct drm_file                 *file; +	struct i915_vma			*batch; +	u32				dispatch_flags; +	u32				args_batch_start_offset; +	struct intel_engine_cs          *engine; +	struct i915_gem_context         *ctx; +	struct drm_i915_gem_request     *request; +}; +  struct eb_vmas { +	struct drm_i915_private *i915;  	struct list_head vmas;  	int and;  	union { @@ -51,7 +71,8 @@ struct eb_vmas {  };  static struct eb_vmas * -eb_create(struct drm_i915_gem_execbuffer2 *args) +eb_create(struct drm_i915_private *i915, +	  struct drm_i915_gem_execbuffer2 *args)  {  	struct eb_vmas *eb = NULL; @@ -78,6 +99,7 @@ eb_create(struct drm_i915_gem_execbuffer2 *args)  	} else  		eb->and = -args->buffer_count; +	eb->i915 = i915;  	INIT_LIST_HEAD(&eb->vmas);  	return eb;  } @@ -89,6 +111,26 @@ eb_reset(struct eb_vmas *eb)  		memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));  } +static struct i915_vma * +eb_get_batch(struct eb_vmas *eb) +{ +	struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list); + +	/* +	 * SNA is doing fancy tricks with compressing batch buffers, which leads +	 * to negative relocation deltas. Usually that works out ok since the +	 * relocate address is still positive, except when the batch is placed +	 * very low in the GTT. Ensure this doesn't happen. +	 * +	 * Note that actual hangs have only been observed on gen7, but for +	 * paranoia do it everywhere. +	 */ +	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0) +		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; + +	return vma; +} +  static int  eb_lookup_vmas(struct eb_vmas *eb,  	       struct drm_i915_gem_exec_object2 *exec, @@ -122,7 +164,7 @@ eb_lookup_vmas(struct eb_vmas *eb,  			goto err;  		} -		drm_gem_object_reference(&obj->base); +		i915_gem_object_get(obj);  		list_add_tail(&obj->obj_exec_link, &objects);  	}  	spin_unlock(&file->table_lock); @@ -143,8 +185,8 @@ eb_lookup_vmas(struct eb_vmas *eb,  		 * from the (obj, vm) we don't run the risk of creating  		 * duplicated vmas for the same vm.  		 
*/ -		vma = i915_gem_obj_lookup_or_create_vma(obj, vm); -		if (IS_ERR(vma)) { +		vma = i915_gem_obj_lookup_or_create_vma(obj, vm, NULL); +		if (unlikely(IS_ERR(vma))) {  			DRM_DEBUG("Failed to lookup VMA\n");  			ret = PTR_ERR(vma);  			goto err; @@ -175,7 +217,7 @@ err:  				       struct drm_i915_gem_object,  				       obj_exec_link);  		list_del_init(&obj->obj_exec_link); -		drm_gem_object_unreference(&obj->base); +		i915_gem_object_put(obj);  	}  	/*  	 * Objects already transfered to the vmas list will be unreferenced by @@ -208,7 +250,6 @@ static void  i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)  {  	struct drm_i915_gem_exec_object2 *entry; -	struct drm_i915_gem_object *obj = vma->obj;  	if (!drm_mm_node_allocated(&vma->node))  		return; @@ -216,10 +257,10 @@ i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)  	entry = vma->exec_entry;  	if (entry->flags & __EXEC_OBJECT_HAS_FENCE) -		i915_gem_object_unpin_fence(obj); +		i915_vma_unpin_fence(vma);  	if (entry->flags & __EXEC_OBJECT_HAS_PIN) -		vma->pin_count--; +		__i915_vma_unpin(vma);  	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);  } @@ -234,13 +275,19 @@ static void eb_destroy(struct eb_vmas *eb)  				       exec_list);  		list_del_init(&vma->exec_list);  		i915_gem_execbuffer_unreserve_vma(vma); -		drm_gem_object_unreference(&vma->obj->base); +		i915_vma_put(vma);  	}  	kfree(eb);  }  static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)  { +	if (!i915_gem_object_has_struct_page(obj)) +		return false; + +	if (DBG_USE_CPU_RELOC) +		return DBG_USE_CPU_RELOC > 0; +  	return (HAS_LLC(obj->base.dev) ||  		obj->base.write_domain == I915_GEM_DOMAIN_CPU ||  		obj->cache_level != I915_CACHE_NONE); @@ -265,144 +312,265 @@ static inline uint64_t gen8_noncanonical_addr(uint64_t address)  }  static inline uint64_t -relocation_target(struct drm_i915_gem_relocation_entry *reloc, +relocation_target(const struct drm_i915_gem_relocation_entry *reloc,  		  uint64_t target_offset)  {  	return gen8_canonical_addr((int)reloc->delta + target_offset);  } -static int -relocate_entry_cpu(struct drm_i915_gem_object *obj, -		   struct drm_i915_gem_relocation_entry *reloc, -		   uint64_t target_offset) +struct reloc_cache { +	struct drm_i915_private *i915; +	struct drm_mm_node node; +	unsigned long vaddr; +	unsigned int page; +	bool use_64bit_reloc; +}; + +static void reloc_cache_init(struct reloc_cache *cache, +			     struct drm_i915_private *i915)  { -	struct drm_device *dev = obj->base.dev; -	uint32_t page_offset = offset_in_page(reloc->offset); -	uint64_t delta = relocation_target(reloc, target_offset); -	char *vaddr; -	int ret; +	cache->page = -1; +	cache->vaddr = 0; +	cache->i915 = i915; +	cache->use_64bit_reloc = INTEL_GEN(cache->i915) >= 8; +	cache->node.allocated = false; +} -	ret = i915_gem_object_set_to_cpu_domain(obj, true); -	if (ret) -		return ret; +static inline void *unmask_page(unsigned long p) +{ +	return (void *)(uintptr_t)(p & PAGE_MASK); +} -	vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, -				reloc->offset >> PAGE_SHIFT)); -	*(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta); +static inline unsigned int unmask_flags(unsigned long p) +{ +	return p & ~PAGE_MASK; +} + +#define KMAP 0x4 /* after CLFLUSH_FLAGS */ + +static void reloc_cache_fini(struct reloc_cache *cache) +{ +	void *vaddr; + +	if (!cache->vaddr) +		return; -	if (INTEL_INFO(dev)->gen >= 8) { -		page_offset = offset_in_page(page_offset + sizeof(uint32_t)); +	vaddr = unmask_page(cache->vaddr); +	if 
(cache->vaddr & KMAP) { +		if (cache->vaddr & CLFLUSH_AFTER) +			mb(); -		if (page_offset == 0) { -			kunmap_atomic(vaddr); -			vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, -			    (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT)); +		kunmap_atomic(vaddr); +		i915_gem_obj_finish_shmem_access((struct drm_i915_gem_object *)cache->node.mm); +	} else { +		wmb(); +		io_mapping_unmap_atomic((void __iomem *)vaddr); +		if (cache->node.allocated) { +			struct i915_ggtt *ggtt = &cache->i915->ggtt; + +			ggtt->base.clear_range(&ggtt->base, +					       cache->node.start, +					       cache->node.size, +					       true); +			drm_mm_remove_node(&cache->node); +		} else { +			i915_vma_unpin((struct i915_vma *)cache->node.mm);  		} +	} +} + +static void *reloc_kmap(struct drm_i915_gem_object *obj, +			struct reloc_cache *cache, +			int page) +{ +	void *vaddr; + +	if (cache->vaddr) { +		kunmap_atomic(unmask_page(cache->vaddr)); +	} else { +		unsigned int flushes; +		int ret; -		*(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta); +		ret = i915_gem_obj_prepare_shmem_write(obj, &flushes); +		if (ret) +			return ERR_PTR(ret); + +		BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS); +		BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK); + +		cache->vaddr = flushes | KMAP; +		cache->node.mm = (void *)obj; +		if (flushes) +			mb();  	} -	kunmap_atomic(vaddr); +	vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page)); +	cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr; +	cache->page = page; -	return 0; +	return vaddr;  } -static int -relocate_entry_gtt(struct drm_i915_gem_object *obj, -		   struct drm_i915_gem_relocation_entry *reloc, -		   uint64_t target_offset) +static void *reloc_iomap(struct drm_i915_gem_object *obj, +			 struct reloc_cache *cache, +			 int page)  { -	struct drm_device *dev = obj->base.dev; -	struct drm_i915_private *dev_priv = to_i915(dev); -	struct i915_ggtt *ggtt = &dev_priv->ggtt; -	uint64_t delta = relocation_target(reloc, target_offset); -	uint64_t offset; -	void __iomem *reloc_page; -	int ret; +	struct i915_ggtt *ggtt = &cache->i915->ggtt; +	unsigned long offset; +	void *vaddr; -	ret = i915_gem_object_set_to_gtt_domain(obj, true); -	if (ret) -		return ret; +	if (cache->node.allocated) { +		wmb(); +		ggtt->base.insert_page(&ggtt->base, +				       i915_gem_object_get_dma_address(obj, page), +				       cache->node.start, I915_CACHE_NONE, 0); +		cache->page = page; +		return unmask_page(cache->vaddr); +	} -	ret = i915_gem_object_put_fence(obj); -	if (ret) -		return ret; +	if (cache->vaddr) { +		io_mapping_unmap_atomic(unmask_page(cache->vaddr)); +	} else { +		struct i915_vma *vma; +		int ret; -	/* Map the page containing the relocation we're going to perform.  
*/ -	offset = i915_gem_obj_ggtt_offset(obj); -	offset += reloc->offset; -	reloc_page = io_mapping_map_atomic_wc(ggtt->mappable, -					      offset & PAGE_MASK); -	iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset)); - -	if (INTEL_INFO(dev)->gen >= 8) { -		offset += sizeof(uint32_t); - -		if (offset_in_page(offset) == 0) { -			io_mapping_unmap_atomic(reloc_page); -			reloc_page = -				io_mapping_map_atomic_wc(ggtt->mappable, -							 offset); +		if (use_cpu_reloc(obj)) +			return NULL; + +		ret = i915_gem_object_set_to_gtt_domain(obj, true); +		if (ret) +			return ERR_PTR(ret); + +		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, +					       PIN_MAPPABLE | PIN_NONBLOCK); +		if (IS_ERR(vma)) { +			memset(&cache->node, 0, sizeof(cache->node)); +			ret = drm_mm_insert_node_in_range_generic +				(&ggtt->base.mm, &cache->node, +				 4096, 0, 0, +				 0, ggtt->mappable_end, +				 DRM_MM_SEARCH_DEFAULT, +				 DRM_MM_CREATE_DEFAULT); +			if (ret) /* no inactive aperture space, use cpu reloc */ +				return NULL; +		} else { +			ret = i915_vma_put_fence(vma); +			if (ret) { +				i915_vma_unpin(vma); +				return ERR_PTR(ret); +			} + +			cache->node.start = vma->node.start; +			cache->node.mm = (void *)vma;  		} +	} -		iowrite32(upper_32_bits(delta), -			  reloc_page + offset_in_page(offset)); +	offset = cache->node.start; +	if (cache->node.allocated) { +		ggtt->base.insert_page(&ggtt->base, +				       i915_gem_object_get_dma_address(obj, page), +				       offset, I915_CACHE_NONE, 0); +	} else { +		offset += page << PAGE_SHIFT;  	} -	io_mapping_unmap_atomic(reloc_page); +	vaddr = io_mapping_map_atomic_wc(&cache->i915->ggtt.mappable, offset); +	cache->page = page; +	cache->vaddr = (unsigned long)vaddr; -	return 0; +	return vaddr;  } -static void -clflush_write32(void *addr, uint32_t value) +static void *reloc_vaddr(struct drm_i915_gem_object *obj, +			 struct reloc_cache *cache, +			 int page)  { -	/* This is not a fast path, so KISS. */ -	drm_clflush_virt_range(addr, sizeof(uint32_t)); -	*(uint32_t *)addr = value; -	drm_clflush_virt_range(addr, sizeof(uint32_t)); +	void *vaddr; + +	if (cache->page == page) { +		vaddr = unmask_page(cache->vaddr); +	} else { +		vaddr = NULL; +		if ((cache->vaddr & KMAP) == 0) +			vaddr = reloc_iomap(obj, cache, page); +		if (!vaddr) +			vaddr = reloc_kmap(obj, cache, page); +	} + +	return vaddr;  } -static int -relocate_entry_clflush(struct drm_i915_gem_object *obj, -		       struct drm_i915_gem_relocation_entry *reloc, -		       uint64_t target_offset) +static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)  { -	struct drm_device *dev = obj->base.dev; -	uint32_t page_offset = offset_in_page(reloc->offset); -	uint64_t delta = relocation_target(reloc, target_offset); -	char *vaddr; -	int ret; +	if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) { +		if (flushes & CLFLUSH_BEFORE) { +			clflushopt(addr); +			mb(); +		} -	ret = i915_gem_object_set_to_gtt_domain(obj, true); -	if (ret) -		return ret; +		*addr = value; -	vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, -				reloc->offset >> PAGE_SHIFT)); -	clflush_write32(vaddr + page_offset, lower_32_bits(delta)); +		/* Writes to the same cacheline are serialised by the CPU +		 * (including clflush). On the write path, we only require +		 * that it hits memory in an orderly fashion and place +		 * mb barriers at the start and end of the relocation phase +		 * to ensure ordering of clflush wrt to the system. 
+		 */ +		if (flushes & CLFLUSH_AFTER) +			clflushopt(addr); +	} else +		*addr = value; +} -	if (INTEL_INFO(dev)->gen >= 8) { -		page_offset = offset_in_page(page_offset + sizeof(uint32_t)); +static int +relocate_entry(struct drm_i915_gem_object *obj, +	       const struct drm_i915_gem_relocation_entry *reloc, +	       struct reloc_cache *cache, +	       u64 target_offset) +{ +	u64 offset = reloc->offset; +	bool wide = cache->use_64bit_reloc; +	void *vaddr; + +	target_offset = relocation_target(reloc, target_offset); +repeat: +	vaddr = reloc_vaddr(obj, cache, offset >> PAGE_SHIFT); +	if (IS_ERR(vaddr)) +		return PTR_ERR(vaddr); + +	clflush_write32(vaddr + offset_in_page(offset), +			lower_32_bits(target_offset), +			cache->vaddr); + +	if (wide) { +		offset += sizeof(u32); +		target_offset >>= 32; +		wide = false; +		goto repeat; +	} -		if (page_offset == 0) { -			kunmap_atomic(vaddr); -			vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, -			    (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT)); -		} +	return 0; +} -		clflush_write32(vaddr + page_offset, upper_32_bits(delta)); -	} +static bool object_is_idle(struct drm_i915_gem_object *obj) +{ +	unsigned long active = i915_gem_object_get_active(obj); +	int idx; -	kunmap_atomic(vaddr); +	for_each_active(active, idx) { +		if (!i915_gem_active_is_idle(&obj->last_read[idx], +					     &obj->base.dev->struct_mutex)) +			return false; +	} -	return 0; +	return true;  }  static int  i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,  				   struct eb_vmas *eb, -				   struct drm_i915_gem_relocation_entry *reloc) +				   struct drm_i915_gem_relocation_entry *reloc, +				   struct reloc_cache *cache)  {  	struct drm_device *dev = obj->base.dev;  	struct drm_gem_object *target_obj; @@ -465,7 +633,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,  	/* Check that the relocation address is valid... */  	if (unlikely(reloc->offset > -		obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) { +		     obj->base.size - (cache->use_64bit_reloc ? 
8 : 4))) {  		DRM_DEBUG("Relocation beyond object bounds: "  			  "obj %p target %d offset %d size %d.\n",  			  obj, reloc->target_handle, @@ -482,26 +650,15 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,  	}  	/* We can't wait for rendering with pagefaults disabled */ -	if (obj->active && pagefault_disabled()) +	if (pagefault_disabled() && !object_is_idle(obj))  		return -EFAULT; -	if (use_cpu_reloc(obj)) -		ret = relocate_entry_cpu(obj, reloc, target_offset); -	else if (obj->map_and_fenceable) -		ret = relocate_entry_gtt(obj, reloc, target_offset); -	else if (static_cpu_has(X86_FEATURE_CLFLUSH)) -		ret = relocate_entry_clflush(obj, reloc, target_offset); -	else { -		WARN_ONCE(1, "Impossible case in relocation handling\n"); -		ret = -ENODEV; -	} - +	ret = relocate_entry(obj, reloc, cache, target_offset);  	if (ret)  		return ret;  	/* and update the user's relocation entry */  	reloc->presumed_offset = target_offset; -  	return 0;  } @@ -513,9 +670,11 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,  	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];  	struct drm_i915_gem_relocation_entry __user *user_relocs;  	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; -	int remain, ret; +	struct reloc_cache cache; +	int remain, ret = 0;  	user_relocs = u64_to_user_ptr(entry->relocs_ptr); +	reloc_cache_init(&cache, eb->i915);  	remain = entry->relocation_count;  	while (remain) { @@ -525,19 +684,23 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,  			count = ARRAY_SIZE(stack_reloc);  		remain -= count; -		if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0]))) -			return -EFAULT; +		if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0]))) { +			ret = -EFAULT; +			goto out; +		}  		do {  			u64 offset = r->presumed_offset; -			ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r); +			ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r, &cache);  			if (ret) -				return ret; +				goto out;  			if (r->presumed_offset != offset && -			    __put_user(r->presumed_offset, &user_relocs->presumed_offset)) { -				return -EFAULT; +			    __put_user(r->presumed_offset, +				       &user_relocs->presumed_offset)) { +				ret = -EFAULT; +				goto out;  			}  			user_relocs++; @@ -545,7 +708,9 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,  		} while (--count);  	} -	return 0; +out: +	reloc_cache_fini(&cache); +	return ret;  #undef N_RELOC  } @@ -555,15 +720,18 @@ i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,  				      struct drm_i915_gem_relocation_entry *relocs)  {  	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; -	int i, ret; +	struct reloc_cache cache; +	int i, ret = 0; +	reloc_cache_init(&cache, eb->i915);  	for (i = 0; i < entry->relocation_count; i++) { -		ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i]); +		ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i], &cache);  		if (ret) -			return ret; +			break;  	} +	reloc_cache_fini(&cache); -	return 0; +	return ret;  }  static int @@ -626,23 +794,27 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,  			flags |= PIN_HIGH;  	} -	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags); -	if ((ret == -ENOSPC  || ret == -E2BIG) && +	ret = i915_vma_pin(vma, +			   entry->pad_to_size, +			   entry->alignment, +			   flags); +	if ((ret == -ENOSPC || ret == -E2BIG) &&  	    only_mappable_for_reloc(entry->flags)) -		ret = i915_gem_object_pin(obj, vma->vm, -					  entry->alignment, -	
				  flags & ~PIN_MAPPABLE); +		ret = i915_vma_pin(vma, +				   entry->pad_to_size, +				   entry->alignment, +				   flags & ~PIN_MAPPABLE);  	if (ret)  		return ret;  	entry->flags |= __EXEC_OBJECT_HAS_PIN;  	if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { -		ret = i915_gem_object_get_fence(obj); +		ret = i915_vma_get_fence(vma);  		if (ret)  			return ret; -		if (i915_gem_object_pin_fence(obj)) +		if (i915_vma_pin_fence(vma))  			entry->flags |= __EXEC_OBJECT_HAS_FENCE;  	} @@ -667,7 +839,7 @@ need_reloc_mappable(struct i915_vma *vma)  	if (entry->relocation_count == 0)  		return false; -	if (!vma->is_ggtt) +	if (!i915_vma_is_ggtt(vma))  		return false;  	/* See also use_cpu_reloc() */ @@ -684,14 +856,17 @@ static bool  eb_vma_misplaced(struct i915_vma *vma)  {  	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; -	struct drm_i915_gem_object *obj = vma->obj; -	WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP && !vma->is_ggtt); +	WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP && +		!i915_vma_is_ggtt(vma));  	if (entry->alignment &&  	    vma->node.start & (entry->alignment - 1))  		return true; +	if (vma->node.size < entry->pad_to_size) +		return true; +  	if (entry->flags & EXEC_OBJECT_PINNED &&  	    vma->node.start != entry->offset)  		return true; @@ -701,7 +876,8 @@ eb_vma_misplaced(struct i915_vma *vma)  		return true;  	/* avoid costly ping-pong once a batch bo ended up non-mappable */ -	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable) +	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && +	    !i915_vma_is_map_and_fenceable(vma))  		return !only_mappable_for_reloc(entry->flags);  	if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 && @@ -725,8 +901,6 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine,  	bool has_fenced_gpu_access = INTEL_GEN(engine->i915) < 4;  	int retry; -	i915_gem_retire_requests_ring(engine); -  	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;  	INIT_LIST_HEAD(&ordered_vmas); @@ -746,7 +920,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine,  			entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;  		need_fence =  			entry->flags & EXEC_OBJECT_NEEDS_FENCE && -			obj->tiling_mode != I915_TILING_NONE; +			i915_gem_object_is_tiled(obj);  		need_mappable = need_fence || need_reloc_mappable(vma);  		if (entry->flags & EXEC_OBJECT_PINNED) @@ -843,7 +1017,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,  		vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);  		list_del_init(&vma->exec_list);  		i915_gem_execbuffer_unreserve_vma(vma); -		drm_gem_object_unreference(&vma->obj->base); +		i915_vma_put(vma);  	}  	mutex_unlock(&dev->struct_mutex); @@ -937,41 +1111,54 @@ err:  	return ret;  } +static unsigned int eb_other_engines(struct drm_i915_gem_request *req) +{ +	unsigned int mask; + +	mask = ~intel_engine_flag(req->engine) & I915_BO_ACTIVE_MASK; +	mask <<= I915_BO_ACTIVE_SHIFT; + +	return mask; +} +  static int  i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,  				struct list_head *vmas)  { -	const unsigned other_rings = ~intel_engine_flag(req->engine); +	const unsigned int other_rings = eb_other_engines(req);  	struct i915_vma *vma; -	uint32_t flush_domains = 0; -	bool flush_chipset = false;  	int ret;  	list_for_each_entry(vma, vmas, exec_list) {  		struct drm_i915_gem_object *obj = vma->obj; +		struct reservation_object *resv; -		if (obj->active & other_rings) { -			ret = i915_gem_object_sync(obj, req->engine, &req); +		if (obj->flags & other_rings) { +			ret = 
i915_gem_request_await_object +				(req, obj, obj->base.pending_write_domain);  			if (ret)  				return ret;  		} -		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) -			flush_chipset |= i915_gem_clflush_object(obj, false); +		resv = i915_gem_object_get_dmabuf_resv(obj); +		if (resv) { +			ret = i915_sw_fence_await_reservation +				(&req->submit, resv, &i915_fence_ops, +				 obj->base.pending_write_domain, 10*HZ, +				 GFP_KERNEL | __GFP_NOWARN); +			if (ret < 0) +				return ret; +		} -		flush_domains |= obj->base.write_domain; +		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) +			i915_gem_clflush_object(obj, false);  	} -	if (flush_chipset) -		i915_gem_chipset_flush(req->engine->i915); - -	if (flush_domains & I915_GEM_DOMAIN_GTT) -		wmb(); +	/* Unconditionally flush any chipset caches (for streaming writes). */ +	i915_gem_chipset_flush(req->engine->i915); -	/* Unconditionally invalidate gpu caches and ensure that we do flush -	 * any residual writes from the previous batch. -	 */ -	return intel_ring_invalidate_all_caches(req); +	/* Unconditionally invalidate GPU caches and TLBs. */ +	return req->engine->emit_flush(req, EMIT_INVALIDATE);  }  static bool @@ -1007,6 +1194,9 @@ validate_exec_list(struct drm_device *dev,  	unsigned invalid_flags;  	int i; +	/* INTERNAL flags must not overlap with external ones */ +	BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS); +  	invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;  	if (USES_FULL_PPGTT(dev))  		invalid_flags |= EXEC_OBJECT_NEEDS_GTT; @@ -1036,6 +1226,14 @@ validate_exec_list(struct drm_device *dev,  		if (exec[i].alignment && !is_power_of_2(exec[i].alignment))  			return -EINVAL; +		/* pad_to_size was once a reserved field, so sanitize it */ +		if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) { +			if (offset_in_page(exec[i].pad_to_size)) +				return -EINVAL; +		} else { +			exec[i].pad_to_size = 0; +		} +  		/* First check for malicious input causing overflow in  		 * the worst case where we need to allocate the entire  		 * relocation tree as a single array. @@ -1055,7 +1253,7 @@ validate_exec_list(struct drm_device *dev,  			return -EFAULT;  		if (likely(!i915.prefault_disable)) { -			if (fault_in_multipages_readable(ptr, length)) +			if (fault_in_pages_readable(ptr, length))  				return -EFAULT;  		}  	} @@ -1067,12 +1265,9 @@ static struct i915_gem_context *  i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,  			  struct intel_engine_cs *engine, const u32 ctx_id)  { -	struct i915_gem_context *ctx = NULL; +	struct i915_gem_context *ctx;  	struct i915_ctx_hang_stats *hs; -	if (engine->id != RCS && ctx_id != DEFAULT_CONTEXT_HANDLE) -		return ERR_PTR(-EINVAL); -  	ctx = i915_gem_context_lookup(file->driver_priv, ctx_id);  	if (IS_ERR(ctx))  		return ctx; @@ -1086,66 +1281,99 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,  	return ctx;  } -void +void i915_vma_move_to_active(struct i915_vma *vma, +			     struct drm_i915_gem_request *req, +			     unsigned int flags) +{ +	struct drm_i915_gem_object *obj = vma->obj; +	const unsigned int idx = req->engine->id; + +	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + +	obj->dirty = 1; /* be paranoid  */ + +	/* Add a reference if we're newly entering the active list. +	 * The order in which we add operations to the retirement queue is +	 * vital here: mark_active adds to the start of the callback list, +	 * such that subsequent callbacks are called first. 
Therefore we +	 * add the active reference first and queue for it to be dropped +	 * *last*. +	 */ +	if (!i915_gem_object_is_active(obj)) +		i915_gem_object_get(obj); +	i915_gem_object_set_active(obj, idx); +	i915_gem_active_set(&obj->last_read[idx], req); + +	if (flags & EXEC_OBJECT_WRITE) { +		i915_gem_active_set(&obj->last_write, req); + +		intel_fb_obj_invalidate(obj, ORIGIN_CS); + +		/* update for the implicit flush after a batch */ +		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; +	} + +	if (flags & EXEC_OBJECT_NEEDS_FENCE) +		i915_gem_active_set(&vma->last_fence, req); + +	i915_vma_set_active(vma, idx); +	i915_gem_active_set(&vma->last_read[idx], req); +	list_move_tail(&vma->vm_link, &vma->vm->active_list); +} + +static void eb_export_fence(struct drm_i915_gem_object *obj, +			    struct drm_i915_gem_request *req, +			    unsigned int flags) +{ +	struct reservation_object *resv; + +	resv = i915_gem_object_get_dmabuf_resv(obj); +	if (!resv) +		return; + +	/* Ignore errors from failing to allocate the new fence, we can't +	 * handle an error right now. Worst case should be missed +	 * synchronisation leading to rendering corruption. +	 */ +	ww_mutex_lock(&resv->lock, NULL); +	if (flags & EXEC_OBJECT_WRITE) +		reservation_object_add_excl_fence(resv, &req->fence); +	else if (reservation_object_reserve_shared(resv) == 0) +		reservation_object_add_shared_fence(resv, &req->fence); +	ww_mutex_unlock(&resv->lock); +} + +static void  i915_gem_execbuffer_move_to_active(struct list_head *vmas,  				   struct drm_i915_gem_request *req)  { -	struct intel_engine_cs *engine = i915_gem_request_get_engine(req);  	struct i915_vma *vma;  	list_for_each_entry(vma, vmas, exec_list) { -		struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;  		struct drm_i915_gem_object *obj = vma->obj;  		u32 old_read = obj->base.read_domains;  		u32 old_write = obj->base.write_domain; -		obj->dirty = 1; /* be paranoid  */  		obj->base.write_domain = obj->base.pending_write_domain; -		if (obj->base.write_domain == 0) +		if (obj->base.write_domain) +			vma->exec_entry->flags |= EXEC_OBJECT_WRITE; +		else  			obj->base.pending_read_domains |= obj->base.read_domains;  		obj->base.read_domains = obj->base.pending_read_domains; -		i915_vma_move_to_active(vma, req); -		if (obj->base.write_domain) { -			i915_gem_request_assign(&obj->last_write_req, req); - -			intel_fb_obj_invalidate(obj, ORIGIN_CS); - -			/* update for the implicit flush after a batch */ -			obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; -		} -		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { -			i915_gem_request_assign(&obj->last_fenced_req, req); -			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) { -				struct drm_i915_private *dev_priv = engine->i915; -				list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list, -					       &dev_priv->mm.fence_list); -			} -		} - +		i915_vma_move_to_active(vma, req, vma->exec_entry->flags); +		eb_export_fence(obj, req, vma->exec_entry->flags);  		trace_i915_gem_object_change_domain(obj, old_read, old_write);  	}  } -static void -i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params) -{ -	/* Unconditionally force add_request to emit a full flush. 
*/ -	params->engine->gpu_caches_dirty = true; - -	/* Add a breadcrumb for the completion of the batch buffer */ -	__i915_add_request(params->request, params->batch_obj, true); -} -  static int -i915_reset_gen7_sol_offsets(struct drm_device *dev, -			    struct drm_i915_gem_request *req) +i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)  { -	struct intel_engine_cs *engine = req->engine; -	struct drm_i915_private *dev_priv = to_i915(dev); +	struct intel_ring *ring = req->ring;  	int ret, i; -	if (!IS_GEN7(dev) || engine != &dev_priv->engine[RCS]) { +	if (!IS_GEN7(req->i915) || req->engine->id != RCS) {  		DRM_DEBUG("sol reset is gen7/rcs only\n");  		return -EINVAL;  	} @@ -1155,21 +1383,21 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,  		return ret;  	for (i = 0; i < 4; i++) { -		intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); -		intel_ring_emit_reg(engine, GEN7_SO_WRITE_OFFSET(i)); -		intel_ring_emit(engine, 0); +		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); +		intel_ring_emit_reg(ring, GEN7_SO_WRITE_OFFSET(i)); +		intel_ring_emit(ring, 0);  	} -	intel_ring_advance(engine); +	intel_ring_advance(ring);  	return 0;  } -static struct drm_i915_gem_object* +static struct i915_vma *  i915_gem_execbuffer_parse(struct intel_engine_cs *engine,  			  struct drm_i915_gem_exec_object2 *shadow_exec_entry, -			  struct eb_vmas *eb,  			  struct drm_i915_gem_object *batch_obj, +			  struct eb_vmas *eb,  			  u32 batch_start_offset,  			  u32 batch_len,  			  bool is_master) @@ -1181,51 +1409,44 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine,  	shadow_batch_obj = i915_gem_batch_pool_get(&engine->batch_pool,  						   PAGE_ALIGN(batch_len));  	if (IS_ERR(shadow_batch_obj)) -		return shadow_batch_obj; - -	ret = i915_parse_cmds(engine, -			      batch_obj, -			      shadow_batch_obj, -			      batch_start_offset, -			      batch_len, -			      is_master); -	if (ret) -		goto err; - -	ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 0, 0); -	if (ret) -		goto err; +		return ERR_CAST(shadow_batch_obj); + +	ret = intel_engine_cmd_parser(engine, +				      batch_obj, +				      shadow_batch_obj, +				      batch_start_offset, +				      batch_len, +				      is_master); +	if (ret) { +		if (ret == -EACCES) /* unhandled chained batch */ +			vma = NULL; +		else +			vma = ERR_PTR(ret); +		goto out; +	} -	i915_gem_object_unpin_pages(shadow_batch_obj); +	vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0); +	if (IS_ERR(vma)) +		goto out;  	memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry)); -	vma = i915_gem_obj_to_ggtt(shadow_batch_obj);  	vma->exec_entry = shadow_exec_entry;  	vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN; -	drm_gem_object_reference(&shadow_batch_obj->base); +	i915_gem_object_get(shadow_batch_obj);  	list_add_tail(&vma->exec_list, &eb->vmas); -	shadow_batch_obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND; - -	return shadow_batch_obj; - -err: +out:  	i915_gem_object_unpin_pages(shadow_batch_obj); -	if (ret == -EACCES) /* unhandled chained batch */ -		return batch_obj; -	else -		return ERR_PTR(ret); +	return vma;  } -int -i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, -			       struct drm_i915_gem_execbuffer2 *args, -			       struct list_head *vmas) +static int +execbuf_submit(struct i915_execbuffer_params *params, +	       struct drm_i915_gem_execbuffer2 *args, +	       struct list_head *vmas)  { -	struct drm_device *dev = params->dev; -	struct intel_engine_cs *engine = params->engine; -	struct drm_i915_private 
*dev_priv = to_i915(dev); +	struct drm_i915_private *dev_priv = params->request->i915;  	u64 exec_start, exec_len;  	int instp_mode;  	u32 instp_mask; @@ -1239,34 +1460,31 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,  	if (ret)  		return ret; -	WARN(params->ctx->ppgtt && params->ctx->ppgtt->pd_dirty_rings & (1<<engine->id), -	     "%s didn't clear reload\n", engine->name); -  	instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;  	instp_mask = I915_EXEC_CONSTANTS_MASK;  	switch (instp_mode) {  	case I915_EXEC_CONSTANTS_REL_GENERAL:  	case I915_EXEC_CONSTANTS_ABSOLUTE:  	case I915_EXEC_CONSTANTS_REL_SURFACE: -		if (instp_mode != 0 && engine != &dev_priv->engine[RCS]) { +		if (instp_mode != 0 && params->engine->id != RCS) {  			DRM_DEBUG("non-0 rel constants mode on non-RCS\n");  			return -EINVAL;  		}  		if (instp_mode != dev_priv->relative_constants_mode) { -			if (INTEL_INFO(dev)->gen < 4) { +			if (INTEL_INFO(dev_priv)->gen < 4) {  				DRM_DEBUG("no rel constants on pre-gen4\n");  				return -EINVAL;  			} -			if (INTEL_INFO(dev)->gen > 5 && +			if (INTEL_INFO(dev_priv)->gen > 5 &&  			    instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {  				DRM_DEBUG("rel surface constants mode invalid on gen5+\n");  				return -EINVAL;  			}  			/* The HW changed the meaning on this bit on gen6 */ -			if (INTEL_INFO(dev)->gen >= 6) +			if (INTEL_INFO(dev_priv)->gen >= 6)  				instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;  		}  		break; @@ -1275,37 +1493,39 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,  		return -EINVAL;  	} -	if (engine == &dev_priv->engine[RCS] && +	if (params->engine->id == RCS &&  	    instp_mode != dev_priv->relative_constants_mode) { +		struct intel_ring *ring = params->request->ring; +  		ret = intel_ring_begin(params->request, 4);  		if (ret)  			return ret; -		intel_ring_emit(engine, MI_NOOP); -		intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); -		intel_ring_emit_reg(engine, INSTPM); -		intel_ring_emit(engine, instp_mask << 16 | instp_mode); -		intel_ring_advance(engine); +		intel_ring_emit(ring, MI_NOOP); +		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); +		intel_ring_emit_reg(ring, INSTPM); +		intel_ring_emit(ring, instp_mask << 16 | instp_mode); +		intel_ring_advance(ring);  		dev_priv->relative_constants_mode = instp_mode;  	}  	if (args->flags & I915_EXEC_GEN7_SOL_RESET) { -		ret = i915_reset_gen7_sol_offsets(dev, params->request); +		ret = i915_reset_gen7_sol_offsets(params->request);  		if (ret)  			return ret;  	}  	exec_len   = args->batch_len; -	exec_start = params->batch_obj_vm_offset + +	exec_start = params->batch->node.start +  		     params->args_batch_start_offset;  	if (exec_len == 0) -		exec_len = params->batch_obj->base.size; +		exec_len = params->batch->size - params->args_batch_start_offset; -	ret = engine->dispatch_execbuffer(params->request, -					exec_start, exec_len, -					params->dispatch_flags); +	ret = params->engine->emit_bb_start(params->request, +					    exec_start, exec_len, +					    params->dispatch_flags);  	if (ret)  		return ret; @@ -1318,43 +1538,20 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,  /**   * Find one BSD ring to dispatch the corresponding BSD command. - * The ring index is returned. + * The engine index is returned.   
*/  static unsigned int -gen8_dispatch_bsd_ring(struct drm_i915_private *dev_priv, struct drm_file *file) +gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv, +			 struct drm_file *file)  {  	struct drm_i915_file_private *file_priv = file->driver_priv;  	/* Check whether the file_priv has already selected one ring. */ -	if ((int)file_priv->bsd_ring < 0) { -		/* If not, use the ping-pong mechanism to select one. */ -		mutex_lock(&dev_priv->drm.struct_mutex); -		file_priv->bsd_ring = dev_priv->mm.bsd_ring_dispatch_index; -		dev_priv->mm.bsd_ring_dispatch_index ^= 1; -		mutex_unlock(&dev_priv->drm.struct_mutex); -	} +	if ((int)file_priv->bsd_engine < 0) +		file_priv->bsd_engine = atomic_fetch_xor(1, +			 &dev_priv->mm.bsd_engine_dispatch_index); -	return file_priv->bsd_ring; -} - -static struct drm_i915_gem_object * -eb_get_batch(struct eb_vmas *eb) -{ -	struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list); - -	/* -	 * SNA is doing fancy tricks with compressing batch buffers, which leads -	 * to negative relocation deltas. Usually that works out ok since the -	 * relocate address is still positive, except when the batch is placed -	 * very low in the GTT. Ensure this doesn't happen. -	 * -	 * Note that actual hangs have only been observed on gen7, but for -	 * paranoia do it everywhere. -	 */ -	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0) -		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; - -	return vma->obj; +	return file_priv->bsd_engine;  }  #define I915_USER_RINGS (4) @@ -1367,31 +1564,31 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {  	[I915_EXEC_VEBOX]	= VECS  }; -static int -eb_select_ring(struct drm_i915_private *dev_priv, -	       struct drm_file *file, -	       struct drm_i915_gem_execbuffer2 *args, -	       struct intel_engine_cs **ring) +static struct intel_engine_cs * +eb_select_engine(struct drm_i915_private *dev_priv, +		 struct drm_file *file, +		 struct drm_i915_gem_execbuffer2 *args)  {  	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK; +	struct intel_engine_cs *engine;  	if (user_ring_id > I915_USER_RINGS) {  		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id); -		return -EINVAL; +		return NULL;  	}  	if ((user_ring_id != I915_EXEC_BSD) &&  	    ((args->flags & I915_EXEC_BSD_MASK) != 0)) {  		DRM_DEBUG("execbuf with non bsd ring but with invalid "  			  "bsd dispatch flags: %d\n", (int)(args->flags)); -		return -EINVAL; +		return NULL;  	}  	if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) {  		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;  		if (bsd_idx == I915_EXEC_BSD_DEFAULT) { -			bsd_idx = gen8_dispatch_bsd_ring(dev_priv, file); +			bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);  		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&  			   bsd_idx <= I915_EXEC_BSD_RING2) {  			bsd_idx >>= I915_EXEC_BSD_SHIFT; @@ -1399,20 +1596,20 @@ eb_select_ring(struct drm_i915_private *dev_priv,  		} else {  			DRM_DEBUG("execbuf with unknown bsd ring: %u\n",  				  bsd_idx); -			return -EINVAL; +			return NULL;  		} -		*ring = &dev_priv->engine[_VCS(bsd_idx)]; +		engine = &dev_priv->engine[_VCS(bsd_idx)];  	} else { -		*ring = &dev_priv->engine[user_ring_map[user_ring_id]]; +		engine = &dev_priv->engine[user_ring_map[user_ring_id]];  	} -	if (!intel_engine_initialized(*ring)) { +	if (!intel_engine_initialized(engine)) {  		DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id); -		return -EINVAL; +		return NULL;  	} -	return 0; +	return engine;  }  static int @@ 
-1423,9 +1620,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,  {  	struct drm_i915_private *dev_priv = to_i915(dev);  	struct i915_ggtt *ggtt = &dev_priv->ggtt; -	struct drm_i915_gem_request *req = NULL;  	struct eb_vmas *eb; -	struct drm_i915_gem_object *batch_obj;  	struct drm_i915_gem_exec_object2 shadow_exec_entry;  	struct intel_engine_cs *engine;  	struct i915_gem_context *ctx; @@ -1454,9 +1649,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,  	if (args->flags & I915_EXEC_IS_PINNED)  		dispatch_flags |= I915_DISPATCH_PINNED; -	ret = eb_select_ring(dev_priv, file, args, &engine); -	if (ret) -		return ret; +	engine = eb_select_engine(dev_priv, file, args); +	if (!engine) +		return -EINVAL;  	if (args->buffer_count < 1) {  		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count); @@ -1496,7 +1691,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,  		goto pre_mutex_err;  	} -	i915_gem_context_reference(ctx); +	i915_gem_context_get(ctx);  	if (ctx->ppgtt)  		vm = &ctx->ppgtt->base; @@ -1505,9 +1700,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,  	memset(¶ms_master, 0x00, sizeof(params_master)); -	eb = eb_create(args); +	eb = eb_create(dev_priv, args);  	if (eb == NULL) { -		i915_gem_context_unreference(ctx); +		i915_gem_context_put(ctx);  		mutex_unlock(&dev->struct_mutex);  		ret = -ENOMEM;  		goto pre_mutex_err; @@ -1519,7 +1714,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,  		goto err;  	/* take note of the batch buffer before we might reorder the lists */ -	batch_obj = eb_get_batch(eb); +	params->batch = eb_get_batch(eb);  	/* Move the objects en-masse into the GTT, evicting if necessary. */  	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; @@ -1543,34 +1738,34 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,  	}  	/* Set the pending read domains for the batch buffer to COMMAND */ -	if (batch_obj->base.pending_write_domain) { +	if (params->batch->obj->base.pending_write_domain) {  		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");  		ret = -EINVAL;  		goto err;  	} +	if (args->batch_start_offset > params->batch->size || +	    args->batch_len > params->batch->size - args->batch_start_offset) { +		DRM_DEBUG("Attempting to use out-of-bounds batch\n"); +		ret = -EINVAL; +		goto err; +	}  	params->args_batch_start_offset = args->batch_start_offset; -	if (i915_needs_cmd_parser(engine) && args->batch_len) { -		struct drm_i915_gem_object *parsed_batch_obj; - -		parsed_batch_obj = i915_gem_execbuffer_parse(engine, -							     &shadow_exec_entry, -							     eb, -							     batch_obj, -							     args->batch_start_offset, -							     args->batch_len, -							     drm_is_current_master(file)); -		if (IS_ERR(parsed_batch_obj)) { -			ret = PTR_ERR(parsed_batch_obj); +	if (intel_engine_needs_cmd_parser(engine) && args->batch_len) { +		struct i915_vma *vma; + +		vma = i915_gem_execbuffer_parse(engine, &shadow_exec_entry, +						params->batch->obj, +						eb, +						args->batch_start_offset, +						args->batch_len, +						drm_is_current_master(file)); +		if (IS_ERR(vma)) { +			ret = PTR_ERR(vma);  			goto err;  		} -		/* -		 * parsed_batch_obj == batch_obj means batch not fully parsed: -		 * Accept, but don't promote to secure. 
-		 */ - -		if (parsed_batch_obj != batch_obj) { +		if (vma) {  			/*  			 * Batch parsed and accepted:  			 * @@ -1582,16 +1777,19 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,  			 */  			dispatch_flags |= I915_DISPATCH_SECURE;  			params->args_batch_start_offset = 0; -			batch_obj = parsed_batch_obj; +			params->batch = vma;  		}  	} -	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND; +	params->batch->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;  	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure  	 * batch" bit. Hence we need to pin secure batches into the global gtt.  	 * hsw should have this fixed, but bdw mucks it up again. */  	if (dispatch_flags & I915_DISPATCH_SECURE) { +		struct drm_i915_gem_object *obj = params->batch->obj; +		struct i915_vma *vma; +  		/*  		 * So on first glance it looks freaky that we pin the batch here  		 * outside of the reservation loop. But: @@ -1602,22 +1800,31 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,  		 *   fitting due to fragmentation.  		 * So this is actually safe.  		 */ -		ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0); -		if (ret) +		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); +		if (IS_ERR(vma)) { +			ret = PTR_ERR(vma);  			goto err; +		} -		params->batch_obj_vm_offset = i915_gem_obj_ggtt_offset(batch_obj); -	} else -		params->batch_obj_vm_offset = i915_gem_obj_offset(batch_obj, vm); +		params->batch = vma; +	}  	/* Allocate a request for this batch buffer nice and early. */ -	req = i915_gem_request_alloc(engine, ctx); -	if (IS_ERR(req)) { -		ret = PTR_ERR(req); +	params->request = i915_gem_request_alloc(engine, ctx); +	if (IS_ERR(params->request)) { +		ret = PTR_ERR(params->request);  		goto err_batch_unpin;  	} -	ret = i915_gem_request_add_to_client(req, file); +	/* Whilst this request exists, batch_obj will be on the +	 * active_list, and so will hold the active reference. Only when this +	 * request is retired will the the batch_obj be moved onto the +	 * inactive_list and lose its active reference. Hence we do not need +	 * to explicitly hold another reference here. +	 */ +	params->request->batch = params->batch; + +	ret = i915_gem_request_add_to_client(params->request, file);  	if (ret)  		goto err_request; @@ -1631,13 +1838,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,  	params->file                    = file;  	params->engine                    = engine;  	params->dispatch_flags          = dispatch_flags; -	params->batch_obj               = batch_obj;  	params->ctx                     = ctx; -	params->request                 = req; -	ret = dev_priv->gt.execbuf_submit(params, args, &eb->vmas); +	ret = execbuf_submit(params, args, &eb->vmas);  err_request: -	i915_gem_execbuffer_retire_commands(params); +	__i915_add_request(params->request, ret == 0);  err_batch_unpin:  	/* @@ -1647,11 +1852,10 @@ err_batch_unpin:  	 * active.  	 */  	if (dispatch_flags & I915_DISPATCH_SECURE) -		i915_gem_object_ggtt_unpin(batch_obj); - +		i915_vma_unpin(params->batch);  err:  	/* the request owns the ref now */ -	i915_gem_context_unreference(ctx); +	i915_gem_context_put(ctx);  	eb_destroy(eb);  	mutex_unlock(&dev->struct_mutex); |
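
Note on the relocation rewrite in the diff above: the new reloc_cache/relocate_entry pair caches the last kmap'd or iomap'd page across consecutive relocations and, on gen8+, emits a 64-bit relocation as two 32-bit writes, where the upper dword may land on the following page. The snippet below is a minimal userspace sketch of that split only, not the kernel code; every name in it (fake_reloc_cache, fake_map_page, PAGE_SZ) is invented for illustration.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SZ 4096u

struct fake_reloc_cache {
	uint8_t *base;          /* backing storage standing in for the object */
	long page;              /* index of the currently "mapped" page, -1 if none */
	uint8_t *vaddr;         /* mapping of that page */
	int use_64bit_reloc;    /* gen8+: write two dwords per relocation */
};

/* Map a page of the object, reusing the cached mapping when possible. */
static uint8_t *fake_map_page(struct fake_reloc_cache *c, long page)
{
	if (c->page != page) {
		c->vaddr = c->base + page * PAGE_SZ;
		c->page = page;
	}
	return c->vaddr;
}

/* Write target_offset into the object at 'offset', 32 or 64 bits wide. */
static void fake_relocate_entry(struct fake_reloc_cache *c,
				uint64_t offset, uint64_t target_offset)
{
	int wide = c->use_64bit_reloc;
	uint32_t value;

repeat:
	value = (uint32_t)target_offset;
	memcpy(fake_map_page(c, offset / PAGE_SZ) + offset % PAGE_SZ,
	       &value, sizeof(value));
	if (wide) {
		offset += sizeof(uint32_t); /* upper dword may roll onto the next page */
		target_offset >>= 32;
		wide = 0;
		goto repeat;
	}
}

int main(void)
{
	static uint8_t obj[2 * PAGE_SZ];
	struct fake_reloc_cache c = { obj, -1, NULL, 1 };
	uint32_t lo, hi;

	/* The relocation sits in the last dword of page 0, so the upper
	 * dword of the 64-bit value lands at the start of page 1. */
	fake_relocate_entry(&c, PAGE_SZ - 4, 0x1122334455667788ull);
	memcpy(&lo, obj + PAGE_SZ - 4, sizeof(lo));
	memcpy(&hi, obj + PAGE_SZ, sizeof(hi));
	printf("lo=%#x hi=%#x\n", lo, hi); /* lo=0x55667788 hi=0x11223344 */
	return 0;
}

The kernel version additionally tracks clflush requirements and chooses between kmap_atomic and an io-mapped GTT window (falling back to an inserted GGTT page when the object cannot be pinned in the mappable aperture); the sketch deliberately omits all of that.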