/*	$NetBSD: i915_gem_domain.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $	*/

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: i915_gem_domain.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $");

#include "display/intel_frontbuffer.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_vma.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asynchronously.
	 */
	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
	if (obj->cache_dirty)
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
	obj->write_domain = 0;
}

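/*
 * Flush any pending CPU writes on a framebuffer object so that the display
 * engine sees coherent data; objects not used as framebuffers are ignored.
 */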
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_is_framebuffer(obj))
		return;

	i915_gem_object_lock(obj);
	__i915_gem_object_flush_for_display(obj);
	i915_gem_object_unlock(obj);
}

/**
 * Moves a single object to the WC read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_WC)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * WC domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_WC;
	if (write) {
		obj->read_domains = I915_GEM_DOMAIN_WC;
		obj->write_domain = I915_GEM_DOMAIN_WC;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * Moves a single object to the GTT read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		struct i915_vma *vma;

		obj->read_domains = I915_GEM_DOMAIN_GTT;
		obj->write_domain = I915_GEM_DOMAIN_GTT;
		obj->mm.dirty = true;

		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
				i915_vma_set_ggtt_write(vma);
		spin_unlock(&obj->vma.lock);
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	int ret;

	if (obj->cache_level == cache_level)
		return 0;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		return ret;

	/* Always invalidate stale cachelines */
	if (obj->cache_level != cache_level) {
		i915_gem_object_set_cache_coherency(obj, cache_level);
		obj->cache_dirty = true;
	}

	i915_gem_object_unlock(obj);

	/* The cache-level will be applied when each vma is rebound. */
	return i915_gem_object_unbind(obj,
				      I915_GEM_OBJECT_UNBIND_ACTIVE |
				      I915_GEM_OBJECT_UNBIND_BARRIER);
}

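/*
 * Report an object's caching mode to userspace, translating the internal
 * cache level into the uAPI I915_CACHING_* values under an RCU lookup.
 */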
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	int err = 0;

	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj) {
		err = -ENOENT;
		goto out;
	}

	switch (obj->cache_level) {
	case I915_CACHE_LLC:
	case I915_CACHE_L3_LLC:
		args->caching = I915_CACHING_CACHED;
		break;

	case I915_CACHE_WT:
		args->caching = I915_CACHING_DISPLAY;
		break;

	default:
		args->caching = I915_CACHING_NONE;
		break;
	}
out:
	rcu_read_unlock();
	return err;
}

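/*
 * Change an object's caching mode on behalf of userspace: map the uAPI
 * I915_CACHING_* request onto an internal cache level and apply it via
 * i915_gem_object_set_cache_level(). Proxy objects are rejected, as their
 * caching mode is owned by their generator.
 */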
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret = 0;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * The caching mode of a proxy object is handled by its generator and
	 * is not allowed to be changed by userspace.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		ret = -ENXIO;
		goto out;
	}

	ret = i915_gem_object_set_cache_level(obj, level);

out:
	i915_gem_object_put(obj);
	return ret;
}

/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display, the callers are responsible for frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     u32 alignment,
				     const struct i915_ggtt_view *view,
				     unsigned int flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;
	int ret;

	/* Frame buffer must be in LMEM (no migration yet) */
	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
		return ERR_PTR(-EINVAL);

	/*
	 * The display engine is not coherent with the LLC cache on gen6.  As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is lowest common denominator for all
	 * chipsets.
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(i915) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret)
		return ERR_PTR(ret);

	/*
	 * As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers. However,
	 * it may simply be too big to fit into mappable, in which case
	 * put it anyway and hope that userspace can cope (but always first
	 * try to preserve the existing ABI).
	 */
	vma = ERR_PTR(-ENOSPC);
	if ((flags & PIN_MAPPABLE) == 0 &&
	    (!view || view->type == I915_GGTT_VIEW_NORMAL))
		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
					       flags |
					       PIN_MAPPABLE |
					       PIN_NONBLOCK);
	if (IS_ERR(vma))
		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
	if (IS_ERR(vma))
		return vma;

	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);

	i915_gem_object_flush_if_display(obj);

	return vma;
}

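/*
 * Mark the object as most-recently-used: move all its bound GGTT VMAs to
 * the tail of the VM's bound list and, if the object is shrinkable, bump
 * it to the tail of the shrink list so it is considered for eviction last.
 */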
static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
	if (!atomic_read(&obj->bind_count))
		return;

	mutex_lock(&i915->ggtt.vm.mutex);
	spin_lock(&obj->vma.lock);
	for_each_ggtt_vma(vma, obj) {
		if (!drm_mm_node_allocated(&vma->node))
			continue;

		GEM_BUG_ON(vma->vm != &i915->ggtt.vm);
		list_move_tail(&vma->vm_link, &vma->vm->bound_list);
	}
	spin_unlock(&obj->vma.lock);
	mutex_unlock(&i915->ggtt.vm.mutex);

	if (i915_gem_object_is_shrinkable(obj)) {
		unsigned long flags;

		spin_lock_irqsave(&i915->mm.obj_lock, flags);

		if (obj->mm.madv == I915_MADV_WILLNEED &&
		    !atomic_read(&obj->mm.shrink_pin))
			list_move_tail(&obj->mm.link, &i915->mm.shrink_list);

		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
	}
}

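/*
 * Release a pin taken by i915_gem_object_pin_to_display_plane(), bumping
 * the object on the GGTT and shrinker LRU lists first so the framebuffer
 * is less likely to be evicted while flips are still in flight.
 */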
void
i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
{
	struct drm_i915_gem_object *obj = vma->obj;

	assert_object_held(obj);

	/* Bump the LRU to try and avoid premature eviction whilst flipping */
	i915_gem_object_bump_inactive_ggtt(obj);

	i915_vma_unpin(vma);
}

/**
 * Moves a single object to the CPU read, and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		obj->read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write)
		__start_cpu_write(obj);

	return 0;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	u32 read_domains = args->read_domains;
	u32 write_domain = args->write_domain;
	int err;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/*
	 * Having something in the write domain implies it's in the read
	 * domain, and only that read domain.  Enforce that in the request.
	 */
	if (write_domain && read_domains != write_domain)
		return -EINVAL;

	if (!read_domains)
		return 0;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the request read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains) {
		err = 0;
		goto out;
	}

	/*
	 * Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (err)
		goto out;

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out;

	err = i915_gem_object_lock_interruptible(obj);
	if (err)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

	/* And bump the LRU for this access */
	i915_gem_object_bump_inactive_ggtt(obj);

	i915_gem_object_unlock(obj);

	if (write_domain)
		i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out_unpin:
	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return err;
}

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
				 unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		return ret;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto err_unlock;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err_unlock;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourself into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
err_unlock:
	i915_gem_object_unlock(obj);
	return ret;
}

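/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses, waiting for both readers and writers to complete. Sets
 * needs_clflush to a non-zero mask of CLFLUSH_BEFORE/CLFLUSH_AFTER if
 * the caller must flush the CPU cache around its write.
 */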
int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
				  unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		return ret;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto err_unlock;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err_unlock;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourself into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
err_unlock:
	i915_gem_object_unlock(obj);
	return ret;
}