xref: /openbsd/sys/dev/pci/drm/i915/gem/i915_gem_shmem.c (revision f005ef32)
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include <linux/pagevec.h>
#include <linux/shmem_fs.h>
#include <linux/swap.h>

#include <drm/drm_cache.h>

#include "gem/i915_gem_region.h"
#include "i915_drv.h"
#include "i915_gem_object.h"
#include "i915_gem_tiling.h"
#include "i915_gemfs.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"

/*
 * Move folios to appropriate lru and release the batch, decrementing the
 * ref count of those folios.
 */
static void check_release_folio_batch(struct folio_batch *fbatch)
{
	STUB();
#ifdef notyet
	check_move_unevictable_folios(fbatch);
	__folio_batch_release(fbatch);
#endif
	cond_resched();
}

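/*
 * Release the backing pages referenced by an object's sg table. On Linux
 * the folios are optionally dirtied, marked accessed and released in
 * batches; on OpenBSD each page is dirtied individually and the whole UVM
 * aobj is unwired in one go once the loop has finished.
 */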
void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping,
			 bool dirty, bool backup,
			 struct drm_i915_gem_object *obj)
{
	struct sgt_iter sgt_iter;
	struct folio_batch fbatch;
	struct folio *last = NULL;
	struct vm_page *page;

#ifdef __linux__
	mapping_clear_unevictable(mapping);

	folio_batch_init(&fbatch);
#endif
	for_each_sgt_page(page, sgt_iter, st) {
#ifdef __linux__
		struct folio *folio = page_folio(page);

		if (folio == last)
			continue;
		last = folio;
		if (dirty)
			folio_mark_dirty(folio);
		if (backup)
			folio_mark_accessed(folio);

		if (!folio_batch_add(&fbatch, folio))
			check_release_folio_batch(&fbatch);
#else
		if (dirty)
			set_page_dirty(page);
#endif
	}
#ifdef __linux__
	if (fbatch.nr)
		check_release_folio_batch(&fbatch);
#else
	uvm_obj_unwire(obj->base.uao, 0, obj->base.size);
#endif

	sg_free_table(st);
}

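/*
 * Populate an sg table with the object's backing pages. On Linux the pages
 * come from the shmemfs mapping and contiguous folios are coalesced into
 * segments of up to max_segment bytes; on OpenBSD the pages are wired into
 * place with uvm_obj_wire() and each one gets its own PAGE_SIZE segment.
 */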
int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
			 size_t size, struct intel_memory_region *mr,
			 struct address_space *mapping,
			 unsigned int max_segment,
			 struct drm_i915_gem_object *obj)
{
	unsigned int page_count; /* restricted by sg_alloc_table */
	unsigned long i;
	struct scatterlist *sg;
	unsigned long next_pfn = 0;	/* suppress gcc warning */
	gfp_t noreclaim;
	int ret;
	struct pglist plist;
	struct vm_page *page;

	if (overflows_type(size / PAGE_SIZE, page_count))
		return -E2BIG;

	page_count = size / PAGE_SIZE;
	/*
	 * If there's no chance of allocating enough pages for the whole
	 * object, bail early.
	 */
	if (size > resource_size(&mr->region))
		return -ENOMEM;

	if (sg_alloc_table(st, page_count, GFP_KERNEL | __GFP_NOWARN))
		return -ENOMEM;
#ifdef __linux__

	/*
	 * Get the list of pages out of our struct file.  They'll be pinned
	 * at this point until we release them.
	 *
	 * Fail silently without starting the shrinker
	 */
	mapping_set_unevictable(mapping);
	noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
	noreclaim |= __GFP_NORETRY | __GFP_NOWARN;

	sg = st->sgl;
	st->nents = 0;
	for (i = 0; i < page_count; i++) {
		struct folio *folio;
		unsigned long nr_pages;
		const unsigned int shrink[] = {
			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND,
			0,
		}, *s = shrink;
		gfp_t gfp = noreclaim;

		do {
			cond_resched();
			folio = shmem_read_folio_gfp(mapping, i, gfp);
			if (!IS_ERR(folio))
				break;

			if (!*s) {
				ret = PTR_ERR(folio);
				goto err_sg;
			}

			i915_gem_shrink(NULL, i915, 2 * page_count, NULL, *s++);

			/*
			 * We've tried hard to allocate the memory by reaping
			 * our own buffer, now let the real VM do its job and
			 * go down in flames if truly OOM.
			 *
			 * However, since graphics tend to be disposable,
			 * defer the oom here by reporting the ENOMEM back
			 * to userspace.
			 */
			if (!*s) {
				/* reclaim and warn, but no oom */
				gfp = mapping_gfp_mask(mapping);

				/*
				 * Our bo are always dirty and so we require
				 * kswapd to reclaim our pages (direct reclaim
				 * does not effectively begin pageout of our
				 * buffers on its own). However, direct reclaim
				 * only waits for kswapd when under allocation
				 * congestion. So as a result __GFP_RECLAIM is
				 * unreliable and fails to actually reclaim our
				 * dirty pages -- unless you try over and over
				 * again with !__GFP_NORETRY. However, we still
				 * want to fail this allocation rather than
				 * trigger the out-of-memory killer and for
				 * this we want __GFP_RETRY_MAYFAIL.
				 */
				gfp |= __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
			}
		} while (1);

		nr_pages = min_t(unsigned long,
				folio_nr_pages(folio), page_count - i);
		if (!i ||
		    sg->length >= max_segment ||
		    folio_pfn(folio) != next_pfn) {
			if (i)
				sg = sg_next(sg);

			st->nents++;
			sg_set_folio(sg, folio, nr_pages * PAGE_SIZE, 0);
		} else {
			/* XXX: could overflow? */
			sg->length += nr_pages * PAGE_SIZE;
		}
		next_pfn = folio_pfn(folio) + nr_pages;
		i += nr_pages - 1;

		/* Check that the i965g/gm workaround works. */
		GEM_BUG_ON(gfp & __GFP_DMA32 && next_pfn >= 0x00100000UL);
	}
#else
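	/*
	 * OpenBSD: wire every page of the aobj up front. The pages come back
	 * on a pglist and are entered into the sg table one PAGE_SIZE entry
	 * at a time, so max_segment plays no role on this path.
	 */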
	sg = st->sgl;
	st->nents = 0;

	TAILQ_INIT(&plist);
	if (uvm_obj_wire(obj->base.uao, 0, obj->base.size, &plist)) {
		sg_free_table(st);
		kfree(st);
		return -ENOMEM;
	}

	i = 0;
	TAILQ_FOREACH(page, &plist, pageq) {
		if (i)
			sg = sg_next(sg);
		st->nents++;
		sg_set_page(sg, page, PAGE_SIZE, 0);
		i++;
	}
#endif
	if (sg) /* loop terminated early; short sg table */
		sg_mark_end(sg);

	/* Trim unused sg entries to avoid wasting memory. */
	i915_sg_trim(st);

	return 0;
#ifdef notyet
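/*
 * XXX: this compiled-out error path still uses the original four-argument
 * Linux shmem_sg_free_table() call; the prototype in this file has gained
 * an extra obj argument for the OpenBSD path.
 */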
err_sg:
	sg_mark_end(sg);
	if (sg != st->sgl) {
		shmem_sg_free_table(st, mapping, false, false);
	} else {
		mapping_clear_unevictable(mapping);
		sg_free_table(st);
	}

	/*
	 * shmemfs first checks if there is enough memory to allocate the page
	 * and reports ENOSPC should there be insufficient, along with the usual
	 * ENOMEM for a genuine allocation failure.
	 *
	 * We use ENOSPC in our driver to mean that we have run out of aperture
	 * space and so want to translate the error from shmemfs back to our
	 * usual understanding of ENOMEM.
	 */
	if (ret == -ENOSPC)
		ret = -ENOMEM;

	return ret;
#endif
}

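/*
 * Acquire the backing store for an object: build the sg table (from the
 * shmemfs mapping on Linux, from the wired UVM aobj on OpenBSD) and DMA-map
 * it. If DMA remapping fails with large segments, retry once with PAGE_SIZE
 * segments before giving up.
 */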
static int shmem_get_pages(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct intel_memory_region *mem = obj->mm.region;
#ifdef __linux__
	struct address_space *mapping = obj->base.filp->f_mapping;
#endif
	unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
	struct sg_table *st;
	struct sgt_iter sgt_iter;
	struct vm_page *page;
	int ret;

	/*
	 * Assert that the object is not currently in any GPU domain. As it
	 * wasn't in the GTT, there shouldn't be any way it could have been in
	 * a GPU cache
	 */
	GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
	GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);

rebuild_st:
	st = kmalloc(sizeof(*st), GFP_KERNEL | __GFP_NOWARN);
	if (!st)
		return -ENOMEM;

#ifdef __linux__
	ret = shmem_sg_alloc_table(i915, st, obj->base.size, mem, mapping,
				   max_segment);
#else
	ret = shmem_sg_alloc_table(i915, st, obj->base.size, mem, NULL,
				   max_segment, obj);
#endif
	if (ret)
		goto err_st;

	ret = i915_gem_gtt_prepare_pages(obj, st);
	if (ret) {
		/*
		 * DMA remapping failed? One possible cause is that
		 * it could not reserve enough large entries, asking
		 * for PAGE_SIZE chunks instead may be helpful.
		 */
		if (max_segment > PAGE_SIZE) {
#ifdef __linux__
			for_each_sgt_page(page, sgt_iter, st)
				put_page(page);
#else
			uvm_obj_unwire(obj->base.uao, 0, obj->base.size);
#endif
			sg_free_table(st);
			kfree(st);

			max_segment = PAGE_SIZE;
			goto rebuild_st;
		} else {
			dev_warn(i915->drm.dev,
				 "Failed to DMA remap %zu pages\n",
				 obj->base.size >> PAGE_SHIFT);
			goto err_pages;
		}
	}

	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_do_bit_17_swizzle(obj, st);

	if (i915_gem_object_can_bypass_llc(obj))
		obj->cache_dirty = true;

	__i915_gem_object_set_pages(obj, st);

	return 0;

err_pages:
#ifdef __linux__
	shmem_sg_free_table(st, mapping, false, false);
#else
	shmem_sg_free_table(st, NULL, false, false, obj);
#endif
	/*
	 * shmemfs first checks if there is enough memory to allocate the page
	 * and reports ENOSPC should there be insufficient, along with the usual
	 * ENOMEM for a genuine allocation failure.
	 *
	 * We use ENOSPC in our driver to mean that we have run out of aperture
	 * space and so want to translate the error from shmemfs back to our
	 * usual understanding of ENOMEM.
	 */
err_st:
	if (ret == -ENOSPC)
		ret = -ENOMEM;

	kfree(st);

	return ret;
}

static int
shmem_truncate(struct drm_i915_gem_object *obj)
{
	/*
	 * Our goal here is to return as much of the memory as
	 * is possible back to the system as we are called from OOM.
	 * To do this we must instruct the shmfs to drop all of its
	 * backing pages, *now*.
	 */
#ifdef __linux__
	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
#else
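	/*
	 * OpenBSD: there is no shmem_truncate_range(); flush and free every
	 * resident page of the aobj through its pager's pgo_flush() hook
	 * instead.
	 */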
	rw_enter(obj->base.uao->vmobjlock, RW_WRITE);
	obj->base.uao->pgops->pgo_flush(obj->base.uao, 0, obj->base.size,
	    PGO_ALLPAGES | PGO_FREE);
	rw_exit(obj->base.uao->vmobjlock);
#endif
	obj->mm.madv = __I915_MADV_PURGED;
	obj->mm.pages = ERR_PTR(-EFAULT);

	return 0;
}

void __shmem_writeback(size_t size, struct address_space *mapping)
{
	STUB();
#ifdef notyet
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,
		.nr_to_write = SWAP_CLUSTER_MAX,
		.range_start = 0,
		.range_end = LLONG_MAX,
		.for_reclaim = 1,
	};
	unsigned long i;

	/*
	 * Leave mmappings intact (GTT will have been revoked on unbinding,
	 * leaving only CPU mmappings around) and add those pages to the LRU
	 * instead of invoking writeback so they are aged and paged out
	 * as normal.
	 */

	/* Begin writeback on each dirty page */
	for (i = 0; i < size >> PAGE_SHIFT; i++) {
		struct vm_page *page;

		page = find_lock_page(mapping, i);
		if (!page)
			continue;

		if (!page_mapped(page) && clear_page_dirty_for_io(page)) {
			int ret;

			SetPageReclaim(page);
			ret = mapping->a_ops->writepage(page, &wbc);
			if (!PageWriteback(page))
				ClearPageReclaim(page);
			if (!ret)
				goto put;
		}
		unlock_page(page);
put:
		put_page(page);
	}
#endif
}

static void
shmem_writeback(struct drm_i915_gem_object *obj)
{
	STUB();
#ifdef notyet
	__shmem_writeback(obj->base.size, obj->base.filp->f_mapping);
#endif
}

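/*
 * Shrinker callback: purge objects userspace has marked DONTNEED, leave
 * already-purged objects alone, and otherwise optionally start writeback of
 * the remaining dirty pages (currently a stub on OpenBSD).
 */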
static int shmem_shrink(struct drm_i915_gem_object *obj, unsigned int flags)
{
	switch (obj->mm.madv) {
	case I915_MADV_DONTNEED:
		return i915_gem_object_truncate(obj);
	case __I915_MADV_PURGED:
		return 0;
	}

	if (flags & I915_GEM_OBJECT_SHRINK_WRITEBACK)
		shmem_writeback(obj);

	return 0;
}

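/*
 * Common teardown before the backing pages are released: drop the dirty
 * flag for DONTNEED objects, clflush if the CPU could otherwise read stale
 * data, and move the object back to the CPU write domain.
 */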
void
__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
				struct sg_table *pages,
				bool needs_clflush)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);

	if (obj->mm.madv == I915_MADV_DONTNEED)
		obj->mm.dirty = false;

	if (needs_clflush &&
	    (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
	    !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
		drm_clflush_sg(pages);

	__start_cpu_write(obj);
	/*
	 * On non-LLC igfx platforms, force the flush-on-acquire if this is ever
	 * swapped-in. Our async flush path is not trustworthy enough yet (and
	 * happens in the wrong order), and with some tricks it's conceivable
	 * for userspace to change the cache-level to I915_CACHE_NONE after the
	 * pages are swapped-in, and since execbuf binds the object before doing
	 * the async flush, we have a race window.
	 */
	if (!HAS_LLC(i915) && !IS_DGFX(i915))
		obj->cache_dirty = true;
}

void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_table *pages)
{
	__i915_gem_object_release_shmem(obj, pages, true);

	i915_gem_gtt_finish_pages(obj, pages);

	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_save_bit_17_swizzle(obj, pages);

#ifdef __linux__
	shmem_sg_free_table(pages, file_inode(obj->base.filp)->i_mapping,
			    obj->mm.dirty, obj->mm.madv == I915_MADV_WILLNEED);
#else
	shmem_sg_free_table(pages, NULL,
			    obj->mm.dirty, obj->mm.madv == I915_MADV_WILLNEED, obj);
#endif
	kfree(pages);
	obj->mm.dirty = false;
}

static void
shmem_put_pages(struct drm_i915_gem_object *obj, struct sg_table *pages)
{
	if (likely(i915_gem_object_has_struct_page(obj)))
		i915_gem_object_put_pages_shmem(obj, pages);
	else
		i915_gem_object_put_pages_phys(obj, pages);
}

static int
shmem_pwrite(struct drm_i915_gem_object *obj,
	     const struct drm_i915_gem_pwrite *arg)
{
#ifdef __linux__
	struct address_space *mapping = obj->base.filp->f_mapping;
	const struct address_space_operations *aops = mapping->a_ops;
#endif
	char __user *user_data = u64_to_user_ptr(arg->data_ptr);
	u64 remain, offset;
	unsigned int pg;

	/* Caller already validated user args */
	GEM_BUG_ON(!access_ok(user_data, arg->size));

	if (!i915_gem_object_has_struct_page(obj))
		return i915_gem_object_pwrite_phys(obj, arg);

	/*
	 * Before we instantiate/pin the backing store for our use, we
	 * can prepopulate the shmemfs filp efficiently using a write into
	 * the pagecache. We avoid the penalty of instantiating all the
	 * pages, important if the user is just writing to a few and never
	 * uses the object on the GPU, and using a direct write into shmemfs
	 * allows it to avoid the cost of retrieving a page (either swapin
	 * or clearing-before-use) before it is overwritten.
	 */
	if (i915_gem_object_has_pages(obj))
		return -ENODEV;

	if (obj->mm.madv != I915_MADV_WILLNEED)
		return -EFAULT;

	/*
	 * Before the pages are instantiated the object is treated as being
	 * in the CPU domain. The pages will be clflushed as required before
	 * use, and we can freely write into the pages directly. If userspace
	 * races pwrite with any other operation, corruption will ensue -
	 * that is userspace's prerogative!
	 */

	remain = arg->size;
	offset = arg->offset;
	pg = offset_in_page(offset);

	do {
		unsigned int len, unwritten;
		struct vm_page *page;
		void *data, *vaddr;
		int err;
		char __maybe_unused c;

		len = PAGE_SIZE - pg;
		if (len > remain)
			len = remain;

		/* Prefault the user page to reduce potential recursion */
		err = __get_user(c, user_data);
		if (err)
			return err;

		err = __get_user(c, user_data + len - 1);
		if (err)
			return err;

#ifdef __linux__
		err = aops->write_begin(obj->base.filp, mapping, offset, len,
					&page, &data);
		if (err < 0)
			return err;
#else
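		/*
		 * OpenBSD: there is no write_begin()/write_end(); wire the
		 * single destination page of the aobj, copy into it below and
		 * unwire it again afterwards.
		 */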
		struct pglist plist;
		TAILQ_INIT(&plist);
		if (uvm_obj_wire(obj->base.uao, trunc_page(offset),
		    trunc_page(offset) + PAGE_SIZE, &plist)) {
			return -ENOMEM;
		}
		page = TAILQ_FIRST(&plist);
#endif

		vaddr = kmap_atomic(page);
		unwritten = __copy_from_user_inatomic(vaddr + pg,
						      user_data,
						      len);
		kunmap_atomic(vaddr);

#ifdef __linux__
		err = aops->write_end(obj->base.filp, mapping, offset, len,
				      len - unwritten, page, data);
		if (err < 0)
			return err;
#else
		uvm_obj_unwire(obj->base.uao, trunc_page(offset),
		    trunc_page(offset) + PAGE_SIZE);
#endif

		/* We don't handle -EFAULT, leave it to the caller to check */
		if (unwritten)
			return -ENODEV;

		remain -= len;
		user_data += len;
		offset += len;
		pg = 0;
	} while (remain);

	return 0;
}

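/*
 * pread is only implemented here for objects without struct pages (the phys
 * backing store); regular shmem-backed objects simply return -ENODEV.
 */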
static int
shmem_pread(struct drm_i915_gem_object *obj,
	    const struct drm_i915_gem_pread *arg)
{
	if (!i915_gem_object_has_struct_page(obj))
		return i915_gem_object_pread_phys(obj, arg);

	return -ENODEV;
}

static void shmem_release(struct drm_i915_gem_object *obj)
{
	if (i915_gem_object_has_struct_page(obj))
		i915_gem_object_release_memory_region(obj);

#ifdef __linux__
	fput(obj->base.filp);
#endif
}

const struct drm_i915_gem_object_ops i915_gem_shmem_ops = {
	.name = "i915_gem_object_shmem",
	.flags = I915_GEM_OBJECT_IS_SHRINKABLE,

	.get_pages = shmem_get_pages,
	.put_pages = shmem_put_pages,
	.truncate = shmem_truncate,
	.shrink = shmem_shrink,

	.pwrite = shmem_pwrite,
	.pread = shmem_pread,

	.release = shmem_release,
};

#ifdef __linux__
static int __create_shmem(struct drm_i915_private *i915,
			  struct drm_gem_object *obj,
			  resource_size_t size)
{
	unsigned long flags = VM_NORESERVE;
	struct file *filp;

	drm_gem_private_object_init(&i915->drm, obj, size);

	/* XXX: The __shmem_file_setup() function returns -EINVAL if size is
	 * greater than MAX_LFS_FILESIZE.
	 * To handle the same error as other code that returns -E2BIG when
	 * the size is too large, we also return -E2BIG when the size is
	 * larger than what can be handled.
	 * If BITS_PER_LONG is 32, size > MAX_LFS_FILESIZE is always false,
	 * so we only need to check when BITS_PER_LONG is 64.
	 * If BITS_PER_LONG is 32, the E2BIG check is done when
	 * i915_gem_object_size_2big() is called before the init_object()
	 * callback is called.
	 */
	if (BITS_PER_LONG == 64 && size > MAX_LFS_FILESIZE)
		return -E2BIG;

	if (i915->mm.gemfs)
		filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size,
						 flags);
	else
		filp = shmem_file_setup("i915", size, flags);
	if (IS_ERR(filp))
		return PTR_ERR(filp);

	obj->filp = filp;
	return 0;
}
#endif

static int shmem_object_init(struct intel_memory_region *mem,
			     struct drm_i915_gem_object *obj,
			     resource_size_t offset,
			     resource_size_t size,
			     resource_size_t page_size,
			     unsigned int flags)
{
	static struct lock_class_key lock_class;
	struct drm_i915_private *i915 = mem->i915;
	struct address_space *mapping;
	unsigned int cache_level;
	gfp_t mask;
	int ret;

#ifdef __linux__
	ret = __create_shmem(i915, &obj->base, size);
#else
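	/*
	 * OpenBSD: drm_gem_object_init() backs the object with a UVM aobj
	 * rather than a shmemfs file.
	 */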
	ret = drm_gem_object_init(&i915->drm, &obj->base, size);
#endif
	if (ret)
		return ret;

	mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
	if (IS_I965GM(i915) || IS_I965G(i915)) {
		/* 965gm cannot relocate objects above 4GiB. */
		mask &= ~__GFP_HIGHMEM;
		mask |= __GFP_DMA32;
	}

#ifdef __linux__
	mapping = obj->base.filp->f_mapping;
	mapping_set_gfp_mask(mapping, mask);
	GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
#endif

	i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, flags);
	obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;
	obj->write_domain = I915_GEM_DOMAIN_CPU;
	obj->read_domains = I915_GEM_DOMAIN_CPU;

	/*
	 * MTL doesn't snoop the CPU cache by default for GPU access (namely
	 * 1-way coherency). However, some UMDs currently depend on that.
	 * Make 1-way coherency the default setting for MTL. A follow-up
	 * patch will extend the GEM_CREATE uAPI to allow UMDs to specify the
	 * caching mode at BO creation time.
	 */
	if (HAS_LLC(i915) || (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)))
		/* On some devices, we can have the GPU use the LLC (the CPU
		 * cache) for about a 10% performance improvement
		 * compared to uncached.  Graphics requests other than
		 * display scanout are coherent with the CPU in
		 * accessing this cache.  This means in this mode we
		 * don't need to clflush on the CPU side, and on the
		 * GPU side we only need to flush internal caches to
		 * get data visible to the CPU.
		 *
		 * However, we maintain the display planes as UC, and so
		 * need to rebind when first used as such.
		 */
		cache_level = I915_CACHE_LLC;
	else
		cache_level = I915_CACHE_NONE;

	i915_gem_object_set_cache_coherency(obj, cache_level);

	i915_gem_object_init_memory_region(obj, mem);

	return 0;
}

struct drm_i915_gem_object *
i915_gem_object_create_shmem(struct drm_i915_private *i915,
			     resource_size_t size)
{
	return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_SMEM],
					     size, 0, 0);
}

/* Allocate a new GEM object and fill it with the supplied data */
#ifdef __linux__
struct drm_i915_gem_object *
i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv,
				       const void *data, resource_size_t size)
{
	struct drm_i915_gem_object *obj;
	struct file *file;
	const struct address_space_operations *aops;
	resource_size_t offset;
	int err;

	GEM_WARN_ON(IS_DGFX(dev_priv));
	obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE));
	if (IS_ERR(obj))
		return obj;

	GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);

	file = obj->base.filp;
	aops = file->f_mapping->a_ops;
	offset = 0;
	do {
		unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
		struct vm_page *page;
		void *pgdata, *vaddr;

		err = aops->write_begin(file, file->f_mapping, offset, len,
					&page, &pgdata);
		if (err < 0)
			goto fail;

		vaddr = kmap(page);
		memcpy(vaddr, data, len);
		kunmap(page);

		err = aops->write_end(file, file->f_mapping, offset, len, len,
				      page, pgdata);
		if (err < 0)
			goto fail;

		size -= len;
		data += len;
		offset += len;
	} while (size);

	return obj;

fail:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}
#else /* !__linux__ */
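/*
 * OpenBSD variant of the above: instead of write_begin()/write_end(), wire
 * each page of the aobj, copy the data in with kmap() and unwire it again.
 */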
struct drm_i915_gem_object *
i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv,
				       const void *data, resource_size_t size)
{
	struct drm_i915_gem_object *obj;
	struct uvm_object *uao;
	resource_size_t offset;
	int err;

	GEM_WARN_ON(IS_DGFX(dev_priv));
	obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE));
	if (IS_ERR(obj))
		return obj;

	GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);

	uao = obj->base.uao;
	offset = 0;
	do {
		unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
		struct vm_page *page;
		void *pgdata, *vaddr;
		struct pglist plist;

		TAILQ_INIT(&plist);
		if (uvm_obj_wire(uao, trunc_page(offset),
		    trunc_page(offset) + PAGE_SIZE, &plist)) {
			err = -ENOMEM;
			goto fail;
		}
		page = TAILQ_FIRST(&plist);

		vaddr = kmap(page);
		memcpy(vaddr, data, len);
		kunmap_va(vaddr);

		uvm_obj_unwire(uao, trunc_page(offset),
		    trunc_page(offset) + PAGE_SIZE);

		size -= len;
		data += len;
		offset += len;
	} while (size);

	return obj;

fail:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}
#endif

static int init_shmem(struct intel_memory_region *mem)
{
	i915_gemfs_init(mem->i915);
	intel_memory_region_set_name(mem, "system");

	return 0; /* We fall back to the kernel mnt if gemfs init failed. */
}

static int release_shmem(struct intel_memory_region *mem)
{
	i915_gemfs_fini(mem->i915);
	return 0;
}

static const struct intel_memory_region_ops shmem_region_ops = {
	.init = init_shmem,
	.release = release_shmem,
	.init_object = shmem_object_init,
};

struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915,
						 u16 type, u16 instance)
{
	return intel_memory_region_create(i915, 0,
					  totalram_pages() << PAGE_SHIFT,
					  PAGE_SIZE, 0, 0,
					  type, instance,
					  &shmem_region_ops);
}

bool i915_gem_object_is_shmem(const struct drm_i915_gem_object *obj)
{
	return obj->ops == &i915_gem_shmem_ops;
}