xref: /dragonfly/sys/dev/drm/i915/i915_gem_gtt.c (revision a4f37ab4)
1 /*
2  * Copyright © 2010 Daniel Vetter
3  * Copyright © 2011-2014 Intel Corporation
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  *
24  */
25 
26 #include <linux/seq_file.h>
27 #include <drm/drmP.h>
28 #include <drm/i915_drm.h>
29 #include "i915_drv.h"
30 #include "i915_vgpu.h"
31 #include "i915_trace.h"
32 #include "intel_drv.h"
33 
34 #include <linux/bitmap.h>
35 
36 #include <sys/mplock2.h>
37 
38 /**
39  * DOC: Global GTT views
40  *
41  * Background and previous state
42  *
43  * Historically objects could exist (be bound) in global GTT space only as
44  * singular instances with a view representing all of the object's backing pages
45  * in a linear fashion. This view is called a normal view.
46  *
47  * To support multiple views of the same object, where the number of mapped
48  * pages is not equal to the backing store, or where the layout of the pages
49  * is not linear, the concept of a GGTT view was added.
50  *
51  * One example of an alternative view is a stereo display driven by a single
52  * image. In this case we would have a framebuffer looking like this
53  * (2x2 pages):
54  *
55  *    12
56  *    34
57  *
58  * Above would represent a normal GGTT view as normally mapped for GPU or CPU
59  * rendering. In contrast, fed to the display engine would be an alternative
60  * view which could look something like this:
61  *
62  *   1212
63  *   3434
64  *
65  * In this example both the size and layout of pages in the alternative view
66  * are different from the normal view.
67  *
68  * Implementation and usage
69  *
70  * GGTT views are implemented using VMAs and are distinguished via enum
71  * i915_ggtt_view_type and struct i915_ggtt_view.
72  *
73  * A new flavour of core GEM functions which work with GGTT bound objects was
74  * added with the _ggtt_ infix, and sometimes with the _view postfix, to avoid
75  * renaming in large amounts of code. They take a struct i915_ggtt_view
76  * parameter encapsulating all metadata required to implement a view.
77  *
78  * As a helper for callers which are only interested in the normal view, a
79  * globally const i915_ggtt_view_normal singleton instance exists. All old core
80  * GEM API functions, the ones not taking the view parameter, operate on, or
81  * with, the normal GGTT view.
82  *
83  * Code wanting to add or use a new GGTT view needs to:
84  *
85  * 1. Add a new enum with a suitable name.
86  * 2. Extend the metadata in the i915_ggtt_view structure if required.
87  * 3. Add support to i915_get_ggtt_vma_pages().
88  *
89  * New views are required to build a scatter-gather table from within the
90  * i915_get_ggtt_vma_pages function. This table is stored in the vma.ggtt_view
91  * and exists for the lifetime of a VMA.
92  *
93  * The core API is designed to have copy semantics, which means that the
94  * passed in struct i915_ggtt_view does not need to be persistent (left around
95  * after calling the core API functions).
96  *
97  */
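/*
 * Illustrative sketch of the copy semantics described above (not used by the
 * driver itself; only the .type member of struct i915_ggtt_view is assumed
 * here):
 *
 *	struct i915_ggtt_view view = { .type = I915_GGTT_VIEW_ROTATED };
 *
 *	// Pass &view to a _view flavoured GEM call; the core copies the
 *	// descriptor, so the local may go out of scope once the call returns.
 */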
98 
99 static int
100 i915_get_ggtt_vma_pages(struct i915_vma *vma);
101 
102 const struct i915_ggtt_view i915_ggtt_view_normal = {
103 	.type = I915_GGTT_VIEW_NORMAL,
104 };
105 const struct i915_ggtt_view i915_ggtt_view_rotated = {
106 	.type = I915_GGTT_VIEW_ROTATED,
107 };
108 
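/*
 * Sanitize the requested enable_ppgtt value against what the hardware and the
 * chosen submission mechanism can support. The return value uses the same
 * encoding as the parameter: 0 = disabled, 1 = aliasing PPGTT, 2 = full PPGTT,
 * 3 = full 48-bit PPGTT.
 */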
109 static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
110 {
111 	bool has_aliasing_ppgtt;
112 	bool has_full_ppgtt;
113 	bool has_full_48bit_ppgtt;
114 
115 	has_aliasing_ppgtt = INTEL_INFO(dev)->gen >= 6;
116 	has_full_ppgtt = INTEL_INFO(dev)->gen >= 7;
117 	has_full_48bit_ppgtt = IS_BROADWELL(dev) || INTEL_INFO(dev)->gen >= 9;
118 
119 	if (intel_vgpu_active(dev))
120 		has_full_ppgtt = false; /* emulation is too hard */
121 
122 	/*
123 	 * We don't allow disabling PPGTT for gen9+ as it's a requirement for
124 	 * execlists, the sole mechanism available to submit work.
125 	 */
126 	if (INTEL_INFO(dev)->gen < 9 &&
127 	    (enable_ppgtt == 0 || !has_aliasing_ppgtt))
128 		return 0;
129 
130 	if (enable_ppgtt == 1)
131 		return 1;
132 
133 	if (enable_ppgtt == 2 && has_full_ppgtt)
134 		return 2;
135 
136 	if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
137 		return 3;
138 
139 #ifdef CONFIG_INTEL_IOMMU
140 	/* Disable ppgtt on SNB if VT-d is on. */
141 	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
142 		DRM_INFO("Disabling PPGTT because VT-d is on\n");
143 		return 0;
144 	}
145 #endif
146 
147 	/* Early VLV doesn't have this */
148 	if (IS_VALLEYVIEW(dev) && dev->pdev->revision < 0xb) {
149 		DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
150 		return 0;
151 	}
152 
153 	if (INTEL_INFO(dev)->gen >= 8 && i915.enable_execlists)
154 		return has_full_48bit_ppgtt ? 3 : 2;
155 	else
156 		return has_aliasing_ppgtt ? 1 : 0;
157 }
158 
159 static int ppgtt_bind_vma(struct i915_vma *vma,
160 			  enum i915_cache_level cache_level,
161 			  u32 unused)
162 {
163 	u32 pte_flags = 0;
164 
165 	/* Currently applicable only to VLV */
166 	if (vma->obj->gt_ro)
167 		pte_flags |= PTE_READ_ONLY;
168 
169 	vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
170 				cache_level, pte_flags);
171 
172 	return 0;
173 }
174 
175 static void ppgtt_unbind_vma(struct i915_vma *vma)
176 {
177 	vma->vm->clear_range(vma->vm,
178 			     vma->node.start,
179 			     vma->obj->base.size,
180 			     true);
181 }
182 
183 static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
184 				  enum i915_cache_level level,
185 				  bool valid)
186 {
187 	gen8_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
188 	pte |= addr;
189 
190 	switch (level) {
191 	case I915_CACHE_NONE:
192 		pte |= PPAT_UNCACHED_INDEX;
193 		break;
194 	case I915_CACHE_WT:
195 		pte |= PPAT_DISPLAY_ELLC_INDEX;
196 		break;
197 	default:
198 		pte |= PPAT_CACHED_INDEX;
199 		break;
200 	}
201 
202 	return pte;
203 }
204 
205 static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
206 				  const enum i915_cache_level level)
207 {
208 	gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
209 	pde |= addr;
210 	if (level != I915_CACHE_NONE)
211 		pde |= PPAT_CACHED_PDE_INDEX;
212 	else
213 		pde |= PPAT_UNCACHED_INDEX;
214 	return pde;
215 }
216 
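/*
 * The gen8 PDPE and PML4E entries share the PDE layout, so the PDE encode
 * helper is simply reused for the two upper levels.
 */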
217 #define gen8_pdpe_encode gen8_pde_encode
218 #define gen8_pml4e_encode gen8_pde_encode
219 
220 static gen6_pte_t snb_pte_encode(dma_addr_t addr,
221 				 enum i915_cache_level level,
222 				 bool valid, u32 unused)
223 {
224 	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
225 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
226 
227 	switch (level) {
228 	case I915_CACHE_L3_LLC:
229 	case I915_CACHE_LLC:
230 		pte |= GEN6_PTE_CACHE_LLC;
231 		break;
232 	case I915_CACHE_NONE:
233 		pte |= GEN6_PTE_UNCACHED;
234 		break;
235 	default:
236 		MISSING_CASE(level);
237 	}
238 
239 	return pte;
240 }
241 
242 static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
243 				 enum i915_cache_level level,
244 				 bool valid, u32 unused)
245 {
246 	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
247 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
248 
249 	switch (level) {
250 	case I915_CACHE_L3_LLC:
251 		pte |= GEN7_PTE_CACHE_L3_LLC;
252 		break;
253 	case I915_CACHE_LLC:
254 		pte |= GEN6_PTE_CACHE_LLC;
255 		break;
256 	case I915_CACHE_NONE:
257 		pte |= GEN6_PTE_UNCACHED;
258 		break;
259 	default:
260 		MISSING_CASE(level);
261 	}
262 
263 	return pte;
264 }
265 
266 static gen6_pte_t byt_pte_encode(dma_addr_t addr,
267 				 enum i915_cache_level level,
268 				 bool valid, u32 flags)
269 {
270 	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
271 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
272 
273 	if (!(flags & PTE_READ_ONLY))
274 		pte |= BYT_PTE_WRITEABLE;
275 
276 	if (level != I915_CACHE_NONE)
277 		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
278 
279 	return pte;
280 }
281 
282 static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
283 				 enum i915_cache_level level,
284 				 bool valid, u32 unused)
285 {
286 	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
287 	pte |= HSW_PTE_ADDR_ENCODE(addr);
288 
289 	if (level != I915_CACHE_NONE)
290 		pte |= HSW_WB_LLC_AGE3;
291 
292 	return pte;
293 }
294 
295 static gen6_pte_t iris_pte_encode(dma_addr_t addr,
296 				  enum i915_cache_level level,
297 				  bool valid, u32 unused)
298 {
299 	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
300 	pte |= HSW_PTE_ADDR_ENCODE(addr);
301 
302 	switch (level) {
303 	case I915_CACHE_NONE:
304 		break;
305 	case I915_CACHE_WT:
306 		pte |= HSW_WT_ELLC_LLC_AGE3;
307 		break;
308 	default:
309 		pte |= HSW_WB_ELLC_LLC_AGE3;
310 		break;
311 	}
312 
313 	return pte;
314 }
315 
316 static int __setup_page_dma(struct drm_device *dev,
317 			    struct i915_page_dma *p, gfp_t flags)
318 {
319 	struct device *device = &dev->pdev->dev;
320 
321 	p->page = alloc_page(flags);
322 	if (!p->page)
323 		return -ENOMEM;
324 
325 	p->daddr = dma_map_page(device,
326 				p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL);
327 
328 	if (dma_mapping_error(device, p->daddr)) {
329 		__free_page(p->page);
330 		return -EINVAL;
331 	}
332 
333 	return 0;
334 }
335 
336 static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
337 {
338 	return __setup_page_dma(dev, p, GFP_KERNEL);
339 }
340 
341 static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
342 {
343 	if (WARN_ON(!p->page))
344 		return;
345 
346 	dma_unmap_page(&dev->pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL);
347 	__free_page(p->page);
348 	memset(p, 0, sizeof(*p));
349 }
350 
351 static void *kmap_page_dma(struct i915_page_dma *p)
352 {
353 	return kmap_atomic(p->page);
354 }
355 
356 /* We use the flushing unmap only with ppgtt structures:
357  * page directories, page tables and scratch pages.
358  */
359 static void kunmap_page_dma(struct drm_device *dev, void *vaddr)
360 {
361 	/* There are only a few exceptions for gen >= 6: chv and bxt.
362 	 * And we are not sure about the latter, so play safe for now.
363 	 */
364 	if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
365 		drm_clflush_virt_range(vaddr, PAGE_SIZE);
366 
367 	kunmap_atomic(vaddr);
368 }
369 
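/*
 * The px_*() wrappers below operate on any paging structure (page table, page
 * directory, pdp or pml4) by resolving its embedded i915_page_dma via
 * px_base().
 */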
370 #define kmap_px(px) kmap_page_dma(px_base(px))
371 #define kunmap_px(ppgtt, vaddr) kunmap_page_dma((ppgtt)->base.dev, (vaddr))
372 
373 #define setup_px(dev, px) setup_page_dma((dev), px_base(px))
374 #define cleanup_px(dev, px) cleanup_page_dma((dev), px_base(px))
375 #define fill_px(dev, px, v) fill_page_dma((dev), px_base(px), (v))
376 #define fill32_px(dev, px, v) fill_page_dma_32((dev), px_base(px), (v))
377 
378 static void fill_page_dma(struct drm_device *dev, struct i915_page_dma *p,
379 			  const uint64_t val)
380 {
381 	int i;
382 	uint64_t * const vaddr = kmap_page_dma(p);
383 
384 	for (i = 0; i < 512; i++)
385 		vaddr[i] = val;
386 
387 	kunmap_page_dma(dev, vaddr);
388 }
389 
390 static void fill_page_dma_32(struct drm_device *dev, struct i915_page_dma *p,
391 			     const uint32_t val32)
392 {
393 	uint64_t v = val32;
394 
395 	v = v << 32 | val32;
396 
397 	fill_page_dma(dev, p, v);
398 }
399 
400 static struct i915_page_scratch *alloc_scratch_page(struct drm_device *dev)
401 {
402 	struct i915_page_scratch *sp;
403 	int ret;
404 
405 	sp = kzalloc(sizeof(*sp), GFP_KERNEL);
406 	if (sp == NULL)
407 		return ERR_PTR(-ENOMEM);
408 
409 	ret = __setup_page_dma(dev, px_base(sp), GFP_DMA32 | __GFP_ZERO);
410 	if (ret) {
411 		kfree(sp);
412 		return ERR_PTR(ret);
413 	}
414 
415 	set_pages_uc(px_page(sp), 1);
416 
417 	return sp;
418 }
419 
420 static void free_scratch_page(struct drm_device *dev,
421 			      struct i915_page_scratch *sp)
422 {
423 	set_pages_wb(px_page(sp), 1);
424 
425 	cleanup_px(dev, sp);
426 	kfree(sp);
427 }
428 
429 static struct i915_page_table *alloc_pt(struct drm_device *dev)
430 {
431 	struct i915_page_table *pt;
432 	const size_t count = INTEL_INFO(dev)->gen >= 8 ?
433 		GEN8_PTES : GEN6_PTES;
434 	int ret = -ENOMEM;
435 
436 	pt = kzalloc(sizeof(*pt), GFP_KERNEL);
437 	if (!pt)
438 		return ERR_PTR(-ENOMEM);
439 
440 	pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes),
441 				GFP_KERNEL);
442 
443 	if (!pt->used_ptes)
444 		goto fail_bitmap;
445 
446 	ret = setup_px(dev, pt);
447 	if (ret)
448 		goto fail_page_m;
449 
450 	return pt;
451 
452 fail_page_m:
453 	kfree(pt->used_ptes);
454 fail_bitmap:
455 	kfree(pt);
456 
457 	return ERR_PTR(ret);
458 }
459 
460 static void free_pt(struct drm_device *dev, struct i915_page_table *pt)
461 {
462 	cleanup_px(dev, pt);
463 	kfree(pt->used_ptes);
464 	kfree(pt);
465 }
466 
467 static void gen8_initialize_pt(struct i915_address_space *vm,
468 			       struct i915_page_table *pt)
469 {
470 	gen8_pte_t scratch_pte;
471 
472 	scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
473 				      I915_CACHE_LLC, true);
474 
475 	fill_px(vm->dev, pt, scratch_pte);
476 }
477 
478 static void gen6_initialize_pt(struct i915_address_space *vm,
479 			       struct i915_page_table *pt)
480 {
481 	gen6_pte_t scratch_pte;
482 
483 	WARN_ON(px_dma(vm->scratch_page) == 0);
484 
485 	scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
486 				     I915_CACHE_LLC, true, 0);
487 
488 	fill32_px(vm->dev, pt, scratch_pte);
489 }
490 
491 static struct i915_page_directory *alloc_pd(struct drm_device *dev)
492 {
493 	struct i915_page_directory *pd;
494 	int ret = -ENOMEM;
495 
496 	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
497 	if (!pd)
498 		return ERR_PTR(-ENOMEM);
499 
500 	pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES),
501 				sizeof(*pd->used_pdes), GFP_KERNEL);
502 	if (!pd->used_pdes)
503 		goto fail_bitmap;
504 
505 	ret = setup_px(dev, pd);
506 	if (ret)
507 		goto fail_page_m;
508 
509 	return pd;
510 
511 fail_page_m:
512 	kfree(pd->used_pdes);
513 fail_bitmap:
514 	kfree(pd);
515 
516 	return ERR_PTR(ret);
517 }
518 
519 static void free_pd(struct drm_device *dev, struct i915_page_directory *pd)
520 {
521 	if (px_page(pd)) {
522 		cleanup_px(dev, pd);
523 		kfree(pd->used_pdes);
524 		kfree(pd);
525 	}
526 }
527 
528 static void gen8_initialize_pd(struct i915_address_space *vm,
529 			       struct i915_page_directory *pd)
530 {
531 	gen8_pde_t scratch_pde;
532 
533 	scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
534 
535 	fill_px(vm->dev, pd, scratch_pde);
536 }
537 
538 static int __pdp_init(struct drm_device *dev,
539 		      struct i915_page_directory_pointer *pdp)
540 {
541 	size_t pdpes = I915_PDPES_PER_PDP(dev);
542 
543 	pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes),
544 				  sizeof(unsigned long),
545 				  GFP_KERNEL);
546 	if (!pdp->used_pdpes)
547 		return -ENOMEM;
548 
549 	pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory),
550 				      GFP_KERNEL);
551 	if (!pdp->page_directory) {
552 		kfree(pdp->used_pdpes);
553 		/* the PDP might be the statically allocated top level. Keep it
554 		 * as clean as possible */
555 		pdp->used_pdpes = NULL;
556 		return -ENOMEM;
557 	}
558 
559 	return 0;
560 }
561 
562 static void __pdp_fini(struct i915_page_directory_pointer *pdp)
563 {
564 	kfree(pdp->used_pdpes);
565 	kfree(pdp->page_directory);
566 	pdp->page_directory = NULL;
567 }
568 
569 static struct
570 i915_page_directory_pointer *alloc_pdp(struct drm_device *dev)
571 {
572 	struct i915_page_directory_pointer *pdp;
573 	int ret = -ENOMEM;
574 
575 	WARN_ON(!USES_FULL_48BIT_PPGTT(dev));
576 
577 	pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
578 	if (!pdp)
579 		return ERR_PTR(-ENOMEM);
580 
581 	ret = __pdp_init(dev, pdp);
582 	if (ret)
583 		goto fail_bitmap;
584 
585 	ret = setup_px(dev, pdp);
586 	if (ret)
587 		goto fail_page_m;
588 
589 	return pdp;
590 
591 fail_page_m:
592 	__pdp_fini(pdp);
593 fail_bitmap:
594 	kfree(pdp);
595 
596 	return ERR_PTR(ret);
597 }
598 
599 static void free_pdp(struct drm_device *dev,
600 		     struct i915_page_directory_pointer *pdp)
601 {
602 	__pdp_fini(pdp);
603 	if (USES_FULL_48BIT_PPGTT(dev)) {
604 		cleanup_px(dev, pdp);
605 		kfree(pdp);
606 	}
607 }
608 
609 static void gen8_initialize_pdp(struct i915_address_space *vm,
610 				struct i915_page_directory_pointer *pdp)
611 {
612 	gen8_ppgtt_pdpe_t scratch_pdpe;
613 
614 	scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
615 
616 	fill_px(vm->dev, pdp, scratch_pdpe);
617 }
618 
619 static void gen8_initialize_pml4(struct i915_address_space *vm,
620 				 struct i915_pml4 *pml4)
621 {
622 	gen8_ppgtt_pml4e_t scratch_pml4e;
623 
624 	scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
625 					  I915_CACHE_LLC);
626 
627 	fill_px(vm->dev, pml4, scratch_pml4e);
628 }
629 
630 static void
631 gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt,
632 			  struct i915_page_directory_pointer *pdp,
633 			  struct i915_page_directory *pd,
634 			  int index)
635 {
636 	gen8_ppgtt_pdpe_t *page_directorypo;
637 
638 	if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
639 		return;
640 
641 	page_directorypo = kmap_px(pdp);
642 	page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
643 	kunmap_px(ppgtt, page_directorypo);
644 }
645 
646 static void
647 gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt,
648 				  struct i915_pml4 *pml4,
649 				  struct i915_page_directory_pointer *pdp,
650 				  int index)
651 {
652 	gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
653 
654 	WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev));
655 	pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
656 	kunmap_px(ppgtt, pagemap);
657 }
658 
659 /* Broadwell Page Directory Pointer Descriptors */
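/*
 * Each of the four per-ring PDP registers is loaded from the ring itself:
 * a pair of MI_LOAD_REGISTER_IMM commands per entry writes the upper and
 * then the lower dword of the page directory address.
 */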
660 static int gen8_write_pdp(struct drm_i915_gem_request *req,
661 			  unsigned entry,
662 			  dma_addr_t addr)
663 {
664 	struct intel_engine_cs *engine = req->engine;
665 	int ret;
666 
667 	BUG_ON(entry >= 4);
668 
669 	ret = intel_ring_begin(req, 6);
670 	if (ret)
671 		return ret;
672 
673 	intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1));
674 	intel_ring_emit_reg(engine, GEN8_RING_PDP_UDW(engine, entry));
675 	intel_ring_emit(engine, upper_32_bits(addr));
676 	intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1));
677 	intel_ring_emit_reg(engine, GEN8_RING_PDP_LDW(engine, entry));
678 	intel_ring_emit(engine, lower_32_bits(addr));
679 	intel_ring_advance(engine);
680 
681 	return 0;
682 }
683 
684 static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt,
685 				 struct drm_i915_gem_request *req)
686 {
687 	int i, ret;
688 
689 	for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
690 		const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
691 
692 		ret = gen8_write_pdp(req, i, pd_daddr);
693 		if (ret)
694 			return ret;
695 	}
696 
697 	return 0;
698 }
699 
700 static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
701 			      struct drm_i915_gem_request *req)
702 {
703 	return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
704 }
705 
706 static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
707 				       struct i915_page_directory_pointer *pdp,
708 				       uint64_t start,
709 				       uint64_t length,
710 				       gen8_pte_t scratch_pte)
711 {
712 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
713 	gen8_pte_t *pt_vaddr;
714 	unsigned pdpe = gen8_pdpe_index(start);
715 	unsigned pde = gen8_pde_index(start);
716 	unsigned pte = gen8_pte_index(start);
717 	unsigned num_entries = length >> PAGE_SHIFT;
718 	unsigned last_pte, i;
719 
720 	if (WARN_ON(!pdp))
721 		return;
722 
723 	while (num_entries) {
724 		struct i915_page_directory *pd;
725 		struct i915_page_table *pt;
726 
727 		if (WARN_ON(!pdp->page_directory[pdpe]))
728 			break;
729 
730 		pd = pdp->page_directory[pdpe];
731 
732 		if (WARN_ON(!pd->page_table[pde]))
733 			break;
734 
735 		pt = pd->page_table[pde];
736 
737 		if (WARN_ON(!px_page(pt)))
738 			break;
739 
740 		last_pte = pte + num_entries;
741 		if (last_pte > GEN8_PTES)
742 			last_pte = GEN8_PTES;
743 
744 		pt_vaddr = kmap_px(pt);
745 
746 		for (i = pte; i < last_pte; i++) {
747 			pt_vaddr[i] = scratch_pte;
748 			num_entries--;
749 		}
750 
751 		kunmap_px(ppgtt, pt_vaddr);
752 
753 		pte = 0;
754 		if (++pde == I915_PDES) {
755 			if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
756 				break;
757 			pde = 0;
758 		}
759 	}
760 }
761 
762 static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
763 				   uint64_t start,
764 				   uint64_t length,
765 				   bool use_scratch)
766 {
767 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
768 	gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
769 						 I915_CACHE_LLC, use_scratch);
770 
771 	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
772 		gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length,
773 					   scratch_pte);
774 	} else {
775 		uint64_t pml4e;
776 		struct i915_page_directory_pointer *pdp;
777 
778 		gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
779 			gen8_ppgtt_clear_pte_range(vm, pdp, start, length,
780 						   scratch_pte);
781 		}
782 	}
783 }
784 
785 static void
786 gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
787 			      struct i915_page_directory_pointer *pdp,
788 			      struct sg_page_iter *sg_iter,
789 			      uint64_t start,
790 			      enum i915_cache_level cache_level)
791 {
792 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
793 	gen8_pte_t *pt_vaddr;
794 	unsigned pdpe = gen8_pdpe_index(start);
795 	unsigned pde = gen8_pde_index(start);
796 	unsigned pte = gen8_pte_index(start);
797 
798 	pt_vaddr = NULL;
799 
800 	while (__sg_page_iter_next(sg_iter)) {
801 		if (pt_vaddr == NULL) {
802 			struct i915_page_directory *pd = pdp->page_directory[pdpe];
803 			struct i915_page_table *pt = pd->page_table[pde];
804 			pt_vaddr = kmap_px(pt);
805 		}
806 
807 		pt_vaddr[pte] =
808 			gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
809 					cache_level, true);
810 		if (++pte == GEN8_PTES) {
811 			kunmap_px(ppgtt, pt_vaddr);
812 			pt_vaddr = NULL;
813 			if (++pde == I915_PDES) {
814 				if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
815 					break;
816 				pde = 0;
817 			}
818 			pte = 0;
819 		}
820 	}
821 
822 	if (pt_vaddr)
823 		kunmap_px(ppgtt, pt_vaddr);
824 }
825 
826 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
827 				      struct sg_table *pages,
828 				      uint64_t start,
829 				      enum i915_cache_level cache_level,
830 				      u32 unused)
831 {
832 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
833 	struct sg_page_iter sg_iter;
834 
835 	__sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0);
836 
837 	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
838 		gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start,
839 					      cache_level);
840 	} else {
841 		struct i915_page_directory_pointer *pdp;
842 		uint64_t pml4e;
843 		uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT;
844 
845 		gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
846 			gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter,
847 						      start, cache_level);
848 		}
849 	}
850 }
851 
852 static void gen8_free_page_tables(struct drm_device *dev,
853 				  struct i915_page_directory *pd)
854 {
855 	int i;
856 
857 	if (!px_page(pd))
858 		return;
859 
860 	for_each_set_bit(i, pd->used_pdes, I915_PDES) {
861 		if (WARN_ON(!pd->page_table[i]))
862 			continue;
863 
864 		free_pt(dev, pd->page_table[i]);
865 		pd->page_table[i] = NULL;
866 	}
867 }
868 
869 static int gen8_init_scratch(struct i915_address_space *vm)
870 {
871 	struct drm_device *dev = vm->dev;
872 
873 	vm->scratch_page = alloc_scratch_page(dev);
874 	if (IS_ERR(vm->scratch_page))
875 		return PTR_ERR(vm->scratch_page);
876 
877 	vm->scratch_pt = alloc_pt(dev);
878 	if (IS_ERR(vm->scratch_pt)) {
879 		free_scratch_page(dev, vm->scratch_page);
880 		return PTR_ERR(vm->scratch_pt);
881 	}
882 
883 	vm->scratch_pd = alloc_pd(dev);
884 	if (IS_ERR(vm->scratch_pd)) {
885 		free_pt(dev, vm->scratch_pt);
886 		free_scratch_page(dev, vm->scratch_page);
887 		return PTR_ERR(vm->scratch_pd);
888 	}
889 
890 	if (USES_FULL_48BIT_PPGTT(dev)) {
891 		vm->scratch_pdp = alloc_pdp(dev);
892 		if (IS_ERR(vm->scratch_pdp)) {
893 			free_pd(dev, vm->scratch_pd);
894 			free_pt(dev, vm->scratch_pt);
895 			free_scratch_page(dev, vm->scratch_page);
896 			return PTR_ERR(vm->scratch_pdp);
897 		}
898 	}
899 
900 	gen8_initialize_pt(vm, vm->scratch_pt);
901 	gen8_initialize_pd(vm, vm->scratch_pd);
902 	if (USES_FULL_48BIT_PPGTT(dev))
903 		gen8_initialize_pdp(vm, vm->scratch_pdp);
904 
905 	return 0;
906 }
907 
908 static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
909 {
910 	enum vgt_g2v_type msg;
911 	struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev);
912 	int i;
913 
914 	if (USES_FULL_48BIT_PPGTT(dev_priv)) {
915 		u64 daddr = px_dma(&ppgtt->pml4);
916 
917 		I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
918 		I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
919 
920 		msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
921 				VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
922 	} else {
923 		for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
924 			u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
925 
926 			I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
927 			I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
928 		}
929 
930 		msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
931 				VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
932 	}
933 
934 	I915_WRITE(vgtif_reg(g2v_notify), msg);
935 
936 	return 0;
937 }
938 
939 static void gen8_free_scratch(struct i915_address_space *vm)
940 {
941 	struct drm_device *dev = vm->dev;
942 
943 	if (USES_FULL_48BIT_PPGTT(dev))
944 		free_pdp(dev, vm->scratch_pdp);
945 	free_pd(dev, vm->scratch_pd);
946 	free_pt(dev, vm->scratch_pt);
947 	free_scratch_page(dev, vm->scratch_page);
948 }
949 
950 static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev,
951 				    struct i915_page_directory_pointer *pdp)
952 {
953 	int i;
954 
955 	for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) {
956 		if (WARN_ON(!pdp->page_directory[i]))
957 			continue;
958 
959 		gen8_free_page_tables(dev, pdp->page_directory[i]);
960 		free_pd(dev, pdp->page_directory[i]);
961 	}
962 
963 	free_pdp(dev, pdp);
964 }
965 
966 static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
967 {
968 	int i;
969 
970 	for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
971 		if (WARN_ON(!ppgtt->pml4.pdps[i]))
972 			continue;
973 
974 		gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, ppgtt->pml4.pdps[i]);
975 	}
976 
977 	cleanup_px(ppgtt->base.dev, &ppgtt->pml4);
978 }
979 
980 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
981 {
982 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
983 
984 	if (intel_vgpu_active(vm->dev))
985 		gen8_ppgtt_notify_vgt(ppgtt, false);
986 
987 	if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
988 		gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, &ppgtt->pdp);
989 	else
990 		gen8_ppgtt_cleanup_4lvl(ppgtt);
991 
992 	gen8_free_scratch(vm);
993 }
994 
995 /**
996  * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range.
997  * @vm:	Master vm structure.
998  * @pd:	Page directory for this address range.
999  * @start:	Starting virtual address to begin allocations.
1000  * @length:	Size of the allocations.
1001  * @new_pts:	Bitmap set by function with new allocations. Likely used by the
1002  *		caller to free on error.
1003  *
1004  * Allocate the required number of page tables. Extremely similar to
1005  * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by
1006  * the page directory boundary (instead of the page directory pointer). That
1007  * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is
1008  * possible, and likely, that the caller will need to use multiple calls of this
1009  * function to achieve the appropriate allocation.
1010  *
1011  * Return: 0 if success; negative error code otherwise.
1012  */
1013 static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
1014 				     struct i915_page_directory *pd,
1015 				     uint64_t start,
1016 				     uint64_t length,
1017 				     unsigned long *new_pts)
1018 {
1019 	struct drm_device *dev = vm->dev;
1020 	struct i915_page_table *pt;
1021 	uint32_t pde;
1022 
1023 	gen8_for_each_pde(pt, pd, start, length, pde) {
1024 		/* Don't reallocate page tables */
1025 		if (test_bit(pde, pd->used_pdes)) {
1026 			/* Scratch is never allocated this way */
1027 			WARN_ON(pt == vm->scratch_pt);
1028 			continue;
1029 		}
1030 
1031 		pt = alloc_pt(dev);
1032 		if (IS_ERR(pt))
1033 			goto unwind_out;
1034 
1035 		gen8_initialize_pt(vm, pt);
1036 		pd->page_table[pde] = pt;
1037 		__set_bit(pde, new_pts);
1038 		trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT);
1039 	}
1040 
1041 	return 0;
1042 
1043 unwind_out:
1044 	for_each_set_bit(pde, new_pts, I915_PDES)
1045 		free_pt(dev, pd->page_table[pde]);
1046 
1047 	return -ENOMEM;
1048 }
1049 
1050 /**
1051  * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
1052  * @vm:	Master vm structure.
1053  * @pdp:	Page directory pointer for this address range.
1054  * @start:	Starting virtual address to begin allocations.
1055  * @length:	Size of the allocations.
1056  * @new_pds:	Bitmap set by function with new allocations. Likely used by the
1057  *		caller to free on error.
1058  *
1059  * Allocate the required number of page directories starting at the pde index of
1060  * @start, and ending at the pde index @start + @length. This function will skip
1061  * over already allocated page directories within the range, and only allocate
1062  * new ones, setting the appropriate pointer within the pdp as well as the
1063  * correct position in the bitmap @new_pds.
1064  *
1065  * The function will only allocate the pages within the range for a given page
1066  * directory pointer. In other words, if @start + @length straddles a virtually
1067  * addressed PDP boundary (512GB for 4k pages), there will be more allocations
1068  * required by the caller. This is not currently possible, and the BUG in the
1069  * code will prevent it.
1070  *
1071  * Return: 0 if success; negative error code otherwise.
1072  */
1073 static int
1074 gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
1075 				  struct i915_page_directory_pointer *pdp,
1076 				  uint64_t start,
1077 				  uint64_t length,
1078 				  unsigned long *new_pds)
1079 {
1080 	struct drm_device *dev = vm->dev;
1081 	struct i915_page_directory *pd;
1082 	uint32_t pdpe;
1083 	uint32_t pdpes = I915_PDPES_PER_PDP(dev);
1084 
1085 	WARN_ON(!bitmap_empty(new_pds, pdpes));
1086 
1087 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1088 		if (test_bit(pdpe, pdp->used_pdpes))
1089 			continue;
1090 
1091 		pd = alloc_pd(dev);
1092 		if (IS_ERR(pd))
1093 			goto unwind_out;
1094 
1095 		gen8_initialize_pd(vm, pd);
1096 		pdp->page_directory[pdpe] = pd;
1097 		__set_bit(pdpe, new_pds);
1098 		trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT);
1099 	}
1100 
1101 	return 0;
1102 
1103 unwind_out:
1104 	for_each_set_bit(pdpe, new_pds, pdpes)
1105 		free_pd(dev, pdp->page_directory[pdpe]);
1106 
1107 	return -ENOMEM;
1108 }
1109 
1110 /**
1111  * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range.
1112  * @vm:	Master vm structure.
1113  * @pml4:	Page map level 4 for this address range.
1114  * @start:	Starting virtual address to begin allocations.
1115  * @length:	Size of the allocations.
1116  * @new_pdps:	Bitmap set by function with new allocations. Likely used by the
1117  *		caller to free on error.
1118  *
1119  * Allocate the required number of page directory pointers. Extremely similar to
1120  * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs().
1121  * The main difference is here we are limited by the pml4 boundary (instead of
1122  * the page directory pointer).
1123  *
1124  * Return: 0 if success; negative error code otherwise.
1125  */
1126 static int
1127 gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
1128 				  struct i915_pml4 *pml4,
1129 				  uint64_t start,
1130 				  uint64_t length,
1131 				  unsigned long *new_pdps)
1132 {
1133 	struct drm_device *dev = vm->dev;
1134 	struct i915_page_directory_pointer *pdp;
1135 	uint32_t pml4e;
1136 
1137 	WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4));
1138 
1139 	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1140 		if (!test_bit(pml4e, pml4->used_pml4es)) {
1141 			pdp = alloc_pdp(dev);
1142 			if (IS_ERR(pdp))
1143 				goto unwind_out;
1144 
1145 			gen8_initialize_pdp(vm, pdp);
1146 			pml4->pdps[pml4e] = pdp;
1147 			__set_bit(pml4e, new_pdps);
1148 			trace_i915_page_directory_pointer_entry_alloc(vm,
1149 								      pml4e,
1150 								      start,
1151 								      GEN8_PML4E_SHIFT);
1152 		}
1153 	}
1154 
1155 	return 0;
1156 
1157 unwind_out:
1158 	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
1159 		free_pdp(dev, pml4->pdps[pml4e]);
1160 
1161 	return -ENOMEM;
1162 }
1163 
1164 static void
1165 free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts)
1166 {
1167 	kfree(new_pts);
1168 	kfree(new_pds);
1169 }
1170 
1171 /* Fills in the page directory bitmap, and the array of page table bitmaps. Both
1172  * of these are based on the number of PDPEs in the system.
1173  */
1174 static
1175 int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
1176 					 unsigned long **new_pts,
1177 					 uint32_t pdpes)
1178 {
1179 	unsigned long *pds;
1180 	unsigned long *pts;
1181 
1182 	pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY);
1183 	if (!pds)
1184 		return -ENOMEM;
1185 
1186 	pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long),
1187 		      GFP_TEMPORARY);
1188 	if (!pts)
1189 		goto err_out;
1190 
1191 	*new_pds = pds;
1192 	*new_pts = pts;
1193 
1194 	return 0;
1195 
1196 err_out:
1197 	free_gen8_temp_bitmaps(pds, pts);
1198 	return -ENOMEM;
1199 }
1200 
1201 /* PDE TLBs are a pain to invalidate on GEN8+. When we modify
1202  * the page table structures, we mark them dirty so that
1203  * context switching/execlist queuing code takes extra steps
1204  * to ensure that tlbs are flushed.
1205  */
1206 static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
1207 {
1208 	ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
1209 }
1210 
1211 static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
1212 				    struct i915_page_directory_pointer *pdp,
1213 				    uint64_t start,
1214 				    uint64_t length)
1215 {
1216 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1217 	unsigned long *new_page_dirs, *new_page_tables;
1218 	struct drm_device *dev = vm->dev;
1219 	struct i915_page_directory *pd;
1220 	const uint64_t orig_start = start;
1221 	const uint64_t orig_length = length;
1222 	uint32_t pdpe;
1223 	uint32_t pdpes = I915_PDPES_PER_PDP(dev);
1224 	int ret;
1225 
1226 	/* Wrap is never okay since we can only represent 48b, and we don't
1227 	 * actually use the other side of the canonical address space.
1228 	 */
1229 	if (WARN_ON(start + length < start))
1230 		return -ENODEV;
1231 
1232 	if (WARN_ON(start + length > vm->total))
1233 		return -ENODEV;
1234 
1235 	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
1236 	if (ret)
1237 		return ret;
1238 
1239 	/* Do the allocations first so we can easily bail out */
1240 	ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length,
1241 						new_page_dirs);
1242 	if (ret) {
1243 		free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1244 		return ret;
1245 	}
1246 
1247 	/* For every page directory referenced, allocate page tables */
1248 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1249 		ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length,
1250 						new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES));
1251 		if (ret)
1252 			goto err_out;
1253 	}
1254 
1255 	start = orig_start;
1256 	length = orig_length;
1257 
1258 	/* Allocations have completed successfully, so set the bitmaps, and do
1259 	 * the mappings. */
1260 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1261 		gen8_pde_t *const page_directory = kmap_px(pd);
1262 		struct i915_page_table *pt;
1263 		uint64_t pd_len = length;
1264 		uint64_t pd_start = start;
1265 		uint32_t pde;
1266 
1267 		/* Every pd should be allocated; we just did that above. */
1268 		WARN_ON(!pd);
1269 
1270 		gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
1271 			/* Same reasoning as pd */
1272 			WARN_ON(!pt);
1273 			WARN_ON(!pd_len);
1274 			WARN_ON(!gen8_pte_count(pd_start, pd_len));
1275 
1276 			/* Set our used ptes within the page table */
1277 			bitmap_set(pt->used_ptes,
1278 				   gen8_pte_index(pd_start),
1279 				   gen8_pte_count(pd_start, pd_len));
1280 
1281 			/* Our pde is now pointing to the pagetable, pt */
1282 			__set_bit(pde, pd->used_pdes);
1283 
1284 			/* Map the PDE to the page table */
1285 			page_directory[pde] = gen8_pde_encode(px_dma(pt),
1286 							      I915_CACHE_LLC);
1287 			trace_i915_page_table_entry_map(&ppgtt->base, pde, pt,
1288 							gen8_pte_index(start),
1289 							gen8_pte_count(start, length),
1290 							GEN8_PTES);
1291 
1292 			/* NB: We haven't yet mapped ptes to pages. At this
1293 			 * point we're still relying on insert_entries() */
1294 		}
1295 
1296 		kunmap_px(ppgtt, page_directory);
1297 		__set_bit(pdpe, pdp->used_pdpes);
1298 		gen8_setup_page_directory(ppgtt, pdp, pd, pdpe);
1299 	}
1300 
1301 	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1302 	mark_tlbs_dirty(ppgtt);
1303 	return 0;
1304 
1305 err_out:
1306 	while (pdpe--) {
1307 		unsigned long temp;
1308 
1309 		for_each_set_bit(temp, new_page_tables + pdpe *
1310 				BITS_TO_LONGS(I915_PDES), I915_PDES)
1311 			free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]);
1312 	}
1313 
1314 	for_each_set_bit(pdpe, new_page_dirs, pdpes)
1315 		free_pd(dev, pdp->page_directory[pdpe]);
1316 
1317 	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1318 	mark_tlbs_dirty(ppgtt);
1319 	return ret;
1320 }
1321 
1322 static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
1323 				    struct i915_pml4 *pml4,
1324 				    uint64_t start,
1325 				    uint64_t length)
1326 {
1327 	DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
1328 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1329 	struct i915_page_directory_pointer *pdp;
1330 	uint64_t pml4e;
1331 	int ret = 0;
1332 
1333 	/* Do the pml4 allocations first, so we don't need to track the newly
1334 	 * allocated tables below the pdp */
1335 	bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4);
1336 
1337 	/* The pagedirectory and pagetable allocations are done in the shared 3
1338 	/* The page directory and page table allocations are done in the shared 3
1339 	 */
1340 	ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length,
1341 						new_pdps);
1342 	if (ret)
1343 		return ret;
1344 
1345 	WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2,
1346 	     "The allocation has spanned more than 512GB. "
1347 	     "It is highly likely this is incorrect.");
1348 
1349 	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1350 		WARN_ON(!pdp);
1351 
1352 		ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
1353 		if (ret)
1354 			goto err_out;
1355 
1356 		gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e);
1357 	}
1358 
1359 	bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
1360 		  GEN8_PML4ES_PER_PML4);
1361 
1362 	return 0;
1363 
1364 err_out:
1365 	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
1366 		gen8_ppgtt_cleanup_3lvl(vm->dev, pml4->pdps[pml4e]);
1367 
1368 	return ret;
1369 }
1370 
1371 static int gen8_alloc_va_range(struct i915_address_space *vm,
1372 			       uint64_t start, uint64_t length)
1373 {
1374 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1375 
1376 	if (USES_FULL_48BIT_PPGTT(vm->dev))
1377 		return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length);
1378 	else
1379 		return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
1380 }
1381 
1382 static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
1383 			  uint64_t start, uint64_t length,
1384 			  gen8_pte_t scratch_pte,
1385 			  struct seq_file *m)
1386 {
1387 	struct i915_page_directory *pd;
1388 	uint32_t pdpe;
1389 
1390 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1391 		struct i915_page_table *pt;
1392 		uint64_t pd_len = length;
1393 		uint64_t pd_start = start;
1394 		uint32_t pde;
1395 
1396 		if (!test_bit(pdpe, pdp->used_pdpes))
1397 			continue;
1398 
1399 		seq_printf(m, "\tPDPE #%d\n", pdpe);
1400 		gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
1401 			uint32_t  pte;
1402 			gen8_pte_t *pt_vaddr;
1403 
1404 			if (!test_bit(pde, pd->used_pdes))
1405 				continue;
1406 
1407 			pt_vaddr = kmap_px(pt);
1408 			for (pte = 0; pte < GEN8_PTES; pte += 4) {
1409 				uint64_t va =
1410 					(pdpe << GEN8_PDPE_SHIFT) |
1411 					(pde << GEN8_PDE_SHIFT) |
1412 					(pte << GEN8_PTE_SHIFT);
1413 				int i;
1414 				bool found = false;
1415 
1416 				for (i = 0; i < 4; i++)
1417 					if (pt_vaddr[pte + i] != scratch_pte)
1418 						found = true;
1419 				if (!found)
1420 					continue;
1421 
1422 				seq_printf(m, "\t\t0x%lx [%03d,%03d,%04d]: =", va, pdpe, pde, pte);
1423 				for (i = 0; i < 4; i++) {
1424 					if (pt_vaddr[pte + i] != scratch_pte)
1425 						seq_printf(m, " %lx", pt_vaddr[pte + i]);
1426 					else
1427 						seq_puts(m, "  SCRATCH ");
1428 				}
1429 				seq_puts(m, "\n");
1430 			}
1431 			/* don't use kunmap_px, it could trigger
1432 			 * an unnecessary flush.
1433 			 */
1434 			kunmap_atomic(pt_vaddr);
1435 		}
1436 	}
1437 }
1438 
1439 static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1440 {
1441 	struct i915_address_space *vm = &ppgtt->base;
1442 	uint64_t start = ppgtt->base.start;
1443 	uint64_t length = ppgtt->base.total;
1444 	gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
1445 						 I915_CACHE_LLC, true);
1446 
1447 	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
1448 		gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
1449 	} else {
1450 		uint64_t pml4e;
1451 		struct i915_pml4 *pml4 = &ppgtt->pml4;
1452 		struct i915_page_directory_pointer *pdp;
1453 
1454 		gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1455 			if (!test_bit(pml4e, pml4->used_pml4es))
1456 				continue;
1457 
1458 			seq_printf(m, "    PML4E #%lu\n", pml4e);
1459 			gen8_dump_pdp(pdp, start, length, scratch_pte, m);
1460 		}
1461 	}
1462 }
1463 
1464 static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
1465 {
1466 	unsigned long *new_page_dirs, *new_page_tables;
1467 	uint32_t pdpes = I915_PDPES_PER_PDP(dev);
1468 	int ret;
1469 
1470 	/* We allocate a temp bitmap for page tables for no gain,
1471 	 * but as this is for init only, let's keep things simple.
1472 	 */
1473 	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
1474 	if (ret)
1475 		return ret;
1476 
1477 	/* Allocate for all pdps regardless of how the ppgtt
1478 	 * was defined.
1479 	 */
1480 	ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp,
1481 						0, 1ULL << 32,
1482 						new_page_dirs);
1483 	if (!ret)
1484 		*ppgtt->pdp.used_pdpes = *new_page_dirs;
1485 
1486 	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1487 
1488 	return ret;
1489 }
1490 
1491 /*
1492  * GEN8 legacy ppgtt programming is accomplished through a max of 4 PDP registers
1493  * with a net effect resembling a 2-level page table in normal x86 terms. Each
1494  * PDP entry represents 1GB of memory (512 * 512 * 4096 bytes), so the 4 entries
1495  * cover the 4GB legacy 32b address space.
1496  *
1497  */
1498 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
1499 {
1500 	int ret;
1501 
1502 	ret = gen8_init_scratch(&ppgtt->base);
1503 	if (ret)
1504 		return ret;
1505 
1506 	ppgtt->base.start = 0;
1507 	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
1508 	ppgtt->base.allocate_va_range = gen8_alloc_va_range;
1509 	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
1510 	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
1511 	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
1512 	ppgtt->base.bind_vma = ppgtt_bind_vma;
1513 	ppgtt->debug_dump = gen8_dump_ppgtt;
1514 
1515 	if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
1516 		ret = setup_px(ppgtt->base.dev, &ppgtt->pml4);
1517 		if (ret)
1518 			goto free_scratch;
1519 
1520 		gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4);
1521 
1522 		ppgtt->base.total = 1ULL << 48;
1523 		ppgtt->switch_mm = gen8_48b_mm_switch;
1524 	} else {
1525 		ret = __pdp_init(ppgtt->base.dev, &ppgtt->pdp);
1526 		if (ret)
1527 			goto free_scratch;
1528 
1529 		ppgtt->base.total = 1ULL << 32;
1530 		ppgtt->switch_mm = gen8_legacy_mm_switch;
1531 		trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base,
1532 							      0, 0,
1533 							      GEN8_PML4E_SHIFT);
1534 
1535 		if (intel_vgpu_active(ppgtt->base.dev)) {
1536 			ret = gen8_preallocate_top_level_pdps(ppgtt);
1537 			if (ret)
1538 				goto free_scratch;
1539 		}
1540 	}
1541 
1542 	if (intel_vgpu_active(ppgtt->base.dev))
1543 		gen8_ppgtt_notify_vgt(ppgtt, true);
1544 
1545 	return 0;
1546 
1547 free_scratch:
1548 	gen8_free_scratch(&ppgtt->base);
1549 	return ret;
1550 }
1551 
1552 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1553 {
1554 	struct i915_address_space *vm = &ppgtt->base;
1555 	struct i915_page_table *unused;
1556 	gen6_pte_t scratch_pte;
1557 	uint32_t pd_entry;
1558 	uint32_t  pte, pde, temp;
1559 	uint32_t start = ppgtt->base.start, length = ppgtt->base.total;
1560 
1561 	scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
1562 				     I915_CACHE_LLC, true, 0);
1563 
1564 	gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde) {
1565 		u32 expected;
1566 		gen6_pte_t *pt_vaddr;
1567 		const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]);
1568 		pd_entry = readl(ppgtt->pd_addr + pde);
1569 		expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
1570 
1571 		if (pd_entry != expected)
1572 			seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
1573 				   pde,
1574 				   pd_entry,
1575 				   expected);
1576 		seq_printf(m, "\tPDE: %x\n", pd_entry);
1577 
1578 		pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]);
1579 
1580 		for (pte = 0; pte < GEN6_PTES; pte+=4) {
1581 			unsigned long va =
1582 				(pde * PAGE_SIZE * GEN6_PTES) +
1583 				(pte * PAGE_SIZE);
1584 			int i;
1585 			bool found = false;
1586 			for (i = 0; i < 4; i++)
1587 				if (pt_vaddr[pte + i] != scratch_pte)
1588 					found = true;
1589 			if (!found)
1590 				continue;
1591 
1592 			seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
1593 			for (i = 0; i < 4; i++) {
1594 				if (pt_vaddr[pte + i] != scratch_pte)
1595 					seq_printf(m, " %08x", pt_vaddr[pte + i]);
1596 				else
1597 					seq_puts(m, "  SCRATCH ");
1598 			}
1599 			seq_puts(m, "\n");
1600 		}
1601 		kunmap_px(ppgtt, pt_vaddr);
1602 	}
1603 }
1604 
1605 /* Write pde (index) from the page directory @pd to the page table @pt */
1606 static void gen6_write_pde(struct i915_page_directory *pd,
1607 			    const int pde, struct i915_page_table *pt)
1608 {
1609 	/* Caller needs to make sure the write completes if necessary */
1610 	struct i915_hw_ppgtt *ppgtt =
1611 		container_of(pd, struct i915_hw_ppgtt, pd);
1612 	u32 pd_entry;
1613 
1614 	pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt));
1615 	pd_entry |= GEN6_PDE_VALID;
1616 
1617 	writel(pd_entry, ppgtt->pd_addr + pde);
1618 }
1619 
1620 /* Write all the page tables found in the ppgtt structure to incrementing page
1621  * directories. */
1622 static void gen6_write_page_range(struct drm_i915_private *dev_priv,
1623 				  struct i915_page_directory *pd,
1624 				  uint32_t start, uint32_t length)
1625 {
1626 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
1627 	struct i915_page_table *pt;
1628 	uint32_t pde, temp;
1629 
1630 	gen6_for_each_pde(pt, pd, start, length, temp, pde)
1631 		gen6_write_pde(pd, pde, pt);
1632 
1633 	/* Make sure write is complete before other code can use this page
1634 	 * table. Also required for WC mapped PTEs */
1635 	readl(ggtt->gsm);
1636 }
1637 
1638 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
1639 {
1640 	BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f);
1641 
1642 	return (ppgtt->pd.base.ggtt_offset / 64) << 16;
1643 }
1644 
1645 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
1646 			 struct drm_i915_gem_request *req)
1647 {
1648 	struct intel_engine_cs *engine = req->engine;
1649 	int ret;
1650 
1651 	/* NB: TLBs must be flushed and invalidated before a switch */
1652 	ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1653 	if (ret)
1654 		return ret;
1655 
1656 	ret = intel_ring_begin(req, 6);
1657 	if (ret)
1658 		return ret;
1659 
1660 	intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(2));
1661 	intel_ring_emit_reg(engine, RING_PP_DIR_DCLV(engine));
1662 	intel_ring_emit(engine, PP_DIR_DCLV_2G);
1663 	intel_ring_emit_reg(engine, RING_PP_DIR_BASE(engine));
1664 	intel_ring_emit(engine, get_pd_offset(ppgtt));
1665 	intel_ring_emit(engine, MI_NOOP);
1666 	intel_ring_advance(engine);
1667 
1668 	return 0;
1669 }
1670 
1671 static int vgpu_mm_switch(struct i915_hw_ppgtt *ppgtt,
1672 			  struct drm_i915_gem_request *req)
1673 {
1674 	struct intel_engine_cs *engine = req->engine;
1675 	struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev);
1676 
1677 	I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
1678 	I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt));
1679 	return 0;
1680 }
1681 
1682 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
1683 			  struct drm_i915_gem_request *req)
1684 {
1685 	struct intel_engine_cs *engine = req->engine;
1686 	int ret;
1687 
1688 	/* NB: TLBs must be flushed and invalidated before a switch */
1689 	ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1690 	if (ret)
1691 		return ret;
1692 
1693 	ret = intel_ring_begin(req, 6);
1694 	if (ret)
1695 		return ret;
1696 
1697 	intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(2));
1698 	intel_ring_emit_reg(engine, RING_PP_DIR_DCLV(engine));
1699 	intel_ring_emit(engine, PP_DIR_DCLV_2G);
1700 	intel_ring_emit_reg(engine, RING_PP_DIR_BASE(engine));
1701 	intel_ring_emit(engine, get_pd_offset(ppgtt));
1702 	intel_ring_emit(engine, MI_NOOP);
1703 	intel_ring_advance(engine);
1704 
1705 	/* XXX: RCS is the only one to auto invalidate the TLBs? */
1706 	if (engine->id != RCS) {
1707 		ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1708 		if (ret)
1709 			return ret;
1710 	}
1711 
1712 	return 0;
1713 }
1714 
1715 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
1716 			  struct drm_i915_gem_request *req)
1717 {
1718 	struct intel_engine_cs *engine = req->engine;
1719 	struct drm_device *dev = ppgtt->base.dev;
1720 	struct drm_i915_private *dev_priv = dev->dev_private;
1721 
1722 
1723 	I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
1724 	I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt));
1725 
1726 	POSTING_READ(RING_PP_DIR_DCLV(engine));
1727 
1728 	return 0;
1729 }
1730 
1731 static void gen8_ppgtt_enable(struct drm_device *dev)
1732 {
1733 	struct drm_i915_private *dev_priv = dev->dev_private;
1734 	struct intel_engine_cs *engine;
1735 
1736 	for_each_engine(engine, dev_priv) {
1737 		u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? GEN8_GFX_PPGTT_48B : 0;
1738 		I915_WRITE(RING_MODE_GEN7(engine),
1739 			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
1740 	}
1741 }
1742 
1743 static void gen7_ppgtt_enable(struct drm_device *dev)
1744 {
1745 	struct drm_i915_private *dev_priv = dev->dev_private;
1746 	struct intel_engine_cs *engine;
1747 	uint32_t ecochk, ecobits;
1748 
1749 	ecobits = I915_READ(GAC_ECO_BITS);
1750 	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
1751 
1752 	ecochk = I915_READ(GAM_ECOCHK);
1753 	if (IS_HASWELL(dev)) {
1754 		ecochk |= ECOCHK_PPGTT_WB_HSW;
1755 	} else {
1756 		ecochk |= ECOCHK_PPGTT_LLC_IVB;
1757 		ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
1758 	}
1759 	I915_WRITE(GAM_ECOCHK, ecochk);
1760 
1761 	for_each_engine(engine, dev_priv) {
1762 		/* GFX_MODE is per-ring on gen7+ */
1763 		I915_WRITE(RING_MODE_GEN7(engine),
1764 			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1765 	}
1766 }
1767 
1768 static void gen6_ppgtt_enable(struct drm_device *dev)
1769 {
1770 	struct drm_i915_private *dev_priv = dev->dev_private;
1771 	uint32_t ecochk, gab_ctl, ecobits;
1772 
1773 	ecobits = I915_READ(GAC_ECO_BITS);
1774 	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
1775 		   ECOBITS_PPGTT_CACHE64B);
1776 
1777 	gab_ctl = I915_READ(GAB_CTL);
1778 	I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
1779 
1780 	ecochk = I915_READ(GAM_ECOCHK);
1781 	I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
1782 
1783 	I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1784 }
1785 
1786 /* PPGTT support for Sandybridge/Gen6 and later */
1787 static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
1788 				   uint64_t start,
1789 				   uint64_t length,
1790 				   bool use_scratch)
1791 {
1792 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1793 	gen6_pte_t *pt_vaddr, scratch_pte;
1794 	unsigned first_entry = start >> PAGE_SHIFT;
1795 	unsigned num_entries = length >> PAGE_SHIFT;
1796 	unsigned act_pt = first_entry / GEN6_PTES;
1797 	unsigned first_pte = first_entry % GEN6_PTES;
1798 	unsigned last_pte, i;
1799 
1800 	scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
1801 				     I915_CACHE_LLC, true, 0);
1802 
1803 	while (num_entries) {
1804 		last_pte = first_pte + num_entries;
1805 		if (last_pte > GEN6_PTES)
1806 			last_pte = GEN6_PTES;
1807 
1808 		pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
1809 
1810 		for (i = first_pte; i < last_pte; i++)
1811 			pt_vaddr[i] = scratch_pte;
1812 
1813 		kunmap_px(ppgtt, pt_vaddr);
1814 
1815 		num_entries -= last_pte - first_pte;
1816 		first_pte = 0;
1817 		act_pt++;
1818 	}
1819 }
1820 
1821 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
1822 				      struct sg_table *pages,
1823 				      uint64_t start,
1824 				      enum i915_cache_level cache_level, u32 flags)
1825 {
1826 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1827 	gen6_pte_t *pt_vaddr;
1828 	unsigned first_entry = start >> PAGE_SHIFT;
1829 	unsigned act_pt = first_entry / GEN6_PTES;
1830 	unsigned act_pte = first_entry % GEN6_PTES;
1831 	struct sg_page_iter sg_iter;
1832 
1833 	pt_vaddr = NULL;
1834 	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
1835 		if (pt_vaddr == NULL)
1836 			pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
1837 
1838 		pt_vaddr[act_pte] =
1839 			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
1840 				       cache_level, true, flags);
1841 
1842 		if (++act_pte == GEN6_PTES) {
1843 			kunmap_px(ppgtt, pt_vaddr);
1844 			pt_vaddr = NULL;
1845 			act_pt++;
1846 			act_pte = 0;
1847 		}
1848 	}
1849 	if (pt_vaddr)
1850 		kunmap_px(ppgtt, pt_vaddr);
1851 }
1852 
1853 static int gen6_alloc_va_range(struct i915_address_space *vm,
1854 			       uint64_t start_in, uint64_t length_in)
1855 {
1856 	DECLARE_BITMAP(new_page_tables, I915_PDES);
1857 	struct drm_device *dev = vm->dev;
1858 	struct drm_i915_private *dev_priv = to_i915(dev);
1859 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
1860 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1861 	struct i915_page_table *pt;
1862 	uint32_t start, length, start_save, length_save;
1863 	uint32_t pde, temp;
1864 	int ret;
1865 
1866 	if (WARN_ON(start_in + length_in > ppgtt->base.total))
1867 		return -ENODEV;
1868 
1869 	start = start_save = start_in;
1870 	length = length_save = length_in;
1871 
1872 	bitmap_zero(new_page_tables, I915_PDES);
1873 
1874 	/* The allocation is done in two stages so that we can bail out with a
1875 	 * minimal amount of pain. The first stage finds new page tables that
1876 	 * need allocation. The second stage marks the PTEs that are in use
1877 	 * within those page tables.
1878 	 */
1879 	gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
1880 		if (pt != vm->scratch_pt) {
1881 			WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES));
1882 			continue;
1883 		}
1884 
1885 		/* The scratch page table must never have PTEs marked as used */
1886 		WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));
1887 
1888 		pt = alloc_pt(dev);
1889 		if (IS_ERR(pt)) {
1890 			ret = PTR_ERR(pt);
1891 			goto unwind_out;
1892 		}
1893 
1894 		gen6_initialize_pt(vm, pt);
1895 
1896 		ppgtt->pd.page_table[pde] = pt;
1897 		__set_bit(pde, new_page_tables);
1898 		trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT);
1899 	}
1900 
1901 	start = start_save;
1902 	length = length_save;
1903 
1904 	gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
1905 		DECLARE_BITMAP(tmp_bitmap, GEN6_PTES);
1906 
1907 		bitmap_zero(tmp_bitmap, GEN6_PTES);
1908 		bitmap_set(tmp_bitmap, gen6_pte_index(start),
1909 			   gen6_pte_count(start, length));
1910 
1911 		if (__test_and_clear_bit(pde, new_page_tables))
1912 			gen6_write_pde(&ppgtt->pd, pde, pt);
1913 
1914 		trace_i915_page_table_entry_map(vm, pde, pt,
1915 					 gen6_pte_index(start),
1916 					 gen6_pte_count(start, length),
1917 					 GEN6_PTES);
1918 		bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes,
1919 				GEN6_PTES);
1920 	}
1921 
1922 	WARN_ON(!bitmap_empty(new_page_tables, I915_PDES));
1923 
1924 	/* Make sure the write is complete before other code can use this page
1925 	 * table. Also required for WC-mapped PTEs. */
1926 	readl(ggtt->gsm);
1927 
1928 	mark_tlbs_dirty(ppgtt);
1929 	return 0;
1930 
1931 unwind_out:
1932 	for_each_set_bit(pde, new_page_tables, I915_PDES) {
1933 		struct i915_page_table *pt = ppgtt->pd.page_table[pde];
1934 
1935 		ppgtt->pd.page_table[pde] = vm->scratch_pt;
1936 		free_pt(vm->dev, pt);
1937 	}
1938 
1939 	mark_tlbs_dirty(ppgtt);
1940 	return ret;
1941 }
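
/*
 * A minimal illustrative sketch, excluded from the build with #if 0, of the
 * two-stage allocate-then-commit pattern used by gen6_alloc_va_range() above.
 * All example_* names and EXAMPLE_ENTRIES are hypothetical; only the shape
 * (track new allocations in a bitmap, publish them in a second pass, and on
 * error free exactly what this call added) mirrors the real function.
 */
#if 0
static int example_two_stage_alloc(struct example_dir *dir, int first, int count)
{
	DECLARE_BITMAP(newly_allocated, EXAMPLE_ENTRIES);
	int i, ret;

	bitmap_zero(newly_allocated, EXAMPLE_ENTRIES);

	/* Stage 1: allocate anything missing, remember what we added. */
	for (i = first; i < first + count; i++) {
		if (dir->entry[i])
			continue;
		dir->entry[i] = example_alloc_entry();
		if (!dir->entry[i]) {
			ret = -ENOMEM;
			goto unwind;
		}
		__set_bit(i, newly_allocated);
	}

	/* Stage 2: publish the new entries to the hardware-visible table. */
	for_each_set_bit(i, newly_allocated, EXAMPLE_ENTRIES)
		example_write_entry(dir, i);

	return 0;

unwind:
	/* Roll back only what this call created. */
	for_each_set_bit(i, newly_allocated, EXAMPLE_ENTRIES)
		example_free_entry(dir, i);
	return ret;
}
#endif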
1942 
1943 static int gen6_init_scratch(struct i915_address_space *vm)
1944 {
1945 	struct drm_device *dev = vm->dev;
1946 
1947 	vm->scratch_page = alloc_scratch_page(dev);
1948 	if (IS_ERR(vm->scratch_page))
1949 		return PTR_ERR(vm->scratch_page);
1950 
1951 	vm->scratch_pt = alloc_pt(dev);
1952 	if (IS_ERR(vm->scratch_pt)) {
1953 		free_scratch_page(dev, vm->scratch_page);
1954 		return PTR_ERR(vm->scratch_pt);
1955 	}
1956 
1957 	gen6_initialize_pt(vm, vm->scratch_pt);
1958 
1959 	return 0;
1960 }
1961 
1962 static void gen6_free_scratch(struct i915_address_space *vm)
1963 {
1964 	struct drm_device *dev = vm->dev;
1965 
1966 	free_pt(dev, vm->scratch_pt);
1967 	free_scratch_page(dev, vm->scratch_page);
1968 }
1969 
1970 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
1971 {
1972 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1973 	struct i915_page_table *pt;
1974 	uint32_t pde;
1975 
1976 	drm_mm_remove_node(&ppgtt->node);
1977 
1978 	gen6_for_all_pdes(pt, ppgtt, pde) {
1979 		if (pt != vm->scratch_pt)
1980 			free_pt(ppgtt->base.dev, pt);
1981 	}
1982 
1983 	gen6_free_scratch(vm);
1984 }
1985 
1986 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
1987 {
1988 	struct i915_address_space *vm = &ppgtt->base;
1989 	struct drm_device *dev = ppgtt->base.dev;
1990 	struct drm_i915_private *dev_priv = to_i915(dev);
1991 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
1992 	bool retried = false;
1993 	int ret;
1994 
1995 	/* The PPGTT PDEs reside in the GGTT and consist of 512 entries. The
1996 	 * allocator works in address-space sizes, so the count is multiplied by
1997 	 * the page size. We allocate at the top of the GTT to avoid fragmentation.
1998 	 */
1999 	BUG_ON(!drm_mm_initialized(&ggtt->base.mm));
2000 
2001 	ret = gen6_init_scratch(vm);
2002 	if (ret)
2003 		return ret;
2004 
2005 alloc:
2006 	ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm,
2007 						  &ppgtt->node, GEN6_PD_SIZE,
2008 						  GEN6_PD_ALIGN, 0,
2009 						  0, ggtt->base.total,
2010 						  DRM_MM_TOPDOWN);
2011 	if (ret == -ENOSPC && !retried) {
2012 		ret = i915_gem_evict_something(dev, &ggtt->base,
2013 					       GEN6_PD_SIZE, GEN6_PD_ALIGN,
2014 					       I915_CACHE_NONE,
2015 					       0, ggtt->base.total,
2016 					       0);
2017 		if (ret)
2018 			goto err_out;
2019 
2020 		retried = true;
2021 		goto alloc;
2022 	}
2023 
2024 	if (ret)
2025 		goto err_out;
2026 
2027 
2028 	if (ppgtt->node.start < ggtt->mappable_end)
2029 		DRM_DEBUG("Forced to use aperture for PDEs\n");
2030 
2031 	return 0;
2032 
2033 err_out:
2034 	gen6_free_scratch(vm);
2035 	return ret;
2036 }
2037 
2038 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
2039 {
2040 	return gen6_ppgtt_allocate_page_directories(ppgtt);
2041 }
2042 
2043 static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
2044 				  uint64_t start, uint64_t length)
2045 {
2046 	struct i915_page_table *unused;
2047 	uint32_t pde, temp;
2048 
2049 	gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde)
2050 		ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt;
2051 }
2052 
2053 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
2054 {
2055 	struct drm_device *dev = ppgtt->base.dev;
2056 	struct drm_i915_private *dev_priv = to_i915(dev);
2057 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
2058 	int ret;
2059 
2060 	ppgtt->base.pte_encode = ggtt->base.pte_encode;
2061 	if (IS_GEN6(dev)) {
2062 		ppgtt->switch_mm = gen6_mm_switch;
2063 	} else if (IS_HASWELL(dev)) {
2064 		ppgtt->switch_mm = hsw_mm_switch;
2065 	} else if (IS_GEN7(dev)) {
2066 		ppgtt->switch_mm = gen7_mm_switch;
2067 	} else
2068 		BUG();
2069 
2070 	if (intel_vgpu_active(dev))
2071 		ppgtt->switch_mm = vgpu_mm_switch;
2072 
2073 	ret = gen6_ppgtt_alloc(ppgtt);
2074 	if (ret)
2075 		return ret;
2076 
2077 	ppgtt->base.allocate_va_range = gen6_alloc_va_range;
2078 	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
2079 	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
2080 	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
2081 	ppgtt->base.bind_vma = ppgtt_bind_vma;
2082 	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
2083 	ppgtt->base.start = 0;
2084 	ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE;
2085 	ppgtt->debug_dump = gen6_dump_ppgtt;
2086 
2087 	ppgtt->pd.base.ggtt_offset =
2088 		ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);
2089 
2090 	ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm +
2091 		ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t);
2092 
2093 	gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
2094 
2095 	gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);
2096 
2097 	DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n",
2098 			 ppgtt->node.size >> 20,
2099 			 ppgtt->node.start / PAGE_SIZE);
2100 
2101 	DRM_DEBUG("Adding PPGTT at offset %x\n",
2102 		  ppgtt->pd.base.ggtt_offset << 10);
2103 
2104 	return 0;
2105 }
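
/*
 * A minimal illustrative sketch, excluded from the build with #if 0, of how
 * gen6_ppgtt_init() above turns the drm_mm node reserved in the GGTT into the
 * CPU pointer at which PDEs are written.  Assumes 4 KiB pages and 4-byte gen6
 * PTE/PDE slots; the example address is made up.
 */
#if 0
static void gen6_example_pd_addr(struct i915_ggtt *ggtt, uint64_t node_start)
{
	/* GGTT address of the reserved node -> index of its first GGTT entry */
	uint32_t first_entry = node_start / PAGE_SIZE;
	/* byte offset of that entry inside the GSM (4 bytes per entry) */
	uint32_t ggtt_offset = first_entry * sizeof(gen6_pte_t);
	/* CPU pointer used by gen6_write_pde()/gen6_write_page_range() */
	gen6_pte_t __iomem *pd_addr =
		(gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset / sizeof(gen6_pte_t);

	/* e.g. node_start == 0xffe00000 -> first_entry 0xffe00, offset 0x3ff800 */
	(void)pd_addr;
}
#endif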
2106 
2107 static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
2108 {
2109 	ppgtt->base.dev = dev;
2110 
2111 	if (INTEL_INFO(dev)->gen < 8)
2112 		return gen6_ppgtt_init(ppgtt);
2113 	else
2114 		return gen8_ppgtt_init(ppgtt);
2115 }
2116 
2117 static void i915_address_space_init(struct i915_address_space *vm,
2118 				    struct drm_i915_private *dev_priv)
2119 {
2120 	drm_mm_init(&vm->mm, vm->start, vm->total);
2121 	vm->dev = dev_priv->dev;
2122 	INIT_LIST_HEAD(&vm->active_list);
2123 	INIT_LIST_HEAD(&vm->inactive_list);
2124 	list_add_tail(&vm->global_link, &dev_priv->vm_list);
2125 }
2126 
2127 static void gtt_write_workarounds(struct drm_device *dev)
2128 {
2129 	struct drm_i915_private *dev_priv = dev->dev_private;
2130 
2131 	/* This function is for GTT-related workarounds. It is called on driver
2132 	 * load and after a GPU reset, so workarounds can be placed here even if
2133 	 * they get overwritten by a GPU reset.
2134 	 */
2135 	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt */
2136 	if (IS_BROADWELL(dev))
2137 		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
2138 	else if (IS_CHERRYVIEW(dev))
2139 		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
2140 	else if (IS_SKYLAKE(dev))
2141 		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
2142 	else if (IS_BROXTON(dev))
2143 		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
2144 }
2145 
2146 int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
2147 {
2148 	struct drm_i915_private *dev_priv = dev->dev_private;
2149 	int ret = 0;
2150 
2151 	ret = __hw_ppgtt_init(dev, ppgtt);
2152 	if (ret == 0) {
2153 		kref_init(&ppgtt->ref);
2154 		i915_address_space_init(&ppgtt->base, dev_priv);
2155 	}
2156 
2157 	return ret;
2158 }
2159 
2160 int i915_ppgtt_init_hw(struct drm_device *dev)
2161 {
2162 	gtt_write_workarounds(dev);
2163 
2164 	/* In the case of execlists, PPGTT is enabled by the context descriptor
2165 	 * and the PDPs are contained within the context itself.  We don't
2166 	 * need to do anything here. */
2167 	if (i915.enable_execlists)
2168 		return 0;
2169 
2170 	if (!USES_PPGTT(dev))
2171 		return 0;
2172 
2173 	if (IS_GEN6(dev))
2174 		gen6_ppgtt_enable(dev);
2175 	else if (IS_GEN7(dev))
2176 		gen7_ppgtt_enable(dev);
2177 	else if (INTEL_INFO(dev)->gen >= 8)
2178 		gen8_ppgtt_enable(dev);
2179 	else
2180 		MISSING_CASE(INTEL_INFO(dev)->gen);
2181 
2182 	return 0;
2183 }
2184 
2185 int i915_ppgtt_init_ring(struct drm_i915_gem_request *req)
2186 {
2187 	struct drm_i915_private *dev_priv = req->i915;
2188 	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
2189 
2190 	if (i915.enable_execlists)
2191 		return 0;
2192 
2193 	if (!ppgtt)
2194 		return 0;
2195 
2196 	return ppgtt->switch_mm(ppgtt, req);
2197 }
2198 
2199 struct i915_hw_ppgtt *
2200 i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
2201 {
2202 	struct i915_hw_ppgtt *ppgtt;
2203 	int ret;
2204 
2205 	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2206 	if (!ppgtt)
2207 		return ERR_PTR(-ENOMEM);
2208 
2209 	ret = i915_ppgtt_init(dev, ppgtt);
2210 	if (ret) {
2211 		kfree(ppgtt);
2212 		return ERR_PTR(ret);
2213 	}
2214 
2215 	ppgtt->file_priv = fpriv;
2216 
2217 	trace_i915_ppgtt_create(&ppgtt->base);
2218 
2219 	return ppgtt;
2220 }
2221 
2222 void  i915_ppgtt_release(struct kref *kref)
2223 {
2224 	struct i915_hw_ppgtt *ppgtt =
2225 		container_of(kref, struct i915_hw_ppgtt, ref);
2226 
2227 	trace_i915_ppgtt_release(&ppgtt->base);
2228 
2229 	/* vmas should already be unbound */
2230 	WARN_ON(!list_empty(&ppgtt->base.active_list));
2231 	WARN_ON(!list_empty(&ppgtt->base.inactive_list));
2232 
2233 	list_del(&ppgtt->base.global_link);
2234 	drm_mm_takedown(&ppgtt->base.mm);
2235 
2236 	ppgtt->base.cleanup(&ppgtt->base);
2237 	kfree(ppgtt);
2238 }
2239 
2240 extern int intel_iommu_gfx_mapped;
2241 /* Certain Gen5 chipsets require idling the GPU before
2242  * unmapping anything from the GTT when VT-d is enabled.
2243  */
2244 static bool needs_idle_maps(struct drm_device *dev)
2245 {
2246 #ifdef CONFIG_INTEL_IOMMU
2247 	/* Query intel_iommu to see if we need the workaround. Presumably that
2248 	 * was loaded first.
2249 	 */
2250 	if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
2251 		return true;
2252 #endif
2253 	return false;
2254 }
2255 
2256 static bool do_idling(struct drm_i915_private *dev_priv)
2257 {
2258 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
2259 	bool ret = dev_priv->mm.interruptible;
2260 
2261 	if (unlikely(ggtt->do_idle_maps)) {
2262 		dev_priv->mm.interruptible = false;
2263 		if (i915_gpu_idle(dev_priv->dev)) {
2264 			DRM_ERROR("Couldn't idle GPU\n");
2265 			/* Wait a bit, in hopes it avoids the hang */
2266 			udelay(10);
2267 		}
2268 	}
2269 
2270 	return ret;
2271 }
2272 
2273 static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
2274 {
2275 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
2276 
2277 	if (unlikely(ggtt->do_idle_maps))
2278 		dev_priv->mm.interruptible = interruptible;
2279 }
2280 
2281 void i915_check_and_clear_faults(struct drm_device *dev)
2282 {
2283 	struct drm_i915_private *dev_priv = dev->dev_private;
2284 	struct intel_engine_cs *engine;
2285 
2286 	if (INTEL_INFO(dev)->gen < 6)
2287 		return;
2288 
2289 	for_each_engine(engine, dev_priv) {
2290 		u32 fault_reg;
2291 		fault_reg = I915_READ(RING_FAULT_REG(engine));
2292 		if (fault_reg & RING_FAULT_VALID) {
2293 #if 0
2294 			DRM_DEBUG_DRIVER("Unexpected fault\n"
2295 					 "\tAddr: 0x%08lx\n"
2296 					 "\tAddress space: %s\n"
2297 					 "\tSource ID: %d\n"
2298 					 "\tType: %d\n",
2299 					 fault_reg & PAGE_MASK,
2300 					 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
2301 					 RING_FAULT_SRCID(fault_reg),
2302 					 RING_FAULT_FAULT_TYPE(fault_reg));
2303 #endif
2304 			I915_WRITE(RING_FAULT_REG(engine),
2305 				   fault_reg & ~RING_FAULT_VALID);
2306 		}
2307 	}
2308 	POSTING_READ(RING_FAULT_REG(&dev_priv->engine[RCS]));
2309 }
2310 
2311 static void i915_ggtt_flush(struct drm_i915_private *dev_priv)
2312 {
2313 	if (INTEL_INFO(dev_priv)->gen < 6) {
2314 		intel_gtt_chipset_flush();
2315 	} else {
2316 		I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2317 		POSTING_READ(GFX_FLSH_CNTL_GEN6);
2318 	}
2319 }
2320 
2321 void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
2322 {
2323 	struct drm_i915_private *dev_priv = to_i915(dev);
2324 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
2325 
2326 	/* Don't bother messing with faults pre GEN6 as we have little
2327 	 * documentation supporting that it's a good idea.
2328 	 */
2329 	if (INTEL_INFO(dev)->gen < 6)
2330 		return;
2331 
2332 	i915_check_and_clear_faults(dev);
2333 
2334 	ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total,
2335 			     true);
2336 
2337 	i915_ggtt_flush(dev_priv);
2338 }
2339 
2340 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
2341 {
2342 	if (!dma_map_sg(&obj->base.dev->pdev->dev,
2343 			obj->pages->sgl, obj->pages->nents,
2344 			PCI_DMA_BIDIRECTIONAL))
2345 		return -ENOSPC;
2346 
2347 	return 0;
2348 }
2349 
2350 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
2351 {
2352 #ifdef writeq
2353 	writeq(pte, addr);
2354 #else
2355 	iowrite32((u32)pte, addr);
2356 	iowrite32(pte >> 32, addr + 4);
2357 #endif
2358 }
2359 
2360 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
2361 				     struct sg_table *st,
2362 				     uint64_t start,
2363 				     enum i915_cache_level level, u32 unused)
2364 {
2365 	struct drm_i915_private *dev_priv = to_i915(vm->dev);
2366 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
2367 	unsigned first_entry = start >> PAGE_SHIFT;
2368 	gen8_pte_t __iomem *gtt_entries =
2369 		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
2370 	int i = 0;
2371 	struct sg_page_iter sg_iter;
2372 	dma_addr_t addr = 0; /* shut up gcc */
2373 	int rpm_atomic_seq;
2374 
2375 	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2376 
2377 	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
2378 		addr = sg_dma_address(sg_iter.sg) +
2379 			(sg_iter.sg_pgoffset << PAGE_SHIFT);
2380 		gen8_set_pte(&gtt_entries[i],
2381 			     gen8_pte_encode(addr, level, true));
2382 		i++;
2383 	}
2384 
2385 	/*
2386 	 * XXX: This serves as a posting read to make sure that the PTE has
2387 	 * actually been updated. There is some concern that even though the
2388 	 * registers and PTEs are within the same BAR, they may be subject to
2389 	 * different (NUMA-like) access patterns. Therefore, even with the way we
2390 	 * assume the hardware should work, we keep this posting read for paranoia.
2391 	 */
2392 	if (i != 0)
2393 		WARN_ON(readq(&gtt_entries[i-1])
2394 			!= gen8_pte_encode(addr, level, true));
2395 
2396 	/* This next bit makes the above posting read even more important. We
2397 	 * want to flush the TLBs only after we're certain all the PTE updates
2398 	 * have finished.
2399 	 */
2400 	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2401 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
2402 
2403 	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2404 }
2405 
2406 struct insert_entries {
2407 	struct i915_address_space *vm;
2408 	struct sg_table *st;
2409 	uint64_t start;
2410 	enum i915_cache_level level;
2411 	u32 flags;
2412 };
2413 
2414 static int gen8_ggtt_insert_entries__cb(void *_arg)
2415 {
2416 	struct insert_entries *arg = _arg;
2417 	gen8_ggtt_insert_entries(arg->vm, arg->st,
2418 				 arg->start, arg->level, arg->flags);
2419 	return 0;
2420 }
2421 
2422 static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm,
2423 					  struct sg_table *st,
2424 					  uint64_t start,
2425 					  enum i915_cache_level level,
2426 					  u32 flags)
2427 {
2428 	struct insert_entries arg = { vm, st, start, level, flags };
2429 #ifndef __DragonFly__
2430 	stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL);
2431 #else
2432 	/* XXX: is this enough?
2433 	 * See Linux commit 5bab6f60cb4d1417ad7c599166bcfec87529c1a2 */
2434 	get_mplock();
2435 	gen8_ggtt_insert_entries__cb(&arg);
2436 	rel_mplock();
2437 #endif
2438 }
2439 
2440 /*
2441  * Binds an object into the global gtt with the specified cache level. The object
2442  * will be accessible to the GPU via commands whose operands reference offsets
2443  * within the global GTT, as well as accessible by the CPU through the GMADR
2444  * mapped BAR (dev_priv->mm.gtt->gtt).
2445  */
2446 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
2447 				     struct sg_table *st,
2448 				     uint64_t start,
2449 				     enum i915_cache_level level, u32 flags)
2450 {
2451 	struct drm_i915_private *dev_priv = to_i915(vm->dev);
2452 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
2453 	unsigned first_entry = start >> PAGE_SHIFT;
2454 	gen6_pte_t __iomem *gtt_entries =
2455 		(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
2456 	int i = 0;
2457 	struct sg_page_iter sg_iter;
2458 	dma_addr_t addr = 0;
2459 	int rpm_atomic_seq;
2460 
2461 	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2462 
2463 	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
2464 		addr = sg_page_iter_dma_address(&sg_iter);
2465 		iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
2466 		i++;
2467 	}
2468 
2469 	/* XXX: This serves as a posting read to make sure that the PTE has
2470 	 * actually been updated. There is some concern that even though the
2471 	 * registers and PTEs are within the same BAR, they may be subject to
2472 	 * different (NUMA-like) access patterns. Therefore, even with the way we
2473 	 * assume the hardware should work, we keep this posting read for paranoia.
2474 	 */
2475 	if (i != 0) {
2476 		unsigned long gtt = readl(&gtt_entries[i-1]);
2477 		WARN_ON(gtt != vm->pte_encode(addr, level, true, flags));
2478 	}
2479 
2480 	/* This next bit makes the above posting read even more important. We
2481 	 * want to flush the TLBs only after we're certain all the PTE updates
2482 	 * have finished.
2483 	 */
2484 	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2485 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
2486 
2487 	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2488 }
2489 
2490 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
2491 				  uint64_t start,
2492 				  uint64_t length,
2493 				  bool use_scratch)
2494 {
2495 	struct drm_i915_private *dev_priv = to_i915(vm->dev);
2496 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
2497 	unsigned first_entry = start >> PAGE_SHIFT;
2498 	unsigned num_entries = length >> PAGE_SHIFT;
2499 	gen8_pte_t scratch_pte, __iomem *gtt_base =
2500 		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
2501 	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
2502 	int i;
2503 	int rpm_atomic_seq;
2504 
2505 	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2506 
2507 	if (WARN(num_entries > max_entries,
2508 		 "First entry = %d; Num entries = %d (max=%d)\n",
2509 		 first_entry, num_entries, max_entries))
2510 		num_entries = max_entries;
2511 
2512 	scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
2513 				      I915_CACHE_LLC,
2514 				      use_scratch);
2515 	for (i = 0; i < num_entries; i++)
2516 		gen8_set_pte(&gtt_base[i], scratch_pte);
2517 	readl(gtt_base);
2518 
2519 	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2520 }
2521 
2522 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
2523 				  uint64_t start,
2524 				  uint64_t length,
2525 				  bool use_scratch)
2526 {
2527 	struct drm_i915_private *dev_priv = to_i915(vm->dev);
2528 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
2529 	unsigned first_entry = start >> PAGE_SHIFT;
2530 	unsigned num_entries = length >> PAGE_SHIFT;
2531 	gen6_pte_t scratch_pte, __iomem *gtt_base =
2532 		(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
2533 	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
2534 	int i;
2535 	int rpm_atomic_seq;
2536 
2537 	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2538 
2539 	if (WARN(num_entries > max_entries,
2540 		 "First entry = %d; Num entries = %d (max=%d)\n",
2541 		 first_entry, num_entries, max_entries))
2542 		num_entries = max_entries;
2543 
2544 	scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
2545 				     I915_CACHE_LLC, use_scratch, 0);
2546 
2547 	for (i = 0; i < num_entries; i++)
2548 		iowrite32(scratch_pte, &gtt_base[i]);
2549 	readl(gtt_base);
2550 
2551 	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2552 }
2553 
2554 static void i915_ggtt_insert_entries(struct i915_address_space *vm,
2555 				     struct sg_table *pages,
2556 				     uint64_t start,
2557 				     enum i915_cache_level cache_level, u32 unused)
2558 {
2559 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
2560 	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2561 		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2562 	int rpm_atomic_seq;
2563 
2564 	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2565 
2566 	intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags);
2567 
2568 	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2569 
2570 }
2571 
2572 static void i915_ggtt_clear_range(struct i915_address_space *vm,
2573 				  uint64_t start,
2574 				  uint64_t length,
2575 				  bool unused)
2576 {
2577 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
2578 	unsigned first_entry = start >> PAGE_SHIFT;
2579 	unsigned num_entries = length >> PAGE_SHIFT;
2580 	int rpm_atomic_seq;
2581 
2582 	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2583 
2584 	intel_gtt_clear_range(first_entry, num_entries);
2585 
2586 	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2587 }
2588 
2589 static int ggtt_bind_vma(struct i915_vma *vma,
2590 			 enum i915_cache_level cache_level,
2591 			 u32 flags)
2592 {
2593 	struct drm_i915_gem_object *obj = vma->obj;
2594 	u32 pte_flags = 0;
2595 	int ret;
2596 
2597 	ret = i915_get_ggtt_vma_pages(vma);
2598 	if (ret)
2599 		return ret;
2600 
2601 	/* Currently applicable only to VLV */
2602 	if (obj->gt_ro)
2603 		pte_flags |= PTE_READ_ONLY;
2604 
2605 	vma->vm->insert_entries(vma->vm, vma->ggtt_view.pages,
2606 				vma->node.start,
2607 				cache_level, pte_flags);
2608 
2609 	/*
2610 	 * Without aliasing PPGTT there's no difference between
2611 	 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
2612 	 * upgrade to both bound if we bind either to avoid double-binding.
2613 	 */
2614 	vma->bound |= GLOBAL_BIND | LOCAL_BIND;
2615 
2616 	return 0;
2617 }
2618 
2619 static int aliasing_gtt_bind_vma(struct i915_vma *vma,
2620 				 enum i915_cache_level cache_level,
2621 				 u32 flags)
2622 {
2623 	u32 pte_flags;
2624 	int ret;
2625 
2626 	ret = i915_get_ggtt_vma_pages(vma);
2627 	if (ret)
2628 		return ret;
2629 
2630 	/* Currently applicable only to VLV */
2631 	pte_flags = 0;
2632 	if (vma->obj->gt_ro)
2633 		pte_flags |= PTE_READ_ONLY;
2634 
2635 
2636 	if (flags & GLOBAL_BIND) {
2637 		vma->vm->insert_entries(vma->vm,
2638 					vma->ggtt_view.pages,
2639 					vma->node.start,
2640 					cache_level, pte_flags);
2641 	}
2642 
2643 	if (flags & LOCAL_BIND) {
2644 		struct i915_hw_ppgtt *appgtt =
2645 			to_i915(vma->vm->dev)->mm.aliasing_ppgtt;
2646 		appgtt->base.insert_entries(&appgtt->base,
2647 					    vma->ggtt_view.pages,
2648 					    vma->node.start,
2649 					    cache_level, pte_flags);
2650 	}
2651 
2652 	return 0;
2653 }
2654 
2655 static void ggtt_unbind_vma(struct i915_vma *vma)
2656 {
2657 	struct drm_device *dev = vma->vm->dev;
2658 	struct drm_i915_private *dev_priv = dev->dev_private;
2659 	struct drm_i915_gem_object *obj = vma->obj;
2660 	const uint64_t size = min_t(uint64_t,
2661 				    obj->base.size,
2662 				    vma->node.size);
2663 
2664 	if (vma->bound & GLOBAL_BIND) {
2665 		vma->vm->clear_range(vma->vm,
2666 				     vma->node.start,
2667 				     size,
2668 				     true);
2669 	}
2670 
2671 	if (dev_priv->mm.aliasing_ppgtt && vma->bound & LOCAL_BIND) {
2672 		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
2673 
2674 		appgtt->base.clear_range(&appgtt->base,
2675 					 vma->node.start,
2676 					 size,
2677 					 true);
2678 	}
2679 }
2680 
2681 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
2682 {
2683 	struct drm_device *dev = obj->base.dev;
2684 	struct drm_i915_private *dev_priv = dev->dev_private;
2685 	bool interruptible;
2686 
2687 	interruptible = do_idling(dev_priv);
2688 
2689 	dma_unmap_sg(&dev->pdev->dev, obj->pages->sgl, obj->pages->nents,
2690 		     PCI_DMA_BIDIRECTIONAL);
2691 
2692 	undo_idling(dev_priv, interruptible);
2693 }
2694 
2695 static void i915_gtt_color_adjust(struct drm_mm_node *node,
2696 				  unsigned long color,
2697 				  u64 *start,
2698 				  u64 *end)
2699 {
2700 	if (node->color != color)
2701 		*start += 4096;
2702 
2703 	if (!list_empty(&node->node_list)) {
2704 		node = list_entry(node->node_list.next,
2705 				  struct drm_mm_node,
2706 				  node_list);
2707 		if (node->allocated && node->color != color)
2708 			*end -= 4096;
2709 	}
2710 }
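
/*
 * A minimal illustrative sketch, excluded from the build with #if 0, of the
 * effect of i915_gtt_color_adjust() above.  The node colors and hole
 * addresses are made up; only the one-page guard adjustment mirrors the real
 * callback.
 */
#if 0
static void i915_example_color_adjust(u64 *start, u64 *end)
{
	unsigned long node_before_color = I915_CACHE_LLC;	/* node preceding the hole */
	unsigned long node_after_color = I915_CACHE_NONE;	/* node following the hole */
	unsigned long alloc_color = I915_CACHE_NONE;		/* color of the new allocation */

	/* the hole between the two nodes as drm_mm reports it */
	*start = 0x20000;
	*end   = 0x30000;

	if (node_before_color != alloc_color)
		*start += 4096;		/* keep a scratch page after the preceding node */
	if (node_after_color != alloc_color)
		*end -= 4096;		/* keep a scratch page before the following node */

	/* result here: [0x21000, 0x30000), shielding the differently-cached neighbour */
}
#endif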
2711 
2712 static int i915_gem_setup_global_gtt(struct drm_device *dev,
2713 				     u64 start,
2714 				     u64 mappable_end,
2715 				     u64 end)
2716 {
2717 	/* Let GEM manage all of the aperture.
2718 	 *
2719 	 * However, leave one page at the end still bound to the scratch page.
2720 	 * There are a number of places where the hardware apparently prefetches
2721 	 * past the end of the object, and we've seen multiple hangs with the
2722 	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
2723 	 * aperture.  One page should be enough to keep any prefetching inside
2724 	 * of the aperture.
2725 	 */
2726 	struct drm_i915_private *dev_priv = to_i915(dev);
2727 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
2728 	struct drm_mm_node *entry;
2729 	struct drm_i915_gem_object *obj;
2730 	unsigned long hole_start, hole_end;
2731 	int ret;
2732 	unsigned long mappable;
2733 	int error;
2734 
2735 	mappable = min(end, mappable_end) - start;
2736 	BUG_ON(mappable_end > end);
2737 
2738 	ggtt->base.start = start;
2739 
2740 	/* Subtract the guard page before address space initialization to
2741 	 * shrink the range used by drm_mm */
2742 	ggtt->base.total = end - start - PAGE_SIZE;
2743 	i915_address_space_init(&ggtt->base, dev_priv);
2744 	ggtt->base.total += PAGE_SIZE;
2745 
2746 	if (intel_vgpu_active(dev)) {
2747 		ret = intel_vgt_balloon(dev);
2748 		if (ret)
2749 			return ret;
2750 	}
2751 
2752 	if (!HAS_LLC(dev))
2753 		ggtt->base.mm.color_adjust = i915_gtt_color_adjust;
2754 
2755 	/* Mark any preallocated objects as occupied */
2756 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
2757 		struct i915_vma *vma = i915_gem_obj_to_vma(obj, &ggtt->base);
2758 
2759 		DRM_DEBUG_KMS("reserving preallocated space: %llx + %zx\n",
2760 			      i915_gem_obj_ggtt_offset(obj), obj->base.size);
2761 
2762 		WARN_ON(i915_gem_obj_ggtt_bound(obj));
2763 		ret = drm_mm_reserve_node(&ggtt->base.mm, &vma->node);
2764 		if (ret) {
2765 			DRM_DEBUG_KMS("Reservation failed: %i\n", ret);
2766 			return ret;
2767 		}
2768 		vma->bound |= GLOBAL_BIND;
2769 		__i915_vma_set_map_and_fenceable(vma);
2770 		list_add_tail(&vma->vm_link, &ggtt->base.inactive_list);
2771 	}
2772 
2773 	/* Clear any non-preallocated blocks */
2774 	drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) {
2775 		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
2776 			      hole_start, hole_end);
2777 		ggtt->base.clear_range(&ggtt->base, hole_start,
2778 				     hole_end - hole_start, true);
2779 	}
2780 
2781 #ifdef __DragonFly__
2782 	device_printf(dev->dev->bsddev,
2783 	    "taking over the fictitious range 0x%llx-0x%llx\n",
2784 	    dev_priv->ggtt.mappable_base + start, dev_priv->ggtt.mappable_base + start + mappable);
2785 	error = -vm_phys_fictitious_reg_range(dev_priv->ggtt.mappable_base + start,
2786 	    dev_priv->ggtt.mappable_base + start + mappable, VM_MEMATTR_WRITE_COMBINING);
2787 #endif
2788 
2789 	/* And finally clear the reserved guard page */
2790 	ggtt->base.clear_range(&ggtt->base, end - PAGE_SIZE, PAGE_SIZE, true);
2791 
2792 	if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) {
2793 		struct i915_hw_ppgtt *ppgtt;
2794 
2795 		ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2796 		if (!ppgtt)
2797 			return -ENOMEM;
2798 
2799 		ret = __hw_ppgtt_init(dev, ppgtt);
2800 		if (ret) {
2801 			ppgtt->base.cleanup(&ppgtt->base);
2802 			kfree(ppgtt);
2803 			return ret;
2804 		}
2805 
2806 		if (ppgtt->base.allocate_va_range)
2807 			ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0,
2808 							    ppgtt->base.total);
2809 		if (ret) {
2810 			ppgtt->base.cleanup(&ppgtt->base);
2811 			kfree(ppgtt);
2812 			return ret;
2813 		}
2814 
2815 		ppgtt->base.clear_range(&ppgtt->base,
2816 					ppgtt->base.start,
2817 					ppgtt->base.total,
2818 					true);
2819 
2820 		dev_priv->mm.aliasing_ppgtt = ppgtt;
2821 		WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma);
2822 		ggtt->base.bind_vma = aliasing_gtt_bind_vma;
2823 	}
2824 
2825 	return 0;
2826 }
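
/*
 * A minimal illustrative sketch, excluded from the build with #if 0, of the
 * guard-page bookkeeping in i915_gem_setup_global_gtt() above: the range
 * handed to drm_mm excludes the final page so nothing can be allocated there,
 * while ggtt->base.total is restored afterwards so the PTE-level helpers
 * still cover (and scratch out) that last page.  The 2 GiB figure is made up.
 */
#if 0
static void ggtt_example_guard_page(struct drm_i915_private *dev_priv,
				    struct i915_ggtt *ggtt)
{
	u64 start = 0, end = 2ULL << 30;		/* a hypothetical 2 GiB GGTT */

	ggtt->base.total = end - start - PAGE_SIZE;	/* drm_mm manages 2 GiB - 4 KiB */
	i915_address_space_init(&ggtt->base, dev_priv);
	ggtt->base.total += PAGE_SIZE;			/* PTE helpers see the full 2 GiB */

	/* the unallocatable guard page is still pointed at scratch */
	ggtt->base.clear_range(&ggtt->base, end - PAGE_SIZE, PAGE_SIZE, true);
}
#endif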
2827 
2828 /**
2829  * i915_gem_init_ggtt - Initialize GEM for Global GTT
2830  * @dev: DRM device
2831  */
2832 void i915_gem_init_ggtt(struct drm_device *dev)
2833 {
2834 	struct drm_i915_private *dev_priv = to_i915(dev);
2835 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
2836 
2837 	i915_gem_setup_global_gtt(dev, 0, ggtt->mappable_end, ggtt->base.total);
2838 }
2839 
2840 /**
2841  * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization
2842  * @dev: DRM device
2843  */
2844 void i915_ggtt_cleanup_hw(struct drm_device *dev)
2845 {
2846 	struct drm_i915_private *dev_priv = to_i915(dev);
2847 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
2848 
2849 	if (dev_priv->mm.aliasing_ppgtt) {
2850 		struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
2851 
2852 		ppgtt->base.cleanup(&ppgtt->base);
2853 		kfree(ppgtt);
2854 	}
2855 
2856 	i915_gem_cleanup_stolen(dev);
2857 
2858 	if (drm_mm_initialized(&ggtt->base.mm)) {
2859 		if (intel_vgpu_active(dev))
2860 			intel_vgt_deballoon();
2861 
2862 		drm_mm_takedown(&ggtt->base.mm);
2863 		list_del(&ggtt->base.global_link);
2864 	}
2865 
2866 	ggtt->base.cleanup(&ggtt->base);
2867 }
2868 
2869 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
2870 {
2871 	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
2872 	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
2873 	return snb_gmch_ctl << 20;
2874 }
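
/*
 * A minimal illustrative sketch, excluded from the build with #if 0, decoding
 * a made-up SNB_GMCH_CTRL value with gen6_get_total_gtt_size() above and
 * carrying the result through to the GGTT address-space size the way
 * gen6_gmch_probe() does further down.
 */
#if 0
static void gen6_example_ggtt_size(void)
{
	u16 snb_gmch_ctl = 2 << SNB_GMCH_GGMS_SHIFT;	/* GGMS field == 2 */
	unsigned int gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);

	/* 2 << 20 == 2 MiB of PTE space; at 4 bytes per gen6 PTE that is
	 * 512K entries, and 512K entries * 4 KiB == 2 GiB of GGTT space. */
	WARN_ON(gtt_size != 2 << 20);
	WARN_ON(((u64)(gtt_size / sizeof(gen6_pte_t)) << PAGE_SHIFT) != 2ULL << 30);
}
#endif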
2875 
2876 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
2877 {
2878 	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
2879 	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
2880 	if (bdw_gmch_ctl)
2881 		bdw_gmch_ctl = 1 << bdw_gmch_ctl;
2882 
2883 #ifdef CONFIG_X86_32
2884 	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */
2885 	if (bdw_gmch_ctl > 4)
2886 		bdw_gmch_ctl = 4;
2887 #endif
2888 
2889 	return bdw_gmch_ctl << 20;
2890 }
2891 
2892 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
2893 {
2894 	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
2895 	gmch_ctrl &= SNB_GMCH_GGMS_MASK;
2896 
2897 	if (gmch_ctrl)
2898 		return 1 << (20 + gmch_ctrl);
2899 
2900 	return 0;
2901 }
2902 
2903 static size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
2904 {
2905 	snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
2906 	snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
2907 	return snb_gmch_ctl << 25; /* 32 MB units */
2908 }
2909 
2910 static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
2911 {
2912 	bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2913 	bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
2914 	return bdw_gmch_ctl << 25; /* 32 MB units */
2915 }
2916 
2917 static size_t chv_get_stolen_size(u16 gmch_ctrl)
2918 {
2919 	gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
2920 	gmch_ctrl &= SNB_GMCH_GMS_MASK;
2921 
2922 	/*
2923 	 * 0x0  to 0x10: 32MB increments starting at 0MB
2924 	 * 0x11 to 0x16: 4MB increments starting at 8MB
2925 	 * 0x17 to 0x1d: 4MB increments starting at 36MB
2926 	 */
2927 	if (gmch_ctrl < 0x11)
2928 		return gmch_ctrl << 25;
2929 	else if (gmch_ctrl < 0x17)
2930 		return (gmch_ctrl - 0x11 + 2) << 22;
2931 	else
2932 		return (gmch_ctrl - 0x17 + 9) << 22;
2933 }
2934 
2935 static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
2936 {
2937 	gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2938 	gen9_gmch_ctl &= BDW_GMCH_GMS_MASK;
2939 
2940 	if (gen9_gmch_ctl < 0xf0)
2941 		return gen9_gmch_ctl << 25; /* 32 MB units */
2942 	else
2943 		/* 4MB units: 0xf0 encodes 4MB, each further step adds 4MB */
2944 		return (gen9_gmch_ctl - 0xf0 + 1) << 22;
2945 }
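
/*
 * A minimal illustrative sketch, excluded from the build with #if 0, with
 * worked examples for the piecewise CHV stolen-size decode above.  The GMS
 * field values are made up; the arithmetic follows chv_get_stolen_size().
 */
#if 0
static void chv_example_stolen_size(void)
{
	/* 0x10: still in the 32 MiB granularity range -> 16 * 32 MiB */
	WARN_ON(chv_get_stolen_size(0x10 << SNB_GMCH_GMS_SHIFT) != 512ULL << 20);
	/* 0x11: first value of the 4 MiB range starting at 8 MiB */
	WARN_ON(chv_get_stolen_size(0x11 << SNB_GMCH_GMS_SHIFT) != 8ULL << 20);
	/* 0x17: first value of the 4 MiB range starting at 36 MiB */
	WARN_ON(chv_get_stolen_size(0x17 << SNB_GMCH_GMS_SHIFT) != 36ULL << 20);
}
#endif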
2946 
2947 static int ggtt_probe_common(struct drm_device *dev,
2948 			     size_t gtt_size)
2949 {
2950 	struct drm_i915_private *dev_priv = to_i915(dev);
2951 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
2952 	struct i915_page_scratch *scratch_page;
2953 	phys_addr_t ggtt_phys_addr;
2954 
2955 	/* For modern GENs, the PTEs and the register space are split within the BAR */
2956 	ggtt_phys_addr = pci_resource_start(dev->pdev, 0) +
2957 			 (pci_resource_len(dev->pdev, 0) / 2);
2958 
2959 	/*
2960 	 * On BXT writes larger than 64 bit to the GTT pagetable range will be
2961 	 * dropped. For WC mappings in general we have 64 byte burst writes
2962 	 * when the WC buffer is flushed, so we can't use it, but have to
2963 	 * resort to an uncached mapping. The WC issue is easily caught by the
2964 	 * readback check when writing GTT PTE entries.
2965 	 */
2966 	if (IS_BROXTON(dev))
2967 		ggtt->gsm = ioremap_nocache(ggtt_phys_addr, gtt_size);
2968 	else
2969 		ggtt->gsm = ioremap_wc(ggtt_phys_addr, gtt_size);
2970 	if (!ggtt->gsm) {
2971 		DRM_ERROR("Failed to map the gtt page table\n");
2972 		return -ENOMEM;
2973 	}
2974 
2975 	scratch_page = alloc_scratch_page(dev);
2976 	if (IS_ERR(scratch_page)) {
2977 		DRM_ERROR("Scratch setup failed\n");
2978 		/* iounmap will also get called at remove, but meh */
2979 		iounmap(ggtt->gsm);
2980 		return PTR_ERR(scratch_page);
2981 	}
2982 
2983 	ggtt->base.scratch_page = scratch_page;
2984 
2985 	return 0;
2986 }
2987 
2988 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
2989  * bits. When using advanced contexts each context stores its own PAT, but
2990  * writing this data shouldn't be harmful even in those cases. */
2991 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
2992 {
2993 	uint64_t pat;
2994 
2995 	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
2996 	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
2997 	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
2998 	      GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
2999 	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
3000 	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
3001 	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
3002 	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
3003 
3004 	if (!USES_PPGTT(dev_priv))
3005 		/* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
3006 		 * so RTL will always use the value corresponding to
3007 		 * pat_sel = 000".
3008 		 * So let's disable cache for GGTT to avoid screen corruptions.
3009 		 * MOCS still can be used though.
3010 		 * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
3011 		 * before this patch, i.e. the same uncached + snooping access
3012 		 * like on gen6/7 seems to be in effect.
3013 		 * - So this just fixes blitter/render access. Again it looks
3014 		 * like it's not just uncached access, but uncached + snooping.
3015 		 * So we can still hold onto all our assumptions wrt cpu
3016 		 * clflushing on LLC machines.
3017 		 */
3018 		pat = GEN8_PPAT(0, GEN8_PPAT_UC);
3019 
3020 	/* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
3021 	 * write would work. */
3022 	I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
3023 	I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
3024 }
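
/*
 * A minimal illustrative sketch, excluded from the build with #if 0, of how
 * GEN8_PPAT() packs the eight PAT entries programmed above into one 64-bit
 * value (one byte per entry) that is then split across the LO/HI registers.
 */
#if 0
static void bdw_example_ppat_pack(struct drm_i915_private *dev_priv)
{
	/* entry 2 lands in bits [23:16] of the packed value */
	uint64_t pat = GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);

	WARN_ON((pat >> (2 * 8)) != (GEN8_PPAT_WT | GEN8_PPAT_LLCELLC));

	/* low dword and high dword go to the two 32-bit registers */
	I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
	I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
}
#endif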
3025 
3026 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
3027 {
3028 	uint64_t pat;
3029 
3030 	/*
3031 	 * Map WB on BDW to snooped on CHV.
3032 	 *
3033 	 * Only the snoop bit has meaning for CHV, the rest is
3034 	 * ignored.
3035 	 *
3036 	 * The hardware will never snoop for certain types of accesses:
3037 	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
3038 	 * - PPGTT page tables
3039 	 * - some other special cycles
3040 	 *
3041 	 * As with BDW, we also need to consider the following for GT accesses:
3042 	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
3043 	 * so RTL will always use the value corresponding to
3044 	 * pat_sel = 000".
3045 	 * Which means we must set the snoop bit in PAT entry 0
3046 	 * in order to keep the global status page working.
3047 	 */
3048 	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
3049 	      GEN8_PPAT(1, 0) |
3050 	      GEN8_PPAT(2, 0) |
3051 	      GEN8_PPAT(3, 0) |
3052 	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
3053 	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
3054 	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
3055 	      GEN8_PPAT(7, CHV_PPAT_SNOOP);
3056 
3057 	I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
3058 	I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
3059 }
3060 
3061 static int gen8_gmch_probe(struct i915_ggtt *ggtt)
3062 {
3063 	struct drm_device *dev = ggtt->base.dev;
3064 	struct drm_i915_private *dev_priv = to_i915(dev);
3065 	u16 snb_gmch_ctl;
3066 	int ret;
3067 
3068 	/* TODO: We're not aware of mappable constraints on gen8 yet */
3069 	ggtt->mappable_base = pci_resource_start(dev->pdev, 2);
3070 	ggtt->mappable_end = pci_resource_len(dev->pdev, 2);
3071 
3072 #if 0
3073 	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
3074 		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));
3075 #endif
3076 
3077 	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3078 
3079 	if (INTEL_INFO(dev)->gen >= 9) {
3080 		ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl);
3081 		ggtt->size = gen8_get_total_gtt_size(snb_gmch_ctl);
3082 	} else if (IS_CHERRYVIEW(dev)) {
3083 		ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl);
3084 		ggtt->size = chv_get_total_gtt_size(snb_gmch_ctl);
3085 	} else {
3086 		ggtt->stolen_size = gen8_get_stolen_size(snb_gmch_ctl);
3087 		ggtt->size = gen8_get_total_gtt_size(snb_gmch_ctl);
3088 	}
3089 
3090 	ggtt->base.total = (ggtt->size / sizeof(gen8_pte_t)) << PAGE_SHIFT;
3091 
3092 	if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
3093 		chv_setup_private_ppat(dev_priv);
3094 	else
3095 		bdw_setup_private_ppat(dev_priv);
3096 
3097 	ret = ggtt_probe_common(dev, ggtt->size);
3098 
3099 	ggtt->base.clear_range = gen8_ggtt_clear_range;
3100 	if (IS_CHERRYVIEW(dev_priv))
3101 		ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL;
3102 	else
3103 		ggtt->base.insert_entries = gen8_ggtt_insert_entries;
3104 	ggtt->base.bind_vma = ggtt_bind_vma;
3105 	ggtt->base.unbind_vma = ggtt_unbind_vma;
3106 
3107 	return ret;
3108 }
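
/*
 * A minimal illustrative sketch, excluded from the build with #if 0, of the
 * size arithmetic in gen8_gmch_probe() above: ggtt->size is the number of
 * bytes of PTE space in the GSM, and each 8-byte gen8 PTE maps one 4 KiB
 * page.  The GGMS field value is made up.
 */
#if 0
static void gen8_example_ggtt_size(void)
{
	/* GGMS field == 3 -> 1 << 3 == 8 -> 8 MiB of PTE space */
	unsigned int size = gen8_get_total_gtt_size(3 << BDW_GMCH_GGMS_SHIFT);
	u64 total = (u64)(size / sizeof(gen8_pte_t)) << PAGE_SHIFT;

	/* 8 MiB / 8 bytes == 1M PTEs; 1M PTEs * 4 KiB == 4 GiB of GGTT space */
	WARN_ON(size != 8 << 20);
	WARN_ON(total != 4ULL << 30);
}
#endif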
3109 
3110 static int gen6_gmch_probe(struct i915_ggtt *ggtt)
3111 {
3112 	struct drm_device *dev = ggtt->base.dev;
3113 	u16 snb_gmch_ctl;
3114 	int ret;
3115 
3116 	ggtt->mappable_base = pci_resource_start(dev->pdev, 2);
3117 	ggtt->mappable_end = pci_resource_len(dev->pdev, 2);
3118 
3119 	/* 64/512MB is the current min/max we actually know of, but this is just
3120 	 * a coarse sanity check.
3121 	 */
3122 	if ((ggtt->mappable_end < (64<<20) || (ggtt->mappable_end > (512<<20)))) {
3123 		DRM_ERROR("Unknown GMADR size (%llx)\n", ggtt->mappable_end);
3124 		return -ENXIO;
3125 	}
3126 
3127 #if 0
3128 	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
3129 		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
3130 #endif
3131 	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3132 
3133 	ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl);
3134 	ggtt->size = gen6_get_total_gtt_size(snb_gmch_ctl);
3135 	ggtt->base.total = (ggtt->size / sizeof(gen6_pte_t)) << PAGE_SHIFT;
3136 
3137 	ret = ggtt_probe_common(dev, ggtt->size);
3138 
3139 	ggtt->base.clear_range = gen6_ggtt_clear_range;
3140 	ggtt->base.insert_entries = gen6_ggtt_insert_entries;
3141 	ggtt->base.bind_vma = ggtt_bind_vma;
3142 	ggtt->base.unbind_vma = ggtt_unbind_vma;
3143 
3144 	return ret;
3145 }
3146 
3147 static void gen6_gmch_remove(struct i915_address_space *vm)
3148 {
3149 	struct i915_ggtt *ggtt = container_of(vm, struct i915_ggtt, base);
3150 
3151 	iounmap(ggtt->gsm);
3152 	free_scratch_page(vm->dev, vm->scratch_page);
3153 }
3154 
3155 static int i915_gmch_probe(struct i915_ggtt *ggtt)
3156 {
3157 	struct drm_device *dev = ggtt->base.dev;
3158 	struct drm_i915_private *dev_priv = to_i915(dev);
3159 #if 0
3160 	int ret;
3161 
3162 	ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
3163 	if (!ret) {
3164 		DRM_ERROR("failed to set up gmch\n");
3165 		return -EIO;
3166 	}
3167 #endif
3168 
3169 	intel_gtt_get(&ggtt->base.total, &ggtt->stolen_size,
3170 		      &ggtt->mappable_base, &ggtt->mappable_end);
3171 
3172 	ggtt->do_idle_maps = needs_idle_maps(dev_priv->dev);
3173 	ggtt->base.insert_entries = i915_ggtt_insert_entries;
3174 	ggtt->base.clear_range = i915_ggtt_clear_range;
3175 	ggtt->base.bind_vma = ggtt_bind_vma;
3176 	ggtt->base.unbind_vma = ggtt_unbind_vma;
3177 
3178 	if (unlikely(ggtt->do_idle_maps))
3179 		DRM_INFO("applying Ironlake quirks for intel_iommu\n");
3180 
3181 	return 0;
3182 }
3183 
3184 static void i915_gmch_remove(struct i915_address_space *vm)
3185 {
3186 	intel_gmch_remove();
3187 }
3188 
3189 /**
3190  * i915_ggtt_init_hw - Initialize GGTT hardware
3191  * @dev: DRM device
3192  */
3193 int i915_ggtt_init_hw(struct drm_device *dev)
3194 {
3195 	struct drm_i915_private *dev_priv = to_i915(dev);
3196 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
3197 	int ret;
3198 
3199 	if (INTEL_INFO(dev)->gen <= 5) {
3200 		ggtt->probe = i915_gmch_probe;
3201 		ggtt->base.cleanup = i915_gmch_remove;
3202 	} else if (INTEL_INFO(dev)->gen < 8) {
3203 		ggtt->probe = gen6_gmch_probe;
3204 		ggtt->base.cleanup = gen6_gmch_remove;
3205 
3206 		if (HAS_EDRAM(dev))
3207 			ggtt->base.pte_encode = iris_pte_encode;
3208 		else if (IS_HASWELL(dev))
3209 			ggtt->base.pte_encode = hsw_pte_encode;
3210 		else if (IS_VALLEYVIEW(dev))
3211 			ggtt->base.pte_encode = byt_pte_encode;
3212 		else if (INTEL_INFO(dev)->gen >= 7)
3213 			ggtt->base.pte_encode = ivb_pte_encode;
3214 		else
3215 			ggtt->base.pte_encode = snb_pte_encode;
3216 	} else {
3217 		ggtt->probe = gen8_gmch_probe;
3218 		ggtt->base.cleanup = gen6_gmch_remove;
3219 	}
3220 
3221 	ggtt->base.dev = dev;
3222 	ggtt->base.is_ggtt = true;
3223 
3224 	ret = ggtt->probe(ggtt);
3225 	if (ret)
3226 		return ret;
3227 
3228 	if ((ggtt->base.total - 1) >> 32) {
3229 		DRM_ERROR("We never expected a Global GTT with more than 32bits"
3230 			  " of address space! Found %lldM!\n",
3231 			  ggtt->base.total >> 20);
3232 		ggtt->base.total = 1ULL << 32;
3233 		ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total);
3234 	}
3235 
3236 	/*
3237 	 * Initialise stolen early so that we may reserve preallocated
3238 	 * objects for the BIOS to KMS transition.
3239 	 */
3240 	ret = i915_gem_init_stolen(dev);
3241 	if (ret)
3242 		goto out_gtt_cleanup;
3243 
3244 	/* GMADR is the PCI mmio aperture into the global GTT. */
3245 	DRM_INFO("Memory usable by graphics device = %lluM\n",
3246 		 ggtt->base.total >> 20);
3247 	DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20);
3248 	DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", ggtt->stolen_size >> 20);
3249 #ifdef CONFIG_INTEL_IOMMU
3250 	if (intel_iommu_gfx_mapped)
3251 		DRM_INFO("VT-d active for gfx access\n");
3252 #endif
3253 	/*
3254 	 * i915.enable_ppgtt is read-only, so do an early pass to validate the
3255 	 * user's requested state against the hardware/driver capabilities.  We
3256 	 * do this now so that we can print out any log messages once rather
3257 	 * than every time we check intel_enable_ppgtt().
3258 	 */
3259 	i915.enable_ppgtt = sanitize_enable_ppgtt(dev, i915.enable_ppgtt);
3260 	DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt);
3261 
3262 	return 0;
3263 
3264 out_gtt_cleanup:
3265 	ggtt->base.cleanup(&ggtt->base);
3266 
3267 	return ret;
3268 }
3269 
3270 int i915_ggtt_enable_hw(struct drm_device *dev)
3271 {
3272 	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
3273 		return -EIO;
3274 
3275 	return 0;
3276 }
3277 
3278 void i915_gem_restore_gtt_mappings(struct drm_device *dev)
3279 {
3280 	struct drm_i915_private *dev_priv = to_i915(dev);
3281 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
3282 	struct drm_i915_gem_object *obj;
3283 	struct i915_vma *vma;
3284 	bool flush;
3285 
3286 	i915_check_and_clear_faults(dev);
3287 
3288 	/* First fill our portion of the GTT with scratch pages */
3289 	ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total,
3290 			       true);
3291 
3292 	/* Cache flush objects bound into GGTT and rebind them. */
3293 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
3294 		flush = false;
3295 		list_for_each_entry(vma, &obj->vma_list, obj_link) {
3296 			if (vma->vm != &ggtt->base)
3297 				continue;
3298 
3299 			WARN_ON(i915_vma_bind(vma, obj->cache_level,
3300 					      PIN_UPDATE));
3301 
3302 			flush = true;
3303 		}
3304 
3305 		if (flush)
3306 			i915_gem_clflush_object(obj, obj->pin_display);
3307 	}
3308 
3309 	if (INTEL_INFO(dev)->gen >= 8) {
3310 		if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
3311 			chv_setup_private_ppat(dev_priv);
3312 		else
3313 			bdw_setup_private_ppat(dev_priv);
3314 
3315 		return;
3316 	}
3317 
3318 	if (USES_PPGTT(dev)) {
3319 		struct i915_address_space *vm;
3320 
3321 		list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
3322 			/* TODO: Perhaps it shouldn't be gen6 specific */
3323 
3324 			struct i915_hw_ppgtt *ppgtt;
3325 
3326 			if (vm->is_ggtt)
3327 				ppgtt = dev_priv->mm.aliasing_ppgtt;
3328 			else
3329 				ppgtt = i915_vm_to_ppgtt(vm);
3330 
3331 			gen6_write_page_range(dev_priv, &ppgtt->pd,
3332 					      0, ppgtt->base.total);
3333 		}
3334 	}
3335 
3336 	i915_ggtt_flush(dev_priv);
3337 }
3338 
3339 static struct i915_vma *
3340 __i915_gem_vma_create(struct drm_i915_gem_object *obj,
3341 		      struct i915_address_space *vm,
3342 		      const struct i915_ggtt_view *ggtt_view)
3343 {
3344 	struct i915_vma *vma;
3345 
3346 	if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
3347 		return ERR_PTR(-EINVAL);
3348 
3349 	vma = kzalloc(sizeof(*vma), GFP_KERNEL);
3350 	if (vma == NULL)
3351 		return ERR_PTR(-ENOMEM);
3352 
3353 	INIT_LIST_HEAD(&vma->vm_link);
3354 	INIT_LIST_HEAD(&vma->obj_link);
3355 	INIT_LIST_HEAD(&vma->exec_list);
3356 	vma->vm = vm;
3357 	vma->obj = obj;
3358 	vma->is_ggtt = i915_is_ggtt(vm);
3359 
3360 	if (i915_is_ggtt(vm))
3361 		vma->ggtt_view = *ggtt_view;
3362 	else
3363 		i915_ppgtt_get(i915_vm_to_ppgtt(vm));
3364 
3365 	list_add_tail(&vma->obj_link, &obj->vma_list);
3366 
3367 	return vma;
3368 }
3369 
3370 struct i915_vma *
3371 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
3372 				  struct i915_address_space *vm)
3373 {
3374 	struct i915_vma *vma;
3375 
3376 	vma = i915_gem_obj_to_vma(obj, vm);
3377 	if (!vma)
3378 		vma = __i915_gem_vma_create(obj, vm,
3379 					    i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL);
3380 
3381 	return vma;
3382 }
3383 
3384 struct i915_vma *
3385 i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
3386 				       const struct i915_ggtt_view *view)
3387 {
3388 	struct drm_device *dev = obj->base.dev;
3389 	struct drm_i915_private *dev_priv = to_i915(dev);
3390 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
3391 	struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
3392 
3393 	if (!vma)
3394 		vma = __i915_gem_vma_create(obj, &ggtt->base, view);
3395 
3396 	return vma;
3397 
3398 }
3399 
3400 static struct scatterlist *
3401 rotate_pages(const dma_addr_t *in, unsigned int offset,
3402 	     unsigned int width, unsigned int height,
3403 	     unsigned int stride,
3404 	     struct sg_table *st, struct scatterlist *sg)
3405 {
3406 	unsigned int column, row;
3407 	unsigned int src_idx;
3408 
3409 	for (column = 0; column < width; column++) {
3410 		src_idx = stride * (height - 1) + column;
3411 		for (row = 0; row < height; row++) {
3412 			st->nents++;
3413 			/* We don't need the pages, but need to initialize
3414 			 * the entries so the sg list can be happily traversed.
3415 			 * The only thing we need is the DMA addresses.
3416 			 */
3417 			sg_set_page(sg, NULL, PAGE_SIZE, 0);
3418 			sg_dma_address(sg) = in[offset + src_idx];
3419 			sg_dma_len(sg) = PAGE_SIZE;
3420 			sg = sg_next(sg);
3421 			src_idx -= stride;
3422 		}
3423 	}
3424 
3425 	return sg;
3426 }
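
/*
 * A minimal illustrative sketch, excluded from the build with #if 0, of the
 * index walk performed by rotate_pages() above for a made-up 3-wide, 2-high,
 * stride-3 plane.  Starting at the bottom of each column and stepping back by
 * the stride emits the pages column by column, i.e. the source rotated 90
 * degrees clockwise and laid out row-major again.
 */
#if 0
static void example_rotate_walk(void)
{
	/*
	 * Row-major source page indices (width == stride == 3, height == 2):
	 *	0 1 2
	 *	3 4 5
	 */
	unsigned int width = 3, height = 2, stride = 3;
	unsigned int column, row, src_idx;

	for (column = 0; column < width; column++) {
		src_idx = stride * (height - 1) + column;
		for (row = 0; row < height; row++) {
			/* emits 3,0 then 4,1 then 5,2 */
			DRM_DEBUG_DRIVER("rotated slot gets page %u\n", src_idx);
			src_idx -= stride;
		}
	}
}
#endif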
3427 
3428 static struct sg_table *
3429 intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info,
3430 			  struct drm_i915_gem_object *obj)
3431 {
3432 	unsigned int size_pages = rot_info->plane[0].width * rot_info->plane[0].height;
3433 	unsigned int size_pages_uv;
3434 	struct sg_page_iter sg_iter;
3435 	unsigned long i;
3436 	dma_addr_t *page_addr_list;
3437 	struct sg_table *st;
3438 	unsigned int uv_start_page;
3439 	struct scatterlist *sg;
3440 	int ret = -ENOMEM;
3441 
3442 	/* Allocate a temporary list of source pages for random access. */
3443 	page_addr_list = drm_malloc_ab(obj->base.size / PAGE_SIZE,
3444 				       sizeof(dma_addr_t));
3445 	if (!page_addr_list)
3446 		return ERR_PTR(ret);
3447 
3448 	/* Account for UV plane with NV12. */
3449 	if (rot_info->pixel_format == DRM_FORMAT_NV12)
3450 		size_pages_uv = rot_info->plane[1].width * rot_info->plane[1].height;
3451 	else
3452 		size_pages_uv = 0;
3453 
3454 	/* Allocate target SG list. */
3455 	st = kmalloc(sizeof(*st), M_DRM, M_WAITOK);
3456 	if (!st)
3457 		goto err_st_alloc;
3458 
3459 	ret = sg_alloc_table(st, size_pages + size_pages_uv, GFP_KERNEL);
3460 	if (ret)
3461 		goto err_sg_alloc;
3462 
3463 	/* Populate source page list from the object. */
3464 	i = 0;
3465 	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
3466 		page_addr_list[i] = sg_page_iter_dma_address(&sg_iter);
3467 		i++;
3468 	}
3469 
3470 	st->nents = 0;
3471 	sg = st->sgl;
3472 
3473 	/* Rotate the pages. */
3474 	sg = rotate_pages(page_addr_list, 0,
3475 			  rot_info->plane[0].width, rot_info->plane[0].height,
3476 			  rot_info->plane[0].width,
3477 			  st, sg);
3478 
3479 	/* Append the UV plane if NV12. */
3480 	if (rot_info->pixel_format == DRM_FORMAT_NV12) {
3481 		uv_start_page = size_pages;
3482 
3483 		/* Check for tile-row un-alignment. */
3484 		if (offset_in_page(rot_info->uv_offset))
3485 			uv_start_page--;
3486 
3487 		rot_info->uv_start_page = uv_start_page;
3488 
3489 		sg = rotate_pages(page_addr_list, rot_info->uv_start_page,
3490 				  rot_info->plane[1].width, rot_info->plane[1].height,
3491 				  rot_info->plane[1].width,
3492 				  st, sg);
3493 	}
3494 
3495 	DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages (%u plane 0)).\n",
3496 		      obj->base.size, rot_info->plane[0].width,
3497 		      rot_info->plane[0].height, size_pages + size_pages_uv,
3498 		      size_pages);
3499 
3500 	drm_free_large(page_addr_list);
3501 
3502 	return st;
3503 
3504 err_sg_alloc:
3505 	kfree(st);
3506 err_st_alloc:
3507 	drm_free_large(page_addr_list);
3508 
3509 	DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%d) (%ux%u tiles, %u pages (%u plane 0))\n",
3510 		      obj->base.size, ret, rot_info->plane[0].width,
3511 		      rot_info->plane[0].height, size_pages + size_pages_uv,
3512 		      size_pages);
3513 	return ERR_PTR(ret);
3514 }
3515 
3516 static struct sg_table *
3517 intel_partial_pages(const struct i915_ggtt_view *view,
3518 		    struct drm_i915_gem_object *obj)
3519 {
3520 	struct sg_table *st;
3521 	struct scatterlist *sg;
3522 	struct sg_page_iter obj_sg_iter;
3523 	int ret = -ENOMEM;
3524 
3525 	st = kmalloc(sizeof(*st), M_DRM, M_WAITOK);
3526 	if (!st)
3527 		goto err_st_alloc;
3528 
3529 	ret = sg_alloc_table(st, view->params.partial.size, GFP_KERNEL);
3530 	if (ret)
3531 		goto err_sg_alloc;
3532 
3533 	sg = st->sgl;
3534 	st->nents = 0;
3535 	for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents,
3536 		view->params.partial.offset)
3537 	{
3538 		if (st->nents >= view->params.partial.size)
3539 			break;
3540 
3541 		sg_set_page(sg, NULL, PAGE_SIZE, 0);
3542 		sg_dma_address(sg) = sg_page_iter_dma_address(&obj_sg_iter);
3543 		sg_dma_len(sg) = PAGE_SIZE;
3544 
3545 		sg = sg_next(sg);
3546 		st->nents++;
3547 	}
3548 
3549 	return st;
3550 
3551 err_sg_alloc:
3552 	kfree(st);
3553 err_st_alloc:
3554 	return ERR_PTR(ret);
3555 }
3556 
3557 static int
3558 i915_get_ggtt_vma_pages(struct i915_vma *vma)
3559 {
3560 	int ret = 0;
3561 
3562 	if (vma->ggtt_view.pages)
3563 		return 0;
3564 
3565 	if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
3566 		vma->ggtt_view.pages = vma->obj->pages;
3567 	else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
3568 		vma->ggtt_view.pages =
3569 			intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated, vma->obj);
3570 	else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL)
3571 		vma->ggtt_view.pages =
3572 			intel_partial_pages(&vma->ggtt_view, vma->obj);
3573 	else
3574 		WARN_ONCE(1, "GGTT view %u not implemented!\n",
3575 			  vma->ggtt_view.type);
3576 
3577 	if (!vma->ggtt_view.pages) {
3578 		DRM_ERROR("Failed to get pages for GGTT view type %u!\n",
3579 			  vma->ggtt_view.type);
3580 		ret = -EINVAL;
3581 	} else if (IS_ERR(vma->ggtt_view.pages)) {
3582 		ret = PTR_ERR(vma->ggtt_view.pages);
3583 		vma->ggtt_view.pages = NULL;
3584 		DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
3585 			  vma->ggtt_view.type, ret);
3586 	}
3587 
3588 	return ret;
3589 }
3590 
3591 /**
3592  * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
3593  * @vma: VMA to map
3594  * @cache_level: mapping cache level
3595  * @flags: flags like global or local mapping
3596  *
3597  * DMA addresses are taken from the scatter-gather table of this object (or of
3598  * this VMA in case of non-default GGTT views) and PTE entries set up.
3599  * Note that DMA addresses are also the only part of the SG table we care about.
3600  */
3601 int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
3602 		  u32 flags)
3603 {
3604 	int ret;
3605 	u32 bind_flags;
3606 
3607 	if (WARN_ON(flags == 0))
3608 		return -EINVAL;
3609 
3610 	bind_flags = 0;
3611 	if (flags & PIN_GLOBAL)
3612 		bind_flags |= GLOBAL_BIND;
3613 	if (flags & PIN_USER)
3614 		bind_flags |= LOCAL_BIND;
3615 
3616 	if (flags & PIN_UPDATE)
3617 		bind_flags |= vma->bound;
3618 	else
3619 		bind_flags &= ~vma->bound;
3620 
3621 	if (bind_flags == 0)
3622 		return 0;
3623 
3624 	if (vma->bound == 0 && vma->vm->allocate_va_range) {
3625 		/* XXX: i915_vma_pin() will fix this +- hack */
3626 		vma->pin_count++;
3627 		trace_i915_va_alloc(vma);
3628 		ret = vma->vm->allocate_va_range(vma->vm,
3629 						 vma->node.start,
3630 						 vma->node.size);
3631 		vma->pin_count--;
3632 		if (ret)
3633 			return ret;
3634 	}
3635 
3636 	ret = vma->vm->bind_vma(vma, cache_level, bind_flags);
3637 	if (ret)
3638 		return ret;
3639 
3640 	vma->bound |= bind_flags;
3641 
3642 	return 0;
3643 }
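
/*
 * A minimal illustrative sketch, excluded from the build with #if 0, of how
 * i915_vma_bind() above reduces the caller's PIN_* flags to the bind work
 * that still needs doing.  The starting state is made up.
 */
#if 0
static void example_bind_flags(void)
{
	/* the VMA already holds a global binding; the caller asks for both */
	u32 bound = GLOBAL_BIND;
	u32 flags = PIN_GLOBAL | PIN_USER;
	u32 bind_flags = 0;

	if (flags & PIN_GLOBAL)
		bind_flags |= GLOBAL_BIND;
	if (flags & PIN_USER)
		bind_flags |= LOCAL_BIND;

	if (flags & PIN_UPDATE)
		bind_flags |= bound;	/* rewrite already-bound PTEs as well */
	else
		bind_flags &= ~bound;	/* only bind what is not bound yet */

	/* bind_flags is now LOCAL_BIND: only the per-process PTEs get written */
	WARN_ON(bind_flags != LOCAL_BIND);
}
#endif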
3644 
3645 /**
3646  * i915_ggtt_view_size - Get the size of a GGTT view.
3647  * @obj: Object the view is of.
3648  * @view: The view in question.
3649  *
3650  * @return The size of the GGTT view in bytes.
3651  */
3652 size_t
3653 i915_ggtt_view_size(struct drm_i915_gem_object *obj,
3654 		    const struct i915_ggtt_view *view)
3655 {
3656 	if (view->type == I915_GGTT_VIEW_NORMAL) {
3657 		return obj->base.size;
3658 	} else if (view->type == I915_GGTT_VIEW_ROTATED) {
3659 		return intel_rotation_info_size(&view->params.rotated) << PAGE_SHIFT;
3660 	} else if (view->type == I915_GGTT_VIEW_PARTIAL) {
3661 		return view->params.partial.size << PAGE_SHIFT;
3662 	} else {
3663 		WARN_ONCE(1, "GGTT view %u not implemented!\n", view->type);
3664 		return obj->base.size;
3665 	}
3666 }
3667