xref: /dragonfly/sys/dev/drm/i915/i915_gem_gtt.c (revision 65867155)
1 /*
2  * Copyright © 2010 Daniel Vetter
3  * Copyright © 2011-2014 Intel Corporation
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  *
24  */
25 
26 #include <linux/seq_file.h>
27 #include <drm/drmP.h>
28 #include <drm/i915_drm.h>
29 #include "i915_drv.h"
30 #include "i915_vgpu.h"
31 #include "i915_trace.h"
32 #include "intel_drv.h"
33 
34 #include <linux/bitmap.h>
35 #include <linux/highmem.h>
36 
37 /**
38  * DOC: Global GTT views
39  *
40  * Background and previous state
41  *
42  * Historically objects could exist (be bound) in global GTT space only as
43  * singular instances, with a view representing all of the object's backing
44  * pages in a linear fashion. This view is called the normal view.
45  *
46  * To support multiple views of the same object, where the number of mapped
47  * pages is not equal to the backing store, or where the layout of the pages
48  * is not linear, the concept of a GGTT view was added.
49  *
50  * One example of an alternative view is a stereo display driven by a single
51  * image. In this case we would have a framebuffer looking like this
52  * (2x2 pages):
53  *
54  *    12
55  *    34
56  *
57  * The above represents the normal GGTT view, as normally mapped for GPU or
58  * CPU rendering. In contrast, the display engine would be fed an alternative
59  * view which could look something like this:
60  *
61  *   1212
62  *   3434
63  *
64  * In this example both the size and the layout of pages in the alternative
65  * view are different from the normal view.
66  *
67  * Implementation and usage
68  *
69  * GGTT views are implemented using VMAs and are distinguished via enum
70  * i915_ggtt_view_type and struct i915_ggtt_view.
71  *
72  * A new flavour of core GEM functions which work with GGTT bound objects was
73  * added with the _ggtt_ infix, and sometimes with the _view postfix, to avoid
74  * renaming in large amounts of code. They take a struct i915_ggtt_view
75  * parameter encapsulating all metadata required to implement a view.
76  *
77  * As a helper for callers which are only interested in the normal view, a
78  * globally const i915_ggtt_view_normal singleton instance exists. All old core
79  * GEM API functions, the ones not taking the view parameter, operate on, or
80  * with, the normal GGTT view.
81  *
82  * Code wanting to add or use a new GGTT view needs to:
83  *
84  * 1. Add a new enum with a suitable name.
85  * 2. Extend the metadata in the i915_ggtt_view structure if required.
86  * 3. Add support to i915_get_vma_pages().
87  *
88  * New views are required to build a scatter-gather table from within the
89  * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and
90  * exists for the lifetime of a VMA.
91  *
92  * The core API is designed to have copy semantics, which means that a
93  * passed-in struct i915_ggtt_view does not need to be persistent (left around
94  * after calling the core API functions).
95  *
96  */
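/*
 * Illustrative sketch (not part of the driver): following the three steps
 * listed above, adding a hypothetical "example" view could look roughly like
 * the fragment below. The I915_GGTT_VIEW_EXAMPLE value, the "example" metadata
 * field and the intel_example_view_pages() helper are invented purely for
 * illustration; the real view types and metadata live in the i915 headers.
 *
 *	enum i915_ggtt_view_type {
 *		I915_GGTT_VIEW_NORMAL = 0,
 *		I915_GGTT_VIEW_ROTATED,
 *		I915_GGTT_VIEW_EXAMPLE,			(step 1: new enum value)
 *	};
 *
 *	struct i915_ggtt_view {
 *		enum i915_ggtt_view_type type;
 *		struct {
 *			unsigned int stride;		(step 2: view metadata)
 *		} example;
 *		struct sg_table *pages;
 *	};
 *
 *	(step 3: in i915_get_vma_pages(), build the sg table for the view)
 *	case I915_GGTT_VIEW_EXAMPLE:
 *		vma->ggtt_view.pages =
 *			intel_example_view_pages(&vma->ggtt_view, vma->obj);
 *		break;
 *
 * Callers then pass a (possibly stack-allocated) struct i915_ggtt_view with
 * type = I915_GGTT_VIEW_EXAMPLE to the _ggtt_/_view flavoured GEM calls;
 * thanks to the copy semantics described above it need not outlive the call.
 */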
97 
98 static int
99 i915_get_ggtt_vma_pages(struct i915_vma *vma);
100 
101 const struct i915_ggtt_view i915_ggtt_view_normal;
102 const struct i915_ggtt_view i915_ggtt_view_rotated = {
103         .type = I915_GGTT_VIEW_ROTATED
104 };
105 
106 static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
107 {
108 	bool has_aliasing_ppgtt;
109 	bool has_full_ppgtt;
110 
111 	has_aliasing_ppgtt = INTEL_INFO(dev)->gen >= 6;
112 	has_full_ppgtt = INTEL_INFO(dev)->gen >= 7;
113 
114 	if (intel_vgpu_active(dev))
115 		has_full_ppgtt = false; /* emulation is too hard */
116 
117 	/*
118 	 * We don't allow disabling PPGTT for gen9+ as it's a requirement for
119 	 * execlists, the sole mechanism available to submit work.
120 	 */
121 	if (INTEL_INFO(dev)->gen < 9 &&
122 	    (enable_ppgtt == 0 || !has_aliasing_ppgtt))
123 		return 0;
124 
125 	if (enable_ppgtt == 1)
126 		return 1;
127 
128 	if (enable_ppgtt == 2 && has_full_ppgtt)
129 		return 2;
130 
131 #ifdef CONFIG_INTEL_IOMMU
132 	/* Disable ppgtt on SNB if VT-d is on. */
133 	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
134 		DRM_INFO("Disabling PPGTT because VT-d is on\n");
135 		return 0;
136 	}
137 #endif
138 
139 	/* Early VLV (pre-B3 stepping) doesn't have working PPGTT */
140 	if (IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev) &&
141 	    dev->pdev->revision < 0xb) {
142 		DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
143 		return 0;
144 	}
145 
146 	if (INTEL_INFO(dev)->gen >= 8 && i915.enable_execlists)
147 		return 2;
148 	else
149 		return has_aliasing_ppgtt ? 1 : 0;
150 }
151 
152 static int ppgtt_bind_vma(struct i915_vma *vma,
153 			  enum i915_cache_level cache_level,
154 			  u32 unused)
155 {
156 	u32 pte_flags = 0;
157 
158 	/* Currently applicable only to VLV */
159 	if (vma->obj->gt_ro)
160 		pte_flags |= PTE_READ_ONLY;
161 
162 	vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
163 				cache_level, pte_flags);
164 
165 	return 0;
166 }
167 
168 static void ppgtt_unbind_vma(struct i915_vma *vma)
169 {
170 	vma->vm->clear_range(vma->vm,
171 			     vma->node.start,
172 			     vma->obj->base.size,
173 			     true);
174 }
175 
176 static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
177 				  enum i915_cache_level level,
178 				  bool valid)
179 {
180 	gen8_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
181 	pte |= addr;
182 
183 	switch (level) {
184 	case I915_CACHE_NONE:
185 		pte |= PPAT_UNCACHED_INDEX;
186 		break;
187 	case I915_CACHE_WT:
188 		pte |= PPAT_DISPLAY_ELLC_INDEX;
189 		break;
190 	default:
191 		pte |= PPAT_CACHED_INDEX;
192 		break;
193 	}
194 
195 	return pte;
196 }
197 
198 static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
199 				  const enum i915_cache_level level)
200 {
201 	gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
202 	pde |= addr;
203 	if (level != I915_CACHE_NONE)
204 		pde |= PPAT_CACHED_PDE_INDEX;
205 	else
206 		pde |= PPAT_UNCACHED_INDEX;
207 	return pde;
208 }
209 
210 static gen6_pte_t snb_pte_encode(dma_addr_t addr,
211 				 enum i915_cache_level level,
212 				 bool valid, u32 unused)
213 {
214 	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
215 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
216 
217 	switch (level) {
218 	case I915_CACHE_L3_LLC:
219 	case I915_CACHE_LLC:
220 		pte |= GEN6_PTE_CACHE_LLC;
221 		break;
222 	case I915_CACHE_NONE:
223 		pte |= GEN6_PTE_UNCACHED;
224 		break;
225 	default:
226 		MISSING_CASE(level);
227 	}
228 
229 	return pte;
230 }
231 
232 static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
233 				 enum i915_cache_level level,
234 				 bool valid, u32 unused)
235 {
236 	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
237 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
238 
239 	switch (level) {
240 	case I915_CACHE_L3_LLC:
241 		pte |= GEN7_PTE_CACHE_L3_LLC;
242 		break;
243 	case I915_CACHE_LLC:
244 		pte |= GEN6_PTE_CACHE_LLC;
245 		break;
246 	case I915_CACHE_NONE:
247 		pte |= GEN6_PTE_UNCACHED;
248 		break;
249 	default:
250 		MISSING_CASE(level);
251 	}
252 
253 	return pte;
254 }
255 
256 static gen6_pte_t byt_pte_encode(dma_addr_t addr,
257 				 enum i915_cache_level level,
258 				 bool valid, u32 flags)
259 {
260 	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
261 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
262 
263 	if (!(flags & PTE_READ_ONLY))
264 		pte |= BYT_PTE_WRITEABLE;
265 
266 	if (level != I915_CACHE_NONE)
267 		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
268 
269 	return pte;
270 }
271 
272 static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
273 				 enum i915_cache_level level,
274 				 bool valid, u32 unused)
275 {
276 	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
277 	pte |= HSW_PTE_ADDR_ENCODE(addr);
278 
279 	if (level != I915_CACHE_NONE)
280 		pte |= HSW_WB_LLC_AGE3;
281 
282 	return pte;
283 }
284 
285 static gen6_pte_t iris_pte_encode(dma_addr_t addr,
286 				  enum i915_cache_level level,
287 				  bool valid, u32 unused)
288 {
289 	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
290 	pte |= HSW_PTE_ADDR_ENCODE(addr);
291 
292 	switch (level) {
293 	case I915_CACHE_NONE:
294 		break;
295 	case I915_CACHE_WT:
296 		pte |= HSW_WT_ELLC_LLC_AGE3;
297 		break;
298 	default:
299 		pte |= HSW_WB_ELLC_LLC_AGE3;
300 		break;
301 	}
302 
303 	return pte;
304 }
305 
306 static int __setup_page_dma(struct drm_device *dev,
307 			    struct i915_page_dma *p, gfp_t flags)
308 {
309 	struct device *device = dev->pdev->dev;
310 
311 	p->page = alloc_page(flags);
312 	if (!p->page)
313 		return -ENOMEM;
314 
315 	p->daddr = dma_map_page(device,
316 				p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL);
317 
318 	if (dma_mapping_error(device, p->daddr)) {
319 		__free_page(p->page);
320 		return -EINVAL;
321 	}
322 
323 	return 0;
324 }
325 
326 static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
327 {
328 	return __setup_page_dma(dev, p, GFP_KERNEL);
329 }
330 
331 static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
332 {
333 	if (WARN_ON(!p->page))
334 		return;
335 
336 	dma_unmap_page(dev->pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL);
337 	__free_page(p->page);
338 	memset(p, 0, sizeof(*p));
339 }
340 
341 static void *kmap_page_dma(struct i915_page_dma *p)
342 {
343 	return kmap_atomic(p->page);
344 }
345 
346 /* We use the flushing unmap only with ppgtt structures:
347  * page directories, page tables and scratch pages.
348  */
349 static void kunmap_page_dma(struct drm_device *dev, void *vaddr)
350 {
351 	/* There are only a few exceptions for gen >= 6: chv and bxt.
352 	 * And we are not sure about the latter, so play safe for now.
353 	 */
354 	if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
355 		drm_clflush_virt_range(vaddr, PAGE_SIZE);
356 
357 	kunmap_atomic(vaddr);
358 }
359 
360 #define kmap_px(px) kmap_page_dma(px_base(px))
361 #define kunmap_px(ppgtt, vaddr) kunmap_page_dma((ppgtt)->base.dev, (vaddr))
362 
363 #define setup_px(dev, px) setup_page_dma((dev), px_base(px))
364 #define cleanup_px(dev, px) cleanup_page_dma((dev), px_base(px))
365 #define fill_px(dev, px, v) fill_page_dma((dev), px_base(px), (v))
366 #define fill32_px(dev, px, v) fill_page_dma_32((dev), px_base(px), (v))
367 
368 static void fill_page_dma(struct drm_device *dev, struct i915_page_dma *p,
369 			  const uint64_t val)
370 {
371 	int i;
372 	uint64_t * const vaddr = kmap_page_dma(p);
373 
374 	for (i = 0; i < 512; i++)
375 		vaddr[i] = val;
376 
377 	kunmap_page_dma(dev, vaddr);
378 }
379 
380 static void fill_page_dma_32(struct drm_device *dev, struct i915_page_dma *p,
381 			     const uint32_t val32)
382 {
383 	uint64_t v = val32;
384 
385 	v = v << 32 | val32;
386 
387 	fill_page_dma(dev, p, v);
388 }
389 
390 static struct i915_page_scratch *alloc_scratch_page(struct drm_device *dev)
391 {
392 	struct i915_page_scratch *sp;
393 	int ret;
394 
395 	sp = kzalloc(sizeof(*sp), GFP_KERNEL);
396 	if (sp == NULL)
397 		return ERR_PTR(-ENOMEM);
398 
399 	ret = __setup_page_dma(dev, px_base(sp), GFP_DMA32 | __GFP_ZERO);
400 	if (ret) {
401 		kfree(sp);
402 		return ERR_PTR(ret);
403 	}
404 
405 	set_pages_uc(px_page(sp), 1);
406 
407 	return sp;
408 }
409 
410 static void free_scratch_page(struct drm_device *dev,
411 			      struct i915_page_scratch *sp)
412 {
413 	set_pages_wb(px_page(sp), 1);
414 
415 	cleanup_px(dev, sp);
416 	kfree(sp);
417 }
418 
419 static struct i915_page_table *alloc_pt(struct drm_device *dev)
420 {
421 	struct i915_page_table *pt;
422 	const size_t count = INTEL_INFO(dev)->gen >= 8 ?
423 		GEN8_PTES : GEN6_PTES;
424 	int ret = -ENOMEM;
425 
426 	pt = kzalloc(sizeof(*pt), GFP_KERNEL);
427 	if (!pt)
428 		return ERR_PTR(-ENOMEM);
429 
430 	pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes),
431 				GFP_KERNEL);
432 
433 	if (!pt->used_ptes)
434 		goto fail_bitmap;
435 
436 	ret = setup_px(dev, pt);
437 	if (ret)
438 		goto fail_page_m;
439 
440 	return pt;
441 
442 fail_page_m:
443 	kfree(pt->used_ptes);
444 fail_bitmap:
445 	kfree(pt);
446 
447 	return ERR_PTR(ret);
448 }
449 
450 static void free_pt(struct drm_device *dev, struct i915_page_table *pt)
451 {
452 	cleanup_px(dev, pt);
453 	kfree(pt->used_ptes);
454 	kfree(pt);
455 }
456 
457 /**
458  * alloc_pt_range() - Allocate multiple page tables
459  * @pd:		The page directory which will have at least @count entries
460  *		available to point to the allocated page tables.
461  * @pde:	First page directory entry for which we are allocating.
462  * @count:	Number of page tables to allocate.
463  * @dev:	DRM device.
464  *
465  * Allocates multiple page table pages and sets the appropriate entries in the
466  * page table structure within the page directory. Function cleans up after
467  * itself on any failures.
468  *
469  * Return: 0 if allocation succeeded.
470  */
471 static int alloc_pt_range(struct i915_page_directory *pd, uint16_t pde, size_t count,
472 			  struct drm_device *dev)
473 {
474 	int i, ret;
475 
476 	/* 512 is the max page tables per page_directory on any platform. */
477 	if (WARN_ON(pde + count > I915_PDES))
478 		return -EINVAL;
479 
480 	for (i = pde; i < pde + count; i++) {
481 		struct i915_page_table *pt = alloc_pt(dev);
482 
483 		if (IS_ERR(pt)) {
484 			ret = PTR_ERR(pt);
485 			goto err_out;
486 		}
487 		WARN(pd->page_table[i],
488 		     "Leaking page directory entry %d (%p)\n",
489 		     i, pd->page_table[i]);
490 		pd->page_table[i] = pt;
491 	}
492 
493 	return 0;
494 
495 err_out:
496 	while (i-- > pde)
497 		free_pt(dev, pd->page_table[i]);
498 	return ret;
499 }
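/*
 * Usage note (sketch): the gen6 aliasing path later in this file preallocates
 * every page table in one call, roughly
 *
 *	ret = alloc_pt_range(&ppgtt->pd, 0, I915_PDES, ppgtt->base.dev);
 *	if (ret)
 *		return ret;	(any partially allocated tables were already freed)
 *
 * Partial ranges work the same way, as long as @pde + @count stays within
 * I915_PDES.
 */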
500 
501 static void gen8_initialize_pt(struct i915_address_space *vm,
502 			       struct i915_page_table *pt)
503 {
504 	gen8_pte_t scratch_pte;
505 
506 	scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
507 				      I915_CACHE_LLC, true);
508 
509 	fill_px(vm->dev, pt, scratch_pte);
510 }
511 
512 static void gen6_initialize_pt(struct i915_address_space *vm,
513 			       struct i915_page_table *pt)
514 {
515 	gen6_pte_t scratch_pte;
516 
517 	WARN_ON(px_dma(vm->scratch_page) == 0);
518 
519 	scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
520 				     I915_CACHE_LLC, true, 0);
521 
522 	fill32_px(vm->dev, pt, scratch_pte);
523 }
524 
525 static struct i915_page_directory *alloc_pd(struct drm_device *dev)
526 {
527 	struct i915_page_directory *pd;
528 	int ret = -ENOMEM;
529 
530 	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
531 	if (!pd)
532 		return ERR_PTR(-ENOMEM);
533 
534 	pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES),
535 				sizeof(*pd->used_pdes), GFP_KERNEL);
536 	if (!pd->used_pdes)
537 		goto fail_bitmap;
538 
539 	ret = setup_px(dev, pd);
540 	if (ret)
541 		goto fail_page_m;
542 
543 	return pd;
544 
545 fail_page_m:
546 	kfree(pd->used_pdes);
547 fail_bitmap:
548 	kfree(pd);
549 
550 	return ERR_PTR(ret);
551 }
552 
553 static void free_pd(struct drm_device *dev, struct i915_page_directory *pd)
554 {
555 	if (px_page(pd)) {
556 		cleanup_px(dev, pd);
557 		kfree(pd->used_pdes);
558 		kfree(pd);
559 	}
560 }
561 
562 static void gen8_initialize_pd(struct i915_address_space *vm,
563 			       struct i915_page_directory *pd)
564 {
565 	gen8_pde_t scratch_pde;
566 
567 	scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
568 
569 	fill_px(vm->dev, pd, scratch_pde);
570 }
571 
572 /* Broadwell Page Directory Pointer Descriptors */
573 static int gen8_write_pdp(struct drm_i915_gem_request *req,
574 			  unsigned entry,
575 			  dma_addr_t addr)
576 {
577 	struct intel_engine_cs *ring = req->ring;
578 	int ret;
579 
580 	BUG_ON(entry >= 4);
581 
582 	ret = intel_ring_begin(req, 6);
583 	if (ret)
584 		return ret;
585 
586 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
587 	intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
588 	intel_ring_emit(ring, upper_32_bits(addr));
589 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
590 	intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
591 	intel_ring_emit(ring, lower_32_bits(addr));
592 	intel_ring_advance(ring);
593 
594 	return 0;
595 }
596 
597 static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
598 			  struct drm_i915_gem_request *req)
599 {
600 	int i, ret;
601 
602 	for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
603 		const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
604 
605 		ret = gen8_write_pdp(req, i, pd_daddr);
606 		if (ret)
607 			return ret;
608 	}
609 
610 	return 0;
611 }
612 
613 static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
614 				   uint64_t start,
615 				   uint64_t length,
616 				   bool use_scratch)
617 {
618 	struct i915_hw_ppgtt *ppgtt =
619 		container_of(vm, struct i915_hw_ppgtt, base);
620 	gen8_pte_t *pt_vaddr, scratch_pte;
621 	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
622 	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
623 	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
624 	unsigned num_entries = length >> PAGE_SHIFT;
625 	unsigned last_pte, i;
626 
627 	scratch_pte = gen8_pte_encode(px_dma(ppgtt->base.scratch_page),
628 				      I915_CACHE_LLC, use_scratch);
629 
630 	while (num_entries) {
631 		struct i915_page_directory *pd;
632 		struct i915_page_table *pt;
633 
634 		if (WARN_ON(!ppgtt->pdp.page_directory[pdpe]))
635 			continue;
636 
637 		pd = ppgtt->pdp.page_directory[pdpe];
638 
639 		if (WARN_ON(!pd->page_table[pde]))
640 			continue;
641 
642 		pt = pd->page_table[pde];
643 
644 		if (WARN_ON(!px_page(pt)))
645 			continue;
646 
647 		last_pte = pte + num_entries;
648 		if (last_pte > GEN8_PTES)
649 			last_pte = GEN8_PTES;
650 
651 		pt_vaddr = kmap_px(pt);
652 
653 		for (i = pte; i < last_pte; i++) {
654 			pt_vaddr[i] = scratch_pte;
655 			num_entries--;
656 		}
657 
658 		kunmap_px(ppgtt, pt);
659 
660 		pte = 0;
661 		if (++pde == I915_PDES) {
662 			pdpe++;
663 			pde = 0;
664 		}
665 	}
666 }
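/*
 * Worked example of the pdpe/pde/pte decomposition used above and in
 * gen8_ppgtt_insert_entries() below (a sketch assuming 4KiB pages and the
 * 4-PDPE legacy layout described later in this file): bits 11:0 of a GTT
 * offset are the byte offset within the page, bits 20:12 select one of the
 * 512 PTEs in a page table, bits 29:21 select one of the 512 PDEs in a page
 * directory, and bits 31:30 select one of the 4 page directories. So for
 * start = 0x40003000:
 *
 *	pdpe =  0x40003000 >> 30          = 1
 *	pde  = (0x40003000 >> 21) & 0x1ff = 0
 *	pte  = (0x40003000 >> 12) & 0x1ff = 3
 *
 * which is consistent with 4 * 512 * 512 * 4096 = 4GB of addressable space.
 */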
667 
668 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
669 				      struct sg_table *pages,
670 				      uint64_t start,
671 				      enum i915_cache_level cache_level, u32 unused)
672 {
673 	struct i915_hw_ppgtt *ppgtt =
674 		container_of(vm, struct i915_hw_ppgtt, base);
675 	gen8_pte_t *pt_vaddr;
676 	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
677 	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
678 	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
679 	struct sg_page_iter sg_iter;
680 
681 	pt_vaddr = NULL;
682 
683 	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
684 		if (WARN_ON(pdpe >= GEN8_LEGACY_PDPES))
685 			break;
686 
687 		if (pt_vaddr == NULL) {
688 			struct i915_page_directory *pd = ppgtt->pdp.page_directory[pdpe];
689 			struct i915_page_table *pt = pd->page_table[pde];
690 
691 			pt_vaddr = kmap_px(pt);
692 		}
693 
694 		pt_vaddr[pte] =
695 			gen8_pte_encode(sg_page_iter_dma_address(&sg_iter),
696 					cache_level, true);
697 		if (++pte == GEN8_PTES) {
698 			kunmap_px(ppgtt, pt_vaddr);
699 			pt_vaddr = NULL;
700 			if (++pde == I915_PDES) {
701 				pdpe++;
702 				pde = 0;
703 			}
704 			pte = 0;
705 		}
706 	}
707 
708 	if (pt_vaddr)
709 		kunmap_px(ppgtt, pt_vaddr);
710 }
711 
712 static void gen8_free_page_tables(struct drm_device *dev,
713 				  struct i915_page_directory *pd)
714 {
715 	int i;
716 
717 	if (!px_page(pd))
718 		return;
719 
720 	for_each_set_bit(i, pd->used_pdes, I915_PDES) {
721 		if (WARN_ON(!pd->page_table[i]))
722 			continue;
723 
724 		free_pt(dev, pd->page_table[i]);
725 		pd->page_table[i] = NULL;
726 	}
727 }
728 
729 static int gen8_init_scratch(struct i915_address_space *vm)
730 {
731 	struct drm_device *dev = vm->dev;
732 
733 	vm->scratch_page = alloc_scratch_page(dev);
734 	if (IS_ERR(vm->scratch_page))
735 		return PTR_ERR(vm->scratch_page);
736 
737 	vm->scratch_pt = alloc_pt(dev);
738 	if (IS_ERR(vm->scratch_pt)) {
739 		free_scratch_page(dev, vm->scratch_page);
740 		return PTR_ERR(vm->scratch_pt);
741 	}
742 
743 	vm->scratch_pd = alloc_pd(dev);
744 	if (IS_ERR(vm->scratch_pd)) {
745 		free_pt(dev, vm->scratch_pt);
746 		free_scratch_page(dev, vm->scratch_page);
747 		return PTR_ERR(vm->scratch_pd);
748 	}
749 
750 	gen8_initialize_pt(vm, vm->scratch_pt);
751 	gen8_initialize_pd(vm, vm->scratch_pd);
752 
753 	return 0;
754 }
755 
756 static void gen8_free_scratch(struct i915_address_space *vm)
757 {
758 	struct drm_device *dev = vm->dev;
759 
760 	free_pd(dev, vm->scratch_pd);
761 	free_pt(dev, vm->scratch_pt);
762 	free_scratch_page(dev, vm->scratch_page);
763 }
764 
765 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
766 {
767 	struct i915_hw_ppgtt *ppgtt =
768 		container_of(vm, struct i915_hw_ppgtt, base);
769 	int i;
770 
771 	for_each_set_bit(i, ppgtt->pdp.used_pdpes, GEN8_LEGACY_PDPES) {
772 		if (WARN_ON(!ppgtt->pdp.page_directory[i]))
773 			continue;
774 
775 		gen8_free_page_tables(ppgtt->base.dev,
776 				      ppgtt->pdp.page_directory[i]);
777 		free_pd(ppgtt->base.dev, ppgtt->pdp.page_directory[i]);
778 	}
779 
780 	gen8_free_scratch(vm);
781 }
782 
783 /**
784  * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range.
785  * @ppgtt:	Master ppgtt structure.
786  * @pd:		Page directory for this address range.
787  * @start:	Starting virtual address to begin allocations.
788  * @length:	Size of the allocations.
789  * @new_pts:	Bitmap set by function with new allocations. Likely used by the
790  *		caller to free on error.
791  *
792  * Allocate the required number of page tables. Extremely similar to
793  * gen8_ppgtt_alloc_page_directories(). The main difference is that here we are limited by
794  * the page directory boundary (instead of the page directory pointer). That
795  * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is
796  * possible, and likely, that the caller will need to make multiple calls to this
797  * function to achieve the appropriate allocation.
798  *
799  * Return: 0 if success; negative error code otherwise.
800  */
801 static int gen8_ppgtt_alloc_pagetabs(struct i915_hw_ppgtt *ppgtt,
802 				     struct i915_page_directory *pd,
803 				     uint64_t start,
804 				     uint64_t length,
805 				     unsigned long *new_pts)
806 {
807 	struct drm_device *dev = ppgtt->base.dev;
808 	struct i915_page_table *pt;
809 	uint64_t temp;
810 	uint32_t pde;
811 
812 	gen8_for_each_pde(pt, pd, start, length, temp, pde) {
813 		/* Don't reallocate page tables */
814 		if (pt) {
815 			/* Scratch is never allocated this way */
816 			WARN_ON(pt == ppgtt->base.scratch_pt);
817 			continue;
818 		}
819 
820 		pt = alloc_pt(dev);
821 		if (IS_ERR(pt))
822 			goto unwind_out;
823 
824 		gen8_initialize_pt(&ppgtt->base, pt);
825 		pd->page_table[pde] = pt;
826 		__set_bit(pde, new_pts);
827 	}
828 
829 	return 0;
830 
831 unwind_out:
832 	for_each_set_bit(pde, new_pts, I915_PDES)
833 		free_pt(dev, pd->page_table[pde]);
834 
835 	return -ENOMEM;
836 }
837 
838 /**
839  * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
840  * @ppgtt:	Master ppgtt structure.
841  * @pdp:	Page directory pointer for this address range.
842  * @start:	Starting virtual address to begin allocations.
843  * @length:	Size of the allocations.
844  * @new_pds:	Bitmap set by function with new allocations. Likely used by the
845  *		caller to free on error.
846  *
847  * Allocate the required number of page directories starting at the pdpe index of
848  * @start, and ending at the pdpe index of @start + @length. This function will skip
849  * over already allocated page directories within the range, and only allocate
850  * new ones, setting the appropriate pointer within the pdp as well as the
851  * correct position in the bitmap @new_pds.
852  *
853  * The function will only allocate the pages within the range for a given page
854  * directory pointer. In other words, if @start + @length straddles a virtually
855  * addressed PDP boundary (512GB for 4k pages), there will be more allocations
856  * required by the caller. This is not currently possible, and the BUG in the
857  * code will prevent it.
858  *
859  * Return: 0 if success; negative error code otherwise.
860  */
861 static int gen8_ppgtt_alloc_page_directories(struct i915_hw_ppgtt *ppgtt,
862 				     struct i915_page_directory_pointer *pdp,
863 				     uint64_t start,
864 				     uint64_t length,
865 				     unsigned long *new_pds)
866 {
867 	struct drm_device *dev = ppgtt->base.dev;
868 	struct i915_page_directory *pd;
869 	uint64_t temp;
870 	uint32_t pdpe;
871 
872 	WARN_ON(!bitmap_empty(new_pds, GEN8_LEGACY_PDPES));
873 
874 	gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
875 		if (pd)
876 			continue;
877 
878 		pd = alloc_pd(dev);
879 		if (IS_ERR(pd))
880 			goto unwind_out;
881 
882 		gen8_initialize_pd(&ppgtt->base, pd);
883 		pdp->page_directory[pdpe] = pd;
884 		__set_bit(pdpe, new_pds);
885 	}
886 
887 	return 0;
888 
889 unwind_out:
890 	for_each_set_bit(pdpe, new_pds, GEN8_LEGACY_PDPES)
891 		free_pd(dev, pdp->page_directory[pdpe]);
892 
893 	return -ENOMEM;
894 }
895 
896 static void
897 free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts)
898 {
899 	int i;
900 
901 	for (i = 0; i < GEN8_LEGACY_PDPES; i++)
902 		kfree(new_pts[i]);
903 	kfree(new_pts);
904 	kfree(new_pds);
905 }
906 
907 /* Allocates the page directory bitmap, and the array of page table bitmaps. Both
908  * of these are sized based on the number of PDPEs in the system.
909  */
910 static
911 int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
912 					 unsigned long ***new_pts)
913 {
914 	int i;
915 	unsigned long *pds;
916 	unsigned long **pts;
917 
918 	pds = kcalloc(BITS_TO_LONGS(GEN8_LEGACY_PDPES), sizeof(unsigned long), GFP_KERNEL);
919 	if (!pds)
920 		return -ENOMEM;
921 
922 	pts = kcalloc(GEN8_LEGACY_PDPES, sizeof(unsigned long *), GFP_KERNEL);
923 	if (!pts) {
924 		kfree(pds);
925 		return -ENOMEM;
926 	}
927 
928 	for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
929 		pts[i] = kcalloc(BITS_TO_LONGS(I915_PDES),
930 				 sizeof(unsigned long), GFP_KERNEL);
931 		if (!pts[i])
932 			goto err_out;
933 	}
934 
935 	*new_pds = pds;
936 	*new_pts = pts;
937 
938 	return 0;
939 
940 err_out:
941 	free_gen8_temp_bitmaps(pds, pts);
942 	return -ENOMEM;
943 }
944 
945 /* PDE TLBs are a pain to invalidate on GEN8+. When we modify
946  * the page table structures, we mark them dirty so that
947  * context switching/execlist queuing code takes extra steps
948  * to ensure that tlbs are flushed.
949  */
950 static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
951 {
952 	ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
953 }
954 
955 static int gen8_alloc_va_range(struct i915_address_space *vm,
956 			       uint64_t start,
957 			       uint64_t length)
958 {
959 	struct i915_hw_ppgtt *ppgtt =
960 		container_of(vm, struct i915_hw_ppgtt, base);
961 	unsigned long *new_page_dirs, **new_page_tables;
962 	struct i915_page_directory *pd;
963 	const uint64_t orig_start = start;
964 	const uint64_t orig_length = length;
965 	uint64_t temp;
966 	uint32_t pdpe;
967 	int ret;
968 
969 	/* Wrap is never okay since we can only represent 48b, and we don't
970 	 * actually use the other side of the canonical address space.
971 	 */
972 	if (WARN_ON(start + length < start))
973 		return -ENODEV;
974 
975 	if (WARN_ON(start + length > ppgtt->base.total))
976 		return -ENODEV;
977 
978 	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables);
979 	if (ret)
980 		return ret;
981 
982 	/* Do the allocations first so we can easily bail out */
983 	ret = gen8_ppgtt_alloc_page_directories(ppgtt, &ppgtt->pdp, start, length,
984 					new_page_dirs);
985 	if (ret) {
986 		free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
987 		return ret;
988 	}
989 
990 	/* For every page directory referenced, allocate page tables */
991 	gen8_for_each_pdpe(pd, &ppgtt->pdp, start, length, temp, pdpe) {
992 		ret = gen8_ppgtt_alloc_pagetabs(ppgtt, pd, start, length,
993 						new_page_tables[pdpe]);
994 		if (ret)
995 			goto err_out;
996 	}
997 
998 	start = orig_start;
999 	length = orig_length;
1000 
1001 	/* Allocations have completed successfully, so set the bitmaps, and do
1002 	 * the mappings. */
1003 	gen8_for_each_pdpe(pd, &ppgtt->pdp, start, length, temp, pdpe) {
1004 		gen8_pde_t *const page_directory = kmap_px(pd);
1005 		struct i915_page_table *pt;
1006 		uint64_t pd_len = gen8_clamp_pd(start, length);
1007 		uint64_t pd_start = start;
1008 		uint32_t pde;
1009 
1010 		/* Every pd should be allocated; we just did that above. */
1011 		WARN_ON(!pd);
1012 
1013 		gen8_for_each_pde(pt, pd, pd_start, pd_len, temp, pde) {
1014 			/* Same reasoning as pd */
1015 			WARN_ON(!pt);
1016 			WARN_ON(!pd_len);
1017 			WARN_ON(!gen8_pte_count(pd_start, pd_len));
1018 
1019 			/* Set our used ptes within the page table */
1020 			bitmap_set(pt->used_ptes,
1021 				   gen8_pte_index(pd_start),
1022 				   gen8_pte_count(pd_start, pd_len));
1023 
1024 			/* Our pde is now pointing to the pagetable, pt */
1025 			__set_bit(pde, pd->used_pdes);
1026 
1027 			/* Map the PDE to the page table */
1028 			page_directory[pde] = gen8_pde_encode(px_dma(pt),
1029 							      I915_CACHE_LLC);
1030 
1031 			/* NB: We haven't yet mapped ptes to pages. At this
1032 			 * point we're still relying on insert_entries() */
1033 		}
1034 
1035 		kunmap_px(ppgtt, page_directory);
1036 
1037 		__set_bit(pdpe, ppgtt->pdp.used_pdpes);
1038 	}
1039 
1040 	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1041 	mark_tlbs_dirty(ppgtt);
1042 	return 0;
1043 
1044 err_out:
1045 	while (pdpe--) {
1046 		for_each_set_bit(temp, new_page_tables[pdpe], I915_PDES)
1047 			free_pt(vm->dev, ppgtt->pdp.page_directory[pdpe]->page_table[temp]);
1048 	}
1049 
1050 	for_each_set_bit(pdpe, new_page_dirs, GEN8_LEGACY_PDPES)
1051 		free_pd(vm->dev, ppgtt->pdp.page_directory[pdpe]);
1052 
1053 	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1054 	mark_tlbs_dirty(ppgtt);
1055 	return ret;
1056 }
1057 
1058 /*
1059  * GEN8 legacy ppgtt programming is accomplished through a maximum of 4 PDP
1060  * registers, with a net effect resembling a 2-level page table in normal x86
1061  * terms. Each PDP represents 1GB of memory; 4 * 512 * 512 * 4096 = 4GB of
1062  * legacy 32b address space.
1063  *
1064  */
1065 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
1066 {
1067 	int ret;
1068 
1069 	ret = gen8_init_scratch(&ppgtt->base);
1070 	if (ret)
1071 		return ret;
1072 
1073 	ppgtt->base.start = 0;
1074 #ifndef __DragonFly__
1075 	ppgtt->base.total = 1ULL << 32;
1076 	if (IS_ENABLED(CONFIG_X86_32))
1077 		/* While we have a proliferation of size_t variables
1078 		 * we cannot represent the full ppgtt size on 32bit,
1079 		 * so limit it to the same size as the GGTT (currently
1080 		 * 2GiB).
1081 		 */
1082 #endif
1083 		ppgtt->base.total = to_i915(ppgtt->base.dev)->gtt.base.total;
1084 	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
1085 	ppgtt->base.allocate_va_range = gen8_alloc_va_range;
1086 	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
1087 	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
1088 	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
1089 	ppgtt->base.bind_vma = ppgtt_bind_vma;
1090 
1091 	ppgtt->switch_mm = gen8_mm_switch;
1092 
1093 	return 0;
1094 }
1095 
1096 static int gen8_aliasing_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
1097 {
1098 	struct drm_device *dev = ppgtt->base.dev;
1099 	struct drm_i915_private *dev_priv = dev->dev_private;
1100 	uint64_t start = 0, size = dev_priv->gtt.base.total;
1101 	int ret;
1102 
1103 	ret = gen8_ppgtt_init(ppgtt);
1104 	if (ret)
1105 		return ret;
1106 
1107 	/* Aliasing PPGTT has to always work and be mapped because of the way we
1108 	 * use RESTORE_INHIBIT in the context switch. This will be fixed
1109 	 * eventually. */
1110 	ret = gen8_alloc_va_range(&ppgtt->base, start, size);
1111 	if (ret) {
1112 		free_pd(ppgtt->base.dev, ppgtt->base.scratch_pd);
1113 		free_pt(ppgtt->base.dev, ppgtt->base.scratch_pt);
1114 		return ret;
1115 	}
1116 
1117 	ppgtt->base.allocate_va_range = NULL;
1118 	ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
1119 
1120 	return 0;
1121 }
1122 
1123 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1124 {
1125 	struct i915_address_space *vm = &ppgtt->base;
1126 	struct i915_page_table *unused;
1127 	gen6_pte_t scratch_pte;
1128 	uint32_t pd_entry;
1129 	uint32_t  pte, pde, temp;
1130 	uint32_t start = ppgtt->base.start, length = ppgtt->base.total;
1131 
1132 	scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
1133 				     I915_CACHE_LLC, true, 0);
1134 
1135 	gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde) {
1136 		u32 expected;
1137 		gen6_pte_t *pt_vaddr;
1138 		const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]);
1139 		pd_entry = readl(ppgtt->pd_addr + pde);
1140 		expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
1141 
1142 		if (pd_entry != expected)
1143 			seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
1144 				   pde,
1145 				   pd_entry,
1146 				   expected);
1147 		seq_printf(m, "\tPDE: %x\n", pd_entry);
1148 
1149 		pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]);
1150 
1151 		for (pte = 0; pte < GEN6_PTES; pte+=4) {
1152 			unsigned long va =
1153 				(pde * PAGE_SIZE * GEN6_PTES) +
1154 				(pte * PAGE_SIZE);
1155 			int i;
1156 			bool found = false;
1157 			for (i = 0; i < 4; i++)
1158 				if (pt_vaddr[pte + i] != scratch_pte)
1159 					found = true;
1160 			if (!found)
1161 				continue;
1162 
1163 			seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
1164 			for (i = 0; i < 4; i++) {
1165 				if (pt_vaddr[pte + i] != scratch_pte)
1166 					seq_printf(m, " %08x", pt_vaddr[pte + i]);
1167 				else
1168 					seq_puts(m, "  SCRATCH ");
1169 			}
1170 			seq_puts(m, "\n");
1171 		}
1172 		kunmap_px(ppgtt, pt_vaddr);
1173 	}
1174 }
1175 
1176 /* Write the pde at index @pde in the page directory @pd to point at the page table @pt */
1177 static void gen6_write_pde(struct i915_page_directory *pd,
1178 			    const int pde, struct i915_page_table *pt)
1179 {
1180 	/* Caller needs to make sure the write completes if necessary */
1181 	struct i915_hw_ppgtt *ppgtt =
1182 		container_of(pd, struct i915_hw_ppgtt, pd);
1183 	u32 pd_entry;
1184 
1185 	pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt));
1186 	pd_entry |= GEN6_PDE_VALID;
1187 
1188 	writel(pd_entry, ppgtt->pd_addr + pde);
1189 }
1190 
1191 /* Write all the page tables found in the ppgtt structure to incrementing page
1192  * directories. */
1193 static void gen6_write_page_range(struct drm_i915_private *dev_priv,
1194 				  struct i915_page_directory *pd,
1195 				  uint32_t start, uint32_t length)
1196 {
1197 	struct i915_page_table *pt;
1198 	uint32_t pde, temp;
1199 
1200 	gen6_for_each_pde(pt, pd, start, length, temp, pde)
1201 		gen6_write_pde(pd, pde, pt);
1202 
1203 	/* Make sure write is complete before other code can use this page
1204 	 * table. Also required for WC mapped PTEs */
1205 	readl(dev_priv->gtt.gsm);
1206 }
1207 
1208 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
1209 {
1210 	BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f);
1211 
1212 	return (ppgtt->pd.base.ggtt_offset / 64) << 16;
1213 }
1214 
1215 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
1216 			 struct drm_i915_gem_request *req)
1217 {
1218 	struct intel_engine_cs *ring = req->ring;
1219 	int ret;
1220 
1221 	/* NB: TLBs must be flushed and invalidated before a switch */
1222 	ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1223 	if (ret)
1224 		return ret;
1225 
1226 	ret = intel_ring_begin(req, 6);
1227 	if (ret)
1228 		return ret;
1229 
1230 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
1231 	intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
1232 	intel_ring_emit(ring, PP_DIR_DCLV_2G);
1233 	intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
1234 	intel_ring_emit(ring, get_pd_offset(ppgtt));
1235 	intel_ring_emit(ring, MI_NOOP);
1236 	intel_ring_advance(ring);
1237 
1238 	return 0;
1239 }
1240 
1241 static int vgpu_mm_switch(struct i915_hw_ppgtt *ppgtt,
1242 			  struct drm_i915_gem_request *req)
1243 {
1244 	struct intel_engine_cs *ring = req->ring;
1245 	struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev);
1246 
1247 	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
1248 	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
1249 	return 0;
1250 }
1251 
1252 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
1253 			  struct drm_i915_gem_request *req)
1254 {
1255 	struct intel_engine_cs *ring = req->ring;
1256 	int ret;
1257 
1258 	/* NB: TLBs must be flushed and invalidated before a switch */
1259 	ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1260 	if (ret)
1261 		return ret;
1262 
1263 	ret = intel_ring_begin(req, 6);
1264 	if (ret)
1265 		return ret;
1266 
1267 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
1268 	intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
1269 	intel_ring_emit(ring, PP_DIR_DCLV_2G);
1270 	intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
1271 	intel_ring_emit(ring, get_pd_offset(ppgtt));
1272 	intel_ring_emit(ring, MI_NOOP);
1273 	intel_ring_advance(ring);
1274 
1275 	/* XXX: RCS is the only one to auto invalidate the TLBs? */
1276 	if (ring->id != RCS) {
1277 		ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1278 		if (ret)
1279 			return ret;
1280 	}
1281 
1282 	return 0;
1283 }
1284 
1285 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
1286 			  struct drm_i915_gem_request *req)
1287 {
1288 	struct intel_engine_cs *ring = req->ring;
1289 	struct drm_device *dev = ppgtt->base.dev;
1290 	struct drm_i915_private *dev_priv = dev->dev_private;
1291 
1292 
1293 	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
1294 	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
1295 
1296 	POSTING_READ(RING_PP_DIR_DCLV(ring));
1297 
1298 	return 0;
1299 }
1300 
1301 static void gen8_ppgtt_enable(struct drm_device *dev)
1302 {
1303 	struct drm_i915_private *dev_priv = dev->dev_private;
1304 	struct intel_engine_cs *ring;
1305 	int j;
1306 
1307 	for_each_ring(ring, dev_priv, j) {
1308 		I915_WRITE(RING_MODE_GEN7(ring),
1309 			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1310 	}
1311 }
1312 
1313 static void gen7_ppgtt_enable(struct drm_device *dev)
1314 {
1315 	struct drm_i915_private *dev_priv = dev->dev_private;
1316 	struct intel_engine_cs *ring;
1317 	uint32_t ecochk, ecobits;
1318 	int i;
1319 
1320 	ecobits = I915_READ(GAC_ECO_BITS);
1321 	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
1322 
1323 	ecochk = I915_READ(GAM_ECOCHK);
1324 	if (IS_HASWELL(dev)) {
1325 		ecochk |= ECOCHK_PPGTT_WB_HSW;
1326 	} else {
1327 		ecochk |= ECOCHK_PPGTT_LLC_IVB;
1328 		ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
1329 	}
1330 	I915_WRITE(GAM_ECOCHK, ecochk);
1331 
1332 	for_each_ring(ring, dev_priv, i) {
1333 		/* GFX_MODE is per-ring on gen7+ */
1334 		I915_WRITE(RING_MODE_GEN7(ring),
1335 			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1336 	}
1337 }
1338 
1339 static void gen6_ppgtt_enable(struct drm_device *dev)
1340 {
1341 	struct drm_i915_private *dev_priv = dev->dev_private;
1342 	uint32_t ecochk, gab_ctl, ecobits;
1343 
1344 	ecobits = I915_READ(GAC_ECO_BITS);
1345 	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
1346 		   ECOBITS_PPGTT_CACHE64B);
1347 
1348 	gab_ctl = I915_READ(GAB_CTL);
1349 	I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
1350 
1351 	ecochk = I915_READ(GAM_ECOCHK);
1352 	I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
1353 
1354 	I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1355 }
1356 
1357 /* PPGTT support for Sandybridge/Gen6 and later */
1358 static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
1359 				   uint64_t start,
1360 				   uint64_t length,
1361 				   bool use_scratch)
1362 {
1363 	struct i915_hw_ppgtt *ppgtt =
1364 		container_of(vm, struct i915_hw_ppgtt, base);
1365 	gen6_pte_t *pt_vaddr, scratch_pte;
1366 	unsigned first_entry = start >> PAGE_SHIFT;
1367 	unsigned num_entries = length >> PAGE_SHIFT;
1368 	unsigned act_pt = first_entry / GEN6_PTES;
1369 	unsigned first_pte = first_entry % GEN6_PTES;
1370 	unsigned last_pte, i;
1371 
1372 	scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
1373 				     I915_CACHE_LLC, true, 0);
1374 
1375 	while (num_entries) {
1376 		last_pte = first_pte + num_entries;
1377 		if (last_pte > GEN6_PTES)
1378 			last_pte = GEN6_PTES;
1379 
1380 		pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
1381 
1382 		for (i = first_pte; i < last_pte; i++)
1383 			pt_vaddr[i] = scratch_pte;
1384 
1385 		kunmap_px(ppgtt, pt_vaddr);
1386 
1387 		num_entries -= last_pte - first_pte;
1388 		first_pte = 0;
1389 		act_pt++;
1390 	}
1391 }
1392 
1393 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
1394 				      struct sg_table *pages,
1395 				      uint64_t start,
1396 				      enum i915_cache_level cache_level, u32 flags)
1397 {
1398 	struct i915_hw_ppgtt *ppgtt =
1399 		container_of(vm, struct i915_hw_ppgtt, base);
1400 	gen6_pte_t *pt_vaddr;
1401 	unsigned first_entry = start >> PAGE_SHIFT;
1402 	unsigned act_pt = first_entry / GEN6_PTES;
1403 	unsigned act_pte = first_entry % GEN6_PTES;
1404 	struct sg_page_iter sg_iter;
1405 
1406 	pt_vaddr = NULL;
1407 	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
1408 		if (pt_vaddr == NULL)
1409 			pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
1410 
1411 		pt_vaddr[act_pte] =
1412 			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
1413 				       cache_level, true, flags);
1414 
1415 		if (++act_pte == GEN6_PTES) {
1416 			kunmap_px(ppgtt, pt_vaddr);
1417 			pt_vaddr = NULL;
1418 			act_pt++;
1419 			act_pte = 0;
1420 		}
1421 	}
1422 	if (pt_vaddr)
1423 		kunmap_px(ppgtt, pt_vaddr);
1424 }
1425 
1426 static int gen6_alloc_va_range(struct i915_address_space *vm,
1427 			       uint64_t start_in, uint64_t length_in)
1428 {
1429 	DECLARE_BITMAP(new_page_tables, I915_PDES);
1430 	struct drm_device *dev = vm->dev;
1431 	struct drm_i915_private *dev_priv = dev->dev_private;
1432 	struct i915_hw_ppgtt *ppgtt =
1433 				container_of(vm, struct i915_hw_ppgtt, base);
1434 	struct i915_page_table *pt;
1435 	uint32_t start, length, start_save, length_save;
1436 	uint32_t pde, temp;
1437 	int ret;
1438 
1439 	if (WARN_ON(start_in + length_in > ppgtt->base.total))
1440 		return -ENODEV;
1441 
1442 	start = start_save = start_in;
1443 	length = length_save = length_in;
1444 
1445 	bitmap_zero(new_page_tables, I915_PDES);
1446 
1447 	/* The allocation is done in two stages so that we can bail out with
1448 	 * a minimal amount of pain. The first stage finds new page tables that
1449 	 * need allocation. The second stage marks the used ptes within the page
1450 	 * tables.
1451 	 */
1452 	gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
1453 		if (pt != vm->scratch_pt) {
1454 //			WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES));
1455 			continue;
1456 		}
1457 
1458 		/* We've already allocated a page table */
1459 		WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));
1460 
1461 		pt = alloc_pt(dev);
1462 		if (IS_ERR(pt)) {
1463 			ret = PTR_ERR(pt);
1464 			goto unwind_out;
1465 		}
1466 
1467 		gen6_initialize_pt(vm, pt);
1468 
1469 		ppgtt->pd.page_table[pde] = pt;
1470 		__set_bit(pde, new_page_tables);
1471 		trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT);
1472 	}
1473 
1474 	start = start_save;
1475 	length = length_save;
1476 
1477 	gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
1478 		DECLARE_BITMAP(tmp_bitmap, GEN6_PTES);
1479 
1480 		bitmap_zero(tmp_bitmap, GEN6_PTES);
1481 		bitmap_set(tmp_bitmap, gen6_pte_index(start),
1482 			   gen6_pte_count(start, length));
1483 
1484 		if (__test_and_clear_bit(pde, new_page_tables))
1485 			gen6_write_pde(&ppgtt->pd, pde, pt);
1486 
1487 		trace_i915_page_table_entry_map(vm, pde, pt,
1488 					 gen6_pte_index(start),
1489 					 gen6_pte_count(start, length),
1490 					 GEN6_PTES);
1491 		bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes,
1492 				GEN6_PTES);
1493 	}
1494 
1495 	WARN_ON(!bitmap_empty(new_page_tables, I915_PDES));
1496 
1497 	/* Make sure write is complete before other code can use this page
1498 	 * table. Also required for WC mapped PTEs */
1499 	readl(dev_priv->gtt.gsm);
1500 
1501 	mark_tlbs_dirty(ppgtt);
1502 	return 0;
1503 
1504 unwind_out:
1505 	for_each_set_bit(pde, new_page_tables, I915_PDES) {
1506 		struct i915_page_table *pt = ppgtt->pd.page_table[pde];
1507 
1508 		ppgtt->pd.page_table[pde] = vm->scratch_pt;
1509 		free_pt(vm->dev, pt);
1510 	}
1511 
1512 	mark_tlbs_dirty(ppgtt);
1513 	return ret;
1514 }
1515 
1516 static int gen6_init_scratch(struct i915_address_space *vm)
1517 {
1518 	struct drm_device *dev = vm->dev;
1519 
1520 	vm->scratch_page = alloc_scratch_page(dev);
1521 	if (IS_ERR(vm->scratch_page))
1522 		return PTR_ERR(vm->scratch_page);
1523 
1524 	vm->scratch_pt = alloc_pt(dev);
1525 	if (IS_ERR(vm->scratch_pt)) {
1526 		free_scratch_page(dev, vm->scratch_page);
1527 		return PTR_ERR(vm->scratch_pt);
1528 	}
1529 
1530 	gen6_initialize_pt(vm, vm->scratch_pt);
1531 
1532 	return 0;
1533 }
1534 
1535 static void gen6_free_scratch(struct i915_address_space *vm)
1536 {
1537 	struct drm_device *dev = vm->dev;
1538 
1539 	free_pt(dev, vm->scratch_pt);
1540 	free_scratch_page(dev, vm->scratch_page);
1541 }
1542 
1543 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
1544 {
1545 	struct i915_hw_ppgtt *ppgtt =
1546 		container_of(vm, struct i915_hw_ppgtt, base);
1547 	struct i915_page_table *pt;
1548 	uint32_t pde;
1549 
1550 	drm_mm_remove_node(&ppgtt->node);
1551 
1552 	gen6_for_all_pdes(pt, ppgtt, pde) {
1553 		if (pt != vm->scratch_pt)
1554 			free_pt(ppgtt->base.dev, pt);
1555 	}
1556 
1557 	gen6_free_scratch(vm);
1558 }
1559 
1560 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
1561 {
1562 	struct i915_address_space *vm = &ppgtt->base;
1563 	struct drm_device *dev = ppgtt->base.dev;
1564 	struct drm_i915_private *dev_priv = dev->dev_private;
1565 	bool retried = false;
1566 	int ret;
1567 
1568 	/* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
1569 	 * allocator works in address space sizes, so it's multiplied by page
1570 	 * size. We allocate at the top of the GTT to avoid fragmentation.
1571 	 */
1572 	BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm));
1573 
1574 	ret = gen6_init_scratch(vm);
1575 	if (ret)
1576 		return ret;
1577 
1578 alloc:
1579 	ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
1580 						  &ppgtt->node, GEN6_PD_SIZE,
1581 						  GEN6_PD_ALIGN, 0,
1582 						  0, dev_priv->gtt.base.total,
1583 						  DRM_MM_TOPDOWN);
1584 	if (ret == -ENOSPC && !retried) {
1585 		ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
1586 					       GEN6_PD_SIZE, GEN6_PD_ALIGN,
1587 					       I915_CACHE_NONE,
1588 					       0, dev_priv->gtt.base.total,
1589 					       0);
1590 		if (ret)
1591 			goto err_out;
1592 
1593 		retried = true;
1594 		goto alloc;
1595 	}
1596 
1597 	if (ret)
1598 		goto err_out;
1599 
1600 
1601 	if (ppgtt->node.start < dev_priv->gtt.mappable_end)
1602 		DRM_DEBUG("Forced to use aperture for PDEs\n");
1603 
1604 	return 0;
1605 
1606 err_out:
1607 	gen6_free_scratch(vm);
1608 	return ret;
1609 }
1610 
1611 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
1612 {
1613 	return gen6_ppgtt_allocate_page_directories(ppgtt);
1614 }
1615 
1616 static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
1617 				  uint64_t start, uint64_t length)
1618 {
1619 	struct i915_page_table *unused;
1620 	uint32_t pde, temp;
1621 
1622 	gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde)
1623 		ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt;
1624 }
1625 
1626 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt, bool aliasing)
1627 {
1628 	struct drm_device *dev = ppgtt->base.dev;
1629 	struct drm_i915_private *dev_priv = dev->dev_private;
1630 	int ret;
1631 
1632 	ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
1633 	if (IS_GEN6(dev)) {
1634 		ppgtt->switch_mm = gen6_mm_switch;
1635 	} else if (IS_HASWELL(dev)) {
1636 		ppgtt->switch_mm = hsw_mm_switch;
1637 	} else if (IS_GEN7(dev)) {
1638 		ppgtt->switch_mm = gen7_mm_switch;
1639 	} else
1640 		BUG();
1641 
1642 	if (intel_vgpu_active(dev))
1643 		ppgtt->switch_mm = vgpu_mm_switch;
1644 
1645 	ret = gen6_ppgtt_alloc(ppgtt);
1646 	if (ret)
1647 		return ret;
1648 
1649 	if (aliasing) {
1650 		/* preallocate all pts */
1651 		ret = alloc_pt_range(&ppgtt->pd, 0, I915_PDES,
1652 				ppgtt->base.dev);
1653 
1654 		if (ret) {
1655 			gen6_ppgtt_cleanup(&ppgtt->base);
1656 			return ret;
1657 		}
1658 	}
1659 
1660 	ppgtt->base.allocate_va_range = gen6_alloc_va_range;
1661 	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
1662 	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
1663 	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
1664 	ppgtt->base.bind_vma = ppgtt_bind_vma;
1665 	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
1666 	ppgtt->base.start = 0;
1667 	ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE;
1668 	ppgtt->debug_dump = gen6_dump_ppgtt;
1669 
1670 	ppgtt->pd.base.ggtt_offset =
1671 		ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);
1672 
1673 	ppgtt->pd_addr = (gen6_pte_t __iomem *)dev_priv->gtt.gsm +
1674 		ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t);
1675 
1676 	if (aliasing)
1677 		ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
1678 	else
1679 		gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
1680 
1681 	gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);
1682 
1683 	DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n",
1684 			 ppgtt->node.size >> 20,
1685 			 ppgtt->node.start / PAGE_SIZE);
1686 
1687 	DRM_DEBUG("Adding PPGTT at offset %x\n",
1688 		  ppgtt->pd.base.ggtt_offset << 10);
1689 
1690 	return 0;
1691 }
1692 
1693 static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt,
1694 		bool aliasing)
1695 {
1696 	ppgtt->base.dev = dev;
1697 
1698 	if (INTEL_INFO(dev)->gen < 8)
1699 		return gen6_ppgtt_init(ppgtt, aliasing);
1700 	else if (aliasing)
1701 		return gen8_aliasing_ppgtt_init(ppgtt);
1702 	else
1703 		return gen8_ppgtt_init(ppgtt);
1704 }
1705 
1706 int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
1707 {
1708 	struct drm_i915_private *dev_priv = dev->dev_private;
1709 	int ret = 0;
1710 
1711 	ret = __hw_ppgtt_init(dev, ppgtt, false);
1712 	if (ret == 0) {
1713 		kref_init(&ppgtt->ref);
1714 		drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
1715 			    ppgtt->base.total);
1716 		i915_init_vm(dev_priv, &ppgtt->base);
1717 	}
1718 
1719 	return ret;
1720 }
1721 
1722 int i915_ppgtt_init_hw(struct drm_device *dev)
1723 {
1724 	/* In the case of execlists, PPGTT is enabled by the context descriptor
1725 	 * and the PDPs are contained within the context itself.  We don't
1726 	 * need to do anything here. */
1727 	if (i915.enable_execlists)
1728 		return 0;
1729 
1730 	if (!USES_PPGTT(dev))
1731 		return 0;
1732 
1733 	if (IS_GEN6(dev))
1734 		gen6_ppgtt_enable(dev);
1735 	else if (IS_GEN7(dev))
1736 		gen7_ppgtt_enable(dev);
1737 	else if (INTEL_INFO(dev)->gen >= 8)
1738 		gen8_ppgtt_enable(dev);
1739 	else
1740 		MISSING_CASE(INTEL_INFO(dev)->gen);
1741 
1742 	return 0;
1743 }
1744 
1745 int i915_ppgtt_init_ring(struct drm_i915_gem_request *req)
1746 {
1747 	struct drm_i915_private *dev_priv = req->ring->dev->dev_private;
1748 	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
1749 
1750 	if (i915.enable_execlists)
1751 		return 0;
1752 
1753 	if (!ppgtt)
1754 		return 0;
1755 
1756 	return ppgtt->switch_mm(ppgtt, req);
1757 }
1758 
1759 struct i915_hw_ppgtt *
1760 i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
1761 {
1762 	struct i915_hw_ppgtt *ppgtt;
1763 	int ret;
1764 
1765 	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
1766 	if (!ppgtt)
1767 		return ERR_PTR(-ENOMEM);
1768 
1769 	ret = i915_ppgtt_init(dev, ppgtt);
1770 	if (ret) {
1771 		kfree(ppgtt);
1772 		return ERR_PTR(ret);
1773 	}
1774 
1775 	ppgtt->file_priv = fpriv;
1776 
1777 	trace_i915_ppgtt_create(&ppgtt->base);
1778 
1779 	return ppgtt;
1780 }
1781 
1782 void  i915_ppgtt_release(struct kref *kref)
1783 {
1784 	struct i915_hw_ppgtt *ppgtt =
1785 		container_of(kref, struct i915_hw_ppgtt, ref);
1786 
1787 	trace_i915_ppgtt_release(&ppgtt->base);
1788 
1789 	/* vmas should already be unbound */
1790 	WARN_ON(!list_empty(&ppgtt->base.active_list));
1791 	WARN_ON(!list_empty(&ppgtt->base.inactive_list));
1792 
1793 	list_del(&ppgtt->base.global_link);
1794 	drm_mm_takedown(&ppgtt->base.mm);
1795 
1796 	ppgtt->base.cleanup(&ppgtt->base);
1797 	kfree(ppgtt);
1798 }
1799 
1800 extern int intel_iommu_gfx_mapped;
1801 /* Certain Gen5 chipsets require idling the GPU before
1802  * unmapping anything from the GTT when VT-d is enabled.
1803  */
1804 static bool needs_idle_maps(struct drm_device *dev)
1805 {
1806 #ifdef CONFIG_INTEL_IOMMU
1807 	/* Query intel_iommu to see if we need the workaround. Presumably that
1808 	 * was loaded first.
1809 	 */
1810 	if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
1811 		return true;
1812 #endif
1813 	return false;
1814 }
1815 
1816 static bool do_idling(struct drm_i915_private *dev_priv)
1817 {
1818 	bool ret = dev_priv->mm.interruptible;
1819 
1820 	if (unlikely(dev_priv->gtt.do_idle_maps)) {
1821 		dev_priv->mm.interruptible = false;
1822 		if (i915_gpu_idle(dev_priv->dev)) {
1823 			DRM_ERROR("Couldn't idle GPU\n");
1824 			/* Wait a bit, in hopes it avoids the hang */
1825 			udelay(10);
1826 		}
1827 	}
1828 
1829 	return ret;
1830 }
1831 
1832 static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
1833 {
1834 	if (unlikely(dev_priv->gtt.do_idle_maps))
1835 		dev_priv->mm.interruptible = interruptible;
1836 }
1837 
1838 void i915_check_and_clear_faults(struct drm_device *dev)
1839 {
1840 	struct drm_i915_private *dev_priv = dev->dev_private;
1841 	struct intel_engine_cs *ring;
1842 	int i;
1843 
1844 	if (INTEL_INFO(dev)->gen < 6)
1845 		return;
1846 
1847 	for_each_ring(ring, dev_priv, i) {
1848 		u32 fault_reg;
1849 		fault_reg = I915_READ(RING_FAULT_REG(ring));
1850 		if (fault_reg & RING_FAULT_VALID) {
1851 #if 0
1852 			DRM_DEBUG_DRIVER("Unexpected fault\n"
1853 					 "\tAddr: 0x%08lx\n"
1854 					 "\tAddress space: %s\n"
1855 					 "\tSource ID: %d\n"
1856 					 "\tType: %d\n",
1857 					 fault_reg & PAGE_MASK,
1858 					 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
1859 					 RING_FAULT_SRCID(fault_reg),
1860 					 RING_FAULT_FAULT_TYPE(fault_reg));
1861 #endif
1862 			I915_WRITE(RING_FAULT_REG(ring),
1863 				   fault_reg & ~RING_FAULT_VALID);
1864 		}
1865 	}
1866 	POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS]));
1867 }
1868 
1869 static void i915_ggtt_flush(struct drm_i915_private *dev_priv)
1870 {
1871 	if (INTEL_INFO(dev_priv->dev)->gen < 6) {
1872 		intel_gtt_chipset_flush();
1873 	} else {
1874 		I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
1875 		POSTING_READ(GFX_FLSH_CNTL_GEN6);
1876 	}
1877 }
1878 
1879 void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
1880 {
1881 	struct drm_i915_private *dev_priv = dev->dev_private;
1882 
1883 	/* Don't bother messing with faults pre GEN6 as we have little
1884 	 * documentation supporting that it's a good idea.
1885 	 */
1886 	if (INTEL_INFO(dev)->gen < 6)
1887 		return;
1888 
1889 	i915_check_and_clear_faults(dev);
1890 
1891 	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
1892 				       dev_priv->gtt.base.start,
1893 				       dev_priv->gtt.base.total,
1894 				       true);
1895 
1896 	i915_ggtt_flush(dev_priv);
1897 }
1898 
1899 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
1900 {
1901 	if (!dma_map_sg(obj->base.dev->pdev->dev,
1902 			obj->pages->sgl, obj->pages->nents,
1903 			PCI_DMA_BIDIRECTIONAL))
1904 		return -ENOSPC;
1905 
1906 	return 0;
1907 }
1908 
1909 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
1910 {
1911 #if 0
1912 	writeq(pte, addr);
1913 #else
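	/* writeq() is compiled out above in this port, so the 64-bit gen8 PTE
	 * is written as two 32-bit halves, low dword first.
	 */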
1914 	iowrite32((u32)pte, addr);
1915 	iowrite32(pte >> 32, addr + 4);
1916 #endif
1917 }
1918 
1919 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
1920 				     struct sg_table *st,
1921 				     uint64_t start,
1922 				     enum i915_cache_level level, u32 unused)
1923 {
1924 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
1925 	unsigned first_entry = start >> PAGE_SHIFT;
1926 	gen8_pte_t __iomem *gtt_entries =
1927 		(gen8_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
1928 	int i = 0;
1929 	struct sg_page_iter sg_iter;
1930 	dma_addr_t addr = 0; /* shut up gcc */
1931 
1932 	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
1933 		addr = sg_dma_address(sg_iter.sg) +
1934 			(sg_iter.sg_pgoffset << PAGE_SHIFT);
1935 		gen8_set_pte(&gtt_entries[i],
1936 			     gen8_pte_encode(addr, level, true));
1937 		i++;
1938 	}
1939 
1940 	/*
1941 	 * XXX: This serves as a posting read to make sure that the PTE has
1942 	 * actually been updated. There is some concern that, even though
1943 	 * registers and PTEs are within the same BAR, they may be subject to
1944 	 * different NUMA access patterns. Therefore, even with the way we assume
1945 	 * hardware should work, we must keep this posting read for paranoia.
1946 	 */
1947 	if (i != 0)
1948 		WARN_ON(readq(&gtt_entries[i-1])
1949 			!= gen8_pte_encode(addr, level, true));
1950 
1951 	/* This next bit makes the above posting read even more important. We
1952 	 * want to flush the TLBs only after we're certain all the PTE updates
1953 	 * have finished.
1954 	 */
1955 	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
1956 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
1957 }
1958 
1959 /*
1960  * Binds an object into the global gtt with the specified cache level. The object
1961  * will be accessible to the GPU via commands whose operands reference offsets
1962  * within the global GTT as well as accessible by the GPU through the GMADR
1963  * mapped BAR (dev_priv->mm.gtt->gtt).
1964  */
1965 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
1966 				     struct sg_table *st,
1967 				     uint64_t start,
1968 				     enum i915_cache_level level, u32 flags)
1969 {
1970 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
1971 	unsigned first_entry = start >> PAGE_SHIFT;
1972 	gen6_pte_t __iomem *gtt_entries =
1973 		(gen6_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
1974 	int i = 0;
1975 	struct sg_page_iter sg_iter;
1976 	dma_addr_t addr = 0;
1977 
1978 	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
1979 		addr = sg_page_iter_dma_address(&sg_iter);
1980 		iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
1981 		i++;
1982 	}
1983 
1984 	/* XXX: This serves as a posting read to make sure that the PTE has
1985 	 * actually been updated. There is some concern that, even though
1986 	 * registers and PTEs are within the same BAR, they may be subject to
1987 	 * different NUMA access patterns. Therefore, even with the way we assume
1988 	 * hardware should work, we must keep this posting read for paranoia.
1989 	 */
1990 	if (i != 0) {
1991 		unsigned long gtt = readl(&gtt_entries[i-1]);
1992 		WARN_ON(gtt != vm->pte_encode(addr, level, true, flags));
1993 	}
1994 
1995 	/* This next bit makes the above posting read even more important. We
1996 	 * want to flush the TLBs only after we're certain all the PTE updates
1997 	 * have finished.
1998 	 */
1999 	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2000 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
2001 }
2002 
2003 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
2004 				  uint64_t start,
2005 				  uint64_t length,
2006 				  bool use_scratch)
2007 {
2008 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
2009 	unsigned first_entry = start >> PAGE_SHIFT;
2010 	unsigned num_entries = length >> PAGE_SHIFT;
2011 	gen8_pte_t scratch_pte, __iomem *gtt_base =
2012 		(gen8_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
2013 	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
2014 	int i;
2015 
2016 	if (WARN(num_entries > max_entries,
2017 		 "First entry = %d; Num entries = %d (max=%d)\n",
2018 		 first_entry, num_entries, max_entries))
2019 		num_entries = max_entries;
2020 
2021 	scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
2022 				      I915_CACHE_LLC,
2023 				      use_scratch);
2024 	for (i = 0; i < num_entries; i++)
2025 		gen8_set_pte(&gtt_base[i], scratch_pte);
2026 	readl(gtt_base);
2027 }
2028 
2029 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
2030 				  uint64_t start,
2031 				  uint64_t length,
2032 				  bool use_scratch)
2033 {
2034 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
2035 	unsigned first_entry = start >> PAGE_SHIFT;
2036 	unsigned num_entries = length >> PAGE_SHIFT;
2037 	gen6_pte_t scratch_pte, __iomem *gtt_base =
2038 		(gen6_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
2039 	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
2040 	int i;
2041 
2042 	if (WARN(num_entries > max_entries,
2043 		 "First entry = %d; Num entries = %d (max=%d)\n",
2044 		 first_entry, num_entries, max_entries))
2045 		num_entries = max_entries;
2046 
2047 	scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
2048 				     I915_CACHE_LLC, use_scratch, 0);
2049 
2050 	for (i = 0; i < num_entries; i++)
2051 		iowrite32(scratch_pte, &gtt_base[i]);
2052 	readl(gtt_base);
2053 }
2054 
2055 static void i915_ggtt_insert_entries(struct i915_address_space *vm,
2056 				     struct sg_table *pages,
2057 				     uint64_t start,
2058 				     enum i915_cache_level cache_level, u32 unused)
2059 {
2060 	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2061 		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2062 
2063 	intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags);
2064 }
2065 
2066 static void i915_ggtt_clear_range(struct i915_address_space *vm,
2067 				  uint64_t start,
2068 				  uint64_t length,
2069 				  bool unused)
2070 {
2071 	unsigned first_entry = start >> PAGE_SHIFT;
2072 	unsigned num_entries = length >> PAGE_SHIFT;
2073 	intel_gtt_clear_range(first_entry, num_entries);
2074 }
2075 
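/*
 * ggtt_bind_vma() writes the PTEs for a VMA: with GLOBAL_BIND (or when no
 * aliasing PPGTT exists) the pages go into the global GTT, and with
 * LOCAL_BIND plus an aliasing PPGTT they are also inserted into the aliasing
 * PPGTT at the same offset.
 */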
2076 static int ggtt_bind_vma(struct i915_vma *vma,
2077 			 enum i915_cache_level cache_level,
2078 			 u32 flags)
2079 {
2080 	struct drm_device *dev = vma->vm->dev;
2081 	struct drm_i915_private *dev_priv = dev->dev_private;
2082 	struct drm_i915_gem_object *obj = vma->obj;
2083 	struct sg_table *pages = obj->pages;
2084 	u32 pte_flags = 0;
2085 	int ret;
2086 
2087 	ret = i915_get_ggtt_vma_pages(vma);
2088 	if (ret)
2089 		return ret;
2090 	pages = vma->ggtt_view.pages;
2091 
2092 	/* Currently applicable only to VLV */
2093 	if (obj->gt_ro)
2094 		pte_flags |= PTE_READ_ONLY;
2095 
2096 
2097 	if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) {
2098 		vma->vm->insert_entries(vma->vm, pages,
2099 					vma->node.start,
2100 					cache_level, pte_flags);
2101 	}
2102 
2103 	if (dev_priv->mm.aliasing_ppgtt && flags & LOCAL_BIND) {
2104 		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
2105 		appgtt->base.insert_entries(&appgtt->base, pages,
2106 					    vma->node.start,
2107 					    cache_level, pte_flags);
2108 	}
2109 
2110 	return 0;
2111 }
2112 
2113 static void ggtt_unbind_vma(struct i915_vma *vma)
2114 {
2115 	struct drm_device *dev = vma->vm->dev;
2116 	struct drm_i915_private *dev_priv = dev->dev_private;
2117 	struct drm_i915_gem_object *obj = vma->obj;
2118 	const uint64_t size = min_t(uint64_t,
2119 				    obj->base.size,
2120 				    vma->node.size);
2121 
2122 	if (vma->bound & GLOBAL_BIND) {
2123 		vma->vm->clear_range(vma->vm,
2124 				     vma->node.start,
2125 				     size,
2126 				     true);
2127 	}
2128 
2129 	if (dev_priv->mm.aliasing_ppgtt && vma->bound & LOCAL_BIND) {
2130 		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
2131 
2132 		appgtt->base.clear_range(&appgtt->base,
2133 					 vma->node.start,
2134 					 size,
2135 					 true);
2136 	}
2137 }
2138 
2139 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
2140 {
2141 	struct drm_device *dev = obj->base.dev;
2142 	struct drm_i915_private *dev_priv = dev->dev_private;
2143 	bool interruptible;
2144 
2145 	interruptible = do_idling(dev_priv);
2146 
2147 	dma_unmap_sg(dev->pdev->dev, obj->pages->sgl, obj->pages->nents,
2148 		     PCI_DMA_BIDIRECTIONAL);
2149 
2150 	undo_idling(dev_priv, interruptible);
2151 }
2152 
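/*
 * Used as the drm_mm color_adjust hook on machines without an LLC (see the
 * !HAS_LLC() check in i915_gem_setup_global_gtt()): nodes of differing cache
 * "color" are kept at least one 4096-byte page apart by nudging the start
 * and/or end of the hole being considered.
 */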
2153 static void i915_gtt_color_adjust(struct drm_mm_node *node,
2154 				  unsigned long color,
2155 				  u64 *start,
2156 				  u64 *end)
2157 {
2158 	if (node->color != color)
2159 		*start += 4096;
2160 
2161 	if (!list_empty(&node->node_list)) {
2162 		node = list_entry(node->node_list.next,
2163 				  struct drm_mm_node,
2164 				  node_list);
2165 		if (node->allocated && node->color != color)
2166 			*end -= 4096;
2167 	}
2168 }
2169 
2170 static int i915_gem_setup_global_gtt(struct drm_device *dev,
2171 				     unsigned long start,
2172 				     unsigned long mappable_end,
2173 				     unsigned long end)
2174 {
2175 	/* Let GEM manage all of the aperture.
2176 	 *
2177 	 * However, leave one page at the end still bound to the scratch page.
2178 	 * There are a number of places where the hardware apparently prefetches
2179 	 * past the end of the object, and we've seen multiple hangs with the
2180 	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
2181 	 * aperture.  One page should be enough to keep any prefetching inside
2182 	 * of the aperture.
2183 	 */
2184 	struct drm_i915_private *dev_priv = dev->dev_private;
2185 	struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
2186 	unsigned long mappable;
2187 	int error;
2188 	struct drm_mm_node *entry;
2189 	struct drm_i915_gem_object *obj;
2190 	unsigned long hole_start, hole_end;
2191 	int ret;
2192 
2193 	mappable = min(end, mappable_end) - start;
2194 	BUG_ON(mappable_end > end);
2195 
2196 	/* Subtract the guard page ... */
2197 	drm_mm_init(&ggtt_vm->mm, start, end - start - PAGE_SIZE);
2198 
2199 	dev_priv->gtt.base.start = start;
2200 	dev_priv->gtt.base.total = end - start;
2201 
2202 	if (intel_vgpu_active(dev)) {
2203 		ret = intel_vgt_balloon(dev);
2204 		if (ret)
2205 			return ret;
2206 	}
2207 
2208 	if (!HAS_LLC(dev))
2209 		dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust;
2210 
2211 	/* Mark any preallocated objects as occupied */
2212 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
2213 		struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
2214 
2215 		DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n",
2216 			      i915_gem_obj_ggtt_offset(obj), obj->base.size);
2217 
2218 		WARN_ON(i915_gem_obj_ggtt_bound(obj));
2219 		ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
2220 		if (ret) {
2221 			DRM_DEBUG_KMS("Reservation failed: %i\n", ret);
2222 			return ret;
2223 		}
2224 		vma->bound |= GLOBAL_BIND;
2225 	}
2226 
2227 	/* Clear any non-preallocated blocks */
2228 	drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
2229 		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
2230 			      hole_start, hole_end);
2231 		ggtt_vm->clear_range(ggtt_vm, hole_start,
2232 				     hole_end - hole_start, true);
2233 	}
2234 
2235 #ifdef __DragonFly__
2236 	device_printf(dev->dev,
2237 	    "taking over the fictitious range 0x%lx-0x%lx\n",
2238 	    dev_priv->gtt.mappable_base + start, dev_priv->gtt.mappable_base + start + mappable);
2239 	error = -vm_phys_fictitious_reg_range(dev_priv->gtt.mappable_base + start,
2240 	    dev_priv->gtt.mappable_base + start + mappable, VM_MEMATTR_WRITE_COMBINING);
2241 #endif
2242 
2243 	/* And finally clear the reserved guard page */
2244 	ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true);
2245 
2246 	if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) {
2247 		struct i915_hw_ppgtt *ppgtt;
2248 
2249 		ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2250 		if (!ppgtt)
2251 			return -ENOMEM;
2252 
2253 		ret = __hw_ppgtt_init(dev, ppgtt, true);
2254 		if (ret) {
2255 			ppgtt->base.cleanup(&ppgtt->base);
2256 			kfree(ppgtt);
2257 			return ret;
2258 		}
2259 
2260 		if (ppgtt->base.allocate_va_range)
2261 			ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0,
2262 							    ppgtt->base.total);
2263 		if (ret) {
2264 			ppgtt->base.cleanup(&ppgtt->base);
2265 			kfree(ppgtt);
2266 			return ret;
2267 		}
2268 
2269 		ppgtt->base.clear_range(&ppgtt->base,
2270 					ppgtt->base.start,
2271 					ppgtt->base.total,
2272 					true);
2273 
2274 		dev_priv->mm.aliasing_ppgtt = ppgtt;
2275 	}
2276 
2277 	return 0;
2278 }
2279 
2280 void i915_gem_init_global_gtt(struct drm_device *dev)
2281 {
2282 	struct drm_i915_private *dev_priv = dev->dev_private;
2283 	u64 gtt_size, mappable_size;
2284 
2285 	gtt_size = dev_priv->gtt.base.total;
2286 	mappable_size = dev_priv->gtt.mappable_end;
2287 
2288 	i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
2289 }
2290 
2291 void i915_global_gtt_cleanup(struct drm_device *dev)
2292 {
2293 	struct drm_i915_private *dev_priv = dev->dev_private;
2294 	struct i915_address_space *vm = &dev_priv->gtt.base;
2295 
2296 	if (dev_priv->mm.aliasing_ppgtt) {
2297 		struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
2298 
2299 		ppgtt->base.cleanup(&ppgtt->base);
2300 	}
2301 
2302 	if (drm_mm_initialized(&vm->mm)) {
2303 		if (intel_vgpu_active(dev))
2304 			intel_vgt_deballoon();
2305 
2306 		drm_mm_takedown(&vm->mm);
2307 		list_del(&vm->global_link);
2308 	}
2309 
2310 	vm->cleanup(vm);
2311 }
2312 
2313 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
2314 {
2315 	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
2316 	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
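	/* The decode below is a straight shift: e.g. a GGMS value of 2 gives
	 * 2MB of PTEs, which at 4 bytes per gen6 PTE maps
	 * 2MB / 4 * 4KB = 2GB of address space (see gen6_gmch_probe()).
	 */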
2317 	return snb_gmch_ctl << 20;
2318 }
2319 
2320 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
2321 {
2322 	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
2323 	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
2324 	if (bdw_gmch_ctl)
2325 		bdw_gmch_ctl = 1 << bdw_gmch_ctl;
2326 
2327 #ifdef CONFIG_X86_32
2328 	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */
2329 	if (bdw_gmch_ctl > 4)
2330 		bdw_gmch_ctl = 4;
2331 #endif
2332 
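	/* GGMS here encodes a power of two: e.g. a raw field value of 3
	 * decodes to 8MB of PTEs, which at 8 bytes per gen8 PTE maps
	 * 8MB / 8 * 4KB = 4GB of address space (see gen8_gmch_probe()).
	 * The 4MB cap above is what yields the 2GB GGTT limit on 32-bit.
	 */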
2333 	return bdw_gmch_ctl << 20;
2334 }
2335 
2336 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
2337 {
2338 	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
2339 	gmch_ctrl &= SNB_GMCH_GGMS_MASK;
2340 
2341 	if (gmch_ctrl)
2342 		return 1 << (20 + gmch_ctrl);
2343 
2344 	return 0;
2345 }
2346 
2347 static size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
2348 {
2349 	snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
2350 	snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
2351 	return snb_gmch_ctl << 25; /* 32 MB units */
2352 }
2353 
2354 static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
2355 {
2356 	bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2357 	bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
2358 	return bdw_gmch_ctl << 25; /* 32 MB units */
2359 }
2360 
2361 static size_t chv_get_stolen_size(u16 gmch_ctrl)
2362 {
2363 	gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
2364 	gmch_ctrl &= SNB_GMCH_GMS_MASK;
2365 
2366 	/*
2367 	 * 0x0  to 0x10: 32MB increments starting at 0MB
2368 	 * 0x11 to 0x16: 4MB increments starting at 8MB
2369 	 * 0x17 to 0x1d: 4MB increments starting at 36MB
2370 	 */
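	/* For illustration: gmch_ctrl == 0x12 takes the middle branch and
	 * decodes to (0x12 - 0x11 + 2) << 22 = 12MB of stolen memory.
	 */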
2371 	if (gmch_ctrl < 0x11)
2372 		return gmch_ctrl << 25;
2373 	else if (gmch_ctrl < 0x17)
2374 		return (gmch_ctrl - 0x11 + 2) << 22;
2375 	else
2376 		return (gmch_ctrl - 0x17 + 9) << 22;
2377 }
2378 
2379 static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
2380 {
2381 	gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2382 	gen9_gmch_ctl &= BDW_GMCH_GMS_MASK;
2383 
2384 	if (gen9_gmch_ctl < 0xf0)
2385 		return gen9_gmch_ctl << 25; /* 32 MB units */
2386 	else
2387 		/* 4MB increments, starting at 4MB for 0xf0 */
2388 		return (gen9_gmch_ctl - 0xf0 + 1) << 22;
2389 }
2390 
2391 static int ggtt_probe_common(struct drm_device *dev,
2392 			     size_t gtt_size)
2393 {
2394 	struct drm_i915_private *dev_priv = dev->dev_private;
2395 	struct i915_page_scratch *scratch_page;
2396 	phys_addr_t gtt_phys_addr;
2397 
2398 	/* For Modern GENs the PTEs and register space are split in the BAR */
2399 	gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
2400 		(pci_resource_len(dev->pdev, 0) / 2);
2401 
2402 	/*
2403 	 * On BXT, writes larger than 64 bits to the GTT pagetable range will be
2404 	 * dropped. For WC mappings in general we have 64 byte burst writes
2405 	 * when the WC buffer is flushed, so we can't use it, but have to
2406 	 * resort to an uncached mapping. The WC issue is easily caught by the
2407 	 * readback check when writing GTT PTE entries.
2408 	 */
2409 	if (IS_BROXTON(dev))
2410 		dev_priv->gtt.gsm = ioremap_nocache(gtt_phys_addr, gtt_size);
2411 	else
2412 		dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
2413 	if (!dev_priv->gtt.gsm) {
2414 		DRM_ERROR("Failed to map the gtt page table\n");
2415 		return -ENOMEM;
2416 	}
2417 
2418 	scratch_page = alloc_scratch_page(dev);
2419 	if (IS_ERR(scratch_page)) {
2420 		DRM_ERROR("Scratch setup failed\n");
2421 		/* iounmap will also get called at remove, but meh */
2422 		iounmap(dev_priv->gtt.gsm);
2423 		return PTR_ERR(scratch_page);
2424 	}
2425 
2426 	dev_priv->gtt.base.scratch_page = scratch_page;
2427 
2428 	return 0;
2429 }
2430 
2431 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
2432  * bits. When using advanced contexts each context stores its own PAT, but
2433  * writing this data shouldn't be harmful even in those cases. */
2434 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
2435 {
2436 	uint64_t pat;
2437 
2438 	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
2439 	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
2440 	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
2441 	      GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
2442 	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
2443 	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
2444 	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
2445 	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
2446 
2447 	if (!USES_PPGTT(dev_priv->dev))
2448 		/* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
2449 		 * so RTL will always use the value corresponding to
2450 		 * pat_sel = 000".
2451 		 * So let's disable cache for GGTT to avoid screen corruptions.
2452 		 * MOCS still can be used though.
2453 		 * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
2454 		 * before this patch, i.e. the same uncached + snooping access
2455 		 * like on gen6/7 seems to be in effect.
2456 		 * - So this just fixes blitter/render access. Again it looks
2457 		 * like it's not just uncached access, but uncached + snooping.
2458 		 * So we can still hold onto all our assumptions wrt cpu
2459 		 * clflushing on LLC machines.
2460 		 */
2461 		pat = GEN8_PPAT(0, GEN8_PPAT_UC);
2462 
2463 	/* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
2464 	 * write would work. */
2465 	I915_WRITE(GEN8_PRIVATE_PAT, pat);
2466 	I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
2467 }
2468 
2469 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
2470 {
2471 	uint64_t pat;
2472 
2473 	/*
2474 	 * Map WB on BDW to snooped on CHV.
2475 	 *
2476 	 * Only the snoop bit has meaning for CHV, the rest is
2477 	 * ignored.
2478 	 *
2479 	 * The hardware will never snoop for certain types of accesses:
2480 	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
2481 	 * - PPGTT page tables
2482 	 * - some other special cycles
2483 	 *
2484 	 * As with BDW, we also need to consider the following for GT accesses:
2485 	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
2486 	 * so RTL will always use the value corresponding to
2487 	 * pat_sel = 000".
2488 	 * Which means we must set the snoop bit in PAT entry 0
2489 	 * in order to keep the global status page working.
2490 	 */
2491 	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
2492 	      GEN8_PPAT(1, 0) |
2493 	      GEN8_PPAT(2, 0) |
2494 	      GEN8_PPAT(3, 0) |
2495 	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
2496 	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
2497 	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
2498 	      GEN8_PPAT(7, CHV_PPAT_SNOOP);
2499 
2500 	I915_WRITE(GEN8_PRIVATE_PAT, pat);
2501 	I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
2502 }
2503 
2504 static int gen8_gmch_probe(struct drm_device *dev,
2505 			   u64 *gtt_total,
2506 			   size_t *stolen,
2507 			   phys_addr_t *mappable_base,
2508 			   u64 *mappable_end)
2509 {
2510 	struct drm_i915_private *dev_priv = dev->dev_private;
2511 	u64 gtt_size;
2512 	u16 snb_gmch_ctl;
2513 	int ret;
2514 
2515 	/* TODO: We're not aware of mappable constraints on gen8 yet */
2516 	*mappable_base = pci_resource_start(dev->pdev, 2);
2517 	*mappable_end = pci_resource_len(dev->pdev, 2);
2518 
2519 #if 0
2520 	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
2521 		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));
2522 #endif
2523 
2524 	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
2525 
2526 	if (INTEL_INFO(dev)->gen >= 9) {
2527 		*stolen = gen9_get_stolen_size(snb_gmch_ctl);
2528 		gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
2529 	} else if (IS_CHERRYVIEW(dev)) {
2530 		*stolen = chv_get_stolen_size(snb_gmch_ctl);
2531 		gtt_size = chv_get_total_gtt_size(snb_gmch_ctl);
2532 	} else {
2533 		*stolen = gen8_get_stolen_size(snb_gmch_ctl);
2534 		gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
2535 	}
2536 
2537 	*gtt_total = (gtt_size / sizeof(gen8_pte_t)) << PAGE_SHIFT;
2538 
2539 	if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
2540 		chv_setup_private_ppat(dev_priv);
2541 	else
2542 		bdw_setup_private_ppat(dev_priv);
2543 
2544 	ret = ggtt_probe_common(dev, gtt_size);
2545 
2546 	dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
2547 	dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
2548 	dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
2549 	dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
2550 
2551 	return ret;
2552 }
2553 
2554 static int gen6_gmch_probe(struct drm_device *dev,
2555 			   u64 *gtt_total,
2556 			   size_t *stolen,
2557 			   phys_addr_t *mappable_base,
2558 			   u64 *mappable_end)
2559 {
2560 	struct drm_i915_private *dev_priv = dev->dev_private;
2561 	unsigned int gtt_size;
2562 	u16 snb_gmch_ctl;
2563 	int ret;
2564 
2565 	*mappable_base = pci_resource_start(dev->pdev, 2);
2566 	*mappable_end = pci_resource_len(dev->pdev, 2);
2567 
2568 	/* 64/512MB is the current min/max we actually know of, but this is just
2569 	 * a coarse sanity check.
2570 	 */
2571 	if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) {
2572 		DRM_ERROR("Unknown GMADR size (%lx)\n",
2573 			  dev_priv->gtt.mappable_end);
2574 		return -ENXIO;
2575 	}
2576 
2577 #if 0
2578 	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
2579 		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
2580 #endif
2581 	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
2582 
2583 	*stolen = gen6_get_stolen_size(snb_gmch_ctl);
2584 
2585 	gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
2586 	*gtt_total = (gtt_size / sizeof(gen6_pte_t)) << PAGE_SHIFT;
2587 
2588 	ret = ggtt_probe_common(dev, gtt_size);
2589 
2590 	dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
2591 	dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
2592 	dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
2593 	dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
2594 
2595 	return ret;
2596 }
2597 
2598 static void gen6_gmch_remove(struct i915_address_space *vm)
2599 {
2600 	struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);
2601 
2602 	iounmap(gtt->gsm);
2603 	free_scratch_page(vm->dev, vm->scratch_page);
2604 }
2605 
2606 static int i915_gmch_probe(struct drm_device *dev,
2607 			   u64 *gtt_total,
2608 			   size_t *stolen,
2609 			   phys_addr_t *mappable_base,
2610 			   u64 *mappable_end)
2611 {
2612 	struct drm_i915_private *dev_priv = dev->dev_private;
2613 #if 0
2614 	int ret;
2615 
2616 	ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
2617 	if (!ret) {
2618 		DRM_ERROR("failed to set up gmch\n");
2619 		return -EIO;
2620 	}
2621 #endif
2622 
2623 	intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);
2624 
2625 	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
2626 	dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries;
2627 	dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
2628 	dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
2629 	dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
2630 
2631 	if (unlikely(dev_priv->gtt.do_idle_maps))
2632 		DRM_INFO("applying Ironlake quirks for intel_iommu\n");
2633 
2634 	return 0;
2635 }
2636 
2637 static void i915_gmch_remove(struct i915_address_space *vm)
2638 {
2639 	intel_gmch_remove();
2640 }
2641 
2642 int i915_gem_gtt_init(struct drm_device *dev)
2643 {
2644 	struct drm_i915_private *dev_priv = dev->dev_private;
2645 	struct i915_gtt *gtt = &dev_priv->gtt;
2646 	int ret;
2647 
2648 	if (INTEL_INFO(dev)->gen <= 5) {
2649 		gtt->gtt_probe = i915_gmch_probe;
2650 		gtt->base.cleanup = i915_gmch_remove;
2651 	} else if (INTEL_INFO(dev)->gen < 8) {
2652 		gtt->gtt_probe = gen6_gmch_probe;
2653 		gtt->base.cleanup = gen6_gmch_remove;
2654 		if (IS_HASWELL(dev) && dev_priv->ellc_size)
2655 			gtt->base.pte_encode = iris_pte_encode;
2656 		else if (IS_HASWELL(dev))
2657 			gtt->base.pte_encode = hsw_pte_encode;
2658 		else if (IS_VALLEYVIEW(dev))
2659 			gtt->base.pte_encode = byt_pte_encode;
2660 		else if (INTEL_INFO(dev)->gen >= 7)
2661 			gtt->base.pte_encode = ivb_pte_encode;
2662 		else
2663 			gtt->base.pte_encode = snb_pte_encode;
2664 	} else {
2665 		dev_priv->gtt.gtt_probe = gen8_gmch_probe;
2666 		dev_priv->gtt.base.cleanup = gen6_gmch_remove;
2667 	}
2668 
2669 	gtt->base.dev = dev;
2670 
2671 	ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
2672 			     &gtt->mappable_base, &gtt->mappable_end);
2673 	if (ret)
2674 		return ret;
2675 
2676 	/* GMADR is the PCI mmio aperture into the global GTT. */
2677 	DRM_INFO("Memory usable by graphics device = %luM\n",
2678 		 gtt->base.total >> 20);
2679 	DRM_DEBUG_DRIVER("GMADR size = %ldM\n", gtt->mappable_end >> 20);
2680 	DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);
2681 #ifdef CONFIG_INTEL_IOMMU
2682 	if (intel_iommu_gfx_mapped)
2683 		DRM_INFO("VT-d active for gfx access\n");
2684 #endif
2685 	/*
2686 	 * i915.enable_ppgtt is read-only, so do an early pass to validate the
2687 	 * user's requested state against the hardware/driver capabilities.  We
2688 	 * do this now so that we can print out any log messages once rather
2689 	 * than every time we check intel_enable_ppgtt().
2690 	 */
2691 	i915.enable_ppgtt = sanitize_enable_ppgtt(dev, i915.enable_ppgtt);
2692 	DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt);
2693 
2694 	return 0;
2695 }
2696 
2697 void i915_gem_restore_gtt_mappings(struct drm_device *dev)
2698 {
2699 	struct drm_i915_private *dev_priv = dev->dev_private;
2700 	struct drm_i915_gem_object *obj;
2701 	struct i915_address_space *vm;
2702 
2703 	i915_check_and_clear_faults(dev);
2704 
2705 	/* First fill our portion of the GTT with scratch pages */
2706 	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
2707 				       dev_priv->gtt.base.start,
2708 				       dev_priv->gtt.base.total,
2709 				       true);
2710 
2711 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
2712 		struct i915_vma *vma = i915_gem_obj_to_vma(obj,
2713 							   &dev_priv->gtt.base);
2714 		if (!vma)
2715 			continue;
2716 
2717 		i915_gem_clflush_object(obj, obj->pin_display);
2718 		WARN_ON(i915_vma_bind(vma, obj->cache_level, PIN_UPDATE));
2719 	}
2720 
2721 
2722 	if (INTEL_INFO(dev)->gen >= 8) {
2723 		if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
2724 			chv_setup_private_ppat(dev_priv);
2725 		else
2726 			bdw_setup_private_ppat(dev_priv);
2727 
2728 		return;
2729 	}
2730 
2731 	if (USES_PPGTT(dev)) {
2732 		list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
2733 			/* TODO: Perhaps it shouldn't be gen6 specific */
2734 
2735 			struct i915_hw_ppgtt *ppgtt =
2736 					container_of(vm, struct i915_hw_ppgtt,
2737 						     base);
2738 
2739 			if (i915_is_ggtt(vm))
2740 				ppgtt = dev_priv->mm.aliasing_ppgtt;
2741 
2742 			gen6_write_page_range(dev_priv, &ppgtt->pd,
2743 					      0, ppgtt->base.total);
2744 		}
2745 	}
2746 
2747 	i915_ggtt_flush(dev_priv);
2748 }
2749 
2750 static struct i915_vma *
2751 __i915_gem_vma_create(struct drm_i915_gem_object *obj,
2752 		      struct i915_address_space *vm,
2753 		      const struct i915_ggtt_view *ggtt_view)
2754 {
2755 	struct i915_vma *vma;
2756 
2757 	if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
2758 		return ERR_PTR(-EINVAL);
2759 
2760 	vma = kzalloc(sizeof(*vma), GFP_KERNEL);
2761 	if (vma == NULL)
2762 		return ERR_PTR(-ENOMEM);
2763 
2764 	INIT_LIST_HEAD(&vma->vma_link);
2765 	INIT_LIST_HEAD(&vma->mm_list);
2766 	INIT_LIST_HEAD(&vma->exec_list);
2767 	vma->vm = vm;
2768 	vma->obj = obj;
2769 
2770 	if (i915_is_ggtt(vm))
2771 		vma->ggtt_view = *ggtt_view;
2772 
2773 	list_add_tail(&vma->vma_link, &obj->vma_list);
2774 	if (!i915_is_ggtt(vm))
2775 		i915_ppgtt_get(i915_vm_to_ppgtt(vm));
2776 
2777 	return vma;
2778 }
2779 
2780 struct i915_vma *
2781 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
2782 				  struct i915_address_space *vm)
2783 {
2784 	struct i915_vma *vma;
2785 
2786 	vma = i915_gem_obj_to_vma(obj, vm);
2787 	if (!vma)
2788 		vma = __i915_gem_vma_create(obj, vm,
2789 					    i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL);
2790 
2791 	return vma;
2792 }
2793 
2794 struct i915_vma *
2795 i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
2796 				       const struct i915_ggtt_view *view)
2797 {
2798 	struct i915_address_space *ggtt = i915_obj_to_ggtt(obj);
2799 	struct i915_vma *vma;
2800 
2801 	if (WARN_ON(!view))
2802 		return ERR_PTR(-EINVAL);
2803 
2804 	vma = i915_gem_obj_to_ggtt_view(obj, view);
2805 
2806 	if (IS_ERR(vma))
2807 		return vma;
2808 
2809 	if (!vma)
2810 		vma = __i915_gem_vma_create(obj, ggtt, view);
2811 
2812 	return vma;
2813 
2814 }
2815 
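/*
 * rotate_pages() emits the source pages column by column, walking each
 * column bottom-up.  As an illustration, with width = 2 and height = 2 the
 * resulting DMA address order is in[2], in[0], in[3], in[1], i.e. the layout
 * expected for the rotated GGTT view.
 */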
2816 static void
2817 rotate_pages(dma_addr_t *in, unsigned int width, unsigned int height,
2818 	     struct sg_table *st)
2819 {
2820 	unsigned int column, row;
2821 	unsigned int src_idx;
2822 	struct scatterlist *sg = st->sgl;
2823 
2824 	st->nents = 0;
2825 
2826 	for (column = 0; column < width; column++) {
2827 		src_idx = width * (height - 1) + column;
2828 		for (row = 0; row < height; row++) {
2829 			st->nents++;
2830 			/* We don't need the pages, but need to initialize
2831 			 * the entries so the sg list can be happily traversed.
2832 			 * All we need are the DMA addresses.
2833 			 */
2834 			sg_set_page(sg, NULL, PAGE_SIZE, 0);
2835 			sg_dma_address(sg) = in[src_idx];
2836 			sg_dma_len(sg) = PAGE_SIZE;
2837 			sg = sg_next(sg);
2838 			src_idx -= width;
2839 		}
2840 	}
2841 }
2842 
2843 static struct sg_table *
2844 intel_rotate_fb_obj_pages(struct i915_ggtt_view *ggtt_view,
2845 			  struct drm_i915_gem_object *obj)
2846 {
2847 	struct intel_rotation_info *rot_info = &ggtt_view->rotation_info;
2848 	unsigned int size_pages = rot_info->size >> PAGE_SHIFT;
2849 	struct sg_page_iter sg_iter;
2850 	unsigned long i;
2851 	dma_addr_t *page_addr_list;
2852 	struct sg_table *st;
2853 	int ret = -ENOMEM;
2854 
2855 	/* Allocate a temporary list of source pages for random access. */
2856 	page_addr_list = drm_malloc_ab(obj->base.size / PAGE_SIZE,
2857 				       sizeof(dma_addr_t));
2858 	if (!page_addr_list)
2859 		return ERR_PTR(ret);
2860 
2861 	/* Allocate target SG list. */
2862 	st = kmalloc(sizeof(*st), M_DRM, M_WAITOK);
2863 	if (!st)
2864 		goto err_st_alloc;
2865 
2866 	ret = sg_alloc_table(st, size_pages, GFP_KERNEL);
2867 	if (ret)
2868 		goto err_sg_alloc;
2869 
2870 	/* Populate source page list from the object. */
2871 	i = 0;
2872 	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
2873 		page_addr_list[i] = sg_page_iter_dma_address(&sg_iter);
2874 		i++;
2875 	}
2876 
2877 	/* Rotate the pages. */
2878 	rotate_pages(page_addr_list,
2879 		     rot_info->width_pages, rot_info->height_pages,
2880 		     st);
2881 
2882 	DRM_DEBUG_KMS(
2883 		      "Created rotated page mapping for object size %zu (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages).\n",
2884 		      obj->base.size, rot_info->pitch, rot_info->height,
2885 		      rot_info->pixel_format, rot_info->width_pages,
2886 		      rot_info->height_pages, size_pages);
2887 
2888 	drm_free_large(page_addr_list);
2889 
2890 	return st;
2891 
2892 err_sg_alloc:
2893 	kfree(st);
2894 err_st_alloc:
2895 	drm_free_large(page_addr_list);
2896 
2897 	DRM_DEBUG_KMS(
2898 		      "Failed to create rotated mapping for object size %zu! (%d) (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages)\n",
2899 		      obj->base.size, ret, rot_info->pitch, rot_info->height,
2900 		      rot_info->pixel_format, rot_info->width_pages,
2901 		      rot_info->height_pages, size_pages);
2902 	return ERR_PTR(ret);
2903 }
2904 
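/*
 * intel_partial_pages() builds an sg_table covering only a window of the
 * object: view->params.partial.size pages, starting at page offset
 * view->params.partial.offset, one DMA address per page.
 */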
2905 static struct sg_table *
2906 intel_partial_pages(const struct i915_ggtt_view *view,
2907 		    struct drm_i915_gem_object *obj)
2908 {
2909 	struct sg_table *st;
2910 	struct scatterlist *sg;
2911 	struct sg_page_iter obj_sg_iter;
2912 	int ret = -ENOMEM;
2913 
2914 	st = kmalloc(sizeof(*st), M_DRM, M_WAITOK);
2915 	if (!st)
2916 		goto err_st_alloc;
2917 
2918 	ret = sg_alloc_table(st, view->params.partial.size, GFP_KERNEL);
2919 	if (ret)
2920 		goto err_sg_alloc;
2921 
2922 	sg = st->sgl;
2923 	st->nents = 0;
2924 	for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents,
2925 		view->params.partial.offset)
2926 	{
2927 		if (st->nents >= view->params.partial.size)
2928 			break;
2929 
2930 		sg_set_page(sg, NULL, PAGE_SIZE, 0);
2931 		sg_dma_address(sg) = sg_page_iter_dma_address(&obj_sg_iter);
2932 		sg_dma_len(sg) = PAGE_SIZE;
2933 
2934 		sg = sg_next(sg);
2935 		st->nents++;
2936 	}
2937 
2938 	return st;
2939 
2940 err_sg_alloc:
2941 	kfree(st);
2942 err_st_alloc:
2943 	return ERR_PTR(ret);
2944 }
2945 
2946 static int
2947 i915_get_ggtt_vma_pages(struct i915_vma *vma)
2948 {
2949 	int ret = 0;
2950 
2951 	if (vma->ggtt_view.pages)
2952 		return 0;
2953 
2954 	if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
2955 		vma->ggtt_view.pages = vma->obj->pages;
2956 	else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
2957 		vma->ggtt_view.pages =
2958 			intel_rotate_fb_obj_pages(&vma->ggtt_view, vma->obj);
2959 	else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL)
2960 		vma->ggtt_view.pages =
2961 			intel_partial_pages(&vma->ggtt_view, vma->obj);
2962 	else
2963 		WARN_ONCE(1, "GGTT view %u not implemented!\n",
2964 			  vma->ggtt_view.type);
2965 
2966 	if (!vma->ggtt_view.pages) {
2967 		DRM_ERROR("Failed to get pages for GGTT view type %u!\n",
2968 			  vma->ggtt_view.type);
2969 		ret = -EINVAL;
2970 	} else if (IS_ERR(vma->ggtt_view.pages)) {
2971 		ret = PTR_ERR(vma->ggtt_view.pages);
2972 		vma->ggtt_view.pages = NULL;
2973 		DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
2974 			  vma->ggtt_view.type, ret);
2975 	}
2976 
2977 	return ret;
2978 }
2979 
2980 /**
2981  * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
2982  * @vma: VMA to map
2983  * @cache_level: mapping cache level
2984  * @flags: flags like global or local mapping
2985  *
2986  * DMA addresses are taken from the scatter-gather table of this object (or of
2987  * this VMA in case of non-default GGTT views) and PTE entries set up.
2988  * Note that DMA addresses are also the only part of the SG table we care about.
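 *
 * As an illustration, i915_gem_restore_gtt_mappings() rebinds already-bound
 * VMAs with:
 *
 *	i915_vma_bind(vma, obj->cache_level, PIN_UPDATE);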
2989  */
2990 int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
2991 		  u32 flags)
2992 {
2993 	int ret;
2994 	u32 bind_flags;
2995 
2996 	if (WARN_ON(flags == 0))
2997 		return -EINVAL;
2998 
2999 	bind_flags = 0;
3000 	if (flags & PIN_GLOBAL)
3001 		bind_flags |= GLOBAL_BIND;
3002 	if (flags & PIN_USER)
3003 		bind_flags |= LOCAL_BIND;
3004 
3005 	if (flags & PIN_UPDATE)
3006 		bind_flags |= vma->bound;
3007 	else
3008 		bind_flags &= ~vma->bound;
3009 
3010 	if (bind_flags == 0)
3011 		return 0;
3012 
3013 	if (vma->bound == 0 && vma->vm->allocate_va_range) {
3014 		trace_i915_va_alloc(vma->vm,
3015 				    vma->node.start,
3016 				    vma->node.size,
3017 				    VM_TO_TRACE_NAME(vma->vm));
3018 
3019 		/* XXX: i915_vma_pin() will fix this +- hack */
3020 		vma->pin_count++;
3021 		ret = vma->vm->allocate_va_range(vma->vm,
3022 						 vma->node.start,
3023 						 vma->node.size);
3024 		vma->pin_count--;
3025 		if (ret)
3026 			return ret;
3027 	}
3028 
3029 	ret = vma->vm->bind_vma(vma, cache_level, bind_flags);
3030 	if (ret)
3031 		return ret;
3032 
3033 	vma->bound |= bind_flags;
3034 
3035 	return 0;
3036 }
3037 
3038 /**
3039  * i915_ggtt_view_size - Get the size of a GGTT view.
3040  * @obj: Object the view is of.
3041  * @view: The view in question.
3042  *
3043  * Return: The size of the GGTT view in bytes.
3044  */
3045 size_t
3046 i915_ggtt_view_size(struct drm_i915_gem_object *obj,
3047 		    const struct i915_ggtt_view *view)
3048 {
3049 	if (view->type == I915_GGTT_VIEW_NORMAL) {
3050 		return obj->base.size;
3051 	} else if (view->type == I915_GGTT_VIEW_ROTATED) {
3052 		return view->rotation_info.size;
3053 	} else if (view->type == I915_GGTT_VIEW_PARTIAL) {
3054 		return view->params.partial.size << PAGE_SHIFT;
3055 	} else {
3056 		WARN_ONCE(1, "GGTT view %u not implemented!\n", view->type);
3057 		return obj->base.size;
3058 	}
3059 }
3060