/* xref: /dragonfly/sys/dev/drm/i915/i915_gem_gtt.c (revision 2249b4bc) */
/*
 * Copyright © 2010 Daniel Vetter
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "intel_drv.h"

#include <linux/highmem.h>

typedef uint32_t gen6_gtt_pte_t;

/* PPGTT stuff */
#define GEN6_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0xff0))

#define GEN6_PDE_VALID			(1 << 0)
/* gen6+ has bits 11-4 for physical addr bits 39-32 */
#define GEN6_PDE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)

#define GEN6_PTE_VALID			(1 << 0)
#define GEN6_PTE_UNCACHED		(1 << 1)
#define HSW_PTE_UNCACHED		(0)
#define GEN6_PTE_CACHE_LLC		(2 << 1)
#define GEN6_PTE_CACHE_LLC_MLC		(3 << 1)
#define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)

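/*
 * Construct a gen6-style GTT/PPGTT PTE: the valid bit, the encoded physical
 * address (high address bits folded into bits 11:4 by GEN6_GTT_ADDR_ENCODE),
 * and a cacheability field chosen from the requested cache level.  Haswell is
 * special-cased: it has no LLC+MLC mode and encodes "uncached" as 0.
 */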
static inline gen6_gtt_pte_t gen6_pte_encode(struct drm_device *dev,
					     dma_addr_t addr,
					     enum i915_cache_level level)
{
	gen6_gtt_pte_t pte = GEN6_PTE_VALID;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_LLC_MLC:
		/* Haswell doesn't set L3 this way */
		if (IS_HASWELL(dev))
			pte |= GEN6_PTE_CACHE_LLC;
		else
			pte |= GEN6_PTE_CACHE_LLC_MLC;
		break;
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		if (IS_HASWELL(dev))
			pte |= HSW_PTE_UNCACHED;
		else
			pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		BUG();
	}

	return pte;
}

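/*
 * Enable the aliasing PPGTT: publish the page-directory entries into the top
 * of the global GTT, program the generation-specific ECOCHK/GFX_MODE bits that
 * turn PPGTT translation on, and point each ring's PP_DIR_BASE at the
 * page directory.
 */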
static int gen6_ppgtt_enable(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t pd_offset;
	struct intel_ring_buffer *ring;
	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
	uint32_t pd_entry, first_pd_entry_in_global_pt;
	int i;

	first_pd_entry_in_global_pt = 512 * 1024 - I915_PPGTT_PD_ENTRIES;
	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		dma_addr_t pt_addr;

		pt_addr = VM_PAGE_TO_PHYS(ppgtt->pt_pages[i]);
		pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
		pd_entry |= GEN6_PDE_VALID;

		intel_gtt_write(first_pd_entry_in_global_pt + i, pd_entry);
	}
	intel_gtt_read_pte(first_pd_entry_in_global_pt);

	pd_offset = ppgtt->pd_offset;
	pd_offset /= 64; /* in cachelines, */
	pd_offset <<= 16;

	if (INTEL_INFO(dev)->gen == 6) {
		uint32_t ecochk, gab_ctl, ecobits;

		ecobits = I915_READ(GAC_ECO_BITS);
		I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
					 ECOBITS_PPGTT_CACHE64B);

		gab_ctl = I915_READ(GAB_CTL);
		I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);

		ecochk = I915_READ(GAM_ECOCHK);
		I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT |
				       ECOCHK_PPGTT_CACHE64B);
		I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
	} else if (INTEL_INFO(dev)->gen >= 7) {
		uint32_t ecochk, ecobits;

		ecobits = I915_READ(GAC_ECO_BITS);
		I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);

		ecochk = I915_READ(GAM_ECOCHK);
		if (IS_HASWELL(dev)) {
			ecochk |= ECOCHK_PPGTT_WB_HSW;
		} else {
			ecochk |= ECOCHK_PPGTT_LLC_IVB;
			ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
		}
		I915_WRITE(GAM_ECOCHK, ecochk);
		/* GFX_MODE is per-ring on gen7+ */
	}

	for_each_ring(ring, dev_priv, i) {
		if (INTEL_INFO(dev)->gen >= 7)
			I915_WRITE(RING_MODE_GEN7(ring),
				   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));

		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
		I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset);
	}
	return 0;
}

/* PPGTT support for Sandybridge/Gen6 and later */
static void gen6_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
				   unsigned first_entry,
				   unsigned num_entries)
{
	gen6_gtt_pte_t *pt_vaddr, scratch_pte;
	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
	unsigned last_pte, i;

	scratch_pte = gen6_pte_encode(ppgtt->dev,
				      ppgtt->scratch_page_dma_addr,
				      I915_CACHE_LLC);

	while (num_entries) {
		last_pte = first_pte + num_entries;
		if (last_pte > I915_PPGTT_PT_ENTRIES)
			last_pte = I915_PPGTT_PT_ENTRIES;

		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);

		for (i = first_pte; i < last_pte; i++)
			pt_vaddr[i] = scratch_pte;

		kunmap_atomic(pt_vaddr);

		num_entries -= last_pte - first_pte;
		first_pte = 0;
		act_pt++;
	}
}

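/*
 * Write PTEs for a bound object: walk the scatterlist one page at a time and
 * emit one encoded PTE per page, switching to the next page table via
 * kmap_atomic() whenever an I915_PPGTT_PT_ENTRIES boundary is crossed.
 */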
static void gen6_ppgtt_insert_entries(struct i915_hw_ppgtt *ppgtt,
				      struct sg_table *pages,
				      unsigned first_entry,
				      enum i915_cache_level cache_level)
{
	gen6_gtt_pte_t *pt_vaddr;
	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
	unsigned i, j, m, segment_len;
	dma_addr_t page_addr;
	struct scatterlist *sg;

	/* init sg walking */
	sg = pages->sgl;
	i = 0;
	segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
	m = 0;

	while (i < pages->nents) {
		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);

		for (j = first_pte; j < I915_PPGTT_PT_ENTRIES; j++) {
			page_addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
			pt_vaddr[j] = gen6_pte_encode(ppgtt->dev, page_addr,
						      cache_level);

			/* grab the next page */
			if (++m == segment_len) {
				if (++i == pages->nents)
					break;

				sg = sg_next(sg);
				segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
				m = 0;
			}
		}

		kunmap_atomic(pt_vaddr);

		first_pte = 0;
		act_pt++;
	}
}

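/*
 * Note: the Linux teardown path below is compiled out (#if 0) in this port,
 * so gen6_ppgtt_cleanup() is currently a no-op.
 */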
static void gen6_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt)
{
#if 0
	int i;

	if (ppgtt->pt_dma_addr) {
		for (i = 0; i < ppgtt->num_pd_entries; i++)
			pci_unmap_page(ppgtt->dev->pdev,
				       ppgtt->pt_dma_addr[i],
				       4096, PCI_DMA_BIDIRECTIONAL);
	}

	kfree(ppgtt->pt_dma_addr);
	for (i = 0; i < ppgtt->num_pd_entries; i++)
		__free_page(ppgtt->pt_pages[i]);
	kfree(ppgtt->pt_pages);
	kfree(ppgtt);
#endif
}

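/*
 * Allocate the per-process page tables (one vm_page per page-directory
 * entry), hook up the gen6 PPGTT operations, and scrub every entry to the
 * scratch page before first use.
 */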
static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned first_pd_entry_in_global_pt;
	int i;
	int ret = -ENOMEM;

	/* ppgtt PDEs reside in the global gtt pagetable, which has 512*1024
	 * entries. For aliasing ppgtt support we just steal them at the end for
	 * now.
	 */
	first_pd_entry_in_global_pt = gtt_total_entries(dev_priv->gtt);

	ppgtt->num_pd_entries = I915_PPGTT_PD_ENTRIES;
	ppgtt->enable = gen6_ppgtt_enable;
	ppgtt->clear_range = gen6_ppgtt_clear_range;
	ppgtt->insert_entries = gen6_ppgtt_insert_entries;
	ppgtt->cleanup = gen6_ppgtt_cleanup;
	ppgtt->pt_pages = kzalloc(sizeof(struct vm_page *)*ppgtt->num_pd_entries,
				  GFP_KERNEL);
	if (!ppgtt->pt_pages)
		return -ENOMEM;

	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		ppgtt->pt_pages[i] = vm_page_alloc(NULL, 0,
		    VM_ALLOC_NORMAL | VM_ALLOC_ZERO);
		if (!ppgtt->pt_pages[i])
			goto err_pt_alloc;
	}

	ppgtt->clear_range(ppgtt, 0,
			   ppgtt->num_pd_entries*I915_PPGTT_PT_ENTRIES);

	ppgtt->pd_offset = first_pd_entry_in_global_pt * sizeof(gen6_gtt_pte_t);

	return 0;

err_pt_alloc:
	dev_priv->mm.aliasing_ppgtt = ppgtt;
	i915_gem_cleanup_aliasing_ppgtt(dev);

	return ret;
}

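/*
 * Create the single aliasing PPGTT used by the driver.  Only the gen6/7 path
 * exists here; gen8+ would hit the BUG() below.
 */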
static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_hw_ppgtt *ppgtt;
	int ret;

	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
	if (!ppgtt)
		return -ENOMEM;

	ppgtt->dev = dev;
	ppgtt->scratch_page_dma_addr = dev_priv->gtt.scratch_page_dma;

	if (INTEL_INFO(dev)->gen < 8)
		ret = gen6_ppgtt_init(ppgtt);
	else
		BUG();

	if (ret)
		kfree(ppgtt);
	else
		dev_priv->mm.aliasing_ppgtt = ppgtt;

	return ret;
}

void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;

	if (!ppgtt)
		return;

	ppgtt->cleanup(ppgtt);
	dev_priv->mm.aliasing_ppgtt = NULL;
}

#if 0
void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
	vm_page_t m;
	int i;

	if (!ppgtt)
		return;
	dev_priv->mm.aliasing_ppgtt = NULL;

	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		m = ppgtt->pt_pages[i];
		if (m != NULL) {
			vm_page_busy_wait(m, FALSE, "i915gem");
			vm_page_unwire(m, 0);
			vm_page_free(m);
		}
	}
	drm_free(ppgtt->pt_pages, M_DRM);
	drm_free(ppgtt, M_DRM);
}
#endif

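/*
 * Variant of the sg_table-based insert path above that takes a plain array of
 * vm_page_t and writes one PTE per page; used by i915_ppgtt_bind_object()
 * below.
 */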
static void
i915_ppgtt_insert_pages(struct i915_hw_ppgtt *ppgtt, unsigned first_entry,
    unsigned num_entries, vm_page_t *pages, enum i915_cache_level cache_level)
{
	uint32_t *pt_vaddr;
	unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
	unsigned last_pte, i;
	dma_addr_t page_addr;

	while (num_entries) {
		last_pte = first_pte + num_entries;
		if (last_pte > I915_PPGTT_PT_ENTRIES)
			last_pte = I915_PPGTT_PT_ENTRIES;

		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);

		for (i = first_pte; i < last_pte; i++) {
			page_addr = VM_PAGE_TO_PHYS(*pages);
			pt_vaddr[i] = gen6_pte_encode(ppgtt->dev, page_addr,
						 cache_level);

			pages++;
		}

		kunmap_atomic(pt_vaddr);

		num_entries -= last_pte - first_pte;
		first_pte = 0;
		act_pd++;
	}
}

void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
			    struct drm_i915_gem_object *obj,
			    enum i915_cache_level cache_level)
{
	i915_ppgtt_insert_pages(ppgtt, obj->gtt_space->start >> PAGE_SHIFT,
	    obj->base.size >> PAGE_SHIFT, obj->pages, cache_level);
}

void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
			      struct drm_i915_gem_object *obj)
{
	ppgtt->clear_range(ppgtt,
			   obj->gtt_space->start >> PAGE_SHIFT,
			   obj->base.size >> PAGE_SHIFT);
}

extern int intel_iommu_gfx_mapped;
/* Certain Gen5 chipsets require idling the GPU before
 * unmapping anything from the GTT when VT-d is enabled.
 */
static inline bool needs_idle_maps(struct drm_device *dev)
{
#ifdef CONFIG_INTEL_IOMMU
	/* Query intel_iommu to see if we need the workaround. Presumably that
	 * was loaded first.
	 */
	if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
		return true;
#endif
	return false;
}

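/*
 * do_idling()/undo_idling() bracket GTT unmaps on platforms that need the
 * VT-d idle workaround: temporarily mark waits non-interruptible, idle the
 * GPU, and restore the previous interruptible state afterwards.
 */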
static bool do_idling(struct drm_i915_private *dev_priv)
{
	bool ret = dev_priv->mm.interruptible;

	if (unlikely(dev_priv->gtt.do_idle_maps)) {
		dev_priv->mm.interruptible = false;
		if (i915_gpu_idle(dev_priv->dev)) {
			DRM_ERROR("Couldn't idle GPU\n");
			/* Wait a bit, in hopes it avoids the hang */
			udelay(10);
		}
	}

	return ret;
}

static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
{
	if (unlikely(dev_priv->gtt.do_idle_maps))
		dev_priv->mm.interruptible = interruptible;
}

void i915_gem_restore_gtt_mappings(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;

	/* First fill our portion of the GTT with scratch pages */
	dev_priv->gtt.gtt_clear_range(dev, dev_priv->gtt.start / PAGE_SIZE,
				      dev_priv->gtt.total / PAGE_SIZE);

	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) {
		i915_gem_clflush_object(obj);
		i915_gem_gtt_bind_object(obj, obj->cache_level);
	}

	i915_gem_chipset_flush(dev);
}

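/*
 * The Linux DMA-mapping step for object pages is compiled out (#if 0) in this
 * port; see also i915_gem_gtt_finish_object() below, where the matching unmap
 * is likewise disabled.
 */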
#if 0
int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
{
	if (obj->has_dma_mapping)
		return 0;

	if (!dma_map_sg(&obj->base.dev->pdev->dev,
			obj->pages->sgl, obj->pages->nents,
			PCI_DMA_BIDIRECTIONAL))
		return -ENOSPC;

	return 0;
}
#endif

/*
 * Binds an object into the global gtt with the specified cache level. The object
 * will be accessible to the GPU via commands whose operands reference offsets
 * within the global GTT as well as accessible by the GPU through the GMADR
 * mapped BAR (dev_priv->mm.gtt->gtt).
 */
static void gen6_ggtt_insert_entries(struct drm_device *dev,
				     struct sg_table *st,
				     unsigned int first_entry,
				     enum i915_cache_level level)
{
#if 0
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct scatterlist *sg = st->sgl;
	gtt_pte_t __iomem *gtt_entries =
		(gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
	int unused, i = 0;
	unsigned int len, m = 0;
	dma_addr_t addr;

	for_each_sg(st->sgl, sg, st->nents, unused) {
		len = sg_dma_len(sg) >> PAGE_SHIFT;
		for (m = 0; m < len; m++) {
			addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
			iowrite32(gen6_pte_encode(dev, addr, level),
				  &gtt_entries[i]);
			i++;
		}
	}

	/* XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated. There is some concern that even though
	 * registers and PTEs are within the same BAR, they may be subject to
	 * different access behaviour. Therefore, even with the way we assume
	 * hardware should work, we must keep this posting read for paranoia.
	 */
	if (i != 0)
		WARN_ON(readl(&gtt_entries[i-1])
			!= gen6_pte_encode(dev, addr, level));

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
#endif
}

static void gen6_ggtt_clear_range(struct drm_device *dev,
				  unsigned int first_entry,
				  unsigned int num_entries)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	gen6_gtt_pte_t scratch_pte, __iomem *gtt_base =
		(gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = gen6_pte_encode(dev, dev_priv->gtt.scratch_page_dma,
				      I915_CACHE_LLC);
	for (i = 0; i < num_entries; i++)
		iowrite32(scratch_pte, &gtt_base[i]);
	readl(gtt_base);
}

static void i915_ggtt_insert_entries(struct drm_device *dev,
				     struct sg_table *st,
				     unsigned int pg_start,
				     enum i915_cache_level cache_level)
{
#if 0
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;

	intel_gtt_insert_sg_entries(st, pg_start, flags);
#endif
}

static void i915_ggtt_clear_range(struct drm_device *dev,
				  unsigned int first_entry,
				  unsigned int num_entries)
{
	intel_gtt_clear_range(first_entry, num_entries);
}

void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
			      enum i915_cache_level cache_level)
{
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
			AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
	intel_gtt_insert_pages(obj->gtt_space->start >> PAGE_SHIFT,
	    obj->base.size >> PAGE_SHIFT, obj->pages, flags);

	obj->has_global_gtt_mapping = 1;
}

void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	dev_priv->gtt.gtt_clear_range(obj->base.dev,
				      obj->gtt_space->start >> PAGE_SHIFT,
				      obj->base.size >> PAGE_SHIFT);

	obj->has_global_gtt_mapping = 0;
}

void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool interruptible;

	interruptible = do_idling(dev_priv);

#if 0
	if (!obj->has_dma_mapping)
		dma_unmap_sg(&dev->pdev->dev,
			     obj->pages->sgl, obj->pages->nents,
			     PCI_DMA_BIDIRECTIONAL);
#endif

	undo_idling(dev_priv, interruptible);
}

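/*
 * GTT "coloring": when a node's neighbour has a different cache level, nudge
 * the allocation in by one page so that objects with conflicting cacheability
 * never sit directly adjacent in the aperture.
 */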
static void i915_gtt_color_adjust(struct drm_mm_node *node,
				  unsigned long color,
				  unsigned long *start,
				  unsigned long *end)
{
	if (node->color != color)
		*start += 4096;

	if (!list_empty(&node->node_list)) {
		node = list_entry(node->node_list.next,
				  struct drm_mm_node,
				  node_list);
		if (node->allocated && node->color != color)
			*end -= 4096;
	}
}

void i915_gem_setup_global_gtt(struct drm_device *dev,
			       unsigned long start,
			       unsigned long mappable_end,
			       unsigned long end)
{
	/* Let GEM manage all of the aperture.
	 *
	 * However, leave one page at the end still bound to the scratch page.
	 * There are a number of places where the hardware apparently prefetches
	 * past the end of the object, and we've seen multiple hangs with the
	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
	 * aperture.  One page should be enough to keep any prefetching inside
	 * of the aperture.
	 */
	drm_i915_private_t *dev_priv = dev->dev_private;
	unsigned long mappable;
	int error;

	BUG_ON(mappable_end > end);

	mappable = min(end, mappable_end) - start;

	/* Subtract the guard page ... */
	drm_mm_init(&dev_priv->mm.gtt_space, start, end - start);
	if (!HAS_LLC(dev))
		dev_priv->mm.gtt_space.color_adjust = i915_gtt_color_adjust;

	dev_priv->gtt.start = start;
	dev_priv->gtt.mappable_end = mappable_end;
	dev_priv->gtt.total = end - start;

	/* ... but ensure that we clear the entire range. */
	intel_gtt_clear_range(start / PAGE_SIZE, (end-start) / PAGE_SIZE);
	device_printf(dev->dev,
	    "taking over the fictitious range 0x%lx-0x%lx\n",
	    dev->agp->base + start, dev->agp->base + start + mappable);
	error = -vm_phys_fictitious_reg_range(dev->agp->base + start,
	    dev->agp->base + start + mappable, VM_MEMATTR_WRITE_COMBINING);
}

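/*
 * Decide whether to use the aliasing PPGTT: honour the i915_enable_ppgtt
 * parameter if set, otherwise default to on, except on Sandy Bridge with
 * VT-d active where it is disabled.
 */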
static bool
intel_enable_ppgtt(struct drm_device *dev)
{
	if (i915_enable_ppgtt >= 0)
		return i915_enable_ppgtt;

#ifdef CONFIG_INTEL_IOMMU
	/* Disable ppgtt on SNB if VT-d is on. */
	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped)
		return false;
#endif

	return true;
}

void i915_gem_init_global_gtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long gtt_size, mappable_size;

	gtt_size = dev_priv->mm.gtt->gtt_total_entries << PAGE_SHIFT;
	mappable_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT;

	if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
		int ret;

		if (INTEL_INFO(dev)->gen <= 7) {
			/* PPGTT pdes are stolen from global gtt ptes, so shrink the
			 * aperture accordingly when using aliasing ppgtt. */
			gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE;
		}

		i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);

		ret = i915_gem_init_aliasing_ppgtt(dev);
		if (!ret)
			return;

		DRM_ERROR("Aliased PPGTT setup failed %d\n", ret);
		drm_mm_takedown(&dev_priv->mm.gtt_space);
		gtt_size += I915_PPGTT_PD_ENTRIES*PAGE_SIZE;
	}
	i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
}

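/*
 * Probe the GTT and install the clear/insert hooks.  Note the "|| 1" below:
 * this port always takes the legacy intel_gtt_get() path, so the gen6 GGTT
 * helpers further up are effectively unused for now.
 */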
int i915_gem_gtt_init(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	/* On modern platforms we need not worry ourselves with the legacy
	 * hostbridge query stuff. Skip it entirely.
	 */
	if (INTEL_INFO(dev)->gen < 6 || 1) {
		dev_priv->mm.gtt = intel_gtt_get();
		if (!dev_priv->mm.gtt) {
			DRM_ERROR("Failed to initialize GTT\n");
			return -ENODEV;
		}

		dev_priv->gtt.do_idle_maps = needs_idle_maps(dev);

		dev_priv->gtt.gtt_clear_range = i915_ggtt_clear_range;
		dev_priv->gtt.gtt_insert_entries = i915_ggtt_insert_entries;

		return 0;
	}

	dev_priv->mm.gtt = kmalloc(sizeof(*dev_priv->mm.gtt), M_DRM, M_WAITOK | M_ZERO);
	if (!dev_priv->mm.gtt)
		return -ENOMEM;

	/* GMADR is the PCI aperture used by SW to access tiled GFX surfaces in a linear fashion. */
	DRM_INFO("Memory usable by graphics device = %dM\n", dev_priv->mm.gtt->gtt_total_entries >> 8);
	DRM_DEBUG_DRIVER("GMADR size = %dM\n", dev_priv->mm.gtt->gtt_mappable_entries >> 8);
	DRM_DEBUG_DRIVER("GTT stolen size = %dM\n", dev_priv->mm.gtt->stolen_size >> 20);

	dev_priv->gtt.gtt_clear_range = gen6_ggtt_clear_range;
	dev_priv->gtt.gtt_insert_entries = gen6_ggtt_insert_entries;

	return 0;
}