/*
 * Copyright © 2010 Daniel Vetter
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "intel_drv.h"

#include <linux/highmem.h>

typedef uint32_t gtt_pte_t;

/* PPGTT stuff */
#define GEN6_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0xff0))

#define GEN6_PDE_VALID			(1 << 0)
/* gen6+ has bit 11-4 for physical addr bit 39-32 */
#define GEN6_PDE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)

#define GEN6_PTE_VALID			(1 << 0)
#define GEN6_PTE_UNCACHED		(1 << 1)
#define HSW_PTE_UNCACHED		(0)
#define GEN6_PTE_CACHE_LLC		(2 << 1)
#define GEN6_PTE_CACHE_LLC_MLC		(3 << 1)
#define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)

static inline gtt_pte_t pte_encode(struct drm_device *dev,
				   dma_addr_t addr,
				   enum i915_cache_level level)
{
	gtt_pte_t pte = GEN6_PTE_VALID;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_LLC_MLC:
		/* Haswell doesn't set L3 this way */
		if (IS_HASWELL(dev))
			pte |= GEN6_PTE_CACHE_LLC;
		else
			pte |= GEN6_PTE_CACHE_LLC_MLC;
		break;
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		if (IS_HASWELL(dev))
			pte |= HSW_PTE_UNCACHED;
		else
			pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		BUG();
	}

	return pte;
}
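
/*
 * Layout of the gen6-style PTE built by pte_encode() above, as implied by
 * the GEN6_PTE_* defines and GEN6_GTT_ADDR_ENCODE() (a sketch, not an
 * exhaustive description of every platform variant):
 *
 *	bit  0		valid
 *	bits 2:1	cacheability (uncached, LLC, or LLC+MLC)
 *	bits 11:4	physical address bits 39:32
 *	bits 31:12	physical address bits 31:12
 */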

/* PPGTT support for Sandybridge/Gen6 and later */
static void i915_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
				   unsigned first_entry,
				   unsigned num_entries)
{
	gtt_pte_t *pt_vaddr;
	gtt_pte_t scratch_pte;
	unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
	unsigned last_pte, i;

	scratch_pte = pte_encode(ppgtt->dev, ppgtt->scratch_page_dma_addr,
				 I915_CACHE_LLC);

	while (num_entries) {
		last_pte = first_pte + num_entries;
		if (last_pte > I915_PPGTT_PT_ENTRIES)
			last_pte = I915_PPGTT_PT_ENTRIES;

		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);

		for (i = first_pte; i < last_pte; i++)
			pt_vaddr[i] = scratch_pte;

		kunmap_atomic(pt_vaddr);

		num_entries -= last_pte - first_pte;
		first_pte = 0;
		act_pd++;
	}
}

int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_hw_ppgtt *ppgtt;
	u_int first_pd_entry_in_global_pt, i;

	/*
	 * ppgtt PDEs reside in the global gtt pagetable, which has 512*1024
	 * entries. For aliasing ppgtt support we just steal them at the end
	 * for now.
	 */
	first_pd_entry_in_global_pt = 512 * 1024 - I915_PPGTT_PD_ENTRIES;

	ppgtt = kmalloc(sizeof(*ppgtt), M_DRM, M_WAITOK | M_ZERO);

	ppgtt->dev = dev;
	ppgtt->num_pd_entries = I915_PPGTT_PD_ENTRIES;
	ppgtt->pt_pages = kmalloc(sizeof(vm_page_t) * ppgtt->num_pd_entries,
				  M_DRM, M_WAITOK | M_ZERO);

	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		ppgtt->pt_pages[i] = vm_page_alloc(NULL, 0,
		    VM_ALLOC_NORMAL | VM_ALLOC_ZERO);
		if (ppgtt->pt_pages[i] == NULL) {
			dev_priv->mm.aliasing_ppgtt = ppgtt;
			i915_gem_cleanup_aliasing_ppgtt(dev);
			return (-ENOMEM);
		}
	}

	ppgtt->scratch_page_dma_addr = dev_priv->mm.gtt->scratch_page_dma;

	i915_ppgtt_clear_range(ppgtt, 0,
	    ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES);
	ppgtt->pd_offset = first_pd_entry_in_global_pt * sizeof(uint32_t);
	dev_priv->mm.aliasing_ppgtt = ppgtt;
	return (0);
}

void
i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv;
	struct i915_hw_ppgtt *ppgtt;
	vm_page_t m;
	int i;

	dev_priv = dev->dev_private;
	ppgtt = dev_priv->mm.aliasing_ppgtt;
	if (ppgtt == NULL)
		return;
	dev_priv->mm.aliasing_ppgtt = NULL;

	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		m = ppgtt->pt_pages[i];
		if (m != NULL) {
			vm_page_busy_wait(m, FALSE, "i915gem");
			vm_page_unwire(m, 0);
			vm_page_free(m);
		}
	}
	drm_free(ppgtt->pt_pages, M_DRM);
	drm_free(ppgtt, M_DRM);
}

static void
i915_ppgtt_insert_pages(struct i915_hw_ppgtt *ppgtt, unsigned first_entry,
    unsigned num_entries, vm_page_t *pages, enum i915_cache_level cache_level)
{
	uint32_t *pt_vaddr;
	unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
	unsigned last_pte, i;
	dma_addr_t page_addr;

	while (num_entries) {
		last_pte = first_pte + num_entries;
		if (last_pte > I915_PPGTT_PT_ENTRIES)
			last_pte = I915_PPGTT_PT_ENTRIES;

		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);

		for (i = first_pte; i < last_pte; i++) {
			page_addr = VM_PAGE_TO_PHYS(*pages);
			pt_vaddr[i] = pte_encode(ppgtt->dev, page_addr,
						 cache_level);

			pages++;
		}

		kunmap_atomic(pt_vaddr);

		num_entries -= last_pte - first_pte;
		first_pte = 0;
		act_pd++;
	}
}

void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
			    struct drm_i915_gem_object *obj,
			    enum i915_cache_level cache_level)
{
	i915_ppgtt_insert_pages(ppgtt, obj->gtt_space->start >> PAGE_SHIFT,
	    obj->base.size >> PAGE_SHIFT, obj->pages, cache_level);
}

void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
			      struct drm_i915_gem_object *obj)
{
	i915_ppgtt_clear_range(ppgtt, obj->gtt_space->start >> PAGE_SHIFT,
	    obj->base.size >> PAGE_SHIFT);
}
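
/*
 * i915_gem_init_ppgtt() below points the hardware at the aliasing PPGTT:
 * it writes one valid PDE per page table into the global-GTT slots reserved
 * by i915_gem_init_aliasing_ppgtt(), programs PP_DIR_BASE/PP_DIR_DCLV on
 * every ring, and sets the PPGTT enable bit (GFX_MODE on gen6, the per-ring
 * mode register on gen7+).
 */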

void i915_gem_init_ppgtt(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t pd_offset;
	struct intel_ring_buffer *ring;
	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
	uint32_t pd_entry, first_pd_entry_in_global_pt;
	int i;

	if (!dev_priv->mm.aliasing_ppgtt)
		return;

	first_pd_entry_in_global_pt = 512 * 1024 - I915_PPGTT_PD_ENTRIES;
	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		vm_paddr_t pt_addr;

		pt_addr = VM_PAGE_TO_PHYS(ppgtt->pt_pages[i]);
		pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
		pd_entry |= GEN6_PDE_VALID;

		intel_gtt_write(first_pd_entry_in_global_pt + i, pd_entry);
	}
	intel_gtt_read_pte(first_pd_entry_in_global_pt);

	pd_offset = ppgtt->pd_offset;
	pd_offset /= 64; /* in cachelines */
	pd_offset <<= 16;

	if (INTEL_INFO(dev)->gen == 6) {
		uint32_t ecochk, gab_ctl, ecobits;

		ecobits = I915_READ(GAC_ECO_BITS);
		I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);

		gab_ctl = I915_READ(GAB_CTL);
		I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);

		ecochk = I915_READ(GAM_ECOCHK);
		I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT |
				       ECOCHK_PPGTT_CACHE64B);
		I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
	} else if (INTEL_INFO(dev)->gen >= 7) {
		I915_WRITE(GAM_ECOCHK, ECOCHK_PPGTT_CACHE64B);
		/* GFX_MODE is per-ring on gen7+ */
	}

	for_each_ring(ring, dev_priv, i) {
		if (INTEL_INFO(dev)->gen >= 7)
			I915_WRITE(RING_MODE_GEN7(ring),
				   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));

		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
		I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset);
	}
}

static bool do_idling(struct drm_i915_private *dev_priv)
{
	bool ret = dev_priv->mm.interruptible;

	if (unlikely(dev_priv->mm.gtt->do_idle_maps)) {
		dev_priv->mm.interruptible = false;
		if (i915_gpu_idle(dev_priv->dev)) {
			DRM_ERROR("Couldn't idle GPU\n");
			/* Wait a bit, in hopes it avoids the hang */
			DELAY(10);
		}
	}

	return ret;
}

static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
{
	if (unlikely(dev_priv->mm.gtt->do_idle_maps))
		dev_priv->mm.interruptible = interruptible;
}

#if 0
static void i915_ggtt_clear_range(struct drm_device *dev,
				  unsigned first_entry,
				  unsigned num_entries)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	gtt_pte_t scratch_pte;
	gtt_pte_t __iomem *gtt_base = dev_priv->mm.gtt->gtt + first_entry;
	const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry;
	int i;

	if (INTEL_INFO(dev)->gen < 6) {
		intel_gtt_clear_range(first_entry, num_entries);
		return;
	}

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = pte_encode(dev, dev_priv->mm.gtt->scratch_page_dma,
				 I915_CACHE_LLC);
	for (i = 0; i < num_entries; i++)
		iowrite32(scratch_pte, &gtt_base[i]);
	readl(gtt_base);
}
#endif

void
i915_gem_restore_gtt_mappings(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv;
	struct drm_i915_gem_object *obj;

	dev_priv = dev->dev_private;

	/* First fill our portion of the GTT with scratch pages */
	intel_gtt_clear_range(dev_priv->mm.gtt_start / PAGE_SIZE,
	    (dev_priv->mm.gtt_end - dev_priv->mm.gtt_start) / PAGE_SIZE);

	list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) {
		i915_gem_clflush_object(obj);
		i915_gem_gtt_bind_object(obj, obj->cache_level);
	}

	intel_gtt_chipset_flush();
}
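
/*
 * The gen6 GGTT write path below is compiled out; the active global-GTT
 * bind/unbind paths in this port go through intel_gtt_insert_pages() and
 * intel_gtt_clear_range() instead (see i915_gem_gtt_bind_object() and
 * i915_gem_gtt_unbind_object() further down). It is presumably retained
 * here for reference.
 */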

#if 0
/*
 * Binds an object into the global gtt with the specified cache level. The
 * object will be accessible to the GPU via commands whose operands reference
 * offsets within the global GTT as well as accessible by the GPU through the
 * GMADR mapped BAR (dev_priv->mm.gtt->gtt).
 */
static void gen6_ggtt_bind_object(struct drm_i915_gem_object *obj,
				  enum i915_cache_level level)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct sg_table *st = obj->pages;
	struct scatterlist *sg = st->sgl;
	const int first_entry = obj->gtt_space->start >> PAGE_SHIFT;
	const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry;
	gtt_pte_t __iomem *gtt_entries = dev_priv->mm.gtt->gtt + first_entry;
	int unused, i = 0;
	unsigned int len, m = 0;
	dma_addr_t addr;

	for_each_sg(st->sgl, sg, st->nents, unused) {
		len = sg_dma_len(sg) >> PAGE_SHIFT;
		for (m = 0; m < len; m++) {
			addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
			iowrite32(pte_encode(dev, addr, level), &gtt_entries[i]);
			i++;
		}
	}

	BUG_ON(i > max_entries);
	BUG_ON(i != obj->base.size / PAGE_SIZE);

	/* XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated. There is some concern that even though
	 * registers and PTEs are within the same BAR that they are potentially
	 * of NUMA access patterns. Therefore, even with the way we assume
	 * hardware should work, we must keep this posting read for paranoia.
	 */
	if (i != 0)
		WARN_ON(readl(&gtt_entries[i-1]) != pte_encode(dev, addr, level));

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
}
#endif

void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
			      enum i915_cache_level cache_level)
{
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
	    AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;

	intel_gtt_insert_pages(obj->gtt_space->start >> PAGE_SHIFT,
	    obj->base.size >> PAGE_SHIFT, obj->pages, flags);

	obj->has_global_gtt_mapping = 1;
}

void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool interruptible;

	interruptible = do_idling(dev_priv);

	intel_gtt_clear_range(obj->gtt_space->start >> PAGE_SHIFT,
	    obj->base.size >> PAGE_SHIFT);

	undo_idling(dev_priv, interruptible);
	obj->has_global_gtt_mapping = 0;
}

void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool interruptible;

	interruptible = do_idling(dev_priv);

#if 0
	if (!obj->has_dma_mapping)
		dma_unmap_sg(&dev->pdev->dev,
			     obj->pages->sgl, obj->pages->nents,
			     PCI_DMA_BIDIRECTIONAL);
#endif

	undo_idling(dev_priv, interruptible);
}