1 /* 2 * Copyright © 2010 Daniel Vetter 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * $FreeBSD: src/sys/dev/drm2/i915/i915_gem_gtt.c,v 1.1 2012/05/22 11:07:44 kib Exp $ 24 */ 25 26 #include <sys/sfbuf.h> 27 28 #include <drm/drmP.h> 29 #include <drm/i915_drm.h> 30 #include "i915_drv.h" 31 #include "intel_drv.h" 32 33 typedef uint32_t gtt_pte_t; 34 35 /* PPGTT stuff */ 36 #define GEN6_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0xff0)) 37 38 #define GEN6_PDE_VALID (1 << 0) 39 /* gen6+ has bit 11-4 for physical addr bit 39-32 */ 40 #define GEN6_PDE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr) 41 42 #define GEN6_PTE_VALID (1 << 0) 43 #define GEN6_PTE_UNCACHED (1 << 1) 44 #define HSW_PTE_UNCACHED (0) 45 #define GEN6_PTE_CACHE_LLC (2 << 1) 46 #define GEN6_PTE_CACHE_LLC_MLC (3 << 1) 47 #define GEN6_PTE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr) 48 49 static inline gtt_pte_t pte_encode(struct drm_device *dev, 50 dma_addr_t addr, 51 enum i915_cache_level level) 52 { 53 gtt_pte_t pte = GEN6_PTE_VALID; 54 pte |= GEN6_PTE_ADDR_ENCODE(addr); 55 56 switch (level) { 57 case I915_CACHE_LLC_MLC: 58 /* Haswell doesn't set L3 this way */ 59 if (IS_HASWELL(dev)) 60 pte |= GEN6_PTE_CACHE_LLC; 61 else 62 pte |= GEN6_PTE_CACHE_LLC_MLC; 63 break; 64 case I915_CACHE_LLC: 65 pte |= GEN6_PTE_CACHE_LLC; 66 break; 67 case I915_CACHE_NONE: 68 if (IS_HASWELL(dev)) 69 pte |= HSW_PTE_UNCACHED; 70 else 71 pte |= GEN6_PTE_UNCACHED; 72 break; 73 default: 74 BUG(); 75 } 76 77 78 return pte; 79 } 80 81 /* PPGTT support for Sandybdrige/Gen6 and later */ 82 static void i915_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt, 83 unsigned first_entry, 84 unsigned num_entries) 85 { 86 gtt_pte_t *pt_vaddr; 87 gtt_pte_t scratch_pte; 88 struct sf_buf *sf; 89 unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES; 90 unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES; 91 unsigned last_pte, i; 92 93 scratch_pte = GEN6_GTT_ADDR_ENCODE(ppgtt->scratch_page_dma_addr); 94 scratch_pte |= GEN6_PTE_VALID | GEN6_PTE_CACHE_LLC; 95 96 while (num_entries) { 97 last_pte = first_pte + num_entries; 98 if (last_pte > I915_PPGTT_PT_ENTRIES) 99 last_pte = I915_PPGTT_PT_ENTRIES; 100 101 sf = sf_buf_alloc(ppgtt->pt_pages[act_pd]); 102 pt_vaddr = (uint32_t *)(uintptr_t)sf_buf_kva(sf); 103 104 for (i = first_pte; i < last_pte; i++) 105 pt_vaddr[i] = scratch_pte; 106 107 sf_buf_free(sf); 108 109 num_entries -= last_pte - first_pte; 110 first_pte = 0; 111 act_pd++; 112 } 113 } 114 115 int 116 i915_gem_init_aliasing_ppgtt(struct drm_device *dev) 117 { 118 struct drm_i915_private *dev_priv; 119 struct i915_hw_ppgtt *ppgtt; 120 u_int first_pd_entry_in_global_pt, i; 121 122 dev_priv = dev->dev_private; 123 124 /* 125 * ppgtt PDEs reside in the global gtt pagetable, which has 512*1024 126 * entries. For aliasing ppgtt support we just steal them at the end for 127 * now. 128 */ 129 first_pd_entry_in_global_pt = 512 * 1024 - I915_PPGTT_PD_ENTRIES; 130 131 ppgtt = kmalloc(sizeof(*ppgtt), DRM_I915_GEM, M_WAITOK | M_ZERO); 132 133 ppgtt->num_pd_entries = I915_PPGTT_PD_ENTRIES; 134 ppgtt->pt_pages = kmalloc(sizeof(vm_page_t) * ppgtt->num_pd_entries, 135 DRM_I915_GEM, M_WAITOK | M_ZERO); 136 137 for (i = 0; i < ppgtt->num_pd_entries; i++) { 138 ppgtt->pt_pages[i] = vm_page_alloc(NULL, 0, 139 VM_ALLOC_NORMAL | VM_ALLOC_ZERO); 140 if (ppgtt->pt_pages[i] == NULL) { 141 dev_priv->mm.aliasing_ppgtt = ppgtt; 142 i915_gem_cleanup_aliasing_ppgtt(dev); 143 return (-ENOMEM); 144 } 145 } 146 147 ppgtt->scratch_page_dma_addr = dev_priv->mm.gtt->scratch_page_dma; 148 149 i915_ppgtt_clear_range(ppgtt, 0, ppgtt->num_pd_entries * 150 I915_PPGTT_PT_ENTRIES); 151 ppgtt->pd_offset = (first_pd_entry_in_global_pt) * sizeof(uint32_t); 152 dev_priv->mm.aliasing_ppgtt = ppgtt; 153 return (0); 154 } 155 156 static void 157 i915_ppgtt_insert_pages(struct i915_hw_ppgtt *ppgtt, unsigned first_entry, 158 unsigned num_entries, vm_page_t *pages, uint32_t pte_flags) 159 { 160 uint32_t *pt_vaddr, pte; 161 struct sf_buf *sf; 162 unsigned act_pd, first_pte; 163 unsigned last_pte, i; 164 vm_paddr_t page_addr; 165 166 act_pd = first_entry / I915_PPGTT_PT_ENTRIES; 167 first_pte = first_entry % I915_PPGTT_PT_ENTRIES; 168 169 while (num_entries) { 170 last_pte = first_pte + num_entries; 171 if (last_pte > I915_PPGTT_PT_ENTRIES) 172 last_pte = I915_PPGTT_PT_ENTRIES; 173 174 sf = sf_buf_alloc(ppgtt->pt_pages[act_pd]); 175 pt_vaddr = (uint32_t *)(uintptr_t)sf_buf_kva(sf); 176 177 for (i = first_pte; i < last_pte; i++) { 178 page_addr = VM_PAGE_TO_PHYS(*pages); 179 pte = GEN6_PTE_ADDR_ENCODE(page_addr); 180 pt_vaddr[i] = pte | pte_flags; 181 182 pages++; 183 } 184 185 sf_buf_free(sf); 186 187 num_entries -= last_pte - first_pte; 188 first_pte = 0; 189 act_pd++; 190 } 191 } 192 193 void 194 i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt, 195 struct drm_i915_gem_object *obj, enum i915_cache_level cache_level) 196 { 197 struct drm_device *dev; 198 struct drm_i915_private *dev_priv; 199 uint32_t pte_flags; 200 201 dev = obj->base.dev; 202 dev_priv = dev->dev_private; 203 pte_flags = GEN6_PTE_VALID; 204 205 switch (cache_level) { 206 case I915_CACHE_LLC_MLC: 207 pte_flags |= GEN6_PTE_CACHE_LLC_MLC; 208 break; 209 case I915_CACHE_LLC: 210 pte_flags |= GEN6_PTE_CACHE_LLC; 211 break; 212 case I915_CACHE_NONE: 213 pte_flags |= GEN6_PTE_UNCACHED; 214 break; 215 default: 216 panic("cache mode"); 217 } 218 219 i915_ppgtt_insert_pages(ppgtt, obj->gtt_space->start >> PAGE_SHIFT, 220 obj->base.size >> PAGE_SHIFT, obj->pages, pte_flags); 221 } 222 223 void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt, 224 struct drm_i915_gem_object *obj) 225 { 226 i915_ppgtt_clear_range(ppgtt, obj->gtt_space->start >> PAGE_SHIFT, 227 obj->base.size >> PAGE_SHIFT); 228 } 229 230 void 231 i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev) 232 { 233 struct drm_i915_private *dev_priv; 234 struct i915_hw_ppgtt *ppgtt; 235 vm_page_t m; 236 int i; 237 238 dev_priv = dev->dev_private; 239 ppgtt = dev_priv->mm.aliasing_ppgtt; 240 if (ppgtt == NULL) 241 return; 242 dev_priv->mm.aliasing_ppgtt = NULL; 243 244 for (i = 0; i < ppgtt->num_pd_entries; i++) { 245 m = ppgtt->pt_pages[i]; 246 if (m != NULL) { 247 vm_page_busy_wait(m, FALSE, "i915gem"); 248 vm_page_unwire(m, 0); 249 vm_page_free(m); 250 } 251 } 252 drm_free(ppgtt->pt_pages, DRM_I915_GEM); 253 drm_free(ppgtt, DRM_I915_GEM); 254 } 255 256 257 static unsigned int 258 cache_level_to_agp_type(struct drm_device *dev, enum i915_cache_level 259 cache_level) 260 { 261 262 switch (cache_level) { 263 case I915_CACHE_LLC_MLC: 264 if (INTEL_INFO(dev)->gen >= 6) 265 return (AGP_USER_CACHED_MEMORY_LLC_MLC); 266 /* 267 * Older chipsets do not have this extra level of CPU 268 * cacheing, so fallthrough and request the PTE simply 269 * as cached. 270 */ 271 case I915_CACHE_LLC: 272 return (AGP_USER_CACHED_MEMORY); 273 274 default: 275 case I915_CACHE_NONE: 276 return (AGP_USER_MEMORY); 277 } 278 } 279 280 static bool 281 do_idling(struct drm_i915_private *dev_priv) 282 { 283 bool ret = dev_priv->mm.interruptible; 284 285 if (unlikely(dev_priv->mm.gtt->do_idle_maps)) { 286 dev_priv->mm.interruptible = false; 287 if (i915_gpu_idle(dev_priv->dev, false)) { 288 DRM_ERROR("Couldn't idle GPU\n"); 289 /* Wait a bit, in hopes it avoids the hang */ 290 DELAY(10); 291 } 292 } 293 294 return ret; 295 } 296 297 static void 298 undo_idling(struct drm_i915_private *dev_priv, bool interruptible) 299 { 300 301 if (unlikely(dev_priv->mm.gtt->do_idle_maps)) 302 dev_priv->mm.interruptible = interruptible; 303 } 304 305 void 306 i915_gem_restore_gtt_mappings(struct drm_device *dev) 307 { 308 struct drm_i915_private *dev_priv; 309 struct drm_i915_gem_object *obj; 310 311 dev_priv = dev->dev_private; 312 313 /* First fill our portion of the GTT with scratch pages */ 314 intel_gtt_clear_range(dev_priv->mm.gtt_start / PAGE_SIZE, 315 (dev_priv->mm.gtt_end - dev_priv->mm.gtt_start) / PAGE_SIZE); 316 317 list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) { 318 i915_gem_clflush_object(obj); 319 i915_gem_gtt_rebind_object(obj, obj->cache_level); 320 } 321 322 intel_gtt_chipset_flush(); 323 } 324 325 int 326 i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj) 327 { 328 unsigned int agp_type; 329 330 agp_type = cache_level_to_agp_type(obj->base.dev, obj->cache_level); 331 intel_gtt_insert_pages(obj->gtt_space->start >> PAGE_SHIFT, 332 obj->base.size >> PAGE_SHIFT, obj->pages, agp_type); 333 return (0); 334 } 335 336 void 337 i915_gem_gtt_rebind_object(struct drm_i915_gem_object *obj, 338 enum i915_cache_level cache_level) 339 { 340 struct drm_device *dev; 341 struct drm_i915_private *dev_priv; 342 unsigned int agp_type; 343 344 dev = obj->base.dev; 345 dev_priv = dev->dev_private; 346 agp_type = cache_level_to_agp_type(dev, cache_level); 347 348 intel_gtt_insert_pages(obj->gtt_space->start >> PAGE_SHIFT, 349 obj->base.size >> PAGE_SHIFT, obj->pages, agp_type); 350 } 351 352 void 353 i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj) 354 { 355 struct drm_device *dev = obj->base.dev; 356 struct drm_i915_private *dev_priv = dev->dev_private; 357 bool interruptible; 358 359 dev = obj->base.dev; 360 dev_priv = dev->dev_private; 361 362 interruptible = do_idling(dev_priv); 363 364 intel_gtt_clear_range(obj->gtt_space->start >> PAGE_SHIFT, 365 obj->base.size >> PAGE_SHIFT); 366 367 undo_idling(dev_priv, interruptible); 368 } 369 370 #define GFX_MODE_ENABLE(bit) (((bit) << 16) | (bit)) 371 372 void i915_gem_init_ppgtt(struct drm_device *dev) 373 { 374 drm_i915_private_t *dev_priv = dev->dev_private; 375 uint32_t pd_offset; 376 struct intel_ring_buffer *ring; 377 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; 378 uint32_t pd_entry; 379 vm_paddr_t pt_addr; 380 u_int first_pd_entry_in_global_pt, i; 381 382 if (ppgtt == NULL) 383 return; 384 385 first_pd_entry_in_global_pt = 512 * 1024 - I915_PPGTT_PD_ENTRIES; 386 for (i = 0; i < ppgtt->num_pd_entries; i++) { 387 pt_addr = VM_PAGE_TO_PHYS(ppgtt->pt_pages[i]); 388 pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr); 389 pd_entry |= GEN6_PDE_VALID; 390 intel_gtt_write(first_pd_entry_in_global_pt + i, pd_entry); 391 } 392 intel_gtt_read_pte(first_pd_entry_in_global_pt); 393 394 pd_offset = ppgtt->pd_offset; 395 pd_offset /= 64; /* in cachelines, */ 396 pd_offset <<= 16; 397 398 if (INTEL_INFO(dev)->gen == 6) { 399 uint32_t ecochk = I915_READ(GAM_ECOCHK); 400 I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | 401 ECOCHK_PPGTT_CACHE64B); 402 I915_WRITE(GFX_MODE, GFX_MODE_ENABLE(GFX_PPGTT_ENABLE)); 403 } else if (INTEL_INFO(dev)->gen >= 7) { 404 I915_WRITE(GAM_ECOCHK, ECOCHK_PPGTT_CACHE64B); 405 /* GFX_MODE is per-ring on gen7+ */ 406 } 407 408 for (i = 0; i < I915_NUM_RINGS; i++) { 409 ring = &dev_priv->ring[i]; 410 411 if (INTEL_INFO(dev)->gen >= 7) 412 I915_WRITE(RING_MODE_GEN7(ring), 413 GFX_MODE_ENABLE(GFX_PPGTT_ENABLE)); 414 415 I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); 416 I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset); 417 } 418 } 419