/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"

/**
 * DOC: fence register handling
 *
 * Important to avoid confusion: "fences" in the i915 driver are not execution
 * fences used to track command completion but hardware detiler objects which
 * wrap a given range of the global GTT. Each platform has only a fairly limited
 * set of these objects.
 *
 * Fences are used to detile GTT memory mappings. They're also connected to the
 * hardware frontbuffer render tracking and hence interact with frontbuffer
 * compression. Furthermore on older platforms fences are required for tiled
 * objects used by the display engine. They can also be used by the render
 * engine - they're required for blitter commands and are optional for render
 * commands. But on gen4+ both display (with the exception of fbc) and rendering
 * have their own tiling state bits and don't need fences.
 *
 * Also note that fences only support X and Y tiling and hence can't be used for
 * the fancier new tiling formats like W, Ys and Yf.
 *
 * Finally note that because fences are such a restricted resource they're
 * dynamically associated with objects. Furthermore fence state is committed to
 * the hardware lazily to avoid unnecessary stalls on gen2/3. Therefore code must
 * explicitly call i915_vma_pin_fence() to synchronize fencing status
 * for cpu access. Also note that some code wants an unfenced view, for those
 * cases the fence can be removed forcefully with i915_vma_put_fence().
 *
 * Internally these functions will synchronize with userspace access by removing
 * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed.
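 *
 * A rough sketch of the intended calling pattern (illustrative only; locking
 * of struct_mutex and full error handling are elided, and the local variable
 * names are made up for the example):
 *
 *	// fences are revoked on runtime suspend, so hold a wakeref
 *	intel_runtime_pm_get(i915);
 *	err = i915_vma_pin_fence(vma);
 *	if (err == 0) {
 *		// fenced (detiled) CPU access through the GTT mapping here
 *		if (vma->fence)
 *			i915_vma_unpin_fence(vma);
 *	}
 *	intel_runtime_pm_put(i915);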
 */

#define pipelined 0

static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
				 struct i915_vma *vma)
{
	i915_reg_t fence_reg_lo, fence_reg_hi;
	int fence_pitch_shift;
	u64 val;

	if (INTEL_INFO(fence->i915)->gen >= 6) {
		fence_reg_lo = FENCE_REG_GEN6_LO(fence->id);
		fence_reg_hi = FENCE_REG_GEN6_HI(fence->id);
		fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT;

	} else {
		fence_reg_lo = FENCE_REG_965_LO(fence->id);
		fence_reg_hi = FENCE_REG_965_HI(fence->id);
		fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
	}

	val = 0;
	if (vma) {
		unsigned int stride = i915_gem_object_get_stride(vma->obj);

		GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
		GEM_BUG_ON(!IS_ALIGNED(vma->node.start, I965_FENCE_PAGE));
		GEM_BUG_ON(!IS_ALIGNED(vma->fence_size, I965_FENCE_PAGE));
		GEM_BUG_ON(!IS_ALIGNED(stride, 128));

		val = (vma->node.start + vma->fence_size - I965_FENCE_PAGE) << 32;
		val |= vma->node.start;
		val |= (u64)((stride / 128) - 1) << fence_pitch_shift;
		if (i915_gem_object_get_tiling(vma->obj) == I915_TILING_Y)
			val |= BIT(I965_FENCE_TILING_Y_SHIFT);
		val |= I965_FENCE_REG_VALID;
	}

	if (!pipelined) {
		struct drm_i915_private *dev_priv = fence->i915;

		/* To w/a incoherency with non-atomic 64-bit register updates,
		 * we split the 64-bit update into two 32-bit writes. In order
		 * for a partial fence not to be evaluated between writes, we
		 * precede the update with write to turn off the fence register,
		 * and only enable the fence as the last step.
		 *
		 * For extra levels of paranoia, we make sure each step lands
		 * before applying the next step.
		 */
		I915_WRITE(fence_reg_lo, 0);
		POSTING_READ(fence_reg_lo);

		I915_WRITE(fence_reg_hi, upper_32_bits(val));
		I915_WRITE(fence_reg_lo, lower_32_bits(val));
		POSTING_READ(fence_reg_lo);
	}
}

static void i915_write_fence_reg(struct drm_i915_fence_reg *fence,
				 struct i915_vma *vma)
{
	u32 val;

	val = 0;
	if (vma) {
		unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
		bool is_y_tiled = tiling == I915_TILING_Y;
		unsigned int stride = i915_gem_object_get_stride(vma->obj);

		GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
		GEM_BUG_ON(vma->node.start & ~I915_FENCE_START_MASK);
		GEM_BUG_ON(!is_power_of_2(vma->fence_size));
		GEM_BUG_ON(!IS_ALIGNED(vma->node.start, vma->fence_size));

		if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->i915))
			stride /= 128;
		else
			stride /= 512;
		GEM_BUG_ON(!is_power_of_2(stride));

		val = vma->node.start;
		if (is_y_tiled)
			val |= BIT(I830_FENCE_TILING_Y_SHIFT);
		val |= I915_FENCE_SIZE_BITS(vma->fence_size);
		val |= ilog2(stride) << I830_FENCE_PITCH_SHIFT;

		val |= I830_FENCE_REG_VALID;
	}

	if (!pipelined) {
		struct drm_i915_private *dev_priv = fence->i915;
		i915_reg_t reg = FENCE_REG(fence->id);

		I915_WRITE(reg, val);
		POSTING_READ(reg);
	}
}

static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
				 struct i915_vma *vma)
{
	u32 val;

	val = 0;
	if (vma) {
		unsigned int stride = i915_gem_object_get_stride(vma->obj);

		GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
		GEM_BUG_ON(vma->node.start & ~I830_FENCE_START_MASK);
		GEM_BUG_ON(!is_power_of_2(vma->fence_size));
		GEM_BUG_ON(!is_power_of_2(stride / 128));
		GEM_BUG_ON(!IS_ALIGNED(vma->node.start, vma->fence_size));

		val = vma->node.start;
		if (i915_gem_object_get_tiling(vma->obj) == I915_TILING_Y)
			val |= BIT(I830_FENCE_TILING_Y_SHIFT);
		val |= I830_FENCE_SIZE_BITS(vma->fence_size);
		val |= ilog2(stride / 128) << I830_FENCE_PITCH_SHIFT;
		val |= I830_FENCE_REG_VALID;
	}

	if (!pipelined) {
		struct drm_i915_private *dev_priv = fence->i915;
		i915_reg_t reg = FENCE_REG(fence->id);

		I915_WRITE(reg, val);
		POSTING_READ(reg);
	}
}

static void fence_write(struct drm_i915_fence_reg *fence,
			struct i915_vma *vma)
{
	/* Previous access through the fence register is marshalled by
	 * the mb() inside the fault handlers (i915_gem_release_mmaps)
	 * and explicitly managed for internal users.
	 */

	if (IS_GEN2(fence->i915))
		i830_write_fence_reg(fence, vma);
	else if (IS_GEN3(fence->i915))
		i915_write_fence_reg(fence, vma);
	else
		i965_write_fence_reg(fence, vma);

	/* Access through the fenced region afterwards is
	 * ordered by the posting reads whilst writing the registers.
	 */

	fence->dirty = false;
}

static int fence_update(struct drm_i915_fence_reg *fence,
			struct i915_vma *vma)
{
	int ret;

	if (vma) {
		if (!i915_vma_is_map_and_fenceable(vma))
			return -EINVAL;

		if (WARN(!i915_gem_object_get_stride(vma->obj) ||
			 !i915_gem_object_get_tiling(vma->obj),
			 "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
			 i915_gem_object_get_stride(vma->obj),
			 i915_gem_object_get_tiling(vma->obj)))
			return -EINVAL;

		ret = i915_gem_active_retire(&vma->last_fence,
					     &vma->obj->base.dev->struct_mutex);
		if (ret)
			return ret;
	}

	if (fence->vma) {
		ret = i915_gem_active_retire(&fence->vma->last_fence,
					     &fence->vma->obj->base.dev->struct_mutex);
		if (ret)
			return ret;
	}

	if (fence->vma && fence->vma != vma) {
		/* Ensure that all userspace CPU access is completed before
		 * stealing the fence.
		 */
		GEM_BUG_ON(fence->vma->fence != fence);
		i915_vma_revoke_mmap(fence->vma);

		fence->vma->fence = NULL;
		fence->vma = NULL;

		list_move(&fence->link, &fence->i915->mm.fence_list);
	}

	/* We only need to update the register itself if the device is awake.
	 * If the device is currently powered down, we will defer the write
	 * to the runtime resume, see i915_gem_restore_fences().
	 */
	if (intel_runtime_pm_get_if_in_use(fence->i915)) {
		fence_write(fence, vma);
		intel_runtime_pm_put(fence->i915);
	}

	if (vma) {
		if (fence->vma != vma) {
			vma->fence = fence;
			fence->vma = vma;
		}

		list_move_tail(&fence->link, &fence->i915->mm.fence_list);
	}

	return 0;
}

/**
 * i915_vma_put_fence - force-remove fence for a VMA
 * @vma: vma to map linearly (not through a fence reg)
 *
 * This function force-removes any fence from the given object, which is useful
 * if the kernel wants to do untiled GTT access.
 *
 * Returns:
 *
 * 0 on success, negative error code on failure.
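 *
 * A minimal usage sketch (illustrative only; struct_mutex locking and the
 * surrounding pinning of the vma are assumed to be handled by the caller):
 *
 *	err = i915_vma_put_fence(vma);
 *	if (err)
 *		return err;
 *	// GTT access now sees the object linearly, without detiling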
 */
int i915_vma_put_fence(struct i915_vma *vma)
{
	struct drm_i915_fence_reg *fence = vma->fence;

	if (!fence)
		return 0;

	if (fence->pin_count)
		return -EBUSY;

	return fence_update(fence, NULL);
}

static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
{
	struct drm_i915_fence_reg *fence;

	list_for_each_entry(fence, &dev_priv->mm.fence_list, link) {
		GEM_BUG_ON(fence->vma && fence->vma->fence != fence);

		if (fence->pin_count)
			continue;

		return fence;
	}

	/* Wait for completion of pending flips which consume fences */
	if (intel_has_pending_fb_unpin(dev_priv))
		return ERR_PTR(-EAGAIN);

	return ERR_PTR(-EDEADLK);
}

/**
 * i915_vma_pin_fence - set up fencing for a vma
 * @vma: vma to map through a fence reg
 *
 * When mapping objects through the GTT, userspace wants to be able to write
 * to them without having to worry about swizzling if the object is tiled.
 * This function walks the fence regs looking for a free one for @vma,
 * stealing one if it can't find any.
 *
 * It then sets up the reg based on the object's properties: address, pitch
 * and tiling format.
 *
 * For an untiled surface, this removes any existing fence.
 *
 * Returns:
 *
 * 0 on success, negative error code on failure.
 */
int
i915_vma_pin_fence(struct i915_vma *vma)
{
	struct drm_i915_fence_reg *fence;
	struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
	int err;

	/* Note that we revoke fences on runtime suspend. Therefore the user
	 * must keep the device awake whilst using the fence.
	 */
	assert_rpm_wakelock_held(vma->vm->i915);

	/* Just update our place in the LRU if our fence is getting reused. */
	if (vma->fence) {
		fence = vma->fence;
		GEM_BUG_ON(fence->vma != vma);
		fence->pin_count++;
		if (!fence->dirty) {
			list_move_tail(&fence->link,
				       &fence->i915->mm.fence_list);
			return 0;
		}
	} else if (set) {
		fence = fence_find(vma->vm->i915);
		if (IS_ERR(fence))
			return PTR_ERR(fence);

		GEM_BUG_ON(fence->pin_count);
		fence->pin_count++;
	} else
		return 0;

	err = fence_update(fence, set);
	if (err)
		goto out_unpin;

	GEM_BUG_ON(fence->vma != set);
	GEM_BUG_ON(vma->fence != (set ? fence : NULL));

	if (set)
		return 0;

out_unpin:
	fence->pin_count--;
	return err;
}

/**
 * i915_reserve_fence - Reserve a fence for vGPU
 * @dev_priv: i915 device private
 *
 * This function walks the fence regs looking for a free one and removes
 * it from the fence_list. It is used to reserve a fence for vGPU to use.
 */
struct drm_i915_fence_reg *
i915_reserve_fence(struct drm_i915_private *dev_priv)
{
	struct drm_i915_fence_reg *fence;
	int count;
	int ret;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);

	/* Keep at least one fence available for the display engine. */
	count = 0;
	list_for_each_entry(fence, &dev_priv->mm.fence_list, link)
		count += !fence->pin_count;
	if (count <= 1)
		return ERR_PTR(-ENOSPC);

	fence = fence_find(dev_priv);
	if (IS_ERR(fence))
		return fence;

	if (fence->vma) {
		/* Force-remove fence from VMA */
		ret = fence_update(fence, NULL);
		if (ret)
			return ERR_PTR(ret);
	}

	list_del(&fence->link);
	return fence;
}

/**
 * i915_unreserve_fence - Reclaim a reserved fence
 * @fence: the fence reg
 *
 * This function adds a reserved fence register from vGPU back to the fence_list.
 */
void i915_unreserve_fence(struct drm_i915_fence_reg *fence)
{
	lockdep_assert_held(&fence->i915->drm.struct_mutex);

	list_add(&fence->link, &fence->i915->mm.fence_list);
}

/**
 * i915_gem_revoke_fences - revoke fence state
 * @dev_priv: i915 device private
 *
 * Removes all GTT mmappings via the fence registers. This forces any user
 * of the fence to reacquire that fence before continuing with their access.
 * One use is during GPU reset where the fence register is lost and we need to
 * revoke concurrent userspace access via GTT mmaps until the hardware has been
 * reset and the fence registers have been restored.
 */
void i915_gem_revoke_fences(struct drm_i915_private *dev_priv)
{
	int i;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);

	for (i = 0; i < dev_priv->num_fence_regs; i++) {
		struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];

		GEM_BUG_ON(fence->vma && fence->vma->fence != fence);

		if (fence->vma)
			i915_vma_revoke_mmap(fence->vma);
	}
}

/**
 * i915_gem_restore_fences - restore fence state
 * @dev_priv: i915 device private
 *
 * Restore the hw fence state to match the software tracking again, to be called
 * after a gpu reset and on resume. Note that on runtime suspend we only cancel
 * the fences, to be reacquired by the user later.
 */
void i915_gem_restore_fences(struct drm_i915_private *dev_priv)
{
	int i;

	for (i = 0; i < dev_priv->num_fence_regs; i++) {
		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
		struct i915_vma *vma = reg->vma;

		GEM_BUG_ON(vma && vma->fence != reg);

		/*
		 * Commit delayed tiling changes if we have an object still
		 * attached to the fence, otherwise just clear the fence.
		 */
		if (vma && !i915_gem_object_is_tiled(vma->obj)) {
			GEM_BUG_ON(!reg->dirty);
			GEM_BUG_ON(i915_vma_has_userfault(vma));

			list_move(&reg->link, &dev_priv->mm.fence_list);
			vma->fence = NULL;
			vma = NULL;
		}

		fence_write(reg, vma);
		reg->vma = vma;
	}
}

/**
 * DOC: tiling swizzling details
 *
 * The idea behind tiling is to increase cache hit rates by rearranging
 * pixel data so that a group of pixel accesses are in the same cacheline.
 * Performance improvements from doing this on the back/depth buffer are on
 * the order of 30%.
 *
 * Intel architectures make this somewhat more complicated, though, by
 * adjustments made to addressing of data when the memory is in interleaved
 * mode (matched pairs of DIMMS) to improve memory bandwidth.
 * For interleaved memory, the CPU sends every sequential 64 bytes
 * to an alternate memory channel so it can get the bandwidth from both.
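 *
 * As a rough illustration (not a helper in this file, just the arithmetic
 * implied by the paragraphs above and below): with a 64 byte interleave
 * granule the channel select is simply bit 6 of the address, and the
 * swizzled variants fold higher bits into it, e.g. for the common 9/10
 * swizzle used for X tiled GPU accesses:
 *
 *	channel = (addr >> 6) & 1;
 *	bit6 = ((addr >> 6) ^ (addr >> 9) ^ (addr >> 10)) & 1;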
 *
 * The GPU also rearranges its accesses for increased bandwidth to interleaved
 * memory, and it matches what the CPU does for non-tiled. However, when tiled
 * it does it a little differently, since one walks addresses not just in the
 * X direction but also Y. So, along with alternating channels when bit
 * 6 of the address flips, it also alternates when other bits flip -- Bits 9
 * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines)
 * are common to both the 915 and 965-class hardware.
 *
 * The CPU also sometimes XORs in higher bits as well, to improve
 * bandwidth doing strided access like we do so frequently in graphics. This
 * is called "Channel XOR Randomization" in the MCH documentation. The result
 * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address
 * decode.
 *
 * All of this bit 6 XORing has an effect on our memory management,
 * as we need to make sure that the 3d driver can correctly address object
 * contents.
 *
 * If we don't have interleaved memory, all tiling is safe and no swizzling is
 * required.
 *
 * When bit 17 is XORed in, we simply refuse to tile at all. Bit
 * 17 is not just a page offset, so as we page an object out and back in,
 * individual pages in it will have different bit 17 addresses, resulting in
 * each 64 bytes being swapped with its neighbor!
 *
 * Otherwise, if interleaved, we have to tell the 3d driver what address
 * swizzling it needs to do, since it's writing with the CPU to the pages
 * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the
 * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling
 * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order
 * to match what the GPU expects.
 */

/**
 * i915_gem_detect_bit_6_swizzle - detect bit 6 swizzling pattern
 * @dev_priv: i915 device private
 *
 * Detects bit 6 swizzling of address lookup between IGD access and CPU
 * access through main memory.
 */
void
i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv)
{
	uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
	uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;

	if (INTEL_GEN(dev_priv) >= 8 || IS_VALLEYVIEW(dev_priv)) {
		/*
		 * On BDW+, swizzling is not used. We leave the CPU memory
		 * controller in charge of optimizing memory accesses without
		 * the extra address manipulation GPU side.
		 *
		 * VLV and CHV don't have GPU swizzling.
		 */
		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
	} else if (INTEL_GEN(dev_priv) >= 6) {
		if (dev_priv->preserve_bios_swizzle) {
			if (I915_READ(DISP_ARB_CTL) &
			    DISP_TILE_SURFACE_SWIZZLING) {
				swizzle_x = I915_BIT_6_SWIZZLE_9_10;
				swizzle_y = I915_BIT_6_SWIZZLE_9;
			} else {
				swizzle_x = I915_BIT_6_SWIZZLE_NONE;
				swizzle_y = I915_BIT_6_SWIZZLE_NONE;
			}
		} else {
			uint32_t dimm_c0, dimm_c1;
			dimm_c0 = I915_READ(MAD_DIMM_C0);
			dimm_c1 = I915_READ(MAD_DIMM_C1);
			dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
			dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
			/* Enable swizzling when the channels are populated
			 * with identically sized dimms. We don't need to check
			 * the 3rd channel because no cpu with gpu attached
			 * ships in that configuration. Also, swizzling only
			 * makes sense for 2 channels anyway.
			 */
			if (dimm_c0 == dimm_c1) {
				swizzle_x = I915_BIT_6_SWIZZLE_9_10;
				swizzle_y = I915_BIT_6_SWIZZLE_9;
			} else {
				swizzle_x = I915_BIT_6_SWIZZLE_NONE;
				swizzle_y = I915_BIT_6_SWIZZLE_NONE;
			}
		}
	} else if (IS_GEN5(dev_priv)) {
		/* On Ironlake, whatever the DRAM config, the GPU always does
		 * the same swizzling setup.
		 */
		swizzle_x = I915_BIT_6_SWIZZLE_9_10;
		swizzle_y = I915_BIT_6_SWIZZLE_9;
	} else if (IS_GEN2(dev_priv)) {
		/* As far as we know, the 865 doesn't have these bit 6
		 * swizzling issues.
		 */
		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
	} else if (IS_MOBILE(dev_priv) ||
		   IS_I915G(dev_priv) || IS_I945G(dev_priv)) {
		uint32_t dcc;

		/* On 9xx chipsets, channel interleave by the CPU is
		 * determined by DCC. For single-channel, neither the CPU
		 * nor the GPU do swizzling. For dual channel interleaved,
		 * the GPU's interleave is bit 9 and 10 for X tiled, and bit
		 * 9 for Y tiled. The CPU's interleave is independent, and
		 * can be based on either bit 11 (haven't seen this yet) or
		 * bit 17 (common).
		 */
		dcc = I915_READ(DCC);
		switch (dcc & DCC_ADDRESSING_MODE_MASK) {
		case DCC_ADDRESSING_MODE_SINGLE_CHANNEL:
		case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC:
			swizzle_x = I915_BIT_6_SWIZZLE_NONE;
			swizzle_y = I915_BIT_6_SWIZZLE_NONE;
			break;
		case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED:
			if (dcc & DCC_CHANNEL_XOR_DISABLE) {
				/* This is the base swizzling by the GPU for
				 * tiled buffers.
				 */
				swizzle_x = I915_BIT_6_SWIZZLE_9_10;
				swizzle_y = I915_BIT_6_SWIZZLE_9;
			} else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) {
				/* Bit 11 swizzling by the CPU in addition. */
				swizzle_x = I915_BIT_6_SWIZZLE_9_10_11;
				swizzle_y = I915_BIT_6_SWIZZLE_9_11;
			} else {
				/* Bit 17 swizzling by the CPU in addition. */
				swizzle_x = I915_BIT_6_SWIZZLE_9_10_17;
				swizzle_y = I915_BIT_6_SWIZZLE_9_17;
			}
			break;
		}

		/* check for L-shaped memory aka modified enhanced addressing */
		if (IS_GEN4(dev_priv) &&
		    !(I915_READ(DCC2) & DCC2_MODIFIED_ENHANCED_DISABLE)) {
			swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
			swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
		}

		if (dcc == 0xffffffff) {
			DRM_ERROR("Couldn't read from MCHBAR. "
				  "Disabling tiling.\n");
			swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
			swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
		}
	} else {
		/* The 965, G33, and newer, have a very flexible memory
		 * configuration. It will enable dual-channel mode
		 * (interleaving) on as much memory as it can, and the GPU
		 * will additionally sometimes enable different bit 6
		 * swizzling for tiled objects from the CPU.
		 *
		 * Here's what I found on the G965:
		 *    slot fill         memory size  swizzling
		 * 0A   0B   1A   1B    1-ch   2-ch
		 * 512  0    0    0     512    0     O
		 * 512  0    512  0     16     1008  X
		 * 512  0    0    512   16     1008  X
		 * 0    512  0    512   16     1008  X
		 * 1024 1024 1024 0     2048   1024  O
		 *
		 * We could probably detect this based on either the DRB
		 * matching, which was the case for the swizzling required in
		 * the table above, or from the 1-ch value being less than
		 * the minimum size of a rank.
		 *
		 * Reports indicate that the swizzling actually
		 * varies depending upon page placement inside the
		 * channels, i.e. we see swizzled pages where the
		 * banks of memory are paired and unswizzled on the
		 * uneven portion, so leave that as unknown.
		 */
		if (I915_READ16(C0DRB3) == I915_READ16(C1DRB3)) {
			swizzle_x = I915_BIT_6_SWIZZLE_9_10;
			swizzle_y = I915_BIT_6_SWIZZLE_9;
		}
	}

	if (swizzle_x == I915_BIT_6_SWIZZLE_UNKNOWN ||
	    swizzle_y == I915_BIT_6_SWIZZLE_UNKNOWN) {
		/* Userspace likes to explode if it sees unknown swizzling,
		 * so lie. We will finish the lie when reporting through
		 * the get-tiling-ioctl by reporting the physical swizzle
		 * mode as unknown instead.
		 *
		 * As we don't strictly know what the swizzling is, it may be
		 * bit17 dependent, and so we need to also prevent the pages
		 * from being moved.
		 */
		dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES;
		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
	}

	dev_priv->mm.bit_6_swizzle_x = swizzle_x;
	dev_priv->mm.bit_6_swizzle_y = swizzle_y;
}

/*
 * Swap every 64 bytes of this page around, to account for it having a new
 * bit 17 of its physical address and therefore being interpreted differently
 * by the GPU.
 */
static void
i915_gem_swizzle_page(struct page *page)
{
	char temp[64];
	char *vaddr;
	int i;

	vaddr = kmap(page);

	for (i = 0; i < PAGE_SIZE; i += 128) {
		memcpy(temp, &vaddr[i], 64);
		memcpy(&vaddr[i], &vaddr[i + 64], 64);
		memcpy(&vaddr[i + 64], temp, 64);
	}

	kunmap(page);
}

/**
 * i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling
 * @obj: i915 GEM buffer object
 * @pages: the scattergather list of physical pages
 *
 * This function fixes up the swizzling in case any page frame number for this
 * object has changed in bit 17 since that state has been saved with
 * i915_gem_object_save_bit_17_swizzle().
 *
 * This is called when pinning backing storage again, since the kernel is free
 * to move unpinned backing storage around (either by directly moving pages or
 * by swapping them out and back in again).
 */
void
i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
				  struct sg_table *pages)
{
	struct sgt_iter sgt_iter;
	struct page *page;
	int i;

	if (obj->bit_17 == NULL)
		return;

	i = 0;
	for_each_sgt_page(page, sgt_iter, pages) {
		char new_bit_17 = page_to_phys(page) >> 17;
		if ((new_bit_17 & 0x1) != (test_bit(i, obj->bit_17) != 0)) {
			i915_gem_swizzle_page(page);
			set_page_dirty(page);
		}
		i++;
	}
}

/**
 * i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling
 * @obj: i915 GEM buffer object
 * @pages: the scattergather list of physical pages
 *
 * This function saves the bit 17 of each page frame number so that swizzling
 * can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This must
 * be called before the backing storage can be unpinned.
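 *
 * A rough sketch of how the two helpers pair up around dropping and
 * re-acquiring the backing storage (illustrative only; the obj/pages names
 * are placeholders and the real callers live in the get_pages/put_pages
 * paths):
 *
 *	// before the pages are unpinned and may be relocated
 *	i915_gem_object_save_bit_17_swizzle(obj, pages);
 *	...
 *	// after the (possibly relocated) pages are pinned again
 *	i915_gem_object_do_bit_17_swizzle(obj, pages);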
 */
void
i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
				    struct sg_table *pages)
{
	const unsigned int page_count = obj->base.size >> PAGE_SHIFT;
	struct sgt_iter sgt_iter;
	struct page *page;
	int i;

	if (obj->bit_17 == NULL) {
		obj->bit_17 = kcalloc(BITS_TO_LONGS(page_count),
				      sizeof(long), GFP_KERNEL);
		if (obj->bit_17 == NULL) {
			DRM_ERROR("Failed to allocate memory for bit 17 "
				  "record\n");
			return;
		}
	}

	i = 0;

	for_each_sgt_page(page, sgt_iter, pages) {
		if (page_to_phys(page) & (1 << 17))
			__set_bit(i, obj->bit_17);
		else
			__clear_bit(i, obj->bit_17);
		i++;
	}
}