/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"

/**
 * DOC: fence register handling
 *
 * Important to avoid confusion: "fences" in the i915 driver are not execution
 * fences used to track command completion but hardware detiler objects which
 * wrap a given range of the global GTT. Each platform has only a fairly limited
 * set of these objects.
 *
 * Fences are used to detile GTT memory mappings. They're also connected to the
 * hardware frontbuffer render tracking and hence interact with frontbuffer
 * compression. Furthermore on older platforms fences are required for tiled
 * objects used by the display engine. They can also be used by the render
 * engine - they're required for blitter commands and are optional for render
 * commands. But on gen4+ both display (with the exception of fbc) and rendering
 * have their own tiling state bits and don't need fences.
 *
 * Also note that fences only support X and Y tiling and hence can't be used for
 * the fancier new tiling formats like W, Ys and Yf.
 *
 * Finally note that because fences are such a restricted resource they're
 * dynamically associated with objects. Furthermore fence state is committed to
 * the hardware lazily to avoid unnecessary stalls on gen2/3. Therefore code must
 * explicitly call i915_vma_get_fence() to synchronize fencing status
 * for cpu access. Also note that some code wants an unfenced view, for those
 * cases the fence can be removed forcefully with i915_vma_put_fence().
 *
 * Internally these functions will synchronize with userspace access by removing
 * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed.
 */
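
/*
 * For illustration only - a rough, hypothetical caller sketch (the real
 * callers live elsewhere in the driver, and locking and error handling are
 * elided here). Fenced CPU access through a GTT mmap follows the pattern:
 *
 *	intel_runtime_pm_get(dev_priv);		// keep the device awake
 *	err = i915_vma_get_fence(vma);		// acquire or refresh the fence
 *	if (err == 0)
 *		... tiled CPU access through the fenced GTT range ...
 *	intel_runtime_pm_put(dev_priv);
 *
 * while i915_vma_put_fence(vma) force-removes the fence again when a linear
 * (unfenced) view of the same range is wanted.
 */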

#define pipelined 0

static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
				 struct i915_vma *vma)
{
	i915_reg_t fence_reg_lo, fence_reg_hi;
	int fence_pitch_shift;
	u64 val;

	if (INTEL_INFO(fence->i915)->gen >= 6) {
		fence_reg_lo = FENCE_REG_GEN6_LO(fence->id);
		fence_reg_hi = FENCE_REG_GEN6_HI(fence->id);
		fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT;

	} else {
		fence_reg_lo = FENCE_REG_965_LO(fence->id);
		fence_reg_hi = FENCE_REG_965_HI(fence->id);
		fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
	}

	val = 0;
	if (vma) {
		unsigned int stride = i915_gem_object_get_stride(vma->obj);

		GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
		GEM_BUG_ON(!IS_ALIGNED(vma->node.start, I965_FENCE_PAGE));
		GEM_BUG_ON(!IS_ALIGNED(vma->fence_size, I965_FENCE_PAGE));
		GEM_BUG_ON(!IS_ALIGNED(stride, 128));

		val = (vma->node.start + vma->fence_size - I965_FENCE_PAGE) << 32;
		val |= vma->node.start;
		val |= (u64)((stride / 128) - 1) << fence_pitch_shift;
		if (i915_gem_object_get_tiling(vma->obj) == I915_TILING_Y)
			val |= BIT(I965_FENCE_TILING_Y_SHIFT);
		val |= I965_FENCE_REG_VALID;
	}

	if (!pipelined) {
		struct drm_i915_private *dev_priv = fence->i915;

		/* To w/a incoherency with non-atomic 64-bit register updates,
		 * we split the 64-bit update into two 32-bit writes. In order
		 * for a partial fence not to be evaluated between writes, we
		 * precede the update with a write to turn off the fence
		 * register, and only enable the fence as the last step.
		 *
		 * For extra levels of paranoia, we make sure each step lands
		 * before applying the next step.
		 */
		I915_WRITE(fence_reg_lo, 0);
		POSTING_READ(fence_reg_lo);

		I915_WRITE(fence_reg_hi, upper_32_bits(val));
		I915_WRITE(fence_reg_lo, lower_32_bits(val));
		POSTING_READ(fence_reg_lo);
	}
}

static void i915_write_fence_reg(struct drm_i915_fence_reg *fence,
				 struct i915_vma *vma)
{
	u32 val;

	val = 0;
	if (vma) {
		unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
		bool is_y_tiled = tiling == I915_TILING_Y;
		unsigned int stride = i915_gem_object_get_stride(vma->obj);

		GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
		GEM_BUG_ON(vma->node.start & ~I915_FENCE_START_MASK);
		GEM_BUG_ON(!is_power_of_2(vma->fence_size));
		GEM_BUG_ON(!IS_ALIGNED(vma->node.start, vma->fence_size));

		if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->i915))
			stride /= 128;
		else
			stride /= 512;
		GEM_BUG_ON(!is_power_of_2(stride));

		val = vma->node.start;
		if (is_y_tiled)
			val |= BIT(I830_FENCE_TILING_Y_SHIFT);
		val |= I915_FENCE_SIZE_BITS(vma->fence_size);
		val |= ilog2(stride) << I830_FENCE_PITCH_SHIFT;

		val |= I830_FENCE_REG_VALID;
	}

	if (!pipelined) {
		struct drm_i915_private *dev_priv = fence->i915;
		i915_reg_t reg = FENCE_REG(fence->id);

		I915_WRITE(reg, val);
		POSTING_READ(reg);
	}
}
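
/*
 * Worked example (illustrative numbers only): for a hypothetical 2048 byte
 * stride, the gen3 pitch field above encodes ilog2(2048 / 512) = 2 for X
 * tiling, or ilog2(2048 / 128) = 4 for Y tiling on parts with 128 byte wide
 * Y tiles; the gen2 path below always uses 128 byte granularity.
 */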

static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
				 struct i915_vma *vma)
{
	u32 val;

	val = 0;
	if (vma) {
		unsigned int stride = i915_gem_object_get_stride(vma->obj);

		GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
		GEM_BUG_ON(vma->node.start & ~I830_FENCE_START_MASK);
		GEM_BUG_ON(!is_power_of_2(vma->fence_size));
		GEM_BUG_ON(!is_power_of_2(stride / 128));
		GEM_BUG_ON(!IS_ALIGNED(vma->node.start, vma->fence_size));

		val = vma->node.start;
		if (i915_gem_object_get_tiling(vma->obj) == I915_TILING_Y)
			val |= BIT(I830_FENCE_TILING_Y_SHIFT);
		val |= I830_FENCE_SIZE_BITS(vma->fence_size);
		val |= ilog2(stride / 128) << I830_FENCE_PITCH_SHIFT;
		val |= I830_FENCE_REG_VALID;
	}

	if (!pipelined) {
		struct drm_i915_private *dev_priv = fence->i915;
		i915_reg_t reg = FENCE_REG(fence->id);

		I915_WRITE(reg, val);
		POSTING_READ(reg);
	}
}

static void fence_write(struct drm_i915_fence_reg *fence,
			struct i915_vma *vma)
{
	/* Previous access through the fence register is marshalled by
	 * the mb() inside the fault handlers (i915_gem_release_mmap())
	 * and explicitly managed for internal users.
	 */

	if (IS_GEN2(fence->i915))
		i830_write_fence_reg(fence, vma);
	else if (IS_GEN3(fence->i915))
		i915_write_fence_reg(fence, vma);
	else
		i965_write_fence_reg(fence, vma);

	/* Access through the fenced region afterwards is
	 * ordered by the posting reads whilst writing the registers.
	 */

	fence->dirty = false;
}
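
/*
 * fence_update() below is where the software tracking and the hardware
 * register are brought back into sync: wait for outstanding activity on
 * both the old and the new owner, revoke the previous owner's GTT mmaps
 * before stealing the fence, and only touch the register itself if the
 * device is awake (otherwise the write is deferred to
 * i915_gem_restore_fences()).
 */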
static int fence_update(struct drm_i915_fence_reg *fence,
			struct i915_vma *vma)
{
	int ret;

	if (vma) {
		if (!i915_vma_is_map_and_fenceable(vma))
			return -EINVAL;

		if (WARN(!i915_gem_object_get_stride(vma->obj) ||
			 !i915_gem_object_get_tiling(vma->obj),
			 "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
			 i915_gem_object_get_stride(vma->obj),
			 i915_gem_object_get_tiling(vma->obj)))
			return -EINVAL;

		ret = i915_gem_active_retire(&vma->last_fence,
					     &vma->obj->base.dev->struct_mutex);
		if (ret)
			return ret;
	}

	if (fence->vma) {
		ret = i915_gem_active_retire(&fence->vma->last_fence,
				      &fence->vma->obj->base.dev->struct_mutex);
		if (ret)
			return ret;
	}

	if (fence->vma && fence->vma != vma) {
		/* Ensure that all userspace CPU access is completed before
		 * stealing the fence.
		 */
		i915_gem_release_mmap(fence->vma->obj);

		fence->vma->fence = NULL;
		fence->vma = NULL;

		list_move(&fence->link, &fence->i915->mm.fence_list);
	}

	/* We only need to update the register itself if the device is awake.
	 * If the device is currently powered down, we will defer the write
	 * to the runtime resume, see i915_gem_restore_fences().
	 */
	if (intel_runtime_pm_get_if_in_use(fence->i915)) {
		fence_write(fence, vma);
		intel_runtime_pm_put(fence->i915);
	}

	if (vma) {
		if (fence->vma != vma) {
			vma->fence = fence;
			fence->vma = vma;
		}

		list_move_tail(&fence->link, &fence->i915->mm.fence_list);
	}

	return 0;
}

/**
 * i915_vma_put_fence - force-remove fence for a VMA
 * @vma: vma to map linearly (not through a fence reg)
 *
 * This function force-removes any fence from the given object, which is useful
 * if the kernel wants to do untiled GTT access.
 *
 * Returns:
 *
 * 0 on success, negative error code on failure.
 */
int
i915_vma_put_fence(struct i915_vma *vma)
{
	struct drm_i915_fence_reg *fence = vma->fence;

	if (!fence)
		return 0;

	if (fence->pin_count)
		return -EBUSY;

	return fence_update(fence, NULL);
}

static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
{
	struct drm_i915_fence_reg *fence;

	list_for_each_entry(fence, &dev_priv->mm.fence_list, link) {
		if (fence->pin_count)
			continue;

		return fence;
	}

	/* Wait for completion of pending flips which consume fences */
	if (intel_has_pending_fb_unpin(dev_priv))
		return ERR_PTR(-EAGAIN);

	return ERR_PTR(-EDEADLK);
}

/**
 * i915_vma_get_fence - set up fencing for a vma
 * @vma: vma to map through a fence reg
 *
 * When mapping objects through the GTT, userspace wants to be able to write
 * to them without having to worry about swizzling if the object is tiled.
 * This function walks the fence regs looking for a free one for @vma,
 * stealing one if it can't find any.
 *
 * It then sets up the reg based on the object's properties: address, pitch
 * and tiling format.
 *
 * For an untiled surface, this removes any existing fence.
 *
 * Returns:
 *
 * 0 on success, negative error code on failure.
 */
int
i915_vma_get_fence(struct i915_vma *vma)
{
	struct drm_i915_fence_reg *fence;
	struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;

	/* Note that we revoke fences on runtime suspend. Therefore the user
	 * must keep the device awake whilst using the fence.
	 */
	assert_rpm_wakelock_held(vma->vm->i915);

	/* Just update our place in the LRU if our fence is getting reused. */
	if (vma->fence) {
		fence = vma->fence;
		if (!fence->dirty) {
			list_move_tail(&fence->link,
				       &fence->i915->mm.fence_list);
			return 0;
		}
	} else if (set) {
		fence = fence_find(vma->vm->i915);
		if (IS_ERR(fence))
			return PTR_ERR(fence);
	} else
		return 0;

	return fence_update(fence, set);
}

/**
 * i915_gem_revoke_fences - revoke fence state
 * @dev_priv: i915 device private
 *
 * Removes all GTT mmappings via the fence registers. This forces any user
 * of the fence to reacquire that fence before continuing with their access.
 * One use is during GPU reset where the fence register is lost and we need to
 * revoke concurrent userspace access via GTT mmaps until the hardware has been
 * reset and the fence registers have been restored.
 */
void i915_gem_revoke_fences(struct drm_i915_private *dev_priv)
{
	int i;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);

	for (i = 0; i < dev_priv->num_fence_regs; i++) {
		struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];

		if (fence->vma)
			i915_gem_release_mmap(fence->vma->obj);
	}
}
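
/*
 * Illustrative ordering only (the reset and resume callers live outside this
 * file): around a GPU reset the two helpers are used roughly as
 *
 *	i915_gem_revoke_fences(dev_priv);
 *	... reset the GPU, fence register contents are lost ...
 *	i915_gem_restore_fences(dev_priv);
 *
 * so that userspace GTT mmaps are blocked until the registers have been
 * rewritten from the software tracking.
 */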

/**
 * i915_gem_restore_fences - restore fence state
 * @dev_priv: i915 device private
 *
 * Restore the hw fence state to match the software tracking again, to be called
 * after a gpu reset and on resume. Note that on runtime suspend we only cancel
 * the fences, to be reacquired by the user later.
 */
void i915_gem_restore_fences(struct drm_i915_private *dev_priv)
{
	int i;

	for (i = 0; i < dev_priv->num_fence_regs; i++) {
		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
		struct i915_vma *vma = reg->vma;

		/*
		 * Commit delayed tiling changes if we have an object still
		 * attached to the fence, otherwise just clear the fence.
		 */
		if (vma && !i915_gem_object_is_tiled(vma->obj)) {
			GEM_BUG_ON(!reg->dirty);
			GEM_BUG_ON(!list_empty(&vma->obj->userfault_link));

			list_move(&reg->link, &dev_priv->mm.fence_list);
			vma->fence = NULL;
			vma = NULL;
		}

		fence_write(reg, vma);
		reg->vma = vma;
	}
}

/**
 * DOC: tiling swizzling details
 *
 * The idea behind tiling is to increase cache hit rates by rearranging
 * pixel data so that a group of pixel accesses are in the same cacheline.
 * Performance improvement from doing this on the back/depth buffer is on
 * the order of 30%.
 *
 * Intel architectures make this somewhat more complicated, though, by
 * adjustments made to addressing of data when the memory is in interleaved
 * mode (matched pairs of DIMMs) to improve memory bandwidth.
 * For interleaved memory, the CPU sends every sequential 64 bytes
 * to an alternate memory channel so it can get the bandwidth from both.
 *
 * The GPU also rearranges its accesses for increased bandwidth to interleaved
 * memory, and it matches what the CPU does for non-tiled. However, when tiled
 * it does it a little differently, since one walks addresses not just in the
 * X direction but also Y. So, along with alternating channels when bit
 * 6 of the address flips, it also alternates when other bits flip -- Bits 9
 * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines)
 * are common to both the 915 and 965-class hardware.
 *
 * The CPU sometimes also XORs in higher bits, to improve
 * bandwidth doing strided access like we do so frequently in graphics. This
 * is called "Channel XOR Randomization" in the MCH documentation. The result
 * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address
 * decode.
 *
 * All of this bit 6 XORing has an effect on our memory management,
 * as we need to make sure that the 3d driver can correctly address object
 * contents.
 *
 * If we don't have interleaved memory, all tiling is safe and no swizzling is
 * required.
 *
 * When bit 17 is XORed in, we simply refuse to tile at all. Bit
 * 17 is not just a page offset, so as we page an object out and back in,
 * individual pages in it will have different bit 17 addresses, resulting in
 * each 64 bytes being swapped with its neighbor!
 *
 * Otherwise, if interleaved, we have to tell the 3d driver what address
 * swizzling it needs to do, since it's writing with the CPU to the pages
 * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the
 * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling
 * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order
 * to match what the GPU expects.
 */
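
/*
 * A minimal sketch (not code used by this file) of what the cumulative
 * swizzle means for a CPU access: for I915_BIT_6_SWIZZLE_9_10 the byte
 * offset into the mapping is adjusted by folding bits 9 and 10 into bit 6,
 *
 *	offset ^= ((offset >> 3) ^ (offset >> 4)) & 64;
 *
 * while the bit-17 variants additionally fold in bit 17 of the page's
 * physical address, which is why such objects must either keep their pages
 * pinned or be fixed up afterwards (see i915_gem_object_do_bit_17_swizzle()).
 */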

/**
 * i915_gem_detect_bit_6_swizzle - detect bit 6 swizzling pattern
 * @dev_priv: i915 device private
 *
 * Detects bit 6 swizzling of address lookup between IGD access and CPU
 * access through main memory.
 */
void
i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv)
{
	uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
	uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;

	if (INTEL_GEN(dev_priv) >= 8 || IS_VALLEYVIEW(dev_priv)) {
		/*
		 * On BDW+, swizzling is not used. We leave the CPU memory
		 * controller in charge of optimizing memory accesses without
		 * the extra address manipulation on the GPU side.
		 *
		 * VLV and CHV don't have GPU swizzling.
		 */
		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
	} else if (INTEL_GEN(dev_priv) >= 6) {
		if (dev_priv->preserve_bios_swizzle) {
			if (I915_READ(DISP_ARB_CTL) &
			    DISP_TILE_SURFACE_SWIZZLING) {
				swizzle_x = I915_BIT_6_SWIZZLE_9_10;
				swizzle_y = I915_BIT_6_SWIZZLE_9;
			} else {
				swizzle_x = I915_BIT_6_SWIZZLE_NONE;
				swizzle_y = I915_BIT_6_SWIZZLE_NONE;
			}
		} else {
			uint32_t dimm_c0, dimm_c1;
			dimm_c0 = I915_READ(MAD_DIMM_C0);
			dimm_c1 = I915_READ(MAD_DIMM_C1);
			dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
			dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
			/* Enable swizzling when the channels are populated
			 * with identically sized dimms. We don't need to check
			 * the 3rd channel because no cpu with gpu attached
			 * ships in that configuration. Also, swizzling only
			 * makes sense for 2 channels anyway. */
			if (dimm_c0 == dimm_c1) {
				swizzle_x = I915_BIT_6_SWIZZLE_9_10;
				swizzle_y = I915_BIT_6_SWIZZLE_9;
			} else {
				swizzle_x = I915_BIT_6_SWIZZLE_NONE;
				swizzle_y = I915_BIT_6_SWIZZLE_NONE;
			}
		}
	} else if (IS_GEN5(dev_priv)) {
		/* On Ironlake, the GPU always uses the same swizzling setup
		 * regardless of the DRAM configuration.
		 */
		swizzle_x = I915_BIT_6_SWIZZLE_9_10;
		swizzle_y = I915_BIT_6_SWIZZLE_9;
	} else if (IS_GEN2(dev_priv)) {
		/* As far as we know, the 865 doesn't have these bit 6
		 * swizzling issues.
		 */
		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
	} else if (IS_MOBILE(dev_priv) ||
		   IS_I915G(dev_priv) || IS_I945G(dev_priv)) {
		uint32_t dcc;

		/* On 9xx chipsets, channel interleave by the CPU is
		 * determined by DCC. For single-channel, neither the CPU
		 * nor the GPU does swizzling. For dual channel interleaved,
		 * the GPU's interleave is bits 9 and 10 for X tiled, and bit
		 * 9 for Y tiled. The CPU's interleave is independent, and
		 * can be based on either bit 11 (haven't seen this yet) or
		 * bit 17 (common).
		 */
		dcc = I915_READ(DCC);
		switch (dcc & DCC_ADDRESSING_MODE_MASK) {
		case DCC_ADDRESSING_MODE_SINGLE_CHANNEL:
		case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC:
			swizzle_x = I915_BIT_6_SWIZZLE_NONE;
			swizzle_y = I915_BIT_6_SWIZZLE_NONE;
			break;
		case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED:
			if (dcc & DCC_CHANNEL_XOR_DISABLE) {
				/* This is the base swizzling by the GPU for
				 * tiled buffers.
				 */
				swizzle_x = I915_BIT_6_SWIZZLE_9_10;
				swizzle_y = I915_BIT_6_SWIZZLE_9;
			} else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) {
				/* Bit 11 swizzling by the CPU in addition. */
				swizzle_x = I915_BIT_6_SWIZZLE_9_10_11;
				swizzle_y = I915_BIT_6_SWIZZLE_9_11;
			} else {
				/* Bit 17 swizzling by the CPU in addition. */
				swizzle_x = I915_BIT_6_SWIZZLE_9_10_17;
				swizzle_y = I915_BIT_6_SWIZZLE_9_17;
			}
			break;
		}

		/* check for L-shaped memory aka modified enhanced addressing */
		if (IS_GEN4(dev_priv) &&
		    !(I915_READ(DCC2) & DCC2_MODIFIED_ENHANCED_DISABLE)) {
			swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
			swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
		}

		if (dcc == 0xffffffff) {
			DRM_ERROR("Couldn't read from MCHBAR. "
				  "Disabling tiling.\n");
			swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
			swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
		}
	} else {
		/* The 965, G33, and newer, have a very flexible memory
		 * configuration. It will enable dual-channel mode
		 * (interleaving) on as much memory as it can, and the GPU
		 * will additionally sometimes enable different bit 6
		 * swizzling for tiled objects from the CPU.
		 *
		 * Here's what I found on the G965:
		 *    slot fill         memory size  swizzling
		 * 0A   0B   1A   1B   1-ch   2-ch
		 * 512  0    0    0    512    0     O
		 * 512  0    512  0    16     1008  X
		 * 512  0    0    512  16     1008  X
		 * 0    512  0    512  16     1008  X
		 * 1024 1024 1024 0    2048   1024  O
		 *
		 * We could probably detect this based on either the DRB
		 * matching, which was the case for the swizzling required in
		 * the table above, or from the 1-ch value being less than
		 * the minimum size of a rank.
		 *
		 * Reports indicate that the swizzling actually
		 * varies depending upon page placement inside the
		 * channels, i.e. we see swizzled pages where the
		 * banks of memory are paired and unswizzled on the
		 * uneven portion, so leave that as unknown.
		 */
		if (I915_READ16(C0DRB3) == I915_READ16(C1DRB3)) {
			swizzle_x = I915_BIT_6_SWIZZLE_9_10;
			swizzle_y = I915_BIT_6_SWIZZLE_9;
		}
	}

	if (swizzle_x == I915_BIT_6_SWIZZLE_UNKNOWN ||
	    swizzle_y == I915_BIT_6_SWIZZLE_UNKNOWN) {
		/* Userspace likes to explode if it sees unknown swizzling,
		 * so lie. We will finish the lie when reporting through
		 * the get-tiling-ioctl by reporting the physical swizzle
		 * mode as unknown instead.
		 *
		 * As we don't strictly know what the swizzling is, it may be
		 * bit17 dependent, and so we need to also prevent the pages
		 * from being moved.
		 */
		dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES;
		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
	}

	dev_priv->mm.bit_6_swizzle_x = swizzle_x;
	dev_priv->mm.bit_6_swizzle_y = swizzle_y;
}

/*
 * Swap every 64 bytes of this page around, to account for it having a new
 * bit 17 of its physical address and therefore being interpreted differently
 * by the GPU.
 */
static void
i915_gem_swizzle_page(struct page *page)
{
	char temp[64];
	char *vaddr;
	int i;

	vaddr = kmap(page);

	for (i = 0; i < PAGE_SIZE; i += 128) {
		memcpy(temp, &vaddr[i], 64);
		memcpy(&vaddr[i], &vaddr[i + 64], 64);
		memcpy(&vaddr[i + 64], temp, 64);
	}

	kunmap(page);
}
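
/*
 * Illustrative pairing only (the real callers are the backing storage
 * pin/unpin paths elsewhere in the driver): for an object with bit-17
 * dependent swizzling, the two helpers below bracket any interval in which
 * the pages may be swapped out and come back at different physical
 * addresses,
 *
 *	i915_gem_object_save_bit_17_swizzle(obj, pages);
 *	... pages unpinned, may move or be swapped out and back ...
 *	i915_gem_object_do_bit_17_swizzle(obj, pages);
 */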

/**
 * i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling
 * @obj: i915 GEM buffer object
 * @pages: the scattergather list of physical pages
 *
 * This function fixes up the swizzling in case any page frame number for this
 * object has changed in bit 17 since that state has been saved with
 * i915_gem_object_save_bit_17_swizzle().
 *
 * This is called when pinning backing storage again, since the kernel is free
 * to move unpinned backing storage around (either by directly moving pages or
 * by swapping them out and back in again).
 */
void
i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
				  struct sg_table *pages)
{
	struct sgt_iter sgt_iter;
	struct page *page;
	int i;

	if (obj->bit_17 == NULL)
		return;

	i = 0;
	for_each_sgt_page(page, sgt_iter, pages) {
		char new_bit_17 = page_to_phys(page) >> 17;
		if ((new_bit_17 & 0x1) != (test_bit(i, obj->bit_17) != 0)) {
			i915_gem_swizzle_page(page);
			set_page_dirty(page);
		}
		i++;
	}
}

/**
 * i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling
 * @obj: i915 GEM buffer object
 * @pages: the scattergather list of physical pages
 *
 * This function saves the bit 17 of each page frame number so that swizzling
 * can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This must
 * be called before the backing storage can be unpinned.
 */
void
i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
				    struct sg_table *pages)
{
	const unsigned int page_count = obj->base.size >> PAGE_SHIFT;
	struct sgt_iter sgt_iter;
	struct page *page;
	int i;

	if (obj->bit_17 == NULL) {
		obj->bit_17 = kcalloc(BITS_TO_LONGS(page_count),
				      sizeof(long), GFP_KERNEL);
		if (obj->bit_17 == NULL) {
			DRM_ERROR("Failed to allocate memory for bit 17 "
				  "record\n");
			return;
		}
	}

	i = 0;

	for_each_sgt_page(page, sgt_iter, pages) {
		if (page_to_phys(page) & (1 << 17))
			__set_bit(i, obj->bit_17);
		else
			__clear_bit(i, obj->bit_17);
		i++;
	}
}