/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_trace.h"
#include "nid.h"

/*
 * DMA
 * Starting with R600, the GPU has an asynchronous
 * DMA engine. The programming model is very similar
 * to the 3D engine (ring buffer, IBs, etc.), but the
 * DMA controller has its own packet format that is
 * different from the PM4 format used by the 3D engine.
 * It supports copying data, writing embedded data,
 * solid fills, and a number of other things. It also
 * has support for tiling/detiling of buffers.
 * Cayman and newer support two asynchronous DMA engines.
 */

/**
 * cayman_dma_get_rptr - get the current read pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current rptr from the hardware (cayman+).
 */
uint32_t cayman_dma_get_rptr(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 rptr, reg;

	if (rdev->wb.enabled) {
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	} else {
		if (ring->idx == R600_RING_TYPE_DMA_INDEX)
			reg = DMA_RB_RPTR + DMA0_REGISTER_OFFSET;
		else
			reg = DMA_RB_RPTR + DMA1_REGISTER_OFFSET;

		rptr = RREG32(reg);
	}

	return (rptr & 0x3fffc) >> 2;
}

/**
 * cayman_dma_get_wptr - get the current write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current wptr from the hardware (cayman+).
 */
uint32_t cayman_dma_get_wptr(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 reg;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET;
	else
		reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET;

	return (RREG32(reg) & 0x3fffc) >> 2;
}

/**
 * cayman_dma_set_wptr - commit the write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Write the wptr back to the hardware (cayman+).
 */
void cayman_dma_set_wptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 reg;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET;
	else
		reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET;

	WREG32(reg, (ring->wptr << 2) & 0x3fffc);
}

/**
 * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (cayman-SI).
 */
void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
				struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;

	if (rdev->wb.enabled) {
		/* Write the rptr value expected after the padded IB packet
		 * to the ring's next_rptr writeback slot.
		 */
		u32 next_rptr = ring->wptr + 4;
		while ((next_rptr & 7) != 5)
			next_rptr++;
		next_rptr += 3;
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
		radeon_ring_write(ring, next_rptr);
	}

	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
	 * Pad as necessary with NOPs.
	 */
	while ((ring->wptr & 7) != 5)
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0));
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
	radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
}

/**
 * cayman_dma_stop - stop the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines (cayman-SI).
 */
void cayman_dma_stop(struct radeon_device *rdev)
{
	u32 rb_cntl;

	if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
	    (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);

	/* dma0 */
	rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
	rb_cntl &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);

	/* dma1 */
	rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
	rb_cntl &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);

	rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
	rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
}

/**
 * cayman_dma_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the DMA ring buffers and enable them (cayman-SI).
 * Returns 0 for success, error for failure.
 */
int cayman_dma_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 rb_cntl, dma_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 reg_offset, wb_offset;
	int i, r;

	for (i = 0; i < 2; i++) {
		if (i == 0) {
			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
			reg_offset = DMA0_REGISTER_OFFSET;
			wb_offset = R600_WB_DMA_RPTR_OFFSET;
		} else {
			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
			reg_offset = DMA1_REGISTER_OFFSET;
			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
		}

		WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
		WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = order_base_2(ring->ring_size / 4);
		rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
		rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(DMA_RB_RPTR + reg_offset, 0);
		WREG32(DMA_RB_WPTR + reg_offset, 0);

		/* set the wb address whether it's enabled or not */
		WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
		WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));

		if (rdev->wb.enabled)
			rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;

		WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);

		/* enable DMA IBs */
		ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
#ifdef __BIG_ENDIAN
		ib_cntl |= DMA_IB_SWAP_ENABLE;
#endif
		WREG32(DMA_IB_CNTL + reg_offset, ib_cntl);

		dma_cntl = RREG32(DMA_CNTL + reg_offset);
		dma_cntl &= ~CTXEMPTY_INT_ENABLE;
		WREG32(DMA_CNTL + reg_offset, dma_cntl);

		ring->wptr = 0;
		WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);

		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);

		ring->ready = true;

		r = radeon_ring_test(rdev, ring->idx, ring);
		if (r) {
			ring->ready = false;
			return r;
		}
	}

	if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
	    (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}

/**
 * cayman_dma_fini - tear down the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines and free the rings (cayman-SI).
 */
void cayman_dma_fini(struct radeon_device *rdev)
{
	cayman_dma_stop(rdev);
	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
}

/**
 * cayman_dma_is_lockup - Check if the DMA engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the async DMA engine is locked up.
 * Returns true if the engine appears to be locked up, false if not.
 */
bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
	u32 mask;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		mask = RADEON_RESET_DMA;
	else
		mask = RADEON_RESET_DMA1;

	if (!(reset_mask & mask)) {
		radeon_ring_lockup_update(rdev, ring);
		return false;
	}
	return radeon_ring_test_lockup(rdev, ring);
}

/**
 * cayman_dma_vm_copy_pages - update PTEs by copying them from the GART
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @src: src addr where to copy from
 * @count: number of page entries to update
 *
 * Update PTEs by copying them from the GART using the DMA (cayman/TN).
 */
void cayman_dma_vm_copy_pages(struct radeon_device *rdev,
			      struct radeon_ib *ib,
			      uint64_t pe, uint64_t src,
			      unsigned count)
{
	unsigned ndw;

	while (count) {
		ndw = count * 2;
		if (ndw > 0xFFFFE)
			ndw = 0xFFFFE;

		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
						      0, 0, ndw);
		ib->ptr[ib->length_dw++] = lower_32_bits(pe);
		ib->ptr[ib->length_dw++] = lower_32_bits(src);
		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;

		pe += ndw * 4;
		src += ndw * 4;
		count -= ndw / 2;
	}
}

/**
 * cayman_dma_vm_write_pages - update PTEs by writing them manually
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Update PTEs by writing them manually using the DMA (cayman/TN).
 */
void cayman_dma_vm_write_pages(struct radeon_device *rdev,
			       struct radeon_ib *ib,
			       uint64_t pe,
			       uint64_t addr, unsigned count,
			       uint32_t incr, uint32_t flags)
{
	uint64_t value;
	unsigned ndw;

	while (count) {
		ndw = count * 2;
		if (ndw > 0xFFFFE)
			ndw = 0xFFFFE;

		/* for non-physically contiguous pages (system) */
		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE,
						      0, 0, ndw);
		ib->ptr[ib->length_dw++] = pe;
		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		for (; ndw > 0; ndw -= 2, --count, pe += 8) {
			if (flags & R600_PTE_SYSTEM) {
				value = radeon_vm_map_gart(rdev, addr);
			} else if (flags & R600_PTE_VALID) {
				value = addr;
			} else {
				value = 0;
			}
			addr += incr;
			value |= flags;
			ib->ptr[ib->length_dw++] = value;
			ib->ptr[ib->length_dw++] = upper_32_bits(value);
		}
	}
}

/**
 * cayman_dma_vm_set_pages - update the page tables using the DMA
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Update the page tables using the DMA (cayman/TN).
 */
void cayman_dma_vm_set_pages(struct radeon_device *rdev,
			     struct radeon_ib *ib,
			     uint64_t pe,
			     uint64_t addr, unsigned count,
			     uint32_t incr, uint32_t flags)
{
	uint64_t value;
	unsigned ndw;

	while (count) {
		ndw = count * 2;
		if (ndw > 0xFFFFE)
			ndw = 0xFFFFE;

		if (flags & R600_PTE_VALID)
			value = addr;
		else
			value = 0;

		/* for physically contiguous pages (vram) */
		ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
		ib->ptr[ib->length_dw++] = pe; /* dst addr */
		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		ib->ptr[ib->length_dw++] = flags; /* mask */
		ib->ptr[ib->length_dw++] = 0;
		ib->ptr[ib->length_dw++] = value; /* value */
		ib->ptr[ib->length_dw++] = upper_32_bits(value);
		ib->ptr[ib->length_dw++] = incr; /* increment size */
		ib->ptr[ib->length_dw++] = 0;

		pe += ndw * 4;
		addr += (ndw / 2) * incr;
		count -= ndw / 2;
	}
}

/**
 * cayman_dma_vm_pad_ib - pad the IB to the required number of dw
 *
 * @ib: indirect buffer to fill with padding
 *
 * Pad the IB with NOPs so its size is a multiple of 8 dwords, as required
 * by the DMA engine.
 */
void cayman_dma_vm_pad_ib(struct radeon_ib *ib)
{
	while (ib->length_dw & 0x7)
		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
}
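
/**
 * cayman_dma_vm_flush - flush the TLB using the DMA ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring pointer
 * @vm_id: vm instance to flush
 * @pd_addr: address of the page directory
 *
 * Update the page table base address for the requested VM and flush
 * the VM TLBs using the DMA ring (cayman/TN).
 */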
void cayman_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
			 unsigned vm_id, uint64_t pd_addr)
{
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2));
	radeon_ring_write(ring, pd_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-7 are the VM contexts0-7 */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for invalidate to complete */
	radeon_ring_write(ring, DMA_SRBM_READ_PACKET);
	radeon_ring_write(ring, (0xff << 20) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0); /* value */
}