/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"

/* sdma */
#define CIK_SDMA_UCODE_SIZE 1050
#define CIK_SDMA_UCODE_VERSION 64

/*
 * sDMA - System DMA
 * Starting with CIK, the GPU has new asynchronous
 * DMA engines. These engines are used for compute
 * and gfx. There are two DMA engines (SDMA0, SDMA1)
 * and each one supports 1 ring buffer used for gfx
 * and 2 queues used for compute.
 *
 * The programming model is very similar to the CP
 * (ring buffer, IBs, etc.), but sDMA has its own
 * packet format that is different from the PM4 format
 * used by the CP. sDMA supports copying data, writing
 * embedded data, solid fills, and a number of other
 * things. It also has support for tiling/detiling of
 * buffers.
 */
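
/*
 * For orientation: sDMA packet headers in this file are built with
 * SDMA_PACKET() from cikd.h, which packs the opcode into bits 7:0,
 * the sub-opcode into bits 15:8, and packet-specific "extra" bits
 * (VMID, poll function, etc.) into bits 31:16, roughly:
 *
 *	header = (extra << 16) | (sub_op << 8) | op;
 *
 * This is a descriptive sketch only; see cikd.h for the exact macro.
 */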

/**
 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (CIK).
 */
void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
			      struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;

	if (rdev->wb.enabled) {
		u32 next_rptr = ring->wptr + 5;
		while ((next_rptr & 7) != 4)
			next_rptr++;
		next_rptr += 4;
		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
		radeon_ring_write(ring, 1); /* number of DWs to follow */
		radeon_ring_write(ring, next_rptr);
	}

	/*
	 * The IB packet must end on an 8-DW boundary.  The INDIRECT_BUFFER
	 * packet below is 4 DWs, so pad with NOPs until wptr is 4 DWs into
	 * an 8-DW window.
	 */
	while ((ring->wptr & 7) != 4)
		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
	radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
	radeon_ring_write(ring, ib->length_dw);
}

/**
 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Add a DMA fence packet to the ring to write
 * the fence seq number and a DMA trap packet to generate
 * an interrupt if needed (CIK).
 */
void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
			      struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
	u32 ref_and_mask;

	if (fence->ring == R600_RING_TYPE_DMA_INDEX)
		ref_and_mask = SDMA0;
	else
		ref_and_mask = SDMA1;

	/* write the fence */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
	radeon_ring_write(ring, addr & 0xffffffff);
	radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	radeon_ring_write(ring, fence->seq);
	/* generate an interrupt */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
	/* flush HDP */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
	radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
	radeon_ring_write(ring, ref_and_mask); /* MASK */
	radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
}
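
/*
 * A note on the HDP flush sequence above (also used by cik_dma_vm_flush()
 * below): with EXTRA_OP(1) and EXTRA_FUNC(3), the POLL_REG_MEM packet
 * appears to make the engine write REFERENCE to GPU_HDP_FLUSH_REQ and
 * then poll GPU_HDP_FLUSH_DONE until (DONE & MASK) == REFERENCE, i.e.
 * until the HDP flush for this engine has completed.  ref_and_mask
 * selects the SDMA0 or SDMA1 bit in those registers.  This reading is
 * inferred from the packet layout, not taken from hardware docs.
 */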

/**
 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @semaphore: radeon semaphore object
 * @emit_wait: wait or signal semaphore
 *
 * Add a DMA semaphore packet to the ring to wait on or signal
 * other rings (CIK).
 */
void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
				  struct radeon_ring *ring,
				  struct radeon_semaphore *semaphore,
				  bool emit_wait)
{
	u64 addr = semaphore->gpu_addr;
	u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
	radeon_ring_write(ring, addr & 0xfffffff8);
	radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
}

/**
 * cik_sdma_gfx_stop - stop the gfx async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx async dma ring buffers (CIK).
 */
static void cik_sdma_gfx_stop(struct radeon_device *rdev)
{
	u32 rb_cntl, reg_offset;
	int i;

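	/*
	 * While the DMA engines are down the asic copy callback cannot be
	 * used, so (presumably) TTM is restricted to the CPU-visible part
	 * of VRAM, where the CPU can still move buffers itself.
	 */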
	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);

	for (i = 0; i < 2; i++) {
		if (i == 0)
			reg_offset = SDMA0_REGISTER_OFFSET;
		else
			reg_offset = SDMA1_REGISTER_OFFSET;
		rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
		rb_cntl &= ~SDMA_RB_ENABLE;
		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
	}
}

/**
 * cik_sdma_rlc_stop - stop the compute async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the compute async dma queues (CIK).
 */
static void cik_sdma_rlc_stop(struct radeon_device *rdev)
{
	/* XXX todo */
}

/**
 * cik_sdma_enable - halt or unhalt the async dma engines
 *
 * @rdev: radeon_device pointer
 * @enable: enable/disable the DMA MEs.
 *
 * Halt or unhalt the async dma engines (CIK).
 */
void cik_sdma_enable(struct radeon_device *rdev, bool enable)
{
	u32 me_cntl, reg_offset;
	int i;

	for (i = 0; i < 2; i++) {
		if (i == 0)
			reg_offset = SDMA0_REGISTER_OFFSET;
		else
			reg_offset = SDMA1_REGISTER_OFFSET;
		me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
		if (enable)
			me_cntl &= ~SDMA_HALT;
		else
			me_cntl |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
	}
}
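
/*
 * cik_sdma_load_microcode() below halts the MEs via
 * cik_sdma_enable(rdev, false) before streaming in the ucode;
 * cik_sdma_resume() unhalts them again once loading is done.
 */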

/**
 * cik_sdma_gfx_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the gfx DMA ring buffers and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 rb_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 reg_offset, wb_offset;
	int i, r;

	for (i = 0; i < 2; i++) {
		if (i == 0) {
			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
			reg_offset = SDMA0_REGISTER_OFFSET;
			wb_offset = R600_WB_DMA_RPTR_OFFSET;
		} else {
			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
			reg_offset = SDMA1_REGISTER_OFFSET;
			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
		}

		WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
		WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = order_base_2(ring->ring_size / 4);
		rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
		rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
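
		/*
		 * Example: a 256KB ring is 64K DWs, so order_base_2()
		 * yields 16 and the RB size field (the "<< 1" above)
		 * is programmed to 16.
		 */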

		/* Initialize the ring buffer's read and write pointers */
		WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);

		/* set the wb address whether it's enabled or not */
		WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
		WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));

		if (rdev->wb.enabled)
			rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;

		WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
		WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);

		ring->wptr = 0;
		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);

		ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;

		/* enable DMA RB */
		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);

		ib_cntl = SDMA_IB_ENABLE;
#ifdef __BIG_ENDIAN
		ib_cntl |= SDMA_IB_SWAP_ENABLE;
#endif
		/* enable DMA IBs */
		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);

		ring->ready = true;

		r = radeon_ring_test(rdev, ring->idx, ring);
		if (r) {
			ring->ready = false;
			return r;
		}
	}

	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}

/**
 * cik_sdma_rlc_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the compute DMA queues and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_rlc_resume(struct radeon_device *rdev)
{
	/* XXX todo */
	return 0;
}

/**
 * cik_sdma_load_microcode - load the sDMA ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the sDMA0/1 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_sdma_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->sdma_fw)
		return -EINVAL;

	/* stop the gfx rings and rlc compute queues */
	cik_sdma_gfx_stop(rdev);
	cik_sdma_rlc_stop(rdev);

	/* halt the MEs */
	cik_sdma_enable(rdev, false);

	/* sdma0 */
	fw_data = (const __be32 *)rdev->sdma_fw->data;
	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
		WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
	WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);

	/* sdma1 */
	fw_data = (const __be32 *)rdev->sdma_fw->data;
	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
		WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
	WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);

	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
	return 0;
}
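
/*
 * Both engines are loaded from the same big-endian firmware image, hence
 * be32_to_cpup() for each dword.  SDMA0_UCODE_ADDR presumably
 * auto-increments as SDMA0_UCODE_DATA is written (it is programmed once,
 * streamed to, then reset to 0); that behavior is inferred from the
 * write pattern above, not from documentation.
 */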

/**
 * cik_sdma_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the DMA engines and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
int cik_sdma_resume(struct radeon_device *rdev)
{
	int r;

	/* Reset dma */
	WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
	RREG32(SRBM_SOFT_RESET);
	udelay(50);
	WREG32(SRBM_SOFT_RESET, 0);
	RREG32(SRBM_SOFT_RESET);

	r = cik_sdma_load_microcode(rdev);
	if (r)
		return r;

	/* unhalt the MEs */
	cik_sdma_enable(rdev, true);

	/* start the gfx rings and rlc compute queues */
	r = cik_sdma_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_sdma_rlc_resume(rdev);
	if (r)
		return r;

	return 0;
}

/**
 * cik_sdma_fini - tear down the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines and free the rings (CIK).
 */
void cik_sdma_fini(struct radeon_device *rdev)
{
	/* stop the gfx rings and rlc compute queues */
	cik_sdma_gfx_stop(rdev);
	cik_sdma_rlc_stop(rdev);
	/* halt the MEs */
	cik_sdma_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
	/* XXX - compute dma queue tear down */
}

/**
 * cik_copy_dma - copy pages using the DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
 * @fence: radeon fence object
 *
 * Copy GPU pages using the DMA engine (CIK).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
 */
int cik_copy_dma(struct radeon_device *rdev,
		 uint64_t src_offset, uint64_t dst_offset,
		 unsigned num_gpu_pages,
		 struct radeon_fence **fence)
{
	struct radeon_semaphore *sem = NULL;
	int ring_index = rdev->asic->copy.dma_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes;
	int i, num_loops;
	int r = 0;

	r = radeon_semaphore_create(rdev, &sem);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		return r;
	}
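
	/*
	 * Each COPY_LINEAR packet below is 7 DWs and moves at most
	 * 0x1fffff bytes, so reserve 7 DWs per loop plus 14 DWs of
	 * headroom for the optional semaphore sync and the fence.
	 */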
	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_semaphore_free(rdev, &sem, NULL);
		return r;
	}

	if (radeon_fence_need_sync(*fence, ring->idx)) {
		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
					    ring->idx);
		radeon_fence_note_sync(*fence, ring->idx);
	} else {
		radeon_semaphore_free(rdev, &sem, NULL);
	}

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
		radeon_ring_write(ring, cur_size_in_bytes);
		radeon_ring_write(ring, 0); /* src/dst endian swap */
		radeon_ring_write(ring, src_offset & 0xffffffff);
		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
		radeon_ring_write(ring, dst_offset & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		return r;
	}

	radeon_ring_unlock_commit(rdev, ring);
	radeon_semaphore_free(rdev, &sem, *fence);

	return r;
}

/**
 * cik_sdma_ring_test - simple async dma engine test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value
 * to memory (CIK).
 * Returns 0 for success, error for failure.
 */
int cik_sdma_ring_test(struct radeon_device *rdev,
		       struct radeon_ring *ring)
{
	unsigned i;
	int r;
	volatile void __iomem *ptr = (volatile void *)rdev->vram_scratch.ptr;
	u32 tmp;

	if (!ptr) {
		DRM_ERROR("invalid vram scratch pointer\n");
		return -EINVAL;
	}
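
	/*
	 * vram_scratch is a kmapped VRAM buffer, so it is accessed
	 * with writel()/readl() rather than plain loads and stores.
	 */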
	tmp = 0xCAFEDEAD;
	writel(tmp, ptr);

	/* 5 DWs: header, addr lo/hi, count, data */
	r = radeon_ring_lock(rdev, ring, 5);
	if (r) {
		DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
		return r;
	}
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
	radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
	radeon_ring_write(ring, 1); /* number of DWs to follow */
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);

	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = readl(ptr);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}

	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
			  ring->idx, tmp);
		r = -EINVAL;
	}
	return r;
}

/**
 * cik_sdma_ib_test - test an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Test a simple IB in the DMA ring (CIK).
 * Returns 0 on success, error on failure.
 */
int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	unsigned i;
	int r;
	volatile void __iomem *ptr = (volatile void *)rdev->vram_scratch.ptr;
	u32 tmp = 0;

	if (!ptr) {
		DRM_ERROR("invalid vram scratch pointer\n");
		return -EINVAL;
	}

	tmp = 0xCAFEDEAD;
	writel(tmp, ptr);

	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		return r;
	}

	ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
	ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
	ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
	ib.ptr[3] = 1;
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = radeon_ib_schedule(rdev, &ib, NULL);
	if (r) {
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		return r;
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = readl(ptr);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
		r = -EINVAL;
	}
	radeon_ib_free(rdev, &ib);
	return r;
}

/**
 * cik_sdma_is_lockup - Check if the DMA engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the async DMA engine is locked up (CIK).
 * Returns true if the engine appears to be locked up, false if not.
 */
bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
	u32 mask;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		mask = RADEON_RESET_DMA;
	else
		mask = RADEON_RESET_DMA1;

	if (!(reset_mask & mask)) {
		radeon_ring_lockup_update(ring);
		return false;
	}
	/* force ring activities */
	radeon_ring_force_activity(rdev, ring);
	return radeon_ring_test_lockup(rdev, ring);
}

/**
 * cik_sdma_vm_set_page - update the page tables using sDMA
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using sDMA (CIK).
 */
void cik_sdma_vm_set_page(struct radeon_device *rdev,
			  struct radeon_ib *ib,
			  uint64_t pe,
			  uint64_t addr, unsigned count,
			  uint32_t incr, uint32_t flags)
{
	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
	uint64_t value;
	unsigned ndw;

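	/*
	 * Both paths below cap one packet at 0x7FFFF page entries: the
	 * linear write carries two DWs per entry (ndw <= 0xFFFFE), while
	 * GENERATE_PTE_PDE takes the entry count directly (ndw <= 0x7FFFF).
	 */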
	if (flags & RADEON_VM_PAGE_SYSTEM) {
		while (count) {
			ndw = count * 2;
			if (ndw > 0xFFFFE)
				ndw = 0xFFFFE;

			/* for non-physically contiguous pages (system) */
			ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
			ib->ptr[ib->length_dw++] = pe;
			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
			ib->ptr[ib->length_dw++] = ndw;
			for (; ndw > 0; ndw -= 2, --count, pe += 8) {
				if (flags & RADEON_VM_PAGE_SYSTEM) {
					value = radeon_vm_map_gart(rdev, addr);
					value &= 0xFFFFFFFFFFFFF000ULL;
				} else if (flags & RADEON_VM_PAGE_VALID) {
					value = addr;
				} else {
					value = 0;
				}
				addr += incr;
				value |= r600_flags;
				ib->ptr[ib->length_dw++] = value;
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
			}
		}
	} else {
		while (count) {
			ndw = count;
			if (ndw > 0x7FFFF)
				ndw = 0x7FFFF;

			if (flags & RADEON_VM_PAGE_VALID)
				value = addr;
			else
				value = 0;
			/* for physically contiguous pages (vram) */
			ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
			ib->ptr[ib->length_dw++] = pe; /* dst addr */
			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
			ib->ptr[ib->length_dw++] = r600_flags; /* mask */
			ib->ptr[ib->length_dw++] = 0;
			ib->ptr[ib->length_dw++] = value; /* value */
			ib->ptr[ib->length_dw++] = upper_32_bits(value);
			ib->ptr[ib->length_dw++] = incr; /* increment size */
			ib->ptr[ib->length_dw++] = 0;
			ib->ptr[ib->length_dw++] = ndw; /* number of entries */
			pe += ndw * 8;
			addr += ndw * incr;
			count -= ndw;
		}
	}
	while (ib->length_dw & 0x7)
		ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
}

/**
 * cik_dma_vm_flush - cik vm flush using sDMA
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index to emit the flush on
 * @vm: radeon_vm pointer
 *
 * Update the page table base and flush the VM TLB
 * using sDMA (CIK).
 */
void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];
	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
	u32 ref_and_mask;

	if (vm == NULL)
		return;

	if (ridx == R600_RING_TYPE_DMA_INDEX)
		ref_and_mask = SDMA0;
	else
		ref_and_mask = SDMA1;

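	/*
	 * SRBM_WRITE is used below to poke registers from the ring; the
	 * 0xf000 in the extra bits looks like a byte-enable (all four
	 * bytes) for the register write.  That is an interpretation of
	 * the packet encoding, not something taken from hardware docs.
	 */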
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	if (vm->id < 8) {
		radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* update SH_MEM_* regs */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, VMID(vm->id));

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
	radeon_ring_write(ring, 1);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, VMID(0));

	/* flush HDP */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
	radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
	radeon_ring_write(ring, ref_and_mask); /* MASK */
	radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */

	/* flush TLB */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 1 << vm->id);
}
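
/*
 * Most of these entry points are reached through the radeon_asic
 * dispatch table (radeon_asic.c) rather than called directly.  An
 * illustrative sketch -- field names follow struct radeon_asic, but
 * this is not copied from the actual CIK table:
 *
 *	.ring[R600_RING_TYPE_DMA_INDEX] = {
 *		.ib_execute = &cik_sdma_ring_ib_execute,
 *		.emit_fence = &cik_sdma_fence_ring_emit,
 *		.emit_semaphore = &cik_sdma_semaphore_ring_emit,
 *		.ring_test = &cik_sdma_ring_test,
 *		.ib_test = &cik_sdma_ib_test,
 *		.is_lockup = &cik_sdma_is_lockup,
 *	},
 *	.copy = {
 *		.dma = &cik_copy_dma,
 *		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
 *	},
 */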