1 /* $OpenBSD: rv770.c,v 1.7 2015/04/18 14:47:35 jsg Exp $ */ 2 /* 3 * Copyright 2008 Advanced Micro Devices, Inc. 4 * Copyright 2008 Red Hat Inc. 5 * Copyright 2009 Jerome Glisse. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the "Software"), 9 * to deal in the Software without restriction, including without limitation 10 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 * and/or sell copies of the Software, and to permit persons to whom the 12 * Software is furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 21 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 22 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 23 * OTHER DEALINGS IN THE SOFTWARE. 24 * 25 * Authors: Dave Airlie 26 * Alex Deucher 27 * Jerome Glisse 28 */ 29 #include <dev/pci/drm/drmP.h> 30 #include "radeon.h" 31 #include "radeon_asic.h" 32 #include <dev/pci/drm/radeon_drm.h> 33 #include "rv770d.h" 34 #include "atom.h" 35 #include "avivod.h" 36 37 #define R700_PFP_UCODE_SIZE 848 38 #define R700_PM4_UCODE_SIZE 1360 39 40 static void rv770_gpu_init(struct radeon_device *rdev); 41 void rv770_fini(struct radeon_device *rdev); 42 static void rv770_pcie_gen2_enable(struct radeon_device *rdev); 43 44 u32 rv770_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base) 45 { 46 struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id]; 47 u32 tmp = RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset); 48 int i; 49 50 /* Lock the graphics update lock */ 51 tmp |= AVIVO_D1GRPH_UPDATE_LOCK; 52 WREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset, tmp); 53 54 /* update the scanout addresses */ 55 if (radeon_crtc->crtc_id) { 56 WREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base)); 57 WREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base)); 58 } else { 59 WREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base)); 60 WREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base)); 61 } 62 WREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset, 63 (u32)crtc_base); 64 WREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset, 65 (u32)crtc_base); 66 67 /* Wait for update_pending to go high. */ 68 for (i = 0; i < rdev->usec_timeout; i++) { 69 if (RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset) & AVIVO_D1GRPH_SURFACE_UPDATE_PENDING) 70 break; 71 udelay(1); 72 } 73 DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n"); 74 75 /* Unlock the lock, so double-buffering can take place inside vblank */ 76 tmp &= ~AVIVO_D1GRPH_UPDATE_LOCK; 77 WREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset, tmp); 78 79 /* Return current update_pending status: */ 80 return RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset) & AVIVO_D1GRPH_SURFACE_UPDATE_PENDING; 81 } 82 83 /* get temperature in millidegrees */ 84 int rv770_get_temp(struct radeon_device *rdev) 85 { 86 u32 temp = (RREG32(CG_MULT_THERMAL_STATUS) & ASIC_T_MASK) >> 87 ASIC_T_SHIFT; 88 int actual_temp; 89 90 if (temp & 0x400) 91 actual_temp = -256; 92 else if (temp & 0x200) 93 actual_temp = 255; 94 else if (temp & 0x100) { 95 actual_temp = temp & 0x1ff; 96 actual_temp |= ~0x1ff; 97 } else 98 actual_temp = temp & 0xff; 99 100 return (actual_temp * 1000) / 2; 101 } 102 103 void rv770_pm_misc(struct radeon_device *rdev) 104 { 105 int req_ps_idx = rdev->pm.requested_power_state_index; 106 int req_cm_idx = rdev->pm.requested_clock_mode_index; 107 struct radeon_power_state *ps = &rdev->pm.power_state[req_ps_idx]; 108 struct radeon_voltage *voltage = &ps->clock_info[req_cm_idx].voltage; 109 110 if ((voltage->type == VOLTAGE_SW) && voltage->voltage) { 111 /* 0xff01 is a flag rather then an actual voltage */ 112 if (voltage->voltage == 0xff01) 113 return; 114 if (voltage->voltage != rdev->pm.current_vddc) { 115 radeon_atom_set_voltage(rdev, voltage->voltage, SET_VOLTAGE_TYPE_ASIC_VDDC); 116 rdev->pm.current_vddc = voltage->voltage; 117 DRM_DEBUG("Setting: v: %d\n", voltage->voltage); 118 } 119 } 120 } 121 122 /* 123 * GART 124 */ 125 static int rv770_pcie_gart_enable(struct radeon_device *rdev) 126 { 127 u32 tmp; 128 int r, i; 129 130 if (rdev->gart.robj == NULL) { 131 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n"); 132 return -EINVAL; 133 } 134 r = radeon_gart_table_vram_pin(rdev); 135 if (r) 136 return r; 137 radeon_gart_restore(rdev); 138 /* Setup L2 cache */ 139 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING | 140 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | 141 EFFECTIVE_L2_QUEUE_SIZE(7)); 142 WREG32(VM_L2_CNTL2, 0); 143 WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2)); 144 /* Setup TLB control */ 145 tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING | 146 SYSTEM_ACCESS_MODE_NOT_IN_SYS | 147 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU | 148 EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5); 149 WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp); 150 WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp); 151 WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp); 152 if (rdev->family == CHIP_RV740) 153 WREG32(MC_VM_MD_L1_TLB3_CNTL, tmp); 154 WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp); 155 WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp); 156 WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp); 157 WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp); 158 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); 159 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12); 160 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12); 161 WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) | 162 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT); 163 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, 164 (u32)(rdev->dummy_page.addr >> 12)); 165 for (i = 1; i < 7; i++) 166 WREG32(VM_CONTEXT0_CNTL + (i * 4), 0); 167 168 r600_pcie_gart_tlb_flush(rdev); 169 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", 170 (unsigned)(rdev->mc.gtt_size >> 20), 171 (unsigned long long)rdev->gart.table_addr); 172 rdev->gart.ready = true; 173 return 0; 174 } 175 176 static void rv770_pcie_gart_disable(struct radeon_device *rdev) 177 { 178 u32 tmp; 179 int i; 180 181 /* Disable all tables */ 182 for (i = 0; i < 7; i++) 183 WREG32(VM_CONTEXT0_CNTL + (i * 4), 0); 184 185 /* Setup L2 cache */ 186 WREG32(VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING | 187 EFFECTIVE_L2_QUEUE_SIZE(7)); 188 WREG32(VM_L2_CNTL2, 0); 189 WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2)); 190 /* Setup TLB control */ 191 tmp = EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5); 192 WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp); 193 WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp); 194 WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp); 195 WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp); 196 WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp); 197 WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp); 198 WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp); 199 radeon_gart_table_vram_unpin(rdev); 200 } 201 202 static void rv770_pcie_gart_fini(struct radeon_device *rdev) 203 { 204 radeon_gart_fini(rdev); 205 rv770_pcie_gart_disable(rdev); 206 radeon_gart_table_vram_free(rdev); 207 } 208 209 210 static void rv770_agp_enable(struct radeon_device *rdev) 211 { 212 u32 tmp; 213 int i; 214 215 /* Setup L2 cache */ 216 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING | 217 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | 218 EFFECTIVE_L2_QUEUE_SIZE(7)); 219 WREG32(VM_L2_CNTL2, 0); 220 WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2)); 221 /* Setup TLB control */ 222 tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING | 223 SYSTEM_ACCESS_MODE_NOT_IN_SYS | 224 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU | 225 EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5); 226 WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp); 227 WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp); 228 WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp); 229 WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp); 230 WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp); 231 WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp); 232 WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp); 233 for (i = 0; i < 7; i++) 234 WREG32(VM_CONTEXT0_CNTL + (i * 4), 0); 235 } 236 237 static void rv770_mc_program(struct radeon_device *rdev) 238 { 239 struct rv515_mc_save save; 240 u32 tmp; 241 int i, j; 242 243 /* Initialize HDP */ 244 for (i = 0, j = 0; i < 32; i++, j += 0x18) { 245 WREG32((0x2c14 + j), 0x00000000); 246 WREG32((0x2c18 + j), 0x00000000); 247 WREG32((0x2c1c + j), 0x00000000); 248 WREG32((0x2c20 + j), 0x00000000); 249 WREG32((0x2c24 + j), 0x00000000); 250 } 251 /* r7xx hw bug. Read from HDP_DEBUG1 rather 252 * than writing to HDP_REG_COHERENCY_FLUSH_CNTL 253 */ 254 tmp = RREG32(HDP_DEBUG1); 255 256 rv515_mc_stop(rdev, &save); 257 if (r600_mc_wait_for_idle(rdev)) { 258 dev_warn(rdev->dev, "Wait for MC idle timedout !\n"); 259 } 260 /* Lockout access through VGA aperture*/ 261 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE); 262 /* Update configuration */ 263 if (rdev->flags & RADEON_IS_AGP) { 264 if (rdev->mc.vram_start < rdev->mc.gtt_start) { 265 /* VRAM before AGP */ 266 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR, 267 rdev->mc.vram_start >> 12); 268 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR, 269 rdev->mc.gtt_end >> 12); 270 } else { 271 /* VRAM after AGP */ 272 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR, 273 rdev->mc.gtt_start >> 12); 274 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR, 275 rdev->mc.vram_end >> 12); 276 } 277 } else { 278 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR, 279 rdev->mc.vram_start >> 12); 280 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR, 281 rdev->mc.vram_end >> 12); 282 } 283 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, rdev->vram_scratch.gpu_addr >> 12); 284 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16; 285 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF); 286 WREG32(MC_VM_FB_LOCATION, tmp); 287 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8)); 288 WREG32(HDP_NONSURFACE_INFO, (2 << 7)); 289 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF); 290 if (rdev->flags & RADEON_IS_AGP) { 291 WREG32(MC_VM_AGP_TOP, rdev->mc.gtt_end >> 16); 292 WREG32(MC_VM_AGP_BOT, rdev->mc.gtt_start >> 16); 293 WREG32(MC_VM_AGP_BASE, rdev->mc.agp_base >> 22); 294 } else { 295 WREG32(MC_VM_AGP_BASE, 0); 296 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF); 297 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF); 298 } 299 if (r600_mc_wait_for_idle(rdev)) { 300 dev_warn(rdev->dev, "Wait for MC idle timedout !\n"); 301 } 302 rv515_mc_resume(rdev, &save); 303 /* we need to own VRAM, so turn off the VGA renderer here 304 * to stop it overwriting our objects */ 305 rv515_vga_render_disable(rdev); 306 } 307 308 309 /* 310 * CP. 311 */ 312 void r700_cp_stop(struct radeon_device *rdev) 313 { 314 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); 315 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT)); 316 WREG32(SCRATCH_UMSK, 0); 317 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; 318 } 319 320 static int rv770_cp_load_microcode(struct radeon_device *rdev) 321 { 322 const __be32 *fw_data; 323 int i; 324 325 if (!rdev->me_fw || !rdev->pfp_fw) 326 return -EINVAL; 327 328 r700_cp_stop(rdev); 329 WREG32(CP_RB_CNTL, 330 #ifdef __BIG_ENDIAN 331 BUF_SWAP_32BIT | 332 #endif 333 RB_NO_UPDATE | RB_BLKSZ(15) | RB_BUFSZ(3)); 334 335 /* Reset cp */ 336 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP); 337 RREG32(GRBM_SOFT_RESET); 338 mdelay(15); 339 WREG32(GRBM_SOFT_RESET, 0); 340 341 fw_data = (const __be32 *)rdev->pfp_fw; 342 WREG32(CP_PFP_UCODE_ADDR, 0); 343 for (i = 0; i < R700_PFP_UCODE_SIZE; i++) 344 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++)); 345 WREG32(CP_PFP_UCODE_ADDR, 0); 346 347 fw_data = (const __be32 *)rdev->me_fw; 348 WREG32(CP_ME_RAM_WADDR, 0); 349 for (i = 0; i < R700_PM4_UCODE_SIZE; i++) 350 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++)); 351 352 WREG32(CP_PFP_UCODE_ADDR, 0); 353 WREG32(CP_ME_RAM_WADDR, 0); 354 WREG32(CP_ME_RAM_RADDR, 0); 355 return 0; 356 } 357 358 void r700_cp_fini(struct radeon_device *rdev) 359 { 360 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 361 r700_cp_stop(rdev); 362 radeon_ring_fini(rdev, ring); 363 radeon_scratch_free(rdev, ring->rptr_save_reg); 364 } 365 366 /* 367 * Core functions 368 */ 369 static void rv770_gpu_init(struct radeon_device *rdev) 370 { 371 int i, j, num_qd_pipes; 372 u32 ta_aux_cntl; 373 u32 sx_debug_1; 374 u32 smx_dc_ctl0; 375 u32 db_debug3; 376 u32 num_gs_verts_per_thread; 377 u32 vgt_gs_per_es; 378 u32 gs_prim_buffer_depth = 0; 379 u32 sq_ms_fifo_sizes; 380 u32 sq_config; 381 u32 sq_thread_resource_mgmt; 382 u32 hdp_host_path_cntl; 383 u32 sq_dyn_gpr_size_simd_ab_0; 384 u32 gb_tiling_config = 0; 385 u32 cc_rb_backend_disable = 0; 386 u32 cc_gc_shader_pipe_config = 0; 387 u32 mc_arb_ramcfg; 388 u32 db_debug4, tmp; 389 u32 inactive_pipes, shader_pipe_config; 390 u32 disabled_rb_mask; 391 unsigned active_number; 392 393 /* setup chip specs */ 394 rdev->config.rv770.tiling_group_size = 256; 395 switch (rdev->family) { 396 case CHIP_RV770: 397 rdev->config.rv770.max_pipes = 4; 398 rdev->config.rv770.max_tile_pipes = 8; 399 rdev->config.rv770.max_simds = 10; 400 rdev->config.rv770.max_backends = 4; 401 rdev->config.rv770.max_gprs = 256; 402 rdev->config.rv770.max_threads = 248; 403 rdev->config.rv770.max_stack_entries = 512; 404 rdev->config.rv770.max_hw_contexts = 8; 405 rdev->config.rv770.max_gs_threads = 16 * 2; 406 rdev->config.rv770.sx_max_export_size = 128; 407 rdev->config.rv770.sx_max_export_pos_size = 16; 408 rdev->config.rv770.sx_max_export_smx_size = 112; 409 rdev->config.rv770.sq_num_cf_insts = 2; 410 411 rdev->config.rv770.sx_num_of_sets = 7; 412 rdev->config.rv770.sc_prim_fifo_size = 0xF9; 413 rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30; 414 rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130; 415 break; 416 case CHIP_RV730: 417 rdev->config.rv770.max_pipes = 2; 418 rdev->config.rv770.max_tile_pipes = 4; 419 rdev->config.rv770.max_simds = 8; 420 rdev->config.rv770.max_backends = 2; 421 rdev->config.rv770.max_gprs = 128; 422 rdev->config.rv770.max_threads = 248; 423 rdev->config.rv770.max_stack_entries = 256; 424 rdev->config.rv770.max_hw_contexts = 8; 425 rdev->config.rv770.max_gs_threads = 16 * 2; 426 rdev->config.rv770.sx_max_export_size = 256; 427 rdev->config.rv770.sx_max_export_pos_size = 32; 428 rdev->config.rv770.sx_max_export_smx_size = 224; 429 rdev->config.rv770.sq_num_cf_insts = 2; 430 431 rdev->config.rv770.sx_num_of_sets = 7; 432 rdev->config.rv770.sc_prim_fifo_size = 0xf9; 433 rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30; 434 rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130; 435 if (rdev->config.rv770.sx_max_export_pos_size > 16) { 436 rdev->config.rv770.sx_max_export_pos_size -= 16; 437 rdev->config.rv770.sx_max_export_smx_size += 16; 438 } 439 break; 440 case CHIP_RV710: 441 rdev->config.rv770.max_pipes = 2; 442 rdev->config.rv770.max_tile_pipes = 2; 443 rdev->config.rv770.max_simds = 2; 444 rdev->config.rv770.max_backends = 1; 445 rdev->config.rv770.max_gprs = 256; 446 rdev->config.rv770.max_threads = 192; 447 rdev->config.rv770.max_stack_entries = 256; 448 rdev->config.rv770.max_hw_contexts = 4; 449 rdev->config.rv770.max_gs_threads = 8 * 2; 450 rdev->config.rv770.sx_max_export_size = 128; 451 rdev->config.rv770.sx_max_export_pos_size = 16; 452 rdev->config.rv770.sx_max_export_smx_size = 112; 453 rdev->config.rv770.sq_num_cf_insts = 1; 454 455 rdev->config.rv770.sx_num_of_sets = 7; 456 rdev->config.rv770.sc_prim_fifo_size = 0x40; 457 rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30; 458 rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130; 459 break; 460 case CHIP_RV740: 461 rdev->config.rv770.max_pipes = 4; 462 rdev->config.rv770.max_tile_pipes = 4; 463 rdev->config.rv770.max_simds = 8; 464 rdev->config.rv770.max_backends = 4; 465 rdev->config.rv770.max_gprs = 256; 466 rdev->config.rv770.max_threads = 248; 467 rdev->config.rv770.max_stack_entries = 512; 468 rdev->config.rv770.max_hw_contexts = 8; 469 rdev->config.rv770.max_gs_threads = 16 * 2; 470 rdev->config.rv770.sx_max_export_size = 256; 471 rdev->config.rv770.sx_max_export_pos_size = 32; 472 rdev->config.rv770.sx_max_export_smx_size = 224; 473 rdev->config.rv770.sq_num_cf_insts = 2; 474 475 rdev->config.rv770.sx_num_of_sets = 7; 476 rdev->config.rv770.sc_prim_fifo_size = 0x100; 477 rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30; 478 rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130; 479 480 if (rdev->config.rv770.sx_max_export_pos_size > 16) { 481 rdev->config.rv770.sx_max_export_pos_size -= 16; 482 rdev->config.rv770.sx_max_export_smx_size += 16; 483 } 484 break; 485 default: 486 break; 487 } 488 489 /* Initialize HDP */ 490 j = 0; 491 for (i = 0; i < 32; i++) { 492 WREG32((0x2c14 + j), 0x00000000); 493 WREG32((0x2c18 + j), 0x00000000); 494 WREG32((0x2c1c + j), 0x00000000); 495 WREG32((0x2c20 + j), 0x00000000); 496 WREG32((0x2c24 + j), 0x00000000); 497 j += 0x18; 498 } 499 500 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff)); 501 502 /* setup tiling, simd, pipe config */ 503 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG); 504 505 shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG); 506 inactive_pipes = (shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> INACTIVE_QD_PIPES_SHIFT; 507 for (i = 0, tmp = 1, active_number = 0; i < R7XX_MAX_PIPES; i++) { 508 if (!(inactive_pipes & tmp)) { 509 active_number++; 510 } 511 tmp <<= 1; 512 } 513 if (active_number == 1) { 514 WREG32(SPI_CONFIG_CNTL, DISABLE_INTERP_1); 515 } else { 516 WREG32(SPI_CONFIG_CNTL, 0); 517 } 518 519 cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000; 520 tmp = R7XX_MAX_BACKENDS - r600_count_pipe_bits(cc_rb_backend_disable >> 16); 521 if (tmp < rdev->config.rv770.max_backends) { 522 rdev->config.rv770.max_backends = tmp; 523 } 524 525 cc_gc_shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0xffffff00; 526 tmp = R7XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config >> 8) & R7XX_MAX_PIPES_MASK); 527 if (tmp < rdev->config.rv770.max_pipes) { 528 rdev->config.rv770.max_pipes = tmp; 529 } 530 tmp = R7XX_MAX_SIMDS - r600_count_pipe_bits((cc_gc_shader_pipe_config >> 16) & R7XX_MAX_SIMDS_MASK); 531 if (tmp < rdev->config.rv770.max_simds) { 532 rdev->config.rv770.max_simds = tmp; 533 } 534 535 switch (rdev->config.rv770.max_tile_pipes) { 536 case 1: 537 default: 538 gb_tiling_config = PIPE_TILING(0); 539 break; 540 case 2: 541 gb_tiling_config = PIPE_TILING(1); 542 break; 543 case 4: 544 gb_tiling_config = PIPE_TILING(2); 545 break; 546 case 8: 547 gb_tiling_config = PIPE_TILING(3); 548 break; 549 } 550 rdev->config.rv770.tiling_npipes = rdev->config.rv770.max_tile_pipes; 551 552 disabled_rb_mask = (RREG32(CC_RB_BACKEND_DISABLE) >> 16) & R7XX_MAX_BACKENDS_MASK; 553 tmp = (gb_tiling_config & PIPE_TILING__MASK) >> PIPE_TILING__SHIFT; 554 tmp = r6xx_remap_render_backend(rdev, tmp, rdev->config.rv770.max_backends, 555 R7XX_MAX_BACKENDS, disabled_rb_mask); 556 gb_tiling_config |= tmp << 16; 557 rdev->config.rv770.backend_map = tmp; 558 559 if (rdev->family == CHIP_RV770) 560 gb_tiling_config |= BANK_TILING(1); 561 else { 562 if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) 563 gb_tiling_config |= BANK_TILING(1); 564 else 565 gb_tiling_config |= BANK_TILING(0); 566 } 567 rdev->config.rv770.tiling_nbanks = 4 << ((gb_tiling_config >> 4) & 0x3); 568 gb_tiling_config |= GROUP_SIZE((mc_arb_ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT); 569 if (((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT) > 3) { 570 gb_tiling_config |= ROW_TILING(3); 571 gb_tiling_config |= SAMPLE_SPLIT(3); 572 } else { 573 gb_tiling_config |= 574 ROW_TILING(((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT)); 575 gb_tiling_config |= 576 SAMPLE_SPLIT(((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT)); 577 } 578 579 gb_tiling_config |= BANK_SWAPS(1); 580 rdev->config.rv770.tile_config = gb_tiling_config; 581 582 WREG32(GB_TILING_CONFIG, gb_tiling_config); 583 WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff)); 584 WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff)); 585 WREG32(DMA_TILING_CONFIG, (gb_tiling_config & 0xffff)); 586 WREG32(DMA_TILING_CONFIG2, (gb_tiling_config & 0xffff)); 587 588 WREG32(CGTS_SYS_TCC_DISABLE, 0); 589 WREG32(CGTS_TCC_DISABLE, 0); 590 WREG32(CGTS_USER_SYS_TCC_DISABLE, 0); 591 WREG32(CGTS_USER_TCC_DISABLE, 0); 592 593 594 num_qd_pipes = R7XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> 8); 595 WREG32(VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & DEALLOC_DIST_MASK); 596 WREG32(VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & VTX_REUSE_DEPTH_MASK); 597 598 /* set HW defaults for 3D engine */ 599 WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) | 600 ROQ_IB2_START(0x2b))); 601 602 WREG32(CP_MEQ_THRESHOLDS, STQ_SPLIT(0x30)); 603 604 ta_aux_cntl = RREG32(TA_CNTL_AUX); 605 WREG32(TA_CNTL_AUX, ta_aux_cntl | DISABLE_CUBE_ANISO); 606 607 sx_debug_1 = RREG32(SX_DEBUG_1); 608 sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS; 609 WREG32(SX_DEBUG_1, sx_debug_1); 610 611 smx_dc_ctl0 = RREG32(SMX_DC_CTL0); 612 smx_dc_ctl0 &= ~CACHE_DEPTH(0x1ff); 613 smx_dc_ctl0 |= CACHE_DEPTH((rdev->config.rv770.sx_num_of_sets * 64) - 1); 614 WREG32(SMX_DC_CTL0, smx_dc_ctl0); 615 616 if (rdev->family != CHIP_RV740) 617 WREG32(SMX_EVENT_CTL, (ES_FLUSH_CTL(4) | 618 GS_FLUSH_CTL(4) | 619 ACK_FLUSH_CTL(3) | 620 SYNC_FLUSH_CTL)); 621 622 if (rdev->family != CHIP_RV770) 623 WREG32(SMX_SAR_CTL0, 0x00003f3f); 624 625 db_debug3 = RREG32(DB_DEBUG3); 626 db_debug3 &= ~DB_CLK_OFF_DELAY(0x1f); 627 switch (rdev->family) { 628 case CHIP_RV770: 629 case CHIP_RV740: 630 db_debug3 |= DB_CLK_OFF_DELAY(0x1f); 631 break; 632 case CHIP_RV710: 633 case CHIP_RV730: 634 default: 635 db_debug3 |= DB_CLK_OFF_DELAY(2); 636 break; 637 } 638 WREG32(DB_DEBUG3, db_debug3); 639 640 if (rdev->family != CHIP_RV770) { 641 db_debug4 = RREG32(DB_DEBUG4); 642 db_debug4 |= DISABLE_TILE_COVERED_FOR_PS_ITER; 643 WREG32(DB_DEBUG4, db_debug4); 644 } 645 646 WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.rv770.sx_max_export_size / 4) - 1) | 647 POSITION_BUFFER_SIZE((rdev->config.rv770.sx_max_export_pos_size / 4) - 1) | 648 SMX_BUFFER_SIZE((rdev->config.rv770.sx_max_export_smx_size / 4) - 1))); 649 650 WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.rv770.sc_prim_fifo_size) | 651 SC_HIZ_TILE_FIFO_SIZE(rdev->config.rv770.sc_hiz_tile_fifo_size) | 652 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.rv770.sc_earlyz_tile_fifo_fize))); 653 654 WREG32(PA_SC_MULTI_CHIP_CNTL, 0); 655 656 WREG32(VGT_NUM_INSTANCES, 1); 657 658 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4)); 659 660 WREG32(CP_PERFMON_CNTL, 0); 661 662 sq_ms_fifo_sizes = (CACHE_FIFO_SIZE(16 * rdev->config.rv770.sq_num_cf_insts) | 663 DONE_FIFO_HIWATER(0xe0) | 664 ALU_UPDATE_FIFO_HIWATER(0x8)); 665 switch (rdev->family) { 666 case CHIP_RV770: 667 case CHIP_RV730: 668 case CHIP_RV710: 669 sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x1); 670 break; 671 case CHIP_RV740: 672 default: 673 sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x4); 674 break; 675 } 676 WREG32(SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes); 677 678 /* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT 679 * should be adjusted as needed by the 2D/3D drivers. This just sets default values 680 */ 681 sq_config = RREG32(SQ_CONFIG); 682 sq_config &= ~(PS_PRIO(3) | 683 VS_PRIO(3) | 684 GS_PRIO(3) | 685 ES_PRIO(3)); 686 sq_config |= (DX9_CONSTS | 687 VC_ENABLE | 688 EXPORT_SRC_C | 689 PS_PRIO(0) | 690 VS_PRIO(1) | 691 GS_PRIO(2) | 692 ES_PRIO(3)); 693 if (rdev->family == CHIP_RV710) 694 /* no vertex cache */ 695 sq_config &= ~VC_ENABLE; 696 697 WREG32(SQ_CONFIG, sq_config); 698 699 WREG32(SQ_GPR_RESOURCE_MGMT_1, (NUM_PS_GPRS((rdev->config.rv770.max_gprs * 24)/64) | 700 NUM_VS_GPRS((rdev->config.rv770.max_gprs * 24)/64) | 701 NUM_CLAUSE_TEMP_GPRS(((rdev->config.rv770.max_gprs * 24)/64)/2))); 702 703 WREG32(SQ_GPR_RESOURCE_MGMT_2, (NUM_GS_GPRS((rdev->config.rv770.max_gprs * 7)/64) | 704 NUM_ES_GPRS((rdev->config.rv770.max_gprs * 7)/64))); 705 706 sq_thread_resource_mgmt = (NUM_PS_THREADS((rdev->config.rv770.max_threads * 4)/8) | 707 NUM_VS_THREADS((rdev->config.rv770.max_threads * 2)/8) | 708 NUM_ES_THREADS((rdev->config.rv770.max_threads * 1)/8)); 709 if (((rdev->config.rv770.max_threads * 1) / 8) > rdev->config.rv770.max_gs_threads) 710 sq_thread_resource_mgmt |= NUM_GS_THREADS(rdev->config.rv770.max_gs_threads); 711 else 712 sq_thread_resource_mgmt |= NUM_GS_THREADS((rdev->config.rv770.max_gs_threads * 1)/8); 713 WREG32(SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt); 714 715 WREG32(SQ_STACK_RESOURCE_MGMT_1, (NUM_PS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4) | 716 NUM_VS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4))); 717 718 WREG32(SQ_STACK_RESOURCE_MGMT_2, (NUM_GS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4) | 719 NUM_ES_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4))); 720 721 sq_dyn_gpr_size_simd_ab_0 = (SIMDA_RING0((rdev->config.rv770.max_gprs * 38)/64) | 722 SIMDA_RING1((rdev->config.rv770.max_gprs * 38)/64) | 723 SIMDB_RING0((rdev->config.rv770.max_gprs * 38)/64) | 724 SIMDB_RING1((rdev->config.rv770.max_gprs * 38)/64)); 725 726 WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_0, sq_dyn_gpr_size_simd_ab_0); 727 WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_1, sq_dyn_gpr_size_simd_ab_0); 728 WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_2, sq_dyn_gpr_size_simd_ab_0); 729 WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_3, sq_dyn_gpr_size_simd_ab_0); 730 WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_4, sq_dyn_gpr_size_simd_ab_0); 731 WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_5, sq_dyn_gpr_size_simd_ab_0); 732 WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_6, sq_dyn_gpr_size_simd_ab_0); 733 WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_7, sq_dyn_gpr_size_simd_ab_0); 734 735 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) | 736 FORCE_EOV_MAX_REZ_CNT(255))); 737 738 if (rdev->family == CHIP_RV710) 739 WREG32(VGT_CACHE_INVALIDATION, (CACHE_INVALIDATION(TC_ONLY) | 740 AUTO_INVLD_EN(ES_AND_GS_AUTO))); 741 else 742 WREG32(VGT_CACHE_INVALIDATION, (CACHE_INVALIDATION(VC_AND_TC) | 743 AUTO_INVLD_EN(ES_AND_GS_AUTO))); 744 745 switch (rdev->family) { 746 case CHIP_RV770: 747 case CHIP_RV730: 748 case CHIP_RV740: 749 gs_prim_buffer_depth = 384; 750 break; 751 case CHIP_RV710: 752 gs_prim_buffer_depth = 128; 753 break; 754 default: 755 break; 756 } 757 758 num_gs_verts_per_thread = rdev->config.rv770.max_pipes * 16; 759 vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread; 760 /* Max value for this is 256 */ 761 if (vgt_gs_per_es > 256) 762 vgt_gs_per_es = 256; 763 764 WREG32(VGT_ES_PER_GS, 128); 765 WREG32(VGT_GS_PER_ES, vgt_gs_per_es); 766 WREG32(VGT_GS_PER_VS, 2); 767 768 /* more default values. 2D/3D driver should adjust as needed */ 769 WREG32(VGT_GS_VERTEX_REUSE, 16); 770 WREG32(PA_SC_LINE_STIPPLE_STATE, 0); 771 WREG32(VGT_STRMOUT_EN, 0); 772 WREG32(SX_MISC, 0); 773 WREG32(PA_SC_MODE_CNTL, 0); 774 WREG32(PA_SC_EDGERULE, 0xaaaaaaaa); 775 WREG32(PA_SC_AA_CONFIG, 0); 776 WREG32(PA_SC_CLIPRECT_RULE, 0xffff); 777 WREG32(PA_SC_LINE_STIPPLE, 0); 778 WREG32(SPI_INPUT_Z, 0); 779 WREG32(SPI_PS_IN_CONTROL_0, NUM_INTERP(2)); 780 WREG32(CB_COLOR7_FRAG, 0); 781 782 /* clear render buffer base addresses */ 783 WREG32(CB_COLOR0_BASE, 0); 784 WREG32(CB_COLOR1_BASE, 0); 785 WREG32(CB_COLOR2_BASE, 0); 786 WREG32(CB_COLOR3_BASE, 0); 787 WREG32(CB_COLOR4_BASE, 0); 788 WREG32(CB_COLOR5_BASE, 0); 789 WREG32(CB_COLOR6_BASE, 0); 790 WREG32(CB_COLOR7_BASE, 0); 791 792 WREG32(TCP_CNTL, 0); 793 794 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL); 795 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl); 796 797 WREG32(PA_SC_MULTI_CHIP_CNTL, 0); 798 799 WREG32(PA_CL_ENHANCE, (CLIP_VTX_REORDER_ENA | 800 NUM_CLIP_SEQ(3))); 801 WREG32(VC_ENHANCE, 0); 802 } 803 804 void r700_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc) 805 { 806 u64 size_bf, size_af; 807 808 if (mc->mc_vram_size > 0xE0000000) { 809 /* leave room for at least 512M GTT */ 810 dev_warn(rdev->dev, "limiting VRAM\n"); 811 mc->real_vram_size = 0xE0000000; 812 mc->mc_vram_size = 0xE0000000; 813 } 814 if (rdev->flags & RADEON_IS_AGP) { 815 size_bf = mc->gtt_start; 816 size_af = 0xFFFFFFFF - mc->gtt_end; 817 if (size_bf > size_af) { 818 if (mc->mc_vram_size > size_bf) { 819 dev_warn(rdev->dev, "limiting VRAM\n"); 820 mc->real_vram_size = size_bf; 821 mc->mc_vram_size = size_bf; 822 } 823 mc->vram_start = mc->gtt_start - mc->mc_vram_size; 824 } else { 825 if (mc->mc_vram_size > size_af) { 826 dev_warn(rdev->dev, "limiting VRAM\n"); 827 mc->real_vram_size = size_af; 828 mc->mc_vram_size = size_af; 829 } 830 mc->vram_start = mc->gtt_end + 1; 831 } 832 mc->vram_end = mc->vram_start + mc->mc_vram_size - 1; 833 dev_info(rdev->dev, "VRAM: %lluM 0x%08llX - 0x%08llX (%lluM used)\n", 834 mc->mc_vram_size >> 20, mc->vram_start, 835 mc->vram_end, mc->real_vram_size >> 20); 836 } else { 837 radeon_vram_location(rdev, &rdev->mc, 0); 838 rdev->mc.gtt_base_align = 0; 839 radeon_gtt_location(rdev, mc); 840 } 841 } 842 843 static int rv770_mc_init(struct radeon_device *rdev) 844 { 845 u32 tmp; 846 int chansize, numchan; 847 848 /* Get VRAM informations */ 849 rdev->mc.vram_is_ddr = true; 850 tmp = RREG32(MC_ARB_RAMCFG); 851 if (tmp & CHANSIZE_OVERRIDE) { 852 chansize = 16; 853 } else if (tmp & CHANSIZE_MASK) { 854 chansize = 64; 855 } else { 856 chansize = 32; 857 } 858 tmp = RREG32(MC_SHARED_CHMAP); 859 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) { 860 case 0: 861 default: 862 numchan = 1; 863 break; 864 case 1: 865 numchan = 2; 866 break; 867 case 2: 868 numchan = 4; 869 break; 870 case 3: 871 numchan = 8; 872 break; 873 } 874 rdev->mc.vram_width = numchan * chansize; 875 /* Could aper size report 0 ? */ 876 rdev->mc.aper_base = rdev->fb_aper_offset; 877 rdev->mc.aper_size = rdev->fb_aper_size; 878 /* Setup GPU memory space */ 879 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE); 880 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE); 881 rdev->mc.visible_vram_size = rdev->mc.aper_size; 882 r700_vram_gtt_location(rdev, &rdev->mc); 883 radeon_update_bandwidth_info(rdev); 884 885 return 0; 886 } 887 888 /** 889 * rv770_copy_dma - copy pages using the DMA engine 890 * 891 * @rdev: radeon_device pointer 892 * @src_offset: src GPU address 893 * @dst_offset: dst GPU address 894 * @num_gpu_pages: number of GPU pages to xfer 895 * @fence: radeon fence object 896 * 897 * Copy GPU paging using the DMA engine (r7xx). 898 * Used by the radeon ttm implementation to move pages if 899 * registered as the asic copy callback. 900 */ 901 int rv770_copy_dma(struct radeon_device *rdev, 902 uint64_t src_offset, uint64_t dst_offset, 903 unsigned num_gpu_pages, 904 struct radeon_fence **fence) 905 { 906 struct radeon_semaphore *sem = NULL; 907 int ring_index = rdev->asic->copy.dma_ring_index; 908 struct radeon_ring *ring = &rdev->ring[ring_index]; 909 u32 size_in_dw, cur_size_in_dw; 910 int i, num_loops; 911 int r = 0; 912 913 r = radeon_semaphore_create(rdev, &sem); 914 if (r) { 915 DRM_ERROR("radeon: moving bo (%d).\n", r); 916 return r; 917 } 918 919 size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4; 920 num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFF); 921 r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8); 922 if (r) { 923 DRM_ERROR("radeon: moving bo (%d).\n", r); 924 radeon_semaphore_free(rdev, &sem, NULL); 925 return r; 926 } 927 928 if (radeon_fence_need_sync(*fence, ring->idx)) { 929 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, 930 ring->idx); 931 radeon_fence_note_sync(*fence, ring->idx); 932 } else { 933 radeon_semaphore_free(rdev, &sem, NULL); 934 } 935 936 for (i = 0; i < num_loops; i++) { 937 cur_size_in_dw = size_in_dw; 938 if (cur_size_in_dw > 0xFFFF) 939 cur_size_in_dw = 0xFFFF; 940 size_in_dw -= cur_size_in_dw; 941 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw)); 942 radeon_ring_write(ring, dst_offset & 0xfffffffc); 943 radeon_ring_write(ring, src_offset & 0xfffffffc); 944 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff); 945 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff); 946 src_offset += cur_size_in_dw * 4; 947 dst_offset += cur_size_in_dw * 4; 948 } 949 950 r = radeon_fence_emit(rdev, fence, ring->idx); 951 if (r) { 952 radeon_ring_unlock_undo(rdev, ring); 953 return r; 954 } 955 956 radeon_ring_unlock_commit(rdev, ring); 957 radeon_semaphore_free(rdev, &sem, *fence); 958 959 return r; 960 } 961 962 static int rv770_startup(struct radeon_device *rdev) 963 { 964 struct radeon_ring *ring; 965 int r; 966 967 /* enable pcie gen2 link */ 968 rv770_pcie_gen2_enable(rdev); 969 970 rv770_mc_program(rdev); 971 972 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) { 973 r = r600_init_microcode(rdev); 974 if (r) { 975 DRM_ERROR("Failed to load firmware!\n"); 976 return r; 977 } 978 } 979 980 r = r600_vram_scratch_init(rdev); 981 if (r) 982 return r; 983 984 if (rdev->flags & RADEON_IS_AGP) { 985 rv770_agp_enable(rdev); 986 } else { 987 r = rv770_pcie_gart_enable(rdev); 988 if (r) 989 return r; 990 } 991 992 rv770_gpu_init(rdev); 993 r = r600_blit_init(rdev); 994 if (r) { 995 r600_blit_fini(rdev); 996 rdev->asic->copy.copy = NULL; 997 dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r); 998 } 999 1000 /* allocate wb buffer */ 1001 r = radeon_wb_init(rdev); 1002 if (r) 1003 return r; 1004 1005 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX); 1006 if (r) { 1007 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); 1008 return r; 1009 } 1010 1011 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX); 1012 if (r) { 1013 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); 1014 return r; 1015 } 1016 1017 /* Enable IRQ */ 1018 if (!rdev->irq.installed) { 1019 r = radeon_irq_kms_init(rdev); 1020 if (r) 1021 return r; 1022 } 1023 1024 r = r600_irq_init(rdev); 1025 if (r) { 1026 DRM_ERROR("radeon: IH init failed (%d).\n", r); 1027 radeon_irq_kms_fini(rdev); 1028 return r; 1029 } 1030 r600_irq_set(rdev); 1031 1032 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 1033 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, 1034 R600_CP_RB_RPTR, R600_CP_RB_WPTR, 1035 0, 0xfffff, RADEON_CP_PACKET2); 1036 if (r) 1037 return r; 1038 1039 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; 1040 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, 1041 DMA_RB_RPTR, DMA_RB_WPTR, 1042 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); 1043 if (r) 1044 return r; 1045 1046 r = rv770_cp_load_microcode(rdev); 1047 if (r) 1048 return r; 1049 r = r600_cp_resume(rdev); 1050 if (r) 1051 return r; 1052 1053 r = r600_dma_resume(rdev); 1054 if (r) 1055 return r; 1056 1057 r = radeon_ib_pool_init(rdev); 1058 if (r) { 1059 dev_err(rdev->dev, "IB initialization failed (%d).\n", r); 1060 return r; 1061 } 1062 1063 r = r600_audio_init(rdev); 1064 if (r) { 1065 DRM_ERROR("radeon: audio init failed\n"); 1066 return r; 1067 } 1068 1069 return 0; 1070 } 1071 1072 int rv770_resume(struct radeon_device *rdev) 1073 { 1074 int r; 1075 1076 /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw, 1077 * posting will perform necessary task to bring back GPU into good 1078 * shape. 1079 */ 1080 /* post card */ 1081 atom_asic_init(rdev->mode_info.atom_context); 1082 1083 rdev->accel_working = true; 1084 r = rv770_startup(rdev); 1085 if (r) { 1086 DRM_ERROR("r600 startup failed on resume\n"); 1087 rdev->accel_working = false; 1088 return r; 1089 } 1090 1091 return r; 1092 1093 } 1094 1095 int rv770_suspend(struct radeon_device *rdev) 1096 { 1097 r600_audio_fini(rdev); 1098 r700_cp_stop(rdev); 1099 r600_dma_stop(rdev); 1100 r600_irq_suspend(rdev); 1101 radeon_wb_disable(rdev); 1102 rv770_pcie_gart_disable(rdev); 1103 1104 return 0; 1105 } 1106 1107 /* Plan is to move initialization in that function and use 1108 * helper function so that radeon_device_init pretty much 1109 * do nothing more than calling asic specific function. This 1110 * should also allow to remove a bunch of callback function 1111 * like vram_info. 1112 */ 1113 int rv770_init(struct radeon_device *rdev) 1114 { 1115 int r; 1116 1117 /* Read BIOS */ 1118 if (!radeon_get_bios(rdev)) { 1119 if (ASIC_IS_AVIVO(rdev)) 1120 return -EINVAL; 1121 } 1122 /* Must be an ATOMBIOS */ 1123 if (!rdev->is_atom_bios) { 1124 dev_err(rdev->dev, "Expecting atombios for R600 GPU\n"); 1125 return -EINVAL; 1126 } 1127 r = radeon_atombios_init(rdev); 1128 if (r) 1129 return r; 1130 /* Post card if necessary */ 1131 if (!radeon_card_posted(rdev)) { 1132 if (!rdev->bios) { 1133 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n"); 1134 return -EINVAL; 1135 } 1136 DRM_INFO("GPU not posted. posting now...\n"); 1137 atom_asic_init(rdev->mode_info.atom_context); 1138 } 1139 /* Initialize scratch registers */ 1140 r600_scratch_init(rdev); 1141 /* Initialize surface registers */ 1142 radeon_surface_init(rdev); 1143 /* Initialize clocks */ 1144 radeon_get_clock_info(rdev->ddev); 1145 /* Fence driver */ 1146 r = radeon_fence_driver_init(rdev); 1147 if (r) 1148 return r; 1149 /* initialize AGP */ 1150 if (rdev->flags & RADEON_IS_AGP) { 1151 r = radeon_agp_init(rdev); 1152 if (r) 1153 radeon_agp_disable(rdev); 1154 } 1155 r = rv770_mc_init(rdev); 1156 if (r) 1157 return r; 1158 /* Memory manager */ 1159 r = radeon_bo_init(rdev); 1160 if (r) 1161 return r; 1162 1163 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL; 1164 r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024); 1165 1166 rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL; 1167 r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024); 1168 1169 rdev->ih.ring_obj = NULL; 1170 r600_ih_ring_init(rdev, 64 * 1024); 1171 1172 r = r600_pcie_gart_init(rdev); 1173 if (r) 1174 return r; 1175 1176 rdev->accel_working = true; 1177 r = rv770_startup(rdev); 1178 if (r) { 1179 dev_err(rdev->dev, "disabling GPU acceleration\n"); 1180 r700_cp_fini(rdev); 1181 r600_dma_fini(rdev); 1182 r600_irq_fini(rdev); 1183 radeon_wb_fini(rdev); 1184 radeon_ib_pool_fini(rdev); 1185 radeon_irq_kms_fini(rdev); 1186 rv770_pcie_gart_fini(rdev); 1187 rdev->accel_working = false; 1188 } 1189 1190 return 0; 1191 } 1192 1193 void rv770_fini(struct radeon_device *rdev) 1194 { 1195 r600_blit_fini(rdev); 1196 r700_cp_fini(rdev); 1197 r600_dma_fini(rdev); 1198 r600_irq_fini(rdev); 1199 radeon_wb_fini(rdev); 1200 radeon_ib_pool_fini(rdev); 1201 radeon_irq_kms_fini(rdev); 1202 rv770_pcie_gart_fini(rdev); 1203 r600_vram_scratch_fini(rdev); 1204 radeon_gem_fini(rdev); 1205 radeon_fence_driver_fini(rdev); 1206 radeon_agp_fini(rdev); 1207 radeon_bo_fini(rdev); 1208 radeon_atombios_fini(rdev); 1209 kfree(rdev->bios); 1210 rdev->bios = NULL; 1211 } 1212 1213 static void rv770_pcie_gen2_enable(struct radeon_device *rdev) 1214 { 1215 u32 link_width_cntl, lanes, speed_cntl, tmp; 1216 u16 link_cntl2; 1217 u32 mask; 1218 int ret; 1219 1220 if (radeon_pcie_gen2 == 0) 1221 return; 1222 1223 if (rdev->flags & RADEON_IS_IGP) 1224 return; 1225 1226 if (!(rdev->flags & RADEON_IS_PCIE)) 1227 return; 1228 1229 /* x2 cards have a special sequence */ 1230 if (ASIC_IS_X2(rdev)) 1231 return; 1232 1233 ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask); 1234 if (ret != 0) 1235 return; 1236 1237 if (!(mask & DRM_PCIE_SPEED_50)) 1238 return; 1239 1240 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n"); 1241 1242 /* advertise upconfig capability */ 1243 link_width_cntl = RREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL); 1244 link_width_cntl &= ~LC_UPCONFIGURE_DIS; 1245 WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl); 1246 link_width_cntl = RREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL); 1247 if (link_width_cntl & LC_RENEGOTIATION_SUPPORT) { 1248 lanes = (link_width_cntl & LC_LINK_WIDTH_RD_MASK) >> LC_LINK_WIDTH_RD_SHIFT; 1249 link_width_cntl &= ~(LC_LINK_WIDTH_MASK | 1250 LC_RECONFIG_ARC_MISSING_ESCAPE); 1251 link_width_cntl |= lanes | LC_RECONFIG_NOW | 1252 LC_RENEGOTIATE_EN | LC_UPCONFIGURE_SUPPORT; 1253 WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl); 1254 } else { 1255 link_width_cntl |= LC_UPCONFIGURE_DIS; 1256 WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl); 1257 } 1258 1259 speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL); 1260 if ((speed_cntl & LC_OTHER_SIDE_EVER_SENT_GEN2) && 1261 (speed_cntl & LC_OTHER_SIDE_SUPPORTS_GEN2)) { 1262 1263 tmp = RREG32(0x541c); 1264 WREG32(0x541c, tmp | 0x8); 1265 WREG32(MM_CFGREGS_CNTL, MM_WR_TO_CFG_EN); 1266 link_cntl2 = RREG16(0x4088); 1267 link_cntl2 &= ~TARGET_LINK_SPEED_MASK; 1268 link_cntl2 |= 0x2; 1269 WREG16(0x4088, link_cntl2); 1270 WREG32(MM_CFGREGS_CNTL, 0); 1271 1272 speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL); 1273 speed_cntl &= ~LC_TARGET_LINK_SPEED_OVERRIDE_EN; 1274 WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl); 1275 1276 speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL); 1277 speed_cntl |= LC_CLR_FAILED_SPD_CHANGE_CNT; 1278 WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl); 1279 1280 speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL); 1281 speed_cntl &= ~LC_CLR_FAILED_SPD_CHANGE_CNT; 1282 WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl); 1283 1284 speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL); 1285 speed_cntl |= LC_GEN2_EN_STRAP; 1286 WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl); 1287 1288 } else { 1289 link_width_cntl = RREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL); 1290 /* XXX: only disable it if gen1 bridge vendor == 0x111d or 0x1106 */ 1291 if (1) 1292 link_width_cntl |= LC_UPCONFIGURE_DIS; 1293 else 1294 link_width_cntl &= ~LC_UPCONFIGURE_DIS; 1295 WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl); 1296 } 1297 } 1298