xref: /openbsd/sys/dev/pci/drm/radeon/rv770.c (revision 264ca280)
1 /*	$OpenBSD: rv770.c,v 1.7 2015/04/18 14:47:35 jsg Exp $	*/
2 /*
3  * Copyright 2008 Advanced Micro Devices, Inc.
4  * Copyright 2008 Red Hat Inc.
5  * Copyright 2009 Jerome Glisse.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11  * and/or sell copies of the Software, and to permit persons to whom the
12  * Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23  * OTHER DEALINGS IN THE SOFTWARE.
24  *
25  * Authors: Dave Airlie
26  *          Alex Deucher
27  *          Jerome Glisse
28  */
29 #include <dev/pci/drm/drmP.h>
30 #include "radeon.h"
31 #include "radeon_asic.h"
32 #include <dev/pci/drm/radeon_drm.h>
33 #include "rv770d.h"
34 #include "atom.h"
35 #include "avivod.h"
36 
/*
 * Microcode image lengths, used as loop counts by rv770_cp_load_microcode():
 * one 32-bit word is written to CP_PFP_UCODE_DATA (PFP) or CP_ME_RAM_DATA
 * (PM4/ME) per count.
 */
37 #define R700_PFP_UCODE_SIZE 848
38 #define R700_PM4_UCODE_SIZE 1360
39 
/* Forward declarations. */
40 static void rv770_gpu_init(struct radeon_device *rdev);
41 void rv770_fini(struct radeon_device *rdev);
42 static void rv770_pcie_gen2_enable(struct radeon_device *rdev);
43 
44 u32 rv770_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
45 {
46 	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
47 	u32 tmp = RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset);
48 	int i;
49 
50 	/* Lock the graphics update lock */
51 	tmp |= AVIVO_D1GRPH_UPDATE_LOCK;
52 	WREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset, tmp);
53 
54 	/* update the scanout addresses */
55 	if (radeon_crtc->crtc_id) {
56 		WREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base));
57 		WREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base));
58 	} else {
59 		WREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base));
60 		WREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base));
61 	}
62 	WREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset,
63 	       (u32)crtc_base);
64 	WREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset,
65 	       (u32)crtc_base);
66 
67 	/* Wait for update_pending to go high. */
68 	for (i = 0; i < rdev->usec_timeout; i++) {
69 		if (RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset) & AVIVO_D1GRPH_SURFACE_UPDATE_PENDING)
70 			break;
71 		udelay(1);
72 	}
73 	DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");
74 
75 	/* Unlock the lock, so double-buffering can take place inside vblank */
76 	tmp &= ~AVIVO_D1GRPH_UPDATE_LOCK;
77 	WREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset, tmp);
78 
79 	/* Return current update_pending status: */
80 	return RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset) & AVIVO_D1GRPH_SURFACE_UPDATE_PENDING;
81 }
82 
83 /* get temperature in millidegrees */
84 int rv770_get_temp(struct radeon_device *rdev)
85 {
86 	u32 temp = (RREG32(CG_MULT_THERMAL_STATUS) & ASIC_T_MASK) >>
87 		ASIC_T_SHIFT;
88 	int actual_temp;
89 
90 	if (temp & 0x400)
91 		actual_temp = -256;
92 	else if (temp & 0x200)
93 		actual_temp = 255;
94 	else if (temp & 0x100) {
95 		actual_temp = temp & 0x1ff;
96 		actual_temp |= ~0x1ff;
97 	} else
98 		actual_temp = temp & 0xff;
99 
100 	return (actual_temp * 1000) / 2;
101 }
102 
103 void rv770_pm_misc(struct radeon_device *rdev)
104 {
105 	int req_ps_idx = rdev->pm.requested_power_state_index;
106 	int req_cm_idx = rdev->pm.requested_clock_mode_index;
107 	struct radeon_power_state *ps = &rdev->pm.power_state[req_ps_idx];
108 	struct radeon_voltage *voltage = &ps->clock_info[req_cm_idx].voltage;
109 
110 	if ((voltage->type == VOLTAGE_SW) && voltage->voltage) {
111 		/* 0xff01 is a flag rather then an actual voltage */
112 		if (voltage->voltage == 0xff01)
113 			return;
114 		if (voltage->voltage != rdev->pm.current_vddc) {
115 			radeon_atom_set_voltage(rdev, voltage->voltage, SET_VOLTAGE_TYPE_ASIC_VDDC);
116 			rdev->pm.current_vddc = voltage->voltage;
117 			DRM_DEBUG("Setting: v: %d\n", voltage->voltage);
118 		}
119 	}
120 }
121 
122 /*
123  * GART
124  */
125 static int rv770_pcie_gart_enable(struct radeon_device *rdev)
126 {
127 	u32 tmp;
128 	int r, i;
129 
130 	if (rdev->gart.robj == NULL) {
131 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
132 		return -EINVAL;
133 	}
134 	r = radeon_gart_table_vram_pin(rdev);
135 	if (r)
136 		return r;
137 	radeon_gart_restore(rdev);
138 	/* Setup L2 cache */
139 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING |
140 				ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
141 				EFFECTIVE_L2_QUEUE_SIZE(7));
142 	WREG32(VM_L2_CNTL2, 0);
143 	WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2));
144 	/* Setup TLB control */
145 	tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING |
146 		SYSTEM_ACCESS_MODE_NOT_IN_SYS |
147 		SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU |
148 		EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5);
149 	WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp);
150 	WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp);
151 	WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp);
152 	if (rdev->family == CHIP_RV740)
153 		WREG32(MC_VM_MD_L1_TLB3_CNTL, tmp);
154 	WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp);
155 	WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp);
156 	WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp);
157 	WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp);
158 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
159 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
160 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
161 	WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
162 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
163 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
164 			(u32)(rdev->dummy_page.addr >> 12));
165 	for (i = 1; i < 7; i++)
166 		WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
167 
168 	r600_pcie_gart_tlb_flush(rdev);
169 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
170 		 (unsigned)(rdev->mc.gtt_size >> 20),
171 		 (unsigned long long)rdev->gart.table_addr);
172 	rdev->gart.ready = true;
173 	return 0;
174 }
175 
176 static void rv770_pcie_gart_disable(struct radeon_device *rdev)
177 {
178 	u32 tmp;
179 	int i;
180 
181 	/* Disable all tables */
182 	for (i = 0; i < 7; i++)
183 		WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
184 
185 	/* Setup L2 cache */
186 	WREG32(VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING |
187 				EFFECTIVE_L2_QUEUE_SIZE(7));
188 	WREG32(VM_L2_CNTL2, 0);
189 	WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2));
190 	/* Setup TLB control */
191 	tmp = EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5);
192 	WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp);
193 	WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp);
194 	WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp);
195 	WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp);
196 	WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp);
197 	WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp);
198 	WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp);
199 	radeon_gart_table_vram_unpin(rdev);
200 }
201 
/**
 * rv770_pcie_gart_fini - tear down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Runs the common GART teardown, disables the hardware GART (which also
 * unpins the table), and finally frees the table object.
 */
static void rv770_pcie_gart_fini(struct radeon_device *rdev)
{
	/* common bookkeeping first */
	radeon_gart_fini(rdev);

	/* then the hardware side */
	rv770_pcie_gart_disable(rdev);

	/* and last the table itself */
	radeon_gart_table_vram_free(rdev);
}
208 
209 
210 static void rv770_agp_enable(struct radeon_device *rdev)
211 {
212 	u32 tmp;
213 	int i;
214 
215 	/* Setup L2 cache */
216 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING |
217 				ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
218 				EFFECTIVE_L2_QUEUE_SIZE(7));
219 	WREG32(VM_L2_CNTL2, 0);
220 	WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2));
221 	/* Setup TLB control */
222 	tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING |
223 		SYSTEM_ACCESS_MODE_NOT_IN_SYS |
224 		SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU |
225 		EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5);
226 	WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp);
227 	WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp);
228 	WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp);
229 	WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp);
230 	WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp);
231 	WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp);
232 	WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp);
233 	for (i = 0; i < 7; i++)
234 		WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
235 }
236 
237 static void rv770_mc_program(struct radeon_device *rdev)
238 {
239 	struct rv515_mc_save save;
240 	u32 tmp;
241 	int i, j;
242 
243 	/* Initialize HDP */
244 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
245 		WREG32((0x2c14 + j), 0x00000000);
246 		WREG32((0x2c18 + j), 0x00000000);
247 		WREG32((0x2c1c + j), 0x00000000);
248 		WREG32((0x2c20 + j), 0x00000000);
249 		WREG32((0x2c24 + j), 0x00000000);
250 	}
251 	/* r7xx hw bug.  Read from HDP_DEBUG1 rather
252 	 * than writing to HDP_REG_COHERENCY_FLUSH_CNTL
253 	 */
254 	tmp = RREG32(HDP_DEBUG1);
255 
256 	rv515_mc_stop(rdev, &save);
257 	if (r600_mc_wait_for_idle(rdev)) {
258 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
259 	}
260 	/* Lockout access through VGA aperture*/
261 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
262 	/* Update configuration */
263 	if (rdev->flags & RADEON_IS_AGP) {
264 		if (rdev->mc.vram_start < rdev->mc.gtt_start) {
265 			/* VRAM before AGP */
266 			WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
267 				rdev->mc.vram_start >> 12);
268 			WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
269 				rdev->mc.gtt_end >> 12);
270 		} else {
271 			/* VRAM after AGP */
272 			WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
273 				rdev->mc.gtt_start >> 12);
274 			WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
275 				rdev->mc.vram_end >> 12);
276 		}
277 	} else {
278 		WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
279 			rdev->mc.vram_start >> 12);
280 		WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
281 			rdev->mc.vram_end >> 12);
282 	}
283 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, rdev->vram_scratch.gpu_addr >> 12);
284 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
285 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
286 	WREG32(MC_VM_FB_LOCATION, tmp);
287 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
288 	WREG32(HDP_NONSURFACE_INFO, (2 << 7));
289 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
290 	if (rdev->flags & RADEON_IS_AGP) {
291 		WREG32(MC_VM_AGP_TOP, rdev->mc.gtt_end >> 16);
292 		WREG32(MC_VM_AGP_BOT, rdev->mc.gtt_start >> 16);
293 		WREG32(MC_VM_AGP_BASE, rdev->mc.agp_base >> 22);
294 	} else {
295 		WREG32(MC_VM_AGP_BASE, 0);
296 		WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
297 		WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
298 	}
299 	if (r600_mc_wait_for_idle(rdev)) {
300 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
301 	}
302 	rv515_mc_resume(rdev, &save);
303 	/* we need to own VRAM, so turn off the VGA renderer here
304 	 * to stop it overwriting our objects */
305 	rv515_vga_render_disable(rdev);
306 }
307 
308 
309 /*
310  * CP.
311  */
312 void r700_cp_stop(struct radeon_device *rdev)
313 {
314 	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
315 	WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
316 	WREG32(SCRATCH_UMSK, 0);
317 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
318 }
319 
320 static int rv770_cp_load_microcode(struct radeon_device *rdev)
321 {
322 	const __be32 *fw_data;
323 	int i;
324 
325 	if (!rdev->me_fw || !rdev->pfp_fw)
326 		return -EINVAL;
327 
328 	r700_cp_stop(rdev);
329 	WREG32(CP_RB_CNTL,
330 #ifdef __BIG_ENDIAN
331 	       BUF_SWAP_32BIT |
332 #endif
333 	       RB_NO_UPDATE | RB_BLKSZ(15) | RB_BUFSZ(3));
334 
335 	/* Reset cp */
336 	WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
337 	RREG32(GRBM_SOFT_RESET);
338 	mdelay(15);
339 	WREG32(GRBM_SOFT_RESET, 0);
340 
341 	fw_data = (const __be32 *)rdev->pfp_fw;
342 	WREG32(CP_PFP_UCODE_ADDR, 0);
343 	for (i = 0; i < R700_PFP_UCODE_SIZE; i++)
344 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
345 	WREG32(CP_PFP_UCODE_ADDR, 0);
346 
347 	fw_data = (const __be32 *)rdev->me_fw;
348 	WREG32(CP_ME_RAM_WADDR, 0);
349 	for (i = 0; i < R700_PM4_UCODE_SIZE; i++)
350 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
351 
352 	WREG32(CP_PFP_UCODE_ADDR, 0);
353 	WREG32(CP_ME_RAM_WADDR, 0);
354 	WREG32(CP_ME_RAM_RADDR, 0);
355 	return 0;
356 }
357 
358 void r700_cp_fini(struct radeon_device *rdev)
359 {
360 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
361 	r700_cp_stop(rdev);
362 	radeon_ring_fini(rdev, ring);
363 	radeon_scratch_free(rdev, ring->rptr_save_reg);
364 }
365 
366 /*
367  * Core functions
368  */
/*
 * rv770_gpu_init - program default state for the r7xx graphics engine.
 *
 * Steps, in the order they are performed below:
 *  1. fill rdev->config.rv770 with per-family limits (RV770/RV730/RV710/RV740);
 *  2. clear the HDP register block and set the GRBM read timeout;
 *  3. read MC_ARB_RAMCFG, CC_GC_SHADER_PIPE_CONFIG and CC_RB_BACKEND_DISABLE,
 *     lower max_backends/max_pipes/max_simds accordingly and build the
 *     tiling configuration word, which is written to the GB/DCP/HDP/DMA
 *     tiling registers and stored in rdev->config.rv770.tile_config;
 *  4. write default values to the CP, TA, SX, SMX, DB, PA, SQ, VGT, SPI and
 *     CB registers.
 *
 * All hardware access goes through RREG32/WREG32; the sequence of accesses
 * is left exactly as found.
 */
369 static void rv770_gpu_init(struct radeon_device *rdev)
370 {
371 	int i, j, num_qd_pipes;
372 	u32 ta_aux_cntl;
373 	u32 sx_debug_1;
374 	u32 smx_dc_ctl0;
375 	u32 db_debug3;
376 	u32 num_gs_verts_per_thread;
377 	u32 vgt_gs_per_es;
378 	u32 gs_prim_buffer_depth = 0;
379 	u32 sq_ms_fifo_sizes;
380 	u32 sq_config;
381 	u32 sq_thread_resource_mgmt;
382 	u32 hdp_host_path_cntl;
383 	u32 sq_dyn_gpr_size_simd_ab_0;
384 	u32 gb_tiling_config = 0;
385 	u32 cc_rb_backend_disable = 0;
386 	u32 cc_gc_shader_pipe_config = 0;
387 	u32 mc_arb_ramcfg;
388 	u32 db_debug4, tmp;
389 	u32 inactive_pipes, shader_pipe_config;
390 	u32 disabled_rb_mask;
391 	unsigned active_number;
392 
393 	/* setup chip specs */
394 	rdev->config.rv770.tiling_group_size = 256;
395 	switch (rdev->family) {
396 	case CHIP_RV770:
397 		rdev->config.rv770.max_pipes = 4;
398 		rdev->config.rv770.max_tile_pipes = 8;
399 		rdev->config.rv770.max_simds = 10;
400 		rdev->config.rv770.max_backends = 4;
401 		rdev->config.rv770.max_gprs = 256;
402 		rdev->config.rv770.max_threads = 248;
403 		rdev->config.rv770.max_stack_entries = 512;
404 		rdev->config.rv770.max_hw_contexts = 8;
405 		rdev->config.rv770.max_gs_threads = 16 * 2;
406 		rdev->config.rv770.sx_max_export_size = 128;
407 		rdev->config.rv770.sx_max_export_pos_size = 16;
408 		rdev->config.rv770.sx_max_export_smx_size = 112;
409 		rdev->config.rv770.sq_num_cf_insts = 2;
410 
411 		rdev->config.rv770.sx_num_of_sets = 7;
412 		rdev->config.rv770.sc_prim_fifo_size = 0xF9;
413 		rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
414 		rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;
415 		break;
416 	case CHIP_RV730:
417 		rdev->config.rv770.max_pipes = 2;
418 		rdev->config.rv770.max_tile_pipes = 4;
419 		rdev->config.rv770.max_simds = 8;
420 		rdev->config.rv770.max_backends = 2;
421 		rdev->config.rv770.max_gprs = 128;
422 		rdev->config.rv770.max_threads = 248;
423 		rdev->config.rv770.max_stack_entries = 256;
424 		rdev->config.rv770.max_hw_contexts = 8;
425 		rdev->config.rv770.max_gs_threads = 16 * 2;
426 		rdev->config.rv770.sx_max_export_size = 256;
427 		rdev->config.rv770.sx_max_export_pos_size = 32;
428 		rdev->config.rv770.sx_max_export_smx_size = 224;
429 		rdev->config.rv770.sq_num_cf_insts = 2;
430 
431 		rdev->config.rv770.sx_num_of_sets = 7;
432 		rdev->config.rv770.sc_prim_fifo_size = 0xf9;
433 		rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
434 		rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;
		/* move 16 entries from the position buffer to the SMX buffer */
435 		if (rdev->config.rv770.sx_max_export_pos_size > 16) {
436 			rdev->config.rv770.sx_max_export_pos_size -= 16;
437 			rdev->config.rv770.sx_max_export_smx_size += 16;
438 		}
439 		break;
440 	case CHIP_RV710:
441 		rdev->config.rv770.max_pipes = 2;
442 		rdev->config.rv770.max_tile_pipes = 2;
443 		rdev->config.rv770.max_simds = 2;
444 		rdev->config.rv770.max_backends = 1;
445 		rdev->config.rv770.max_gprs = 256;
446 		rdev->config.rv770.max_threads = 192;
447 		rdev->config.rv770.max_stack_entries = 256;
448 		rdev->config.rv770.max_hw_contexts = 4;
449 		rdev->config.rv770.max_gs_threads = 8 * 2;
450 		rdev->config.rv770.sx_max_export_size = 128;
451 		rdev->config.rv770.sx_max_export_pos_size = 16;
452 		rdev->config.rv770.sx_max_export_smx_size = 112;
453 		rdev->config.rv770.sq_num_cf_insts = 1;
454 
455 		rdev->config.rv770.sx_num_of_sets = 7;
456 		rdev->config.rv770.sc_prim_fifo_size = 0x40;
457 		rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
458 		rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;
459 		break;
460 	case CHIP_RV740:
461 		rdev->config.rv770.max_pipes = 4;
462 		rdev->config.rv770.max_tile_pipes = 4;
463 		rdev->config.rv770.max_simds = 8;
464 		rdev->config.rv770.max_backends = 4;
465 		rdev->config.rv770.max_gprs = 256;
466 		rdev->config.rv770.max_threads = 248;
467 		rdev->config.rv770.max_stack_entries = 512;
468 		rdev->config.rv770.max_hw_contexts = 8;
469 		rdev->config.rv770.max_gs_threads = 16 * 2;
470 		rdev->config.rv770.sx_max_export_size = 256;
471 		rdev->config.rv770.sx_max_export_pos_size = 32;
472 		rdev->config.rv770.sx_max_export_smx_size = 224;
473 		rdev->config.rv770.sq_num_cf_insts = 2;
474 
475 		rdev->config.rv770.sx_num_of_sets = 7;
476 		rdev->config.rv770.sc_prim_fifo_size = 0x100;
477 		rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
478 		rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;
479 
		/* same position/SMX buffer rebalancing as for RV730 */
480 		if (rdev->config.rv770.sx_max_export_pos_size > 16) {
481 			rdev->config.rv770.sx_max_export_pos_size -= 16;
482 			rdev->config.rv770.sx_max_export_smx_size += 16;
483 		}
484 		break;
	/* any other family: the limits above are not assigned here */
485 	default:
486 		break;
487 	}
488 
489 	/* Initialize HDP */
	/* 32 groups of five registers starting at 0x2c14, 0x18 bytes apart */
490 	j = 0;
491 	for (i = 0; i < 32; i++) {
492 		WREG32((0x2c14 + j), 0x00000000);
493 		WREG32((0x2c18 + j), 0x00000000);
494 		WREG32((0x2c1c + j), 0x00000000);
495 		WREG32((0x2c20 + j), 0x00000000);
496 		WREG32((0x2c24 + j), 0x00000000);
497 		j += 0x18;
498 	}
499 
500 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
501 
502 	/* setup tiling, simd, pipe config */
503 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
504 
505 	shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG);
506 	inactive_pipes = (shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> INACTIVE_QD_PIPES_SHIFT;
	/* count the clear bits among the low R7XX_MAX_PIPES bits of inactive_pipes */
507 	for (i = 0, tmp = 1, active_number = 0; i < R7XX_MAX_PIPES; i++) {
508 		if (!(inactive_pipes & tmp)) {
509 			active_number++;
510 		}
511 		tmp <<= 1;
512 	}
	/* with exactly one active pipe, DISABLE_INTERP_1 is set in SPI_CONFIG_CNTL */
513 	if (active_number == 1) {
514 		WREG32(SPI_CONFIG_CNTL, DISABLE_INTERP_1);
515 	} else {
516 		WREG32(SPI_CONFIG_CNTL, 0);
517 	}
518 
	/*
	 * Lower max_backends, max_pipes and max_simds when the disable
	 * fields leave fewer units than the per-family defaults.
	 */
519 	cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000;
520 	tmp = R7XX_MAX_BACKENDS - r600_count_pipe_bits(cc_rb_backend_disable >> 16);
521 	if (tmp < rdev->config.rv770.max_backends) {
522 		rdev->config.rv770.max_backends = tmp;
523 	}
524 
525 	cc_gc_shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0xffffff00;
526 	tmp = R7XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config >> 8) & R7XX_MAX_PIPES_MASK);
527 	if (tmp < rdev->config.rv770.max_pipes) {
528 		rdev->config.rv770.max_pipes = tmp;
529 	}
530 	tmp = R7XX_MAX_SIMDS - r600_count_pipe_bits((cc_gc_shader_pipe_config >> 16) & R7XX_MAX_SIMDS_MASK);
531 	if (tmp < rdev->config.rv770.max_simds) {
532 		rdev->config.rv770.max_simds = tmp;
533 	}
534 
	/* max_tile_pipes 1/2/4/8 selects PIPE_TILING 0/1/2/3; other values use 0 */
535 	switch (rdev->config.rv770.max_tile_pipes) {
536 	case 1:
537 	default:
538 		gb_tiling_config = PIPE_TILING(0);
539 		break;
540 	case 2:
541 		gb_tiling_config = PIPE_TILING(1);
542 		break;
543 	case 4:
544 		gb_tiling_config = PIPE_TILING(2);
545 		break;
546 	case 8:
547 		gb_tiling_config = PIPE_TILING(3);
548 		break;
549 	}
550 	rdev->config.rv770.tiling_npipes = rdev->config.rv770.max_tile_pipes;
551 
	/* the backend map goes into bits 16 and up of the tiling word */
552 	disabled_rb_mask = (RREG32(CC_RB_BACKEND_DISABLE) >> 16) & R7XX_MAX_BACKENDS_MASK;
553 	tmp = (gb_tiling_config & PIPE_TILING__MASK) >> PIPE_TILING__SHIFT;
554 	tmp = r6xx_remap_render_backend(rdev, tmp, rdev->config.rv770.max_backends,
555 					R7XX_MAX_BACKENDS, disabled_rb_mask);
556 	gb_tiling_config |= tmp << 16;
557 	rdev->config.rv770.backend_map = tmp;
558 
	/* RV770 always uses BANK_TILING(1); other families follow NOOFBANK */
559 	if (rdev->family == CHIP_RV770)
560 		gb_tiling_config |= BANK_TILING(1);
561 	else {
562 		if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
563 			gb_tiling_config |= BANK_TILING(1);
564 		else
565 			gb_tiling_config |= BANK_TILING(0);
566 	}
567 	rdev->config.rv770.tiling_nbanks = 4 << ((gb_tiling_config >> 4) & 0x3);
568 	gb_tiling_config |= GROUP_SIZE((mc_arb_ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT);
	/* ROW_TILING and SAMPLE_SPLIT take the NOOFROWS field, capped at 3 */
569 	if (((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT) > 3) {
570 		gb_tiling_config |= ROW_TILING(3);
571 		gb_tiling_config |= SAMPLE_SPLIT(3);
572 	} else {
573 		gb_tiling_config |=
574 			ROW_TILING(((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT));
575 		gb_tiling_config |=
576 			SAMPLE_SPLIT(((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT));
577 	}
578 
579 	gb_tiling_config |= BANK_SWAPS(1);
580 	rdev->config.rv770.tile_config = gb_tiling_config;
581 
	/* the DCP/HDP/DMA copies only receive the low 16 bits */
582 	WREG32(GB_TILING_CONFIG, gb_tiling_config);
583 	WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff));
584 	WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff));
585 	WREG32(DMA_TILING_CONFIG, (gb_tiling_config & 0xffff));
586 	WREG32(DMA_TILING_CONFIG2, (gb_tiling_config & 0xffff));
587 
588 	WREG32(CGTS_SYS_TCC_DISABLE, 0);
589 	WREG32(CGTS_TCC_DISABLE, 0);
590 	WREG32(CGTS_USER_SYS_TCC_DISABLE, 0);
591 	WREG32(CGTS_USER_TCC_DISABLE, 0);
592 
593 
594 	num_qd_pipes = R7XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> 8);
595 	WREG32(VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & DEALLOC_DIST_MASK);
596 	WREG32(VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & VTX_REUSE_DEPTH_MASK);
597 
598 	/* set HW defaults for 3D engine */
599 	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
600 				     ROQ_IB2_START(0x2b)));
601 
602 	WREG32(CP_MEQ_THRESHOLDS, STQ_SPLIT(0x30));
603 
604 	ta_aux_cntl = RREG32(TA_CNTL_AUX);
605 	WREG32(TA_CNTL_AUX, ta_aux_cntl | DISABLE_CUBE_ANISO);
606 
607 	sx_debug_1 = RREG32(SX_DEBUG_1);
608 	sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS;
609 	WREG32(SX_DEBUG_1, sx_debug_1);
610 
	/* replace the CACHE_DEPTH field with sx_num_of_sets * 64 - 1 */
611 	smx_dc_ctl0 = RREG32(SMX_DC_CTL0);
612 	smx_dc_ctl0 &= ~CACHE_DEPTH(0x1ff);
613 	smx_dc_ctl0 |= CACHE_DEPTH((rdev->config.rv770.sx_num_of_sets * 64) - 1);
614 	WREG32(SMX_DC_CTL0, smx_dc_ctl0);
615 
616 	if (rdev->family != CHIP_RV740)
617 		WREG32(SMX_EVENT_CTL, (ES_FLUSH_CTL(4) |
618 				       GS_FLUSH_CTL(4) |
619 				       ACK_FLUSH_CTL(3) |
620 				       SYNC_FLUSH_CTL));
621 
622 	if (rdev->family != CHIP_RV770)
623 		WREG32(SMX_SAR_CTL0, 0x00003f3f);
624 
	/* DB_CLK_OFF_DELAY: 0x1f on RV770/RV740, 2 on everything else */
625 	db_debug3 = RREG32(DB_DEBUG3);
626 	db_debug3 &= ~DB_CLK_OFF_DELAY(0x1f);
627 	switch (rdev->family) {
628 	case CHIP_RV770:
629 	case CHIP_RV740:
630 		db_debug3 |= DB_CLK_OFF_DELAY(0x1f);
631 		break;
632 	case CHIP_RV710:
633 	case CHIP_RV730:
634 	default:
635 		db_debug3 |= DB_CLK_OFF_DELAY(2);
636 		break;
637 	}
638 	WREG32(DB_DEBUG3, db_debug3);
639 
640 	if (rdev->family != CHIP_RV770) {
641 		db_debug4 = RREG32(DB_DEBUG4);
642 		db_debug4 |= DISABLE_TILE_COVERED_FOR_PS_ITER;
643 		WREG32(DB_DEBUG4, db_debug4);
644 	}
645 
	/* each export buffer field is programmed as (size / 4) - 1 */
646 	WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.rv770.sx_max_export_size / 4) - 1) |
647 					POSITION_BUFFER_SIZE((rdev->config.rv770.sx_max_export_pos_size / 4) - 1) |
648 					SMX_BUFFER_SIZE((rdev->config.rv770.sx_max_export_smx_size / 4) - 1)));
649 
650 	WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.rv770.sc_prim_fifo_size) |
651 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.rv770.sc_hiz_tile_fifo_size) |
652 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.rv770.sc_earlyz_tile_fifo_fize)));
653 
654 	WREG32(PA_SC_MULTI_CHIP_CNTL, 0);
655 
656 	WREG32(VGT_NUM_INSTANCES, 1);
657 
658 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
659 
660 	WREG32(CP_PERFMON_CNTL, 0);
661 
662 	sq_ms_fifo_sizes = (CACHE_FIFO_SIZE(16 * rdev->config.rv770.sq_num_cf_insts) |
663 			    DONE_FIFO_HIWATER(0xe0) |
664 			    ALU_UPDATE_FIFO_HIWATER(0x8));
	/* FETCH_FIFO_HIWATER: 1 on RV770/RV730/RV710, 4 on RV740 and others */
665 	switch (rdev->family) {
666 	case CHIP_RV770:
667 	case CHIP_RV730:
668 	case CHIP_RV710:
669 		sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x1);
670 		break;
671 	case CHIP_RV740:
672 	default:
673 		sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x4);
674 		break;
675 	}
676 	WREG32(SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes);
677 
678 	/* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT
679 	 * should be adjusted as needed by the 2D/3D drivers.  This just sets default values
680 	 */
681 	sq_config = RREG32(SQ_CONFIG);
682 	sq_config &= ~(PS_PRIO(3) |
683 		       VS_PRIO(3) |
684 		       GS_PRIO(3) |
685 		       ES_PRIO(3));
686 	sq_config |= (DX9_CONSTS |
687 		      VC_ENABLE |
688 		      EXPORT_SRC_C |
689 		      PS_PRIO(0) |
690 		      VS_PRIO(1) |
691 		      GS_PRIO(2) |
692 		      ES_PRIO(3));
693 	if (rdev->family == CHIP_RV710)
694 		/* no vertex cache */
695 		sq_config &= ~VC_ENABLE;
696 
697 	WREG32(SQ_CONFIG, sq_config);
698 
	/* GPR split: 24/64 of max_gprs each for PS and VS, half of that for clause temps */
699 	WREG32(SQ_GPR_RESOURCE_MGMT_1,  (NUM_PS_GPRS((rdev->config.rv770.max_gprs * 24)/64) |
700 					 NUM_VS_GPRS((rdev->config.rv770.max_gprs * 24)/64) |
701 					 NUM_CLAUSE_TEMP_GPRS(((rdev->config.rv770.max_gprs * 24)/64)/2)));
702 
703 	WREG32(SQ_GPR_RESOURCE_MGMT_2,  (NUM_GS_GPRS((rdev->config.rv770.max_gprs * 7)/64) |
704 					 NUM_ES_GPRS((rdev->config.rv770.max_gprs * 7)/64)));
705 
	/* thread split: 4/8 PS, 2/8 VS, 1/8 ES of max_threads */
706 	sq_thread_resource_mgmt = (NUM_PS_THREADS((rdev->config.rv770.max_threads * 4)/8) |
707 				   NUM_VS_THREADS((rdev->config.rv770.max_threads * 2)/8) |
708 				   NUM_ES_THREADS((rdev->config.rv770.max_threads * 1)/8));
	/*
	 * NOTE(review): the else branch divides max_gs_threads (not
	 * max_threads) by 8 -- confirm this is intended.
	 */
709 	if (((rdev->config.rv770.max_threads * 1) / 8) > rdev->config.rv770.max_gs_threads)
710 		sq_thread_resource_mgmt |= NUM_GS_THREADS(rdev->config.rv770.max_gs_threads);
711 	else
712 		sq_thread_resource_mgmt |= NUM_GS_THREADS((rdev->config.rv770.max_gs_threads * 1)/8);
713 	WREG32(SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);
714 
	/* each of PS/VS/GS/ES gets a quarter of max_stack_entries */
715 	WREG32(SQ_STACK_RESOURCE_MGMT_1, (NUM_PS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4) |
716 						     NUM_VS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4)));
717 
718 	WREG32(SQ_STACK_RESOURCE_MGMT_2, (NUM_GS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4) |
719 						     NUM_ES_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4)));
720 
	/* the same dynamic GPR ring sizes are written to all eight SIMD_AB registers */
721 	sq_dyn_gpr_size_simd_ab_0 = (SIMDA_RING0((rdev->config.rv770.max_gprs * 38)/64) |
722 				     SIMDA_RING1((rdev->config.rv770.max_gprs * 38)/64) |
723 				     SIMDB_RING0((rdev->config.rv770.max_gprs * 38)/64) |
724 				     SIMDB_RING1((rdev->config.rv770.max_gprs * 38)/64));
725 
726 	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_0, sq_dyn_gpr_size_simd_ab_0);
727 	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_1, sq_dyn_gpr_size_simd_ab_0);
728 	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_2, sq_dyn_gpr_size_simd_ab_0);
729 	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_3, sq_dyn_gpr_size_simd_ab_0);
730 	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_4, sq_dyn_gpr_size_simd_ab_0);
731 	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_5, sq_dyn_gpr_size_simd_ab_0);
732 	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_6, sq_dyn_gpr_size_simd_ab_0);
733 	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_7, sq_dyn_gpr_size_simd_ab_0);
734 
735 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
736 					  FORCE_EOV_MAX_REZ_CNT(255)));
737 
	/* RV710 invalidates TC only; the other families invalidate VC and TC */
738 	if (rdev->family == CHIP_RV710)
739 		WREG32(VGT_CACHE_INVALIDATION, (CACHE_INVALIDATION(TC_ONLY) |
740 						AUTO_INVLD_EN(ES_AND_GS_AUTO)));
741 	else
742 		WREG32(VGT_CACHE_INVALIDATION, (CACHE_INVALIDATION(VC_AND_TC) |
743 						AUTO_INVLD_EN(ES_AND_GS_AUTO)));
744 
	/* gs_prim_buffer_depth stays 0 (its initializer) for unlisted families */
745 	switch (rdev->family) {
746 	case CHIP_RV770:
747 	case CHIP_RV730:
748 	case CHIP_RV740:
749 		gs_prim_buffer_depth = 384;
750 		break;
751 	case CHIP_RV710:
752 		gs_prim_buffer_depth = 128;
753 		break;
754 	default:
755 		break;
756 	}
757 
758 	num_gs_verts_per_thread = rdev->config.rv770.max_pipes * 16;
759 	vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread;
760 	/* Max value for this is 256 */
761 	if (vgt_gs_per_es > 256)
762 		vgt_gs_per_es = 256;
763 
764 	WREG32(VGT_ES_PER_GS, 128);
765 	WREG32(VGT_GS_PER_ES, vgt_gs_per_es);
766 	WREG32(VGT_GS_PER_VS, 2);
767 
768 	/* more default values. 2D/3D driver should adjust as needed */
769 	WREG32(VGT_GS_VERTEX_REUSE, 16);
770 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
771 	WREG32(VGT_STRMOUT_EN, 0);
772 	WREG32(SX_MISC, 0);
773 	WREG32(PA_SC_MODE_CNTL, 0);
774 	WREG32(PA_SC_EDGERULE, 0xaaaaaaaa);
775 	WREG32(PA_SC_AA_CONFIG, 0);
776 	WREG32(PA_SC_CLIPRECT_RULE, 0xffff);
777 	WREG32(PA_SC_LINE_STIPPLE, 0);
778 	WREG32(SPI_INPUT_Z, 0);
779 	WREG32(SPI_PS_IN_CONTROL_0, NUM_INTERP(2));
780 	WREG32(CB_COLOR7_FRAG, 0);
781 
782 	/* clear render buffer base addresses */
783 	WREG32(CB_COLOR0_BASE, 0);
784 	WREG32(CB_COLOR1_BASE, 0);
785 	WREG32(CB_COLOR2_BASE, 0);
786 	WREG32(CB_COLOR3_BASE, 0);
787 	WREG32(CB_COLOR4_BASE, 0);
788 	WREG32(CB_COLOR5_BASE, 0);
789 	WREG32(CB_COLOR6_BASE, 0);
790 	WREG32(CB_COLOR7_BASE, 0);
791 
792 	WREG32(TCP_CNTL, 0);
793 
	/* HDP_HOST_PATH_CNTL is read and written back unchanged */
794 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
795 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
796 
	/* second write of 0 to PA_SC_MULTI_CHIP_CNTL (also written earlier) */
797 	WREG32(PA_SC_MULTI_CHIP_CNTL, 0);
798 
799 	WREG32(PA_CL_ENHANCE, (CLIP_VTX_REORDER_ENA |
800 					  NUM_CLIP_SEQ(3)));
801 	WREG32(VC_ENHANCE, 0);
802 }
803 
804 void r700_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
805 {
806 	u64 size_bf, size_af;
807 
808 	if (mc->mc_vram_size > 0xE0000000) {
809 		/* leave room for at least 512M GTT */
810 		dev_warn(rdev->dev, "limiting VRAM\n");
811 		mc->real_vram_size = 0xE0000000;
812 		mc->mc_vram_size = 0xE0000000;
813 	}
814 	if (rdev->flags & RADEON_IS_AGP) {
815 		size_bf = mc->gtt_start;
816 		size_af = 0xFFFFFFFF - mc->gtt_end;
817 		if (size_bf > size_af) {
818 			if (mc->mc_vram_size > size_bf) {
819 				dev_warn(rdev->dev, "limiting VRAM\n");
820 				mc->real_vram_size = size_bf;
821 				mc->mc_vram_size = size_bf;
822 			}
823 			mc->vram_start = mc->gtt_start - mc->mc_vram_size;
824 		} else {
825 			if (mc->mc_vram_size > size_af) {
826 				dev_warn(rdev->dev, "limiting VRAM\n");
827 				mc->real_vram_size = size_af;
828 				mc->mc_vram_size = size_af;
829 			}
830 			mc->vram_start = mc->gtt_end + 1;
831 		}
832 		mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
833 		dev_info(rdev->dev, "VRAM: %lluM 0x%08llX - 0x%08llX (%lluM used)\n",
834 				mc->mc_vram_size >> 20, mc->vram_start,
835 				mc->vram_end, mc->real_vram_size >> 20);
836 	} else {
837 		radeon_vram_location(rdev, &rdev->mc, 0);
838 		rdev->mc.gtt_base_align = 0;
839 		radeon_gtt_location(rdev, mc);
840 	}
841 }
842 
843 static int rv770_mc_init(struct radeon_device *rdev)
844 {
845 	u32 tmp;
846 	int chansize, numchan;
847 
848 	/* Get VRAM informations */
849 	rdev->mc.vram_is_ddr = true;
850 	tmp = RREG32(MC_ARB_RAMCFG);
851 	if (tmp & CHANSIZE_OVERRIDE) {
852 		chansize = 16;
853 	} else if (tmp & CHANSIZE_MASK) {
854 		chansize = 64;
855 	} else {
856 		chansize = 32;
857 	}
858 	tmp = RREG32(MC_SHARED_CHMAP);
859 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
860 	case 0:
861 	default:
862 		numchan = 1;
863 		break;
864 	case 1:
865 		numchan = 2;
866 		break;
867 	case 2:
868 		numchan = 4;
869 		break;
870 	case 3:
871 		numchan = 8;
872 		break;
873 	}
874 	rdev->mc.vram_width = numchan * chansize;
875 	/* Could aper size report 0 ? */
876 	rdev->mc.aper_base = rdev->fb_aper_offset;
877 	rdev->mc.aper_size = rdev->fb_aper_size;
878 	/* Setup GPU memory space */
879 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
880 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
881 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
882 	r700_vram_gtt_location(rdev, &rdev->mc);
883 	radeon_update_bandwidth_info(rdev);
884 
885 	return 0;
886 }
887 
888 /**
889  * rv770_copy_dma - copy pages using the DMA engine
890  *
891  * @rdev: radeon_device pointer
892  * @src_offset: src GPU address
893  * @dst_offset: dst GPU address
894  * @num_gpu_pages: number of GPU pages to xfer
895  * @fence: radeon fence object
896  *
897  * Copy GPU paging using the DMA engine (r7xx).
898  * Used by the radeon ttm implementation to move pages if
899  * registered as the asic copy callback.
900  */
901 int rv770_copy_dma(struct radeon_device *rdev,
902 		  uint64_t src_offset, uint64_t dst_offset,
903 		  unsigned num_gpu_pages,
904 		  struct radeon_fence **fence)
905 {
906 	struct radeon_semaphore *sem = NULL;
907 	int ring_index = rdev->asic->copy.dma_ring_index;
908 	struct radeon_ring *ring = &rdev->ring[ring_index];
909 	u32 size_in_dw, cur_size_in_dw;
910 	int i, num_loops;
911 	int r = 0;
912 
913 	r = radeon_semaphore_create(rdev, &sem);
914 	if (r) {
915 		DRM_ERROR("radeon: moving bo (%d).\n", r);
916 		return r;
917 	}
918 
919 	size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
920 	num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFF);
921 	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8);
922 	if (r) {
923 		DRM_ERROR("radeon: moving bo (%d).\n", r);
924 		radeon_semaphore_free(rdev, &sem, NULL);
925 		return r;
926 	}
927 
928 	if (radeon_fence_need_sync(*fence, ring->idx)) {
929 		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
930 					    ring->idx);
931 		radeon_fence_note_sync(*fence, ring->idx);
932 	} else {
933 		radeon_semaphore_free(rdev, &sem, NULL);
934 	}
935 
936 	for (i = 0; i < num_loops; i++) {
937 		cur_size_in_dw = size_in_dw;
938 		if (cur_size_in_dw > 0xFFFF)
939 			cur_size_in_dw = 0xFFFF;
940 		size_in_dw -= cur_size_in_dw;
941 		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
942 		radeon_ring_write(ring, dst_offset & 0xfffffffc);
943 		radeon_ring_write(ring, src_offset & 0xfffffffc);
944 		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
945 		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
946 		src_offset += cur_size_in_dw * 4;
947 		dst_offset += cur_size_in_dw * 4;
948 	}
949 
950 	r = radeon_fence_emit(rdev, fence, ring->idx);
951 	if (r) {
952 		radeon_ring_unlock_undo(rdev, ring);
953 		return r;
954 	}
955 
956 	radeon_ring_unlock_commit(rdev, ring);
957 	radeon_semaphore_free(rdev, &sem, *fence);
958 
959 	return r;
960 }
961 
962 static int rv770_startup(struct radeon_device *rdev)
963 {
964 	struct radeon_ring *ring;
965 	int r;
966 
967 	/* enable pcie gen2 link */
968 	rv770_pcie_gen2_enable(rdev);
969 
970 	rv770_mc_program(rdev);
971 
972 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) {
973 		r = r600_init_microcode(rdev);
974 		if (r) {
975 			DRM_ERROR("Failed to load firmware!\n");
976 			return r;
977 		}
978 	}
979 
980 	r = r600_vram_scratch_init(rdev);
981 	if (r)
982 		return r;
983 
984 	if (rdev->flags & RADEON_IS_AGP) {
985 		rv770_agp_enable(rdev);
986 	} else {
987 		r = rv770_pcie_gart_enable(rdev);
988 		if (r)
989 			return r;
990 	}
991 
992 	rv770_gpu_init(rdev);
993 	r = r600_blit_init(rdev);
994 	if (r) {
995 		r600_blit_fini(rdev);
996 		rdev->asic->copy.copy = NULL;
997 		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
998 	}
999 
1000 	/* allocate wb buffer */
1001 	r = radeon_wb_init(rdev);
1002 	if (r)
1003 		return r;
1004 
1005 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
1006 	if (r) {
1007 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
1008 		return r;
1009 	}
1010 
1011 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
1012 	if (r) {
1013 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
1014 		return r;
1015 	}
1016 
1017 	/* Enable IRQ */
1018 	if (!rdev->irq.installed) {
1019 		r = radeon_irq_kms_init(rdev);
1020 		if (r)
1021 			return r;
1022 	}
1023 
1024 	r = r600_irq_init(rdev);
1025 	if (r) {
1026 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
1027 		radeon_irq_kms_fini(rdev);
1028 		return r;
1029 	}
1030 	r600_irq_set(rdev);
1031 
1032 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1033 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
1034 			     R600_CP_RB_RPTR, R600_CP_RB_WPTR,
1035 			     0, 0xfffff, RADEON_CP_PACKET2);
1036 	if (r)
1037 		return r;
1038 
1039 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
1040 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
1041 			     DMA_RB_RPTR, DMA_RB_WPTR,
1042 			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
1043 	if (r)
1044 		return r;
1045 
1046 	r = rv770_cp_load_microcode(rdev);
1047 	if (r)
1048 		return r;
1049 	r = r600_cp_resume(rdev);
1050 	if (r)
1051 		return r;
1052 
1053 	r = r600_dma_resume(rdev);
1054 	if (r)
1055 		return r;
1056 
1057 	r = radeon_ib_pool_init(rdev);
1058 	if (r) {
1059 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
1060 		return r;
1061 	}
1062 
1063 	r = r600_audio_init(rdev);
1064 	if (r) {
1065 		DRM_ERROR("radeon: audio init failed\n");
1066 		return r;
1067 	}
1068 
1069 	return 0;
1070 }
1071 
1072 int rv770_resume(struct radeon_device *rdev)
1073 {
1074 	int r;
1075 
1076 	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
1077 	 * posting will perform necessary task to bring back GPU into good
1078 	 * shape.
1079 	 */
1080 	/* post card */
1081 	atom_asic_init(rdev->mode_info.atom_context);
1082 
1083 	rdev->accel_working = true;
1084 	r = rv770_startup(rdev);
1085 	if (r) {
1086 		DRM_ERROR("r600 startup failed on resume\n");
1087 		rdev->accel_working = false;
1088 		return r;
1089 	}
1090 
1091 	return r;
1092 
1093 }
1094 
/*
 * rv770_suspend - quiesce the hardware before suspend
 *
 * Shuts down audio, stops the CP and the DMA engine, suspends
 * interrupt handling, disables writeback and finally turns off the
 * PCIE GART.
 *
 * Always returns 0.
 */
int rv770_suspend(struct radeon_device *rdev)
{
	/* audio first */
	r600_audio_fini(rdev);

	/* stop both engines */
	r700_cp_stop(rdev);
	r600_dma_stop(rdev);

	/* interrupts and writeback */
	r600_irq_suspend(rdev);
	radeon_wb_disable(rdev);

	/* address translation goes last */
	rv770_pcie_gart_disable(rdev);

	return 0;
}
1106 
1107 /* Plan is to move initialization in that function and use
1108  * helper function so that radeon_device_init pretty much
1109  * do nothing more than calling asic specific function. This
1110  * should also allow to remove a bunch of callback function
1111  * like vram_info.
1112  */
1113 int rv770_init(struct radeon_device *rdev)
1114 {
1115 	int r;
1116 
1117 	/* Read BIOS */
1118 	if (!radeon_get_bios(rdev)) {
1119 		if (ASIC_IS_AVIVO(rdev))
1120 			return -EINVAL;
1121 	}
1122 	/* Must be an ATOMBIOS */
1123 	if (!rdev->is_atom_bios) {
1124 		dev_err(rdev->dev, "Expecting atombios for R600 GPU\n");
1125 		return -EINVAL;
1126 	}
1127 	r = radeon_atombios_init(rdev);
1128 	if (r)
1129 		return r;
1130 	/* Post card if necessary */
1131 	if (!radeon_card_posted(rdev)) {
1132 		if (!rdev->bios) {
1133 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
1134 			return -EINVAL;
1135 		}
1136 		DRM_INFO("GPU not posted. posting now...\n");
1137 		atom_asic_init(rdev->mode_info.atom_context);
1138 	}
1139 	/* Initialize scratch registers */
1140 	r600_scratch_init(rdev);
1141 	/* Initialize surface registers */
1142 	radeon_surface_init(rdev);
1143 	/* Initialize clocks */
1144 	radeon_get_clock_info(rdev->ddev);
1145 	/* Fence driver */
1146 	r = radeon_fence_driver_init(rdev);
1147 	if (r)
1148 		return r;
1149 	/* initialize AGP */
1150 	if (rdev->flags & RADEON_IS_AGP) {
1151 		r = radeon_agp_init(rdev);
1152 		if (r)
1153 			radeon_agp_disable(rdev);
1154 	}
1155 	r = rv770_mc_init(rdev);
1156 	if (r)
1157 		return r;
1158 	/* Memory manager */
1159 	r = radeon_bo_init(rdev);
1160 	if (r)
1161 		return r;
1162 
1163 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL;
1164 	r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024);
1165 
1166 	rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
1167 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
1168 
1169 	rdev->ih.ring_obj = NULL;
1170 	r600_ih_ring_init(rdev, 64 * 1024);
1171 
1172 	r = r600_pcie_gart_init(rdev);
1173 	if (r)
1174 		return r;
1175 
1176 	rdev->accel_working = true;
1177 	r = rv770_startup(rdev);
1178 	if (r) {
1179 		dev_err(rdev->dev, "disabling GPU acceleration\n");
1180 		r700_cp_fini(rdev);
1181 		r600_dma_fini(rdev);
1182 		r600_irq_fini(rdev);
1183 		radeon_wb_fini(rdev);
1184 		radeon_ib_pool_fini(rdev);
1185 		radeon_irq_kms_fini(rdev);
1186 		rv770_pcie_gart_fini(rdev);
1187 		rdev->accel_working = false;
1188 	}
1189 
1190 	return 0;
1191 }
1192 
1193 void rv770_fini(struct radeon_device *rdev)
1194 {
1195 	r600_blit_fini(rdev);
1196 	r700_cp_fini(rdev);
1197 	r600_dma_fini(rdev);
1198 	r600_irq_fini(rdev);
1199 	radeon_wb_fini(rdev);
1200 	radeon_ib_pool_fini(rdev);
1201 	radeon_irq_kms_fini(rdev);
1202 	rv770_pcie_gart_fini(rdev);
1203 	r600_vram_scratch_fini(rdev);
1204 	radeon_gem_fini(rdev);
1205 	radeon_fence_driver_fini(rdev);
1206 	radeon_agp_fini(rdev);
1207 	radeon_bo_fini(rdev);
1208 	radeon_atombios_fini(rdev);
1209 	kfree(rdev->bios);
1210 	rdev->bios = NULL;
1211 }
1212 
1213 static void rv770_pcie_gen2_enable(struct radeon_device *rdev)
1214 {
1215 	u32 link_width_cntl, lanes, speed_cntl, tmp;
1216 	u16 link_cntl2;
1217 	u32 mask;
1218 	int ret;
1219 
1220 	if (radeon_pcie_gen2 == 0)
1221 		return;
1222 
1223 	if (rdev->flags & RADEON_IS_IGP)
1224 		return;
1225 
1226 	if (!(rdev->flags & RADEON_IS_PCIE))
1227 		return;
1228 
1229 	/* x2 cards have a special sequence */
1230 	if (ASIC_IS_X2(rdev))
1231 		return;
1232 
1233 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
1234 	if (ret != 0)
1235 		return;
1236 
1237 	if (!(mask & DRM_PCIE_SPEED_50))
1238 		return;
1239 
1240 	DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
1241 
1242 	/* advertise upconfig capability */
1243 	link_width_cntl = RREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL);
1244 	link_width_cntl &= ~LC_UPCONFIGURE_DIS;
1245 	WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
1246 	link_width_cntl = RREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL);
1247 	if (link_width_cntl & LC_RENEGOTIATION_SUPPORT) {
1248 		lanes = (link_width_cntl & LC_LINK_WIDTH_RD_MASK) >> LC_LINK_WIDTH_RD_SHIFT;
1249 		link_width_cntl &= ~(LC_LINK_WIDTH_MASK |
1250 				     LC_RECONFIG_ARC_MISSING_ESCAPE);
1251 		link_width_cntl |= lanes | LC_RECONFIG_NOW |
1252 			LC_RENEGOTIATE_EN | LC_UPCONFIGURE_SUPPORT;
1253 		WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
1254 	} else {
1255 		link_width_cntl |= LC_UPCONFIGURE_DIS;
1256 		WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
1257 	}
1258 
1259 	speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
1260 	if ((speed_cntl & LC_OTHER_SIDE_EVER_SENT_GEN2) &&
1261 	    (speed_cntl & LC_OTHER_SIDE_SUPPORTS_GEN2)) {
1262 
1263 		tmp = RREG32(0x541c);
1264 		WREG32(0x541c, tmp | 0x8);
1265 		WREG32(MM_CFGREGS_CNTL, MM_WR_TO_CFG_EN);
1266 		link_cntl2 = RREG16(0x4088);
1267 		link_cntl2 &= ~TARGET_LINK_SPEED_MASK;
1268 		link_cntl2 |= 0x2;
1269 		WREG16(0x4088, link_cntl2);
1270 		WREG32(MM_CFGREGS_CNTL, 0);
1271 
1272 		speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
1273 		speed_cntl &= ~LC_TARGET_LINK_SPEED_OVERRIDE_EN;
1274 		WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl);
1275 
1276 		speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
1277 		speed_cntl |= LC_CLR_FAILED_SPD_CHANGE_CNT;
1278 		WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl);
1279 
1280 		speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
1281 		speed_cntl &= ~LC_CLR_FAILED_SPD_CHANGE_CNT;
1282 		WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl);
1283 
1284 		speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
1285 		speed_cntl |= LC_GEN2_EN_STRAP;
1286 		WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl);
1287 
1288 	} else {
1289 		link_width_cntl = RREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL);
1290 		/* XXX: only disable it if gen1 bridge vendor == 0x111d or 0x1106 */
1291 		if (1)
1292 			link_width_cntl |= LC_UPCONFIGURE_DIS;
1293 		else
1294 			link_width_cntl &= ~LC_UPCONFIGURE_DIS;
1295 		WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
1296 	}
1297 }
1298