xref: /openbsd/sys/dev/pci/drm/radeon/cik.c (revision 73471bf0)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 
25 #include <linux/firmware.h>
26 #include <linux/module.h>
27 #include <linux/pci.h>
28 #include <linux/slab.h>
29 
30 #include <drm/drm_vblank.h>
31 
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "cikd.h"
35 #include "clearstate_ci.h"
36 #include "radeon.h"
37 #include "radeon_asic.h"
38 #include "radeon_audio.h"
39 #include "radeon_ucode.h"
40 
41 #define SH_MEM_CONFIG_GFX_DEFAULT \
42 	ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
43 
44 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
45 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
46 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
47 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
48 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
49 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
50 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
51 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
52 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
53 
54 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
55 MODULE_FIRMWARE("radeon/bonaire_me.bin");
56 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
57 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
58 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
59 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
60 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
61 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
62 MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
63 
64 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
65 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
66 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
67 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
68 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
69 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
70 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
71 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
72 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
73 
74 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
75 MODULE_FIRMWARE("radeon/hawaii_me.bin");
76 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
77 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
78 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
79 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
80 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
81 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
82 MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
83 
84 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
85 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
86 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
87 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
88 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
89 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
90 
91 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
92 MODULE_FIRMWARE("radeon/kaveri_me.bin");
93 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
94 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
95 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
96 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
97 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
98 
99 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
100 MODULE_FIRMWARE("radeon/KABINI_me.bin");
101 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
102 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
103 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
104 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
105 
106 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
107 MODULE_FIRMWARE("radeon/kabini_me.bin");
108 MODULE_FIRMWARE("radeon/kabini_ce.bin");
109 MODULE_FIRMWARE("radeon/kabini_mec.bin");
110 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
111 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
112 
113 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
114 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
115 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
116 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
117 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
118 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
119 
120 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
121 MODULE_FIRMWARE("radeon/mullins_me.bin");
122 MODULE_FIRMWARE("radeon/mullins_ce.bin");
123 MODULE_FIRMWARE("radeon/mullins_mec.bin");
124 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
125 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
126 
127 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
128 extern void r600_ih_ring_fini(struct radeon_device *rdev);
129 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
130 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
131 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
132 extern void sumo_rlc_fini(struct radeon_device *rdev);
133 extern int sumo_rlc_init(struct radeon_device *rdev);
134 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
135 extern void si_rlc_reset(struct radeon_device *rdev);
136 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
137 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
138 extern int cik_sdma_resume(struct radeon_device *rdev);
139 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
140 extern void cik_sdma_fini(struct radeon_device *rdev);
141 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
142 static void cik_rlc_stop(struct radeon_device *rdev);
143 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
144 static void cik_program_aspm(struct radeon_device *rdev);
145 static void cik_init_pg(struct radeon_device *rdev);
146 static void cik_init_cg(struct radeon_device *rdev);
147 static void cik_fini_pg(struct radeon_device *rdev);
148 static void cik_fini_cg(struct radeon_device *rdev);
149 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
150 					  bool enable);
151 
152 /**
153  * cik_get_allowed_info_register - fetch the register for the info ioctl
154  *
155  * @rdev: radeon_device pointer
156  * @reg: register offset in bytes
157  * @val: register value
158  *
159  * Returns 0 for success or -EINVAL for an invalid register
160  *
161  */
162 int cik_get_allowed_info_register(struct radeon_device *rdev,
163 				  u32 reg, u32 *val)
164 {
165 	switch (reg) {
166 	case GRBM_STATUS:
167 	case GRBM_STATUS2:
168 	case GRBM_STATUS_SE0:
169 	case GRBM_STATUS_SE1:
170 	case GRBM_STATUS_SE2:
171 	case GRBM_STATUS_SE3:
172 	case SRBM_STATUS:
173 	case SRBM_STATUS2:
174 	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
175 	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
176 	case UVD_STATUS:
177 	/* TODO VCE */
178 		*val = RREG32(reg);
179 		return 0;
180 	default:
181 		return -EINVAL;
182 	}
183 }
184 
185 /*
186  * Indirect registers accessor
187  */
188 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
189 {
190 	unsigned long flags;
191 	u32 r;
192 
193 	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
194 	WREG32(CIK_DIDT_IND_INDEX, (reg));
195 	r = RREG32(CIK_DIDT_IND_DATA);
196 	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
197 	return r;
198 }
199 
200 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
201 {
202 	unsigned long flags;
203 
204 	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
205 	WREG32(CIK_DIDT_IND_INDEX, (reg));
206 	WREG32(CIK_DIDT_IND_DATA, (v));
207 	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
208 }
209 
210 /* get temperature in millidegrees */
211 int ci_get_temp(struct radeon_device *rdev)
212 {
213 	u32 temp;
214 	int actual_temp = 0;
215 
216 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
217 		CTF_TEMP_SHIFT;
218 
219 	if (temp & 0x200)
220 		actual_temp = 255;
221 	else
222 		actual_temp = temp & 0x1ff;
223 
224 	return actual_temp * 1000;
225 }
226 
227 /* get temperature in millidegrees */
228 int kv_get_temp(struct radeon_device *rdev)
229 {
230 	u32 temp;
231 	int actual_temp = 0;
232 
233 	temp = RREG32_SMC(0xC0300E0C);
234 
235 	if (temp)
236 		actual_temp = (temp / 8) - 49;
237 	else
238 		actual_temp = 0;
239 
240 	return actual_temp * 1000;
241 }
242 
243 /*
244  * Indirect registers accessor
245  */
246 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
247 {
248 	unsigned long flags;
249 	u32 r;
250 
251 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
252 	WREG32(PCIE_INDEX, reg);
253 	(void)RREG32(PCIE_INDEX);
254 	r = RREG32(PCIE_DATA);
255 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
256 	return r;
257 }
258 
259 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
260 {
261 	unsigned long flags;
262 
263 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
264 	WREG32(PCIE_INDEX, reg);
265 	(void)RREG32(PCIE_INDEX);
266 	WREG32(PCIE_DATA, v);
267 	(void)RREG32(PCIE_DATA);
268 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
269 }
270 
/*
 * RLC save/restore register list for "spectre" (Kaveri-class GFX7) parts.
 *
 * NOTE(review): each entry appears to pack a selector in the high 16 bits
 * and a dword register offset (byte offset >> 2) in the low bits, followed
 * by a 0x00000000 word that looks like a save-area placeholder; the bare
 * count words (0x3, 0x5) appear to introduce differently-formatted
 * sections — confirm against the RLC save/restore consumer.
 */
271 static const u32 spectre_rlc_save_restore_register_list[] =
272 {
273 	(0x0e00 << 16) | (0xc12c >> 2),
274 	0x00000000,
275 	(0x0e00 << 16) | (0xc140 >> 2),
276 	0x00000000,
277 	(0x0e00 << 16) | (0xc150 >> 2),
278 	0x00000000,
279 	(0x0e00 << 16) | (0xc15c >> 2),
280 	0x00000000,
281 	(0x0e00 << 16) | (0xc168 >> 2),
282 	0x00000000,
283 	(0x0e00 << 16) | (0xc170 >> 2),
284 	0x00000000,
285 	(0x0e00 << 16) | (0xc178 >> 2),
286 	0x00000000,
287 	(0x0e00 << 16) | (0xc204 >> 2),
288 	0x00000000,
289 	(0x0e00 << 16) | (0xc2b4 >> 2),
290 	0x00000000,
291 	(0x0e00 << 16) | (0xc2b8 >> 2),
292 	0x00000000,
293 	(0x0e00 << 16) | (0xc2bc >> 2),
294 	0x00000000,
295 	(0x0e00 << 16) | (0xc2c0 >> 2),
296 	0x00000000,
297 	(0x0e00 << 16) | (0x8228 >> 2),
298 	0x00000000,
299 	(0x0e00 << 16) | (0x829c >> 2),
300 	0x00000000,
301 	(0x0e00 << 16) | (0x869c >> 2),
302 	0x00000000,
303 	(0x0600 << 16) | (0x98f4 >> 2),
304 	0x00000000,
305 	(0x0e00 << 16) | (0x98f8 >> 2),
306 	0x00000000,
307 	(0x0e00 << 16) | (0x9900 >> 2),
308 	0x00000000,
309 	(0x0e00 << 16) | (0xc260 >> 2),
310 	0x00000000,
311 	(0x0e00 << 16) | (0x90e8 >> 2),
312 	0x00000000,
313 	(0x0e00 << 16) | (0x3c000 >> 2),
314 	0x00000000,
315 	(0x0e00 << 16) | (0x3c00c >> 2),
316 	0x00000000,
317 	(0x0e00 << 16) | (0x8c1c >> 2),
318 	0x00000000,
319 	(0x0e00 << 16) | (0x9700 >> 2),
320 	0x00000000,
321 	(0x0e00 << 16) | (0xcd20 >> 2),
322 	0x00000000,
323 	(0x4e00 << 16) | (0xcd20 >> 2),
324 	0x00000000,
325 	(0x5e00 << 16) | (0xcd20 >> 2),
326 	0x00000000,
327 	(0x6e00 << 16) | (0xcd20 >> 2),
328 	0x00000000,
329 	(0x7e00 << 16) | (0xcd20 >> 2),
330 	0x00000000,
331 	(0x8e00 << 16) | (0xcd20 >> 2),
332 	0x00000000,
333 	(0x9e00 << 16) | (0xcd20 >> 2),
334 	0x00000000,
335 	(0xae00 << 16) | (0xcd20 >> 2),
336 	0x00000000,
337 	(0xbe00 << 16) | (0xcd20 >> 2),
338 	0x00000000,
339 	(0x0e00 << 16) | (0x89bc >> 2),
340 	0x00000000,
341 	(0x0e00 << 16) | (0x8900 >> 2),
342 	0x00000000,
	/* NOTE(review): bare count word — presumably starts a new section
	 * of the list; confirm against the consumer. */
343 	0x3,
344 	(0x0e00 << 16) | (0xc130 >> 2),
345 	0x00000000,
346 	(0x0e00 << 16) | (0xc134 >> 2),
347 	0x00000000,
348 	(0x0e00 << 16) | (0xc1fc >> 2),
349 	0x00000000,
350 	(0x0e00 << 16) | (0xc208 >> 2),
351 	0x00000000,
352 	(0x0e00 << 16) | (0xc264 >> 2),
353 	0x00000000,
354 	(0x0e00 << 16) | (0xc268 >> 2),
355 	0x00000000,
356 	(0x0e00 << 16) | (0xc26c >> 2),
357 	0x00000000,
358 	(0x0e00 << 16) | (0xc270 >> 2),
359 	0x00000000,
360 	(0x0e00 << 16) | (0xc274 >> 2),
361 	0x00000000,
362 	(0x0e00 << 16) | (0xc278 >> 2),
363 	0x00000000,
364 	(0x0e00 << 16) | (0xc27c >> 2),
365 	0x00000000,
366 	(0x0e00 << 16) | (0xc280 >> 2),
367 	0x00000000,
368 	(0x0e00 << 16) | (0xc284 >> 2),
369 	0x00000000,
370 	(0x0e00 << 16) | (0xc288 >> 2),
371 	0x00000000,
372 	(0x0e00 << 16) | (0xc28c >> 2),
373 	0x00000000,
374 	(0x0e00 << 16) | (0xc290 >> 2),
375 	0x00000000,
376 	(0x0e00 << 16) | (0xc294 >> 2),
377 	0x00000000,
378 	(0x0e00 << 16) | (0xc298 >> 2),
379 	0x00000000,
380 	(0x0e00 << 16) | (0xc29c >> 2),
381 	0x00000000,
382 	(0x0e00 << 16) | (0xc2a0 >> 2),
383 	0x00000000,
384 	(0x0e00 << 16) | (0xc2a4 >> 2),
385 	0x00000000,
386 	(0x0e00 << 16) | (0xc2a8 >> 2),
387 	0x00000000,
388 	(0x0e00 << 16) | (0xc2ac  >> 2),
389 	0x00000000,
390 	(0x0e00 << 16) | (0xc2b0 >> 2),
391 	0x00000000,
392 	(0x0e00 << 16) | (0x301d0 >> 2),
393 	0x00000000,
394 	(0x0e00 << 16) | (0x30238 >> 2),
395 	0x00000000,
396 	(0x0e00 << 16) | (0x30250 >> 2),
397 	0x00000000,
398 	(0x0e00 << 16) | (0x30254 >> 2),
399 	0x00000000,
400 	(0x0e00 << 16) | (0x30258 >> 2),
401 	0x00000000,
402 	(0x0e00 << 16) | (0x3025c >> 2),
403 	0x00000000,
404 	(0x4e00 << 16) | (0xc900 >> 2),
405 	0x00000000,
406 	(0x5e00 << 16) | (0xc900 >> 2),
407 	0x00000000,
408 	(0x6e00 << 16) | (0xc900 >> 2),
409 	0x00000000,
410 	(0x7e00 << 16) | (0xc900 >> 2),
411 	0x00000000,
412 	(0x8e00 << 16) | (0xc900 >> 2),
413 	0x00000000,
414 	(0x9e00 << 16) | (0xc900 >> 2),
415 	0x00000000,
416 	(0xae00 << 16) | (0xc900 >> 2),
417 	0x00000000,
418 	(0xbe00 << 16) | (0xc900 >> 2),
419 	0x00000000,
420 	(0x4e00 << 16) | (0xc904 >> 2),
421 	0x00000000,
422 	(0x5e00 << 16) | (0xc904 >> 2),
423 	0x00000000,
424 	(0x6e00 << 16) | (0xc904 >> 2),
425 	0x00000000,
426 	(0x7e00 << 16) | (0xc904 >> 2),
427 	0x00000000,
428 	(0x8e00 << 16) | (0xc904 >> 2),
429 	0x00000000,
430 	(0x9e00 << 16) | (0xc904 >> 2),
431 	0x00000000,
432 	(0xae00 << 16) | (0xc904 >> 2),
433 	0x00000000,
434 	(0xbe00 << 16) | (0xc904 >> 2),
435 	0x00000000,
436 	(0x4e00 << 16) | (0xc908 >> 2),
437 	0x00000000,
438 	(0x5e00 << 16) | (0xc908 >> 2),
439 	0x00000000,
440 	(0x6e00 << 16) | (0xc908 >> 2),
441 	0x00000000,
442 	(0x7e00 << 16) | (0xc908 >> 2),
443 	0x00000000,
444 	(0x8e00 << 16) | (0xc908 >> 2),
445 	0x00000000,
446 	(0x9e00 << 16) | (0xc908 >> 2),
447 	0x00000000,
448 	(0xae00 << 16) | (0xc908 >> 2),
449 	0x00000000,
450 	(0xbe00 << 16) | (0xc908 >> 2),
451 	0x00000000,
452 	(0x4e00 << 16) | (0xc90c >> 2),
453 	0x00000000,
454 	(0x5e00 << 16) | (0xc90c >> 2),
455 	0x00000000,
456 	(0x6e00 << 16) | (0xc90c >> 2),
457 	0x00000000,
458 	(0x7e00 << 16) | (0xc90c >> 2),
459 	0x00000000,
460 	(0x8e00 << 16) | (0xc90c >> 2),
461 	0x00000000,
462 	(0x9e00 << 16) | (0xc90c >> 2),
463 	0x00000000,
464 	(0xae00 << 16) | (0xc90c >> 2),
465 	0x00000000,
466 	(0xbe00 << 16) | (0xc90c >> 2),
467 	0x00000000,
468 	(0x4e00 << 16) | (0xc910 >> 2),
469 	0x00000000,
470 	(0x5e00 << 16) | (0xc910 >> 2),
471 	0x00000000,
472 	(0x6e00 << 16) | (0xc910 >> 2),
473 	0x00000000,
474 	(0x7e00 << 16) | (0xc910 >> 2),
475 	0x00000000,
476 	(0x8e00 << 16) | (0xc910 >> 2),
477 	0x00000000,
478 	(0x9e00 << 16) | (0xc910 >> 2),
479 	0x00000000,
480 	(0xae00 << 16) | (0xc910 >> 2),
481 	0x00000000,
482 	(0xbe00 << 16) | (0xc910 >> 2),
483 	0x00000000,
484 	(0x0e00 << 16) | (0xc99c >> 2),
485 	0x00000000,
486 	(0x0e00 << 16) | (0x9834 >> 2),
487 	0x00000000,
488 	(0x0000 << 16) | (0x30f00 >> 2),
489 	0x00000000,
490 	(0x0001 << 16) | (0x30f00 >> 2),
491 	0x00000000,
492 	(0x0000 << 16) | (0x30f04 >> 2),
493 	0x00000000,
494 	(0x0001 << 16) | (0x30f04 >> 2),
495 	0x00000000,
496 	(0x0000 << 16) | (0x30f08 >> 2),
497 	0x00000000,
498 	(0x0001 << 16) | (0x30f08 >> 2),
499 	0x00000000,
500 	(0x0000 << 16) | (0x30f0c >> 2),
501 	0x00000000,
502 	(0x0001 << 16) | (0x30f0c >> 2),
503 	0x00000000,
504 	(0x0600 << 16) | (0x9b7c >> 2),
505 	0x00000000,
506 	(0x0e00 << 16) | (0x8a14 >> 2),
507 	0x00000000,
508 	(0x0e00 << 16) | (0x8a18 >> 2),
509 	0x00000000,
510 	(0x0600 << 16) | (0x30a00 >> 2),
511 	0x00000000,
512 	(0x0e00 << 16) | (0x8bf0 >> 2),
513 	0x00000000,
514 	(0x0e00 << 16) | (0x8bcc >> 2),
515 	0x00000000,
516 	(0x0e00 << 16) | (0x8b24 >> 2),
517 	0x00000000,
518 	(0x0e00 << 16) | (0x30a04 >> 2),
519 	0x00000000,
520 	(0x0600 << 16) | (0x30a10 >> 2),
521 	0x00000000,
522 	(0x0600 << 16) | (0x30a14 >> 2),
523 	0x00000000,
524 	(0x0600 << 16) | (0x30a18 >> 2),
525 	0x00000000,
526 	(0x0600 << 16) | (0x30a2c >> 2),
527 	0x00000000,
528 	(0x0e00 << 16) | (0xc700 >> 2),
529 	0x00000000,
530 	(0x0e00 << 16) | (0xc704 >> 2),
531 	0x00000000,
532 	(0x0e00 << 16) | (0xc708 >> 2),
533 	0x00000000,
534 	(0x0e00 << 16) | (0xc768 >> 2),
535 	0x00000000,
536 	(0x0400 << 16) | (0xc770 >> 2),
537 	0x00000000,
538 	(0x0400 << 16) | (0xc774 >> 2),
539 	0x00000000,
540 	(0x0400 << 16) | (0xc778 >> 2),
541 	0x00000000,
542 	(0x0400 << 16) | (0xc77c >> 2),
543 	0x00000000,
544 	(0x0400 << 16) | (0xc780 >> 2),
545 	0x00000000,
546 	(0x0400 << 16) | (0xc784 >> 2),
547 	0x00000000,
548 	(0x0400 << 16) | (0xc788 >> 2),
549 	0x00000000,
550 	(0x0400 << 16) | (0xc78c >> 2),
551 	0x00000000,
552 	(0x0400 << 16) | (0xc798 >> 2),
553 	0x00000000,
554 	(0x0400 << 16) | (0xc79c >> 2),
555 	0x00000000,
556 	(0x0400 << 16) | (0xc7a0 >> 2),
557 	0x00000000,
558 	(0x0400 << 16) | (0xc7a4 >> 2),
559 	0x00000000,
560 	(0x0400 << 16) | (0xc7a8 >> 2),
561 	0x00000000,
562 	(0x0400 << 16) | (0xc7ac >> 2),
563 	0x00000000,
564 	(0x0400 << 16) | (0xc7b0 >> 2),
565 	0x00000000,
566 	(0x0400 << 16) | (0xc7b4 >> 2),
567 	0x00000000,
568 	(0x0e00 << 16) | (0x9100 >> 2),
569 	0x00000000,
570 	(0x0e00 << 16) | (0x3c010 >> 2),
571 	0x00000000,
572 	(0x0e00 << 16) | (0x92a8 >> 2),
573 	0x00000000,
574 	(0x0e00 << 16) | (0x92ac >> 2),
575 	0x00000000,
576 	(0x0e00 << 16) | (0x92b4 >> 2),
577 	0x00000000,
578 	(0x0e00 << 16) | (0x92b8 >> 2),
579 	0x00000000,
580 	(0x0e00 << 16) | (0x92bc >> 2),
581 	0x00000000,
582 	(0x0e00 << 16) | (0x92c0 >> 2),
583 	0x00000000,
584 	(0x0e00 << 16) | (0x92c4 >> 2),
585 	0x00000000,
586 	(0x0e00 << 16) | (0x92c8 >> 2),
587 	0x00000000,
588 	(0x0e00 << 16) | (0x92cc >> 2),
589 	0x00000000,
590 	(0x0e00 << 16) | (0x92d0 >> 2),
591 	0x00000000,
592 	(0x0e00 << 16) | (0x8c00 >> 2),
593 	0x00000000,
594 	(0x0e00 << 16) | (0x8c04 >> 2),
595 	0x00000000,
596 	(0x0e00 << 16) | (0x8c20 >> 2),
597 	0x00000000,
598 	(0x0e00 << 16) | (0x8c38 >> 2),
599 	0x00000000,
600 	(0x0e00 << 16) | (0x8c3c >> 2),
601 	0x00000000,
602 	(0x0e00 << 16) | (0xae00 >> 2),
603 	0x00000000,
604 	(0x0e00 << 16) | (0x9604 >> 2),
605 	0x00000000,
606 	(0x0e00 << 16) | (0xac08 >> 2),
607 	0x00000000,
608 	(0x0e00 << 16) | (0xac0c >> 2),
609 	0x00000000,
610 	(0x0e00 << 16) | (0xac10 >> 2),
611 	0x00000000,
612 	(0x0e00 << 16) | (0xac14 >> 2),
613 	0x00000000,
614 	(0x0e00 << 16) | (0xac58 >> 2),
615 	0x00000000,
616 	(0x0e00 << 16) | (0xac68 >> 2),
617 	0x00000000,
618 	(0x0e00 << 16) | (0xac6c >> 2),
619 	0x00000000,
620 	(0x0e00 << 16) | (0xac70 >> 2),
621 	0x00000000,
622 	(0x0e00 << 16) | (0xac74 >> 2),
623 	0x00000000,
624 	(0x0e00 << 16) | (0xac78 >> 2),
625 	0x00000000,
626 	(0x0e00 << 16) | (0xac7c >> 2),
627 	0x00000000,
628 	(0x0e00 << 16) | (0xac80 >> 2),
629 	0x00000000,
630 	(0x0e00 << 16) | (0xac84 >> 2),
631 	0x00000000,
632 	(0x0e00 << 16) | (0xac88 >> 2),
633 	0x00000000,
634 	(0x0e00 << 16) | (0xac8c >> 2),
635 	0x00000000,
636 	(0x0e00 << 16) | (0x970c >> 2),
637 	0x00000000,
638 	(0x0e00 << 16) | (0x9714 >> 2),
639 	0x00000000,
640 	(0x0e00 << 16) | (0x9718 >> 2),
641 	0x00000000,
642 	(0x0e00 << 16) | (0x971c >> 2),
643 	0x00000000,
644 	(0x0e00 << 16) | (0x31068 >> 2),
645 	0x00000000,
646 	(0x4e00 << 16) | (0x31068 >> 2),
647 	0x00000000,
648 	(0x5e00 << 16) | (0x31068 >> 2),
649 	0x00000000,
650 	(0x6e00 << 16) | (0x31068 >> 2),
651 	0x00000000,
652 	(0x7e00 << 16) | (0x31068 >> 2),
653 	0x00000000,
654 	(0x8e00 << 16) | (0x31068 >> 2),
655 	0x00000000,
656 	(0x9e00 << 16) | (0x31068 >> 2),
657 	0x00000000,
658 	(0xae00 << 16) | (0x31068 >> 2),
659 	0x00000000,
660 	(0xbe00 << 16) | (0x31068 >> 2),
661 	0x00000000,
662 	(0x0e00 << 16) | (0xcd10 >> 2),
663 	0x00000000,
664 	(0x0e00 << 16) | (0xcd14 >> 2),
665 	0x00000000,
666 	(0x0e00 << 16) | (0x88b0 >> 2),
667 	0x00000000,
668 	(0x0e00 << 16) | (0x88b4 >> 2),
669 	0x00000000,
670 	(0x0e00 << 16) | (0x88b8 >> 2),
671 	0x00000000,
672 	(0x0e00 << 16) | (0x88bc >> 2),
673 	0x00000000,
674 	(0x0400 << 16) | (0x89c0 >> 2),
675 	0x00000000,
676 	(0x0e00 << 16) | (0x88c4 >> 2),
677 	0x00000000,
678 	(0x0e00 << 16) | (0x88c8 >> 2),
679 	0x00000000,
680 	(0x0e00 << 16) | (0x88d0 >> 2),
681 	0x00000000,
682 	(0x0e00 << 16) | (0x88d4 >> 2),
683 	0x00000000,
684 	(0x0e00 << 16) | (0x88d8 >> 2),
685 	0x00000000,
686 	(0x0e00 << 16) | (0x8980 >> 2),
687 	0x00000000,
688 	(0x0e00 << 16) | (0x30938 >> 2),
689 	0x00000000,
690 	(0x0e00 << 16) | (0x3093c >> 2),
691 	0x00000000,
692 	(0x0e00 << 16) | (0x30940 >> 2),
693 	0x00000000,
694 	(0x0e00 << 16) | (0x89a0 >> 2),
695 	0x00000000,
696 	(0x0e00 << 16) | (0x30900 >> 2),
697 	0x00000000,
698 	(0x0e00 << 16) | (0x30904 >> 2),
699 	0x00000000,
700 	(0x0e00 << 16) | (0x89b4 >> 2),
701 	0x00000000,
702 	(0x0e00 << 16) | (0x3c210 >> 2),
703 	0x00000000,
704 	(0x0e00 << 16) | (0x3c214 >> 2),
705 	0x00000000,
706 	(0x0e00 << 16) | (0x3c218 >> 2),
707 	0x00000000,
708 	(0x0e00 << 16) | (0x8904 >> 2),
709 	0x00000000,
	/* NOTE(review): bare count word followed by entries without
	 * placeholder words — format change; confirm against consumer. */
710 	0x5,
711 	(0x0e00 << 16) | (0x8c28 >> 2),
712 	(0x0e00 << 16) | (0x8c2c >> 2),
713 	(0x0e00 << 16) | (0x8c30 >> 2),
714 	(0x0e00 << 16) | (0x8c34 >> 2),
715 	(0x0e00 << 16) | (0x9600 >> 2),
716 };
717 
/*
 * RLC save/restore register list for "kalindi" (Kabini/Mullins-class GFX7)
 * parts.
 *
 * NOTE(review): same apparent encoding as the spectre list above — high
 * 16 bits look like a selector, low bits a dword register offset
 * (byte offset >> 2), each followed by a 0x00000000 placeholder; the bare
 * count words (0x3, 0x5) appear to introduce differently-formatted
 * sections — confirm against the RLC save/restore consumer.
 */
718 static const u32 kalindi_rlc_save_restore_register_list[] =
719 {
720 	(0x0e00 << 16) | (0xc12c >> 2),
721 	0x00000000,
722 	(0x0e00 << 16) | (0xc140 >> 2),
723 	0x00000000,
724 	(0x0e00 << 16) | (0xc150 >> 2),
725 	0x00000000,
726 	(0x0e00 << 16) | (0xc15c >> 2),
727 	0x00000000,
728 	(0x0e00 << 16) | (0xc168 >> 2),
729 	0x00000000,
730 	(0x0e00 << 16) | (0xc170 >> 2),
731 	0x00000000,
732 	(0x0e00 << 16) | (0xc204 >> 2),
733 	0x00000000,
734 	(0x0e00 << 16) | (0xc2b4 >> 2),
735 	0x00000000,
736 	(0x0e00 << 16) | (0xc2b8 >> 2),
737 	0x00000000,
738 	(0x0e00 << 16) | (0xc2bc >> 2),
739 	0x00000000,
740 	(0x0e00 << 16) | (0xc2c0 >> 2),
741 	0x00000000,
742 	(0x0e00 << 16) | (0x8228 >> 2),
743 	0x00000000,
744 	(0x0e00 << 16) | (0x829c >> 2),
745 	0x00000000,
746 	(0x0e00 << 16) | (0x869c >> 2),
747 	0x00000000,
748 	(0x0600 << 16) | (0x98f4 >> 2),
749 	0x00000000,
750 	(0x0e00 << 16) | (0x98f8 >> 2),
751 	0x00000000,
752 	(0x0e00 << 16) | (0x9900 >> 2),
753 	0x00000000,
754 	(0x0e00 << 16) | (0xc260 >> 2),
755 	0x00000000,
756 	(0x0e00 << 16) | (0x90e8 >> 2),
757 	0x00000000,
758 	(0x0e00 << 16) | (0x3c000 >> 2),
759 	0x00000000,
760 	(0x0e00 << 16) | (0x3c00c >> 2),
761 	0x00000000,
762 	(0x0e00 << 16) | (0x8c1c >> 2),
763 	0x00000000,
764 	(0x0e00 << 16) | (0x9700 >> 2),
765 	0x00000000,
766 	(0x0e00 << 16) | (0xcd20 >> 2),
767 	0x00000000,
768 	(0x4e00 << 16) | (0xcd20 >> 2),
769 	0x00000000,
770 	(0x5e00 << 16) | (0xcd20 >> 2),
771 	0x00000000,
772 	(0x6e00 << 16) | (0xcd20 >> 2),
773 	0x00000000,
774 	(0x7e00 << 16) | (0xcd20 >> 2),
775 	0x00000000,
776 	(0x0e00 << 16) | (0x89bc >> 2),
777 	0x00000000,
778 	(0x0e00 << 16) | (0x8900 >> 2),
779 	0x00000000,
	/* NOTE(review): bare count word — presumably starts a new section
	 * of the list; confirm against the consumer. */
780 	0x3,
781 	(0x0e00 << 16) | (0xc130 >> 2),
782 	0x00000000,
783 	(0x0e00 << 16) | (0xc134 >> 2),
784 	0x00000000,
785 	(0x0e00 << 16) | (0xc1fc >> 2),
786 	0x00000000,
787 	(0x0e00 << 16) | (0xc208 >> 2),
788 	0x00000000,
789 	(0x0e00 << 16) | (0xc264 >> 2),
790 	0x00000000,
791 	(0x0e00 << 16) | (0xc268 >> 2),
792 	0x00000000,
793 	(0x0e00 << 16) | (0xc26c >> 2),
794 	0x00000000,
795 	(0x0e00 << 16) | (0xc270 >> 2),
796 	0x00000000,
797 	(0x0e00 << 16) | (0xc274 >> 2),
798 	0x00000000,
799 	(0x0e00 << 16) | (0xc28c >> 2),
800 	0x00000000,
801 	(0x0e00 << 16) | (0xc290 >> 2),
802 	0x00000000,
803 	(0x0e00 << 16) | (0xc294 >> 2),
804 	0x00000000,
805 	(0x0e00 << 16) | (0xc298 >> 2),
806 	0x00000000,
807 	(0x0e00 << 16) | (0xc2a0 >> 2),
808 	0x00000000,
809 	(0x0e00 << 16) | (0xc2a4 >> 2),
810 	0x00000000,
811 	(0x0e00 << 16) | (0xc2a8 >> 2),
812 	0x00000000,
813 	(0x0e00 << 16) | (0xc2ac >> 2),
814 	0x00000000,
815 	(0x0e00 << 16) | (0x301d0 >> 2),
816 	0x00000000,
817 	(0x0e00 << 16) | (0x30238 >> 2),
818 	0x00000000,
819 	(0x0e00 << 16) | (0x30250 >> 2),
820 	0x00000000,
821 	(0x0e00 << 16) | (0x30254 >> 2),
822 	0x00000000,
823 	(0x0e00 << 16) | (0x30258 >> 2),
824 	0x00000000,
825 	(0x0e00 << 16) | (0x3025c >> 2),
826 	0x00000000,
827 	(0x4e00 << 16) | (0xc900 >> 2),
828 	0x00000000,
829 	(0x5e00 << 16) | (0xc900 >> 2),
830 	0x00000000,
831 	(0x6e00 << 16) | (0xc900 >> 2),
832 	0x00000000,
833 	(0x7e00 << 16) | (0xc900 >> 2),
834 	0x00000000,
835 	(0x4e00 << 16) | (0xc904 >> 2),
836 	0x00000000,
837 	(0x5e00 << 16) | (0xc904 >> 2),
838 	0x00000000,
839 	(0x6e00 << 16) | (0xc904 >> 2),
840 	0x00000000,
841 	(0x7e00 << 16) | (0xc904 >> 2),
842 	0x00000000,
843 	(0x4e00 << 16) | (0xc908 >> 2),
844 	0x00000000,
845 	(0x5e00 << 16) | (0xc908 >> 2),
846 	0x00000000,
847 	(0x6e00 << 16) | (0xc908 >> 2),
848 	0x00000000,
849 	(0x7e00 << 16) | (0xc908 >> 2),
850 	0x00000000,
851 	(0x4e00 << 16) | (0xc90c >> 2),
852 	0x00000000,
853 	(0x5e00 << 16) | (0xc90c >> 2),
854 	0x00000000,
855 	(0x6e00 << 16) | (0xc90c >> 2),
856 	0x00000000,
857 	(0x7e00 << 16) | (0xc90c >> 2),
858 	0x00000000,
859 	(0x4e00 << 16) | (0xc910 >> 2),
860 	0x00000000,
861 	(0x5e00 << 16) | (0xc910 >> 2),
862 	0x00000000,
863 	(0x6e00 << 16) | (0xc910 >> 2),
864 	0x00000000,
865 	(0x7e00 << 16) | (0xc910 >> 2),
866 	0x00000000,
867 	(0x0e00 << 16) | (0xc99c >> 2),
868 	0x00000000,
869 	(0x0e00 << 16) | (0x9834 >> 2),
870 	0x00000000,
871 	(0x0000 << 16) | (0x30f00 >> 2),
872 	0x00000000,
873 	(0x0000 << 16) | (0x30f04 >> 2),
874 	0x00000000,
875 	(0x0000 << 16) | (0x30f08 >> 2),
876 	0x00000000,
877 	(0x0000 << 16) | (0x30f0c >> 2),
878 	0x00000000,
879 	(0x0600 << 16) | (0x9b7c >> 2),
880 	0x00000000,
881 	(0x0e00 << 16) | (0x8a14 >> 2),
882 	0x00000000,
883 	(0x0e00 << 16) | (0x8a18 >> 2),
884 	0x00000000,
885 	(0x0600 << 16) | (0x30a00 >> 2),
886 	0x00000000,
887 	(0x0e00 << 16) | (0x8bf0 >> 2),
888 	0x00000000,
889 	(0x0e00 << 16) | (0x8bcc >> 2),
890 	0x00000000,
891 	(0x0e00 << 16) | (0x8b24 >> 2),
892 	0x00000000,
893 	(0x0e00 << 16) | (0x30a04 >> 2),
894 	0x00000000,
895 	(0x0600 << 16) | (0x30a10 >> 2),
896 	0x00000000,
897 	(0x0600 << 16) | (0x30a14 >> 2),
898 	0x00000000,
899 	(0x0600 << 16) | (0x30a18 >> 2),
900 	0x00000000,
901 	(0x0600 << 16) | (0x30a2c >> 2),
902 	0x00000000,
903 	(0x0e00 << 16) | (0xc700 >> 2),
904 	0x00000000,
905 	(0x0e00 << 16) | (0xc704 >> 2),
906 	0x00000000,
907 	(0x0e00 << 16) | (0xc708 >> 2),
908 	0x00000000,
909 	(0x0e00 << 16) | (0xc768 >> 2),
910 	0x00000000,
911 	(0x0400 << 16) | (0xc770 >> 2),
912 	0x00000000,
913 	(0x0400 << 16) | (0xc774 >> 2),
914 	0x00000000,
915 	(0x0400 << 16) | (0xc798 >> 2),
916 	0x00000000,
917 	(0x0400 << 16) | (0xc79c >> 2),
918 	0x00000000,
919 	(0x0e00 << 16) | (0x9100 >> 2),
920 	0x00000000,
921 	(0x0e00 << 16) | (0x3c010 >> 2),
922 	0x00000000,
923 	(0x0e00 << 16) | (0x8c00 >> 2),
924 	0x00000000,
925 	(0x0e00 << 16) | (0x8c04 >> 2),
926 	0x00000000,
927 	(0x0e00 << 16) | (0x8c20 >> 2),
928 	0x00000000,
929 	(0x0e00 << 16) | (0x8c38 >> 2),
930 	0x00000000,
931 	(0x0e00 << 16) | (0x8c3c >> 2),
932 	0x00000000,
933 	(0x0e00 << 16) | (0xae00 >> 2),
934 	0x00000000,
935 	(0x0e00 << 16) | (0x9604 >> 2),
936 	0x00000000,
937 	(0x0e00 << 16) | (0xac08 >> 2),
938 	0x00000000,
939 	(0x0e00 << 16) | (0xac0c >> 2),
940 	0x00000000,
941 	(0x0e00 << 16) | (0xac10 >> 2),
942 	0x00000000,
943 	(0x0e00 << 16) | (0xac14 >> 2),
944 	0x00000000,
945 	(0x0e00 << 16) | (0xac58 >> 2),
946 	0x00000000,
947 	(0x0e00 << 16) | (0xac68 >> 2),
948 	0x00000000,
949 	(0x0e00 << 16) | (0xac6c >> 2),
950 	0x00000000,
951 	(0x0e00 << 16) | (0xac70 >> 2),
952 	0x00000000,
953 	(0x0e00 << 16) | (0xac74 >> 2),
954 	0x00000000,
955 	(0x0e00 << 16) | (0xac78 >> 2),
956 	0x00000000,
957 	(0x0e00 << 16) | (0xac7c >> 2),
958 	0x00000000,
959 	(0x0e00 << 16) | (0xac80 >> 2),
960 	0x00000000,
961 	(0x0e00 << 16) | (0xac84 >> 2),
962 	0x00000000,
963 	(0x0e00 << 16) | (0xac88 >> 2),
964 	0x00000000,
965 	(0x0e00 << 16) | (0xac8c >> 2),
966 	0x00000000,
967 	(0x0e00 << 16) | (0x970c >> 2),
968 	0x00000000,
969 	(0x0e00 << 16) | (0x9714 >> 2),
970 	0x00000000,
971 	(0x0e00 << 16) | (0x9718 >> 2),
972 	0x00000000,
973 	(0x0e00 << 16) | (0x971c >> 2),
974 	0x00000000,
975 	(0x0e00 << 16) | (0x31068 >> 2),
976 	0x00000000,
977 	(0x4e00 << 16) | (0x31068 >> 2),
978 	0x00000000,
979 	(0x5e00 << 16) | (0x31068 >> 2),
980 	0x00000000,
981 	(0x6e00 << 16) | (0x31068 >> 2),
982 	0x00000000,
983 	(0x7e00 << 16) | (0x31068 >> 2),
984 	0x00000000,
985 	(0x0e00 << 16) | (0xcd10 >> 2),
986 	0x00000000,
987 	(0x0e00 << 16) | (0xcd14 >> 2),
988 	0x00000000,
989 	(0x0e00 << 16) | (0x88b0 >> 2),
990 	0x00000000,
991 	(0x0e00 << 16) | (0x88b4 >> 2),
992 	0x00000000,
993 	(0x0e00 << 16) | (0x88b8 >> 2),
994 	0x00000000,
995 	(0x0e00 << 16) | (0x88bc >> 2),
996 	0x00000000,
997 	(0x0400 << 16) | (0x89c0 >> 2),
998 	0x00000000,
999 	(0x0e00 << 16) | (0x88c4 >> 2),
1000 	0x00000000,
1001 	(0x0e00 << 16) | (0x88c8 >> 2),
1002 	0x00000000,
1003 	(0x0e00 << 16) | (0x88d0 >> 2),
1004 	0x00000000,
1005 	(0x0e00 << 16) | (0x88d4 >> 2),
1006 	0x00000000,
1007 	(0x0e00 << 16) | (0x88d8 >> 2),
1008 	0x00000000,
1009 	(0x0e00 << 16) | (0x8980 >> 2),
1010 	0x00000000,
1011 	(0x0e00 << 16) | (0x30938 >> 2),
1012 	0x00000000,
1013 	(0x0e00 << 16) | (0x3093c >> 2),
1014 	0x00000000,
1015 	(0x0e00 << 16) | (0x30940 >> 2),
1016 	0x00000000,
1017 	(0x0e00 << 16) | (0x89a0 >> 2),
1018 	0x00000000,
1019 	(0x0e00 << 16) | (0x30900 >> 2),
1020 	0x00000000,
1021 	(0x0e00 << 16) | (0x30904 >> 2),
1022 	0x00000000,
1023 	(0x0e00 << 16) | (0x89b4 >> 2),
1024 	0x00000000,
1025 	(0x0e00 << 16) | (0x3e1fc >> 2),
1026 	0x00000000,
1027 	(0x0e00 << 16) | (0x3c210 >> 2),
1028 	0x00000000,
1029 	(0x0e00 << 16) | (0x3c214 >> 2),
1030 	0x00000000,
1031 	(0x0e00 << 16) | (0x3c218 >> 2),
1032 	0x00000000,
1033 	(0x0e00 << 16) | (0x8904 >> 2),
1034 	0x00000000,
	/* NOTE(review): bare count word followed by entries without
	 * placeholder words — format change; confirm against consumer. */
1035 	0x5,
1036 	(0x0e00 << 16) | (0x8c28 >> 2),
1037 	(0x0e00 << 16) | (0x8c2c >> 2),
1038 	(0x0e00 << 16) | (0x8c30 >> 2),
1039 	(0x0e00 << 16) | (0x8c34 >> 2),
1040 	(0x0e00 << 16) | (0x9600 >> 2),
1041 };
1042 
/*
 * Bonaire "golden" SPM register settings.
 * NOTE(review): entries look like { reg offset, mask, value } triples,
 * presumably applied by a register-sequence programming helper — confirm
 * against the caller that consumes this table.
 */
1043 static const u32 bonaire_golden_spm_registers[] =
1044 {
1045 	0x30800, 0xe0ffffff, 0xe0000000
1046 };
1047 
/*
 * Bonaire "golden" common register settings.
 * NOTE(review): entries look like { reg offset, mask, value } triples,
 * presumably applied by a register-sequence programming helper — confirm
 * against the caller that consumes this table.
 */
1048 static const u32 bonaire_golden_common_registers[] =
1049 {
1050 	0xc770, 0xffffffff, 0x00000800,
1051 	0xc774, 0xffffffff, 0x00000800,
1052 	0xc798, 0xffffffff, 0x00007fbf,
1053 	0xc79c, 0xffffffff, 0x00007faf
1054 };
1055 
/*
 * Bonaire "golden" register settings.
 * NOTE(review): entries look like { reg offset, mask, value } triples,
 * presumably applied by a register-sequence programming helper — confirm
 * against the caller that consumes this table.
 */
1056 static const u32 bonaire_golden_registers[] =
1057 {
1058 	0x3354, 0x00000333, 0x00000333,
1059 	0x3350, 0x000c0fc0, 0x00040200,
1060 	0x9a10, 0x00010000, 0x00058208,
1061 	0x3c000, 0xffff1fff, 0x00140000,
1062 	0x3c200, 0xfdfc0fff, 0x00000100,
1063 	0x3c234, 0x40000000, 0x40000200,
1064 	0x9830, 0xffffffff, 0x00000000,
1065 	0x9834, 0xf00fffff, 0x00000400,
1066 	0x9838, 0x0002021c, 0x00020200,
1067 	0xc78, 0x00000080, 0x00000000,
1068 	0x5bb0, 0x000000f0, 0x00000070,
1069 	0x5bc0, 0xf0311fff, 0x80300000,
1070 	0x98f8, 0x73773777, 0x12010001,
1071 	0x350c, 0x00810000, 0x408af000,
1072 	0x7030, 0x31000111, 0x00000011,
1073 	0x2f48, 0x73773777, 0x12010001,
1074 	0x220c, 0x00007fb6, 0x0021a1b1,
1075 	0x2210, 0x00007fb6, 0x002021b1,
1076 	0x2180, 0x00007fb6, 0x00002191,
1077 	0x2218, 0x00007fb6, 0x002121b1,
1078 	0x221c, 0x00007fb6, 0x002021b1,
1079 	0x21dc, 0x00007fb6, 0x00002191,
1080 	0x21e0, 0x00007fb6, 0x00002191,
1081 	0x3628, 0x0000003f, 0x0000000a,
1082 	0x362c, 0x0000003f, 0x0000000a,
1083 	0x2ae4, 0x00073ffe, 0x000022a2,
1084 	0x240c, 0x000007ff, 0x00000000,
1085 	0x8a14, 0xf000003f, 0x00000007,
1086 	0x8bf0, 0x00002001, 0x00000001,
1087 	0x8b24, 0xffffffff, 0x00ffffff,
1088 	0x30a04, 0x0000ff0f, 0x00000000,
1089 	0x28a4c, 0x07ffffff, 0x06000000,
1090 	0x4d8, 0x00000fff, 0x00000100,
1091 	0x3e78, 0x00000001, 0x00000002,
1092 	0x9100, 0x03000000, 0x0362c688,
1093 	0x8c00, 0x000000ff, 0x00000001,
1094 	0xe40, 0x00001fff, 0x00001fff,
1095 	0x9060, 0x0000007f, 0x00000020,
1096 	0x9508, 0x00010000, 0x00010000,
1097 	0xac14, 0x000003ff, 0x000000f3,
1098 	0xac0c, 0xffffffff, 0x00001032
1099 };
1100 
/* Bonaire medium/coarse grain clock gating init: {offset, mask, value}
 * triplets consumed by radeon_program_register_sequence() in
 * cik_init_golden_registers().
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1186 
/* Spectre (Kaveri) SPM golden settings: {offset, mask, value} triplets
 * consumed by radeon_program_register_sequence().
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1191 
/* Spectre (Kaveri) common golden settings: {offset, mask, value} triplets
 * consumed by radeon_program_register_sequence().
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1199 
/* Spectre (Kaveri) golden register settings: {offset, mask, value} triplets
 * consumed by radeon_program_register_sequence().
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1228 
/* Spectre (Kaveri) medium/coarse grain clock gating init: {offset, mask,
 * value} triplets consumed by radeon_program_register_sequence().
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1319 
/* Kalindi (Kabini, also reused for Mullins) SPM golden settings:
 * {offset, mask, value} triplets consumed by
 * radeon_program_register_sequence().
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1324 
/* Kalindi (Kabini, also reused for Mullins) common golden settings:
 * {offset, mask, value} triplets consumed by
 * radeon_program_register_sequence().
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1332 
/* Kalindi (Kabini) golden register settings: {offset, mask, value} triplets
 * consumed by radeon_program_register_sequence().
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1366 
/* Kalindi (Kabini, also reused for Mullins) medium/coarse grain clock
 * gating init: {offset, mask, value} triplets consumed by
 * radeon_program_register_sequence().
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1425 
/* Hawaii SPM golden settings: {offset, mask, value} triplets consumed by
 * radeon_program_register_sequence().
 */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1430 
/* Hawaii common golden settings: {offset, mask, value} triplets consumed by
 * radeon_program_register_sequence().
 */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1439 
/* Hawaii golden register settings: {offset, mask, value} triplets consumed
 * by radeon_program_register_sequence().
 */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1479 
/* Hawaii medium/coarse grain clock gating init: {offset, mask, value}
 * triplets consumed by radeon_program_register_sequence().
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1590 
/* Godavari (Mullins) golden register settings: {offset, mask, value}
 * triplets consumed by radeon_program_register_sequence().
 */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	/* NOTE(review): 0x98302 looks suspicious next to 0x9830/0x9834 used in
	 * the sibling tables — possibly a typo, but it matches the upstream
	 * table; do not change without confirming against AMD register docs.
	 */
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1626 
1627 
1628 static void cik_init_golden_registers(struct radeon_device *rdev)
1629 {
1630 	switch (rdev->family) {
1631 	case CHIP_BONAIRE:
1632 		radeon_program_register_sequence(rdev,
1633 						 bonaire_mgcg_cgcg_init,
1634 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1635 		radeon_program_register_sequence(rdev,
1636 						 bonaire_golden_registers,
1637 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1638 		radeon_program_register_sequence(rdev,
1639 						 bonaire_golden_common_registers,
1640 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1641 		radeon_program_register_sequence(rdev,
1642 						 bonaire_golden_spm_registers,
1643 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1644 		break;
1645 	case CHIP_KABINI:
1646 		radeon_program_register_sequence(rdev,
1647 						 kalindi_mgcg_cgcg_init,
1648 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1649 		radeon_program_register_sequence(rdev,
1650 						 kalindi_golden_registers,
1651 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1652 		radeon_program_register_sequence(rdev,
1653 						 kalindi_golden_common_registers,
1654 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1655 		radeon_program_register_sequence(rdev,
1656 						 kalindi_golden_spm_registers,
1657 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1658 		break;
1659 	case CHIP_MULLINS:
1660 		radeon_program_register_sequence(rdev,
1661 						 kalindi_mgcg_cgcg_init,
1662 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1663 		radeon_program_register_sequence(rdev,
1664 						 godavari_golden_registers,
1665 						 (const u32)ARRAY_SIZE(godavari_golden_registers));
1666 		radeon_program_register_sequence(rdev,
1667 						 kalindi_golden_common_registers,
1668 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1669 		radeon_program_register_sequence(rdev,
1670 						 kalindi_golden_spm_registers,
1671 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1672 		break;
1673 	case CHIP_KAVERI:
1674 		radeon_program_register_sequence(rdev,
1675 						 spectre_mgcg_cgcg_init,
1676 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1677 		radeon_program_register_sequence(rdev,
1678 						 spectre_golden_registers,
1679 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1680 		radeon_program_register_sequence(rdev,
1681 						 spectre_golden_common_registers,
1682 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1683 		radeon_program_register_sequence(rdev,
1684 						 spectre_golden_spm_registers,
1685 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1686 		break;
1687 	case CHIP_HAWAII:
1688 		radeon_program_register_sequence(rdev,
1689 						 hawaii_mgcg_cgcg_init,
1690 						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1691 		radeon_program_register_sequence(rdev,
1692 						 hawaii_golden_registers,
1693 						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1694 		radeon_program_register_sequence(rdev,
1695 						 hawaii_golden_common_registers,
1696 						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1697 		radeon_program_register_sequence(rdev,
1698 						 hawaii_golden_spm_registers,
1699 						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1700 		break;
1701 	default:
1702 		break;
1703 	}
1704 }
1705 
1706 /**
1707  * cik_get_xclk - get the xclk
1708  *
1709  * @rdev: radeon_device pointer
1710  *
1711  * Returns the reference clock used by the gfx engine
1712  * (CIK).
1713  */
1714 u32 cik_get_xclk(struct radeon_device *rdev)
1715 {
1716 	u32 reference_clock = rdev->clock.spll.reference_freq;
1717 
1718 	if (rdev->flags & RADEON_IS_IGP) {
1719 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1720 			return reference_clock / 2;
1721 	} else {
1722 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1723 			return reference_clock / 4;
1724 	}
1725 	return reference_clock;
1726 }
1727 
1728 /**
1729  * cik_mm_rdoorbell - read a doorbell dword
1730  *
1731  * @rdev: radeon_device pointer
1732  * @index: doorbell index
1733  *
1734  * Returns the value in the doorbell aperture at the
1735  * requested doorbell index (CIK).
1736  */
1737 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1738 {
1739 	if (index < rdev->doorbell.num_doorbells) {
1740 		return readl(rdev->doorbell.ptr + index);
1741 	} else {
1742 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1743 		return 0;
1744 	}
1745 }
1746 
1747 /**
1748  * cik_mm_wdoorbell - write a doorbell dword
1749  *
1750  * @rdev: radeon_device pointer
1751  * @index: doorbell index
1752  * @v: value to write
1753  *
1754  * Writes @v to the doorbell aperture at the
1755  * requested doorbell index (CIK).
1756  */
1757 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1758 {
1759 	if (index < rdev->doorbell.num_doorbells) {
1760 		writel(v, rdev->doorbell.ptr + index);
1761 	} else {
1762 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1763 	}
1764 }
1765 
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Legacy-firmware IO MC register table for Bonaire: {index, data} pairs
 * written to MC_SEQ_IO_DEBUG_INDEX / MC_SEQ_IO_DEBUG_DATA by
 * ci_mc_load_microcode() before the MC ucode is uploaded.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1807 
#define HAWAII_IO_MC_REGS_SIZE 22

/* Legacy-firmware IO MC register table for Hawaii: {index, data} pairs
 * written to MC_SEQ_IO_DEBUG_INDEX / MC_SEQ_IO_DEBUG_DATA by
 * ci_mc_load_microcode() before the MC ucode is uploaded.
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1835 
1836 
1837 /**
1838  * cik_srbm_select - select specific register instances
1839  *
1840  * @rdev: radeon_device pointer
1841  * @me: selected ME (micro engine)
1842  * @pipe: pipe
1843  * @queue: queue
1844  * @vmid: VMID
1845  *
1846  * Switches the currently active registers instances.  Some
1847  * registers are instanced per VMID, others are instanced per
1848  * me/pipe/queue combination.
1849  */
1850 static void cik_srbm_select(struct radeon_device *rdev,
1851 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1852 {
1853 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1854 			     MEID(me & 0x3) |
1855 			     VMID(vmid & 0xf) |
1856 			     QUEUEID(queue & 0x7));
1857 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1858 }
1859 
/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * The programming sequence (engine reset, IO debug register load,
 * ucode upload, re-activation, training wait) is order-sensitive.
 * Returns 0 on success, error on failure.
 */
int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;
	const __le32 *new_fw_data = NULL;
	u32 running, tmp;
	u32 *io_mc_regs = NULL;
	const __le32 *new_io_mc_regs = NULL;
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		/* new-style firmware: sizes and data offsets come from the
		 * little-endian header embedded in the firmware image */
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);

		/* io_debug data is stored as {index, data} dword pairs */
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		/* legacy firmware: big-endian blob, IO MC register values come
		 * from the per-chip tables built into the driver */
		ucode_size = rdev->mc_fw->size / 4;

		switch (rdev->family) {
		case CHIP_BONAIRE:
			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
			regs_size = BONAIRE_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAWAII:
			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
			regs_size = HAWAII_IO_MC_REGS_SIZE;
			break;
		default:
			return -EINVAL;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	/* only program the engine when it is not already running */
	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}

		/* NOTE(review): extra IO debug writes keyed on PCI device
		 * 0x6649 and MC_SEQ_MISC0 reading 0x56xx — presumably a
		 * board/memory-specific workaround; confirm against AMD docs
		 * before touching */
		tmp = RREG32(MC_SEQ_MISC0);
		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
		}

		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete on both channels; a timeout
		 * falls through without reporting an error */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}
1965 
1966 /**
1967  * cik_init_microcode - load ucode images from disk
1968  *
1969  * @rdev: radeon_device pointer
1970  *
1971  * Use the firmware interface to load the ucode images into
1972  * the driver (not loaded into hw).
1973  * Returns 0 on success, error on failure.
1974  */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;	    /* legacy (uppercase, raw-format) fw name */
	const char *new_chip_name;  /* new-style (lowercase, validated) fw name */
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size = 0,
		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
	char fw_name[30];
	int new_fw = 0;		/* count of images that passed new-format validation */
	int err;
	int num_fw;		/* total images expected for this chip (all must agree on format) */
	bool new_smc = false;	/* some board revisions need the "_k_smc" image */

	DRM_DEBUG("\n");

	/* Pick firmware names, expected legacy sizes, and image count per ASIC.
	 * APUs (Kaveri/Kabini/Mullins) have no MC/SMC ucode, hence fewer images.
	 */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		/* certain Bonaire SKUs require the updated SMC image */
		if ((rdev->pdev->revision == 0x80) ||
		    (rdev->pdev->revision == 0x81) ||
		    (rdev->pdev->device == 0x665f))
			new_smc = true;
		new_chip_name = "bonaire";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = roundup2(BONAIRE_SMC_UCODE_SIZE, 4);
		num_fw = 8;
		break;
	case CHIP_HAWAII:
		chip_name = "HAWAII";
		if (rdev->pdev->revision == 0x80)
			new_smc = true;
		new_chip_name = "hawaii";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = roundup2(HAWAII_SMC_UCODE_SIZE, 4);
		num_fw = 8;
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		new_chip_name = "kaveri";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		num_fw = 7;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		new_chip_name = "kabini";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		num_fw = 6;
		break;
	case CHIP_MULLINS:
		chip_name = "MULLINS";
		new_chip_name = "mullins";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		num_fw = 6;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", new_chip_name);

	/* Each image below follows the same pattern: try the new-style
	 * (lowercase) file first and validate it; on failure fall back to
	 * the legacy (uppercase) file, which is only size-checked.
	 */
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->pfp_fw->size != pfp_req_size) {
			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->pfp_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
	} else {
		err = radeon_ucode_validate(rdev->pfp_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		/* NOTE(review): unlike the pfp path above, a bogus legacy
		 * size here sets err but falls through, so err is clobbered
		 * by the next request_firmware() call.  Matches upstream
		 * radeon behavior -- confirm before changing.
		 */
		if (rdev->me_fw->size != me_req_size) {
			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->me_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->me_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->ce_fw->size != ce_req_size) {
			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->ce_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->ce_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->mec_fw->size != mec_req_size) {
			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mec_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->mec_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* Kaveri is the only CIK part with a second MEC; new-format only,
	 * no legacy fallback exists for it.
	 */
	if (rdev->family == CHIP_KAVERI) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
		if (err) {
			goto out;
		} else {
			err = radeon_ucode_validate(rdev->mec2_fw);
			if (err) {
				goto out;
			} else {
				new_fw++;
			}
		}
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->rlc_fw->size != rlc_req_size) {
			pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->rlc_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->rlc_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->sdma_fw->size != sdma_req_size) {
			pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
			       rdev->sdma_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->sdma_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* No SMC, MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err) {
			/* legacy fallback: prefer the newer mc2 image, then mc;
			 * either of the two legacy sizes is accepted below
			 */
			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
			if (err) {
				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
				if (err)
					goto out;
			}
			if ((rdev->mc_fw->size != mc_req_size) &&
			    (rdev->mc_fw->size != mc2_req_size)){
				pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
				       rdev->mc_fw->size, fw_name);
				err = -EINVAL;
			}
			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
		} else {
			err = radeon_ucode_validate(rdev->mc_fw);
			if (err) {
				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
				       fw_name);
				goto out;
			} else {
				new_fw++;
			}
		}

		if (new_smc)
			snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
		else
			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
		if (err) {
			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
			if (err) {
				/* missing SMC fw is non-fatal: clear it and
				 * continue without it (err reset to 0)
				 */
				pr_err("smc: error loading firmware \"%s\"\n",
				       fw_name);
				release_firmware(rdev->smc_fw);
				rdev->smc_fw = NULL;
				err = 0;
			} else if (rdev->smc_fw->size != smc_req_size) {
				pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
				       rdev->smc_fw->size, fw_name);
				err = -EINVAL;
			}
		} else {
			err = radeon_ucode_validate(rdev->smc_fw);
			if (err) {
				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
				       fw_name);
				goto out;
			} else {
				new_fw++;
			}
		}
	}

	/* All images must be the same format: either all legacy (new_fw == 0)
	 * or all new-style (new_fw == num_fw).  A mix is rejected.
	 */
	if (new_fw == 0) {
		rdev->new_fw = false;
	} else if (new_fw < num_fw) {
		pr_err("ci_fw: mixing new and old firmware!\n");
		err = -EINVAL;
	} else {
		rdev->new_fw = true;
	}

out:
	/* on any failure, drop every image we may have loaded;
	 * release_firmware(NULL) is a no-op so unconditional release is safe
	 */
	if (err) {
		if (err != -EINVAL)
			pr_err("cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
		release_firmware(rdev->mec2_fw);
		rdev->mec2_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
		release_firmware(rdev->smc_fw);
		rdev->smc_fw = NULL;
	}
	return err;
}
2314 
2315 /*
2316  * Core functions
2317  */
2318 /**
2319  * cik_tiling_mode_table_init - init the hw tiling table
2320  *
2321  * @rdev: radeon_device pointer
2322  *
2323  * Starting with SI, the tiling setup is done globally in a
2324  * set of 32 tiling modes.  Rather than selecting each set of
2325  * parameters per surface as on older asics, we just select
2326  * which index in the tiling table we want to use, and the
2327  * surface uses those parameters (CIK).
2328  */
2329 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2330 {
2331 	u32 *tile = rdev->config.cik.tile_mode_array;
2332 	u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2333 	const u32 num_tile_mode_states =
2334 			ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2335 	const u32 num_secondary_tile_mode_states =
2336 			ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2337 	u32 reg_offset, split_equal_to_row_size;
2338 	u32 num_pipe_configs;
2339 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2340 		rdev->config.cik.max_shader_engines;
2341 
2342 	switch (rdev->config.cik.mem_row_size_in_kb) {
2343 	case 1:
2344 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2345 		break;
2346 	case 2:
2347 	default:
2348 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2349 		break;
2350 	case 4:
2351 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2352 		break;
2353 	}
2354 
2355 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2356 	if (num_pipe_configs > 8)
2357 		num_pipe_configs = 16;
2358 
2359 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2360 		tile[reg_offset] = 0;
2361 	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2362 		macrotile[reg_offset] = 0;
2363 
2364 	switch(num_pipe_configs) {
2365 	case 16:
2366 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2367 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2368 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2369 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2370 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2371 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2372 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2374 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2376 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2378 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2379 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2380 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2382 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2383 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2384 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2385 			   TILE_SPLIT(split_equal_to_row_size));
2386 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2387 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2389 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2390 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2391 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2393 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2394 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2395 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396 			   TILE_SPLIT(split_equal_to_row_size));
2397 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2398 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2399 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2400 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2402 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2403 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2404 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2406 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2407 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2408 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2409 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2411 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2412 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2413 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2414 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2415 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2416 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2417 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2418 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2419 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2422 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2423 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2424 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2426 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2427 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2428 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2430 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2432 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2434 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2436 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2437 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2438 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2439 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2441 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2442 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2443 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444 
2445 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2447 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2448 			   NUM_BANKS(ADDR_SURF_16_BANK));
2449 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2451 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452 			   NUM_BANKS(ADDR_SURF_16_BANK));
2453 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2455 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2456 			   NUM_BANKS(ADDR_SURF_16_BANK));
2457 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2459 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2460 			   NUM_BANKS(ADDR_SURF_16_BANK));
2461 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2464 			   NUM_BANKS(ADDR_SURF_8_BANK));
2465 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2467 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468 			   NUM_BANKS(ADDR_SURF_4_BANK));
2469 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2471 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2472 			   NUM_BANKS(ADDR_SURF_2_BANK));
2473 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2475 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2476 			   NUM_BANKS(ADDR_SURF_16_BANK));
2477 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2479 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2480 			   NUM_BANKS(ADDR_SURF_16_BANK));
2481 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2483 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2484 			    NUM_BANKS(ADDR_SURF_16_BANK));
2485 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2487 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2488 			    NUM_BANKS(ADDR_SURF_8_BANK));
2489 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2492 			    NUM_BANKS(ADDR_SURF_4_BANK));
2493 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2495 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2496 			    NUM_BANKS(ADDR_SURF_2_BANK));
2497 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2500 			    NUM_BANKS(ADDR_SURF_2_BANK));
2501 
2502 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2503 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2504 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2505 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2506 		break;
2507 
2508 	case 8:
2509 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2510 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2511 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2512 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2513 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2514 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2515 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2517 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2518 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2519 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2520 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2521 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2522 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2523 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2524 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2525 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2527 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2528 			   TILE_SPLIT(split_equal_to_row_size));
2529 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2530 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2532 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2533 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2534 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2536 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2537 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539 			   TILE_SPLIT(split_equal_to_row_size));
2540 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2541 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2542 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2543 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2544 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2545 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2546 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2547 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2549 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2550 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2551 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2552 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2553 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2554 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2555 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2556 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2557 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2558 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2560 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2561 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2562 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2564 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2565 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2566 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2567 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2568 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2569 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2570 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2572 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2573 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2574 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2575 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2576 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2577 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2579 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2580 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2581 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2582 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2583 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2584 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2585 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2586 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2587 
2588 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2589 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2590 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2591 				NUM_BANKS(ADDR_SURF_16_BANK));
2592 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2593 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2594 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2595 				NUM_BANKS(ADDR_SURF_16_BANK));
2596 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2597 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2598 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2599 				NUM_BANKS(ADDR_SURF_16_BANK));
2600 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2601 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2602 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2603 				NUM_BANKS(ADDR_SURF_16_BANK));
2604 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2605 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2606 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2607 				NUM_BANKS(ADDR_SURF_8_BANK));
2608 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2609 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2610 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2611 				NUM_BANKS(ADDR_SURF_4_BANK));
2612 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2613 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2614 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2615 				NUM_BANKS(ADDR_SURF_2_BANK));
2616 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2618 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2619 				NUM_BANKS(ADDR_SURF_16_BANK));
2620 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2622 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2623 				NUM_BANKS(ADDR_SURF_16_BANK));
2624 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2626 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2627 				NUM_BANKS(ADDR_SURF_16_BANK));
2628 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2630 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2631 				NUM_BANKS(ADDR_SURF_16_BANK));
2632 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2633 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2634 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2635 				NUM_BANKS(ADDR_SURF_8_BANK));
2636 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2638 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2639 				NUM_BANKS(ADDR_SURF_4_BANK));
2640 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2642 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2643 				NUM_BANKS(ADDR_SURF_2_BANK));
2644 
2645 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2646 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2647 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2648 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2649 		break;
2650 
2651 	case 4:
2652 		if (num_rbs == 4) {
2653 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2655 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2656 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2657 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2658 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2659 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2660 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2661 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2662 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2663 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2664 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2665 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2667 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2668 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2669 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2671 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2672 			   TILE_SPLIT(split_equal_to_row_size));
2673 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2674 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2676 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2677 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2678 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2680 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2681 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2682 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683 			   TILE_SPLIT(split_equal_to_row_size));
2684 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2685 			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
2686 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2687 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2688 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2689 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2690 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2691 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2692 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2693 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2694 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2695 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2696 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2697 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2698 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2699 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2700 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2701 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2702 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2704 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2705 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2706 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2708 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2709 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2710 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2711 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2712 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2713 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2714 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2716 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2717 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2718 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2719 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2720 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2721 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2723 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2724 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2725 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2726 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2728 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2729 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2730 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731 
2732 		} else if (num_rbs < 4) {
2733 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2734 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2735 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2736 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2737 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2738 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2739 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2740 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2741 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2742 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2743 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2744 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2745 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2746 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2747 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2748 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2749 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2750 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2751 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2752 			   TILE_SPLIT(split_equal_to_row_size));
2753 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2754 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2755 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2756 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2757 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2758 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2760 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2761 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2762 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763 			   TILE_SPLIT(split_equal_to_row_size));
2764 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2765 			   PIPE_CONFIG(ADDR_SURF_P4_8x16));
2766 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2767 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2768 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2769 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2770 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2771 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2772 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2773 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2774 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2775 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2776 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2777 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2778 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2779 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2780 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2781 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2782 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2783 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2784 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2786 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2788 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2789 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2790 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2792 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2793 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2794 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2796 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2797 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2798 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2799 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2800 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2801 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2802 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2803 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2804 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2805 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2806 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2808 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2809 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811 		}
2812 
2813 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2814 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2815 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2816 				NUM_BANKS(ADDR_SURF_16_BANK));
2817 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2819 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2820 				NUM_BANKS(ADDR_SURF_16_BANK));
2821 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2823 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2824 				NUM_BANKS(ADDR_SURF_16_BANK));
2825 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2826 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2827 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2828 				NUM_BANKS(ADDR_SURF_16_BANK));
2829 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2830 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2831 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2832 				NUM_BANKS(ADDR_SURF_16_BANK));
2833 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2834 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2835 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2836 				NUM_BANKS(ADDR_SURF_8_BANK));
2837 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2839 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2840 				NUM_BANKS(ADDR_SURF_4_BANK));
2841 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2842 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2843 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2844 				NUM_BANKS(ADDR_SURF_16_BANK));
2845 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2846 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2847 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2848 				NUM_BANKS(ADDR_SURF_16_BANK));
2849 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2850 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2851 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2852 				NUM_BANKS(ADDR_SURF_16_BANK));
2853 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2855 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2856 				NUM_BANKS(ADDR_SURF_16_BANK));
2857 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2859 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2860 				NUM_BANKS(ADDR_SURF_16_BANK));
2861 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2863 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2864 				NUM_BANKS(ADDR_SURF_8_BANK));
2865 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2866 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2867 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2868 				NUM_BANKS(ADDR_SURF_4_BANK));
2869 
2870 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2871 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2872 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2873 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2874 		break;
2875 
2876 	case 2:
2877 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2878 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2879 			   PIPE_CONFIG(ADDR_SURF_P2) |
2880 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2881 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2882 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2883 			   PIPE_CONFIG(ADDR_SURF_P2) |
2884 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2885 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2887 			   PIPE_CONFIG(ADDR_SURF_P2) |
2888 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2889 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2890 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2891 			   PIPE_CONFIG(ADDR_SURF_P2) |
2892 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2893 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2894 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2895 			   PIPE_CONFIG(ADDR_SURF_P2) |
2896 			   TILE_SPLIT(split_equal_to_row_size));
2897 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2898 			   PIPE_CONFIG(ADDR_SURF_P2) |
2899 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2900 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2901 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2902 			   PIPE_CONFIG(ADDR_SURF_P2) |
2903 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2904 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2905 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906 			   PIPE_CONFIG(ADDR_SURF_P2) |
2907 			   TILE_SPLIT(split_equal_to_row_size));
2908 		tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2909 			   PIPE_CONFIG(ADDR_SURF_P2);
2910 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2911 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2912 			   PIPE_CONFIG(ADDR_SURF_P2));
2913 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2914 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2915 			    PIPE_CONFIG(ADDR_SURF_P2) |
2916 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2917 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2918 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2919 			    PIPE_CONFIG(ADDR_SURF_P2) |
2920 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2921 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2922 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2923 			    PIPE_CONFIG(ADDR_SURF_P2) |
2924 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2926 			    PIPE_CONFIG(ADDR_SURF_P2) |
2927 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2928 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2929 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2930 			    PIPE_CONFIG(ADDR_SURF_P2) |
2931 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2932 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2933 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2934 			    PIPE_CONFIG(ADDR_SURF_P2) |
2935 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2937 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2938 			    PIPE_CONFIG(ADDR_SURF_P2) |
2939 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2941 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2942 			    PIPE_CONFIG(ADDR_SURF_P2));
2943 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2944 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2945 			    PIPE_CONFIG(ADDR_SURF_P2) |
2946 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2947 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2948 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2949 			    PIPE_CONFIG(ADDR_SURF_P2) |
2950 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2952 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2953 			    PIPE_CONFIG(ADDR_SURF_P2) |
2954 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955 
2956 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2957 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2958 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2959 				NUM_BANKS(ADDR_SURF_16_BANK));
2960 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2961 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2962 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2963 				NUM_BANKS(ADDR_SURF_16_BANK));
2964 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2965 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2966 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2967 				NUM_BANKS(ADDR_SURF_16_BANK));
2968 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2969 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2970 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971 				NUM_BANKS(ADDR_SURF_16_BANK));
2972 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2973 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2974 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2975 				NUM_BANKS(ADDR_SURF_16_BANK));
2976 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2977 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2978 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2979 				NUM_BANKS(ADDR_SURF_16_BANK));
2980 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2982 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2983 				NUM_BANKS(ADDR_SURF_8_BANK));
2984 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2985 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2986 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2987 				NUM_BANKS(ADDR_SURF_16_BANK));
2988 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2989 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2990 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2991 				NUM_BANKS(ADDR_SURF_16_BANK));
2992 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2993 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2994 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2995 				NUM_BANKS(ADDR_SURF_16_BANK));
2996 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2997 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2998 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2999 				NUM_BANKS(ADDR_SURF_16_BANK));
3000 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3001 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3002 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3003 				NUM_BANKS(ADDR_SURF_16_BANK));
3004 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3005 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3006 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3007 				NUM_BANKS(ADDR_SURF_16_BANK));
3008 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3009 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3010 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3011 				NUM_BANKS(ADDR_SURF_8_BANK));
3012 
3013 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3014 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3015 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3016 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3017 		break;
3018 
3019 	default:
3020 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3021 	}
3022 }
3023 
3024 /**
3025  * cik_select_se_sh - select which SE, SH to address
3026  *
3027  * @rdev: radeon_device pointer
3028  * @se_num: shader engine to address
3029  * @sh_num: sh block to address
3030  *
3031  * Select which SE, SH combinations to address. Certain
3032  * registers are instanced per SE or SH.  0xffffffff means
3033  * broadcast to all SEs or SHs (CIK).
3034  */
3035 static void cik_select_se_sh(struct radeon_device *rdev,
3036 			     u32 se_num, u32 sh_num)
3037 {
3038 	u32 data = INSTANCE_BROADCAST_WRITES;
3039 
3040 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3041 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3042 	else if (se_num == 0xffffffff)
3043 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3044 	else if (sh_num == 0xffffffff)
3045 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3046 	else
3047 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3048 	WREG32(GRBM_GFX_INDEX, data);
3049 }
3050 
3051 /**
3052  * cik_create_bitmask - create a bitmask
3053  *
3054  * @bit_width: length of the mask
3055  *
3056  * create a variable length bit mask (CIK).
3057  * Returns the bitmask.
3058  */
3059 static u32 cik_create_bitmask(u32 bit_width)
3060 {
3061 	u32 i, mask = 0;
3062 
3063 	for (i = 0; i < bit_width; i++) {
3064 		mask <<= 1;
3065 		mask |= 1;
3066 	}
3067 	return mask;
3068 }
3069 
3070 /**
3071  * cik_get_rb_disabled - computes the mask of disabled RBs
3072  *
3073  * @rdev: radeon_device pointer
3074  * @max_rb_num: max RBs (render backends) for the asic
3075  * @se_num: number of SEs (shader engines) for the asic
3076  * @sh_per_se: number of SH blocks per SE for the asic
3077  *
3078  * Calculates the bitmask of disabled RBs (CIK).
3079  * Returns the disabled RB bitmask.
3080  */
3081 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3082 			      u32 max_rb_num_per_se,
3083 			      u32 sh_per_se)
3084 {
3085 	u32 data, mask;
3086 
3087 	data = RREG32(CC_RB_BACKEND_DISABLE);
3088 	if (data & 1)
3089 		data &= BACKEND_DISABLE_MASK;
3090 	else
3091 		data = 0;
3092 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3093 
3094 	data >>= BACKEND_DISABLE_SHIFT;
3095 
3096 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3097 
3098 	return data & mask;
3099 }
3100 
3101 /**
3102  * cik_setup_rb - setup the RBs on the asic
3103  *
3104  * @rdev: radeon_device pointer
3105  * @se_num: number of SEs (shader engines) for the asic
3106  * @sh_per_se: number of SH blocks per SE for the asic
3107  * @max_rb_num: max RBs (render backends) for the asic
3108  *
3109  * Configures per-SE/SH RB registers (CIK).
3110  */
3111 static void cik_setup_rb(struct radeon_device *rdev,
3112 			 u32 se_num, u32 sh_per_se,
3113 			 u32 max_rb_num_per_se)
3114 {
3115 	int i, j;
3116 	u32 data, mask;
3117 	u32 disabled_rbs = 0;
3118 	u32 enabled_rbs = 0;
3119 
3120 	for (i = 0; i < se_num; i++) {
3121 		for (j = 0; j < sh_per_se; j++) {
3122 			cik_select_se_sh(rdev, i, j);
3123 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3124 			if (rdev->family == CHIP_HAWAII)
3125 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3126 			else
3127 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3128 		}
3129 	}
3130 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3131 
3132 	mask = 1;
3133 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3134 		if (!(disabled_rbs & mask))
3135 			enabled_rbs |= mask;
3136 		mask <<= 1;
3137 	}
3138 
3139 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3140 
3141 	for (i = 0; i < se_num; i++) {
3142 		cik_select_se_sh(rdev, i, 0xffffffff);
3143 		data = 0;
3144 		for (j = 0; j < sh_per_se; j++) {
3145 			switch (enabled_rbs & 3) {
3146 			case 0:
3147 				if (j == 0)
3148 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3149 				else
3150 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3151 				break;
3152 			case 1:
3153 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3154 				break;
3155 			case 2:
3156 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3157 				break;
3158 			case 3:
3159 			default:
3160 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3161 				break;
3162 			}
3163 			enabled_rbs >>= 2;
3164 		}
3165 		WREG32(PA_SC_RASTER_CONFIG, data);
3166 	}
3167 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3168 }
3169 
3170 /**
3171  * cik_gpu_init - setup the 3D engine
3172  *
3173  * @rdev: radeon_device pointer
3174  *
3175  * Configures the 3D engine and tiling configuration
3176  * registers so that the 3D engine is usable.
3177  */
3178 static void cik_gpu_init(struct radeon_device *rdev)
3179 {
3180 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3181 	u32 mc_shared_chmap, mc_arb_ramcfg;
3182 	u32 hdp_host_path_cntl;
3183 	u32 tmp;
3184 	int i, j;
3185 
3186 	switch (rdev->family) {
3187 	case CHIP_BONAIRE:
3188 		rdev->config.cik.max_shader_engines = 2;
3189 		rdev->config.cik.max_tile_pipes = 4;
3190 		rdev->config.cik.max_cu_per_sh = 7;
3191 		rdev->config.cik.max_sh_per_se = 1;
3192 		rdev->config.cik.max_backends_per_se = 2;
3193 		rdev->config.cik.max_texture_channel_caches = 4;
3194 		rdev->config.cik.max_gprs = 256;
3195 		rdev->config.cik.max_gs_threads = 32;
3196 		rdev->config.cik.max_hw_contexts = 8;
3197 
3198 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3199 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3200 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3201 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3202 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3203 		break;
3204 	case CHIP_HAWAII:
3205 		rdev->config.cik.max_shader_engines = 4;
3206 		rdev->config.cik.max_tile_pipes = 16;
3207 		rdev->config.cik.max_cu_per_sh = 11;
3208 		rdev->config.cik.max_sh_per_se = 1;
3209 		rdev->config.cik.max_backends_per_se = 4;
3210 		rdev->config.cik.max_texture_channel_caches = 16;
3211 		rdev->config.cik.max_gprs = 256;
3212 		rdev->config.cik.max_gs_threads = 32;
3213 		rdev->config.cik.max_hw_contexts = 8;
3214 
3215 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3216 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3217 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3218 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3219 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3220 		break;
3221 	case CHIP_KAVERI:
3222 		rdev->config.cik.max_shader_engines = 1;
3223 		rdev->config.cik.max_tile_pipes = 4;
3224 		rdev->config.cik.max_cu_per_sh = 8;
3225 		rdev->config.cik.max_backends_per_se = 2;
3226 		rdev->config.cik.max_sh_per_se = 1;
3227 		rdev->config.cik.max_texture_channel_caches = 4;
3228 		rdev->config.cik.max_gprs = 256;
3229 		rdev->config.cik.max_gs_threads = 16;
3230 		rdev->config.cik.max_hw_contexts = 8;
3231 
3232 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3233 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3234 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3235 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3236 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3237 		break;
3238 	case CHIP_KABINI:
3239 	case CHIP_MULLINS:
3240 	default:
3241 		rdev->config.cik.max_shader_engines = 1;
3242 		rdev->config.cik.max_tile_pipes = 2;
3243 		rdev->config.cik.max_cu_per_sh = 2;
3244 		rdev->config.cik.max_sh_per_se = 1;
3245 		rdev->config.cik.max_backends_per_se = 1;
3246 		rdev->config.cik.max_texture_channel_caches = 2;
3247 		rdev->config.cik.max_gprs = 256;
3248 		rdev->config.cik.max_gs_threads = 16;
3249 		rdev->config.cik.max_hw_contexts = 8;
3250 
3251 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3252 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3253 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3254 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3255 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3256 		break;
3257 	}
3258 
3259 	/* Initialize HDP */
3260 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3261 		WREG32((0x2c14 + j), 0x00000000);
3262 		WREG32((0x2c18 + j), 0x00000000);
3263 		WREG32((0x2c1c + j), 0x00000000);
3264 		WREG32((0x2c20 + j), 0x00000000);
3265 		WREG32((0x2c24 + j), 0x00000000);
3266 	}
3267 
3268 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3269 	WREG32(SRBM_INT_CNTL, 0x1);
3270 	WREG32(SRBM_INT_ACK, 0x1);
3271 
3272 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3273 
3274 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3275 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3276 
3277 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3278 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3279 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3280 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3281 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3282 		rdev->config.cik.mem_row_size_in_kb = 4;
3283 	/* XXX use MC settings? */
3284 	rdev->config.cik.shader_engine_tile_size = 32;
3285 	rdev->config.cik.num_gpus = 1;
3286 	rdev->config.cik.multi_gpu_tile_size = 64;
3287 
3288 	/* fix up row size */
3289 	gb_addr_config &= ~ROW_SIZE_MASK;
3290 	switch (rdev->config.cik.mem_row_size_in_kb) {
3291 	case 1:
3292 	default:
3293 		gb_addr_config |= ROW_SIZE(0);
3294 		break;
3295 	case 2:
3296 		gb_addr_config |= ROW_SIZE(1);
3297 		break;
3298 	case 4:
3299 		gb_addr_config |= ROW_SIZE(2);
3300 		break;
3301 	}
3302 
3303 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3304 	 * not have bank info, so create a custom tiling dword.
3305 	 * bits 3:0   num_pipes
3306 	 * bits 7:4   num_banks
3307 	 * bits 11:8  group_size
3308 	 * bits 15:12 row_size
3309 	 */
3310 	rdev->config.cik.tile_config = 0;
3311 	switch (rdev->config.cik.num_tile_pipes) {
3312 	case 1:
3313 		rdev->config.cik.tile_config |= (0 << 0);
3314 		break;
3315 	case 2:
3316 		rdev->config.cik.tile_config |= (1 << 0);
3317 		break;
3318 	case 4:
3319 		rdev->config.cik.tile_config |= (2 << 0);
3320 		break;
3321 	case 8:
3322 	default:
3323 		/* XXX what about 12? */
3324 		rdev->config.cik.tile_config |= (3 << 0);
3325 		break;
3326 	}
3327 	rdev->config.cik.tile_config |=
3328 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3329 	rdev->config.cik.tile_config |=
3330 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3331 	rdev->config.cik.tile_config |=
3332 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3333 
3334 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3335 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3336 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3337 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3338 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3339 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3340 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3341 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3342 
3343 	cik_tiling_mode_table_init(rdev);
3344 
3345 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3346 		     rdev->config.cik.max_sh_per_se,
3347 		     rdev->config.cik.max_backends_per_se);
3348 
3349 	rdev->config.cik.active_cus = 0;
3350 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3351 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3352 			rdev->config.cik.active_cus +=
3353 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3354 		}
3355 	}
3356 
3357 	/* set HW defaults for 3D engine */
3358 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3359 
3360 	WREG32(SX_DEBUG_1, 0x20);
3361 
3362 	WREG32(TA_CNTL_AUX, 0x00010000);
3363 
3364 	tmp = RREG32(SPI_CONFIG_CNTL);
3365 	tmp |= 0x03000000;
3366 	WREG32(SPI_CONFIG_CNTL, tmp);
3367 
3368 	WREG32(SQ_CONFIG, 1);
3369 
3370 	WREG32(DB_DEBUG, 0);
3371 
3372 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3373 	tmp |= 0x00000400;
3374 	WREG32(DB_DEBUG2, tmp);
3375 
3376 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3377 	tmp |= 0x00020200;
3378 	WREG32(DB_DEBUG3, tmp);
3379 
3380 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3381 	tmp |= 0x00018208;
3382 	WREG32(CB_HW_CONTROL, tmp);
3383 
3384 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3385 
3386 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3387 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3388 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3389 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3390 
3391 	WREG32(VGT_NUM_INSTANCES, 1);
3392 
3393 	WREG32(CP_PERFMON_CNTL, 0);
3394 
3395 	WREG32(SQ_CONFIG, 0);
3396 
3397 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3398 					  FORCE_EOV_MAX_REZ_CNT(255)));
3399 
3400 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3401 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3402 
3403 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3404 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3405 
3406 	tmp = RREG32(HDP_MISC_CNTL);
3407 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3408 	WREG32(HDP_MISC_CNTL, tmp);
3409 
3410 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3411 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3412 
3413 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3414 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3415 
3416 	udelay(50);
3417 }
3418 
3419 /*
3420  * GPU scratch registers helpers function.
3421  */
3422 /**
3423  * cik_scratch_init - setup driver info for CP scratch regs
3424  *
3425  * @rdev: radeon_device pointer
3426  *
3427  * Set up the number and offset of the CP scratch registers.
3428  * NOTE: use of CP scratch registers is a legacy inferface and
3429  * is not used by default on newer asics (r6xx+).  On newer asics,
3430  * memory buffers are used for fences rather than scratch regs.
3431  */
3432 static void cik_scratch_init(struct radeon_device *rdev)
3433 {
3434 	int i;
3435 
3436 	rdev->scratch.num_reg = 7;
3437 	rdev->scratch.reg_base = SCRATCH_REG0;
3438 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3439 		rdev->scratch.free[i] = true;
3440 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3441 	}
3442 }
3443 
3444 /**
3445  * cik_ring_test - basic gfx ring test
3446  *
3447  * @rdev: radeon_device pointer
3448  * @ring: radeon_ring structure holding ring information
3449  *
3450  * Allocate a scratch register and write to it using the gfx ring (CIK).
3451  * Provides a basic gfx ring test to verify that the ring is working.
3452  * Used by cik_cp_gfx_resume();
3453  * Returns 0 on success, error on failure.
3454  */
3455 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3456 {
3457 	uint32_t scratch;
3458 	uint32_t tmp = 0;
3459 	unsigned i;
3460 	int r;
3461 
3462 	r = radeon_scratch_get(rdev, &scratch);
3463 	if (r) {
3464 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3465 		return r;
3466 	}
3467 	WREG32(scratch, 0xCAFEDEAD);
3468 	r = radeon_ring_lock(rdev, ring, 3);
3469 	if (r) {
3470 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3471 		radeon_scratch_free(rdev, scratch);
3472 		return r;
3473 	}
3474 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3475 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3476 	radeon_ring_write(ring, 0xDEADBEEF);
3477 	radeon_ring_unlock_commit(rdev, ring, false);
3478 
3479 	for (i = 0; i < rdev->usec_timeout; i++) {
3480 		tmp = RREG32(scratch);
3481 		if (tmp == 0xDEADBEEF)
3482 			break;
3483 		udelay(1);
3484 	}
3485 	if (i < rdev->usec_timeout) {
3486 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3487 	} else {
3488 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3489 			  ring->idx, scratch, tmp);
3490 		r = -EINVAL;
3491 	}
3492 	radeon_scratch_free(rdev, scratch);
3493 	return r;
3494 }
3495 
3496 /**
3497  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3498  *
3499  * @rdev: radeon_device pointer
3500  * @ridx: radeon ring index
3501  *
3502  * Emits an hdp flush on the cp.
3503  */
3504 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3505 				       int ridx)
3506 {
3507 	struct radeon_ring *ring = &rdev->ring[ridx];
3508 	u32 ref_and_mask;
3509 
3510 	switch (ring->idx) {
3511 	case CAYMAN_RING_TYPE_CP1_INDEX:
3512 	case CAYMAN_RING_TYPE_CP2_INDEX:
3513 	default:
3514 		switch (ring->me) {
3515 		case 0:
3516 			ref_and_mask = CP2 << ring->pipe;
3517 			break;
3518 		case 1:
3519 			ref_and_mask = CP6 << ring->pipe;
3520 			break;
3521 		default:
3522 			return;
3523 		}
3524 		break;
3525 	case RADEON_RING_TYPE_GFX_INDEX:
3526 		ref_and_mask = CP0;
3527 		break;
3528 	}
3529 
3530 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3531 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3532 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3533 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3534 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3535 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3536 	radeon_ring_write(ring, ref_and_mask);
3537 	radeon_ring_write(ring, ref_and_mask);
3538 	radeon_ring_write(ring, 0x20); /* poll interval */
3539 }
3540 
3541 /**
3542  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3543  *
3544  * @rdev: radeon_device pointer
3545  * @fence: radeon fence object
3546  *
3547  * Emits a fence sequnce number on the gfx ring and flushes
3548  * GPU caches.
3549  */
3550 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3551 			     struct radeon_fence *fence)
3552 {
3553 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3554 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3555 
3556 	/* Workaround for cache flush problems. First send a dummy EOP
3557 	 * event down the pipe with seq one below.
3558 	 */
3559 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3560 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3561 				 EOP_TC_ACTION_EN |
3562 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3563 				 EVENT_INDEX(5)));
3564 	radeon_ring_write(ring, addr & 0xfffffffc);
3565 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3566 				DATA_SEL(1) | INT_SEL(0));
3567 	radeon_ring_write(ring, fence->seq - 1);
3568 	radeon_ring_write(ring, 0);
3569 
3570 	/* Then send the real EOP event down the pipe. */
3571 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3572 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3573 				 EOP_TC_ACTION_EN |
3574 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3575 				 EVENT_INDEX(5)));
3576 	radeon_ring_write(ring, addr & 0xfffffffc);
3577 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3578 	radeon_ring_write(ring, fence->seq);
3579 	radeon_ring_write(ring, 0);
3580 }
3581 
3582 /**
3583  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3584  *
3585  * @rdev: radeon_device pointer
3586  * @fence: radeon fence object
3587  *
3588  * Emits a fence sequnce number on the compute ring and flushes
3589  * GPU caches.
3590  */
3591 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3592 				 struct radeon_fence *fence)
3593 {
3594 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3595 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3596 
3597 	/* RELEASE_MEM - flush caches, send int */
3598 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3599 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3600 				 EOP_TC_ACTION_EN |
3601 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3602 				 EVENT_INDEX(5)));
3603 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3604 	radeon_ring_write(ring, addr & 0xfffffffc);
3605 	radeon_ring_write(ring, upper_32_bits(addr));
3606 	radeon_ring_write(ring, fence->seq);
3607 	radeon_ring_write(ring, 0);
3608 }
3609 
3610 /**
3611  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3612  *
3613  * @rdev: radeon_device pointer
3614  * @ring: radeon ring buffer object
3615  * @semaphore: radeon semaphore object
3616  * @emit_wait: Is this a sempahore wait?
3617  *
3618  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3619  * from running ahead of semaphore waits.
3620  */
3621 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3622 			     struct radeon_ring *ring,
3623 			     struct radeon_semaphore *semaphore,
3624 			     bool emit_wait)
3625 {
3626 	uint64_t addr = semaphore->gpu_addr;
3627 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3628 
3629 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3630 	radeon_ring_write(ring, lower_32_bits(addr));
3631 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3632 
3633 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3634 		/* Prevent the PFP from running ahead of the semaphore wait */
3635 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3636 		radeon_ring_write(ring, 0x0);
3637 	}
3638 
3639 	return true;
3640 }
3641 
3642 /**
3643  * cik_copy_cpdma - copy pages using the CP DMA engine
3644  *
3645  * @rdev: radeon_device pointer
3646  * @src_offset: src GPU address
3647  * @dst_offset: dst GPU address
3648  * @num_gpu_pages: number of GPU pages to xfer
3649  * @resv: reservation object to sync to
3650  *
3651  * Copy GPU paging using the CP DMA engine (CIK+).
3652  * Used by the radeon ttm implementation to move pages if
3653  * registered as the asic copy callback.
3654  */
3655 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3656 				    uint64_t src_offset, uint64_t dst_offset,
3657 				    unsigned num_gpu_pages,
3658 				    struct dma_resv *resv)
3659 {
3660 	struct radeon_fence *fence;
3661 	struct radeon_sync sync;
3662 	int ring_index = rdev->asic->copy.blit_ring_index;
3663 	struct radeon_ring *ring = &rdev->ring[ring_index];
3664 	u32 size_in_bytes, cur_size_in_bytes, control;
3665 	int i, num_loops;
3666 	int r = 0;
3667 
3668 	radeon_sync_create(&sync);
3669 
3670 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3671 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3672 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3673 	if (r) {
3674 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3675 		radeon_sync_free(rdev, &sync, NULL);
3676 		return ERR_PTR(r);
3677 	}
3678 
3679 	radeon_sync_resv(rdev, &sync, resv, false);
3680 	radeon_sync_rings(rdev, &sync, ring->idx);
3681 
3682 	for (i = 0; i < num_loops; i++) {
3683 		cur_size_in_bytes = size_in_bytes;
3684 		if (cur_size_in_bytes > 0x1fffff)
3685 			cur_size_in_bytes = 0x1fffff;
3686 		size_in_bytes -= cur_size_in_bytes;
3687 		control = 0;
3688 		if (size_in_bytes == 0)
3689 			control |= PACKET3_DMA_DATA_CP_SYNC;
3690 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3691 		radeon_ring_write(ring, control);
3692 		radeon_ring_write(ring, lower_32_bits(src_offset));
3693 		radeon_ring_write(ring, upper_32_bits(src_offset));
3694 		radeon_ring_write(ring, lower_32_bits(dst_offset));
3695 		radeon_ring_write(ring, upper_32_bits(dst_offset));
3696 		radeon_ring_write(ring, cur_size_in_bytes);
3697 		src_offset += cur_size_in_bytes;
3698 		dst_offset += cur_size_in_bytes;
3699 	}
3700 
3701 	r = radeon_fence_emit(rdev, &fence, ring->idx);
3702 	if (r) {
3703 		radeon_ring_unlock_undo(rdev, ring);
3704 		radeon_sync_free(rdev, &sync, NULL);
3705 		return ERR_PTR(r);
3706 	}
3707 
3708 	radeon_ring_unlock_commit(rdev, ring, false);
3709 	radeon_sync_free(rdev, &sync, fence);
3710 
3711 	return fence;
3712 }
3713 
3714 /*
3715  * IB stuff
3716  */
3717 /**
3718  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3719  *
3720  * @rdev: radeon_device pointer
3721  * @ib: radeon indirect buffer object
3722  *
3723  * Emits a DE (drawing engine) or CE (constant engine) IB
3724  * on the gfx ring.  IBs are usually generated by userspace
3725  * acceleration drivers and submitted to the kernel for
3726  * scheduling on the ring.  This function schedules the IB
3727  * on the gfx ring for execution by the GPU.
3728  */
3729 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3730 {
3731 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3732 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3733 	u32 header, control = INDIRECT_BUFFER_VALID;
3734 
3735 	if (ib->is_const_ib) {
3736 		/* set switch buffer packet before const IB */
3737 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3738 		radeon_ring_write(ring, 0);
3739 
3740 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3741 	} else {
3742 		u32 next_rptr;
3743 		if (ring->rptr_save_reg) {
3744 			next_rptr = ring->wptr + 3 + 4;
3745 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3746 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3747 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3748 			radeon_ring_write(ring, next_rptr);
3749 		} else if (rdev->wb.enabled) {
3750 			next_rptr = ring->wptr + 5 + 4;
3751 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3752 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3753 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3754 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3755 			radeon_ring_write(ring, next_rptr);
3756 		}
3757 
3758 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3759 	}
3760 
3761 	control |= ib->length_dw | (vm_id << 24);
3762 
3763 	radeon_ring_write(ring, header);
3764 	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3765 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3766 	radeon_ring_write(ring, control);
3767 }
3768 
3769 /**
3770  * cik_ib_test - basic gfx ring IB test
3771  *
3772  * @rdev: radeon_device pointer
3773  * @ring: radeon_ring structure holding ring information
3774  *
3775  * Allocate an IB and execute it on the gfx ring (CIK).
3776  * Provides a basic gfx ring test to verify that IBs are working.
3777  * Returns 0 on success, error on failure.
3778  */
3779 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3780 {
3781 	struct radeon_ib ib;
3782 	uint32_t scratch;
3783 	uint32_t tmp = 0;
3784 	unsigned i;
3785 	int r;
3786 
3787 	r = radeon_scratch_get(rdev, &scratch);
3788 	if (r) {
3789 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3790 		return r;
3791 	}
3792 	WREG32(scratch, 0xCAFEDEAD);
3793 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3794 	if (r) {
3795 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3796 		radeon_scratch_free(rdev, scratch);
3797 		return r;
3798 	}
3799 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3800 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3801 	ib.ptr[2] = 0xDEADBEEF;
3802 	ib.length_dw = 3;
3803 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
3804 	if (r) {
3805 		radeon_scratch_free(rdev, scratch);
3806 		radeon_ib_free(rdev, &ib);
3807 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3808 		return r;
3809 	}
3810 	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3811 		RADEON_USEC_IB_TEST_TIMEOUT));
3812 	if (r < 0) {
3813 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3814 		radeon_scratch_free(rdev, scratch);
3815 		radeon_ib_free(rdev, &ib);
3816 		return r;
3817 	} else if (r == 0) {
3818 		DRM_ERROR("radeon: fence wait timed out.\n");
3819 		radeon_scratch_free(rdev, scratch);
3820 		radeon_ib_free(rdev, &ib);
3821 		return -ETIMEDOUT;
3822 	}
3823 	r = 0;
3824 	for (i = 0; i < rdev->usec_timeout; i++) {
3825 		tmp = RREG32(scratch);
3826 		if (tmp == 0xDEADBEEF)
3827 			break;
3828 		udelay(1);
3829 	}
3830 	if (i < rdev->usec_timeout) {
3831 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3832 	} else {
3833 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3834 			  scratch, tmp);
3835 		r = -EINVAL;
3836 	}
3837 	radeon_scratch_free(rdev, scratch);
3838 	radeon_ib_free(rdev, &ib);
3839 	return r;
3840 }
3841 
3842 /*
3843  * CP.
 * On CIK, gfx and compute now have independent command processors.
3845  *
3846  * GFX
3847  * Gfx consists of a single ring and can process both gfx jobs and
3848  * compute jobs.  The gfx CP consists of three microengines (ME):
3849  * PFP - Pre-Fetch Parser
3850  * ME - Micro Engine
3851  * CE - Constant Engine
3852  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3854  * used by the DE so that they can be loaded into cache in parallel
3855  * while the DE is processing state update packets.
3856  *
3857  * Compute
3858  * The compute CP consists of two microengines (ME):
3859  * MEC1 - Compute MicroEngine 1
3860  * MEC2 - Compute MicroEngine 2
3861  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3862  * The queues are exposed to userspace and are programmed directly
3863  * by the compute runtime.
3864  */
3865 /**
3866  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3867  *
3868  * @rdev: radeon_device pointer
3869  * @enable: enable or disable the MEs
3870  *
3871  * Halts or unhalts the gfx MEs.
3872  */
3873 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3874 {
3875 	if (enable)
3876 		WREG32(CP_ME_CNTL, 0);
3877 	else {
3878 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3879 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3880 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3881 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3882 	}
3883 	udelay(50);
3884 }
3885 
3886 /**
3887  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3888  *
3889  * @rdev: radeon_device pointer
3890  *
3891  * Loads the gfx PFP, ME, and CE ucode.
3892  * Returns 0 for success, -EINVAL if the ucode is not available.
3893  */
3894 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3895 {
3896 	int i;
3897 
3898 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3899 		return -EINVAL;
3900 
3901 	cik_cp_gfx_enable(rdev, false);
3902 
3903 	if (rdev->new_fw) {
3904 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3905 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3906 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3907 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3908 		const struct gfx_firmware_header_v1_0 *me_hdr =
3909 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3910 		const __le32 *fw_data;
3911 		u32 fw_size;
3912 
3913 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3914 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3915 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3916 
3917 		/* PFP */
3918 		fw_data = (const __le32 *)
3919 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3920 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3921 		WREG32(CP_PFP_UCODE_ADDR, 0);
3922 		for (i = 0; i < fw_size; i++)
3923 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3924 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3925 
3926 		/* CE */
3927 		fw_data = (const __le32 *)
3928 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3929 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3930 		WREG32(CP_CE_UCODE_ADDR, 0);
3931 		for (i = 0; i < fw_size; i++)
3932 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3933 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3934 
3935 		/* ME */
3936 		fw_data = (const __be32 *)
3937 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3938 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3939 		WREG32(CP_ME_RAM_WADDR, 0);
3940 		for (i = 0; i < fw_size; i++)
3941 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3942 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3943 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3944 	} else {
3945 		const __be32 *fw_data;
3946 
3947 		/* PFP */
3948 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3949 		WREG32(CP_PFP_UCODE_ADDR, 0);
3950 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3951 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3952 		WREG32(CP_PFP_UCODE_ADDR, 0);
3953 
3954 		/* CE */
3955 		fw_data = (const __be32 *)rdev->ce_fw->data;
3956 		WREG32(CP_CE_UCODE_ADDR, 0);
3957 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3958 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3959 		WREG32(CP_CE_UCODE_ADDR, 0);
3960 
3961 		/* ME */
3962 		fw_data = (const __be32 *)rdev->me_fw->data;
3963 		WREG32(CP_ME_RAM_WADDR, 0);
3964 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3965 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3966 		WREG32(CP_ME_RAM_WADDR, 0);
3967 	}
3968 
3969 	return 0;
3970 }
3971 
3972 /**
3973  * cik_cp_gfx_start - start the gfx ring
3974  *
3975  * @rdev: radeon_device pointer
3976  *
3977  * Enables the ring and loads the clear state context and other
3978  * packets required to init the ring.
3979  * Returns 0 for success, error for failure.
3980  */
3981 static int cik_cp_gfx_start(struct radeon_device *rdev)
3982 {
3983 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3984 	int r, i;
3985 
3986 	/* init the CP */
3987 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3988 	WREG32(CP_ENDIAN_SWAP, 0);
3989 	WREG32(CP_DEVICE_ID, 1);
3990 
3991 	cik_cp_gfx_enable(rdev, true);
3992 
3993 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3994 	if (r) {
3995 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3996 		return r;
3997 	}
3998 
3999 	/* init the CE partitions.  CE only used for gfx on CIK */
4000 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4001 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4002 	radeon_ring_write(ring, 0x8000);
4003 	radeon_ring_write(ring, 0x8000);
4004 
4005 	/* setup clear context state */
4006 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4007 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4008 
4009 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4010 	radeon_ring_write(ring, 0x80000000);
4011 	radeon_ring_write(ring, 0x80000000);
4012 
4013 	for (i = 0; i < cik_default_size; i++)
4014 		radeon_ring_write(ring, cik_default_state[i]);
4015 
4016 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4017 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4018 
4019 	/* set clear context state */
4020 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4021 	radeon_ring_write(ring, 0);
4022 
4023 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4024 	radeon_ring_write(ring, 0x00000316);
4025 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4026 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4027 
4028 	radeon_ring_unlock_commit(rdev, ring, false);
4029 
4030 	return 0;
4031 }
4032 
4033 /**
4034  * cik_cp_gfx_fini - stop the gfx ring
4035  *
4036  * @rdev: radeon_device pointer
4037  *
4038  * Stop the gfx ring and tear down the driver ring
4039  * info.
4040  */
4041 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4042 {
4043 	cik_cp_gfx_enable(rdev, false);
4044 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4045 }
4046 
4047 /**
4048  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4049  *
4050  * @rdev: radeon_device pointer
4051  *
4052  * Program the location and size of the gfx ring buffer
4053  * and test it to make sure it's working.
4054  * Returns 0 for success, error for failure.
4055  */
4056 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4057 {
4058 	struct radeon_ring *ring;
4059 	u32 tmp;
4060 	u32 rb_bufsz;
4061 	u64 rb_addr;
4062 	int r;
4063 
4064 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4065 	if (rdev->family != CHIP_HAWAII)
4066 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4067 
4068 	/* Set the write pointer delay */
4069 	WREG32(CP_RB_WPTR_DELAY, 0);
4070 
4071 	/* set the RB to use vmid 0 */
4072 	WREG32(CP_RB_VMID, 0);
4073 
4074 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4075 
4076 	/* ring 0 - compute and gfx */
4077 	/* Set ring buffer size */
4078 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4079 	rb_bufsz = order_base_2(ring->ring_size / 8);
4080 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4081 #ifdef __BIG_ENDIAN
4082 	tmp |= BUF_SWAP_32BIT;
4083 #endif
4084 	WREG32(CP_RB0_CNTL, tmp);
4085 
4086 	/* Initialize the ring buffer's read and write pointers */
4087 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4088 	ring->wptr = 0;
4089 	WREG32(CP_RB0_WPTR, ring->wptr);
4090 
4091 	/* set the wb address wether it's enabled or not */
4092 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4093 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4094 
4095 	/* scratch register shadowing is no longer supported */
4096 	WREG32(SCRATCH_UMSK, 0);
4097 
4098 	if (!rdev->wb.enabled)
4099 		tmp |= RB_NO_UPDATE;
4100 
4101 	mdelay(1);
4102 	WREG32(CP_RB0_CNTL, tmp);
4103 
4104 	rb_addr = ring->gpu_addr >> 8;
4105 	WREG32(CP_RB0_BASE, rb_addr);
4106 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4107 
4108 	/* start the ring */
4109 	cik_cp_gfx_start(rdev);
4110 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4111 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4112 	if (r) {
4113 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4114 		return r;
4115 	}
4116 
4117 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4118 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4119 
4120 	return 0;
4121 }
4122 
4123 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4124 		     struct radeon_ring *ring)
4125 {
4126 	u32 rptr;
4127 
4128 	if (rdev->wb.enabled)
4129 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4130 	else
4131 		rptr = RREG32(CP_RB0_RPTR);
4132 
4133 	return rptr;
4134 }
4135 
4136 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4137 		     struct radeon_ring *ring)
4138 {
4139 	return RREG32(CP_RB0_WPTR);
4140 }
4141 
/* Publish the gfx ring write pointer to the hardware */
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* read back to flush the posted write — presumably so the CP sees
	 * the new wptr before we return; TODO confirm */
	(void)RREG32(CP_RB0_WPTR);
}
4148 
/* Fetch a compute ring's read pointer: from the writeback buffer when
 * enabled, otherwise from CP_HQD_PQ_RPTR via an SRBM-selected register
 * read (the srbm_mutex serializes the me/pipe/queue selection).
 */
u32 cik_compute_get_rptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 rptr;

	if (rdev->wb.enabled) {
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	} else {
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		rptr = RREG32(CP_HQD_PQ_RPTR);
		/* restore default selection */
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return rptr;
}
4166 
/* Fetch a compute ring's write pointer: from the writeback buffer when
 * enabled, otherwise from CP_HQD_PQ_WPTR via an SRBM-selected register
 * read (the srbm_mutex serializes the me/pipe/queue selection).
 */
u32 cik_compute_get_wptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 wptr;

	if (rdev->wb.enabled) {
		/* XXX check if swapping is necessary on BE */
		wptr = rdev->wb.wb[ring->wptr_offs/4];
	} else {
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		wptr = RREG32(CP_HQD_PQ_WPTR);
		/* restore default selection */
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return wptr;
}
4185 
/* Publish a compute ring's write pointer: update the writeback shadow
 * and kick the queue's doorbell.
 */
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4193 
/* Quiesce one compute hardware queue (HQD): disable wptr polling,
 * request a dequeue and wait for the HQD to go inactive, then clear
 * its ring pointers.  Caller must hold srbm_mutex (see
 * cik_cp_compute_enable) since this selects the queue via SRBM.
 */
static void cik_compute_stop(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 j, tmp;

	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
	/* Disable wptr polling. */
	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
	tmp &= ~WPTR_POLL_EN;
	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
	/* Disable HQD. */
	if (RREG32(CP_HQD_ACTIVE) & 1) {
		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
		/* bounded wait for the queue to drain */
		for (j = 0; j < rdev->usec_timeout; j++) {
			if (!(RREG32(CP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
		WREG32(CP_HQD_PQ_RPTR, 0);
		WREG32(CP_HQD_PQ_WPTR, 0);
	}
	/* restore default SRBM selection */
	cik_srbm_select(rdev, 0, 0, 0, 0);
}
4218 
4219 /**
4220  * cik_cp_compute_enable - enable/disable the compute CP MEs
4221  *
4222  * @rdev: radeon_device pointer
4223  * @enable: enable or disable the MEs
4224  *
4225  * Halts or unhalts the compute MEs.
4226  */
4227 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4228 {
4229 	if (enable)
4230 		WREG32(CP_MEC_CNTL, 0);
4231 	else {
4232 		/*
4233 		 * To make hibernation reliable we need to clear compute ring
4234 		 * configuration before halting the compute ring.
4235 		 */
4236 		mutex_lock(&rdev->srbm_mutex);
4237 		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4238 		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4239 		mutex_unlock(&rdev->srbm_mutex);
4240 
4241 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4242 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4243 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4244 	}
4245 	udelay(50);
4246 }
4247 
4248 /**
4249  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4250  *
4251  * @rdev: radeon_device pointer
4252  *
4253  * Loads the compute MEC1&2 ucode.
4254  * Returns 0 for success, -EINVAL if the ucode is not available.
4255  */
4256 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4257 {
4258 	int i;
4259 
4260 	if (!rdev->mec_fw)
4261 		return -EINVAL;
4262 
4263 	cik_cp_compute_enable(rdev, false);
4264 
4265 	if (rdev->new_fw) {
4266 		const struct gfx_firmware_header_v1_0 *mec_hdr =
4267 			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4268 		const __le32 *fw_data;
4269 		u32 fw_size;
4270 
4271 		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4272 
4273 		/* MEC1 */
4274 		fw_data = (const __le32 *)
4275 			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4276 		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4277 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4278 		for (i = 0; i < fw_size; i++)
4279 			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4280 		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4281 
4282 		/* MEC2 */
4283 		if (rdev->family == CHIP_KAVERI) {
4284 			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4285 				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4286 
4287 			fw_data = (const __le32 *)
4288 				(rdev->mec2_fw->data +
4289 				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4290 			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4291 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4292 			for (i = 0; i < fw_size; i++)
4293 				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4294 			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4295 		}
4296 	} else {
4297 		const __be32 *fw_data;
4298 
4299 		/* MEC1 */
4300 		fw_data = (const __be32 *)rdev->mec_fw->data;
4301 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4302 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4303 			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4304 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4305 
4306 		if (rdev->family == CHIP_KAVERI) {
4307 			/* MEC2 */
4308 			fw_data = (const __be32 *)rdev->mec_fw->data;
4309 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4310 			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4311 				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4312 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4313 		}
4314 	}
4315 
4316 	return 0;
4317 }
4318 
4319 /**
4320  * cik_cp_compute_start - start the compute queues
4321  *
4322  * @rdev: radeon_device pointer
4323  *
4324  * Enable the compute queues.
4325  * Returns 0 for success, error for failure.
4326  */
4327 static int cik_cp_compute_start(struct radeon_device *rdev)
4328 {
4329 	cik_cp_compute_enable(rdev, true);
4330 
4331 	return 0;
4332 }
4333 
4334 /**
4335  * cik_cp_compute_fini - stop the compute queues
4336  *
4337  * @rdev: radeon_device pointer
4338  *
4339  * Stop the compute queues and tear down the driver queue
4340  * info.
4341  */
4342 static void cik_cp_compute_fini(struct radeon_device *rdev)
4343 {
4344 	int i, idx, r;
4345 
4346 	cik_cp_compute_enable(rdev, false);
4347 
4348 	for (i = 0; i < 2; i++) {
4349 		if (i == 0)
4350 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4351 		else
4352 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4353 
4354 		if (rdev->ring[idx].mqd_obj) {
4355 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4356 			if (unlikely(r != 0))
4357 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4358 
4359 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4360 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4361 
4362 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4363 			rdev->ring[idx].mqd_obj = NULL;
4364 		}
4365 	}
4366 }
4367 
/* Tear down the MEC HPD EOP buffer object allocated by cik_mec_init():
 * unpin, drop the reference, and clear the pointer.
 */
static void cik_mec_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->mec.hpd_eop_obj) {
		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
		rdev->mec.hpd_eop_obj = NULL;
	}
}
4383 
4384 #define MEC_HPD_SIZE 2048
4385 
4386 static int cik_mec_init(struct radeon_device *rdev)
4387 {
4388 	int r;
4389 	u32 *hpd;
4390 
4391 	/*
4392 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4393 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4394 	 */
4395 	if (rdev->family == CHIP_KAVERI)
4396 		rdev->mec.num_mec = 2;
4397 	else
4398 		rdev->mec.num_mec = 1;
4399 	rdev->mec.num_pipe = 4;
4400 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4401 
4402 	if (rdev->mec.hpd_eop_obj == NULL) {
4403 		r = radeon_bo_create(rdev,
4404 				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4405 				     PAGE_SIZE, true,
4406 				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4407 				     &rdev->mec.hpd_eop_obj);
4408 		if (r) {
4409 			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4410 			return r;
4411 		}
4412 	}
4413 
4414 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4415 	if (unlikely(r != 0)) {
4416 		cik_mec_fini(rdev);
4417 		return r;
4418 	}
4419 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4420 			  &rdev->mec.hpd_eop_gpu_addr);
4421 	if (r) {
4422 		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4423 		cik_mec_fini(rdev);
4424 		return r;
4425 	}
4426 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4427 	if (r) {
4428 		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4429 		cik_mec_fini(rdev);
4430 		return r;
4431 	}
4432 
4433 	/* clear memory.  Not sure if this is required or not */
4434 	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4435 
4436 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4437 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4438 
4439 	return 0;
4440 }
4441 
/* CPU-side shadow of the CP hardware queue descriptor (HQD) state.
 * Field names correspond to the CP_HQD_* / CP_MQD_* registers of the
 * same names; this struct is embedded in the MQD (see bonaire_mqd
 * below) so a queue's register state can be saved and restored.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4480 
/* Memory queue descriptor (MQD) for Bonaire-class compute queues:
 * the in-memory image of a compute queue's state, including a shadow
 * of its HQD registers (queue_state).  NOTE(review): the exact field
 * layout presumably must match what the CP firmware expects - do not
 * reorder or resize fields.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4508 
4509 /**
4510  * cik_cp_compute_resume - setup the compute queue registers
4511  *
4512  * @rdev: radeon_device pointer
4513  *
4514  * Program the compute queues and test them to make sure they
4515  * are working.
4516  * Returns 0 for success, error for failure.
4517  */
4518 static int cik_cp_compute_resume(struct radeon_device *rdev)
4519 {
4520 	int r, i, j, idx;
4521 	u32 tmp;
4522 	bool use_doorbell = true;
4523 	u64 hqd_gpu_addr;
4524 	u64 mqd_gpu_addr;
4525 	u64 eop_gpu_addr;
4526 	u64 wb_gpu_addr;
4527 	u32 *buf;
4528 	struct bonaire_mqd *mqd;
4529 
4530 	r = cik_cp_compute_start(rdev);
4531 	if (r)
4532 		return r;
4533 
4534 	/* fix up chicken bits */
4535 	tmp = RREG32(CP_CPF_DEBUG);
4536 	tmp |= (1 << 23);
4537 	WREG32(CP_CPF_DEBUG, tmp);
4538 
4539 	/* init the pipes */
4540 	mutex_lock(&rdev->srbm_mutex);
4541 
4542 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); ++i) {
4543 		int me = (i < 4) ? 1 : 2;
4544 		int pipe = (i < 4) ? i : (i - 4);
4545 
4546 		cik_srbm_select(rdev, me, pipe, 0, 0);
4547 
4548 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
4549 		/* write the EOP addr */
4550 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4551 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4552 
4553 		/* set the VMID assigned */
4554 		WREG32(CP_HPD_EOP_VMID, 0);
4555 
4556 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4557 		tmp = RREG32(CP_HPD_EOP_CONTROL);
4558 		tmp &= ~EOP_SIZE_MASK;
4559 		tmp |= order_base_2(MEC_HPD_SIZE / 8);
4560 		WREG32(CP_HPD_EOP_CONTROL, tmp);
4561 
4562 	}
4563 	cik_srbm_select(rdev, 0, 0, 0, 0);
4564 	mutex_unlock(&rdev->srbm_mutex);
4565 
4566 	/* init the queues.  Just two for now. */
4567 	for (i = 0; i < 2; i++) {
4568 		if (i == 0)
4569 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4570 		else
4571 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4572 
4573 		if (rdev->ring[idx].mqd_obj == NULL) {
4574 			r = radeon_bo_create(rdev,
4575 					     sizeof(struct bonaire_mqd),
4576 					     PAGE_SIZE, true,
4577 					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4578 					     NULL, &rdev->ring[idx].mqd_obj);
4579 			if (r) {
4580 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4581 				return r;
4582 			}
4583 		}
4584 
4585 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4586 		if (unlikely(r != 0)) {
4587 			cik_cp_compute_fini(rdev);
4588 			return r;
4589 		}
4590 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4591 				  &mqd_gpu_addr);
4592 		if (r) {
4593 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4594 			cik_cp_compute_fini(rdev);
4595 			return r;
4596 		}
4597 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4598 		if (r) {
4599 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4600 			cik_cp_compute_fini(rdev);
4601 			return r;
4602 		}
4603 
4604 		/* init the mqd struct */
4605 		memset(buf, 0, sizeof(struct bonaire_mqd));
4606 
4607 		mqd = (struct bonaire_mqd *)buf;
4608 		mqd->header = 0xC0310800;
4609 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4610 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4611 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4612 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4613 
4614 		mutex_lock(&rdev->srbm_mutex);
4615 		cik_srbm_select(rdev, rdev->ring[idx].me,
4616 				rdev->ring[idx].pipe,
4617 				rdev->ring[idx].queue, 0);
4618 
4619 		/* disable wptr polling */
4620 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4621 		tmp &= ~WPTR_POLL_EN;
4622 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4623 
4624 		/* enable doorbell? */
4625 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4626 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4627 		if (use_doorbell)
4628 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4629 		else
4630 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4631 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4632 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4633 
4634 		/* disable the queue if it's active */
4635 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4636 		mqd->queue_state.cp_hqd_pq_rptr = 0;
4637 		mqd->queue_state.cp_hqd_pq_wptr= 0;
4638 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4639 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4640 			for (j = 0; j < rdev->usec_timeout; j++) {
4641 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4642 					break;
4643 				udelay(1);
4644 			}
4645 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4646 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4647 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4648 		}
4649 
4650 		/* set the pointer to the MQD */
4651 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4652 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4653 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4654 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4655 		/* set MQD vmid to 0 */
4656 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4657 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4658 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4659 
4660 		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4661 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4662 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4663 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4664 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4665 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4666 
4667 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4668 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4669 		mqd->queue_state.cp_hqd_pq_control &=
4670 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4671 
4672 		mqd->queue_state.cp_hqd_pq_control |=
4673 			order_base_2(rdev->ring[idx].ring_size / 8);
4674 		mqd->queue_state.cp_hqd_pq_control |=
4675 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4676 #ifdef __BIG_ENDIAN
4677 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4678 #endif
4679 		mqd->queue_state.cp_hqd_pq_control &=
4680 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4681 		mqd->queue_state.cp_hqd_pq_control |=
4682 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4683 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4684 
4685 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4686 		if (i == 0)
4687 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4688 		else
4689 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4690 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4691 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4692 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4693 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4694 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4695 
4696 		/* set the wb address wether it's enabled or not */
4697 		if (i == 0)
4698 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4699 		else
4700 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4701 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4702 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4703 			upper_32_bits(wb_gpu_addr) & 0xffff;
4704 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4705 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4706 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4707 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4708 
4709 		/* enable the doorbell if requested */
4710 		if (use_doorbell) {
4711 			mqd->queue_state.cp_hqd_pq_doorbell_control =
4712 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4713 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4714 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4715 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4716 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4717 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4718 				~(DOORBELL_SOURCE | DOORBELL_HIT);
4719 
4720 		} else {
4721 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4722 		}
4723 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4724 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4725 
4726 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4727 		rdev->ring[idx].wptr = 0;
4728 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4729 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4730 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4731 
4732 		/* set the vmid for the queue */
4733 		mqd->queue_state.cp_hqd_vmid = 0;
4734 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4735 
4736 		/* activate the queue */
4737 		mqd->queue_state.cp_hqd_active = 1;
4738 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4739 
4740 		cik_srbm_select(rdev, 0, 0, 0, 0);
4741 		mutex_unlock(&rdev->srbm_mutex);
4742 
4743 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4744 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4745 
4746 		rdev->ring[idx].ready = true;
4747 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4748 		if (r)
4749 			rdev->ring[idx].ready = false;
4750 	}
4751 
4752 	return 0;
4753 }
4754 
/* Enable or disable both command processors (gfx first, then compute). */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4760 
/*
 * Load the CP microcode for both the gfx and compute engines.
 * Returns 0 on success, or the first engine's error code on failure.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r)
		return r;

	return cik_cp_compute_load_microcode(rdev);
}
4774 
/* Tear down both command processors (gfx first, then compute). */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4780 
4781 static int cik_cp_resume(struct radeon_device *rdev)
4782 {
4783 	int r;
4784 
4785 	cik_enable_gui_idle_interrupt(rdev, false);
4786 
4787 	r = cik_cp_load_microcode(rdev);
4788 	if (r)
4789 		return r;
4790 
4791 	r = cik_cp_gfx_resume(rdev);
4792 	if (r)
4793 		return r;
4794 	r = cik_cp_compute_resume(rdev);
4795 	if (r)
4796 		return r;
4797 
4798 	cik_enable_gui_idle_interrupt(rdev, true);
4799 
4800 	return 0;
4801 }
4802 
/**
 * cik_print_gpu_status_regs - dump the GPU status registers
 *
 * @rdev: radeon_device pointer
 *
 * Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log;
 * called from the soft reset path to aid hang diagnosis.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4842 
4843 /**
4844  * cik_gpu_check_soft_reset - check which blocks are busy
4845  *
4846  * @rdev: radeon_device pointer
4847  *
4848  * Check which blocks are busy and return the relevant reset
4849  * mask to be used by cik_gpu_soft_reset().
4850  * Returns a mask of the blocks to be reset.
4851  */
4852 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4853 {
4854 	u32 reset_mask = 0;
4855 	u32 tmp;
4856 
4857 	/* GRBM_STATUS */
4858 	tmp = RREG32(GRBM_STATUS);
4859 	if (tmp & (PA_BUSY | SC_BUSY |
4860 		   BCI_BUSY | SX_BUSY |
4861 		   TA_BUSY | VGT_BUSY |
4862 		   DB_BUSY | CB_BUSY |
4863 		   GDS_BUSY | SPI_BUSY |
4864 		   IA_BUSY | IA_BUSY_NO_DMA))
4865 		reset_mask |= RADEON_RESET_GFX;
4866 
4867 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4868 		reset_mask |= RADEON_RESET_CP;
4869 
4870 	/* GRBM_STATUS2 */
4871 	tmp = RREG32(GRBM_STATUS2);
4872 	if (tmp & RLC_BUSY)
4873 		reset_mask |= RADEON_RESET_RLC;
4874 
4875 	/* SDMA0_STATUS_REG */
4876 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4877 	if (!(tmp & SDMA_IDLE))
4878 		reset_mask |= RADEON_RESET_DMA;
4879 
4880 	/* SDMA1_STATUS_REG */
4881 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4882 	if (!(tmp & SDMA_IDLE))
4883 		reset_mask |= RADEON_RESET_DMA1;
4884 
4885 	/* SRBM_STATUS2 */
4886 	tmp = RREG32(SRBM_STATUS2);
4887 	if (tmp & SDMA_BUSY)
4888 		reset_mask |= RADEON_RESET_DMA;
4889 
4890 	if (tmp & SDMA1_BUSY)
4891 		reset_mask |= RADEON_RESET_DMA1;
4892 
4893 	/* SRBM_STATUS */
4894 	tmp = RREG32(SRBM_STATUS);
4895 
4896 	if (tmp & IH_BUSY)
4897 		reset_mask |= RADEON_RESET_IH;
4898 
4899 	if (tmp & SEM_BUSY)
4900 		reset_mask |= RADEON_RESET_SEM;
4901 
4902 	if (tmp & GRBM_RQ_PENDING)
4903 		reset_mask |= RADEON_RESET_GRBM;
4904 
4905 	if (tmp & VMC_BUSY)
4906 		reset_mask |= RADEON_RESET_VMC;
4907 
4908 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4909 		   MCC_BUSY | MCD_BUSY))
4910 		reset_mask |= RADEON_RESET_MC;
4911 
4912 	if (evergreen_is_display_hung(rdev))
4913 		reset_mask |= RADEON_RESET_DISPLAY;
4914 
4915 	/* Skip MC reset as it's mostly likely not hung, just busy */
4916 	if (reset_mask & RADEON_RESET_MC) {
4917 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4918 		reset_mask &= ~RADEON_RESET_MC;
4919 	}
4920 
4921 	return reset_mask;
4922 }
4923 
4924 /**
4925  * cik_gpu_soft_reset - soft reset GPU
4926  *
4927  * @rdev: radeon_device pointer
4928  * @reset_mask: mask of which blocks to reset
4929  *
4930  * Soft reset the blocks specified in @reset_mask.
4931  */
4932 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4933 {
4934 	struct evergreen_mc_save save;
4935 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4936 	u32 tmp;
4937 
4938 	if (reset_mask == 0)
4939 		return;
4940 
4941 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4942 
4943 	cik_print_gpu_status_regs(rdev);
4944 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4945 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4946 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4947 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4948 
4949 	/* disable CG/PG */
4950 	cik_fini_pg(rdev);
4951 	cik_fini_cg(rdev);
4952 
4953 	/* stop the rlc */
4954 	cik_rlc_stop(rdev);
4955 
4956 	/* Disable GFX parsing/prefetching */
4957 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4958 
4959 	/* Disable MEC parsing/prefetching */
4960 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4961 
4962 	if (reset_mask & RADEON_RESET_DMA) {
4963 		/* sdma0 */
4964 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4965 		tmp |= SDMA_HALT;
4966 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4967 	}
4968 	if (reset_mask & RADEON_RESET_DMA1) {
4969 		/* sdma1 */
4970 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4971 		tmp |= SDMA_HALT;
4972 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4973 	}
4974 
4975 	evergreen_mc_stop(rdev, &save);
4976 	if (evergreen_mc_wait_for_idle(rdev)) {
4977 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4978 	}
4979 
4980 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4981 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4982 
4983 	if (reset_mask & RADEON_RESET_CP) {
4984 		grbm_soft_reset |= SOFT_RESET_CP;
4985 
4986 		srbm_soft_reset |= SOFT_RESET_GRBM;
4987 	}
4988 
4989 	if (reset_mask & RADEON_RESET_DMA)
4990 		srbm_soft_reset |= SOFT_RESET_SDMA;
4991 
4992 	if (reset_mask & RADEON_RESET_DMA1)
4993 		srbm_soft_reset |= SOFT_RESET_SDMA1;
4994 
4995 	if (reset_mask & RADEON_RESET_DISPLAY)
4996 		srbm_soft_reset |= SOFT_RESET_DC;
4997 
4998 	if (reset_mask & RADEON_RESET_RLC)
4999 		grbm_soft_reset |= SOFT_RESET_RLC;
5000 
5001 	if (reset_mask & RADEON_RESET_SEM)
5002 		srbm_soft_reset |= SOFT_RESET_SEM;
5003 
5004 	if (reset_mask & RADEON_RESET_IH)
5005 		srbm_soft_reset |= SOFT_RESET_IH;
5006 
5007 	if (reset_mask & RADEON_RESET_GRBM)
5008 		srbm_soft_reset |= SOFT_RESET_GRBM;
5009 
5010 	if (reset_mask & RADEON_RESET_VMC)
5011 		srbm_soft_reset |= SOFT_RESET_VMC;
5012 
5013 	if (!(rdev->flags & RADEON_IS_IGP)) {
5014 		if (reset_mask & RADEON_RESET_MC)
5015 			srbm_soft_reset |= SOFT_RESET_MC;
5016 	}
5017 
5018 	if (grbm_soft_reset) {
5019 		tmp = RREG32(GRBM_SOFT_RESET);
5020 		tmp |= grbm_soft_reset;
5021 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5022 		WREG32(GRBM_SOFT_RESET, tmp);
5023 		tmp = RREG32(GRBM_SOFT_RESET);
5024 
5025 		udelay(50);
5026 
5027 		tmp &= ~grbm_soft_reset;
5028 		WREG32(GRBM_SOFT_RESET, tmp);
5029 		tmp = RREG32(GRBM_SOFT_RESET);
5030 	}
5031 
5032 	if (srbm_soft_reset) {
5033 		tmp = RREG32(SRBM_SOFT_RESET);
5034 		tmp |= srbm_soft_reset;
5035 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5036 		WREG32(SRBM_SOFT_RESET, tmp);
5037 		tmp = RREG32(SRBM_SOFT_RESET);
5038 
5039 		udelay(50);
5040 
5041 		tmp &= ~srbm_soft_reset;
5042 		WREG32(SRBM_SOFT_RESET, tmp);
5043 		tmp = RREG32(SRBM_SOFT_RESET);
5044 	}
5045 
5046 	/* Wait a little for things to settle down */
5047 	udelay(50);
5048 
5049 	evergreen_mc_resume(rdev, &save);
5050 	udelay(50);
5051 
5052 	cik_print_gpu_status_regs(rdev);
5053 }
5054 
/* GMCON registers saved across a pci config reset on IGPs (Kaveri). */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
5060 
/*
 * Save the GMCON registers that do not survive a pci config reset,
 * then disable the render engine auto-execute and stutter features
 * so they are quiescent across the reset.  Counterpart of
 * kv_restore_regs_for_reset().
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5072 
5073 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5074 				      struct kv_reset_save_regs *save)
5075 {
5076 	int i;
5077 
5078 	WREG32(GMCON_PGFSM_WRITE, 0);
5079 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5080 
5081 	for (i = 0; i < 5; i++)
5082 		WREG32(GMCON_PGFSM_WRITE, 0);
5083 
5084 	WREG32(GMCON_PGFSM_WRITE, 0);
5085 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5086 
5087 	for (i = 0; i < 5; i++)
5088 		WREG32(GMCON_PGFSM_WRITE, 0);
5089 
5090 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5091 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5092 
5093 	for (i = 0; i < 5; i++)
5094 		WREG32(GMCON_PGFSM_WRITE, 0);
5095 
5096 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5097 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5098 
5099 	for (i = 0; i < 5; i++)
5100 		WREG32(GMCON_PGFSM_WRITE, 0);
5101 
5102 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5103 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5104 
5105 	for (i = 0; i < 5; i++)
5106 		WREG32(GMCON_PGFSM_WRITE, 0);
5107 
5108 	WREG32(GMCON_PGFSM_WRITE, 0);
5109 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5110 
5111 	for (i = 0; i < 5; i++)
5112 		WREG32(GMCON_PGFSM_WRITE, 0);
5113 
5114 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5115 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5116 
5117 	for (i = 0; i < 5; i++)
5118 		WREG32(GMCON_PGFSM_WRITE, 0);
5119 
5120 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5121 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5122 
5123 	for (i = 0; i < 5; i++)
5124 		WREG32(GMCON_PGFSM_WRITE, 0);
5125 
5126 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5127 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5128 
5129 	for (i = 0; i < 5; i++)
5130 		WREG32(GMCON_PGFSM_WRITE, 0);
5131 
5132 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5133 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5134 
5135 	for (i = 0; i < 5; i++)
5136 		WREG32(GMCON_PGFSM_WRITE, 0);
5137 
5138 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5139 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5140 
5141 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5142 	WREG32(GMCON_MISC, save->gmcon_misc);
5143 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5144 }
5145 
/*
 * cik_gpu_pci_config_reset - reset the asic via the pci config space
 *
 * Quiesce the gfx, compute and sdma engines and the memory
 * controller, perform a pci config reset, then busy-wait for the
 * asic to come back (CONFIG_MEMSIZE reads 0xffffffff while the chip
 * is in reset).  On IGPs the GMCON state is saved before and
 * restored after the reset.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* GMCON regs on IGPs don't survive the reset; save them */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5208 
5209 /**
5210  * cik_asic_reset - soft reset GPU
5211  *
5212  * @rdev: radeon_device pointer
5213  * @hard: force hard reset
5214  *
5215  * Look up which blocks are hung and attempt
5216  * to reset them.
5217  * Returns 0 for success.
5218  */
5219 int cik_asic_reset(struct radeon_device *rdev, bool hard)
5220 {
5221 	u32 reset_mask;
5222 
5223 	if (hard) {
5224 		cik_gpu_pci_config_reset(rdev);
5225 		return 0;
5226 	}
5227 
5228 	reset_mask = cik_gpu_check_soft_reset(rdev);
5229 
5230 	if (reset_mask)
5231 		r600_set_bios_scratch_engine_hung(rdev, true);
5232 
5233 	/* try soft reset */
5234 	cik_gpu_soft_reset(rdev, reset_mask);
5235 
5236 	reset_mask = cik_gpu_check_soft_reset(rdev);
5237 
5238 	/* try pci config reset */
5239 	if (reset_mask && radeon_hard_reset)
5240 		cik_gpu_pci_config_reset(rdev);
5241 
5242 	reset_mask = cik_gpu_check_soft_reset(rdev);
5243 
5244 	if (!reset_mask)
5245 		r600_set_bios_scratch_engine_hung(rdev, false);
5246 
5247 	return 0;
5248 }
5249 
5250 /**
5251  * cik_gfx_is_lockup - check if the 3D engine is locked up
5252  *
5253  * @rdev: radeon_device pointer
5254  * @ring: radeon_ring structure holding ring information
5255  *
5256  * Check if the 3D engine is locked up (CIK).
5257  * Returns true if the engine is locked, false if not.
5258  */
5259 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5260 {
5261 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5262 
5263 	if (!(reset_mask & (RADEON_RESET_GFX |
5264 			    RADEON_RESET_COMPUTE |
5265 			    RADEON_RESET_CP))) {
5266 		radeon_ring_lockup_update(rdev, ring);
5267 		return false;
5268 	}
5269 	return radeon_ring_test_lockup(rdev, ring);
5270 }
5271 
5272 /* MC */
5273 /**
5274  * cik_mc_program - program the GPU memory controller
5275  *
5276  * @rdev: radeon_device pointer
5277  *
5278  * Set the location of vram, gart, and AGP in the GPU's
5279  * physical address space (CIK).
5280  */
5281 static void cik_mc_program(struct radeon_device *rdev)
5282 {
5283 	struct evergreen_mc_save save;
5284 	u32 tmp;
5285 	int i, j;
5286 
5287 	/* Initialize HDP */
5288 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5289 		WREG32((0x2c14 + j), 0x00000000);
5290 		WREG32((0x2c18 + j), 0x00000000);
5291 		WREG32((0x2c1c + j), 0x00000000);
5292 		WREG32((0x2c20 + j), 0x00000000);
5293 		WREG32((0x2c24 + j), 0x00000000);
5294 	}
5295 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5296 
5297 	evergreen_mc_stop(rdev, &save);
5298 	if (radeon_mc_wait_for_idle(rdev)) {
5299 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5300 	}
5301 	/* Lockout access through VGA aperture*/
5302 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5303 	/* Update configuration */
5304 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5305 	       rdev->mc.vram_start >> 12);
5306 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5307 	       rdev->mc.vram_end >> 12);
5308 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5309 	       rdev->vram_scratch.gpu_addr >> 12);
5310 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5311 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5312 	WREG32(MC_VM_FB_LOCATION, tmp);
5313 	/* XXX double check these! */
5314 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5315 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5316 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5317 	WREG32(MC_VM_AGP_BASE, 0);
5318 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5319 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5320 	if (radeon_mc_wait_for_idle(rdev)) {
5321 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5322 	}
5323 	evergreen_mc_resume(rdev, &save);
5324 	/* we need to own VRAM, so turn off the VGA renderer here
5325 	 * to stop it overwriting our objects */
5326 	rv515_vga_render_disable(rdev);
5327 }
5328 
5329 /**
5330  * cik_mc_init - initialize the memory controller driver params
5331  *
5332  * @rdev: radeon_device pointer
5333  *
5334  * Look up the amount of vram, vram width, and decide how to place
5335  * vram and gart within the GPU's physical address space (CIK).
5336  * Returns 0 for success.
5337  */
5338 static int cik_mc_init(struct radeon_device *rdev)
5339 {
5340 	u32 tmp;
5341 	int chansize, numchan;
5342 
5343 	/* Get VRAM informations */
5344 	rdev->mc.vram_is_ddr = true;
5345 	tmp = RREG32(MC_ARB_RAMCFG);
5346 	if (tmp & CHANSIZE_MASK) {
5347 		chansize = 64;
5348 	} else {
5349 		chansize = 32;
5350 	}
5351 	tmp = RREG32(MC_SHARED_CHMAP);
5352 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5353 	case 0:
5354 	default:
5355 		numchan = 1;
5356 		break;
5357 	case 1:
5358 		numchan = 2;
5359 		break;
5360 	case 2:
5361 		numchan = 4;
5362 		break;
5363 	case 3:
5364 		numchan = 8;
5365 		break;
5366 	case 4:
5367 		numchan = 3;
5368 		break;
5369 	case 5:
5370 		numchan = 6;
5371 		break;
5372 	case 6:
5373 		numchan = 10;
5374 		break;
5375 	case 7:
5376 		numchan = 12;
5377 		break;
5378 	case 8:
5379 		numchan = 16;
5380 		break;
5381 	}
5382 	rdev->mc.vram_width = numchan * chansize;
5383 	/* Could aper size report 0 ? */
5384 	rdev->mc.aper_base = rdev->fb_aper_offset;
5385 	rdev->mc.aper_size = rdev->fb_aper_size;
5386 	/* size in MB on si */
5387 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5388 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5389 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5390 	si_vram_gtt_location(rdev, &rdev->mc);
5391 	radeon_update_bandwidth_info(rdev);
5392 
5393 	return 0;
5394 }
5395 
5396 /*
5397  * GART
5398  * VMID 0 is the physical GPU addresses as used by the kernel.
5399  * VMIDs 1-15 are used for userspace clients and are handled
5400  * by the radeon vm/hsa code.
5401  */
5402 /**
5403  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5404  *
5405  * @rdev: radeon_device pointer
5406  *
5407  * Flush the TLB for the VMID 0 page table (CIK).
5408  */
5409 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5410 {
5411 	/* flush hdp cache */
5412 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5413 
5414 	/* bits 0-15 are the VM contexts0-15 */
5415 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5416 }
5417 
5418 /**
5419  * cik_pcie_gart_enable - gart enable
5420  *
5421  * @rdev: radeon_device pointer
5422  *
5423  * This sets up the TLBs, programs the page tables for VMID0,
5424  * sets up the hw for VMIDs 1-15 which are allocated on
5425  * demand, and sets up the global locations for the LDS, GDS,
5426  * and GPUVM for FSA64 clients (CIK).
5427  * Returns 0 for success, errors for failure.
5428  */
5429 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5430 {
5431 	int r, i;
5432 
5433 	if (rdev->gart.robj == NULL) {
5434 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5435 		return -EINVAL;
5436 	}
5437 	r = radeon_gart_table_vram_pin(rdev);
5438 	if (r)
5439 		return r;
5440 	/* Setup TLB control */
5441 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5442 	       (0xA << 7) |
5443 	       ENABLE_L1_TLB |
5444 	       ENABLE_L1_FRAGMENT_PROCESSING |
5445 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5446 	       ENABLE_ADVANCED_DRIVER_MODEL |
5447 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5448 	/* Setup L2 cache */
5449 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5450 	       ENABLE_L2_FRAGMENT_PROCESSING |
5451 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5452 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5453 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5454 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5455 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5456 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5457 	       BANK_SELECT(4) |
5458 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5459 	/* setup context0 */
5460 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5461 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5462 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5463 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5464 			(u32)(rdev->dummy_page.addr >> 12));
5465 	WREG32(VM_CONTEXT0_CNTL2, 0);
5466 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5467 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5468 
5469 	WREG32(0x15D4, 0);
5470 	WREG32(0x15D8, 0);
5471 	WREG32(0x15DC, 0);
5472 
5473 	/* restore context1-15 */
5474 	/* set vm size, must be a multiple of 4 */
5475 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5476 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5477 	for (i = 1; i < 16; i++) {
5478 		if (i < 8)
5479 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5480 			       rdev->vm_manager.saved_table_addr[i]);
5481 		else
5482 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5483 			       rdev->vm_manager.saved_table_addr[i]);
5484 	}
5485 
5486 	/* enable context1-15 */
5487 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5488 	       (u32)(rdev->dummy_page.addr >> 12));
5489 	WREG32(VM_CONTEXT1_CNTL2, 4);
5490 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5491 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5492 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5493 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5494 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5495 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5496 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5497 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5498 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5499 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5500 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5501 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5502 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5503 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5504 
5505 	if (rdev->family == CHIP_KAVERI) {
5506 		u32 tmp = RREG32(CHUB_CONTROL);
5507 		tmp &= ~BYPASS_VM;
5508 		WREG32(CHUB_CONTROL, tmp);
5509 	}
5510 
5511 	/* XXX SH_MEM regs */
5512 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5513 	mutex_lock(&rdev->srbm_mutex);
5514 	for (i = 0; i < 16; i++) {
5515 		cik_srbm_select(rdev, 0, 0, 0, i);
5516 		/* CP and shaders */
5517 		WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
5518 		WREG32(SH_MEM_APE1_BASE, 1);
5519 		WREG32(SH_MEM_APE1_LIMIT, 0);
5520 		WREG32(SH_MEM_BASES, 0);
5521 		/* SDMA GFX */
5522 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5523 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5524 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5525 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5526 		/* XXX SDMA RLC - todo */
5527 	}
5528 	cik_srbm_select(rdev, 0, 0, 0, 0);
5529 	mutex_unlock(&rdev->srbm_mutex);
5530 
5531 	cik_pcie_gart_tlb_flush(rdev);
5532 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5533 		 (unsigned)(rdev->mc.gtt_size >> 20),
5534 		 (unsigned long long)rdev->gart.table_addr);
5535 	rdev->gart.ready = true;
5536 	return 0;
5537 }
5538 
5539 /**
5540  * cik_pcie_gart_disable - gart disable
5541  *
5542  * @rdev: radeon_device pointer
5543  *
5544  * This disables all VM page table (CIK).
5545  */
5546 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5547 {
5548 	unsigned i;
5549 
5550 	for (i = 1; i < 16; ++i) {
5551 		uint32_t reg;
5552 		if (i < 8)
5553 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5554 		else
5555 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5556 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5557 	}
5558 
5559 	/* Disable all tables */
5560 	WREG32(VM_CONTEXT0_CNTL, 0);
5561 	WREG32(VM_CONTEXT1_CNTL, 0);
5562 	/* Setup TLB control */
5563 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5564 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5565 	/* Setup L2 cache */
5566 	WREG32(VM_L2_CNTL,
5567 	       ENABLE_L2_FRAGMENT_PROCESSING |
5568 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5569 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5570 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5571 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5572 	WREG32(VM_L2_CNTL2, 0);
5573 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5574 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5575 	radeon_gart_table_vram_unpin(rdev);
5576 }
5577 
5578 /**
5579  * cik_pcie_gart_fini - vm fini callback
5580  *
5581  * @rdev: radeon_device pointer
5582  *
5583  * Tears down the driver GART/VM setup (CIK).
5584  */
5585 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5586 {
5587 	cik_pcie_gart_disable(rdev);
5588 	radeon_gart_table_vram_free(rdev);
5589 	radeon_gart_fini(rdev);
5590 }
5591 
/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0 (success) without inspecting the IB.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
5605 
5606 /*
5607  * vm
5608  * VMID 0 is the physical GPU addresses as used by the kernel.
5609  * VMIDs 1-15 are used for userspace clients and are handled
5610  * by the radeon vm/hsa code.
5611  */
5612 /**
5613  * cik_vm_init - cik vm init callback
5614  *
5615  * @rdev: radeon_device pointer
5616  *
5617  * Inits cik specific vm parameters (number of VMs, base of vram for
5618  * VMIDs 1-15) (CIK).
5619  * Returns 0 for success.
5620  */
5621 int cik_vm_init(struct radeon_device *rdev)
5622 {
5623 	/*
5624 	 * number of VMs
5625 	 * VMID 0 is reserved for System
5626 	 * radeon graphics/compute will use VMIDs 1-15
5627 	 */
5628 	rdev->vm_manager.nvm = 16;
5629 	/* base offset of vram pages */
5630 	if (rdev->flags & RADEON_IS_IGP) {
5631 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5632 		tmp <<= 22;
5633 		rdev->vm_manager.vram_base_offset = tmp;
5634 	} else
5635 		rdev->vm_manager.vram_base_offset = 0;
5636 
5637 	return 0;
5638 }
5639 
5640 /**
5641  * cik_vm_fini - cik vm fini callback
5642  *
5643  * @rdev: radeon_device pointer
5644  *
5645  * Tear down any asic specific VM setup (CIK).
5646  */
5647 void cik_vm_fini(struct radeon_device *rdev)
5648 {
5649 }
5650 
5651 /**
5652  * cik_vm_decode_fault - print human readable fault info
5653  *
5654  * @rdev: radeon_device pointer
5655  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5656  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5657  *
5658  * Print human readable fault information (CIK).
5659  */
5660 static void cik_vm_decode_fault(struct radeon_device *rdev,
5661 				u32 status, u32 addr, u32 mc_client)
5662 {
5663 	u32 mc_id;
5664 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5665 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5666 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5667 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5668 
5669 	if (rdev->family == CHIP_HAWAII)
5670 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5671 	else
5672 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5673 
5674 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5675 	       protections, vmid, addr,
5676 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5677 	       block, mc_client, mc_id);
5678 }
5679 
5680 /**
5681  * cik_vm_flush - cik vm flush using the CP
5682  *
5683  * @rdev: radeon_device pointer
5684  *
5685  * Update the page table base and flush the VM TLB
5686  * using the CP (CIK).
5687  */
5688 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5689 		  unsigned vm_id, uint64_t pd_addr)
5690 {
5691 	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5692 
5693 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5694 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5695 				 WRITE_DATA_DST_SEL(0)));
5696 	if (vm_id < 8) {
5697 		radeon_ring_write(ring,
5698 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5699 	} else {
5700 		radeon_ring_write(ring,
5701 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5702 	}
5703 	radeon_ring_write(ring, 0);
5704 	radeon_ring_write(ring, pd_addr >> 12);
5705 
5706 	/* update SH_MEM_* regs */
5707 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5708 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5709 				 WRITE_DATA_DST_SEL(0)));
5710 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5711 	radeon_ring_write(ring, 0);
5712 	radeon_ring_write(ring, VMID(vm_id));
5713 
5714 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5715 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5716 				 WRITE_DATA_DST_SEL(0)));
5717 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5718 	radeon_ring_write(ring, 0);
5719 
5720 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5721 	radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
5722 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5723 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5724 
5725 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5726 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5727 				 WRITE_DATA_DST_SEL(0)));
5728 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5729 	radeon_ring_write(ring, 0);
5730 	radeon_ring_write(ring, VMID(0));
5731 
5732 	/* HDP flush */
5733 	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5734 
5735 	/* bits 0-15 are the VM contexts0-15 */
5736 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5737 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5738 				 WRITE_DATA_DST_SEL(0)));
5739 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5740 	radeon_ring_write(ring, 0);
5741 	radeon_ring_write(ring, 1 << vm_id);
5742 
5743 	/* wait for the invalidate to complete */
5744 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5745 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5746 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
5747 				 WAIT_REG_MEM_ENGINE(0))); /* me */
5748 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5749 	radeon_ring_write(ring, 0);
5750 	radeon_ring_write(ring, 0); /* ref */
5751 	radeon_ring_write(ring, 0); /* mask */
5752 	radeon_ring_write(ring, 0x20); /* poll interval */
5753 
5754 	/* compute doesn't have PFP */
5755 	if (usepfp) {
5756 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5757 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5758 		radeon_ring_write(ring, 0x0);
5759 	}
5760 }
5761 
5762 /*
5763  * RLC
5764  * The RLC is a multi-purpose microengine that handles a
5765  * variety of functions, the most important of which is
5766  * the interrupt controller.
5767  */
5768 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5769 					  bool enable)
5770 {
5771 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5772 
5773 	if (enable)
5774 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5775 	else
5776 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5777 	WREG32(CP_INT_CNTL_RING0, tmp);
5778 }
5779 
5780 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5781 {
5782 	u32 tmp;
5783 
5784 	tmp = RREG32(RLC_LB_CNTL);
5785 	if (enable)
5786 		tmp |= LOAD_BALANCE_ENABLE;
5787 	else
5788 		tmp &= ~LOAD_BALANCE_ENABLE;
5789 	WREG32(RLC_LB_CNTL, tmp);
5790 }
5791 
5792 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5793 {
5794 	u32 i, j, k;
5795 	u32 mask;
5796 
5797 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5798 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5799 			cik_select_se_sh(rdev, i, j);
5800 			for (k = 0; k < rdev->usec_timeout; k++) {
5801 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5802 					break;
5803 				udelay(1);
5804 			}
5805 		}
5806 	}
5807 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5808 
5809 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5810 	for (k = 0; k < rdev->usec_timeout; k++) {
5811 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5812 			break;
5813 		udelay(1);
5814 	}
5815 }
5816 
5817 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5818 {
5819 	u32 tmp;
5820 
5821 	tmp = RREG32(RLC_CNTL);
5822 	if (tmp != rlc)
5823 		WREG32(RLC_CNTL, rlc);
5824 }
5825 
5826 static u32 cik_halt_rlc(struct radeon_device *rdev)
5827 {
5828 	u32 data, orig;
5829 
5830 	orig = data = RREG32(RLC_CNTL);
5831 
5832 	if (data & RLC_ENABLE) {
5833 		u32 i;
5834 
5835 		data &= ~RLC_ENABLE;
5836 		WREG32(RLC_CNTL, data);
5837 
5838 		for (i = 0; i < rdev->usec_timeout; i++) {
5839 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5840 				break;
5841 			udelay(1);
5842 		}
5843 
5844 		cik_wait_for_rlc_serdes(rdev);
5845 	}
5846 
5847 	return orig;
5848 }
5849 
5850 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5851 {
5852 	u32 tmp, i, mask;
5853 
5854 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5855 	WREG32(RLC_GPR_REG2, tmp);
5856 
5857 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5858 	for (i = 0; i < rdev->usec_timeout; i++) {
5859 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5860 			break;
5861 		udelay(1);
5862 	}
5863 
5864 	for (i = 0; i < rdev->usec_timeout; i++) {
5865 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5866 			break;
5867 		udelay(1);
5868 	}
5869 }
5870 
5871 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5872 {
5873 	u32 tmp;
5874 
5875 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5876 	WREG32(RLC_GPR_REG2, tmp);
5877 }
5878 
5879 /**
5880  * cik_rlc_stop - stop the RLC ME
5881  *
5882  * @rdev: radeon_device pointer
5883  *
5884  * Halt the RLC ME (MicroEngine) (CIK).
5885  */
5886 static void cik_rlc_stop(struct radeon_device *rdev)
5887 {
5888 	WREG32(RLC_CNTL, 0);
5889 
5890 	cik_enable_gui_idle_interrupt(rdev, false);
5891 
5892 	cik_wait_for_rlc_serdes(rdev);
5893 }
5894 
5895 /**
5896  * cik_rlc_start - start the RLC ME
5897  *
5898  * @rdev: radeon_device pointer
5899  *
5900  * Unhalt the RLC ME (MicroEngine) (CIK).
5901  */
5902 static void cik_rlc_start(struct radeon_device *rdev)
5903 {
5904 	WREG32(RLC_CNTL, RLC_ENABLE);
5905 
5906 	cik_enable_gui_idle_interrupt(rdev, true);
5907 
5908 	udelay(50);
5909 }
5910 
5911 /**
5912  * cik_rlc_resume - setup the RLC hw
5913  *
5914  * @rdev: radeon_device pointer
5915  *
5916  * Initialize the RLC registers, load the ucode,
5917  * and start the RLC (CIK).
5918  * Returns 0 for success, -EINVAL if the ucode is not available.
5919  */
5920 static int cik_rlc_resume(struct radeon_device *rdev)
5921 {
5922 	u32 i, size, tmp;
5923 
5924 	if (!rdev->rlc_fw)
5925 		return -EINVAL;
5926 
5927 	cik_rlc_stop(rdev);
5928 
5929 	/* disable CG */
5930 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5931 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5932 
5933 	si_rlc_reset(rdev);
5934 
5935 	cik_init_pg(rdev);
5936 
5937 	cik_init_cg(rdev);
5938 
5939 	WREG32(RLC_LB_CNTR_INIT, 0);
5940 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5941 
5942 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5943 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5944 	WREG32(RLC_LB_PARAMS, 0x00600408);
5945 	WREG32(RLC_LB_CNTL, 0x80000004);
5946 
5947 	WREG32(RLC_MC_CNTL, 0);
5948 	WREG32(RLC_UCODE_CNTL, 0);
5949 
5950 	if (rdev->new_fw) {
5951 		const struct rlc_firmware_header_v1_0 *hdr =
5952 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5953 		const __le32 *fw_data = (const __le32 *)
5954 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5955 
5956 		radeon_ucode_print_rlc_hdr(&hdr->header);
5957 
5958 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5959 		WREG32(RLC_GPM_UCODE_ADDR, 0);
5960 		for (i = 0; i < size; i++)
5961 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
5962 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
5963 	} else {
5964 		const __be32 *fw_data;
5965 
5966 		switch (rdev->family) {
5967 		case CHIP_BONAIRE:
5968 		case CHIP_HAWAII:
5969 		default:
5970 			size = BONAIRE_RLC_UCODE_SIZE;
5971 			break;
5972 		case CHIP_KAVERI:
5973 			size = KV_RLC_UCODE_SIZE;
5974 			break;
5975 		case CHIP_KABINI:
5976 			size = KB_RLC_UCODE_SIZE;
5977 			break;
5978 		case CHIP_MULLINS:
5979 			size = ML_RLC_UCODE_SIZE;
5980 			break;
5981 		}
5982 
5983 		fw_data = (const __be32 *)rdev->rlc_fw->data;
5984 		WREG32(RLC_GPM_UCODE_ADDR, 0);
5985 		for (i = 0; i < size; i++)
5986 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5987 		WREG32(RLC_GPM_UCODE_ADDR, 0);
5988 	}
5989 
5990 	/* XXX - find out what chips support lbpw */
5991 	cik_enable_lbpw(rdev, false);
5992 
5993 	if (rdev->family == CHIP_BONAIRE)
5994 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
5995 
5996 	cik_rlc_start(rdev);
5997 
5998 	return 0;
5999 }
6000 
/* Enable/disable coarse-grain clock gating (CGCG) and coarse-grain
 * light sleep (CGLS) for the gfx block.  Requires halting the RLC and
 * programming the serdes masters before flipping the CGCG/CGLS bits.
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* halt the RLC while reprogramming the serdes */
		tmp = cik_halt_rlc(rdev);

		/* broadcast the CGCG override to all CU/non-CU masters */
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		/* restore the saved RLC_CNTL value */
		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* read back four times to make sure the CB clock is on
		 * before disabling gating (hw settle reads) */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	/* only write the control register if something changed */
	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6036 
/* Enable/disable medium-grain clock gating (MGCG) for the gfx block,
 * including the optional CP/RLC memory light sleep and CGTS shader
 * clock gating features.  The serdes reprogramming must happen with
 * the RLC halted.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* set bit 0, clear bit 1 of the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* serdes writes require the RLC to be halted */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		/* restore the saved RLC_CNTL value */
		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			/* CGTS shader-complex clock gating */
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* force both MGCG override bits on (gating disabled) */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* turn off RLC memory light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		/* turn off CP memory light sleep */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* override (disable) CGTS gating */
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* serdes writes require the RLC to be halted */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		/* restore the saved RLC_CNTL value */
		cik_update_rlc(rdev, tmp);
	}
}
6116 
/* Memory-controller registers that carry the MC_LS_ENABLE /
 * MC_CG_ENABLE bits; iterated by cik_enable_mc_ls() and
 * cik_enable_mc_mgcg() below.
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6129 
6130 static void cik_enable_mc_ls(struct radeon_device *rdev,
6131 			     bool enable)
6132 {
6133 	int i;
6134 	u32 orig, data;
6135 
6136 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6137 		orig = data = RREG32(mc_cg_registers[i]);
6138 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6139 			data |= MC_LS_ENABLE;
6140 		else
6141 			data &= ~MC_LS_ENABLE;
6142 		if (data != orig)
6143 			WREG32(mc_cg_registers[i], data);
6144 	}
6145 }
6146 
6147 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6148 			       bool enable)
6149 {
6150 	int i;
6151 	u32 orig, data;
6152 
6153 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6154 		orig = data = RREG32(mc_cg_registers[i]);
6155 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6156 			data |= MC_CG_ENABLE;
6157 		else
6158 			data &= ~MC_CG_ENABLE;
6159 		if (data != orig)
6160 			WREG32(mc_cg_registers[i], data);
6161 	}
6162 }
6163 
6164 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6165 				 bool enable)
6166 {
6167 	u32 orig, data;
6168 
6169 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6170 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6171 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6172 	} else {
6173 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6174 		data |= 0xff000000;
6175 		if (data != orig)
6176 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6177 
6178 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6179 		data |= 0xff000000;
6180 		if (data != orig)
6181 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6182 	}
6183 }
6184 
6185 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6186 				 bool enable)
6187 {
6188 	u32 orig, data;
6189 
6190 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6191 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6192 		data |= 0x100;
6193 		if (orig != data)
6194 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6195 
6196 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6197 		data |= 0x100;
6198 		if (orig != data)
6199 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6200 	} else {
6201 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6202 		data &= ~0x100;
6203 		if (orig != data)
6204 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6205 
6206 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6207 		data &= ~0x100;
6208 		if (orig != data)
6209 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6210 	}
6211 }
6212 
/* Toggle UVD medium-grain clock gating: the per-memory gating bits in
 * UVD_CGC_MEM_CTRL (via the UVD context bus) and the DCM bit in
 * UVD_CGC_CTRL.
 */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		/* NOTE(review): the read above is discarded — data is
		 * overwritten with 0xfff rather than OR'ed in.  This matches
		 * the upstream code, but looks like it may have been meant
		 * as data |= 0xfff; confirm against the UVD CGC docs before
		 * changing. */
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		/* clear the low 12 gating bits, preserving the rest */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6238 
6239 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6240 			       bool enable)
6241 {
6242 	u32 orig, data;
6243 
6244 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6245 
6246 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6247 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6248 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6249 	else
6250 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6251 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6252 
6253 	if (orig != data)
6254 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6255 }
6256 
6257 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6258 				bool enable)
6259 {
6260 	u32 orig, data;
6261 
6262 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6263 
6264 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6265 		data &= ~CLOCK_GATING_DIS;
6266 	else
6267 		data |= CLOCK_GATING_DIS;
6268 
6269 	if (orig != data)
6270 		WREG32(HDP_HOST_PATH_CNTL, data);
6271 }
6272 
6273 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6274 			      bool enable)
6275 {
6276 	u32 orig, data;
6277 
6278 	orig = data = RREG32(HDP_MEM_POWER_LS);
6279 
6280 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6281 		data |= HDP_LS_ENABLE;
6282 	else
6283 		data &= ~HDP_LS_ENABLE;
6284 
6285 	if (orig != data)
6286 		WREG32(HDP_MEM_POWER_LS, data);
6287 }
6288 
/* Enable or disable clock gating for the blocks selected in the
 * @block bitmask.  For gfx, MGCG must be enabled before CGCG and
 * disabled in the reverse order; the gui idle interrupt is masked
 * around the transition.
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC clock gating is only programmed on dGPUs */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6336 
/* Enable clock gating on all supported blocks at init: gfx first,
 * then UVD internal CG, then the remaining blocks in one call.
 */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6351 
/* Disable clock gating on teardown, in the reverse order of
 * cik_init_cg(): non-gfx blocks first, gfx last.
 */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6362 
6363 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6364 					  bool enable)
6365 {
6366 	u32 data, orig;
6367 
6368 	orig = data = RREG32(RLC_PG_CNTL);
6369 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6370 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6371 	else
6372 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6373 	if (orig != data)
6374 		WREG32(RLC_PG_CNTL, data);
6375 }
6376 
6377 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6378 					  bool enable)
6379 {
6380 	u32 data, orig;
6381 
6382 	orig = data = RREG32(RLC_PG_CNTL);
6383 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6384 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6385 	else
6386 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6387 	if (orig != data)
6388 		WREG32(RLC_PG_CNTL, data);
6389 }
6390 
6391 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6392 {
6393 	u32 data, orig;
6394 
6395 	orig = data = RREG32(RLC_PG_CNTL);
6396 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6397 		data &= ~DISABLE_CP_PG;
6398 	else
6399 		data |= DISABLE_CP_PG;
6400 	if (orig != data)
6401 		WREG32(RLC_PG_CNTL, data);
6402 }
6403 
6404 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6405 {
6406 	u32 data, orig;
6407 
6408 	orig = data = RREG32(RLC_PG_CNTL);
6409 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6410 		data &= ~DISABLE_GDS_PG;
6411 	else
6412 		data |= DISABLE_GDS_PG;
6413 	if (orig != data)
6414 		WREG32(RLC_PG_CNTL, data);
6415 }
6416 
6417 #define CP_ME_TABLE_SIZE    96
6418 #define CP_ME_TABLE_OFFSET  2048
6419 #define CP_MEC_TABLE_OFFSET 4096
6420 
/* Populate the RLC's CP power-gating jump table from the CE, PFP, ME,
 * MEC (and on Kaveri, MEC2) firmware images.  The tables are copied
 * back-to-back into the cp_table buffer in that engine order.
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	/* Kaveri has a second MEC, so one extra table */
	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (rdev->new_fw) {
			/* new-style firmware: jump table offset/size come
			 * from the per-image header.
			 * NOTE(review): for me == 4 (Kaveri) this
			 * dereferences rdev->mec2_fw without a NULL check —
			 * presumably mec2 fw load is guaranteed on Kaveri
			 * when new_fw is set; confirm against the fw init
			 * path. */
			const __le32 *fw_data;
			const struct gfx_firmware_header_v1_0 *hdr;

			if (me == 0) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
				fw_data = (const __le32 *)
					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 1) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
				fw_data = (const __le32 *)
					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 2) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
				fw_data = (const __le32 *)
					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 3) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			}

			/* payload is little-endian */
			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		} else {
			/* legacy firmware: fixed-size table at a fixed offset,
			 * stored big-endian */
			const __be32 *fw_data;
			table_size = CP_ME_TABLE_SIZE;

			if (me == 0) {
				fw_data = (const __be32 *)rdev->ce_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 1) {
				fw_data = (const __be32 *)rdev->pfp_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 2) {
				fw_data = (const __be32 *)rdev->me_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else {
				fw_data = (const __be32 *)rdev->mec_fw->data;
				table_offset = CP_MEC_TABLE_OFFSET;
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		}
	}
}
6504 
/* Toggle gfx coarse-grain power gating: the GFX_PG_ENABLE bit in
 * RLC_PG_CNTL plus automatic power gating in RLC_AUTO_PG_CTRL.
 */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): the read result is discarded — presumably a
		 * deliberate dummy read to kick the block out of power
		 * gating before further access; confirm before removing. */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6534 
6535 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6536 {
6537 	u32 mask = 0, tmp, tmp1;
6538 	int i;
6539 
6540 	cik_select_se_sh(rdev, se, sh);
6541 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6542 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6543 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6544 
6545 	tmp &= 0xffff0000;
6546 
6547 	tmp |= tmp1;
6548 	tmp >>= 16;
6549 
6550 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6551 		mask <<= 1;
6552 		mask |= 1;
6553 	}
6554 
6555 	return (~tmp) & mask;
6556 }
6557 
6558 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6559 {
6560 	u32 i, j, k, active_cu_number = 0;
6561 	u32 mask, counter, cu_bitmap;
6562 	u32 tmp = 0;
6563 
6564 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6565 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6566 			mask = 1;
6567 			cu_bitmap = 0;
6568 			counter = 0;
6569 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6570 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6571 					if (counter < 2)
6572 						cu_bitmap |= mask;
6573 					counter ++;
6574 				}
6575 				mask <<= 1;
6576 			}
6577 
6578 			active_cu_number += counter;
6579 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6580 		}
6581 	}
6582 
6583 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6584 
6585 	tmp = RREG32(RLC_MAX_PG_CU);
6586 	tmp &= ~MAX_PU_CU_MASK;
6587 	tmp |= MAX_PU_CU(active_cu_number);
6588 	WREG32(RLC_MAX_PG_CU, tmp);
6589 }
6590 
6591 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6592 				       bool enable)
6593 {
6594 	u32 data, orig;
6595 
6596 	orig = data = RREG32(RLC_PG_CNTL);
6597 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6598 		data |= STATIC_PER_CU_PG_ENABLE;
6599 	else
6600 		data &= ~STATIC_PER_CU_PG_ENABLE;
6601 	if (orig != data)
6602 		WREG32(RLC_PG_CNTL, data);
6603 }
6604 
6605 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6606 					bool enable)
6607 {
6608 	u32 data, orig;
6609 
6610 	orig = data = RREG32(RLC_PG_CNTL);
6611 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6612 		data |= DYN_PER_CU_PG_ENABLE;
6613 	else
6614 		data &= ~DYN_PER_CU_PG_ENABLE;
6615 	if (orig != data)
6616 		WREG32(RLC_PG_CNTL, data);
6617 }
6618 
6619 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6620 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6621 
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	/* Point the RLC at the clear-state buffer through its scratch
	 * window: address hi, address lo, then size.  If no clear state
	 * is provided, write zeros for all three words. */
	if (rdev->rlc.cs_data) {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	/* Upload the save/restore register list, if any, at its fixed
	 * scratch offset (auto-incrementing data port). */
	if (rdev->rlc.reg_list) {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* GPU addresses of the save/restore and CP table buffers, in
	 * 256-byte units (hence the >> 8). */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	/* power gating delay values; magic numbers from hw defaults */
	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	/* idle interval before the RLC auto power-gates the gfx block */
	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6670 
/* Toggle all three gfx power gating mechanisms (coarse-grain, static
 * medium-grain, dynamic medium-grain) together.  Each helper checks
 * the relevant pg_flags bit itself. */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6677 
6678 u32 cik_get_csb_size(struct radeon_device *rdev)
6679 {
6680 	u32 count = 0;
6681 	const struct cs_section_def *sect = NULL;
6682 	const struct cs_extent_def *ext = NULL;
6683 
6684 	if (rdev->rlc.cs_data == NULL)
6685 		return 0;
6686 
6687 	/* begin clear state */
6688 	count += 2;
6689 	/* context control state */
6690 	count += 3;
6691 
6692 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6693 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6694 			if (sect->id == SECT_CONTEXT)
6695 				count += 2 + ext->reg_count;
6696 			else
6697 				return 0;
6698 		}
6699 	}
6700 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6701 	count += 4;
6702 	/* end clear state */
6703 	count += 2;
6704 	/* clear state */
6705 	count += 2;
6706 
6707 	return count;
6708 }
6709 
/* Fill @buffer (sized per cik_get_csb_size()) with the PM4 packets that
 * load the golden context register state: preamble begin, context
 * control, the SET_CONTEXT_REG runs from rlc.cs_data, the per-ASIC
 * pa_sc_raster_config pair, preamble end, and a CLEAR_STATE packet. */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				/* reg_index is absolute; SET_CONTEXT_REG
				 * wants an offset from the context space
				 * base (0xa000) */
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* only SECT_CONTEXT is supported; bail,
				 * matching cik_get_csb_size() returning 0 */
				return;
			}
		}
	}

	/* per-ASIC raster config values */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_HAWAII:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
6774 
/* One-time power gating init: configure the RLC clock slowdown, the
 * cgpg state (CP/GDS gating only when GFX_PG is supported), the
 * always-on CU mask, then enable all gfx PG mechanisms. */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
6789 
/* Tear down power gating: disable the gfx PG mechanisms first, then
 * CP/GDS gating (mirror of cik_init_pg() in reverse order). */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
6800 
6801 /*
6802  * Interrupts
6803  * Starting with r6xx, interrupts are handled via a ring buffer.
6804  * Ring buffers are areas of GPU accessible memory that the GPU
6805  * writes interrupt vectors into and the host reads vectors out of.
6806  * There is a rptr (read pointer) that determines where the
6807  * host is currently reading, and a wptr (write pointer)
6808  * which determines where the GPU has written.  When the
6809  * pointers are equal, the ring is idle.  When the GPU
6810  * writes vectors to the ring buffer, it increments the
6811  * wptr.  When there is an interrupt, the host then starts
6812  * fetching commands and processing them until the pointers are
6813  * equal again at which point it updates the rptr.
6814  */
6815 
6816 /**
6817  * cik_enable_interrupts - Enable the interrupt ring buffer
6818  *
6819  * @rdev: radeon_device pointer
6820  *
6821  * Enable the interrupt ring buffer (CIK).
6822  */
static void cik_enable_interrupts(struct radeon_device *rdev)
{
	u32 ih_cntl = RREG32(IH_CNTL);
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);

	/* Turn on the IH block and its ring buffer, then record the
	 * software state so cik_irq_set() will program sources. */
	ih_cntl |= ENABLE_INTR;
	ih_rb_cntl |= IH_RB_ENABLE;
	WREG32(IH_CNTL, ih_cntl);
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	rdev->ih.enabled = true;
}
6834 
6835 /**
6836  * cik_disable_interrupts - Disable the interrupt ring buffer
6837  *
6838  * @rdev: radeon_device pointer
6839  *
6840  * Disable the interrupt ring buffer (CIK).
6841  */
static void cik_disable_interrupts(struct radeon_device *rdev)
{
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
	u32 ih_cntl = RREG32(IH_CNTL);

	/* Disable the ring buffer first, then the IH block itself. */
	ih_rb_cntl &= ~IH_RB_ENABLE;
	ih_cntl &= ~ENABLE_INTR;
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	WREG32(IH_CNTL, ih_cntl);
	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);
	/* keep the cached rptr in sync with the reset hw pointers */
	rdev->ih.enabled = false;
	rdev->ih.rptr = 0;
}
6857 
6858 /**
6859  * cik_disable_interrupt_state - Disable all interrupt sources
6860  *
6861  * @rdev: radeon_device pointer
6862  *
6863  * Clear all interrupt enable bits used by the driver (CIK).
6864  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring: clear everything except the context busy/empty
	 * enables, which are preserved by the mask */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* SRBM */
	WREG32(SRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: clear the enables but keep the programmed
	 * polarity bit on each HPD pad */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
6934 
6935 /**
6936  * cik_irq_init - init and enable the interrupt ring
6937  *
6938  * @rdev: radeon_device pointer
6939  *
6940  * Allocate a ring buffer for the interrupt controller,
6941  * enable the RLC, disable interrupts, enable the IH
6942  * ring buffer and enable it (CIK).
6943  * Called at device load and reume.
6944  * Returns 0 for success, errors for failure.
6945  */
6946 static int cik_irq_init(struct radeon_device *rdev)
6947 {
6948 	int ret = 0;
6949 	int rb_bufsz;
6950 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6951 
6952 	/* allocate ring */
6953 	ret = r600_ih_ring_alloc(rdev);
6954 	if (ret)
6955 		return ret;
6956 
6957 	/* disable irqs */
6958 	cik_disable_interrupts(rdev);
6959 
6960 	/* init rlc */
6961 	ret = cik_rlc_resume(rdev);
6962 	if (ret) {
6963 		r600_ih_ring_fini(rdev);
6964 		return ret;
6965 	}
6966 
6967 	/* setup interrupt control */
6968 	/* set dummy read address to dummy page address */
6969 	WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
6970 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6971 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6972 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6973 	 */
6974 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6975 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6976 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6977 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6978 
6979 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6980 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6981 
6982 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6983 		      IH_WPTR_OVERFLOW_CLEAR |
6984 		      (rb_bufsz << 1));
6985 
6986 	if (rdev->wb.enabled)
6987 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6988 
6989 	/* set the writeback address whether it's enabled or not */
6990 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6991 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6992 
6993 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6994 
6995 	/* set rptr, wptr to 0 */
6996 	WREG32(IH_RB_RPTR, 0);
6997 	WREG32(IH_RB_WPTR, 0);
6998 
6999 	/* Default settings for IH_CNTL (disabled at first) */
7000 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7001 	/* RPTR_REARM only works if msi's are enabled */
7002 	if (rdev->msi_enabled)
7003 		ih_cntl |= RPTR_REARM;
7004 	WREG32(IH_CNTL, ih_cntl);
7005 
7006 	/* force the active interrupt state to all disabled */
7007 	cik_disable_interrupt_state(rdev);
7008 
7009 	pci_set_master(rdev->pdev);
7010 
7011 	/* enable irqs */
7012 	cik_enable_interrupts(rdev);
7013 
7014 	return ret;
7015 }
7016 
7017 /**
7018  * cik_irq_set - enable/disable interrupt sources
7019  *
7020  * @rdev: radeon_device pointer
7021  *
7022  * Enable interrupt sources on the GPU (vblanks, hpd,
7023  * etc.) (CIK).
7024  * Returns 0 for success, errors for failure.
7025  */
7026 int cik_irq_set(struct radeon_device *rdev)
7027 {
7028 	u32 cp_int_cntl;
7029 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
7030 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
7031 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7032 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7033 	u32 grbm_int_cntl = 0;
7034 	u32 dma_cntl, dma_cntl1;
7035 
7036 	if (!rdev->irq.installed) {
7037 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7038 		return -EINVAL;
7039 	}
7040 	/* don't enable anything if the ih is disabled */
7041 	if (!rdev->ih.enabled) {
7042 		cik_disable_interrupts(rdev);
7043 		/* force the active interrupt state to all disabled */
7044 		cik_disable_interrupt_state(rdev);
7045 		return 0;
7046 	}
7047 
7048 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7049 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7050 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7051 
7052 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7053 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7054 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7055 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7056 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7057 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7058 
7059 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7060 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7061 
7062 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7063 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7064 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7065 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7066 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7067 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7068 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7069 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7070 
7071 	/* enable CP interrupts on all rings */
7072 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7073 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7074 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7075 	}
7076 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7077 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7078 		DRM_DEBUG("si_irq_set: sw int cp1\n");
7079 		if (ring->me == 1) {
7080 			switch (ring->pipe) {
7081 			case 0:
7082 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7083 				break;
7084 			case 1:
7085 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7086 				break;
7087 			case 2:
7088 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7089 				break;
7090 			case 3:
7091 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7092 				break;
7093 			default:
7094 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7095 				break;
7096 			}
7097 		} else if (ring->me == 2) {
7098 			switch (ring->pipe) {
7099 			case 0:
7100 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7101 				break;
7102 			case 1:
7103 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7104 				break;
7105 			case 2:
7106 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7107 				break;
7108 			case 3:
7109 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7110 				break;
7111 			default:
7112 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7113 				break;
7114 			}
7115 		} else {
7116 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7117 		}
7118 	}
7119 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7120 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7121 		DRM_DEBUG("si_irq_set: sw int cp2\n");
7122 		if (ring->me == 1) {
7123 			switch (ring->pipe) {
7124 			case 0:
7125 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7126 				break;
7127 			case 1:
7128 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7129 				break;
7130 			case 2:
7131 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7132 				break;
7133 			case 3:
7134 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7135 				break;
7136 			default:
7137 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7138 				break;
7139 			}
7140 		} else if (ring->me == 2) {
7141 			switch (ring->pipe) {
7142 			case 0:
7143 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7144 				break;
7145 			case 1:
7146 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7147 				break;
7148 			case 2:
7149 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7150 				break;
7151 			case 3:
7152 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7153 				break;
7154 			default:
7155 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7156 				break;
7157 			}
7158 		} else {
7159 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7160 		}
7161 	}
7162 
7163 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7164 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7165 		dma_cntl |= TRAP_ENABLE;
7166 	}
7167 
7168 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7169 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7170 		dma_cntl1 |= TRAP_ENABLE;
7171 	}
7172 
7173 	if (rdev->irq.crtc_vblank_int[0] ||
7174 	    atomic_read(&rdev->irq.pflip[0])) {
7175 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7176 		crtc1 |= VBLANK_INTERRUPT_MASK;
7177 	}
7178 	if (rdev->irq.crtc_vblank_int[1] ||
7179 	    atomic_read(&rdev->irq.pflip[1])) {
7180 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7181 		crtc2 |= VBLANK_INTERRUPT_MASK;
7182 	}
7183 	if (rdev->irq.crtc_vblank_int[2] ||
7184 	    atomic_read(&rdev->irq.pflip[2])) {
7185 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7186 		crtc3 |= VBLANK_INTERRUPT_MASK;
7187 	}
7188 	if (rdev->irq.crtc_vblank_int[3] ||
7189 	    atomic_read(&rdev->irq.pflip[3])) {
7190 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7191 		crtc4 |= VBLANK_INTERRUPT_MASK;
7192 	}
7193 	if (rdev->irq.crtc_vblank_int[4] ||
7194 	    atomic_read(&rdev->irq.pflip[4])) {
7195 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7196 		crtc5 |= VBLANK_INTERRUPT_MASK;
7197 	}
7198 	if (rdev->irq.crtc_vblank_int[5] ||
7199 	    atomic_read(&rdev->irq.pflip[5])) {
7200 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7201 		crtc6 |= VBLANK_INTERRUPT_MASK;
7202 	}
7203 	if (rdev->irq.hpd[0]) {
7204 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7205 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7206 	}
7207 	if (rdev->irq.hpd[1]) {
7208 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7209 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7210 	}
7211 	if (rdev->irq.hpd[2]) {
7212 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7213 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7214 	}
7215 	if (rdev->irq.hpd[3]) {
7216 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7217 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7218 	}
7219 	if (rdev->irq.hpd[4]) {
7220 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7221 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7222 	}
7223 	if (rdev->irq.hpd[5]) {
7224 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7225 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7226 	}
7227 
7228 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7229 
7230 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7231 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7232 
7233 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7234 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7235 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7236 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7237 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7238 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7239 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7240 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7241 
7242 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7243 
7244 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7245 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7246 	if (rdev->num_crtc >= 4) {
7247 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7248 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7249 	}
7250 	if (rdev->num_crtc >= 6) {
7251 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7252 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7253 	}
7254 
7255 	if (rdev->num_crtc >= 2) {
7256 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7257 		       GRPH_PFLIP_INT_MASK);
7258 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7259 		       GRPH_PFLIP_INT_MASK);
7260 	}
7261 	if (rdev->num_crtc >= 4) {
7262 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7263 		       GRPH_PFLIP_INT_MASK);
7264 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7265 		       GRPH_PFLIP_INT_MASK);
7266 	}
7267 	if (rdev->num_crtc >= 6) {
7268 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7269 		       GRPH_PFLIP_INT_MASK);
7270 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7271 		       GRPH_PFLIP_INT_MASK);
7272 	}
7273 
7274 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7275 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7276 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7277 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7278 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7279 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7280 
7281 	/* posting read */
7282 	RREG32(SRBM_STATUS);
7283 
7284 	return 0;
7285 }
7286 
7287 /**
7288  * cik_irq_ack - ack interrupt sources
7289  *
7290  * @rdev: radeon_device pointer
7291  *
7292  * Ack interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).  Certain interrupt sources are sw
7294  * generated and do not require an explicit ack.
7295  */
static inline void cik_irq_ack(struct radeon_device *rdev)
{
	u32 tmp;

	/* Latch all display interrupt status registers into
	 * rdev->irq.stat_regs so cik_irq_process() can inspect them
	 * after the acks below. */
	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);

	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
		EVERGREEN_CRTC0_REGISTER_OFFSET);
	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
		EVERGREEN_CRTC1_REGISTER_OFFSET);
	if (rdev->num_crtc >= 4) {
		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC2_REGISTER_OFFSET);
		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC3_REGISTER_OFFSET);
	}
	if (rdev->num_crtc >= 6) {
		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC4_REGISTER_OFFSET);
		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC5_REGISTER_OFFSET);
	}

	/* ack pflip / vblank / vline for crtc 0 and 1 */
	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);

	/* same for crtc 2 and 3 when present */
	if (rdev->num_crtc >= 4) {
		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
	}

	/* same for crtc 4 and 5 when present */
	if (rdev->num_crtc >= 6) {
		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
	}

	/* ack hotplug connect/disconnect interrupts (HPD 1-6) */
	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
	/* ack hotplug RX (e.g. DP short-pulse) interrupts (HPD 1-6) */
	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
7435 
7436 /**
7437  * cik_irq_disable - disable interrupts
7438  *
7439  * @rdev: radeon_device pointer
7440  *
7441  * Disable interrupts on the hw (CIK).
7442  */
static void cik_irq_disable(struct radeon_device *rdev)
{
	/* stop the hw from generating new interrupts */
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	/* clear the per-source interrupt enable state */
	cik_disable_interrupt_state(rdev);
}
7451 
7452 /**
 * cik_irq_suspend - disable interrupts for suspend
7454  *
7455  * @rdev: radeon_device pointer
7456  *
7457  * Disable interrupts and stop the RLC (CIK).
7458  * Used for suspend.
7459  */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* disable and ack any pending interrupts first */
	cik_irq_disable(rdev);
	/* then stop the RLC so it is quiescent across suspend */
	cik_rlc_stop(rdev);
}
7465 
7466 /**
7467  * cik_irq_fini - tear down interrupt support
7468  *
7469  * @rdev: radeon_device pointer
7470  *
7471  * Disable interrupts on the hw and free the IH ring
7472  * buffer (CIK).
7473  * Used for driver unload.
7474  */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* quiesce the hw (disable irqs, stop RLC) ... */
	cik_irq_suspend(rdev);
	/* ... then free the IH ring buffer */
	r600_ih_ring_fini(rdev);
}
7480 
7481 /**
7482  * cik_get_ih_wptr - get the IH ring buffer wptr
7483  *
7484  * @rdev: radeon_device pointer
7485  *
7486  * Get the IH ring buffer wptr from either the register
7487  * or the writeback memory buffer (CIK).  Also check for
7488  * ring buffer overflow and deal with it.
7489  * Used by cik_irq_process().
7490  * Returns the value of the wptr.
7491  */
7492 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7493 {
7494 	u32 wptr, tmp;
7495 
7496 	if (rdev->wb.enabled)
7497 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7498 	else
7499 		wptr = RREG32(IH_RB_WPTR);
7500 
7501 	if (wptr & RB_OVERFLOW) {
7502 		wptr &= ~RB_OVERFLOW;
7503 		/* When a ring buffer overflow happen start parsing interrupt
7504 		 * from the last not overwritten vector (wptr + 16). Hopefully
7505 		 * this should allow us to catchup.
7506 		 */
7507 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7508 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7509 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7510 		tmp = RREG32(IH_RB_CNTL);
7511 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7512 		WREG32(IH_RB_CNTL, tmp);
7513 	}
7514 	return (wptr & rdev->ih.ptr_mask);
7515 }
7516 
7517 /*        CIK IV Ring
7518  * Each IV ring entry is 128 bits:
7519  * [7:0]    - interrupt source id
7520  * [31:8]   - reserved
7521  * [59:32]  - interrupt source data
7522  * [63:60]  - reserved
7523  * [71:64]  - RINGID
7524  *            CP:
7525  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7526  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7527  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7528  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7529  *            PIPE_ID - ME0 0=3D
7530  *                    - ME1&2 compute dispatcher (4 pipes each)
7531  *            SDMA:
7532  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7533  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7534  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7535  * [79:72]  - VMID
7536  * [95:80]  - PASID
7537  * [127:96] - reserved
7538  */
7539 /**
7540  * cik_irq_process - interrupt handler
7541  *
7542  * @rdev: radeon_device pointer
7543  *
7544  * Interrupt hander (CIK).  Walk the IH ring,
7545  * ack interrupts and schedule work to handle
7546  * interrupt events.
7547  * Returns irq process return code.
7548  */
7549 int cik_irq_process(struct radeon_device *rdev)
7550 {
7551 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7552 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7553 	u32 wptr;
7554 	u32 rptr;
7555 	u32 src_id, src_data, ring_id;
7556 	u8 me_id, pipe_id, queue_id;
7557 	u32 ring_index;
7558 	bool queue_hotplug = false;
7559 	bool queue_dp = false;
7560 	bool queue_reset = false;
7561 	u32 addr, status, mc_client;
7562 	bool queue_thermal = false;
7563 
7564 	if (!rdev->ih.enabled || rdev->shutdown)
7565 		return IRQ_NONE;
7566 
7567 	wptr = cik_get_ih_wptr(rdev);
7568 
7569 	if (wptr == rdev->ih.rptr)
7570 		return IRQ_NONE;
7571 restart_ih:
7572 	/* is somebody else already processing irqs? */
7573 	if (atomic_xchg(&rdev->ih.lock, 1))
7574 		return IRQ_NONE;
7575 
7576 	rptr = rdev->ih.rptr;
7577 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7578 
7579 	/* Order reading of wptr vs. reading of IH ring data */
7580 	rmb();
7581 
7582 	/* display interrupts */
7583 	cik_irq_ack(rdev);
7584 
7585 	while (rptr != wptr) {
7586 		/* wptr/rptr are in bytes! */
7587 		ring_index = rptr / 4;
7588 
7589 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7590 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7591 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7592 
7593 		switch (src_id) {
7594 		case 1: /* D1 vblank/vline */
7595 			switch (src_data) {
7596 			case 0: /* D1 vblank */
7597 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7598 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7599 
7600 				if (rdev->irq.crtc_vblank_int[0]) {
7601 					drm_handle_vblank(rdev->ddev, 0);
7602 					rdev->pm.vblank_sync = true;
7603 					wake_up(&rdev->irq.vblank_queue);
7604 				}
7605 				if (atomic_read(&rdev->irq.pflip[0]))
7606 					radeon_crtc_handle_vblank(rdev, 0);
7607 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7608 				DRM_DEBUG("IH: D1 vblank\n");
7609 
7610 				break;
7611 			case 1: /* D1 vline */
7612 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7613 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7614 
7615 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7616 				DRM_DEBUG("IH: D1 vline\n");
7617 
7618 				break;
7619 			default:
7620 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7621 				break;
7622 			}
7623 			break;
7624 		case 2: /* D2 vblank/vline */
7625 			switch (src_data) {
7626 			case 0: /* D2 vblank */
7627 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7628 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7629 
7630 				if (rdev->irq.crtc_vblank_int[1]) {
7631 					drm_handle_vblank(rdev->ddev, 1);
7632 					rdev->pm.vblank_sync = true;
7633 					wake_up(&rdev->irq.vblank_queue);
7634 				}
7635 				if (atomic_read(&rdev->irq.pflip[1]))
7636 					radeon_crtc_handle_vblank(rdev, 1);
7637 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7638 				DRM_DEBUG("IH: D2 vblank\n");
7639 
7640 				break;
7641 			case 1: /* D2 vline */
7642 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7643 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7644 
7645 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7646 				DRM_DEBUG("IH: D2 vline\n");
7647 
7648 				break;
7649 			default:
7650 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7651 				break;
7652 			}
7653 			break;
7654 		case 3: /* D3 vblank/vline */
7655 			switch (src_data) {
7656 			case 0: /* D3 vblank */
7657 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7658 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7659 
7660 				if (rdev->irq.crtc_vblank_int[2]) {
7661 					drm_handle_vblank(rdev->ddev, 2);
7662 					rdev->pm.vblank_sync = true;
7663 					wake_up(&rdev->irq.vblank_queue);
7664 				}
7665 				if (atomic_read(&rdev->irq.pflip[2]))
7666 					radeon_crtc_handle_vblank(rdev, 2);
7667 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7668 				DRM_DEBUG("IH: D3 vblank\n");
7669 
7670 				break;
7671 			case 1: /* D3 vline */
7672 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7673 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7674 
7675 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7676 				DRM_DEBUG("IH: D3 vline\n");
7677 
7678 				break;
7679 			default:
7680 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7681 				break;
7682 			}
7683 			break;
7684 		case 4: /* D4 vblank/vline */
7685 			switch (src_data) {
7686 			case 0: /* D4 vblank */
7687 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7688 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7689 
7690 				if (rdev->irq.crtc_vblank_int[3]) {
7691 					drm_handle_vblank(rdev->ddev, 3);
7692 					rdev->pm.vblank_sync = true;
7693 					wake_up(&rdev->irq.vblank_queue);
7694 				}
7695 				if (atomic_read(&rdev->irq.pflip[3]))
7696 					radeon_crtc_handle_vblank(rdev, 3);
7697 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7698 				DRM_DEBUG("IH: D4 vblank\n");
7699 
7700 				break;
7701 			case 1: /* D4 vline */
7702 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7703 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7704 
7705 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7706 				DRM_DEBUG("IH: D4 vline\n");
7707 
7708 				break;
7709 			default:
7710 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7711 				break;
7712 			}
7713 			break;
7714 		case 5: /* D5 vblank/vline */
7715 			switch (src_data) {
7716 			case 0: /* D5 vblank */
7717 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7718 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7719 
7720 				if (rdev->irq.crtc_vblank_int[4]) {
7721 					drm_handle_vblank(rdev->ddev, 4);
7722 					rdev->pm.vblank_sync = true;
7723 					wake_up(&rdev->irq.vblank_queue);
7724 				}
7725 				if (atomic_read(&rdev->irq.pflip[4]))
7726 					radeon_crtc_handle_vblank(rdev, 4);
7727 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7728 				DRM_DEBUG("IH: D5 vblank\n");
7729 
7730 				break;
7731 			case 1: /* D5 vline */
7732 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7733 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7734 
7735 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7736 				DRM_DEBUG("IH: D5 vline\n");
7737 
7738 				break;
7739 			default:
7740 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7741 				break;
7742 			}
7743 			break;
7744 		case 6: /* D6 vblank/vline */
7745 			switch (src_data) {
7746 			case 0: /* D6 vblank */
7747 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7748 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7749 
7750 				if (rdev->irq.crtc_vblank_int[5]) {
7751 					drm_handle_vblank(rdev->ddev, 5);
7752 					rdev->pm.vblank_sync = true;
7753 					wake_up(&rdev->irq.vblank_queue);
7754 				}
7755 				if (atomic_read(&rdev->irq.pflip[5]))
7756 					radeon_crtc_handle_vblank(rdev, 5);
7757 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7758 				DRM_DEBUG("IH: D6 vblank\n");
7759 
7760 				break;
7761 			case 1: /* D6 vline */
7762 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7763 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7764 
7765 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7766 				DRM_DEBUG("IH: D6 vline\n");
7767 
7768 				break;
7769 			default:
7770 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7771 				break;
7772 			}
7773 			break;
7774 		case 8: /* D1 page flip */
7775 		case 10: /* D2 page flip */
7776 		case 12: /* D3 page flip */
7777 		case 14: /* D4 page flip */
7778 		case 16: /* D5 page flip */
7779 		case 18: /* D6 page flip */
7780 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7781 			if (radeon_use_pflipirq > 0)
7782 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7783 			break;
7784 		case 42: /* HPD hotplug */
7785 			switch (src_data) {
7786 			case 0:
7787 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7788 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7789 
7790 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7791 				queue_hotplug = true;
7792 				DRM_DEBUG("IH: HPD1\n");
7793 
7794 				break;
7795 			case 1:
7796 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7797 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7798 
7799 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7800 				queue_hotplug = true;
7801 				DRM_DEBUG("IH: HPD2\n");
7802 
7803 				break;
7804 			case 2:
7805 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7806 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7807 
7808 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7809 				queue_hotplug = true;
7810 				DRM_DEBUG("IH: HPD3\n");
7811 
7812 				break;
7813 			case 3:
7814 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7815 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7816 
7817 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7818 				queue_hotplug = true;
7819 				DRM_DEBUG("IH: HPD4\n");
7820 
7821 				break;
7822 			case 4:
7823 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7824 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7825 
7826 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7827 				queue_hotplug = true;
7828 				DRM_DEBUG("IH: HPD5\n");
7829 
7830 				break;
7831 			case 5:
7832 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7833 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7834 
7835 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7836 				queue_hotplug = true;
7837 				DRM_DEBUG("IH: HPD6\n");
7838 
7839 				break;
7840 			case 6:
7841 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7842 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7843 
7844 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7845 				queue_dp = true;
7846 				DRM_DEBUG("IH: HPD_RX 1\n");
7847 
7848 				break;
7849 			case 7:
7850 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7851 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7852 
7853 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7854 				queue_dp = true;
7855 				DRM_DEBUG("IH: HPD_RX 2\n");
7856 
7857 				break;
7858 			case 8:
7859 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7860 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7861 
7862 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7863 				queue_dp = true;
7864 				DRM_DEBUG("IH: HPD_RX 3\n");
7865 
7866 				break;
7867 			case 9:
7868 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7869 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7870 
7871 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7872 				queue_dp = true;
7873 				DRM_DEBUG("IH: HPD_RX 4\n");
7874 
7875 				break;
7876 			case 10:
7877 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7878 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7879 
7880 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7881 				queue_dp = true;
7882 				DRM_DEBUG("IH: HPD_RX 5\n");
7883 
7884 				break;
7885 			case 11:
7886 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7887 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7888 
7889 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7890 				queue_dp = true;
7891 				DRM_DEBUG("IH: HPD_RX 6\n");
7892 
7893 				break;
7894 			default:
7895 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7896 				break;
7897 			}
7898 			break;
7899 		case 96:
7900 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7901 			WREG32(SRBM_INT_ACK, 0x1);
7902 			break;
7903 		case 124: /* UVD */
7904 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7905 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7906 			break;
7907 		case 146:
7908 		case 147:
7909 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7910 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7911 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7912 			/* reset addr and status */
7913 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7914 			if (addr == 0x0 && status == 0x0)
7915 				break;
7916 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7917 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7918 				addr);
7919 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7920 				status);
7921 			cik_vm_decode_fault(rdev, status, addr, mc_client);
7922 			break;
7923 		case 167: /* VCE */
7924 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7925 			switch (src_data) {
7926 			case 0:
7927 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7928 				break;
7929 			case 1:
7930 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7931 				break;
7932 			default:
7933 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7934 				break;
7935 			}
7936 			break;
7937 		case 176: /* GFX RB CP_INT */
7938 		case 177: /* GFX IB CP_INT */
7939 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7940 			break;
7941 		case 181: /* CP EOP event */
7942 			DRM_DEBUG("IH: CP EOP\n");
7943 			/* XXX check the bitfield order! */
7944 			me_id = (ring_id & 0x60) >> 5;
7945 			pipe_id = (ring_id & 0x18) >> 3;
7946 			queue_id = (ring_id & 0x7) >> 0;
7947 			switch (me_id) {
7948 			case 0:
7949 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7950 				break;
7951 			case 1:
7952 			case 2:
7953 				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7954 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7955 				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7956 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7957 				break;
7958 			}
7959 			break;
7960 		case 184: /* CP Privileged reg access */
7961 			DRM_ERROR("Illegal register access in command stream\n");
7962 			/* XXX check the bitfield order! */
7963 			me_id = (ring_id & 0x60) >> 5;
7964 			pipe_id = (ring_id & 0x18) >> 3;
7965 			queue_id = (ring_id & 0x7) >> 0;
7966 			switch (me_id) {
7967 			case 0:
7968 				/* This results in a full GPU reset, but all we need to do is soft
7969 				 * reset the CP for gfx
7970 				 */
7971 				queue_reset = true;
7972 				break;
7973 			case 1:
7974 				/* XXX compute */
7975 				queue_reset = true;
7976 				break;
7977 			case 2:
7978 				/* XXX compute */
7979 				queue_reset = true;
7980 				break;
7981 			}
7982 			break;
7983 		case 185: /* CP Privileged inst */
7984 			DRM_ERROR("Illegal instruction in command stream\n");
7985 			/* XXX check the bitfield order! */
7986 			me_id = (ring_id & 0x60) >> 5;
7987 			pipe_id = (ring_id & 0x18) >> 3;
7988 			queue_id = (ring_id & 0x7) >> 0;
7989 			switch (me_id) {
7990 			case 0:
7991 				/* This results in a full GPU reset, but all we need to do is soft
7992 				 * reset the CP for gfx
7993 				 */
7994 				queue_reset = true;
7995 				break;
7996 			case 1:
7997 				/* XXX compute */
7998 				queue_reset = true;
7999 				break;
8000 			case 2:
8001 				/* XXX compute */
8002 				queue_reset = true;
8003 				break;
8004 			}
8005 			break;
8006 		case 224: /* SDMA trap event */
8007 			/* XXX check the bitfield order! */
8008 			me_id = (ring_id & 0x3) >> 0;
8009 			queue_id = (ring_id & 0xc) >> 2;
8010 			DRM_DEBUG("IH: SDMA trap\n");
8011 			switch (me_id) {
8012 			case 0:
8013 				switch (queue_id) {
8014 				case 0:
8015 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8016 					break;
8017 				case 1:
8018 					/* XXX compute */
8019 					break;
8020 				case 2:
8021 					/* XXX compute */
8022 					break;
8023 				}
8024 				break;
8025 			case 1:
8026 				switch (queue_id) {
8027 				case 0:
8028 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8029 					break;
8030 				case 1:
8031 					/* XXX compute */
8032 					break;
8033 				case 2:
8034 					/* XXX compute */
8035 					break;
8036 				}
8037 				break;
8038 			}
8039 			break;
8040 		case 230: /* thermal low to high */
8041 			DRM_DEBUG("IH: thermal low to high\n");
8042 			rdev->pm.dpm.thermal.high_to_low = false;
8043 			queue_thermal = true;
8044 			break;
8045 		case 231: /* thermal high to low */
8046 			DRM_DEBUG("IH: thermal high to low\n");
8047 			rdev->pm.dpm.thermal.high_to_low = true;
8048 			queue_thermal = true;
8049 			break;
8050 		case 233: /* GUI IDLE */
8051 			DRM_DEBUG("IH: GUI idle\n");
8052 			break;
8053 		case 241: /* SDMA Privileged inst */
8054 		case 247: /* SDMA Privileged inst */
8055 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8056 			/* XXX check the bitfield order! */
8057 			me_id = (ring_id & 0x3) >> 0;
8058 			queue_id = (ring_id & 0xc) >> 2;
8059 			switch (me_id) {
8060 			case 0:
8061 				switch (queue_id) {
8062 				case 0:
8063 					queue_reset = true;
8064 					break;
8065 				case 1:
8066 					/* XXX compute */
8067 					queue_reset = true;
8068 					break;
8069 				case 2:
8070 					/* XXX compute */
8071 					queue_reset = true;
8072 					break;
8073 				}
8074 				break;
8075 			case 1:
8076 				switch (queue_id) {
8077 				case 0:
8078 					queue_reset = true;
8079 					break;
8080 				case 1:
8081 					/* XXX compute */
8082 					queue_reset = true;
8083 					break;
8084 				case 2:
8085 					/* XXX compute */
8086 					queue_reset = true;
8087 					break;
8088 				}
8089 				break;
8090 			}
8091 			break;
8092 		default:
8093 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8094 			break;
8095 		}
8096 
8097 		/* wptr/rptr are in bytes! */
8098 		rptr += 16;
8099 		rptr &= rdev->ih.ptr_mask;
8100 		WREG32(IH_RB_RPTR, rptr);
8101 	}
8102 	if (queue_dp)
8103 		schedule_work(&rdev->dp_work);
8104 	if (queue_hotplug)
8105 		schedule_delayed_work(&rdev->hotplug_work, 0);
8106 	if (queue_reset) {
8107 		rdev->needs_reset = true;
8108 		wake_up_all(&rdev->fence_queue);
8109 	}
8110 	if (queue_thermal)
8111 		schedule_work(&rdev->pm.dpm.thermal.work);
8112 	rdev->ih.rptr = rptr;
8113 	atomic_set(&rdev->ih.lock, 0);
8114 
8115 	/* make sure wptr hasn't changed while processing */
8116 	wptr = cik_get_ih_wptr(rdev);
8117 	if (wptr != rptr)
8118 		goto restart_ih;
8119 
8120 	return IRQ_HANDLED;
8121 }
8122 
8123 /*
8124  * startup/shutdown callbacks
8125  */
/**
 * cik_uvd_init - init the UVD block
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the UVD sub-block and set up its ring.  If the core
 * UVD init fails, UVD support is flagged off on the device so the
 * later start/resume steps are skipped.
 */
static void cik_uvd_init(struct radeon_device *rdev)
{
	int r;

	if (!rdev->has_uvd)
		return;

	r = radeon_uvd_init(rdev);
	if (r) {
		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
		/*
		 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
		 * to early fails cik_uvd_start() and thus nothing happens
		 * there. So it is pointless to try to go through that code
		 * hence why we disable uvd here.
		 */
		rdev->has_uvd = false;
		return;
	}
	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
}
8148 
/**
 * cik_uvd_start - resume UVD hw and start its fence ring
 *
 * @rdev: radeon_device pointer
 *
 * Resume the UVD firmware/hw and start the fence driver on the
 * UVD ring.  On any failure the UVD ring size is zeroed so that
 * cik_uvd_resume() becomes a no-op.
 */
static void cik_uvd_start(struct radeon_device *rdev)
{
	int r;

	if (!rdev->has_uvd)
		return;

	r = radeon_uvd_resume(rdev);
	if (r) {
		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
		goto error;
	}
	r = uvd_v4_2_resume(rdev);
	if (r) {
		dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
		goto error;
	}
	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
		goto error;
	}
	return;

error:
	/* mark the ring unusable; cik_uvd_resume() checks ring_size */
	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
}
8176 
/**
 * cik_uvd_resume - init and start the UVD ring
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the UVD ring and bring up UVD.  Only runs when
 * cik_uvd_start() succeeded (ring_size != 0); errors are reported
 * but not propagated.
 */
static void cik_uvd_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
		return;

	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
	if (r) {
		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
		return;
	}
	r = uvd_v1_0_init(rdev);
	if (r) {
		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
		return;
	}
}
8197 
/**
 * cik_vce_init - init the VCE block
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the VCE sub-block and set up both of its rings.  If
 * the core VCE init fails, VCE support is flagged off on the
 * device so the later start/resume steps are skipped.
 */
static void cik_vce_init(struct radeon_device *rdev)
{
	int r;

	if (!rdev->has_vce)
		return;

	r = radeon_vce_init(rdev);
	if (r) {
		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
		/*
		 * At this point rdev->vce.vcpu_bo is NULL which trickles down
		 * to early fails cik_vce_start() and thus nothing happens
		 * there. So it is pointless to try to go through that code
		 * hence why we disable vce here.
		 */
		rdev->has_vce = false;
		return;
	}
	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
}
8222 
8223 static void cik_vce_start(struct radeon_device *rdev)
8224 {
8225 	int r;
8226 
8227 	if (!rdev->has_vce)
8228 		return;
8229 
8230 	r = radeon_vce_resume(rdev);
8231 	if (r) {
8232 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8233 		goto error;
8234 	}
8235 	r = vce_v2_0_resume(rdev);
8236 	if (r) {
8237 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8238 		goto error;
8239 	}
8240 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8241 	if (r) {
8242 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8243 		goto error;
8244 	}
8245 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8246 	if (r) {
8247 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8248 		goto error;
8249 	}
8250 	return;
8251 
8252 error:
8253 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8254 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8255 }
8256 
8257 static void cik_vce_resume(struct radeon_device *rdev)
8258 {
8259 	struct radeon_ring *ring;
8260 	int r;
8261 
8262 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8263 		return;
8264 
8265 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8266 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8267 	if (r) {
8268 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8269 		return;
8270 	}
8271 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8272 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8273 	if (r) {
8274 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8275 		return;
8276 	}
8277 	r = vce_v1_0_init(rdev);
8278 	if (r) {
8279 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8280 		return;
8281 	}
8282 }
8283 
8284 /**
8285  * cik_startup - program the asic to a functional state
8286  *
8287  * @rdev: radeon_device pointer
8288  *
8289  * Programs the asic to a functional state (CIK).
8290  * Called by cik_init() and cik_resume().
8291  * Returns 0 for success, error for failure.
8292  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 nop;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* MC ucode load is skipped on IGPs and when dpm is enabled */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* IGPs use a family-specific save/restore register list */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = roundup2(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start the fence driver on every ring we will use */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD/VCE failures are handled internally and are not fatal */
	cik_uvd_start(rdev);
	cik_vce_start(rdev);

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	/* pick the ring NOP: Hawaii with old firmware needs the legacy
	 * type-2 packet, everything else uses a type-3 NOP packet
	 */
	if (rdev->family == CHIP_HAWAII) {
		if (rdev->new_fw)
			nop = PACKET3(PACKET3_NOP, 0x3FFF);
		else
			nop = RADEON_CP_PACKET2;
	} else {
		nop = PACKET3(PACKET3_NOP, 0x3FFF);
	}

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     nop);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* not fatal if these fail; they report errors internally */
	cik_uvd_resume(rdev);
	cik_vce_resume(rdev);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
8485 
8486 /**
8487  * cik_resume - resume the asic to a functional state
8488  *
8489  * @rdev: radeon_device pointer
8490  *
8491  * Programs the asic to a functional state (CIK).
8492  * Called at resume.
8493  * Returns 0 for success, error for failure.
8494  */
8495 int cik_resume(struct radeon_device *rdev)
8496 {
8497 	int r;
8498 
8499 	/* post card */
8500 	atom_asic_init(rdev->mode_info.atom_context);
8501 
8502 	/* init golden registers */
8503 	cik_init_golden_registers(rdev);
8504 
8505 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8506 		radeon_pm_resume(rdev);
8507 
8508 	rdev->accel_working = true;
8509 	r = cik_startup(rdev);
8510 	if (r) {
8511 		DRM_ERROR("cik startup failed on resume\n");
8512 		rdev->accel_working = false;
8513 		return r;
8514 	}
8515 
8516 	return r;
8517 
8518 }
8519 
/**
 * cik_suspend - suspend the asic
 *
 * @rdev: radeon_device pointer
 *
 * Bring the chip into a state suitable for suspend (CIK).
 * Called at suspend.
 * Returns 0 for success.
 */
int cik_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt the CP and SDMA engines before tearing down IRQs and GART */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_suspend(rdev);
	/* disable powergating and clockgating while the chip is quiesced */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
8549 
8550 /* Plan is to move initialization in that function and use
8551  * helper function so that radeon_device_init pretty much
8552  * do nothing more than calling asic specific function. This
8553  * should also allow to remove a bunch of callback function
8554  * like vram_info.
8555  */
8556 /**
8557  * cik_init - asic specific driver and hw init
8558  *
8559  * @rdev: radeon_device pointer
8560  *
8561  * Setup asic specific driver variables and program the hw
8562  * to a functional state (CIK).
8563  * Called at driver startup.
8564  * Returns 0 for success, errors for failure.
8565  */
8566 int cik_init(struct radeon_device *rdev)
8567 {
8568 	struct radeon_ring *ring;
8569 	int r;
8570 
8571 	/* Read BIOS */
8572 	if (!radeon_get_bios(rdev)) {
8573 		if (ASIC_IS_AVIVO(rdev))
8574 			return -EINVAL;
8575 	}
8576 	/* Must be an ATOMBIOS */
8577 	if (!rdev->is_atom_bios) {
8578 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8579 		return -EINVAL;
8580 	}
8581 	r = radeon_atombios_init(rdev);
8582 	if (r)
8583 		return r;
8584 
8585 	/* Post card if necessary */
8586 	if (!radeon_card_posted(rdev)) {
8587 		if (!rdev->bios) {
8588 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8589 			return -EINVAL;
8590 		}
8591 		DRM_INFO("GPU not posted. posting now...\n");
8592 		atom_asic_init(rdev->mode_info.atom_context);
8593 	}
8594 	/* init golden registers */
8595 	cik_init_golden_registers(rdev);
8596 	/* Initialize scratch registers */
8597 	cik_scratch_init(rdev);
8598 	/* Initialize surface registers */
8599 	radeon_surface_init(rdev);
8600 	/* Initialize clocks */
8601 	radeon_get_clock_info(rdev->ddev);
8602 
8603 	/* Fence driver */
8604 	r = radeon_fence_driver_init(rdev);
8605 	if (r)
8606 		return r;
8607 
8608 	/* initialize memory controller */
8609 	r = cik_mc_init(rdev);
8610 	if (r)
8611 		return r;
8612 	/* Memory manager */
8613 	r = radeon_bo_init(rdev);
8614 	if (r)
8615 		return r;
8616 
8617 	if (rdev->flags & RADEON_IS_IGP) {
8618 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8619 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8620 			r = cik_init_microcode(rdev);
8621 			if (r) {
8622 				DRM_ERROR("Failed to load firmware!\n");
8623 				return r;
8624 			}
8625 		}
8626 	} else {
8627 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8628 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8629 		    !rdev->mc_fw) {
8630 			r = cik_init_microcode(rdev);
8631 			if (r) {
8632 				DRM_ERROR("Failed to load firmware!\n");
8633 				return r;
8634 			}
8635 		}
8636 	}
8637 
8638 	/* Initialize power management */
8639 	radeon_pm_init(rdev);
8640 
8641 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8642 	ring->ring_obj = NULL;
8643 	r600_ring_init(rdev, ring, 1024 * 1024);
8644 
8645 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8646 	ring->ring_obj = NULL;
8647 	r600_ring_init(rdev, ring, 1024 * 1024);
8648 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8649 	if (r)
8650 		return r;
8651 
8652 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8653 	ring->ring_obj = NULL;
8654 	r600_ring_init(rdev, ring, 1024 * 1024);
8655 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8656 	if (r)
8657 		return r;
8658 
8659 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8660 	ring->ring_obj = NULL;
8661 	r600_ring_init(rdev, ring, 256 * 1024);
8662 
8663 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8664 	ring->ring_obj = NULL;
8665 	r600_ring_init(rdev, ring, 256 * 1024);
8666 
8667 	cik_uvd_init(rdev);
8668 	cik_vce_init(rdev);
8669 
8670 	rdev->ih.ring_obj = NULL;
8671 	r600_ih_ring_init(rdev, 64 * 1024);
8672 
8673 	r = r600_pcie_gart_init(rdev);
8674 	if (r)
8675 		return r;
8676 
8677 	rdev->accel_working = true;
8678 	r = cik_startup(rdev);
8679 	if (r) {
8680 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8681 		cik_cp_fini(rdev);
8682 		cik_sdma_fini(rdev);
8683 		cik_irq_fini(rdev);
8684 		sumo_rlc_fini(rdev);
8685 		cik_mec_fini(rdev);
8686 		radeon_wb_fini(rdev);
8687 		radeon_ib_pool_fini(rdev);
8688 		radeon_vm_manager_fini(rdev);
8689 		radeon_irq_kms_fini(rdev);
8690 		cik_pcie_gart_fini(rdev);
8691 		rdev->accel_working = false;
8692 	}
8693 
8694 	/* Don't start up if the MC ucode is missing.
8695 	 * The default clocks and voltages before the MC ucode
8696 	 * is loaded are not suffient for advanced operations.
8697 	 */
8698 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8699 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8700 		return -EINVAL;
8701 	}
8702 
8703 	return 0;
8704 }
8705 
/**
 * cik_fini - asic specific driver and hw fini
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the asic specific driver variables and program the hw
 * to an idle state (CIK).
 * Called at driver unload.
 */
void cik_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	/* stop and free the engines before their shared infrastructure */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* release the BIOS copy taken during init */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
8741 
8742 void dce8_program_fmt(struct drm_encoder *encoder)
8743 {
8744 	struct drm_device *dev = encoder->dev;
8745 	struct radeon_device *rdev = dev->dev_private;
8746 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8747 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8748 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8749 	int bpc = 0;
8750 	u32 tmp = 0;
8751 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8752 
8753 	if (connector) {
8754 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8755 		bpc = radeon_get_monitor_bpc(connector);
8756 		dither = radeon_connector->dither;
8757 	}
8758 
8759 	/* LVDS/eDP FMT is set up by atom */
8760 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8761 		return;
8762 
8763 	/* not needed for analog */
8764 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8765 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8766 		return;
8767 
8768 	if (bpc == 0)
8769 		return;
8770 
8771 	switch (bpc) {
8772 	case 6:
8773 		if (dither == RADEON_FMT_DITHER_ENABLE)
8774 			/* XXX sort out optimal dither settings */
8775 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8776 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8777 		else
8778 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8779 		break;
8780 	case 8:
8781 		if (dither == RADEON_FMT_DITHER_ENABLE)
8782 			/* XXX sort out optimal dither settings */
8783 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8784 				FMT_RGB_RANDOM_ENABLE |
8785 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8786 		else
8787 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8788 		break;
8789 	case 10:
8790 		if (dither == RADEON_FMT_DITHER_ENABLE)
8791 			/* XXX sort out optimal dither settings */
8792 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8793 				FMT_RGB_RANDOM_ENABLE |
8794 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8795 		else
8796 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8797 		break;
8798 	default:
8799 		/* not needed */
8800 		break;
8801 	}
8802 
8803 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8804 }
8805 
8806 /* display watermark setup */
8807 /**
8808  * dce8_line_buffer_adjust - Set up the line buffer
8809  *
8810  * @rdev: radeon_device pointer
8811  * @radeon_crtc: the selected display controller
8812  * @mode: the current display mode on the selected display
8813  * controller
8814  *
8815  * Setup up the line buffer allocation for
8816  * the selected display controller (CIK).
8817  * Returns the line buffer size in pixels.
8818  */
8819 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8820 				   struct radeon_crtc *radeon_crtc,
8821 				   struct drm_display_mode *mode)
8822 {
8823 	u32 tmp, buffer_alloc, i;
8824 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8825 	/*
8826 	 * Line Buffer Setup
8827 	 * There are 6 line buffers, one for each display controllers.
8828 	 * There are 3 partitions per LB. Select the number of partitions
8829 	 * to enable based on the display width.  For display widths larger
8830 	 * than 4096, you need use to use 2 display controllers and combine
8831 	 * them using the stereo blender.
8832 	 */
8833 	if (radeon_crtc->base.enabled && mode) {
8834 		if (mode->crtc_hdisplay < 1920) {
8835 			tmp = 1;
8836 			buffer_alloc = 2;
8837 		} else if (mode->crtc_hdisplay < 2560) {
8838 			tmp = 2;
8839 			buffer_alloc = 2;
8840 		} else if (mode->crtc_hdisplay < 4096) {
8841 			tmp = 0;
8842 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8843 		} else {
8844 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8845 			tmp = 0;
8846 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8847 		}
8848 	} else {
8849 		tmp = 1;
8850 		buffer_alloc = 0;
8851 	}
8852 
8853 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8854 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8855 
8856 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8857 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8858 	for (i = 0; i < rdev->usec_timeout; i++) {
8859 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8860 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8861 			break;
8862 		udelay(1);
8863 	}
8864 
8865 	if (radeon_crtc->base.enabled && mode) {
8866 		switch (tmp) {
8867 		case 0:
8868 		default:
8869 			return 4096 * 2;
8870 		case 1:
8871 			return 1920 * 2;
8872 		case 2:
8873 			return 2560 * 2;
8874 		}
8875 	}
8876 
8877 	/* controller not enabled, so no lb used */
8878 	return 0;
8879 }
8880 
8881 /**
8882  * cik_get_number_of_dram_channels - get the number of dram channels
8883  *
8884  * @rdev: radeon_device pointer
8885  *
8886  * Look up the number of video ram channels (CIK).
8887  * Used for display watermark bandwidth calculations
8888  * Returns the number of dram channels
8889  */
8890 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8891 {
8892 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8893 
8894 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8895 	case 0:
8896 	default:
8897 		return 1;
8898 	case 1:
8899 		return 2;
8900 	case 2:
8901 		return 4;
8902 	case 3:
8903 		return 8;
8904 	case 4:
8905 		return 3;
8906 	case 5:
8907 		return 6;
8908 	case 6:
8909 		return 10;
8910 	case 7:
8911 		return 12;
8912 	case 8:
8913 		return 16;
8914 	}
8915 }
8916 
/* dce8_wm_params - per-head inputs to the DCE8 watermark calculations below */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8932 
8933 /**
8934  * dce8_dram_bandwidth - get the dram bandwidth
8935  *
8936  * @wm: watermark calculation data
8937  *
8938  * Calculate the raw dram bandwidth (CIK).
8939  * Used for display watermark bandwidth calculations
8940  * Returns the dram bandwidth in MBytes/s
8941  */
8942 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8943 {
8944 	/* Calculate raw DRAM Bandwidth */
8945 	fixed20_12 dram_efficiency; /* 0.7 */
8946 	fixed20_12 yclk, dram_channels, bandwidth;
8947 	fixed20_12 a;
8948 
8949 	a.full = dfixed_const(1000);
8950 	yclk.full = dfixed_const(wm->yclk);
8951 	yclk.full = dfixed_div(yclk, a);
8952 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8953 	a.full = dfixed_const(10);
8954 	dram_efficiency.full = dfixed_const(7);
8955 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8956 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8957 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8958 
8959 	return dfixed_trunc(bandwidth);
8960 }
8961 
8962 /**
8963  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8964  *
8965  * @wm: watermark calculation data
8966  *
8967  * Calculate the dram bandwidth used for display (CIK).
8968  * Used for display watermark bandwidth calculations
8969  * Returns the dram bandwidth for display in MBytes/s
8970  */
8971 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8972 {
8973 	/* Calculate DRAM Bandwidth and the part allocated to display. */
8974 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8975 	fixed20_12 yclk, dram_channels, bandwidth;
8976 	fixed20_12 a;
8977 
8978 	a.full = dfixed_const(1000);
8979 	yclk.full = dfixed_const(wm->yclk);
8980 	yclk.full = dfixed_div(yclk, a);
8981 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8982 	a.full = dfixed_const(10);
8983 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8984 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8985 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8986 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8987 
8988 	return dfixed_trunc(bandwidth);
8989 }
8990 
8991 /**
8992  * dce8_data_return_bandwidth - get the data return bandwidth
8993  *
8994  * @wm: watermark calculation data
8995  *
8996  * Calculate the data return bandwidth used for display (CIK).
8997  * Used for display watermark bandwidth calculations
8998  * Returns the data return bandwidth in MBytes/s
8999  */
9000 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9001 {
9002 	/* Calculate the display Data return Bandwidth */
9003 	fixed20_12 return_efficiency; /* 0.8 */
9004 	fixed20_12 sclk, bandwidth;
9005 	fixed20_12 a;
9006 
9007 	a.full = dfixed_const(1000);
9008 	sclk.full = dfixed_const(wm->sclk);
9009 	sclk.full = dfixed_div(sclk, a);
9010 	a.full = dfixed_const(10);
9011 	return_efficiency.full = dfixed_const(8);
9012 	return_efficiency.full = dfixed_div(return_efficiency, a);
9013 	a.full = dfixed_const(32);
9014 	bandwidth.full = dfixed_mul(a, sclk);
9015 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9016 
9017 	return dfixed_trunc(bandwidth);
9018 }
9019 
9020 /**
9021  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9022  *
9023  * @wm: watermark calculation data
9024  *
9025  * Calculate the dmif bandwidth used for display (CIK).
9026  * Used for display watermark bandwidth calculations
9027  * Returns the dmif bandwidth in MBytes/s
9028  */
9029 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9030 {
9031 	/* Calculate the DMIF Request Bandwidth */
9032 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9033 	fixed20_12 disp_clk, bandwidth;
9034 	fixed20_12 a, b;
9035 
9036 	a.full = dfixed_const(1000);
9037 	disp_clk.full = dfixed_const(wm->disp_clk);
9038 	disp_clk.full = dfixed_div(disp_clk, a);
9039 	a.full = dfixed_const(32);
9040 	b.full = dfixed_mul(a, disp_clk);
9041 
9042 	a.full = dfixed_const(10);
9043 	disp_clk_request_efficiency.full = dfixed_const(8);
9044 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9045 
9046 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9047 
9048 	return dfixed_trunc(bandwidth);
9049 }
9050 
9051 /**
9052  * dce8_available_bandwidth - get the min available bandwidth
9053  *
9054  * @wm: watermark calculation data
9055  *
9056  * Calculate the min available bandwidth used for display (CIK).
9057  * Used for display watermark bandwidth calculations
9058  * Returns the min available bandwidth in MBytes/s
9059  */
9060 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9061 {
9062 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9063 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9064 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9065 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9066 
9067 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9068 }
9069 
9070 /**
9071  * dce8_average_bandwidth - get the average available bandwidth
9072  *
9073  * @wm: watermark calculation data
9074  *
9075  * Calculate the average available bandwidth used for display (CIK).
9076  * Used for display watermark bandwidth calculations
9077  * Returns the average available bandwidth in MBytes/s
9078  */
9079 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9080 {
9081 	/* Calculate the display mode Average Bandwidth
9082 	 * DisplayMode should contain the source and destination dimensions,
9083 	 * timing, etc.
9084 	 */
9085 	fixed20_12 bpp;
9086 	fixed20_12 line_time;
9087 	fixed20_12 src_width;
9088 	fixed20_12 bandwidth;
9089 	fixed20_12 a;
9090 
9091 	a.full = dfixed_const(1000);
9092 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9093 	line_time.full = dfixed_div(line_time, a);
9094 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9095 	src_width.full = dfixed_const(wm->src_width);
9096 	bandwidth.full = dfixed_mul(src_width, bpp);
9097 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9098 	bandwidth.full = dfixed_div(bandwidth, line_time);
9099 
9100 	return dfixed_trunc(bandwidth);
9101 }
9102 
/**
 * dce8_latency_watermark - get the latency watermark
 *
 * @wm: watermark calculation data
 *
 * Calculate the latency watermark (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the latency watermark in ns
 */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	/* no active heads: nothing to hide latency for */
	if (wm->num_heads == 0)
		return 0;

	/* scaling/interlacing determine how many source lines feed one
	 * destination line */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* line buffer fill bandwidth: limited by this head's share of the
	 * available bandwidth and by the DMIF capacity */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);
	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
	tmp = min(dfixed_trunc(a), tmp);

	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);

	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if a line can't be filled within the active period, the extra
	 * fill time adds to the latency that must be hidden */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
9161 
9162 /**
9163  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9164  * average and available dram bandwidth
9165  *
9166  * @wm: watermark calculation data
9167  *
9168  * Check if the display average bandwidth fits in the display
9169  * dram bandwidth (CIK).
9170  * Used for display watermark bandwidth calculations
9171  * Returns true if the display fits, false if not.
9172  */
9173 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9174 {
9175 	if (dce8_average_bandwidth(wm) <=
9176 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9177 		return true;
9178 	else
9179 		return false;
9180 }
9181 
9182 /**
9183  * dce8_average_bandwidth_vs_available_bandwidth - check
9184  * average and available bandwidth
9185  *
9186  * @wm: watermark calculation data
9187  *
9188  * Check if the display average bandwidth fits in the display
9189  * available bandwidth (CIK).
9190  * Used for display watermark bandwidth calculations
9191  * Returns true if the display fits, false if not.
9192  */
9193 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9194 {
9195 	if (dce8_average_bandwidth(wm) <=
9196 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9197 		return true;
9198 	else
9199 		return false;
9200 }
9201 
9202 /**
9203  * dce8_check_latency_hiding - check latency hiding
9204  *
9205  * @wm: watermark calculation data
9206  *
9207  * Check latency hiding (CIK).
9208  * Used for display watermark bandwidth calculations
9209  * Returns true if the display fits, false if not.
9210  */
9211 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9212 {
9213 	u32 lb_partitions = wm->lb_size / wm->src_width;
9214 	u32 line_time = wm->active_time + wm->blank_time;
9215 	u32 latency_tolerant_lines;
9216 	u32 latency_hiding;
9217 	fixed20_12 a;
9218 
9219 	a.full = dfixed_const(1);
9220 	if (wm->vsc.full > a.full)
9221 		latency_tolerant_lines = 1;
9222 	else {
9223 		if (lb_partitions <= (wm->vtaps + 1))
9224 			latency_tolerant_lines = 1;
9225 		else
9226 			latency_tolerant_lines = 2;
9227 	}
9228 
9229 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9230 
9231 	if (dce8_latency_watermark(wm) <= latency_hiding)
9232 		return true;
9233 	else
9234 		return false;
9235 }
9236 
/**
 * dce8_program_watermarks - program display watermarks
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @lb_size: line buffer size
 * @num_heads: number of display controllers in use
 *
 * Calculate and program the display watermarks for the
 * selected display controller (CIK).
 */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm_low, wm_high;
	u32 active_time;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* active/line time in ns; line_time is clamped to fit the
		 * 16-bit LATENCY_HIGH_WATERMARK field */
		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
					    (u32)mode->clock);
		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
					  (u32)mode->clock);
		line_time = min(line_time, (u32)65535);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = active_time;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_high.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce8_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = active_time;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_low.num_heads = num_heads;

		/* set for low clocks */
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce8_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* Save number of lines the linebuffer leads before the scanout */
		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
9375 
9376 /**
9377  * dce8_bandwidth_update - program display watermarks
9378  *
9379  * @rdev: radeon_device pointer
9380  *
9381  * Calculate and program the display watermarks and line
9382  * buffer allocation (CIK).
9383  */
9384 void dce8_bandwidth_update(struct radeon_device *rdev)
9385 {
9386 	struct drm_display_mode *mode = NULL;
9387 	u32 num_heads = 0, lb_size;
9388 	int i;
9389 
9390 	if (!rdev->mode_info.mode_config_initialized)
9391 		return;
9392 
9393 	radeon_update_display_priority(rdev);
9394 
9395 	for (i = 0; i < rdev->num_crtc; i++) {
9396 		if (rdev->mode_info.crtcs[i]->base.enabled)
9397 			num_heads++;
9398 	}
9399 	for (i = 0; i < rdev->num_crtc; i++) {
9400 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9401 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9402 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9403 	}
9404 }
9405 
9406 /**
9407  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9408  *
9409  * @rdev: radeon_device pointer
9410  *
9411  * Fetches a GPU clock counter snapshot (SI).
9412  * Returns the 64 bit clock counter snapshot.
9413  */
9414 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9415 {
9416 	uint64_t clock;
9417 
9418 	mutex_lock(&rdev->gpu_clock_mutex);
9419 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9420 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9421 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9422 	mutex_unlock(&rdev->gpu_clock_mutex);
9423 	return clock;
9424 }
9425 
9426 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9427 			     u32 cntl_reg, u32 status_reg)
9428 {
9429 	int r, i;
9430 	struct atom_clock_dividers dividers;
9431 	uint32_t tmp;
9432 
9433 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9434 					   clock, false, &dividers);
9435 	if (r)
9436 		return r;
9437 
9438 	tmp = RREG32_SMC(cntl_reg);
9439 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9440 	tmp |= dividers.post_divider;
9441 	WREG32_SMC(cntl_reg, tmp);
9442 
9443 	for (i = 0; i < 100; i++) {
9444 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9445 			break;
9446 		mdelay(10);
9447 	}
9448 	if (i == 100)
9449 		return -ETIMEDOUT;
9450 
9451 	return 0;
9452 }
9453 
9454 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9455 {
9456 	int r = 0;
9457 
9458 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9459 	if (r)
9460 		return r;
9461 
9462 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9463 	return r;
9464 }
9465 
9466 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9467 {
9468 	int r, i;
9469 	struct atom_clock_dividers dividers;
9470 	u32 tmp;
9471 
9472 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9473 					   ecclk, false, &dividers);
9474 	if (r)
9475 		return r;
9476 
9477 	for (i = 0; i < 100; i++) {
9478 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9479 			break;
9480 		mdelay(10);
9481 	}
9482 	if (i == 100)
9483 		return -ETIMEDOUT;
9484 
9485 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9486 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9487 	tmp |= dividers.post_divider;
9488 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9489 
9490 	for (i = 0; i < 100; i++) {
9491 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9492 			break;
9493 		mdelay(10);
9494 	}
9495 	if (i == 100)
9496 		return -ETIMEDOUT;
9497 
9498 	return 0;
9499 }
9500 
/*
 * cik_pcie_gen3_enable - attempt to raise the PCIE link to gen2/gen3 speed
 *
 * @rdev: radeon_device pointer
 *
 * Reads the upstream bridge's supported link speed and, when the link is
 * not already running at that rate, performs the gen3 equalization retry
 * sequence (if targeting 8.0GT/s) and then requests the new target link
 * speed.  Silently returns for root-bus devices, IGPs, non-PCIE parts,
 * unknown bridge capabilities, or when disabled via radeon.pcie_gen2=0.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	enum pci_bus_speed speed_cap;
	u32 speed_cntl, current_data_rate;
	int i;
	u16 tmp16;

	/* No upstream bridge to negotiate with. */
	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	/* Speed changes disabled via the radeon.pcie_gen2 module parameter. */
	if (radeon_pcie_gen2 == 0)
		return;

	/* IGPs have no external PCIE link. */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	speed_cap = pcie_get_speed_cap(root);
	if (speed_cap == PCI_SPEED_UNKNOWN)
		return;

	/* Nothing to gain unless the bridge supports gen2 or gen3. */
	if ((speed_cap != PCIE_SPEED_8_0GT) &&
	    (speed_cap != PCIE_SPEED_5_0GT))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	/* Bail early if the link is already at the target rate
	 * (data rate 2 = gen3, 1 = gen2). */
	if (speed_cap == PCIE_SPEED_8_0GT) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (speed_cap == PCIE_SPEED_5_0GT) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* Both ends need a PCIE capability for the config accesses below. */
	if (!pci_is_pcie(root) || !pci_is_pcie(rdev->pdev))
		return;

	if (speed_cap == PCIE_SPEED_8_0GT) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* Save LNKCTL on both ends so the HAWD bit can be
			 * restored after the equalization loop. */
			pcie_capability_read_word(root, PCI_EXP_LNKCTL,
						  &bridge_cfg);
			pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL,
						  &gpu_cfg);

			/* Set hardware autonomous width disable (HAWD) on
			 * both the bridge and the GPU. */
			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL,
						   tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* If the link trained narrower than its detected
			 * maximum and renegotiation is supported, request a
			 * width reconfiguration back to the maximum. */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* Redo link equalization up to 10 times, stopping
			 * early once no transactions are pending. */
			for (i = 0; i < 10; i++) {
				/* check status */
				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_DEVSTA,
							  &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				/* Re-snapshot LNKCTL/LNKCTL2 so the HAWD and
				 * compliance/margin bits can be replayed after
				 * the equalization redo. */
				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
							  &bridge_cfg);
				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_LNKCTL,
							  &gpu_cfg);

				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
							  &bridge_cfg2);
				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_LNKCTL2,
							  &gpu_cfg2);

				/* Quiesce the link, then trigger an
				 * equalization redo. */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				drm_msleep(100);

				/* linkctl */
				/* Restore only the saved HAWD bit, leaving the
				 * other LNKCTL bits at their current values. */
				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
							  &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pcie_capability_write_word(root, PCI_EXP_LNKCTL,
							   tmp16);

				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_LNKCTL,
							  &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pcie_capability_write_word(rdev->pdev,
							   PCI_EXP_LNKCTL,
							   tmp16);

				/* linkctl2 */
				/* Likewise restore only the enter-compliance
				 * and transmit-margin fields of LNKCTL2. */
				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
							  &tmp16);
				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
					   PCI_EXP_LNKCTL2_TX_MARGIN);
				tmp16 |= (bridge_cfg2 &
					  (PCI_EXP_LNKCTL2_ENTER_COMP |
					   PCI_EXP_LNKCTL2_TX_MARGIN));
				pcie_capability_write_word(root,
							   PCI_EXP_LNKCTL2,
							   tmp16);

				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_LNKCTL2,
							  &tmp16);
				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
					   PCI_EXP_LNKCTL2_TX_MARGIN);
				tmp16 |= (gpu_cfg2 &
					  (PCI_EXP_LNKCTL2_ENTER_COMP |
					   PCI_EXP_LNKCTL2_TX_MARGIN));
				pcie_capability_write_word(rdev->pdev,
							   PCI_EXP_LNKCTL2,
							   tmp16);

				/* Release the quiesce before the next pass. */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	/* Hand speed-change control to software. */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* Program the target link speed (TLS) in the GPU's LNKCTL2. */
	pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
	if (speed_cap == PCIE_SPEED_8_0GT)
		tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
	else if (speed_cap == PCIE_SPEED_5_0GT)
		tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
	else
		tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
	pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL2, tmp16);

	/* Kick off the speed change and wait (usec granularity) for the
	 * hardware to clear the initiate bit. */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9686 
/*
 * cik_program_aspm - program PCIE Active State Power Management
 *
 * @rdev: radeon_device pointer
 *
 * Configures the L0s/L1 inactivity timers, allows the PCIE PLLs to power
 * down in L1, and — when the upstream bridge advertises clock power
 * management — reprograms several SMC clock selects.  Skipped entirely
 * for IGPs, non-PCIE parts, or when disabled via radeon.aspm=0.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	/* Local policy knobs; all four features are currently enabled
	 * unconditionally (the flags are never set elsewhere). */
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* ASPM disabled via the radeon.aspm module parameter. */
	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* Override the transmitted N_FTS (fast training sequence) count.
	 * All register writes below are skipped when the value is already
	 * correct, to avoid redundant MMIO. */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* Program the L0s/L1 inactivity timers.  Note data is written back
	 * either inside the !disable_l1 branch or in the else below. */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* Allow the PLLs to power down in the OFF/TXS2 states
			 * on both PIF blocks (lanes 0-7 and 8-15). */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* CLKREQ handling is only enabled when the upstream
			 * bridge advertises clock PM in its link caps. */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* Re-select the thermal monitor, deep sleep,
				 * ZCLK, XCLK, BIF refclk and MPLL bypass clock
				 * sources via the SMC.  NOTE(review): the
				 * exact clock routing implied by these selects
				 * is inferred from the register names only —
				 * verify against the SMC documentation. */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: just commit the timer setup from above. */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* Enable light sleep for the BIF slave/master/replay memories
	 * (naming per the *_MEM_LS_EN bits). */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* Turn the L0s inactivity timer back off when the link
		 * reports the maximum N_FTS value together with lane
		 * reversal in both directions. */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
9835