/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_audio.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"

#define SH_MEM_CONFIG_GFX_DEFAULT \
	ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");
MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");

MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");
MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");

MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");

static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

/**
 * cik_get_allowed_info_register - fetch the register for the info ioctl
 *
 * @rdev: radeon_device pointer
 * @reg: register offset in bytes
 * @val: register value
 *
 * Returns 0 for success or -EINVAL for an invalid register
 */
int cik_get_allowed_info_register(struct radeon_device *rdev,
				  u32 reg, u32 *val)
{
	switch (reg) {
	case GRBM_STATUS:
	case GRBM_STATUS2:
	case GRBM_STATUS_SE0:
	case GRBM_STATUS_SE1:
	case GRBM_STATUS_SE2:
	case GRBM_STATUS_SE3:
	case SRBM_STATUS:
	case SRBM_STATUS2:
	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
	case UVD_STATUS:
	/* TODO VCE */
		*val = RREG32(reg);
		return 0;
	default:
		return -EINVAL;
	}
}
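
/*
 * Usage sketch (illustrative, not part of this file): the generic
 * RADEON_INFO ioctl path is what funnels userspace register reads
 * through this accessor, roughly:
 *
 *	u32 val;
 *
 *	if (cik_get_allowed_info_register(rdev, GRBM_STATUS, &val) == 0)
 *		DRM_DEBUG("GRBM_STATUS = 0x%08x\n", val);
 */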

/*
 * Indirect registers accessor
 */
u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	r = RREG32(CIK_DIDT_IND_DATA);
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
	return r;
}

void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	WREG32(CIK_DIDT_IND_DATA, (v));
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
}
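
/*
 * Sketch (illustrative, not part of the driver): each single access is
 * atomic under didt_idx_lock, but a read-modify-write spans two
 * index/data cycles and is not atomic as a whole.  SOME_BIT below is a
 * placeholder, not a real register field:
 *
 *	u32 tmp = cik_didt_rreg(rdev, reg);
 *	tmp |= SOME_BIT;
 *	cik_didt_wreg(rdev, reg, tmp);
 */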

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
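
/*
 * Worked example (illustrative): a raw CTF_TEMP field of 0x62 (98) has
 * bit 9 clear, so actual_temp = 0x62 & 0x1ff = 98 and 98000
 * millidegrees is returned; any reading with bit 9 set saturates to
 * 255000.
 */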

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
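
/*
 * Worked example (illustrative): assuming the undocumented SMC offset
 * 0xC0300E0C reads back 1000, the result is (1000 / 8) - 49 = 76,
 * returned as 76000 millidegrees; a zero reading is reported as 0.
 */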

/*
 * Indirect registers accessor
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}

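/*
 * Note (descriptive): the (void) read-backs in the PCIE accessors above
 * flush the posted index write so the following data access really hits
 * the register the index selected, before the lock is dropped.
 */

/*
 * Format note (best-effort reading, not from the original comments):
 * the RLC save/restore lists below appear to be pairs of
 * ((instance/broadcast select << 16) | (register byte offset >> 2))
 * followed by a default value, with bare count words (0x3, 0x5)
 * separating sub-blocks; they are handed to the RLC setup code via
 * rdev->rlc.reg_list.
 */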
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

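/*
 * The golden-register tables below are consumed three words at a time
 * by radeon_program_register_sequence() as (offset, and_mask, or_value)
 * triplets: the bits in and_mask are cleared from the current register
 * value and or_value is OR'd in, while an and_mask of 0xffffffff writes
 * or_value directly.
 */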
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};


static void cik_init_golden_registers(struct radeon_device *rdev)
{
	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
	mutex_lock(&rdev->grbm_idx_mutex);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_MULLINS:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 godavari_golden_registers,
						 (const u32)ARRAY_SIZE(godavari_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
	mutex_unlock(&rdev->grbm_idx_mutex);
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}
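
/*
 * Worked example (illustrative): on a discrete board with a 100 MHz
 * SPLL reference clock and XTALIN_DIVIDE set, cik_get_xclk() returns
 * reference_clock / 4, i.e. 25 MHz (radeon commonly stores these
 * clocks in 10 kHz units, so a stored 10000 becomes 2500).
 */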

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
{
	if (index < rdev->doorbell.num_doorbells) {
		return readl(rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
{
	if (index < rdev->doorbell.num_doorbells) {
		writel(v, rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}
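
/*
 * Usage sketch (illustrative): ring code kicks a doorbell-backed queue
 * by writing the ring's write pointer to its doorbell slot, roughly:
 *
 *	cik_mm_wdoorbell(rdev, ring->doorbell_index, ring->wptr);
 */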
1754 
1755 #define BONAIRE_IO_MC_REGS_SIZE 36
1756 
1757 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1758 {
1759 	{0x00000070, 0x04400000},
1760 	{0x00000071, 0x80c01803},
1761 	{0x00000072, 0x00004004},
1762 	{0x00000073, 0x00000100},
1763 	{0x00000074, 0x00ff0000},
1764 	{0x00000075, 0x34000000},
1765 	{0x00000076, 0x08000014},
1766 	{0x00000077, 0x00cc08ec},
1767 	{0x00000078, 0x00000400},
1768 	{0x00000079, 0x00000000},
1769 	{0x0000007a, 0x04090000},
1770 	{0x0000007c, 0x00000000},
1771 	{0x0000007e, 0x4408a8e8},
1772 	{0x0000007f, 0x00000304},
1773 	{0x00000080, 0x00000000},
1774 	{0x00000082, 0x00000001},
1775 	{0x00000083, 0x00000002},
1776 	{0x00000084, 0xf3e4f400},
1777 	{0x00000085, 0x052024e3},
1778 	{0x00000087, 0x00000000},
1779 	{0x00000088, 0x01000000},
1780 	{0x0000008a, 0x1c0a0000},
1781 	{0x0000008b, 0xff010000},
1782 	{0x0000008d, 0xffffefff},
1783 	{0x0000008e, 0xfff3efff},
1784 	{0x0000008f, 0xfff3efbf},
1785 	{0x00000092, 0xf7ffffff},
1786 	{0x00000093, 0xffffff7f},
1787 	{0x00000095, 0x00101101},
1788 	{0x00000096, 0x00000fff},
1789 	{0x00000097, 0x00116fff},
1790 	{0x00000098, 0x60010000},
1791 	{0x00000099, 0x10010000},
1792 	{0x0000009a, 0x00006000},
1793 	{0x0000009b, 0x00001000},
1794 	{0x0000009f, 0x00b48000}
1795 };
1796 
1797 #define HAWAII_IO_MC_REGS_SIZE 22
1798 
1799 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1800 {
1801 	{0x0000007d, 0x40000000},
1802 	{0x0000007e, 0x40180304},
1803 	{0x0000007f, 0x0000ff00},
1804 	{0x00000081, 0x00000000},
1805 	{0x00000083, 0x00000800},
1806 	{0x00000086, 0x00000000},
1807 	{0x00000087, 0x00000100},
1808 	{0x00000088, 0x00020100},
1809 	{0x00000089, 0x00000000},
1810 	{0x0000008b, 0x00040000},
1811 	{0x0000008c, 0x00000100},
1812 	{0x0000008e, 0xff010000},
1813 	{0x00000090, 0xffffefff},
1814 	{0x00000091, 0xfff3efff},
1815 	{0x00000092, 0xfff3efbf},
1816 	{0x00000093, 0xf7ffffff},
1817 	{0x00000094, 0xffffff7f},
1818 	{0x00000095, 0x00000fff},
1819 	{0x00000096, 0x00116fff},
1820 	{0x00000097, 0x60010000},
1821 	{0x00000098, 0x10010000},
1822 	{0x0000009f, 0x00c79000}
1823 };
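/*
 * Each row in the two tables above is an {MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA} pair; ci_mc_load_microcode() below walks the
 * selected table and writes the index and data registers back to back
 * before uploading the MC ucode itself.
 */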
1824 
1825 
1826 /**
1827  * cik_srbm_select - select specific register instances
1828  *
1829  * @rdev: radeon_device pointer
1830  * @me: selected ME (micro engine)
1831  * @pipe: pipe
1832  * @queue: queue
1833  * @vmid: VMID
1834  *
1835  * Switches the currently active register instances.  Some
1836  * registers are instanced per VMID, others are instanced per
1837  * me/pipe/queue combination.
1838  */
1839 static void cik_srbm_select(struct radeon_device *rdev,
1840 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1841 {
1842 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1843 			     MEID(me & 0x3) |
1844 			     VMID(vmid & 0xf) |
1845 			     QUEUEID(queue & 0x7));
1846 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1847 }
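/*
 * Callers bracket a select/program/restore sequence with rdev->srbm_mutex
 * so concurrent users cannot clobber the selection, e.g.:
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, vmid);
 *	... program the instanced registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */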
1848 
1849 /* ucode loading */
1850 /**
1851  * ci_mc_load_microcode - load MC ucode into the hw
1852  *
1853  * @rdev: radeon_device pointer
1854  *
1855  * Load the GDDR MC ucode into the hw (CIK).
1856  * Returns 0 on success, error on failure.
1857  */
1858 int ci_mc_load_microcode(struct radeon_device *rdev)
1859 {
1860 	const __be32 *fw_data = NULL;
1861 	const __le32 *new_fw_data = NULL;
1862 	u32 running, tmp;
1863 	u32 *io_mc_regs = NULL;
1864 	const __le32 *new_io_mc_regs = NULL;
1865 	int i, regs_size, ucode_size;
1866 
1867 	if (!rdev->mc_fw)
1868 		return -EINVAL;
1869 
1870 	if (rdev->new_fw) {
1871 		const struct mc_firmware_header_v1_0 *hdr =
1872 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1873 
1874 		radeon_ucode_print_mc_hdr(&hdr->header);
1875 
1876 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1877 		new_io_mc_regs = (const __le32 *)
1878 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1879 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1880 		new_fw_data = (const __le32 *)
1881 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1882 	} else {
1883 		ucode_size = rdev->mc_fw->datasize / 4;
1884 
1885 		switch (rdev->family) {
1886 		case CHIP_BONAIRE:
1887 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1888 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1889 			break;
1890 		case CHIP_HAWAII:
1891 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1892 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1893 			break;
1894 		default:
1895 			return -EINVAL;
1896 		}
1897 		fw_data = (const __be32 *)rdev->mc_fw->data;
1898 	}
1899 
1900 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1901 
1902 	if (running == 0) {
1903 		/* reset the engine and set to writable */
1904 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1905 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1906 
1907 		/* load mc io regs */
1908 		for (i = 0; i < regs_size; i++) {
1909 			if (rdev->new_fw) {
1910 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1911 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1912 			} else {
1913 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1914 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1915 			}
1916 		}
1917 
1918 		tmp = RREG32(MC_SEQ_MISC0);
1919 		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1920 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1921 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1922 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1923 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1924 		}
1925 
1926 		/* load the MC ucode */
1927 		for (i = 0; i < ucode_size; i++) {
1928 			if (rdev->new_fw)
1929 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1930 			else
1931 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1932 		}
1933 
1934 		/* put the engine back into the active state */
1935 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1936 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1937 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1938 
1939 		/* wait for training to complete */
1940 		for (i = 0; i < rdev->usec_timeout; i++) {
1941 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1942 				break;
1943 			udelay(1);
1944 		}
1945 		for (i = 0; i < rdev->usec_timeout; i++) {
1946 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1947 				break;
1948 			udelay(1);
1949 		}
1950 	}
1951 
1952 	return 0;
1953 }
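/*
 * Note: when RUN_MASK shows the MC sequencer already running (e.g. the
 * ucode was loaded by the vBIOS), the whole upload above is skipped and
 * the function returns success.  On a fresh load, the two bounded polls
 * wait up to rdev->usec_timeout iterations each for TRAIN_DONE_D0/D1,
 * and the function returns 0 either way.
 */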
1954 
1955 /**
1956  * cik_init_microcode - load ucode images from disk
1957  *
1958  * @rdev: radeon_device pointer
1959  *
1960  * Use the firmware interface to load the ucode images into
1961  * the driver (not loaded into hw).
1962  * Returns 0 on success, error on failure.
1963  */
1964 static int cik_init_microcode(struct radeon_device *rdev)
1965 {
1966 	const char *chip_name;
1967 	const char *new_chip_name;
1968 	size_t pfp_req_size, me_req_size, ce_req_size,
1969 		mec_req_size, rlc_req_size, mc_req_size = 0,
1970 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1971 	char fw_name[30];
1972 	int new_fw = 0;
1973 	int err;
1974 	int num_fw;
1975 	bool new_smc = false;
1976 
1977 	DRM_DEBUG("\n");
1978 
1979 	switch (rdev->family) {
1980 	case CHIP_BONAIRE:
1981 		chip_name = "BONAIRE";
1982 		if ((rdev->pdev->revision == 0x80) ||
1983 		    (rdev->pdev->revision == 0x81) ||
1984 		    (rdev->pdev->device == 0x665f))
1985 			new_smc = true;
1986 		new_chip_name = "bonaire";
1987 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1988 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1989 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1990 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1991 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1992 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1993 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1994 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1995 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1996 		num_fw = 8;
1997 		break;
1998 	case CHIP_HAWAII:
1999 		chip_name = "HAWAII";
2000 		if (rdev->pdev->revision == 0x80)
2001 			new_smc = true;
2002 		new_chip_name = "hawaii";
2003 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2004 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2005 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2006 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2007 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2008 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2009 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2010 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2011 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2012 		num_fw = 8;
2013 		break;
2014 	case CHIP_KAVERI:
2015 		chip_name = "KAVERI";
2016 		new_chip_name = "kaveri";
2017 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2018 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2019 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2020 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2021 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2022 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2023 		num_fw = 7;
2024 		break;
2025 	case CHIP_KABINI:
2026 		chip_name = "KABINI";
2027 		new_chip_name = "kabini";
2028 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2029 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2030 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2031 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2032 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2033 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2034 		num_fw = 6;
2035 		break;
2036 	case CHIP_MULLINS:
2037 		chip_name = "MULLINS";
2038 		new_chip_name = "mullins";
2039 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2040 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2041 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2042 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2043 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2044 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2045 		num_fw = 6;
2046 		break;
2047 	default: BUG();
2048 	}
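	/*
	 * num_fw is the number of images that must all validate as
	 * new-format before rdev->new_fw is set below: 8 on dGPUs
	 * (pfp/me/ce/mec/rlc/sdma/mc/smc), 7 on Kaveri (mec2 but no
	 * mc/smc), 6 on Kabini/Mullins.
	 */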
2049 
2050 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2051 
2052 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", new_chip_name);
2053 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2054 	if (err) {
2055 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
2056 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2057 		if (err)
2058 			goto out;
2059 		if (rdev->pfp_fw->datasize != pfp_req_size) {
2060 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2061 			       rdev->pfp_fw->datasize, fw_name);
2062 			err = -EINVAL;
2063 			goto out;
2064 		}
2065 	} else {
2066 		err = radeon_ucode_validate(rdev->pfp_fw);
2067 		if (err) {
2068 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2069 			       fw_name);
2070 			goto out;
2071 		} else {
2072 			new_fw++;
2073 		}
2074 	}
2075 
2076 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", new_chip_name);
2077 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2078 	if (err) {
2079 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
2080 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2081 		if (err)
2082 			goto out;
2083 		if (rdev->me_fw->datasize != me_req_size) {
2084 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2085 			       rdev->me_fw->datasize, fw_name);
2086 			err = -EINVAL;
2087 		}
2088 	} else {
2089 		err = radeon_ucode_validate(rdev->me_fw);
2090 		if (err) {
2091 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2092 			       fw_name);
2093 			goto out;
2094 		} else {
2095 			new_fw++;
2096 		}
2097 	}
2098 
2099 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", new_chip_name);
2100 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2101 	if (err) {
2102 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
2103 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2104 		if (err)
2105 			goto out;
2106 		if (rdev->ce_fw->datasize != ce_req_size) {
2107 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2108 			       rdev->ce_fw->datasize, fw_name);
2109 			err = -EINVAL;
2110 		}
2111 	} else {
2112 		err = radeon_ucode_validate(rdev->ce_fw);
2113 		if (err) {
2114 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2115 			       fw_name);
2116 			goto out;
2117 		} else {
2118 			new_fw++;
2119 		}
2120 	}
2121 
2122 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", new_chip_name);
2123 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2124 	if (err) {
2125 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", chip_name);
2126 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2127 		if (err)
2128 			goto out;
2129 		if (rdev->mec_fw->datasize != mec_req_size) {
2130 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2131 			       rdev->mec_fw->datasize, fw_name);
2132 			err = -EINVAL;
2133 		}
2134 	} else {
2135 		err = radeon_ucode_validate(rdev->mec_fw);
2136 		if (err) {
2137 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2138 			       fw_name);
2139 			goto out;
2140 		} else {
2141 			new_fw++;
2142 		}
2143 	}
2144 
2145 	if (rdev->family == CHIP_KAVERI) {
2146 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec2", new_chip_name);
2147 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2148 		if (err) {
2149 			err = 0; /* mec2 only exists in new-format firmware */
2150 		} else {
2151 			err = radeon_ucode_validate(rdev->mec2_fw);
2152 			if (err) {
2153 				goto out;
2154 			} else {
2155 				new_fw++;
2156 			}
2157 		}
2158 	}
2159 
2160 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", new_chip_name);
2161 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2162 	if (err) {
2163 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name);
2164 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2165 		if (err)
2166 			goto out;
2167 		if (rdev->rlc_fw->datasize != rlc_req_size) {
2168 			pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2169 			       rdev->rlc_fw->datasize, fw_name);
2170 			err = -EINVAL;
2171 		}
2172 	} else {
2173 		err = radeon_ucode_validate(rdev->rlc_fw);
2174 		if (err) {
2175 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2176 			       fw_name);
2177 			goto out;
2178 		} else {
2179 			new_fw++;
2180 		}
2181 	}
2182 
2183 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", new_chip_name);
2184 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2185 	if (err) {
2186 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", chip_name);
2187 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2188 		if (err)
2189 			goto out;
2190 		if (rdev->sdma_fw->datasize != sdma_req_size) {
2191 			pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2192 			       rdev->sdma_fw->datasize, fw_name);
2193 			err = -EINVAL;
2194 		}
2195 	} else {
2196 		err = radeon_ucode_validate(rdev->sdma_fw);
2197 		if (err) {
2198 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2199 			       fw_name);
2200 			goto out;
2201 		} else {
2202 			new_fw++;
2203 		}
2204 	}
2205 
2206 	/* No SMC, MC ucode on APUs */
2207 	if (!(rdev->flags & RADEON_IS_IGP)) {
2208 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", new_chip_name);
2209 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2210 		if (err) {
2211 			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc2", chip_name);
2212 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2213 			if (err) {
2214 				ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
2215 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2216 				if (err)
2217 					goto out;
2218 			}
2219 			if ((rdev->mc_fw->datasize != mc_req_size) &&
2220 			    (rdev->mc_fw->datasize != mc2_req_size)) {
2221 				pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2222 				       rdev->mc_fw->datasize, fw_name);
2223 				err = -EINVAL;
2224 			}
2225 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->datasize);
2226 		} else {
2227 			err = radeon_ucode_validate(rdev->mc_fw);
2228 			if (err) {
2229 				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2230 				       fw_name);
2231 				goto out;
2232 			} else {
2233 				new_fw++;
2234 			}
2235 		}
2236 
2237 		if (new_smc)
2238 			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_k_smc", new_chip_name);
2239 		else
2240 			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", new_chip_name);
2241 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2242 		if (err) {
2243 			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
2244 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2245 			if (err) {
2246 				pr_err("smc: error loading firmware \"%s\"\n",
2247 				       fw_name);
2248 				release_firmware(rdev->smc_fw);
2249 				rdev->smc_fw = NULL;
2250 				err = 0;
2251 			} else if (rdev->smc_fw->datasize != smc_req_size) {
2252 				pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2253 				       rdev->smc_fw->datasize, fw_name);
2254 				err = -EINVAL;
2255 			}
2256 		} else {
2257 			err = radeon_ucode_validate(rdev->smc_fw);
2258 			if (err) {
2259 				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2260 				       fw_name);
2261 				goto out;
2262 			} else {
2263 				new_fw++;
2264 			}
2265 		}
2266 	}
2267 
2268 	if (new_fw == 0) {
2269 		rdev->new_fw = false;
2270 	} else if (new_fw < num_fw) {
2271 		pr_err("ci_fw: mixing new and old firmware!\n");
2272 		err = -EINVAL;
2273 	} else {
2274 		rdev->new_fw = true;
2275 	}
2276 
2277 out:
2278 	if (err) {
2279 		if (err != -EINVAL)
2280 			pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2281 			       fw_name);
2282 		release_firmware(rdev->pfp_fw);
2283 		rdev->pfp_fw = NULL;
2284 		release_firmware(rdev->me_fw);
2285 		rdev->me_fw = NULL;
2286 		release_firmware(rdev->ce_fw);
2287 		rdev->ce_fw = NULL;
2288 		release_firmware(rdev->mec_fw);
2289 		rdev->mec_fw = NULL;
2290 		release_firmware(rdev->mec2_fw);
2291 		rdev->mec2_fw = NULL;
2292 		release_firmware(rdev->rlc_fw);
2293 		rdev->rlc_fw = NULL;
2294 		release_firmware(rdev->sdma_fw);
2295 		rdev->sdma_fw = NULL;
2296 		release_firmware(rdev->mc_fw);
2297 		rdev->mc_fw = NULL;
2298 		release_firmware(rdev->smc_fw);
2299 		rdev->smc_fw = NULL;
2300 	}
2301 	return err;
2302 }
2303 
2304 /**
2305  * cik_fini_microcode - drop the firmware image references
2306  *
2307  * @rdev: radeon_device pointer
2308  *
2309  * Drop the pfp, me, mec, mec2, rlc, sdma, mc, smc and ce firmware image references.
2310  * Called at driver shutdown.
2311  */
2312 static void cik_fini_microcode(struct radeon_device *rdev)
2313 {
2314 	release_firmware(rdev->pfp_fw);
2315 	rdev->pfp_fw = NULL;
2316 	release_firmware(rdev->me_fw);
2317 	rdev->me_fw = NULL;
2318 	release_firmware(rdev->ce_fw);
2319 	rdev->ce_fw = NULL;
2320 	release_firmware(rdev->mec_fw);
2321 	rdev->mec_fw = NULL;
2322 	release_firmware(rdev->mec2_fw);
2323 	rdev->mec2_fw = NULL;
2324 	release_firmware(rdev->rlc_fw);
2325 	rdev->rlc_fw = NULL;
2326 	release_firmware(rdev->sdma_fw);
2327 	rdev->sdma_fw = NULL;
2328 	release_firmware(rdev->mc_fw);
2329 	rdev->mc_fw = NULL;
2330 	release_firmware(rdev->smc_fw);
2331 	rdev->smc_fw = NULL;
2332 }
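/*
 * Both this teardown and the error path in cik_init_microcode() release
 * every slot unconditionally, which relies on release_firmware()
 * tolerating a NULL pointer (a no-op, as in Linux).
 */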
2333 
2334 /*
2335  * Core functions
2336  */
2337 /**
2338  * cik_tiling_mode_table_init - init the hw tiling table
2339  *
2340  * @rdev: radeon_device pointer
2341  *
2342  * Starting with SI, the tiling setup is done globally in a
2343  * set of 32 tiling modes.  Rather than selecting each set of
2344  * parameters per surface as on older asics, we just select
2345  * which index in the tiling table we want to use, and the
2346  * surface uses those parameters (CIK).
2347  */
2348 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2349 {
2350 	u32 *tile = rdev->config.cik.tile_mode_array;
2351 	u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2352 	const u32 num_tile_mode_states =
2353 			ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2354 	const u32 num_secondary_tile_mode_states =
2355 			ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2356 	u32 reg_offset, split_equal_to_row_size;
2357 	u32 num_pipe_configs;
2358 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2359 		rdev->config.cik.max_shader_engines;
2360 
2361 	switch (rdev->config.cik.mem_row_size_in_kb) {
2362 	case 1:
2363 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2364 		break;
2365 	case 2:
2366 	default:
2367 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2368 		break;
2369 	case 4:
2370 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2371 		break;
2372 	}
2373 
2374 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2375 	if (num_pipe_configs > 8)
2376 		num_pipe_configs = 16;
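	/*
	 * Hawaii is the only CIK asic with more than 8 tile pipes, so
	 * anything above 8 selects the 16-pipe (P16) table below.
	 */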
2377 
2378 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2379 		tile[reg_offset] = 0;
2380 	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2381 		macrotile[reg_offset] = 0;
2382 
2383 	switch (num_pipe_configs) {
2384 	case 16:
2385 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2386 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2387 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2389 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2390 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2391 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2393 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2395 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2397 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2398 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2399 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2401 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2402 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2403 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404 			   TILE_SPLIT(split_equal_to_row_size));
2405 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2406 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2408 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2409 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2410 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2412 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2413 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2414 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415 			   TILE_SPLIT(split_equal_to_row_size));
2416 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2417 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2418 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2419 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2421 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2422 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2423 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2426 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2427 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2428 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2430 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2431 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2434 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2436 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2437 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2438 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2439 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2442 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2443 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2445 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2446 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2447 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2448 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2449 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2451 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2452 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2453 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2454 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2455 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2456 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2457 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2458 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2459 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2460 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2461 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2462 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2463 
2464 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2466 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2467 			   NUM_BANKS(ADDR_SURF_16_BANK));
2468 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2470 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2471 			   NUM_BANKS(ADDR_SURF_16_BANK));
2472 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2474 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2475 			   NUM_BANKS(ADDR_SURF_16_BANK));
2476 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2478 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2479 			   NUM_BANKS(ADDR_SURF_16_BANK));
2480 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483 			   NUM_BANKS(ADDR_SURF_8_BANK));
2484 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2487 			   NUM_BANKS(ADDR_SURF_4_BANK));
2488 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2490 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2491 			   NUM_BANKS(ADDR_SURF_2_BANK));
2492 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2494 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2495 			   NUM_BANKS(ADDR_SURF_16_BANK));
2496 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2498 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2499 			   NUM_BANKS(ADDR_SURF_16_BANK));
2500 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2501 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2502 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2503 			    NUM_BANKS(ADDR_SURF_16_BANK));
2504 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2505 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2506 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2507 			    NUM_BANKS(ADDR_SURF_8_BANK));
2508 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2510 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2511 			    NUM_BANKS(ADDR_SURF_4_BANK));
2512 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2513 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2514 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2515 			    NUM_BANKS(ADDR_SURF_2_BANK));
2516 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2518 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2519 			    NUM_BANKS(ADDR_SURF_2_BANK));
2520 
2521 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2522 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2523 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2524 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2525 		break;
2526 
2527 	case 8:
2528 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2529 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2530 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2532 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2533 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2534 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2536 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2537 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2540 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2541 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2542 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2544 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2545 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2546 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547 			   TILE_SPLIT(split_equal_to_row_size));
2548 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2549 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2550 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2551 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2552 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2553 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2555 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2556 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2557 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2558 			   TILE_SPLIT(split_equal_to_row_size));
2559 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2560 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2561 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2562 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2564 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2566 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2568 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2569 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2570 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2571 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2572 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2573 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2574 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2576 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2577 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2579 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2581 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2583 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2584 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2585 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2586 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2587 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2588 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2589 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2590 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2591 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2592 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2593 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2594 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2595 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2596 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2597 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2598 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2599 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2600 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2601 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2602 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2603 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2604 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2605 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2606 
2607 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2609 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2610 				NUM_BANKS(ADDR_SURF_16_BANK));
2611 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2613 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2614 				NUM_BANKS(ADDR_SURF_16_BANK));
2615 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2617 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2618 				NUM_BANKS(ADDR_SURF_16_BANK));
2619 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2621 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2622 				NUM_BANKS(ADDR_SURF_16_BANK));
2623 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2625 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2626 				NUM_BANKS(ADDR_SURF_8_BANK));
2627 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2629 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2630 				NUM_BANKS(ADDR_SURF_4_BANK));
2631 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2633 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2634 				NUM_BANKS(ADDR_SURF_2_BANK));
2635 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2637 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2638 				NUM_BANKS(ADDR_SURF_16_BANK));
2639 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2641 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2642 				NUM_BANKS(ADDR_SURF_16_BANK));
2643 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2645 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2646 				NUM_BANKS(ADDR_SURF_16_BANK));
2647 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2649 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2650 				NUM_BANKS(ADDR_SURF_16_BANK));
2651 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2653 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2654 				NUM_BANKS(ADDR_SURF_8_BANK));
2655 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2657 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2658 				NUM_BANKS(ADDR_SURF_4_BANK));
2659 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2660 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2661 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2662 				NUM_BANKS(ADDR_SURF_2_BANK));
2663 
2664 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2665 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2666 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2667 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2668 		break;
2669 
2670 	case 4:
2671 		if (num_rbs == 4) {
2672 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2673 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2674 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2676 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2677 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2678 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2680 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2681 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2682 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2684 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2685 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2686 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2688 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2689 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2690 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691 			   TILE_SPLIT(split_equal_to_row_size));
2692 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2693 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2694 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2695 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2696 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2697 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2699 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2700 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2701 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2702 			   TILE_SPLIT(split_equal_to_row_size));
2703 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2704 			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
2705 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2706 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2708 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2709 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2710 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2712 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2713 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2714 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2715 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2716 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2717 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2718 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2720 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2721 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2723 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2724 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2725 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2726 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2729 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2730 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2732 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2733 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2735 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2736 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2738 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2739 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2740 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2742 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2743 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2744 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2745 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2746 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2747 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2748 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2750 
2751 		} else if (num_rbs < 4) {
2752 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2753 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2754 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2755 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2756 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2757 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2758 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2760 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2761 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2762 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2764 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2765 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2766 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2768 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2769 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2770 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2771 			   TILE_SPLIT(split_equal_to_row_size));
2772 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2773 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2774 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2775 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2776 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2777 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2779 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2780 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2781 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2782 			   TILE_SPLIT(split_equal_to_row_size));
2783 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2784 			   PIPE_CONFIG(ADDR_SURF_P4_8x16));
2785 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2786 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2788 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2789 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2790 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2792 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2793 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2794 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2796 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2797 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2798 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2799 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2800 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2801 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2802 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2803 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2804 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2805 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2806 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2808 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2809 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2812 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2813 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2814 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2816 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2817 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2818 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2819 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2820 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2821 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2822 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2823 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2824 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2825 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2826 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2827 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2828 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2829 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2830 		}
2831 
2832 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2833 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2834 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835 				NUM_BANKS(ADDR_SURF_16_BANK));
2836 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2837 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2838 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2839 				NUM_BANKS(ADDR_SURF_16_BANK));
2840 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2842 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2843 				NUM_BANKS(ADDR_SURF_16_BANK));
2844 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2846 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2847 				NUM_BANKS(ADDR_SURF_16_BANK));
2848 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2850 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2851 				NUM_BANKS(ADDR_SURF_16_BANK));
2852 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2855 				NUM_BANKS(ADDR_SURF_8_BANK));
2856 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2857 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2858 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2859 				NUM_BANKS(ADDR_SURF_4_BANK));
2860 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2861 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2862 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2863 				NUM_BANKS(ADDR_SURF_16_BANK));
2864 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2865 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2866 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2867 				NUM_BANKS(ADDR_SURF_16_BANK));
2868 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2869 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2870 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2871 				NUM_BANKS(ADDR_SURF_16_BANK));
2872 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2873 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2874 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2875 				NUM_BANKS(ADDR_SURF_16_BANK));
2876 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2877 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2878 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2879 				NUM_BANKS(ADDR_SURF_16_BANK));
2880 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2881 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2882 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2883 				NUM_BANKS(ADDR_SURF_8_BANK));
2884 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2885 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2886 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2887 				NUM_BANKS(ADDR_SURF_4_BANK));
2888 
2889 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2890 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2891 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2892 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2893 		break;
2894 
2895 	case 2:
2896 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2897 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2898 			   PIPE_CONFIG(ADDR_SURF_P2) |
2899 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2900 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2902 			   PIPE_CONFIG(ADDR_SURF_P2) |
2903 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2904 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906 			   PIPE_CONFIG(ADDR_SURF_P2) |
2907 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2908 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2909 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2910 			   PIPE_CONFIG(ADDR_SURF_P2) |
2911 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2912 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2913 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2914 			   PIPE_CONFIG(ADDR_SURF_P2) |
2915 			   TILE_SPLIT(split_equal_to_row_size));
2916 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2917 			   PIPE_CONFIG(ADDR_SURF_P2) |
2918 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2919 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2920 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2921 			   PIPE_CONFIG(ADDR_SURF_P2) |
2922 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2923 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2924 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2925 			   PIPE_CONFIG(ADDR_SURF_P2) |
2926 			   TILE_SPLIT(split_equal_to_row_size));
2927 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2928 			   PIPE_CONFIG(ADDR_SURF_P2));
2929 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2930 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2931 			   PIPE_CONFIG(ADDR_SURF_P2));
2932 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2933 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2934 			    PIPE_CONFIG(ADDR_SURF_P2) |
2935 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2937 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2938 			    PIPE_CONFIG(ADDR_SURF_P2) |
2939 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2941 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2942 			    PIPE_CONFIG(ADDR_SURF_P2) |
2943 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2945 			    PIPE_CONFIG(ADDR_SURF_P2) |
2946 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2947 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2948 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2949 			    PIPE_CONFIG(ADDR_SURF_P2) |
2950 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2952 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2953 			    PIPE_CONFIG(ADDR_SURF_P2) |
2954 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2956 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2957 			    PIPE_CONFIG(ADDR_SURF_P2) |
2958 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2960 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2961 			    PIPE_CONFIG(ADDR_SURF_P2));
2962 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2963 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2964 			    PIPE_CONFIG(ADDR_SURF_P2) |
2965 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2966 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2967 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2968 			    PIPE_CONFIG(ADDR_SURF_P2) |
2969 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2970 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2971 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2972 			    PIPE_CONFIG(ADDR_SURF_P2) |
2973 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2974 
2975 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2976 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2977 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2978 				NUM_BANKS(ADDR_SURF_16_BANK));
2979 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2980 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2981 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2982 				NUM_BANKS(ADDR_SURF_16_BANK));
2983 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2984 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2985 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2986 				NUM_BANKS(ADDR_SURF_16_BANK));
2987 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2988 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2989 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2990 				NUM_BANKS(ADDR_SURF_16_BANK));
2991 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2992 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2993 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994 				NUM_BANKS(ADDR_SURF_16_BANK));
2995 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2996 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2997 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2998 				NUM_BANKS(ADDR_SURF_16_BANK));
2999 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3000 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3001 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3002 				NUM_BANKS(ADDR_SURF_8_BANK));
3003 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3004 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3005 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3006 				NUM_BANKS(ADDR_SURF_16_BANK));
3007 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3008 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3009 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3010 				NUM_BANKS(ADDR_SURF_16_BANK));
3011 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3012 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3013 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3014 				NUM_BANKS(ADDR_SURF_16_BANK));
3015 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3016 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3017 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3018 				NUM_BANKS(ADDR_SURF_16_BANK));
3019 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3020 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3021 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3022 				NUM_BANKS(ADDR_SURF_16_BANK));
3023 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3024 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3025 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3026 				NUM_BANKS(ADDR_SURF_16_BANK));
3027 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3028 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3029 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3030 				NUM_BANKS(ADDR_SURF_8_BANK));
3031 
3032 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3033 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3034 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3035 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3036 		break;
3037 
3038 	default:
3039 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3040 	}
3041 }
3042 
3043 /**
3044  * cik_select_se_sh - select which SE, SH to address
3045  *
3046  * @rdev: radeon_device pointer
3047  * @se_num: shader engine to address
3048  * @sh_num: sh block to address
3049  *
3050  * Select which SE, SH combinations to address. Certain
3051  * registers are instanced per SE or SH.  0xffffffff means
3052  * broadcast to all SEs or SHs (CIK).
3053  */
3054 static void cik_select_se_sh(struct radeon_device *rdev,
3055 			     u32 se_num, u32 sh_num)
3056 {
3057 	u32 data = INSTANCE_BROADCAST_WRITES;
3058 
3059 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3060 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3061 	else if (se_num == 0xffffffff)
3062 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3063 	else if (sh_num == 0xffffffff)
3064 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3065 	else
3066 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3067 	WREG32(GRBM_GFX_INDEX, data);
3068 }
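/*
 * The usual pattern in this file: take rdev->grbm_idx_mutex, walk the
 * SE/SH instances, and restore broadcast mode before unlocking, e.g.:
 *
 *	mutex_lock(&rdev->grbm_idx_mutex);
 *	cik_select_se_sh(rdev, se, sh);
 *	... access per-instance registers ...
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 *	mutex_unlock(&rdev->grbm_idx_mutex);
 */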
3069 
3070 /**
3071  * cik_create_bitmask - create a bitmask
3072  *
3073  * @bit_width: length of the mask
3074  *
3075  * Create a variable-length bit mask (CIK).
3076  * Returns the bitmask.
3077  */
3078 static u32 cik_create_bitmask(u32 bit_width)
3079 {
3080 	u32 i, mask = 0;
3081 
3082 	for (i = 0; i < bit_width; i++) {
3083 		mask <<= 1;
3084 		mask |= 1;
3085 	}
3086 	return mask;
3087 }
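/*
 * Equivalent closed form for bit_width < 32 (illustration only, not a
 * helper this file defines):
 *
 *	mask = (1u << bit_width) - 1;	/* e.g. bit_width 4 -> 0xf */
 *
 * The loop is kept because it also stays well defined at bit_width == 32,
 * where the single 32-bit shift would be undefined behaviour.
 */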
3088 
3089 /**
3090  * cik_get_rb_disabled - computes the mask of disabled RBs
3091  *
3092  * @rdev: radeon_device pointer
3093  * @max_rb_num_per_se: max RBs (render backends) per SE
3094  *                     (shader engine) for the asic
3095  * @sh_per_se: number of SH blocks per SE for the asic
3096  *
3097  * Calculates the bitmask of disabled RBs (CIK).
3098  * Returns the disabled RB bitmask.
3099  */
3100 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3101 			      u32 max_rb_num_per_se,
3102 			      u32 sh_per_se)
3103 {
3104 	u32 data, mask;
3105 
3106 	data = RREG32(CC_RB_BACKEND_DISABLE);
3107 	if (data & 1)
3108 		data &= BACKEND_DISABLE_MASK;
3109 	else
3110 		data = 0;
3111 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3112 
3113 	data >>= BACKEND_DISABLE_SHIFT;
3114 
3115 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3116 
3117 	return data & mask;
3118 }
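/*
 * Worked example (hypothetical part): with 2 RBs per SE and 1 SH per SE,
 * the mask is cik_create_bitmask(2 / 1) == 0x3, so only the two per-SH
 * backend-disable bits survive the combined register reads above.
 */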
3119 
3120 /**
3121  * cik_setup_rb - setup the RBs on the asic
3122  *
3123  * @rdev: radeon_device pointer
3124  * @se_num: number of SEs (shader engines) for the asic
3125  * @sh_per_se: number of SH blocks per SE for the asic
3126  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3127  *
3128  * Configures per-SE/SH RB registers (CIK).
3129  */
3130 static void cik_setup_rb(struct radeon_device *rdev,
3131 			 u32 se_num, u32 sh_per_se,
3132 			 u32 max_rb_num_per_se)
3133 {
3134 	int i, j;
3135 	u32 data, mask;
3136 	u32 disabled_rbs = 0;
3137 	u32 enabled_rbs = 0;
3138 
3139 	mutex_lock(&rdev->grbm_idx_mutex);
3140 	for (i = 0; i < se_num; i++) {
3141 		for (j = 0; j < sh_per_se; j++) {
3142 			cik_select_se_sh(rdev, i, j);
3143 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3144 			if (rdev->family == CHIP_HAWAII)
3145 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3146 			else
3147 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3148 		}
3149 	}
3150 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3151 	mutex_unlock(&rdev->grbm_idx_mutex);
3152 
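	/*
	 * Invert the harvested-RB bitmap: every RB slot across all SEs that
	 * was not reported disabled is marked enabled.
	 */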
3153 	mask = 1;
3154 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3155 		if (!(disabled_rbs & mask))
3156 			enabled_rbs |= mask;
3157 		mask <<= 1;
3158 	}
3159 
3160 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3161 
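	/*
	 * Program a raster configuration per SE, consuming two bits of the
	 * enabled-RB mask per SH to select an RB mapping.
	 */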
3162 	mutex_lock(&rdev->grbm_idx_mutex);
3163 	for (i = 0; i < se_num; i++) {
3164 		cik_select_se_sh(rdev, i, 0xffffffff);
3165 		data = 0;
3166 		for (j = 0; j < sh_per_se; j++) {
3167 			switch (enabled_rbs & 3) {
3168 			case 0:
3169 				if (j == 0)
3170 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3171 				else
3172 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3173 				break;
3174 			case 1:
3175 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3176 				break;
3177 			case 2:
3178 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3179 				break;
3180 			case 3:
3181 			default:
3182 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3183 				break;
3184 			}
3185 			enabled_rbs >>= 2;
3186 		}
3187 		WREG32(PA_SC_RASTER_CONFIG, data);
3188 	}
3189 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3190 	mutex_unlock(&rdev->grbm_idx_mutex);
3191 }
3192 
3193 /**
3194  * cik_gpu_init - setup the 3D engine
3195  *
3196  * @rdev: radeon_device pointer
3197  *
3198  * Configures the 3D engine and tiling configuration
3199  * registers so that the 3D engine is usable.
3200  */
3201 static void cik_gpu_init(struct radeon_device *rdev)
3202 {
3203 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3204 	u32 mc_shared_chmap, mc_arb_ramcfg;
3205 	u32 hdp_host_path_cntl;
3206 	u32 tmp;
3207 	int i, j;
3208 
3209 	switch (rdev->family) {
3210 	case CHIP_BONAIRE:
3211 		rdev->config.cik.max_shader_engines = 2;
3212 		rdev->config.cik.max_tile_pipes = 4;
3213 		rdev->config.cik.max_cu_per_sh = 7;
3214 		rdev->config.cik.max_sh_per_se = 1;
3215 		rdev->config.cik.max_backends_per_se = 2;
3216 		rdev->config.cik.max_texture_channel_caches = 4;
3217 		rdev->config.cik.max_gprs = 256;
3218 		rdev->config.cik.max_gs_threads = 32;
3219 		rdev->config.cik.max_hw_contexts = 8;
3220 
3221 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3222 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3223 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3224 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3225 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3226 		break;
3227 	case CHIP_HAWAII:
3228 		rdev->config.cik.max_shader_engines = 4;
3229 		rdev->config.cik.max_tile_pipes = 16;
3230 		rdev->config.cik.max_cu_per_sh = 11;
3231 		rdev->config.cik.max_sh_per_se = 1;
3232 		rdev->config.cik.max_backends_per_se = 4;
3233 		rdev->config.cik.max_texture_channel_caches = 16;
3234 		rdev->config.cik.max_gprs = 256;
3235 		rdev->config.cik.max_gs_threads = 32;
3236 		rdev->config.cik.max_hw_contexts = 8;
3237 
3238 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3239 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3240 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3241 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3242 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3243 		break;
3244 	case CHIP_KAVERI:
3245 		rdev->config.cik.max_shader_engines = 1;
3246 		rdev->config.cik.max_tile_pipes = 4;
3247 		rdev->config.cik.max_cu_per_sh = 8;
3248 		rdev->config.cik.max_backends_per_se = 2;
3249 		rdev->config.cik.max_sh_per_se = 1;
3250 		rdev->config.cik.max_texture_channel_caches = 4;
3251 		rdev->config.cik.max_gprs = 256;
3252 		rdev->config.cik.max_gs_threads = 16;
3253 		rdev->config.cik.max_hw_contexts = 8;
3254 
3255 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3256 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3257 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3258 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3259 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3260 		break;
3261 	case CHIP_KABINI:
3262 	case CHIP_MULLINS:
3263 	default:
3264 		rdev->config.cik.max_shader_engines = 1;
3265 		rdev->config.cik.max_tile_pipes = 2;
3266 		rdev->config.cik.max_cu_per_sh = 2;
3267 		rdev->config.cik.max_sh_per_se = 1;
3268 		rdev->config.cik.max_backends_per_se = 1;
3269 		rdev->config.cik.max_texture_channel_caches = 2;
3270 		rdev->config.cik.max_gprs = 256;
3271 		rdev->config.cik.max_gs_threads = 16;
3272 		rdev->config.cik.max_hw_contexts = 8;
3273 
3274 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3275 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3276 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3277 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3278 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3279 		break;
3280 	}
3281 
3282 	/* Initialize HDP */
3283 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3284 		WREG32((0x2c14 + j), 0x00000000);
3285 		WREG32((0x2c18 + j), 0x00000000);
3286 		WREG32((0x2c1c + j), 0x00000000);
3287 		WREG32((0x2c20 + j), 0x00000000);
3288 		WREG32((0x2c24 + j), 0x00000000);
3289 	}
3290 
3291 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3292 	WREG32(SRBM_INT_CNTL, 0x1);
3293 	WREG32(SRBM_INT_ACK, 0x1);
3294 
3295 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3296 
3297 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3298 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3299 
3300 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3301 	rdev->config.cik.mem_max_burst_length_bytes = 256;
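	/* DRAM row size: 4 bytes per column * 2^(8 + NOOFCOLS) columns,
	 * expressed in KB and clamped to 4KB below.
	 */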
3302 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3303 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3304 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3305 		rdev->config.cik.mem_row_size_in_kb = 4;
3306 	/* XXX use MC settings? */
3307 	rdev->config.cik.shader_engine_tile_size = 32;
3308 	rdev->config.cik.num_gpus = 1;
3309 	rdev->config.cik.multi_gpu_tile_size = 64;
3310 
3311 	/* fix up row size */
3312 	gb_addr_config &= ~ROW_SIZE_MASK;
3313 	switch (rdev->config.cik.mem_row_size_in_kb) {
3314 	case 1:
3315 	default:
3316 		gb_addr_config |= ROW_SIZE(0);
3317 		break;
3318 	case 2:
3319 		gb_addr_config |= ROW_SIZE(1);
3320 		break;
3321 	case 4:
3322 		gb_addr_config |= ROW_SIZE(2);
3323 		break;
3324 	}
3325 
3326 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3327 	 * not have bank info, so create a custom tiling dword.
3328 	 * bits 3:0   num_pipes
3329 	 * bits 7:4   num_banks
3330 	 * bits 11:8  group_size
3331 	 * bits 15:12 row_size
3332 	 */
3333 	rdev->config.cik.tile_config = 0;
3334 	switch (rdev->config.cik.num_tile_pipes) {
3335 	case 1:
3336 		rdev->config.cik.tile_config |= (0 << 0);
3337 		break;
3338 	case 2:
3339 		rdev->config.cik.tile_config |= (1 << 0);
3340 		break;
3341 	case 4:
3342 		rdev->config.cik.tile_config |= (2 << 0);
3343 		break;
3344 	case 8:
3345 	default:
3346 		/* XXX what about 12? */
3347 		rdev->config.cik.tile_config |= (3 << 0);
3348 		break;
3349 	}
3350 	rdev->config.cik.tile_config |=
3351 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3352 	rdev->config.cik.tile_config |=
3353 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3354 	rdev->config.cik.tile_config |=
3355 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3356 
3357 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3358 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3359 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3360 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3361 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3362 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3363 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3364 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3365 
3366 	cik_tiling_mode_table_init(rdev);
3367 
3368 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3369 		     rdev->config.cik.max_sh_per_se,
3370 		     rdev->config.cik.max_backends_per_se);
3371 
3372 	rdev->config.cik.active_cus = 0;
3373 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3374 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3375 			rdev->config.cik.active_cus +=
3376 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3377 		}
3378 	}
3379 
3380 	/* set HW defaults for 3D engine */
3381 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3382 
3383 	mutex_lock(&rdev->grbm_idx_mutex);
3384 	/*
	 * make sure that the following register writes are broadcast
	 * to all the shaders
3387 	 */
3388 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3389 	WREG32(SX_DEBUG_1, 0x20);
3390 
3391 	WREG32(TA_CNTL_AUX, 0x00010000);
3392 
3393 	tmp = RREG32(SPI_CONFIG_CNTL);
3394 	tmp |= 0x03000000;
3395 	WREG32(SPI_CONFIG_CNTL, tmp);
3396 
3397 	WREG32(SQ_CONFIG, 1);
3398 
3399 	WREG32(DB_DEBUG, 0);
3400 
3401 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3402 	tmp |= 0x00000400;
3403 	WREG32(DB_DEBUG2, tmp);
3404 
3405 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3406 	tmp |= 0x00020200;
3407 	WREG32(DB_DEBUG3, tmp);
3408 
3409 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3410 	tmp |= 0x00018208;
3411 	WREG32(CB_HW_CONTROL, tmp);
3412 
3413 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3414 
3415 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3416 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3417 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3418 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3419 
3420 	WREG32(VGT_NUM_INSTANCES, 1);
3421 
3422 	WREG32(CP_PERFMON_CNTL, 0);
3423 
3424 	WREG32(SQ_CONFIG, 0);
3425 
3426 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3427 					  FORCE_EOV_MAX_REZ_CNT(255)));
3428 
3429 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3430 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3431 
3432 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3433 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3434 
3435 	tmp = RREG32(HDP_MISC_CNTL);
3436 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3437 	WREG32(HDP_MISC_CNTL, tmp);
3438 
3439 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3440 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3441 
3442 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3443 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3444 	mutex_unlock(&rdev->grbm_idx_mutex);
3445 
3446 	udelay(50);
3447 }
3448 
3449 /*
 * GPU scratch register helper functions.
3451  */
3452 /**
3453  * cik_scratch_init - setup driver info for CP scratch regs
3454  *
3455  * @rdev: radeon_device pointer
3456  *
3457  * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
3459  * is not used by default on newer asics (r6xx+).  On newer asics,
3460  * memory buffers are used for fences rather than scratch regs.
3461  */
3462 static void cik_scratch_init(struct radeon_device *rdev)
3463 {
3464 	int i;
3465 
3466 	rdev->scratch.num_reg = 7;
3467 	rdev->scratch.reg_base = SCRATCH_REG0;
3468 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3469 		rdev->scratch.free[i] = true;
3470 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3471 	}
3472 }
3473 
3474 /**
3475  * cik_ring_test - basic gfx ring test
3476  *
3477  * @rdev: radeon_device pointer
3478  * @ring: radeon_ring structure holding ring information
3479  *
3480  * Allocate a scratch register and write to it using the gfx ring (CIK).
3481  * Provides a basic gfx ring test to verify that the ring is working.
 * Used by cik_cp_gfx_resume().
3483  * Returns 0 on success, error on failure.
3484  */
3485 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3486 {
3487 	uint32_t scratch;
3488 	uint32_t tmp = 0;
3489 	unsigned i;
3490 	int r;
3491 
3492 	r = radeon_scratch_get(rdev, &scratch);
3493 	if (r) {
3494 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3495 		return r;
3496 	}
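	/* Seed the scratch reg with a sentinel, then have the CP overwrite
	 * it via SET_UCONFIG_REG; the poll below detects the change.
	 */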
3497 	WREG32(scratch, 0xCAFEDEAD);
3498 	r = radeon_ring_lock(rdev, ring, 3);
3499 	if (r) {
3500 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3501 		radeon_scratch_free(rdev, scratch);
3502 		return r;
3503 	}
3504 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3505 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3506 	radeon_ring_write(ring, 0xDEADBEEF);
3507 	radeon_ring_unlock_commit(rdev, ring, false);
3508 
3509 	for (i = 0; i < rdev->usec_timeout; i++) {
3510 		tmp = RREG32(scratch);
3511 		if (tmp == 0xDEADBEEF)
3512 			break;
3513 		DRM_UDELAY(1);
3514 	}
3515 	if (i < rdev->usec_timeout) {
3516 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3517 	} else {
3518 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3519 			  ring->idx, scratch, tmp);
3520 		r = -EINVAL;
3521 	}
3522 	radeon_scratch_free(rdev, scratch);
3523 	return r;
3524 }
3525 
3526 /**
3527  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3528  *
3529  * @rdev: radeon_device pointer
3530  * @ridx: radeon ring index
3531  *
3532  * Emits an hdp flush on the cp.
3533  */
3534 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3535 				       int ridx)
3536 {
3537 	struct radeon_ring *ring = &rdev->ring[ridx];
3538 	u32 ref_and_mask;
3539 
3540 	switch (ring->idx) {
3541 	case CAYMAN_RING_TYPE_CP1_INDEX:
3542 	case CAYMAN_RING_TYPE_CP2_INDEX:
3543 	default:
3544 		switch (ring->me) {
3545 		case 0:
3546 			ref_and_mask = CP2 << ring->pipe;
3547 			break;
3548 		case 1:
3549 			ref_and_mask = CP6 << ring->pipe;
3550 			break;
3551 		default:
3552 			return;
3553 		}
3554 		break;
3555 	case RADEON_RING_TYPE_GFX_INDEX:
3556 		ref_and_mask = CP0;
3557 		break;
3558 	}
3559 
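	/*
	 * WAIT_REG_MEM in write/wait mode: the PFP writes ref_and_mask to
	 * GPU_HDP_FLUSH_REQ, then polls GPU_HDP_FLUSH_DONE with a poll
	 * interval of 0x20 until the masked value equals the reference,
	 * i.e. until the HDP flush for this engine has completed.
	 */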
3560 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3561 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3562 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3563 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3564 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3565 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3566 	radeon_ring_write(ring, ref_and_mask);
3567 	radeon_ring_write(ring, ref_and_mask);
3568 	radeon_ring_write(ring, 0x20); /* poll interval */
3569 }
3570 
3571 /**
3572  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3573  *
3574  * @rdev: radeon_device pointer
3575  * @fence: radeon fence object
3576  *
 * Emits a fence sequence number on the gfx ring and flushes
3578  * GPU caches.
3579  */
3580 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3581 			     struct radeon_fence *fence)
3582 {
3583 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3584 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3585 
	/* Workaround for cache flush problems.  First send a dummy EOP
	 * event down the pipe with a seq value one below the real one.
	 */
3589 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3590 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3591 				 EOP_TC_ACTION_EN |
3592 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3593 				 EVENT_INDEX(5)));
3594 	radeon_ring_write(ring, addr & 0xfffffffc);
3595 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3596 				DATA_SEL(1) | INT_SEL(0));
3597 	radeon_ring_write(ring, fence->seq - 1);
3598 	radeon_ring_write(ring, 0);
3599 
	/* Then send the real EOP event down the pipe.  It differs from the
	 * dummy only in writing the current seq and in INT_SEL(2), which
	 * raises the fence interrupt once the write completes.
	 */
3601 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3602 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3603 				 EOP_TC_ACTION_EN |
3604 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3605 				 EVENT_INDEX(5)));
3606 	radeon_ring_write(ring, addr & 0xfffffffc);
3607 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3608 	radeon_ring_write(ring, fence->seq);
3609 	radeon_ring_write(ring, 0);
3610 }
3611 
3612 /**
3613  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3614  *
3615  * @rdev: radeon_device pointer
3616  * @fence: radeon fence object
3617  *
 * Emits a fence sequence number on the compute ring and flushes
3619  * GPU caches.
3620  */
3621 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3622 				 struct radeon_fence *fence)
3623 {
3624 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3625 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3626 
3627 	/* RELEASE_MEM - flush caches, send int */
3628 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3629 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3630 				 EOP_TC_ACTION_EN |
3631 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3632 				 EVENT_INDEX(5)));
3633 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3634 	radeon_ring_write(ring, addr & 0xfffffffc);
3635 	radeon_ring_write(ring, upper_32_bits(addr));
3636 	radeon_ring_write(ring, fence->seq);
3637 	radeon_ring_write(ring, 0);
3638 }
3639 
3640 /**
3641  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3642  *
3643  * @rdev: radeon_device pointer
3644  * @ring: radeon ring buffer object
3645  * @semaphore: radeon semaphore object
 * @emit_wait: Is this a semaphore wait?
3647  *
3648  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3649  * from running ahead of semaphore waits.
3650  */
3651 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3652 			     struct radeon_ring *ring,
3653 			     struct radeon_semaphore *semaphore,
3654 			     bool emit_wait)
3655 {
3656 	uint64_t addr = semaphore->gpu_addr;
3657 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3658 
3659 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3660 	radeon_ring_write(ring, lower_32_bits(addr));
3661 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3662 
3663 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3664 		/* Prevent the PFP from running ahead of the semaphore wait */
3665 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3666 		radeon_ring_write(ring, 0x0);
3667 	}
3668 
3669 	return true;
3670 }
3671 
3672 /**
3673  * cik_copy_cpdma - copy pages using the CP DMA engine
3674  *
3675  * @rdev: radeon_device pointer
3676  * @src_offset: src GPU address
3677  * @dst_offset: dst GPU address
3678  * @num_gpu_pages: number of GPU pages to xfer
3679  * @resv: reservation object to sync to
3680  *
 * Copy GPU pages using the CP DMA engine (CIK+).
3682  * Used by the radeon ttm implementation to move pages if
3683  * registered as the asic copy callback.
3684  */
3685 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3686 				    uint64_t src_offset, uint64_t dst_offset,
3687 				    unsigned num_gpu_pages,
3688 				    struct reservation_object *resv)
3689 {
3690 	struct radeon_fence *fence;
3691 	struct radeon_sync sync;
3692 	int ring_index = rdev->asic->copy.blit_ring_index;
3693 	struct radeon_ring *ring = &rdev->ring[ring_index];
3694 	u32 size_in_bytes, cur_size_in_bytes, control;
3695 	int i, num_loops;
3696 	int r = 0;
3697 
3698 	radeon_sync_create(&sync);
3699 
3700 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
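	/*
	 * A single CP DMA_DATA packet copies at most 0x1fffff bytes, so the
	 * transfer is split into chunks; each chunk costs 7 ring dwords,
	 * with 18 dwords of headroom for the sync and fence packets.
	 */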
3701 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3702 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3703 	if (r) {
3704 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3705 		radeon_sync_free(rdev, &sync, NULL);
3706 		return ERR_PTR(r);
3707 	}
3708 
3709 	radeon_sync_resv(rdev, &sync, resv, false);
3710 	radeon_sync_rings(rdev, &sync, ring->idx);
3711 
3712 	for (i = 0; i < num_loops; i++) {
3713 		cur_size_in_bytes = size_in_bytes;
3714 		if (cur_size_in_bytes > 0x1fffff)
3715 			cur_size_in_bytes = 0x1fffff;
3716 		size_in_bytes -= cur_size_in_bytes;
3717 		control = 0;
3718 		if (size_in_bytes == 0)
3719 			control |= PACKET3_DMA_DATA_CP_SYNC;
3720 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3721 		radeon_ring_write(ring, control);
3722 		radeon_ring_write(ring, lower_32_bits(src_offset));
3723 		radeon_ring_write(ring, upper_32_bits(src_offset));
3724 		radeon_ring_write(ring, lower_32_bits(dst_offset));
3725 		radeon_ring_write(ring, upper_32_bits(dst_offset));
3726 		radeon_ring_write(ring, cur_size_in_bytes);
3727 		src_offset += cur_size_in_bytes;
3728 		dst_offset += cur_size_in_bytes;
3729 	}
3730 
3731 	r = radeon_fence_emit(rdev, &fence, ring->idx);
3732 	if (r) {
3733 		radeon_ring_unlock_undo(rdev, ring);
3734 		radeon_sync_free(rdev, &sync, NULL);
3735 		return ERR_PTR(r);
3736 	}
3737 
3738 	radeon_ring_unlock_commit(rdev, ring, false);
3739 	radeon_sync_free(rdev, &sync, fence);
3740 
3741 	return fence;
3742 }
3743 
3744 /*
3745  * IB stuff
3746  */
3747 /**
3748  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3749  *
3750  * @rdev: radeon_device pointer
3751  * @ib: radeon indirect buffer object
3752  *
3753  * Emits a DE (drawing engine) or CE (constant engine) IB
3754  * on the gfx ring.  IBs are usually generated by userspace
3755  * acceleration drivers and submitted to the kernel for
3756  * scheduling on the ring.  This function schedules the IB
3757  * on the gfx ring for execution by the GPU.
3758  */
3759 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3760 {
3761 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3762 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3763 	u32 header, control = INDIRECT_BUFFER_VALID;
3764 
3765 	if (ib->is_const_ib) {
3766 		/* set switch buffer packet before const IB */
3767 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3768 		radeon_ring_write(ring, 0);
3769 
3770 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3771 	} else {
3772 		u32 next_rptr;
3773 		if (ring->rptr_save_reg) {
3774 			next_rptr = ring->wptr + 3 + 4;
3775 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3776 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3777 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3778 			radeon_ring_write(ring, next_rptr);
3779 		} else if (rdev->wb.enabled) {
3780 			next_rptr = ring->wptr + 5 + 4;
3781 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3782 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3783 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3784 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3785 			radeon_ring_write(ring, next_rptr);
3786 		}
3787 
3788 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3789 	}
3790 
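	/* control word: IB size in dwords in the low bits, VMID packed
	 * into bits 27:24 (CIK exposes 16 VMIDs).
	 */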
3791 	control |= ib->length_dw | (vm_id << 24);
3792 
3793 	radeon_ring_write(ring, header);
3794 	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3795 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3796 	radeon_ring_write(ring, control);
3797 }
3798 
3799 /**
3800  * cik_ib_test - basic gfx ring IB test
3801  *
3802  * @rdev: radeon_device pointer
3803  * @ring: radeon_ring structure holding ring information
3804  *
3805  * Allocate an IB and execute it on the gfx ring (CIK).
3806  * Provides a basic gfx ring test to verify that IBs are working.
3807  * Returns 0 on success, error on failure.
3808  */
3809 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3810 {
3811 	struct radeon_ib ib;
3812 	uint32_t scratch;
3813 	uint32_t tmp = 0;
3814 	unsigned i;
3815 	int r;
3816 
3817 	r = radeon_scratch_get(rdev, &scratch);
3818 	if (r) {
3819 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3820 		return r;
3821 	}
3822 	WREG32(scratch, 0xCAFEDEAD);
3823 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3824 	if (r) {
3825 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3826 		radeon_scratch_free(rdev, scratch);
3827 		return r;
3828 	}
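	/* Build a 3-dword IB that performs the same scratch write as the
	 * ring test, but scheduled through the IB path.
	 */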
3829 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3830 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3831 	ib.ptr[2] = 0xDEADBEEF;
3832 	ib.length_dw = 3;
3833 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
3834 	if (r) {
3835 		radeon_scratch_free(rdev, scratch);
3836 		radeon_ib_free(rdev, &ib);
3837 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3838 		return r;
3839 	}
3840 	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3841 		RADEON_USEC_IB_TEST_TIMEOUT));
3842 	if (r < 0) {
3843 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3844 		radeon_scratch_free(rdev, scratch);
3845 		radeon_ib_free(rdev, &ib);
3846 		return r;
3847 	} else if (r == 0) {
3848 		DRM_ERROR("radeon: fence wait timed out.\n");
3849 		radeon_scratch_free(rdev, scratch);
3850 		radeon_ib_free(rdev, &ib);
3851 		return -ETIMEDOUT;
3852 	}
3853 	r = 0;
3854 	for (i = 0; i < rdev->usec_timeout; i++) {
3855 		tmp = RREG32(scratch);
3856 		if (tmp == 0xDEADBEEF)
3857 			break;
3858 		DRM_UDELAY(1);
3859 	}
3860 	if (i < rdev->usec_timeout) {
3861 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3862 	} else {
3863 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3864 			  scratch, tmp);
3865 		r = -EINVAL;
3866 	}
3867 	radeon_scratch_free(rdev, scratch);
3868 	radeon_ib_free(rdev, &ib);
3869 	return r;
3870 }
3871 
3872 /*
3873  * CP.
 * On CIK, gfx and compute now have independent command processors.
3875  *
3876  * GFX
3877  * Gfx consists of a single ring and can process both gfx jobs and
3878  * compute jobs.  The gfx CP consists of three microengines (ME):
3879  * PFP - Pre-Fetch Parser
3880  * ME - Micro Engine
3881  * CE - Constant Engine
3882  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3884  * used by the DE so that they can be loaded into cache in parallel
3885  * while the DE is processing state update packets.
3886  *
3887  * Compute
3888  * The compute CP consists of two microengines (ME):
3889  * MEC1 - Compute MicroEngine 1
3890  * MEC2 - Compute MicroEngine 2
3891  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3892  * The queues are exposed to userspace and are programmed directly
3893  * by the compute runtime.
3894  */
3895 /**
3896  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3897  *
3898  * @rdev: radeon_device pointer
3899  * @enable: enable or disable the MEs
3900  *
3901  * Halts or unhalts the gfx MEs.
3902  */
3903 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3904 {
3905 	if (enable)
3906 		WREG32(CP_ME_CNTL, 0);
3907 	else {
3908 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3909 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3910 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3911 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3912 	}
3913 	udelay(50);
3914 }
3915 
3916 /**
3917  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3918  *
3919  * @rdev: radeon_device pointer
3920  *
3921  * Loads the gfx PFP, ME, and CE ucode.
3922  * Returns 0 for success, -EINVAL if the ucode is not available.
3923  */
3924 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3925 {
3926 	int i;
3927 
3928 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3929 		return -EINVAL;
3930 
3931 	cik_cp_gfx_enable(rdev, false);
3932 
3933 	if (rdev->new_fw) {
3934 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3935 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3936 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3937 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3938 		const struct gfx_firmware_header_v1_0 *me_hdr =
3939 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3940 		const __le32 *fw_data;
3941 		u32 fw_size;
3942 
3943 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3944 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3945 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3946 
3947 		/* PFP */
3948 		fw_data = (const __le32 *)
3949 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3950 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3951 		WREG32(CP_PFP_UCODE_ADDR, 0);
3952 		for (i = 0; i < fw_size; i++)
3953 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3954 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3955 
3956 		/* CE */
3957 		fw_data = (const __le32 *)
3958 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3959 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3960 		WREG32(CP_CE_UCODE_ADDR, 0);
3961 		for (i = 0; i < fw_size; i++)
3962 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3963 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3964 
3965 		/* ME */
		fw_data = (const __le32 *)
3967 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3968 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3969 		WREG32(CP_ME_RAM_WADDR, 0);
3970 		for (i = 0; i < fw_size; i++)
3971 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3972 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3973 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3974 	} else {
3975 		const __be32 *fw_data;
3976 
3977 		/* PFP */
3978 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3979 		WREG32(CP_PFP_UCODE_ADDR, 0);
3980 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3981 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3982 		WREG32(CP_PFP_UCODE_ADDR, 0);
3983 
3984 		/* CE */
3985 		fw_data = (const __be32 *)rdev->ce_fw->data;
3986 		WREG32(CP_CE_UCODE_ADDR, 0);
3987 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3988 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3989 		WREG32(CP_CE_UCODE_ADDR, 0);
3990 
3991 		/* ME */
3992 		fw_data = (const __be32 *)rdev->me_fw->data;
3993 		WREG32(CP_ME_RAM_WADDR, 0);
3994 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3995 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3996 		WREG32(CP_ME_RAM_WADDR, 0);
3997 	}
3998 
3999 	return 0;
4000 }
4001 
4002 /**
4003  * cik_cp_gfx_start - start the gfx ring
4004  *
4005  * @rdev: radeon_device pointer
4006  *
4007  * Enables the ring and loads the clear state context and other
4008  * packets required to init the ring.
4009  * Returns 0 for success, error for failure.
4010  */
4011 static int cik_cp_gfx_start(struct radeon_device *rdev)
4012 {
4013 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4014 	int r, i;
4015 
4016 	/* init the CP */
4017 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4018 	WREG32(CP_ENDIAN_SWAP, 0);
4019 	WREG32(CP_DEVICE_ID, 1);
4020 
4021 	cik_cp_gfx_enable(rdev, true);
4022 
4023 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4024 	if (r) {
4025 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4026 		return r;
4027 	}
4028 
4029 	/* init the CE partitions.  CE only used for gfx on CIK */
4030 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4031 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4032 	radeon_ring_write(ring, 0x8000);
4033 	radeon_ring_write(ring, 0x8000);
4034 
4035 	/* setup clear context state */
4036 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4037 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4038 
4039 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4040 	radeon_ring_write(ring, 0x80000000);
4041 	radeon_ring_write(ring, 0x80000000);
4042 
4043 	for (i = 0; i < cik_default_size; i++)
4044 		radeon_ring_write(ring, cik_default_state[i]);
4045 
4046 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4047 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4048 
4049 	/* set clear context state */
4050 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4051 	radeon_ring_write(ring, 0);
4052 
4053 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4054 	radeon_ring_write(ring, 0x00000316);
4055 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4056 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4057 
4058 	radeon_ring_unlock_commit(rdev, ring, false);
4059 
4060 	return 0;
4061 }
4062 
4063 /**
4064  * cik_cp_gfx_fini - stop the gfx ring
4065  *
4066  * @rdev: radeon_device pointer
4067  *
4068  * Stop the gfx ring and tear down the driver ring
4069  * info.
4070  */
4071 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4072 {
4073 	cik_cp_gfx_enable(rdev, false);
4074 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4075 }
4076 
4077 /**
4078  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4079  *
4080  * @rdev: radeon_device pointer
4081  *
4082  * Program the location and size of the gfx ring buffer
4083  * and test it to make sure it's working.
4084  * Returns 0 for success, error for failure.
4085  */
4086 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4087 {
4088 	struct radeon_ring *ring;
4089 	u32 tmp;
4090 	u32 rb_bufsz;
4091 	u64 rb_addr;
4092 	int r;
4093 
4094 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4095 	if (rdev->family != CHIP_HAWAII)
4096 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4097 
4098 	/* Set the write pointer delay */
4099 	WREG32(CP_RB_WPTR_DELAY, 0);
4100 
4101 	/* set the RB to use vmid 0 */
4102 	WREG32(CP_RB_VMID, 0);
4103 
4104 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4105 
4106 	/* ring 0 - compute and gfx */
4107 	/* Set ring buffer size */
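	/* RB_BUFSZ is log2 of the ring size in 8-byte units; the block size
	 * field at bit 8 sets the rptr update granularity, here one GPU
	 * page (assumed field semantics, matching earlier asics).
	 */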
4108 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4109 	rb_bufsz = order_base_2(ring->ring_size / 8);
4110 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4111 #ifdef __BIG_ENDIAN
4112 	tmp |= BUF_SWAP_32BIT;
4113 #endif
4114 	WREG32(CP_RB0_CNTL, tmp);
4115 
4116 	/* Initialize the ring buffer's read and write pointers */
4117 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4118 	ring->wptr = 0;
4119 	WREG32(CP_RB0_WPTR, ring->wptr);
4120 
	/* set the wb address whether it's enabled or not */
4122 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4123 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4124 
4125 	/* scratch register shadowing is no longer supported */
4126 	WREG32(SCRATCH_UMSK, 0);
4127 
4128 	if (!rdev->wb.enabled)
4129 		tmp |= RB_NO_UPDATE;
4130 
4131 	mdelay(1);
4132 	WREG32(CP_RB0_CNTL, tmp);
4133 
4134 	rb_addr = ring->gpu_addr >> 8;
4135 	WREG32(CP_RB0_BASE, rb_addr);
4136 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4137 
4138 	/* start the ring */
4139 	cik_cp_gfx_start(rdev);
4140 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4141 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4142 	if (r) {
4143 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4144 		return r;
4145 	}
4146 
4147 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4148 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4149 
4150 	return 0;
4151 }
4152 
4153 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4154 		     struct radeon_ring *ring)
4155 {
4156 	u32 rptr;
4157 
4158 	if (rdev->wb.enabled)
4159 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4160 	else
4161 		rptr = RREG32(CP_RB0_RPTR);
4162 
4163 	return rptr;
4164 }
4165 
4166 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4167 		     struct radeon_ring *ring)
4168 {
4169 	return RREG32(CP_RB0_WPTR);
4170 }
4171 
4172 void cik_gfx_set_wptr(struct radeon_device *rdev,
4173 		      struct radeon_ring *ring)
4174 {
4175 	WREG32(CP_RB0_WPTR, ring->wptr);
4176 	(void)RREG32(CP_RB0_WPTR);
4177 }
4178 
4179 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4180 			 struct radeon_ring *ring)
4181 {
4182 	u32 rptr;
4183 
4184 	if (rdev->wb.enabled) {
4185 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4186 	} else {
4187 		mutex_lock(&rdev->srbm_mutex);
4188 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4189 		rptr = RREG32(CP_HQD_PQ_RPTR);
4190 		cik_srbm_select(rdev, 0, 0, 0, 0);
4191 		mutex_unlock(&rdev->srbm_mutex);
4192 	}
4193 
4194 	return rptr;
4195 }
4196 
4197 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4198 			 struct radeon_ring *ring)
4199 {
4200 	u32 wptr;
4201 
4202 	if (rdev->wb.enabled) {
4203 		/* XXX check if swapping is necessary on BE */
4204 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4205 	} else {
4206 		mutex_lock(&rdev->srbm_mutex);
4207 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4208 		wptr = RREG32(CP_HQD_PQ_WPTR);
4209 		cik_srbm_select(rdev, 0, 0, 0, 0);
4210 		mutex_unlock(&rdev->srbm_mutex);
4211 	}
4212 
4213 	return wptr;
4214 }
4215 
4216 void cik_compute_set_wptr(struct radeon_device *rdev,
4217 			  struct radeon_ring *ring)
4218 {
4219 	/* XXX check if swapping is necessary on BE */
4220 	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4221 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4222 }
4223 
4224 static void cik_compute_stop(struct radeon_device *rdev,
4225 			     struct radeon_ring *ring)
4226 {
4227 	u32 j, tmp;
4228 
4229 	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4230 	/* Disable wptr polling. */
4231 	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4232 	tmp &= ~WPTR_POLL_EN;
4233 	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4234 	/* Disable HQD. */
4235 	if (RREG32(CP_HQD_ACTIVE) & 1) {
4236 		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4237 		for (j = 0; j < rdev->usec_timeout; j++) {
4238 			if (!(RREG32(CP_HQD_ACTIVE) & 1))
4239 				break;
4240 			udelay(1);
4241 		}
4242 		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4243 		WREG32(CP_HQD_PQ_RPTR, 0);
4244 		WREG32(CP_HQD_PQ_WPTR, 0);
4245 	}
4246 	cik_srbm_select(rdev, 0, 0, 0, 0);
4247 }
4248 
4249 /**
4250  * cik_cp_compute_enable - enable/disable the compute CP MEs
4251  *
4252  * @rdev: radeon_device pointer
4253  * @enable: enable or disable the MEs
4254  *
4255  * Halts or unhalts the compute MEs.
4256  */
4257 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4258 {
4259 	if (enable)
4260 		WREG32(CP_MEC_CNTL, 0);
4261 	else {
4262 		/*
4263 		 * To make hibernation reliable we need to clear compute ring
4264 		 * configuration before halting the compute ring.
4265 		 */
4266 		mutex_lock(&rdev->srbm_mutex);
		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4269 		mutex_unlock(&rdev->srbm_mutex);
4270 
4271 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4272 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4273 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4274 	}
4275 	udelay(50);
4276 }
4277 
4278 /**
4279  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4280  *
4281  * @rdev: radeon_device pointer
4282  *
4283  * Loads the compute MEC1&2 ucode.
4284  * Returns 0 for success, -EINVAL if the ucode is not available.
4285  */
4286 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4287 {
4288 	int i;
4289 
4290 	if (!rdev->mec_fw)
4291 		return -EINVAL;
4292 
4293 	cik_cp_compute_enable(rdev, false);
4294 
4295 	if (rdev->new_fw) {
4296 		const struct gfx_firmware_header_v1_0 *mec_hdr =
4297 			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4298 		const __le32 *fw_data;
4299 		u32 fw_size;
4300 
4301 		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4302 
4303 		/* MEC1 */
4304 		fw_data = (const __le32 *)
4305 			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4306 		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4307 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4308 		for (i = 0; i < fw_size; i++)
4309 			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4310 		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4311 
4312 		/* MEC2 */
4313 		if (rdev->family == CHIP_KAVERI) {
4314 			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4315 				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4316 
4317 			fw_data = (const __le32 *)
4318 				(rdev->mec2_fw->data +
4319 				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4320 			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4321 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4322 			for (i = 0; i < fw_size; i++)
4323 				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4324 			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4325 		}
4326 	} else {
4327 		const __be32 *fw_data;
4328 
4329 		/* MEC1 */
4330 		fw_data = (const __be32 *)rdev->mec_fw->data;
4331 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4332 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4333 			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4334 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4335 
4336 		if (rdev->family == CHIP_KAVERI) {
4337 			/* MEC2 */
4338 			fw_data = (const __be32 *)rdev->mec_fw->data;
4339 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4340 			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4341 				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4342 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4343 		}
4344 	}
4345 
4346 	return 0;
4347 }
4348 
4349 /**
4350  * cik_cp_compute_start - start the compute queues
4351  *
4352  * @rdev: radeon_device pointer
4353  *
4354  * Enable the compute queues.
4355  * Returns 0 for success, error for failure.
4356  */
4357 static int cik_cp_compute_start(struct radeon_device *rdev)
4358 {
4359 	cik_cp_compute_enable(rdev, true);
4360 
4361 	return 0;
4362 }
4363 
4364 /**
4365  * cik_cp_compute_fini - stop the compute queues
4366  *
4367  * @rdev: radeon_device pointer
4368  *
4369  * Stop the compute queues and tear down the driver queue
4370  * info.
4371  */
4372 static void cik_cp_compute_fini(struct radeon_device *rdev)
4373 {
4374 	int i, idx, r;
4375 
4376 	cik_cp_compute_enable(rdev, false);
4377 
4378 	for (i = 0; i < 2; i++) {
4379 		if (i == 0)
4380 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4381 		else
4382 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4383 
4384 		if (rdev->ring[idx].mqd_obj) {
4385 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4386 			if (unlikely(r != 0))
4387 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4388 
4389 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4390 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4391 
4392 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4393 			rdev->ring[idx].mqd_obj = NULL;
4394 		}
4395 	}
4396 }
4397 
4398 static void cik_mec_fini(struct radeon_device *rdev)
4399 {
4400 	int r;
4401 
4402 	if (rdev->mec.hpd_eop_obj) {
4403 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4404 		if (unlikely(r != 0))
4405 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4406 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4407 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4408 
4409 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4410 		rdev->mec.hpd_eop_obj = NULL;
4411 	}
4412 }
4413 
4414 #define MEC_HPD_SIZE 2048
4415 
4416 static int cik_mec_init(struct radeon_device *rdev)
4417 {
4418 	int r;
4419 	u32 *hpd;
4420 
4421 	/*
4422 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4423 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4424 	 * Nonetheless, we assign only 1 pipe because all other pipes will
4425 	 * be handled by KFD
4426 	 */
4427 	rdev->mec.num_mec = 1;
4428 	rdev->mec.num_pipe = 1;
4429 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
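	/* i.e. 1 MEC * 1 pipe * 8 queues/pipe = 8 kernel-managed queues */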
4430 
4431 	if (rdev->mec.hpd_eop_obj == NULL) {
4432 		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4434 				     PAGE_SIZE, true,
4435 				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4436 				     &rdev->mec.hpd_eop_obj);
4437 		if (r) {
			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4439 			return r;
4440 		}
4441 	}
4442 
4443 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4444 	if (unlikely(r != 0)) {
4445 		cik_mec_fini(rdev);
4446 		return r;
4447 	}
4448 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4449 			  &rdev->mec.hpd_eop_gpu_addr);
4450 	if (r) {
		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4452 		cik_mec_fini(rdev);
4453 		return r;
4454 	}
4455 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4456 	if (r) {
		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4458 		cik_mec_fini(rdev);
4459 		return r;
4460 	}
4461 
4462 	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4464 
4465 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4466 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4467 
4468 	return 0;
4469 }
4470 
struct hqd_registers {
4473 	u32 cp_mqd_base_addr;
4474 	u32 cp_mqd_base_addr_hi;
4475 	u32 cp_hqd_active;
4476 	u32 cp_hqd_vmid;
4477 	u32 cp_hqd_persistent_state;
4478 	u32 cp_hqd_pipe_priority;
4479 	u32 cp_hqd_queue_priority;
4480 	u32 cp_hqd_quantum;
4481 	u32 cp_hqd_pq_base;
4482 	u32 cp_hqd_pq_base_hi;
4483 	u32 cp_hqd_pq_rptr;
4484 	u32 cp_hqd_pq_rptr_report_addr;
4485 	u32 cp_hqd_pq_rptr_report_addr_hi;
4486 	u32 cp_hqd_pq_wptr_poll_addr;
4487 	u32 cp_hqd_pq_wptr_poll_addr_hi;
4488 	u32 cp_hqd_pq_doorbell_control;
4489 	u32 cp_hqd_pq_wptr;
4490 	u32 cp_hqd_pq_control;
4491 	u32 cp_hqd_ib_base_addr;
4492 	u32 cp_hqd_ib_base_addr_hi;
4493 	u32 cp_hqd_ib_rptr;
4494 	u32 cp_hqd_ib_control;
4495 	u32 cp_hqd_iq_timer;
4496 	u32 cp_hqd_iq_rptr;
4497 	u32 cp_hqd_dequeue_request;
4498 	u32 cp_hqd_dma_offload;
4499 	u32 cp_hqd_sema_cmd;
4500 	u32 cp_hqd_msg_type;
4501 	u32 cp_hqd_atomic0_preop_lo;
4502 	u32 cp_hqd_atomic0_preop_hi;
4503 	u32 cp_hqd_atomic1_preop_lo;
4504 	u32 cp_hqd_atomic1_preop_hi;
4505 	u32 cp_hqd_hq_scheduler0;
4506 	u32 cp_hqd_hq_scheduler1;
4507 	u32 cp_mqd_control;
4508 };
4509 
struct bonaire_mqd {
4512 	u32 header;
4513 	u32 dispatch_initiator;
4514 	u32 dimensions[3];
4515 	u32 start_idx[3];
4516 	u32 num_threads[3];
4517 	u32 pipeline_stat_enable;
4518 	u32 perf_counter_enable;
4519 	u32 pgm[2];
4520 	u32 tba[2];
4521 	u32 tma[2];
4522 	u32 pgm_rsrc[2];
4523 	u32 vmid;
4524 	u32 resource_limits;
4525 	u32 static_thread_mgmt01[2];
4526 	u32 tmp_ring_size;
4527 	u32 static_thread_mgmt23[2];
4528 	u32 restart[3];
4529 	u32 thread_trace_enable;
4530 	u32 reserved1;
4531 	u32 user_data[16];
4532 	u32 vgtcs_invoke_count[2];
4533 	struct hqd_registers queue_state;
4534 	u32 dequeue_cntr;
4535 	u32 interrupt_queue[64];
4536 };
4537 
4538 /**
4539  * cik_cp_compute_resume - setup the compute queue registers
4540  *
4541  * @rdev: radeon_device pointer
4542  *
4543  * Program the compute queues and test them to make sure they
4544  * are working.
4545  * Returns 0 for success, error for failure.
4546  */
4547 static int cik_cp_compute_resume(struct radeon_device *rdev)
4548 {
4549 	int r, i, j, idx;
4550 	u32 tmp;
4551 	bool use_doorbell = true;
4552 	u64 hqd_gpu_addr;
4553 	u64 mqd_gpu_addr;
4554 	u64 eop_gpu_addr;
4555 	u64 wb_gpu_addr;
4556 	u32 *buf;
4557 	struct bonaire_mqd *mqd;
4558 
4559 	r = cik_cp_compute_start(rdev);
4560 	if (r)
4561 		return r;
4562 
4563 	/* fix up chicken bits */
4564 	tmp = RREG32(CP_CPF_DEBUG);
4565 	tmp |= (1 << 23);
4566 	WREG32(CP_CPF_DEBUG, tmp);
4567 
4568 	/* init the pipes */
4569 	mutex_lock(&rdev->srbm_mutex);
4570 
4571 	for (i = 0; i < rdev->mec.num_pipe; ++i) {
4572 		cik_srbm_select(rdev, 0, i, 0, 0);
4573 
		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4575 		/* write the EOP addr */
4576 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4577 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4578 
4579 		/* set the VMID assigned */
4580 		WREG32(CP_HPD_EOP_VMID, 0);
4581 
4582 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4583 		tmp = RREG32(CP_HPD_EOP_CONTROL);
4584 		tmp &= ~EOP_SIZE_MASK;
4585 		tmp |= order_base_2(MEC_HPD_SIZE / 8);
4586 		WREG32(CP_HPD_EOP_CONTROL, tmp);
4588 	}
4589 	mutex_unlock(&rdev->srbm_mutex);
4590 
4591 	/* init the queues.  Just two for now. */
4592 	for (i = 0; i < 2; i++) {
4593 		if (i == 0)
4594 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4595 		else
4596 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4597 
4598 		if (rdev->ring[idx].mqd_obj == NULL) {
4599 			r = radeon_bo_create(rdev,
4600 					     sizeof(struct bonaire_mqd),
4601 					     PAGE_SIZE, true,
4602 					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4603 					     NULL, &rdev->ring[idx].mqd_obj);
4604 			if (r) {
4605 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4606 				return r;
4607 			}
4608 		}
4609 
4610 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4611 		if (unlikely(r != 0)) {
4612 			cik_cp_compute_fini(rdev);
4613 			return r;
4614 		}
4615 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4616 				  &mqd_gpu_addr);
4617 		if (r) {
4618 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4619 			cik_cp_compute_fini(rdev);
4620 			return r;
4621 		}
4622 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4623 		if (r) {
4624 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4625 			cik_cp_compute_fini(rdev);
4626 			return r;
4627 		}
4628 
4629 		/* init the mqd struct */
4630 		memset(buf, 0, sizeof(struct bonaire_mqd));
4631 
4632 		mqd = (struct bonaire_mqd *)buf;
4633 		mqd->header = 0xC0310800;
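		/* 0xffffffff in each static thread management mask makes
		 * every CU available to this queue (assumed semantics of
		 * these MQD fields).
		 */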
4634 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4635 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4636 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4637 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4638 
4639 		mutex_lock(&rdev->srbm_mutex);
4640 		cik_srbm_select(rdev, rdev->ring[idx].me,
4641 				rdev->ring[idx].pipe,
4642 				rdev->ring[idx].queue, 0);
4643 
4644 		/* disable wptr polling */
4645 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4646 		tmp &= ~WPTR_POLL_EN;
4647 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4648 
4649 		/* enable doorbell? */
4650 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4651 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4652 		if (use_doorbell)
4653 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4654 		else
4655 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4656 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4657 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4658 
4659 		/* disable the queue if it's active */
4660 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4661 		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = 0;
4663 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4664 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4665 			for (j = 0; j < rdev->usec_timeout; j++) {
4666 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4667 					break;
4668 				udelay(1);
4669 			}
4670 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4671 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4672 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4673 		}
4674 
4675 		/* set the pointer to the MQD */
4676 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4677 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4678 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4679 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4680 		/* set MQD vmid to 0 */
4681 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4682 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4683 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4684 
		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4686 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4687 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4688 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4689 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4690 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4691 
4692 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4693 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4694 		mqd->queue_state.cp_hqd_pq_control &=
4695 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4696 
4697 		mqd->queue_state.cp_hqd_pq_control |=
4698 			order_base_2(rdev->ring[idx].ring_size / 8);
4699 		mqd->queue_state.cp_hqd_pq_control |=
4700 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4701 #ifdef __BIG_ENDIAN
4702 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4703 #endif
4704 		mqd->queue_state.cp_hqd_pq_control &=
4705 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4706 		mqd->queue_state.cp_hqd_pq_control |=
4707 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4708 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4709 
4710 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4711 		if (i == 0)
4712 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4713 		else
4714 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4715 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4716 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4717 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4718 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4719 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4720 
		/* set the wb address whether it's enabled or not */
4722 		if (i == 0)
4723 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4724 		else
4725 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4726 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4727 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4728 			upper_32_bits(wb_gpu_addr) & 0xffff;
4729 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4730 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4731 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4732 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4733 
4734 		/* enable the doorbell if requested */
4735 		if (use_doorbell) {
4736 			mqd->queue_state.cp_hqd_pq_doorbell_control =
4737 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4738 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4739 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4740 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4741 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4742 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4743 				~(DOORBELL_SOURCE | DOORBELL_HIT);
4745 		} else {
4746 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4747 		}
4748 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4749 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4750 
4751 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4752 		rdev->ring[idx].wptr = 0;
4753 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4754 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4755 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4756 
4757 		/* set the vmid for the queue */
4758 		mqd->queue_state.cp_hqd_vmid = 0;
4759 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4760 
4761 		/* activate the queue */
4762 		mqd->queue_state.cp_hqd_active = 1;
4763 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4764 
4765 		cik_srbm_select(rdev, 0, 0, 0, 0);
4766 		mutex_unlock(&rdev->srbm_mutex);
4767 
4768 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4769 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4770 
4771 		rdev->ring[idx].ready = true;
4772 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4773 		if (r)
4774 			rdev->ring[idx].ready = false;
4775 	}
4776 
4777 	return 0;
4778 }
4779 
4780 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4781 {
4782 	cik_cp_gfx_enable(rdev, enable);
4783 	cik_cp_compute_enable(rdev, enable);
4784 }
4785 
4786 static int cik_cp_load_microcode(struct radeon_device *rdev)
4787 {
4788 	int r;
4789 
4790 	r = cik_cp_gfx_load_microcode(rdev);
4791 	if (r)
4792 		return r;
4793 	r = cik_cp_compute_load_microcode(rdev);
4794 	if (r)
4795 		return r;
4796 
4797 	return 0;
4798 }
4799 
4800 static void cik_cp_fini(struct radeon_device *rdev)
4801 {
4802 	cik_cp_gfx_fini(rdev);
4803 	cik_cp_compute_fini(rdev);
4804 }
4805 
4806 static int cik_cp_resume(struct radeon_device *rdev)
4807 {
4808 	int r;
4809 
4810 	cik_enable_gui_idle_interrupt(rdev, false);
4811 
4812 	r = cik_cp_load_microcode(rdev);
4813 	if (r)
4814 		return r;
4815 
4816 	r = cik_cp_gfx_resume(rdev);
4817 	if (r)
4818 		return r;
4819 	r = cik_cp_compute_resume(rdev);
4820 	if (r)
4821 		return r;
4822 
4823 	cik_enable_gui_idle_interrupt(rdev, true);
4824 
4825 	return 0;
4826 }
4827 
4828 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4829 {
4830 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4831 		RREG32(GRBM_STATUS));
4832 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4833 		RREG32(GRBM_STATUS2));
4834 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4835 		RREG32(GRBM_STATUS_SE0));
4836 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4837 		RREG32(GRBM_STATUS_SE1));
4838 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4839 		RREG32(GRBM_STATUS_SE2));
4840 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4841 		RREG32(GRBM_STATUS_SE3));
4842 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4843 		RREG32(SRBM_STATUS));
4844 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4845 		RREG32(SRBM_STATUS2));
4846 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4847 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4848 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4849 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4850 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4851 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4852 		 RREG32(CP_STALLED_STAT1));
4853 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4854 		 RREG32(CP_STALLED_STAT2));
4855 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4856 		 RREG32(CP_STALLED_STAT3));
4857 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4858 		 RREG32(CP_CPF_BUSY_STAT));
4859 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4860 		 RREG32(CP_CPF_STALLED_STAT1));
4861 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4862 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4863 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4864 		 RREG32(CP_CPC_STALLED_STAT1));
4865 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4866 }
4867 
4868 /**
4869  * cik_gpu_check_soft_reset - check which blocks are busy
4870  *
4871  * @rdev: radeon_device pointer
4872  *
4873  * Check which blocks are busy and return the relevant reset
4874  * mask to be used by cik_gpu_soft_reset().
4875  * Returns a mask of the blocks to be reset.
4876  */
4877 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4878 {
4879 	u32 reset_mask = 0;
4880 	u32 tmp;
4881 
4882 	/* GRBM_STATUS */
4883 	tmp = RREG32(GRBM_STATUS);
4884 	if (tmp & (PA_BUSY | SC_BUSY |
4885 		   BCI_BUSY | SX_BUSY |
4886 		   TA_BUSY | VGT_BUSY |
4887 		   DB_BUSY | CB_BUSY |
4888 		   GDS_BUSY | SPI_BUSY |
4889 		   IA_BUSY | IA_BUSY_NO_DMA))
4890 		reset_mask |= RADEON_RESET_GFX;
4891 
4892 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4893 		reset_mask |= RADEON_RESET_CP;
4894 
4895 	/* GRBM_STATUS2 */
4896 	tmp = RREG32(GRBM_STATUS2);
4897 	if (tmp & RLC_BUSY)
4898 		reset_mask |= RADEON_RESET_RLC;
4899 
4900 	/* SDMA0_STATUS_REG */
4901 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4902 	if (!(tmp & SDMA_IDLE))
4903 		reset_mask |= RADEON_RESET_DMA;
4904 
4905 	/* SDMA1_STATUS_REG */
4906 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4907 	if (!(tmp & SDMA_IDLE))
4908 		reset_mask |= RADEON_RESET_DMA1;
4909 
4910 	/* SRBM_STATUS2 */
4911 	tmp = RREG32(SRBM_STATUS2);
4912 	if (tmp & SDMA_BUSY)
4913 		reset_mask |= RADEON_RESET_DMA;
4914 
4915 	if (tmp & SDMA1_BUSY)
4916 		reset_mask |= RADEON_RESET_DMA1;
4917 
4918 	/* SRBM_STATUS */
4919 	tmp = RREG32(SRBM_STATUS);
4920 
4921 	if (tmp & IH_BUSY)
4922 		reset_mask |= RADEON_RESET_IH;
4923 
4924 	if (tmp & SEM_BUSY)
4925 		reset_mask |= RADEON_RESET_SEM;
4926 
4927 	if (tmp & GRBM_RQ_PENDING)
4928 		reset_mask |= RADEON_RESET_GRBM;
4929 
4930 	if (tmp & VMC_BUSY)
4931 		reset_mask |= RADEON_RESET_VMC;
4932 
4933 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4934 		   MCC_BUSY | MCD_BUSY))
4935 		reset_mask |= RADEON_RESET_MC;
4936 
4937 	if (evergreen_is_display_hung(rdev))
4938 		reset_mask |= RADEON_RESET_DISPLAY;
4939 
4940 	/* Skip MC reset as it's most likely not hung, just busy */
4941 	if (reset_mask & RADEON_RESET_MC) {
4942 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4943 		reset_mask &= ~RADEON_RESET_MC;
4944 	}
4945 
4946 	return reset_mask;
4947 }
4948 
4949 /**
4950  * cik_gpu_soft_reset - soft reset GPU
4951  *
4952  * @rdev: radeon_device pointer
4953  * @reset_mask: mask of which blocks to reset
4954  *
4955  * Soft reset the blocks specified in @reset_mask.
4956  */
4957 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4958 {
4959 	struct evergreen_mc_save save;
4960 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4961 	u32 tmp;
4962 
4963 	if (reset_mask == 0)
4964 		return;
4965 
4966 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4967 
4968 	cik_print_gpu_status_regs(rdev);
4969 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4970 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4971 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4972 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4973 
4974 	/* disable CG/PG */
4975 	cik_fini_pg(rdev);
4976 	cik_fini_cg(rdev);
4977 
4978 	/* stop the rlc */
4979 	cik_rlc_stop(rdev);
4980 
4981 	/* Disable GFX parsing/prefetching */
4982 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4983 
4984 	/* Disable MEC parsing/prefetching */
4985 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4986 
4987 	if (reset_mask & RADEON_RESET_DMA) {
4988 		/* sdma0 */
4989 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4990 		tmp |= SDMA_HALT;
4991 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4992 	}
4993 	if (reset_mask & RADEON_RESET_DMA1) {
4994 		/* sdma1 */
4995 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4996 		tmp |= SDMA_HALT;
4997 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4998 	}
4999 
5000 	evergreen_mc_stop(rdev, &save);
5001 	if (evergreen_mc_wait_for_idle(rdev)) {
5002 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5003 	}
5004 
5005 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5006 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5007 
5008 	if (reset_mask & RADEON_RESET_CP) {
5009 		grbm_soft_reset |= SOFT_RESET_CP;
5010 
5011 		srbm_soft_reset |= SOFT_RESET_GRBM;
5012 	}
5013 
5014 	if (reset_mask & RADEON_RESET_DMA)
5015 		srbm_soft_reset |= SOFT_RESET_SDMA;
5016 
5017 	if (reset_mask & RADEON_RESET_DMA1)
5018 		srbm_soft_reset |= SOFT_RESET_SDMA1;
5019 
5020 	if (reset_mask & RADEON_RESET_DISPLAY)
5021 		srbm_soft_reset |= SOFT_RESET_DC;
5022 
5023 	if (reset_mask & RADEON_RESET_RLC)
5024 		grbm_soft_reset |= SOFT_RESET_RLC;
5025 
5026 	if (reset_mask & RADEON_RESET_SEM)
5027 		srbm_soft_reset |= SOFT_RESET_SEM;
5028 
5029 	if (reset_mask & RADEON_RESET_IH)
5030 		srbm_soft_reset |= SOFT_RESET_IH;
5031 
5032 	if (reset_mask & RADEON_RESET_GRBM)
5033 		srbm_soft_reset |= SOFT_RESET_GRBM;
5034 
5035 	if (reset_mask & RADEON_RESET_VMC)
5036 		srbm_soft_reset |= SOFT_RESET_VMC;
5037 
5038 	if (!(rdev->flags & RADEON_IS_IGP)) {
5039 		if (reset_mask & RADEON_RESET_MC)
5040 			srbm_soft_reset |= SOFT_RESET_MC;
5041 	}
5042 
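	/*
	 * Pulse each reset: set the bits, read the register back to post
	 * the write, wait, then clear the bits and read back again.
	 */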
5043 	if (grbm_soft_reset) {
5044 		tmp = RREG32(GRBM_SOFT_RESET);
5045 		tmp |= grbm_soft_reset;
5046 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5047 		WREG32(GRBM_SOFT_RESET, tmp);
5048 		tmp = RREG32(GRBM_SOFT_RESET);
5049 
5050 		udelay(50);
5051 
5052 		tmp &= ~grbm_soft_reset;
5053 		WREG32(GRBM_SOFT_RESET, tmp);
5054 		tmp = RREG32(GRBM_SOFT_RESET);
5055 	}
5056 
5057 	if (srbm_soft_reset) {
5058 		tmp = RREG32(SRBM_SOFT_RESET);
5059 		tmp |= srbm_soft_reset;
5060 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5061 		WREG32(SRBM_SOFT_RESET, tmp);
5062 		tmp = RREG32(SRBM_SOFT_RESET);
5063 
5064 		udelay(50);
5065 
5066 		tmp &= ~srbm_soft_reset;
5067 		WREG32(SRBM_SOFT_RESET, tmp);
5068 		tmp = RREG32(SRBM_SOFT_RESET);
5069 	}
5070 
5071 	/* Wait a little for things to settle down */
5072 	udelay(50);
5073 
5074 	evergreen_mc_resume(rdev, &save);
5075 	udelay(50);
5076 
5077 	cik_print_gpu_status_regs(rdev);
5078 }
5079 
5080 struct kv_reset_save_regs {
5081 	u32 gmcon_reng_execute;
5082 	u32 gmcon_misc;
5083 	u32 gmcon_misc3;
5084 };
5085 
5086 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5087 				   struct kv_reset_save_regs *save)
5088 {
5089 	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5090 	save->gmcon_misc = RREG32(GMCON_MISC);
5091 	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5092 
5093 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5094 	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5095 						STCTRL_STUTTER_EN));
5096 }
5097 
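/*
 * Replay the MC's power-gating FSM (PGFSM) init sequence. Each step
 * below programs one opaque config value and then issues a run of
 * dummy writes so the state machine can advance. A helper capturing
 * the repeated pattern would look roughly like this (hypothetical,
 * shown only to make the sequence readable; note the final step skips
 * the trailing dummy writes):
 *
 *	static void kv_pgfsm_step(struct radeon_device *rdev,
 *				  u32 write_val, u32 config)
 *	{
 *		int i;
 *
 *		WREG32(GMCON_PGFSM_WRITE, write_val);
 *		WREG32(GMCON_PGFSM_CONFIG, config);
 *		for (i = 0; i < 5; i++)
 *			WREG32(GMCON_PGFSM_WRITE, 0);
 *	}
 */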
5098 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5099 				      struct kv_reset_save_regs *save)
5100 {
5101 	int i;
5102 
5103 	WREG32(GMCON_PGFSM_WRITE, 0);
5104 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5105 
5106 	for (i = 0; i < 5; i++)
5107 		WREG32(GMCON_PGFSM_WRITE, 0);
5108 
5109 	WREG32(GMCON_PGFSM_WRITE, 0);
5110 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5111 
5112 	for (i = 0; i < 5; i++)
5113 		WREG32(GMCON_PGFSM_WRITE, 0);
5114 
5115 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5116 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5117 
5118 	for (i = 0; i < 5; i++)
5119 		WREG32(GMCON_PGFSM_WRITE, 0);
5120 
5121 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5122 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5123 
5124 	for (i = 0; i < 5; i++)
5125 		WREG32(GMCON_PGFSM_WRITE, 0);
5126 
5127 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5128 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5129 
5130 	for (i = 0; i < 5; i++)
5131 		WREG32(GMCON_PGFSM_WRITE, 0);
5132 
5133 	WREG32(GMCON_PGFSM_WRITE, 0);
5134 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5135 
5136 	for (i = 0; i < 5; i++)
5137 		WREG32(GMCON_PGFSM_WRITE, 0);
5138 
5139 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5140 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5141 
5142 	for (i = 0; i < 5; i++)
5143 		WREG32(GMCON_PGFSM_WRITE, 0);
5144 
5145 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5146 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5147 
5148 	for (i = 0; i < 5; i++)
5149 		WREG32(GMCON_PGFSM_WRITE, 0);
5150 
5151 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5152 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5153 
5154 	for (i = 0; i < 5; i++)
5155 		WREG32(GMCON_PGFSM_WRITE, 0);
5156 
5157 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5158 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5159 
5160 	for (i = 0; i < 5; i++)
5161 		WREG32(GMCON_PGFSM_WRITE, 0);
5162 
5163 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5164 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5165 
5166 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5167 	WREG32(GMCON_MISC, save->gmcon_misc);
5168 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5169 }
5170 
5171 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5172 {
5173 	struct evergreen_mc_save save;
5174 	struct kv_reset_save_regs kv_save = { 0 };
5175 	u32 tmp, i;
5176 
5177 	dev_info(rdev->dev, "GPU pci config reset\n");
5178 
5179 	/* disable dpm? */
5180 
5181 	/* disable cg/pg */
5182 	cik_fini_pg(rdev);
5183 	cik_fini_cg(rdev);
5184 
5185 	/* Disable GFX parsing/prefetching */
5186 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5187 
5188 	/* Disable MEC parsing/prefetching */
5189 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5190 
5191 	/* sdma0 */
5192 	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5193 	tmp |= SDMA_HALT;
5194 	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5195 	/* sdma1 */
5196 	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5197 	tmp |= SDMA_HALT;
5198 	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5199 	/* XXX other engines? */
5200 
5201 	/* halt the rlc, disable cp internal ints */
5202 	cik_rlc_stop(rdev);
5203 
5204 	udelay(50);
5205 
5206 	/* disable mem access */
5207 	evergreen_mc_stop(rdev, &save);
5208 	if (evergreen_mc_wait_for_idle(rdev)) {
5209 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5210 	}
5211 
5212 	if (rdev->flags & RADEON_IS_IGP)
5213 		kv_save_regs_for_reset(rdev, &kv_save);
5214 
5215 	/* disable BM */
5216 	pci_clear_master(rdev->pdev);
5217 	/* reset */
5218 	radeon_pci_config_reset(rdev);
5219 
5220 	udelay(100);
5221 
5222 	/* wait for asic to come out of reset */
5223 	for (i = 0; i < rdev->usec_timeout; i++) {
5224 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5225 			break;
5226 		udelay(1);
5227 	}
5228 
5229 	/* does asic init need to be run first??? */
5230 	if (rdev->flags & RADEON_IS_IGP)
5231 		kv_restore_regs_for_reset(rdev, &kv_save);
5232 }
5233 
5234 /**
5235  * cik_asic_reset - soft reset GPU
5236  *
5237  * @rdev: radeon_device pointer
5238  * @hard: force hard reset
5239  *
5240  * Look up which blocks are hung and attempt
5241  * to reset them.
5242  * Returns 0 for success.
5243  */
5244 int cik_asic_reset(struct radeon_device *rdev, bool hard)
5245 {
5246 	u32 reset_mask;
5247 
5248 	if (hard) {
5249 		cik_gpu_pci_config_reset(rdev);
5250 		return 0;
5251 	}
5252 
5253 	reset_mask = cik_gpu_check_soft_reset(rdev);
5254 
5255 	if (reset_mask)
5256 		r600_set_bios_scratch_engine_hung(rdev, true);
5257 
5258 	/* try soft reset */
5259 	cik_gpu_soft_reset(rdev, reset_mask);
5260 
5261 	reset_mask = cik_gpu_check_soft_reset(rdev);
5262 
5263 	/* try pci config reset */
5264 	if (reset_mask && radeon_hard_reset)
5265 		cik_gpu_pci_config_reset(rdev);
5266 
5267 	reset_mask = cik_gpu_check_soft_reset(rdev);
5268 
5269 	if (!reset_mask)
5270 		r600_set_bios_scratch_engine_hung(rdev, false);
5271 
5272 	return 0;
5273 }
5274 
5275 /**
5276  * cik_gfx_is_lockup - check if the 3D engine is locked up
5277  *
5278  * @rdev: radeon_device pointer
5279  * @ring: radeon_ring structure holding ring information
5280  *
5281  * Check if the 3D engine is locked up (CIK).
5282  * Returns true if the engine is locked, false if not.
5283  */
5284 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5285 {
5286 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5287 
5288 	if (!(reset_mask & (RADEON_RESET_GFX |
5289 			    RADEON_RESET_COMPUTE |
5290 			    RADEON_RESET_CP))) {
5291 		radeon_ring_lockup_update(rdev, ring);
5292 		return false;
5293 	}
5294 	return radeon_ring_test_lockup(rdev, ring);
5295 }
5296 
5297 /* MC */
5298 /**
5299  * cik_mc_program - program the GPU memory controller
5300  *
5301  * @rdev: radeon_device pointer
5302  *
5303  * Set the location of vram, gart, and AGP in the GPU's
5304  * physical address space (CIK).
5305  */
5306 static void cik_mc_program(struct radeon_device *rdev)
5307 {
5308 	struct evergreen_mc_save save;
5309 	u32 tmp;
5310 	int i, j;
5311 
5312 	/* Initialize HDP */
5313 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5314 		WREG32((0x2c14 + j), 0x00000000);
5315 		WREG32((0x2c18 + j), 0x00000000);
5316 		WREG32((0x2c1c + j), 0x00000000);
5317 		WREG32((0x2c20 + j), 0x00000000);
5318 		WREG32((0x2c24 + j), 0x00000000);
5319 	}
5320 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5321 
5322 	evergreen_mc_stop(rdev, &save);
5323 	if (radeon_mc_wait_for_idle(rdev)) {
5324 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5325 	}
5326 	/* Lockout access through VGA aperture*/
5327 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5328 	/* Update configuration */
5329 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5330 	       rdev->mc.vram_start >> 12);
5331 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5332 	       rdev->mc.vram_end >> 12);
5333 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5334 	       rdev->vram_scratch.gpu_addr >> 12);
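	/*
	 * MC_VM_FB_LOCATION packs the top of vram in bits 31:16 and the
	 * base in bits 15:0, both in 16MB units (hence the >> 24 below).
	 */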
5335 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5336 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5337 	WREG32(MC_VM_FB_LOCATION, tmp);
5338 	/* XXX double check these! */
5339 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5340 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5341 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5342 	WREG32(MC_VM_AGP_BASE, 0);
5343 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5344 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5345 	if (radeon_mc_wait_for_idle(rdev)) {
5346 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5347 	}
5348 	evergreen_mc_resume(rdev, &save);
5349 	/* we need to own VRAM, so turn off the VGA renderer here
5350 	 * to stop it overwriting our objects */
5351 	rv515_vga_render_disable(rdev);
5352 }
5353 
5354 /**
5355  * cik_mc_init - initialize the memory controller driver params
5356  *
5357  * @rdev: radeon_device pointer
5358  *
5359  * Look up the amount of vram, vram width, and decide how to place
5360  * vram and gart within the GPU's physical address space (CIK).
5361  * Returns 0 for success.
5362  */
5363 static int cik_mc_init(struct radeon_device *rdev)
5364 {
5365 	u32 tmp;
5366 	int chansize, numchan;
5367 
5368 	/* Get VRAM information */
5369 	rdev->mc.vram_is_ddr = true;
5370 	tmp = RREG32(MC_ARB_RAMCFG);
5371 	if (tmp & CHANSIZE_MASK) {
5372 		chansize = 64;
5373 	} else {
5374 		chansize = 32;
5375 	}
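	/*
	 * MC_SHARED_CHMAP encodes the number of memory channels; the bus
	 * width is channels * channel size (e.g. 4 channels of 64 bits
	 * make a 256-bit interface).
	 */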
5376 	tmp = RREG32(MC_SHARED_CHMAP);
5377 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5378 	case 0:
5379 	default:
5380 		numchan = 1;
5381 		break;
5382 	case 1:
5383 		numchan = 2;
5384 		break;
5385 	case 2:
5386 		numchan = 4;
5387 		break;
5388 	case 3:
5389 		numchan = 8;
5390 		break;
5391 	case 4:
5392 		numchan = 3;
5393 		break;
5394 	case 5:
5395 		numchan = 6;
5396 		break;
5397 	case 6:
5398 		numchan = 10;
5399 		break;
5400 	case 7:
5401 		numchan = 12;
5402 		break;
5403 	case 8:
5404 		numchan = 16;
5405 		break;
5406 	}
5407 	rdev->mc.vram_width = numchan * chansize;
5408 	/* Could aper size report 0? */
5409 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5410 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5411 	/* size in MB on CIK */
5412 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5413 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5414 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5415 	si_vram_gtt_location(rdev, &rdev->mc);
5416 	radeon_update_bandwidth_info(rdev);
5417 
5418 	return 0;
5419 }
5420 
5421 /*
5422  * GART
5423  * VMID 0 is the physical GPU addresses as used by the kernel.
5424  * VMIDs 1-15 are used for userspace clients and are handled
5425  * by the radeon vm/hsa code.
5426  */
5427 /**
5428  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5429  *
5430  * @rdev: radeon_device pointer
5431  *
5432  * Flush the TLB for the VMID 0 page table (CIK).
5433  */
5434 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5435 {
5436 	/* flush hdp cache */
5437 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5438 
5439 	/* bits 0-15 are the VM contexts 0-15 */
5440 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5441 }
5442 
5443 /**
5444  * cik_pcie_gart_enable - gart enable
5445  *
5446  * @rdev: radeon_device pointer
5447  *
5448  * This sets up the TLBs, programs the page tables for VMID0,
5449  * sets up the hw for VMIDs 1-15 which are allocated on
5450  * demand, and sets up the global locations for the LDS, GDS,
5451  * and GPUVM for FSA64 clients (CIK).
5452  * Returns 0 for success, errors for failure.
5453  */
5454 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5455 {
5456 	int r, i;
5457 
5458 	if (rdev->gart.robj == NULL) {
5459 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5460 		return -EINVAL;
5461 	}
5462 	r = radeon_gart_table_vram_pin(rdev);
5463 	if (r)
5464 		return r;
5465 	/* Setup TLB control */
5466 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5467 	       (0xA << 7) |
5468 	       ENABLE_L1_TLB |
5469 	       ENABLE_L1_FRAGMENT_PROCESSING |
5470 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5471 	       ENABLE_ADVANCED_DRIVER_MODEL |
5472 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5473 	/* Setup L2 cache */
5474 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5475 	       ENABLE_L2_FRAGMENT_PROCESSING |
5476 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5477 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5478 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5479 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5480 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5481 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5482 	       BANK_SELECT(4) |
5483 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5484 	/* setup context0 */
5485 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5486 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5487 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5488 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5489 			(u32)(rdev->dummy_page.addr >> 12));
5490 	WREG32(VM_CONTEXT0_CNTL2, 0);
5491 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5492 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5493 
5494 	WREG32(0x15D4, 0);
5495 	WREG32(0x15D8, 0);
5496 	WREG32(0x15DC, 0);
5497 
5498 	/* restore context1-15 */
5499 	/* set vm size, must be a multiple of 4 */
5500 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5501 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
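	/*
	 * The per-VMID page table base registers sit in two banks of
	 * eight: VMIDs 1-7 at VM_CONTEXT0_PAGE_TABLE_BASE_ADDR and VMIDs
	 * 8-15 at VM_CONTEXT8_PAGE_TABLE_BASE_ADDR, one dword apart.
	 */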
5502 	for (i = 1; i < 16; i++) {
5503 		if (i < 8)
5504 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5505 			       rdev->vm_manager.saved_table_addr[i]);
5506 		else
5507 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5508 			       rdev->vm_manager.saved_table_addr[i]);
5509 	}
5510 
5511 	/* enable context1-15 */
5512 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5513 	       (u32)(rdev->dummy_page.addr >> 12));
5514 	WREG32(VM_CONTEXT1_CNTL2, 4);
5515 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5516 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5517 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5518 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5519 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5520 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5521 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5522 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5523 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5524 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5525 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5526 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5527 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5528 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5529 
5530 	if (rdev->family == CHIP_KAVERI) {
5531 		u32 tmp = RREG32(CHUB_CONTROL);
5532 		tmp &= ~BYPASS_VM;
5533 		WREG32(CHUB_CONTROL, tmp);
5534 	}
5535 
5536 	/* XXX SH_MEM regs */
5537 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5538 	mutex_lock(&rdev->srbm_mutex);
5539 	for (i = 0; i < 16; i++) {
5540 		cik_srbm_select(rdev, 0, 0, 0, i);
5541 		/* CP and shaders */
5542 		WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
5543 		WREG32(SH_MEM_APE1_BASE, 1);
5544 		WREG32(SH_MEM_APE1_LIMIT, 0);
5545 		WREG32(SH_MEM_BASES, 0);
5546 		/* SDMA GFX */
5547 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5548 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5549 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5550 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5551 		/* XXX SDMA RLC - todo */
5552 	}
5553 	cik_srbm_select(rdev, 0, 0, 0, 0);
5554 	mutex_unlock(&rdev->srbm_mutex);
5555 
5556 	cik_pcie_gart_tlb_flush(rdev);
5557 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5558 		 (unsigned)(rdev->mc.gtt_size >> 20),
5559 		 (unsigned long long)rdev->gart.table_addr);
5560 	rdev->gart.ready = true;
5561 	return 0;
5562 }
5563 
5564 /**
5565  * cik_pcie_gart_disable - gart disable
5566  *
5567  * @rdev: radeon_device pointer
5568  *
5569  * This disables all VM page tables (CIK).
5570  */
5571 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5572 {
5573 	unsigned i;
5574 
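	/* save the per-VMID page table base registers so that
	 * cik_pcie_gart_enable() can restore them on resume
	 */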
5575 	for (i = 1; i < 16; ++i) {
5576 		uint32_t reg;
5577 		if (i < 8)
5578 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5579 		else
5580 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5581 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5582 	}
5583 
5584 	/* Disable all tables */
5585 	WREG32(VM_CONTEXT0_CNTL, 0);
5586 	WREG32(VM_CONTEXT1_CNTL, 0);
5587 	/* Setup TLB control */
5588 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5589 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5590 	/* Setup L2 cache */
5591 	WREG32(VM_L2_CNTL,
5592 	       ENABLE_L2_FRAGMENT_PROCESSING |
5593 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5594 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5595 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5596 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5597 	WREG32(VM_L2_CNTL2, 0);
5598 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5599 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5600 	radeon_gart_table_vram_unpin(rdev);
5601 }
5602 
5603 /**
5604  * cik_pcie_gart_fini - vm fini callback
5605  *
5606  * @rdev: radeon_device pointer
5607  *
5608  * Tears down the driver GART/VM setup (CIK).
5609  */
5610 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5611 {
5612 	cik_pcie_gart_disable(rdev);
5613 	radeon_gart_table_vram_free(rdev);
5614 	radeon_gart_fini(rdev);
5615 }
5616 
5617 /* vm parser */
5618 /**
5619  * cik_ib_parse - vm ib_parse callback
5620  *
5621  * @rdev: radeon_device pointer
5622  * @ib: indirect buffer pointer
5623  *
5624  * CIK uses hw IB checking so this is a nop (CIK).
5625  */
5626 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5627 {
5628 	return 0;
5629 }
5630 
5631 /*
5632  * vm
5633  * VMID 0 is the physical GPU addresses as used by the kernel.
5634  * VMIDs 1-15 are used for userspace clients and are handled
5635  * by the radeon vm/hsa code.
5636  */
5637 /**
5638  * cik_vm_init - cik vm init callback
5639  *
5640  * @rdev: radeon_device pointer
5641  *
5642  * Inits cik specific vm parameters (number of VMs, base of vram for
5643  * VMIDs 1-15) (CIK).
5644  * Returns 0 for success.
5645  */
5646 int cik_vm_init(struct radeon_device *rdev)
5647 {
5648 	/*
5649 	 * number of VMs
5650 	 * VMID 0 is reserved for System
5651 	 * radeon graphics/compute will use VMIDs 1-15
5652 	 */
5653 	rdev->vm_manager.nvm = 16;
5654 	/* base offset of vram pages */
5655 	if (rdev->flags & RADEON_IS_IGP) {
5656 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5657 		tmp <<= 22;
5658 		rdev->vm_manager.vram_base_offset = tmp;
5659 	} else
5660 		rdev->vm_manager.vram_base_offset = 0;
5661 
5662 	return 0;
5663 }
5664 
5665 /**
5666  * cik_vm_fini - cik vm fini callback
5667  *
5668  * @rdev: radeon_device pointer
5669  *
5670  * Tear down any asic specific VM setup (CIK).
5671  */
5672 void cik_vm_fini(struct radeon_device *rdev)
5673 {
5674 }
5675 
5676 /**
5677  * cik_vm_decode_fault - print human readable fault info
5678  *
5679  * @rdev: radeon_device pointer
5680  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5681  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5682  *
5683  * Print human readable fault information (CIK).
5684  */
5685 static void cik_vm_decode_fault(struct radeon_device *rdev,
5686 				u32 status, u32 addr, u32 mc_client)
5687 {
5688 	u32 mc_id;
5689 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5690 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5691 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5692 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5693 
5694 	if (rdev->family == CHIP_HAWAII)
5695 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5696 	else
5697 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5698 
5699 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5700 	       protections, vmid, addr,
5701 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5702 	       block, mc_client, mc_id);
5703 }
5704 
5705 /**
5706  * cik_vm_flush - cik vm flush using the CP
5707  *
5708  * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @vm_id: VMID of the page table to flush
 * @pd_addr: address of the page directory
5709  *
5710  * Update the page table base and flush the VM TLB
5711  * using the CP (CIK).
5712  */
5713 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5714 		  unsigned vm_id, uint64_t pd_addr)
5715 {
5716 	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5717 
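	/*
	 * Each WRITE_DATA packet below carries a control dword (engine
	 * and destination select), the destination register dword offset
	 * (hence the >> 2), the upper address bits (0 for registers) and
	 * then the payload dword(s).
	 */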
5718 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5719 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5720 				 WRITE_DATA_DST_SEL(0)));
5721 	if (vm_id < 8) {
5722 		radeon_ring_write(ring,
5723 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5724 	} else {
5725 		radeon_ring_write(ring,
5726 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5727 	}
5728 	radeon_ring_write(ring, 0);
5729 	radeon_ring_write(ring, pd_addr >> 12);
5730 
5731 	/* update SH_MEM_* regs */
5732 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5733 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5734 				 WRITE_DATA_DST_SEL(0)));
5735 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5736 	radeon_ring_write(ring, 0);
5737 	radeon_ring_write(ring, VMID(vm_id));
5738 
5739 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5740 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5741 				 WRITE_DATA_DST_SEL(0)));
5742 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5743 	radeon_ring_write(ring, 0);
5744 
5745 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5746 	radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
5747 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5748 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5749 
5750 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5751 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5752 				 WRITE_DATA_DST_SEL(0)));
5753 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5754 	radeon_ring_write(ring, 0);
5755 	radeon_ring_write(ring, VMID(0));
5756 
5757 	/* HDP flush */
5758 	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5759 
5760 	/* bits 0-15 are the VM contexts 0-15 */
5761 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5762 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5763 				 WRITE_DATA_DST_SEL(0)));
5764 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5765 	radeon_ring_write(ring, 0);
5766 	radeon_ring_write(ring, 1 << vm_id);
5767 
5768 	/* wait for the invalidate to complete */
5769 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5770 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5771 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
5772 				 WAIT_REG_MEM_ENGINE(0))); /* me */
5773 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5774 	radeon_ring_write(ring, 0);
5775 	radeon_ring_write(ring, 0); /* ref */
5776 	radeon_ring_write(ring, 0); /* mask */
5777 	radeon_ring_write(ring, 0x20); /* poll interval */
5778 
5779 	/* compute doesn't have PFP */
5780 	if (usepfp) {
5781 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5782 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5783 		radeon_ring_write(ring, 0x0);
5784 	}
5785 }
5786 
5787 /*
5788  * RLC
5789  * The RLC is a multi-purpose microengine that handles a
5790  * variety of functions, the most important of which is
5791  * the interrupt controller.
5792  */
5793 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5794 					  bool enable)
5795 {
5796 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5797 
5798 	if (enable)
5799 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5800 	else
5801 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5802 	WREG32(CP_INT_CNTL_RING0, tmp);
5803 }
5804 
5805 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5806 {
5807 	u32 tmp;
5808 
5809 	tmp = RREG32(RLC_LB_CNTL);
5810 	if (enable)
5811 		tmp |= LOAD_BALANCE_ENABLE;
5812 	else
5813 		tmp &= ~LOAD_BALANCE_ENABLE;
5814 	WREG32(RLC_LB_CNTL, tmp);
5815 }
5816 
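/*
 * Wait for the serdes CU masters on every SE/SH, and then the non-CU
 * masters (GC/TC), to report idle before RLC state is changed.
 */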
5817 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5818 {
5819 	u32 i, j, k;
5820 	u32 mask;
5821 
5822 	mutex_lock(&rdev->grbm_idx_mutex);
5823 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5824 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5825 			cik_select_se_sh(rdev, i, j);
5826 			for (k = 0; k < rdev->usec_timeout; k++) {
5827 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5828 					break;
5829 				udelay(1);
5830 			}
5831 		}
5832 	}
5833 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5834 	mutex_unlock(&rdev->grbm_idx_mutex);
5835 
5836 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5837 	for (k = 0; k < rdev->usec_timeout; k++) {
5838 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5839 			break;
5840 		udelay(1);
5841 	}
5842 }
5843 
5844 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5845 {
5846 	u32 tmp;
5847 
5848 	tmp = RREG32(RLC_CNTL);
5849 	if (tmp != rlc)
5850 		WREG32(RLC_CNTL, rlc);
5851 }
5852 
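/*
 * Disable the RLC, wait for it to go idle and return the previous
 * RLC_CNTL value so the caller can restore it with cik_update_rlc().
 */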
5853 static u32 cik_halt_rlc(struct radeon_device *rdev)
5854 {
5855 	u32 data, orig;
5856 
5857 	orig = data = RREG32(RLC_CNTL);
5858 
5859 	if (data & RLC_ENABLE) {
5860 		u32 i;
5861 
5862 		data &= ~RLC_ENABLE;
5863 		WREG32(RLC_CNTL, data);
5864 
5865 		for (i = 0; i < rdev->usec_timeout; i++) {
5866 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5867 				break;
5868 			udelay(1);
5869 		}
5870 
5871 		cik_wait_for_rlc_serdes(rdev);
5872 	}
5873 
5874 	return orig;
5875 }
5876 
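/*
 * Safe-mode handshake: post REQ with the enter message, wait for the
 * GFX power/clock status bits to come up, then wait for the RLC to
 * acknowledge by clearing REQ.
 */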
5877 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5878 {
5879 	u32 tmp, i, mask;
5880 
5881 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5882 	WREG32(RLC_GPR_REG2, tmp);
5883 
5884 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5885 	for (i = 0; i < rdev->usec_timeout; i++) {
5886 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5887 			break;
5888 		udelay(1);
5889 	}
5890 
5891 	for (i = 0; i < rdev->usec_timeout; i++) {
5892 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5893 			break;
5894 		udelay(1);
5895 	}
5896 }
5897 
5898 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5899 {
5900 	u32 tmp;
5901 
5902 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5903 	WREG32(RLC_GPR_REG2, tmp);
5904 }
5905 
5906 /**
5907  * cik_rlc_stop - stop the RLC ME
5908  *
5909  * @rdev: radeon_device pointer
5910  *
5911  * Halt the RLC ME (MicroEngine) (CIK).
5912  */
5913 static void cik_rlc_stop(struct radeon_device *rdev)
5914 {
5915 	WREG32(RLC_CNTL, 0);
5916 
5917 	cik_enable_gui_idle_interrupt(rdev, false);
5918 
5919 	cik_wait_for_rlc_serdes(rdev);
5920 }
5921 
5922 /**
5923  * cik_rlc_start - start the RLC ME
5924  *
5925  * @rdev: radeon_device pointer
5926  *
5927  * Unhalt the RLC ME (MicroEngine) (CIK).
5928  */
5929 static void cik_rlc_start(struct radeon_device *rdev)
5930 {
5931 	WREG32(RLC_CNTL, RLC_ENABLE);
5932 
5933 	cik_enable_gui_idle_interrupt(rdev, true);
5934 
5935 	udelay(50);
5936 }
5937 
5938 /**
5939  * cik_rlc_resume - setup the RLC hw
5940  *
5941  * @rdev: radeon_device pointer
5942  *
5943  * Initialize the RLC registers, load the ucode,
5944  * and start the RLC (CIK).
5945  * Returns 0 for success, -EINVAL if the ucode is not available.
5946  */
5947 static int cik_rlc_resume(struct radeon_device *rdev)
5948 {
5949 	u32 i, size, tmp;
5950 
5951 	if (!rdev->rlc_fw)
5952 		return -EINVAL;
5953 
5954 	cik_rlc_stop(rdev);
5955 
5956 	/* disable CG */
5957 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5958 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5959 
5960 	si_rlc_reset(rdev);
5961 
5962 	cik_init_pg(rdev);
5963 
5964 	cik_init_cg(rdev);
5965 
5966 	WREG32(RLC_LB_CNTR_INIT, 0);
5967 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5968 
5969 	mutex_lock(&rdev->grbm_idx_mutex);
5970 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5971 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5972 	WREG32(RLC_LB_PARAMS, 0x00600408);
5973 	WREG32(RLC_LB_CNTL, 0x80000004);
5974 	mutex_unlock(&rdev->grbm_idx_mutex);
5975 
5976 	WREG32(RLC_MC_CNTL, 0);
5977 	WREG32(RLC_UCODE_CNTL, 0);
5978 
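	/*
	 * Two ucode layouts: new-style firmware carries a little-endian
	 * header describing the image, while legacy blobs are raw
	 * big-endian words with a fixed per-ASIC size.
	 */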
5979 	if (rdev->new_fw) {
5980 		const struct rlc_firmware_header_v1_0 *hdr =
5981 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5982 		const __le32 *fw_data = (const __le32 *)
5983 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5984 
5985 		radeon_ucode_print_rlc_hdr(&hdr->header);
5986 
5987 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5988 		WREG32(RLC_GPM_UCODE_ADDR, 0);
5989 		for (i = 0; i < size; i++)
5990 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
5991 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
5992 	} else {
5993 		const __be32 *fw_data;
5994 
5995 		switch (rdev->family) {
5996 		case CHIP_BONAIRE:
5997 		case CHIP_HAWAII:
5998 		default:
5999 			size = BONAIRE_RLC_UCODE_SIZE;
6000 			break;
6001 		case CHIP_KAVERI:
6002 			size = KV_RLC_UCODE_SIZE;
6003 			break;
6004 		case CHIP_KABINI:
6005 			size = KB_RLC_UCODE_SIZE;
6006 			break;
6007 		case CHIP_MULLINS:
6008 			size = ML_RLC_UCODE_SIZE;
6009 			break;
6010 		}
6011 
6012 		fw_data = (const __be32 *)rdev->rlc_fw->data;
6013 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6014 		for (i = 0; i < size; i++)
6015 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6016 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6017 	}
6018 
6019 	/* XXX - find out what chips support lbpw */
6020 	cik_enable_lbpw(rdev, false);
6021 
6022 	if (rdev->family == CHIP_BONAIRE)
6023 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6024 
6025 	cik_rlc_start(rdev);
6026 
6027 	return 0;
6028 }
6029 
6030 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6031 {
6032 	u32 data, orig, tmp, tmp2;
6033 
6034 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6035 
6036 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6037 		cik_enable_gui_idle_interrupt(rdev, true);
6038 
6039 		tmp = cik_halt_rlc(rdev);
6040 
6041 		mutex_lock(&rdev->grbm_idx_mutex);
6042 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6043 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6044 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6045 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6046 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6047 		mutex_unlock(&rdev->grbm_idx_mutex);
6048 
6049 		cik_update_rlc(rdev, tmp);
6050 
6051 		data |= CGCG_EN | CGLS_EN;
6052 	} else {
6053 		cik_enable_gui_idle_interrupt(rdev, false);
6054 
6055 		RREG32(CB_CGTT_SCLK_CTRL);
6056 		RREG32(CB_CGTT_SCLK_CTRL);
6057 		RREG32(CB_CGTT_SCLK_CTRL);
6058 		RREG32(CB_CGTT_SCLK_CTRL);
6059 
6060 		data &= ~(CGCG_EN | CGLS_EN);
6061 	}
6062 
6063 	if (orig != data)
6064 		WREG32(RLC_CGCG_CGLS_CTRL, data);
6066 }
6067 
6068 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6069 {
6070 	u32 data, orig, tmp = 0;
6071 
6072 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6073 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6074 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6075 				orig = data = RREG32(CP_MEM_SLP_CNTL);
6076 				data |= CP_MEM_LS_EN;
6077 				if (orig != data)
6078 					WREG32(CP_MEM_SLP_CNTL, data);
6079 			}
6080 		}
6081 
6082 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6083 		data |= 0x00000001;
6084 		data &= 0xfffffffd;
6085 		if (orig != data)
6086 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6087 
6088 		tmp = cik_halt_rlc(rdev);
6089 
6090 		mutex_lock(&rdev->grbm_idx_mutex);
6091 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6092 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6093 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6094 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6095 		WREG32(RLC_SERDES_WR_CTRL, data);
6096 		mutex_unlock(&rdev->grbm_idx_mutex);
6097 
6098 		cik_update_rlc(rdev, tmp);
6099 
6100 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6101 			orig = data = RREG32(CGTS_SM_CTRL_REG);
6102 			data &= ~SM_MODE_MASK;
6103 			data |= SM_MODE(0x2);
6104 			data |= SM_MODE_ENABLE;
6105 			data &= ~CGTS_OVERRIDE;
6106 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6107 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6108 				data &= ~CGTS_LS_OVERRIDE;
6109 			data &= ~ON_MONITOR_ADD_MASK;
6110 			data |= ON_MONITOR_ADD_EN;
6111 			data |= ON_MONITOR_ADD(0x96);
6112 			if (orig != data)
6113 				WREG32(CGTS_SM_CTRL_REG, data);
6114 		}
6115 	} else {
6116 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6117 		data |= 0x00000003;
6118 		if (orig != data)
6119 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6120 
6121 		data = RREG32(RLC_MEM_SLP_CNTL);
6122 		if (data & RLC_MEM_LS_EN) {
6123 			data &= ~RLC_MEM_LS_EN;
6124 			WREG32(RLC_MEM_SLP_CNTL, data);
6125 		}
6126 
6127 		data = RREG32(CP_MEM_SLP_CNTL);
6128 		if (data & CP_MEM_LS_EN) {
6129 			data &= ~CP_MEM_LS_EN;
6130 			WREG32(CP_MEM_SLP_CNTL, data);
6131 		}
6132 
6133 		orig = data = RREG32(CGTS_SM_CTRL_REG);
6134 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6135 		if (orig != data)
6136 			WREG32(CGTS_SM_CTRL_REG, data);
6137 
6138 		tmp = cik_halt_rlc(rdev);
6139 
6140 		mutex_lock(&rdev->grbm_idx_mutex);
6141 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6142 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6143 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6144 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6145 		WREG32(RLC_SERDES_WR_CTRL, data);
6146 		mutex_unlock(&rdev->grbm_idx_mutex);
6147 
6148 		cik_update_rlc(rdev, tmp);
6149 	}
6150 }
6151 
6152 static const u32 mc_cg_registers[] =
6153 {
6154 	MC_HUB_MISC_HUB_CG,
6155 	MC_HUB_MISC_SIP_CG,
6156 	MC_HUB_MISC_VM_CG,
6157 	MC_XPB_CLK_GAT,
6158 	ATC_MISC_CG,
6159 	MC_CITF_MISC_WR_CG,
6160 	MC_CITF_MISC_RD_CG,
6161 	MC_CITF_MISC_VM_CG,
6162 	VM_L2_CG,
6163 };
6164 
6165 static void cik_enable_mc_ls(struct radeon_device *rdev,
6166 			     bool enable)
6167 {
6168 	int i;
6169 	u32 orig, data;
6170 
6171 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6172 		orig = data = RREG32(mc_cg_registers[i]);
6173 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6174 			data |= MC_LS_ENABLE;
6175 		else
6176 			data &= ~MC_LS_ENABLE;
6177 		if (data != orig)
6178 			WREG32(mc_cg_registers[i], data);
6179 	}
6180 }
6181 
6182 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6183 			       bool enable)
6184 {
6185 	int i;
6186 	u32 orig, data;
6187 
6188 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6189 		orig = data = RREG32(mc_cg_registers[i]);
6190 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6191 			data |= MC_CG_ENABLE;
6192 		else
6193 			data &= ~MC_CG_ENABLE;
6194 		if (data != orig)
6195 			WREG32(mc_cg_registers[i], data);
6196 	}
6197 }
6198 
6199 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6200 				 bool enable)
6201 {
6202 	u32 orig, data;
6203 
6204 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6205 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6206 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6207 	} else {
6208 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6209 		data |= 0xff000000;
6210 		if (data != orig)
6211 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6212 
6213 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6214 		data |= 0xff000000;
6215 		if (data != orig)
6216 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6217 	}
6218 }
6219 
6220 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6221 				 bool enable)
6222 {
6223 	u32 orig, data;
6224 
6225 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6226 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6227 		data |= 0x100;
6228 		if (orig != data)
6229 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6230 
6231 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6232 		data |= 0x100;
6233 		if (orig != data)
6234 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6235 	} else {
6236 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6237 		data &= ~0x100;
6238 		if (orig != data)
6239 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6240 
6241 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6242 		data &= ~0x100;
6243 		if (orig != data)
6244 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6245 	}
6246 }
6247 
6248 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6249 				bool enable)
6250 {
6251 	u32 orig, data;
6252 
6253 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6254 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6255 		data = 0xfff;
6256 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6257 
6258 		orig = data = RREG32(UVD_CGC_CTRL);
6259 		data |= DCM;
6260 		if (orig != data)
6261 			WREG32(UVD_CGC_CTRL, data);
6262 	} else {
6263 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6264 		data &= ~0xfff;
6265 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6266 
6267 		orig = data = RREG32(UVD_CGC_CTRL);
6268 		data &= ~DCM;
6269 		if (orig != data)
6270 			WREG32(UVD_CGC_CTRL, data);
6271 	}
6272 }
6273 
6274 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6275 			       bool enable)
6276 {
6277 	u32 orig, data;
6278 
6279 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6280 
6281 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6282 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6283 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6284 	else
6285 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6286 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6287 
6288 	if (orig != data)
6289 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6290 }
6291 
6292 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6293 				bool enable)
6294 {
6295 	u32 orig, data;
6296 
6297 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6298 
6299 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6300 		data &= ~CLOCK_GATING_DIS;
6301 	else
6302 		data |= CLOCK_GATING_DIS;
6303 
6304 	if (orig != data)
6305 		WREG32(HDP_HOST_PATH_CNTL, data);
6306 }
6307 
6308 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6309 			      bool enable)
6310 {
6311 	u32 orig, data;
6312 
6313 	orig = data = RREG32(HDP_MEM_POWER_LS);
6314 
6315 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6316 		data |= HDP_LS_ENABLE;
6317 	else
6318 		data &= ~HDP_LS_ENABLE;
6319 
6320 	if (orig != data)
6321 		WREG32(HDP_MEM_POWER_LS, data);
6322 }
6323 
6324 void cik_update_cg(struct radeon_device *rdev,
6325 		   u32 block, bool enable)
6326 {
6328 	if (block & RADEON_CG_BLOCK_GFX) {
6329 		cik_enable_gui_idle_interrupt(rdev, false);
6330 		/* order matters! */
6331 		if (enable) {
6332 			cik_enable_mgcg(rdev, true);
6333 			cik_enable_cgcg(rdev, true);
6334 		} else {
6335 			cik_enable_cgcg(rdev, false);
6336 			cik_enable_mgcg(rdev, false);
6337 		}
6338 		cik_enable_gui_idle_interrupt(rdev, true);
6339 	}
6340 
6341 	if (block & RADEON_CG_BLOCK_MC) {
6342 		if (!(rdev->flags & RADEON_IS_IGP)) {
6343 			cik_enable_mc_mgcg(rdev, enable);
6344 			cik_enable_mc_ls(rdev, enable);
6345 		}
6346 	}
6347 
6348 	if (block & RADEON_CG_BLOCK_SDMA) {
6349 		cik_enable_sdma_mgcg(rdev, enable);
6350 		cik_enable_sdma_mgls(rdev, enable);
6351 	}
6352 
6353 	if (block & RADEON_CG_BLOCK_BIF) {
6354 		cik_enable_bif_mgls(rdev, enable);
6355 	}
6356 
6357 	if (block & RADEON_CG_BLOCK_UVD) {
6358 		if (rdev->has_uvd)
6359 			cik_enable_uvd_mgcg(rdev, enable);
6360 	}
6361 
6362 	if (block & RADEON_CG_BLOCK_HDP) {
6363 		cik_enable_hdp_mgcg(rdev, enable);
6364 		cik_enable_hdp_ls(rdev, enable);
6365 	}
6366 
6367 	if (block & RADEON_CG_BLOCK_VCE) {
6368 		vce_v2_0_enable_mgcg(rdev, enable);
6369 	}
6370 }
6371 
6372 static void cik_init_cg(struct radeon_device *rdev)
6373 {
6375 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6376 
6377 	if (rdev->has_uvd)
6378 		si_init_uvd_internal_cg(rdev);
6379 
6380 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6381 			     RADEON_CG_BLOCK_SDMA |
6382 			     RADEON_CG_BLOCK_BIF |
6383 			     RADEON_CG_BLOCK_UVD |
6384 			     RADEON_CG_BLOCK_HDP), true);
6385 }
6386 
6387 static void cik_fini_cg(struct radeon_device *rdev)
6388 {
6389 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6390 			     RADEON_CG_BLOCK_SDMA |
6391 			     RADEON_CG_BLOCK_BIF |
6392 			     RADEON_CG_BLOCK_UVD |
6393 			     RADEON_CG_BLOCK_HDP), false);
6394 
6395 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6396 }
6397 
6398 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6399 					  bool enable)
6400 {
6401 	u32 data, orig;
6402 
6403 	orig = data = RREG32(RLC_PG_CNTL);
6404 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6405 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6406 	else
6407 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6408 	if (orig != data)
6409 		WREG32(RLC_PG_CNTL, data);
6410 }
6411 
6412 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6413 					  bool enable)
6414 {
6415 	u32 data, orig;
6416 
6417 	orig = data = RREG32(RLC_PG_CNTL);
6418 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6419 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6420 	else
6421 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6422 	if (orig != data)
6423 		WREG32(RLC_PG_CNTL, data);
6424 }
6425 
6426 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6427 {
6428 	u32 data, orig;
6429 
6430 	orig = data = RREG32(RLC_PG_CNTL);
6431 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6432 		data &= ~DISABLE_CP_PG;
6433 	else
6434 		data |= DISABLE_CP_PG;
6435 	if (orig != data)
6436 		WREG32(RLC_PG_CNTL, data);
6437 }
6438 
6439 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6440 {
6441 	u32 data, orig;
6442 
6443 	orig = data = RREG32(RLC_PG_CNTL);
6444 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6445 		data &= ~DISABLE_GDS_PG;
6446 	else
6447 		data |= DISABLE_GDS_PG;
6448 	if (orig != data)
6449 		WREG32(RLC_PG_CNTL, data);
6450 }
6451 
6452 #define CP_ME_TABLE_SIZE    96
6453 #define CP_ME_TABLE_OFFSET  2048
6454 #define CP_MEC_TABLE_OFFSET 4096
6455 
6456 void cik_init_cp_pg_table(struct radeon_device *rdev)
6457 {
6458 	volatile u32 *dst_ptr;
6459 	int me, i, max_me = 4;
6460 	u32 bo_offset = 0;
6461 	u32 table_offset, table_size;
6462 
6463 	if (rdev->family == CHIP_KAVERI)
6464 		max_me = 5;
6465 
6466 	if (rdev->rlc.cp_table_ptr == NULL)
6467 		return;
6468 
6469 	/* write the cp table buffer */
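	/*
	 * The jump tables are packed back to back; the me index maps to
	 * CE (0), PFP (1), ME (2), MEC (3) and, on KAVERI, MEC2 (4).
	 */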
6470 	dst_ptr = rdev->rlc.cp_table_ptr;
6471 	for (me = 0; me < max_me; me++) {
6472 		if (rdev->new_fw) {
6473 			const __le32 *fw_data;
6474 			const struct gfx_firmware_header_v1_0 *hdr;
6475 
6476 			if (me == 0) {
6477 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6478 				fw_data = (const __le32 *)
6479 					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6480 				table_offset = le32_to_cpu(hdr->jt_offset);
6481 				table_size = le32_to_cpu(hdr->jt_size);
6482 			} else if (me == 1) {
6483 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6484 				fw_data = (const __le32 *)
6485 					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6486 				table_offset = le32_to_cpu(hdr->jt_offset);
6487 				table_size = le32_to_cpu(hdr->jt_size);
6488 			} else if (me == 2) {
6489 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6490 				fw_data = (const __le32 *)
6491 					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6492 				table_offset = le32_to_cpu(hdr->jt_offset);
6493 				table_size = le32_to_cpu(hdr->jt_size);
6494 			} else if (me == 3) {
6495 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6496 				fw_data = (const __le32 *)
6497 					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6498 				table_offset = le32_to_cpu(hdr->jt_offset);
6499 				table_size = le32_to_cpu(hdr->jt_size);
6500 			} else {
6501 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6502 				fw_data = (const __le32 *)
6503 					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6504 				table_offset = le32_to_cpu(hdr->jt_offset);
6505 				table_size = le32_to_cpu(hdr->jt_size);
6506 			}
6507 
6508 			for (i = 0; i < table_size; i++) {
6509 				dst_ptr[bo_offset + i] =
6510 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6511 			}
6512 			bo_offset += table_size;
6513 		} else {
6514 			const __be32 *fw_data;
6515 			table_size = CP_ME_TABLE_SIZE;
6516 
6517 			if (me == 0) {
6518 				fw_data = (const __be32 *)rdev->ce_fw->data;
6519 				table_offset = CP_ME_TABLE_OFFSET;
6520 			} else if (me == 1) {
6521 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6522 				table_offset = CP_ME_TABLE_OFFSET;
6523 			} else if (me == 2) {
6524 				fw_data = (const __be32 *)rdev->me_fw->data;
6525 				table_offset = CP_ME_TABLE_OFFSET;
6526 			} else {
6527 				fw_data = (const __be32 *)rdev->mec_fw->data;
6528 				table_offset = CP_MEC_TABLE_OFFSET;
6529 			}
6530 
6531 			for (i = 0; i < table_size; i++) {
6532 				dst_ptr[bo_offset + i] =
6533 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6534 			}
6535 			bo_offset += table_size;
6536 		}
6537 	}
6538 }
6539 
6540 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6541 				bool enable)
6542 {
6543 	u32 data, orig;
6544 
6545 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6546 		orig = data = RREG32(RLC_PG_CNTL);
6547 		data |= GFX_PG_ENABLE;
6548 		if (orig != data)
6549 			WREG32(RLC_PG_CNTL, data);
6550 
6551 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6552 		data |= AUTO_PG_EN;
6553 		if (orig != data)
6554 			WREG32(RLC_AUTO_PG_CTRL, data);
6555 	} else {
6556 		orig = data = RREG32(RLC_PG_CNTL);
6557 		data &= ~GFX_PG_ENABLE;
6558 		if (orig != data)
6559 			WREG32(RLC_PG_CNTL, data);
6560 
6561 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6562 		data &= ~AUTO_PG_EN;
6563 		if (orig != data)
6564 			WREG32(RLC_AUTO_PG_CTRL, data);
6565 
6566 		data = RREG32(DB_RENDER_CONTROL);
6567 	}
6568 }
6569 
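/*
 * The shader array config registers hold per-CU disable bits in their
 * upper 16 bits; merge the fixed and user masks, shift them down and
 * invert to get the active-CU bitmap (e.g. disable bits 0x3 with 8 CUs
 * per SH gives 0xfc).
 */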
6570 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6571 {
6572 	u32 mask = 0, tmp, tmp1;
6573 	int i;
6574 
6575 	mutex_lock(&rdev->grbm_idx_mutex);
6576 	cik_select_se_sh(rdev, se, sh);
6577 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6578 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6579 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6580 	mutex_unlock(&rdev->grbm_idx_mutex);
6581 
6582 	tmp &= 0xffff0000;
6583 
6584 	tmp |= tmp1;
6585 	tmp >>= 16;
6586 
6587 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6588 		mask <<= 1;
6589 		mask |= 1;
6590 	}
6591 
6592 	return (~tmp) & mask;
6593 }
6594 
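/*
 * Mark up to two active CUs in each SH as "always on" and program the
 * total active CU count into RLC_MAX_PG_CU.
 */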
6595 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6596 {
6597 	u32 i, j, k, active_cu_number = 0;
6598 	u32 mask, counter, cu_bitmap;
6599 	u32 tmp = 0;
6600 
6601 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6602 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6603 			mask = 1;
6604 			cu_bitmap = 0;
6605 			counter = 0;
6606 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
6607 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6608 					if (counter < 2)
6609 						cu_bitmap |= mask;
6610 					counter++;
6611 				}
6612 				mask <<= 1;
6613 			}
6614 
6615 			active_cu_number += counter;
6616 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6617 		}
6618 	}
6619 
6620 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6621 
6622 	tmp = RREG32(RLC_MAX_PG_CU);
6623 	tmp &= ~MAX_PU_CU_MASK;
6624 	tmp |= MAX_PU_CU(active_cu_number);
6625 	WREG32(RLC_MAX_PG_CU, tmp);
6626 }
6627 
6628 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6629 				       bool enable)
6630 {
6631 	u32 data, orig;
6632 
6633 	orig = data = RREG32(RLC_PG_CNTL);
6634 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6635 		data |= STATIC_PER_CU_PG_ENABLE;
6636 	else
6637 		data &= ~STATIC_PER_CU_PG_ENABLE;
6638 	if (orig != data)
6639 		WREG32(RLC_PG_CNTL, data);
6640 }
6641 
6642 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6643 					bool enable)
6644 {
6645 	u32 data, orig;
6646 
6647 	orig = data = RREG32(RLC_PG_CNTL);
6648 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6649 		data |= DYN_PER_CU_PG_ENABLE;
6650 	else
6651 		data &= ~DYN_PER_CU_PG_ENABLE;
6652 	if (orig != data)
6653 		WREG32(RLC_PG_CNTL, data);
6654 }
6655 
6656 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6657 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6658 
6659 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6660 {
6661 	u32 data, orig;
6662 	u32 i;
6663 
6664 	if (rdev->rlc.cs_data) {
6665 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6666 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6667 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6668 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6669 	} else {
6670 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6671 		for (i = 0; i < 3; i++)
6672 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6673 	}
6674 	if (rdev->rlc.reg_list) {
6675 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6676 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6677 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6678 	}
6679 
6680 	orig = data = RREG32(RLC_PG_CNTL);
6681 	data |= GFX_PG_SRC;
6682 	if (orig != data)
6683 		WREG32(RLC_PG_CNTL, data);
6684 
6685 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6686 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6687 
6688 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6689 	data &= ~IDLE_POLL_COUNT_MASK;
6690 	data |= IDLE_POLL_COUNT(0x60);
6691 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
6692 
6693 	data = 0x10101010;
6694 	WREG32(RLC_PG_DELAY, data);
6695 
6696 	data = RREG32(RLC_PG_DELAY_2);
6697 	data &= ~0xff;
6698 	data |= 0x3;
6699 	WREG32(RLC_PG_DELAY_2, data);
6700 
6701 	data = RREG32(RLC_AUTO_PG_CTRL);
6702 	data &= ~GRBM_REG_SGIT_MASK;
6703 	data |= GRBM_REG_SGIT(0x700);
6704 	WREG32(RLC_AUTO_PG_CTRL, data);
6705 
6706 }
6707 
6708 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6709 {
6710 	cik_enable_gfx_cgpg(rdev, enable);
6711 	cik_enable_gfx_static_mgpg(rdev, enable);
6712 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
6713 }
6714 
6715 u32 cik_get_csb_size(struct radeon_device *rdev)
6716 {
6717 	u32 count = 0;
6718 	const struct cs_section_def *sect = NULL;
6719 	const struct cs_extent_def *ext = NULL;
6720 
6721 	if (rdev->rlc.cs_data == NULL)
6722 		return 0;
6723 
6724 	/* begin clear state */
6725 	count += 2;
6726 	/* context control state */
6727 	count += 3;
6728 
6729 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6730 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6731 			if (sect->id == SECT_CONTEXT)
6732 				count += 2 + ext->reg_count;
6733 			else
6734 				return 0;
6735 		}
6736 	}
6737 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6738 	count += 4;
6739 	/* end clear state */
6740 	count += 2;
6741 	/* clear state */
6742 	count += 2;
6743 
6744 	return count;
6745 }
6746 
6747 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6748 {
6749 	u32 count = 0, i;
6750 	const struct cs_section_def *sect = NULL;
6751 	const struct cs_extent_def *ext = NULL;
6752 
6753 	if (rdev->rlc.cs_data == NULL)
6754 		return;
6755 	if (buffer == NULL)
6756 		return;
6757 
6758 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6759 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6760 
6761 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6762 	buffer[count++] = cpu_to_le32(0x80000000);
6763 	buffer[count++] = cpu_to_le32(0x80000000);
6764 
6765 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6766 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6767 			if (sect->id == SECT_CONTEXT) {
6768 				buffer[count++] =
6769 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6770 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6771 				for (i = 0; i < ext->reg_count; i++)
6772 					buffer[count++] = cpu_to_le32(ext->extent[i]);
6773 			} else {
6774 				return;
6775 			}
6776 		}
6777 	}
6778 
6779 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6780 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6781 	switch (rdev->family) {
6782 	case CHIP_BONAIRE:
6783 		buffer[count++] = cpu_to_le32(0x16000012);
6784 		buffer[count++] = cpu_to_le32(0x00000000);
6785 		break;
6786 	case CHIP_KAVERI:
6787 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6788 		buffer[count++] = cpu_to_le32(0x00000000);
6789 		break;
6790 	case CHIP_KABINI:
6791 	case CHIP_MULLINS:
6792 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6793 		buffer[count++] = cpu_to_le32(0x00000000);
6794 		break;
6795 	case CHIP_HAWAII:
6796 		buffer[count++] = cpu_to_le32(0x3a00161a);
6797 		buffer[count++] = cpu_to_le32(0x0000002e);
6798 		break;
6799 	default:
6800 		buffer[count++] = cpu_to_le32(0x00000000);
6801 		buffer[count++] = cpu_to_le32(0x00000000);
6802 		break;
6803 	}
6804 
6805 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6806 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6807 
6808 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6809 	buffer[count++] = cpu_to_le32(0);
6810 }
6811 
6812 static void cik_init_pg(struct radeon_device *rdev)
6813 {
6814 	if (rdev->pg_flags) {
6815 		cik_enable_sck_slowdown_on_pu(rdev, true);
6816 		cik_enable_sck_slowdown_on_pd(rdev, true);
6817 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6818 			cik_init_gfx_cgpg(rdev);
6819 			cik_enable_cp_pg(rdev, true);
6820 			cik_enable_gds_pg(rdev, true);
6821 		}
6822 		cik_init_ao_cu_mask(rdev);
6823 		cik_update_gfx_pg(rdev, true);
6824 	}
6825 }
6826 
6827 static void cik_fini_pg(struct radeon_device *rdev)
6828 {
6829 	if (rdev->pg_flags) {
6830 		cik_update_gfx_pg(rdev, false);
6831 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6832 			cik_enable_cp_pg(rdev, false);
6833 			cik_enable_gds_pg(rdev, false);
6834 		}
6835 	}
6836 }
6837 
6838 /*
6839  * Interrupts
6840  * Starting with r6xx, interrupts are handled via a ring buffer.
6841  * Ring buffers are areas of GPU accessible memory that the GPU
6842  * writes interrupt vectors into and the host reads vectors out of.
6843  * There is a rptr (read pointer) that determines where the
6844  * host is currently reading, and a wptr (write pointer)
6845  * which determines where the GPU has written.  When the
6846  * pointers are equal, the ring is idle.  When the GPU
6847  * writes vectors to the ring buffer, it increments the
6848  * wptr.  When there is an interrupt, the host then starts
6849  * fetching commands and processing them until the pointers are
6850  * equal again at which point it updates the rptr.
6851  */
6852 
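#if 0	/* Illustrative sketch only (not compiled): the host-side half of the
	 * rptr/wptr protocol described above, written out in plain C.
	 * process_iv_entry() is a hypothetical helper used purely for
	 * exposition; the real decode and dispatch logic lives in
	 * cik_irq_process() further below.
	 */
static void ih_ring_consume_sketch(struct radeon_device *rdev)
{
	u32 wptr = cik_get_ih_wptr(rdev);	/* how far the GPU has written */
	u32 rptr = rdev->ih.rptr;		/* how far the host has read */

	while (rptr != wptr) {
		/* each IV entry is 16 bytes; decode it and schedule any work */
		process_iv_entry(&rdev->ih.ring[rptr / 4]);
		rptr = (rptr + 16) & rdev->ih.ptr_mask;
	}
	/* pointers are equal again: publish rptr so the ring reads as idle */
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rptr);
}
#endif
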
6853 /**
6854  * cik_enable_interrupts - Enable the interrupt ring buffer
6855  *
6856  * @rdev: radeon_device pointer
6857  *
6858  * Enable the interrupt ring buffer (CIK).
6859  */
6860 static void cik_enable_interrupts(struct radeon_device *rdev)
6861 {
6862 	u32 ih_cntl = RREG32(IH_CNTL);
6863 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6864 
6865 	ih_cntl |= ENABLE_INTR;
6866 	ih_rb_cntl |= IH_RB_ENABLE;
6867 	WREG32(IH_CNTL, ih_cntl);
6868 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6869 	rdev->ih.enabled = true;
6870 }
6871 
6872 /**
6873  * cik_disable_interrupts - Disable the interrupt ring buffer
6874  *
6875  * @rdev: radeon_device pointer
6876  *
6877  * Disable the interrupt ring buffer (CIK).
6878  */
6879 static void cik_disable_interrupts(struct radeon_device *rdev)
6880 {
6881 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6882 	u32 ih_cntl = RREG32(IH_CNTL);
6883 
6884 	ih_rb_cntl &= ~IH_RB_ENABLE;
6885 	ih_cntl &= ~ENABLE_INTR;
6886 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6887 	WREG32(IH_CNTL, ih_cntl);
6888 	/* set rptr, wptr to 0 */
6889 	WREG32(IH_RB_RPTR, 0);
6890 	WREG32(IH_RB_WPTR, 0);
6891 	rdev->ih.enabled = false;
6892 	rdev->ih.rptr = 0;
6893 }
6894 
6895 /**
6896  * cik_disable_interrupt_state - Disable all interrupt sources
6897  *
6898  * @rdev: radeon_device pointer
6899  *
6900  * Clear all interrupt enable bits used by the driver (CIK).
6901  */
6902 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6903 {
6904 	u32 tmp;
6905 
6906 	/* gfx ring */
6907 	tmp = RREG32(CP_INT_CNTL_RING0) &
6908 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6909 	WREG32(CP_INT_CNTL_RING0, tmp);
6910 	/* sdma */
6911 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6912 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6913 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6914 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6915 	/* compute queues */
6916 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6917 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6918 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6919 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6920 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6921 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6922 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6923 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6924 	/* grbm */
6925 	WREG32(GRBM_INT_CNTL, 0);
6926 	/* SRBM */
6927 	WREG32(SRBM_INT_CNTL, 0);
6928 	/* vline/vblank, etc. */
6929 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6930 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6931 	if (rdev->num_crtc >= 4) {
6932 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6933 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6934 	}
6935 	if (rdev->num_crtc >= 6) {
6936 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6937 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6938 	}
6939 	/* pflip */
6940 	if (rdev->num_crtc >= 2) {
6941 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6942 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6943 	}
6944 	if (rdev->num_crtc >= 4) {
6945 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6946 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6947 	}
6948 	if (rdev->num_crtc >= 6) {
6949 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6950 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6951 	}
6952 
6953 	/* dac hotplug */
6954 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6955 
6956 	/* digital hotplug */
6957 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6958 	WREG32(DC_HPD1_INT_CONTROL, tmp);
6959 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6960 	WREG32(DC_HPD2_INT_CONTROL, tmp);
6961 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6962 	WREG32(DC_HPD3_INT_CONTROL, tmp);
6963 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6964 	WREG32(DC_HPD4_INT_CONTROL, tmp);
6965 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6966 	WREG32(DC_HPD5_INT_CONTROL, tmp);
6967 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6968 	WREG32(DC_HPD6_INT_CONTROL, tmp);
6969 
6970 }
6971 
6972 /**
6973  * cik_irq_init - init and enable the interrupt ring
6974  *
6975  * @rdev: radeon_device pointer
6976  *
6977  * Allocate a ring buffer for the interrupt controller,
6978  * enable the RLC, disable interrupts, enable the IH
6979  * ring buffer and enable it (CIK).
6980  * Called at device load and resume.
6981  * Returns 0 for success, errors for failure.
6982  */
6983 static int cik_irq_init(struct radeon_device *rdev)
6984 {
6985 	int ret = 0;
6986 	int rb_bufsz;
6987 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6988 
6989 	/* allocate ring */
6990 	ret = r600_ih_ring_alloc(rdev);
6991 	if (ret)
6992 		return ret;
6993 
6994 	/* disable irqs */
6995 	cik_disable_interrupts(rdev);
6996 
6997 	/* init rlc */
6998 	ret = cik_rlc_resume(rdev);
6999 	if (ret) {
7000 		r600_ih_ring_fini(rdev);
7001 		return ret;
7002 	}
7003 
7004 	/* setup interrupt control */
7005 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
7006 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7007 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
7008 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7009 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7010 	 */
7011 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7012 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7013 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7014 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
7015 
7016 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7017 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7018 
7019 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7020 		      IH_WPTR_OVERFLOW_CLEAR |
7021 		      (rb_bufsz << 1));
7022 
7023 	if (rdev->wb.enabled)
7024 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7025 
7026 	/* set the writeback address whether it's enabled or not */
7027 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7028 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7029 
7030 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7031 
7032 	/* set rptr, wptr to 0 */
7033 	WREG32(IH_RB_RPTR, 0);
7034 	WREG32(IH_RB_WPTR, 0);
7035 
7036 	/* Default settings for IH_CNTL (disabled at first) */
7037 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7038 	/* RPTR_REARM only works if msi's are enabled */
7039 	if (rdev->msi_enabled)
7040 		ih_cntl |= RPTR_REARM;
7041 	WREG32(IH_CNTL, ih_cntl);
7042 
7043 	/* force the active interrupt state to all disabled */
7044 	cik_disable_interrupt_state(rdev);
7045 
7046 	pci_set_master(rdev->pdev);
7047 
7048 	/* enable irqs */
7049 	cik_enable_interrupts(rdev);
7050 
7051 	return ret;
7052 }
7053 
7054 /**
7055  * cik_irq_set - enable/disable interrupt sources
7056  *
7057  * @rdev: radeon_device pointer
7058  *
7059  * Enable interrupt sources on the GPU (vblanks, hpd,
7060  * etc.) (CIK).
7061  * Returns 0 for success, errors for failure.
7062  */
7063 int cik_irq_set(struct radeon_device *rdev)
7064 {
7065 	u32 cp_int_cntl;
7066 	u32 cp_m1p0;
7067 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7068 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7069 	u32 grbm_int_cntl = 0;
7070 	u32 dma_cntl, dma_cntl1;
7071 
7072 	if (!rdev->irq.installed) {
7073 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7074 		return -EINVAL;
7075 	}
7076 	/* don't enable anything if the ih is disabled */
7077 	if (!rdev->ih.enabled) {
7078 		cik_disable_interrupts(rdev);
7079 		/* force the active interrupt state to all disabled */
7080 		cik_disable_interrupt_state(rdev);
7081 		return 0;
7082 	}
7083 
7084 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7085 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7086 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7087 
7088 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7089 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7090 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7091 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7092 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7093 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7094 
7095 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7096 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7097 
7098 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7099 
7100 	/* enable CP interrupts on all rings */
7101 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7102 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7103 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7104 	}
7105 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7106 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7107 		DRM_DEBUG("cik_irq_set: sw int cp1\n");
7108 		if (ring->me == 1) {
7109 			switch (ring->pipe) {
7110 			case 0:
7111 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7112 				break;
7113 			default:
7114 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7115 				break;
7116 			}
7117 		} else {
7118 			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
7119 		}
7120 	}
7121 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7122 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7123 		DRM_DEBUG("cik_irq_set: sw int cp2\n");
7124 		if (ring->me == 1) {
7125 			switch (ring->pipe) {
7126 			case 0:
7127 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7128 				break;
7129 			default:
7130 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7131 				break;
7132 			}
7133 		} else {
7134 			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
7135 		}
7136 	}
7137 
7138 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7139 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7140 		dma_cntl |= TRAP_ENABLE;
7141 	}
7142 
7143 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7144 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7145 		dma_cntl1 |= TRAP_ENABLE;
7146 	}
7147 
7148 	if (rdev->irq.crtc_vblank_int[0] ||
7149 	    atomic_read(&rdev->irq.pflip[0])) {
7150 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7151 		crtc1 |= VBLANK_INTERRUPT_MASK;
7152 	}
7153 	if (rdev->irq.crtc_vblank_int[1] ||
7154 	    atomic_read(&rdev->irq.pflip[1])) {
7155 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7156 		crtc2 |= VBLANK_INTERRUPT_MASK;
7157 	}
7158 	if (rdev->irq.crtc_vblank_int[2] ||
7159 	    atomic_read(&rdev->irq.pflip[2])) {
7160 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7161 		crtc3 |= VBLANK_INTERRUPT_MASK;
7162 	}
7163 	if (rdev->irq.crtc_vblank_int[3] ||
7164 	    atomic_read(&rdev->irq.pflip[3])) {
7165 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7166 		crtc4 |= VBLANK_INTERRUPT_MASK;
7167 	}
7168 	if (rdev->irq.crtc_vblank_int[4] ||
7169 	    atomic_read(&rdev->irq.pflip[4])) {
7170 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7171 		crtc5 |= VBLANK_INTERRUPT_MASK;
7172 	}
7173 	if (rdev->irq.crtc_vblank_int[5] ||
7174 	    atomic_read(&rdev->irq.pflip[5])) {
7175 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7176 		crtc6 |= VBLANK_INTERRUPT_MASK;
7177 	}
7178 	if (rdev->irq.hpd[0]) {
7179 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7180 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7181 	}
7182 	if (rdev->irq.hpd[1]) {
7183 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7184 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7185 	}
7186 	if (rdev->irq.hpd[2]) {
7187 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7188 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7189 	}
7190 	if (rdev->irq.hpd[3]) {
7191 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7192 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7193 	}
7194 	if (rdev->irq.hpd[4]) {
7195 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7196 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7197 	}
7198 	if (rdev->irq.hpd[5]) {
7199 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7200 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7201 	}
7202 
7203 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7204 
7205 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7206 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7207 
7208 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7209 
7210 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7211 
7212 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7213 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7214 	if (rdev->num_crtc >= 4) {
7215 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7216 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7217 	}
7218 	if (rdev->num_crtc >= 6) {
7219 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7220 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7221 	}
7222 
7223 	if (rdev->num_crtc >= 2) {
7224 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7225 		       GRPH_PFLIP_INT_MASK);
7226 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7227 		       GRPH_PFLIP_INT_MASK);
7228 	}
7229 	if (rdev->num_crtc >= 4) {
7230 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7231 		       GRPH_PFLIP_INT_MASK);
7232 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7233 		       GRPH_PFLIP_INT_MASK);
7234 	}
7235 	if (rdev->num_crtc >= 6) {
7236 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7237 		       GRPH_PFLIP_INT_MASK);
7238 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7239 		       GRPH_PFLIP_INT_MASK);
7240 	}
7241 
7242 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7243 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7244 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7245 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7246 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7247 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7248 
7249 	/* posting read */
7250 	RREG32(SRBM_STATUS);
7251 
7252 	return 0;
7253 }
7254 
7255 /**
7256  * cik_irq_ack - ack interrupt sources
7257  *
7258  * @rdev: radeon_device pointer
7259  *
7260  * Ack interrupt sources on the GPU (vblanks, hpd,
7261  * etc.) (CIK).  Certain interrupt sources are sw
7262  * generated and do not require an explicit ack.
7263  */
7264 static inline void cik_irq_ack(struct radeon_device *rdev)
7265 {
7266 	u32 tmp;
7267 
7268 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7269 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7270 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7271 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7272 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7273 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7274 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7275 
7276 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7277 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7278 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7279 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7280 	if (rdev->num_crtc >= 4) {
7281 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7282 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7283 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7284 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7285 	}
7286 	if (rdev->num_crtc >= 6) {
7287 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7288 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7289 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7290 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7291 	}
7292 
7293 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7294 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7295 		       GRPH_PFLIP_INT_CLEAR);
7296 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7297 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7298 		       GRPH_PFLIP_INT_CLEAR);
7299 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7300 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7301 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7302 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7303 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7304 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7305 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7306 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7307 
7308 	if (rdev->num_crtc >= 4) {
7309 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7310 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7311 			       GRPH_PFLIP_INT_CLEAR);
7312 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7313 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7314 			       GRPH_PFLIP_INT_CLEAR);
7315 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7316 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7317 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7318 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7319 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7320 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7321 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7322 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7323 	}
7324 
7325 	if (rdev->num_crtc >= 6) {
7326 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7327 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7328 			       GRPH_PFLIP_INT_CLEAR);
7329 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7330 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7331 			       GRPH_PFLIP_INT_CLEAR);
7332 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7333 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7334 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7335 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7336 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7337 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7338 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7339 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7340 	}
7341 
7342 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7343 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7344 		tmp |= DC_HPDx_INT_ACK;
7345 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7346 	}
7347 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7348 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7349 		tmp |= DC_HPDx_INT_ACK;
7350 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7351 	}
7352 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7353 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7354 		tmp |= DC_HPDx_INT_ACK;
7355 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7356 	}
7357 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7358 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7359 		tmp |= DC_HPDx_INT_ACK;
7360 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7361 	}
7362 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7363 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7364 		tmp |= DC_HPDx_INT_ACK;
7365 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7366 	}
7367 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7368 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7369 		tmp |= DC_HPDx_INT_ACK;
7370 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7371 	}
7372 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7373 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7374 		tmp |= DC_HPDx_RX_INT_ACK;
7375 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7376 	}
7377 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7378 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7379 		tmp |= DC_HPDx_RX_INT_ACK;
7380 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7381 	}
7382 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7383 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7384 		tmp |= DC_HPDx_RX_INT_ACK;
7385 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7386 	}
7387 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7388 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7389 		tmp |= DC_HPDx_RX_INT_ACK;
7390 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7391 	}
7392 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7393 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7394 		tmp |= DC_HPDx_RX_INT_ACK;
7395 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7396 	}
7397 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7398 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7399 		tmp |= DC_HPDx_RX_INT_ACK;
7400 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7401 	}
7402 }
7403 
7404 /**
7405  * cik_irq_disable - disable interrupts
7406  *
7407  * @rdev: radeon_device pointer
7408  *
7409  * Disable interrupts on the hw (CIK).
7410  */
7411 static void cik_irq_disable(struct radeon_device *rdev)
7412 {
7413 	cik_disable_interrupts(rdev);
7414 	/* Wait and acknowledge irq */
7415 	mdelay(1);
7416 	cik_irq_ack(rdev);
7417 	cik_disable_interrupt_state(rdev);
7418 }
7419 
7420 /**
7421  * cik_irq_suspend - disable interrupts for suspend
7422  *
7423  * @rdev: radeon_device pointer
7424  *
7425  * Disable interrupts and stop the RLC (CIK).
7426  * Used for suspend.
7427  */
7428 static void cik_irq_suspend(struct radeon_device *rdev)
7429 {
7430 	cik_irq_disable(rdev);
7431 	cik_rlc_stop(rdev);
7432 }
7433 
7434 /**
7435  * cik_irq_fini - tear down interrupt support
7436  *
7437  * @rdev: radeon_device pointer
7438  *
7439  * Disable interrupts on the hw and free the IH ring
7440  * buffer (CIK).
7441  * Used for driver unload.
7442  */
7443 static void cik_irq_fini(struct radeon_device *rdev)
7444 {
7445 	cik_irq_suspend(rdev);
7446 	r600_ih_ring_fini(rdev);
7447 }
7448 
7449 /**
7450  * cik_get_ih_wptr - get the IH ring buffer wptr
7451  *
7452  * @rdev: radeon_device pointer
7453  *
7454  * Get the IH ring buffer wptr from either the register
7455  * or the writeback memory buffer (CIK).  Also check for
7456  * ring buffer overflow and deal with it.
7457  * Used by cik_irq_process().
7458  * Returns the value of the wptr.
7459  */
7460 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7461 {
7462 	u32 wptr, tmp;
7463 
7464 	if (rdev->wb.enabled)
7465 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7466 	else
7467 		wptr = RREG32(IH_RB_WPTR);
7468 
7469 	if (wptr & RB_OVERFLOW) {
7470 		wptr &= ~RB_OVERFLOW;
7471 		/* When a ring buffer overflow happens, start parsing interrupts
7472 		 * from the last vector that was not overwritten (wptr + 16).
7473 		 * Hopefully this allows us to catch up.
7474 		 */
7475 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7476 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7477 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7478 		tmp = RREG32(IH_RB_CNTL);
7479 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7480 		WREG32(IH_RB_CNTL, tmp);
7481 	}
7482 	return (wptr & rdev->ih.ptr_mask);
7483 }
7484 
7485 /* CIK IV Ring
7486  * Each IV ring entry is 128 bits:
7487  * [7:0]    - interrupt source id
7488  * [31:8]   - reserved
7489  * [59:32]  - interrupt source data
7490  * [63:60]  - reserved
7491  * [71:64]  - RINGID
7492  *            CP:
7493  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7494  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7495  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7496  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7497  *            PIPE_ID - ME0 0=3D
7498  *                    - ME1&2 compute dispatcher (4 pipes each)
7499  *            SDMA:
7500  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7501  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7502  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7503  * [79:72]  - VMID
7504  * [95:80]  - PASID
7505  * [127:96] - reserved
7506  */
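#if 0	/* Illustrative sketch only (not compiled): unpacking one 128-bit IV
	 * entry (four little-endian dwords) according to the layout above.
	 * The VMID/PASID extraction follows the documented bit ranges and is
	 * shown only for exposition; cik_irq_process() below consumes just
	 * src_id, src_data and ring_id.
	 */
static void iv_entry_decode_sketch(const u32 *entry)
{
	u32 src_id   = le32_to_cpu(entry[0]) & 0xff;	   /* [7:0]   source id */
	u32 src_data = le32_to_cpu(entry[1]) & 0xfffffff;  /* [59:32] source data */
	u32 ring_id  = le32_to_cpu(entry[2]) & 0xff;	   /* [71:64] RINGID */
	u32 vmid     = (le32_to_cpu(entry[2]) >> 8) & 0xff;    /* [79:72] VMID */
	u32 pasid    = (le32_to_cpu(entry[2]) >> 16) & 0xffff; /* [95:80] PASID */
}
#endif
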
7507 /**
7508  * cik_irq_process - interrupt handler
7509  *
7510  * @rdev: radeon_device pointer
7511  *
7512  * Interrupt handler (CIK).  Walk the IH ring,
7513  * ack interrupts and schedule work to handle
7514  * interrupt events.
7515  * Returns irq process return code.
7516  */
7517 irqreturn_t cik_irq_process(struct radeon_device *rdev)
7518 {
7519 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7520 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7521 	u32 wptr;
7522 	u32 rptr;
7523 	u32 src_id, src_data, ring_id;
7524 	u8 me_id, pipe_id, queue_id;
7525 	u32 ring_index;
7526 	bool queue_hotplug = false;
7527 	bool queue_dp = false;
7528 	bool queue_reset = false;
7529 	u32 addr, status, mc_client;
7530 	bool queue_thermal = false;
7531 
7532 	if (!rdev->ih.enabled || rdev->shutdown)
7533 		return IRQ_NONE;
7534 
7535 	wptr = cik_get_ih_wptr(rdev);
7536 
7537 restart_ih:
7538 	/* is somebody else already processing irqs? */
7539 	if (atomic_xchg(&rdev->ih.lock, 1))
7540 		return IRQ_NONE;
7541 
7542 	rptr = rdev->ih.rptr;
7543 	DRM_DEBUG_VBLANK("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7544 
7545 	/* Order reading of wptr vs. reading of IH ring data */
7546 	rmb();
7547 
7548 	/* display interrupts */
7549 	cik_irq_ack(rdev);
7550 
7551 	while (rptr != wptr) {
7552 		/* wptr/rptr are in bytes! */
7553 		ring_index = rptr / 4;
7554 
7555 		src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7556 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7557 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7558 
7559 		switch (src_id) {
7560 		case 1: /* D1 vblank/vline */
7561 			switch (src_data) {
7562 			case 0: /* D1 vblank */
7563 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7564 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7565 
7566 				if (rdev->irq.crtc_vblank_int[0]) {
7567 					drm_handle_vblank(rdev->ddev, 0);
7568 					rdev->pm.vblank_sync = true;
7569 					wake_up(&rdev->irq.vblank_queue);
7570 				}
7571 				if (atomic_read(&rdev->irq.pflip[0]))
7572 					radeon_crtc_handle_vblank(rdev, 0);
7573 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7574 				DRM_DEBUG_VBLANK("IH: D1 vblank\n");
7575 
7576 				break;
7577 			case 1: /* D1 vline */
7578 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7579 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7580 
7581 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7582 				DRM_DEBUG_VBLANK("IH: D1 vline\n");
7583 
7584 				break;
7585 			default:
7586 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7587 				break;
7588 			}
7589 			break;
7590 		case 2: /* D2 vblank/vline */
7591 			switch (src_data) {
7592 			case 0: /* D2 vblank */
7593 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7594 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7595 
7596 				if (rdev->irq.crtc_vblank_int[1]) {
7597 					drm_handle_vblank(rdev->ddev, 1);
7598 					rdev->pm.vblank_sync = true;
7599 					wake_up(&rdev->irq.vblank_queue);
7600 				}
7601 				if (atomic_read(&rdev->irq.pflip[1]))
7602 					radeon_crtc_handle_vblank(rdev, 1);
7603 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7604 				DRM_DEBUG_VBLANK("IH: D2 vblank\n");
7605 
7606 				break;
7607 			case 1: /* D2 vline */
7608 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7609 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7610 
7611 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7612 				DRM_DEBUG_VBLANK("IH: D2 vline\n");
7613 
7614 				break;
7615 			default:
7616 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7617 				break;
7618 			}
7619 			break;
7620 		case 3: /* D3 vblank/vline */
7621 			switch (src_data) {
7622 			case 0: /* D3 vblank */
7623 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7624 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7625 
7626 				if (rdev->irq.crtc_vblank_int[2]) {
7627 					drm_handle_vblank(rdev->ddev, 2);
7628 					rdev->pm.vblank_sync = true;
7629 					wake_up(&rdev->irq.vblank_queue);
7630 				}
7631 				if (atomic_read(&rdev->irq.pflip[2]))
7632 					radeon_crtc_handle_vblank(rdev, 2);
7633 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7634 				DRM_DEBUG_VBLANK("IH: D3 vblank\n");
7635 
7636 				break;
7637 			case 1: /* D3 vline */
7638 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7639 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7640 
7641 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7642 				DRM_DEBUG_VBLANK("IH: D3 vline\n");
7643 
7644 				break;
7645 			default:
7646 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7647 				break;
7648 			}
7649 			break;
7650 		case 4: /* D4 vblank/vline */
7651 			switch (src_data) {
7652 			case 0: /* D4 vblank */
7653 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7654 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7655 
7656 				if (rdev->irq.crtc_vblank_int[3]) {
7657 					drm_handle_vblank(rdev->ddev, 3);
7658 					rdev->pm.vblank_sync = true;
7659 					wake_up(&rdev->irq.vblank_queue);
7660 				}
7661 				if (atomic_read(&rdev->irq.pflip[3]))
7662 					radeon_crtc_handle_vblank(rdev, 3);
7663 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7664 				DRM_DEBUG_VBLANK("IH: D4 vblank\n");
7665 
7666 				break;
7667 			case 1: /* D4 vline */
7668 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7669 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7670 
7671 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7672 				DRM_DEBUG_VBLANK("IH: D4 vline\n");
7673 
7674 				break;
7675 			default:
7676 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7677 				break;
7678 			}
7679 			break;
7680 		case 5: /* D5 vblank/vline */
7681 			switch (src_data) {
7682 			case 0: /* D5 vblank */
7683 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7684 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7685 
7686 				if (rdev->irq.crtc_vblank_int[4]) {
7687 					drm_handle_vblank(rdev->ddev, 4);
7688 					rdev->pm.vblank_sync = true;
7689 					wake_up(&rdev->irq.vblank_queue);
7690 				}
7691 				if (atomic_read(&rdev->irq.pflip[4]))
7692 					radeon_crtc_handle_vblank(rdev, 4);
7693 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7694 				DRM_DEBUG_VBLANK("IH: D5 vblank\n");
7695 
7696 				break;
7697 			case 1: /* D5 vline */
7698 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7699 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7700 
7701 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7702 				DRM_DEBUG_VBLANK("IH: D5 vline\n");
7703 
7704 				break;
7705 			default:
7706 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7707 				break;
7708 			}
7709 			break;
7710 		case 6: /* D6 vblank/vline */
7711 			switch (src_data) {
7712 			case 0: /* D6 vblank */
7713 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7714 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7715 
7716 				if (rdev->irq.crtc_vblank_int[5]) {
7717 					drm_handle_vblank(rdev->ddev, 5);
7718 					rdev->pm.vblank_sync = true;
7719 					wake_up(&rdev->irq.vblank_queue);
7720 				}
7721 				if (atomic_read(&rdev->irq.pflip[5]))
7722 					radeon_crtc_handle_vblank(rdev, 5);
7723 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7724 				DRM_DEBUG_VBLANK("IH: D6 vblank\n");
7725 
7726 				break;
7727 			case 1: /* D6 vline */
7728 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7729 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7730 
7731 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7732 				DRM_DEBUG_VBLANK("IH: D6 vline\n");
7733 
7734 				break;
7735 			default:
7736 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7737 				break;
7738 			}
7739 			break;
7740 		case 8: /* D1 page flip */
7741 		case 10: /* D2 page flip */
7742 		case 12: /* D3 page flip */
7743 		case 14: /* D4 page flip */
7744 		case 16: /* D5 page flip */
7745 		case 18: /* D6 page flip */
7746 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7747 			if (radeon_use_pflipirq > 0)
7748 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7749 			break;
7750 		case 42: /* HPD hotplug */
7751 			switch (src_data) {
7752 			case 0:
7753 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7754 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7755 
7756 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7757 				queue_hotplug = true;
7758 				DRM_DEBUG("IH: HPD1\n");
7759 
7760 				break;
7761 			case 1:
7762 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7763 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7764 
7765 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7766 				queue_hotplug = true;
7767 				DRM_DEBUG("IH: HPD2\n");
7768 
7769 				break;
7770 			case 2:
7771 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7772 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7773 
7774 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7775 				queue_hotplug = true;
7776 				DRM_DEBUG("IH: HPD3\n");
7777 
7778 				break;
7779 			case 3:
7780 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7781 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7782 
7783 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7784 				queue_hotplug = true;
7785 				DRM_DEBUG("IH: HPD4\n");
7786 
7787 				break;
7788 			case 4:
7789 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7790 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7791 
7792 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7793 				queue_hotplug = true;
7794 				DRM_DEBUG("IH: HPD5\n");
7795 
7796 				break;
7797 			case 5:
7798 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7799 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7800 
7801 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7802 				queue_hotplug = true;
7803 				DRM_DEBUG("IH: HPD6\n");
7804 
7805 				break;
7806 			case 6:
7807 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7808 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7809 
7810 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7811 				queue_dp = true;
7812 				DRM_DEBUG("IH: HPD_RX 1\n");
7813 
7814 				break;
7815 			case 7:
7816 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7817 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7818 
7819 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7820 				queue_dp = true;
7821 				DRM_DEBUG("IH: HPD_RX 2\n");
7822 
7823 				break;
7824 			case 8:
7825 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7826 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7827 
7828 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7829 				queue_dp = true;
7830 				DRM_DEBUG("IH: HPD_RX 3\n");
7831 
7832 				break;
7833 			case 9:
7834 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7835 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7836 
7837 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7838 				queue_dp = true;
7839 				DRM_DEBUG("IH: HPD_RX 4\n");
7840 
7841 				break;
7842 			case 10:
7843 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7844 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7845 
7846 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7847 				queue_dp = true;
7848 				DRM_DEBUG("IH: HPD_RX 5\n");
7849 
7850 				break;
7851 			case 11:
7852 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7853 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7854 
7855 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7856 				queue_dp = true;
7857 				DRM_DEBUG("IH: HPD_RX 6\n");
7858 
7859 				break;
7860 			default:
7861 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7862 				break;
7863 			}
7864 			break;
7865 		case 96:
7866 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7867 			WREG32(SRBM_INT_ACK, 0x1);
7868 			break;
7869 		case 124: /* UVD */
7870 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7871 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7872 			break;
7873 		case 146:
7874 		case 147:
7875 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7876 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7877 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7878 			/* reset addr and status */
7879 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7880 			if (addr == 0x0 && status == 0x0)
7881 				break;
7882 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7883 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7884 				addr);
7885 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7886 				status);
7887 			cik_vm_decode_fault(rdev, status, addr, mc_client);
7888 			break;
7889 		case 167: /* VCE */
7890 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7891 			switch (src_data) {
7892 			case 0:
7893 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7894 				break;
7895 			case 1:
7896 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7897 				break;
7898 			default:
7899 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7900 				break;
7901 			}
7902 			break;
7903 		case 176: /* GFX RB CP_INT */
7904 		case 177: /* GFX IB CP_INT */
7905 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7906 			break;
7907 		case 181: /* CP EOP event */
7908 			DRM_DEBUG("IH: CP EOP\n");
7909 			/* XXX check the bitfield order! */
7910 			me_id = (ring_id & 0x60) >> 5;
7911 			pipe_id = (ring_id & 0x18) >> 3;
7912 			queue_id = (ring_id & 0x7) >> 0;
7913 			switch (me_id) {
7914 			case 0:
7915 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7916 				break;
7917 			case 1:
7918 			case 2:
7919 				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
7920 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7921 				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
7922 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7923 				break;
7924 			}
7925 			break;
7926 		case 184: /* CP Privileged reg access */
7927 			DRM_ERROR("Illegal register access in command stream\n");
7928 			/* XXX check the bitfield order! */
7929 			me_id = (ring_id & 0x60) >> 5;
7930 			pipe_id = (ring_id & 0x18) >> 3;
7931 			queue_id = (ring_id & 0x7) >> 0;
7932 			switch (me_id) {
7933 			case 0:
7934 				/* This results in a full GPU reset, but all we need to do is soft
7935 				 * reset the CP for gfx
7936 				 */
7937 				queue_reset = true;
7938 				break;
7939 			case 1:
7940 				/* XXX compute */
7941 				queue_reset = true;
7942 				break;
7943 			case 2:
7944 				/* XXX compute */
7945 				queue_reset = true;
7946 				break;
7947 			}
7948 			break;
7949 		case 185: /* CP Privileged inst */
7950 			DRM_ERROR("Illegal instruction in command stream\n");
7951 			/* XXX check the bitfield order! */
7952 			me_id = (ring_id & 0x60) >> 5;
7953 			pipe_id = (ring_id & 0x18) >> 3;
7954 			queue_id = (ring_id & 0x7) >> 0;
7955 			switch (me_id) {
7956 			case 0:
7957 				/* This results in a full GPU reset, but all we need to do is soft
7958 				 * reset the CP for gfx
7959 				 */
7960 				queue_reset = true;
7961 				break;
7962 			case 1:
7963 				/* XXX compute */
7964 				queue_reset = true;
7965 				break;
7966 			case 2:
7967 				/* XXX compute */
7968 				queue_reset = true;
7969 				break;
7970 			}
7971 			break;
7972 		case 224: /* SDMA trap event */
7973 			/* XXX check the bitfield order! */
7974 			me_id = (ring_id & 0x3) >> 0;
7975 			queue_id = (ring_id & 0xc) >> 2;
7976 			DRM_DEBUG("IH: SDMA trap\n");
7977 			switch (me_id) {
7978 			case 0:
7979 				switch (queue_id) {
7980 				case 0:
7981 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7982 					break;
7983 				case 1:
7984 					/* XXX compute */
7985 					break;
7986 				case 2:
7987 					/* XXX compute */
7988 					break;
7989 				}
7990 				break;
7991 			case 1:
7992 				switch (queue_id) {
7993 				case 0:
7994 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7995 					break;
7996 				case 1:
7997 					/* XXX compute */
7998 					break;
7999 				case 2:
8000 					/* XXX compute */
8001 					break;
8002 				}
8003 				break;
8004 			}
8005 			break;
8006 		case 230: /* thermal low to high */
8007 			DRM_DEBUG("IH: thermal low to high\n");
8008 			rdev->pm.dpm.thermal.high_to_low = false;
8009 			queue_thermal = true;
8010 			break;
8011 		case 231: /* thermal high to low */
8012 			DRM_DEBUG("IH: thermal high to low\n");
8013 			rdev->pm.dpm.thermal.high_to_low = true;
8014 			queue_thermal = true;
8015 			break;
8016 		case 233: /* GUI IDLE */
8017 			DRM_DEBUG("IH: GUI idle\n");
8018 			break;
8019 		case 241: /* SDMA Privileged inst */
8020 		case 247: /* SDMA Privileged inst */
8021 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8022 			/* XXX check the bitfield order! */
8023 			me_id = (ring_id & 0x3) >> 0;
8024 			queue_id = (ring_id & 0xc) >> 2;
8025 			switch (me_id) {
8026 			case 0:
8027 				switch (queue_id) {
8028 				case 0:
8029 					queue_reset = true;
8030 					break;
8031 				case 1:
8032 					/* XXX compute */
8033 					queue_reset = true;
8034 					break;
8035 				case 2:
8036 					/* XXX compute */
8037 					queue_reset = true;
8038 					break;
8039 				}
8040 				break;
8041 			case 1:
8042 				switch (queue_id) {
8043 				case 0:
8044 					queue_reset = true;
8045 					break;
8046 				case 1:
8047 					/* XXX compute */
8048 					queue_reset = true;
8049 					break;
8050 				case 2:
8051 					/* XXX compute */
8052 					queue_reset = true;
8053 					break;
8054 				}
8055 				break;
8056 			}
8057 			break;
8058 		default:
8059 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8060 			break;
8061 		}
8062 
8063 		/* wptr/rptr are in bytes! */
8064 		rptr += 16;
8065 		rptr &= rdev->ih.ptr_mask;
8066 		WREG32(IH_RB_RPTR, rptr);
8067 	}
8068 	if (queue_dp)
8069 		schedule_work(&rdev->dp_work);
8070 	if (queue_hotplug)
8071 		schedule_delayed_work(&rdev->hotplug_work, 0);
8072 	if (queue_reset) {
8073 		rdev->needs_reset = true;
8074 		wake_up_all(&rdev->fence_queue);
8075 	}
8076 	if (queue_thermal)
8077 		schedule_work(&rdev->pm.dpm.thermal.work);
8078 	rdev->ih.rptr = rptr;
8079 	atomic_set(&rdev->ih.lock, 0);
8080 
8081 	/* make sure wptr hasn't changed while processing */
8082 	wptr = cik_get_ih_wptr(rdev);
8083 	if (wptr != rptr)
8084 		goto restart_ih;
8085 
8086 	return IRQ_HANDLED;
8087 }
8088 
8089 /*
8090  * startup/shutdown callbacks
8091  */
8092 static void cik_uvd_init(struct radeon_device *rdev)
8093 {
8094 	int r;
8095 
8096 	if (!rdev->has_uvd)
8097 		return;
8098 
8099 	r = radeon_uvd_init(rdev);
8100 	if (r) {
8101 		dev_err(rdev->dev, "failed UVD init (%d).\n", r);
8102 		/*
8103 		 * At this point rdev->uvd.vcpu_bo is NULL, which makes
8104 		 * cik_uvd_start() fail early, so nothing happens there.
8105 		 * It is pointless to go through that code, hence we
8106 		 * disable UVD here.
8107 		 */
8108 		rdev->has_uvd = 0;
8109 		return;
8110 	}
8111 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8112 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8113 }
8114 
8115 static void cik_uvd_start(struct radeon_device *rdev)
8116 {
8117 	int r;
8118 
8119 	if (!rdev->has_uvd)
8120 		return;
8121 
8122 	r = radeon_uvd_resume(rdev);
8123 	if (r) {
8124 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8125 		goto error;
8126 	}
8127 	r = uvd_v4_2_resume(rdev);
8128 	if (r) {
8129 		dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8130 		goto error;
8131 	}
8132 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8133 	if (r) {
8134 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8135 		goto error;
8136 	}
8137 	return;
8138 
8139 error:
8140 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8141 }
8142 
8143 static void cik_uvd_resume(struct radeon_device *rdev)
8144 {
8145 	struct radeon_ring *ring;
8146 	int r;
8147 
8148 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8149 		return;
8150 
8151 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8152 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8153 	if (r) {
8154 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8155 		return;
8156 	}
8157 	r = uvd_v1_0_init(rdev);
8158 	if (r) {
8159 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8160 		return;
8161 	}
8162 }
8163 
8164 static void cik_vce_init(struct radeon_device *rdev)
8165 {
8166 	int r;
8167 
8168 	if (!rdev->has_vce)
8169 		return;
8170 
8171 	r = radeon_vce_init(rdev);
8172 	if (r) {
8173 		dev_err(rdev->dev, "failed VCE init (%d).\n", r);
8174 		/*
8175 		 * At this point rdev->vce.vcpu_bo is NULL, which makes
8176 		 * cik_vce_start() fail early, so nothing happens there.
8177 		 * It is pointless to go through that code, hence we
8178 		 * disable VCE here.
8179 		 */
8180 		rdev->has_vce = 0;
8181 		return;
8182 	}
8183 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8184 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8185 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8186 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8187 }
8188 
8189 static void cik_vce_start(struct radeon_device *rdev)
8190 {
8191 	int r;
8192 
8193 	if (!rdev->has_vce)
8194 		return;
8195 
8196 	r = radeon_vce_resume(rdev);
8197 	if (r) {
8198 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8199 		goto error;
8200 	}
8201 	r = vce_v2_0_resume(rdev);
8202 	if (r) {
8203 		dev_err(rdev->dev, "failed VCE 2.0 resume (%d).\n", r);
8204 		goto error;
8205 	}
8206 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8207 	if (r) {
8208 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8209 		goto error;
8210 	}
8211 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8212 	if (r) {
8213 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8214 		goto error;
8215 	}
8216 	return;
8217 
8218 error:
8219 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8220 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8221 }
8222 
8223 static void cik_vce_resume(struct radeon_device *rdev)
8224 {
8225 	struct radeon_ring *ring;
8226 	int r;
8227 
8228 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8229 		return;
8230 
8231 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8232 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8233 	if (r) {
8234 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8235 		return;
8236 	}
8237 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8238 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8239 	if (r) {
8240 		dev_err(rdev->dev, "failed initializing VCE2 ring (%d).\n", r);
8241 		return;
8242 	}
8243 	r = vce_v1_0_init(rdev);
8244 	if (r) {
8245 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8246 		return;
8247 	}
8248 }
8249 
8250 /**
8251  * cik_startup - program the asic to a functional state
8252  *
8253  * @rdev: radeon_device pointer
8254  *
8255  * Programs the asic to a functional state (CIK).
8256  * Called by cik_init() and cik_resume().
8257  * Returns 0 for success, error for failure.
8258  */
8259 static int cik_startup(struct radeon_device *rdev)
8260 {
8261 	struct radeon_ring *ring;
8262 	u32 nop;
8263 	int r;
8264 
8265 	/* enable pcie gen2/3 link */
8266 	cik_pcie_gen3_enable(rdev);
8267 	/* enable aspm */
8268 	cik_program_aspm(rdev);
8269 
8270 	/* scratch needs to be initialized before MC */
8271 	r = r600_vram_scratch_init(rdev);
8272 	if (r)
8273 		return r;
8274 
8275 	cik_mc_program(rdev);
8276 
8277 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8278 		r = ci_mc_load_microcode(rdev);
8279 		if (r) {
8280 			DRM_ERROR("Failed to load MC firmware!\n");
8281 			return r;
8282 		}
8283 	}
8284 
8285 	r = cik_pcie_gart_enable(rdev);
8286 	if (r)
8287 		return r;
8288 	cik_gpu_init(rdev);
8289 
8290 	/* allocate rlc buffers */
8291 	if (rdev->flags & RADEON_IS_IGP) {
8292 		if (rdev->family == CHIP_KAVERI) {
8293 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8294 			rdev->rlc.reg_list_size =
8295 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8296 		} else {
8297 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8298 			rdev->rlc.reg_list_size =
8299 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8300 		}
8301 	}
8302 	rdev->rlc.cs_data = ci_cs_data;
8303 	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
8304 	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
8305 	r = sumo_rlc_init(rdev);
8306 	if (r) {
8307 		DRM_ERROR("Failed to init rlc BOs!\n");
8308 		return r;
8309 	}
8310 
8311 	/* allocate wb buffer */
8312 	r = radeon_wb_init(rdev);
8313 	if (r)
8314 		return r;
8315 
8316 	/* allocate mec buffers */
8317 	r = cik_mec_init(rdev);
8318 	if (r) {
8319 		DRM_ERROR("Failed to init MEC BOs!\n");
8320 		return r;
8321 	}
8322 
8323 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8324 	if (r) {
8325 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8326 		return r;
8327 	}
8328 
8329 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8330 	if (r) {
8331 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8332 		return r;
8333 	}
8334 
8335 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8336 	if (r) {
8337 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8338 		return r;
8339 	}
8340 
8341 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8342 	if (r) {
8343 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8344 		return r;
8345 	}
8346 
8347 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8348 	if (r) {
8349 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8350 		return r;
8351 	}
8352 
8353 	cik_uvd_start(rdev);
8354 	cik_vce_start(rdev);
8355 
8356 	/* Enable IRQ */
8357 	if (!rdev->irq.installed) {
8358 		r = radeon_irq_kms_init(rdev);
8359 		if (r)
8360 			return r;
8361 	}
8362 
8363 	r = cik_irq_init(rdev);
8364 	if (r) {
8365 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8366 		radeon_irq_kms_fini(rdev);
8367 		return r;
8368 	}
8369 	cik_irq_set(rdev);
8370 
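	/* Pick the padding packet for the rings below: normally a type-3
	 * NOP (the 0x3FFF count is the special one-dword NOP encoding),
	 * but older, non-unified Hawaii firmware apparently mishandles
	 * type-3 NOP padding, so fall back to type-2 (PACKET2) there.
	 */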
8371 	if (rdev->family == CHIP_HAWAII) {
8372 		if (rdev->new_fw)
8373 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8374 		else
8375 			nop = RADEON_CP_PACKET2;
8376 	} else {
8377 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8378 	}
8379 
8380 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8381 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8382 			     nop);
8383 	if (r)
8384 		return r;
8385 
8386 	/* set up the compute queues */
8387 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8388 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8389 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8390 			     nop);
8391 	if (r)
8392 		return r;
8393 	ring->me = 1; /* first MEC */
8394 	ring->pipe = 0; /* first pipe */
8395 	ring->queue = 0; /* first queue */
8396 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8397 
8398 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8399 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8400 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8401 			     nop);
8402 	if (r)
8403 		return r;
8404 	/* dGPUs only have 1 MEC */
8405 	ring->me = 1; /* first MEC */
8406 	ring->pipe = 0; /* first pipe */
8407 	ring->queue = 1; /* second queue */
8408 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8409 
8410 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8411 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8412 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8413 	if (r)
8414 		return r;
8415 
8416 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8417 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8418 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8419 	if (r)
8420 		return r;
8421 
8422 	r = cik_cp_resume(rdev);
8423 	if (r)
8424 		return r;
8425 
8426 	r = cik_sdma_resume(rdev);
8427 	if (r)
8428 		return r;
8429 
8430 	cik_uvd_resume(rdev);
8431 	cik_vce_resume(rdev);
8432 
8433 	r = radeon_ib_pool_init(rdev);
8434 	if (r) {
8435 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8436 		return r;
8437 	}
8438 
8439 	r = radeon_vm_manager_init(rdev);
8440 	if (r) {
8441 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8442 		return r;
8443 	}
8444 
8445 	r = radeon_audio_init(rdev);
8446 	if (r)
8447 		return r;
8448 
8449 	return 0;
8450 }
8451 
8452 /**
8453  * cik_resume - resume the asic to a functional state
8454  *
8455  * @rdev: radeon_device pointer
8456  *
8457  * Programs the asic to a functional state (CIK).
8458  * Called at resume.
8459  * Returns 0 for success, error for failure.
8460  */
8461 int cik_resume(struct radeon_device *rdev)
8462 {
8463 	int r;
8464 
8465 	/* post card */
8466 	atom_asic_init(rdev->mode_info.atom_context);
8467 
8468 	/* init golden registers */
8469 	cik_init_golden_registers(rdev);
8470 
8471 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8472 		radeon_pm_resume(rdev);
8473 
8474 	rdev->accel_working = true;
8475 	r = cik_startup(rdev);
8476 	if (r) {
8477 		DRM_ERROR("cik startup failed on resume\n");
8478 		rdev->accel_working = false;
8479 		return r;
8480 	}
8481 
8482 	return r;
8484 }
8485 
8486 /**
8487  * cik_suspend - suspend the asic
8488  *
8489  * @rdev: radeon_device pointer
8490  *
8491  * Bring the chip into a state suitable for suspend (CIK).
8492  * Called at suspend.
8493  * Returns 0 for success.
8494  */
8495 int cik_suspend(struct radeon_device *rdev)
8496 {
8497 	radeon_pm_suspend(rdev);
8498 	radeon_audio_fini(rdev);
8499 	radeon_vm_manager_fini(rdev);
8500 	cik_cp_enable(rdev, false);
8501 	cik_sdma_enable(rdev, false);
8502 	if (rdev->has_uvd) {
8503 		uvd_v1_0_fini(rdev);
8504 		radeon_uvd_suspend(rdev);
8505 	}
8506 	if (rdev->has_vce)
8507 		radeon_vce_suspend(rdev);
8508 	cik_fini_pg(rdev);
8509 	cik_fini_cg(rdev);
8510 	cik_irq_suspend(rdev);
8511 	radeon_wb_disable(rdev);
8512 	cik_pcie_gart_disable(rdev);
8513 	return 0;
8514 }
8515 
8516 /* The plan is to move initialization into this function and use
8517  * helper functions so that radeon_device_init does little more
8518  * than call asic-specific functions. This should also allow us
8519  * to remove a bunch of callback functions
8520  * like vram_info.
8521  */
8522 /**
8523  * cik_init - asic specific driver and hw init
8524  *
8525  * @rdev: radeon_device pointer
8526  *
8527  * Setup asic specific driver variables and program the hw
8528  * to a functional state (CIK).
8529  * Called at driver startup.
8530  * Returns 0 for success, errors for failure.
8531  */
8532 int cik_init(struct radeon_device *rdev)
8533 {
8534 	struct radeon_ring *ring;
8535 	int r;
8536 
8537 	/* Read BIOS */
8538 	if (!radeon_get_bios(rdev)) {
8539 		if (ASIC_IS_AVIVO(rdev))
8540 			return -EINVAL;
8541 	}
8542 	/* Must be an ATOMBIOS */
8543 	if (!rdev->is_atom_bios) {
8544 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8545 		return -EINVAL;
8546 	}
8547 	r = radeon_atombios_init(rdev);
8548 	if (r)
8549 		return r;
8550 
8551 	/* Post card if necessary */
8552 	if (!radeon_card_posted(rdev)) {
8553 		if (!rdev->bios) {
8554 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8555 			return -EINVAL;
8556 		}
8557 		DRM_INFO("GPU not posted. posting now...\n");
8558 		atom_asic_init(rdev->mode_info.atom_context);
8559 	}
8560 	/* init golden registers */
8561 	cik_init_golden_registers(rdev);
8562 	/* Initialize scratch registers */
8563 	cik_scratch_init(rdev);
8564 	/* Initialize surface registers */
8565 	radeon_surface_init(rdev);
8566 	/* Initialize clocks */
8567 	radeon_get_clock_info(rdev->ddev);
8568 
8569 	/* Fence driver */
8570 	r = radeon_fence_driver_init(rdev);
8571 	if (r)
8572 		return r;
8573 
8574 	/* initialize memory controller */
8575 	r = cik_mc_init(rdev);
8576 	if (r)
8577 		return r;
8578 	/* Memory manager */
8579 	r = radeon_bo_init(rdev);
8580 	if (r)
8581 		return r;
8582 
8583 	if (rdev->flags & RADEON_IS_IGP) {
8584 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8585 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8586 			r = cik_init_microcode(rdev);
8587 			if (r) {
8588 				DRM_ERROR("Failed to load firmware!\n");
8589 				return r;
8590 			}
8591 		}
8592 	} else {
8593 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8594 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8595 		    !rdev->mc_fw) {
8596 			r = cik_init_microcode(rdev);
8597 			if (r) {
8598 				DRM_ERROR("Failed to load firmware!\n");
8599 				return r;
8600 			}
8601 		}
8602 	}
8603 
8604 	/* Initialize power management */
8605 	radeon_pm_init(rdev);
8606 
8607 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8608 	ring->ring_obj = NULL;
8609 	r600_ring_init(rdev, ring, 1024 * 1024);
8610 
8611 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8612 	ring->ring_obj = NULL;
8613 	r600_ring_init(rdev, ring, 1024 * 1024);
8614 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8615 	if (r)
8616 		return r;
8617 
8618 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8619 	ring->ring_obj = NULL;
8620 	r600_ring_init(rdev, ring, 1024 * 1024);
8621 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8622 	if (r)
8623 		return r;
8624 
8625 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8626 	ring->ring_obj = NULL;
8627 	r600_ring_init(rdev, ring, 256 * 1024);
8628 
8629 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8630 	ring->ring_obj = NULL;
8631 	r600_ring_init(rdev, ring, 256 * 1024);
8632 
8633 	cik_uvd_init(rdev);
8634 	cik_vce_init(rdev);
8635 
8636 	rdev->ih.ring_obj = NULL;
8637 	r600_ih_ring_init(rdev, 64 * 1024);
8638 
8639 	r = r600_pcie_gart_init(rdev);
8640 	if (r)
8641 		return r;
8642 
8643 	rdev->accel_working = true;
8644 	r = cik_startup(rdev);
8645 	if (r) {
8646 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8647 		cik_cp_fini(rdev);
8648 		cik_sdma_fini(rdev);
8649 		cik_irq_fini(rdev);
8650 		sumo_rlc_fini(rdev);
8651 		cik_mec_fini(rdev);
8652 		radeon_wb_fini(rdev);
8653 		radeon_ib_pool_fini(rdev);
8654 		radeon_vm_manager_fini(rdev);
8655 		radeon_irq_kms_fini(rdev);
8656 		cik_pcie_gart_fini(rdev);
8657 		rdev->accel_working = false;
8658 	}
8659 
8660 	/* Don't start up if the MC ucode is missing.
8661 	 * The default clocks and voltages before the MC ucode
8662 	 * is loaded are not sufficient for advanced operations.
8663 	 */
8664 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8665 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8666 		return -EINVAL;
8667 	}
8668 
8669 	return 0;
8670 }
8671 
8672 /**
8673  * cik_fini - asic specific driver and hw fini
8674  *
8675  * @rdev: radeon_device pointer
8676  *
8677  * Tear down the asic specific driver variables and program the hw
8678  * to an idle state (CIK).
8679  * Called at driver unload.
8680  */
8681 void cik_fini(struct radeon_device *rdev)
8682 {
8683 	radeon_pm_fini(rdev);
8684 	cik_cp_fini(rdev);
8685 	cik_sdma_fini(rdev);
8686 	cik_fini_pg(rdev);
8687 	cik_fini_cg(rdev);
8688 	cik_irq_fini(rdev);
8689 	sumo_rlc_fini(rdev);
8690 	cik_mec_fini(rdev);
8691 	radeon_wb_fini(rdev);
8692 	radeon_vm_manager_fini(rdev);
8693 	radeon_ib_pool_fini(rdev);
8694 	radeon_irq_kms_fini(rdev);
8695 	uvd_v1_0_fini(rdev);
8696 	radeon_uvd_fini(rdev);
8697 	radeon_vce_fini(rdev);
8698 	cik_pcie_gart_fini(rdev);
8699 	r600_vram_scratch_fini(rdev);
8700 	radeon_gem_fini(rdev);
8701 	radeon_fence_driver_fini(rdev);
8702 	radeon_bo_fini(rdev);
8703 	radeon_atombios_fini(rdev);
8704 	cik_fini_microcode(rdev);
8705 	kfree(rdev->bios);
8706 	rdev->bios = NULL;
8707 }
8708 
8709 void dce8_program_fmt(struct drm_encoder *encoder)
8710 {
8711 	struct drm_device *dev = encoder->dev;
8712 	struct radeon_device *rdev = dev->dev_private;
8713 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8714 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8715 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8716 	int bpc = 0;
8717 	u32 tmp = 0;
8718 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8719 
8720 	if (connector) {
8721 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8722 		bpc = radeon_get_monitor_bpc(connector);
8723 		dither = radeon_connector->dither;
8724 	}
8725 
8726 	/* LVDS/eDP FMT is set up by atom */
8727 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8728 		return;
8729 
8730 	/* not needed for analog */
8731 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8732 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8733 		return;
8734 
8735 	if (bpc == 0)
8736 		return;
8737 
8738 	switch (bpc) {
8739 	case 6:
8740 		if (dither == RADEON_FMT_DITHER_ENABLE)
8741 			/* XXX sort out optimal dither settings */
8742 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8743 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8744 		else
8745 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8746 		break;
8747 	case 8:
8748 		if (dither == RADEON_FMT_DITHER_ENABLE)
8749 			/* XXX sort out optimal dither settings */
8750 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8751 				FMT_RGB_RANDOM_ENABLE |
8752 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8753 		else
8754 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8755 		break;
8756 	case 10:
8757 		if (dither == RADEON_FMT_DITHER_ENABLE)
8758 			/* XXX sort out optimal dither settings */
8759 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8760 				FMT_RGB_RANDOM_ENABLE |
8761 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8762 		else
8763 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8764 		break;
8765 	default:
8766 		/* not needed */
8767 		break;
8768 	}
8769 
8770 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8771 }
8772 
8773 /* display watermark setup */
8774 /**
8775  * dce8_line_buffer_adjust - Set up the line buffer
8776  *
8777  * @rdev: radeon_device pointer
8778  * @radeon_crtc: the selected display controller
8779  * @mode: the current display mode on the selected display
8780  * controller
8781  *
8782  * Set up the line buffer allocation for
8783  * the selected display controller (CIK).
8784  * Returns the line buffer size in pixels.
8785  */
8786 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8787 				   struct radeon_crtc *radeon_crtc,
8788 				   struct drm_display_mode *mode)
8789 {
8790 	u32 tmp, buffer_alloc, i;
8791 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8792 	/*
8793 	 * Line Buffer Setup
8794 	 * There are 6 line buffers, one for each display controller.
8795 	 * There are 3 partitions per LB. Select the number of partitions
8796 	 * to enable based on the display width.  For display widths larger
8797 	 * than 4096, you need to use 2 display controllers and combine
8798 	 * them using the stereo blender.
8799 	 */
8800 	if (radeon_crtc->base.enabled && mode) {
8801 		if (mode->crtc_hdisplay < 1920) {
8802 			tmp = 1;
8803 			buffer_alloc = 2;
8804 		} else if (mode->crtc_hdisplay < 2560) {
8805 			tmp = 2;
8806 			buffer_alloc = 2;
8807 		} else if (mode->crtc_hdisplay < 4096) {
8808 			tmp = 0;
8809 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8810 		} else {
8811 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8812 			tmp = 0;
8813 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8814 		}
8815 	} else {
8816 		tmp = 1;
8817 		buffer_alloc = 0;
8818 	}
8819 
8820 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8821 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8822 
8823 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8824 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8825 	for (i = 0; i < rdev->usec_timeout; i++) {
8826 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8827 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8828 			break;
8829 		udelay(1);
8830 	}
8831 
8832 	if (radeon_crtc->base.enabled && mode) {
8833 		switch (tmp) {
8834 		case 0:
8835 		default:
8836 			return 4096 * 2;
8837 		case 1:
8838 			return 1920 * 2;
8839 		case 2:
8840 			return 2560 * 2;
8841 		}
8842 	}
8843 
8844 	/* controller not enabled, so no lb used */
8845 	return 0;
8846 }
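
/* Worked example: an enabled 1280-wide mode takes the first branch
 * (tmp = 1) and reports 1920 * 2 pixels of line buffer, while a
 * 1920-wide mode falls through to the second branch (tmp = 2) and
 * reports 2560 * 2 pixels.
 */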
8847 
8848 /**
8849  * cik_get_number_of_dram_channels - get the number of dram channels
8850  *
8851  * @rdev: radeon_device pointer
8852  *
8853  * Look up the number of video ram channels (CIK).
8854  * Used for display watermark bandwidth calculations
8855  * Returns the number of dram channels
8856  */
8857 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8858 {
8859 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8860 
8861 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8862 	case 0:
8863 	default:
8864 		return 1;
8865 	case 1:
8866 		return 2;
8867 	case 2:
8868 		return 4;
8869 	case 3:
8870 		return 8;
8871 	case 4:
8872 		return 3;
8873 	case 5:
8874 		return 6;
8875 	case 6:
8876 		return 10;
8877 	case 7:
8878 		return 12;
8879 	case 8:
8880 		return 16;
8881 	}
8882 }
8883 
8884 struct dce8_wm_params {
8885 	u32 dram_channels; /* number of dram channels */
8886 	u32 yclk;          /* bandwidth per dram data pin in kHz */
8887 	u32 sclk;          /* engine clock in kHz */
8888 	u32 disp_clk;      /* display clock in kHz */
8889 	u32 src_width;     /* viewport width */
8890 	u32 active_time;   /* active display time in ns */
8891 	u32 blank_time;    /* blank time in ns */
8892 	bool interlaced;    /* mode is interlaced */
8893 	fixed20_12 vsc;    /* vertical scale ratio */
8894 	u32 num_heads;     /* number of active crtcs */
8895 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8896 	u32 lb_size;       /* line buffer allocated to pipe */
8897 	u32 vtaps;         /* vertical scaler taps */
8898 };
8899 
8900 /**
8901  * dce8_dram_bandwidth - get the dram bandwidth
8902  *
8903  * @wm: watermark calculation data
8904  *
8905  * Calculate the raw dram bandwidth (CIK).
8906  * Used for display watermark bandwidth calculations
8907  * Returns the dram bandwidth in MBytes/s
8908  */
8909 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8910 {
8911 	/* Calculate raw DRAM Bandwidth */
8912 	fixed20_12 dram_efficiency; /* 0.7 */
8913 	fixed20_12 yclk, dram_channels, bandwidth;
8914 	fixed20_12 a;
8915 
8916 	a.full = dfixed_const(1000);
8917 	yclk.full = dfixed_const(wm->yclk);
8918 	yclk.full = dfixed_div(yclk, a);
8919 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8920 	a.full = dfixed_const(10);
8921 	dram_efficiency.full = dfixed_const(7);
8922 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8923 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8924 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8925 
8926 	return dfixed_trunc(bandwidth);
8927 }
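
/* Worked example (hypothetical numbers): with yclk = 1000000 (1 GHz
 * per pin) and 4 dram channels, this computes 1000 * (4 * 4) * 0.7
 * = 11200 MBytes/s of raw dram bandwidth.
 */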
8928 
8929 /**
8930  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8931  *
8932  * @wm: watermark calculation data
8933  *
8934  * Calculate the dram bandwidth used for display (CIK).
8935  * Used for display watermark bandwidth calculations
8936  * Returns the dram bandwidth for display in MBytes/s
8937  */
8938 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8939 {
8940 	/* Calculate DRAM Bandwidth and the part allocated to display. */
8941 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8942 	fixed20_12 yclk, dram_channels, bandwidth;
8943 	fixed20_12 a;
8944 
8945 	a.full = dfixed_const(1000);
8946 	yclk.full = dfixed_const(wm->yclk);
8947 	yclk.full = dfixed_div(yclk, a);
8948 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8949 	a.full = dfixed_const(10);
8950 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst-case value 0.3 */
8951 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8952 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8953 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8954 
8955 	return dfixed_trunc(bandwidth);
8956 }
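
/* Same computation with the 0.3 display allocation: the hypothetical
 * 11200 MBytes/s configuration above leaves 1000 * 16 * 0.3
 * = 4800 MBytes/s for display.
 */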
8957 
8958 /**
8959  * dce8_data_return_bandwidth - get the data return bandwidth
8960  *
8961  * @wm: watermark calculation data
8962  *
8963  * Calculate the data return bandwidth used for display (CIK).
8964  * Used for display watermark bandwidth calculations
8965  * Returns the data return bandwidth in MBytes/s
8966  */
8967 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8968 {
8969 	/* Calculate the display Data return Bandwidth */
8970 	fixed20_12 return_efficiency; /* 0.8 */
8971 	fixed20_12 sclk, bandwidth;
8972 	fixed20_12 a;
8973 
8974 	a.full = dfixed_const(1000);
8975 	sclk.full = dfixed_const(wm->sclk);
8976 	sclk.full = dfixed_div(sclk, a);
8977 	a.full = dfixed_const(10);
8978 	return_efficiency.full = dfixed_const(8);
8979 	return_efficiency.full = dfixed_div(return_efficiency, a);
8980 	a.full = dfixed_const(32);
8981 	bandwidth.full = dfixed_mul(a, sclk);
8982 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8983 
8984 	return dfixed_trunc(bandwidth);
8985 }
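
/* Worked example (hypothetical sclk = 800000, i.e. 800 MHz):
 * 800 * 32 * 0.8 = 20480 MBytes/s of data return bandwidth.
 */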
8986 
8987 /**
8988  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8989  *
8990  * @wm: watermark calculation data
8991  *
8992  * Calculate the dmif bandwidth used for display (CIK).
8993  * Used for display watermark bandwidth calculations
8994  * Returns the dmif bandwidth in MBytes/s
8995  */
8996 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8997 {
8998 	/* Calculate the DMIF Request Bandwidth */
8999 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9000 	fixed20_12 disp_clk, bandwidth;
9001 	fixed20_12 a, b;
9002 
9003 	a.full = dfixed_const(1000);
9004 	disp_clk.full = dfixed_const(wm->disp_clk);
9005 	disp_clk.full = dfixed_div(disp_clk, a);
9006 	a.full = dfixed_const(32);
9007 	b.full = dfixed_mul(a, disp_clk);
9008 
9009 	a.full = dfixed_const(10);
9010 	disp_clk_request_efficiency.full = dfixed_const(8);
9011 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9012 
9013 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9014 
9015 	return dfixed_trunc(bandwidth);
9016 }
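
/* Same shape as the data return path: disp_clk / 1000 * 32 bytes
 * * 0.8 efficiency; e.g. a hypothetical 600 MHz disp_clk yields
 * 600 * 32 * 0.8 = 15360 MBytes/s.
 */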
9017 
9018 /**
9019  * dce8_available_bandwidth - get the min available bandwidth
9020  *
9021  * @wm: watermark calculation data
9022  *
9023  * Calculate the min available bandwidth used for display (CIK).
9024  * Used for display watermark bandwidth calculations
9025  * Returns the min available bandwidth in MBytes/s
9026  */
9027 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9028 {
9029 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
9030 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9031 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9032 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9033 
9034 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9035 }
9036 
9037 /**
9038  * dce8_average_bandwidth - get the average available bandwidth
9039  *
9040  * @wm: watermark calculation data
9041  *
9042  * Calculate the average available bandwidth used for display (CIK).
9043  * Used for display watermark bandwidth calculations
9044  * Returns the average available bandwidth in MBytes/s
9045  */
9046 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9047 {
9048 	/* Calculate the display mode Average Bandwidth
9049 	 * DisplayMode should contain the source and destination dimensions,
9050 	 * timing, etc.
9051 	 */
9052 	fixed20_12 bpp;
9053 	fixed20_12 line_time;
9054 	fixed20_12 src_width;
9055 	fixed20_12 bandwidth;
9056 	fixed20_12 a;
9057 
9058 	a.full = dfixed_const(1000);
9059 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9060 	line_time.full = dfixed_div(line_time, a);
9061 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9062 	src_width.full = dfixed_const(wm->src_width);
9063 	bandwidth.full = dfixed_mul(src_width, bpp);
9064 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9065 	bandwidth.full = dfixed_div(bandwidth, line_time);
9066 
9067 	return dfixed_trunc(bandwidth);
9068 }
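
/* Worked example (hypothetical): a 1920-wide source at 4 bytes/pixel
 * with vsc = 1 and a 16000 ns line time averages 1920 * 4 / 16
 * = 480 MBytes/s.
 */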
9069 
9070 /**
9071  * dce8_latency_watermark - get the latency watermark
9072  *
9073  * @wm: watermark calculation data
9074  *
9075  * Calculate the latency watermark (CIK).
9076  * Used for display watermark bandwidth calculations
9077  * Returns the latency watermark in ns
9078  */
9079 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9080 {
9081 	/* First calculate the latency in ns */
9082 	u32 mc_latency = 2000; /* 2000 ns. */
9083 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9084 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9085 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9086 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9087 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9088 		(wm->num_heads * cursor_line_pair_return_time);
9089 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9090 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9091 	u32 tmp, dmif_size = 12288;
9092 	fixed20_12 a, b, c;
9093 
9094 	if (wm->num_heads == 0)
9095 		return 0;
9096 
9097 	a.full = dfixed_const(2);
9098 	b.full = dfixed_const(1);
9099 	if ((wm->vsc.full > a.full) ||
9100 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9101 	    (wm->vtaps >= 5) ||
9102 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9103 		max_src_lines_per_dst_line = 4;
9104 	else
9105 		max_src_lines_per_dst_line = 2;
9106 
9107 	a.full = dfixed_const(available_bandwidth);
9108 	b.full = dfixed_const(wm->num_heads);
9109 	a.full = dfixed_div(a, b);
9110 	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
9111 	tmp = min(dfixed_trunc(a), tmp);
9112 
9113 	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
9114 
9115 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9116 	b.full = dfixed_const(1000);
9117 	c.full = dfixed_const(lb_fill_bw);
9118 	b.full = dfixed_div(c, b);
9119 	a.full = dfixed_div(a, b);
9120 	line_fill_time = dfixed_trunc(a);
9121 
9122 	if (line_fill_time < wm->active_time)
9123 		return latency;
9124 	else
9125 		return latency + (line_fill_time - wm->active_time);
9126 
9127 }
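
/* Worked example (hypothetical, num_heads = 1): with 11200 MBytes/s
 * available, worst_chunk_return_time = 512 * 8 * 1000 / 11200 ~= 365 ns
 * and cursor_line_pair_return_time ~= 45 ns, so
 * other_heads_data_return_time = 2 * 365 + 1 * 45 = 775 ns; a
 * 148500 kHz disp_clk gives dc_latency ~= 269 ns, for a latency of
 * roughly 2000 + 775 + 269 = 3044 ns before the line fill correction.
 */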
9128 
9129 /**
9130  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9131  * average and available dram bandwidth
9132  *
9133  * @wm: watermark calculation data
9134  *
9135  * Check if the display average bandwidth fits in the display
9136  * dram bandwidth (CIK).
9137  * Used for display watermark bandwidth calculations
9138  * Returns true if the display fits, false if not.
9139  */
9140 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9141 {
9142 	if (dce8_average_bandwidth(wm) <=
9143 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9144 		return true;
9145 	else
9146 		return false;
9147 }
9148 
9149 /**
9150  * dce8_average_bandwidth_vs_available_bandwidth - check
9151  * average and available bandwidth
9152  *
9153  * @wm: watermark calculation data
9154  *
9155  * Check if the display average bandwidth fits in the display
9156  * available bandwidth (CIK).
9157  * Used for display watermark bandwidth calculations
9158  * Returns true if the display fits, false if not.
9159  */
9160 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9161 {
9162 	if (dce8_average_bandwidth(wm) <=
9163 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9164 		return true;
9165 	else
9166 		return false;
9167 }
9168 
9169 /**
9170  * dce8_check_latency_hiding - check latency hiding
9171  *
9172  * @wm: watermark calculation data
9173  *
9174  * Check latency hiding (CIK).
9175  * Used for display watermark bandwidth calculations
9176  * Returns true if the display fits, false if not.
9177  */
9178 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9179 {
9180 	u32 lb_partitions = wm->lb_size / wm->src_width;
9181 	u32 line_time = wm->active_time + wm->blank_time;
9182 	u32 latency_tolerant_lines;
9183 	u32 latency_hiding;
9184 	fixed20_12 a;
9185 
9186 	a.full = dfixed_const(1);
9187 	if (wm->vsc.full > a.full)
9188 		latency_tolerant_lines = 1;
9189 	else {
9190 		if (lb_partitions <= (wm->vtaps + 1))
9191 			latency_tolerant_lines = 1;
9192 		else
9193 			latency_tolerant_lines = 2;
9194 	}
9195 
9196 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9197 
9198 	if (dce8_latency_watermark(wm) <= latency_hiding)
9199 		return true;
9200 	else
9201 		return false;
9202 }
9203 
9204 /**
9205  * dce8_program_watermarks - program display watermarks
9206  *
9207  * @rdev: radeon_device pointer
9208  * @radeon_crtc: the selected display controller
9209  * @lb_size: line buffer size
9210  * @num_heads: number of display controllers in use
9211  *
9212  * Calculate and program the display watermarks for the
9213  * selected display controller (CIK).
9214  */
9215 static void dce8_program_watermarks(struct radeon_device *rdev,
9216 				    struct radeon_crtc *radeon_crtc,
9217 				    u32 lb_size, u32 num_heads)
9218 {
9219 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9220 	struct dce8_wm_params wm_low, wm_high;
9221 	u32 active_time;
9222 	u32 line_time = 0;
9223 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9224 	u32 tmp, wm_mask;
9225 
9226 	if (radeon_crtc->base.enabled && num_heads && mode) {
9227 		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
9228 					    (u32)mode->clock);
9229 		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
9230 					  (u32)mode->clock);
9231 		line_time = min(line_time, (u32)65535);
9232 
9233 		/* watermark for high clocks */
9234 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9235 		    rdev->pm.dpm_enabled) {
9236 			wm_high.yclk =
9237 				radeon_dpm_get_mclk(rdev, false) * 10;
9238 			wm_high.sclk =
9239 				radeon_dpm_get_sclk(rdev, false) * 10;
9240 		} else {
9241 			wm_high.yclk = rdev->pm.current_mclk * 10;
9242 			wm_high.sclk = rdev->pm.current_sclk * 10;
9243 		}
9244 
9245 		wm_high.disp_clk = mode->clock;
9246 		wm_high.src_width = mode->crtc_hdisplay;
9247 		wm_high.active_time = active_time;
9248 		wm_high.blank_time = line_time - wm_high.active_time;
9249 		wm_high.interlaced = false;
9250 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9251 			wm_high.interlaced = true;
9252 		wm_high.vsc = radeon_crtc->vsc;
9253 		wm_high.vtaps = 1;
9254 		if (radeon_crtc->rmx_type != RMX_OFF)
9255 			wm_high.vtaps = 2;
9256 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9257 		wm_high.lb_size = lb_size;
9258 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9259 		wm_high.num_heads = num_heads;
9260 
9261 		/* set for high clocks */
9262 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9263 
9264 		/* possibly force display priority to high */
9265 		/* should really do this at mode validation time... */
9266 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9267 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9268 		    !dce8_check_latency_hiding(&wm_high) ||
9269 		    (rdev->disp_priority == 2)) {
9270 			DRM_DEBUG_KMS("force priority to high\n");
9271 		}
9272 
9273 		/* watermark for low clocks */
9274 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9275 		    rdev->pm.dpm_enabled) {
9276 			wm_low.yclk =
9277 				radeon_dpm_get_mclk(rdev, true) * 10;
9278 			wm_low.sclk =
9279 				radeon_dpm_get_sclk(rdev, true) * 10;
9280 		} else {
9281 			wm_low.yclk = rdev->pm.current_mclk * 10;
9282 			wm_low.sclk = rdev->pm.current_sclk * 10;
9283 		}
9284 
9285 		wm_low.disp_clk = mode->clock;
9286 		wm_low.src_width = mode->crtc_hdisplay;
9287 		wm_low.active_time = active_time;
9288 		wm_low.blank_time = line_time - wm_low.active_time;
9289 		wm_low.interlaced = false;
9290 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9291 			wm_low.interlaced = true;
9292 		wm_low.vsc = radeon_crtc->vsc;
9293 		wm_low.vtaps = 1;
9294 		if (radeon_crtc->rmx_type != RMX_OFF)
9295 			wm_low.vtaps = 2;
9296 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9297 		wm_low.lb_size = lb_size;
9298 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9299 		wm_low.num_heads = num_heads;
9300 
9301 		/* set for low clocks */
9302 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9303 
9304 		/* possibly force display priority to high */
9305 		/* should really do this at mode validation time... */
9306 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9307 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9308 		    !dce8_check_latency_hiding(&wm_low) ||
9309 		    (rdev->disp_priority == 2)) {
9310 			DRM_DEBUG_KMS("force priority to high\n");
9311 		}
9312 
9313 		/* Save number of lines the linebuffer leads before the scanout */
9314 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9315 	}
9316 
9317 	/* select wm A */
9318 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9319 	tmp = wm_mask;
9320 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9321 	tmp |= LATENCY_WATERMARK_MASK(1);
9322 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9323 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9324 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9325 		LATENCY_HIGH_WATERMARK(line_time)));
9326 	/* select wm B */
9327 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9328 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9329 	tmp |= LATENCY_WATERMARK_MASK(2);
9330 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9331 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9332 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9333 		LATENCY_HIGH_WATERMARK(line_time)));
9334 	/* restore original selection */
9335 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9336 
9337 	/* save values for DPM */
9338 	radeon_crtc->line_time = line_time;
9339 	radeon_crtc->wm_high = latency_watermark_a;
9340 	radeon_crtc->wm_low = latency_watermark_b;
9341 }
9342 
9343 /**
9344  * dce8_bandwidth_update - program display watermarks
9345  *
9346  * @rdev: radeon_device pointer
9347  *
9348  * Calculate and program the display watermarks and line
9349  * buffer allocation (CIK).
9350  */
9351 void dce8_bandwidth_update(struct radeon_device *rdev)
9352 {
9353 	struct drm_display_mode *mode = NULL;
9354 	u32 num_heads = 0, lb_size;
9355 	int i;
9356 
9357 	if (!rdev->mode_info.mode_config_initialized)
9358 		return;
9359 
9360 	radeon_update_display_priority(rdev);
9361 
9362 	for (i = 0; i < rdev->num_crtc; i++) {
9363 		if (rdev->mode_info.crtcs[i]->base.enabled)
9364 			num_heads++;
9365 	}
9366 	for (i = 0; i < rdev->num_crtc; i++) {
9367 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9368 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9369 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9370 	}
9371 }
9372 
9373 /**
9374  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9375  *
9376  * @rdev: radeon_device pointer
9377  *
9378  * Fetches a GPU clock counter snapshot (CIK).
9379  * Returns the 64 bit clock counter snapshot.
9380  */
9381 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9382 {
9383 	uint64_t clock;
9384 
9385 	mutex_lock(&rdev->gpu_clock_mutex);
9386 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9387 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9388 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9389 	mutex_unlock(&rdev->gpu_clock_mutex);
9390 	return clock;
9391 }
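
/* Hypothetical usage sketch: sample the counter twice to measure the
 * GPU clocks elapsed around a workload:
 *
 *	uint64_t t0 = cik_get_gpu_clock_counter(rdev);
 *	... submit work and wait for it ...
 *	uint64_t t1 = cik_get_gpu_clock_counter(rdev);
 *	DRM_INFO("elapsed gpu clocks: %llu\n",
 *		 (unsigned long long)(t1 - t0));
 */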
9392 
9393 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9394 			     u32 cntl_reg, u32 status_reg)
9395 {
9396 	int r, i;
9397 	struct atom_clock_dividers dividers;
9398 	uint32_t tmp;
9399 
9400 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9401 					   clock, false, &dividers);
9402 	if (r)
9403 		return r;
9404 
9405 	tmp = RREG32_SMC(cntl_reg);
9406 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9407 	tmp |= dividers.post_divider;
9408 	WREG32_SMC(cntl_reg, tmp);
9409 
9410 	for (i = 0; i < 100; i++) {
9411 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9412 			break;
9413 		mdelay(10);
9414 	}
9415 	if (i == 100)
9416 		return -ETIMEDOUT;
9417 
9418 	return 0;
9419 }
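
/* This helper programs only the post divider for the requested clock,
 * then polls the matching status register (up to ~1 s in 10 ms steps)
 * before declaring -ETIMEDOUT.
 */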
9420 
9421 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9422 {
9423 	int r = 0;
9424 
9425 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9426 	if (r)
9427 		return r;
9428 
9429 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9430 	return r;
9431 }
9432 
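/* cik_set_vce_clocks() below, unlike the UVD helper above, polls
 * ECLK_STATUS for a stable clock both before programming the new
 * divider and again afterwards, presumably to let the eclk settle.
 */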
9433 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9434 {
9435 	int r, i;
9436 	struct atom_clock_dividers dividers;
9437 	u32 tmp;
9438 
9439 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9440 					   ecclk, false, &dividers);
9441 	if (r)
9442 		return r;
9443 
9444 	for (i = 0; i < 100; i++) {
9445 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9446 			break;
9447 		mdelay(10);
9448 	}
9449 	if (i == 100)
9450 		return -ETIMEDOUT;
9451 
9452 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9453 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9454 	tmp |= dividers.post_divider;
9455 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9456 
9457 	for (i = 0; i < 100; i++) {
9458 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9459 			break;
9460 		mdelay(10);
9461 	}
9462 	if (i == 100)
9463 		return -ETIMEDOUT;
9464 
9465 	return 0;
9466 }
9467 
9468 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9469 {
9470 	struct pci_dev *root = rdev->pdev->bus->self;
9471 	int bridge_pos, gpu_pos;
9472 	u32 speed_cntl, mask, current_data_rate;
9473 	int ret, i;
9474 	u16 tmp16;
9475 
9476 #if 0
9477 	if (pci_is_root_bus(rdev->pdev->bus))
9478 		return;
9479 #endif
9480 
9481 	if (radeon_pcie_gen2 == 0)
9482 		return;
9483 
9484 	if (rdev->flags & RADEON_IS_IGP)
9485 		return;
9486 
9487 	if (!(rdev->flags & RADEON_IS_PCIE))
9488 		return;
9489 
9490 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9491 	if (ret != 0)
9492 		return;
9493 
9494 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9495 		return;
9496 
9497 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9498 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9499 		LC_CURRENT_DATA_RATE_SHIFT;
9500 	if (mask & DRM_PCIE_SPEED_80) {
9501 		if (current_data_rate == 2) {
9502 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9503 			return;
9504 		}
9505 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9506 	} else if (mask & DRM_PCIE_SPEED_50) {
9507 		if (current_data_rate == 1) {
9508 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9509 			return;
9510 		}
9511 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9512 	}
9513 
9514 	bridge_pos = pci_pcie_cap(root);
9515 	if (!bridge_pos)
9516 		return;
9517 
9518 	gpu_pos = pci_pcie_cap(rdev->pdev);
9519 	if (!gpu_pos)
9520 		return;
9521 
9522 	if (mask & DRM_PCIE_SPEED_80) {
9523 		/* re-try equalization if gen3 is not already enabled */
9524 		if (current_data_rate != 2) {
9525 			u16 bridge_cfg, gpu_cfg;
9526 			u16 bridge_cfg2, gpu_cfg2;
9527 			u32 max_lw, current_lw, tmp;
9528 
9529 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9530 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9531 
9532 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9533 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9534 
9535 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9536 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9537 
9538 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9539 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9540 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9541 
9542 			if (current_lw < max_lw) {
9543 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9544 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9545 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9546 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9547 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9548 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9549 				}
9550 			}
9551 
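			/* Redo link equalization, up to 10 attempts: quiesce
			 * the link, request an EQ redo, then restore the saved
			 * LNKCTL/LNKCTL2 settings on both bridge and GPU.
			 */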
9552 			for (i = 0; i < 10; i++) {
9553 				/* check status */
9554 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9555 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9556 					break;
9557 
9558 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9559 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9560 
9561 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9562 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9563 
9564 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9565 				tmp |= LC_SET_QUIESCE;
9566 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9567 
9568 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9569 				tmp |= LC_REDO_EQ;
9570 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9571 
9572 				mdelay(100);
9573 
9574 				/* linkctl */
9575 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9576 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9577 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9578 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9579 
9580 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9581 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9582 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9583 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9584 
9585 				/* linkctl2 */
9586 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9587 				tmp16 &= ~((1 << 4) | (7 << 9));
9588 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9589 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9590 
9591 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9592 				tmp16 &= ~((1 << 4) | (7 << 9));
9593 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9594 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9595 
9596 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9597 				tmp &= ~LC_SET_QUIESCE;
9598 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9599 			}
9600 		}
9601 	}
9602 
9603 	/* set the link speed */
9604 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9605 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9606 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9607 
9608 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9609 	tmp16 &= ~0xf;
9610 	if (mask & DRM_PCIE_SPEED_80)
9611 		tmp16 |= 3; /* gen3 */
9612 	else if (mask & DRM_PCIE_SPEED_50)
9613 		tmp16 |= 2; /* gen2 */
9614 	else
9615 		tmp16 |= 1; /* gen1 */
9616 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9617 
9618 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9619 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9620 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9621 
9622 	for (i = 0; i < rdev->usec_timeout; i++) {
9623 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9624 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9625 			break;
9626 		udelay(1);
9627 	}
9628 }
9629 
9630 static void cik_program_aspm(struct radeon_device *rdev)
9631 {
9632 	u32 data, orig;
9633 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9634 #if 0
9635 	bool disable_clkreq = false;
9636 #endif
9637 
9638 	if (radeon_aspm == 0)
9639 		return;
9640 
9641 	/* XXX double check IGPs */
9642 	if (rdev->flags & RADEON_IS_IGP)
9643 		return;
9644 
9645 	if (!(rdev->flags & RADEON_IS_PCIE))
9646 		return;
9647 
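	/* Everything below is read-modify-write: each register is written
	 * back only when the computed value actually changed, avoiding
	 * redundant PCIE-port/SMC register writes.
	 */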
9648 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9649 	data &= ~LC_XMIT_N_FTS_MASK;
9650 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9651 	if (orig != data)
9652 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9653 
9654 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9655 	data |= LC_GO_TO_RECOVERY;
9656 	if (orig != data)
9657 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9658 
9659 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9660 	data |= P_IGNORE_EDB_ERR;
9661 	if (orig != data)
9662 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9663 
9664 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9665 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9666 	data |= LC_PMI_TO_L1_DIS;
9667 	if (!disable_l0s)
9668 		data |= LC_L0S_INACTIVITY(7);
9669 
9670 	if (!disable_l1) {
9671 		data |= LC_L1_INACTIVITY(7);
9672 		data &= ~LC_PMI_TO_L1_DIS;
9673 		if (orig != data)
9674 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9675 
9676 		if (!disable_plloff_in_l1) {
9677 			bool clk_req_support;
9678 
9679 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9680 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9681 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9682 			if (orig != data)
9683 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9684 
9685 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9686 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9687 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9688 			if (orig != data)
9689 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9690 
9691 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9692 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9693 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9694 			if (orig != data)
9695 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9696 
9697 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9698 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9699 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9700 			if (orig != data)
9701 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9702 
9703 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9704 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9705 			data |= LC_DYN_LANES_PWR_STATE(3);
9706 			if (orig != data)
9707 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9708 
9709 #ifdef zMN_TODO
9710 			if (!disable_clkreq &&
9711 			    !pci_is_root_bus(rdev->pdev->bus)) {
9712 				struct pci_dev *root = rdev->pdev->bus->self;
9713 				u32 lnkcap;
9714 
9715 				clk_req_support = false;
9716 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9717 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9718 					clk_req_support = true;
9719 			} else {
9720 				clk_req_support = false;
9721 			}
9722 #else
9723 			clk_req_support = false;
9724 #endif
9725 
9726 			if (clk_req_support) {
9727 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9728 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9729 				if (orig != data)
9730 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9731 
9732 				orig = data = RREG32_SMC(THM_CLK_CNTL);
9733 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9734 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9735 				if (orig != data)
9736 					WREG32_SMC(THM_CLK_CNTL, data);
9737 
9738 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
9739 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9740 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9741 				if (orig != data)
9742 					WREG32_SMC(MISC_CLK_CTRL, data);
9743 
9744 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9745 				data &= ~BCLK_AS_XCLK;
9746 				if (orig != data)
9747 					WREG32_SMC(CG_CLKPIN_CNTL, data);
9748 
9749 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9750 				data &= ~FORCE_BIF_REFCLK_EN;
9751 				if (orig != data)
9752 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9753 
9754 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9755 				data &= ~MPLL_CLKOUT_SEL_MASK;
9756 				data |= MPLL_CLKOUT_SEL(4);
9757 				if (orig != data)
9758 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9759 			}
9760 		}
9761 	} else {
9762 		if (orig != data)
9763 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9764 	}
9765 
9766 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9767 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9768 	if (orig != data)
9769 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
9770 
9771 	if (!disable_l0s) {
9772 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9773 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9774 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9775 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9776 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9777 				data &= ~LC_L0S_INACTIVITY_MASK;
9778 				if (orig != data)
9779 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9780 			}
9781 		}
9782 	}
9783 }
9784