/* xref: /dragonfly/sys/dev/drm/radeon/cik.c (revision 2b57e6df) */
/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_audio.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"
#include "radeon_kfd.h"

#define SH_MEM_CONFIG_GFX_DEFAULT \
	ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");
MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");

MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");
MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");

MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");

static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

/**
 * cik_get_allowed_info_register - fetch the register for the info ioctl
 *
 * @rdev: radeon_device pointer
 * @reg: register offset in bytes
 * @val: register value
 *
 * Returns 0 for success or -EINVAL for an invalid register
 *
 */
int cik_get_allowed_info_register(struct radeon_device *rdev,
				  u32 reg, u32 *val)
{
	switch (reg) {
	case GRBM_STATUS:
	case GRBM_STATUS2:
	case GRBM_STATUS_SE0:
	case GRBM_STATUS_SE1:
	case GRBM_STATUS_SE2:
	case GRBM_STATUS_SE3:
	case SRBM_STATUS:
	case SRBM_STATUS2:
	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
	case UVD_STATUS:
	/* TODO VCE */
		*val = RREG32(reg);
		return 0;
	default:
		return -EINVAL;
	}
}
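
/*
 * Usage sketch (illustrative, not taken from this file): the RADEON_INFO
 * ioctl path is expected to funnel userspace register reads through this
 * whitelist, e.g.:
 *
 *	u32 val;
 *
 *	if (cik_get_allowed_info_register(rdev, GRBM_STATUS, &val) == 0)
 *		DRM_INFO("GRBM_STATUS = 0x%08x\n", val);
 *
 * Anything not listed above is rejected with -EINVAL.
 */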

/*
 * Indirect registers accessor
 */
u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
{
	u32 r;

	lockmgr(&rdev->didt_idx_lock, LK_EXCLUSIVE);
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	r = RREG32(CIK_DIDT_IND_DATA);
	lockmgr(&rdev->didt_idx_lock, LK_RELEASE);
	return r;
}

void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	lockmgr(&rdev->didt_idx_lock, LK_EXCLUSIVE);
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	WREG32(CIK_DIDT_IND_DATA, (v));
	lockmgr(&rdev->didt_idx_lock, LK_RELEASE);
}
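
/*
 * Example (a sketch; the register and bit names are illustrative): a
 * read-modify-write of a DIDT register goes through the paired accessors
 * so each index/data sequence stays consistent under didt_idx_lock:
 *
 *	u32 tmp = cik_didt_rreg(rdev, ixDIDT_SQ_CTRL0);	// hypothetical reg
 *
 *	tmp |= SOME_ENABLE_BIT;				// hypothetical bit
 *	cik_didt_wreg(rdev, ixDIDT_SQ_CTRL0, tmp);
 */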

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
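
/*
 * Worked example: a raw CTF_TEMP field of 0x3c (60) has bit 9 clear, so it
 * is reported as 60 * 1000 = 60000 millidegrees (60 C); any reading with
 * bit 9 set is clamped to 255 C.
 */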

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
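
/*
 * Worked example: a raw SMC reading of 872 maps to (872 / 8) - 49 = 60 C,
 * returned as 60000 millidegrees; a zero reading is reported as 0.
 */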

/*
 * Indirect registers accessor
 */
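/*
 * Note: the dummy (void)RREG32() read-backs below presumably flush the
 * posted index write before the data access, the usual precaution for
 * index/data register pairs (an assumption; the hardware docs are not
 * quoted here).
 */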
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	u32 r;

	lockmgr(&rdev->pciep_idx_lock, LK_EXCLUSIVE);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	lockmgr(&rdev->pciep_idx_lock, LK_RELEASE);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	lockmgr(&rdev->pciep_idx_lock, LK_EXCLUSIVE);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	lockmgr(&rdev->pciep_idx_lock, LK_RELEASE);
}

static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

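/*
 * The golden/cgcg tables below are consumed three dwords at a time as
 * { reg, and_mask, or_mask } triplets by radeon_program_register_sequence()
 * (see cik_init_golden_registers() further down): unless and_mask is
 * 0xffffffff, in which case or_mask is written directly, the register is
 * read, the and_mask bits are cleared, the or_mask bits are OR'd in, and
 * the result is written back.
 */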
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};


static void cik_init_golden_registers(struct radeon_device *rdev)
{
	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
	mutex_lock(&rdev->grbm_idx_mutex);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_MULLINS:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 godavari_golden_registers,
						 (const u32)ARRAY_SIZE(godavari_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
	mutex_unlock(&rdev->grbm_idx_mutex);
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}
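
/*
 * Example: on a dGPU with XTALIN_DIVIDE set, a hypothetical reference_clock
 * of 100 is reported as 100 / 4 = 25; on an IGP with GPU_COUNTER_CLK set,
 * the same reference is reported as 100 / 2 = 50 (the units are whatever
 * rdev->clock.spll.reference_freq uses).
 */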

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
{
	if (index < rdev->doorbell.num_doorbells) {
		return readl(rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
{
	if (index < rdev->doorbell.num_doorbells) {
		writel(v, rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}
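
/*
 * Usage sketch: ring code typically mirrors its write pointer through the
 * doorbell aperture, e.g.
 *
 *	cik_mm_wdoorbell(rdev, ring->doorbell_index, ring->wptr);
 *
 * Out-of-range indices are rejected with a DRM_ERROR rather than touching
 * memory beyond the mapped aperture.
 */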
1748 
1749 #define BONAIRE_IO_MC_REGS_SIZE 36
1750 
1751 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1752 {
1753 	{0x00000070, 0x04400000},
1754 	{0x00000071, 0x80c01803},
1755 	{0x00000072, 0x00004004},
1756 	{0x00000073, 0x00000100},
1757 	{0x00000074, 0x00ff0000},
1758 	{0x00000075, 0x34000000},
1759 	{0x00000076, 0x08000014},
1760 	{0x00000077, 0x00cc08ec},
1761 	{0x00000078, 0x00000400},
1762 	{0x00000079, 0x00000000},
1763 	{0x0000007a, 0x04090000},
1764 	{0x0000007c, 0x00000000},
1765 	{0x0000007e, 0x4408a8e8},
1766 	{0x0000007f, 0x00000304},
1767 	{0x00000080, 0x00000000},
1768 	{0x00000082, 0x00000001},
1769 	{0x00000083, 0x00000002},
1770 	{0x00000084, 0xf3e4f400},
1771 	{0x00000085, 0x052024e3},
1772 	{0x00000087, 0x00000000},
1773 	{0x00000088, 0x01000000},
1774 	{0x0000008a, 0x1c0a0000},
1775 	{0x0000008b, 0xff010000},
1776 	{0x0000008d, 0xffffefff},
1777 	{0x0000008e, 0xfff3efff},
1778 	{0x0000008f, 0xfff3efbf},
1779 	{0x00000092, 0xf7ffffff},
1780 	{0x00000093, 0xffffff7f},
1781 	{0x00000095, 0x00101101},
1782 	{0x00000096, 0x00000fff},
1783 	{0x00000097, 0x00116fff},
1784 	{0x00000098, 0x60010000},
1785 	{0x00000099, 0x10010000},
1786 	{0x0000009a, 0x00006000},
1787 	{0x0000009b, 0x00001000},
1788 	{0x0000009f, 0x00b48000}
1789 };
1790 
1791 #define HAWAII_IO_MC_REGS_SIZE 22
1792 
1793 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1794 {
1795 	{0x0000007d, 0x40000000},
1796 	{0x0000007e, 0x40180304},
1797 	{0x0000007f, 0x0000ff00},
1798 	{0x00000081, 0x00000000},
1799 	{0x00000083, 0x00000800},
1800 	{0x00000086, 0x00000000},
1801 	{0x00000087, 0x00000100},
1802 	{0x00000088, 0x00020100},
1803 	{0x00000089, 0x00000000},
1804 	{0x0000008b, 0x00040000},
1805 	{0x0000008c, 0x00000100},
1806 	{0x0000008e, 0xff010000},
1807 	{0x00000090, 0xffffefff},
1808 	{0x00000091, 0xfff3efff},
1809 	{0x00000092, 0xfff3efbf},
1810 	{0x00000093, 0xf7ffffff},
1811 	{0x00000094, 0xffffff7f},
1812 	{0x00000095, 0x00000fff},
1813 	{0x00000096, 0x00116fff},
1814 	{0x00000097, 0x60010000},
1815 	{0x00000098, 0x10010000},
1816 	{0x0000009f, 0x00c79000}
1817 };
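/*
 * Both tables above are {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA}
 * pairs; ci_mc_load_microcode() below walks them two words at a time
 * when programming legacy (headerless) MC firmware.
 */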
1818 
1819 
1820 /**
1821  * cik_srbm_select - select specific register instances
1822  *
1823  * @rdev: radeon_device pointer
1824  * @me: selected ME (micro engine)
1825  * @pipe: pipe
1826  * @queue: queue
1827  * @vmid: VMID
1828  *
1829  * Switches the currently active register instances.  Some
1830  * registers are instanced per VMID, others are instanced per
1831  * me/pipe/queue combination.
1832  */
1833 static void cik_srbm_select(struct radeon_device *rdev,
1834 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1835 {
1836 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1837 			     MEID(me & 0x3) |
1838 			     VMID(vmid & 0xf) |
1839 			     QUEUEID(queue & 0x7));
1840 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1841 }
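/*
 * Illustrative sketch (compiled out): the usual pattern around
 * cik_srbm_select() in this driver.  me/pipe/queue and the register
 * writes in the middle are placeholders.
 */
#if 0
	mutex_lock(&rdev->srbm_mutex);
	cik_srbm_select(rdev, me, pipe, queue, 0);
	/* ... program registers instanced per me/pipe/queue or VMID ... */
	cik_srbm_select(rdev, 0, 0, 0, 0);	/* restore default instance */
	mutex_unlock(&rdev->srbm_mutex);
#endif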
1842 
1843 /* ucode loading */
1844 /**
1845  * ci_mc_load_microcode - load MC ucode into the hw
1846  *
1847  * @rdev: radeon_device pointer
1848  *
1849  * Load the GDDR MC ucode into the hw (CIK).
1850  * Returns 0 on success, error on failure.
1851  */
1852 int ci_mc_load_microcode(struct radeon_device *rdev)
1853 {
1854 	const __be32 *fw_data = NULL;
1855 	const __le32 *new_fw_data = NULL;
1856 	u32 running, tmp;
1857 	u32 *io_mc_regs = NULL;
1858 	const __le32 *new_io_mc_regs = NULL;
1859 	int i, regs_size, ucode_size;
1860 
1861 	if (!rdev->mc_fw)
1862 		return -EINVAL;
1863 
1864 	if (rdev->new_fw) {
1865 		const struct mc_firmware_header_v1_0 *hdr =
1866 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1867 
1868 		radeon_ucode_print_mc_hdr(&hdr->header);
1869 
1870 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1871 		new_io_mc_regs = (const __le32 *)
1872 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1873 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1874 		new_fw_data = (const __le32 *)
1875 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1876 	} else {
1877 		ucode_size = rdev->mc_fw->datasize / 4;
1878 
1879 		switch (rdev->family) {
1880 		case CHIP_BONAIRE:
1881 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1882 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1883 			break;
1884 		case CHIP_HAWAII:
1885 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1886 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1887 			break;
1888 		default:
1889 			return -EINVAL;
1890 		}
1891 		fw_data = (const __be32 *)rdev->mc_fw->data;
1892 	}
1893 
1894 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1895 
1896 	if (running == 0) {
1897 		/* reset the engine and set to writable */
1898 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1899 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1900 
1901 		/* load mc io regs */
1902 		for (i = 0; i < regs_size; i++) {
1903 			if (rdev->new_fw) {
1904 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1905 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1906 			} else {
1907 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1908 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1909 			}
1910 		}
1911 
1912 		tmp = RREG32(MC_SEQ_MISC0);
1913 		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1914 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1915 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1916 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1917 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1918 		}
1919 
1920 		/* load the MC ucode */
1921 		for (i = 0; i < ucode_size; i++) {
1922 			if (rdev->new_fw)
1923 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1924 			else
1925 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1926 		}
1927 
1928 		/* put the engine back into the active state */
1929 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1930 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1931 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1932 
1933 		/* wait for training to complete */
1934 		for (i = 0; i < rdev->usec_timeout; i++) {
1935 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1936 				break;
1937 			udelay(1);
1938 		}
1939 		for (i = 0; i < rdev->usec_timeout; i++) {
1940 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1941 				break;
1942 			udelay(1);
1943 		}
1944 	}
1945 
1946 	return 0;
1947 }
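/*
 * Illustrative sketch (compiled out): how startup code typically gates
 * this loader.  example_load_mc() is a hypothetical wrapper, but the
 * RADEON_IS_IGP test mirrors the "no MC ucode on APUs" rule used by
 * cik_init_microcode() below.
 */
#if 0
static int example_load_mc(struct radeon_device *rdev)
{
	int r;

	if (!(rdev->flags & RADEON_IS_IGP)) {	/* dGPUs only */
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}
	return 0;
}
#endif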
1948 
1949 /**
1950  * cik_init_microcode - load ucode images from disk
1951  *
1952  * @rdev: radeon_device pointer
1953  *
1954  * Use the firmware interface to load the ucode images into
1955  * the driver (not loaded into hw).
1956  * Returns 0 on success, error on failure.
1957  */
1958 static int cik_init_microcode(struct radeon_device *rdev)
1959 {
1960 	const char *chip_name;
1961 	const char *new_chip_name;
1962 	size_t pfp_req_size, me_req_size, ce_req_size,
1963 		mec_req_size, rlc_req_size, mc_req_size = 0,
1964 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1965 	char fw_name[30];
1966 	int new_fw = 0;
1967 	int err;
1968 	int num_fw;
1969 	bool new_smc = false;
1970 
1971 	DRM_DEBUG("\n");
1972 
1973 	switch (rdev->family) {
1974 	case CHIP_BONAIRE:
1975 		chip_name = "BONAIRE";
1976 		if ((rdev->pdev->revision == 0x80) ||
1977 		    (rdev->pdev->revision == 0x81) ||
1978 		    (rdev->pdev->device == 0x665f))
1979 			new_smc = true;
1980 		new_chip_name = "bonaire";
1981 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1982 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1983 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1984 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1985 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1986 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1987 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1988 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1989 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1990 		num_fw = 8;
1991 		break;
1992 	case CHIP_HAWAII:
1993 		chip_name = "HAWAII";
1994 		if (rdev->pdev->revision == 0x80)
1995 			new_smc = true;
1996 		new_chip_name = "hawaii";
1997 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1998 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1999 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2000 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2001 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2002 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2003 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2004 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2005 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2006 		num_fw = 8;
2007 		break;
2008 	case CHIP_KAVERI:
2009 		chip_name = "KAVERI";
2010 		new_chip_name = "kaveri";
2011 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2012 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2013 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2014 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2015 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2016 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2017 		num_fw = 7;
2018 		break;
2019 	case CHIP_KABINI:
2020 		chip_name = "KABINI";
2021 		new_chip_name = "kabini";
2022 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2023 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2024 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2025 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2026 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2027 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2028 		num_fw = 6;
2029 		break;
2030 	case CHIP_MULLINS:
2031 		chip_name = "MULLINS";
2032 		new_chip_name = "mullins";
2033 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2034 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2035 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2036 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2037 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2038 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2039 		num_fw = 6;
2040 		break;
2041 	default: BUG();
2042 	}
2043 
2044 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2045 
2046 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", new_chip_name);
2047 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2048 	if (err) {
2049 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
2050 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2051 		if (err)
2052 			goto out;
2053 		if (rdev->pfp_fw->datasize != pfp_req_size) {
2054 			printk(KERN_ERR
2055 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2056 			       rdev->pfp_fw->datasize, fw_name);
2057 			err = -EINVAL;
2058 			goto out;
2059 		}
2060 	} else {
2061 		err = radeon_ucode_validate(rdev->pfp_fw);
2062 		if (err) {
2063 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2064 			       fw_name);
2065 			goto out;
2066 		} else {
2067 			new_fw++;
2068 		}
2069 	}
2070 
2071 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", new_chip_name);
2072 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2073 	if (err) {
2074 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
2075 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2076 		if (err)
2077 			goto out;
2078 		if (rdev->me_fw->datasize != me_req_size) {
2079 			printk(KERN_ERR
2080 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2081 			       rdev->me_fw->datasize, fw_name);
2082 			err = -EINVAL;
2083 		}
2084 	} else {
2085 		err = radeon_ucode_validate(rdev->me_fw);
2086 		if (err) {
2087 			printk(KERN_ERR
2088 			       "cik_fw: validation failed for firmware \"%s\"\n",
2089 			       fw_name);
2090 			goto out;
2091 		} else {
2092 			new_fw++;
2093 		}
2094 	}
2095 
2096 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", new_chip_name);
2097 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2098 	if (err) {
2099 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
2100 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2101 		if (err)
2102 			goto out;
2103 		if (rdev->ce_fw->datasize != ce_req_size) {
2104 			printk(KERN_ERR
2105 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2106 			       rdev->ce_fw->datasize, fw_name);
2107 			err = -EINVAL;
2108 		}
2109 	} else {
2110 		err = radeon_ucode_validate(rdev->ce_fw);
2111 		if (err) {
2112 			printk(KERN_ERR
2113 			       "cik_fw: validation failed for firmware \"%s\"\n",
2114 			       fw_name);
2115 			goto out;
2116 		} else {
2117 			new_fw++;
2118 		}
2119 	}
2120 
2121 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", new_chip_name);
2122 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2123 	if (err) {
2124 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", chip_name);
2125 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2126 		if (err)
2127 			goto out;
2128 		if (rdev->mec_fw->datasize != mec_req_size) {
2129 			printk(KERN_ERR
2130 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2131 			       rdev->mec_fw->datasize, fw_name);
2132 			err = -EINVAL;
2133 		}
2134 	} else {
2135 		err = radeon_ucode_validate(rdev->mec_fw);
2136 		if (err) {
2137 			printk(KERN_ERR
2138 			       "cik_fw: validation failed for firmware \"%s\"\n",
2139 			       fw_name);
2140 			goto out;
2141 		} else {
2142 			new_fw++;
2143 		}
2144 	}
2145 
2146 	if (rdev->family == CHIP_KAVERI) {
2147 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec2", new_chip_name);
2148 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2149 		if (err) {
2150 			goto out;
2151 		} else {
2152 			err = radeon_ucode_validate(rdev->mec2_fw);
2153 			if (err) {
2154 				goto out;
2155 			} else {
2156 				new_fw++;
2157 			}
2158 		}
2159 	}
2160 
2161 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", new_chip_name);
2162 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2163 	if (err) {
2164 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name);
2165 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2166 		if (err)
2167 			goto out;
2168 		if (rdev->rlc_fw->datasize != rlc_req_size) {
2169 			printk(KERN_ERR
2170 			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2171 			       rdev->rlc_fw->datasize, fw_name);
2172 			err = -EINVAL;
2173 		}
2174 	} else {
2175 		err = radeon_ucode_validate(rdev->rlc_fw);
2176 		if (err) {
2177 			printk(KERN_ERR
2178 			       "cik_fw: validation failed for firmware \"%s\"\n",
2179 			       fw_name);
2180 			goto out;
2181 		} else {
2182 			new_fw++;
2183 		}
2184 	}
2185 
2186 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", new_chip_name);
2187 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2188 	if (err) {
2189 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", chip_name);
2190 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2191 		if (err)
2192 			goto out;
2193 		if (rdev->sdma_fw->datasize != sdma_req_size) {
2194 			printk(KERN_ERR
2195 			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2196 			       rdev->sdma_fw->datasize, fw_name);
2197 			err = -EINVAL;
2198 		}
2199 	} else {
2200 		err = radeon_ucode_validate(rdev->sdma_fw);
2201 		if (err) {
2202 			printk(KERN_ERR
2203 			       "cik_fw: validation failed for firmware \"%s\"\n",
2204 			       fw_name);
2205 			goto out;
2206 		} else {
2207 			new_fw++;
2208 		}
2209 	}
2210 
2211 	/* No SMC, MC ucode on APUs */
2212 	if (!(rdev->flags & RADEON_IS_IGP)) {
2213 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", new_chip_name);
2214 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2215 		if (err) {
2216 			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc2", chip_name);
2217 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2218 			if (err) {
2219 				ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
2220 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2221 				if (err)
2222 					goto out;
2223 			}
2224 			if ((rdev->mc_fw->datasize != mc_req_size) &&
2225 			    (rdev->mc_fw->datasize != mc2_req_size)){
2226 				printk(KERN_ERR
2227 				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2228 				       rdev->mc_fw->datasize, fw_name);
2229 				err = -EINVAL;
2230 			}
2231 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->datasize);
2232 		} else {
2233 			err = radeon_ucode_validate(rdev->mc_fw);
2234 			if (err) {
2235 				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2236 				       fw_name);
2237 				goto out;
2238 			} else {
2239 				new_fw++;
2240 			}
2241 		}
2242 
2243 		if (new_smc)
2244 			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_k_smc", new_chip_name);
2245 		else
2246 			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", new_chip_name);
2247 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2248 		if (err) {
2249 			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
2250 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2251 			if (err) {
2252 				printk(KERN_ERR
2253 				       "smc: error loading firmware \"%s\"\n",
2254 				       fw_name);
2255 				release_firmware(rdev->smc_fw);
2256 				rdev->smc_fw = NULL;
2257 				err = 0;
2258 			} else if (rdev->smc_fw->datasize != smc_req_size) {
2259 				printk(KERN_ERR
2260 				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2261 				       rdev->smc_fw->datasize, fw_name);
2262 				err = -EINVAL;
2263 			}
2264 		} else {
2265 			err = radeon_ucode_validate(rdev->smc_fw);
2266 			if (err) {
2267 				printk(KERN_ERR
2268 				       "cik_fw: validation failed for firmware \"%s\"\n",
2269 				       fw_name);
2270 				goto out;
2271 			} else {
2272 				new_fw++;
2273 			}
2274 		}
2275 	}
2276 
2277 	if (new_fw == 0) {
2278 		rdev->new_fw = false;
2279 	} else if (new_fw < num_fw) {
2280 		pr_err("ci_fw: mixing new and old firmware!\n");
2281 		err = -EINVAL;
2282 	} else {
2283 		rdev->new_fw = true;
2284 	}
2285 
2286 out:
2287 	if (err) {
2288 		if (err != -EINVAL)
2289 			pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2290 			       fw_name);
2291 		release_firmware(rdev->pfp_fw);
2292 		rdev->pfp_fw = NULL;
2293 		release_firmware(rdev->me_fw);
2294 		rdev->me_fw = NULL;
2295 		release_firmware(rdev->ce_fw);
2296 		rdev->ce_fw = NULL;
2297 		release_firmware(rdev->mec_fw);
2298 		rdev->mec_fw = NULL;
2299 		release_firmware(rdev->mec2_fw);
2300 		rdev->mec2_fw = NULL;
2301 		release_firmware(rdev->rlc_fw);
2302 		rdev->rlc_fw = NULL;
2303 		release_firmware(rdev->sdma_fw);
2304 		rdev->sdma_fw = NULL;
2305 		release_firmware(rdev->mc_fw);
2306 		rdev->mc_fw = NULL;
2307 		release_firmware(rdev->smc_fw);
2308 		rdev->smc_fw = NULL;
2309 	}
2310 	return err;
2311 }
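/*
 * Illustrative sketch (compiled out): the request/validate/fallback
 * pattern repeated above, distilled into one hypothetical helper
 * (example_fetch_ucode() is not part of the driver).  A new-style
 * image is validated and counted; otherwise the legacy name is tried
 * and only its size can be checked.
 */
#if 0
static int example_fetch_ucode(struct radeon_device *rdev,
			       const struct firmware **fw,
			       const char *new_name, const char *old_name,
			       size_t legacy_size, int *new_fw)
{
	char fw_name[30];
	int err;

	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s", new_name);
	err = request_firmware(fw, fw_name, rdev->dev);
	if (err == 0) {
		err = radeon_ucode_validate(*fw);
		if (err == 0)
			(*new_fw)++;
		return err;
	}
	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s", old_name);
	err = request_firmware(fw, fw_name, rdev->dev);
	if (err)
		return err;
	return ((*fw)->datasize == legacy_size) ? 0 : -EINVAL;
}
#endif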
2312 
2313 /**
2314  * cik_fini_microcode - drop the firmware image references
2315  *
2316  * @rdev: radeon_device pointer
2317  *
2318  * Drop the pfp, me, mec, mec2, rlc, sdma, mc, smc and ce firmware image references.
2319  * Called at driver shutdown.
2320  */
2321 static void cik_fini_microcode(struct radeon_device *rdev)
2322 {
2323 	release_firmware(rdev->pfp_fw);
2324 	rdev->pfp_fw = NULL;
2325 	release_firmware(rdev->me_fw);
2326 	rdev->me_fw = NULL;
2327 	release_firmware(rdev->ce_fw);
2328 	rdev->ce_fw = NULL;
2329 	release_firmware(rdev->mec_fw);
2330 	rdev->mec_fw = NULL;
2331 	release_firmware(rdev->mec2_fw);
2332 	rdev->mec2_fw = NULL;
2333 	release_firmware(rdev->rlc_fw);
2334 	rdev->rlc_fw = NULL;
2335 	release_firmware(rdev->sdma_fw);
2336 	rdev->sdma_fw = NULL;
2337 	release_firmware(rdev->mc_fw);
2338 	rdev->mc_fw = NULL;
2339 	release_firmware(rdev->smc_fw);
2340 	rdev->smc_fw = NULL;
2341 }
2342 
2343 /*
2344  * Core functions
2345  */
2346 /**
2347  * cik_tiling_mode_table_init - init the hw tiling table
2348  *
2349  * @rdev: radeon_device pointer
2350  *
2351  * Starting with SI, the tiling setup is done globally in a
2352  * set of 32 tiling modes.  Rather than selecting each set of
2353  * parameters per surface as on older asics, we just select
2354  * which index in the tiling table we want to use, and the
2355  * surface uses those parameters (CIK).
2356  */
2357 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2358 {
2359 	u32 *tile = rdev->config.cik.tile_mode_array;
2360 	u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2361 	const u32 num_tile_mode_states =
2362 			ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2363 	const u32 num_secondary_tile_mode_states =
2364 			ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2365 	u32 reg_offset, split_equal_to_row_size;
2366 	u32 num_pipe_configs;
2367 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2368 		rdev->config.cik.max_shader_engines;
2369 
2370 	switch (rdev->config.cik.mem_row_size_in_kb) {
2371 	case 1:
2372 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2373 		break;
2374 	case 2:
2375 	default:
2376 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2377 		break;
2378 	case 4:
2379 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2380 		break;
2381 	}
2382 
2383 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2384 	if (num_pipe_configs > 8)
2385 		num_pipe_configs = 16;
2386 
2387 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2388 		tile[reg_offset] = 0;
2389 	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2390 		macrotile[reg_offset] = 0;
2391 
2392 	switch(num_pipe_configs) {
2393 	case 16:
2394 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2395 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2396 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2397 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2398 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2399 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2400 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2402 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2403 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2404 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2406 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2407 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2408 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2410 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2411 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2412 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2413 			   TILE_SPLIT(split_equal_to_row_size));
2414 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2415 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2416 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2417 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2418 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2419 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2421 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2422 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2423 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424 			   TILE_SPLIT(split_equal_to_row_size));
2425 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2426 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2427 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2428 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2429 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2430 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2431 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2432 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2435 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2436 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2437 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2438 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2439 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2440 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2441 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2442 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2443 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2444 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2445 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2446 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2447 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2448 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2449 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2450 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2451 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2452 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2453 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2454 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2455 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2456 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2457 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2458 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2459 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2460 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2461 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2462 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2463 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2464 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2465 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2466 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2467 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2468 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2469 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2470 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2471 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2472 
2473 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2475 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2476 			   NUM_BANKS(ADDR_SURF_16_BANK));
2477 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2479 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2480 			   NUM_BANKS(ADDR_SURF_16_BANK));
2481 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2483 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2484 			   NUM_BANKS(ADDR_SURF_16_BANK));
2485 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2487 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2488 			   NUM_BANKS(ADDR_SURF_16_BANK));
2489 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2492 			   NUM_BANKS(ADDR_SURF_8_BANK));
2493 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2495 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2496 			   NUM_BANKS(ADDR_SURF_4_BANK));
2497 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2500 			   NUM_BANKS(ADDR_SURF_2_BANK));
2501 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2502 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2503 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2504 			   NUM_BANKS(ADDR_SURF_16_BANK));
2505 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2506 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2507 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2508 			   NUM_BANKS(ADDR_SURF_16_BANK));
2509 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2511 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2512 			    NUM_BANKS(ADDR_SURF_16_BANK));
2513 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2514 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2515 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2516 			    NUM_BANKS(ADDR_SURF_8_BANK));
2517 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2518 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2519 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2520 			    NUM_BANKS(ADDR_SURF_4_BANK));
2521 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2523 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2524 			    NUM_BANKS(ADDR_SURF_2_BANK));
2525 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2526 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2527 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2528 			    NUM_BANKS(ADDR_SURF_2_BANK));
2529 
2530 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2531 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2532 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2533 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2534 		break;
2535 
2536 	case 8:
2537 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2538 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2539 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2540 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2541 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2542 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2543 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2544 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2545 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2546 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2547 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2549 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2551 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2553 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2554 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2555 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2556 			   TILE_SPLIT(split_equal_to_row_size));
2557 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2558 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2560 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2561 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2562 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2564 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2565 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2566 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567 			   TILE_SPLIT(split_equal_to_row_size));
2568 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2569 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2570 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2571 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2572 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2573 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2574 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2575 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2576 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2577 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2578 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2579 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2580 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2581 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2582 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2583 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2584 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2585 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2586 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2587 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2588 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2589 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2590 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2592 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2593 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2594 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2595 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2596 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2597 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2598 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2599 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2600 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2601 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2602 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2603 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2605 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2606 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2607 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2608 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2609 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2610 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2611 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2612 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2613 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2614 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2615 
2616 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2618 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2619 				NUM_BANKS(ADDR_SURF_16_BANK));
2620 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2622 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2623 				NUM_BANKS(ADDR_SURF_16_BANK));
2624 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2626 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2627 				NUM_BANKS(ADDR_SURF_16_BANK));
2628 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2630 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2631 				NUM_BANKS(ADDR_SURF_16_BANK));
2632 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2633 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2634 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2635 				NUM_BANKS(ADDR_SURF_8_BANK));
2636 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2638 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2639 				NUM_BANKS(ADDR_SURF_4_BANK));
2640 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2642 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2643 				NUM_BANKS(ADDR_SURF_2_BANK));
2644 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2645 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2646 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2647 				NUM_BANKS(ADDR_SURF_16_BANK));
2648 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2649 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2650 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2651 				NUM_BANKS(ADDR_SURF_16_BANK));
2652 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2653 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2654 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2655 				NUM_BANKS(ADDR_SURF_16_BANK));
2656 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2657 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2658 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2659 				NUM_BANKS(ADDR_SURF_16_BANK));
2660 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2661 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2662 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2663 				NUM_BANKS(ADDR_SURF_8_BANK));
2664 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2665 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2666 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2667 				NUM_BANKS(ADDR_SURF_4_BANK));
2668 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2669 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2670 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2671 				NUM_BANKS(ADDR_SURF_2_BANK));
2672 
2673 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2674 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2675 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2676 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2677 		break;
2678 
2679 	case 4:
2680 		if (num_rbs == 4) {
2681 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2682 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2683 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2684 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2685 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2687 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2688 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2689 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2690 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2691 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2692 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2693 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2695 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2696 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2697 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2698 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2699 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2700 			   TILE_SPLIT(split_equal_to_row_size));
2701 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2702 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2704 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2705 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2706 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2708 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2709 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2710 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711 			   TILE_SPLIT(split_equal_to_row_size));
2712 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2713 			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
2714 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2715 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2716 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2717 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2718 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2719 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2720 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2721 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2722 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2723 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2724 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2725 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2726 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2727 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2728 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2729 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2730 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2731 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2732 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2733 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2734 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2735 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2736 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2737 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2738 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2739 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2740 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2741 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2742 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2743 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2744 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2745 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2746 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2747 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2748 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2749 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2750 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2751 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2752 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2753 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2754 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2755 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2756 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2757 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2758 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2759 
2760 		} else if (num_rbs < 4) {
2761 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2762 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2763 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2764 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2765 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2766 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2767 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2768 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2769 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2770 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2771 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2772 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2773 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2774 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2775 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2776 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2777 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2778 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2779 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2780 			   TILE_SPLIT(split_equal_to_row_size));
2781 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2782 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2783 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2784 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2785 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2786 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2788 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2789 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2790 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791 			   TILE_SPLIT(split_equal_to_row_size));
2792 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2793 			   PIPE_CONFIG(ADDR_SURF_P4_8x16));
2794 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2795 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2796 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2797 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2798 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2799 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2800 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2801 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2802 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2803 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2804 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2805 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2806 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2807 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2808 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2809 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2810 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2811 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2812 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2813 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2814 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2815 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2816 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2817 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2818 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2819 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2820 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2821 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2822 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2823 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2824 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2825 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2826 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2827 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2828 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2829 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2830 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2831 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2832 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2833 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2834 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2835 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2836 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2837 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2838 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2839 		}
2840 
2841 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2842 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2843 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2844 				NUM_BANKS(ADDR_SURF_16_BANK));
2845 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2846 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2847 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2848 				NUM_BANKS(ADDR_SURF_16_BANK));
2849 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2850 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2851 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2852 				NUM_BANKS(ADDR_SURF_16_BANK));
2853 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2855 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2856 				NUM_BANKS(ADDR_SURF_16_BANK));
2857 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2859 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2860 				NUM_BANKS(ADDR_SURF_16_BANK));
2861 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2863 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2864 				NUM_BANKS(ADDR_SURF_8_BANK));
2865 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2866 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2867 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2868 				NUM_BANKS(ADDR_SURF_4_BANK));
2869 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2870 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2871 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2872 				NUM_BANKS(ADDR_SURF_16_BANK));
2873 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2874 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2875 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2876 				NUM_BANKS(ADDR_SURF_16_BANK));
2877 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2878 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2879 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2880 				NUM_BANKS(ADDR_SURF_16_BANK));
2881 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2882 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2883 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2884 				NUM_BANKS(ADDR_SURF_16_BANK));
2885 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2886 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2887 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2888 				NUM_BANKS(ADDR_SURF_16_BANK));
2889 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2890 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2891 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2892 				NUM_BANKS(ADDR_SURF_8_BANK));
2893 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2894 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2895 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2896 				NUM_BANKS(ADDR_SURF_4_BANK));
2897 
2898 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2899 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2900 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2901 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2902 		break;
2903 
2904 	case 2:
2905 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2906 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2907 			   PIPE_CONFIG(ADDR_SURF_P2) |
2908 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2909 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2910 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2911 			   PIPE_CONFIG(ADDR_SURF_P2) |
2912 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2913 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2914 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2915 			   PIPE_CONFIG(ADDR_SURF_P2) |
2916 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2917 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2919 			   PIPE_CONFIG(ADDR_SURF_P2) |
2920 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2921 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2922 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2923 			   PIPE_CONFIG(ADDR_SURF_P2) |
2924 			   TILE_SPLIT(split_equal_to_row_size));
2925 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2926 			   PIPE_CONFIG(ADDR_SURF_P2) |
2927 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2928 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2929 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2930 			   PIPE_CONFIG(ADDR_SURF_P2) |
2931 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2932 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2933 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2934 			   PIPE_CONFIG(ADDR_SURF_P2) |
2935 			   TILE_SPLIT(split_equal_to_row_size));
2936 		tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2937 			   PIPE_CONFIG(ADDR_SURF_P2);
2938 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2939 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2940 			   PIPE_CONFIG(ADDR_SURF_P2));
2941 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2942 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2943 			    PIPE_CONFIG(ADDR_SURF_P2) |
2944 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2945 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2946 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2947 			    PIPE_CONFIG(ADDR_SURF_P2) |
2948 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2949 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2950 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2951 			    PIPE_CONFIG(ADDR_SURF_P2) |
2952 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2953 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2954 			    PIPE_CONFIG(ADDR_SURF_P2) |
2955 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2956 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2957 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2958 			    PIPE_CONFIG(ADDR_SURF_P2) |
2959 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2960 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2961 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2962 			    PIPE_CONFIG(ADDR_SURF_P2) |
2963 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2964 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2965 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2966 			    PIPE_CONFIG(ADDR_SURF_P2) |
2967 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2968 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2969 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2970 			    PIPE_CONFIG(ADDR_SURF_P2));
2971 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2972 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2973 			    PIPE_CONFIG(ADDR_SURF_P2) |
2974 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2975 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2976 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2977 			    PIPE_CONFIG(ADDR_SURF_P2) |
2978 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2979 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2980 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2981 			    PIPE_CONFIG(ADDR_SURF_P2) |
2982 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2983 
2984 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2985 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2986 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2987 				NUM_BANKS(ADDR_SURF_16_BANK));
2988 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2989 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2990 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2991 				NUM_BANKS(ADDR_SURF_16_BANK));
2992 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2993 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2994 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2995 				NUM_BANKS(ADDR_SURF_16_BANK));
2996 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2997 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2998 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2999 				NUM_BANKS(ADDR_SURF_16_BANK));
3000 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3001 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3002 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3003 				NUM_BANKS(ADDR_SURF_16_BANK));
3004 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3005 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3006 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3007 				NUM_BANKS(ADDR_SURF_16_BANK));
3008 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3009 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3010 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3011 				NUM_BANKS(ADDR_SURF_8_BANK));
3012 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3013 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3014 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3015 				NUM_BANKS(ADDR_SURF_16_BANK));
3016 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3017 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3018 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3019 				NUM_BANKS(ADDR_SURF_16_BANK));
3020 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3021 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3022 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3023 				NUM_BANKS(ADDR_SURF_16_BANK));
3024 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3025 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3026 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3027 				NUM_BANKS(ADDR_SURF_16_BANK));
3028 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3029 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3030 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3031 				NUM_BANKS(ADDR_SURF_16_BANK));
3032 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3033 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3034 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3035 				NUM_BANKS(ADDR_SURF_16_BANK));
3036 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3037 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3038 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3039 				NUM_BANKS(ADDR_SURF_8_BANK));
3040 
3041 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3042 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3043 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3044 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3045 		break;
3046 
3047 	default:
3048 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3049 	}
3050 }
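/*
 * Illustrative sketch (compiled out): surfaces reference the table by
 * index instead of carrying tiling parameters themselves, and the
 * mirrored arrays in rdev->config.cik let the rest of the driver (and
 * userspace, via the info ioctl) inspect what was programmed.  The
 * index below is arbitrary.
 */
#if 0
	u32 idx = 10;	/* tile mode index chosen for some surface */
	u32 mode = rdev->config.cik.tile_mode_array[idx];
	u32 live = RREG32(GB_TILE_MODE0 + (idx * 4));	/* same value */
#endif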
3051 
3052 /**
3053  * cik_select_se_sh - select which SE, SH to address
3054  *
3055  * @rdev: radeon_device pointer
3056  * @se_num: shader engine to address
3057  * @sh_num: sh block to address
3058  *
3059  * Select which SE, SH combinations to address. Certain
3060  * registers are instanced per SE or SH.  0xffffffff means
3061  * broadcast to all SEs or SHs (CIK).
3062  */
3063 static void cik_select_se_sh(struct radeon_device *rdev,
3064 			     u32 se_num, u32 sh_num)
3065 {
3066 	u32 data = INSTANCE_BROADCAST_WRITES;
3067 
3068 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3069 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3070 	else if (se_num == 0xffffffff)
3071 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3072 	else if (sh_num == 0xffffffff)
3073 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3074 	else
3075 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3076 	WREG32(GRBM_GFX_INDEX, data);
3077 }
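/*
 * Illustrative sketch (compiled out): callers hold grbm_idx_mutex, walk
 * the SE/SH instances they care about, and always restore broadcast
 * mode before unlocking, as cik_setup_rb() below does.
 */
#if 0
	mutex_lock(&rdev->grbm_idx_mutex);
	cik_select_se_sh(rdev, se_num, sh_num);		/* one instance */
	/* ... access per-SE/SH instanced registers ... */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);	/* broadcast again */
	mutex_unlock(&rdev->grbm_idx_mutex);
#endif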
3078 
3079 /**
3080  * cik_create_bitmask - create a bitmask
3081  *
3082  * @bit_width: length of the mask
3083  *
3084  * Creates a variable-length bit mask (CIK).
3085  * Returns the bitmask.
3086  */
3087 static u32 cik_create_bitmask(u32 bit_width)
3088 {
3089 	u32 i, mask = 0;
3090 
3091 	for (i = 0; i < bit_width; i++) {
3092 		mask <<= 1;
3093 		mask |= 1;
3094 	}
3095 	return mask;
3096 }
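/*
 * For bit_width < 32 this is equivalent to ((1u << bit_width) - 1),
 * e.g. cik_create_bitmask(4) == 0xf.  The loop form also returns
 * 0xffffffff for bit_width == 32, where that shift expression would be
 * undefined behavior.
 */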
3097 
3098 /**
3099  * cik_get_rb_disabled - computes the mask of disabled RBs
3100  *
3101  * @rdev: radeon_device pointer
3102  * @max_rb_num_per_se: max RBs (render backends) per SE
3103  *                     for the asic
3104  * @sh_per_se: number of SH blocks per SE for the asic
3105  *
3106  * Calculates the bitmask of disabled RBs (CIK).
3107  * Returns the disabled RB bitmask.
3108  */
3109 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3110 			      u32 max_rb_num_per_se,
3111 			      u32 sh_per_se)
3112 {
3113 	u32 data, mask;
3114 
3115 	data = RREG32(CC_RB_BACKEND_DISABLE);
3116 	if (data & 1)
3117 		data &= BACKEND_DISABLE_MASK;
3118 	else
3119 		data = 0;
3120 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3121 
3122 	data >>= BACKEND_DISABLE_SHIFT;
3123 
3124 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3125 
3126 	return data & mask;
3127 }
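/*
 * Worked example (compiled out): with max_rb_num_per_se == 4 and
 * sh_per_se == 1 (the Hawaii configuration set up in cik_gpu_init()
 * below), the mask is cik_create_bitmask(4 / 1) == 0xf, so up to four
 * per-SH disable bits from CC_RB_BACKEND_DISABLE and
 * GC_USER_RB_BACKEND_DISABLE are reported.
 */
#if 0
	u32 disabled = cik_get_rb_disabled(rdev, 4, 1);	/* 0x0 .. 0xf */
#endif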
3128 
3129 /**
3130  * cik_setup_rb - setup the RBs on the asic
3131  *
3132  * @rdev: radeon_device pointer
3133  * @se_num: number of SEs (shader engines) for the asic
3134  * @sh_per_se: number of SH blocks per SE for the asic
3135  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3136  *
3137  * Configures per-SE/SH RB registers (CIK).
3138  */
3139 static void cik_setup_rb(struct radeon_device *rdev,
3140 			 u32 se_num, u32 sh_per_se,
3141 			 u32 max_rb_num_per_se)
3142 {
3143 	int i, j;
3144 	u32 data, mask;
3145 	u32 disabled_rbs = 0;
3146 	u32 enabled_rbs = 0;
3147 
3148 	mutex_lock(&rdev->grbm_idx_mutex);
3149 	for (i = 0; i < se_num; i++) {
3150 		for (j = 0; j < sh_per_se; j++) {
3151 			cik_select_se_sh(rdev, i, j);
3152 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3153 			if (rdev->family == CHIP_HAWAII)
3154 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3155 			else
3156 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3157 		}
3158 	}
3159 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3160 	mutex_unlock(&rdev->grbm_idx_mutex);
3161 
3162 	mask = 1;
3163 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3164 		if (!(disabled_rbs & mask))
3165 			enabled_rbs |= mask;
3166 		mask <<= 1;
3167 	}
3168 
3169 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3170 
3171 	mutex_lock(&rdev->grbm_idx_mutex);
3172 	for (i = 0; i < se_num; i++) {
3173 		cik_select_se_sh(rdev, i, 0xffffffff);
3174 		data = 0;
3175 		for (j = 0; j < sh_per_se; j++) {
3176 			switch (enabled_rbs & 3) {
3177 			case 0:
3178 				if (j == 0)
3179 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3180 				else
3181 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3182 				break;
3183 			case 1:
3184 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3185 				break;
3186 			case 2:
3187 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3188 				break;
3189 			case 3:
3190 			default:
3191 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3192 				break;
3193 			}
3194 			enabled_rbs >>= 2;
3195 		}
3196 		WREG32(PA_SC_RASTER_CONFIG, data);
3197 	}
3198 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3199 	mutex_unlock(&rdev->grbm_idx_mutex);
3200 }
3201 
3202 /**
3203  * cik_gpu_init - setup the 3D engine
3204  *
3205  * @rdev: radeon_device pointer
3206  *
3207  * Configures the 3D engine and tiling configuration
3208  * registers so that the 3D engine is usable.
3209  */
3210 static void cik_gpu_init(struct radeon_device *rdev)
3211 {
3212 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3213 	u32 mc_shared_chmap, mc_arb_ramcfg;
3214 	u32 hdp_host_path_cntl;
3215 	u32 tmp;
3216 	int i, j;
3217 
3218 	switch (rdev->family) {
3219 	case CHIP_BONAIRE:
3220 		rdev->config.cik.max_shader_engines = 2;
3221 		rdev->config.cik.max_tile_pipes = 4;
3222 		rdev->config.cik.max_cu_per_sh = 7;
3223 		rdev->config.cik.max_sh_per_se = 1;
3224 		rdev->config.cik.max_backends_per_se = 2;
3225 		rdev->config.cik.max_texture_channel_caches = 4;
3226 		rdev->config.cik.max_gprs = 256;
3227 		rdev->config.cik.max_gs_threads = 32;
3228 		rdev->config.cik.max_hw_contexts = 8;
3229 
3230 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3231 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3232 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3233 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3234 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3235 		break;
3236 	case CHIP_HAWAII:
3237 		rdev->config.cik.max_shader_engines = 4;
3238 		rdev->config.cik.max_tile_pipes = 16;
3239 		rdev->config.cik.max_cu_per_sh = 11;
3240 		rdev->config.cik.max_sh_per_se = 1;
3241 		rdev->config.cik.max_backends_per_se = 4;
3242 		rdev->config.cik.max_texture_channel_caches = 16;
3243 		rdev->config.cik.max_gprs = 256;
3244 		rdev->config.cik.max_gs_threads = 32;
3245 		rdev->config.cik.max_hw_contexts = 8;
3246 
3247 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3248 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3249 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3250 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3251 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3252 		break;
3253 	case CHIP_KAVERI:
3254 		rdev->config.cik.max_shader_engines = 1;
3255 		rdev->config.cik.max_tile_pipes = 4;
3256 		if ((rdev->pdev->device == 0x1304) ||
3257 		    (rdev->pdev->device == 0x1305) ||
3258 		    (rdev->pdev->device == 0x130C) ||
3259 		    (rdev->pdev->device == 0x130F) ||
3260 		    (rdev->pdev->device == 0x1310) ||
3261 		    (rdev->pdev->device == 0x1311) ||
3262 		    (rdev->pdev->device == 0x131C)) {
3263 			rdev->config.cik.max_cu_per_sh = 8;
3264 			rdev->config.cik.max_backends_per_se = 2;
3265 		} else if ((rdev->pdev->device == 0x1309) ||
3266 			   (rdev->pdev->device == 0x130A) ||
3267 			   (rdev->pdev->device == 0x130D) ||
3268 			   (rdev->pdev->device == 0x1313) ||
3269 			   (rdev->pdev->device == 0x131D)) {
3270 			rdev->config.cik.max_cu_per_sh = 6;
3271 			rdev->config.cik.max_backends_per_se = 2;
3272 		} else if ((rdev->pdev->device == 0x1306) ||
3273 			   (rdev->pdev->device == 0x1307) ||
3274 			   (rdev->pdev->device == 0x130B) ||
3275 			   (rdev->pdev->device == 0x130E) ||
3276 			   (rdev->pdev->device == 0x1315) ||
3277 			   (rdev->pdev->device == 0x1318) ||
3278 			   (rdev->pdev->device == 0x131B)) {
3279 			rdev->config.cik.max_cu_per_sh = 4;
3280 			rdev->config.cik.max_backends_per_se = 1;
3281 		} else {
3282 			rdev->config.cik.max_cu_per_sh = 3;
3283 			rdev->config.cik.max_backends_per_se = 1;
3284 		}
3285 		rdev->config.cik.max_sh_per_se = 1;
3286 		rdev->config.cik.max_texture_channel_caches = 4;
3287 		rdev->config.cik.max_gprs = 256;
3288 		rdev->config.cik.max_gs_threads = 16;
3289 		rdev->config.cik.max_hw_contexts = 8;
3290 
3291 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3292 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3293 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3294 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3295 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3296 		break;
3297 	case CHIP_KABINI:
3298 	case CHIP_MULLINS:
3299 	default:
3300 		rdev->config.cik.max_shader_engines = 1;
3301 		rdev->config.cik.max_tile_pipes = 2;
3302 		rdev->config.cik.max_cu_per_sh = 2;
3303 		rdev->config.cik.max_sh_per_se = 1;
3304 		rdev->config.cik.max_backends_per_se = 1;
3305 		rdev->config.cik.max_texture_channel_caches = 2;
3306 		rdev->config.cik.max_gprs = 256;
3307 		rdev->config.cik.max_gs_threads = 16;
3308 		rdev->config.cik.max_hw_contexts = 8;
3309 
3310 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3311 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3312 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3313 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3314 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3315 		break;
3316 	}
3317 
3318 	/* Initialize HDP */
3319 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3320 		WREG32((0x2c14 + j), 0x00000000);
3321 		WREG32((0x2c18 + j), 0x00000000);
3322 		WREG32((0x2c1c + j), 0x00000000);
3323 		WREG32((0x2c20 + j), 0x00000000);
3324 		WREG32((0x2c24 + j), 0x00000000);
3325 	}
3326 
3327 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3328 	WREG32(SRBM_INT_CNTL, 0x1);
3329 	WREG32(SRBM_INT_ACK, 0x1);
3330 
3331 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3332 
3333 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3334 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3335 
3336 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3337 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3338 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3339 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3340 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3341 		rdev->config.cik.mem_row_size_in_kb = 4;
3342 	/* XXX use MC settings? */
3343 	rdev->config.cik.shader_engine_tile_size = 32;
3344 	rdev->config.cik.num_gpus = 1;
3345 	rdev->config.cik.multi_gpu_tile_size = 64;
3346 
3347 	/* fix up row size */
3348 	gb_addr_config &= ~ROW_SIZE_MASK;
3349 	switch (rdev->config.cik.mem_row_size_in_kb) {
3350 	case 1:
3351 	default:
3352 		gb_addr_config |= ROW_SIZE(0);
3353 		break;
3354 	case 2:
3355 		gb_addr_config |= ROW_SIZE(1);
3356 		break;
3357 	case 4:
3358 		gb_addr_config |= ROW_SIZE(2);
3359 		break;
3360 	}
3361 
3362 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3363 	 * not have bank info, so create a custom tiling dword.
3364 	 * bits 3:0   num_pipes
3365 	 * bits 7:4   num_banks
3366 	 * bits 11:8  group_size
3367 	 * bits 15:12 row_size
3368 	 */
3369 	rdev->config.cik.tile_config = 0;
3370 	switch (rdev->config.cik.num_tile_pipes) {
3371 	case 1:
3372 		rdev->config.cik.tile_config |= (0 << 0);
3373 		break;
3374 	case 2:
3375 		rdev->config.cik.tile_config |= (1 << 0);
3376 		break;
3377 	case 4:
3378 		rdev->config.cik.tile_config |= (2 << 0);
3379 		break;
3380 	case 8:
3381 	default:
3382 		/* XXX what about 12? */
3383 		rdev->config.cik.tile_config |= (3 << 0);
3384 		break;
3385 	}
3386 	rdev->config.cik.tile_config |=
3387 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3388 	rdev->config.cik.tile_config |=
3389 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3390 	rdev->config.cik.tile_config |=
3391 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
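
	/*
	 * Illustrative decode (not compiled): how a consumer can unpack the
	 * custom dword assembled above.  Note that the pipe field holds the
	 * encoded value from the switch (0=1, 1=2, 2=4, 3=8 pipes), not the
	 * raw pipe count.
	 */
#if 0
	{
		u32 cfg = rdev->config.cik.tile_config;
		u32 pipes_enc = (cfg >> 0) & 0xf;	/* bits 3:0 */
		u32 num_banks = (cfg >> 4) & 0xf;	/* bits 7:4 */
		u32 group_size = (cfg >> 8) & 0xf;	/* bits 11:8 */
		u32 row_size = (cfg >> 12) & 0xf;	/* bits 15:12 */
	}
#endif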
3392 
3393 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3394 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3395 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3396 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3397 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3398 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3399 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3400 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3401 
3402 	cik_tiling_mode_table_init(rdev);
3403 
3404 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3405 		     rdev->config.cik.max_sh_per_se,
3406 		     rdev->config.cik.max_backends_per_se);
3407 
3408 	rdev->config.cik.active_cus = 0;
3409 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3410 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3411 			rdev->config.cik.active_cus +=
3412 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3413 		}
3414 	}
3415 
3416 	/* set HW defaults for 3D engine */
3417 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3418 
3419 	mutex_lock(&rdev->grbm_idx_mutex);
3420 	/*
3421 	 * making sure that the following register writes are broadcast
3422 	 * to all the shaders
3423 	 */
3424 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3425 	WREG32(SX_DEBUG_1, 0x20);
3426 
3427 	WREG32(TA_CNTL_AUX, 0x00010000);
3428 
3429 	tmp = RREG32(SPI_CONFIG_CNTL);
3430 	tmp |= 0x03000000;
3431 	WREG32(SPI_CONFIG_CNTL, tmp);
3432 
3433 	WREG32(SQ_CONFIG, 1);
3434 
3435 	WREG32(DB_DEBUG, 0);
3436 
3437 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3438 	tmp |= 0x00000400;
3439 	WREG32(DB_DEBUG2, tmp);
3440 
3441 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3442 	tmp |= 0x00020200;
3443 	WREG32(DB_DEBUG3, tmp);
3444 
3445 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3446 	tmp |= 0x00018208;
3447 	WREG32(CB_HW_CONTROL, tmp);
3448 
3449 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3450 
3451 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3452 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3453 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3454 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3455 
3456 	WREG32(VGT_NUM_INSTANCES, 1);
3457 
3458 	WREG32(CP_PERFMON_CNTL, 0);
3459 
3460 	WREG32(SQ_CONFIG, 0);
3461 
3462 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3463 					  FORCE_EOV_MAX_REZ_CNT(255)));
3464 
3465 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3466 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3467 
3468 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3469 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3470 
3471 	tmp = RREG32(HDP_MISC_CNTL);
3472 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3473 	WREG32(HDP_MISC_CNTL, tmp);
3474 
3475 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3476 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3477 
3478 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3479 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3480 	mutex_unlock(&rdev->grbm_idx_mutex);
3481 
3482 	udelay(50);
3483 }
3484 
3485 /*
3486  * GPU scratch registers helpers function.
3487  */
3488 /**
3489  * cik_scratch_init - setup driver info for CP scratch regs
3490  *
3491  * @rdev: radeon_device pointer
3492  *
3493  * Set up the number and offset of the CP scratch registers.
3494  * NOTE: use of CP scratch registers is a legacy interface and
3495  * is not used by default on newer asics (r6xx+).  On newer asics,
3496  * memory buffers are used for fences rather than scratch regs.
3497  */
3498 static void cik_scratch_init(struct radeon_device *rdev)
3499 {
3500 	int i;
3501 
3502 	rdev->scratch.num_reg = 7;
3503 	rdev->scratch.reg_base = SCRATCH_REG0;
3504 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3505 		rdev->scratch.free[i] = true;
3506 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3507 	}
3508 }
3509 
3510 /**
3511  * cik_ring_test - basic gfx ring test
3512  *
3513  * @rdev: radeon_device pointer
3514  * @ring: radeon_ring structure holding ring information
3515  *
3516  * Allocate a scratch register and write to it using the gfx ring (CIK).
3517  * Provides a basic gfx ring test to verify that the ring is working.
3518  * Used by cik_cp_gfx_resume().
3519  * Returns 0 on success, error on failure.
3520  */
3521 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3522 {
3523 	uint32_t scratch;
3524 	uint32_t tmp = 0;
3525 	unsigned i;
3526 	int r;
3527 
3528 	r = radeon_scratch_get(rdev, &scratch);
3529 	if (r) {
3530 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3531 		return r;
3532 	}
3533 	WREG32(scratch, 0xCAFEDEAD);
3534 	r = radeon_ring_lock(rdev, ring, 3);
3535 	if (r) {
3536 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3537 		radeon_scratch_free(rdev, scratch);
3538 		return r;
3539 	}
3540 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3541 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3542 	radeon_ring_write(ring, 0xDEADBEEF);
3543 	radeon_ring_unlock_commit(rdev, ring, false);
3544 
3545 	for (i = 0; i < rdev->usec_timeout; i++) {
3546 		tmp = RREG32(scratch);
3547 		if (tmp == 0xDEADBEEF)
3548 			break;
3549 		DRM_UDELAY(1);
3550 	}
3551 	if (i < rdev->usec_timeout) {
3552 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3553 	} else {
3554 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3555 			  ring->idx, scratch, tmp);
3556 		r = -EINVAL;
3557 	}
3558 	radeon_scratch_free(rdev, scratch);
3559 	return r;
3560 }
3561 
3562 /**
3563  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3564  *
3565  * @rdev: radeon_device pointer
3566  * @ridx: radeon ring index
3567  *
3568  * Emits an hdp flush on the cp.
3569  */
3570 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3571 				       int ridx)
3572 {
3573 	struct radeon_ring *ring = &rdev->ring[ridx];
3574 	u32 ref_and_mask;
3575 
3576 	switch (ring->idx) {
3577 	case CAYMAN_RING_TYPE_CP1_INDEX:
3578 	case CAYMAN_RING_TYPE_CP2_INDEX:
3579 	default:
3580 		switch (ring->me) {
3581 		case 0:
3582 			ref_and_mask = CP2 << ring->pipe;
3583 			break;
3584 		case 1:
3585 			ref_and_mask = CP6 << ring->pipe;
3586 			break;
3587 		default:
3588 			return;
3589 		}
3590 		break;
3591 	case RADEON_RING_TYPE_GFX_INDEX:
3592 		ref_and_mask = CP0;
3593 		break;
3594 	}
3595 
3596 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3597 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3598 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3599 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3600 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3601 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3602 	radeon_ring_write(ring, ref_and_mask);
3603 	radeon_ring_write(ring, ref_and_mask);
3604 	radeon_ring_write(ring, 0x20); /* poll interval */
3605 }
3606 
3607 /**
3608  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3609  *
3610  * @rdev: radeon_device pointer
3611  * @fence: radeon fence object
3612  *
3613  * Emits a fence sequence number on the gfx ring and flushes
3614  * GPU caches.
3615  */
3616 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3617 			     struct radeon_fence *fence)
3618 {
3619 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3620 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3621 
3622 	/* Workaround for cache flush problems. First send a dummy EOP
3623 	 * event down the pipe with a sequence number one below the real one.
3624 	 */
3625 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3626 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3627 				 EOP_TC_ACTION_EN |
3628 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3629 				 EVENT_INDEX(5)));
3630 	radeon_ring_write(ring, addr & 0xfffffffc);
3631 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3632 				DATA_SEL(1) | INT_SEL(0));
3633 	radeon_ring_write(ring, fence->seq - 1);
3634 	radeon_ring_write(ring, 0);
3635 
3636 	/* Then send the real EOP event down the pipe. */
3637 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3638 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3639 				 EOP_TC_ACTION_EN |
3640 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3641 				 EVENT_INDEX(5)));
3642 	radeon_ring_write(ring, addr & 0xfffffffc);
3643 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3644 	radeon_ring_write(ring, fence->seq);
3645 	radeon_ring_write(ring, 0);
3646 }
3647 
3648 /**
3649  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3650  *
3651  * @rdev: radeon_device pointer
3652  * @fence: radeon fence object
3653  *
3654  * Emits a fence sequence number on the compute ring and flushes
3655  * GPU caches.
3656  */
3657 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3658 				 struct radeon_fence *fence)
3659 {
3660 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3661 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3662 
3663 	/* RELEASE_MEM - flush caches, send int */
3664 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3665 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3666 				 EOP_TC_ACTION_EN |
3667 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3668 				 EVENT_INDEX(5)));
3669 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3670 	radeon_ring_write(ring, addr & 0xfffffffc);
3671 	radeon_ring_write(ring, upper_32_bits(addr));
3672 	radeon_ring_write(ring, fence->seq);
3673 	radeon_ring_write(ring, 0);
3674 }
3675 
3676 /**
3677  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3678  *
3679  * @rdev: radeon_device pointer
3680  * @ring: radeon ring buffer object
3681  * @semaphore: radeon semaphore object
3682  * @emit_wait: Is this a semaphore wait?
3683  *
3684  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3685  * from running ahead of semaphore waits.
3686  */
3687 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3688 			     struct radeon_ring *ring,
3689 			     struct radeon_semaphore *semaphore,
3690 			     bool emit_wait)
3691 {
3692 	uint64_t addr = semaphore->gpu_addr;
3693 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3694 
3695 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3696 	radeon_ring_write(ring, lower_32_bits(addr));
3697 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3698 
3699 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3700 		/* Prevent the PFP from running ahead of the semaphore wait */
3701 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3702 		radeon_ring_write(ring, 0x0);
3703 	}
3704 
3705 	return true;
3706 }
3707 
3708 /**
3709  * cik_copy_cpdma - copy pages using the CP DMA engine
3710  *
3711  * @rdev: radeon_device pointer
3712  * @src_offset: src GPU address
3713  * @dst_offset: dst GPU address
3714  * @num_gpu_pages: number of GPU pages to xfer
3715  * @resv: reservation object to sync to
3716  *
3717  * Copies GPU pages using the CP DMA engine (CIK+).
3718  * Used by the radeon ttm implementation to move pages if
3719  * registered as the asic copy callback.
3720  */
3721 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3722 				    uint64_t src_offset, uint64_t dst_offset,
3723 				    unsigned num_gpu_pages,
3724 				    struct reservation_object *resv)
3725 {
3726 	struct radeon_fence *fence;
3727 	struct radeon_sync sync;
3728 	int ring_index = rdev->asic->copy.blit_ring_index;
3729 	struct radeon_ring *ring = &rdev->ring[ring_index];
3730 	u32 size_in_bytes, cur_size_in_bytes, control;
3731 	int i, num_loops;
3732 	int r = 0;
3733 
3734 	radeon_sync_create(&sync);
3735 
3736 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3737 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3738 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3739 	if (r) {
3740 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3741 		radeon_sync_free(rdev, &sync, NULL);
3742 		return ERR_PTR(r);
3743 	}
3744 
3745 	radeon_sync_resv(rdev, &sync, resv, false);
3746 	radeon_sync_rings(rdev, &sync, ring->idx);
3747 
3748 	for (i = 0; i < num_loops; i++) {
3749 		cur_size_in_bytes = size_in_bytes;
3750 		if (cur_size_in_bytes > 0x1fffff)
3751 			cur_size_in_bytes = 0x1fffff;
3752 		size_in_bytes -= cur_size_in_bytes;
3753 		control = 0;
3754 		if (size_in_bytes == 0)
3755 			control |= PACKET3_DMA_DATA_CP_SYNC;
3756 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3757 		radeon_ring_write(ring, control);
3758 		radeon_ring_write(ring, lower_32_bits(src_offset));
3759 		radeon_ring_write(ring, upper_32_bits(src_offset));
3760 		radeon_ring_write(ring, lower_32_bits(dst_offset));
3761 		radeon_ring_write(ring, upper_32_bits(dst_offset));
3762 		radeon_ring_write(ring, cur_size_in_bytes);
3763 		src_offset += cur_size_in_bytes;
3764 		dst_offset += cur_size_in_bytes;
3765 	}
3766 
3767 	r = radeon_fence_emit(rdev, &fence, ring->idx);
3768 	if (r) {
3769 		radeon_ring_unlock_undo(rdev, ring);
3770 		radeon_sync_free(rdev, &sync, NULL);
3771 		return ERR_PTR(r);
3772 	}
3773 
3774 	radeon_ring_unlock_commit(rdev, ring, false);
3775 	radeon_sync_free(rdev, &sync, fence);
3776 
3777 	return fence;
3778 }
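
/*
 * Sizing example: copying 1024 GPU pages (4 MiB with 4 KiB pages) gives
 * size_in_bytes = 0x400000, so num_loops = DIV_ROUND_UP(0x400000, 0x1fffff)
 * = 3 DMA_DATA packets, and the ring is locked for 3 * 7 + 18 = 39 dwords.
 */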
3779 
3780 /*
3781  * IB stuff
3782  */
3783 /**
3784  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3785  *
3786  * @rdev: radeon_device pointer
3787  * @ib: radeon indirect buffer object
3788  *
3789  * Emits a DE (drawing engine) or CE (constant engine) IB
3790  * on the gfx ring.  IBs are usually generated by userspace
3791  * acceleration drivers and submitted to the kernel for
3792  * scheduling on the ring.  This function schedules the IB
3793  * on the gfx ring for execution by the GPU.
3794  */
3795 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3796 {
3797 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3798 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3799 	u32 header, control = INDIRECT_BUFFER_VALID;
3800 
3801 	if (ib->is_const_ib) {
3802 		/* set switch buffer packet before const IB */
3803 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3804 		radeon_ring_write(ring, 0);
3805 
3806 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3807 	} else {
3808 		u32 next_rptr;
3809 		if (ring->rptr_save_reg) {
3810 			next_rptr = ring->wptr + 3 + 4;
3811 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3812 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3813 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3814 			radeon_ring_write(ring, next_rptr);
3815 		} else if (rdev->wb.enabled) {
3816 			next_rptr = ring->wptr + 5 + 4;
3817 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3818 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3819 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3820 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3821 			radeon_ring_write(ring, next_rptr);
3822 		}
3823 
3824 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3825 	}
3826 
3827 	control |= ib->length_dw | (vm_id << 24);
3828 
3829 	radeon_ring_write(ring, header);
3830 	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3831 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3832 	radeon_ring_write(ring, control);
3833 }
3834 
3835 /**
3836  * cik_ib_test - basic gfx ring IB test
3837  *
3838  * @rdev: radeon_device pointer
3839  * @ring: radeon_ring structure holding ring information
3840  *
3841  * Allocate an IB and execute it on the gfx ring (CIK).
3842  * Provides a basic gfx ring test to verify that IBs are working.
3843  * Returns 0 on success, error on failure.
3844  */
3845 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3846 {
3847 	struct radeon_ib ib;
3848 	uint32_t scratch;
3849 	uint32_t tmp = 0;
3850 	unsigned i;
3851 	int r;
3852 
3853 	r = radeon_scratch_get(rdev, &scratch);
3854 	if (r) {
3855 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3856 		return r;
3857 	}
3858 	WREG32(scratch, 0xCAFEDEAD);
3859 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3860 	if (r) {
3861 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3862 		radeon_scratch_free(rdev, scratch);
3863 		return r;
3864 	}
3865 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3866 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3867 	ib.ptr[2] = 0xDEADBEEF;
3868 	ib.length_dw = 3;
3869 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
3870 	if (r) {
3871 		radeon_scratch_free(rdev, scratch);
3872 		radeon_ib_free(rdev, &ib);
3873 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3874 		return r;
3875 	}
3876 	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3877 		RADEON_USEC_IB_TEST_TIMEOUT));
3878 	if (r < 0) {
3879 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3880 		radeon_scratch_free(rdev, scratch);
3881 		radeon_ib_free(rdev, &ib);
3882 		return r;
3883 	} else if (r == 0) {
3884 		DRM_ERROR("radeon: fence wait timed out.\n");
3885 		radeon_scratch_free(rdev, scratch);
3886 		radeon_ib_free(rdev, &ib);
3887 		return -ETIMEDOUT;
3888 	}
3889 	r = 0;
3890 	for (i = 0; i < rdev->usec_timeout; i++) {
3891 		tmp = RREG32(scratch);
3892 		if (tmp == 0xDEADBEEF)
3893 			break;
3894 		DRM_UDELAY(1);
3895 	}
3896 	if (i < rdev->usec_timeout) {
3897 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3898 	} else {
3899 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3900 			  scratch, tmp);
3901 		r = -EINVAL;
3902 	}
3903 	radeon_scratch_free(rdev, scratch);
3904 	radeon_ib_free(rdev, &ib);
3905 	return r;
3906 }
3907 
3908 /*
3909  * CP.
3910  * On CIK, gfx and compute now have independent command processors.
3911  *
3912  * GFX
3913  * Gfx consists of a single ring and can process both gfx jobs and
3914  * compute jobs.  The gfx CP consists of three microengines (ME):
3915  * PFP - Pre-Fetch Parser
3916  * ME - Micro Engine
3917  * CE - Constant Engine
3918  * The PFP and ME make up what is considered the Drawing Engine (DE).
3919  * The CE is an asynchronous engine used for updating buffer descriptors
3920  * used by the DE so that they can be loaded into cache in parallel
3921  * while the DE is processing state update packets.
3922  *
3923  * Compute
3924  * The compute CP consists of two microengines (ME):
3925  * MEC1 - Compute MicroEngine 1
3926  * MEC2 - Compute MicroEngine 2
3927  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3928  * The queues are exposed to userspace and are programmed directly
3929  * by the compute runtime.
3930  */
3931 /**
3932  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3933  *
3934  * @rdev: radeon_device pointer
3935  * @enable: enable or disable the MEs
3936  *
3937  * Halts or unhalts the gfx MEs.
3938  */
3939 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3940 {
3941 	if (enable)
3942 		WREG32(CP_ME_CNTL, 0);
3943 	else {
3944 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3945 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3946 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3947 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3948 	}
3949 	udelay(50);
3950 }
3951 
3952 /**
3953  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3954  *
3955  * @rdev: radeon_device pointer
3956  *
3957  * Loads the gfx PFP, ME, and CE ucode.
3958  * Returns 0 for success, -EINVAL if the ucode is not available.
3959  */
3960 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3961 {
3962 	int i;
3963 
3964 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3965 		return -EINVAL;
3966 
3967 	cik_cp_gfx_enable(rdev, false);
3968 
3969 	if (rdev->new_fw) {
3970 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3971 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3972 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3973 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3974 		const struct gfx_firmware_header_v1_0 *me_hdr =
3975 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3976 		const __le32 *fw_data;
3977 		u32 fw_size;
3978 
3979 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3980 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3981 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3982 
3983 		/* PFP */
3984 		fw_data = (const __le32 *)
3985 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3986 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3987 		WREG32(CP_PFP_UCODE_ADDR, 0);
3988 		for (i = 0; i < fw_size; i++)
3989 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3990 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3991 
3992 		/* CE */
3993 		fw_data = (const __le32 *)
3994 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3995 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3996 		WREG32(CP_CE_UCODE_ADDR, 0);
3997 		for (i = 0; i < fw_size; i++)
3998 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3999 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4000 
4001 		/* ME */
4002 		fw_data = (const __le32 *)
4003 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4004 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4005 		WREG32(CP_ME_RAM_WADDR, 0);
4006 		for (i = 0; i < fw_size; i++)
4007 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4008 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4009 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4010 	} else {
4011 		const __be32 *fw_data;
4012 
4013 		/* PFP */
4014 		fw_data = (const __be32 *)rdev->pfp_fw->data;
4015 		WREG32(CP_PFP_UCODE_ADDR, 0);
4016 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4017 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4018 		WREG32(CP_PFP_UCODE_ADDR, 0);
4019 
4020 		/* CE */
4021 		fw_data = (const __be32 *)rdev->ce_fw->data;
4022 		WREG32(CP_CE_UCODE_ADDR, 0);
4023 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4024 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4025 		WREG32(CP_CE_UCODE_ADDR, 0);
4026 
4027 		/* ME */
4028 		fw_data = (const __be32 *)rdev->me_fw->data;
4029 		WREG32(CP_ME_RAM_WADDR, 0);
4030 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4031 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4032 		WREG32(CP_ME_RAM_WADDR, 0);
4033 	}
4034 
4035 	return 0;
4036 }
4037 
4038 /**
4039  * cik_cp_gfx_start - start the gfx ring
4040  *
4041  * @rdev: radeon_device pointer
4042  *
4043  * Enables the ring and loads the clear state context and other
4044  * packets required to init the ring.
4045  * Returns 0 for success, error for failure.
4046  */
4047 static int cik_cp_gfx_start(struct radeon_device *rdev)
4048 {
4049 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4050 	int r, i;
4051 
4052 	/* init the CP */
4053 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4054 	WREG32(CP_ENDIAN_SWAP, 0);
4055 	WREG32(CP_DEVICE_ID, 1);
4056 
4057 	cik_cp_gfx_enable(rdev, true);
4058 
4059 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4060 	if (r) {
4061 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4062 		return r;
4063 	}
4064 
4065 	/* init the CE partitions.  CE only used for gfx on CIK */
4066 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4067 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4068 	radeon_ring_write(ring, 0x8000);
4069 	radeon_ring_write(ring, 0x8000);
4070 
4071 	/* setup clear context state */
4072 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4073 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4074 
4075 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4076 	radeon_ring_write(ring, 0x80000000);
4077 	radeon_ring_write(ring, 0x80000000);
4078 
4079 	for (i = 0; i < cik_default_size; i++)
4080 		radeon_ring_write(ring, cik_default_state[i]);
4081 
4082 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4083 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4084 
4085 	/* set clear context state */
4086 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4087 	radeon_ring_write(ring, 0);
4088 
4089 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4090 	radeon_ring_write(ring, 0x00000316);
4091 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4092 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4093 
4094 	radeon_ring_unlock_commit(rdev, ring, false);
4095 
4096 	return 0;
4097 }
4098 
4099 /**
4100  * cik_cp_gfx_fini - stop the gfx ring
4101  *
4102  * @rdev: radeon_device pointer
4103  *
4104  * Stop the gfx ring and tear down the driver ring
4105  * info.
4106  */
4107 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4108 {
4109 	cik_cp_gfx_enable(rdev, false);
4110 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4111 }
4112 
4113 /**
4114  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4115  *
4116  * @rdev: radeon_device pointer
4117  *
4118  * Program the location and size of the gfx ring buffer
4119  * and test it to make sure it's working.
4120  * Returns 0 for success, error for failure.
4121  */
4122 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4123 {
4124 	struct radeon_ring *ring;
4125 	u32 tmp;
4126 	u32 rb_bufsz;
4127 	u64 rb_addr;
4128 	int r;
4129 
4130 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4131 	if (rdev->family != CHIP_HAWAII)
4132 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4133 
4134 	/* Set the write pointer delay */
4135 	WREG32(CP_RB_WPTR_DELAY, 0);
4136 
4137 	/* set the RB to use vmid 0 */
4138 	WREG32(CP_RB_VMID, 0);
4139 
4140 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4141 
4142 	/* ring 0 - compute and gfx */
4143 	/* Set ring buffer size */
4144 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4145 	rb_bufsz = order_base_2(ring->ring_size / 8);
4146 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4147 #ifdef __BIG_ENDIAN
4148 	tmp |= BUF_SWAP_32BIT;
4149 #endif
4150 	WREG32(CP_RB0_CNTL, tmp);
4151 
4152 	/* Initialize the ring buffer's read and write pointers */
4153 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4154 	ring->wptr = 0;
4155 	WREG32(CP_RB0_WPTR, ring->wptr);
4156 
4157 	/* set the wb address whether it's enabled or not */
4158 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4159 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4160 
4161 	/* scratch register shadowing is no longer supported */
4162 	WREG32(SCRATCH_UMSK, 0);
4163 
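	/* no writeback: keep the CP from updating the rptr copy in memory */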
4164 	if (!rdev->wb.enabled)
4165 		tmp |= RB_NO_UPDATE;
4166 
4167 	mdelay(1);
4168 	WREG32(CP_RB0_CNTL, tmp);
4169 
4170 	rb_addr = ring->gpu_addr >> 8;
4171 	WREG32(CP_RB0_BASE, rb_addr);
4172 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4173 
4174 	/* start the ring */
4175 	cik_cp_gfx_start(rdev);
4176 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4177 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4178 	if (r) {
4179 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4180 		return r;
4181 	}
4182 
4183 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4184 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4185 
4186 	return 0;
4187 }
4188 
4189 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4190 		     struct radeon_ring *ring)
4191 {
4192 	u32 rptr;
4193 
4194 	if (rdev->wb.enabled)
4195 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4196 	else
4197 		rptr = RREG32(CP_RB0_RPTR);
4198 
4199 	return rptr;
4200 }
4201 
4202 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4203 		     struct radeon_ring *ring)
4204 {
4205 	return RREG32(CP_RB0_WPTR);
4206 }
4207 
4208 void cik_gfx_set_wptr(struct radeon_device *rdev,
4209 		      struct radeon_ring *ring)
4210 {
4211 	WREG32(CP_RB0_WPTR, ring->wptr);
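	/* read back to flush the posted register write */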
4212 	(void)RREG32(CP_RB0_WPTR);
4213 }
4214 
4215 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4216 			 struct radeon_ring *ring)
4217 {
4218 	u32 rptr;
4219 
4220 	if (rdev->wb.enabled) {
4221 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4222 	} else {
4223 		mutex_lock(&rdev->srbm_mutex);
4224 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4225 		rptr = RREG32(CP_HQD_PQ_RPTR);
4226 		cik_srbm_select(rdev, 0, 0, 0, 0);
4227 		mutex_unlock(&rdev->srbm_mutex);
4228 	}
4229 
4230 	return rptr;
4231 }
4232 
4233 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4234 			 struct radeon_ring *ring)
4235 {
4236 	u32 wptr;
4237 
4238 	if (rdev->wb.enabled) {
4239 		/* XXX check if swapping is necessary on BE */
4240 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4241 	} else {
4242 		mutex_lock(&rdev->srbm_mutex);
4243 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4244 		wptr = RREG32(CP_HQD_PQ_WPTR);
4245 		cik_srbm_select(rdev, 0, 0, 0, 0);
4246 		mutex_unlock(&rdev->srbm_mutex);
4247 	}
4248 
4249 	return wptr;
4250 }
4251 
4252 void cik_compute_set_wptr(struct radeon_device *rdev,
4253 			  struct radeon_ring *ring)
4254 {
4255 	/* XXX check if swapping is necessary on BE */
4256 	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
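	/* ring the doorbell so the CP fetches the new wptr */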
4257 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4258 }
4259 
4260 static void cik_compute_stop(struct radeon_device *rdev,
4261 			     struct radeon_ring *ring)
4262 {
4263 	u32 j, tmp;
4264 
4265 	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4266 	/* Disable wptr polling. */
4267 	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4268 	tmp &= ~WPTR_POLL_EN;
4269 	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4270 	/* Disable HQD. */
4271 	if (RREG32(CP_HQD_ACTIVE) & 1) {
4272 		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4273 		for (j = 0; j < rdev->usec_timeout; j++) {
4274 			if (!(RREG32(CP_HQD_ACTIVE) & 1))
4275 				break;
4276 			udelay(1);
4277 		}
4278 		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4279 		WREG32(CP_HQD_PQ_RPTR, 0);
4280 		WREG32(CP_HQD_PQ_WPTR, 0);
4281 	}
4282 	cik_srbm_select(rdev, 0, 0, 0, 0);
4283 }
4284 
4285 /**
4286  * cik_cp_compute_enable - enable/disable the compute CP MEs
4287  *
4288  * @rdev: radeon_device pointer
4289  * @enable: enable or disable the MEs
4290  *
4291  * Halts or unhalts the compute MEs.
4292  */
4293 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4294 {
4295 	if (enable)
4296 		WREG32(CP_MEC_CNTL, 0);
4297 	else {
4298 		/*
4299 		 * To make hibernation reliable we need to clear compute ring
4300 		 * configuration before halting the compute ring.
4301 		 */
4302 		mutex_lock(&rdev->srbm_mutex);
4303 		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4304 		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4305 		mutex_unlock(&rdev->srbm_mutex);
4306 
4307 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4308 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4309 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4310 	}
4311 	udelay(50);
4312 }
4313 
4314 /**
4315  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4316  *
4317  * @rdev: radeon_device pointer
4318  *
4319  * Loads the compute MEC1&2 ucode.
4320  * Returns 0 for success, -EINVAL if the ucode is not available.
4321  */
4322 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4323 {
4324 	int i;
4325 
4326 	if (!rdev->mec_fw)
4327 		return -EINVAL;
4328 
4329 	cik_cp_compute_enable(rdev, false);
4330 
4331 	if (rdev->new_fw) {
4332 		const struct gfx_firmware_header_v1_0 *mec_hdr =
4333 			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4334 		const __le32 *fw_data;
4335 		u32 fw_size;
4336 
4337 		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4338 
4339 		/* MEC1 */
4340 		fw_data = (const __le32 *)
4341 			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4342 		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4343 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4344 		for (i = 0; i < fw_size; i++)
4345 			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4346 		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4347 
4348 		/* MEC2 */
4349 		if (rdev->family == CHIP_KAVERI) {
4350 			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4351 				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4352 
4353 			fw_data = (const __le32 *)
4354 				(rdev->mec2_fw->data +
4355 				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4356 			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4357 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4358 			for (i = 0; i < fw_size; i++)
4359 				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4360 			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4361 		}
4362 	} else {
4363 		const __be32 *fw_data;
4364 
4365 		/* MEC1 */
4366 		fw_data = (const __be32 *)rdev->mec_fw->data;
4367 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4368 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4369 			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4370 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4371 
4372 		if (rdev->family == CHIP_KAVERI) {
4373 			/* MEC2 */
4374 			fw_data = (const __be32 *)rdev->mec_fw->data;
4375 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4376 			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4377 				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4378 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4379 		}
4380 	}
4381 
4382 	return 0;
4383 }
4384 
4385 /**
4386  * cik_cp_compute_start - start the compute queues
4387  *
4388  * @rdev: radeon_device pointer
4389  *
4390  * Enable the compute queues.
4391  * Returns 0 for success, error for failure.
4392  */
4393 static int cik_cp_compute_start(struct radeon_device *rdev)
4394 {
4395 	cik_cp_compute_enable(rdev, true);
4396 
4397 	return 0;
4398 }
4399 
4400 /**
4401  * cik_cp_compute_fini - stop the compute queues
4402  *
4403  * @rdev: radeon_device pointer
4404  *
4405  * Stop the compute queues and tear down the driver queue
4406  * info.
4407  */
4408 static void cik_cp_compute_fini(struct radeon_device *rdev)
4409 {
4410 	int i, idx, r;
4411 
4412 	cik_cp_compute_enable(rdev, false);
4413 
4414 	for (i = 0; i < 2; i++) {
4415 		if (i == 0)
4416 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4417 		else
4418 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4419 
4420 		if (rdev->ring[idx].mqd_obj) {
4421 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4422 			if (unlikely(r != 0))
4423 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4424 
4425 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4426 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4427 
4428 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4429 			rdev->ring[idx].mqd_obj = NULL;
4430 		}
4431 	}
4432 }
4433 
4434 static void cik_mec_fini(struct radeon_device *rdev)
4435 {
4436 	int r;
4437 
4438 	if (rdev->mec.hpd_eop_obj) {
4439 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4440 		if (unlikely(r != 0))
4441 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4442 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4443 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4444 
4445 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4446 		rdev->mec.hpd_eop_obj = NULL;
4447 	}
4448 }
4449 
4450 #define MEC_HPD_SIZE 2048
4451 
4452 static int cik_mec_init(struct radeon_device *rdev)
4453 {
4454 	int r;
4455 	u32 *hpd;
4456 
4457 	/*
4458 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4459 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4460 	 * Nonetheless, we assign only 1 pipe because all other pipes will
4461 	 * be handled by KFD
4462 	 */
4463 	rdev->mec.num_mec = 1;
4464 	rdev->mec.num_pipe = 1;
4465 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4466 
4467 	if (rdev->mec.hpd_eop_obj == NULL) {
4468 		r = radeon_bo_create(rdev,
4469 				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4470 				     PAGE_SIZE, true,
4471 				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4472 				     &rdev->mec.hpd_eop_obj);
4473 		if (r) {
4474 			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4475 			return r;
4476 		}
4477 	}
4478 
4479 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4480 	if (unlikely(r != 0)) {
4481 		cik_mec_fini(rdev);
4482 		return r;
4483 	}
4484 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4485 			  &rdev->mec.hpd_eop_gpu_addr);
4486 	if (r) {
4487 		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4488 		cik_mec_fini(rdev);
4489 		return r;
4490 	}
4491 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4492 	if (r) {
4493 		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4494 		cik_mec_fini(rdev);
4495 		return r;
4496 	}
4497 
4498 	/* clear memory.  Not sure if this is required or not */
4499 	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4500 
4501 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4502 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4503 
4504 	return 0;
4505 }
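
/*
 * Sizing note: with num_mec = num_pipe = 1 the HPD EOP buffer allocated
 * above is 1 * 1 * MEC_HPD_SIZE * 2 = 4096 bytes, i.e. a single 4 KiB
 * page.
 */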
4506 
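/*
 * Shadow of the CP_MQD_xxx / CP_HQD_xxx register block.  The MQD (memory
 * queue descriptor) keeps a copy of this state in memory so a compute
 * queue can be programmed from, and its state saved in, the MQD contents.
 */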
4507 struct hqd_registers {
4509 	u32 cp_mqd_base_addr;
4510 	u32 cp_mqd_base_addr_hi;
4511 	u32 cp_hqd_active;
4512 	u32 cp_hqd_vmid;
4513 	u32 cp_hqd_persistent_state;
4514 	u32 cp_hqd_pipe_priority;
4515 	u32 cp_hqd_queue_priority;
4516 	u32 cp_hqd_quantum;
4517 	u32 cp_hqd_pq_base;
4518 	u32 cp_hqd_pq_base_hi;
4519 	u32 cp_hqd_pq_rptr;
4520 	u32 cp_hqd_pq_rptr_report_addr;
4521 	u32 cp_hqd_pq_rptr_report_addr_hi;
4522 	u32 cp_hqd_pq_wptr_poll_addr;
4523 	u32 cp_hqd_pq_wptr_poll_addr_hi;
4524 	u32 cp_hqd_pq_doorbell_control;
4525 	u32 cp_hqd_pq_wptr;
4526 	u32 cp_hqd_pq_control;
4527 	u32 cp_hqd_ib_base_addr;
4528 	u32 cp_hqd_ib_base_addr_hi;
4529 	u32 cp_hqd_ib_rptr;
4530 	u32 cp_hqd_ib_control;
4531 	u32 cp_hqd_iq_timer;
4532 	u32 cp_hqd_iq_rptr;
4533 	u32 cp_hqd_dequeue_request;
4534 	u32 cp_hqd_dma_offload;
4535 	u32 cp_hqd_sema_cmd;
4536 	u32 cp_hqd_msg_type;
4537 	u32 cp_hqd_atomic0_preop_lo;
4538 	u32 cp_hqd_atomic0_preop_hi;
4539 	u32 cp_hqd_atomic1_preop_lo;
4540 	u32 cp_hqd_atomic1_preop_hi;
4541 	u32 cp_hqd_hq_scheduler0;
4542 	u32 cp_hqd_hq_scheduler1;
4543 	u32 cp_mqd_control;
4544 };
4545 
4546 struct bonaire_mqd {
4548 	u32 header;
4549 	u32 dispatch_initiator;
4550 	u32 dimensions[3];
4551 	u32 start_idx[3];
4552 	u32 num_threads[3];
4553 	u32 pipeline_stat_enable;
4554 	u32 perf_counter_enable;
4555 	u32 pgm[2];
4556 	u32 tba[2];
4557 	u32 tma[2];
4558 	u32 pgm_rsrc[2];
4559 	u32 vmid;
4560 	u32 resource_limits;
4561 	u32 static_thread_mgmt01[2];
4562 	u32 tmp_ring_size;
4563 	u32 static_thread_mgmt23[2];
4564 	u32 restart[3];
4565 	u32 thread_trace_enable;
4566 	u32 reserved1;
4567 	u32 user_data[16];
4568 	u32 vgtcs_invoke_count[2];
4569 	struct hqd_registers queue_state;
4570 	u32 dequeue_cntr;
4571 	u32 interrupt_queue[64];
4572 };
4573 
4574 /**
4575  * cik_cp_compute_resume - setup the compute queue registers
4576  *
4577  * @rdev: radeon_device pointer
4578  *
4579  * Program the compute queues and test them to make sure they
4580  * are working.
4581  * Returns 0 for success, error for failure.
4582  */
4583 static int cik_cp_compute_resume(struct radeon_device *rdev)
4584 {
4585 	int r, i, j, idx;
4586 	u32 tmp;
4587 	bool use_doorbell = true;
4588 	u64 hqd_gpu_addr;
4589 	u64 mqd_gpu_addr;
4590 	u64 eop_gpu_addr;
4591 	u64 wb_gpu_addr;
4592 	u32 *buf;
4593 	struct bonaire_mqd *mqd;
4594 
4595 	r = cik_cp_compute_start(rdev);
4596 	if (r)
4597 		return r;
4598 
4599 	/* fix up chicken bits */
4600 	tmp = RREG32(CP_CPF_DEBUG);
4601 	tmp |= (1 << 23);
4602 	WREG32(CP_CPF_DEBUG, tmp);
4603 
4604 	/* init the pipes */
4605 	mutex_lock(&rdev->srbm_mutex);
4606 
4607 	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
4608 
4609 	cik_srbm_select(rdev, 0, 0, 0, 0);
4610 
4611 	/* write the EOP addr */
4612 	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4613 	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4614 
4615 	/* set the VMID assigned */
4616 	WREG32(CP_HPD_EOP_VMID, 0);
4617 
4618 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4619 	tmp = RREG32(CP_HPD_EOP_CONTROL);
4620 	tmp &= ~EOP_SIZE_MASK;
4621 	tmp |= order_base_2(MEC_HPD_SIZE / 8);
4622 	WREG32(CP_HPD_EOP_CONTROL, tmp);
4623 
4624 	mutex_unlock(&rdev->srbm_mutex);
4625 
4626 	/* init the queues.  Just two for now. */
4627 	for (i = 0; i < 2; i++) {
4628 		if (i == 0)
4629 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4630 		else
4631 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4632 
4633 		if (rdev->ring[idx].mqd_obj == NULL) {
4634 			r = radeon_bo_create(rdev,
4635 					     sizeof(struct bonaire_mqd),
4636 					     PAGE_SIZE, true,
4637 					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4638 					     NULL, &rdev->ring[idx].mqd_obj);
4639 			if (r) {
4640 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4641 				return r;
4642 			}
4643 		}
4644 
4645 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4646 		if (unlikely(r != 0)) {
4647 			cik_cp_compute_fini(rdev);
4648 			return r;
4649 		}
4650 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4651 				  &mqd_gpu_addr);
4652 		if (r) {
4653 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4654 			cik_cp_compute_fini(rdev);
4655 			return r;
4656 		}
4657 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4658 		if (r) {
4659 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4660 			cik_cp_compute_fini(rdev);
4661 			return r;
4662 		}
4663 
4664 		/* init the mqd struct */
4665 		memset(buf, 0, sizeof(struct bonaire_mqd));
4666 
4667 		mqd = (struct bonaire_mqd *)buf;
4668 		mqd->header = 0xC0310800;
4669 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4670 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4671 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4672 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4673 
4674 		mutex_lock(&rdev->srbm_mutex);
4675 		cik_srbm_select(rdev, rdev->ring[idx].me,
4676 				rdev->ring[idx].pipe,
4677 				rdev->ring[idx].queue, 0);
4678 
4679 		/* disable wptr polling */
4680 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4681 		tmp &= ~WPTR_POLL_EN;
4682 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4683 
4684 		/* enable doorbell? */
4685 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4686 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4687 		if (use_doorbell)
4688 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4689 		else
4690 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4691 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4692 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4693 
4694 		/* disable the queue if it's active */
4695 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4696 		mqd->queue_state.cp_hqd_pq_rptr = 0;
4697 		mqd->queue_state.cp_hqd_pq_wptr = 0;
4698 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4699 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4700 			for (j = 0; j < rdev->usec_timeout; j++) {
4701 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4702 					break;
4703 				udelay(1);
4704 			}
4705 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4706 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4707 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4708 		}
4709 
4710 		/* set the pointer to the MQD */
4711 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4712 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4713 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4714 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4715 		/* set MQD vmid to 0 */
4716 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4717 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4718 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4719 
4720 		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4721 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4722 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4723 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4724 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4725 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4726 
4727 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4728 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4729 		mqd->queue_state.cp_hqd_pq_control &=
4730 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4731 
4732 		mqd->queue_state.cp_hqd_pq_control |=
4733 			order_base_2(rdev->ring[idx].ring_size / 8);
4734 		mqd->queue_state.cp_hqd_pq_control |=
4735 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4736 #ifdef __BIG_ENDIAN
4737 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4738 #endif
4739 		mqd->queue_state.cp_hqd_pq_control &=
4740 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4741 		mqd->queue_state.cp_hqd_pq_control |=
4742 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4743 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4744 
4745 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4746 		if (i == 0)
4747 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4748 		else
4749 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4750 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4751 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4752 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4753 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4754 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4755 
4756 		/* set the wb address whether it's enabled or not */
4757 		if (i == 0)
4758 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4759 		else
4760 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4761 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4762 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4763 			upper_32_bits(wb_gpu_addr) & 0xffff;
4764 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4765 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4766 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4767 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4768 
4769 		/* enable the doorbell if requested */
4770 		if (use_doorbell) {
4771 			mqd->queue_state.cp_hqd_pq_doorbell_control =
4772 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4773 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4774 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4775 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4776 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4777 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4778 				~(DOORBELL_SOURCE | DOORBELL_HIT);
4779 
4780 		} else {
4781 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4782 		}
4783 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4784 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4785 
4786 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4787 		rdev->ring[idx].wptr = 0;
4788 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4789 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4790 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4791 
4792 		/* set the vmid for the queue */
4793 		mqd->queue_state.cp_hqd_vmid = 0;
4794 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4795 
4796 		/* activate the queue */
4797 		mqd->queue_state.cp_hqd_active = 1;
4798 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4799 
4800 		cik_srbm_select(rdev, 0, 0, 0, 0);
4801 		mutex_unlock(&rdev->srbm_mutex);
4802 
4803 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4804 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4805 
4806 		rdev->ring[idx].ready = true;
4807 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4808 		if (r)
4809 			rdev->ring[idx].ready = false;
4810 	}
4811 
4812 	return 0;
4813 }
4814 
4815 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4816 {
4817 	cik_cp_gfx_enable(rdev, enable);
4818 	cik_cp_compute_enable(rdev, enable);
4819 }
4820 
4821 static int cik_cp_load_microcode(struct radeon_device *rdev)
4822 {
4823 	int r;
4824 
4825 	r = cik_cp_gfx_load_microcode(rdev);
4826 	if (r)
4827 		return r;
4828 	r = cik_cp_compute_load_microcode(rdev);
4829 	if (r)
4830 		return r;
4831 
4832 	return 0;
4833 }
4834 
4835 static void cik_cp_fini(struct radeon_device *rdev)
4836 {
4837 	cik_cp_gfx_fini(rdev);
4838 	cik_cp_compute_fini(rdev);
4839 }
4840 
4841 static int cik_cp_resume(struct radeon_device *rdev)
4842 {
4843 	int r;
4844 
4845 	cik_enable_gui_idle_interrupt(rdev, false);
4846 
4847 	r = cik_cp_load_microcode(rdev);
4848 	if (r)
4849 		return r;
4850 
4851 	r = cik_cp_gfx_resume(rdev);
4852 	if (r)
4853 		return r;
4854 	r = cik_cp_compute_resume(rdev);
4855 	if (r)
4856 		return r;
4857 
4858 	cik_enable_gui_idle_interrupt(rdev, true);
4859 
4860 	return 0;
4861 }
4862 
4863 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4864 {
4865 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4866 		RREG32(GRBM_STATUS));
4867 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4868 		RREG32(GRBM_STATUS2));
4869 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4870 		RREG32(GRBM_STATUS_SE0));
4871 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4872 		RREG32(GRBM_STATUS_SE1));
4873 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4874 		RREG32(GRBM_STATUS_SE2));
4875 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4876 		RREG32(GRBM_STATUS_SE3));
4877 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4878 		RREG32(SRBM_STATUS));
4879 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4880 		RREG32(SRBM_STATUS2));
4881 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4882 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4883 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4884 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4885 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4886 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4887 		 RREG32(CP_STALLED_STAT1));
4888 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4889 		 RREG32(CP_STALLED_STAT2));
4890 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4891 		 RREG32(CP_STALLED_STAT3));
4892 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4893 		 RREG32(CP_CPF_BUSY_STAT));
4894 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4895 		 RREG32(CP_CPF_STALLED_STAT1));
4896 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4897 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4898 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4899 		 RREG32(CP_CPC_STALLED_STAT1));
4900 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4901 }
4902 
4903 /**
4904  * cik_gpu_check_soft_reset - check which blocks are busy
4905  *
4906  * @rdev: radeon_device pointer
4907  *
4908  * Check which blocks are busy and return the relevant reset
4909  * mask to be used by cik_gpu_soft_reset().
4910  * Returns a mask of the blocks to be reset.
4911  */
4912 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4913 {
4914 	u32 reset_mask = 0;
4915 	u32 tmp;
4916 
4917 	/* GRBM_STATUS */
4918 	tmp = RREG32(GRBM_STATUS);
4919 	if (tmp & (PA_BUSY | SC_BUSY |
4920 		   BCI_BUSY | SX_BUSY |
4921 		   TA_BUSY | VGT_BUSY |
4922 		   DB_BUSY | CB_BUSY |
4923 		   GDS_BUSY | SPI_BUSY |
4924 		   IA_BUSY | IA_BUSY_NO_DMA))
4925 		reset_mask |= RADEON_RESET_GFX;
4926 
4927 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4928 		reset_mask |= RADEON_RESET_CP;
4929 
4930 	/* GRBM_STATUS2 */
4931 	tmp = RREG32(GRBM_STATUS2);
4932 	if (tmp & RLC_BUSY)
4933 		reset_mask |= RADEON_RESET_RLC;
4934 
4935 	/* SDMA0_STATUS_REG */
4936 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4937 	if (!(tmp & SDMA_IDLE))
4938 		reset_mask |= RADEON_RESET_DMA;
4939 
4940 	/* SDMA1_STATUS_REG */
4941 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4942 	if (!(tmp & SDMA_IDLE))
4943 		reset_mask |= RADEON_RESET_DMA1;
4944 
4945 	/* SRBM_STATUS2 */
4946 	tmp = RREG32(SRBM_STATUS2);
4947 	if (tmp & SDMA_BUSY)
4948 		reset_mask |= RADEON_RESET_DMA;
4949 
4950 	if (tmp & SDMA1_BUSY)
4951 		reset_mask |= RADEON_RESET_DMA1;
4952 
4953 	/* SRBM_STATUS */
4954 	tmp = RREG32(SRBM_STATUS);
4955 
4956 	if (tmp & IH_BUSY)
4957 		reset_mask |= RADEON_RESET_IH;
4958 
4959 	if (tmp & SEM_BUSY)
4960 		reset_mask |= RADEON_RESET_SEM;
4961 
4962 	if (tmp & GRBM_RQ_PENDING)
4963 		reset_mask |= RADEON_RESET_GRBM;
4964 
4965 	if (tmp & VMC_BUSY)
4966 		reset_mask |= RADEON_RESET_VMC;
4967 
4968 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4969 		   MCC_BUSY | MCD_BUSY))
4970 		reset_mask |= RADEON_RESET_MC;
4971 
4972 	if (evergreen_is_display_hung(rdev))
4973 		reset_mask |= RADEON_RESET_DISPLAY;
4974 
4975 	/* Skip MC reset as it's most likely not hung, just busy */
4976 	if (reset_mask & RADEON_RESET_MC) {
4977 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4978 		reset_mask &= ~RADEON_RESET_MC;
4979 	}
4980 
4981 	return reset_mask;
4982 }
4983 
4984 /**
4985  * cik_gpu_soft_reset - soft reset GPU
4986  *
4987  * @rdev: radeon_device pointer
4988  * @reset_mask: mask of which blocks to reset
4989  *
4990  * Soft reset the blocks specified in @reset_mask.
4991  */
4992 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4993 {
4994 	struct evergreen_mc_save save;
4995 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4996 	u32 tmp;
4997 
4998 	if (reset_mask == 0)
4999 		return;
5000 
5001 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
5002 
5003 	cik_print_gpu_status_regs(rdev);
5004 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5005 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5006 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5007 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5008 
5009 	/* disable CG/PG */
5010 	cik_fini_pg(rdev);
5011 	cik_fini_cg(rdev);
5012 
5013 	/* stop the rlc */
5014 	cik_rlc_stop(rdev);
5015 
5016 	/* Disable GFX parsing/prefetching */
5017 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5018 
5019 	/* Disable MEC parsing/prefetching */
5020 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5021 
5022 	if (reset_mask & RADEON_RESET_DMA) {
5023 		/* sdma0 */
5024 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5025 		tmp |= SDMA_HALT;
5026 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5027 	}
5028 	if (reset_mask & RADEON_RESET_DMA1) {
5029 		/* sdma1 */
5030 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5031 		tmp |= SDMA_HALT;
5032 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5033 	}
5034 
5035 	evergreen_mc_stop(rdev, &save);
5036 	if (evergreen_mc_wait_for_idle(rdev)) {
5037 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5038 	}
5039 
5040 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5041 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5042 
5043 	if (reset_mask & RADEON_RESET_CP) {
5044 		grbm_soft_reset |= SOFT_RESET_CP;
5045 
5046 		srbm_soft_reset |= SOFT_RESET_GRBM;
5047 	}
5048 
5049 	if (reset_mask & RADEON_RESET_DMA)
5050 		srbm_soft_reset |= SOFT_RESET_SDMA;
5051 
5052 	if (reset_mask & RADEON_RESET_DMA1)
5053 		srbm_soft_reset |= SOFT_RESET_SDMA1;
5054 
5055 	if (reset_mask & RADEON_RESET_DISPLAY)
5056 		srbm_soft_reset |= SOFT_RESET_DC;
5057 
5058 	if (reset_mask & RADEON_RESET_RLC)
5059 		grbm_soft_reset |= SOFT_RESET_RLC;
5060 
5061 	if (reset_mask & RADEON_RESET_SEM)
5062 		srbm_soft_reset |= SOFT_RESET_SEM;
5063 
5064 	if (reset_mask & RADEON_RESET_IH)
5065 		srbm_soft_reset |= SOFT_RESET_IH;
5066 
5067 	if (reset_mask & RADEON_RESET_GRBM)
5068 		srbm_soft_reset |= SOFT_RESET_GRBM;
5069 
5070 	if (reset_mask & RADEON_RESET_VMC)
5071 		srbm_soft_reset |= SOFT_RESET_VMC;
5072 
5073 	if (!(rdev->flags & RADEON_IS_IGP)) {
5074 		if (reset_mask & RADEON_RESET_MC)
5075 			srbm_soft_reset |= SOFT_RESET_MC;
5076 	}
5077 
5078 	if (grbm_soft_reset) {
5079 		tmp = RREG32(GRBM_SOFT_RESET);
5080 		tmp |= grbm_soft_reset;
5081 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5082 		WREG32(GRBM_SOFT_RESET, tmp);
5083 		tmp = RREG32(GRBM_SOFT_RESET);
5084 
5085 		udelay(50);
5086 
5087 		tmp &= ~grbm_soft_reset;
5088 		WREG32(GRBM_SOFT_RESET, tmp);
5089 		tmp = RREG32(GRBM_SOFT_RESET);
5090 	}
5091 
5092 	if (srbm_soft_reset) {
5093 		tmp = RREG32(SRBM_SOFT_RESET);
5094 		tmp |= srbm_soft_reset;
5095 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5096 		WREG32(SRBM_SOFT_RESET, tmp);
5097 		tmp = RREG32(SRBM_SOFT_RESET);
5098 
5099 		udelay(50);
5100 
5101 		tmp &= ~srbm_soft_reset;
5102 		WREG32(SRBM_SOFT_RESET, tmp);
5103 		tmp = RREG32(SRBM_SOFT_RESET);
5104 	}
5105 
5106 	/* Wait a little for things to settle down */
5107 	udelay(50);
5108 
5109 	evergreen_mc_resume(rdev, &save);
5110 	udelay(50);
5111 
5112 	cik_print_gpu_status_regs(rdev);
5113 }
5114 
5115 struct kv_reset_save_regs {
5116 	u32 gmcon_reng_execute;
5117 	u32 gmcon_misc;
5118 	u32 gmcon_misc3;
5119 };
5120 
5121 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5122 				   struct kv_reset_save_regs *save)
5123 {
5124 	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5125 	save->gmcon_misc = RREG32(GMCON_MISC);
5126 	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5127 
5128 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5129 	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5130 						STCTRL_STUTTER_EN));
5131 }
5132 
5133 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5134 				      struct kv_reset_save_regs *save)
5135 {
5136 	int i;
5137 
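	/*
	 * Replay the MC power-gating FSM programming that the reset
	 * clears on APUs: each GMCON_PGFSM_CONFIG write selects a
	 * power-gating state machine and the following GMCON_PGFSM_WRITE
	 * values are shifted into it. The constants are opaque magic
	 * from the register spec.
	 */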
5138 	WREG32(GMCON_PGFSM_WRITE, 0);
5139 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5140 
5141 	for (i = 0; i < 5; i++)
5142 		WREG32(GMCON_PGFSM_WRITE, 0);
5143 
5144 	WREG32(GMCON_PGFSM_WRITE, 0);
5145 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5146 
5147 	for (i = 0; i < 5; i++)
5148 		WREG32(GMCON_PGFSM_WRITE, 0);
5149 
5150 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5151 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5152 
5153 	for (i = 0; i < 5; i++)
5154 		WREG32(GMCON_PGFSM_WRITE, 0);
5155 
5156 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5157 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5158 
5159 	for (i = 0; i < 5; i++)
5160 		WREG32(GMCON_PGFSM_WRITE, 0);
5161 
5162 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5163 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5164 
5165 	for (i = 0; i < 5; i++)
5166 		WREG32(GMCON_PGFSM_WRITE, 0);
5167 
5168 	WREG32(GMCON_PGFSM_WRITE, 0);
5169 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5170 
5171 	for (i = 0; i < 5; i++)
5172 		WREG32(GMCON_PGFSM_WRITE, 0);
5173 
5174 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5175 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5176 
5177 	for (i = 0; i < 5; i++)
5178 		WREG32(GMCON_PGFSM_WRITE, 0);
5179 
5180 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5181 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5182 
5183 	for (i = 0; i < 5; i++)
5184 		WREG32(GMCON_PGFSM_WRITE, 0);
5185 
5186 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5187 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5188 
5189 	for (i = 0; i < 5; i++)
5190 		WREG32(GMCON_PGFSM_WRITE, 0);
5191 
5192 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5193 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5194 
5195 	for (i = 0; i < 5; i++)
5196 		WREG32(GMCON_PGFSM_WRITE, 0);
5197 
5198 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5199 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5200 
5201 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5202 	WREG32(GMCON_MISC, save->gmcon_misc);
5203 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5204 }
5205 
5206 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5207 {
5208 	struct evergreen_mc_save save;
5209 	struct kv_reset_save_regs kv_save = { 0 };
5210 	u32 tmp, i;
5211 
5212 	dev_info(rdev->dev, "GPU pci config reset\n");
5213 
5214 	/* disable dpm? */
5215 
5216 	/* disable cg/pg */
5217 	cik_fini_pg(rdev);
5218 	cik_fini_cg(rdev);
5219 
5220 	/* Disable GFX parsing/prefetching */
5221 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5222 
5223 	/* Disable MEC parsing/prefetching */
5224 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5225 
5226 	/* sdma0 */
5227 	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5228 	tmp |= SDMA_HALT;
5229 	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5230 	/* sdma1 */
5231 	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5232 	tmp |= SDMA_HALT;
5233 	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5234 	/* XXX other engines? */
5235 
5236 	/* halt the rlc, disable cp internal ints */
5237 	cik_rlc_stop(rdev);
5238 
5239 	udelay(50);
5240 
5241 	/* disable mem access */
5242 	evergreen_mc_stop(rdev, &save);
5243 	if (evergreen_mc_wait_for_idle(rdev)) {
5244 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5245 	}
5246 
5247 	if (rdev->flags & RADEON_IS_IGP)
5248 		kv_save_regs_for_reset(rdev, &kv_save);
5249 
5250 	/* disable BM */
5251 	pci_clear_master(rdev->pdev);
5252 	/* reset */
5253 	radeon_pci_config_reset(rdev);
5254 
5255 	udelay(100);
5256 
5257 	/* wait for asic to come out of reset */
5258 	for (i = 0; i < rdev->usec_timeout; i++) {
5259 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5260 			break;
5261 		udelay(1);
5262 	}
5263 
5264 	/* does asic init need to be run first??? */
5265 	if (rdev->flags & RADEON_IS_IGP)
5266 		kv_restore_regs_for_reset(rdev, &kv_save);
5267 }
5268 
5269 /**
5270  * cik_asic_reset - soft reset GPU
5271  *
5272  * @rdev: radeon_device pointer
5273  * @hard: force hard reset
5274  *
5275  * Look up which blocks are hung and attempt
5276  * to reset them.
5277  * Returns 0 for success.
5278  */
5279 int cik_asic_reset(struct radeon_device *rdev, bool hard)
5280 {
5281 	u32 reset_mask;
5282 
5283 	if (hard) {
5284 		cik_gpu_pci_config_reset(rdev);
5285 		return 0;
5286 	}
5287 
5288 	reset_mask = cik_gpu_check_soft_reset(rdev);
5289 
5290 	if (reset_mask)
5291 		r600_set_bios_scratch_engine_hung(rdev, true);
5292 
5293 	/* try soft reset */
5294 	cik_gpu_soft_reset(rdev, reset_mask);
5295 
5296 	reset_mask = cik_gpu_check_soft_reset(rdev);
5297 
5298 	/* try pci config reset */
5299 	if (reset_mask && radeon_hard_reset)
5300 		cik_gpu_pci_config_reset(rdev);
5301 
5302 	reset_mask = cik_gpu_check_soft_reset(rdev);
5303 
5304 	if (!reset_mask)
5305 		r600_set_bios_scratch_engine_hung(rdev, false);
5306 
5307 	return 0;
5308 }
5309 
5310 /**
5311  * cik_gfx_is_lockup - check if the 3D engine is locked up
5312  *
5313  * @rdev: radeon_device pointer
5314  * @ring: radeon_ring structure holding ring information
5315  *
5316  * Check if the 3D engine is locked up (CIK).
5317  * Returns true if the engine is locked, false if not.
5318  */
5319 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5320 {
5321 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5322 
5323 	if (!(reset_mask & (RADEON_RESET_GFX |
5324 			    RADEON_RESET_COMPUTE |
5325 			    RADEON_RESET_CP))) {
5326 		radeon_ring_lockup_update(rdev, ring);
5327 		return false;
5328 	}
5329 	return radeon_ring_test_lockup(rdev, ring);
5330 }
5331 
5332 /* MC */
5333 /**
5334  * cik_mc_program - program the GPU memory controller
5335  *
5336  * @rdev: radeon_device pointer
5337  *
5338  * Set the location of vram, gart, and AGP in the GPU's
5339  * physical address space (CIK).
5340  */
5341 static void cik_mc_program(struct radeon_device *rdev)
5342 {
5343 	struct evergreen_mc_save save;
5344 	u32 tmp;
5345 	int i, j;
5346 
5347 	/* Initialize HDP */
5348 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5349 		WREG32((0x2c14 + j), 0x00000000);
5350 		WREG32((0x2c18 + j), 0x00000000);
5351 		WREG32((0x2c1c + j), 0x00000000);
5352 		WREG32((0x2c20 + j), 0x00000000);
5353 		WREG32((0x2c24 + j), 0x00000000);
5354 	}
5355 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5356 
5357 	evergreen_mc_stop(rdev, &save);
5358 	if (radeon_mc_wait_for_idle(rdev)) {
5359 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5360 	}
5361 	/* Lock out access through the VGA aperture */
5362 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5363 	/* Update configuration */
5364 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5365 	       rdev->mc.vram_start >> 12);
5366 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5367 	       rdev->mc.vram_end >> 12);
5368 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5369 	       rdev->vram_scratch.gpu_addr >> 12);
5370 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5371 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5372 	WREG32(MC_VM_FB_LOCATION, tmp);
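	/*
	 * FB_LOCATION holds the base and top in 16 MiB units (hence the
	 * shift by 24): e.g. 4 GiB of VRAM starting at 0 packs to
	 * ((0xFFFFFFFF >> 24) << 16) | 0 = 0x00FF0000.
	 */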
5373 	/* XXX double check these! */
5374 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5375 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5376 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5377 	WREG32(MC_VM_AGP_BASE, 0);
5378 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5379 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5380 	if (radeon_mc_wait_for_idle(rdev)) {
5381 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5382 	}
5383 	evergreen_mc_resume(rdev, &save);
5384 	/* we need to own VRAM, so turn off the VGA renderer here
5385 	 * to stop it from overwriting our objects */
5386 	rv515_vga_render_disable(rdev);
5387 }
5388 
5389 /**
5390  * cik_mc_init - initialize the memory controller driver params
5391  *
5392  * @rdev: radeon_device pointer
5393  *
5394  * Look up the amount of vram, vram width, and decide how to place
5395  * vram and gart within the GPU's physical address space (CIK).
5396  * Returns 0 for success.
5397  */
5398 static int cik_mc_init(struct radeon_device *rdev)
5399 {
5400 	u32 tmp;
5401 	int chansize, numchan;
5402 
5403 	/* Get VRAM information */
5404 	rdev->mc.vram_is_ddr = true;
5405 	tmp = RREG32(MC_ARB_RAMCFG);
5406 	if (tmp & CHANSIZE_MASK) {
5407 		chansize = 64;
5408 	} else {
5409 		chansize = 32;
5410 	}
5411 	tmp = RREG32(MC_SHARED_CHMAP);
5412 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5413 	case 0:
5414 	default:
5415 		numchan = 1;
5416 		break;
5417 	case 1:
5418 		numchan = 2;
5419 		break;
5420 	case 2:
5421 		numchan = 4;
5422 		break;
5423 	case 3:
5424 		numchan = 8;
5425 		break;
5426 	case 4:
5427 		numchan = 3;
5428 		break;
5429 	case 5:
5430 		numchan = 6;
5431 		break;
5432 	case 6:
5433 		numchan = 10;
5434 		break;
5435 	case 7:
5436 		numchan = 12;
5437 		break;
5438 	case 8:
5439 		numchan = 16;
5440 		break;
5441 	}
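	/*
	 * Bus width is channels times channel size, e.g. NOOFCHAN = 2
	 * (4 channels) with a 64-bit channel gives a 256-bit interface.
	 */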
5442 	rdev->mc.vram_width = numchan * chansize;
5443 	/* Could the aperture size report 0? */
5444 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5445 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5446 	/* size in MB, same as on SI */
5447 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5448 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5449 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5450 	si_vram_gtt_location(rdev, &rdev->mc);
5451 	radeon_update_bandwidth_info(rdev);
5452 
5453 	return 0;
5454 }
5455 
5456 /*
5457  * GART
5458  * VMID 0 holds the physical GPU addresses as used by the kernel.
5459  * VMIDs 1-15 are used for userspace clients and are handled
5460  * by the radeon vm/hsa code.
5461  */
5462 /**
5463  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5464  *
5465  * @rdev: radeon_device pointer
5466  *
5467  * Flush the TLB for the VMID 0 page table (CIK).
5468  */
5469 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5470 {
5471 	/* flush hdp cache */
5472 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5473 
5474 	/* bits 0-15 are the VM contexts 0-15; writing 0x1 invalidates only VMID 0 */
5475 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5476 }
5477 
5478 static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
5479 {
5480 	int i;
5481 	uint32_t sh_mem_bases, sh_mem_config;
5482 
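	/*
	 * SH_MEM_BASES packs the private and shared aperture bases into
	 * its two 16-bit halves; 0x6000 in each places both compute
	 * apertures for VMIDs 8-15 up at the 0x6000xxxxxxxxxxxx range
	 * used by HSA/amdkfd.
	 */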
5483 	sh_mem_bases = 0x6000 | 0x6000 << 16;
5484 	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
5485 	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
5486 
5487 	mutex_lock(&rdev->srbm_mutex);
5488 	for (i = 8; i < 16; i++) {
5489 		cik_srbm_select(rdev, 0, 0, 0, i);
5490 		/* CP and shaders */
5491 		WREG32(SH_MEM_CONFIG, sh_mem_config);
5492 		WREG32(SH_MEM_APE1_BASE, 1);
5493 		WREG32(SH_MEM_APE1_LIMIT, 0);
5494 		WREG32(SH_MEM_BASES, sh_mem_bases);
5495 	}
5496 	cik_srbm_select(rdev, 0, 0, 0, 0);
5497 	mutex_unlock(&rdev->srbm_mutex);
5498 }
5499 
5500 /**
5501  * cik_pcie_gart_enable - gart enable
5502  *
5503  * @rdev: radeon_device pointer
5504  *
5505  * This sets up the TLBs, programs the page tables for VMID0,
5506  * sets up the hw for VMIDs 1-15, which are allocated on
5507  * demand, and sets up the global locations for the LDS, GDS,
5508  * and GPUVM for FSA64 clients (CIK).
5509  * Returns 0 for success, errors for failure.
5510  */
5511 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5512 {
5513 	int r, i;
5514 
5515 	if (rdev->gart.robj == NULL) {
5516 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5517 		return -EINVAL;
5518 	}
5519 	r = radeon_gart_table_vram_pin(rdev);
5520 	if (r)
5521 		return r;
5522 	/* Setup TLB control */
5523 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5524 	       (0xA << 7) |
5525 	       ENABLE_L1_TLB |
5526 	       ENABLE_L1_FRAGMENT_PROCESSING |
5527 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5528 	       ENABLE_ADVANCED_DRIVER_MODEL |
5529 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5530 	/* Setup L2 cache */
5531 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5532 	       ENABLE_L2_FRAGMENT_PROCESSING |
5533 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5534 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5535 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5536 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5537 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5538 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5539 	       BANK_SELECT(4) |
5540 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5541 	/* setup context0 */
5542 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5543 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5544 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5545 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5546 			(u32)(rdev->dummy_page.addr >> 12));
5547 	WREG32(VM_CONTEXT0_CNTL2, 0);
5548 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5549 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5550 
5551 	WREG32(0x15D4, 0);
5552 	WREG32(0x15D8, 0);
5553 	WREG32(0x15DC, 0);
5554 
5555 	/* restore context1-15 */
5556 	/* set vm size, must be a multiple of 4 */
5557 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5558 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5559 	for (i = 1; i < 16; i++) {
5560 		if (i < 8)
5561 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5562 			       rdev->vm_manager.saved_table_addr[i]);
5563 		else
5564 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5565 			       rdev->vm_manager.saved_table_addr[i]);
5566 	}
5567 
5568 	/* enable context1-15 */
5569 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5570 	       (u32)(rdev->dummy_page.addr >> 12));
5571 	WREG32(VM_CONTEXT1_CNTL2, 4);
5572 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5573 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5574 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5575 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5576 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5577 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5578 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5579 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5580 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5581 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5582 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5583 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5584 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5585 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5586 
5587 	if (rdev->family == CHIP_KAVERI) {
5588 		u32 tmp = RREG32(CHUB_CONTROL);
5589 		tmp &= ~BYPASS_VM;
5590 		WREG32(CHUB_CONTROL, tmp);
5591 	}
5592 
5593 	/* XXX SH_MEM regs */
5594 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5595 	mutex_lock(&rdev->srbm_mutex);
5596 	for (i = 0; i < 16; i++) {
5597 		cik_srbm_select(rdev, 0, 0, 0, i);
5598 		/* CP and shaders */
5599 		WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
5600 		WREG32(SH_MEM_APE1_BASE, 1);
5601 		WREG32(SH_MEM_APE1_LIMIT, 0);
5602 		WREG32(SH_MEM_BASES, 0);
5603 		/* SDMA GFX */
5604 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5605 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5606 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5607 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5608 		/* XXX SDMA RLC - todo */
5609 	}
5610 	cik_srbm_select(rdev, 0, 0, 0, 0);
5611 	mutex_unlock(&rdev->srbm_mutex);
5612 
5613 	cik_pcie_init_compute_vmid(rdev);
5614 
5615 	cik_pcie_gart_tlb_flush(rdev);
5616 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5617 		 (unsigned)(rdev->mc.gtt_size >> 20),
5618 		 (unsigned long long)rdev->gart.table_addr);
5619 	rdev->gart.ready = true;
5620 	return 0;
5621 }
5622 
5623 /**
5624  * cik_pcie_gart_disable - gart disable
5625  *
5626  * @rdev: radeon_device pointer
5627  *
5628  * This disables all VM page tables (CIK).
5629  */
5630 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5631 {
5632 	unsigned i;
5633 
5634 	for (i = 1; i < 16; ++i) {
5635 		uint32_t reg;
5636 		if (i < 8)
5637 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5638 		else
5639 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5640 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5641 	}
5642 
5643 	/* Disable all tables */
5644 	WREG32(VM_CONTEXT0_CNTL, 0);
5645 	WREG32(VM_CONTEXT1_CNTL, 0);
5646 	/* Setup TLB control */
5647 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5648 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5649 	/* Setup L2 cache */
5650 	WREG32(VM_L2_CNTL,
5651 	       ENABLE_L2_FRAGMENT_PROCESSING |
5652 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5653 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5654 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5655 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5656 	WREG32(VM_L2_CNTL2, 0);
5657 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5658 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5659 	radeon_gart_table_vram_unpin(rdev);
5660 }
5661 
5662 /**
5663  * cik_pcie_gart_fini - vm fini callback
5664  *
5665  * @rdev: radeon_device pointer
5666  *
5667  * Tears down the driver GART/VM setup (CIK).
5668  */
5669 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5670 {
5671 	cik_pcie_gart_disable(rdev);
5672 	radeon_gart_table_vram_free(rdev);
5673 	radeon_gart_fini(rdev);
5674 }
5675 
5676 /* vm parser */
5677 /**
5678  * cik_ib_parse - vm ib_parse callback
5679  *
5680  * @rdev: radeon_device pointer
5681  * @ib: indirect buffer pointer
5682  *
5683  * CIK uses hw IB checking, so this is a nop (CIK).
5684  */
5685 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5686 {
5687 	return 0;
5688 }
5689 
5690 /*
5691  * vm
5692  * VMID 0 holds the physical GPU addresses as used by the kernel.
5693  * VMIDs 1-15 are used for userspace clients and are handled
5694  * by the radeon vm/hsa code.
5695  */
5696 /**
5697  * cik_vm_init - cik vm init callback
5698  *
5699  * @rdev: radeon_device pointer
5700  *
5701  * Inits cik-specific vm parameters (number of VMs, base of vram for
5702  * VMIDs 1-15) (CIK).
5703  * Returns 0 for success.
5704  */
5705 int cik_vm_init(struct radeon_device *rdev)
5706 {
5707 	/*
5708 	 * number of VMs
5709 	 * VMID 0 is reserved for System
5710 	 * radeon graphics/compute will use VMIDs 1-7
5711 	 * amdkfd will use VMIDs 8-15
5712 	 */
5713 	rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5714 	/* base offset of vram pages; MC_VM_FB_OFFSET counts 4 MiB units, hence the shift by 22 */
5715 	if (rdev->flags & RADEON_IS_IGP) {
5716 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5717 		tmp <<= 22;
5718 		rdev->vm_manager.vram_base_offset = tmp;
5719 	} else
5720 		rdev->vm_manager.vram_base_offset = 0;
5721 
5722 	return 0;
5723 }
5724 
5725 /**
5726  * cik_vm_fini - cik vm fini callback
5727  *
5728  * @rdev: radeon_device pointer
5729  *
5730  * Tear down any asic specific VM setup (CIK).
5731  */
5732 void cik_vm_fini(struct radeon_device *rdev)
5733 {
5734 }
5735 
5736 /**
5737  * cik_vm_decode_fault - print human-readable fault info
5738  *
5739  * @rdev: radeon_device pointer
5740  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5741  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5742  *
5743  * Print human-readable fault information (CIK).
5744  */
5745 static void cik_vm_decode_fault(struct radeon_device *rdev,
5746 				u32 status, u32 addr, u32 mc_client)
5747 {
5748 	u32 mc_id;
5749 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5750 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5751 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5752 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5753 
5754 	if (rdev->family == CHIP_HAWAII)
5755 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5756 	else
5757 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5758 
5759 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5760 	       protections, vmid, addr,
5761 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5762 	       block, mc_client, mc_id);
5763 }
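/*
 * A decoded fault from the above looks like, for example:
 *   VM fault (0x04, vmid 3) at page 261894, read from 'TC0' (0x54433000) (144)
 * where the block name is the raw MCCLIENT value read back as ASCII.
 */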
5764 
5765 /**
5766  * cik_vm_flush - cik vm flush using the CP
5767  *
5768  * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @vm_id: VMID to flush
 * @pd_addr: physical address of the page directory
5769  *
5770  * Update the page table base and flush the VM TLB
5771  * using the CP (CIK).
5772  */
5773 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5774 		  unsigned vm_id, uint64_t pd_addr)
5775 {
5776 	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5777 
5778 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5779 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5780 				 WRITE_DATA_DST_SEL(0)));
5781 	if (vm_id < 8) {
5782 		radeon_ring_write(ring,
5783 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5784 	} else {
5785 		radeon_ring_write(ring,
5786 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5787 	}
5788 	radeon_ring_write(ring, 0);
5789 	radeon_ring_write(ring, pd_addr >> 12);
5790 
5791 	/* update SH_MEM_* regs */
5792 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5793 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5794 				 WRITE_DATA_DST_SEL(0)));
5795 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5796 	radeon_ring_write(ring, 0);
5797 	radeon_ring_write(ring, VMID(vm_id));
5798 
5799 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5800 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5801 				 WRITE_DATA_DST_SEL(0)));
5802 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5803 	radeon_ring_write(ring, 0);
5804 
5805 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5806 	radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
5807 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5808 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5809 
5810 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5811 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5812 				 WRITE_DATA_DST_SEL(0)));
5813 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5814 	radeon_ring_write(ring, 0);
5815 	radeon_ring_write(ring, VMID(0));
5816 
5817 	/* HDP flush */
5818 	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5819 
5820 	/* bits 0-15 are the VM contexts 0-15 */
5821 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5822 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5823 				 WRITE_DATA_DST_SEL(0)));
5824 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5825 	radeon_ring_write(ring, 0);
5826 	radeon_ring_write(ring, 1 << vm_id);
5827 
5828 	/* wait for the invalidate to complete */
5829 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5830 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5831 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
5832 				 WAIT_REG_MEM_ENGINE(0))); /* me */
5833 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5834 	radeon_ring_write(ring, 0);
5835 	radeon_ring_write(ring, 0); /* ref */
5836 	radeon_ring_write(ring, 0); /* mask */
5837 	radeon_ring_write(ring, 0x20); /* poll interval */
5838 
5839 	/* compute doesn't have PFP */
5840 	if (usepfp) {
5841 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5842 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5843 		radeon_ring_write(ring, 0x0);
5844 	}
5845 }
5846 
5847 /*
5848  * RLC
5849  * The RLC is a multi-purpose microengine that handles a
5850  * variety of functions, the most important of which is
5851  * the interrupt controller.
5852  */
5853 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5854 					  bool enable)
5855 {
5856 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5857 
5858 	if (enable)
5859 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5860 	else
5861 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5862 	WREG32(CP_INT_CNTL_RING0, tmp);
5863 }
5864 
5865 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5866 {
5867 	u32 tmp;
5868 
5869 	tmp = RREG32(RLC_LB_CNTL);
5870 	if (enable)
5871 		tmp |= LOAD_BALANCE_ENABLE;
5872 	else
5873 		tmp &= ~LOAD_BALANCE_ENABLE;
5874 	WREG32(RLC_LB_CNTL, tmp);
5875 }
5876 
5877 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5878 {
5879 	u32 i, j, k;
5880 	u32 mask;
5881 
5882 	mutex_lock(&rdev->grbm_idx_mutex);
5883 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5884 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5885 			cik_select_se_sh(rdev, i, j);
5886 			for (k = 0; k < rdev->usec_timeout; k++) {
5887 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5888 					break;
5889 				udelay(1);
5890 			}
5891 		}
5892 	}
5893 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5894 	mutex_unlock(&rdev->grbm_idx_mutex);
5895 
5896 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5897 	for (k = 0; k < rdev->usec_timeout; k++) {
5898 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5899 			break;
5900 		udelay(1);
5901 	}
5902 }
5903 
5904 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5905 {
5906 	u32 tmp;
5907 
5908 	tmp = RREG32(RLC_CNTL);
5909 	if (tmp != rlc)
5910 		WREG32(RLC_CNTL, rlc);
5911 }
5912 
5913 static u32 cik_halt_rlc(struct radeon_device *rdev)
5914 {
5915 	u32 data, orig;
5916 
5917 	orig = data = RREG32(RLC_CNTL);
5918 
5919 	if (data & RLC_ENABLE) {
5920 		u32 i;
5921 
5922 		data &= ~RLC_ENABLE;
5923 		WREG32(RLC_CNTL, data);
5924 
5925 		for (i = 0; i < rdev->usec_timeout; i++) {
5926 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5927 				break;
5928 			udelay(1);
5929 		}
5930 
5931 		cik_wait_for_rlc_serdes(rdev);
5932 	}
5933 
5934 	return orig;
5935 }
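/*
 * cik_halt_rlc() hands back the pre-halt RLC_CNTL value so callers
 * can restore it with cik_update_rlc() once their serdes writes have
 * landed; see cik_enable_cgcg()/cik_enable_mgcg() below.
 */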
5936 
5937 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5938 {
5939 	u32 tmp, i, mask;
5940 
5941 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5942 	WREG32(RLC_GPR_REG2, tmp);
5943 
5944 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5945 	for (i = 0; i < rdev->usec_timeout; i++) {
5946 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5947 			break;
5948 		udelay(1);
5949 	}
5950 
5951 	for (i = 0; i < rdev->usec_timeout; i++) {
5952 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5953 			break;
5954 		udelay(1);
5955 	}
5956 }
5957 
5958 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5959 {
5960 	u32 tmp;
5961 
5962 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5963 	WREG32(RLC_GPR_REG2, tmp);
5964 }
5965 
5966 /**
5967  * cik_rlc_stop - stop the RLC ME
5968  *
5969  * @rdev: radeon_device pointer
5970  *
5971  * Halt the RLC ME (MicroEngine) (CIK).
5972  */
5973 static void cik_rlc_stop(struct radeon_device *rdev)
5974 {
5975 	WREG32(RLC_CNTL, 0);
5976 
5977 	cik_enable_gui_idle_interrupt(rdev, false);
5978 
5979 	cik_wait_for_rlc_serdes(rdev);
5980 }
5981 
5982 /**
5983  * cik_rlc_start - start the RLC ME
5984  *
5985  * @rdev: radeon_device pointer
5986  *
5987  * Unhalt the RLC ME (MicroEngine) (CIK).
5988  */
5989 static void cik_rlc_start(struct radeon_device *rdev)
5990 {
5991 	WREG32(RLC_CNTL, RLC_ENABLE);
5992 
5993 	cik_enable_gui_idle_interrupt(rdev, true);
5994 
5995 	udelay(50);
5996 }
5997 
5998 /**
5999  * cik_rlc_resume - setup the RLC hw
6000  *
6001  * @rdev: radeon_device pointer
6002  *
6003  * Initialize the RLC registers, load the ucode,
6004  * and start the RLC (CIK).
6005  * Returns 0 for success, -EINVAL if the ucode is not available.
6006  */
6007 static int cik_rlc_resume(struct radeon_device *rdev)
6008 {
6009 	u32 i, size, tmp;
6010 
6011 	if (!rdev->rlc_fw)
6012 		return -EINVAL;
6013 
6014 	cik_rlc_stop(rdev);
6015 
6016 	/* disable CG */
6017 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6018 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6019 
6020 	si_rlc_reset(rdev);
6021 
6022 	cik_init_pg(rdev);
6023 
6024 	cik_init_cg(rdev);
6025 
6026 	WREG32(RLC_LB_CNTR_INIT, 0);
6027 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6028 
6029 	mutex_lock(&rdev->grbm_idx_mutex);
6030 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6031 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6032 	WREG32(RLC_LB_PARAMS, 0x00600408);
6033 	WREG32(RLC_LB_CNTL, 0x80000004);
6034 	mutex_unlock(&rdev->grbm_idx_mutex);
6035 
6036 	WREG32(RLC_MC_CNTL, 0);
6037 	WREG32(RLC_UCODE_CNTL, 0);
6038 
6039 	if (rdev->new_fw) {
6040 		const struct rlc_firmware_header_v1_0 *hdr =
6041 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6042 		const __le32 *fw_data = (const __le32 *)
6043 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6044 
6045 		radeon_ucode_print_rlc_hdr(&hdr->header);
6046 
6047 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6048 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6049 		for (i = 0; i < size; i++)
6050 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6051 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6052 	} else {
6053 		const __be32 *fw_data;
6054 
6055 		switch (rdev->family) {
6056 		case CHIP_BONAIRE:
6057 		case CHIP_HAWAII:
6058 		default:
6059 			size = BONAIRE_RLC_UCODE_SIZE;
6060 			break;
6061 		case CHIP_KAVERI:
6062 			size = KV_RLC_UCODE_SIZE;
6063 			break;
6064 		case CHIP_KABINI:
6065 			size = KB_RLC_UCODE_SIZE;
6066 			break;
6067 		case CHIP_MULLINS:
6068 			size = ML_RLC_UCODE_SIZE;
6069 			break;
6070 		}
6071 
6072 		fw_data = (const __be32 *)rdev->rlc_fw->data;
6073 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6074 		for (i = 0; i < size; i++)
6075 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6076 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6077 	}
6078 
6079 	/* XXX - find out what chips support lbpw */
6080 	cik_enable_lbpw(rdev, false);
6081 
6082 	if (rdev->family == CHIP_BONAIRE)
6083 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6084 
6085 	cik_rlc_start(rdev);
6086 
6087 	return 0;
6088 }
6089 
6090 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6091 {
6092 	u32 data, orig, tmp, tmp2;
6093 
6094 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6095 
6096 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6097 		cik_enable_gui_idle_interrupt(rdev, true);
6098 
6099 		tmp = cik_halt_rlc(rdev);
6100 
6101 		mutex_lock(&rdev->grbm_idx_mutex);
6102 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6103 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6104 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6105 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6106 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6107 		mutex_unlock(&rdev->grbm_idx_mutex);
6108 
6109 		cik_update_rlc(rdev, tmp);
6110 
6111 		data |= CGCG_EN | CGLS_EN;
6112 	} else {
6113 		cik_enable_gui_idle_interrupt(rdev, false);
6114 
6115 		RREG32(CB_CGTT_SCLK_CTRL);
6116 		RREG32(CB_CGTT_SCLK_CTRL);
6117 		RREG32(CB_CGTT_SCLK_CTRL);
6118 		RREG32(CB_CGTT_SCLK_CTRL);
6119 
6120 		data &= ~(CGCG_EN | CGLS_EN);
6121 	}
6122 
6123 	if (orig != data)
6124 		WREG32(RLC_CGCG_CGLS_CTRL, data);
6125 
6126 }
6127 
6128 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6129 {
6130 	u32 data, orig, tmp = 0;
6131 
6132 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6133 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6134 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6135 				orig = data = RREG32(CP_MEM_SLP_CNTL);
6136 				data |= CP_MEM_LS_EN;
6137 				if (orig != data)
6138 					WREG32(CP_MEM_SLP_CNTL, data);
6139 			}
6140 		}
6141 
6142 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6143 		data |= 0x00000001;
6144 		data &= 0xfffffffd;
6145 		if (orig != data)
6146 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6147 
6148 		tmp = cik_halt_rlc(rdev);
6149 
6150 		mutex_lock(&rdev->grbm_idx_mutex);
6151 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6152 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6153 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6154 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6155 		WREG32(RLC_SERDES_WR_CTRL, data);
6156 		mutex_unlock(&rdev->grbm_idx_mutex);
6157 
6158 		cik_update_rlc(rdev, tmp);
6159 
6160 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6161 			orig = data = RREG32(CGTS_SM_CTRL_REG);
6162 			data &= ~SM_MODE_MASK;
6163 			data |= SM_MODE(0x2);
6164 			data |= SM_MODE_ENABLE;
6165 			data &= ~CGTS_OVERRIDE;
6166 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6167 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6168 				data &= ~CGTS_LS_OVERRIDE;
6169 			data &= ~ON_MONITOR_ADD_MASK;
6170 			data |= ON_MONITOR_ADD_EN;
6171 			data |= ON_MONITOR_ADD(0x96);
6172 			if (orig != data)
6173 				WREG32(CGTS_SM_CTRL_REG, data);
6174 		}
6175 	} else {
6176 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6177 		data |= 0x00000003;
6178 		if (orig != data)
6179 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6180 
6181 		data = RREG32(RLC_MEM_SLP_CNTL);
6182 		if (data & RLC_MEM_LS_EN) {
6183 			data &= ~RLC_MEM_LS_EN;
6184 			WREG32(RLC_MEM_SLP_CNTL, data);
6185 		}
6186 
6187 		data = RREG32(CP_MEM_SLP_CNTL);
6188 		if (data & CP_MEM_LS_EN) {
6189 			data &= ~CP_MEM_LS_EN;
6190 			WREG32(CP_MEM_SLP_CNTL, data);
6191 		}
6192 
6193 		orig = data = RREG32(CGTS_SM_CTRL_REG);
6194 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6195 		if (orig != data)
6196 			WREG32(CGTS_SM_CTRL_REG, data);
6197 
6198 		tmp = cik_halt_rlc(rdev);
6199 
6200 		mutex_lock(&rdev->grbm_idx_mutex);
6201 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6202 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6203 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6204 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6205 		WREG32(RLC_SERDES_WR_CTRL, data);
6206 		mutex_unlock(&rdev->grbm_idx_mutex);
6207 
6208 		cik_update_rlc(rdev, tmp);
6209 	}
6210 }
6211 
6212 static const u32 mc_cg_registers[] =
6213 {
6214 	MC_HUB_MISC_HUB_CG,
6215 	MC_HUB_MISC_SIP_CG,
6216 	MC_HUB_MISC_VM_CG,
6217 	MC_XPB_CLK_GAT,
6218 	ATC_MISC_CG,
6219 	MC_CITF_MISC_WR_CG,
6220 	MC_CITF_MISC_RD_CG,
6221 	MC_CITF_MISC_VM_CG,
6222 	VM_L2_CG,
6223 };
6224 
6225 static void cik_enable_mc_ls(struct radeon_device *rdev,
6226 			     bool enable)
6227 {
6228 	int i;
6229 	u32 orig, data;
6230 
6231 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6232 		orig = data = RREG32(mc_cg_registers[i]);
6233 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6234 			data |= MC_LS_ENABLE;
6235 		else
6236 			data &= ~MC_LS_ENABLE;
6237 		if (data != orig)
6238 			WREG32(mc_cg_registers[i], data);
6239 	}
6240 }
6241 
6242 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6243 			       bool enable)
6244 {
6245 	int i;
6246 	u32 orig, data;
6247 
6248 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6249 		orig = data = RREG32(mc_cg_registers[i]);
6250 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6251 			data |= MC_CG_ENABLE;
6252 		else
6253 			data &= ~MC_CG_ENABLE;
6254 		if (data != orig)
6255 			WREG32(mc_cg_registers[i], data);
6256 	}
6257 }
6258 
6259 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6260 				 bool enable)
6261 {
6262 	u32 orig, data;
6263 
6264 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6265 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6266 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6267 	} else {
6268 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6269 		data |= 0xff000000;
6270 		if (data != orig)
6271 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6272 
6273 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6274 		data |= 0xff000000;
6275 		if (data != orig)
6276 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6277 	}
6278 }
6279 
6280 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6281 				 bool enable)
6282 {
6283 	u32 orig, data;
6284 
6285 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6286 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6287 		data |= 0x100;
6288 		if (orig != data)
6289 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6290 
6291 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6292 		data |= 0x100;
6293 		if (orig != data)
6294 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6295 	} else {
6296 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6297 		data &= ~0x100;
6298 		if (orig != data)
6299 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6300 
6301 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6302 		data &= ~0x100;
6303 		if (orig != data)
6304 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6305 	}
6306 }
6307 
6308 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6309 				bool enable)
6310 {
6311 	u32 orig, data;
6312 
6313 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6314 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6315 		data = 0xfff;
6316 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6317 
6318 		orig = data = RREG32(UVD_CGC_CTRL);
6319 		data |= DCM;
6320 		if (orig != data)
6321 			WREG32(UVD_CGC_CTRL, data);
6322 	} else {
6323 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6324 		data &= ~0xfff;
6325 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6326 
6327 		orig = data = RREG32(UVD_CGC_CTRL);
6328 		data &= ~DCM;
6329 		if (orig != data)
6330 			WREG32(UVD_CGC_CTRL, data);
6331 	}
6332 }
6333 
6334 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6335 			       bool enable)
6336 {
6337 	u32 orig, data;
6338 
6339 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6340 
6341 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6342 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6343 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6344 	else
6345 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6346 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6347 
6348 	if (orig != data)
6349 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6350 }
6351 
6352 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6353 				bool enable)
6354 {
6355 	u32 orig, data;
6356 
6357 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6358 
6359 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6360 		data &= ~CLOCK_GATING_DIS;
6361 	else
6362 		data |= CLOCK_GATING_DIS;
6363 
6364 	if (orig != data)
6365 		WREG32(HDP_HOST_PATH_CNTL, data);
6366 }
6367 
6368 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6369 			      bool enable)
6370 {
6371 	u32 orig, data;
6372 
6373 	orig = data = RREG32(HDP_MEM_POWER_LS);
6374 
6375 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6376 		data |= HDP_LS_ENABLE;
6377 	else
6378 		data &= ~HDP_LS_ENABLE;
6379 
6380 	if (orig != data)
6381 		WREG32(HDP_MEM_POWER_LS, data);
6382 }
6383 
6384 void cik_update_cg(struct radeon_device *rdev,
6385 		   u32 block, bool enable)
6386 {
6387 
6388 	if (block & RADEON_CG_BLOCK_GFX) {
6389 		cik_enable_gui_idle_interrupt(rdev, false);
6390 		/* order matters! */
6391 		if (enable) {
6392 			cik_enable_mgcg(rdev, true);
6393 			cik_enable_cgcg(rdev, true);
6394 		} else {
6395 			cik_enable_cgcg(rdev, false);
6396 			cik_enable_mgcg(rdev, false);
6397 		}
6398 		cik_enable_gui_idle_interrupt(rdev, true);
6399 	}
6400 
6401 	if (block & RADEON_CG_BLOCK_MC) {
6402 		if (!(rdev->flags & RADEON_IS_IGP)) {
6403 			cik_enable_mc_mgcg(rdev, enable);
6404 			cik_enable_mc_ls(rdev, enable);
6405 		}
6406 	}
6407 
6408 	if (block & RADEON_CG_BLOCK_SDMA) {
6409 		cik_enable_sdma_mgcg(rdev, enable);
6410 		cik_enable_sdma_mgls(rdev, enable);
6411 	}
6412 
6413 	if (block & RADEON_CG_BLOCK_BIF) {
6414 		cik_enable_bif_mgls(rdev, enable);
6415 	}
6416 
6417 	if (block & RADEON_CG_BLOCK_UVD) {
6418 		if (rdev->has_uvd)
6419 			cik_enable_uvd_mgcg(rdev, enable);
6420 	}
6421 
6422 	if (block & RADEON_CG_BLOCK_HDP) {
6423 		cik_enable_hdp_mgcg(rdev, enable);
6424 		cik_enable_hdp_ls(rdev, enable);
6425 	}
6426 
6427 	if (block & RADEON_CG_BLOCK_VCE) {
6428 		vce_v2_0_enable_mgcg(rdev, enable);
6429 	}
6430 }
6431 
6432 static void cik_init_cg(struct radeon_device *rdev)
6433 {
6434 
6435 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6436 
6437 	if (rdev->has_uvd)
6438 		si_init_uvd_internal_cg(rdev);
6439 
6440 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6441 			     RADEON_CG_BLOCK_SDMA |
6442 			     RADEON_CG_BLOCK_BIF |
6443 			     RADEON_CG_BLOCK_UVD |
6444 			     RADEON_CG_BLOCK_HDP), true);
6445 }
6446 
6447 static void cik_fini_cg(struct radeon_device *rdev)
6448 {
6449 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6450 			     RADEON_CG_BLOCK_SDMA |
6451 			     RADEON_CG_BLOCK_BIF |
6452 			     RADEON_CG_BLOCK_UVD |
6453 			     RADEON_CG_BLOCK_HDP), false);
6454 
6455 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6456 }
6457 
6458 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6459 					  bool enable)
6460 {
6461 	u32 data, orig;
6462 
6463 	orig = data = RREG32(RLC_PG_CNTL);
6464 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6465 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6466 	else
6467 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6468 	if (orig != data)
6469 		WREG32(RLC_PG_CNTL, data);
6470 }
6471 
6472 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6473 					  bool enable)
6474 {
6475 	u32 data, orig;
6476 
6477 	orig = data = RREG32(RLC_PG_CNTL);
6478 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6479 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6480 	else
6481 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6482 	if (orig != data)
6483 		WREG32(RLC_PG_CNTL, data);
6484 }
6485 
6486 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6487 {
6488 	u32 data, orig;
6489 
6490 	orig = data = RREG32(RLC_PG_CNTL);
6491 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6492 		data &= ~DISABLE_CP_PG;
6493 	else
6494 		data |= DISABLE_CP_PG;
6495 	if (orig != data)
6496 		WREG32(RLC_PG_CNTL, data);
6497 }
6498 
6499 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6500 {
6501 	u32 data, orig;
6502 
6503 	orig = data = RREG32(RLC_PG_CNTL);
6504 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6505 		data &= ~DISABLE_GDS_PG;
6506 	else
6507 		data |= DISABLE_GDS_PG;
6508 	if (orig != data)
6509 		WREG32(RLC_PG_CNTL, data);
6510 }
6511 
6512 #define CP_ME_TABLE_SIZE    96
6513 #define CP_ME_TABLE_OFFSET  2048
6514 #define CP_MEC_TABLE_OFFSET 4096
6515 
6516 void cik_init_cp_pg_table(struct radeon_device *rdev)
6517 {
6518 	volatile u32 *dst_ptr;
6519 	int me, i, max_me = 4;
6520 	u32 bo_offset = 0;
6521 	u32 table_offset, table_size;
6522 
6523 	if (rdev->family == CHIP_KAVERI)
6524 		max_me = 5;
6525 
6526 	if (rdev->rlc.cp_table_ptr == NULL)
6527 		return;
6528 
6529 	/* write the cp table buffer */
6530 	dst_ptr = rdev->rlc.cp_table_ptr;
6531 	for (me = 0; me < max_me; me++) {
6532 		if (rdev->new_fw) {
6533 			const __le32 *fw_data;
6534 			const struct gfx_firmware_header_v1_0 *hdr;
6535 
6536 			if (me == 0) {
6537 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6538 				fw_data = (const __le32 *)
6539 					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6540 				table_offset = le32_to_cpu(hdr->jt_offset);
6541 				table_size = le32_to_cpu(hdr->jt_size);
6542 			} else if (me == 1) {
6543 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6544 				fw_data = (const __le32 *)
6545 					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6546 				table_offset = le32_to_cpu(hdr->jt_offset);
6547 				table_size = le32_to_cpu(hdr->jt_size);
6548 			} else if (me == 2) {
6549 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6550 				fw_data = (const __le32 *)
6551 					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6552 				table_offset = le32_to_cpu(hdr->jt_offset);
6553 				table_size = le32_to_cpu(hdr->jt_size);
6554 			} else if (me == 3) {
6555 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6556 				fw_data = (const __le32 *)
6557 					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6558 				table_offset = le32_to_cpu(hdr->jt_offset);
6559 				table_size = le32_to_cpu(hdr->jt_size);
6560 			} else {
6561 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6562 				fw_data = (const __le32 *)
6563 					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6564 				table_offset = le32_to_cpu(hdr->jt_offset);
6565 				table_size = le32_to_cpu(hdr->jt_size);
6566 			}
6567 
6568 			for (i = 0; i < table_size; i++) {
6569 				dst_ptr[bo_offset + i] =
6570 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6571 			}
6572 			bo_offset += table_size;
6573 		} else {
6574 			const __be32 *fw_data;
6575 			table_size = CP_ME_TABLE_SIZE;
6576 
6577 			if (me == 0) {
6578 				fw_data = (const __be32 *)rdev->ce_fw->data;
6579 				table_offset = CP_ME_TABLE_OFFSET;
6580 			} else if (me == 1) {
6581 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6582 				table_offset = CP_ME_TABLE_OFFSET;
6583 			} else if (me == 2) {
6584 				fw_data = (const __be32 *)rdev->me_fw->data;
6585 				table_offset = CP_ME_TABLE_OFFSET;
6586 			} else {
6587 				fw_data = (const __be32 *)rdev->mec_fw->data;
6588 				table_offset = CP_MEC_TABLE_OFFSET;
6589 			}
6590 
6591 			for (i = 0; i < table_size; i++) {
6592 				dst_ptr[bo_offset + i] =
6593 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6594 			}
6595 			bo_offset += table_size;
6596 		}
6597 	}
6598 }
6599 
6600 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6601 				bool enable)
6602 {
6603 	u32 data, orig;
6604 
6605 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6606 		orig = data = RREG32(RLC_PG_CNTL);
6607 		data |= GFX_PG_ENABLE;
6608 		if (orig != data)
6609 			WREG32(RLC_PG_CNTL, data);
6610 
6611 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6612 		data |= AUTO_PG_EN;
6613 		if (orig != data)
6614 			WREG32(RLC_AUTO_PG_CTRL, data);
6615 	} else {
6616 		orig = data = RREG32(RLC_PG_CNTL);
6617 		data &= ~GFX_PG_ENABLE;
6618 		if (orig != data)
6619 			WREG32(RLC_PG_CNTL, data);
6620 
6621 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6622 		data &= ~AUTO_PG_EN;
6623 		if (orig != data)
6624 			WREG32(RLC_AUTO_PG_CTRL, data);
6625 
6626 		data = RREG32(DB_RENDER_CONTROL);
6627 	}
6628 }
6629 
6630 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6631 {
6632 	u32 mask = 0, tmp, tmp1;
6633 	int i;
6634 
6635 	mutex_lock(&rdev->grbm_idx_mutex);
6636 	cik_select_se_sh(rdev, se, sh);
6637 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6638 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6639 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6640 	mutex_unlock(&rdev->grbm_idx_mutex);
6641 
6642 	tmp &= 0xffff0000;
6643 
6644 	tmp |= tmp1;
6645 	tmp >>= 16;
6646 
6647 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6648 		mask <<= 1;
6649 		mask |= 1;
6650 	}
6651 
6652 	return (~tmp) & mask;
6653 }
6654 
6655 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6656 {
6657 	u32 i, j, k, active_cu_number = 0;
6658 	u32 mask, counter, cu_bitmap;
6659 	u32 tmp = 0;
6660 
6661 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6662 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6663 			mask = 1;
6664 			cu_bitmap = 0;
6665 			counter = 0;
6666 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
6667 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6668 					if (counter < 2)
6669 						cu_bitmap |= mask;
6670 					counter++;
6671 				}
6672 				mask <<= 1;
6673 			}
6674 
6675 			active_cu_number += counter;
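			/* pack this SE/SH's always-on CU mask into the register
			 * image: 16 bits per SE, 8 bits per SH within an SE
			 */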
6676 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6677 		}
6678 	}
6679 
6680 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6681 
6682 	tmp = RREG32(RLC_MAX_PG_CU);
6683 	tmp &= ~MAX_PU_CU_MASK;
6684 	tmp |= MAX_PU_CU(active_cu_number);
6685 	WREG32(RLC_MAX_PG_CU, tmp);
6686 }
6687 
6688 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6689 				       bool enable)
6690 {
6691 	u32 data, orig;
6692 
6693 	orig = data = RREG32(RLC_PG_CNTL);
6694 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6695 		data |= STATIC_PER_CU_PG_ENABLE;
6696 	else
6697 		data &= ~STATIC_PER_CU_PG_ENABLE;
6698 	if (orig != data)
6699 		WREG32(RLC_PG_CNTL, data);
6700 }
6701 
6702 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6703 					bool enable)
6704 {
6705 	u32 data, orig;
6706 
6707 	orig = data = RREG32(RLC_PG_CNTL);
6708 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6709 		data |= DYN_PER_CU_PG_ENABLE;
6710 	else
6711 		data &= ~DYN_PER_CU_PG_ENABLE;
6712 	if (orig != data)
6713 		WREG32(RLC_PG_CNTL, data);
6714 }
6715 
6716 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6717 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6718 
6719 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6720 {
6721 	u32 data, orig;
6722 	u32 i;
6723 
6724 	if (rdev->rlc.cs_data) {
6725 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6726 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6727 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6728 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6729 	} else {
6730 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6731 		for (i = 0; i < 3; i++)
6732 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6733 	}
6734 	if (rdev->rlc.reg_list) {
6735 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6736 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6737 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6738 	}
6739 
6740 	orig = data = RREG32(RLC_PG_CNTL);
6741 	data |= GFX_PG_SRC;
6742 	if (orig != data)
6743 		WREG32(RLC_PG_CNTL, data);
6744 
6745 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6746 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6747 
6748 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6749 	data &= ~IDLE_POLL_COUNT_MASK;
6750 	data |= IDLE_POLL_COUNT(0x60);
6751 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
6752 
6753 	data = 0x10101010;
6754 	WREG32(RLC_PG_DELAY, data);
6755 
6756 	data = RREG32(RLC_PG_DELAY_2);
6757 	data &= ~0xff;
6758 	data |= 0x3;
6759 	WREG32(RLC_PG_DELAY_2, data);
6760 
6761 	data = RREG32(RLC_AUTO_PG_CTRL);
6762 	data &= ~GRBM_REG_SGIT_MASK;
6763 	data |= GRBM_REG_SGIT(0x700);
6764 	WREG32(RLC_AUTO_PG_CTRL, data);
6765 
6766 }
6767 
6768 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6769 {
6770 	cik_enable_gfx_cgpg(rdev, enable);
6771 	cik_enable_gfx_static_mgpg(rdev, enable);
6772 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
6773 }
6774 
6775 u32 cik_get_csb_size(struct radeon_device *rdev)
6776 {
6777 	u32 count = 0;
6778 	const struct cs_section_def *sect = NULL;
6779 	const struct cs_extent_def *ext = NULL;
6780 
6781 	if (rdev->rlc.cs_data == NULL)
6782 		return 0;
6783 
6784 	/* begin clear state */
6785 	count += 2;
6786 	/* context control state */
6787 	count += 3;
6788 
6789 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6790 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6791 			if (sect->id == SECT_CONTEXT)
6792 				count += 2 + ext->reg_count;
6793 			else
6794 				return 0;
6795 		}
6796 	}
6797 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6798 	count += 4;
6799 	/* end clear state */
6800 	count += 2;
6801 	/* clear state */
6802 	count += 2;
6803 
6804 	return count;
6805 }
6806 
6807 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6808 {
6809 	u32 count = 0, i;
6810 	const struct cs_section_def *sect = NULL;
6811 	const struct cs_extent_def *ext = NULL;
6812 
6813 	if (rdev->rlc.cs_data == NULL)
6814 		return;
6815 	if (buffer == NULL)
6816 		return;
6817 
6818 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6819 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6820 
6821 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6822 	buffer[count++] = cpu_to_le32(0x80000000);
6823 	buffer[count++] = cpu_to_le32(0x80000000);
6824 
6825 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6826 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6827 			if (sect->id == SECT_CONTEXT) {
6828 				buffer[count++] =
6829 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6830 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6831 				for (i = 0; i < ext->reg_count; i++)
6832 					buffer[count++] = cpu_to_le32(ext->extent[i]);
6833 			} else {
6834 				return;
6835 			}
6836 		}
6837 	}
6838 
6839 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6840 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6841 	switch (rdev->family) {
6842 	case CHIP_BONAIRE:
6843 		buffer[count++] = cpu_to_le32(0x16000012);
6844 		buffer[count++] = cpu_to_le32(0x00000000);
6845 		break;
6846 	case CHIP_KAVERI:
6847 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6848 		buffer[count++] = cpu_to_le32(0x00000000);
6849 		break;
6850 	case CHIP_KABINI:
6851 	case CHIP_MULLINS:
6852 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6853 		buffer[count++] = cpu_to_le32(0x00000000);
6854 		break;
6855 	case CHIP_HAWAII:
6856 		buffer[count++] = cpu_to_le32(0x3a00161a);
6857 		buffer[count++] = cpu_to_le32(0x0000002e);
6858 		break;
6859 	default:
6860 		buffer[count++] = cpu_to_le32(0x00000000);
6861 		buffer[count++] = cpu_to_le32(0x00000000);
6862 		break;
6863 	}
6864 
6865 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6866 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6867 
6868 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6869 	buffer[count++] = cpu_to_le32(0);
6870 }
6871 
6872 static void cik_init_pg(struct radeon_device *rdev)
6873 {
6874 	if (rdev->pg_flags) {
6875 		cik_enable_sck_slowdown_on_pu(rdev, true);
6876 		cik_enable_sck_slowdown_on_pd(rdev, true);
6877 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6878 			cik_init_gfx_cgpg(rdev);
6879 			cik_enable_cp_pg(rdev, true);
6880 			cik_enable_gds_pg(rdev, true);
6881 		}
6882 		cik_init_ao_cu_mask(rdev);
6883 		cik_update_gfx_pg(rdev, true);
6884 	}
6885 }
6886 
6887 static void cik_fini_pg(struct radeon_device *rdev)
6888 {
6889 	if (rdev->pg_flags) {
6890 		cik_update_gfx_pg(rdev, false);
6891 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6892 			cik_enable_cp_pg(rdev, false);
6893 			cik_enable_gds_pg(rdev, false);
6894 		}
6895 	}
6896 }
6897 
6898 /*
6899  * Interrupts
6900  * Starting with r6xx, interrupts are handled via a ring buffer.
6901  * Ring buffers are areas of GPU accessible memory that the GPU
6902  * writes interrupt vectors into and the host reads vectors out of.
6903  * There is a rptr (read pointer) that determines where the
6904  * host is currently reading, and a wptr (write pointer)
6905  * which determines where the GPU has written.  When the
6906  * pointers are equal, the ring is idle.  When the GPU
6907  * writes vectors to the ring buffer, it increments the
6908  * wptr.  When there is an interrupt, the host then starts
6909  * wptr.  When there is an interrupt, the host starts
6910  * fetching vectors and processing them until the pointers are
6911  * equal again, at which point it updates the rptr.
6912 
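
/*
 * A minimal sketch of the host-side consume loop implied by the
 * rptr/wptr protocol above (illustrative only; the names are generic,
 * the real implementation is cik_irq_process() further down):
 *
 *	wptr = fetch_wptr();			// writeback buffer or IH_RB_WPTR
 *	while (rptr != wptr) {			// ring not empty
 *		handle_vector(&ring[rptr / 4]);	// one 16-byte vector
 *		rptr = (rptr + 16) & ptr_mask;	// advance and wrap
 *	}
 *	write_rptr(rptr);			// hand the slots back to the GPU
 */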
6913 /**
6914  * cik_enable_interrupts - Enable the interrupt ring buffer
6915  *
6916  * @rdev: radeon_device pointer
6917  *
6918  * Enable the interrupt ring buffer (CIK).
6919  */
6920 static void cik_enable_interrupts(struct radeon_device *rdev)
6921 {
6922 	u32 ih_cntl = RREG32(IH_CNTL);
6923 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6924 
6925 	ih_cntl |= ENABLE_INTR;
6926 	ih_rb_cntl |= IH_RB_ENABLE;
6927 	WREG32(IH_CNTL, ih_cntl);
6928 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6929 	rdev->ih.enabled = true;
6930 }
6931 
6932 /**
6933  * cik_disable_interrupts - Disable the interrupt ring buffer
6934  *
6935  * @rdev: radeon_device pointer
6936  *
6937  * Disable the interrupt ring buffer (CIK).
6938  */
6939 static void cik_disable_interrupts(struct radeon_device *rdev)
6940 {
6941 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6942 	u32 ih_cntl = RREG32(IH_CNTL);
6943 
6944 	ih_rb_cntl &= ~IH_RB_ENABLE;
6945 	ih_cntl &= ~ENABLE_INTR;
6946 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6947 	WREG32(IH_CNTL, ih_cntl);
6948 	/* set rptr, wptr to 0 */
6949 	WREG32(IH_RB_RPTR, 0);
6950 	WREG32(IH_RB_WPTR, 0);
6951 	rdev->ih.enabled = false;
6952 	rdev->ih.rptr = 0;
6953 }
6954 
6955 /**
6956  * cik_disable_interrupt_state - Disable all interrupt sources
6957  *
6958  * @rdev: radeon_device pointer
6959  *
6960  * Clear all interrupt enable bits used by the driver (CIK).
6961  */
6962 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6963 {
6964 	u32 tmp;
6965 
6966 	/* gfx ring */
6967 	tmp = RREG32(CP_INT_CNTL_RING0) &
6968 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6969 	WREG32(CP_INT_CNTL_RING0, tmp);
6970 	/* sdma */
6971 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6972 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6973 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6974 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6975 	/* compute queues */
6976 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6977 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6978 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6979 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6980 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6981 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6982 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6983 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6984 	/* grbm */
6985 	WREG32(GRBM_INT_CNTL, 0);
6986 	/* SRBM */
6987 	WREG32(SRBM_INT_CNTL, 0);
6988 	/* vline/vblank, etc. */
6989 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6990 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6991 	if (rdev->num_crtc >= 4) {
6992 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6993 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6994 	}
6995 	if (rdev->num_crtc >= 6) {
6996 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6997 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6998 	}
6999 	/* pflip */
7000 	if (rdev->num_crtc >= 2) {
7001 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7002 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7003 	}
7004 	if (rdev->num_crtc >= 4) {
7005 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7006 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7007 	}
7008 	if (rdev->num_crtc >= 6) {
7009 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7010 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7011 	}
7012 
7013 	/* dac hotplug */
7014 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7015 
7016 	/* digital hotplug */
7017 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7018 	WREG32(DC_HPD1_INT_CONTROL, tmp);
7019 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7020 	WREG32(DC_HPD2_INT_CONTROL, tmp);
7021 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7022 	WREG32(DC_HPD3_INT_CONTROL, tmp);
7023 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7024 	WREG32(DC_HPD4_INT_CONTROL, tmp);
7025 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7026 	WREG32(DC_HPD5_INT_CONTROL, tmp);
7027 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7028 	WREG32(DC_HPD6_INT_CONTROL, tmp);
7029 
7030 }
7031 
7032 /**
7033  * cik_irq_init - init and enable the interrupt ring
7034  *
7035  * @rdev: radeon_device pointer
7036  *
7037  * Allocate a ring buffer for the interrupt controller,
7038  * enable the RLC, disable interrupts, then set up the
7039  * IH ring buffer and enable it (CIK).
7040  * Called at device load and resume.
7041  * Returns 0 for success, errors for failure.
7042  */
7043 static int cik_irq_init(struct radeon_device *rdev)
7044 {
7045 	int ret = 0;
7046 	int rb_bufsz;
7047 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7048 
7049 	/* allocate ring */
7050 	ret = r600_ih_ring_alloc(rdev);
7051 	if (ret)
7052 		return ret;
7053 
7054 	/* disable irqs */
7055 	cik_disable_interrupts(rdev);
7056 
7057 	/* init rlc */
7058 	ret = cik_rlc_resume(rdev);
7059 	if (ret) {
7060 		r600_ih_ring_fini(rdev);
7061 		return ret;
7062 	}
7063 
7064 	/* setup interrupt control */
7065 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
7066 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7067 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
7068 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7069 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7070 	 */
7071 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7072 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7073 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7074 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
7075 
7076 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7077 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7078 
7079 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7080 		      IH_WPTR_OVERFLOW_CLEAR |
7081 		      (rb_bufsz << 1));
7082 
7083 	if (rdev->wb.enabled)
7084 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7085 
7086 	/* set the writeback address whether it's enabled or not */
7087 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7088 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7089 
7090 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7091 
7092 	/* set rptr, wptr to 0 */
7093 	WREG32(IH_RB_RPTR, 0);
7094 	WREG32(IH_RB_WPTR, 0);
7095 
7096 	/* Default settings for IH_CNTL (disabled at first) */
7097 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7098 	/* RPTR_REARM only works if msi's are enabled */
7099 	if (rdev->msi_enabled)
7100 		ih_cntl |= RPTR_REARM;
7101 	WREG32(IH_CNTL, ih_cntl);
7102 
7103 	/* force the active interrupt state to all disabled */
7104 	cik_disable_interrupt_state(rdev);
7105 
7106 	pci_set_master(rdev->pdev);
7107 
7108 	/* enable irqs */
7109 	cik_enable_interrupts(rdev);
7110 
7111 	return ret;
7112 }
7113 
7114 /**
7115  * cik_irq_set - enable/disable interrupt sources
7116  *
7117  * @rdev: radeon_device pointer
7118  *
7119  * Enable interrupt sources on the GPU (vblanks, hpd,
7120  * etc.) (CIK).
7121  * Returns 0 for success, errors for failure.
7122  */
7123 int cik_irq_set(struct radeon_device *rdev)
7124 {
7125 	u32 cp_int_cntl;
7126 	u32 cp_m1p0;
7127 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7128 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7129 	u32 grbm_int_cntl = 0;
7130 	u32 dma_cntl, dma_cntl1;
7131 
7132 	if (!rdev->irq.installed) {
7133 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7134 		return -EINVAL;
7135 	}
7136 	/* don't enable anything if the ih is disabled */
7137 	if (!rdev->ih.enabled) {
7138 		cik_disable_interrupts(rdev);
7139 		/* force the active interrupt state to all disabled */
7140 		cik_disable_interrupt_state(rdev);
7141 		return 0;
7142 	}
7143 
7144 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7145 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7146 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7147 
7148 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7149 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7150 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7151 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7152 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7153 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7154 
7155 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7156 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7157 
7158 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7159 
7160 	/* enable CP interrupts on all rings */
7161 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7162 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7163 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7164 	}
7165 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7166 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7167 		DRM_DEBUG("cik_irq_set: sw int cp1\n");
7168 		if (ring->me == 1) {
7169 			switch (ring->pipe) {
7170 			case 0:
7171 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7172 				break;
7173 			default:
7174 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7175 				break;
7176 			}
7177 		} else {
7178 			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
7179 		}
7180 	}
7181 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7182 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7183 		DRM_DEBUG("cik_irq_set: sw int cp2\n");
7184 		if (ring->me == 1) {
7185 			switch (ring->pipe) {
7186 			case 0:
7187 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7188 				break;
7189 			default:
7190 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7191 				break;
7192 			}
7193 		} else {
7194 			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
7195 		}
7196 	}
7197 
7198 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7199 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7200 		dma_cntl |= TRAP_ENABLE;
7201 	}
7202 
7203 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7204 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7205 		dma_cntl1 |= TRAP_ENABLE;
7206 	}
7207 
7208 	if (rdev->irq.crtc_vblank_int[0] ||
7209 	    atomic_read(&rdev->irq.pflip[0])) {
7210 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7211 		crtc1 |= VBLANK_INTERRUPT_MASK;
7212 	}
7213 	if (rdev->irq.crtc_vblank_int[1] ||
7214 	    atomic_read(&rdev->irq.pflip[1])) {
7215 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7216 		crtc2 |= VBLANK_INTERRUPT_MASK;
7217 	}
7218 	if (rdev->irq.crtc_vblank_int[2] ||
7219 	    atomic_read(&rdev->irq.pflip[2])) {
7220 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7221 		crtc3 |= VBLANK_INTERRUPT_MASK;
7222 	}
7223 	if (rdev->irq.crtc_vblank_int[3] ||
7224 	    atomic_read(&rdev->irq.pflip[3])) {
7225 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7226 		crtc4 |= VBLANK_INTERRUPT_MASK;
7227 	}
7228 	if (rdev->irq.crtc_vblank_int[4] ||
7229 	    atomic_read(&rdev->irq.pflip[4])) {
7230 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7231 		crtc5 |= VBLANK_INTERRUPT_MASK;
7232 	}
7233 	if (rdev->irq.crtc_vblank_int[5] ||
7234 	    atomic_read(&rdev->irq.pflip[5])) {
7235 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7236 		crtc6 |= VBLANK_INTERRUPT_MASK;
7237 	}
7238 	if (rdev->irq.hpd[0]) {
7239 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7240 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7241 	}
7242 	if (rdev->irq.hpd[1]) {
7243 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7244 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7245 	}
7246 	if (rdev->irq.hpd[2]) {
7247 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7248 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7249 	}
7250 	if (rdev->irq.hpd[3]) {
7251 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7252 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7253 	}
7254 	if (rdev->irq.hpd[4]) {
7255 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7256 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7257 	}
7258 	if (rdev->irq.hpd[5]) {
7259 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7260 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7261 	}
7262 
7263 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7264 
7265 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7266 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7267 
7268 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7269 
7270 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7271 
7272 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7273 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7274 	if (rdev->num_crtc >= 4) {
7275 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7276 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7277 	}
7278 	if (rdev->num_crtc >= 6) {
7279 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7280 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7281 	}
7282 
7283 	if (rdev->num_crtc >= 2) {
7284 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7285 		       GRPH_PFLIP_INT_MASK);
7286 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7287 		       GRPH_PFLIP_INT_MASK);
7288 	}
7289 	if (rdev->num_crtc >= 4) {
7290 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7291 		       GRPH_PFLIP_INT_MASK);
7292 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7293 		       GRPH_PFLIP_INT_MASK);
7294 	}
7295 	if (rdev->num_crtc >= 6) {
7296 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7297 		       GRPH_PFLIP_INT_MASK);
7298 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7299 		       GRPH_PFLIP_INT_MASK);
7300 	}
7301 
7302 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7303 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7304 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7305 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7306 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7307 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7308 
7309 	/* posting read */
7310 	RREG32(SRBM_STATUS);
7311 
7312 	return 0;
7313 }
7314 
7315 /**
7316  * cik_irq_ack - ack interrupt sources
7317  *
7318  * @rdev: radeon_device pointer
7319  *
7320  * Ack interrupt sources on the GPU (vblanks, hpd,
7321  * etc.) (CIK).  Certain interrupt sources are sw
7322  * generated and do not require an explicit ack.
7323  */
7324 static inline void cik_irq_ack(struct radeon_device *rdev)
7325 {
7326 	u32 tmp;
7327 
7328 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7329 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7330 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7331 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7332 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7333 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7334 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7335 
7336 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7337 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7338 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7339 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7340 	if (rdev->num_crtc >= 4) {
7341 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7342 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7343 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7344 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7345 	}
7346 	if (rdev->num_crtc >= 6) {
7347 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7348 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7349 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7350 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7351 	}
7352 
7353 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7354 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7355 		       GRPH_PFLIP_INT_CLEAR);
7356 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7357 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7358 		       GRPH_PFLIP_INT_CLEAR);
7359 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7360 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7361 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7362 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7363 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7364 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7365 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7366 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7367 
7368 	if (rdev->num_crtc >= 4) {
7369 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7370 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7371 			       GRPH_PFLIP_INT_CLEAR);
7372 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7373 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7374 			       GRPH_PFLIP_INT_CLEAR);
7375 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7376 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7377 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7378 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7379 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7380 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7381 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7382 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7383 	}
7384 
7385 	if (rdev->num_crtc >= 6) {
7386 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7387 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7388 			       GRPH_PFLIP_INT_CLEAR);
7389 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7390 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7391 			       GRPH_PFLIP_INT_CLEAR);
7392 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7393 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7394 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7395 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7396 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7397 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7398 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7399 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7400 	}
7401 
7402 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7403 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7404 		tmp |= DC_HPDx_INT_ACK;
7405 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7406 	}
7407 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7408 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7409 		tmp |= DC_HPDx_INT_ACK;
7410 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7411 	}
7412 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7413 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7414 		tmp |= DC_HPDx_INT_ACK;
7415 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7416 	}
7417 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7418 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7419 		tmp |= DC_HPDx_INT_ACK;
7420 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7421 	}
7422 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7423 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7424 		tmp |= DC_HPDx_INT_ACK;
7425 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7426 	}
7427 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7428 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7429 		tmp |= DC_HPDx_INT_ACK;
7430 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7431 	}
7432 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7433 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7434 		tmp |= DC_HPDx_RX_INT_ACK;
7435 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7436 	}
7437 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7438 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7439 		tmp |= DC_HPDx_RX_INT_ACK;
7440 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7441 	}
7442 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7443 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7444 		tmp |= DC_HPDx_RX_INT_ACK;
7445 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7446 	}
7447 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7448 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7449 		tmp |= DC_HPDx_RX_INT_ACK;
7450 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7451 	}
7452 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7453 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7454 		tmp |= DC_HPDx_RX_INT_ACK;
7455 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7456 	}
7457 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7458 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7459 		tmp |= DC_HPDx_RX_INT_ACK;
7460 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7461 	}
7462 }
7463 
7464 /**
7465  * cik_irq_disable - disable interrupts
7466  *
7467  * @rdev: radeon_device pointer
7468  *
7469  * Disable interrupts on the hw (CIK).
7470  */
7471 static void cik_irq_disable(struct radeon_device *rdev)
7472 {
7473 	cik_disable_interrupts(rdev);
7474 	/* Wait and acknowledge irq */
7475 	mdelay(1);
7476 	cik_irq_ack(rdev);
7477 	cik_disable_interrupt_state(rdev);
7478 }
7479 
7480 /**
7481  * cik_irq_suspend - disable interrupts for suspend
7482  *
7483  * @rdev: radeon_device pointer
7484  *
7485  * Disable interrupts and stop the RLC (CIK).
7486  * Used for suspend.
7487  */
7488 static void cik_irq_suspend(struct radeon_device *rdev)
7489 {
7490 	cik_irq_disable(rdev);
7491 	cik_rlc_stop(rdev);
7492 }
7493 
7494 /**
7495  * cik_irq_fini - tear down interrupt support
7496  *
7497  * @rdev: radeon_device pointer
7498  *
7499  * Disable interrupts on the hw and free the IH ring
7500  * buffer (CIK).
7501  * Used for driver unload.
7502  */
7503 static void cik_irq_fini(struct radeon_device *rdev)
7504 {
7505 	cik_irq_suspend(rdev);
7506 	r600_ih_ring_fini(rdev);
7507 }
7508 
7509 /**
7510  * cik_get_ih_wptr - get the IH ring buffer wptr
7511  *
7512  * @rdev: radeon_device pointer
7513  *
7514  * Get the IH ring buffer wptr from either the register
7515  * or the writeback memory buffer (CIK).  Also check for
7516  * ring buffer overflow and deal with it.
7517  * Used by cik_irq_process().
7518  * Returns the value of the wptr.
7519  */
7520 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7521 {
7522 	u32 wptr, tmp;
7523 
7524 	if (rdev->wb.enabled)
7525 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7526 	else
7527 		wptr = RREG32(IH_RB_WPTR);
7528 
7529 	if (wptr & RB_OVERFLOW) {
7530 		wptr &= ~RB_OVERFLOW;
7531 		/* When a ring buffer overflow happens, start parsing interrupts
7532 		 * from the last vector that was not overwritten (wptr + 16).
7533 		 * Hopefully this allows us to catch up.
7534 		 */
7535 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7536 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7537 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7538 		tmp = RREG32(IH_RB_CNTL);
7539 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7540 		WREG32(IH_RB_CNTL, tmp);
7541 	}
7542 	return (wptr & rdev->ih.ptr_mask);
7543 }
7544 
7545 /*        CIK IV Ring
7546  * Each IV ring entry is 128 bits:
7547  * [7:0]    - interrupt source id
7548  * [31:8]   - reserved
7549  * [59:32]  - interrupt source data
7550  * [63:60]  - reserved
7551  * [71:64]  - RINGID
7552  *            CP:
7553  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7554  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7555  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7556  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7557  *            PIPE_ID - ME0 0=3D
7558  *                    - ME1&2 compute dispatcher (4 pipes each)
7559  *            SDMA:
7560  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7561  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7562  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7563  * [79:72]  - VMID
7564  * [95:80]  - PASID
7565  * [127:96] - reserved
7566  */
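
/*
 * Decoding sketch for one IV ring entry (illustrative only; the field
 * extraction below mirrors what cik_irq_process() actually does):
 *
 *	src_id   = le32_to_cpu(ring[i + 0]) & 0xff;	 // [7:0]
 *	src_data = le32_to_cpu(ring[i + 1]) & 0xfffffff; // [59:32]
 *	ring_id  = le32_to_cpu(ring[i + 2]) & 0xff;	 // [71:64]
 *	// for CP sources, RINGID packs ME_ID/PIPE_ID/QUEUE_ID:
 *	me_id    = (ring_id & 0x60) >> 5;
 *	pipe_id  = (ring_id & 0x18) >> 3;
 *	queue_id = (ring_id & 0x7)  >> 0;
 */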
7567 /**
7568  * cik_irq_process - interrupt handler
7569  *
7570  * @rdev: radeon_device pointer
7571  *
7572  * Interrupt handler (CIK).  Walk the IH ring,
7573  * ack interrupts and schedule work to handle
7574  * interrupt events.
7575  * Returns irq process return code.
7576  */
7577 irqreturn_t cik_irq_process(struct radeon_device *rdev)
7578 {
7579 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7580 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7581 	u32 wptr;
7582 	u32 rptr;
7583 	u32 src_id, src_data, ring_id;
7584 	u8 me_id, pipe_id, queue_id;
7585 	u32 ring_index;
7586 	bool queue_hotplug = false;
7587 	bool queue_dp = false;
7588 	bool queue_reset = false;
7589 	u32 addr, status, mc_client;
7590 	bool queue_thermal = false;
7591 
7592 	if (!rdev->ih.enabled || rdev->shutdown)
7593 		return IRQ_NONE;
7594 
7595 	wptr = cik_get_ih_wptr(rdev);
7596 
7597 restart_ih:
7598 	/* is somebody else already processing irqs? */
7599 	if (atomic_xchg(&rdev->ih.lock, 1))
7600 		return IRQ_NONE;
7601 
7602 	rptr = rdev->ih.rptr;
7603 	DRM_DEBUG_VBLANK("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7604 
7605 	/* Order reading of wptr vs. reading of IH ring data */
7606 	rmb();
7607 
7608 	/* display interrupts */
7609 	cik_irq_ack(rdev);
7610 
7611 	while (rptr != wptr) {
7612 		/* wptr/rptr are in bytes! */
7613 		ring_index = rptr / 4;
7614 
7615 #pragma GCC diagnostic push
7616 #pragma GCC diagnostic ignored "-Wcast-qual"
7617 		radeon_kfd_interrupt(rdev,
7618 				(const void *) &rdev->ih.ring[ring_index]);
7619 #pragma GCC diagnostic pop
7620 
7621 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7622 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7623 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7624 
7625 		switch (src_id) {
7626 		case 1: /* D1 vblank/vline */
7627 			switch (src_data) {
7628 			case 0: /* D1 vblank */
7629 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7630 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7631 
7632 				if (rdev->irq.crtc_vblank_int[0]) {
7633 					drm_handle_vblank(rdev->ddev, 0);
7634 					rdev->pm.vblank_sync = true;
7635 					wake_up(&rdev->irq.vblank_queue);
7636 				}
7637 				if (atomic_read(&rdev->irq.pflip[0]))
7638 					radeon_crtc_handle_vblank(rdev, 0);
7639 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7640 				DRM_DEBUG_VBLANK("IH: D1 vblank\n");
7641 
7642 				break;
7643 			case 1: /* D1 vline */
7644 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7645 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7646 
7647 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7648 				DRM_DEBUG_VBLANK("IH: D1 vline\n");
7649 
7650 				break;
7651 			default:
7652 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7653 				break;
7654 			}
7655 			break;
7656 		case 2: /* D2 vblank/vline */
7657 			switch (src_data) {
7658 			case 0: /* D2 vblank */
7659 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7660 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7661 
7662 				if (rdev->irq.crtc_vblank_int[1]) {
7663 					drm_handle_vblank(rdev->ddev, 1);
7664 					rdev->pm.vblank_sync = true;
7665 					wake_up(&rdev->irq.vblank_queue);
7666 				}
7667 				if (atomic_read(&rdev->irq.pflip[1]))
7668 					radeon_crtc_handle_vblank(rdev, 1);
7669 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7670 				DRM_DEBUG_VBLANK("IH: D2 vblank\n");
7671 
7672 				break;
7673 			case 1: /* D2 vline */
7674 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7675 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7676 
7677 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7678 				DRM_DEBUG_VBLANK("IH: D2 vline\n");
7679 
7680 				break;
7681 			default:
7682 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7683 				break;
7684 			}
7685 			break;
7686 		case 3: /* D3 vblank/vline */
7687 			switch (src_data) {
7688 			case 0: /* D3 vblank */
7689 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7690 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7691 
7692 				if (rdev->irq.crtc_vblank_int[2]) {
7693 					drm_handle_vblank(rdev->ddev, 2);
7694 					rdev->pm.vblank_sync = true;
7695 					wake_up(&rdev->irq.vblank_queue);
7696 				}
7697 				if (atomic_read(&rdev->irq.pflip[2]))
7698 					radeon_crtc_handle_vblank(rdev, 2);
7699 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7700 				DRM_DEBUG_VBLANK("IH: D3 vblank\n");
7701 
7702 				break;
7703 			case 1: /* D3 vline */
7704 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7705 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7706 
7707 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7708 				DRM_DEBUG_VBLANK("IH: D3 vline\n");
7709 
7710 				break;
7711 			default:
7712 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7713 				break;
7714 			}
7715 			break;
7716 		case 4: /* D4 vblank/vline */
7717 			switch (src_data) {
7718 			case 0: /* D4 vblank */
7719 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7720 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7721 
7722 				if (rdev->irq.crtc_vblank_int[3]) {
7723 					drm_handle_vblank(rdev->ddev, 3);
7724 					rdev->pm.vblank_sync = true;
7725 					wake_up(&rdev->irq.vblank_queue);
7726 				}
7727 				if (atomic_read(&rdev->irq.pflip[3]))
7728 					radeon_crtc_handle_vblank(rdev, 3);
7729 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7730 				DRM_DEBUG_VBLANK("IH: D4 vblank\n");
7731 
7732 				break;
7733 			case 1: /* D4 vline */
7734 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7735 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7736 
7737 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7738 				DRM_DEBUG_VBLANK("IH: D4 vline\n");
7739 
7740 				break;
7741 			default:
7742 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7743 				break;
7744 			}
7745 			break;
7746 		case 5: /* D5 vblank/vline */
7747 			switch (src_data) {
7748 			case 0: /* D5 vblank */
7749 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7750 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7751 
7752 				if (rdev->irq.crtc_vblank_int[4]) {
7753 					drm_handle_vblank(rdev->ddev, 4);
7754 					rdev->pm.vblank_sync = true;
7755 					wake_up(&rdev->irq.vblank_queue);
7756 				}
7757 				if (atomic_read(&rdev->irq.pflip[4]))
7758 					radeon_crtc_handle_vblank(rdev, 4);
7759 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7760 				DRM_DEBUG_VBLANK("IH: D5 vblank\n");
7761 
7762 				break;
7763 			case 1: /* D5 vline */
7764 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7765 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7766 
7767 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7768 				DRM_DEBUG_VBLANK("IH: D5 vline\n");
7769 
7770 				break;
7771 			default:
7772 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7773 				break;
7774 			}
7775 			break;
7776 		case 6: /* D6 vblank/vline */
7777 			switch (src_data) {
7778 			case 0: /* D6 vblank */
7779 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7780 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7781 
7782 				if (rdev->irq.crtc_vblank_int[5]) {
7783 					drm_handle_vblank(rdev->ddev, 5);
7784 					rdev->pm.vblank_sync = true;
7785 					wake_up(&rdev->irq.vblank_queue);
7786 				}
7787 				if (atomic_read(&rdev->irq.pflip[5]))
7788 					radeon_crtc_handle_vblank(rdev, 5);
7789 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7790 				DRM_DEBUG_VBLANK("IH: D6 vblank\n");
7791 
7792 				break;
7793 			case 1: /* D6 vline */
7794 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7795 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7796 
7797 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7798 				DRM_DEBUG_VBLANK("IH: D6 vline\n");
7799 
7800 				break;
7801 			default:
7802 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7803 				break;
7804 			}
7805 			break;
7806 		case 8: /* D1 page flip */
7807 		case 10: /* D2 page flip */
7808 		case 12: /* D3 page flip */
7809 		case 14: /* D4 page flip */
7810 		case 16: /* D5 page flip */
7811 		case 18: /* D6 page flip */
7812 			DRM_DEBUG_VBLANK("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7813 			if (radeon_use_pflipirq > 0)
7814 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7815 			break;
7816 		case 42: /* HPD hotplug */
7817 			switch (src_data) {
7818 			case 0:
7819 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7820 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7821 
7822 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7823 				queue_hotplug = true;
7824 				DRM_DEBUG("IH: HPD1\n");
7825 
7826 				break;
7827 			case 1:
7828 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7829 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7830 
7831 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7832 				queue_hotplug = true;
7833 				DRM_DEBUG("IH: HPD2\n");
7834 
7835 				break;
7836 			case 2:
7837 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7838 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7839 
7840 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7841 				queue_hotplug = true;
7842 				DRM_DEBUG("IH: HPD3\n");
7843 
7844 				break;
7845 			case 3:
7846 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7847 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7848 
7849 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7850 				queue_hotplug = true;
7851 				DRM_DEBUG("IH: HPD4\n");
7852 
7853 				break;
7854 			case 4:
7855 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7856 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7857 
7858 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7859 				queue_hotplug = true;
7860 				DRM_DEBUG("IH: HPD5\n");
7861 
7862 				break;
7863 			case 5:
7864 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7865 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7866 
7867 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7868 				queue_hotplug = true;
7869 				DRM_DEBUG("IH: HPD6\n");
7870 
7871 				break;
7872 			case 6:
7873 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7874 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7875 
7876 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7877 				queue_dp = true;
7878 				DRM_DEBUG("IH: HPD_RX 1\n");
7879 
7880 				break;
7881 			case 7:
7882 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7883 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7884 
7885 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7886 				queue_dp = true;
7887 				DRM_DEBUG("IH: HPD_RX 2\n");
7888 
7889 				break;
7890 			case 8:
7891 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7892 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7893 
7894 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7895 				queue_dp = true;
7896 				DRM_DEBUG("IH: HPD_RX 3\n");
7897 
7898 				break;
7899 			case 9:
7900 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7901 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7902 
7903 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7904 				queue_dp = true;
7905 				DRM_DEBUG("IH: HPD_RX 4\n");
7906 
7907 				break;
7908 			case 10:
7909 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7910 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7911 
7912 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7913 				queue_dp = true;
7914 				DRM_DEBUG("IH: HPD_RX 5\n");
7915 
7916 				break;
7917 			case 11:
7918 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7919 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7920 
7921 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7922 				queue_dp = true;
7923 				DRM_DEBUG("IH: HPD_RX 6\n");
7924 
7925 				break;
7926 			default:
7927 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7928 				break;
7929 			}
7930 			break;
7931 		case 96:
7932 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7933 			WREG32(SRBM_INT_ACK, 0x1);
7934 			break;
7935 		case 124: /* UVD */
7936 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7937 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7938 			break;
7939 		case 146:
7940 		case 147:
7941 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7942 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7943 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7944 			/* reset addr and status */
7945 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7946 			if (addr == 0x0 && status == 0x0)
7947 				break;
7948 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7949 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7950 				addr);
7951 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7952 				status);
7953 			cik_vm_decode_fault(rdev, status, addr, mc_client);
7954 			break;
7955 		case 167: /* VCE */
7956 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7957 			switch (src_data) {
7958 			case 0:
7959 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7960 				break;
7961 			case 1:
7962 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7963 				break;
7964 			default:
7965 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7966 				break;
7967 			}
7968 			break;
7969 		case 176: /* GFX RB CP_INT */
7970 		case 177: /* GFX IB CP_INT */
7971 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7972 			break;
7973 		case 181: /* CP EOP event */
7974 			DRM_DEBUG("IH: CP EOP\n");
7975 			/* XXX check the bitfield order! */
7976 			me_id = (ring_id & 0x60) >> 5;
7977 			pipe_id = (ring_id & 0x18) >> 3;
7978 			queue_id = (ring_id & 0x7) >> 0;
7979 			switch (me_id) {
7980 			case 0:
7981 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7982 				break;
7983 			case 1:
7984 			case 2:
7985 				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
7986 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7987 				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
7988 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7989 				break;
7990 			}
7991 			break;
7992 		case 184: /* CP Privileged reg access */
7993 			DRM_ERROR("Illegal register access in command stream\n");
7994 			/* XXX check the bitfield order! */
7995 			me_id = (ring_id & 0x60) >> 5;
7996 			pipe_id = (ring_id & 0x18) >> 3;
7997 			queue_id = (ring_id & 0x7) >> 0;
7998 			switch (me_id) {
7999 			case 0:
8000 				/* This results in a full GPU reset, but all we need to do is soft
8001 				 * reset the CP for gfx
8002 				 */
8003 				queue_reset = true;
8004 				break;
8005 			case 1:
8006 				/* XXX compute */
8007 				queue_reset = true;
8008 				break;
8009 			case 2:
8010 				/* XXX compute */
8011 				queue_reset = true;
8012 				break;
8013 			}
8014 			break;
8015 		case 185: /* CP Privileged inst */
8016 			DRM_ERROR("Illegal instruction in command stream\n");
8017 			/* XXX check the bitfield order! */
8018 			me_id = (ring_id & 0x60) >> 5;
8019 			pipe_id = (ring_id & 0x18) >> 3;
8020 			queue_id = (ring_id & 0x7) >> 0;
8021 			switch (me_id) {
8022 			case 0:
8023 				/* This results in a full GPU reset, but all we need to do is soft
8024 				 * reset the CP for gfx
8025 				 */
8026 				queue_reset = true;
8027 				break;
8028 			case 1:
8029 				/* XXX compute */
8030 				queue_reset = true;
8031 				break;
8032 			case 2:
8033 				/* XXX compute */
8034 				queue_reset = true;
8035 				break;
8036 			}
8037 			break;
8038 		case 224: /* SDMA trap event */
8039 			/* XXX check the bitfield order! */
8040 			me_id = (ring_id & 0x3) >> 0;
8041 			queue_id = (ring_id & 0xc) >> 2;
8042 			DRM_DEBUG("IH: SDMA trap\n");
8043 			switch (me_id) {
8044 			case 0:
8045 				switch (queue_id) {
8046 				case 0:
8047 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8048 					break;
8049 				case 1:
8050 					/* XXX compute */
8051 					break;
8052 				case 2:
8053 					/* XXX compute */
8054 					break;
8055 				}
8056 				break;
8057 			case 1:
8058 				switch (queue_id) {
8059 				case 0:
8060 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8061 					break;
8062 				case 1:
8063 					/* XXX compute */
8064 					break;
8065 				case 2:
8066 					/* XXX compute */
8067 					break;
8068 				}
8069 				break;
8070 			}
8071 			break;
8072 		case 230: /* thermal low to high */
8073 			DRM_DEBUG("IH: thermal low to high\n");
8074 			rdev->pm.dpm.thermal.high_to_low = false;
8075 			queue_thermal = true;
8076 			break;
8077 		case 231: /* thermal high to low */
8078 			DRM_DEBUG("IH: thermal high to low\n");
8079 			rdev->pm.dpm.thermal.high_to_low = true;
8080 			queue_thermal = true;
8081 			break;
8082 		case 233: /* GUI IDLE */
8083 			DRM_DEBUG("IH: GUI idle\n");
8084 			break;
8085 		case 241: /* SDMA Privileged inst */
8086 		case 247: /* SDMA Privileged inst */
8087 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8088 			/* XXX check the bitfield order! */
8089 			me_id = (ring_id & 0x3) >> 0;
8090 			queue_id = (ring_id & 0xc) >> 2;
8091 			switch (me_id) {
8092 			case 0:
8093 				switch (queue_id) {
8094 				case 0:
8095 					queue_reset = true;
8096 					break;
8097 				case 1:
8098 					/* XXX compute */
8099 					queue_reset = true;
8100 					break;
8101 				case 2:
8102 					/* XXX compute */
8103 					queue_reset = true;
8104 					break;
8105 				}
8106 				break;
8107 			case 1:
8108 				switch (queue_id) {
8109 				case 0:
8110 					queue_reset = true;
8111 					break;
8112 				case 1:
8113 					/* XXX compute */
8114 					queue_reset = true;
8115 					break;
8116 				case 2:
8117 					/* XXX compute */
8118 					queue_reset = true;
8119 					break;
8120 				}
8121 				break;
8122 			}
8123 			break;
8124 		default:
8125 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8126 			break;
8127 		}
8128 
8129 		/* wptr/rptr are in bytes! */
8130 		rptr += 16;
8131 		rptr &= rdev->ih.ptr_mask;
8132 		WREG32(IH_RB_RPTR, rptr);
8133 	}
8134 	if (queue_dp)
8135 		schedule_work(&rdev->dp_work);
8136 	if (queue_hotplug)
8137 		schedule_delayed_work(&rdev->hotplug_work, 0);
8138 	if (queue_reset) {
8139 		rdev->needs_reset = true;
8140 		wake_up_all(&rdev->fence_queue);
8141 	}
8142 	if (queue_thermal)
8143 		schedule_work(&rdev->pm.dpm.thermal.work);
8144 	rdev->ih.rptr = rptr;
8145 	atomic_set(&rdev->ih.lock, 0);
8146 
8147 	/* make sure wptr hasn't changed while processing */
8148 	wptr = cik_get_ih_wptr(rdev);
8149 	if (wptr != rptr)
8150 		goto restart_ih;
8151 
8152 	return IRQ_HANDLED;
8153 }
8154 
8155 /*
8156  * startup/shutdown callbacks
8157  */
8158 static void cik_uvd_init(struct radeon_device *rdev)
8159 {
8160 	int r;
8161 
8162 	if (!rdev->has_uvd)
8163 		return;
8164 
8165 	r = radeon_uvd_init(rdev);
8166 	if (r) {
8167 		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8168 		/*
8169 		 * At this point rdev->uvd.vcpu_bo is NULL, which makes
8170 		 * cik_uvd_start() fail early, so nothing happens there.
8171 		 * Going through that code would be pointless, hence we
8172 		 * disable UVD here.
8173 		 */
8174 		rdev->has_uvd = 0;
8175 		return;
8176 	}
8177 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8178 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8179 }
8180 
8181 static void cik_uvd_start(struct radeon_device *rdev)
8182 {
8183 	int r;
8184 
8185 	if (!rdev->has_uvd)
8186 		return;
8187 
8188 	r = radeon_uvd_resume(rdev);
8189 	if (r) {
8190 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8191 		goto error;
8192 	}
8193 	r = uvd_v4_2_resume(rdev);
8194 	if (r) {
8195 		dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8196 		goto error;
8197 	}
8198 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8199 	if (r) {
8200 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8201 		goto error;
8202 	}
8203 	return;
8204 
8205 error:
8206 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8207 }
8208 
8209 static void cik_uvd_resume(struct radeon_device *rdev)
8210 {
8211 	struct radeon_ring *ring;
8212 	int r;
8213 
8214 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8215 		return;
8216 
8217 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8218 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8219 	if (r) {
8220 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8221 		return;
8222 	}
8223 	r = uvd_v1_0_init(rdev);
8224 	if (r) {
8225 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8226 		return;
8227 	}
8228 }
8229 
8230 static void cik_vce_init(struct radeon_device *rdev)
8231 {
8232 	int r;
8233 
8234 	if (!rdev->has_vce)
8235 		return;
8236 
8237 	r = radeon_vce_init(rdev);
8238 	if (r) {
8239 		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8240 		/*
8241 		 * At this point rdev->vce.vcpu_bo is NULL, which makes
8242 		 * cik_vce_start() fail early, so nothing happens there.
8243 		 * Going through that code would be pointless, hence we
8244 		 * disable VCE here.
8245 		 */
8246 		rdev->has_vce = 0;
8247 		return;
8248 	}
8249 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8250 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8251 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8252 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8253 }
8254 
8255 static void cik_vce_start(struct radeon_device *rdev)
8256 {
8257 	int r;
8258 
8259 	if (!rdev->has_vce)
8260 		return;
8261 
8262 	r = radeon_vce_resume(rdev);
8263 	if (r) {
8264 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8265 		goto error;
8266 	}
8267 	r = vce_v2_0_resume(rdev);
8268 	if (r) {
8269 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8270 		goto error;
8271 	}
8272 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8273 	if (r) {
8274 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8275 		goto error;
8276 	}
8277 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8278 	if (r) {
8279 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8280 		goto error;
8281 	}
8282 	return;
8283 
8284 error:
8285 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8286 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8287 }
8288 
8289 static void cik_vce_resume(struct radeon_device *rdev)
8290 {
8291 	struct radeon_ring *ring;
8292 	int r;
8293 
8294 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8295 		return;
8296 
8297 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8298 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8299 	if (r) {
8300 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8301 		return;
8302 	}
8303 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8304 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8305 	if (r) {
8306 		dev_err(rdev->dev, "failed initializing VCE2 ring (%d).\n", r);
8307 		return;
8308 	}
8309 	r = vce_v1_0_init(rdev);
8310 	if (r) {
8311 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8312 		return;
8313 	}
8314 }
8315 
8316 /**
8317  * cik_startup - program the asic to a functional state
8318  *
8319  * @rdev: radeon_device pointer
8320  *
8321  * Programs the asic to a functional state (CIK).
8322  * Called by cik_init() and cik_resume().
8323  * Returns 0 for success, error for failure.
8324  */
8325 static int cik_startup(struct radeon_device *rdev)
8326 {
8327 	struct radeon_ring *ring;
8328 	u32 nop;
8329 	int r;
8330 
8331 	/* enable pcie gen2/3 link */
8332 	cik_pcie_gen3_enable(rdev);
8333 	/* enable aspm */
8334 	cik_program_aspm(rdev);
8335 
8336 	/* scratch needs to be initialized before MC */
8337 	r = r600_vram_scratch_init(rdev);
8338 	if (r)
8339 		return r;
8340 
8341 	cik_mc_program(rdev);
8342 
8343 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8344 		r = ci_mc_load_microcode(rdev);
8345 		if (r) {
8346 			DRM_ERROR("Failed to load MC firmware!\n");
8347 			return r;
8348 		}
8349 	}
8350 
8351 	r = cik_pcie_gart_enable(rdev);
8352 	if (r)
8353 		return r;
8354 	cik_gpu_init(rdev);
8355 
8356 	/* allocate rlc buffers */
8357 	if (rdev->flags & RADEON_IS_IGP) {
8358 		if (rdev->family == CHIP_KAVERI) {
8359 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8360 			rdev->rlc.reg_list_size =
8361 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8362 		} else {
8363 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8364 			rdev->rlc.reg_list_size =
8365 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8366 		}
8367 	}
8368 	rdev->rlc.cs_data = ci_cs_data;
8369 	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
8370 	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
8371 	r = sumo_rlc_init(rdev);
8372 	if (r) {
8373 		DRM_ERROR("Failed to init rlc BOs!\n");
8374 		return r;
8375 	}
8376 
8377 	/* allocate wb buffer */
8378 	r = radeon_wb_init(rdev);
8379 	if (r)
8380 		return r;
8381 
8382 	/* allocate mec buffers */
8383 	r = cik_mec_init(rdev);
8384 	if (r) {
8385 		DRM_ERROR("Failed to init MEC BOs!\n");
8386 		return r;
8387 	}
8388 
8389 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8390 	if (r) {
8391 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8392 		return r;
8393 	}
8394 
8395 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8396 	if (r) {
8397 		dev_err(rdev->dev, "failed initializing CP1 fences (%d).\n", r);
8398 		return r;
8399 	}
8400 
8401 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8402 	if (r) {
8403 		dev_err(rdev->dev, "failed initializing CP2 fences (%d).\n", r);
8404 		return r;
8405 	}
8406 
8407 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8408 	if (r) {
8409 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8410 		return r;
8411 	}
8412 
8413 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8414 	if (r) {
8415 		dev_err(rdev->dev, "failed initializing DMA1 fences (%d).\n", r);
8416 		return r;
8417 	}
8418 
8419 	cik_uvd_start(rdev);
8420 	cik_vce_start(rdev);
8421 
8422 	/* Enable IRQ */
8423 	if (!rdev->irq.installed) {
8424 		r = radeon_irq_kms_init(rdev);
8425 		if (r)
8426 			return r;
8427 	}
8428 
8429 	r = cik_irq_init(rdev);
8430 	if (r) {
8431 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8432 		radeon_irq_kms_fini(rdev);
8433 		return r;
8434 	}
8435 	cik_irq_set(rdev);
8436 
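	/*
	 * Editorial note: PACKET3_NOP with a count of 0x3FFF is the
	 * variable-length type-3 nop used to pad the ring.  Older Hawaii
	 * microcode (no rdev->new_fw) reportedly does not handle it
	 * correctly, so the one-dword type-2 nop (RADEON_CP_PACKET2) is
	 * used there instead.
	 */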
8437 	if (rdev->family == CHIP_HAWAII) {
8438 		if (rdev->new_fw)
8439 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8440 		else
8441 			nop = RADEON_CP_PACKET2;
8442 	} else {
8443 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8444 	}
8445 
8446 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8447 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8448 			     nop);
8449 	if (r)
8450 		return r;
8451 
8452 	/* set up the compute queues */
8453 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8454 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8455 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8456 			     nop);
8457 	if (r)
8458 		return r;
8459 	ring->me = 1; /* first MEC */
8460 	ring->pipe = 0; /* first pipe */
8461 	ring->queue = 0; /* first queue */
8462 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8463 
8464 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8465 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8466 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8467 			     nop);
8468 	if (r)
8469 		return r;
8470 	/* dGPUs only have 1 MEC */
8471 	ring->me = 1; /* first MEC */
8472 	ring->pipe = 0; /* first pipe */
8473 	ring->queue = 1; /* second queue */
8474 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8475 
8476 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8477 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8478 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8479 	if (r)
8480 		return r;
8481 
8482 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8483 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8484 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8485 	if (r)
8486 		return r;
8487 
8488 	r = cik_cp_resume(rdev);
8489 	if (r)
8490 		return r;
8491 
8492 	r = cik_sdma_resume(rdev);
8493 	if (r)
8494 		return r;
8495 
8496 	cik_uvd_resume(rdev);
8497 	cik_vce_resume(rdev);
8498 
8499 	r = radeon_ib_pool_init(rdev);
8500 	if (r) {
8501 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8502 		return r;
8503 	}
8504 
8505 	r = radeon_vm_manager_init(rdev);
8506 	if (r) {
8507 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8508 		return r;
8509 	}
8510 
8511 	r = radeon_audio_init(rdev);
8512 	if (r)
8513 		return r;
8514 
8515 	r = radeon_kfd_resume(rdev);
8516 	if (r)
8517 		return r;
8518 
8519 	return 0;
8520 }
8521 
8522 /**
8523  * cik_resume - resume the asic to a functional state
8524  *
8525  * @rdev: radeon_device pointer
8526  *
8527  * Programs the asic to a functional state (CIK).
8528  * Called at resume.
8529  * Returns 0 for success, error for failure.
8530  */
8531 int cik_resume(struct radeon_device *rdev)
8532 {
8533 	int r;
8534 
8535 	/* post card */
8536 	atom_asic_init(rdev->mode_info.atom_context);
8537 
8538 	/* init golden registers */
8539 	cik_init_golden_registers(rdev);
8540 
8541 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8542 		radeon_pm_resume(rdev);
8543 
8544 	rdev->accel_working = true;
8545 	r = cik_startup(rdev);
8546 	if (r) {
8547 		DRM_ERROR("cik startup failed on resume\n");
8548 		rdev->accel_working = false;
8549 		return r;
8550 	}
8551 
8552 	return r;
8554 }
8555 
8556 /**
8557  * cik_suspend - suspend the asic
8558  *
8559  * @rdev: radeon_device pointer
8560  *
8561  * Bring the chip into a state suitable for suspend (CIK).
8562  * Called at suspend.
8563  * Returns 0 for success.
8564  */
8565 int cik_suspend(struct radeon_device *rdev)
8566 {
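	/* Teardown order (editorial note): quiesce users of the GPU
	 * (kfd, pm, audio, vm manager) first, then halt the CP and SDMA
	 * engines, and only then tear down IRQs, writeback and the GART
	 * that back them -- roughly the reverse of cik_startup().
	 */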
8567 	radeon_kfd_suspend(rdev);
8568 	radeon_pm_suspend(rdev);
8569 	radeon_audio_fini(rdev);
8570 	radeon_vm_manager_fini(rdev);
8571 	cik_cp_enable(rdev, false);
8572 	cik_sdma_enable(rdev, false);
8573 	if (rdev->has_uvd) {
8574 		uvd_v1_0_fini(rdev);
8575 		radeon_uvd_suspend(rdev);
8576 	}
8577 	if (rdev->has_vce)
8578 		radeon_vce_suspend(rdev);
8579 	cik_fini_pg(rdev);
8580 	cik_fini_cg(rdev);
8581 	cik_irq_suspend(rdev);
8582 	radeon_wb_disable(rdev);
8583 	cik_pcie_gart_disable(rdev);
8584 	return 0;
8585 }
8586 
8587 /* The plan is to move initialization into this function and to
8588  * use helper functions so that radeon_device_init() does little
8589  * more than call asic-specific functions.  This should also
8590  * allow us to remove a bunch of callback functions like
8591  * vram_info.
8592  */
8593 /**
8594  * cik_init - asic specific driver and hw init
8595  *
8596  * @rdev: radeon_device pointer
8597  *
8598  * Setup asic specific driver variables and program the hw
8599  * to a functional state (CIK).
8600  * Called at driver startup.
8601  * Returns 0 for success, errors for failure.
8602  */
8603 int cik_init(struct radeon_device *rdev)
8604 {
8605 	struct radeon_ring *ring;
8606 	int r;
8607 
8608 	/* Read BIOS */
8609 	if (!radeon_get_bios(rdev)) {
8610 		if (ASIC_IS_AVIVO(rdev))
8611 			return -EINVAL;
8612 	}
8613 	/* Must be an ATOMBIOS */
8614 	if (!rdev->is_atom_bios) {
8615 		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
8616 		return -EINVAL;
8617 	}
8618 	r = radeon_atombios_init(rdev);
8619 	if (r)
8620 		return r;
8621 
8622 	/* Post card if necessary */
8623 	if (!radeon_card_posted(rdev)) {
8624 		if (!rdev->bios) {
8625 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8626 			return -EINVAL;
8627 		}
8628 		DRM_INFO("GPU not posted. posting now...\n");
8629 		atom_asic_init(rdev->mode_info.atom_context);
8630 	}
8631 	/* init golden registers */
8632 	cik_init_golden_registers(rdev);
8633 	/* Initialize scratch registers */
8634 	cik_scratch_init(rdev);
8635 	/* Initialize surface registers */
8636 	radeon_surface_init(rdev);
8637 	/* Initialize clocks */
8638 	radeon_get_clock_info(rdev->ddev);
8639 
8640 	/* Fence driver */
8641 	r = radeon_fence_driver_init(rdev);
8642 	if (r)
8643 		return r;
8644 
8645 	/* initialize memory controller */
8646 	r = cik_mc_init(rdev);
8647 	if (r)
8648 		return r;
8649 	/* Memory manager */
8650 	r = radeon_bo_init(rdev);
8651 	if (r)
8652 		return r;
8653 
8654 	if (rdev->flags & RADEON_IS_IGP) {
8655 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8656 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8657 			r = cik_init_microcode(rdev);
8658 			if (r) {
8659 				DRM_ERROR("Failed to load firmware!\n");
8660 				return r;
8661 			}
8662 		}
8663 	} else {
8664 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8665 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8666 		    !rdev->mc_fw) {
8667 			r = cik_init_microcode(rdev);
8668 			if (r) {
8669 				DRM_ERROR("Failed to load firmware!\n");
8670 				return r;
8671 			}
8672 		}
8673 	}
8674 
8675 	/* Initialize power management */
8676 	radeon_pm_init(rdev);
8677 
8678 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8679 	ring->ring_obj = NULL;
8680 	r600_ring_init(rdev, ring, 1024 * 1024);
8681 
8682 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8683 	ring->ring_obj = NULL;
8684 	r600_ring_init(rdev, ring, 1024 * 1024);
8685 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8686 	if (r)
8687 		return r;
8688 
8689 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8690 	ring->ring_obj = NULL;
8691 	r600_ring_init(rdev, ring, 1024 * 1024);
8692 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8693 	if (r)
8694 		return r;
8695 
8696 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8697 	ring->ring_obj = NULL;
8698 	r600_ring_init(rdev, ring, 256 * 1024);
8699 
8700 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8701 	ring->ring_obj = NULL;
8702 	r600_ring_init(rdev, ring, 256 * 1024);
8703 
8704 	cik_uvd_init(rdev);
8705 	cik_vce_init(rdev);
8706 
8707 	rdev->ih.ring_obj = NULL;
8708 	r600_ih_ring_init(rdev, 64 * 1024);
8709 
8710 	r = r600_pcie_gart_init(rdev);
8711 	if (r)
8712 		return r;
8713 
8714 	rdev->accel_working = true;
8715 	r = cik_startup(rdev);
8716 	if (r) {
8717 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8718 		cik_cp_fini(rdev);
8719 		cik_sdma_fini(rdev);
8720 		cik_irq_fini(rdev);
8721 		sumo_rlc_fini(rdev);
8722 		cik_mec_fini(rdev);
8723 		radeon_wb_fini(rdev);
8724 		radeon_ib_pool_fini(rdev);
8725 		radeon_vm_manager_fini(rdev);
8726 		radeon_irq_kms_fini(rdev);
8727 		cik_pcie_gart_fini(rdev);
8728 		rdev->accel_working = false;
8729 	}
8730 
8731 	/* Don't start up if the MC ucode is missing.
8732 	 * The default clocks and voltages before the MC ucode
8733 	 * is loaded are not sufficient for advanced operations.
8734 	 */
8735 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8736 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8737 		return -EINVAL;
8738 	}
8739 
8740 	return 0;
8741 }
8742 
8743 /**
8744  * cik_fini - asic specific driver and hw fini
8745  *
8746  * @rdev: radeon_device pointer
8747  *
8748  * Tear down the asic specific driver variables and program the hw
8749  * to an idle state (CIK).
8750  * Called at driver unload.
8751  */
8752 void cik_fini(struct radeon_device *rdev)
8753 {
8754 	radeon_pm_fini(rdev);
8755 	cik_cp_fini(rdev);
8756 	cik_sdma_fini(rdev);
8757 	cik_fini_pg(rdev);
8758 	cik_fini_cg(rdev);
8759 	cik_irq_fini(rdev);
8760 	sumo_rlc_fini(rdev);
8761 	cik_mec_fini(rdev);
8762 	radeon_wb_fini(rdev);
8763 	radeon_vm_manager_fini(rdev);
8764 	radeon_ib_pool_fini(rdev);
8765 	radeon_irq_kms_fini(rdev);
8766 	uvd_v1_0_fini(rdev);
8767 	radeon_uvd_fini(rdev);
8768 	radeon_vce_fini(rdev);
8769 	cik_pcie_gart_fini(rdev);
8770 	r600_vram_scratch_fini(rdev);
8771 	radeon_gem_fini(rdev);
8772 	radeon_fence_driver_fini(rdev);
8773 	radeon_bo_fini(rdev);
8774 	radeon_atombios_fini(rdev);
8775 	cik_fini_microcode(rdev);
8776 	kfree(rdev->bios);
8777 	rdev->bios = NULL;
8778 }
8779 
8780 void dce8_program_fmt(struct drm_encoder *encoder)
8781 {
8782 	struct drm_device *dev = encoder->dev;
8783 	struct radeon_device *rdev = dev->dev_private;
8784 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8785 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8786 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8787 	int bpc = 0;
8788 	u32 tmp = 0;
8789 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8790 
8791 	if (connector) {
8792 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8793 		bpc = radeon_get_monitor_bpc(connector);
8794 		dither = radeon_connector->dither;
8795 	}
8796 
8797 	/* LVDS/eDP FMT is set up by atom */
8798 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8799 		return;
8800 
8801 	/* not needed for analog */
8802 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8803 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8804 		return;
8805 
8806 	if (bpc == 0)
8807 		return;
8808 
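	/* FMT_*_DEPTH(n) encodes the component depth: 0 = 6 bpc,
	 * 1 = 8 bpc, 2 = 10 bpc, matching the cases below
	 * (editorial note).
	 */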
8809 	switch (bpc) {
8810 	case 6:
8811 		if (dither == RADEON_FMT_DITHER_ENABLE)
8812 			/* XXX sort out optimal dither settings */
8813 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8814 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8815 		else
8816 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8817 		break;
8818 	case 8:
8819 		if (dither == RADEON_FMT_DITHER_ENABLE)
8820 			/* XXX sort out optimal dither settings */
8821 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8822 				FMT_RGB_RANDOM_ENABLE |
8823 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8824 		else
8825 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8826 		break;
8827 	case 10:
8828 		if (dither == RADEON_FMT_DITHER_ENABLE)
8829 			/* XXX sort out optimal dither settings */
8830 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8831 				FMT_RGB_RANDOM_ENABLE |
8832 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8833 		else
8834 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8835 		break;
8836 	default:
8837 		/* not needed */
8838 		break;
8839 	}
8840 
8841 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8842 }
8843 
8844 /* display watermark setup */
8845 /**
8846  * dce8_line_buffer_adjust - Set up the line buffer
8847  *
8848  * @rdev: radeon_device pointer
8849  * @radeon_crtc: the selected display controller
8850  * @mode: the current display mode on the selected display
8851  * controller
8852  *
8853  * Set up the line buffer allocation for
8854  * the selected display controller (CIK).
8855  * Returns the line buffer size in pixels.
8856  */
8857 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8858 				   struct radeon_crtc *radeon_crtc,
8859 				   struct drm_display_mode *mode)
8860 {
8861 	u32 tmp, buffer_alloc, i;
8862 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8863 	/*
8864 	 * Line Buffer Setup
8865 	 * There are 6 line buffers, one for each display controller.
8866 	 * There are 3 partitions per LB. Select the number of partitions
8867 	 * to enable based on the display width.  For display widths larger
8868 	 * than 4096, you need to use 2 display controllers and combine
8869 	 * them using the stereo blender.
8870 	 */
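	/*
	 * Worked example (editorial, from the buckets below): a
	 * 1920-wide mode is not < 1920 but is < 2560, so tmp = 2 and
	 * the function reports 2560 * 2 pixels of line buffer for that
	 * controller; a 1280-wide mode takes tmp = 1 and reports
	 * 1920 * 2.
	 */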
8871 	if (radeon_crtc->base.enabled && mode) {
8872 		if (mode->crtc_hdisplay < 1920) {
8873 			tmp = 1;
8874 			buffer_alloc = 2;
8875 		} else if (mode->crtc_hdisplay < 2560) {
8876 			tmp = 2;
8877 			buffer_alloc = 2;
8878 		} else if (mode->crtc_hdisplay < 4096) {
8879 			tmp = 0;
8880 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8881 		} else {
8882 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8883 			tmp = 0;
8884 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8885 		}
8886 	} else {
8887 		tmp = 1;
8888 		buffer_alloc = 0;
8889 	}
8890 
8891 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8892 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8893 
8894 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8895 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8896 	for (i = 0; i < rdev->usec_timeout; i++) {
8897 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8898 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8899 			break;
8900 		udelay(1);
8901 	}
8902 
8903 	if (radeon_crtc->base.enabled && mode) {
8904 		switch (tmp) {
8905 		case 0:
8906 		default:
8907 			return 4096 * 2;
8908 		case 1:
8909 			return 1920 * 2;
8910 		case 2:
8911 			return 2560 * 2;
8912 		}
8913 	}
8914 
8915 	/* controller not enabled, so no lb used */
8916 	return 0;
8917 }
8918 
8919 /**
8920  * cik_get_number_of_dram_channels - get the number of dram channels
8921  *
8922  * @rdev: radeon_device pointer
8923  *
8924  * Look up the number of video ram channels (CIK).
8925  * Used for display watermark bandwidth calculations
8926  * Returns the number of dram channels
8927  */
8928 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8929 {
8930 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8931 
8932 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8933 	case 0:
8934 	default:
8935 		return 1;
8936 	case 1:
8937 		return 2;
8938 	case 2:
8939 		return 4;
8940 	case 3:
8941 		return 8;
8942 	case 4:
8943 		return 3;
8944 	case 5:
8945 		return 6;
8946 	case 6:
8947 		return 10;
8948 	case 7:
8949 		return 12;
8950 	case 8:
8951 		return 16;
8952 	}
8953 }
8954 
8955 struct dce8_wm_params {
8956 	u32 dram_channels; /* number of dram channels */
8957 	u32 yclk;          /* bandwidth per dram data pin in kHz */
8958 	u32 sclk;          /* engine clock in kHz */
8959 	u32 disp_clk;      /* display clock in kHz */
8960 	u32 src_width;     /* viewport width */
8961 	u32 active_time;   /* active display time in ns */
8962 	u32 blank_time;    /* blank time in ns */
8963 	bool interlaced;    /* mode is interlaced */
8964 	fixed20_12 vsc;    /* vertical scale ratio */
8965 	u32 num_heads;     /* number of active crtcs */
8966 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8967 	u32 lb_size;       /* line buffer allocated to pipe */
8968 	u32 vtaps;         /* vertical scaler taps */
8969 };
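
/*
 * Editorial note on the arithmetic below: fixed20_12 (drm_fixed.h) is a
 * 20.12 fixed-point value, so dfixed_const(7) / dfixed_const(10) models
 * 0.7.  As a worked example for dce8_dram_bandwidth(): with 4 channels
 * and yclk = 1000000 kHz, bandwidth = (4 * 4 bytes) * 1000 MHz * 0.7
 * = 11200 MBytes/s.
 */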
8970 
8971 /**
8972  * dce8_dram_bandwidth - get the dram bandwidth
8973  *
8974  * @wm: watermark calculation data
8975  *
8976  * Calculate the raw dram bandwidth (CIK).
8977  * Used for display watermark bandwidth calculations
8978  * Returns the dram bandwidth in MBytes/s
8979  */
8980 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8981 {
8982 	/* Calculate raw DRAM Bandwidth */
8983 	fixed20_12 dram_efficiency; /* 0.7 */
8984 	fixed20_12 yclk, dram_channels, bandwidth;
8985 	fixed20_12 a;
8986 
8987 	a.full = dfixed_const(1000);
8988 	yclk.full = dfixed_const(wm->yclk);
8989 	yclk.full = dfixed_div(yclk, a);
8990 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8991 	a.full = dfixed_const(10);
8992 	dram_efficiency.full = dfixed_const(7);
8993 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8994 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8995 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8996 
8997 	return dfixed_trunc(bandwidth);
8998 }
8999 
9000 /**
9001  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9002  *
9003  * @wm: watermark calculation data
9004  *
9005  * Calculate the dram bandwidth used for display (CIK).
9006  * Used for display watermark bandwidth calculations
9007  * Returns the dram bandwidth for display in MBytes/s
9008  */
9009 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9010 {
9011 	/* Calculate DRAM Bandwidth and the part allocated to display. */
9012 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9013 	fixed20_12 yclk, dram_channels, bandwidth;
9014 	fixed20_12 a;
9015 
9016 	a.full = dfixed_const(1000);
9017 	yclk.full = dfixed_const(wm->yclk);
9018 	yclk.full = dfixed_div(yclk, a);
9019 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9020 	a.full = dfixed_const(10);
9021 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
9022 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9023 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9024 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9025 
9026 	return dfixed_trunc(bandwidth);
9027 }
9028 
9029 /**
9030  * dce8_data_return_bandwidth - get the data return bandwidth
9031  *
9032  * @wm: watermark calculation data
9033  *
9034  * Calculate the data return bandwidth used for display (CIK).
9035  * Used for display watermark bandwidth calculations
9036  * Returns the data return bandwidth in MBytes/s
9037  */
9038 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9039 {
9040 	/* Calculate the display Data return Bandwidth */
9041 	fixed20_12 return_efficiency; /* 0.8 */
9042 	fixed20_12 sclk, bandwidth;
9043 	fixed20_12 a;
9044 
9045 	a.full = dfixed_const(1000);
9046 	sclk.full = dfixed_const(wm->sclk);
9047 	sclk.full = dfixed_div(sclk, a);
9048 	a.full = dfixed_const(10);
9049 	return_efficiency.full = dfixed_const(8);
9050 	return_efficiency.full = dfixed_div(return_efficiency, a);
9051 	a.full = dfixed_const(32);
9052 	bandwidth.full = dfixed_mul(a, sclk);
9053 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9054 
9055 	return dfixed_trunc(bandwidth);
9056 }
9057 
9058 /**
9059  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9060  *
9061  * @wm: watermark calculation data
9062  *
9063  * Calculate the dmif bandwidth used for display (CIK).
9064  * Used for display watermark bandwidth calculations
9065  * Returns the dmif bandwidth in MBytes/s
9066  */
9067 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9068 {
9069 	/* Calculate the DMIF Request Bandwidth */
9070 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9071 	fixed20_12 disp_clk, bandwidth;
9072 	fixed20_12 a, b;
9073 
9074 	a.full = dfixed_const(1000);
9075 	disp_clk.full = dfixed_const(wm->disp_clk);
9076 	disp_clk.full = dfixed_div(disp_clk, a);
9077 	a.full = dfixed_const(32);
9078 	b.full = dfixed_mul(a, disp_clk);
9079 
9080 	a.full = dfixed_const(10);
9081 	disp_clk_request_efficiency.full = dfixed_const(8);
9082 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9083 
9084 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9085 
9086 	return dfixed_trunc(bandwidth);
9087 }
9088 
9089 /**
9090  * dce8_available_bandwidth - get the min available bandwidth
9091  *
9092  * @wm: watermark calculation data
9093  *
9094  * Calculate the min available bandwidth used for display (CIK).
9095  * Used for display watermark bandwidth calculations
9096  * Returns the min available bandwidth in MBytes/s
9097  */
9098 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9099 {
9100 	/* Calculate the available bandwidth.  Display can use this temporarily, but not on average. */
9101 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9102 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9103 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9104 
9105 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9106 }
9107 
9108 /**
9109  * dce8_average_bandwidth - get the average available bandwidth
9110  *
9111  * @wm: watermark calculation data
9112  *
9113  * Calculate the average available bandwidth used for display (CIK).
9114  * Used for display watermark bandwidth calculations
9115  * Returns the average available bandwidth in MBytes/s
9116  */
9117 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9118 {
9119 	/* Calculate the display mode Average Bandwidth
9120 	 * DisplayMode should contain the source and destination dimensions,
9121 	 * timing, etc.
9122 	 */
9123 	fixed20_12 bpp;
9124 	fixed20_12 line_time;
9125 	fixed20_12 src_width;
9126 	fixed20_12 bandwidth;
9127 	fixed20_12 a;
9128 
9129 	a.full = dfixed_const(1000);
9130 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9131 	line_time.full = dfixed_div(line_time, a);
9132 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9133 	src_width.full = dfixed_const(wm->src_width);
9134 	bandwidth.full = dfixed_mul(src_width, bpp);
9135 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9136 	bandwidth.full = dfixed_div(bandwidth, line_time);
9137 
9138 	return dfixed_trunc(bandwidth);
9139 }
9140 
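/*
 * Editorial note: the latency watermark below estimates the worst-case
 * wait before this head's data returns: a fixed 2000 ns memory latency,
 * plus the time for the other heads' outstanding chunk and cursor-line
 * fetches to drain, plus a dc pipe latency derived from disp_clk.  If
 * the line buffer cannot be refilled within one active line time, the
 * shortfall is added on top.
 */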
9141 /**
9142  * dce8_latency_watermark - get the latency watermark
9143  *
9144  * @wm: watermark calculation data
9145  *
9146  * Calculate the latency watermark (CIK).
9147  * Used for display watermark bandwidth calculations
9148  * Returns the latency watermark in ns
9149  */
9150 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9151 {
9152 	/* First calculate the latency in ns */
9153 	u32 mc_latency = 2000; /* 2000 ns. */
9154 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9155 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9156 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9157 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9158 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9159 		(wm->num_heads * cursor_line_pair_return_time);
9160 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9161 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9162 	u32 tmp, dmif_size = 12288;
9163 	fixed20_12 a, b, c;
9164 
9165 	if (wm->num_heads == 0)
9166 		return 0;
9167 
9168 	a.full = dfixed_const(2);
9169 	b.full = dfixed_const(1);
9170 	if ((wm->vsc.full > a.full) ||
9171 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9172 	    (wm->vtaps >= 5) ||
9173 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9174 		max_src_lines_per_dst_line = 4;
9175 	else
9176 		max_src_lines_per_dst_line = 2;
9177 
9178 	a.full = dfixed_const(available_bandwidth);
9179 	b.full = dfixed_const(wm->num_heads);
9180 	a.full = dfixed_div(a, b);
9181 	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
9182 	tmp = min(dfixed_trunc(a), tmp);
9183 
9184 	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
9185 
9186 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9187 	b.full = dfixed_const(1000);
9188 	c.full = dfixed_const(lb_fill_bw);
9189 	b.full = dfixed_div(c, b);
9190 	a.full = dfixed_div(a, b);
9191 	line_fill_time = dfixed_trunc(a);
9192 
9193 	if (line_fill_time < wm->active_time)
9194 		return latency;
9195 	else
9196 		return latency + (line_fill_time - wm->active_time);
9198 }
9199 
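/*
 * Illustrative sketch, not driver code: how the dce8_wm_params helpers
 * above fit together for a single head.  The function name and all
 * values are hypothetical (a 1920x1080 mode at 148.5 MHz) and are only
 * meant to show the units the struct expects.
 */
#if 0
static u32 dce8_example_latency_watermark(void)
{
	struct dce8_wm_params wm = {
		.dram_channels = 4,
		.yclk = 1000000,	/* per-pin dram clock, kHz */
		.sclk = 800000,		/* engine clock, kHz */
		.disp_clk = 148500,	/* display clock, kHz */
		.src_width = 1920,	/* viewport width */
		.active_time = 12929,	/* 1920 px / 148.5 MHz, in ns */
		.blank_time = 1885,	/* (2200 - 1920) px / 148.5 MHz */
		.interlaced = false,
		.vsc = { .full = dfixed_const(1) },	/* no scaling */
		.num_heads = 1,
		.bytes_per_pixel = 4,	/* XRGB8888 */
		.lb_size = 2560 * 2,	/* from dce8_line_buffer_adjust() */
		.vtaps = 1,
	};

	return dce8_latency_watermark(&wm);	/* watermark in ns */
}
#endif
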
9200 /**
9201  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9202  * average bandwidth against the dram bandwidth for display
9203  *
9204  * @wm: watermark calculation data
9205  *
9206  * Check if the display average bandwidth fits in the display
9207  * dram bandwidth (CIK).
9208  * Used for display watermark bandwidth calculations
9209  * Returns true if the display fits, false if not.
9210  */
9211 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9212 {
9213 	if (dce8_average_bandwidth(wm) <=
9214 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9215 		return true;
9216 	else
9217 		return false;
9218 }
9219 
9220 /**
9221  * dce8_average_bandwidth_vs_available_bandwidth - check
9222  * average and available bandwidth
9223  *
9224  * @wm: watermark calculation data
9225  *
9226  * Check if the display average bandwidth fits in the display
9227  * available bandwidth (CIK).
9228  * Used for display watermark bandwidth calculations
9229  * Returns true if the display fits, false if not.
9230  */
9231 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9232 {
9233 	if (dce8_average_bandwidth(wm) <=
9234 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9235 		return true;
9236 	else
9237 		return false;
9238 }
9239 
9240 /**
9241  * dce8_check_latency_hiding - check latency hiding
9242  *
9243  * @wm: watermark calculation data
9244  *
9245  * Check latency hiding (CIK).
9246  * Used for display watermark bandwidth calculations
9247  * Returns true if the display fits, false if not.
9248  */
9249 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9250 {
9251 	u32 lb_partitions = wm->lb_size / wm->src_width;
9252 	u32 line_time = wm->active_time + wm->blank_time;
9253 	u32 latency_tolerant_lines;
9254 	u32 latency_hiding;
9255 	fixed20_12 a;
9256 
9257 	a.full = dfixed_const(1);
9258 	if (wm->vsc.full > a.full)
9259 		latency_tolerant_lines = 1;
9260 	else {
9261 		if (lb_partitions <= (wm->vtaps + 1))
9262 			latency_tolerant_lines = 1;
9263 		else
9264 			latency_tolerant_lines = 2;
9265 	}
9266 
9267 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9268 
9269 	if (dce8_latency_watermark(wm) <= latency_hiding)
9270 		return true;
9271 	else
9272 		return false;
9273 }
9274 
9275 /**
9276  * dce8_program_watermarks - program display watermarks
9277  *
9278  * @rdev: radeon_device pointer
9279  * @radeon_crtc: the selected display controller
9280  * @lb_size: line buffer size
9281  * @num_heads: number of display controllers in use
9282  *
9283  * Calculate and program the display watermarks for the
9284  * selected display controller (CIK).
9285  */
9286 static void dce8_program_watermarks(struct radeon_device *rdev,
9287 				    struct radeon_crtc *radeon_crtc,
9288 				    u32 lb_size, u32 num_heads)
9289 {
9290 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9291 	struct dce8_wm_params wm_low, wm_high;
9292 	u32 active_time;
9293 	u32 line_time = 0;
9294 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9295 	u32 tmp, wm_mask;
9296 
9297 	if (radeon_crtc->base.enabled && num_heads && mode) {
9298 		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
9299 					    (u32)mode->clock);
9300 		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
9301 					  (u32)mode->clock);
9302 		line_time = min(line_time, (u32)65535);
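		/* the DPG line_time/watermark register fields are 16 bits
		 * wide, hence the 65535 clamp (editorial note)
		 */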
9303 
9304 		/* watermark for high clocks */
9305 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9306 		    rdev->pm.dpm_enabled) {
9307 			wm_high.yclk =
9308 				radeon_dpm_get_mclk(rdev, false) * 10;
9309 			wm_high.sclk =
9310 				radeon_dpm_get_sclk(rdev, false) * 10;
9311 		} else {
9312 			wm_high.yclk = rdev->pm.current_mclk * 10;
9313 			wm_high.sclk = rdev->pm.current_sclk * 10;
9314 		}
9315 
9316 		wm_high.disp_clk = mode->clock;
9317 		wm_high.src_width = mode->crtc_hdisplay;
9318 		wm_high.active_time = active_time;
9319 		wm_high.blank_time = line_time - wm_high.active_time;
9320 		wm_high.interlaced = false;
9321 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9322 			wm_high.interlaced = true;
9323 		wm_high.vsc = radeon_crtc->vsc;
9324 		wm_high.vtaps = 1;
9325 		if (radeon_crtc->rmx_type != RMX_OFF)
9326 			wm_high.vtaps = 2;
9327 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9328 		wm_high.lb_size = lb_size;
9329 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9330 		wm_high.num_heads = num_heads;
9331 
9332 		/* set for high clocks */
9333 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9334 
9335 		/* possibly force display priority to high */
9336 		/* should really do this at mode validation time... */
9337 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9338 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9339 		    !dce8_check_latency_hiding(&wm_high) ||
9340 		    (rdev->disp_priority == 2)) {
9341 			DRM_DEBUG_KMS("force priority to high\n");
9342 		}
9343 
9344 		/* watermark for low clocks */
9345 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9346 		    rdev->pm.dpm_enabled) {
9347 			wm_low.yclk =
9348 				radeon_dpm_get_mclk(rdev, true) * 10;
9349 			wm_low.sclk =
9350 				radeon_dpm_get_sclk(rdev, true) * 10;
9351 		} else {
9352 			wm_low.yclk = rdev->pm.current_mclk * 10;
9353 			wm_low.sclk = rdev->pm.current_sclk * 10;
9354 		}
9355 
9356 		wm_low.disp_clk = mode->clock;
9357 		wm_low.src_width = mode->crtc_hdisplay;
9358 		wm_low.active_time = active_time;
9359 		wm_low.blank_time = line_time - wm_low.active_time;
9360 		wm_low.interlaced = false;
9361 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9362 			wm_low.interlaced = true;
9363 		wm_low.vsc = radeon_crtc->vsc;
9364 		wm_low.vtaps = 1;
9365 		if (radeon_crtc->rmx_type != RMX_OFF)
9366 			wm_low.vtaps = 2;
9367 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9368 		wm_low.lb_size = lb_size;
9369 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9370 		wm_low.num_heads = num_heads;
9371 
9372 		/* set for low clocks */
9373 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9374 
9375 		/* possibly force display priority to high */
9376 		/* should really do this at mode validation time... */
9377 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9378 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9379 		    !dce8_check_latency_hiding(&wm_low) ||
9380 		    (rdev->disp_priority == 2)) {
9381 			DRM_DEBUG_KMS("force priority to high\n");
9382 		}
9383 
9384 		/* Save number of lines the linebuffer leads before the scanout */
9385 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9386 	}
9387 
9388 	/* select wm A */
9389 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9390 	tmp = wm_mask;
9391 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9392 	tmp |= LATENCY_WATERMARK_MASK(1);
9393 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9394 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9395 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9396 		LATENCY_HIGH_WATERMARK(line_time)));
9397 	/* select wm B */
9398 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9399 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9400 	tmp |= LATENCY_WATERMARK_MASK(2);
9401 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9402 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9403 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9404 		LATENCY_HIGH_WATERMARK(line_time)));
9405 	/* restore original selection */
9406 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9407 
9408 	/* save values for DPM */
9409 	radeon_crtc->line_time = line_time;
9410 	radeon_crtc->wm_high = latency_watermark_a;
9411 	radeon_crtc->wm_low = latency_watermark_b;
9412 }
9413 
9414 /**
9415  * dce8_bandwidth_update - program display watermarks
9416  *
9417  * @rdev: radeon_device pointer
9418  *
9419  * Calculate and program the display watermarks and line
9420  * buffer allocation (CIK).
9421  */
9422 void dce8_bandwidth_update(struct radeon_device *rdev)
9423 {
9424 	struct drm_display_mode *mode = NULL;
9425 	u32 num_heads = 0, lb_size;
9426 	int i;
9427 
9428 	if (!rdev->mode_info.mode_config_initialized)
9429 		return;
9430 
9431 	radeon_update_display_priority(rdev);
9432 
9433 	for (i = 0; i < rdev->num_crtc; i++) {
9434 		if (rdev->mode_info.crtcs[i]->base.enabled)
9435 			num_heads++;
9436 	}
9437 	for (i = 0; i < rdev->num_crtc; i++) {
9438 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9439 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9440 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9441 	}
9442 }
9443 
9444 /**
9445  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9446  *
9447  * @rdev: radeon_device pointer
9448  *
9449  * Fetches a GPU clock counter snapshot (CIK).
9450  * Returns the 64 bit clock counter snapshot.
9451  */
9452 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9453 {
9454 	uint64_t clock;
9455 
9456 	mutex_lock(&rdev->gpu_clock_mutex);
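	/* writing the capture bit latches the full 64-bit counter so the
	 * two 32-bit halves read below are coherent (editorial note)
	 */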
9457 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9458 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9459 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9460 	mutex_unlock(&rdev->gpu_clock_mutex);
9461 	return clock;
9462 }
9463 
9464 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9465 			     u32 cntl_reg, u32 status_reg)
9466 {
9467 	int r, i;
9468 	struct atom_clock_dividers dividers;
9469 	uint32_t tmp;
9470 
9471 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9472 					   clock, false, &dividers);
9473 	if (r)
9474 		return r;
9475 
9476 	tmp = RREG32_SMC(cntl_reg);
9477 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9478 	tmp |= dividers.post_divider;
9479 	WREG32_SMC(cntl_reg, tmp);
9480 
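	/* poll up to ~1 second (100 * 10 ms) for the new divider to latch */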
9481 	for (i = 0; i < 100; i++) {
9482 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9483 			break;
9484 		mdelay(10);
9485 	}
9486 	if (i == 100)
9487 		return -ETIMEDOUT;
9488 
9489 	return 0;
9490 }
9491 
9492 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9493 {
9494 	int r = 0;
9495 
9496 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9497 	if (r)
9498 		return r;
9499 
9500 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9501 	return r;
9502 }
9503 
9504 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9505 {
9506 	int r, i;
9507 	struct atom_clock_dividers dividers;
9508 	u32 tmp;
9509 
9510 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9511 					   ecclk, false, &dividers);
9512 	if (r)
9513 		return r;
9514 
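	/* make sure the current ECLK has settled before changing the
	 * divider, then wait again below for the new value to latch
	 */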
9515 	for (i = 0; i < 100; i++) {
9516 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9517 			break;
9518 		mdelay(10);
9519 	}
9520 	if (i == 100)
9521 		return -ETIMEDOUT;
9522 
9523 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9524 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9525 	tmp |= dividers.post_divider;
9526 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9527 
9528 	for (i = 0; i < 100; i++) {
9529 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9530 			break;
9531 		mdelay(10);
9532 	}
9533 	if (i == 100)
9534 		return -ETIMEDOUT;
9535 
9536 	return 0;
9537 }
9538 
9539 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9540 {
9541 	struct pci_dev *root = rdev->pdev->bus->self;
9542 	int bridge_pos, gpu_pos;
9543 	u32 speed_cntl, mask, current_data_rate;
9544 	int ret, i;
9545 	u16 tmp16;
9546 
9547 #if 0
9548 	if (pci_is_root_bus(rdev->pdev->bus))
9549 		return;
9550 #endif
9551 
9552 	if (radeon_pcie_gen2 == 0)
9553 		return;
9554 
9555 	if (rdev->flags & RADEON_IS_IGP)
9556 		return;
9557 
9558 	if (!(rdev->flags & RADEON_IS_PCIE))
9559 		return;
9560 
9561 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9562 	if (ret != 0)
9563 		return;
9564 
9565 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9566 		return;
9567 
9568 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9569 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9570 		LC_CURRENT_DATA_RATE_SHIFT;
9571 	if (mask & DRM_PCIE_SPEED_80) {
9572 		if (current_data_rate == 2) {
9573 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9574 			return;
9575 		}
9576 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9577 	} else if (mask & DRM_PCIE_SPEED_50) {
9578 		if (current_data_rate == 1) {
9579 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9580 			return;
9581 		}
9582 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9583 	}
9584 
9585 	bridge_pos = pci_pcie_cap(root);
9586 	if (!bridge_pos)
9587 		return;
9588 
9589 	gpu_pos = pci_pcie_cap(rdev->pdev);
9590 	if (!gpu_pos)
9591 		return;
9592 
9593 	if (mask & DRM_PCIE_SPEED_80) {
9594 		/* re-try equalization if gen3 is not already enabled */
9595 		if (current_data_rate != 2) {
9596 			u16 bridge_cfg, gpu_cfg;
9597 			u16 bridge_cfg2, gpu_cfg2;
9598 			u32 max_lw, current_lw, tmp;
9599 
9600 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9601 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9602 
9603 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9604 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9605 
9606 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9607 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9608 
9609 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9610 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9611 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9612 
9613 			if (current_lw < max_lw) {
9614 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9615 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9616 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9617 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9618 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9619 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9620 				}
9621 			}
9622 
9623 			for (i = 0; i < 10; i++) {
9624 				/* check status */
9625 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9626 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9627 					break;
9628 
9629 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9630 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9631 
9632 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9633 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9634 
9635 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9636 				tmp |= LC_SET_QUIESCE;
9637 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9638 
9639 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9640 				tmp |= LC_REDO_EQ;
9641 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9642 
9643 				mdelay(100);
9644 
9645 				/* linkctl */
9646 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9647 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9648 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9649 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9650 
9651 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9652 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9653 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9654 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9655 
9656 				/* linkctl2 */
9657 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9658 				tmp16 &= ~((1 << 4) | (7 << 9));
9659 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9660 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9661 
9662 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9663 				tmp16 &= ~((1 << 4) | (7 << 9));
9664 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9665 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9666 
9667 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9668 				tmp &= ~LC_SET_QUIESCE;
9669 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9670 			}
9671 		}
9672 	}
9673 
9674 	/* set the link speed */
9675 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9676 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9677 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9678 
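	/* PCI_EXP_LNKCTL2 target link speed: 1 = 2.5GT/s, 2 = 5.0GT/s,
	 * 3 = 8.0GT/s
	 */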
9679 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9680 	tmp16 &= ~0xf;
9681 	if (mask & DRM_PCIE_SPEED_80)
9682 		tmp16 |= 3; /* gen3 */
9683 	else if (mask & DRM_PCIE_SPEED_50)
9684 		tmp16 |= 2; /* gen2 */
9685 	else
9686 		tmp16 |= 1; /* gen1 */
9687 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9688 
9689 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9690 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9691 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9692 
9693 	for (i = 0; i < rdev->usec_timeout; i++) {
9694 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9695 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9696 			break;
9697 		udelay(1);
9698 	}
9699 }
9700 
9701 static void cik_program_aspm(struct radeon_device *rdev)
9702 {
9703 	u32 data, orig;
9704 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9705 #if 0
9706 	bool disable_clkreq = false;
9707 #endif
9708 
9709 	if (radeon_aspm == 0)
9710 		return;
9711 
9712 	/* XXX double check IGPs */
9713 	if (rdev->flags & RADEON_IS_IGP)
9714 		return;
9715 
9716 	if (!(rdev->flags & RADEON_IS_PCIE))
9717 		return;
9718 
9719 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9720 	data &= ~LC_XMIT_N_FTS_MASK;
9721 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9722 	if (orig != data)
9723 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9724 
9725 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9726 	data |= LC_GO_TO_RECOVERY;
9727 	if (orig != data)
9728 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9729 
9730 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9731 	data |= P_IGNORE_EDB_ERR;
9732 	if (orig != data)
9733 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9734 
9735 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9736 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9737 	data |= LC_PMI_TO_L1_DIS;
9738 	if (!disable_l0s)
9739 		data |= LC_L0S_INACTIVITY(7);
9740 
9741 	if (!disable_l1) {
9742 		data |= LC_L1_INACTIVITY(7);
9743 		data &= ~LC_PMI_TO_L1_DIS;
9744 		if (orig != data)
9745 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9746 
9747 		if (!disable_plloff_in_l1) {
9748 			bool clk_req_support;
9749 
9750 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9751 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9752 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9753 			if (orig != data)
9754 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9755 
9756 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9757 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9758 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9759 			if (orig != data)
9760 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9761 
9762 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9763 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9764 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9765 			if (orig != data)
9766 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9767 
9768 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9769 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9770 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9771 			if (orig != data)
9772 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9773 
9774 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9775 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9776 			data |= LC_DYN_LANES_PWR_STATE(3);
9777 			if (orig != data)
9778 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9779 
9780 #ifdef zMN_TODO
9781 			if (!disable_clkreq &&
9782 			    !pci_is_root_bus(rdev->pdev->bus)) {
9783 				struct pci_dev *root = rdev->pdev->bus->self;
9784 				u32 lnkcap;
9785 
9786 				clk_req_support = false;
9787 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9788 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9789 					clk_req_support = true;
9790 			} else {
9791 				clk_req_support = false;
9792 			}
9793 #else
9794 			clk_req_support = false;
9795 #endif
9796 
9797 			if (clk_req_support) {
9798 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9799 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9800 				if (orig != data)
9801 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9802 
9803 				orig = data = RREG32_SMC(THM_CLK_CNTL);
9804 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9805 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9806 				if (orig != data)
9807 					WREG32_SMC(THM_CLK_CNTL, data);
9808 
9809 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
9810 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9811 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9812 				if (orig != data)
9813 					WREG32_SMC(MISC_CLK_CTRL, data);
9814 
9815 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9816 				data &= ~BCLK_AS_XCLK;
9817 				if (orig != data)
9818 					WREG32_SMC(CG_CLKPIN_CNTL, data);
9819 
9820 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9821 				data &= ~FORCE_BIF_REFCLK_EN;
9822 				if (orig != data)
9823 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9824 
9825 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9826 				data &= ~MPLL_CLKOUT_SEL_MASK;
9827 				data |= MPLL_CLKOUT_SEL(4);
9828 				if (orig != data)
9829 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9830 			}
9831 		}
9832 	} else {
9833 		if (orig != data)
9834 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9835 	}
9836 
9837 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9838 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9839 	if (orig != data)
9840 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
9841 
9842 	if (!disable_l0s) {
9843 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9844 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9845 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9846 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9847 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9848 				data &= ~LC_L0S_INACTIVITY_MASK;
9849 				if (orig != data)
9850 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9851 			}
9852 		}
9853 	}
9854 }
9855