/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_audio.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");

MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");

MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");
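
/*
 * Two firmware naming schemes are declared per ASIC: the legacy
 * ALL-CAPS images and the newer lowercase images that carry a common
 * header (flagged at load time via rdev->new_fw).  The microcode
 * fetch code tries the newer files first and falls back to the
 * legacy ones.
 */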

static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

/**
 * cik_get_allowed_info_register - fetch the register for the info ioctl
 *
 * @rdev: radeon_device pointer
 * @reg: register offset in bytes
 * @val: register value
 *
 * Returns 0 for success or -EINVAL for an invalid register
 *
 */
int cik_get_allowed_info_register(struct radeon_device *rdev,
				  u32 reg, u32 *val)
{
	switch (reg) {
	case GRBM_STATUS:
	case GRBM_STATUS2:
	case GRBM_STATUS_SE0:
	case GRBM_STATUS_SE1:
	case GRBM_STATUS_SE2:
	case GRBM_STATUS_SE3:
	case SRBM_STATUS:
	case SRBM_STATUS2:
	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
	case UVD_STATUS:
	/* TODO VCE */
		*val = RREG32(reg);
		return 0;
	default:
		return -EINVAL;
	}
}
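
/*
 * Illustrative usage sketch: this helper backs the RADEON_INFO_READ_REG
 * query of the info ioctl, whitelisting the status registers above so
 * userspace can sample GPU state without gaining arbitrary MMIO access.
 * A hypothetical in-kernel caller would look roughly like:
 */
#if 0	/* usage sketch, not compiled */
	u32 val;

	if (cik_get_allowed_info_register(rdev, GRBM_STATUS, &val) == 0)
		DRM_INFO("GRBM_STATUS: 0x%08x\n", val);
#endif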

/*
 * DIDT indirect register accessors
 */
u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	r = RREG32(CIK_DIDT_IND_DATA);
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
	return r;
}

void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	WREG32(CIK_DIDT_IND_DATA, (v));
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
}
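
/*
 * A minimal sketch of how the DIDT accessors compose into a
 * read-modify-write (the register name and bit below are purely
 * illustrative; real callers go through the RREG32_DIDT()/WREG32_DIDT()
 * wrappers in radeon.h):
 */
#if 0	/* usage sketch, not compiled */
	u32 tmp = cik_didt_rreg(rdev, DIDT_SQ_CTRL0);
	tmp |= DIDT_CTRL_EN;
	cik_didt_wreg(rdev, DIDT_SQ_CTRL0, tmp);
#endif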

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
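
/*
 * Worked example for the conversions above: on CI a CTF_TEMP field of
 * 0x3a is 58 degrees C, reported as 58000 millidegrees, with bit 9
 * clamping the reading to the 255 C ceiling; on KV the raw counter is
 * scaled as (raw / 8) - 49, so a raw value of 856 likewise yields
 * 58 C (58000 millidegrees).
 */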

/*
 * PCIE port indirect register accessors
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* read back to post the index write */
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* read back to post the index write */
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);	/* read back to post the data write */
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
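
/*
 * The RLC save/restore lists below are handed off via rdev->rlc.reg_list
 * to the shared RLC setup code.  Each entry appears to pair an encoded
 * register reference ((instance/broadcast selector << 16) | (byte offset
 * >> 2)) with an initial value, with the bare 0x3/0x5 control words
 * delimiting sections of the list; the exact encoding is defined by the
 * RLC firmware, so treat this as a rough map rather than a spec.
 */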

static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
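
/*
 * The "golden" register tables below are consumed three words at a time
 * by radeon_program_register_sequence() (radeon_device.c): each triple
 * is { register offset, and_mask, or_mask }.  An and_mask of 0xffffffff
 * writes or_mask verbatim; anything narrower does a read-modify-write
 * that only touches the masked bits.
 */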

static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};


static void cik_init_golden_registers(struct radeon_device *rdev)
{
	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
	mutex_lock(&rdev->grbm_idx_mutex);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_MULLINS:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 godavari_golden_registers,
						 (const u32)ARRAY_SIZE(godavari_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
	mutex_unlock(&rdev->grbm_idx_mutex);
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}
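
/*
 * Worked example (values illustrative): reference_freq is parsed from
 * the ATOM firmware info table in 10 kHz units, so a 100 MHz reference
 * reads as 10000.  A discrete board with XTALIN_DIVIDE set then returns
 * 2500 (25 MHz); an IGP with GPU_COUNTER_CLK set returns 5000 (50 MHz).
 */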

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
{
	if (index < rdev->doorbell.num_doorbells) {
		return readl(rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
{
	if (index < rdev->doorbell.num_doorbells) {
		writel(v, rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}
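
/*
 * Usage sketch: ring handlers poke their write pointers through the
 * doorbell aperture via the RDOORBELL32()/WDOORBELL32() wrappers in
 * radeon.h, so a compute ring's set_wptr callback boils down to
 * something like:
 */
#if 0	/* usage sketch, not compiled */
	WDOORBELL32(ring->doorbell_index, ring->wptr);
#endif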

#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

#define HAWAII_IO_MC_REGS_SIZE 22

static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1817 
1818 
1819 /**
1820  * cik_srbm_select - select specific register instances
1821  *
1822  * @rdev: radeon_device pointer
1823  * @me: selected ME (micro engine)
1824  * @pipe: pipe
1825  * @queue: queue
1826  * @vmid: VMID
1827  *
1828  * Switches the currently active register instances.  Some
1829  * registers are instanced per VMID, others are instanced per
1830  * me/pipe/queue combination.
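 *
 * For example, cik_srbm_select(rdev, 1, 0, 0, 0) points the instanced
 * registers at ME 1 (the first compute micro engine) pipe 0 queue 0
 * with VMID 0, and cik_srbm_select(rdev, 0, 0, 0, 0) restores the
 * default selection.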
1831  */
1832 static void cik_srbm_select(struct radeon_device *rdev,
1833 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1834 {
1835 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1836 			     MEID(me & 0x3) |
1837 			     VMID(vmid & 0xf) |
1838 			     QUEUEID(queue & 0x7));
1839 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1840 }
1841 
1842 /* ucode loading */
1843 /**
1844  * ci_mc_load_microcode - load MC ucode into the hw
1845  *
1846  * @rdev: radeon_device pointer
1847  *
1848  * Load the GDDR MC ucode into the hw (CIK).
1849  * Returns 0 on success, error on failure.
1850  */
1851 int ci_mc_load_microcode(struct radeon_device *rdev)
1852 {
1853 	const __be32 *fw_data = NULL;
1854 	const __le32 *new_fw_data = NULL;
1855 	u32 running, tmp;
1856 	u32 *io_mc_regs = NULL;
1857 	const __le32 *new_io_mc_regs = NULL;
1858 	int i, regs_size, ucode_size;
1859 
1860 	if (!rdev->mc_fw)
1861 		return -EINVAL;
1862 
1863 	if (rdev->new_fw) {
1864 		const struct mc_firmware_header_v1_0 *hdr =
1865 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1866 
1867 		radeon_ucode_print_mc_hdr(&hdr->header);
1868 
1869 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1870 		new_io_mc_regs = (const __le32 *)
1871 			((const char *)rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1872 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1873 		new_fw_data = (const __le32 *)
1874 			((const char *)rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1875 	} else {
1876 		ucode_size = rdev->mc_fw->datasize / 4;
1877 
1878 		switch (rdev->family) {
1879 		case CHIP_BONAIRE:
1880 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1881 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1882 			break;
1883 		case CHIP_HAWAII:
1884 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1885 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1886 			break;
1887 		default:
1888 			return -EINVAL;
1889 		}
1890 		fw_data = (const __be32 *)rdev->mc_fw->data;
1891 	}
1892 
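	/* skip the load entirely if the MC is already running its ucode */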
1893 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1894 
1895 	if (running == 0) {
1900 
1901 		/* reset the engine and set to writable */
1902 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1903 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1904 
1905 		/* load mc io regs */
1906 		for (i = 0; i < regs_size; i++) {
1907 			if (rdev->new_fw) {
1908 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1909 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1910 			} else {
1911 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1912 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1913 			}
1914 		}
1915 
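		/*
		 * Board-specific fixup: extra MC sequencer writes for what
		 * appears to be a specific Bonaire variant (PCI device
		 * 0x6649), keyed on the MC_SEQ_MISC0 memory configuration
		 * signature.
		 */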
1916 		tmp = RREG32(MC_SEQ_MISC0);
1917 		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1918 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1919 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1920 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1921 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1922 		}
1923 
1924 		/* load the MC ucode */
1925 		for (i = 0; i < ucode_size; i++) {
1926 			if (rdev->new_fw)
1927 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1928 			else
1929 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1930 		}
1931 
1932 		/* put the engine back into the active state */
1933 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1934 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1935 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1936 
1937 		/* wait for training to complete */
1938 		for (i = 0; i < rdev->usec_timeout; i++) {
1939 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1940 				break;
1941 			udelay(1);
1942 		}
1943 		for (i = 0; i < rdev->usec_timeout; i++) {
1944 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1945 				break;
1946 			udelay(1);
1947 		}
1951 	}
1952 
1953 	return 0;
1954 }
1955 
1956 /**
1957  * cik_init_microcode - load ucode images from disk
1958  *
1959  * @rdev: radeon_device pointer
1960  *
1961  * Use the firmware interface to load the ucode images into
1962  * the driver (not loaded into hw).
1963  * Returns 0 on success, error on failure.
1964  */
1965 static int cik_init_microcode(struct radeon_device *rdev)
1966 {
1967 	const char *chip_name;
1968 	const char *new_chip_name;
1969 	size_t pfp_req_size, me_req_size, ce_req_size,
1970 		mec_req_size, rlc_req_size, mc_req_size = 0,
1971 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1972 	char fw_name[30];
1973 	int new_fw = 0;
1974 	int err;
1975 	int num_fw;
1976 
1977 	DRM_DEBUG("\n");
1978 
1979 	switch (rdev->family) {
1980 	case CHIP_BONAIRE:
1981 		chip_name = "BONAIRE";
1982 		new_chip_name = "bonaire";
1983 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1984 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1985 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1986 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1987 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1988 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1989 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1990 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1991 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1992 		num_fw = 8;
1993 		break;
1994 	case CHIP_HAWAII:
1995 		chip_name = "HAWAII";
1996 		new_chip_name = "hawaii";
1997 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1998 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1999 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2000 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2001 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2002 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2003 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2004 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2005 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2006 		num_fw = 8;
2007 		break;
2008 	case CHIP_KAVERI:
2009 		chip_name = "KAVERI";
2010 		new_chip_name = "kaveri";
2011 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2012 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2013 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2014 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2015 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2016 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2017 		num_fw = 7;
2018 		break;
2019 	case CHIP_KABINI:
2020 		chip_name = "KABINI";
2021 		new_chip_name = "kabini";
2022 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2023 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2024 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2025 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2026 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2027 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2028 		num_fw = 6;
2029 		break;
2030 	case CHIP_MULLINS:
2031 		chip_name = "MULLINS";
2032 		new_chip_name = "mullins";
2033 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2034 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2035 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2036 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2037 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2038 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2039 		num_fw = 6;
2040 		break;
2041 	default: BUG();
2042 	}
2043 
2044 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2045 
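	/*
	 * For each image, try the new-style (lowercase) firmware first;
	 * it carries a validated header.  Fall back to the legacy
	 * (uppercase) image, which is only checked against a fixed
	 * expected size.
	 */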
2046 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", new_chip_name);
2047 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2048 	if (err) {
2049 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
2050 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2051 		if (err)
2052 			goto out;
2053 		if (rdev->pfp_fw->datasize != pfp_req_size) {
2054 			printk(KERN_ERR
2055 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2056 			       rdev->pfp_fw->datasize, fw_name);
2057 			err = -EINVAL;
2058 			goto out;
2059 		}
2060 	} else {
2061 		err = radeon_ucode_validate(rdev->pfp_fw);
2062 		if (err) {
2063 			printk(KERN_ERR
2064 			       "cik_fw: validation failed for firmware \"%s\"\n",
2065 			       fw_name);
2066 			goto out;
2067 		} else {
2068 			new_fw++;
2069 		}
2070 	}
2071 
2072 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", new_chip_name);
2073 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2074 	if (err) {
2075 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
2076 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2077 		if (err)
2078 			goto out;
2079 		if (rdev->me_fw->datasize != me_req_size) {
2080 			printk(KERN_ERR
2081 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2082 			       rdev->me_fw->datasize, fw_name);
2083 			err = -EINVAL;
2084 		}
2085 	} else {
2086 		err = radeon_ucode_validate(rdev->me_fw);
2087 		if (err) {
2088 			printk(KERN_ERR
2089 			       "cik_fw: validation failed for firmware \"%s\"\n",
2090 			       fw_name);
2091 			goto out;
2092 		} else {
2093 			new_fw++;
2094 		}
2095 	}
2096 
2097 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", new_chip_name);
2098 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2099 	if (err) {
2100 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
2101 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2102 		if (err)
2103 			goto out;
2104 		if (rdev->ce_fw->datasize != ce_req_size) {
2105 			printk(KERN_ERR
2106 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2107 			       rdev->ce_fw->datasize, fw_name);
2108 			err = -EINVAL;
2109 		}
2110 	} else {
2111 		err = radeon_ucode_validate(rdev->ce_fw);
2112 		if (err) {
2113 			printk(KERN_ERR
2114 			       "cik_fw: validation failed for firmware \"%s\"\n",
2115 			       fw_name);
2116 			goto out;
2117 		} else {
2118 			new_fw++;
2119 		}
2120 	}
2121 
2122 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", new_chip_name);
2123 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2124 	if (err) {
2125 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", chip_name);
2126 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2127 		if (err)
2128 			goto out;
2129 		if (rdev->mec_fw->datasize != mec_req_size) {
2130 			printk(KERN_ERR
2131 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2132 			       rdev->mec_fw->datasize, fw_name);
2133 			err = -EINVAL;
2134 		}
2135 	} else {
2136 		err = radeon_ucode_validate(rdev->mec_fw);
2137 		if (err) {
2138 			printk(KERN_ERR
2139 			       "cik_fw: validation failed for firmware \"%s\"\n",
2140 			       fw_name);
2141 			goto out;
2142 		} else {
2143 			new_fw++;
2144 		}
2145 	}
2146 
2147 	if (rdev->family == CHIP_KAVERI) {
2148 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec2", new_chip_name);
2149 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2150 		if (err) {
2151 			goto out;
2152 		} else {
2153 			err = radeon_ucode_validate(rdev->mec2_fw);
2154 			if (err) {
2155 				goto out;
2156 			} else {
2157 				new_fw++;
2158 			}
2159 		}
2160 	}
2161 
2162 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", new_chip_name);
2163 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2164 	if (err) {
2165 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name);
2166 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2167 		if (err)
2168 			goto out;
2169 		if (rdev->rlc_fw->datasize != rlc_req_size) {
2170 			printk(KERN_ERR
2171 			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2172 			       rdev->rlc_fw->datasize, fw_name);
2173 			err = -EINVAL;
2174 		}
2175 	} else {
2176 		err = radeon_ucode_validate(rdev->rlc_fw);
2177 		if (err) {
2178 			printk(KERN_ERR
2179 			       "cik_fw: validation failed for firmware \"%s\"\n",
2180 			       fw_name);
2181 			goto out;
2182 		} else {
2183 			new_fw++;
2184 		}
2185 	}
2186 
2187 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", new_chip_name);
2188 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2189 	if (err) {
2190 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", chip_name);
2191 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2192 		if (err)
2193 			goto out;
2194 		if (rdev->sdma_fw->datasize != sdma_req_size) {
2195 			printk(KERN_ERR
2196 			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2197 			       rdev->sdma_fw->datasize, fw_name);
2198 			err = -EINVAL;
2199 		}
2200 	} else {
2201 		err = radeon_ucode_validate(rdev->sdma_fw);
2202 		if (err) {
2203 			printk(KERN_ERR
2204 			       "cik_fw: validation failed for firmware \"%s\"\n",
2205 			       fw_name);
2206 			goto out;
2207 		} else {
2208 			new_fw++;
2209 		}
2210 	}
2211 
2212 	/* No SMC, MC ucode on APUs */
2213 	if (!(rdev->flags & RADEON_IS_IGP)) {
2214 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", new_chip_name);
2215 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2216 		if (err) {
2217 			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc2", chip_name);
2218 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2219 			if (err) {
2220 				ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
2221 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2222 				if (err)
2223 					goto out;
2224 			}
2225 			if ((rdev->mc_fw->datasize != mc_req_size) &&
2226 			    (rdev->mc_fw->datasize != mc2_req_size)) {
2227 				printk(KERN_ERR
2228 				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2229 				       rdev->mc_fw->datasize, fw_name);
2230 				err = -EINVAL;
2231 			}
2232 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->datasize);
2233 		} else {
2234 			err = radeon_ucode_validate(rdev->mc_fw);
2235 			if (err) {
2236 				printk(KERN_ERR
2237 				       "cik_fw: validation failed for firmware \"%s\"\n",
2238 				       fw_name);
2239 				goto out;
2240 			} else {
2241 				new_fw++;
2242 			}
2243 		}
2244 
2245 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", new_chip_name);
2246 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2247 		if (err) {
2248 			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
2249 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2250 			if (err) {
2251 				printk(KERN_ERR
2252 				       "smc: error loading firmware \"%s\"\n",
2253 				       fw_name);
2254 				release_firmware(rdev->smc_fw);
2255 				rdev->smc_fw = NULL;
2256 				err = 0;
2257 			} else if (rdev->smc_fw->datasize != smc_req_size) {
2258 				printk(KERN_ERR
2259 				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2260 				       rdev->smc_fw->datasize, fw_name);
2261 				err = -EINVAL;
2262 			}
2263 		} else {
2264 			err = radeon_ucode_validate(rdev->smc_fw);
2265 			if (err) {
2266 				printk(KERN_ERR
2267 				       "cik_fw: validation failed for firmware \"%s\"\n",
2268 				       fw_name);
2269 				goto out;
2270 			} else {
2271 				new_fw++;
2272 			}
2273 		}
2274 	}
2275 
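	/*
	 * The images must be uniformly new-style or uniformly legacy;
	 * a mix would confuse the load paths that key off rdev->new_fw.
	 */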
2276 	if (new_fw == 0) {
2277 		rdev->new_fw = false;
2278 	} else if (new_fw < num_fw) {
2279 		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2280 		err = -EINVAL;
2281 	} else {
2282 		rdev->new_fw = true;
2283 	}
2284 
2285 out:
2286 	if (err) {
2287 		if (err != -EINVAL)
2288 			printk(KERN_ERR
2289 			       "cik_cp: Failed to load firmware \"%s\"\n",
2290 			       fw_name);
2291 		release_firmware(rdev->pfp_fw);
2292 		rdev->pfp_fw = NULL;
2293 		release_firmware(rdev->me_fw);
2294 		rdev->me_fw = NULL;
2295 		release_firmware(rdev->ce_fw);
2296 		rdev->ce_fw = NULL;
2297 		release_firmware(rdev->mec_fw);
2298 		rdev->mec_fw = NULL;
2299 		release_firmware(rdev->mec2_fw);
2300 		rdev->mec2_fw = NULL;
2301 		release_firmware(rdev->rlc_fw);
2302 		rdev->rlc_fw = NULL;
2303 		release_firmware(rdev->sdma_fw);
2304 		rdev->sdma_fw = NULL;
2305 		release_firmware(rdev->mc_fw);
2306 		rdev->mc_fw = NULL;
2307 		release_firmware(rdev->smc_fw);
2308 		rdev->smc_fw = NULL;
2309 	}
2310 	return err;
2311 }
2312 
2313 /**
2314  * cik_fini_microcode - drop the firmware image references
2315  *
2316  * @rdev: radeon_device pointer
2317  *
2318  * Drop the pfp, me, mec, mec2, rlc, sdma, mc, smc and ce firmware image references.
2319  * Called at driver shutdown.
2320  */
2321 static void cik_fini_microcode(struct radeon_device *rdev)
2322 {
2323 	release_firmware(rdev->pfp_fw);
2324 	rdev->pfp_fw = NULL;
2325 	release_firmware(rdev->me_fw);
2326 	rdev->me_fw = NULL;
2327 	release_firmware(rdev->ce_fw);
2328 	rdev->ce_fw = NULL;
2329 	release_firmware(rdev->mec_fw);
2330 	rdev->mec_fw = NULL;
2331 	release_firmware(rdev->mec2_fw);
2332 	rdev->mec2_fw = NULL;
2333 	release_firmware(rdev->rlc_fw);
2334 	rdev->rlc_fw = NULL;
2335 	release_firmware(rdev->sdma_fw);
2336 	rdev->sdma_fw = NULL;
2337 	release_firmware(rdev->mc_fw);
2338 	rdev->mc_fw = NULL;
2339 	release_firmware(rdev->smc_fw);
2340 	rdev->smc_fw = NULL;
2341 }
2342 
2343 /*
2344  * Core functions
2345  */
2346 /**
2347  * cik_tiling_mode_table_init - init the hw tiling table
2348  *
2349  * @rdev: radeon_device pointer
2350  *
2351  * Starting with SI, the tiling setup is done globally in a
2352  * set of 32 tiling modes.  Rather than selecting each set of
2353  * parameters per surface as on older asics, we just select
2354  * which index in the tiling table we want to use, and the
2355  * surface uses those parameters (CIK).
2356  */
2357 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2358 {
2359 	u32 *tile = rdev->config.cik.tile_mode_array;
2360 	u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2361 	const u32 num_tile_mode_states =
2362 			ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2363 	const u32 num_secondary_tile_mode_states =
2364 			ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2365 	u32 reg_offset, split_equal_to_row_size;
2366 	u32 num_pipe_configs;
2367 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2368 		rdev->config.cik.max_shader_engines;
2369 
2370 	switch (rdev->config.cik.mem_row_size_in_kb) {
2371 	case 1:
2372 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2373 		break;
2374 	case 2:
2375 	default:
2376 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2377 		break;
2378 	case 4:
2379 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2380 		break;
2381 	}
2382 
2383 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2384 	if (num_pipe_configs > 8)
2385 		num_pipe_configs = 16;
2386 
2387 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2388 		tile[reg_offset] = 0;
2389 	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2390 		macrotile[reg_offset] = 0;
2391 
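	/*
	 * Table entries not assigned below (e.g. tile[15] and
	 * tile[18]-tile[26]) are left at the zero value set by the
	 * loops above.
	 */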
2392 	switch (num_pipe_configs) {
2393 	case 16:
2394 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2395 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2396 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2397 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2398 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2399 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2400 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2402 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2403 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2404 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2406 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2407 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2408 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2410 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2411 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2412 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2413 			   TILE_SPLIT(split_equal_to_row_size));
2414 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2415 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2416 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2417 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2418 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2419 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2421 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2422 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2423 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424 			   TILE_SPLIT(split_equal_to_row_size));
2425 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2426 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2427 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2428 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2429 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2430 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2431 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2432 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2435 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2436 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2437 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2438 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2439 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2440 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2441 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2442 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2443 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2444 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2445 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2446 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2447 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2448 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2449 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2450 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2451 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2452 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2453 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2454 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2455 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2456 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2457 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2458 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2459 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2460 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2461 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2462 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2463 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2464 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2465 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2466 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2467 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2468 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2469 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2470 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2471 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2472 
2473 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2475 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2476 			   NUM_BANKS(ADDR_SURF_16_BANK));
2477 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2479 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2480 			   NUM_BANKS(ADDR_SURF_16_BANK));
2481 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2483 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2484 			   NUM_BANKS(ADDR_SURF_16_BANK));
2485 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2487 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2488 			   NUM_BANKS(ADDR_SURF_16_BANK));
2489 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2492 			   NUM_BANKS(ADDR_SURF_8_BANK));
2493 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2495 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2496 			   NUM_BANKS(ADDR_SURF_4_BANK));
2497 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2500 			   NUM_BANKS(ADDR_SURF_2_BANK));
2501 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2502 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2503 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2504 			   NUM_BANKS(ADDR_SURF_16_BANK));
2505 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2506 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2507 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2508 			   NUM_BANKS(ADDR_SURF_16_BANK));
2509 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2511 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2512 			    NUM_BANKS(ADDR_SURF_16_BANK));
2513 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2514 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2515 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2516 			    NUM_BANKS(ADDR_SURF_8_BANK));
2517 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2518 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2519 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2520 			    NUM_BANKS(ADDR_SURF_4_BANK));
2521 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2523 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2524 			    NUM_BANKS(ADDR_SURF_2_BANK));
2525 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2526 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2527 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2528 			    NUM_BANKS(ADDR_SURF_2_BANK));
2529 
2530 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2531 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2532 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2533 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2534 		break;
2535 
2536 	case 8:
2537 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2538 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2539 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2540 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2541 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2542 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2543 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2544 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2545 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2546 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2547 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2549 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2551 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2553 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2554 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2555 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2556 			   TILE_SPLIT(split_equal_to_row_size));
2557 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2558 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2560 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2561 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2562 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2564 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2565 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2566 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567 			   TILE_SPLIT(split_equal_to_row_size));
2568 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2569 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2570 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2571 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2572 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2573 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2574 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2575 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2576 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2577 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2578 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2579 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2580 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2581 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2582 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2583 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2584 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2585 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2586 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2587 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2588 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2589 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2590 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2592 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2593 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2594 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2595 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2596 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2597 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2598 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2599 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2600 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2601 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2602 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2603 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2605 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2606 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2607 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2608 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2609 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2610 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2611 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2612 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2613 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2614 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2615 
2616 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2618 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2619 				NUM_BANKS(ADDR_SURF_16_BANK));
2620 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2622 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2623 				NUM_BANKS(ADDR_SURF_16_BANK));
2624 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2626 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2627 				NUM_BANKS(ADDR_SURF_16_BANK));
2628 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2630 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2631 				NUM_BANKS(ADDR_SURF_16_BANK));
2632 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2633 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2634 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2635 				NUM_BANKS(ADDR_SURF_8_BANK));
2636 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2638 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2639 				NUM_BANKS(ADDR_SURF_4_BANK));
2640 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2642 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2643 				NUM_BANKS(ADDR_SURF_2_BANK));
2644 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2645 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2646 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2647 				NUM_BANKS(ADDR_SURF_16_BANK));
2648 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2649 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2650 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2651 				NUM_BANKS(ADDR_SURF_16_BANK));
2652 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2653 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2654 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2655 				NUM_BANKS(ADDR_SURF_16_BANK));
2656 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2657 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2658 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2659 				NUM_BANKS(ADDR_SURF_16_BANK));
2660 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2661 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2662 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2663 				NUM_BANKS(ADDR_SURF_8_BANK));
2664 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2665 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2666 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2667 				NUM_BANKS(ADDR_SURF_4_BANK));
2668 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2669 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2670 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2671 				NUM_BANKS(ADDR_SURF_2_BANK));
2672 
2673 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2674 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2675 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2676 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2677 		break;
2678 
2679 	case 4:
2680 		if (num_rbs == 4) {
2681 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2682 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2683 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2684 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2685 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2687 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2688 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2689 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2690 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2691 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2692 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2693 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2695 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2696 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2697 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2698 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2699 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2700 			   TILE_SPLIT(split_equal_to_row_size));
2701 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2702 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2704 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2705 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2706 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2708 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2709 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2710 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711 			   TILE_SPLIT(split_equal_to_row_size));
2712 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2713 			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
2714 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2715 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2716 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2717 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2718 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2719 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2720 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2721 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2722 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2723 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2724 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2725 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2726 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2727 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2728 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2729 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2730 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2731 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2732 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2733 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2734 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2735 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2736 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2737 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2738 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2739 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2740 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2741 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2742 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2743 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2744 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2745 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2746 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2747 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2748 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2749 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2750 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2751 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2752 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2753 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2754 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2755 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2756 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2757 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2758 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2759 
2760 		} else if (num_rbs < 4) {
2761 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2762 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2763 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2764 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2765 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2766 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2767 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2768 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2769 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2770 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2771 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2772 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2773 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2774 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2775 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2776 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2777 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2778 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2779 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2780 			   TILE_SPLIT(split_equal_to_row_size));
2781 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2782 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2783 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2784 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2785 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2786 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2788 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2789 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2790 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791 			   TILE_SPLIT(split_equal_to_row_size));
2792 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2793 			   PIPE_CONFIG(ADDR_SURF_P4_8x16));
2794 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2795 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2796 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2797 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2798 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2799 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2800 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2801 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2802 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2803 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2804 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2805 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2806 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2807 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2808 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2809 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2810 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2811 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2812 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2813 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2814 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2815 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2816 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2817 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2818 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2819 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2820 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2821 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2822 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2823 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2824 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2825 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2826 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2827 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2828 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2829 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2830 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2831 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2832 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2833 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2834 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2835 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2836 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2837 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2838 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2839 		}
2840 
2841 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2842 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2843 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2844 				NUM_BANKS(ADDR_SURF_16_BANK));
2845 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2846 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2847 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2848 				NUM_BANKS(ADDR_SURF_16_BANK));
2849 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2850 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2851 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2852 				NUM_BANKS(ADDR_SURF_16_BANK));
2853 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2855 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2856 				NUM_BANKS(ADDR_SURF_16_BANK));
2857 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2859 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2860 				NUM_BANKS(ADDR_SURF_16_BANK));
2861 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2863 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2864 				NUM_BANKS(ADDR_SURF_8_BANK));
2865 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2866 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2867 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2868 				NUM_BANKS(ADDR_SURF_4_BANK));
2869 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2870 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2871 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2872 				NUM_BANKS(ADDR_SURF_16_BANK));
2873 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2874 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2875 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2876 				NUM_BANKS(ADDR_SURF_16_BANK));
2877 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2878 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2879 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2880 				NUM_BANKS(ADDR_SURF_16_BANK));
2881 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2882 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2883 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2884 				NUM_BANKS(ADDR_SURF_16_BANK));
2885 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2886 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2887 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2888 				NUM_BANKS(ADDR_SURF_16_BANK));
2889 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2890 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2891 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2892 				NUM_BANKS(ADDR_SURF_8_BANK));
2893 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2894 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2895 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2896 				NUM_BANKS(ADDR_SURF_4_BANK));
2897 
2898 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2899 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2900 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2901 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2902 		break;
2903 
2904 	case 2:
2905 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2906 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2907 			   PIPE_CONFIG(ADDR_SURF_P2) |
2908 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2909 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2910 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2911 			   PIPE_CONFIG(ADDR_SURF_P2) |
2912 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2913 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2914 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2915 			   PIPE_CONFIG(ADDR_SURF_P2) |
2916 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2917 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2919 			   PIPE_CONFIG(ADDR_SURF_P2) |
2920 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2921 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2922 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2923 			   PIPE_CONFIG(ADDR_SURF_P2) |
2924 			   TILE_SPLIT(split_equal_to_row_size));
2925 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2926 			   PIPE_CONFIG(ADDR_SURF_P2) |
2927 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2928 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2929 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2930 			   PIPE_CONFIG(ADDR_SURF_P2) |
2931 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2932 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2933 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2934 			   PIPE_CONFIG(ADDR_SURF_P2) |
2935 			   TILE_SPLIT(split_equal_to_row_size));
2936 		tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2937 			   PIPE_CONFIG(ADDR_SURF_P2);
2938 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2939 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2940 			   PIPE_CONFIG(ADDR_SURF_P2));
2941 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2942 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2943 			    PIPE_CONFIG(ADDR_SURF_P2) |
2944 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2945 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2946 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2947 			    PIPE_CONFIG(ADDR_SURF_P2) |
2948 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2949 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2950 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2951 			    PIPE_CONFIG(ADDR_SURF_P2) |
2952 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2953 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2954 			    PIPE_CONFIG(ADDR_SURF_P2) |
2955 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2956 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2957 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2958 			    PIPE_CONFIG(ADDR_SURF_P2) |
2959 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2960 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2961 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2962 			    PIPE_CONFIG(ADDR_SURF_P2) |
2963 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2964 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2965 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2966 			    PIPE_CONFIG(ADDR_SURF_P2) |
2967 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2968 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2969 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2970 			    PIPE_CONFIG(ADDR_SURF_P2));
2971 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2972 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2973 			    PIPE_CONFIG(ADDR_SURF_P2) |
2974 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2975 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2976 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2977 			    PIPE_CONFIG(ADDR_SURF_P2) |
2978 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2979 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2980 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2981 			    PIPE_CONFIG(ADDR_SURF_P2) |
2982 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2983 
2984 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2985 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2986 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2987 				NUM_BANKS(ADDR_SURF_16_BANK));
2988 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2989 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2990 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2991 				NUM_BANKS(ADDR_SURF_16_BANK));
2992 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2993 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2994 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2995 				NUM_BANKS(ADDR_SURF_16_BANK));
2996 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2997 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2998 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2999 				NUM_BANKS(ADDR_SURF_16_BANK));
3000 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3001 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3002 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3003 				NUM_BANKS(ADDR_SURF_16_BANK));
3004 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3005 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3006 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3007 				NUM_BANKS(ADDR_SURF_16_BANK));
3008 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3009 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3010 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3011 				NUM_BANKS(ADDR_SURF_8_BANK));
3012 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3013 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3014 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3015 				NUM_BANKS(ADDR_SURF_16_BANK));
3016 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3017 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3018 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3019 				NUM_BANKS(ADDR_SURF_16_BANK));
3020 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3021 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3022 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3023 				NUM_BANKS(ADDR_SURF_16_BANK));
3024 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3025 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3026 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3027 				NUM_BANKS(ADDR_SURF_16_BANK));
3028 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3029 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3030 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3031 				NUM_BANKS(ADDR_SURF_16_BANK));
3032 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3033 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3034 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3035 				NUM_BANKS(ADDR_SURF_16_BANK));
3036 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3037 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3038 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3039 				NUM_BANKS(ADDR_SURF_8_BANK));
3040 
3041 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3042 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3043 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3044 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3045 		break;
3046 
3047 	default:
3048 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3049 	}
3050 }
3051 
3052 /**
3053  * cik_select_se_sh - select which SE, SH to address
3054  *
3055  * @rdev: radeon_device pointer
3056  * @se_num: shader engine to address
3057  * @sh_num: sh block to address
3058  *
3059  * Select which SE, SH combinations to address. Certain
3060  * registers are instanced per SE or SH.  0xffffffff means
3061  * broadcast to all SEs or SHs (CIK).
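 *
 * For example, cik_select_se_sh(rdev, 0xffffffff, 0xffffffff) restores
 * broadcast mode after a per-SE/SH access, as cik_setup_rb() does
 * below.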
3062  */
3063 static void cik_select_se_sh(struct radeon_device *rdev,
3064 			     u32 se_num, u32 sh_num)
3065 {
3066 	u32 data = INSTANCE_BROADCAST_WRITES;
3067 
3068 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3069 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3070 	else if (se_num == 0xffffffff)
3071 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3072 	else if (sh_num == 0xffffffff)
3073 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3074 	else
3075 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3076 	WREG32(GRBM_GFX_INDEX, data);
3077 }
3078 
3079 /**
3080  * cik_create_bitmask - create a bitmask
3081  *
3082  * @bit_width: length of the mask
3083  *
3084  * Create a variable length bit mask (CIK).
3085  * Returns the bitmask.
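 *
 * For example, cik_create_bitmask(4) returns 0xf.  For bit_width < 32
 * this is equivalent to ((1 << bit_width) - 1).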
3086  */
3087 static u32 cik_create_bitmask(u32 bit_width)
3088 {
3089 	u32 i, mask = 0;
3090 
3091 	for (i = 0; i < bit_width; i++) {
3092 		mask <<= 1;
3093 		mask |= 1;
3094 	}
3095 	return mask;
3096 }
3097 
3098 /**
3099  * cik_get_rb_disabled - computes the mask of disabled RBs
3100  *
3101  * @rdev: radeon_device pointer
3102  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3104  * @sh_per_se: number of SH blocks per SE for the asic
3105  *
3106  * Calculates the bitmask of disabled RBs (CIK).
3107  * Returns the disabled RB bitmask.
3108  */
3109 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3110 			      u32 max_rb_num_per_se,
3111 			      u32 sh_per_se)
3112 {
3113 	u32 data, mask;
3114 
3115 	data = RREG32(CC_RB_BACKEND_DISABLE);
3116 	if (data & 1)
3117 		data &= BACKEND_DISABLE_MASK;
3118 	else
3119 		data = 0;
3120 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3121 
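	/* shift the combined hw + user disable field down to bit 0 */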
3122 	data >>= BACKEND_DISABLE_SHIFT;
3123 
3124 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3125 
3126 	return data & mask;
3127 }
3128 
3129 /**
3130  * cik_setup_rb - setup the RBs on the asic
3131  *
3132  * @rdev: radeon_device pointer
3133  * @se_num: number of SEs (shader engines) for the asic
3134  * @sh_per_se: number of SH blocks per SE for the asic
3135  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3136  *
3137  * Configures per-SE/SH RB registers (CIK).
3138  */
3139 static void cik_setup_rb(struct radeon_device *rdev,
3140 			 u32 se_num, u32 sh_per_se,
3141 			 u32 max_rb_num_per_se)
3142 {
3143 	int i, j;
3144 	u32 data, mask;
3145 	u32 disabled_rbs = 0;
3146 	u32 enabled_rbs = 0;
3147 
3148 	mutex_lock(&rdev->grbm_idx_mutex);
3149 	for (i = 0; i < se_num; i++) {
3150 		for (j = 0; j < sh_per_se; j++) {
3151 			cik_select_se_sh(rdev, i, j);
3152 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3153 			if (rdev->family == CHIP_HAWAII)
3154 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3155 			else
3156 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3157 		}
3158 	}
3159 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3160 	mutex_unlock(&rdev->grbm_idx_mutex);
3161 
3162 	mask = 1;
3163 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3164 		if (!(disabled_rbs & mask))
3165 			enabled_rbs |= mask;
3166 		mask <<= 1;
3167 	}
3168 
3169 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3170 
3171 	mutex_lock(&rdev->grbm_idx_mutex);
3172 	for (i = 0; i < se_num; i++) {
3173 		cik_select_se_sh(rdev, i, 0xffffffff);
3174 		data = 0;
3175 		for (j = 0; j < sh_per_se; j++) {
3176 			switch (enabled_rbs & 3) {
3177 			case 0:
3178 				if (j == 0)
3179 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3180 				else
3181 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3182 				break;
3183 			case 1:
3184 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3185 				break;
3186 			case 2:
3187 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3188 				break;
3189 			case 3:
3190 			default:
3191 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3192 				break;
3193 			}
3194 			enabled_rbs >>= 2;
3195 		}
3196 		WREG32(PA_SC_RASTER_CONFIG, data);
3197 	}
3198 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3199 	mutex_unlock(&rdev->grbm_idx_mutex);
3200 }
3201 
3202 /**
3203  * cik_gpu_init - setup the 3D engine
3204  *
3205  * @rdev: radeon_device pointer
3206  *
3207  * Configures the 3D engine and tiling configuration
3208  * registers so that the 3D engine is usable.
3209  */
3210 static void cik_gpu_init(struct radeon_device *rdev)
3211 {
3212 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3213 	u32 mc_shared_chmap, mc_arb_ramcfg;
3214 	u32 hdp_host_path_cntl;
3215 	u32 tmp;
3216 	int i, j;
3217 
3218 	switch (rdev->family) {
3219 	case CHIP_BONAIRE:
3220 		rdev->config.cik.max_shader_engines = 2;
3221 		rdev->config.cik.max_tile_pipes = 4;
3222 		rdev->config.cik.max_cu_per_sh = 7;
3223 		rdev->config.cik.max_sh_per_se = 1;
3224 		rdev->config.cik.max_backends_per_se = 2;
3225 		rdev->config.cik.max_texture_channel_caches = 4;
3226 		rdev->config.cik.max_gprs = 256;
3227 		rdev->config.cik.max_gs_threads = 32;
3228 		rdev->config.cik.max_hw_contexts = 8;
3229 
3230 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3231 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3232 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3233 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3234 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3235 		break;
3236 	case CHIP_HAWAII:
3237 		rdev->config.cik.max_shader_engines = 4;
3238 		rdev->config.cik.max_tile_pipes = 16;
3239 		rdev->config.cik.max_cu_per_sh = 11;
3240 		rdev->config.cik.max_sh_per_se = 1;
3241 		rdev->config.cik.max_backends_per_se = 4;
3242 		rdev->config.cik.max_texture_channel_caches = 16;
3243 		rdev->config.cik.max_gprs = 256;
3244 		rdev->config.cik.max_gs_threads = 32;
3245 		rdev->config.cik.max_hw_contexts = 8;
3246 
3247 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3248 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3249 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3250 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3251 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3252 		break;
3253 	case CHIP_KAVERI:
3254 		rdev->config.cik.max_shader_engines = 1;
3255 		rdev->config.cik.max_tile_pipes = 4;
3256 		if ((rdev->pdev->device == 0x1304) ||
3257 		    (rdev->pdev->device == 0x1305) ||
3258 		    (rdev->pdev->device == 0x130C) ||
3259 		    (rdev->pdev->device == 0x130F) ||
3260 		    (rdev->pdev->device == 0x1310) ||
3261 		    (rdev->pdev->device == 0x1311) ||
3262 		    (rdev->pdev->device == 0x131C)) {
3263 			rdev->config.cik.max_cu_per_sh = 8;
3264 			rdev->config.cik.max_backends_per_se = 2;
3265 		} else if ((rdev->pdev->device == 0x1309) ||
3266 			   (rdev->pdev->device == 0x130A) ||
3267 			   (rdev->pdev->device == 0x130D) ||
3268 			   (rdev->pdev->device == 0x1313) ||
3269 			   (rdev->pdev->device == 0x131D)) {
3270 			rdev->config.cik.max_cu_per_sh = 6;
3271 			rdev->config.cik.max_backends_per_se = 2;
3272 		} else if ((rdev->pdev->device == 0x1306) ||
3273 			   (rdev->pdev->device == 0x1307) ||
3274 			   (rdev->pdev->device == 0x130B) ||
3275 			   (rdev->pdev->device == 0x130E) ||
3276 			   (rdev->pdev->device == 0x1315) ||
3277 			   (rdev->pdev->device == 0x1318) ||
3278 			   (rdev->pdev->device == 0x131B)) {
3279 			rdev->config.cik.max_cu_per_sh = 4;
3280 			rdev->config.cik.max_backends_per_se = 1;
3281 		} else {
3282 			rdev->config.cik.max_cu_per_sh = 3;
3283 			rdev->config.cik.max_backends_per_se = 1;
3284 		}
3285 		rdev->config.cik.max_sh_per_se = 1;
3286 		rdev->config.cik.max_texture_channel_caches = 4;
3287 		rdev->config.cik.max_gprs = 256;
3288 		rdev->config.cik.max_gs_threads = 16;
3289 		rdev->config.cik.max_hw_contexts = 8;
3290 
3291 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3292 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3293 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3294 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3295 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3296 		break;
3297 	case CHIP_KABINI:
3298 	case CHIP_MULLINS:
3299 	default:
3300 		rdev->config.cik.max_shader_engines = 1;
3301 		rdev->config.cik.max_tile_pipes = 2;
3302 		rdev->config.cik.max_cu_per_sh = 2;
3303 		rdev->config.cik.max_sh_per_se = 1;
3304 		rdev->config.cik.max_backends_per_se = 1;
3305 		rdev->config.cik.max_texture_channel_caches = 2;
3306 		rdev->config.cik.max_gprs = 256;
3307 		rdev->config.cik.max_gs_threads = 16;
3308 		rdev->config.cik.max_hw_contexts = 8;
3309 
3310 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3311 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3312 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3313 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3314 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3315 		break;
3316 	}
3317 
3318 	/* Initialize HDP */
3319 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3320 		WREG32((0x2c14 + j), 0x00000000);
3321 		WREG32((0x2c18 + j), 0x00000000);
3322 		WREG32((0x2c1c + j), 0x00000000);
3323 		WREG32((0x2c20 + j), 0x00000000);
3324 		WREG32((0x2c24 + j), 0x00000000);
3325 	}
3326 
3327 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3328 	WREG32(SRBM_INT_CNTL, 0x1);
3329 	WREG32(SRBM_INT_ACK, 0x1);
3330 
3331 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3332 
3333 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3334 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3335 
3336 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3337 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3338 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3339 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3340 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3341 		rdev->config.cik.mem_row_size_in_kb = 4;
3342 	/* XXX use MC settings? */
3343 	rdev->config.cik.shader_engine_tile_size = 32;
3344 	rdev->config.cik.num_gpus = 1;
3345 	rdev->config.cik.multi_gpu_tile_size = 64;
3346 
3347 	/* fix up row size */
3348 	gb_addr_config &= ~ROW_SIZE_MASK;
3349 	switch (rdev->config.cik.mem_row_size_in_kb) {
3350 	case 1:
3351 	default:
3352 		gb_addr_config |= ROW_SIZE(0);
3353 		break;
3354 	case 2:
3355 		gb_addr_config |= ROW_SIZE(1);
3356 		break;
3357 	case 4:
3358 		gb_addr_config |= ROW_SIZE(2);
3359 		break;
3360 	}
3361 
3362 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3363 	 * not have bank info, so create a custom tiling dword.
3364 	 * bits 3:0   num_pipes
3365 	 * bits 7:4   num_banks
3366 	 * bits 11:8  group_size
3367 	 * bits 15:12 row_size
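	 * e.g. 4 tile pipes encode as 2 in bits 3:0 and a 1KB row size as
	 * 0 in bits 15:12; the bank and group_size fields are copied from
	 * MC_ARB_RAMCFG and GB_ADDR_CONFIG below.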
3368 	 */
3369 	rdev->config.cik.tile_config = 0;
3370 	switch (rdev->config.cik.num_tile_pipes) {
3371 	case 1:
3372 		rdev->config.cik.tile_config |= (0 << 0);
3373 		break;
3374 	case 2:
3375 		rdev->config.cik.tile_config |= (1 << 0);
3376 		break;
3377 	case 4:
3378 		rdev->config.cik.tile_config |= (2 << 0);
3379 		break;
3380 	case 8:
3381 	default:
3382 		/* XXX what about 12? */
3383 		rdev->config.cik.tile_config |= (3 << 0);
3384 		break;
3385 	}
3386 	rdev->config.cik.tile_config |=
3387 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3388 	rdev->config.cik.tile_config |=
3389 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3390 	rdev->config.cik.tile_config |=
3391 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3392 
3393 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3394 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3395 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3396 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3397 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3398 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3399 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3400 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3401 
3402 	cik_tiling_mode_table_init(rdev);
3403 
3404 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3405 		     rdev->config.cik.max_sh_per_se,
3406 		     rdev->config.cik.max_backends_per_se);
3407 
3408 	rdev->config.cik.active_cus = 0;
3409 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3410 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3411 			rdev->config.cik.active_cus +=
3412 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3413 		}
3414 	}
3415 
3416 	/* set HW defaults for 3D engine */
3417 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3418 
3419 	mutex_lock(&rdev->grbm_idx_mutex);
3420 	/*
	 * make sure that the following register writes are broadcast
	 * to all shader engines and blocks
3423 	 */
3424 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3425 	WREG32(SX_DEBUG_1, 0x20);
3426 
3427 	WREG32(TA_CNTL_AUX, 0x00010000);
3428 
3429 	tmp = RREG32(SPI_CONFIG_CNTL);
3430 	tmp |= 0x03000000;
3431 	WREG32(SPI_CONFIG_CNTL, tmp);
3432 
3433 	WREG32(SQ_CONFIG, 1);
3434 
3435 	WREG32(DB_DEBUG, 0);
3436 
3437 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3438 	tmp |= 0x00000400;
3439 	WREG32(DB_DEBUG2, tmp);
3440 
3441 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3442 	tmp |= 0x00020200;
3443 	WREG32(DB_DEBUG3, tmp);
3444 
3445 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3446 	tmp |= 0x00018208;
3447 	WREG32(CB_HW_CONTROL, tmp);
3448 
3449 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3450 
3451 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3452 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3453 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3454 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3455 
3456 	WREG32(VGT_NUM_INSTANCES, 1);
3457 
3458 	WREG32(CP_PERFMON_CNTL, 0);
3459 
3460 	WREG32(SQ_CONFIG, 0);
3461 
3462 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3463 					  FORCE_EOV_MAX_REZ_CNT(255)));
3464 
3465 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3466 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3467 
3468 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3469 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3470 
3471 	tmp = RREG32(HDP_MISC_CNTL);
3472 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3473 	WREG32(HDP_MISC_CNTL, tmp);
3474 
3475 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3476 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3477 
3478 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3479 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3480 	mutex_unlock(&rdev->grbm_idx_mutex);
3481 
3482 	udelay(50);
3483 }
3484 
3485 /*
3486  * GPU scratch registers helpers function.
3487  */
3488 /**
3489  * cik_scratch_init - setup driver info for CP scratch regs
3490  *
3491  * @rdev: radeon_device pointer
3492  *
3493  * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
3495  * is not used by default on newer asics (r6xx+).  On newer asics,
3496  * memory buffers are used for fences rather than scratch regs.
3497  */
3498 static void cik_scratch_init(struct radeon_device *rdev)
3499 {
3500 	int i;
3501 
3502 	rdev->scratch.num_reg = 7;
3503 	rdev->scratch.reg_base = SCRATCH_REG0;
3504 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3505 		rdev->scratch.free[i] = true;
3506 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3507 	}
3508 }
3509 
3510 /**
3511  * cik_ring_test - basic gfx ring test
3512  *
3513  * @rdev: radeon_device pointer
3514  * @ring: radeon_ring structure holding ring information
3515  *
3516  * Allocate a scratch register and write to it using the gfx ring (CIK).
3517  * Provides a basic gfx ring test to verify that the ring is working.
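 * The test seeds a scratch register with 0xCAFEDEAD, emits a
 * SET_UCONFIG_REG packet that writes 0xDEADBEEF to it, and then polls
 * the register until the new value lands or the timeout expires.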
 * Used by cik_cp_gfx_resume().
3519  * Returns 0 on success, error on failure.
3520  */
3521 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3522 {
3523 	uint32_t scratch;
3524 	uint32_t tmp = 0;
3525 	unsigned i;
3526 	int r;
3527 
3528 	r = radeon_scratch_get(rdev, &scratch);
3529 	if (r) {
3530 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3531 		return r;
3532 	}
3533 	WREG32(scratch, 0xCAFEDEAD);
3534 	r = radeon_ring_lock(rdev, ring, 3);
3535 	if (r) {
3536 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3537 		radeon_scratch_free(rdev, scratch);
3538 		return r;
3539 	}
3540 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3541 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3542 	radeon_ring_write(ring, 0xDEADBEEF);
3543 	radeon_ring_unlock_commit(rdev, ring, false);
3544 
3545 	for (i = 0; i < rdev->usec_timeout; i++) {
3546 		tmp = RREG32(scratch);
3547 		if (tmp == 0xDEADBEEF)
3548 			break;
3549 		DRM_UDELAY(1);
3550 	}
3551 	if (i < rdev->usec_timeout) {
3552 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3553 	} else {
3554 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3555 			  ring->idx, scratch, tmp);
3556 		r = -EINVAL;
3557 	}
3558 	radeon_scratch_free(rdev, scratch);
3559 	return r;
3560 }
3561 
3562 /**
3563  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3564  *
3565  * @rdev: radeon_device pointer
3566  * @ridx: radeon ring index
3567  *
3568  * Emits an hdp flush on the cp.
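 * Implemented as a WAIT_REG_MEM write/wait/write sequence: the packet
 * writes the per-ring mask to GPU_HDP_FLUSH_REQ, then waits until the
 * matching bits appear in GPU_HDP_FLUSH_DONE.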
3569  */
3570 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3571 				       int ridx)
3572 {
3573 	struct radeon_ring *ring = &rdev->ring[ridx];
3574 	u32 ref_and_mask;
3575 
3576 	switch (ring->idx) {
3577 	case CAYMAN_RING_TYPE_CP1_INDEX:
3578 	case CAYMAN_RING_TYPE_CP2_INDEX:
3579 	default:
3580 		switch (ring->me) {
3581 		case 0:
3582 			ref_and_mask = CP2 << ring->pipe;
3583 			break;
3584 		case 1:
3585 			ref_and_mask = CP6 << ring->pipe;
3586 			break;
3587 		default:
3588 			return;
3589 		}
3590 		break;
3591 	case RADEON_RING_TYPE_GFX_INDEX:
3592 		ref_and_mask = CP0;
3593 		break;
3594 	}
3595 
3596 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3597 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3598 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3599 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3600 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3601 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3602 	radeon_ring_write(ring, ref_and_mask);
3603 	radeon_ring_write(ring, ref_and_mask);
3604 	radeon_ring_write(ring, 0x20); /* poll interval */
3605 }
3606 
3607 /**
3608  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3609  *
3610  * @rdev: radeon_device pointer
3611  * @fence: radeon fence object
3612  *
 * Emits a fence sequence number on the gfx ring and flushes
3614  * GPU caches.
3615  */
3616 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3617 			     struct radeon_fence *fence)
3618 {
3619 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3620 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3621 
3622 	/* Workaround for cache flush problems. First send a dummy EOP
3623 	 * event down the pipe with seq one below.
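	 * DATA_SEL(1) selects a 32-bit seq write; the dummy event uses
	 * INT_SEL(0) (no interrupt) while the real event below uses
	 * INT_SEL(2) to raise the fence interrupt.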
3624 	 */
3625 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3626 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3627 				 EOP_TC_ACTION_EN |
3628 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3629 				 EVENT_INDEX(5)));
3630 	radeon_ring_write(ring, addr & 0xfffffffc);
3631 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3632 				DATA_SEL(1) | INT_SEL(0));
3633 	radeon_ring_write(ring, fence->seq - 1);
3634 	radeon_ring_write(ring, 0);
3635 
3636 	/* Then send the real EOP event down the pipe. */
3637 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3638 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3639 				 EOP_TC_ACTION_EN |
3640 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3641 				 EVENT_INDEX(5)));
3642 	radeon_ring_write(ring, addr & 0xfffffffc);
3643 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3644 	radeon_ring_write(ring, fence->seq);
3645 	radeon_ring_write(ring, 0);
3646 }
3647 
3648 /**
3649  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3650  *
3651  * @rdev: radeon_device pointer
3652  * @fence: radeon fence object
3653  *
 * Emits a fence sequence number on the compute ring and flushes
3655  * GPU caches.
3656  */
3657 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3658 				 struct radeon_fence *fence)
3659 {
3660 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3661 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3662 
3663 	/* RELEASE_MEM - flush caches, send int */
3664 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3665 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3666 				 EOP_TC_ACTION_EN |
3667 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3668 				 EVENT_INDEX(5)));
3669 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3670 	radeon_ring_write(ring, addr & 0xfffffffc);
3671 	radeon_ring_write(ring, upper_32_bits(addr));
3672 	radeon_ring_write(ring, fence->seq);
3673 	radeon_ring_write(ring, 0);
3674 }
3675 
3676 /**
3677  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3678  *
3679  * @rdev: radeon_device pointer
3680  * @ring: radeon ring buffer object
3681  * @semaphore: radeon semaphore object
 * @emit_wait: Is this a semaphore wait?
3683  *
3684  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3685  * from running ahead of semaphore waits.
3686  */
3687 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3688 			     struct radeon_ring *ring,
3689 			     struct radeon_semaphore *semaphore,
3690 			     bool emit_wait)
3691 {
3692 	uint64_t addr = semaphore->gpu_addr;
3693 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3694 
3695 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3696 	radeon_ring_write(ring, lower_32_bits(addr));
3697 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3698 
3699 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3700 		/* Prevent the PFP from running ahead of the semaphore wait */
3701 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3702 		radeon_ring_write(ring, 0x0);
3703 	}
3704 
3705 	return true;
3706 }
3707 
3708 /**
3709  * cik_copy_cpdma - copy pages using the CP DMA engine
3710  *
3711  * @rdev: radeon_device pointer
3712  * @src_offset: src GPU address
3713  * @dst_offset: dst GPU address
3714  * @num_gpu_pages: number of GPU pages to xfer
3715  * @fence: radeon fence object
3716  *
 * Copy GPU pages using the CP DMA engine (CIK+).
3718  * Used by the radeon ttm implementation to move pages if
3719  * registered as the asic copy callback.
3720  */
3721 int cik_copy_cpdma(struct radeon_device *rdev,
3722 		   uint64_t src_offset, uint64_t dst_offset,
3723 		   unsigned num_gpu_pages,
3724 		   struct radeon_fence **fence)
3725 {
3726 	struct radeon_semaphore *sem = NULL;
3727 	int ring_index = rdev->asic->copy.blit_ring_index;
3728 	struct radeon_ring *ring = &rdev->ring[ring_index];
3729 	u32 size_in_bytes, cur_size_in_bytes, control;
3730 	int i, num_loops;
3731 	int r = 0;
3732 
3733 	r = radeon_semaphore_create(rdev, &sem);
3734 	if (r) {
3735 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3736 		return r;
3737 	}
3738 
3739 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3740 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
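	/* each DMA_DATA packet below takes 7 dwords; the extra 18 dwords
	 * of ring space leave room for the semaphore sync and fence emit.
	 */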
3741 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3742 	if (r) {
3743 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3744 		radeon_semaphore_free(rdev, &sem, NULL);
3745 		return r;
3746 	}
3747 
3748 	radeon_semaphore_sync_to(sem, *fence);
3749 	radeon_semaphore_sync_rings(rdev, sem, ring->idx);
3750 
3751 	for (i = 0; i < num_loops; i++) {
3752 		cur_size_in_bytes = size_in_bytes;
3753 		if (cur_size_in_bytes > 0x1fffff)
3754 			cur_size_in_bytes = 0x1fffff;
3755 		size_in_bytes -= cur_size_in_bytes;
3756 		control = 0;
3757 		if (size_in_bytes == 0)
3758 			control |= PACKET3_DMA_DATA_CP_SYNC;
3759 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3760 		radeon_ring_write(ring, control);
3761 		radeon_ring_write(ring, lower_32_bits(src_offset));
3762 		radeon_ring_write(ring, upper_32_bits(src_offset));
3763 		radeon_ring_write(ring, lower_32_bits(dst_offset));
3764 		radeon_ring_write(ring, upper_32_bits(dst_offset));
3765 		radeon_ring_write(ring, cur_size_in_bytes);
3766 		src_offset += cur_size_in_bytes;
3767 		dst_offset += cur_size_in_bytes;
3768 	}
3769 
3770 	r = radeon_fence_emit(rdev, fence, ring->idx);
3771 	if (r) {
3772 		radeon_ring_unlock_undo(rdev, ring);
3773 		radeon_semaphore_free(rdev, &sem, NULL);
3774 		return r;
3775 	}
3776 
3777 	radeon_ring_unlock_commit(rdev, ring, false);
3778 	radeon_semaphore_free(rdev, &sem, *fence);
3779 
3780 	return r;
3781 }
3782 
3783 /*
3784  * IB stuff
3785  */
3786 /**
3787  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3788  *
3789  * @rdev: radeon_device pointer
3790  * @ib: radeon indirect buffer object
3791  *
3792  * Emits a DE (drawing engine) or CE (constant engine) IB
3793  * on the gfx ring.  IBs are usually generated by userspace
3794  * acceleration drivers and submitted to the kernel for
3795  * scheduling on the ring.  This function schedules the IB
3796  * on the gfx ring for execution by the GPU.
3797  */
3798 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3799 {
3800 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3801 	u32 header, control = INDIRECT_BUFFER_VALID;
3802 
3803 	if (ib->is_const_ib) {
3804 		/* set switch buffer packet before const IB */
3805 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3806 		radeon_ring_write(ring, 0);
3807 
3808 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3809 	} else {
3810 		u32 next_rptr;
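		/* next_rptr points just past this rptr-save write (3 dwords
		 * via SET_UCONFIG_REG or 5 via WRITE_DATA) plus the 4-dword
		 * IB packet emitted at the end of this function.
		 */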
3811 		if (ring->rptr_save_reg) {
3812 			next_rptr = ring->wptr + 3 + 4;
3813 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3814 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3815 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3816 			radeon_ring_write(ring, next_rptr);
3817 		} else if (rdev->wb.enabled) {
3818 			next_rptr = ring->wptr + 5 + 4;
3819 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3820 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3821 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3822 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3823 			radeon_ring_write(ring, next_rptr);
3824 		}
3825 
3826 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3827 	}
3828 
3829 	control |= ib->length_dw |
3830 		(ib->vm ? (ib->vm->id << 24) : 0);
3831 
3832 	radeon_ring_write(ring, header);
3833 	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3834 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3835 	radeon_ring_write(ring, control);
3836 }
3837 
3838 /**
3839  * cik_ib_test - basic gfx ring IB test
3840  *
3841  * @rdev: radeon_device pointer
3842  * @ring: radeon_ring structure holding ring information
3843  *
3844  * Allocate an IB and execute it on the gfx ring (CIK).
3845  * Provides a basic gfx ring test to verify that IBs are working.
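 * Like cik_ring_test(), but the scratch register write is carried in a
 * 3-dword IB so the IB fetch/execute path is exercised as well.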
3846  * Returns 0 on success, error on failure.
3847  */
3848 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3849 {
3850 	struct radeon_ib ib;
3851 	uint32_t scratch;
3852 	uint32_t tmp = 0;
3853 	unsigned i;
3854 	int r;
3855 
3856 	r = radeon_scratch_get(rdev, &scratch);
3857 	if (r) {
3858 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3859 		return r;
3860 	}
3861 	WREG32(scratch, 0xCAFEDEAD);
3862 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3863 	if (r) {
3864 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3865 		radeon_scratch_free(rdev, scratch);
3866 		return r;
3867 	}
3868 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3869 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3870 	ib.ptr[2] = 0xDEADBEEF;
3871 	ib.length_dw = 3;
3872 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
3873 	if (r) {
3874 		radeon_scratch_free(rdev, scratch);
3875 		radeon_ib_free(rdev, &ib);
3876 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3877 		return r;
3878 	}
3879 	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3880 		RADEON_USEC_IB_TEST_TIMEOUT));
3881 	if (r < 0) {
3882 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3883 		radeon_scratch_free(rdev, scratch);
3884 		radeon_ib_free(rdev, &ib);
3885 		return r;
3886 	} else if (r == 0) {
3887 		DRM_ERROR("radeon: fence wait timed out.\n");
3888 #if 0
3889 		radeon_scratch_free(rdev, scratch);
3890 		radeon_ib_free(rdev, &ib);
3891 		return -ETIMEDOUT;
3892 #endif
3893  	}
3894 	r = 0;
3895 	for (i = 0; i < rdev->usec_timeout; i++) {
3896 		tmp = RREG32(scratch);
3897 		if (tmp == 0xDEADBEEF)
3898 			break;
3899 		DRM_UDELAY(1);
3900 	}
3901 	if (i < rdev->usec_timeout) {
3902 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3903 	} else {
3904 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3905 			  scratch, tmp);
3906 		r = -EINVAL;
3907 	}
3908 	radeon_scratch_free(rdev, scratch);
3909 	radeon_ib_free(rdev, &ib);
3910 	return r;
3911 }
3912 
3913 /*
3914  * CP.
 * On CIK, gfx and compute now have independent command processors.
3916  *
3917  * GFX
3918  * Gfx consists of a single ring and can process both gfx jobs and
3919  * compute jobs.  The gfx CP consists of three microengines (ME):
3920  * PFP - Pre-Fetch Parser
3921  * ME - Micro Engine
3922  * CE - Constant Engine
3923  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3925  * used by the DE so that they can be loaded into cache in parallel
3926  * while the DE is processing state update packets.
3927  *
3928  * Compute
3929  * The compute CP consists of two microengines (ME):
3930  * MEC1 - Compute MicroEngine 1
3931  * MEC2 - Compute MicroEngine 2
3932  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3933  * The queues are exposed to userspace and are programmed directly
3934  * by the compute runtime.
3935  */
3936 /**
3937  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3938  *
3939  * @rdev: radeon_device pointer
3940  * @enable: enable or disable the MEs
3941  *
3942  * Halts or unhalts the gfx MEs.
3943  */
3944 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3945 {
	if (enable) {
		WREG32(CP_ME_CNTL, 0);
	} else {
3949 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3950 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3951 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3952 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3953 	}
3954 	udelay(50);
3955 }
3956 
3957 /**
3958  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3959  *
3960  * @rdev: radeon_device pointer
3961  *
3962  * Loads the gfx PFP, ME, and CE ucode.
3963  * Returns 0 for success, -EINVAL if the ucode is not available.
3964  */
3965 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3966 {
3967 	int i;
3968 
3969 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3970 		return -EINVAL;
3971 
3972 	cik_cp_gfx_enable(rdev, false);
3973 
3974 	if (rdev->new_fw) {
3975 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3976 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3977 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3978 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3979 		const struct gfx_firmware_header_v1_0 *me_hdr =
3980 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3981 		const __le32 *fw_data;
3982 		u32 fw_size;
3983 
3984 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3985 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3986 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3987 
3988 		/* PFP */
3989 		fw_data = (const __le32 *)
3990 			((const char *)rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3991 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3992 		WREG32(CP_PFP_UCODE_ADDR, 0);
3993 		for (i = 0; i < fw_size; i++)
3994 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3995 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3996 
3997 		/* CE */
3998 		fw_data = (const __le32 *)
3999 			((const char *)rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4000 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4001 		WREG32(CP_CE_UCODE_ADDR, 0);
4002 		for (i = 0; i < fw_size; i++)
4003 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4004 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4005 
4006 		/* ME */
		fw_data = (const __le32 *)
4008 			((const char *)rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4009 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4010 		WREG32(CP_ME_RAM_WADDR, 0);
4011 		for (i = 0; i < fw_size; i++)
4012 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4013 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4014 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4015 	} else {
4016 		const __be32 *fw_data;
4017 
4018 		/* PFP */
4019 		fw_data = (const __be32 *)rdev->pfp_fw->data;
4020 		WREG32(CP_PFP_UCODE_ADDR, 0);
4021 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4022 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4023 		WREG32(CP_PFP_UCODE_ADDR, 0);
4024 
4025 		/* CE */
4026 		fw_data = (const __be32 *)rdev->ce_fw->data;
4027 		WREG32(CP_CE_UCODE_ADDR, 0);
4028 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4029 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4030 		WREG32(CP_CE_UCODE_ADDR, 0);
4031 
4032 		/* ME */
4033 		fw_data = (const __be32 *)rdev->me_fw->data;
4034 		WREG32(CP_ME_RAM_WADDR, 0);
4035 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4036 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4037 		WREG32(CP_ME_RAM_WADDR, 0);
4038 	}
4039 
4040 	return 0;
4041 }
4042 
4043 /**
4044  * cik_cp_gfx_start - start the gfx ring
4045  *
4046  * @rdev: radeon_device pointer
4047  *
4048  * Enables the ring and loads the clear state context and other
4049  * packets required to init the ring.
4050  * Returns 0 for success, error for failure.
4051  */
4052 static int cik_cp_gfx_start(struct radeon_device *rdev)
4053 {
4054 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4055 	int r, i;
4056 
4057 	/* init the CP */
4058 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4059 	WREG32(CP_ENDIAN_SWAP, 0);
4060 	WREG32(CP_DEVICE_ID, 1);
4061 
4062 	cik_cp_gfx_enable(rdev, true);
4063 
4064 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4065 	if (r) {
4066 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4067 		return r;
4068 	}
4069 
4070 	/* init the CE partitions.  CE only used for gfx on CIK */
4071 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4072 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4073 	radeon_ring_write(ring, 0x8000);
4074 	radeon_ring_write(ring, 0x8000);
4075 
4076 	/* setup clear context state */
4077 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4078 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4079 
4080 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4081 	radeon_ring_write(ring, 0x80000000);
4082 	radeon_ring_write(ring, 0x80000000);
4083 
4084 	for (i = 0; i < cik_default_size; i++)
4085 		radeon_ring_write(ring, cik_default_state[i]);
4086 
4087 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4088 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4089 
4090 	/* set clear context state */
4091 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4092 	radeon_ring_write(ring, 0);
4093 
4094 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4095 	radeon_ring_write(ring, 0x00000316);
4096 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4097 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4098 
4099 	radeon_ring_unlock_commit(rdev, ring, false);
4100 
4101 	return 0;
4102 }
4103 
4104 /**
4105  * cik_cp_gfx_fini - stop the gfx ring
4106  *
4107  * @rdev: radeon_device pointer
4108  *
4109  * Stop the gfx ring and tear down the driver ring
4110  * info.
4111  */
4112 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4113 {
4114 	cik_cp_gfx_enable(rdev, false);
4115 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4116 }
4117 
4118 /**
4119  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4120  *
4121  * @rdev: radeon_device pointer
4122  *
4123  * Program the location and size of the gfx ring buffer
4124  * and test it to make sure it's working.
4125  * Returns 0 for success, error for failure.
4126  */
4127 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4128 {
4129 	struct radeon_ring *ring;
4130 	u32 tmp;
4131 	u32 rb_bufsz;
4132 	u64 rb_addr;
4133 	int r;
4134 
4135 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4136 	if (rdev->family != CHIP_HAWAII)
4137 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4138 
4139 	/* Set the write pointer delay */
4140 	WREG32(CP_RB_WPTR_DELAY, 0);
4141 
4142 	/* set the RB to use vmid 0 */
4143 	WREG32(CP_RB_VMID, 0);
4144 
4145 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4146 
4147 	/* ring 0 - compute and gfx */
4148 	/* Set ring buffer size */
4149 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4150 	rb_bufsz = order_base_2(ring->ring_size / 8);
4151 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
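	/* rb_bufsz is log2 of the ring size in 8-byte units; bits 8+ carry
	 * log2 of RADEON_GPU_PAGE_SIZE in the same units, which controls
	 * the rptr writeback interval.
	 */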
4152 #ifdef __BIG_ENDIAN
4153 	tmp |= BUF_SWAP_32BIT;
4154 #endif
4155 	WREG32(CP_RB0_CNTL, tmp);
4156 
4157 	/* Initialize the ring buffer's read and write pointers */
4158 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4159 	ring->wptr = 0;
4160 	WREG32(CP_RB0_WPTR, ring->wptr);
4161 
	/* set the wb address whether it's enabled or not */
4163 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4164 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4165 
4166 	/* scratch register shadowing is no longer supported */
4167 	WREG32(SCRATCH_UMSK, 0);
4168 
4169 	if (!rdev->wb.enabled)
4170 		tmp |= RB_NO_UPDATE;
4171 
4172 	mdelay(1);
4173 	WREG32(CP_RB0_CNTL, tmp);
4174 
4175 	rb_addr = ring->gpu_addr >> 8;
4176 	WREG32(CP_RB0_BASE, rb_addr);
4177 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4178 
4179 	/* start the ring */
4180 	cik_cp_gfx_start(rdev);
4181 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4182 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4183 	if (r) {
4184 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4185 		return r;
4186 	}
4187 
4188 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4189 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4190 
4191 	return 0;
4192 }
4193 
4194 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4195 		     struct radeon_ring *ring)
4196 {
4197 	u32 rptr;
4198 
4199 	if (rdev->wb.enabled)
4200 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4201 	else
4202 		rptr = RREG32(CP_RB0_RPTR);
4203 
4204 	return rptr;
4205 }
4206 
4207 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4208 		     struct radeon_ring *ring)
4209 {
4210 	u32 wptr;
4211 
4212 	wptr = RREG32(CP_RB0_WPTR);
4213 
4214 	return wptr;
4215 }
4216 
4217 void cik_gfx_set_wptr(struct radeon_device *rdev,
4218 		      struct radeon_ring *ring)
4219 {
4220 	WREG32(CP_RB0_WPTR, ring->wptr);
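	/* readback to flush the posted wptr write */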
4221 	(void)RREG32(CP_RB0_WPTR);
4222 }
4223 
4224 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4225 			 struct radeon_ring *ring)
4226 {
4227 	u32 rptr;
4228 
4229 	if (rdev->wb.enabled) {
4230 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4231 	} else {
4232 		mutex_lock(&rdev->srbm_mutex);
4233 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4234 		rptr = RREG32(CP_HQD_PQ_RPTR);
4235 		cik_srbm_select(rdev, 0, 0, 0, 0);
4236 		mutex_unlock(&rdev->srbm_mutex);
4237 	}
4238 
4239 	return rptr;
4240 }
4241 
4242 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4243 			 struct radeon_ring *ring)
4244 {
4245 	u32 wptr;
4246 
4247 	if (rdev->wb.enabled) {
4248 		/* XXX check if swapping is necessary on BE */
4249 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4250 	} else {
4251 		mutex_lock(&rdev->srbm_mutex);
4252 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4253 		wptr = RREG32(CP_HQD_PQ_WPTR);
4254 		cik_srbm_select(rdev, 0, 0, 0, 0);
4255 		mutex_unlock(&rdev->srbm_mutex);
4256 	}
4257 
4258 	return wptr;
4259 }
4260 
4261 void cik_compute_set_wptr(struct radeon_device *rdev,
4262 			  struct radeon_ring *ring)
4263 {
4264 	/* XXX check if swapping is necessary on BE */
4265 	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4266 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4267 }
4268 
4269 static void cik_compute_stop(struct radeon_device *rdev,
4270 			     struct radeon_ring *ring)
4271 {
4272 	u32 j, tmp;
4273 
4274 	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4275 	/* Disable wptr polling. */
4276 	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4277 	tmp &= ~WPTR_POLL_EN;
4278 	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4279 	/* Disable HQD. */
4280 	if (RREG32(CP_HQD_ACTIVE) & 1) {
4281 		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4282 		for (j = 0; j < rdev->usec_timeout; j++) {
4283 			if (!(RREG32(CP_HQD_ACTIVE) & 1))
4284 				break;
4285 			udelay(1);
4286 		}
4287 		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4288 		WREG32(CP_HQD_PQ_RPTR, 0);
4289 		WREG32(CP_HQD_PQ_WPTR, 0);
4290 	}
4291 	cik_srbm_select(rdev, 0, 0, 0, 0);
4292 }
4293 
4294 /**
4295  * cik_cp_compute_enable - enable/disable the compute CP MEs
4296  *
4297  * @rdev: radeon_device pointer
4298  * @enable: enable or disable the MEs
4299  *
4300  * Halts or unhalts the compute MEs.
4301  */
4302 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4303 {
	if (enable) {
		WREG32(CP_MEC_CNTL, 0);
	} else {
4307 		/*
4308 		 * To make hibernation reliable we need to clear compute ring
4309 		 * configuration before halting the compute ring.
4310 		 */
4311 		mutex_lock(&rdev->srbm_mutex);
		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4314 		mutex_unlock(&rdev->srbm_mutex);
4315 
4316 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4317 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4318 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4319 	}
4320 	udelay(50);
4321 }
4322 
4323 /**
4324  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4325  *
4326  * @rdev: radeon_device pointer
4327  *
4328  * Loads the compute MEC1&2 ucode.
4329  * Returns 0 for success, -EINVAL if the ucode is not available.
4330  */
4331 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4332 {
4333 	int i;
4334 
4335 	if (!rdev->mec_fw)
4336 		return -EINVAL;
4337 
4338 	cik_cp_compute_enable(rdev, false);
4339 
4340 	if (rdev->new_fw) {
4341 		const struct gfx_firmware_header_v1_0 *mec_hdr =
4342 			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4343 		const __le32 *fw_data;
4344 		u32 fw_size;
4345 
4346 		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4347 
4348 		/* MEC1 */
4349 		fw_data = (const __le32 *)
4350 			((const char *)rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4351 		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4352 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4353 		for (i = 0; i < fw_size; i++)
4354 			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4355 		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4356 
4357 		/* MEC2 */
4358 		if (rdev->family == CHIP_KAVERI) {
4359 			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4360 				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4361 
4362 			fw_data = (const __le32 *)
4363 				((const char *)rdev->mec2_fw->data +
4364 				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4365 			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4366 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4367 			for (i = 0; i < fw_size; i++)
4368 				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4369 			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4370 		}
4371 	} else {
4372 		const __be32 *fw_data;
4373 
4374 		/* MEC1 */
4375 		fw_data = (const __be32 *)rdev->mec_fw->data;
4376 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4377 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4378 			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4379 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4380 
4381 		if (rdev->family == CHIP_KAVERI) {
4382 			/* MEC2 */
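			/* old-style firmware carries a single MEC image;
			 * load the same ucode into MEC2
			 */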
4383 			fw_data = (const __be32 *)rdev->mec_fw->data;
4384 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4385 			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4386 				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4387 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4388 		}
4389 	}
4390 
4391 	return 0;
4392 }
4393 
4394 /**
4395  * cik_cp_compute_start - start the compute queues
4396  *
4397  * @rdev: radeon_device pointer
4398  *
4399  * Enable the compute queues.
4400  * Returns 0 for success, error for failure.
4401  */
4402 static int cik_cp_compute_start(struct radeon_device *rdev)
4403 {
4404 	cik_cp_compute_enable(rdev, true);
4405 
4406 	return 0;
4407 }
4408 
4409 /**
4410  * cik_cp_compute_fini - stop the compute queues
4411  *
4412  * @rdev: radeon_device pointer
4413  *
4414  * Stop the compute queues and tear down the driver queue
4415  * info.
4416  */
4417 static void cik_cp_compute_fini(struct radeon_device *rdev)
4418 {
4419 	int i, idx, r;
4420 
4421 	cik_cp_compute_enable(rdev, false);
4422 
4423 	for (i = 0; i < 2; i++) {
4424 		if (i == 0)
4425 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4426 		else
4427 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4428 
4429 		if (rdev->ring[idx].mqd_obj) {
4430 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4431 			if (unlikely(r != 0))
4432 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4433 
4434 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4435 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4436 
4437 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4438 			rdev->ring[idx].mqd_obj = NULL;
4439 		}
4440 	}
4441 }
4442 
4443 static void cik_mec_fini(struct radeon_device *rdev)
4444 {
4445 	int r;
4446 
4447 	if (rdev->mec.hpd_eop_obj) {
4448 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4449 		if (unlikely(r != 0))
4450 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4451 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4452 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4453 
4454 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4455 		rdev->mec.hpd_eop_obj = NULL;
4456 	}
4457 }
4458 
4459 #define MEC_HPD_SIZE 2048
4460 
4461 static int cik_mec_init(struct radeon_device *rdev)
4462 {
4463 	int r;
4464 	u32 *hpd;
4465 
4466 	/*
4467 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4468 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
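	 * Each pipe gets MEC_HPD_SIZE * 2 bytes of EOP buffer below,
	 * i.e. 32KB total on KV and 16KB on CI/KB.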
4469 	 */
4470 	if (rdev->family == CHIP_KAVERI)
4471 		rdev->mec.num_mec = 2;
4472 	else
4473 		rdev->mec.num_mec = 1;
4474 	rdev->mec.num_pipe = 4;
4475 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4476 
4477 	if (rdev->mec.hpd_eop_obj == NULL) {
4478 		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4480 				     PAGE_SIZE, true,
4481 				     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4482 				     &rdev->mec.hpd_eop_obj);
4483 		if (r) {
4484 			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4485 			return r;
4486 		}
4487 	}
4488 
4489 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4490 	if (unlikely(r != 0)) {
4491 		cik_mec_fini(rdev);
4492 		return r;
4493 	}
4494 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4495 			  &rdev->mec.hpd_eop_gpu_addr);
4496 	if (r) {
4497 		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4498 		cik_mec_fini(rdev);
4499 		return r;
4500 	}
4501 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4502 	if (r) {
4503 		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4504 		cik_mec_fini(rdev);
4505 		return r;
4506 	}
4507 
4508 	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4510 
4511 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4512 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4513 
4514 	return 0;
4515 }
4516 
struct hqd_registers {
4519 	u32 cp_mqd_base_addr;
4520 	u32 cp_mqd_base_addr_hi;
4521 	u32 cp_hqd_active;
4522 	u32 cp_hqd_vmid;
4523 	u32 cp_hqd_persistent_state;
4524 	u32 cp_hqd_pipe_priority;
4525 	u32 cp_hqd_queue_priority;
4526 	u32 cp_hqd_quantum;
4527 	u32 cp_hqd_pq_base;
4528 	u32 cp_hqd_pq_base_hi;
4529 	u32 cp_hqd_pq_rptr;
4530 	u32 cp_hqd_pq_rptr_report_addr;
4531 	u32 cp_hqd_pq_rptr_report_addr_hi;
4532 	u32 cp_hqd_pq_wptr_poll_addr;
4533 	u32 cp_hqd_pq_wptr_poll_addr_hi;
4534 	u32 cp_hqd_pq_doorbell_control;
4535 	u32 cp_hqd_pq_wptr;
4536 	u32 cp_hqd_pq_control;
4537 	u32 cp_hqd_ib_base_addr;
4538 	u32 cp_hqd_ib_base_addr_hi;
4539 	u32 cp_hqd_ib_rptr;
4540 	u32 cp_hqd_ib_control;
4541 	u32 cp_hqd_iq_timer;
4542 	u32 cp_hqd_iq_rptr;
4543 	u32 cp_hqd_dequeue_request;
4544 	u32 cp_hqd_dma_offload;
4545 	u32 cp_hqd_sema_cmd;
4546 	u32 cp_hqd_msg_type;
4547 	u32 cp_hqd_atomic0_preop_lo;
4548 	u32 cp_hqd_atomic0_preop_hi;
4549 	u32 cp_hqd_atomic1_preop_lo;
4550 	u32 cp_hqd_atomic1_preop_hi;
4551 	u32 cp_hqd_hq_scheduler0;
4552 	u32 cp_hqd_hq_scheduler1;
4553 	u32 cp_mqd_control;
4554 };
4555 
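/*
 * Memory queue descriptor (MQD): the persistent, memory-backed image of a
 * compute queue's state.  queue_state mirrors the CP_HQD_* registers so
 * the CP can save and restore the queue.
 */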
struct bonaire_mqd {
4558 	u32 header;
4559 	u32 dispatch_initiator;
4560 	u32 dimensions[3];
4561 	u32 start_idx[3];
4562 	u32 num_threads[3];
4563 	u32 pipeline_stat_enable;
4564 	u32 perf_counter_enable;
4565 	u32 pgm[2];
4566 	u32 tba[2];
4567 	u32 tma[2];
4568 	u32 pgm_rsrc[2];
4569 	u32 vmid;
4570 	u32 resource_limits;
4571 	u32 static_thread_mgmt01[2];
4572 	u32 tmp_ring_size;
4573 	u32 static_thread_mgmt23[2];
4574 	u32 restart[3];
4575 	u32 thread_trace_enable;
4576 	u32 reserved1;
4577 	u32 user_data[16];
4578 	u32 vgtcs_invoke_count[2];
4579 	struct hqd_registers queue_state;
4580 	u32 dequeue_cntr;
4581 	u32 interrupt_queue[64];
4582 };
4583 
4584 /**
4585  * cik_cp_compute_resume - setup the compute queue registers
4586  *
4587  * @rdev: radeon_device pointer
4588  *
4589  * Program the compute queues and test them to make sure they
4590  * are working.
4591  * Returns 0 for success, error for failure.
4592  */
4593 static int cik_cp_compute_resume(struct radeon_device *rdev)
4594 {
4595 	int r, i, j, idx;
4596 	u32 tmp;
4597 	bool use_doorbell = true;
4598 	u64 hqd_gpu_addr;
4599 	u64 mqd_gpu_addr;
4600 	u64 eop_gpu_addr;
4601 	u64 wb_gpu_addr;
4602 	u32 *buf;
4603 	struct bonaire_mqd *mqd;
4604 
4605 	r = cik_cp_compute_start(rdev);
4606 	if (r)
4607 		return r;
4608 
4609 	/* fix up chicken bits */
4610 	tmp = RREG32(CP_CPF_DEBUG);
4611 	tmp |= (1 << 23);
4612 	WREG32(CP_CPF_DEBUG, tmp);
4613 
4614 	/* init the pipes */
4615 	mutex_lock(&rdev->srbm_mutex);
4616 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4617 		int me = (i < 4) ? 1 : 2;
4618 		int pipe = (i < 4) ? i : (i - 4);
4619 
4620 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4621 
4622 		cik_srbm_select(rdev, me, pipe, 0, 0);
4623 
4624 		/* write the EOP addr */
4625 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4626 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4627 
4628 		/* set the VMID assigned */
4629 		WREG32(CP_HPD_EOP_VMID, 0);
4630 
4631 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
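		/* order_base_2(MEC_HPD_SIZE / 8) = order_base_2(256) = 8,
		 * so the HW sees 2^(8+1) = 512 dwords = 2048 bytes
		 * = MEC_HPD_SIZE
		 */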
4632 		tmp = RREG32(CP_HPD_EOP_CONTROL);
4633 		tmp &= ~EOP_SIZE_MASK;
4634 		tmp |= order_base_2(MEC_HPD_SIZE / 8);
4635 		WREG32(CP_HPD_EOP_CONTROL, tmp);
4636 	}
4637 	cik_srbm_select(rdev, 0, 0, 0, 0);
4638 	mutex_unlock(&rdev->srbm_mutex);
4639 
4640 	/* init the queues.  Just two for now. */
4641 	for (i = 0; i < 2; i++) {
4642 		if (i == 0)
4643 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4644 		else
4645 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4646 
4647 		if (rdev->ring[idx].mqd_obj == NULL) {
4648 			r = radeon_bo_create(rdev,
4649 					     sizeof(struct bonaire_mqd),
4650 					     PAGE_SIZE, true,
4651 					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4652 					     &rdev->ring[idx].mqd_obj);
4653 			if (r) {
4654 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4655 				return r;
4656 			}
4657 		}
4658 
4659 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4660 		if (unlikely(r != 0)) {
4661 			cik_cp_compute_fini(rdev);
4662 			return r;
4663 		}
4664 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4665 				  &mqd_gpu_addr);
4666 		if (r) {
4667 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4668 			cik_cp_compute_fini(rdev);
4669 			return r;
4670 		}
4671 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4672 		if (r) {
4673 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4674 			cik_cp_compute_fini(rdev);
4675 			return r;
4676 		}
4677 
4678 		/* init the mqd struct */
4679 		memset(buf, 0, sizeof(struct bonaire_mqd));
4680 
4681 		mqd = (struct bonaire_mqd *)buf;
4682 		mqd->header = 0xC0310800;
4683 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4684 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4685 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4686 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4687 
4688 		mutex_lock(&rdev->srbm_mutex);
4689 		cik_srbm_select(rdev, rdev->ring[idx].me,
4690 				rdev->ring[idx].pipe,
4691 				rdev->ring[idx].queue, 0);
4692 
4693 		/* disable wptr polling */
4694 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4695 		tmp &= ~WPTR_POLL_EN;
4696 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4697 
4698 		/* enable doorbell? */
4699 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4700 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4701 		if (use_doorbell)
4702 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4703 		else
4704 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4705 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4706 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4707 
4708 		/* disable the queue if it's active */
4709 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4710 		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = 0;
4712 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4713 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4714 			for (j = 0; j < rdev->usec_timeout; j++) {
4715 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4716 					break;
4717 				udelay(1);
4718 			}
4719 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4720 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4721 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4722 		}
4723 
4724 		/* set the pointer to the MQD */
4725 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4726 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4727 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4728 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4729 		/* set MQD vmid to 0 */
4730 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4731 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4732 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4733 
		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4735 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4736 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4737 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4738 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4739 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4740 
4741 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4742 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4743 		mqd->queue_state.cp_hqd_pq_control &=
4744 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4745 
4746 		mqd->queue_state.cp_hqd_pq_control |=
4747 			order_base_2(rdev->ring[idx].ring_size / 8);
4748 		mqd->queue_state.cp_hqd_pq_control |=
4749 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4750 #ifdef __BIG_ENDIAN
4751 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4752 #endif
4753 		mqd->queue_state.cp_hqd_pq_control &=
4754 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4755 		mqd->queue_state.cp_hqd_pq_control |=
4756 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4757 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4758 
4759 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4760 		if (i == 0)
4761 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4762 		else
4763 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4764 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4765 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4766 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4767 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4768 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4769 
		/* set the wb address whether it's enabled or not */
4771 		if (i == 0)
4772 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4773 		else
4774 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4775 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4776 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4777 			upper_32_bits(wb_gpu_addr) & 0xffff;
4778 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4779 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4780 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4781 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4782 
4783 		/* enable the doorbell if requested */
4784 		if (use_doorbell) {
4785 			mqd->queue_state.cp_hqd_pq_doorbell_control =
4786 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4787 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4788 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4789 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4790 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4791 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4792 				~(DOORBELL_SOURCE | DOORBELL_HIT);
4793 
4794 		} else {
4795 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4796 		}
4797 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4798 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4799 
4800 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4801 		rdev->ring[idx].wptr = 0;
4802 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4803 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4804 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4805 
4806 		/* set the vmid for the queue */
4807 		mqd->queue_state.cp_hqd_vmid = 0;
4808 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4809 
4810 		/* activate the queue */
4811 		mqd->queue_state.cp_hqd_active = 1;
4812 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4813 
4814 		cik_srbm_select(rdev, 0, 0, 0, 0);
4815 		mutex_unlock(&rdev->srbm_mutex);
4816 
4817 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4818 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4819 
4820 		rdev->ring[idx].ready = true;
4821 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4822 		if (r)
4823 			rdev->ring[idx].ready = false;
4824 	}
4825 
4826 	return 0;
4827 }
4828 
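/**
 * cik_cp_enable - enable/disable the gfx and compute CPs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Enable or disable both the gfx and compute command
 * processors (CIK).
 */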
4829 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4830 {
4831 	cik_cp_gfx_enable(rdev, enable);
4832 	cik_cp_compute_enable(rdev, enable);
4833 }
4834 
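/**
 * cik_cp_load_microcode - load the gfx and compute CP ucode
 *
 * @rdev: radeon_device pointer
 *
 * Load the gfx and compute CP microcode images (CIK).
 * Returns 0 for success, error on failure.
 */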
4835 static int cik_cp_load_microcode(struct radeon_device *rdev)
4836 {
4837 	int r;
4838 
4839 	r = cik_cp_gfx_load_microcode(rdev);
4840 	if (r)
4841 		return r;
4842 	r = cik_cp_compute_load_microcode(rdev);
4843 	if (r)
4844 		return r;
4845 
4846 	return 0;
4847 }
4848 
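/**
 * cik_cp_fini - tear down the gfx and compute CPs
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the gfx and compute command processors (CIK).
 */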
4849 static void cik_cp_fini(struct radeon_device *rdev)
4850 {
4851 	cik_cp_gfx_fini(rdev);
4852 	cik_cp_compute_fini(rdev);
4853 }
4854 
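/**
 * cik_cp_resume - load the CP ucode and start the CPs
 *
 * @rdev: radeon_device pointer
 *
 * Load the CP microcode and resume the gfx and compute rings,
 * keeping the GUI idle interrupt masked while the CPs come up (CIK).
 * Returns 0 for success, error on failure.
 */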
4855 static int cik_cp_resume(struct radeon_device *rdev)
4856 {
4857 	int r;
4858 
4859 	cik_enable_gui_idle_interrupt(rdev, false);
4860 
4861 	r = cik_cp_load_microcode(rdev);
4862 	if (r)
4863 		return r;
4864 
4865 	r = cik_cp_gfx_resume(rdev);
4866 	if (r)
4867 		return r;
4868 	r = cik_cp_compute_resume(rdev);
4869 	if (r)
4870 		return r;
4871 
4872 	cik_enable_gui_idle_interrupt(rdev, true);
4873 
4874 	return 0;
4875 }
4876 
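/**
 * cik_print_gpu_status_regs - dump the GPU status registers
 *
 * @rdev: radeon_device pointer
 *
 * Log the GRBM, SRBM, SDMA and CP status registers; used by the
 * reset code to show which blocks are hung (CIK).
 */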
4877 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4878 {
4879 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4880 		RREG32(GRBM_STATUS));
4881 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4882 		RREG32(GRBM_STATUS2));
4883 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4884 		RREG32(GRBM_STATUS_SE0));
4885 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4886 		RREG32(GRBM_STATUS_SE1));
4887 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4888 		RREG32(GRBM_STATUS_SE2));
4889 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4890 		RREG32(GRBM_STATUS_SE3));
4891 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4892 		RREG32(SRBM_STATUS));
4893 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4894 		RREG32(SRBM_STATUS2));
4895 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4896 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4897 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4898 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4899 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4900 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4901 		 RREG32(CP_STALLED_STAT1));
4902 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4903 		 RREG32(CP_STALLED_STAT2));
4904 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4905 		 RREG32(CP_STALLED_STAT3));
4906 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4907 		 RREG32(CP_CPF_BUSY_STAT));
4908 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4909 		 RREG32(CP_CPF_STALLED_STAT1));
4910 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4911 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4912 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4913 		 RREG32(CP_CPC_STALLED_STAT1));
4914 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4915 }
4916 
4917 /**
4918  * cik_gpu_check_soft_reset - check which blocks are busy
4919  *
4920  * @rdev: radeon_device pointer
4921  *
4922  * Check which blocks are busy and return the relevant reset
4923  * mask to be used by cik_gpu_soft_reset().
4924  * Returns a mask of the blocks to be reset.
4925  */
4926 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4927 {
4928 	u32 reset_mask = 0;
4929 	u32 tmp;
4930 
4931 	/* GRBM_STATUS */
4932 	tmp = RREG32(GRBM_STATUS);
4933 	if (tmp & (PA_BUSY | SC_BUSY |
4934 		   BCI_BUSY | SX_BUSY |
4935 		   TA_BUSY | VGT_BUSY |
4936 		   DB_BUSY | CB_BUSY |
4937 		   GDS_BUSY | SPI_BUSY |
4938 		   IA_BUSY | IA_BUSY_NO_DMA))
4939 		reset_mask |= RADEON_RESET_GFX;
4940 
4941 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4942 		reset_mask |= RADEON_RESET_CP;
4943 
4944 	/* GRBM_STATUS2 */
4945 	tmp = RREG32(GRBM_STATUS2);
4946 	if (tmp & RLC_BUSY)
4947 		reset_mask |= RADEON_RESET_RLC;
4948 
4949 	/* SDMA0_STATUS_REG */
4950 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4951 	if (!(tmp & SDMA_IDLE))
4952 		reset_mask |= RADEON_RESET_DMA;
4953 
4954 	/* SDMA1_STATUS_REG */
4955 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4956 	if (!(tmp & SDMA_IDLE))
4957 		reset_mask |= RADEON_RESET_DMA1;
4958 
4959 	/* SRBM_STATUS2 */
4960 	tmp = RREG32(SRBM_STATUS2);
4961 	if (tmp & SDMA_BUSY)
4962 		reset_mask |= RADEON_RESET_DMA;
4963 
4964 	if (tmp & SDMA1_BUSY)
4965 		reset_mask |= RADEON_RESET_DMA1;
4966 
4967 	/* SRBM_STATUS */
4968 	tmp = RREG32(SRBM_STATUS);
4969 
4970 	if (tmp & IH_BUSY)
4971 		reset_mask |= RADEON_RESET_IH;
4972 
4973 	if (tmp & SEM_BUSY)
4974 		reset_mask |= RADEON_RESET_SEM;
4975 
4976 	if (tmp & GRBM_RQ_PENDING)
4977 		reset_mask |= RADEON_RESET_GRBM;
4978 
4979 	if (tmp & VMC_BUSY)
4980 		reset_mask |= RADEON_RESET_VMC;
4981 
4982 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4983 		   MCC_BUSY | MCD_BUSY))
4984 		reset_mask |= RADEON_RESET_MC;
4985 
4986 	if (evergreen_is_display_hung(rdev))
4987 		reset_mask |= RADEON_RESET_DISPLAY;
4988 
4989 	/* Skip MC reset as it's most likely not hung, just busy */
4990 	if (reset_mask & RADEON_RESET_MC) {
4991 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4992 		reset_mask &= ~RADEON_RESET_MC;
4993 	}
4994 
4995 	return reset_mask;
4996 }
4997 
4998 /**
4999  * cik_gpu_soft_reset - soft reset GPU
5000  *
5001  * @rdev: radeon_device pointer
5002  * @reset_mask: mask of which blocks to reset
5003  *
5004  * Soft reset the blocks specified in @reset_mask.
5005  */
5006 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
5007 {
5008 	struct evergreen_mc_save save;
5009 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5010 	u32 tmp;
5011 
5012 	if (reset_mask == 0)
5013 		return;
5014 
5015 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
5016 
5017 	cik_print_gpu_status_regs(rdev);
5018 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5019 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5020 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5021 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5022 
5023 	/* disable CG/PG */
5024 	cik_fini_pg(rdev);
5025 	cik_fini_cg(rdev);
5026 
5027 	/* stop the rlc */
5028 	cik_rlc_stop(rdev);
5029 
5030 	/* Disable GFX parsing/prefetching */
5031 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5032 
5033 	/* Disable MEC parsing/prefetching */
5034 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5035 
5036 	if (reset_mask & RADEON_RESET_DMA) {
5037 		/* sdma0 */
5038 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5039 		tmp |= SDMA_HALT;
5040 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5041 	}
5042 	if (reset_mask & RADEON_RESET_DMA1) {
5043 		/* sdma1 */
5044 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5045 		tmp |= SDMA_HALT;
5046 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5047 	}
5048 
5049 	evergreen_mc_stop(rdev, &save);
5050 	if (evergreen_mc_wait_for_idle(rdev)) {
5051 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5052 	}
5053 
5054 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5055 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5056 
5057 	if (reset_mask & RADEON_RESET_CP) {
5058 		grbm_soft_reset |= SOFT_RESET_CP;
5059 
5060 		srbm_soft_reset |= SOFT_RESET_GRBM;
5061 	}
5062 
5063 	if (reset_mask & RADEON_RESET_DMA)
5064 		srbm_soft_reset |= SOFT_RESET_SDMA;
5065 
5066 	if (reset_mask & RADEON_RESET_DMA1)
5067 		srbm_soft_reset |= SOFT_RESET_SDMA1;
5068 
5069 	if (reset_mask & RADEON_RESET_DISPLAY)
5070 		srbm_soft_reset |= SOFT_RESET_DC;
5071 
5072 	if (reset_mask & RADEON_RESET_RLC)
5073 		grbm_soft_reset |= SOFT_RESET_RLC;
5074 
5075 	if (reset_mask & RADEON_RESET_SEM)
5076 		srbm_soft_reset |= SOFT_RESET_SEM;
5077 
5078 	if (reset_mask & RADEON_RESET_IH)
5079 		srbm_soft_reset |= SOFT_RESET_IH;
5080 
5081 	if (reset_mask & RADEON_RESET_GRBM)
5082 		srbm_soft_reset |= SOFT_RESET_GRBM;
5083 
5084 	if (reset_mask & RADEON_RESET_VMC)
5085 		srbm_soft_reset |= SOFT_RESET_VMC;
5086 
5087 	if (!(rdev->flags & RADEON_IS_IGP)) {
5088 		if (reset_mask & RADEON_RESET_MC)
5089 			srbm_soft_reset |= SOFT_RESET_MC;
5090 	}
5091 
5092 	if (grbm_soft_reset) {
5093 		tmp = RREG32(GRBM_SOFT_RESET);
5094 		tmp |= grbm_soft_reset;
5095 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5096 		WREG32(GRBM_SOFT_RESET, tmp);
5097 		tmp = RREG32(GRBM_SOFT_RESET);
5098 
5099 		udelay(50);
5100 
5101 		tmp &= ~grbm_soft_reset;
5102 		WREG32(GRBM_SOFT_RESET, tmp);
5103 		tmp = RREG32(GRBM_SOFT_RESET);
5104 	}
5105 
5106 	if (srbm_soft_reset) {
5107 		tmp = RREG32(SRBM_SOFT_RESET);
5108 		tmp |= srbm_soft_reset;
5109 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5110 		WREG32(SRBM_SOFT_RESET, tmp);
5111 		tmp = RREG32(SRBM_SOFT_RESET);
5112 
5113 		udelay(50);
5114 
5115 		tmp &= ~srbm_soft_reset;
5116 		WREG32(SRBM_SOFT_RESET, tmp);
5117 		tmp = RREG32(SRBM_SOFT_RESET);
5118 	}
5119 
5120 	/* Wait a little for things to settle down */
5121 	udelay(50);
5122 
5123 	evergreen_mc_resume(rdev, &save);
5124 	udelay(50);
5125 
5126 	cik_print_gpu_status_regs(rdev);
5127 }
5128 
5129 struct kv_reset_save_regs {
5130 	u32 gmcon_reng_execute;
5131 	u32 gmcon_misc;
5132 	u32 gmcon_misc3;
5133 };
5134 
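/**
 * kv_save_regs_for_reset - save GMCON registers before a reset
 *
 * @rdev: radeon_device pointer
 * @save: kv_reset_save_regs structure to save the values into
 *
 * Save the GMCON registers on APUs so they can be restored by
 * kv_restore_regs_for_reset() after a PCI config reset, and
 * disable register-engine execution and stutter mode in the
 * meantime (KV/KB/ML).
 */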
5135 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5136 				   struct kv_reset_save_regs *save)
5137 {
5138 	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5139 	save->gmcon_misc = RREG32(GMCON_MISC);
5140 	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5141 
5142 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5143 	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5144 						STCTRL_STUTTER_EN));
5145 }
5146 
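/**
 * kv_restore_regs_for_reset - restore GMCON state after a reset
 *
 * @rdev: radeon_device pointer
 * @save: values captured by kv_save_regs_for_reset()
 *
 * Replay the GMCON PGFSM write sequence and restore the saved
 * GMCON registers after a PCI config reset (KV/KB/ML).
 */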
5147 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5148 				      struct kv_reset_save_regs *save)
5149 {
5150 	int i;
5151 
5152 	WREG32(GMCON_PGFSM_WRITE, 0);
5153 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5154 
5155 	for (i = 0; i < 5; i++)
5156 		WREG32(GMCON_PGFSM_WRITE, 0);
5157 
5158 	WREG32(GMCON_PGFSM_WRITE, 0);
5159 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5160 
5161 	for (i = 0; i < 5; i++)
5162 		WREG32(GMCON_PGFSM_WRITE, 0);
5163 
5164 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5165 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5166 
5167 	for (i = 0; i < 5; i++)
5168 		WREG32(GMCON_PGFSM_WRITE, 0);
5169 
5170 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5171 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5172 
5173 	for (i = 0; i < 5; i++)
5174 		WREG32(GMCON_PGFSM_WRITE, 0);
5175 
5176 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5177 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5178 
5179 	for (i = 0; i < 5; i++)
5180 		WREG32(GMCON_PGFSM_WRITE, 0);
5181 
5182 	WREG32(GMCON_PGFSM_WRITE, 0);
5183 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5184 
5185 	for (i = 0; i < 5; i++)
5186 		WREG32(GMCON_PGFSM_WRITE, 0);
5187 
5188 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5189 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5190 
5191 	for (i = 0; i < 5; i++)
5192 		WREG32(GMCON_PGFSM_WRITE, 0);
5193 
5194 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5195 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5196 
5197 	for (i = 0; i < 5; i++)
5198 		WREG32(GMCON_PGFSM_WRITE, 0);
5199 
5200 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5201 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5202 
5203 	for (i = 0; i < 5; i++)
5204 		WREG32(GMCON_PGFSM_WRITE, 0);
5205 
5206 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5207 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5208 
5209 	for (i = 0; i < 5; i++)
5210 		WREG32(GMCON_PGFSM_WRITE, 0);
5211 
5212 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5213 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5214 
5215 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5216 	WREG32(GMCON_MISC, save->gmcon_misc);
5217 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5218 }
5219 
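/**
 * cik_gpu_pci_config_reset - hard reset the GPU via PCI config space
 *
 * @rdev: radeon_device pointer
 *
 * Disable CG/PG, halt the CP and SDMA engines, stop the RLC,
 * quiesce the MC and disable bus mastering, then reset the asic
 * through the PCI config registers and wait for it to come back
 * (CIK).
 */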
5220 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5221 {
5222 	struct evergreen_mc_save save;
5223 	struct kv_reset_save_regs kv_save = { 0 };
5224 	u32 tmp, i;
5225 
5226 	dev_info(rdev->dev, "GPU pci config reset\n");
5227 
5228 	/* disable dpm? */
5229 
5230 	/* disable cg/pg */
5231 	cik_fini_pg(rdev);
5232 	cik_fini_cg(rdev);
5233 
5234 	/* Disable GFX parsing/prefetching */
5235 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5236 
5237 	/* Disable MEC parsing/prefetching */
5238 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5239 
5240 	/* sdma0 */
5241 	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5242 	tmp |= SDMA_HALT;
5243 	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5244 	/* sdma1 */
5245 	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5246 	tmp |= SDMA_HALT;
5247 	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5248 	/* XXX other engines? */
5249 
5250 	/* halt the rlc, disable cp internal ints */
5251 	cik_rlc_stop(rdev);
5252 
5253 	udelay(50);
5254 
5255 	/* disable mem access */
5256 	evergreen_mc_stop(rdev, &save);
5257 	if (evergreen_mc_wait_for_idle(rdev)) {
5258 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5259 	}
5260 
5261 	if (rdev->flags & RADEON_IS_IGP)
5262 		kv_save_regs_for_reset(rdev, &kv_save);
5263 
5264 	/* disable BM */
5265 	pci_disable_busmaster(rdev->pdev->dev.bsddev);
5266 	/* reset */
5267 	radeon_pci_config_reset(rdev);
5268 
5269 	udelay(100);
5270 
5271 	/* wait for asic to come out of reset */
5272 	for (i = 0; i < rdev->usec_timeout; i++) {
5273 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5274 			break;
5275 		udelay(1);
5276 	}
5277 
5278 	/* does asic init need to be run first??? */
5279 	if (rdev->flags & RADEON_IS_IGP)
5280 		kv_restore_regs_for_reset(rdev, &kv_save);
5281 }
5282 
5283 /**
5284  * cik_asic_reset - soft reset GPU
5285  *
5286  * @rdev: radeon_device pointer
5287  * @hard: force hard reset
5288  *
5289  * Look up which blocks are hung and attempt
5290  * to reset them.
5291  * Returns 0 for success.
5292  */
5293 int cik_asic_reset(struct radeon_device *rdev, bool hard)
5294 {
5295 	u32 reset_mask;
5296 
5297 	if (hard) {
5298 		cik_gpu_pci_config_reset(rdev);
5299 		return 0;
5300 	}
5301 
5302 	reset_mask = cik_gpu_check_soft_reset(rdev);
5303 
5304 	if (reset_mask)
5305 		r600_set_bios_scratch_engine_hung(rdev, true);
5306 
5307 	/* try soft reset */
5308 	cik_gpu_soft_reset(rdev, reset_mask);
5309 
5310 	reset_mask = cik_gpu_check_soft_reset(rdev);
5311 
5312 	/* try pci config reset */
5313 	if (reset_mask && radeon_hard_reset)
5314 		cik_gpu_pci_config_reset(rdev);
5315 
5316 	reset_mask = cik_gpu_check_soft_reset(rdev);
5317 
5318 	if (!reset_mask)
5319 		r600_set_bios_scratch_engine_hung(rdev, false);
5320 
5321 	return 0;
5322 }
5323 
5324 /**
5325  * cik_gfx_is_lockup - check if the 3D engine is locked up
5326  *
5327  * @rdev: radeon_device pointer
5328  * @ring: radeon_ring structure holding ring information
5329  *
5330  * Check if the 3D engine is locked up (CIK).
5331  * Returns true if the engine is locked, false if not.
5332  */
5333 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5334 {
5335 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5336 
5337 	if (!(reset_mask & (RADEON_RESET_GFX |
5338 			    RADEON_RESET_COMPUTE |
5339 			    RADEON_RESET_CP))) {
5340 		radeon_ring_lockup_update(rdev, ring);
5341 		return false;
5342 	}
5343 	return radeon_ring_test_lockup(rdev, ring);
5344 }
5345 
5346 /* MC */
5347 /**
5348  * cik_mc_program - program the GPU memory controller
5349  *
5350  * @rdev: radeon_device pointer
5351  *
5352  * Set the location of vram, gart, and AGP in the GPU's
5353  * physical address space (CIK).
5354  */
5355 static void cik_mc_program(struct radeon_device *rdev)
5356 {
5357 	struct evergreen_mc_save save;
5358 	u32 tmp;
5359 	int i, j;
5360 
5361 	/* Initialize HDP */
5362 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5363 		WREG32((0x2c14 + j), 0x00000000);
5364 		WREG32((0x2c18 + j), 0x00000000);
5365 		WREG32((0x2c1c + j), 0x00000000);
5366 		WREG32((0x2c20 + j), 0x00000000);
5367 		WREG32((0x2c24 + j), 0x00000000);
5368 	}
5369 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5370 
5371 	evergreen_mc_stop(rdev, &save);
5372 	if (radeon_mc_wait_for_idle(rdev)) {
5373 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5374 	}
5375 	/* Lockout access through VGA aperture*/
5376 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5377 	/* Update configuration */
5378 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5379 	       rdev->mc.vram_start >> 12);
5380 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5381 	       rdev->mc.vram_end >> 12);
5382 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5383 	       rdev->vram_scratch.gpu_addr >> 12);
5384 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5385 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5386 	WREG32(MC_VM_FB_LOCATION, tmp);
5387 	/* XXX double check these! */
5388 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5389 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5390 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5391 	WREG32(MC_VM_AGP_BASE, 0);
5392 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5393 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5394 	if (radeon_mc_wait_for_idle(rdev)) {
5395 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5396 	}
5397 	evergreen_mc_resume(rdev, &save);
5398 	/* we need to own VRAM, so turn off the VGA renderer here
5399 	 * to stop it from overwriting our objects */
5400 	rv515_vga_render_disable(rdev);
5401 }
5402 
5403 /**
5404  * cik_mc_init - initialize the memory controller driver params
5405  *
5406  * @rdev: radeon_device pointer
5407  *
5408  * Look up the amount of vram, vram width, and decide how to place
5409  * vram and gart within the GPU's physical address space (CIK).
5410  * Returns 0 for success.
5411  */
5412 static int cik_mc_init(struct radeon_device *rdev)
5413 {
5414 	u32 tmp;
5415 	int chansize, numchan;
5416 
5417 	/* Get VRAM information */
5418 	rdev->mc.vram_is_ddr = true;
5419 	tmp = RREG32(MC_ARB_RAMCFG);
5420 	if (tmp & CHANSIZE_MASK) {
5421 		chansize = 64;
5422 	} else {
5423 		chansize = 32;
5424 	}
5425 	tmp = RREG32(MC_SHARED_CHMAP);
5426 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5427 	case 0:
5428 	default:
5429 		numchan = 1;
5430 		break;
5431 	case 1:
5432 		numchan = 2;
5433 		break;
5434 	case 2:
5435 		numchan = 4;
5436 		break;
5437 	case 3:
5438 		numchan = 8;
5439 		break;
5440 	case 4:
5441 		numchan = 3;
5442 		break;
5443 	case 5:
5444 		numchan = 6;
5445 		break;
5446 	case 6:
5447 		numchan = 10;
5448 		break;
5449 	case 7:
5450 		numchan = 12;
5451 		break;
5452 	case 8:
5453 		numchan = 16;
5454 		break;
5455 	}
5456 	rdev->mc.vram_width = numchan * chansize;
5457 	/* Could aper size report 0? */
5458 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5459 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5460 	/* size in MB on CIK */
5461 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5462 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5463 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5464 	si_vram_gtt_location(rdev, &rdev->mc);
5465 	radeon_update_bandwidth_info(rdev);
5466 
5467 	return 0;
5468 }
5469 
5470 /*
5471  * GART
5472  * VMID 0 is the physical GPU address space used by the kernel.
5473  * VMIDs 1-15 are used for userspace clients and are handled
5474  * by the radeon vm/hsa code.
5475  */
5476 /**
5477  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5478  *
5479  * @rdev: radeon_device pointer
5480  *
5481  * Flush the TLB for the VMID 0 page table (CIK).
5482  */
5483 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5484 {
5485 	/* flush hdp cache */
5486 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5487 
5488 	/* bits 0-15 are the VM contexts 0-15 */
5489 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5490 }
5491 
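/**
 * cik_pcie_init_compute_vmid - init the SH_MEM registers for compute VMIDs
 *
 * @rdev: radeon_device pointer
 *
 * Program the SH_MEM config, aperture and base registers for
 * VMIDs 8-15, which are used for compute clients (CIK).
 */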
5492 static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
5493 {
5494 	int i;
5495 	uint32_t sh_mem_bases, sh_mem_config;
5496 
5497 	sh_mem_bases = 0x6000 | 0x6000 << 16;
5498 	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
5499 	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
5500 
5501 	mutex_lock(&rdev->srbm_mutex);
5502 	for (i = 8; i < 16; i++) {
5503 		cik_srbm_select(rdev, 0, 0, 0, i);
5504 		/* CP and shaders */
5505 		WREG32(SH_MEM_CONFIG, sh_mem_config);
5506 		WREG32(SH_MEM_APE1_BASE, 1);
5507 		WREG32(SH_MEM_APE1_LIMIT, 0);
5508 		WREG32(SH_MEM_BASES, sh_mem_bases);
5509 	}
5510 	cik_srbm_select(rdev, 0, 0, 0, 0);
5511 	mutex_unlock(&rdev->srbm_mutex);
5512 }
5513 
5514 /**
5515  * cik_pcie_gart_enable - gart enable
5516  *
5517  * @rdev: radeon_device pointer
5518  *
5519  * This sets up the TLBs, programs the page tables for VMID0,
5520  * sets up the hw for VMIDs 1-15 which are allocated on
5521  * demand, and sets up the global locations for the LDS, GDS,
5522  * and GPUVM for FSA64 clients (CIK).
5523  * Returns 0 for success, errors for failure.
5524  */
5525 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5526 {
5527 	int r, i;
5528 
5529 	if (rdev->gart.robj == NULL) {
5530 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5531 		return -EINVAL;
5532 	}
5533 	r = radeon_gart_table_vram_pin(rdev);
5534 	if (r)
5535 		return r;
5536 	/* Setup TLB control */
5537 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5538 	       (0xA << 7) |
5539 	       ENABLE_L1_TLB |
5540 	       ENABLE_L1_FRAGMENT_PROCESSING |
5541 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5542 	       ENABLE_ADVANCED_DRIVER_MODEL |
5543 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5544 	/* Setup L2 cache */
5545 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5546 	       ENABLE_L2_FRAGMENT_PROCESSING |
5547 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5548 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5549 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5550 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5551 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5552 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5553 	       BANK_SELECT(4) |
5554 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5555 	/* setup context0 */
5556 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5557 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5558 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5559 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5560 			(u32)(rdev->dummy_page.addr >> 12));
5561 	WREG32(VM_CONTEXT0_CNTL2, 0);
5562 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5563 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5564 
5565 	WREG32(0x15D4, 0);
5566 	WREG32(0x15D8, 0);
5567 	WREG32(0x15DC, 0);
5568 
5569 	/* restore contexts 1-15 */
5570 	/* set vm size, must be a multiple of 4 */
5571 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5572 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5573 	for (i = 1; i < 16; i++) {
5574 		if (i < 8)
5575 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5576 			       rdev->vm_manager.saved_table_addr[i]);
5577 		else
5578 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5579 			       rdev->vm_manager.saved_table_addr[i]);
5580 	}
5581 
5582 	/* enable contexts 1-15 */
5583 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5584 	       (u32)(rdev->dummy_page.addr >> 12));
5585 	WREG32(VM_CONTEXT1_CNTL2, 4);
5586 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5587 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5588 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5589 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5590 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5591 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5592 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5593 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5594 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5595 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5596 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5597 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5598 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5599 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5600 
5601 	if (rdev->family == CHIP_KAVERI) {
5602 		u32 tmp = RREG32(CHUB_CONTROL);
5603 		tmp &= ~BYPASS_VM;
5604 		WREG32(CHUB_CONTROL, tmp);
5605 	}
5606 
5607 	/* XXX SH_MEM regs */
5608 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5609 	mutex_lock(&rdev->srbm_mutex);
5610 	for (i = 0; i < 16; i++) {
5611 		cik_srbm_select(rdev, 0, 0, 0, i);
5612 		/* CP and shaders */
5613 		WREG32(SH_MEM_CONFIG, 0);
5614 		WREG32(SH_MEM_APE1_BASE, 1);
5615 		WREG32(SH_MEM_APE1_LIMIT, 0);
5616 		WREG32(SH_MEM_BASES, 0);
5617 		/* SDMA GFX */
5618 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5619 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5620 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5621 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5622 		/* XXX SDMA RLC - todo */
5623 	}
5624 	cik_srbm_select(rdev, 0, 0, 0, 0);
5625 	mutex_unlock(&rdev->srbm_mutex);
5626 
5627 	cik_pcie_init_compute_vmid(rdev);
5628 
5629 	cik_pcie_gart_tlb_flush(rdev);
5630 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5631 		 (unsigned)(rdev->mc.gtt_size >> 20),
5632 		 (unsigned long long)rdev->gart.table_addr);
5633 	rdev->gart.ready = true;
5634 	return 0;
5635 }
5636 
5637 /**
5638  * cik_pcie_gart_disable - gart disable
5639  *
5640  * @rdev: radeon_device pointer
5641  *
5642  * This disables all VM page tables (CIK).
5643  */
5644 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5645 {
5646 	unsigned i;
5647 
5648 	for (i = 1; i < 16; ++i) {
5649 		uint32_t reg;
5650 		if (i < 8)
5651 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5652 		else
5653 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5654 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5655 	}
5656 
5657 	/* Disable all tables */
5658 	WREG32(VM_CONTEXT0_CNTL, 0);
5659 	WREG32(VM_CONTEXT1_CNTL, 0);
5660 	/* Setup TLB control */
5661 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5662 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5663 	/* Setup L2 cache */
5664 	WREG32(VM_L2_CNTL,
5665 	       ENABLE_L2_FRAGMENT_PROCESSING |
5666 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5667 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5668 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5669 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5670 	WREG32(VM_L2_CNTL2, 0);
5671 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5672 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5673 	radeon_gart_table_vram_unpin(rdev);
5674 }
5675 
5676 /**
5677  * cik_pcie_gart_fini - vm fini callback
5678  *
5679  * @rdev: radeon_device pointer
5680  *
5681  * Tears down the driver GART/VM setup (CIK).
5682  */
5683 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5684 {
5685 	cik_pcie_gart_disable(rdev);
5686 	radeon_gart_table_vram_free(rdev);
5687 	radeon_gart_fini(rdev);
5688 }
5689 
5690 /* vm parser */
5691 /**
5692  * cik_ib_parse - vm ib_parse callback
5693  *
5694  * @rdev: radeon_device pointer
5695  * @ib: indirect buffer pointer
5696  *
5697  * CIK uses hw IB checking so this is a nop (CIK).
5698  */
5699 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5700 {
5701 	return 0;
5702 }
5703 
5704 /*
5705  * vm
5706  * VMID 0 is the physical GPU address space used by the kernel.
5707  * VMIDs 1-15 are used for userspace clients and are handled
5708  * by the radeon vm/hsa code.
5709  */
5710 /**
5711  * cik_vm_init - cik vm init callback
5712  *
5713  * @rdev: radeon_device pointer
5714  *
5715  * Inits cik specific vm parameters (number of VMs, base of vram for
5716  * VMIDs 1-15) (CIK).
5717  * Returns 0 for success.
5718  */
5719 int cik_vm_init(struct radeon_device *rdev)
5720 {
5721 	/* number of VMs */
5722 	rdev->vm_manager.nvm = 16;
5723 	/* base offset of vram pages */
5724 	if (rdev->flags & RADEON_IS_IGP) {
5725 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5726 		tmp <<= 22;
5727 		rdev->vm_manager.vram_base_offset = tmp;
5728 	} else
5729 		rdev->vm_manager.vram_base_offset = 0;
5730 
5731 	return 0;
5732 }
5733 
5734 /**
5735  * cik_vm_fini - cik vm fini callback
5736  *
5737  * @rdev: radeon_device pointer
5738  *
5739  * Tear down any asic specific VM setup (CIK).
5740  */
5741 void cik_vm_fini(struct radeon_device *rdev)
5742 {
5743 }
5744 
5745 /**
5746  * cik_vm_decode_fault - print human readable fault info
5747  *
5748  * @rdev: radeon_device pointer
5749  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5750  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5751  *
5752  * Print human readable fault information (CIK).
5753  */
5754 static void cik_vm_decode_fault(struct radeon_device *rdev,
5755 				u32 status, u32 addr, u32 mc_client)
5756 {
5757 	u32 mc_id;
5758 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5759 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5760 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5761 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5762 
5763 	if (rdev->family == CHIP_HAWAII)
5764 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5765 	else
5766 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5767 
5768 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5769 	       protections, vmid, addr,
5770 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5771 	       block, mc_client, mc_id);
5772 }
5773 
5774 /**
5775  * cik_vm_flush - cik vm flush using the CP
5776  *
5777  * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @vm_id: VMID to flush
 * @pd_addr: new page directory base address
5778  *
5779  * Update the page table base and flush the VM TLB
5780  * using the CP (CIK).
5781  */
5782 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5783 		  unsigned vm_id, uint64_t pd_addr)
5784 {
5785 	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5786 
5787 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5788 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5789 				 WRITE_DATA_DST_SEL(0)));
5790 	if (vm_id < 8) {
5791 		radeon_ring_write(ring,
5792 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5793 	} else {
5794 		radeon_ring_write(ring,
5795 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5796 	}
5797 	radeon_ring_write(ring, 0);
5798 	radeon_ring_write(ring, pd_addr >> 12);
5799 
5800 	/* update SH_MEM_* regs */
5801 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5802 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5803 				 WRITE_DATA_DST_SEL(0)));
5804 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5805 	radeon_ring_write(ring, 0);
5806 	radeon_ring_write(ring, VMID(vm_id));
5807 
5808 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5809 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5810 				 WRITE_DATA_DST_SEL(0)));
5811 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5812 	radeon_ring_write(ring, 0);
5813 
5814 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5815 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5816 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5817 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5818 
5819 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5820 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5821 				 WRITE_DATA_DST_SEL(0)));
5822 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5823 	radeon_ring_write(ring, 0);
5824 	radeon_ring_write(ring, VMID(0));
5825 
5826 	/* HDP flush */
5827 	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5828 
5829 	/* bits 0-15 are the VM contexts 0-15 */
5830 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5831 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5832 				 WRITE_DATA_DST_SEL(0)));
5833 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5834 	radeon_ring_write(ring, 0);
5835 	radeon_ring_write(ring, 1 << vm_id);
5836 
5837 	/* wait for the invalidate to complete */
5838 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5839 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5840 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
5841 				 WAIT_REG_MEM_ENGINE(0))); /* me */
5842 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5843 	radeon_ring_write(ring, 0);
5844 	radeon_ring_write(ring, 0); /* ref */
5845 	radeon_ring_write(ring, 0); /* mask */
5846 	radeon_ring_write(ring, 0x20); /* poll interval */
5847 
5848 	/* compute doesn't have PFP */
5849 	if (usepfp) {
5850 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5851 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5852 		radeon_ring_write(ring, 0x0);
5853 	}
5854 }
5855 
5856 /*
5857  * RLC
5858  * The RLC is a multi-purpose microengine that handles a
5859  * variety of functions, the most important of which is
5860  * the interrupt controller.
5861  */
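/**
 * cik_enable_gui_idle_interrupt - enable/disable the gui idle interrupt
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the interrupt
 *
 * Toggle the context busy/empty interrupt enables in
 * CP_INT_CNTL_RING0 (CIK).
 */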
5862 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5863 					  bool enable)
5864 {
5865 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5866 
5867 	if (enable)
5868 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5869 	else
5870 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5871 	WREG32(CP_INT_CNTL_RING0, tmp);
5872 }
5873 
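/**
 * cik_enable_lbpw - enable/disable RLC load balancing
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable load balancing
 *
 * Toggle LOAD_BALANCE_ENABLE in RLC_LB_CNTL; currently always
 * disabled by cik_rlc_resume() pending confirmation of which
 * chips support lbpw (CIK).
 */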
5874 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5875 {
5876 	u32 tmp;
5877 
5878 	tmp = RREG32(RLC_LB_CNTL);
5879 	if (enable)
5880 		tmp |= LOAD_BALANCE_ENABLE;
5881 	else
5882 		tmp &= ~LOAD_BALANCE_ENABLE;
5883 	WREG32(RLC_LB_CNTL, tmp);
5884 }
5885 
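/**
 * cik_wait_for_rlc_serdes - wait for the RLC serdes to go idle
 *
 * @rdev: radeon_device pointer
 *
 * Poll the per-SE/SH CU master busy status, then the non-CU
 * master busy bits, until they clear or the usec timeout
 * expires (CIK).
 */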
5886 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5887 {
5888 	u32 i, j, k;
5889 	u32 mask;
5890 
5891 	mutex_lock(&rdev->grbm_idx_mutex);
5892 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5893 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5894 			cik_select_se_sh(rdev, i, j);
5895 			for (k = 0; k < rdev->usec_timeout; k++) {
5896 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5897 					break;
5898 				udelay(1);
5899 			}
5900 		}
5901 	}
5902 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5903 	mutex_unlock(&rdev->grbm_idx_mutex);
5904 
5905 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5906 	for (k = 0; k < rdev->usec_timeout; k++) {
5907 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5908 			break;
5909 		udelay(1);
5910 	}
5911 }
5912 
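/**
 * cik_update_rlc - update the RLC_CNTL register
 *
 * @rdev: radeon_device pointer
 * @rlc: new RLC_CNTL value
 *
 * Write @rlc to RLC_CNTL if it differs from the current value;
 * used to restore the value returned by cik_halt_rlc() (CIK).
 */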
5913 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5914 {
5915 	u32 tmp;
5916 
5917 	tmp = RREG32(RLC_CNTL);
5918 	if (tmp != rlc)
5919 		WREG32(RLC_CNTL, rlc);
5920 }
5921 
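/**
 * cik_halt_rlc - halt the RLC and wait for it to idle
 *
 * @rdev: radeon_device pointer
 *
 * Clear RLC_ENABLE, then wait for the RLC GPM to go idle and the
 * serdes to settle (CIK).
 * Returns the original RLC_CNTL value so the caller can restore
 * it via cik_update_rlc().
 */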
5922 static u32 cik_halt_rlc(struct radeon_device *rdev)
5923 {
5924 	u32 data, orig;
5925 
5926 	orig = data = RREG32(RLC_CNTL);
5927 
5928 	if (data & RLC_ENABLE) {
5929 		u32 i;
5930 
5931 		data &= ~RLC_ENABLE;
5932 		WREG32(RLC_CNTL, data);
5933 
5934 		for (i = 0; i < rdev->usec_timeout; i++) {
5935 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5936 				break;
5937 			udelay(1);
5938 		}
5939 
5940 		cik_wait_for_rlc_serdes(rdev);
5941 	}
5942 
5943 	return orig;
5944 }
5945 
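/**
 * cik_enter_rlc_safe_mode - put the RLC into safe mode
 *
 * @rdev: radeon_device pointer
 *
 * Request RLC safe mode and wait for the gfx power/clock status
 * bits and the request acknowledgement; paired with
 * cik_exit_rlc_safe_mode() around RLC-sensitive updates (CIK).
 */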
5946 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5947 {
5948 	u32 tmp, i, mask;
5949 
5950 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5951 	WREG32(RLC_GPR_REG2, tmp);
5952 
5953 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5954 	for (i = 0; i < rdev->usec_timeout; i++) {
5955 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5956 			break;
5957 		udelay(1);
5958 	}
5959 
5960 	for (i = 0; i < rdev->usec_timeout; i++) {
5961 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5962 			break;
5963 		udelay(1);
5964 	}
5965 }
5966 
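/**
 * cik_exit_rlc_safe_mode - take the RLC out of safe mode
 *
 * @rdev: radeon_device pointer
 *
 * Request RLC safe mode exit; unlike entry, no acknowledgement
 * is awaited (CIK).
 */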
5967 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5968 {
5969 	u32 tmp;
5970 
5971 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5972 	WREG32(RLC_GPR_REG2, tmp);
5973 }
5974 
5975 /**
5976  * cik_rlc_stop - stop the RLC ME
5977  *
5978  * @rdev: radeon_device pointer
5979  *
5980  * Halt the RLC ME (MicroEngine) (CIK).
5981  */
5982 static void cik_rlc_stop(struct radeon_device *rdev)
5983 {
5984 	WREG32(RLC_CNTL, 0);
5985 
5986 	cik_enable_gui_idle_interrupt(rdev, false);
5987 
5988 	cik_wait_for_rlc_serdes(rdev);
5989 }
5990 
5991 /**
5992  * cik_rlc_start - start the RLC ME
5993  *
5994  * @rdev: radeon_device pointer
5995  *
5996  * Unhalt the RLC ME (MicroEngine) (CIK).
5997  */
5998 static void cik_rlc_start(struct radeon_device *rdev)
5999 {
6000 	WREG32(RLC_CNTL, RLC_ENABLE);
6001 
6002 	cik_enable_gui_idle_interrupt(rdev, true);
6003 
6004 	udelay(50);
6005 }
6006 
6007 /**
6008  * cik_rlc_resume - setup the RLC hw
6009  *
6010  * @rdev: radeon_device pointer
6011  *
6012  * Initialize the RLC registers, load the ucode,
6013  * and start the RLC (CIK).
6014  * Returns 0 for success, -EINVAL if the ucode is not available.
6015  */
6016 static int cik_rlc_resume(struct radeon_device *rdev)
6017 {
6018 	u32 i, size, tmp;
6019 
6020 	if (!rdev->rlc_fw)
6021 		return -EINVAL;
6022 
6023 	cik_rlc_stop(rdev);
6024 
6025 	/* disable CG */
6026 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6027 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6028 
6029 	si_rlc_reset(rdev);
6030 
6031 	cik_init_pg(rdev);
6032 
6033 	cik_init_cg(rdev);
6034 
6035 	WREG32(RLC_LB_CNTR_INIT, 0);
6036 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6037 
6038 	mutex_lock(&rdev->grbm_idx_mutex);
6039 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6040 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6041 	WREG32(RLC_LB_PARAMS, 0x00600408);
6042 	WREG32(RLC_LB_CNTL, 0x80000004);
6043 	mutex_unlock(&rdev->grbm_idx_mutex);
6044 
6045 	WREG32(RLC_MC_CNTL, 0);
6046 	WREG32(RLC_UCODE_CNTL, 0);
6047 
6048 	if (rdev->new_fw) {
6049 		const struct rlc_firmware_header_v1_0 *hdr =
6050 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6051 		const __le32 *fw_data = (const __le32 *)
6052 			((const char *)rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6053 
6054 		radeon_ucode_print_rlc_hdr(&hdr->header);
6055 
6056 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6057 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6058 		for (i = 0; i < size; i++)
6059 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6060 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6061 	} else {
6062 		const __be32 *fw_data;
6063 
6064 		switch (rdev->family) {
6065 		case CHIP_BONAIRE:
6066 		case CHIP_HAWAII:
6067 		default:
6068 			size = BONAIRE_RLC_UCODE_SIZE;
6069 			break;
6070 		case CHIP_KAVERI:
6071 			size = KV_RLC_UCODE_SIZE;
6072 			break;
6073 		case CHIP_KABINI:
6074 			size = KB_RLC_UCODE_SIZE;
6075 			break;
6076 		case CHIP_MULLINS:
6077 			size = ML_RLC_UCODE_SIZE;
6078 			break;
6079 		}
6080 
6081 		fw_data = (const __be32 *)rdev->rlc_fw->data;
6082 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6083 		for (i = 0; i < size; i++)
6084 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6085 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6086 	}
6087 
6088 	/* XXX - find out what chips support lbpw */
6089 	cik_enable_lbpw(rdev, false);
6090 
6091 	if (rdev->family == CHIP_BONAIRE)
6092 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6093 
6094 	cik_rlc_start(rdev);
6095 
6096 	return 0;
6097 }
6098 
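/**
 * cik_enable_cgcg - enable/disable coarse grain clock gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable CGCG
 *
 * Halt the RLC, program the serdes write masks, then set or
 * clear the CGCG/CGLS enables in RLC_CGCG_CGLS_CTRL (CIK).
 */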
6099 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6100 {
6101 	u32 data, orig, tmp, tmp2;
6102 
6103 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6104 
6105 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6106 		cik_enable_gui_idle_interrupt(rdev, true);
6107 
6108 		tmp = cik_halt_rlc(rdev);
6109 
6110 		mutex_lock(&rdev->grbm_idx_mutex);
6111 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6112 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6113 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6114 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6115 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6116 		mutex_unlock(&rdev->grbm_idx_mutex);
6117 
6118 		cik_update_rlc(rdev, tmp);
6119 
6120 		data |= CGCG_EN | CGLS_EN;
6121 	} else {
6122 		cik_enable_gui_idle_interrupt(rdev, false);
6123 
6124 		RREG32(CB_CGTT_SCLK_CTRL);
6125 		RREG32(CB_CGTT_SCLK_CTRL);
6126 		RREG32(CB_CGTT_SCLK_CTRL);
6127 		RREG32(CB_CGTT_SCLK_CTRL);
6128 
6129 		data &= ~(CGCG_EN | CGLS_EN);
6130 	}
6131 
6132 	if (orig != data)
6133 		WREG32(RLC_CGCG_CGLS_CTRL, data);
6134 
6135 }
6136 
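/**
 * cik_enable_mgcg - enable/disable medium grain clock gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable MGCG
 *
 * Toggle CP/RLC memory light sleep, the MGCG overrides, and the
 * CGTS SM control settings, reprogramming the RLC serdes as
 * needed (CIK).
 */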
6137 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6138 {
6139 	u32 data, orig, tmp = 0;
6140 
6141 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6142 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6143 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6144 				orig = data = RREG32(CP_MEM_SLP_CNTL);
6145 				data |= CP_MEM_LS_EN;
6146 				if (orig != data)
6147 					WREG32(CP_MEM_SLP_CNTL, data);
6148 			}
6149 		}
6150 
6151 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6152 		data |= 0x00000001;
6153 		data &= 0xfffffffd;
6154 		if (orig != data)
6155 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6156 
6157 		tmp = cik_halt_rlc(rdev);
6158 
6159 		mutex_lock(&rdev->grbm_idx_mutex);
6160 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6161 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6162 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6163 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6164 		WREG32(RLC_SERDES_WR_CTRL, data);
6165 		mutex_unlock(&rdev->grbm_idx_mutex);
6166 
6167 		cik_update_rlc(rdev, tmp);
6168 
6169 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6170 			orig = data = RREG32(CGTS_SM_CTRL_REG);
6171 			data &= ~SM_MODE_MASK;
6172 			data |= SM_MODE(0x2);
6173 			data |= SM_MODE_ENABLE;
6174 			data &= ~CGTS_OVERRIDE;
6175 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6176 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6177 				data &= ~CGTS_LS_OVERRIDE;
6178 			data &= ~ON_MONITOR_ADD_MASK;
6179 			data |= ON_MONITOR_ADD_EN;
6180 			data |= ON_MONITOR_ADD(0x96);
6181 			if (orig != data)
6182 				WREG32(CGTS_SM_CTRL_REG, data);
6183 		}
6184 	} else {
6185 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6186 		data |= 0x00000003;
6187 		if (orig != data)
6188 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6189 
6190 		data = RREG32(RLC_MEM_SLP_CNTL);
6191 		if (data & RLC_MEM_LS_EN) {
6192 			data &= ~RLC_MEM_LS_EN;
6193 			WREG32(RLC_MEM_SLP_CNTL, data);
6194 		}
6195 
6196 		data = RREG32(CP_MEM_SLP_CNTL);
6197 		if (data & CP_MEM_LS_EN) {
6198 			data &= ~CP_MEM_LS_EN;
6199 			WREG32(CP_MEM_SLP_CNTL, data);
6200 		}
6201 
6202 		orig = data = RREG32(CGTS_SM_CTRL_REG);
6203 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6204 		if (orig != data)
6205 			WREG32(CGTS_SM_CTRL_REG, data);
6206 
6207 		tmp = cik_halt_rlc(rdev);
6208 
6209 		mutex_lock(&rdev->grbm_idx_mutex);
6210 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6211 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6212 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6213 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6214 		WREG32(RLC_SERDES_WR_CTRL, data);
6215 		mutex_unlock(&rdev->grbm_idx_mutex);
6216 
6217 		cik_update_rlc(rdev, tmp);
6218 	}
6219 }
6220 
6221 static const u32 mc_cg_registers[] =
6222 {
6223 	MC_HUB_MISC_HUB_CG,
6224 	MC_HUB_MISC_SIP_CG,
6225 	MC_HUB_MISC_VM_CG,
6226 	MC_XPB_CLK_GAT,
6227 	ATC_MISC_CG,
6228 	MC_CITF_MISC_WR_CG,
6229 	MC_CITF_MISC_RD_CG,
6230 	MC_CITF_MISC_VM_CG,
6231 	VM_L2_CG,
6232 };
6233 
6234 static void cik_enable_mc_ls(struct radeon_device *rdev,
6235 			     bool enable)
6236 {
6237 	int i;
6238 	u32 orig, data;
6239 
6240 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6241 		orig = data = RREG32(mc_cg_registers[i]);
6242 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6243 			data |= MC_LS_ENABLE;
6244 		else
6245 			data &= ~MC_LS_ENABLE;
6246 		if (data != orig)
6247 			WREG32(mc_cg_registers[i], data);
6248 	}
6249 }
6250 
6251 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6252 			       bool enable)
6253 {
6254 	int i;
6255 	u32 orig, data;
6256 
6257 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6258 		orig = data = RREG32(mc_cg_registers[i]);
6259 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6260 			data |= MC_CG_ENABLE;
6261 		else
6262 			data &= ~MC_CG_ENABLE;
6263 		if (data != orig)
6264 			WREG32(mc_cg_registers[i], data);
6265 	}
6266 }
6267 
6268 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6269 				 bool enable)
6270 {
6271 	u32 orig, data;
6272 
6273 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6274 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6275 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6276 	} else {
6277 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6278 		data |= 0xff000000;
6279 		if (data != orig)
6280 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6281 
6282 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6283 		data |= 0xff000000;
6284 		if (data != orig)
6285 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6286 	}
6287 }
6288 
6289 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6290 				 bool enable)
6291 {
6292 	u32 orig, data;
6293 
6294 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6295 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6296 		data |= 0x100;
6297 		if (orig != data)
6298 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6299 
6300 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6301 		data |= 0x100;
6302 		if (orig != data)
6303 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6304 	} else {
6305 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6306 		data &= ~0x100;
6307 		if (orig != data)
6308 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6309 
6310 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6311 		data &= ~0x100;
6312 		if (orig != data)
6313 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6314 	}
6315 }
6316 
6317 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6318 				bool enable)
6319 {
6320 	u32 orig, data;
6321 
6322 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6323 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6324 		data = 0xfff;
6325 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6326 
6327 		orig = data = RREG32(UVD_CGC_CTRL);
6328 		data |= DCM;
6329 		if (orig != data)
6330 			WREG32(UVD_CGC_CTRL, data);
6331 	} else {
6332 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6333 		data &= ~0xfff;
6334 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6335 
6336 		orig = data = RREG32(UVD_CGC_CTRL);
6337 		data &= ~DCM;
6338 		if (orig != data)
6339 			WREG32(UVD_CGC_CTRL, data);
6340 	}
6341 }
6342 
6343 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6344 			       bool enable)
6345 {
6346 	u32 orig, data;
6347 
6348 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6349 
6350 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6351 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6352 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6353 	else
6354 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6355 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6356 
6357 	if (orig != data)
6358 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6359 }
6360 
6361 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6362 				bool enable)
6363 {
6364 	u32 orig, data;
6365 
6366 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6367 
6368 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6369 		data &= ~CLOCK_GATING_DIS;
6370 	else
6371 		data |= CLOCK_GATING_DIS;
6372 
6373 	if (orig != data)
6374 		WREG32(HDP_HOST_PATH_CNTL, data);
6375 }
6376 
6377 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6378 			      bool enable)
6379 {
6380 	u32 orig, data;
6381 
6382 	orig = data = RREG32(HDP_MEM_POWER_LS);
6383 
6384 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6385 		data |= HDP_LS_ENABLE;
6386 	else
6387 		data &= ~HDP_LS_ENABLE;
6388 
6389 	if (orig != data)
6390 		WREG32(HDP_MEM_POWER_LS, data);
6391 }
6392 
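/**
 * cik_update_cg - update clock gating for a set of blocks
 *
 * @rdev: radeon_device pointer
 * @block: mask of RADEON_CG_BLOCK_* blocks to update
 * @enable: enable or disable clock gating
 *
 * Enable or disable clock gating for the GFX, MC, SDMA, BIF,
 * UVD, HDP and VCE blocks selected in @block, e.g.
 * cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true) as done by
 * cik_init_cg() (CIK).
 */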
6393 void cik_update_cg(struct radeon_device *rdev,
6394 		   u32 block, bool enable)
6395 {
6396 
6397 	if (block & RADEON_CG_BLOCK_GFX) {
6398 		cik_enable_gui_idle_interrupt(rdev, false);
6399 		/* order matters! */
6400 		if (enable) {
6401 			cik_enable_mgcg(rdev, true);
6402 			cik_enable_cgcg(rdev, true);
6403 		} else {
6404 			cik_enable_cgcg(rdev, false);
6405 			cik_enable_mgcg(rdev, false);
6406 		}
6407 		cik_enable_gui_idle_interrupt(rdev, true);
6408 	}
6409 
6410 	if (block & RADEON_CG_BLOCK_MC) {
6411 		if (!(rdev->flags & RADEON_IS_IGP)) {
6412 			cik_enable_mc_mgcg(rdev, enable);
6413 			cik_enable_mc_ls(rdev, enable);
6414 		}
6415 	}
6416 
6417 	if (block & RADEON_CG_BLOCK_SDMA) {
6418 		cik_enable_sdma_mgcg(rdev, enable);
6419 		cik_enable_sdma_mgls(rdev, enable);
6420 	}
6421 
6422 	if (block & RADEON_CG_BLOCK_BIF) {
6423 		cik_enable_bif_mgls(rdev, enable);
6424 	}
6425 
6426 	if (block & RADEON_CG_BLOCK_UVD) {
6427 		if (rdev->has_uvd)
6428 			cik_enable_uvd_mgcg(rdev, enable);
6429 	}
6430 
6431 	if (block & RADEON_CG_BLOCK_HDP) {
6432 		cik_enable_hdp_mgcg(rdev, enable);
6433 		cik_enable_hdp_ls(rdev, enable);
6434 	}
6435 
6436 	if (block & RADEON_CG_BLOCK_VCE) {
6437 		vce_v2_0_enable_mgcg(rdev, enable);
6438 	}
6439 }
6440 
6441 static void cik_init_cg(struct radeon_device *rdev)
6442 {
6443 
6444 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6445 
6446 	if (rdev->has_uvd)
6447 		si_init_uvd_internal_cg(rdev);
6448 
6449 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6450 			     RADEON_CG_BLOCK_SDMA |
6451 			     RADEON_CG_BLOCK_BIF |
6452 			     RADEON_CG_BLOCK_UVD |
6453 			     RADEON_CG_BLOCK_HDP), true);
6454 }
6455 
6456 static void cik_fini_cg(struct radeon_device *rdev)
6457 {
6458 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6459 			     RADEON_CG_BLOCK_SDMA |
6460 			     RADEON_CG_BLOCK_BIF |
6461 			     RADEON_CG_BLOCK_UVD |
6462 			     RADEON_CG_BLOCK_HDP), false);
6463 
6464 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6465 }
6466 
6467 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6468 					  bool enable)
6469 {
6470 	u32 data, orig;
6471 
6472 	orig = data = RREG32(RLC_PG_CNTL);
6473 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6474 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6475 	else
6476 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6477 	if (orig != data)
6478 		WREG32(RLC_PG_CNTL, data);
6479 }
6480 
6481 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6482 					  bool enable)
6483 {
6484 	u32 data, orig;
6485 
6486 	orig = data = RREG32(RLC_PG_CNTL);
6487 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6488 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6489 	else
6490 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6491 	if (orig != data)
6492 		WREG32(RLC_PG_CNTL, data);
6493 }
6494 
6495 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6496 {
6497 	u32 data, orig;
6498 
6499 	orig = data = RREG32(RLC_PG_CNTL);
6500 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6501 		data &= ~DISABLE_CP_PG;
6502 	else
6503 		data |= DISABLE_CP_PG;
6504 	if (orig != data)
6505 		WREG32(RLC_PG_CNTL, data);
6506 }
6507 
6508 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6509 {
6510 	u32 data, orig;
6511 
6512 	orig = data = RREG32(RLC_PG_CNTL);
6513 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6514 		data &= ~DISABLE_GDS_PG;
6515 	else
6516 		data |= DISABLE_GDS_PG;
6517 	if (orig != data)
6518 		WREG32(RLC_PG_CNTL, data);
6519 }
6520 
6521 #define CP_ME_TABLE_SIZE    96
6522 #define CP_ME_TABLE_OFFSET  2048
6523 #define CP_MEC_TABLE_OFFSET 4096
6524 
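/**
 * cik_init_cp_pg_table - populate the CP jump table for powergating
 *
 * @rdev: radeon_device pointer
 *
 * Copy the CE/PFP/ME/MEC (and MEC2 on KV) jump tables out of the
 * CP firmware images into the RLC cp_table buffer, which the RLC
 * uses to restore CP state (see RLC_CP_TABLE_RESTORE in
 * cik_init_gfx_cgpg()) (CIK).
 */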
6525 void cik_init_cp_pg_table(struct radeon_device *rdev)
6526 {
6527 	volatile u32 *dst_ptr;
6528 	int me, i, max_me = 4;
6529 	u32 bo_offset = 0;
6530 	u32 table_offset, table_size;
6531 
6532 	if (rdev->family == CHIP_KAVERI)
6533 		max_me = 5;
6534 
6535 	if (rdev->rlc.cp_table_ptr == NULL)
6536 		return;
6537 
6538 	/* write the cp table buffer */
6539 	dst_ptr = rdev->rlc.cp_table_ptr;
6540 	for (me = 0; me < max_me; me++) {
6541 		if (rdev->new_fw) {
6542 			const __le32 *fw_data;
6543 			const struct gfx_firmware_header_v1_0 *hdr;
6544 
6545 			if (me == 0) {
6546 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6547 				fw_data = (const __le32 *)
6548 					((const char *)rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6549 				table_offset = le32_to_cpu(hdr->jt_offset);
6550 				table_size = le32_to_cpu(hdr->jt_size);
6551 			} else if (me == 1) {
6552 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6553 				fw_data = (const __le32 *)
6554 					((const char *)rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6555 				table_offset = le32_to_cpu(hdr->jt_offset);
6556 				table_size = le32_to_cpu(hdr->jt_size);
6557 			} else if (me == 2) {
6558 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6559 				fw_data = (const __le32 *)
6560 					((const char *)rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6561 				table_offset = le32_to_cpu(hdr->jt_offset);
6562 				table_size = le32_to_cpu(hdr->jt_size);
6563 			} else if (me == 3) {
6564 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6565 				fw_data = (const __le32 *)
6566 					((const char *)rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6567 				table_offset = le32_to_cpu(hdr->jt_offset);
6568 				table_size = le32_to_cpu(hdr->jt_size);
6569 			} else {
6570 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6571 				fw_data = (const __le32 *)
6572 					((const char *)rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6573 				table_offset = le32_to_cpu(hdr->jt_offset);
6574 				table_size = le32_to_cpu(hdr->jt_size);
6575 			}
6576 
6577 			for (i = 0; i < table_size; i ++) {
6578 				dst_ptr[bo_offset + i] =
6579 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6580 			}
6581 			bo_offset += table_size;
6582 		} else {
6583 			const __be32 *fw_data;
6584 			table_size = CP_ME_TABLE_SIZE;
6585 
6586 			if (me == 0) {
6587 				fw_data = (const __be32 *)rdev->ce_fw->data;
6588 				table_offset = CP_ME_TABLE_OFFSET;
6589 			} else if (me == 1) {
6590 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6591 				table_offset = CP_ME_TABLE_OFFSET;
6592 			} else if (me == 2) {
6593 				fw_data = (const __be32 *)rdev->me_fw->data;
6594 				table_offset = CP_ME_TABLE_OFFSET;
6595 			} else {
6596 				fw_data = (const __be32 *)rdev->mec_fw->data;
6597 				table_offset = CP_MEC_TABLE_OFFSET;
6598 			}
6599 
6600 			for (i = 0; i < table_size; i ++) {
6601 				dst_ptr[bo_offset + i] =
6602 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6603 			}
6604 			bo_offset += table_size;
6605 		}
6606 	}
6607 }
6608 
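/**
 * cik_enable_gfx_cgpg - enable/disable gfx powergating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable gfx powergating
 *
 * Set or clear GFX_PG_ENABLE and AUTO_PG_EN when GFX_PG is
 * supported (CIK).
 */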
6609 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6610 				bool enable)
6611 {
6612 	u32 data, orig;
6613 
6614 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6615 		orig = data = RREG32(RLC_PG_CNTL);
6616 		data |= GFX_PG_ENABLE;
6617 		if (orig != data)
6618 			WREG32(RLC_PG_CNTL, data);
6619 
6620 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6621 		data |= AUTO_PG_EN;
6622 		if (orig != data)
6623 			WREG32(RLC_AUTO_PG_CTRL, data);
6624 	} else {
6625 		orig = data = RREG32(RLC_PG_CNTL);
6626 		data &= ~GFX_PG_ENABLE;
6627 		if (orig != data)
6628 			WREG32(RLC_PG_CNTL, data);
6629 
6630 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6631 		data &= ~AUTO_PG_EN;
6632 		if (orig != data)
6633 			WREG32(RLC_AUTO_PG_CTRL, data);
6634 
6635 		data = RREG32(DB_RENDER_CONTROL);
6636 	}
6637 }
6638 
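/**
 * cik_get_cu_active_bitmap - get the active CU mask for a shader array
 *
 * @rdev: radeon_device pointer
 * @se: shader engine index
 * @sh: shader array index
 *
 * Combine the fused and user-disabled CU bits for the selected
 * SE/SH and return a bitmap with one set bit per active CU (CIK).
 */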
6639 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6640 {
6641 	u32 mask = 0, tmp, tmp1;
6642 	int i;
6643 
6644 	mutex_lock(&rdev->grbm_idx_mutex);
6645 	cik_select_se_sh(rdev, se, sh);
6646 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6647 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6648 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6649 	mutex_unlock(&rdev->grbm_idx_mutex);
6650 
6651 	tmp &= 0xffff0000;
6652 
6653 	tmp |= tmp1;
6654 	tmp >>= 16;
6655 
6656 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6657 		mask <<= 1;
6658 		mask |= 1;
6659 	}
6660 
6661 	return (~tmp) & mask;
6662 }
6663 
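/**
 * cik_init_ao_cu_mask - set up the always-on CU mask
 *
 * @rdev: radeon_device pointer
 *
 * Mark up to two active CUs per shader array as always on, program
 * the resulting mask into RLC_PG_AO_CU_MASK and store the total
 * active CU count in RLC_MAX_PG_CU (CIK).
 */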
6664 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6665 {
6666 	u32 i, j, k, active_cu_number = 0;
6667 	u32 mask, counter, cu_bitmap;
6668 	u32 tmp = 0;
6669 
6670 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6671 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6672 			mask = 1;
6673 			cu_bitmap = 0;
6674 			counter = 0;
6675 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
6676 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6677 					if (counter < 2)
6678 						cu_bitmap |= mask;
6679 					counter++;
6680 				}
6681 				mask <<= 1;
6682 			}
6683 
6684 			active_cu_number += counter;
6685 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6686 		}
6687 	}
6688 
6689 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6690 
6691 	tmp = RREG32(RLC_MAX_PG_CU);
6692 	tmp &= ~MAX_PU_CU_MASK;
6693 	tmp |= MAX_PU_CU(active_cu_number);
6694 	WREG32(RLC_MAX_PG_CU, tmp);
6695 }
6696 
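/**
 * cik_enable_gfx_static_mgpg - enable/disable static medium grain power gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the feature
 *
 * Toggle static per-CU power gating in RLC_PG_CNTL, honoring the
 * RADEON_PG_SUPPORT_GFX_SMG flag (CIK).
 */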
6697 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6698 				       bool enable)
6699 {
6700 	u32 data, orig;
6701 
6702 	orig = data = RREG32(RLC_PG_CNTL);
6703 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6704 		data |= STATIC_PER_CU_PG_ENABLE;
6705 	else
6706 		data &= ~STATIC_PER_CU_PG_ENABLE;
6707 	if (orig != data)
6708 		WREG32(RLC_PG_CNTL, data);
6709 }
6710 
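/**
 * cik_enable_gfx_dynamic_mgpg - enable/disable dynamic medium grain power gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the feature
 *
 * Toggle dynamic per-CU power gating in RLC_PG_CNTL, honoring the
 * RADEON_PG_SUPPORT_GFX_DMG flag (CIK).
 */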
6711 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6712 					bool enable)
6713 {
6714 	u32 data, orig;
6715 
6716 	orig = data = RREG32(RLC_PG_CNTL);
6717 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6718 		data |= DYN_PER_CU_PG_ENABLE;
6719 	else
6720 		data &= ~DYN_PER_CU_PG_ENABLE;
6721 	if (orig != data)
6722 		WREG32(RLC_PG_CNTL, data);
6723 }
6724 
6725 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6726 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6727 
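/**
 * cik_init_gfx_cgpg - set up gfx power gating state
 *
 * @rdev: radeon_device pointer
 *
 * Program the clear state descriptor and the register save/restore
 * list into the RLC scratch space, set the save/restore and CP table
 * base addresses and tune the idle poll count and power gating
 * delays (CIK).
 */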
6728 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6729 {
6730 	u32 data, orig;
6731 	u32 i;
6732 
6733 	if (rdev->rlc.cs_data) {
6734 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6735 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6736 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6737 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6738 	} else {
6739 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6740 		for (i = 0; i < 3; i++)
6741 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6742 	}
6743 	if (rdev->rlc.reg_list) {
6744 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6745 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6746 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6747 	}
6748 
6749 	orig = data = RREG32(RLC_PG_CNTL);
6750 	data |= GFX_PG_SRC;
6751 	if (orig != data)
6752 		WREG32(RLC_PG_CNTL, data);
6753 
6754 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6755 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6756 
6757 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6758 	data &= ~IDLE_POLL_COUNT_MASK;
6759 	data |= IDLE_POLL_COUNT(0x60);
6760 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
6761 
6762 	data = 0x10101010;
6763 	WREG32(RLC_PG_DELAY, data);
6764 
6765 	data = RREG32(RLC_PG_DELAY_2);
6766 	data &= ~0xff;
6767 	data |= 0x3;
6768 	WREG32(RLC_PG_DELAY_2, data);
6769 
6770 	data = RREG32(RLC_AUTO_PG_CTRL);
6771 	data &= ~GRBM_REG_SGIT_MASK;
6772 	data |= GRBM_REG_SGIT(0x700);
6773 	WREG32(RLC_AUTO_PG_CTRL, data);
6774 
6775 }
6776 
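/**
 * cik_update_gfx_pg - update the gfx power gating state
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable power gating
 *
 * Enable or disable coarse grain, static medium grain and dynamic
 * medium grain gfx power gating together (CIK).
 */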
6777 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6778 {
6779 	cik_enable_gfx_cgpg(rdev, enable);
6780 	cik_enable_gfx_static_mgpg(rdev, enable);
6781 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
6782 }
6783 
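/**
 * cik_get_csb_size - get the size of the clear state buffer
 *
 * @rdev: radeon_device pointer
 *
 * Walk the clear state section/extent tables and return the number
 * of dwords needed for the clear state buffer, or 0 if no clear
 * state data is available (CIK).
 */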
6784 u32 cik_get_csb_size(struct radeon_device *rdev)
6785 {
6786 	u32 count = 0;
6787 	const struct cs_section_def *sect = NULL;
6788 	const struct cs_extent_def *ext = NULL;
6789 
6790 	if (rdev->rlc.cs_data == NULL)
6791 		return 0;
6792 
6793 	/* begin clear state */
6794 	count += 2;
6795 	/* context control state */
6796 	count += 3;
6797 
6798 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6799 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6800 			if (sect->id == SECT_CONTEXT)
6801 				count += 2 + ext->reg_count;
6802 			else
6803 				return 0;
6804 		}
6805 	}
6806 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6807 	count += 4;
6808 	/* end clear state */
6809 	count += 2;
6810 	/* clear state */
6811 	count += 2;
6812 
6813 	return count;
6814 }
6815 
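/**
 * cik_get_csb_buffer - fill in the clear state buffer
 *
 * @rdev: radeon_device pointer
 * @buffer: buffer to fill with the clear state packets
 *
 * Emit the PM4 packets that make up the clear state buffer: preamble
 * control, context control, the context register extents and the
 * asic specific raster config values (CIK).
 */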
6816 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6817 {
6818 	u32 count = 0, i;
6819 	const struct cs_section_def *sect = NULL;
6820 	const struct cs_extent_def *ext = NULL;
6821 
6822 	if (rdev->rlc.cs_data == NULL)
6823 		return;
6824 	if (buffer == NULL)
6825 		return;
6826 
6827 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6828 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6829 
6830 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6831 	buffer[count++] = cpu_to_le32(0x80000000);
6832 	buffer[count++] = cpu_to_le32(0x80000000);
6833 
6834 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6835 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6836 			if (sect->id == SECT_CONTEXT) {
6837 				buffer[count++] =
6838 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6839 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6840 				for (i = 0; i < ext->reg_count; i++)
6841 					buffer[count++] = cpu_to_le32(ext->extent[i]);
6842 			} else {
6843 				return;
6844 			}
6845 		}
6846 	}
6847 
6848 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6849 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6850 	switch (rdev->family) {
6851 	case CHIP_BONAIRE:
6852 		buffer[count++] = cpu_to_le32(0x16000012);
6853 		buffer[count++] = cpu_to_le32(0x00000000);
6854 		break;
6855 	case CHIP_KAVERI:
6856 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6857 		buffer[count++] = cpu_to_le32(0x00000000);
6858 		break;
6859 	case CHIP_KABINI:
6860 	case CHIP_MULLINS:
6861 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6862 		buffer[count++] = cpu_to_le32(0x00000000);
6863 		break;
6864 	case CHIP_HAWAII:
6865 		buffer[count++] = cpu_to_le32(0x3a00161a);
6866 		buffer[count++] = cpu_to_le32(0x0000002e);
6867 		break;
6868 	default:
6869 		buffer[count++] = cpu_to_le32(0x00000000);
6870 		buffer[count++] = cpu_to_le32(0x00000000);
6871 		break;
6872 	}
6873 
6874 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6875 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6876 
6877 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6878 	buffer[count++] = cpu_to_le32(0);
6879 }
6880 
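/**
 * cik_init_pg - initialize power gating
 *
 * @rdev: radeon_device pointer
 *
 * Set up and enable the power gating features selected by
 * rdev->pg_flags (CIK).
 */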
6881 static void cik_init_pg(struct radeon_device *rdev)
6882 {
6883 	if (rdev->pg_flags) {
6884 		cik_enable_sck_slowdown_on_pu(rdev, true);
6885 		cik_enable_sck_slowdown_on_pd(rdev, true);
6886 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6887 			cik_init_gfx_cgpg(rdev);
6888 			cik_enable_cp_pg(rdev, true);
6889 			cik_enable_gds_pg(rdev, true);
6890 		}
6891 		cik_init_ao_cu_mask(rdev);
6892 		cik_update_gfx_pg(rdev, true);
6893 	}
6894 }
6895 
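/**
 * cik_fini_pg - tear down power gating
 *
 * @rdev: radeon_device pointer
 *
 * Disable the power gating features that were enabled by
 * cik_init_pg() (CIK).
 */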
6896 static void cik_fini_pg(struct radeon_device *rdev)
6897 {
6898 	if (rdev->pg_flags) {
6899 		cik_update_gfx_pg(rdev, false);
6900 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6901 			cik_enable_cp_pg(rdev, false);
6902 			cik_enable_gds_pg(rdev, false);
6903 		}
6904 	}
6905 }
6906 
6907 /*
6908  * Interrupts
6909  * Starting with r6xx, interrupts are handled via a ring buffer.
6910  * Ring buffers are areas of GPU accessible memory that the GPU
6911  * writes interrupt vectors into and the host reads vectors out of.
6912  * There is a rptr (read pointer) that determines where the
6913  * host is currently reading, and a wptr (write pointer)
6914  * which determines where the GPU has written.  When the
6915  * pointers are equal, the ring is idle.  When the GPU
6916  * writes vectors to the ring buffer, it increments the
6917  * wptr.  When there is an interrupt, the host then starts
6918  * fetching commands and processing them until the pointers are
6919  * equal again at which point it updates the rptr.
6920  */
6921 
6922 /**
6923  * cik_enable_interrupts - Enable the interrupt ring buffer
6924  *
6925  * @rdev: radeon_device pointer
6926  *
6927  * Enable the interrupt ring buffer (CIK).
6928  */
6929 static void cik_enable_interrupts(struct radeon_device *rdev)
6930 {
6931 	u32 ih_cntl = RREG32(IH_CNTL);
6932 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6933 
6934 	ih_cntl |= ENABLE_INTR;
6935 	ih_rb_cntl |= IH_RB_ENABLE;
6936 	WREG32(IH_CNTL, ih_cntl);
6937 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6938 	rdev->ih.enabled = true;
6939 }
6940 
6941 /**
6942  * cik_disable_interrupts - Disable the interrupt ring buffer
6943  *
6944  * @rdev: radeon_device pointer
6945  *
6946  * Disable the interrupt ring buffer (CIK).
6947  */
6948 static void cik_disable_interrupts(struct radeon_device *rdev)
6949 {
6950 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6951 	u32 ih_cntl = RREG32(IH_CNTL);
6952 
6953 	ih_rb_cntl &= ~IH_RB_ENABLE;
6954 	ih_cntl &= ~ENABLE_INTR;
6955 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6956 	WREG32(IH_CNTL, ih_cntl);
6957 	/* set rptr, wptr to 0 */
6958 	WREG32(IH_RB_RPTR, 0);
6959 	WREG32(IH_RB_WPTR, 0);
6960 	rdev->ih.enabled = false;
6961 	rdev->ih.rptr = 0;
6962 }
6963 
6964 /**
6965  * cik_disable_interrupt_state - Disable all interrupt sources
6966  *
6967  * @rdev: radeon_device pointer
6968  *
6969  * Clear all interrupt enable bits used by the driver (CIK).
6970  */
6971 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6972 {
6973 	u32 tmp;
6974 
6975 	/* gfx ring */
6976 	tmp = RREG32(CP_INT_CNTL_RING0) &
6977 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6978 	WREG32(CP_INT_CNTL_RING0, tmp);
6979 	/* sdma */
6980 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6981 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6982 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6983 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6984 	/* compute queues */
6985 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6986 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6987 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6988 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6989 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6990 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6991 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6992 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6993 	/* grbm */
6994 	WREG32(GRBM_INT_CNTL, 0);
6995 	/* SRBM */
6996 	WREG32(SRBM_INT_CNTL, 0);
6997 	/* vline/vblank, etc. */
6998 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6999 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7000 	if (rdev->num_crtc >= 4) {
7001 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7002 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7003 	}
7004 	if (rdev->num_crtc >= 6) {
7005 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7006 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7007 	}
7008 	/* pflip */
7009 	if (rdev->num_crtc >= 2) {
7010 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7011 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7012 	}
7013 	if (rdev->num_crtc >= 4) {
7014 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7015 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7016 	}
7017 	if (rdev->num_crtc >= 6) {
7018 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7019 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7020 	}
7021 
7022 	/* dac hotplug */
7023 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7024 
7025 	/* digital hotplug */
7026 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7027 	WREG32(DC_HPD1_INT_CONTROL, tmp);
7028 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7029 	WREG32(DC_HPD2_INT_CONTROL, tmp);
7030 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7031 	WREG32(DC_HPD3_INT_CONTROL, tmp);
7032 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7033 	WREG32(DC_HPD4_INT_CONTROL, tmp);
7034 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7035 	WREG32(DC_HPD5_INT_CONTROL, tmp);
7036 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7037 	WREG32(DC_HPD6_INT_CONTROL, tmp);
7038 
7039 }
7040 
7041 /**
7042  * cik_irq_init - init and enable the interrupt ring
7043  *
7044  * @rdev: radeon_device pointer
7045  *
7046  * Allocate a ring buffer for the interrupt controller,
7047  * enable the RLC, disable interrupts, set up the IH
7048  * ring buffer and enable it (CIK).
7049  * Called at device load and resume.
7050  * Returns 0 for success, errors for failure.
7051  */
7052 static int cik_irq_init(struct radeon_device *rdev)
7053 {
7054 	int ret = 0;
7055 	int rb_bufsz;
7056 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7057 
7058 	/* allocate ring */
7059 	ret = r600_ih_ring_alloc(rdev);
7060 	if (ret)
7061 		return ret;
7062 
7063 	/* disable irqs */
7064 	cik_disable_interrupts(rdev);
7065 
7066 	/* init rlc */
7067 	ret = cik_rlc_resume(rdev);
7068 	if (ret) {
7069 		r600_ih_ring_fini(rdev);
7070 		return ret;
7071 	}
7072 
7073 	/* setup interrupt control */
7074 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
7075 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7076 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
7077 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7078 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7079 	 */
7080 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7081 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7082 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7083 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
7084 
7085 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7086 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7087 
7088 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7089 		      IH_WPTR_OVERFLOW_CLEAR |
7090 		      (rb_bufsz << 1));
7091 
7092 	if (rdev->wb.enabled)
7093 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7094 
7095 	/* set the writeback address whether it's enabled or not */
7096 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7097 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7098 
7099 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7100 
7101 	/* set rptr, wptr to 0 */
7102 	WREG32(IH_RB_RPTR, 0);
7103 	WREG32(IH_RB_WPTR, 0);
7104 
7105 	/* Default settings for IH_CNTL (disabled at first) */
7106 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7107 	/* RPTR_REARM only works if msi's are enabled */
7108 	if (rdev->msi_enabled)
7109 		ih_cntl |= RPTR_REARM;
7110 	WREG32(IH_CNTL, ih_cntl);
7111 
7112 	/* force the active interrupt state to all disabled */
7113 	cik_disable_interrupt_state(rdev);
7114 
7115 	pci_enable_busmaster(rdev->pdev->dev.bsddev);
7116 
7117 	/* enable irqs */
7118 	cik_enable_interrupts(rdev);
7119 
7120 	return ret;
7121 }
7122 
7123 /**
7124  * cik_irq_set - enable/disable interrupt sources
7125  *
7126  * @rdev: radeon_device pointer
7127  *
7128  * Enable interrupt sources on the GPU (vblanks, hpd,
7129  * etc.) (CIK).
7130  * Returns 0 for success, errors for failure.
7131  */
7132 int cik_irq_set(struct radeon_device *rdev)
7133 {
7134 	u32 cp_int_cntl;
7135 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
7136 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
7137 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7138 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7139 	u32 grbm_int_cntl = 0;
7140 	u32 dma_cntl, dma_cntl1;
7141 
7142 	if (!rdev->irq.installed) {
7143 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7144 		return -EINVAL;
7145 	}
7146 	/* don't enable anything if the ih is disabled */
7147 	if (!rdev->ih.enabled) {
7148 		cik_disable_interrupts(rdev);
7149 		/* force the active interrupt state to all disabled */
7150 		cik_disable_interrupt_state(rdev);
7151 		return 0;
7152 	}
7153 
7154 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7155 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7156 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7157 
7158 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7159 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7160 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7161 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7162 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7163 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7164 
7165 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7166 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7167 
7168 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7169 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7170 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7171 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7172 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7173 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7174 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7175 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7176 
7177 	/* enable CP interrupts on all rings */
7178 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7179 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7180 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7181 	}
7182 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7183 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7184 		DRM_DEBUG("si_irq_set: sw int cp1\n");
7185 		if (ring->me == 1) {
7186 			switch (ring->pipe) {
7187 			case 0:
7188 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7189 				break;
7190 			case 1:
7191 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7192 				break;
7193 			case 2:
7194 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7195 				break;
7196 			case 3:
7197 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
7198 				break;
7199 			default:
7200 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7201 				break;
7202 			}
7203 		} else if (ring->me == 2) {
7204 			switch (ring->pipe) {
7205 			case 0:
7206 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7207 				break;
7208 			case 1:
7209 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7210 				break;
7211 			case 2:
7212 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7213 				break;
7214 			case 3:
7215 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
7216 				break;
7217 			default:
7218 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7219 				break;
7220 			}
7221 		} else {
7222 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7223 		}
7224 	}
7225 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7226 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7227 		DRM_DEBUG("si_irq_set: sw int cp2\n");
7228 		if (ring->me == 1) {
7229 			switch (ring->pipe) {
7230 			case 0:
7231 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7232 				break;
7233 			case 1:
7234 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7235 				break;
7236 			case 2:
7237 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7238 				break;
7239 			case 3:
7240 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
7241 				break;
7242 			default:
7243 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7244 				break;
7245 			}
7246 		} else if (ring->me == 2) {
7247 			switch (ring->pipe) {
7248 			case 0:
7249 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7250 				break;
7251 			case 1:
7252 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7253 				break;
7254 			case 2:
7255 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7256 				break;
7257 			case 3:
7258 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
7259 				break;
7260 			default:
7261 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7262 				break;
7263 			}
7264 		} else {
7265 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7266 		}
7267 	}
7268 
7269 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7270 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7271 		dma_cntl |= TRAP_ENABLE;
7272 	}
7273 
7274 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7275 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7276 		dma_cntl1 |= TRAP_ENABLE;
7277 	}
7278 
7279 	if (rdev->irq.crtc_vblank_int[0] ||
7280 	    atomic_read(&rdev->irq.pflip[0])) {
7281 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7282 		crtc1 |= VBLANK_INTERRUPT_MASK;
7283 	}
7284 	if (rdev->irq.crtc_vblank_int[1] ||
7285 	    atomic_read(&rdev->irq.pflip[1])) {
7286 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7287 		crtc2 |= VBLANK_INTERRUPT_MASK;
7288 	}
7289 	if (rdev->irq.crtc_vblank_int[2] ||
7290 	    atomic_read(&rdev->irq.pflip[2])) {
7291 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7292 		crtc3 |= VBLANK_INTERRUPT_MASK;
7293 	}
7294 	if (rdev->irq.crtc_vblank_int[3] ||
7295 	    atomic_read(&rdev->irq.pflip[3])) {
7296 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7297 		crtc4 |= VBLANK_INTERRUPT_MASK;
7298 	}
7299 	if (rdev->irq.crtc_vblank_int[4] ||
7300 	    atomic_read(&rdev->irq.pflip[4])) {
7301 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7302 		crtc5 |= VBLANK_INTERRUPT_MASK;
7303 	}
7304 	if (rdev->irq.crtc_vblank_int[5] ||
7305 	    atomic_read(&rdev->irq.pflip[5])) {
7306 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7307 		crtc6 |= VBLANK_INTERRUPT_MASK;
7308 	}
7309 	if (rdev->irq.hpd[0]) {
7310 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7311 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7312 	}
7313 	if (rdev->irq.hpd[1]) {
7314 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7315 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7316 	}
7317 	if (rdev->irq.hpd[2]) {
7318 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7319 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7320 	}
7321 	if (rdev->irq.hpd[3]) {
7322 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7323 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7324 	}
7325 	if (rdev->irq.hpd[4]) {
7326 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7327 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7328 	}
7329 	if (rdev->irq.hpd[5]) {
7330 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7331 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7332 	}
7333 
7334 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7335 
7336 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7337 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7338 
7339 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7340 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7341 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7342 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7343 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7344 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7345 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7346 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7347 
7348 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7349 
7350 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7351 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7352 	if (rdev->num_crtc >= 4) {
7353 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7354 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7355 	}
7356 	if (rdev->num_crtc >= 6) {
7357 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7358 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7359 	}
7360 
7361 	if (rdev->num_crtc >= 2) {
7362 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7363 		       GRPH_PFLIP_INT_MASK);
7364 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7365 		       GRPH_PFLIP_INT_MASK);
7366 	}
7367 	if (rdev->num_crtc >= 4) {
7368 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7369 		       GRPH_PFLIP_INT_MASK);
7370 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7371 		       GRPH_PFLIP_INT_MASK);
7372 	}
7373 	if (rdev->num_crtc >= 6) {
7374 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7375 		       GRPH_PFLIP_INT_MASK);
7376 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7377 		       GRPH_PFLIP_INT_MASK);
7378 	}
7379 
7380 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7381 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7382 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7383 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7384 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7385 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7386 
7387 	/* posting read */
7388 	RREG32(SRBM_STATUS);
7389 
7390 	return 0;
7391 }
7392 
7393 /**
7394  * cik_irq_ack - ack interrupt sources
7395  *
7396  * @rdev: radeon_device pointer
7397  *
7398  * Ack interrupt sources on the GPU (vblanks, hpd,
7399  * etc.) (CIK).  Certain interrupt sources are sw
7400  * generated and do not require an explicit ack.
7401  */
7402 static inline void cik_irq_ack(struct radeon_device *rdev)
7403 {
7404 	u32 tmp;
7405 
7406 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7407 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7408 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7409 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7410 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7411 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7412 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7413 
7414 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7415 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7416 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7417 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7418 	if (rdev->num_crtc >= 4) {
7419 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7420 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7421 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7422 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7423 	}
7424 	if (rdev->num_crtc >= 6) {
7425 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7426 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7427 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7428 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7429 	}
7430 
7431 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7432 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7433 		       GRPH_PFLIP_INT_CLEAR);
7434 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7435 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7436 		       GRPH_PFLIP_INT_CLEAR);
7437 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7438 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7439 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7440 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7441 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7442 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7443 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7444 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7445 
7446 	if (rdev->num_crtc >= 4) {
7447 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7448 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7449 			       GRPH_PFLIP_INT_CLEAR);
7450 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7451 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7452 			       GRPH_PFLIP_INT_CLEAR);
7453 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7454 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7455 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7456 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7457 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7458 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7459 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7460 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7461 	}
7462 
7463 	if (rdev->num_crtc >= 6) {
7464 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7465 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7466 			       GRPH_PFLIP_INT_CLEAR);
7467 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7468 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7469 			       GRPH_PFLIP_INT_CLEAR);
7470 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7471 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7472 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7473 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7474 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7475 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7476 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7477 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7478 	}
7479 
7480 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7481 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7482 		tmp |= DC_HPDx_INT_ACK;
7483 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7484 	}
7485 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7486 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7487 		tmp |= DC_HPDx_INT_ACK;
7488 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7489 	}
7490 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7491 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7492 		tmp |= DC_HPDx_INT_ACK;
7493 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7494 	}
7495 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7496 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7497 		tmp |= DC_HPDx_INT_ACK;
7498 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7499 	}
7500 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7501 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7502 		tmp |= DC_HPDx_INT_ACK;
7503 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7504 	}
7505 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7506 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7507 		tmp |= DC_HPDx_INT_ACK;
7508 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7509 	}
7510 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7511 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7512 		tmp |= DC_HPDx_RX_INT_ACK;
7513 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7514 	}
7515 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7516 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7517 		tmp |= DC_HPDx_RX_INT_ACK;
7518 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7519 	}
7520 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7521 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7522 		tmp |= DC_HPDx_RX_INT_ACK;
7523 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7524 	}
7525 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7526 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7527 		tmp |= DC_HPDx_RX_INT_ACK;
7528 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7529 	}
7530 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7531 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7532 		tmp |= DC_HPDx_RX_INT_ACK;
7533 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7534 	}
7535 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7536 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7537 		tmp |= DC_HPDx_RX_INT_ACK;
7538 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7539 	}
7540 }
7541 
7542 /**
7543  * cik_irq_disable - disable interrupts
7544  *
7545  * @rdev: radeon_device pointer
7546  *
7547  * Disable interrupts on the hw (CIK).
7548  */
7549 static void cik_irq_disable(struct radeon_device *rdev)
7550 {
7551 	cik_disable_interrupts(rdev);
7552 	/* Wait and acknowledge irq */
7553 	mdelay(1);
7554 	cik_irq_ack(rdev);
7555 	cik_disable_interrupt_state(rdev);
7556 }
7557 
7558 /**
7559  * cik_irq_suspend - disable interrupts for suspend
7560  *
7561  * @rdev: radeon_device pointer
7562  *
7563  * Disable interrupts and stop the RLC (CIK).
7564  * Used for suspend.
7565  */
7566 static void cik_irq_suspend(struct radeon_device *rdev)
7567 {
7568 	cik_irq_disable(rdev);
7569 	cik_rlc_stop(rdev);
7570 }
7571 
7572 /**
7573  * cik_irq_fini - tear down interrupt support
7574  *
7575  * @rdev: radeon_device pointer
7576  *
7577  * Disable interrupts on the hw and free the IH ring
7578  * buffer (CIK).
7579  * Used for driver unload.
7580  */
7581 static void cik_irq_fini(struct radeon_device *rdev)
7582 {
7583 	cik_irq_suspend(rdev);
7584 	r600_ih_ring_fini(rdev);
7585 }
7586 
7587 /**
7588  * cik_get_ih_wptr - get the IH ring buffer wptr
7589  *
7590  * @rdev: radeon_device pointer
7591  *
7592  * Get the IH ring buffer wptr from either the register
7593  * or the writeback memory buffer (CIK).  Also check for
7594  * ring buffer overflow and deal with it.
7595  * Used by cik_irq_process().
7596  * Returns the value of the wptr.
7597  */
7598 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7599 {
7600 	u32 wptr, tmp;
7601 
7602 	if (rdev->wb.enabled)
7603 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7604 	else
7605 		wptr = RREG32(IH_RB_WPTR);
7606 
7607 	if (wptr & RB_OVERFLOW) {
7608 		wptr &= ~RB_OVERFLOW;
7609 		/* When a ring buffer overflow happens, start parsing interrupts
7610 		 * from the last vector that was not overwritten (wptr + 16).
7611 		 * Hopefully this allows us to catch up.
7612 		 */
7613 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7614 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7615 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7616 		tmp = RREG32(IH_RB_CNTL);
7617 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7618 		WREG32(IH_RB_CNTL, tmp);
7619 	}
7620 	return (wptr & rdev->ih.ptr_mask);
7621 }
7622 
7623 /*        CIK IV Ring
7624  * Each IV ring entry is 128 bits:
7625  * [7:0]    - interrupt source id
7626  * [31:8]   - reserved
7627  * [59:32]  - interrupt source data
7628  * [63:60]  - reserved
7629  * [71:64]  - RINGID
7630  *            CP:
7631  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7632  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7633  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7634  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7635  *            PIPE_ID - ME0 0=3D
7636  *                    - ME1&2 compute dispatcher (4 pipes each)
7637  *            SDMA:
7638  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7639  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7640  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7641  * [79:72]  - VMID
7642  * [95:80]  - PASID
7643  * [127:96] - reserved
7644  */
7645 /**
7646  * cik_irq_process - interrupt handler
7647  *
7648  * @rdev: radeon_device pointer
7649  *
7650  * Interrupt handler (CIK).  Walk the IH ring,
7651  * ack interrupts and schedule work to handle
7652  * interrupt events.
7653  * Returns irq process return code.
7654  */
7655 irqreturn_t cik_irq_process(struct radeon_device *rdev)
7656 {
7657 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7658 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7659 	u32 wptr;
7660 	u32 rptr;
7661 	u32 src_id, src_data, ring_id;
7662 	u8 me_id, pipe_id, queue_id;
7663 	u32 ring_index;
7664 	bool queue_hotplug = false;
7665 	bool queue_dp = false;
7666 	bool queue_reset = false;
7667 	u32 addr, status, mc_client;
7668 	bool queue_thermal = false;
7669 
7670 	if (!rdev->ih.enabled || rdev->shutdown)
7671 		return IRQ_NONE;
7672 
7673 	wptr = cik_get_ih_wptr(rdev);
7674 
7675 restart_ih:
7676 	/* is somebody else already processing irqs? */
7677 	if (atomic_xchg(&rdev->ih.lock, 1))
7678 		return IRQ_NONE;
7679 
7680 	rptr = rdev->ih.rptr;
7681 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7682 
7683 	/* Order reading of wptr vs. reading of IH ring data */
7684 	rmb();
7685 
7686 	/* display interrupts */
7687 	cik_irq_ack(rdev);
7688 
7689 	while (rptr != wptr) {
7690 		/* wptr/rptr are in bytes! */
7691 		ring_index = rptr / 4;
7692 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7693 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7694 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7695 
7696 		switch (src_id) {
7697 		case 1: /* D1 vblank/vline */
7698 			switch (src_data) {
7699 			case 0: /* D1 vblank */
7700 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7701 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7702 
7703 				if (rdev->irq.crtc_vblank_int[0]) {
7704 					drm_handle_vblank(rdev->ddev, 0);
7705 					rdev->pm.vblank_sync = true;
7706 					wake_up(&rdev->irq.vblank_queue);
7707 				}
7708 				if (atomic_read(&rdev->irq.pflip[0]))
7709 					radeon_crtc_handle_vblank(rdev, 0);
7710 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7711 				DRM_DEBUG("IH: D1 vblank\n");
7712 
7713 				break;
7714 			case 1: /* D1 vline */
7715 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7716 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7717 
7718 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7719 				DRM_DEBUG("IH: D1 vline\n");
7720 
7721 				break;
7722 			default:
7723 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7724 				break;
7725 			}
7726 			break;
7727 		case 2: /* D2 vblank/vline */
7728 			switch (src_data) {
7729 			case 0: /* D2 vblank */
7730 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7731 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7732 
7733 				if (rdev->irq.crtc_vblank_int[1]) {
7734 					drm_handle_vblank(rdev->ddev, 1);
7735 					rdev->pm.vblank_sync = true;
7736 					wake_up(&rdev->irq.vblank_queue);
7737 				}
7738 				if (atomic_read(&rdev->irq.pflip[1]))
7739 					radeon_crtc_handle_vblank(rdev, 1);
7740 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7741 				DRM_DEBUG("IH: D2 vblank\n");
7742 
7743 				break;
7744 			case 1: /* D2 vline */
7745 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7746 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7747 
7748 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7749 				DRM_DEBUG("IH: D2 vline\n");
7750 
7751 				break;
7752 			default:
7753 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7754 				break;
7755 			}
7756 			break;
7757 		case 3: /* D3 vblank/vline */
7758 			switch (src_data) {
7759 			case 0: /* D3 vblank */
7760 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7761 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7762 
7763 				if (rdev->irq.crtc_vblank_int[2]) {
7764 					drm_handle_vblank(rdev->ddev, 2);
7765 					rdev->pm.vblank_sync = true;
7766 					wake_up(&rdev->irq.vblank_queue);
7767 				}
7768 				if (atomic_read(&rdev->irq.pflip[2]))
7769 					radeon_crtc_handle_vblank(rdev, 2);
7770 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7771 				DRM_DEBUG("IH: D3 vblank\n");
7772 
7773 				break;
7774 			case 1: /* D3 vline */
7775 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7776 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7777 
7778 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7779 				DRM_DEBUG("IH: D3 vline\n");
7780 
7781 				break;
7782 			default:
7783 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7784 				break;
7785 			}
7786 			break;
7787 		case 4: /* D4 vblank/vline */
7788 			switch (src_data) {
7789 			case 0: /* D4 vblank */
7790 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7791 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7792 
7793 				if (rdev->irq.crtc_vblank_int[3]) {
7794 					drm_handle_vblank(rdev->ddev, 3);
7795 					rdev->pm.vblank_sync = true;
7796 					wake_up(&rdev->irq.vblank_queue);
7797 				}
7798 				if (atomic_read(&rdev->irq.pflip[3]))
7799 					radeon_crtc_handle_vblank(rdev, 3);
7800 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7801 				DRM_DEBUG("IH: D4 vblank\n");
7802 
7803 				break;
7804 			case 1: /* D4 vline */
7805 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7806 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7807 
7808 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7809 				DRM_DEBUG("IH: D4 vline\n");
7810 
7811 				break;
7812 			default:
7813 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7814 				break;
7815 			}
7816 			break;
7817 		case 5: /* D5 vblank/vline */
7818 			switch (src_data) {
7819 			case 0: /* D5 vblank */
7820 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7821 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7822 
7823 				if (rdev->irq.crtc_vblank_int[4]) {
7824 					drm_handle_vblank(rdev->ddev, 4);
7825 					rdev->pm.vblank_sync = true;
7826 					wake_up(&rdev->irq.vblank_queue);
7827 				}
7828 				if (atomic_read(&rdev->irq.pflip[4]))
7829 					radeon_crtc_handle_vblank(rdev, 4);
7830 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7831 				DRM_DEBUG("IH: D5 vblank\n");
7832 
7833 				break;
7834 			case 1: /* D5 vline */
7835 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7836 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7837 
7838 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7839 				DRM_DEBUG("IH: D5 vline\n");
7840 
7841 				break;
7842 			default:
7843 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7844 				break;
7845 			}
7846 			break;
7847 		case 6: /* D6 vblank/vline */
7848 			switch (src_data) {
7849 			case 0: /* D6 vblank */
7850 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7851 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7852 
7853 				if (rdev->irq.crtc_vblank_int[5]) {
7854 					drm_handle_vblank(rdev->ddev, 5);
7855 					rdev->pm.vblank_sync = true;
7856 					wake_up(&rdev->irq.vblank_queue);
7857 				}
7858 				if (atomic_read(&rdev->irq.pflip[5]))
7859 					radeon_crtc_handle_vblank(rdev, 5);
7860 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7861 				DRM_DEBUG("IH: D6 vblank\n");
7862 
7863 				break;
7864 			case 1: /* D6 vline */
7865 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7866 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7867 
7868 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7869 				DRM_DEBUG("IH: D6 vline\n");
7870 
7871 				break;
7872 			default:
7873 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7874 				break;
7875 			}
7876 			break;
7877 		case 8: /* D1 page flip */
7878 		case 10: /* D2 page flip */
7879 		case 12: /* D3 page flip */
7880 		case 14: /* D4 page flip */
7881 		case 16: /* D5 page flip */
7882 		case 18: /* D6 page flip */
7883 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7884 			if (radeon_use_pflipirq > 0)
7885 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7886 			break;
7887 		case 42: /* HPD hotplug */
7888 			switch (src_data) {
7889 			case 0:
7890 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7891 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7892 
7893 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7894 				queue_hotplug = true;
7895 				DRM_DEBUG("IH: HPD1\n");
7896 
7897 				break;
7898 			case 1:
7899 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7900 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7901 
7902 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7903 				queue_hotplug = true;
7904 				DRM_DEBUG("IH: HPD2\n");
7905 
7906 				break;
7907 			case 2:
7908 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7909 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7910 
7911 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7912 				queue_hotplug = true;
7913 				DRM_DEBUG("IH: HPD3\n");
7914 
7915 				break;
7916 			case 3:
7917 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7918 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7919 
7920 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7921 				queue_hotplug = true;
7922 				DRM_DEBUG("IH: HPD4\n");
7923 
7924 				break;
7925 			case 4:
7926 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7927 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7928 
7929 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7930 				queue_hotplug = true;
7931 				DRM_DEBUG("IH: HPD5\n");
7932 
7933 				break;
7934 			case 5:
7935 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7936 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7937 
7938 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7939 				queue_hotplug = true;
7940 				DRM_DEBUG("IH: HPD6\n");
7941 
7942 				break;
7943 			case 6:
7944 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7945 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7946 
7947 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7948 				queue_dp = true;
7949 				DRM_DEBUG("IH: HPD_RX 1\n");
7950 
7951 				break;
7952 			case 7:
7953 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7954 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7955 
7956 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7957 				queue_dp = true;
7958 				DRM_DEBUG("IH: HPD_RX 2\n");
7959 
7960 				break;
7961 			case 8:
7962 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7963 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7964 
7965 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7966 				queue_dp = true;
7967 				DRM_DEBUG("IH: HPD_RX 3\n");
7968 
7969 				break;
7970 			case 9:
7971 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7972 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7973 
7974 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7975 				queue_dp = true;
7976 				DRM_DEBUG("IH: HPD_RX 4\n");
7977 
7978 				break;
7979 			case 10:
7980 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7981 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7982 
7983 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7984 				queue_dp = true;
7985 				DRM_DEBUG("IH: HPD_RX 5\n");
7986 
7987 				break;
7988 			case 11:
7989 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7990 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7991 
7992 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7993 				queue_dp = true;
7994 				DRM_DEBUG("IH: HPD_RX 6\n");
7995 
7996 				break;
7997 			default:
7998 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7999 				break;
8000 			}
8001 			break;
8002 		case 96:
8003 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
8004 			WREG32(SRBM_INT_ACK, 0x1);
8005 			break;
8006 		case 124: /* UVD */
8007 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8008 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8009 			break;
8010 		case 146:
8011 		case 147:
8012 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8013 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8014 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8015 			/* reset addr and status */
8016 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8017 			if (addr == 0x0 && status == 0x0)
8018 				break;
8019 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8020 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8021 				addr);
8022 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8023 				status);
8024 			cik_vm_decode_fault(rdev, status, addr, mc_client);
8025 			break;
8026 		case 167: /* VCE */
8027 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8028 			switch (src_data) {
8029 			case 0:
8030 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8031 				break;
8032 			case 1:
8033 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8034 				break;
8035 			default:
8036 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8037 				break;
8038 			}
8039 			break;
8040 		case 176: /* GFX RB CP_INT */
8041 		case 177: /* GFX IB CP_INT */
8042 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8043 			break;
8044 		case 181: /* CP EOP event */
8045 			DRM_DEBUG("IH: CP EOP\n");
8046 			/* XXX check the bitfield order! */
8047 			me_id = (ring_id & 0x60) >> 5;
8048 			pipe_id = (ring_id & 0x18) >> 3;
8049 			queue_id = (ring_id & 0x7) >> 0;
8050 			switch (me_id) {
8051 			case 0:
8052 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8053 				break;
8054 			case 1:
8055 			case 2:
8056 				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
8057 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8058 				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
8059 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8060 				break;
8061 			}
8062 			break;
8063 		case 184: /* CP Privileged reg access */
8064 			DRM_ERROR("Illegal register access in command stream\n");
8065 			/* XXX check the bitfield order! */
8066 			me_id = (ring_id & 0x60) >> 5;
8067 			pipe_id = (ring_id & 0x18) >> 3;
8068 			queue_id = (ring_id & 0x7) >> 0;
8069 			switch (me_id) {
8070 			case 0:
8071 				/* This results in a full GPU reset, but all we need to do is soft
8072 				 * reset the CP for gfx
8073 				 */
8074 				queue_reset = true;
8075 				break;
8076 			case 1:
8077 				/* XXX compute */
8078 				queue_reset = true;
8079 				break;
8080 			case 2:
8081 				/* XXX compute */
8082 				queue_reset = true;
8083 				break;
8084 			}
8085 			break;
8086 		case 185: /* CP Privileged inst */
8087 			DRM_ERROR("Illegal instruction in command stream\n");
8088 			/* XXX check the bitfield order! */
8089 			me_id = (ring_id & 0x60) >> 5;
8090 			pipe_id = (ring_id & 0x18) >> 3;
8091 			queue_id = (ring_id & 0x7) >> 0;
8092 			switch (me_id) {
8093 			case 0:
8094 				/* This results in a full GPU reset, but all we need to do is soft
8095 				 * reset the CP for gfx
8096 				 */
8097 				queue_reset = true;
8098 				break;
8099 			case 1:
8100 				/* XXX compute */
8101 				queue_reset = true;
8102 				break;
8103 			case 2:
8104 				/* XXX compute */
8105 				queue_reset = true;
8106 				break;
8107 			}
8108 			break;
8109 		case 224: /* SDMA trap event */
8110 			/* XXX check the bitfield order! */
8111 			me_id = (ring_id & 0x3) >> 0;
8112 			queue_id = (ring_id & 0xc) >> 2;
8113 			DRM_DEBUG("IH: SDMA trap\n");
8114 			switch (me_id) {
8115 			case 0:
8116 				switch (queue_id) {
8117 				case 0:
8118 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8119 					break;
8120 				case 1:
8121 					/* XXX compute */
8122 					break;
8123 				case 2:
8124 					/* XXX compute */
8125 					break;
8126 				}
8127 				break;
8128 			case 1:
8129 				switch (queue_id) {
8130 				case 0:
8131 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8132 					break;
8133 				case 1:
8134 					/* XXX compute */
8135 					break;
8136 				case 2:
8137 					/* XXX compute */
8138 					break;
8139 				}
8140 				break;
8141 			}
8142 			break;
8143 		case 230: /* thermal low to high */
8144 			DRM_DEBUG("IH: thermal low to high\n");
8145 			rdev->pm.dpm.thermal.high_to_low = false;
8146 			queue_thermal = true;
8147 			break;
8148 		case 231: /* thermal high to low */
8149 			DRM_DEBUG("IH: thermal high to low\n");
8150 			rdev->pm.dpm.thermal.high_to_low = true;
8151 			queue_thermal = true;
8152 			break;
8153 		case 233: /* GUI IDLE */
8154 			DRM_DEBUG("IH: GUI idle\n");
8155 			break;
8156 		case 241: /* SDMA Privileged inst */
8157 		case 247: /* SDMA Privileged inst */
8158 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8159 			/* XXX check the bitfield order! */
8160 			me_id = (ring_id & 0x3) >> 0;
8161 			queue_id = (ring_id & 0xc) >> 2;
8162 			switch (me_id) {
8163 			case 0:
8164 				switch (queue_id) {
8165 				case 0:
8166 					queue_reset = true;
8167 					break;
8168 				case 1:
8169 					/* XXX compute */
8170 					queue_reset = true;
8171 					break;
8172 				case 2:
8173 					/* XXX compute */
8174 					queue_reset = true;
8175 					break;
8176 				}
8177 				break;
8178 			case 1:
8179 				switch (queue_id) {
8180 				case 0:
8181 					queue_reset = true;
8182 					break;
8183 				case 1:
8184 					/* XXX compute */
8185 					queue_reset = true;
8186 					break;
8187 				case 2:
8188 					/* XXX compute */
8189 					queue_reset = true;
8190 					break;
8191 				}
8192 				break;
8193 			}
8194 			break;
8195 		default:
8196 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8197 			break;
8198 		}
8199 
8200 		/* wptr/rptr are in bytes! */
8201 		rptr += 16;
8202 		rptr &= rdev->ih.ptr_mask;
8203 		WREG32(IH_RB_RPTR, rptr);
8204 	}
8205 	if (queue_dp)
8206 		schedule_work(&rdev->dp_work);
8207 	if (queue_hotplug)
8208 		taskqueue_enqueue(rdev->tq, &rdev->hotplug_work);
8209 	if (queue_reset) {
8210 		rdev->needs_reset = true;
8211 		wake_up_all(&rdev->fence_queue);
8212 	}
8213 	if (queue_thermal)
8214 		taskqueue_enqueue(rdev->tq, &rdev->pm.dpm.thermal.work);
8215 	rdev->ih.rptr = rptr;
8216 	atomic_set(&rdev->ih.lock, 0);
8217 
8218 	/* make sure wptr hasn't changed while processing */
8219 	wptr = cik_get_ih_wptr(rdev);
8220 	if (wptr != rptr)
8221 		goto restart_ih;
8222 
8223 	return IRQ_HANDLED;
8224 }
8225 
8226 /*
8227  * startup/shutdown callbacks
8228  */
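/**
 * cik_uvd_init - init the UVD block
 *
 * @rdev: radeon_device pointer
 *
 * Initialize UVD and its ring.  If init fails, UVD is disabled for
 * this device so the remaining UVD setup is skipped (CIK).
 */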
8229 static void cik_uvd_init(struct radeon_device *rdev)
8230 {
8231 	int r;
8232 
8233 	if (!rdev->has_uvd)
8234 		return;
8235 
8236 	r = radeon_uvd_init(rdev);
8237 	if (r) {
8238 		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8239 		/*
8240 		 * At this point rdev->uvd.vcpu_bo is NULL, which makes
8241 		 * cik_uvd_start() fail early, so nothing happens there.
8242 		 * It is therefore pointless to go through that code, which
8243 		 * is why we disable uvd here.
8244 		 */
8245 		rdev->has_uvd = 0;
8246 		return;
8247 	}
8248 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8249 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8250 }
8251 
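/**
 * cik_uvd_start - resume UVD and start its fence driver
 *
 * @rdev: radeon_device pointer
 *
 * Resume the UVD block and start the fence driver for its ring.  On
 * failure the ring size is cleared so cik_uvd_resume() skips it (CIK).
 */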
8252 static void cik_uvd_start(struct radeon_device *rdev)
8253 {
8254 	int r;
8255 
8256 	if (!rdev->has_uvd)
8257 		return;
8258 
8259 	r = radeon_uvd_resume(rdev);
8260 	if (r) {
8261 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8262 		goto error;
8263 	}
8264 	r = uvd_v4_2_resume(rdev);
8265 	if (r) {
8266 		dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8267 		goto error;
8268 	}
8269 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8270 	if (r) {
8271 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8272 		goto error;
8273 	}
8274 	return;
8275 
8276 error:
8277 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8278 }
8279 
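/**
 * cik_uvd_resume - bring up the UVD ring
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the UVD ring and then the UVD block itself (CIK).
 */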
8280 static void cik_uvd_resume(struct radeon_device *rdev)
8281 {
8282 	struct radeon_ring *ring;
8283 	int r;
8284 
8285 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8286 		return;
8287 
8288 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8289 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, RADEON_CP_PACKET2);
8290 	if (r) {
8291 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8292 		return;
8293 	}
8294 	r = uvd_v1_0_init(rdev);
8295 	if (r) {
8296 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8297 		return;
8298 	}
8299 }
8300 
8301 static void cik_vce_init(struct radeon_device *rdev)
8302 {
8303 	int r;
8304 
8305 	if (!rdev->has_vce)
8306 		return;
8307 
8308 	r = radeon_vce_init(rdev);
8309 	if (r) {
		dev_err(rdev->dev, "failed VCE init (%d).\n", r);
		/*
		 * At this point rdev->vce.vcpu_bo is NULL, which makes
		 * cik_vce_start() fail early, so nothing happens there.
		 * Going through that code path is therefore pointless,
		 * hence we disable VCE here.
		 */
8317 		rdev->has_vce = 0;
8318 		return;
8319 	}
8320 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8321 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8322 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8323 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8324 }
8325 
8326 static void cik_vce_start(struct radeon_device *rdev)
8327 {
8328 	int r;
8329 
8330 	if (!rdev->has_vce)
8331 		return;
8332 
8333 	r = radeon_vce_resume(rdev);
8334 	if (r) {
8335 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8336 		goto error;
8337 	}
8338 	r = vce_v2_0_resume(rdev);
8339 	if (r) {
		dev_err(rdev->dev, "failed VCE 2.0 resume (%d).\n", r);
8341 		goto error;
8342 	}
8343 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8344 	if (r) {
8345 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8346 		goto error;
8347 	}
8348 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8349 	if (r) {
8350 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8351 		goto error;
8352 	}
8353 	return;
8354 
8355 error:
8356 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8357 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8358 }
8359 
8360 static void cik_vce_resume(struct radeon_device *rdev)
8361 {
8362 	struct radeon_ring *ring;
8363 	int r;
8364 
8365 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8366 		return;
8367 
8368 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8369 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8370 	if (r) {
8371 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8372 		return;
8373 	}
8374 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8375 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8376 	if (r) {
		dev_err(rdev->dev, "failed initializing VCE2 ring (%d).\n", r);
8378 		return;
8379 	}
8380 	r = vce_v1_0_init(rdev);
8381 	if (r) {
8382 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8383 		return;
8384 	}
8385 }
8386 
8387 /**
8388  * cik_startup - program the asic to a functional state
8389  *
8390  * @rdev: radeon_device pointer
8391  *
8392  * Programs the asic to a functional state (CIK).
8393  * Called by cik_init() and cik_resume().
8394  * Returns 0 for success, error for failure.
8395  */
8396 static int cik_startup(struct radeon_device *rdev)
8397 {
8398 	struct radeon_ring *ring;
8399 	u32 nop;
8400 	int r;
8401 
8402 	/* enable pcie gen2/3 link */
8403 	cik_pcie_gen3_enable(rdev);
8404 	/* enable aspm */
8405 	cik_program_aspm(rdev);
8406 
8407 	/* scratch needs to be initialized before MC */
8408 	r = r600_vram_scratch_init(rdev);
8409 	if (r)
8410 		return r;
8411 
8412 	cik_mc_program(rdev);
8413 
8414 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8415 		r = ci_mc_load_microcode(rdev);
8416 		if (r) {
8417 			DRM_ERROR("Failed to load MC firmware!\n");
8418 			return r;
8419 		}
8420 	}
8421 
8422 	r = cik_pcie_gart_enable(rdev);
8423 	if (r)
8424 		return r;
8425 	cik_gpu_init(rdev);
8426 
8427 	/* allocate rlc buffers */
8428 	if (rdev->flags & RADEON_IS_IGP) {
8429 		if (rdev->family == CHIP_KAVERI) {
8430 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8431 			rdev->rlc.reg_list_size =
8432 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8433 		} else {
8434 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8435 			rdev->rlc.reg_list_size =
8436 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8437 		}
8438 	}
8439 	rdev->rlc.cs_data = ci_cs_data;
8440 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8441 	r = sumo_rlc_init(rdev);
8442 	if (r) {
8443 		DRM_ERROR("Failed to init rlc BOs!\n");
8444 		return r;
8445 	}
8446 
8447 	/* allocate wb buffer */
8448 	r = radeon_wb_init(rdev);
8449 	if (r)
8450 		return r;
8451 
8452 	/* allocate mec buffers */
8453 	r = cik_mec_init(rdev);
8454 	if (r) {
8455 		DRM_ERROR("Failed to init MEC BOs!\n");
8456 		return r;
8457 	}
8458 
8459 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8460 	if (r) {
8461 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8462 		return r;
8463 	}
8464 
8465 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8466 	if (r) {
8467 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8468 		return r;
8469 	}
8470 
8471 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8472 	if (r) {
8473 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8474 		return r;
8475 	}
8476 
8477 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8478 	if (r) {
8479 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8480 		return r;
8481 	}
8482 
8483 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8484 	if (r) {
8485 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8486 		return r;
8487 	}
8488 
8489 	cik_uvd_start(rdev);
8490 	cik_vce_start(rdev);
8491 
8492 	/* Enable IRQ */
8493 	if (!rdev->irq.installed) {
8494 		r = radeon_irq_kms_init(rdev);
8495 		if (r)
8496 			return r;
8497 	}
8498 
8499 	r = cik_irq_init(rdev);
8500 	if (r) {
8501 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8502 		radeon_irq_kms_fini(rdev);
8503 		return r;
8504 	}
8505 	cik_irq_set(rdev);
8506 
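	/*
	 * NOP packet selection: Hawaii with the original microcode needs
	 * the legacy type-2 RADEON_CP_PACKET2 NOP; newer Hawaii firmware
	 * (new_fw) and all other CIK parts use a type-3 NOP packet.
	 */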
8507 	if (rdev->family == CHIP_HAWAII) {
8508 		if (rdev->new_fw)
8509 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8510 		else
8511 			nop = RADEON_CP_PACKET2;
8512 	} else {
8513 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8514 	}
8515 
8516 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8517 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8518 			     nop);
8519 	if (r)
8520 		return r;
8521 
8522 	/* set up the compute queues */
8523 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8524 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8525 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8526 			     nop);
8527 	if (r)
8528 		return r;
8529 	ring->me = 1; /* first MEC */
8530 	ring->pipe = 0; /* first pipe */
8531 	ring->queue = 0; /* first queue */
8532 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8533 
8534 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8535 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8536 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8537 			     nop);
8538 	if (r)
8539 		return r;
	/* dGPUs only have 1 MEC */
8541 	ring->me = 1; /* first MEC */
8542 	ring->pipe = 0; /* first pipe */
8543 	ring->queue = 1; /* second queue */
8544 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8545 
8546 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8547 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8548 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8549 	if (r)
8550 		return r;
8551 
8552 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8553 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8554 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8555 	if (r)
8556 		return r;
8557 
8558 	r = cik_cp_resume(rdev);
8559 	if (r)
8560 		return r;
8561 
8562 	r = cik_sdma_resume(rdev);
8563 	if (r)
8564 		return r;
8565 
8566 	cik_uvd_resume(rdev);
8567 	cik_vce_resume(rdev);
8568 
8569 	r = radeon_ib_pool_init(rdev);
8570 	if (r) {
8571 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8572 		return r;
8573 	}
8574 
8575 	r = radeon_vm_manager_init(rdev);
8576 	if (r) {
8577 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8578 		return r;
8579 	}
8580 
8581 	r = radeon_audio_init(rdev);
8582 	if (r)
8583 		return r;
8584 
8585 	return 0;
8586 }
8587 
8588 /**
8589  * cik_resume - resume the asic to a functional state
8590  *
8591  * @rdev: radeon_device pointer
8592  *
8593  * Programs the asic to a functional state (CIK).
8594  * Called at resume.
8595  * Returns 0 for success, error for failure.
8596  */
8597 int cik_resume(struct radeon_device *rdev)
8598 {
8599 	int r;
8600 
8601 	/* post card */
8602 	atom_asic_init(rdev->mode_info.atom_context);
8603 
8604 	/* init golden registers */
8605 	cik_init_golden_registers(rdev);
8606 
8607 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8608 		radeon_pm_resume(rdev);
8609 
8610 	rdev->accel_working = true;
8611 	r = cik_startup(rdev);
8612 	if (r) {
8613 		DRM_ERROR("cik startup failed on resume\n");
8614 		rdev->accel_working = false;
8615 		return r;
8616 	}
8617 
	return r;
}
8621 
8622 /**
8623  * cik_suspend - suspend the asic
8624  *
8625  * @rdev: radeon_device pointer
8626  *
8627  * Bring the chip into a state suitable for suspend (CIK).
8628  * Called at suspend.
8629  * Returns 0 for success.
8630  */
8631 int cik_suspend(struct radeon_device *rdev)
8632 {
8633 	radeon_pm_suspend(rdev);
8634 	radeon_audio_fini(rdev);
8635 	radeon_vm_manager_fini(rdev);
8636 	cik_cp_enable(rdev, false);
8637 	cik_sdma_enable(rdev, false);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_suspend(rdev);
8644 	cik_fini_pg(rdev);
8645 	cik_fini_cg(rdev);
8646 	cik_irq_suspend(rdev);
8647 	radeon_wb_disable(rdev);
8648 	cik_pcie_gart_disable(rdev);
8649 	return 0;
8650 }
8651 
/* Plan is to move initialization into that function and use
 * helper functions so that radeon_device_init does pretty much
 * nothing more than call asic-specific functions. This should
 * also allow us to remove a bunch of callback functions
 * like vram_info.
 */
8658 /**
8659  * cik_init - asic specific driver and hw init
8660  *
8661  * @rdev: radeon_device pointer
8662  *
8663  * Setup asic specific driver variables and program the hw
8664  * to a functional state (CIK).
8665  * Called at driver startup.
8666  * Returns 0 for success, errors for failure.
8667  */
8668 int cik_init(struct radeon_device *rdev)
8669 {
8670 	struct radeon_ring *ring;
8671 	int r;
8672 
8673 	/* Read BIOS */
8674 	if (!radeon_get_bios(rdev)) {
8675 		if (ASIC_IS_AVIVO(rdev))
8676 			return -EINVAL;
8677 	}
8678 	/* Must be an ATOMBIOS */
8679 	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
8681 		return -EINVAL;
8682 	}
8683 	r = radeon_atombios_init(rdev);
8684 	if (r)
8685 		return r;
8686 
8687 	/* Post card if necessary */
8688 	if (!radeon_card_posted(rdev)) {
8689 		if (!rdev->bios) {
8690 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8691 			return -EINVAL;
8692 		}
8693 		DRM_INFO("GPU not posted. posting now...\n");
8694 		atom_asic_init(rdev->mode_info.atom_context);
8695 	}
8696 	/* init golden registers */
8697 	cik_init_golden_registers(rdev);
8698 	/* Initialize scratch registers */
8699 	cik_scratch_init(rdev);
8700 	/* Initialize surface registers */
8701 	radeon_surface_init(rdev);
8702 	/* Initialize clocks */
8703 	radeon_get_clock_info(rdev->ddev);
8704 
8705 	/* Fence driver */
8706 	r = radeon_fence_driver_init(rdev);
8707 	if (r)
8708 		return r;
8709 
8710 	/* initialize memory controller */
8711 	r = cik_mc_init(rdev);
8712 	if (r)
8713 		return r;
8714 	/* Memory manager */
8715 	r = radeon_bo_init(rdev);
8716 	if (r)
8717 		return r;
8718 
8719 	if (rdev->flags & RADEON_IS_IGP) {
8720 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8721 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8722 			r = cik_init_microcode(rdev);
8723 			if (r) {
8724 				DRM_ERROR("Failed to load firmware!\n");
8725 				return r;
8726 			}
8727 		}
8728 	} else {
8729 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8730 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8731 		    !rdev->mc_fw) {
8732 			r = cik_init_microcode(rdev);
8733 			if (r) {
8734 				DRM_ERROR("Failed to load firmware!\n");
8735 				return r;
8736 			}
8737 		}
8738 	}
8739 
8740 	/* Initialize power management */
8741 	radeon_pm_init(rdev);
8742 
8743 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8744 	ring->ring_obj = NULL;
8745 	r600_ring_init(rdev, ring, 1024 * 1024);
8746 
8747 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8748 	ring->ring_obj = NULL;
8749 	r600_ring_init(rdev, ring, 1024 * 1024);
8750 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8751 	if (r)
8752 		return r;
8753 
8754 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8755 	ring->ring_obj = NULL;
8756 	r600_ring_init(rdev, ring, 1024 * 1024);
8757 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8758 	if (r)
8759 		return r;
8760 
8761 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8762 	ring->ring_obj = NULL;
8763 	r600_ring_init(rdev, ring, 256 * 1024);
8764 
8765 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8766 	ring->ring_obj = NULL;
8767 	r600_ring_init(rdev, ring, 256 * 1024);
8768 
8769 	cik_uvd_init(rdev);
8770 	cik_vce_init(rdev);
8771 
8772 	rdev->ih.ring_obj = NULL;
8773 	r600_ih_ring_init(rdev, 64 * 1024);
8774 
8775 	r = r600_pcie_gart_init(rdev);
8776 	if (r)
8777 		return r;
8778 
8779 	rdev->accel_working = true;
8780 	r = cik_startup(rdev);
8781 	if (r) {
8782 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8783 		cik_cp_fini(rdev);
8784 		cik_sdma_fini(rdev);
8785 		cik_irq_fini(rdev);
8786 		sumo_rlc_fini(rdev);
8787 		cik_mec_fini(rdev);
8788 		radeon_wb_fini(rdev);
8789 		radeon_ib_pool_fini(rdev);
8790 		radeon_vm_manager_fini(rdev);
8791 		radeon_irq_kms_fini(rdev);
8792 		cik_pcie_gart_fini(rdev);
8793 		rdev->accel_working = false;
8794 	}
8795 
8796 	/* Don't start up if the MC ucode is missing.
8797 	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
8799 	 */
8800 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
		DRM_ERROR("radeon: MC ucode required for CIK+.\n");
8802 		return -EINVAL;
8803 	}
8804 
8805 	return 0;
8806 }
8807 
8808 /**
8809  * cik_fini - asic specific driver and hw fini
8810  *
8811  * @rdev: radeon_device pointer
8812  *
8813  * Tear down the asic specific driver variables and program the hw
8814  * to an idle state (CIK).
8815  * Called at driver unload.
8816  */
8817 void cik_fini(struct radeon_device *rdev)
8818 {
8819 	radeon_pm_fini(rdev);
8820 	cik_cp_fini(rdev);
8821 	cik_sdma_fini(rdev);
8822 	cik_fini_pg(rdev);
8823 	cik_fini_cg(rdev);
8824 	cik_irq_fini(rdev);
8825 	sumo_rlc_fini(rdev);
8826 	cik_mec_fini(rdev);
8827 	radeon_wb_fini(rdev);
8828 	radeon_vm_manager_fini(rdev);
8829 	radeon_ib_pool_fini(rdev);
8830 	radeon_irq_kms_fini(rdev);
8831 	uvd_v1_0_fini(rdev);
8832 	radeon_uvd_fini(rdev);
8833 	radeon_vce_fini(rdev);
8834 	cik_pcie_gart_fini(rdev);
8835 	r600_vram_scratch_fini(rdev);
8836 	radeon_gem_fini(rdev);
8837 	radeon_fence_driver_fini(rdev);
8838 	radeon_bo_fini(rdev);
8839 	radeon_atombios_fini(rdev);
8840 	cik_fini_microcode(rdev);
8841 	kfree(rdev->bios);
8842 	rdev->bios = NULL;
8843 }
8844 
8845 void dce8_program_fmt(struct drm_encoder *encoder)
8846 {
8847 	struct drm_device *dev = encoder->dev;
8848 	struct radeon_device *rdev = dev->dev_private;
8849 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8850 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8851 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8852 	int bpc = 0;
8853 	u32 tmp = 0;
8854 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8855 
8856 	if (connector) {
8857 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8858 		bpc = radeon_get_monitor_bpc(connector);
8859 		dither = radeon_connector->dither;
8860 	}
8861 
8862 	/* LVDS/eDP FMT is set up by atom */
8863 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8864 		return;
8865 
8866 	/* not needed for analog */
8867 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8868 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8869 		return;
8870 
8871 	if (bpc == 0)
8872 		return;
8873 
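	/*
	 * FMT_SPATIAL_DITHER_DEPTH()/FMT_TRUNCATE_DEPTH() encode the
	 * target depth: 0 = 6 bpc, 1 = 8 bpc, 2 = 10 bpc (see below).
	 */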
8874 	switch (bpc) {
8875 	case 6:
8876 		if (dither == RADEON_FMT_DITHER_ENABLE)
8877 			/* XXX sort out optimal dither settings */
8878 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8879 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8880 		else
8881 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8882 		break;
8883 	case 8:
8884 		if (dither == RADEON_FMT_DITHER_ENABLE)
8885 			/* XXX sort out optimal dither settings */
8886 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8887 				FMT_RGB_RANDOM_ENABLE |
8888 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8889 		else
8890 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8891 		break;
8892 	case 10:
8893 		if (dither == RADEON_FMT_DITHER_ENABLE)
8894 			/* XXX sort out optimal dither settings */
8895 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8896 				FMT_RGB_RANDOM_ENABLE |
8897 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8898 		else
8899 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8900 		break;
8901 	default:
8902 		/* not needed */
8903 		break;
8904 	}
8905 
8906 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8907 }
8908 
8909 /* display watermark setup */
8910 /**
8911  * dce8_line_buffer_adjust - Set up the line buffer
8912  *
8913  * @rdev: radeon_device pointer
8914  * @radeon_crtc: the selected display controller
8915  * @mode: the current display mode on the selected display
8916  * controller
8917  *
 * Set up the line buffer allocation for
8919  * the selected display controller (CIK).
8920  * Returns the line buffer size in pixels.
8921  */
8922 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8923 				   struct radeon_crtc *radeon_crtc,
8924 				   struct drm_display_mode *mode)
8925 {
8926 	u32 tmp, buffer_alloc, i;
8927 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8928 	/*
8929 	 * Line Buffer Setup
	 * There are 6 line buffers, one for each display controller.
	 * There are 3 partitions per LB. Select the number of partitions
	 * to enable based on the display width.  For display widths larger
	 * than 4096, you need to use 2 display controllers and combine
8934 	 * them using the stereo blender.
8935 	 */
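	/*
	 * LB_MEMORY_CONFIG(tmp) below encodes this choice: tmp=1 caps the
	 * LB at 1920 pixels, tmp=2 at 2560, tmp=0 selects the full 4096;
	 * the sizes returned at the end of this function mirror that
	 * mapping (times 2 lines).
	 */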
8936 	if (radeon_crtc->base.enabled && mode) {
8937 		if (mode->crtc_hdisplay < 1920) {
8938 			tmp = 1;
8939 			buffer_alloc = 2;
8940 		} else if (mode->crtc_hdisplay < 2560) {
8941 			tmp = 2;
8942 			buffer_alloc = 2;
8943 		} else if (mode->crtc_hdisplay < 4096) {
8944 			tmp = 0;
8945 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8946 		} else {
8947 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8948 			tmp = 0;
8949 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8950 		}
8951 	} else {
8952 		tmp = 1;
8953 		buffer_alloc = 0;
8954 	}
8955 
8956 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8957 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8958 
8959 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8960 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8961 	for (i = 0; i < rdev->usec_timeout; i++) {
8962 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8963 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8964 			break;
8965 		udelay(1);
8966 	}
8967 
8968 	if (radeon_crtc->base.enabled && mode) {
8969 		switch (tmp) {
8970 		case 0:
8971 		default:
8972 			return 4096 * 2;
8973 		case 1:
8974 			return 1920 * 2;
8975 		case 2:
8976 			return 2560 * 2;
8977 		}
8978 	}
8979 
8980 	/* controller not enabled, so no lb used */
8981 	return 0;
8982 }
8983 
8984 /**
8985  * cik_get_number_of_dram_channels - get the number of dram channels
8986  *
8987  * @rdev: radeon_device pointer
8988  *
8989  * Look up the number of video ram channels (CIK).
8990  * Used for display watermark bandwidth calculations
8991  * Returns the number of dram channels
8992  */
8993 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8994 {
8995 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8996 
8997 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8998 	case 0:
8999 	default:
9000 		return 1;
9001 	case 1:
9002 		return 2;
9003 	case 2:
9004 		return 4;
9005 	case 3:
9006 		return 8;
9007 	case 4:
9008 		return 3;
9009 	case 5:
9010 		return 6;
9011 	case 6:
9012 		return 10;
9013 	case 7:
9014 		return 12;
9015 	case 8:
9016 		return 16;
9017 	}
9018 }
9019 
9020 struct dce8_wm_params {
9021 	u32 dram_channels; /* number of dram channels */
9022 	u32 yclk;          /* bandwidth per dram data pin in kHz */
9023 	u32 sclk;          /* engine clock in kHz */
9024 	u32 disp_clk;      /* display clock in kHz */
9025 	u32 src_width;     /* viewport width */
9026 	u32 active_time;   /* active display time in ns */
9027 	u32 blank_time;    /* blank time in ns */
9028 	bool interlaced;    /* mode is interlaced */
9029 	fixed20_12 vsc;    /* vertical scale ratio */
9030 	u32 num_heads;     /* number of active crtcs */
9031 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
9032 	u32 lb_size;       /* line buffer allocated to pipe */
9033 	u32 vtaps;         /* vertical scaler taps */
9034 };
9035 
9036 /**
9037  * dce8_dram_bandwidth - get the dram bandwidth
9038  *
9039  * @wm: watermark calculation data
9040  *
9041  * Calculate the raw dram bandwidth (CIK).
9042  * Used for display watermark bandwidth calculations
9043  * Returns the dram bandwidth in MBytes/s
9044  */
9045 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
9046 {
9047 	/* Calculate raw DRAM Bandwidth */
9048 	fixed20_12 dram_efficiency; /* 0.7 */
9049 	fixed20_12 yclk, dram_channels, bandwidth;
9050 	fixed20_12 a;
9051 
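	/*
	 * In plain math the fixed-point code below computes, with yclk
	 * in kHz:
	 *   bandwidth (MB/s) ~= (yclk / 1000) * (dram_channels * 4) * 0.7
	 * e.g. a hypothetical 1 GHz per-pin clock on 4 channels gives
	 * 1000 * 16 * 0.7 = 11200 MB/s.
	 */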
9052 	a.full = dfixed_const(1000);
9053 	yclk.full = dfixed_const(wm->yclk);
9054 	yclk.full = dfixed_div(yclk, a);
9055 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9056 	a.full = dfixed_const(10);
9057 	dram_efficiency.full = dfixed_const(7);
9058 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
9059 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9060 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
9061 
9062 	return dfixed_trunc(bandwidth);
9063 }
9064 
9065 /**
9066  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9067  *
9068  * @wm: watermark calculation data
9069  *
9070  * Calculate the dram bandwidth used for display (CIK).
9071  * Used for display watermark bandwidth calculations
9072  * Returns the dram bandwidth for display in MBytes/s
9073  */
9074 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9075 {
9076 	/* Calculate DRAM Bandwidth and the part allocated to display. */
9077 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9078 	fixed20_12 yclk, dram_channels, bandwidth;
9079 	fixed20_12 a;
9080 
9081 	a.full = dfixed_const(1000);
9082 	yclk.full = dfixed_const(wm->yclk);
9083 	yclk.full = dfixed_div(yclk, a);
9084 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9085 	a.full = dfixed_const(10);
	disp_dram_allocation.full = dfixed_const(3); /* XXX worst-case value 0.3 */
9087 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9088 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9089 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9090 
9091 	return dfixed_trunc(bandwidth);
9092 }
9093 
9094 /**
9095  * dce8_data_return_bandwidth - get the data return bandwidth
9096  *
9097  * @wm: watermark calculation data
9098  *
9099  * Calculate the data return bandwidth used for display (CIK).
9100  * Used for display watermark bandwidth calculations
9101  * Returns the data return bandwidth in MBytes/s
9102  */
9103 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9104 {
9105 	/* Calculate the display Data return Bandwidth */
9106 	fixed20_12 return_efficiency; /* 0.8 */
9107 	fixed20_12 sclk, bandwidth;
9108 	fixed20_12 a;
9109 
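	/*
	 * In plain math (sclk in kHz):
	 *   bandwidth (MB/s) ~= (sclk / 1000) * 32 * 0.8
	 * i.e. 32 bytes per engine clock at 80% return efficiency.
	 */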
9110 	a.full = dfixed_const(1000);
9111 	sclk.full = dfixed_const(wm->sclk);
9112 	sclk.full = dfixed_div(sclk, a);
9113 	a.full = dfixed_const(10);
9114 	return_efficiency.full = dfixed_const(8);
9115 	return_efficiency.full = dfixed_div(return_efficiency, a);
9116 	a.full = dfixed_const(32);
9117 	bandwidth.full = dfixed_mul(a, sclk);
9118 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9119 
9120 	return dfixed_trunc(bandwidth);
9121 }
9122 
9123 /**
9124  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9125  *
9126  * @wm: watermark calculation data
9127  *
9128  * Calculate the dmif bandwidth used for display (CIK).
9129  * Used for display watermark bandwidth calculations
9130  * Returns the dmif bandwidth in MBytes/s
9131  */
9132 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9133 {
9134 	/* Calculate the DMIF Request Bandwidth */
9135 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9136 	fixed20_12 disp_clk, bandwidth;
9137 	fixed20_12 a, b;
9138 
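	/*
	 * In plain math (disp_clk in kHz):
	 *   bandwidth (MB/s) ~= (disp_clk / 1000) * 32 * 0.8
	 */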
9139 	a.full = dfixed_const(1000);
9140 	disp_clk.full = dfixed_const(wm->disp_clk);
9141 	disp_clk.full = dfixed_div(disp_clk, a);
9142 	a.full = dfixed_const(32);
9143 	b.full = dfixed_mul(a, disp_clk);
9144 
9145 	a.full = dfixed_const(10);
9146 	disp_clk_request_efficiency.full = dfixed_const(8);
9147 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9148 
9149 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9150 
9151 	return dfixed_trunc(bandwidth);
9152 }
9153 
9154 /**
9155  * dce8_available_bandwidth - get the min available bandwidth
9156  *
9157  * @wm: watermark calculation data
9158  *
9159  * Calculate the min available bandwidth used for display (CIK).
9160  * Used for display watermark bandwidth calculations
9161  * Returns the min available bandwidth in MBytes/s
9162  */
9163 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9164 {
	/* Calculate the available bandwidth. The display can use this temporarily but not on average. */
9166 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9167 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9168 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9169 
9170 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9171 }
9172 
9173 /**
9174  * dce8_average_bandwidth - get the average available bandwidth
9175  *
9176  * @wm: watermark calculation data
9177  *
9178  * Calculate the average available bandwidth used for display (CIK).
9179  * Used for display watermark bandwidth calculations
9180  * Returns the average available bandwidth in MBytes/s
9181  */
9182 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9183 {
9184 	/* Calculate the display mode Average Bandwidth
9185 	 * DisplayMode should contain the source and destination dimensions,
9186 	 * timing, etc.
9187 	 */
9188 	fixed20_12 bpp;
9189 	fixed20_12 line_time;
9190 	fixed20_12 src_width;
9191 	fixed20_12 bandwidth;
9192 	fixed20_12 a;
9193 
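	/*
	 * In plain math (times in ns, so line_time below is in us):
	 *   bandwidth (MB/s) ~= src_width * bytes_per_pixel * vsc
	 *                       / ((active_time + blank_time) / 1000)
	 */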
9194 	a.full = dfixed_const(1000);
9195 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9196 	line_time.full = dfixed_div(line_time, a);
9197 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9198 	src_width.full = dfixed_const(wm->src_width);
9199 	bandwidth.full = dfixed_mul(src_width, bpp);
9200 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9201 	bandwidth.full = dfixed_div(bandwidth, line_time);
9202 
9203 	return dfixed_trunc(bandwidth);
9204 }
9205 
9206 /**
9207  * dce8_latency_watermark - get the latency watermark
9208  *
9209  * @wm: watermark calculation data
9210  *
9211  * Calculate the latency watermark (CIK).
9212  * Used for display watermark bandwidth calculations
9213  * Returns the latency watermark in ns
9214  */
9215 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9216 {
9217 	/* First calculate the latency in ns */
9218 	u32 mc_latency = 2000; /* 2000 ns. */
9219 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9220 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9221 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9222 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9223 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9224 		(wm->num_heads * cursor_line_pair_return_time);
9225 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9226 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9227 	u32 tmp, dmif_size = 12288;
9228 	fixed20_12 a, b, c;
9229 
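	/*
	 * Rough structure of the calculation below: latency (ns) is the
	 * memory latency plus the dc pipe latency plus the worst-case
	 * chunk/cursor return time for the other heads; if the line
	 * buffer cannot refill a line within the active display time,
	 * the shortfall is added on top.
	 */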
9230 	if (wm->num_heads == 0)
9231 		return 0;
9232 
9233 	a.full = dfixed_const(2);
9234 	b.full = dfixed_const(1);
9235 	if ((wm->vsc.full > a.full) ||
9236 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9237 	    (wm->vtaps >= 5) ||
9238 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9239 		max_src_lines_per_dst_line = 4;
9240 	else
9241 		max_src_lines_per_dst_line = 2;
9242 
9243 	a.full = dfixed_const(available_bandwidth);
9244 	b.full = dfixed_const(wm->num_heads);
9245 	a.full = dfixed_div(a, b);
9246 
9247 	b.full = dfixed_const(mc_latency + 512);
9248 	c.full = dfixed_const(wm->disp_clk);
9249 	b.full = dfixed_div(b, c);
9250 
9251 	c.full = dfixed_const(dmif_size);
9252 	b.full = dfixed_div(c, b);
9253 
9254 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9255 
9256 	b.full = dfixed_const(1000);
9257 	c.full = dfixed_const(wm->disp_clk);
9258 	b.full = dfixed_div(c, b);
9259 	c.full = dfixed_const(wm->bytes_per_pixel);
9260 	b.full = dfixed_mul(b, c);
9261 
9262 	lb_fill_bw = min(tmp, dfixed_trunc(b));
9263 
9264 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9265 	b.full = dfixed_const(1000);
9266 	c.full = dfixed_const(lb_fill_bw);
9267 	b.full = dfixed_div(c, b);
9268 	a.full = dfixed_div(a, b);
9269 	line_fill_time = dfixed_trunc(a);
9270 
9271 	if (line_fill_time < wm->active_time)
9272 		return latency;
9273 	else
		return latency + (line_fill_time - wm->active_time);
}
9277 
9278 /**
9279  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9280  * average and available dram bandwidth
9281  *
9282  * @wm: watermark calculation data
9283  *
9284  * Check if the display average bandwidth fits in the display
9285  * dram bandwidth (CIK).
9286  * Used for display watermark bandwidth calculations
9287  * Returns true if the display fits, false if not.
9288  */
9289 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9290 {
9291 	if (dce8_average_bandwidth(wm) <=
9292 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9293 		return true;
9294 	else
9295 		return false;
9296 }
9297 
9298 /**
9299  * dce8_average_bandwidth_vs_available_bandwidth - check
9300  * average and available bandwidth
9301  *
9302  * @wm: watermark calculation data
9303  *
9304  * Check if the display average bandwidth fits in the display
9305  * available bandwidth (CIK).
9306  * Used for display watermark bandwidth calculations
9307  * Returns true if the display fits, false if not.
9308  */
9309 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9310 {
9311 	if (dce8_average_bandwidth(wm) <=
9312 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9313 		return true;
9314 	else
9315 		return false;
9316 }
9317 
9318 /**
9319  * dce8_check_latency_hiding - check latency hiding
9320  *
9321  * @wm: watermark calculation data
9322  *
9323  * Check latency hiding (CIK).
9324  * Used for display watermark bandwidth calculations
9325  * Returns true if the display fits, false if not.
9326  */
9327 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9328 {
9329 	u32 lb_partitions = wm->lb_size / wm->src_width;
9330 	u32 line_time = wm->active_time + wm->blank_time;
9331 	u32 latency_tolerant_lines;
9332 	u32 latency_hiding;
9333 	fixed20_12 a;
9334 
9335 	a.full = dfixed_const(1);
9336 	if (wm->vsc.full > a.full)
9337 		latency_tolerant_lines = 1;
9338 	else {
9339 		if (lb_partitions <= (wm->vtaps + 1))
9340 			latency_tolerant_lines = 1;
9341 		else
9342 			latency_tolerant_lines = 2;
9343 	}
9344 
9345 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9346 
9347 	if (dce8_latency_watermark(wm) <= latency_hiding)
9348 		return true;
9349 	else
9350 		return false;
9351 }
9352 
9353 /**
9354  * dce8_program_watermarks - program display watermarks
9355  *
9356  * @rdev: radeon_device pointer
9357  * @radeon_crtc: the selected display controller
9358  * @lb_size: line buffer size
9359  * @num_heads: number of display controllers in use
9360  *
9361  * Calculate and program the display watermarks for the
9362  * selected display controller (CIK).
9363  */
9364 static void dce8_program_watermarks(struct radeon_device *rdev,
9365 				    struct radeon_crtc *radeon_crtc,
9366 				    u32 lb_size, u32 num_heads)
9367 {
9368 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9369 	struct dce8_wm_params wm_low, wm_high;
9370 	u32 pixel_period;
9371 	u32 line_time = 0;
9372 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9373 	u32 tmp, wm_mask;
9374 
9375 	if (radeon_crtc->base.enabled && num_heads && mode) {
9376 		pixel_period = 1000000 / (u32)mode->clock;
9377 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9378 
9379 		/* watermark for high clocks */
9380 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9381 		    rdev->pm.dpm_enabled) {
9382 			wm_high.yclk =
9383 				radeon_dpm_get_mclk(rdev, false) * 10;
9384 			wm_high.sclk =
9385 				radeon_dpm_get_sclk(rdev, false) * 10;
9386 		} else {
9387 			wm_high.yclk = rdev->pm.current_mclk * 10;
9388 			wm_high.sclk = rdev->pm.current_sclk * 10;
9389 		}
9390 
9391 		wm_high.disp_clk = mode->clock;
9392 		wm_high.src_width = mode->crtc_hdisplay;
9393 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9394 		wm_high.blank_time = line_time - wm_high.active_time;
9395 		wm_high.interlaced = false;
9396 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9397 			wm_high.interlaced = true;
9398 		wm_high.vsc = radeon_crtc->vsc;
9399 		wm_high.vtaps = 1;
9400 		if (radeon_crtc->rmx_type != RMX_OFF)
9401 			wm_high.vtaps = 2;
9402 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9403 		wm_high.lb_size = lb_size;
9404 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9405 		wm_high.num_heads = num_heads;
9406 
9407 		/* set for high clocks */
9408 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9409 
9410 		/* possibly force display priority to high */
9411 		/* should really do this at mode validation time... */
9412 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9413 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9414 		    !dce8_check_latency_hiding(&wm_high) ||
9415 		    (rdev->disp_priority == 2)) {
9416 			DRM_DEBUG_KMS("force priority to high\n");
9417 		}
9418 
9419 		/* watermark for low clocks */
9420 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9421 		    rdev->pm.dpm_enabled) {
9422 			wm_low.yclk =
9423 				radeon_dpm_get_mclk(rdev, true) * 10;
9424 			wm_low.sclk =
9425 				radeon_dpm_get_sclk(rdev, true) * 10;
9426 		} else {
9427 			wm_low.yclk = rdev->pm.current_mclk * 10;
9428 			wm_low.sclk = rdev->pm.current_sclk * 10;
9429 		}
9430 
9431 		wm_low.disp_clk = mode->clock;
9432 		wm_low.src_width = mode->crtc_hdisplay;
9433 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9434 		wm_low.blank_time = line_time - wm_low.active_time;
9435 		wm_low.interlaced = false;
9436 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9437 			wm_low.interlaced = true;
9438 		wm_low.vsc = radeon_crtc->vsc;
9439 		wm_low.vtaps = 1;
9440 		if (radeon_crtc->rmx_type != RMX_OFF)
9441 			wm_low.vtaps = 2;
9442 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9443 		wm_low.lb_size = lb_size;
9444 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9445 		wm_low.num_heads = num_heads;
9446 
9447 		/* set for low clocks */
9448 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9449 
9450 		/* possibly force display priority to high */
9451 		/* should really do this at mode validation time... */
9452 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9453 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9454 		    !dce8_check_latency_hiding(&wm_low) ||
9455 		    (rdev->disp_priority == 2)) {
9456 			DRM_DEBUG_KMS("force priority to high\n");
9457 		}
9458 
9459 		/* Save number of lines the linebuffer leads before the scanout */
9460 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9461 	}
9462 
9463 	/* select wm A */
9464 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9465 	tmp = wm_mask;
9466 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9467 	tmp |= LATENCY_WATERMARK_MASK(1);
9468 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9469 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9470 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9471 		LATENCY_HIGH_WATERMARK(line_time)));
9472 	/* select wm B */
9473 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9474 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9475 	tmp |= LATENCY_WATERMARK_MASK(2);
9476 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9477 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9478 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9479 		LATENCY_HIGH_WATERMARK(line_time)));
9480 	/* restore original selection */
9481 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9482 
9483 	/* save values for DPM */
9484 	radeon_crtc->line_time = line_time;
9485 	radeon_crtc->wm_high = latency_watermark_a;
9486 	radeon_crtc->wm_low = latency_watermark_b;
9487 }
9488 
9489 /**
9490  * dce8_bandwidth_update - program display watermarks
9491  *
9492  * @rdev: radeon_device pointer
9493  *
9494  * Calculate and program the display watermarks and line
9495  * buffer allocation (CIK).
9496  */
9497 void dce8_bandwidth_update(struct radeon_device *rdev)
9498 {
9499 	struct drm_display_mode *mode = NULL;
9500 	u32 num_heads = 0, lb_size;
9501 	int i;
9502 
9503 	if (!rdev->mode_info.mode_config_initialized)
9504 		return;
9505 
9506 	radeon_update_display_priority(rdev);
9507 
9508 	for (i = 0; i < rdev->num_crtc; i++) {
9509 		if (rdev->mode_info.crtcs[i]->base.enabled)
9510 			num_heads++;
9511 	}
9512 	for (i = 0; i < rdev->num_crtc; i++) {
9513 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9514 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9515 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9516 	}
9517 }
9518 
9519 /**
9520  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9521  *
9522  * @rdev: radeon_device pointer
9523  *
 * Fetches a GPU clock counter snapshot (CIK).
9525  * Returns the 64 bit clock counter snapshot.
9526  */
9527 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9528 {
9529 	uint64_t clock;
9530 
9531 	mutex_lock(&rdev->gpu_clock_mutex);
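	/* writing 1 latches the free-running counter so the two 32-bit
	 * reads below form a consistent 64-bit snapshot
	 */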
9532 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9533 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9534 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9535 	mutex_unlock(&rdev->gpu_clock_mutex);
9536 	return clock;
9537 }
9538 
9539 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9540                               u32 cntl_reg, u32 status_reg)
9541 {
9542 	int r, i;
9543 	struct atom_clock_dividers dividers;
9544 	uint32_t tmp;
9545 
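	/*
	 * Look up a post divider for the requested clock via atombios,
	 * program it, then poll the status register (up to ~1s) for the
	 * divider change to take effect.
	 */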
9546 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9547 					   clock, false, &dividers);
9548 	if (r)
9549 		return r;
9550 
9551 	tmp = RREG32_SMC(cntl_reg);
9552 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9553 	tmp |= dividers.post_divider;
9554 	WREG32_SMC(cntl_reg, tmp);
9555 
9556 	for (i = 0; i < 100; i++) {
9557 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9558 			break;
9559 		mdelay(10);
9560 	}
9561 	if (i == 100)
9562 		return -ETIMEDOUT;
9563 
9564 	return 0;
9565 }
9566 
9567 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9568 {
9569 	int r = 0;
9570 
9571 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9572 	if (r)
9573 		return r;
9574 
9575 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9576 	return r;
9577 }
9578 
9579 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9580 {
9581 	int r, i;
9582 	struct atom_clock_dividers dividers;
9583 	u32 tmp;
9584 
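	/* note: only the ECCLK divider is programmed here; the evclk
	 * argument is accepted but currently unused
	 */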
9585 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9586 					   ecclk, false, &dividers);
9587 	if (r)
9588 		return r;
9589 
9590 	for (i = 0; i < 100; i++) {
9591 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9592 			break;
9593 		mdelay(10);
9594 	}
9595 	if (i == 100)
9596 		return -ETIMEDOUT;
9597 
9598 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9599 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9600 	tmp |= dividers.post_divider;
9601 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9602 
9603 	for (i = 0; i < 100; i++) {
9604 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9605 			break;
9606 		mdelay(10);
9607 	}
9608 	if (i == 100)
9609 		return -ETIMEDOUT;
9610 
9611 	return 0;
9612 }
9613 
9614 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9615 {
9616 	struct pci_dev *root = rdev->pdev->bus->self;
9617 	int bridge_pos, gpu_pos;
9618 	u32 speed_cntl, mask, current_data_rate;
9619 	int ret, i;
9620 	u16 tmp16;
9621 
9622 	if (radeon_pcie_gen2 == 0)
9623 		return;
9624 
9625 	if (rdev->flags & RADEON_IS_IGP)
9626 		return;
9627 
9628 	if (!(rdev->flags & RADEON_IS_PCIE))
9629 		return;
9630 
9631 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9632 	if (ret != 0)
9633 		return;
9634 
9635 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9636 		return;
9637 
9638 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9639 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9640 		LC_CURRENT_DATA_RATE_SHIFT;
9641 	if (mask & DRM_PCIE_SPEED_80) {
9642 		if (current_data_rate == 2) {
9643 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9644 			return;
9645 		}
9646 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9647 	} else if (mask & DRM_PCIE_SPEED_50) {
9648 		if (current_data_rate == 1) {
9649 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9650 			return;
9651 		}
9652 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9653 	}
9654 
9655 	bridge_pos = pci_get_pciecap_ptr(root->dev.bsddev);
9656 	if (!bridge_pos)
9657 		return;
9658 
9659 	gpu_pos = pci_get_pciecap_ptr(rdev->pdev->dev.bsddev);
9660 	if (!gpu_pos)
9661 		return;
9662 
9663 	if (mask & DRM_PCIE_SPEED_80) {
9664 		/* re-try equalization if gen3 is not already enabled */
9665 		if (current_data_rate != 2) {
9666 			u16 bridge_cfg, gpu_cfg;
9667 			u16 bridge_cfg2, gpu_cfg2;
9668 			u32 max_lw, current_lw, tmp;
9669 
9670 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9671 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9672 
9673 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9674 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9675 
9676 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9677 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9678 
9679 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9680 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9681 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9682 
9683 			if (current_lw < max_lw) {
9684 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9685 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9686 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9687 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9688 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9689 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9690 				}
9691 			}
9692 
9693 			for (i = 0; i < 10; i++) {
9694 				/* check status */
9695 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9696 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9697 					break;
9698 
9699 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9700 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9701 
9702 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9703 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9704 
9705 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9706 				tmp |= LC_SET_QUIESCE;
9707 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9708 
9709 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9710 				tmp |= LC_REDO_EQ;
9711 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9712 
9713 				mdelay(100);
9714 
9715 				/* linkctl */
9716 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9717 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9718 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9719 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9720 
9721 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9722 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9723 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9724 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9725 
9726 				/* linkctl2 */
9727 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9728 				tmp16 &= ~((1 << 4) | (7 << 9));
9729 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9730 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9731 
9732 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9733 				tmp16 &= ~((1 << 4) | (7 << 9));
9734 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9735 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9736 
9737 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9738 				tmp &= ~LC_SET_QUIESCE;
9739 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9740 			}
9741 		}
9742 	}
9743 
9744 	/* set the link speed */
9745 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9746 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9747 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9748 
9749 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9750 	tmp16 &= ~0xf;
9751 	if (mask & DRM_PCIE_SPEED_80)
9752 		tmp16 |= 3; /* gen3 */
9753 	else if (mask & DRM_PCIE_SPEED_50)
9754 		tmp16 |= 2; /* gen2 */
9755 	else
9756 		tmp16 |= 1; /* gen1 */
9757 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9758 
9759 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9760 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9761 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9762 
9763 	for (i = 0; i < rdev->usec_timeout; i++) {
9764 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9765 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9766 			break;
9767 		udelay(1);
9768 	}
9769 }
9770 
9771 static void cik_program_aspm(struct radeon_device *rdev)
9772 {
9773 	u32 data, orig;
9774 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9775 	bool disable_clkreq = false;
9776 
9777 	if (radeon_aspm == 0)
9778 		return;
9779 
9780 	/* XXX double check IGPs */
9781 	if (rdev->flags & RADEON_IS_IGP)
9782 		return;
9783 
9784 	if (!(rdev->flags & RADEON_IS_PCIE))
9785 		return;
9786 
9787 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9788 	data &= ~LC_XMIT_N_FTS_MASK;
9789 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9790 	if (orig != data)
9791 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9792 
9793 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9794 	data |= LC_GO_TO_RECOVERY;
9795 	if (orig != data)
9796 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9797 
9798 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9799 	data |= P_IGNORE_EDB_ERR;
9800 	if (orig != data)
9801 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9802 
9803 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9804 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9805 	data |= LC_PMI_TO_L1_DIS;
9806 	if (!disable_l0s)
9807 		data |= LC_L0S_INACTIVITY(7);
9808 
9809 	if (!disable_l1) {
9810 		data |= LC_L1_INACTIVITY(7);
9811 		data &= ~LC_PMI_TO_L1_DIS;
9812 		if (orig != data)
9813 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9814 
9815 		if (!disable_plloff_in_l1) {
9816 			bool clk_req_support;
9817 
9818 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9819 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9820 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9821 			if (orig != data)
9822 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9823 
9824 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9825 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9826 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9827 			if (orig != data)
9828 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9829 
9830 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9831 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9832 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9833 			if (orig != data)
9834 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9835 
9836 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9837 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9838 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9839 			if (orig != data)
9840 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9841 
9842 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9843 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9844 			data |= LC_DYN_LANES_PWR_STATE(3);
9845 			if (orig != data)
9846 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9847 
9848 			if (!disable_clkreq) {
9849 #ifdef zMN_TODO
9850 				struct pci_dev *root = rdev->pdev->bus->self;
9851 				u32 lnkcap;
9852 
9853 				clk_req_support = false;
9854 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9855 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9856 					clk_req_support = true;
9857 #else
9858 				clk_req_support = false;
9859 #endif
9860 			} else {
9861 				clk_req_support = false;
9862 			}
9863 
9864 			if (clk_req_support) {
9865 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9866 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9867 				if (orig != data)
9868 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9869 
9870 				orig = data = RREG32_SMC(THM_CLK_CNTL);
9871 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9872 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9873 				if (orig != data)
9874 					WREG32_SMC(THM_CLK_CNTL, data);
9875 
9876 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
9877 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9878 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9879 				if (orig != data)
9880 					WREG32_SMC(MISC_CLK_CTRL, data);
9881 
9882 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9883 				data &= ~BCLK_AS_XCLK;
9884 				if (orig != data)
9885 					WREG32_SMC(CG_CLKPIN_CNTL, data);
9886 
9887 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9888 				data &= ~FORCE_BIF_REFCLK_EN;
9889 				if (orig != data)
9890 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9891 
9892 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9893 				data &= ~MPLL_CLKOUT_SEL_MASK;
9894 				data |= MPLL_CLKOUT_SEL(4);
9895 				if (orig != data)
9896 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9897 			}
9898 		}
9899 	} else {
9900 		if (orig != data)
9901 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9902 	}
9903 
9904 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9905 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9906 	if (orig != data)
9907 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
9908 
9909 	if (!disable_l0s) {
9910 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9912 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9913 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9914 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9915 				data &= ~LC_L0S_INACTIVITY_MASK;
9916 				if (orig != data)
9917 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9918 			}
9919 		}
9920 	}
9921 }
9922